I made a simple example of a CNN layer whose convolutional weights are defined as a linear combination of predefined filters. The goal is to train the coefficients of the linear combination while keeping the predefined filters fixed. This is easy to achieve in TensorFlow with tf.nn.conv2d, but in my PyTorch version below model.weight never changes and model.weight.grad is None after backward().
Can anyone please have a look? Thanks.
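For reference, this is roughly what I mean by the TensorFlow version (a minimal TF2-style sketch written from memory; the names images, filters and coeffs are just placeholders):

import tensorflow as tf

images = tf.random.normal([8, 28, 28, 1])                 # dummy NHWC batch
filters = tf.constant(
    [[[0.06, 0.0, 0.0], [0.1, 0.0, 0.2], [0.06, 0.1, 0.0]],
     [[0.1, 0.0, 0.0], [0.2, 0.0, 0.0], [0.1, 0.0, 0.0]]])  # (2, 3, 3), fixed
coeffs = tf.Variable(tf.random.normal([1, 1, 2]))         # trainable coefficients

with tf.GradientTape() as tape:
    kernel = tf.einsum("ijk,klm->ijlm", coeffs, filters)  # (1, 1, 3, 3)
    kernel = tf.transpose(kernel, [2, 3, 1, 0])           # HWIO layout for tf.nn.conv2d
    out = tf.nn.conv2d(images, kernel, strides=1, padding="VALID")
    loss = tf.reduce_mean(out)

print(tape.gradient(loss, coeffs))  # gradient w.r.t. the coefficients is not None

Here is the PyTorch code: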
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


class ConvNet(nn.Module):
    def __init__(self, device='cuda'):
        super(ConvNet, self).__init__()
        self.device = device
        # fixed bank of two 3x3 filters
        self.filter = torch.Tensor(
            [[[0.06, 0, 0],
              [0.1, 0, 0.2],
              [0.06, 0.1, 0]],
             [[0.1, 0, 0],
              [0.2, 0, 0],
              [0.1, 0, 0]]]).to(self.device)
        # trainable coefficients of the linear combination
        self.weight = nn.Parameter(torch.Tensor(1, 1, 2).to(self.device))
        self.bias = nn.Parameter(torch.Tensor(1)).to(self.device)
        nn.init.xavier_uniform_(self.weight)
        self.bias.data.uniform_(-1, 1)
        # kernel = linear combination of the fixed filters
        self.kernel = nn.Parameter(
            torch.einsum("ijk, klm -> ijlm",
                         self.weight, self.filter).to(self.device),
            requires_grad=False)
        self.bn1 = nn.BatchNorm2d(1)
        self.fc = nn.Linear(1 * 13 * 13, 10)

    def forward(self, x):
        out = F.conv2d(input=x, weight=self.kernel, bias=self.bias)
        out = self.bn1(out)
        out = nn.ReLU()(out)
        out = nn.MaxPool2d(kernel_size=2, stride=2)(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out
def main(argv=None):
    # Device configuration
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Hyper-parameters
    num_epochs = 1
    batch_size = 1024
    learning_rate = 0.001
    log_interval = 10

    # MNIST dataset
    train_dataset = datasets.MNIST(
        root='../../data/',
        train=True,
        transform=transforms.ToTensor(),
        download=True)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=batch_size,
        shuffle=True)

    model = ConvNet(device=device)
    model.to(device)
    for name, param in model.named_parameters():
        print(name, '\t\t', param.shape)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, target)
            # print(model.weight)
            # print(model.kernel)
            # print(model.weight.grad)
            a = list(model.parameters())[0].clone()
            loss.backward()
            optimizer.step()
            b = list(model.parameters())[0].clone()
            # comes out True every time, i.e. the first parameter (self.weight) never changes
            print(torch.equal(a.data, b.data))
            if batch_idx % log_interval == 0:
                _, predicted = torch.max(output.data, 1)
                total = target.size(0)
                correct = (predicted == target).sum().item()
                print('batch Accuracy: {} %'.format(100 * correct / total))


if __name__ == '__main__':
    main()
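My own guess: the einsum runs only once, in __init__, so self.kernel is just a constant nn.Parameter with requires_grad=False and the graph from the loss never reaches self.weight; on the GPU, nn.Parameter(torch.Tensor(1)).to(self.device) also returns a plain tensor copy, so the bias is probably not registered as a parameter either. Would recomputing the combination inside forward be the right fix? A minimal sketch of what I have in mind (ConvNetFixed is just my name for the reworked class; the fixed filters become a buffer so they follow model.to(device) without being trained):

class ConvNetFixed(nn.Module):
    def __init__(self):
        super(ConvNetFixed, self).__init__()
        # fixed filter bank: a buffer moves with the module but is never trained
        self.register_buffer('filter', torch.tensor(
            [[[0.06, 0.0, 0.0], [0.1, 0.0, 0.2], [0.06, 0.1, 0.0]],
             [[0.1, 0.0, 0.0], [0.2, 0.0, 0.0], [0.1, 0.0, 0.0]]]))
        self.weight = nn.Parameter(torch.empty(1, 1, 2))  # trainable coefficients
        self.bias = nn.Parameter(torch.empty(1))          # no .to() on the Parameter
        nn.init.xavier_uniform_(self.weight)
        nn.init.uniform_(self.bias, -1, 1)
        self.bn1 = nn.BatchNorm2d(1)
        self.fc = nn.Linear(1 * 13 * 13, 10)

    def forward(self, x):
        # rebuild the kernel from the coefficients on every call,
        # so autograd can track self.weight
        kernel = torch.einsum("ijk, klm -> ijlm", self.weight, self.filter)
        out = F.conv2d(input=x, weight=kernel, bias=self.bias)
        out = self.bn1(out)
        out = F.relu(out)
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = out.reshape(out.size(0), -1)
        return self.fc(out)

As far as I can tell this should make loss.backward() populate model.weight.grad while keeping the filters fixed. Is this the intended way, or is there a cleaner mechanism for "fixed basis, trainable coefficients" convolutions?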