I use `expand` to repeat a tensor of shape N x D so that it becomes N x D x H x W inside a module. The following is a simplified version of the code:
import torch
from torch.autograd import Variable
class TwoLayerNet(torch.nn.Module):
    """Two-layer network that tiles the hidden activation over an h x w grid.

    The input is passed through ``linear1`` and clamped at zero, the
    resulting N x H activation is expanded to N x H x h x w, flattened to
    N x (H * h * w), and passed through ``linear2``.

    Args:
        D_in: Input feature dimension.
        H: Hidden feature dimension.
        D_out: Output feature dimension.
        h: Height of the grid the hidden activation is repeated over.
        w: Width of the grid the hidden activation is repeated over.
    """

    def __init__(self, D_in, H, D_out, h, w):
        super(TwoLayerNet, self).__init__()
        # Keep the sizes on the module so forward() does not depend on
        # module-level globals that happen to share these names.
        self.hidden_dim = H
        self.grid_h = h
        self.grid_w = w
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H * h * w, D_out)

    def forward(self, x):
        """Return the N x D_out prediction for an N x D_in input ``x``."""
        # Take the batch size from the input instead of hard-coding it.
        batch = x.size(0)
        h_relu = self.linear1(x).clamp(min=0)  # -> N x H
        h_relu = torch.unsqueeze(torch.unsqueeze(h_relu, 2), 3)  # -> N x H x 1 x 1
        h_expand = h_relu.expand(
            batch, self.hidden_dim, self.grid_h, self.grid_w
        )  # -> N x H x h x w
        # expand() returns a non-contiguous view, so copy before flattening.
        h_flat = h_expand.contiguous().view(batch, -1)  # -> N x (H * h * w)
        y_pred = self.linear2(h_flat)  # -> N x D_out
        return y_pred
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension;
# h and w are the grid sizes the hidden activation is repeated over.
N, D_in, H, D_out, h, w = 64, 1000, 100, 10, 6, 6

# Random input and target tensors for the toy regression problem.
x = Variable(torch.randn(N, D_in), requires_grad=True)
y = Variable(torch.randn(N, D_out), requires_grad=False)

model = TwoLayerNet(D_in, H, D_out, h, w)
# size_average=False sums the squared errors instead of averaging them.
criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

for t in range(500):
    # Forward pass: compute the prediction and the loss.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Read the scalar loss in a way that works both on old releases
    # (1-element tensor, indexable) and on releases where the loss is a
    # 0-dim tensor that must be read with .item().
    loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
    print(t, loss_value)

    # Backward pass: clear stale gradients, backpropagate, update weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
The forward pass works and prints: (0, 667.63525390625)
But the backward pass fails with this error:
Traceback (most recent call last):
File "script_test.py", line 36, in <module>
loss.backward()
File “/usr/local/lib/python2.7/dist-packages/torch/autograd/variable.py”, line 151, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
File "/usr/local/lib/python2.7/dist-packages/torch/autograd/__init__.py", line 98, in backward
variables, grad_variables, retain_graph)
File “/usr/local/lib/python2.7/dist-packages/torch/autograd/function.py”, line 90, in apply
return self._forward_cls.backward(self, *args)
File “/usr/local/lib/python2.7/dist-packages/torch/autograd/_functions/pointwise.py”, line 286, in backward
return grad_output * mask, None
File “/usr/local/lib/python2.7/dist-packages/torch/autograd/variable.py”, line 789, in mul
return self.mul(other)
File “/usr/local/lib/python2.7/dist-packages/torch/autograd/variable.py”, line 310, in mul
return Mul.apply(self, other)
File “/usr/local/lib/python2.7/dist-packages/torch/autograd/_functions/basic_ops.py”, line 50, in forward
return a.mul(b)
RuntimeError: inconsistent tensor size at ~/pytorch/torch/lib/TH/generic/THTensorMath.c:875
Can anyone help me figure out the problem?