I am currently implementing RELAX gradient estimate for stochastic function, in which gradient of variance of gradient should be computed, and after updating from 0.3 to master I’ve got following error message:

```
Expected 4-dimensional input for 4-dimensional weight [10],
but got input of size [11, 1, 32, 32] instead
```

In this particular line:

```
grad_phi = torch.autograd.grad([val], phi)[:len(phi)]
```

Here’s my code for forward/backward pass, where error appears:

```
input, target = Variable(batch_x), Variable(batch_y).type(torch.LongTensor)
mean, log_std = net(input)
dist = Normal(mean=mean, std=log_std.exp())
rsample = dist.rsample()
sample = rsample.detach()
f = hamming_loss(sample, target)
c = reduce_net(criterion(rsample, target))
log_prob = dist.log_prob(sample).sum()
first_term = torch.autograd.grad([log_prob], theta, create_graph=True)[:len(theta)]
second_term = list(torch.autograd.grad([c], theta, create_graph=True)[:len(theta)])
val = 0.
for grad_id in range(len(second_term)):
theta[grad_id].grad = first_term[grad_id] * (f - c) + second_term[grad_id]
val += (theta[grad_id].grad ** 2).sum()
grad_phi = torch.autograd.grad([val], phi)[:len(phi)]
for j, param in enumerate(phi): param.grad = grad_phi[j]
```

Here is code for modules and hamming loss:

```
class NormalParametersNet(nn.Module):
def __init__(self):
super(NormalParametersNet, self).__init__()
self.conv1 = nn.Conv2d(1, 10, 5)
self.conv2 = nn.Conv2d(10, 20, 5)
self.fc1_mean = nn.Linear(5 * 5 * 20, 140)
self.fc2_mean = nn.Linear(140, num_classes)
self.fc1_log_std = nn.Linear(5 * 5 * 20, 140)
self.fc2_log_std = nn.Linear(140, num_classes)
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), 2)
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
x = x.view(-1, 5 * 5 * 20)
mean = F.relu(self.fc1_mean(x))
mean = self.fc2_mean(mean)
log_std = F.relu(self.fc1_log_std(x))
log_std = self.fc2_log_std(log_std)
return mean, log_std
class ApproxNet(nn.Module):
def __init__(self, num_neurons=20):
super(ApproxNet, self).__init__()
self.num = num_neurons
self.linear1 = nn.Linear(1, self.num)
self.linear2 = nn.Linear(self.num, self.num)
self.linear3 = nn.Linear(self.num, 1)
def forward(self, x):
x = F.relu(self.linear1(x))
x = F.relu(self.linear2(x))
return self.linear3(x)
net = NormalParametersNet()
reduce_net = ApproxNet()
def hamming_loss(output, target):
return 1 - (output.data.max(1)[1] == target.data).sum() / target.size(0)
```

Normal is class from `torch.distributions`

. Input has shape of `11 x 1 x 32 x 32`

. So can somebody point out the mistake.

Forgot to mention:

```
criterion = nn.CrossEntropyLoss()
theta = list(net.parameters())
phi = list(reduce_net.parameters())
```

if i change input in the following way:

```
input = Variable(batch_x, requires_grad=True)
```

it fails in `.zero_grad()`

with `Can't detach views in-place. Use detach() instead`

, however if comment out zero grad everything begins to work fine.