My problem is that when I use nn.ReLU my network learns nicely, but when I use nn.Sigmoid the weights are not updated at all. After loss.backward(), list(model.parameters())[0].grad shows all zeros with nn.Sigmoid, while with nn.ReLU the gradients are nonzero (they print as 0.0000 at the default precision, but the network does learn).
My input data is one-hot encoded and the targets are normalized to lie between 0 and 1.
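Roughly how the data looks (a sketch with random stand-in values; the real input_dim and sample count are different):

import torch

n_samples, input_dim = 256, 50  # placeholder sizes
X = torch.eye(input_dim)[torch.randint(0, input_dim, (n_samples,))]  # one-hot rows
y = torch.rand(n_samples)  # stand-in for targets normalized to [0, 1]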
EDIT:
When I remove the additional layer (w2, b2), the sigmoid version also starts to learn; see the one-layer sketch after the code below. So it is the fact that the model has two layers that stops it from learning. But why?
import torch
import torch.nn as nn

class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        torch.manual_seed(1)
        super(LinearRegressionModel, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        # hidden layer of size 20, weights and biases initialized uniformly in [0, 1)
        self.w = torch.nn.Parameter(torch.empty(input_dim, 20).uniform_(0, 1))
        self.b = torch.nn.Parameter(torch.empty(20).uniform_(0, 1))
        # output layer
        self.w2 = torch.nn.Parameter(torch.empty(20, output_dim).uniform_(0, 1))
        self.b2 = torch.nn.Parameter(torch.empty(output_dim).uniform_(0, 1))

    def activation(self):
        return torch.nn.Sigmoid()  # swap in torch.nn.ReLU() and the network learns

    def forward(self, x):
        x = x.view((x.shape[0], 1, self.input_dim))
        # first layer: batched matmul plus bias
        exp_w = self.w.expand(x.shape[0], self.w.size(0), self.w.size(1))
        out = torch.add(torch.bmm(x, exp_w), self.b)
        # second layer, followed by the activation
        exp_w2 = self.w2.expand(out.shape[0], self.w2.size(0), self.w2.size(1))
        out = self.activation()(torch.add(torch.bmm(out, exp_w2), self.b2))
        return out.view(x.shape[0])
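This is how I check the gradients, as a minimal sketch: nn.MSELoss, SGD, the learning rate, and the synthetic X/y are stand-ins for my real setup.

import torch
import torch.nn as nn

n_samples, input_dim = 256, 50
X = torch.eye(input_dim)[torch.randint(0, input_dim, (n_samples,))]  # one-hot rows, as above
y = torch.rand(n_samples)  # targets in [0, 1]

model = LinearRegressionModel(input_dim, 1)
criterion = nn.MSELoss()          # assumed loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # assumed optimizer

optimizer.zero_grad()
loss = criterion(model(X), y)
loss.backward()
print(list(model.parameters())[0].grad)  # all zeros with Sigmoid, nonzero with ReLU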
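For reference, the one-layer variant from the EDIT that does learn. This is a sketch, assuming everything else stays the same; the class name OneLayerModel is just for illustration.

class OneLayerModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        torch.manual_seed(1)
        super(OneLayerModel, self).__init__()
        self.input_dim = input_dim
        # single layer: (w2, b2) removed, w maps input directly to the output
        self.w = torch.nn.Parameter(torch.empty(input_dim, output_dim).uniform_(0, 1))
        self.b = torch.nn.Parameter(torch.empty(output_dim).uniform_(0, 1))

    def forward(self, x):
        x = x.view((x.shape[0], 1, self.input_dim))
        exp_w = self.w.expand(x.shape[0], self.w.size(0), self.w.size(1))
        # sigmoid applied directly to the single layer's output
        out = torch.nn.Sigmoid()(torch.add(torch.bmm(x, exp_w), self.b))
        return out.view(x.shape[0])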