I’m making a predictor with LSTMCells, but it doesn’t improve no matter which loss function or optimizer I use. When I checked the values my cells are getting and what comes out as output, everything seems alright, but I think the problem might be that I’m feeding my model multidimensional data. This is how my model.parameters() looks after 100 epochs; I’m printing the grad attribute of each parameter:
tensor([[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]]) torch.Size([8, 5])
tensor([[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.]]) torch.Size([8, 2])
tensor([0., 0., 0., 0., 0., 0., 0., 0.]) torch.Size([8])
tensor([0., 0., 0., 0., 0., 0., 0., 0.]) torch.Size([8])
tensor([[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.]]) torch.Size([8, 2])
tensor([[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.]]) torch.Size([8, 2])
tensor([0., 0., 0., 0., 0., 0., 0., 0.]) torch.Size([8])
tensor([0., 0., 0., 0., 0., 0., 0., 0.]) torch.Size([8])
tensor([[ 0.0000, 224.0376],
[ 0.0000, 224.9807],
[ 0.0000, 222.3457],
[ 0.0000, 226.0519],
[ 0.0000, 209874.9688]]) torch.Size([5, 2])
tensor([ -224.0376, -224.9806, -222.3456, -226.0518, -209874.8438]) torch.Size([5])
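The dump comes from a plain loop over the parameters, roughly this (net is the trained model instance):

for p in net.parameters():
    print(p.grad, p.grad.shape)

In registration order that should be lstm1's weight_ih / weight_hh / bias_ih / bias_hh, then the same four for lstm2, then the weight and bias of the final linear layer.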
Changing the learning rate only slows down or speeds up the rate at which everything becomes zero.
I’m using a fairly complicated method to feed input into my model:
def forward(self, x: Tensor, future=0):
    outputs = []
    n_samples = x.size(0)
    h_t = zeros(n_samples, self.n_hidden, dtype=torch.float, device=self.device)   # hidden state for lstm1
    c_t = zeros(n_samples, self.n_hidden, dtype=torch.float, device=self.device)   # initial cell state for lstm1
    h_t2 = zeros(n_samples, self.n_hidden, dtype=torch.float, device=self.device)  # hidden state for lstm2
    c_t2 = zeros(n_samples, self.n_hidden, dtype=torch.float, device=self.device)  # initial cell state for lstm2
    output = None
    # split x into its 5 feature rows along dim -2
    stockLayer = x.split([1, 1, 1, 1, 1], -2)
    # walk through the sequence one time step at a time
    for iT in range(0, stockLayer[0].size(-1)):
        in0 = stockLayer[0].split(1, -1)[iT].view(-1)
        in1 = stockLayer[1].split(1, -1)[iT].view(-1)
        in2 = stockLayer[2].split(1, -1)[iT].view(-1)
        in3 = stockLayer[3].split(1, -1)[iT].view(-1)
        in4 = stockLayer[4].split(1, -1)[iT].view(-1)
        # recombine the 5 features of this time step into one (n_samples, 5) batch
        in_t = torch.stack((in0, in1, in2, in3, in4), -1)
        h_t, c_t = self.lstm1(in_t, (h_t, c_t))
        h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
        output = self.linear(h_t2)
        outputs.append(output)
    # predict `future` extra steps by feeding the last output back in as input
    for i in range(future):
        h_t, c_t = self.lstm1(output, (h_t, c_t))
        h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
        output = self.linear(h_t2)
        outputs.append(output)
    outputs = torch.cat(outputs, dim=1)
    return outputs
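For clarity: as far as I can tell, the split/stack per step is just a roundabout way of indexing the last dimension directly (assuming x has shape (n_samples, 5, seq_len), which is what the LSTMCell input needs), so the whole in0..in4 block could also be written as:

in_t = x[:, :, iT]  # (n_samples, 5): all five features of time step iT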
Here is an example of the input: two tensors from around the middle of the for loop (a print of in_t):
tensor([[0.9684, 0.9715, 0.9742, 0.9760, 1.2034],
[0.8043, 0.8114, 0.8204, 0.8042, 0.1811],
[0.9530, 0.9529, 0.9530, 0.9525, 0.0000],
...,
[1.0169, 1.0169, 1.0169, 1.0169, 0.5000],
[1.0000, 1.0000, 1.0000, 1.0000, 0.0000],
[1.0290, 1.0290, 1.0290, 1.0290, 0.0000]])
tensor([[0.9736, 0.9648, 0.9720, 0.9775, 1.9208],
[0.8140, 0.8232, 0.8164, 0.8061, 0.2725],
[0.9520, 0.9508, 0.9528, 0.9517, 0.0000],
...,
[1.0169, 1.0169, 1.0169, 1.0169, 0.2500],
[1.0000, 1.0000, 1.0000, 1.0000, 0.0000],
[1.0290, 1.0290, 1.0290, 1.0290, 0.0000]])
The loss function evaluates everything quite well, from what I was able to test.
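(A made-up example of the kind of check I mean: hand-written prediction/target pairs fed through nn.MSELoss come out as sensible, non-zero numbers.)

import torch
from torch import nn

pred = torch.tensor([[1.02, 0.98, 1.00, 1.01, 0.50]])
target = torch.tensor([[1.00, 1.00, 1.00, 1.00, 0.00]])
print(nn.MSELoss()(pred, target))  # tensor(0.0502)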
And lastly, my train method:
net = LSTMModule(self.hid, self.device)
net.float()
if os.path.exists(self.name):
    net.load(self.name)
# Training setup
lossFun = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=self.learningRate, momentum=0.3)
samples, outputs = self.getSamples(period, tckList)
# Training
for epoch in range(epochs):
    totalLoss = 0.0
    for (batchS, batchO) in zip(samples, outputs):
        optimizer.zero_grad(True)
        prediction = net(batchS)
        loss = lossFun(prediction, batchO)
        totalLoss += loss
        print(loss)
        print(loss.backward())
        optimizer.step()