LSTM model not showing any output while training

bidurbhurtel · November 19, 2020, 4:11pm

I have built an LSTM model that takes input data with 3 features, using a rolling window of size 18. While training the model in a Jupyter notebook, the cell shows that training is running, but no loss or accuracy values are ever printed. It seems like the training process is stuck somewhere. I had implemented the same model with the same data in Keras, and there the training errors were reported quite quickly.

Here is the model that I built.

class LSTMnetwork(nn.Module):
    """Two stacked LSTM layers followed by two fully-connected layers.

    The forward pass treats its input as one sequence with batch size 1,
    runs it through ``lstm1 -> dropout -> lstm2 -> dropout -> fc1 -> ReLU
    -> fc2`` and returns the prediction for the last time step only.

    The recurrent states live on the instance as ``hidden1`` and
    ``hidden2`` (each an ``(h, c)`` tuple of shape ``(1, 1, hidden_size)``)
    and are carried over from one ``forward`` call to the next. Callers
    that feed independent sequences should reset them between calls,
    either by assigning fresh zero tensors or via :meth:`reset_hidden`.

    Args:
        input_size: Number of features per time step.
        hidden_size1: Hidden size of the first LSTM layer.
        hidden_size2: Hidden size of the second LSTM layer.
        hidden_size3: Width of the first fully-connected layer.
        output_size: Number of values predicted per sequence.
    """

    def __init__(self, input_size=3, hidden_size1=24, hidden_size2=50,
                 hidden_size3=20, output_size=1):
        super().__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.hidden_size3 = hidden_size3

        # First LSTM layer and its dropout.
        self.lstm1 = nn.LSTM(input_size, hidden_size1)
        self.dropout1 = nn.Dropout(p=0.2)

        # Second LSTM layer and its dropout.
        self.lstm2 = nn.LSTM(hidden_size1, hidden_size2)
        self.dropout2 = nn.Dropout(p=0.2)

        # Fully-connected head: hidden_size2 -> hidden_size3 -> output_size.
        self.fc1 = nn.Linear(hidden_size2, hidden_size3)
        self.fc2 = nn.Linear(hidden_size3, output_size)

        # Start with zeroed (h, c) states for both LSTM layers.
        self.reset_hidden()

    def reset_hidden(self):
        """Reset ``hidden1`` and ``hidden2`` to zero-filled ``(h, c)`` tuples.

        The zeros are created with the dtype and device of the matching
        LSTM layer's weights, so this also works after the module has been
        moved or cast.
        """
        weight1 = self.lstm1.weight_ih_l0
        weight2 = self.lstm2.weight_ih_l0
        self.hidden1 = (weight1.new_zeros(1, 1, self.hidden_size1),
                        weight1.new_zeros(1, 1, self.hidden_size1))
        self.hidden2 = (weight2.new_zeros(1, 1, self.hidden_size2),
                        weight2.new_zeros(1, 1, self.hidden_size2))

    def forward(self, seq):
        """Return the prediction for the last time step of ``seq``.

        Args:
            seq: Tensor whose first dimension is the sequence length; it is
                viewed as ``(len(seq), 1, -1)``, i.e. one sequence with
                batch size 1.

        Returns:
            Tensor of shape ``(1, output_size)``.
        """
        steps = len(seq)

        lstm1_out, hidden1 = self.lstm1(seq.view(steps, 1, -1), self.hidden1)
        # Keep only the values of the new state. Holding on to its autograd
        # graph would make the next backward() walk into the already-freed
        # graph of this call whenever the caller did not reset the state.
        self.hidden1 = tuple(state.detach() for state in hidden1)
        dropout1 = self.dropout1(lstm1_out)

        lstm2_out, hidden2 = self.lstm2(dropout1.view(steps, 1, -1),
                                        self.hidden2)
        self.hidden2 = tuple(state.detach() for state in hidden2)
        dropout2 = self.dropout2(lstm2_out)

        # Only the last time step is returned, so the fully-connected head
        # is applied to that step alone rather than to every step.
        fc1_out = F.relu(self.fc1(dropout2[-1]))
        return self.fc2(fc1_out)

# Build the network together with its loss function and optimiser.
model = LSTMnetwork()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Number of full passes over the training data.
epochs = 10

# Wall-clock reference used to report the total training time.
start_time = time.time()

for epoch in range(epochs):
    for seq, label in train_data:
        # Each sample is handled as a fresh sequence: start from zeroed
        # (h, c) states for both LSTM layers and from cleared gradients.
        model.hidden1 = tuple(
            torch.zeros(1, 1, model.hidden_size1) for _ in range(2)
        )
        model.hidden2 = tuple(
            torch.zeros(1, 1, model.hidden_size2) for _ in range(2)
        )
        optimizer.zero_grad()

        # Forward pass, loss against the (1, 1)-shaped target, then update.
        y_pred_train = model(seq)
        loss = criterion(y_pred_train, label.reshape(1, 1))
        loss.backward()
        optimizer.step()

    # Report the loss of the last sample seen in this epoch.
    print(f'Epoch: {epoch+1:2} Loss: {loss.item():10.8f}')

print(f'\nDuration: {time.time() - start_time:.0f} seconds')

ptrblck · November 22, 2020, 9:16am

I get a result in ~1 second using your code on the CPU:

# Build the network together with its loss function and optimiser.
model = LSTMnetwork()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Number of training iterations; each one uses a fresh random sample.
epochs = 10

# Wall-clock reference used to report the total run time.
start_time = time.time()

for epoch in range(epochs):
    # Start every iteration from zeroed (h, c) states and cleared gradients.
    model.hidden1 = tuple(
        torch.zeros(1, 1, model.hidden_size1) for _ in range(2)
    )
    model.hidden2 = tuple(
        torch.zeros(1, 1, model.hidden_size2) for _ in range(2)
    )
    optimizer.zero_grad()

    # Random stand-in for the real data: a 128-step sequence with 3
    # features per step, and a single target value.
    seq = torch.randn(128, 1, 3)
    label = torch.randn(1, 1)

    # Forward pass, loss against the (1, 1)-shaped target, then update.
    y_pred_train = model(seq)
    loss = criterion(y_pred_train, label.reshape(1, 1))
    loss.backward()
    optimizer.step()

    # Report the loss of this iteration.
    print(f'Epoch: {epoch+1:2} Loss: {loss.item():10.8f}')

print(f'\nDuration: {time.time() - start_time:.0f} seconds')

So I guess your script might be hanging somewhere else.
Could you remove the data loading part and check whether the code also returns quickly with random data?

bidurbhurtel · November 28, 2020, 10:13pm

Sure, I will try that. Thank you.