Cannot run LSTM with large input

mattNest · May 27, 2020, 8:05am

Hello, below is my LSTM model. When I run the training process, it stays stuck at EPOCH = 0. The problem does not occur when I reduce the size of the training data.

These are the current sizes of each data split:

training data: torch.Size([1426233, 110, 1]) torch.Size([1426233, 1])
validation data: torch.Size([250571, 110, 1]) torch.Size([250571, 1])
testing data: torch.Size([190521, 110, 1]) torch.Size([190521, 1])

Below is the model architecture of LSTM:

# Model hyper-parameters
# ----------------------
# The names mirror the constructor arguments of the LSTM class defined below.
input_dim, output_dim = 1, 1   # width of the model's input and of its output
hidden_dim = 128               # width of the LSTM hidden state
num_layers = 1                 # number of stacked LSTM layers


# Here we define our model as a class
class LSTM(nn.Module):
    """Batch-first LSTM followed by a linear read-out of the last time step.

    Args:
        input_dim: feature size of each time step fed to the LSTM.
        hidden_dim: size of the LSTM hidden state (and input size of the
            linear layer).
        num_layers: number of stacked LSTM layers.
        output_dim: size of the prediction produced by the linear layer.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        # Recurrent encoder; batch_first=True means inputs are (batch, seq, feature).
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        # Maps the final hidden output to the prediction.
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the linear projection of the LSTM output at the last step.

        No initial state is supplied, so the LSTM starts from its default
        (zero) hidden and cell states on every call.
        """
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1]
        return self.fc(last_step)

And here is the training/testing class:

class history_package():
    """Run the train/evaluate loop for a network and collect its loss history.

    The class relies on a module-level ``device`` that is defined outside of
    this class; the network and every batch are moved to it.

    Args:
        neural_net: network to optimise; it is moved to ``device``.
        train_loader: iterable yielding ``(batch_X, batch_y)`` pairs used for
            training.
        test_loader: iterable yielding ``(batch_X, batch_y)`` pairs used for
            evaluation.
        EPOCH: number of train + evaluate passes performed by :meth:`start`.
        LR: learning rate handed to the Adam optimizer.
        model_type: tag selecting the checkpoint file written after each
            training epoch (see ``_CHECKPOINT_FILES``); any other value trains
            without saving.
    """

    # Checkpoint file written at the end of every training epoch, keyed by
    # ``model_type``.
    _CHECKPOINT_FILES = {
        "RNN": 'RNN_model.pth',
        "LSTM": 'LSTM_model.pth',
        "GRU": 'GRU_model.pth',
        "LSTM_all_batch_1": 'LSTM_all_batch_1.pth',
    }

    def __init__(self, neural_net, train_loader, test_loader, EPOCH, LR, model_type):
        # Move the network first and only then build the optimizer, so the
        # optimizer is guaranteed to track the parameters living on `device`.
        self.net = neural_net.to(device)
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=LR)
        self.criterion = nn.MSELoss()
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.EPOCH_ = EPOCH
        self.LR_ = LR
        self.model_type = model_type

        # Let cuDNN auto-tune its kernels when running on a GPU.
        if self._is_cuda(device):
            torch.backends.cudnn.benchmark = True

    @staticmethod
    def _is_cuda(target):
        """Return True when ``target`` designates a CUDA device.

        Accepts both strings (``'cuda'``, ``'cuda:0'``) and ``torch.device``
        objects; a plain ``== 'cuda'`` comparison misses the latter two forms.
        Values that cannot be interpreted as a device count as non-CUDA.
        """
        try:
            return torch.device(target).type == 'cuda'
        except (TypeError, RuntimeError):
            return False

    def start(self):
        """Train and evaluate for ``EPOCH`` epochs.

        Returns:
            A ``(train_history_loss, test_history_loss)`` pair of lists holding
            one summed loss value per epoch.
        """
        train_history_loss = []
        test_history_loss = []

        for epoch in range(self.EPOCH_):

            print('Epoch:', epoch)
            print("============================")

            train_history_loss.append(self.train())
            test_history_loss.append(self.test())

        return train_history_loss, test_history_loss

    def train(self):
        """Run one optimisation pass over ``train_loader``.

        One optimizer step is taken per batch, and nothing is printed until
        the whole loader has been consumed. The network's ``state_dict`` is
        saved afterwards when ``model_type`` has a registered checkpoint file.

        Returns:
            The sum (not the average) of the per-batch loss values.
        """
        self.net.train()
        train_loss = 0

        for batch_X, batch_y in self.train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            self.optimizer.zero_grad()
            outputs = self.net(batch_X)

            loss = self.criterion(outputs, batch_y)
            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()

        print('【Training】Loss: %.3f' % (train_loss))

        # save the model
        checkpoint_file = self._CHECKPOINT_FILES.get(self.model_type)
        if checkpoint_file is not None:
            torch.save(self.net.state_dict(), checkpoint_file)

        return train_loss

    def test(self):
        """Evaluate the network on ``test_loader`` without tracking gradients.

        Returns:
            The sum (not the average) of the per-batch loss values.
        """
        self.net.eval()
        test_loss = 0

        with torch.no_grad():
            for batch_X, batch_y in self.test_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = self.net(batch_X)

                loss = self.criterion(outputs, batch_y)

                test_loss += loss.item()

        print('【Validation】Loss: %.3f' % (test_loss))

        return test_loss

Should I increase the batch size of the training/validation loaders to solve this problem? I currently use a batch size of 1 for both the training and validation loaders. Or should I adjust the model architecture to accommodate such a large amount of input data?