Hi, I am new to PyTorch. I am trying to solve a time-series problem using a CONV-LSTM, but the training loss does not seem to be decreasing. Can someone help me understand the reason? Below is my model's architecture.
I have a scalar target in [-1, 1] and the loss I am using is MAPE.
My optimizer and scheduler are
num_epochs = 25 optimizer = optim.Adam(model_to_train.parameters(), lr=1e-1 ) #,weight_decay=1e-3 scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[int(num_epochs*0.25),int(num_epochs*0.5),int(num_epochs*0.75),int(num_epochs*0.9)], gamma=1e-1)
I am doing optimizer.zero_grad() and scheduler.step()
My data has approximately 71k samples with around 100 features each, over 72 time steps.
Thanks in advance — I really appreciate the help.
class Conv2DLSTMModel(nn.Module):
    """2-D convolutional feature extractor feeding an LSTM regressor.

    The input is treated as a single-channel "image" whose height is the
    time axis and whose width is the feature axis; two Conv2d layers plus
    max-pooling compress it, the result is flattened and fed to an LSTM,
    and the last time step is regressed through two linear layers.

    Parameters
    ----------
    in_channels : accepted for API compatibility (conv1 is hard-wired to
        a single input channel — NOTE(review): `in_channels` is never used).
    hidden_size_lstm : number of hidden units per LSTM layer.
    num_layers_lstm : number of stacked LSTM layers.
    output_size : size of the prediction emitted by the final linear layer.
    feature_num_extract : channel count produced by the conv front-end,
        which becomes the LSTM's input feature size.
    feature_window : conv kernel extent along the feature (width) axis.
    time_window : conv kernel extent along the time (height) axis.
    droup_out_prec : dropout probability shared by all Dropout layers
        (name kept as-is for keyword-argument compatibility).
    """

    def __init__(self, in_channels, hidden_size_lstm, num_layers_lstm,
                 output_size, feature_num_extract, feature_window,
                 time_window, droup_out_prec=0.2):
        super(Conv2DLSTMModel, self).__init__()
        self.hidden_size_lstm = hidden_size_lstm
        self.num_layers_lstm = num_layers_lstm

        # Conv front-end: height = time, width = features, so each kernel
        # covers a (time_window x feature_window) patch of the series.
        self.conv1 = nn.Conv2d(in_channels=1,
                               out_channels=2 * feature_num_extract,
                               kernel_size=(time_window, feature_window),
                               stride=1)
        self.dropout1 = nn.Dropout(droup_out_prec)
        self.conv2 = nn.Conv2d(in_channels=2 * feature_num_extract,
                               out_channels=feature_num_extract,
                               kernel_size=(time_window, feature_window),
                               stride=2)
        self.max_pool = nn.MaxPool2d(kernel_size=(4, 4))
        self.dropout2 = nn.Dropout(droup_out_prec)

        # Sequence model: batch_first, unidirectional.
        self.lstm = nn.LSTM(input_size=feature_num_extract,
                            hidden_size=hidden_size_lstm,
                            num_layers=num_layers_lstm,
                            batch_first=True, bidirectional=False)
        self.dropout3 = nn.Dropout(droup_out_prec)

        # Regression head on the last LSTM step.
        self.fc1 = nn.Linear(hidden_size_lstm, int(hidden_size_lstm / 2))
        self.fc1.time_distributed = True  # marker attribute, not read here
        self.dropout4 = nn.Dropout(droup_out_prec)
        self.fc2 = nn.Linear(int(hidden_size_lstm / 2), output_size)
        self.tanh = nn.Tanh()
        self.tanh1 = nn.Tanh()
        self.tanh2 = nn.Tanh()
        self.sigmoid = nn.Sigmoid()  # NOTE(review): registered but unused in forward()

    def forward(self, x):
        """Run the conv front-end, the LSTM, and the regression head.

        `x` is expected as (N, 1, H, W) with H = time, W = features —
        TODO confirm against the caller. Returns a (N, output_size)
        tensor squashed into (-1, 1) by the final Tanh.
        """
        # Convolutional feature extraction (dropout between the convs,
        # pooling after — mirrors the original ordering exactly).
        feats = self.conv1(x)
        feats = self.dropout1(feats)
        feats = self.conv2(feats)
        feats = self.dropout2(feats)
        feats = self.max_pool(feats)

        # (N, C, H, W) -> (N, C, H*W): flatten the spatial grid without
        # copying, then swap axes so channels become the LSTM feature dim,
        # giving the (N, length, features) layout batch_first expects.
        feats = feats.view(feats.size(0), feats.size(1), -1)
        feats = feats.permute(0, 2, 1)

        # Explicit zero initial hidden/cell state, one per LSTM layer,
        # shaped (num_layers, batch, hidden) and placed on x's device.
        batch = feats.size(0)
        h0 = torch.zeros(self.num_layers_lstm, batch,
                         self.hidden_size_lstm, device=feats.device)
        c0 = torch.zeros(self.num_layers_lstm, batch,
                         self.hidden_size_lstm, device=feats.device)

        seq_out, _ = self.lstm(feats, (h0, c0))
        seq_out = self.tanh(seq_out)
        seq_out = self.dropout3(seq_out)

        # LSTM output is (batch, seq_len, hidden); only the last time
        # step feeds the regression head.
        head = self.fc1(seq_out[:, -1, :])
        head = self.tanh1(head)
        head = self.dropout4(head)
        head = self.fc2(head)
        return self.tanh2(head)