Train a new model in every loop iteration

Hi!
I'm very new to PyTorch, so I'm struggling to wrap my head around a task I'm trying to complete.

I have written out an LSTM model and I can train and validate it fine.

I have many datasets, and I want to train a separate model for each one using the PyTorch LSTM network I have written. At the moment I load one dataset in, pass it through my code, and it gives me a prediction. Then I clear the kernel, load in the next dataset, train the model from scratch on that data, save the output, and so on.

I want to automate this with a loop: load a dataset, pass it through the neural network, get the output, then go back to the start of the loop and train the next dataset on a fresh network (just like what happens when I restart the kernel and train each dataset individually).

My problem is that whenever the loop moves on to a new dataset, the model is still trained on the previous one, so it overfits and the results get progressively worse as each dataset passes through the loop.

So I want each iteration of the loop to start with a completely fresh model, without the weights and biases left over from training on the previous dataset.
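To make that concrete, here is a rough sketch of the structure I'm trying to end up with (dataset_paths and load_dataset() are just placeholders for my own file handling; DataModule, LSTMpredictor and the hyperparameters are from the code below):

# Sketch of the loop I want; load_dataset() and dataset_paths are
# placeholders for my own loading code, not functions from my script.
for path in dataset_paths:
    train_inputs, train_labels, test_inputs, test_labels = load_dataset(path)

    # Everything below should start completely fresh for this dataset,
    # exactly as if I had just restarted the kernel.
    data_module = DataModule(train_inputs, train_labels,
                             test_inputs, test_labels, batch_size=bs)
    model = LSTMpredictor(input_size=n_features, hidden_size=hs)
    trainer = L.Trainer(max_epochs=epc, accelerator="auto", devices="auto")
    trainer.fit(model, data_module)

    # ...then predict on this dataset's test set, save the results,
    # and move on to the next dataset.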

I have attached my current training code (for a single dataset) below.
Thanks so much!

import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
import lightning as L
from tqdm import tqdm


# I load in a new dataset here
class DatasetClass(Dataset):

    def __init__(self, feature, label):
        self.feature = feature
        self.label = label

    def __len__(self):  # so I can call len(dataset)
        return len(self.feature)

    def __getitem__(self, idx):
        feature_i = self.feature[idx]
        label_i = self.label[idx]

        return dict(
            sequence=torch.Tensor(feature_i),
            label=torch.tensor(label_i).float()
        )

class DataModule(L.LightningDataModule):

    def __init__(self, train_sequences, train_labels, test_sequences, test_labels, batch_size):
        super().__init__()
        self.train_sequences = train_sequences
        self.train_labels = train_labels
        self.test_sequences = test_sequences
        self.test_labels = test_labels
        self.batch_size = batch_size

    def setup(self, stage=None):
        self.train_data = DatasetClass(self.train_sequences, self.train_labels)
        self.test_data = DatasetClass(self.test_sequences, self.test_labels)

    def train_dataloader(self):
        return DataLoader(self.train_data, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        return DataLoader(self.test_data, batch_size=self.batch_size, shuffle=False)



class LSTMmodel(nn.Module):

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            batch_first=True)
        self.regressor = nn.Linear(hidden_size, 1)
        self.relu = nn.ReLU()

    def forward(self, x, labels=None):  # labels are unused here; kept to match the wrapper's call
        self.lstm.flatten_parameters()
        _, (lstm_out, _) = self.lstm(x)

        output = lstm_out[-1]  # final hidden state of the last LSTM layer
        Foutput = self.regressor(output)
        return self.relu(Foutput)

class LSTMpredictor(L.LightningModule):

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.model = LSTMmodel(input_size, hidden_size)
        self.criterion = nn.MSELoss()

    def forward(self, x, labels=None):
        output = self.model(x, labels)
        loss = 0

        if labels is not None:
            loss = self.criterion(output, labels.unsqueeze(dim=1))

        return loss, output

    def training_step(self, batch, b_ind):
        sequences = batch["sequence"]
        labels = batch["label"]

        loss, outputs = self(sequences, labels)
        self.log("train_loss", loss, prog_bar=True, logger=False)
        return loss

    def test_step(self, batch, b_ind):
        sequences = batch["sequence"]
        labels = batch["label"]

        loss, outputs = self(sequences, labels)
        self.log("test_loss", loss, prog_bar=True, logger=False)
        return loss

    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.05)


data_module = DataModule(train_inputs, train_labels, test_inputs, test_labels, batch_size=bs)
model = LSTMpredictor(input_size=n_features, hidden_size=hs)

trainer = L.Trainer(max_epochs=epc, accelerator="auto", devices="auto")
trainer.fit(model, data_module)

model.eval()

labels = []
predictions = []

test_dataset = DatasetClass(test_inputs, test_labels)

with torch.no_grad():  # no gradients needed for inference
    for item in tqdm(test_dataset):
        sequence = item["sequence"]
        label = item["label"]

        _, output = model(sequence.unsqueeze(dim=0))
        predictions.append(output.item())
        labels.append(label.item())

labels = np.expand_dims(labels, 1)
predictions = np.expand_dims(predictions, 1)

# Save these predictions, then load in a new dataset at the top of the loop
# and train a whole new model, forgetting about the dataset that was just trained.
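
For reference, this is roughly how I'd save each dataset's results before moving on (the filename pattern and dataset_name are just placeholders for my own naming scheme):

np.save(f"predictions_{dataset_name}.npy", predictions)  # dataset_name is a placeholder
np.save(f"labels_{dataset_name}.npy", labels)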