Pytorch Lightning for prediction

anil_kumar1 · August 3, 2021, 7:11am

Hi There,

I am getting an error when I run the code below. The error says:

MisconfigurationException: No training_step() method defined. Lightning Trainer expects as minimum a training_step(), train_dataloader() and configure_optimizers() to be defined.

Can someone please let me know what the issue is here? I am very new to PyTorch. I am trying to simulate a sine wave using an MLP.

import numpy as np ## using again numpy library for Sin function
import torch  ## using pytorch 
import matplotlib.pyplot as plt
import pytorch_lightning as pl
import torch.optim as optim
from torch import nn
from pytorch_lightning import Trainer
from sklearn.model_selection import train_test_split
import pandas as pd
from torch.utils.data import DataLoader

# Build a toy regression set: 1000 standard-normal draws of x with target y = sin(x).
N=1000 ## 1000 samples to be generated (not referenced again in this snippet)
L=1000 ## length of each sample (not referenced again in this snippet)
T=20 ## width of wave (not referenced again in this snippet)
x = np.random.randn(1000)
y = np.sin(x)
df = pd.DataFrame({'x':x, 'y':y})
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
train, test = train_test_split(df, test_size=0.2, random_state=42, shuffle=True)
# Split each frame into features (every column except 'y') and targets (only 'y').
target_fields=['y']
train_features, train_targets = train.drop(target_fields, axis=1), train[target_fields]
test_features, test_targets = test.drop(target_fields, axis=1), test[target_fields]
class MLP(pl.LightningModule):
  # Two linear layers: 1 input feature -> 10 hidden units -> 1 output value.
  def __init__(self):
    super(MLP,self).__init__()
    self.fc1 = nn.Linear(1, 10)
    self.fc2 = nn.Linear(10, 1)
# NOTE(review): the class body ends here. The functions that follow start at
# column 0, so they are module-level functions rather than methods of MLP.
def forward(self, x):
        """Pass ``x`` through ``fc1``, an activation, then ``fc2`` and return the result.

        NOTE(review): as written this is a module-level function, not an MLP method.
        NOTE(review): ``torch.Relu`` does not look like a valid torch attribute;
        presumably ``torch.relu`` was intended -- confirm.
        """
        x = torch.Relu(self.fc1(x))
        x = self.fc2(x)
        return x
# Learning rate handed to the SGD optimizer in configure_optimizers.
l_rate = 0.2
# Mean-reduced MSE criterion shared by training_step and test_step.
mse_loss = nn.MSELoss(reduction = 'mean')

def train_dataloader(self):
        """Return a DataLoader (batch size 128) over the training features and targets.

        Both frames are converted to float tensors and paired in a TensorDataset.

        NOTE(review): ``TensorDataset`` is not among the imports shown in this
        snippet (only ``DataLoader`` is) -- confirm it is imported elsewhere.
        NOTE(review): ``train_targets`` is built from the ``'y'`` column only, so
        selecting ``'cnt'`` looks like a leftover from another dataset -- verify.
        """
        train_dataset = TensorDataset(torch.tensor(train_features.values).float(), torch.tensor(train_targets[['cnt']].values).float())
        train_loader = DataLoader(dataset = train_dataset, batch_size = 128)
        return train_loader
def test_dataloader(self):
        """Return a DataLoader (batch size 128) over the held-out features and targets.

        Both frames are converted to float tensors and paired in a TensorDataset.

        NOTE(review): ``TensorDataset`` is not among the imports shown in this
        snippet (only ``DataLoader`` is) -- confirm it is imported elsewhere.
        NOTE(review): ``test_targets`` is built from the ``'y'`` column only, so
        selecting ``'cnt'`` looks like a leftover from another dataset -- verify.
        """
        test_dataset = TensorDataset(torch.tensor(test_features.values).float(), torch.tensor(test_targets[['cnt']].values).float())
        test_loader = DataLoader(dataset = test_dataset, batch_size = 128)
        return test_loader
def configure_optimizers(self):
        """Return an SGD optimizer over ``self.parameters()`` using the module-level ``l_rate``."""
        return optim.SGD(self.parameters(), lr=l_rate)
def training_step(self, batch, batch_idx):
        """Compute the MSE between the model output and the targets for one batch.

        Returns a dict holding the loss under ``'loss'`` and again under ``'log'``.

        NOTE(review): returning a ``'log'`` key is presumably the older Lightning
        logging convention -- confirm it is still honoured by the installed version.
        """
        x, y = batch
        logits = self.forward(x)
        loss = mse_loss(logits, y)
        # Add logging
        logs = {'loss': loss}
        return {'loss': loss, 'log': logs}
def test_step(self, batch, batch_idx):
        """Evaluate one test batch and record its predictions and targets.

        Returns a dict with the batch MSE, the number of outputs that are exactly
        equal to their target, and the raw model outputs.

        NOTE(review): ``predictions_pred`` and ``predictions_actual`` are not
        defined anywhere in this snippet -- presumably lists created elsewhere;
        confirm.
        """
        x, y = batch
        logits = self.forward(x)
        loss = mse_loss(logits, y)
        # Element-wise exact equality; for real-valued regression outputs this
        # count is presumably close to zero -- TODO confirm it is useful.
        correct = torch.sum(logits == y.data)
        predictions_pred.append(logits)
        predictions_actual.append(y.data)
        return {'test_loss': loss, 'test_correct': correct, 'logits': logits}
def test_epoch_end(self, outputs):
        """Average the per-batch ``'test_loss'`` values collected in ``outputs``.

        Returns a dict exposing the mean under ``'avg_test_loss'`` and, nested as
        ``{'test_loss': mean}``, under both ``'log'`` and ``'progress_bar'``.
        """
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        logs = {'test_loss': avg_loss}      
        return {'avg_test_loss': avg_loss, 'log': logs, 'progress_bar': logs }

# Instantiate the model and train it for at most 50 epochs.
# NOTE(review): as written, MLP defines only __init__, so fit() finds no
# training_step() on the model -- this matches the error quoted above.
model = MLP()
trainer = Trainer(max_epochs = 50)  
trainer.fit(model)

adrianwaelchli · August 3, 2021, 10:09am

Hi Anil

It looks like you have a problem with indentation. The training_step etc. methods you defined are outside the LightningModule. If you just tab them in, you should not see the error anymore. If you have trouble converting your own project, you can always find some ready-to-run examples in our docs to try out first.

anil_kumar1 · August 3, 2021, 11:19am

Hi Adrian,

Thank you for pointing that out. I am new to PyTorch and this is my first experiment with it.
I corrected my indentation. Now I see a new error. Can you please help me correct it? I have attached a link to my Colab file below for your reference.

Are there any examples I can refer to in order to understand time series prediction using PyTorch Lightning?

Please let me know.

Thank you.

adrianwaelchli · August 3, 2021, 11:54pm

There is actually a simple time series example in the official PyTorch repo:

Here is my take on a converted version of that in Lightning (copy-paste, ready to run):


import matplotlib
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

from pytorch_lightning import Trainer, seed_everything, LightningDataModule, LightningModule

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np


class SeqDataModule(LightningDataModule):
    """Serve next-step prediction pairs from a saved 2-D array of sequences.

    The array is indexed as ``data[row, time]``. The first ``num_val`` rows are
    held out for validation and the remaining rows are used for training. Each
    loader yields its whole split as a single batch of ``(input, target)``
    tensors, where ``target`` is ``input`` shifted one time step ahead.

    Args:
        data_path: File read with ``torch.load``; expected to contain a NumPy array.
        num_val: Number of leading rows reserved for validation.
    """

    def __init__(self, data_path: str = "traindata.pt", num_val: int = 3):
        super().__init__()
        self.num_val = num_val
        # NOTE(review): the file is presumably a pickled NumPy array rather than
        # tensors; recent PyTorch releases may refuse to load it unless
        # ``weights_only=False`` is passed -- confirm against the installed version.
        self.data = torch.load(data_path)

    def _make_loader(self, rows):
        """Wrap ``rows`` in a single-batch DataLoader of (input, shifted target) pairs."""
        inputs = torch.from_numpy(rows[:, :-1])
        targets = torch.from_numpy(rows[:, 1:])
        # One batch holds the whole split; DataLoader rejects batch_size=0, so
        # fall back to 1 when the split is empty.
        return DataLoader(TensorDataset(inputs, targets), batch_size=max(len(inputs), 1))

    def train_dataloader(self):
        """Return the loader over every row after the validation rows."""
        loader = self._make_loader(self.data[self.num_val:])
        inputs, targets = loader.dataset.tensors
        print(inputs.shape, targets.shape)
        return loader

    def val_dataloader(self):
        """Return the loader over the first ``num_val`` rows."""
        return self._make_loader(self.data[:self.num_val])


class Sequence(LightningModule):
    """Two stacked LSTM cells plus a linear head that predict the next sequence value.

    Args:
        hidden_size: Width of both LSTM cells.
        lr: Learning rate passed to the LBFGS optimizer.
        val_future: Number of extra steps forecast (and plotted) during validation.
    """

    def __init__(self, hidden_size=51, lr=0.8, val_future=1000):
        super().__init__()
        self.hidden_size = hidden_size
        self.lr = lr
        self.val_future = val_future
        self.lstm1 = nn.LSTMCell(1, hidden_size)
        self.lstm2 = nn.LSTMCell(hidden_size, hidden_size)
        self.linear = nn.Linear(hidden_size, 1)

    def _advance(self, x_t, state):
        """Run one time step through both cells; return the prediction and the new state."""
        (h1, c1), (h2, c2) = state
        h1, c1 = self.lstm1(x_t, (h1, c1))
        h2, c2 = self.lstm2(h1, (h2, c2))
        return self.linear(h2), ((h1, c1), (h2, c2))

    def forward(self, input, future=0):
        """Predict one value per input step, then ``future`` further values.

        Args:
            input: Tensor indexed as ``[sequence, time]``; it is split along dim 1.
            future: Number of additional steps generated by feeding each
                prediction back in as the next input.

        Returns:
            Tensor with ``input.size(1) + future`` columns of predictions.

        Raises:
            ValueError: If ``input`` has no time steps.
        """
        if input.size(1) == 0:
            raise ValueError("input must contain at least one time step")

        # Allocate the initial state with the input's dtype and device so the
        # module is not tied to CPU / double precision.
        def blank():
            return input.new_zeros(input.size(0), self.hidden_size)

        state = ((blank(), blank()), (blank(), blank()))
        outputs = []
        for input_t in input.split(1, dim=1):
            output, state = self._advance(input_t, state)
            outputs.append(output)
        for _ in range(future):  # if we should predict the future
            output, state = self._advance(output, state)
            outputs.append(output)
        return torch.cat(outputs, dim=1)

    def configure_optimizers(self):
        """Return an LBFGS optimizer over all parameters using ``self.lr``."""
        return optim.LBFGS(self.parameters(), lr=self.lr)

    def training_step(self, batch, batch_idx):
        """Return the MSE between the one-step predictions and the shifted targets."""
        inputs, target = batch
        loss = F.mse_loss(self(inputs), target)
        self.print("loss:", loss.item())
        return loss

    def validation_step(self, batch, batch_idx):
        """Print the validation MSE and save a plot of fitted and forecast values.

        The plot is written to ``predict<global_step>.pdf``; solid lines cover the
        input range and dotted lines the forecast steps. At most three sequences
        are drawn.
        """
        inputs, target = batch
        future = self.val_future
        seq_len = inputs.size(1)
        pred = self(inputs, future=future)
        # Only the first seq_len columns have a ground-truth target.
        loss = F.mse_loss(pred[:, :seq_len], target)
        print("test loss:", loss.item())
        # Move to the CPU first: .numpy() is only available on CPU tensors.
        y = pred.detach().cpu().numpy()

        # draw the result
        plt.figure(figsize=(30, 10))
        try:
            plt.title("Predict future values for time sequences\n(Dashlines are predicted values)", fontsize=30)
            plt.xlabel("x", fontsize=20)
            plt.ylabel("y", fontsize=20)
            plt.xticks(fontsize=20)
            plt.yticks(fontsize=20)

            past = np.arange(seq_len)
            ahead = np.arange(seq_len, seq_len + future)
            # zip stops at the shorter operand, so fewer than three sequences is fine.
            for series, color in zip(y, "rgb"):
                plt.plot(past, series[:seq_len], color, linewidth=2.0)
                plt.plot(ahead, series[seq_len:], color + ":", linewidth=2.0)
            plt.savefig(f"predict{self.global_step:d}.pdf")
        finally:
            # Always release the figure, even if saving fails.
            plt.close()


if __name__ == "__main__":
    # Seed the RNGs before anything is constructed.
    seed_everything(seed=0)
    # 64-bit precision, stopping after 15 training steps.
    trainer = Trainer(precision=64, max_steps=15)
    model = Sequence()
    datamodule = SeqDataModule()
    trainer.fit(model, datamodule=datamodule)

You need to generate the data first using this code:

import numpy as np
import torch

# Generate N phase-shifted sine waves of L samples each and store them in
# 'traindata.pt' for the training script to load.
np.random.seed(2)  # fixed seed so the generated file is reproducible

T = 20    # scale divisor: sample k of a wave is sin((k + offset) / T)
L = 1000  # samples per wave
N = 100   # number of waves

# Each row holds the sample indices 0..L-1 shifted by one random integer offset
# drawn from [-4T, 4T); reshape(N, 1) broadcasts that offset along the row.
x = np.empty((N, L), 'int64')
x[:] = np.arange(L) + np.random.randint(-4 * T, 4 * T, N).reshape(N, 1)
data = np.sin(x / T).astype('float64')
# Pass the path rather than an open handle so torch.save opens and closes the
# file itself; the original left the handle open.
torch.save(data, 'traindata.pt')

The above is equivalent to the pytorch example and produces the same output. Hope this helps.

Ross_Allen · December 20, 2021, 8:54pm

Thank you for translating the time series example to Lightning. I’ve run this code and find that the MSE loss steadily improves (decreases) up to the 4th epoch, getting down to < 1e-4. During the 5th epoch, the loss U-turns and rapidly explodes to > 1e+20 and never recovers for the subsequent epochs.

Wondering if you saw something similar? I’m trying to learn PyTorch Lightning for the first time, so I’m trying to figure out if it is a problem with the original PyTorch example, with the translation to Lightning, or with the translation to my code (the last seems unlikely because I tried directly copy-and-pasting your code and still got the same result).

Thanks!

Ross_Allen · December 20, 2021, 9:19pm

As a follow up to my previous comment, it appears that the problem is in the underlying pytorch example.

Setting a lower learning rate should probably fix it.