Training loop for a Multi-Input Architecture

Hello there, I’m trying to apply multi-task learning with multiple inputs, but I don’t know how to customize the training loop. I have written my own custom dataset, which I feed forward to my neural network architecture. A sample of the code is provided below:

X1 = np.random.randint(0, 33, (1, 2000, 3000))
X2 = np.random.randint(0, 33, (1, 2000, 3000))
y = np.random.randint(0, 4, (3000))


class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.features1 = nn.Sequential(
            nn.Conv1d(9999, 3, 3, 1, 1),
            nn.MaxPool1d(2),
            nn.ReLU(),
        )
        self.features2 = nn.Sequential(
            nn.Conv1d(1, 3, 3, 1, 1),
            nn.MaxPool1d(2),
            nn.ReLU(),
        )
        self.classifier = nn.Linear(128*128*3 + 32*32*3 + 5, 4)
        
    def forward(self, x1, x2):
        x1 = self.features1(x1)
        x2 = self.features2(x2)

        x1 = x1.view(x1.size(0), -1)
        x2 = x2.view(x2.size(0), -1)
        
        x = torch.cat((x1, x2), dim=1)
        x = self.classifier(x)
        return x


class MultiTaskDataset:
    def __init__(self, amplitude, target):
        self.amplitude = amplitude
        #self.phase = phase
        self.target = target
        
    def __len__(self):
        return self.amplitude.shape[0]
    
    def __getitem__(self, idx):
        amplitude = self.amplitude[0][idx, :]
        phase = self.amplitude[1][idx,:]
        target = self.target[idx]
        return ([torch.tensor(amplitude, dtype=torch.float32), 
                torch.tensor(phase, dtype=torch.float32)],
                torch.tensor(target, dtype=torch.long))


#MultiTaskLearning
ds = MultiTaskDataset(X1, X2, y)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

ds = MultiTaskDataset(X1,X2, y)
test_loader = DataLoader(dataset, batch_size=32, shuffle=True)

random_seed = 1 # or any number you like
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
import numpy as np
np.random.seed(random_seed)

model = MyModel()

def train(epoch):
    model.train()
    #exp_lr_scheduler.step()
    
    first_batch = next(iter(train_loader))
    for batch_idx, (data, target) in enumerate(train_loader):
        #data, target =
        #amp, phase = data
        #next_batch = first_batch 
        #amp, phase, target = first_batch  
        
        print(data.shape)
        
        #print(amp)
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        
        optimizer.zero_grad()
        output = model(*data)
        loss = criterion(output, target.long())
        
        #loss.requires_grad = True
        loss.backward()
        optimizer.step()
        
        #if (batch_idx + 1)% 2 == 0:
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.data))


model = MyModel().double()

optimizer = optim.Adam(model.parameters(), lr=0.003)

criterion = nn.CrossEntropyLoss()


n_epochs = 10

for epoch in range(n_epochs):
    train(epoch)

I would appreciate guidance.


I believe the issue was the indexing into the randomly created data tensors: __len__ returns self.amplitude.shape[0], which is 1 for an array of shape (1, 2000, 3000), so the DataLoader assumed the dataset has a length of one.
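A quick shape check makes this concrete (a minimal sketch using the same array shapes as in the question):

import numpy as np

X1 = np.random.randint(0, 33, (1, 2000, 3000))
print(X1.shape[0])   # 1    -> a __len__ based on this yields a one-sample dataset
print(X1.shape[-1])  # 3000 -> one sample per column along the last axis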

I modified this script to run, and although some of the changes may conflict with the original intent, I think they illustrate the types of changes needed to get multi-input training to work: take the dataset length from the last axis, slice one sample per index, pass both inputs to the model explicitly, and use nn.LazyLinear so the flattened size does not have to be computed by hand:

import torch
from torch import nn, optim
import numpy as np
from torch.utils.data import DataLoader

X1 = np.random.randint(0, 33, (1, 2000, 3000))
X2 = np.random.randint(0, 33, (1, 2000, 3000))
y = np.random.randint(0, 4, (3000))


class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.features1 = nn.Sequential(
            nn.Conv1d(1, 3, 3, 1, 1),
            nn.MaxPool1d(2),
            nn.ReLU(),
        )
        self.features2 = nn.Sequential(
            nn.Conv1d(1, 3, 3, 1, 1),
            nn.MaxPool1d(2),
            nn.ReLU(),
        )
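        # LazyLinear infers in_features from the first forward pass, so the
        # flattened concat size no longer has to be hand-computed
        # (nn.LazyLinear is available in PyTorch 1.8+)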
        self.classifier = nn.LazyLinear(4)

    def forward(self, x1, x2):
        x1 = self.features1(x1)
        x2 = self.features2(x2)
        x1 = x1.view(x1.size(0), -1)
        x2 = x2.view(x2.size(0), -1)

        x = torch.cat((x1, x2), dim=1)
        x = self.classifier(x)
        return x


class MultiTaskDataset:
    def __init__(self, amplitude, phase, target):
        self.amplitude = amplitude
        self.phase = phase
        self.target = target

    def __len__(self):
        return self.amplitude.shape[-1]

    def __getitem__(self, idx):
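        # one sample per column along the last axis; each returned tensor has
        # shape (1, 2000), i.e. (channels, length) as nn.Conv1d expects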
        amplitude = self.amplitude[:, :, idx]
        phase = self.phase[:, :, idx]
        target = self.target[idx]
        return ([torch.tensor(amplitude, dtype=torch.float32),
                torch.tensor(phase, dtype=torch.float32)],
                torch.tensor(target, dtype=torch.long))


#MultiTaskLearning
ds = MultiTaskDataset(X1, X2, y)
train_loader = DataLoader(ds, batch_size=32, shuffle=True)

ds_test = MultiTaskDataset(X1, X2, y)
test_loader = DataLoader(ds_test, batch_size=32, shuffle=False)  # no shuffling needed for evaluation

random_seed = 1 # or any number you like
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)  # numpy is already imported at the top

model = MyModel()
if torch.cuda.is_available():
    model = model.cuda()

def train(epoch):
    model.train()
    #exp_lr_scheduler.step()

    for batch_idx, (data, target) in enumerate(train_loader):
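        # the default collate_fn batches each element of the list returned by
        # __getitem__ separately, so data is [amp_batch, phase_batch]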
        amp, phase = data
        print(amp.shape, phase.shape)

        if torch.cuda.is_available():
            amp = amp.cuda()
            phase = phase.cuda()
            target = target.cuda()


        optimizer.zero_grad()
        output = model(amp, phase)
        loss = criterion(output, target.long())

        loss.backward()
        optimizer.step()

        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * amp.size(0), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))


optimizer = optim.Adam(model.parameters(), lr=0.003)

criterion = nn.CrossEntropyLoss()


n_epochs = 10

for epoch in range(n_epochs):
    train(epoch)
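One more note: the script builds a test_loader but never uses it. Here is a minimal evaluation sketch, assuming the model, loaders, and CUDA handling defined above:

def evaluate():
    model.eval()
    correct = total = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for (amp, phase), target in test_loader:
            if torch.cuda.is_available():
                amp, phase, target = amp.cuda(), phase.cuda(), target.cuda()
            pred = model(amp, phase).argmax(dim=1)  # index of the highest logit
            correct += (pred == target).sum().item()
            total += target.size(0)
    print('Test accuracy: {:.3f}'.format(correct / total))

evaluate()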

Hey, thank you for the fast reply. This indeed solved my problem. Thank you very much.
