RuntimeError: Given groups=1, weight of size [28, 1, 3], expected input[1, 128, 19994] to have 1 channels, but got 128 channels instead

I’m trying to convert my TensorFlow model to PyTorch; however, I’m a bit confused about the dimension ordering, i.e. in which order things such as batch_size and channels have to be passed.

I started by creating my own dataset. Afterwards I defined my model; however, running it raises the following RuntimeError.

RuntimeError: Given groups=1, weight of size [28, 1, 3], expected input[1, 128, 19994] to have 1 channels, but got 128 channels instead

My current code looks like this:


import torch
import torch.nn as nn
import torch.optim as optim 
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F


# Synthetic stand-in data. The 4000 samples sit on the LAST axis of X1/X2,
# each sample being a single row of 9999 values.
X1 = torch.randn((1, 9999, 4000))   # standard-normal values
X2 = torch.rand(1, 9999, 4000)      # uniform values in [0, 1)

# One scalar auxiliary feature per sample.
aux1 = torch.randn(4000)
aux2 = torch.randn(4000)
aux3 = torch.randn(4000)

# One (floating-point) target value per sample.
y = torch.randn(4000)

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that yields one sample per index of the LAST axis.

    ``x1`` and ``x2`` are sliced as ``[:, :, idx]``; the auxiliary inputs and
    the target are indexed as ``[idx]``.

    The class keeps the name ``Dataset`` used by the rest of the script, which
    shadows the ``Dataset`` imported from ``torch.utils.data``; the base class
    is therefore referenced through its fully qualified name.

    Args:
        x1: First main input, samples along the last axis.
        x2: Second main input, same layout as ``x1``.
        aux1: First per-sample auxiliary value.
        aux2: Second per-sample auxiliary value.
        aux3: Third per-sample auxiliary value.
        y: Per-sample target.
    """

    def __init__(self, x1, x2, aux1, aux2, aux3, y):
        self.x1 = x1
        self.x2 = x2
        self.aux1 = aux1
        self.aux2 = aux2
        self.aux3 = aux3
        self.y = y

    def __len__(self):
        """Return the number of samples (size of the last axis of ``x1``)."""
        return self.x1.shape[-1]

    @staticmethod
    def _as_copy(value, dtype):
        """Return an independent tensor copy of ``value`` with ``dtype``.

        ``torch.tensor(existing_tensor)`` emits a UserWarning on every call,
        so tensors are copied with ``detach().clone()`` instead; anything else
        (NumPy arrays, Python numbers, ...) still goes through ``torch.tensor``.
        """
        if isinstance(value, torch.Tensor):
            return value.detach().clone().to(dtype)
        return torch.tensor(value, dtype=dtype)

    def __getitem__(self, idx):
        """Return ``([x1, x2, aux1, aux2, aux3], y)`` for sample ``idx``.

        The five inputs are float32 tensors; the target is cast to int64.
        """
        inputs = [
            self._as_copy(self.x1[:, :, idx], torch.float32),
            self._as_copy(self.x2[:, :, idx], torch.float32),
            self._as_copy(self.aux1[idx], torch.float32),
            self._as_copy(self.aux2[idx], torch.float32),
            self._as_copy(self.aux3[idx], torch.float32),
        ]
        target = self._as_copy(self.y[idx], torch.long)
        return (inputs, target)
    
# Wrap the raw tensors in the custom dataset, then batch and shuffle them
# for training.
dataset = Dataset(x1=X1, x2=X2, aux1=aux1, aux2=aux2, aux3=aux3, y=y)
train_loader = DataLoader(dataset=dataset, batch_size=128, shuffle=True)


class MyModel(nn.Module):
    """Two-branch 1-D convolutional model with three auxiliary scalar inputs.

    ``x1`` and ``x2`` each pass through their own single-channel ``Conv1d``,
    are flattened per sample and concatenated, go through a shared
    convolutional backbone, are joined with ``aux1``/``aux2``/``aux3`` and
    finally mapped to 3 outputs by a lazily-sized linear layer.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        # Conv1d arguments are (in_channels, out_channels, kernel_size, stride).
        self.features_amp = nn.Sequential(
            nn.Conv1d(1, 1, 3, 1),
        )
        self.features_phase = nn.Sequential(
            nn.Conv1d(1, 1, 3, 1),
        )
        # The first backbone layer expects exactly 1 input channel.
        self.backbone = nn.Sequential(
            nn.Conv1d(1,28,3,1),
            nn.Conv1d(28,28,3,1),
            nn.AvgPool1d(3),
            nn.ReLU(),
        )

        # LazyLinear infers its input size from the first tensor it receives.
        self.classifier = nn.LazyLinear(3)


    def forward(self, x1, x2, aux1, aux2, aux3):
        """Compute the 3-way output for one batch.

        Assumes ``x1``/``x2`` arrive as [batch, 1, seq_len] -- TODO confirm
        against the dataset/loader that feeds this model.
        """
        x1 = self.features_amp(x1)
        x2 = self.features_phase(x2)

        # Collapse everything except the batch dimension: -> [batch, features].
        x1 = x1.view(x1.size(0), -1)

        x2 = x2.view(x2.size(0), -1)
        # Join both branches along the feature axis; x is now 2-D.
        x = torch.cat((x1, x2), dim=-1)

        #print(x.size())
        # NOTE(review): x has no channel dimension here. Conv1d interprets a
        # 2-D input as a single unbatched sample [channels, seq_len], so a
        # batch of 128 is seen as 128 channels -- this is the line that raises
        # the "expected input[1, 128, 19994] to have 1 channels" RuntimeError.
        x = self.backbone(x)

        # NOTE(review): flatten() without start_dim collapses ALL dimensions,
        # including the batch dimension.
        x = torch.flatten(x)
        #print(x.size())
        x = torch.cat([x, aux1, aux2, aux3], dim=-1)

        shelf_life_clf = self.classifier(x)
        return shelf_life_clf


# Instantiate the network with its default (float32) parameters.
model = MyModel()

# NOTE: no optimizer is built here. The Adam optimizer that train() actually
# uses is created further down, right before the training loop; an earlier
# instance with a different learning rate would be overwritten unused.


def train(epoch):
    """Run one training epoch over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion1`` and
    ``train_loader``.

    Args:
        epoch: Index of the current epoch; only used in the progress message.

    Returns:
        A list with one detached scalar loss tensor per batch.
    """
    model.train()
    # Feed every batch on the device the model's parameters live on, so the
    # inputs and the model can never end up on different devices.
    device = next(model.parameters()).device
    arr_loss = []
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        # `data` is the list of the five model inputs produced by the dataset.
        amp, phase, weight, temperature, humidity = (t.to(device) for t in data)
        target = target.to(device)

        optimizer.zero_grad()
        # Pass the tensors that were actually moved to `device`.
        output1 = model(amp, phase, weight, temperature, humidity)

        loss = criterion1(output1, target)

        loss.backward()
        optimizer.step()

        # Count real samples; len(data) would be the number of inputs (5),
        # not the batch size.
        samples_seen += len(target)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))
        arr_loss.append(loss.detach())
    return arr_loss




# Optimizer used by train(); this binding replaces any earlier `optimizer`.
optimizer = optim.Adam(params=model.parameters(), lr=3e-5)

# Multi-class loss operating on raw logits and integer class indices.
criterion1 = nn.CrossEntropyLoss()

n_epochs = 10

for epoch in range(n_epochs):
    # train() returns the list of per-batch losses for this epoch.
    loss = train(epoch)

What I don’t understand is that the value of 128 reported in the error corresponds to my batch size. However, this is something I’m getting out of batch_idx. This also relates to the question of whether I’m passing my custom dataset to the DataLoader correctly. I would be glad for some guidance.

The error is raised in the first conv layer in self.backbone as the input activation is passed as a 2-dimensional tensor in the shape [128, 19994].
In the common use case nn.Conv1d expects a 3-dimensional tensor in the shape [batch_size, channels, seq_len]. However, in newer versions you can also pass a 2-dimensional tensor to this layer which will then assume you are passing a single sample (implicit batch_size=1) in the shape [channels, seq_len]. In this case, this layer will unsqueeze the batch dimension and create the tensor of [1, 128, 19994], which creates the error message.
Since 128 is your batch size, make sure to pass an input in the shape of [batch_size=128, 1, seq_len] via:

# Insert the channel dimension: [batch_size, seq_len] -> [batch_size, 1, seq_len].
x = x.unsqueeze(1)
x = self.backbone(x)

Hello,

thank you for your reply. I changed my code accordingly and it seems to work, except for the last linear layer, where the model output doesn’t match the target.
What I receive is the size mismatch RuntimeError: size mismatch (got input: [3], target: [128]). Maybe you can help me understand this better.

Here is a revision of my code:

import torch
import torch.nn as nn
import torch.optim as optim 
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F


# Synthetic stand-in data, now batch-first: 4000 samples, 1 channel,
# 9999 values per sample.
X1 = torch.randn((4000, 1, 9999))
X2 = torch.randn(4000, 1, 9999)

# One scalar auxiliary feature per sample.
aux1 = torch.randn(4000)
aux2 = torch.randn(4000)
aux3 = torch.randn(4000)

# One (floating-point) target value per sample.
y = torch.randn(4000)

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that yields one sample per index of the FIRST axis.

    Every stored array is indexed as ``[idx]``, so all of them must share the
    same leading (sample) dimension.

    The class keeps the name ``Dataset`` used by the rest of the script, which
    shadows the ``Dataset`` imported from ``torch.utils.data``; the base class
    is therefore referenced through its fully qualified name.

    Args:
        x1: First main input, samples along the first axis.
        x2: Second main input, same layout as ``x1``.
        aux1: First per-sample auxiliary value.
        aux2: Second per-sample auxiliary value.
        aux3: Third per-sample auxiliary value.
        y: Per-sample target.
    """

    def __init__(self, x1, x2, aux1, aux2, aux3, y):
        self.x1 = x1
        self.x2 = x2
        self.aux1 = aux1
        self.aux2 = aux2
        self.aux3 = aux3
        self.y = y

    def __len__(self):
        """Return the number of samples (size of the first axis of ``x1``)."""
        return self.x1.shape[0]

    @staticmethod
    def _as_copy(value, dtype):
        """Return an independent tensor copy of ``value`` with ``dtype``.

        ``torch.tensor(existing_tensor)`` emits a UserWarning on every call,
        so tensors are copied with ``detach().clone()`` instead; anything else
        (NumPy arrays, Python numbers, ...) still goes through ``torch.tensor``.
        """
        if isinstance(value, torch.Tensor):
            return value.detach().clone().to(dtype)
        return torch.tensor(value, dtype=dtype)

    def __getitem__(self, idx):
        """Return ``([x1, x2, aux1, aux2, aux3], y)`` for sample ``idx``.

        The five inputs are float32 tensors; the target is cast to int64.
        """
        inputs = [
            self._as_copy(self.x1[idx], torch.float32),
            self._as_copy(self.x2[idx], torch.float32),
            self._as_copy(self.aux1[idx], torch.float32),
            self._as_copy(self.aux2[idx], torch.float32),
            self._as_copy(self.aux3[idx], torch.float32),
        ]
        target = self._as_copy(self.y[idx], torch.long)
        return (inputs, target)
    
# Training loader: custom dataset over the raw tensors, batched and shuffled.
dataset = Dataset(x1=X1, x2=X2, aux1=aux1, aux2=aux2, aux3=aux3, y=y)
train_loader = DataLoader(dataset=dataset, batch_size=128, shuffle=True)

# Test loader: built from the very same tensors with identical settings.
dataset = Dataset(x1=X1, x2=X2, aux1=aux1, aux2=aux2, aux3=aux3, y=y)
test_loader = DataLoader(dataset=dataset, batch_size=128, shuffle=True)

class MyModel(nn.Module):
    """Two-branch 1-D convolutional model with three auxiliary scalar inputs.

    ``x1`` and ``x2`` each pass through their own single-channel ``Conv1d``,
    are flattened per sample and concatenated, go through a shared
    convolutional backbone, are joined with ``aux1``/``aux2``/``aux3`` and
    finally mapped to 3 outputs by a lazily-sized linear layer.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        # Conv1d arguments are (in_channels, out_channels, kernel_size, stride).
        self.features_amp = nn.Sequential(
            nn.Conv1d(1, 1, 3, 1),
        )
        self.features_phase = nn.Sequential(
            nn.Conv1d(1, 1, 3, 1),
        )

        # The first backbone layer expects exactly 1 input channel.
        self.backbone = nn.Sequential(
            nn.Conv1d(1,28,3,1),
            nn.Conv1d(28,28,3,1),
            nn.AvgPool1d(3),
            nn.ReLU(),
        )

        # LazyLinear infers its input size from the first tensor it receives.
        self.classifier  = nn.LazyLinear(3)

    def forward(self, x1, x2, aux1, aux2, aux3):
        """Compute the 3-way output for one batch.

        Assumes ``x1``/``x2`` arrive as [batch, 1, seq_len] -- TODO confirm
        against the dataset/loader that feeds this model.
        """
        x1 = self.features_amp(x1)
        x2 = self.features_phase(x2)

        # Collapse everything except the batch dimension: -> [batch, features].
        x1 = x1.view(x1.size(0), -1)

        x2 = x2.view(x2.size(0), -1)
        # Join both branches along the feature axis; x is now 2-D.
        x = torch.cat((x1, x2), dim=-1)

        # Re-insert the channel dimension the backbone's Conv1d requires:
        # [batch, features] -> [batch, 1, features].
        x = x.unsqueeze(1)
        x = self.backbone(x)

        # NOTE(review): flatten() without start_dim collapses ALL dimensions,
        # including the batch dimension, so x becomes one long 1-D vector.
        x = torch.flatten(x)

        #print(x.size())
        # NOTE(review): with x being 1-D, the whole batch is treated as a
        # single sample, which is why the classifier output has shape [3]
        # instead of [batch, 3].
        x = torch.cat([x, aux1, aux2, aux3], dim=-1)
        # Debug output of the concatenated feature size.
        print(x.size())

        shelf_life_clf = self.classifier(x)

        return shelf_life_clf


# Instantiate the network with its default (float32) parameters.
model = MyModel()

# NOTE: no optimizer is built here. The Adam optimizer that train() actually
# uses is created further down, right before the training loop; an earlier
# instance with a different learning rate would be overwritten unused.


def train(epoch):
    """Run one training epoch over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion1`` and
    ``train_loader``.

    Args:
        epoch: Index of the current epoch; only used in the progress message.

    Returns:
        A list with one detached scalar loss tensor per batch.
    """
    model.train()
    # Feed every batch on the device the model's parameters live on, so the
    # inputs and the model can never end up on different devices.
    device = next(model.parameters()).device
    arr_loss = []
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        # `data` is the list of the five model inputs produced by the dataset.
        amp, phase, weight, temperature, humidity = (t.to(device) for t in data)
        target = target.to(device)

        optimizer.zero_grad()
        # Pass the tensors that were actually moved to `device`.
        output1 = model(amp, phase, weight, temperature, humidity)

        loss = criterion1(output1, target)

        loss.backward()
        optimizer.step()

        # Count real samples; len(data) would be the number of inputs (5),
        # not the batch size.
        samples_seen += len(target)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))
        arr_loss.append(loss.detach())
    return arr_loss




# Optimizer used by train(); this binding replaces any earlier `optimizer`.
optimizer = optim.Adam(params=model.parameters(), lr=3e-5)

# Multi-class loss operating on raw logits and integer class indices.
criterion1 = nn.CrossEntropyLoss()

n_epochs = 10

for epoch in range(n_epochs):
    # train() returns the list of per-batch losses for this epoch.
    loss = train(epoch)

The issue is raised as x = torch.flatten(x) will flatten all dimensions, while you most likely want to keep the batch dimension and flatten the others.
You could use the same view operation from previous calls (x = x.view(x.size(0), -1)) or use x = torch.flatten(x, start_dim=1, end_dim=-1).
Afterwards the code will fail in the torch.cat operation, as the auxX tensors are 1-dimensional.
Add the feature dimension via:

# Shape [4000, 1]: the trailing feature dimension makes each batch of aux
# values 2-D, so it can be concatenated with the flattened [batch, features]
# activations.
aux1 = torch.randn(4000,1)
aux2 = torch.randn(4000,1)
aux3 = torch.randn(4000,1)

and this issue should also be fixed.

Lastly, the loss calculation will fail as the target is initialized via randn while class indices are expected.
Use

# Integer class indices drawn from {0, 1, 2}, one per sample, matching the
# 3 outputs of the classifier.
y = torch.randint(0, 3, (4000,))

and it should also work.

Hello,

thank you very much for the support. I was now also able to make it run for another use case. Thank you very much.