RuntimeError: Given groups=1, weight of size [16, 4, 3, 3], expected input[128, 1, 1, 256] to have 4 channels, but got 1 channels instead

I’m getting a channel-mismatch error in my code.
Shape of x_train features: torch.Size([75202, 4, 250])
Shape of y_train labels: torch.Size([75202, 1])
Shape of x_test features: torch.Size([18801, 4, 250])
Shape of y_test labels: torch.Size([18801, 1])

Code I’m working on:

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = []
        for feature in features:
            if len(feature) >= 2:
                self.features.append(feature)
        self.labels = labels
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]
    
class RelationAwareFeatureExtractor(nn.Module):
    def __init__(self):
        super(RelationAwareFeatureExtractor, self).__init__()

        # ConvNet layers
        self.conv1 = nn.Conv2d(4, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2 = nn.Conv2d(16, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv3 = nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))    
        self.fc1 = nn.Linear(32, 1024)
        self.fc2 = nn.Linear(1024, 256)      


    def forward(self, x):
        print("first shape",x.shape)
        # ConvNet layers
        x = self.conv1(x)
        x = F.relu(x)
        print("2 shape",x.shape)
        x = F.relu(self.conv2(x))
        print("3 shape",x.shape)
        x = F.relu(self.conv3(x))
        print("4 shape",x.shape)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        print("5 shape",x.shape)
        return x

class SelfAttention(nn.Module):
    def __init__(self, hidden_size):
        super(SelfAttention, self).__init__()

        self.W = nn.Linear(hidden_size,hidden_size)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        x = self.W(x).squeeze(2)
        x = F.softmax(x, dim=1).unsqueeze(2)
        return x

class ConditionalRandomFields(nn.Module):
    def __init__(self, hidden_size):
        super(ConditionalRandomFields, self).__init__()

        self.relation_aware_feature_extractor = RelationAwareFeatureExtractor()
        self.self_attention = SelfAttention(hidden_size)
        self.crf = nn.Linear(hidden_size, 2)

    def forward(self, x):
        x = self.relation_aware_feature_extractor(x)
        x = self.self_attention(x)
        x = self.crf(x)
        return x

class AnomalyDetector(nn.Module):
    def __init__(self):
        super(AnomalyDetector, self).__init__()
        
        # Feature extractor
        self.feature_extractor = RelationAwareFeatureExtractor()

        # Self-attention layer
        self.self_attention = SelfAttention(256)

        # Conditional random fields layer
        self.conditional_random_fields = ConditionalRandomFields(256)

    def forward(self, x):
        # Extract features
        x = self.feature_extractor(x)
        print("6 shape",x.shape)
        # Apply self-attention
        x = self.self_attention(x)
        print("7 shape",x.shape)
        # Compute the log-likelihood
        log_likelihood = self.conditional_random_fields(x)

        return log_likelihood

batch_size = 4

# Define the train and test datasets
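# x_train/y_train and x_test/y_test are assumed to be prepared elsewhere
# with the shapes shown above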
train_dataset = CustomDataset(x_train, y_train)
test_dataset = CustomDataset(x_test, y_test)

# Define the train and test loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

num_epochs = 10

# Create an instance of the AnomalyDetector
model = AnomalyDetector()

# Define the loss function (negative log-likelihood)
criterion = nn.NLLLoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(num_epochs):
    for inputs, labels in train_loader:  # Iterate over training dataset
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        
        # Calculate the loss
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()

# After training, saving the model
torch.save(model.state_dict(), 'anomaly_detector.pth')

The error I’m facing:

first shape torch.Size([4, 4, 250])
2 shape torch.Size([16, 2, 125])
3 shape torch.Size([64, 1, 63])
4 shape torch.Size([128, 1, 32])
5 shape torch.Size([128, 1, 256])
6 shape torch.Size([128, 1, 256])
7 shape torch.Size([128, 1, 1, 256])
first shape torch.Size([128, 1, 1, 256])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Input In [21], in <cell line: 22>()
     24 optimizer.zero_grad()
     26 # Forward pass
---> 27 outputs = model(inputs)
     29 # Calculate the loss
     30 loss = criterion(outputs, labels)

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [18], in AnomalyDetector.forward(self, x)
    107 print("7 shape",x.shape)
    108 #x = x.permute(0, 2, 1)
    109 #print("8 shape",x.shape)
    110 # Compute the log-likelihood
--> 111 log_likelihood = self.conditional_random_fields(x)
    113 return log_likelihood

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [18], in ConditionalRandomFields.forward(self, x)
     80 def forward(self, x):
---> 81     x = self.relation_aware_feature_extractor(x)
     82     x = self.self_attention(x)
     83     x = self.crf(x)

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [18], in RelationAwareFeatureExtractor.forward(self, x)
     37 print("first shape",x.shape)
     38 # ConvNet layers
---> 39 x = self.conv1(x)
     40 x = F.relu(x)
     41 print("2 shape",x.shape)

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File ~\anaconda3\lib\site-packages\torch\nn\modules\conv.py:457, in Conv2d.forward(self, input)
    456 def forward(self, input: Tensor) -> Tensor:
--> 457     return self._conv_forward(input, self.weight, self.bias)

File ~\anaconda3\lib\site-packages\torch\nn\modules\conv.py:453, in Conv2d._conv_forward(self, input, weight, bias)
    449 if self.padding_mode != 'zeros':
    450     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    451                     weight, bias, self.stride,
    452                     _pair(0), self.dilation, self.groups)
--> 453 return F.conv2d(input, weight, bias, self.stride,
    454                 self.padding, self.dilation, self.groups)

RuntimeError: Given groups=1, weight of size [16, 4, 3, 3], expected input[128, 1, 1, 256] to have 4 channels, but got 1 channels instead

It’s probably better to provide a minimal example along with what you expect to happen and why. Your code fails at conv1, so the rest isn’t really relevant. Note from your trace that conv1 is actually reached twice: the first call (from AnomalyDetector.feature_extractor, with the [4, 4, 250] input) happens to get through, and it’s the second call, inside ConditionalRandomFields, which wraps its own RelationAwareFeatureExtractor, that raises on the [128, 1, 1, 256] input.
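Here’s a minimal sketch reproducing both calls, using only the shapes from your trace:

import torch
import torch.nn as nn

conv1 = nn.Conv2d(4, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))

# First call: a 3-D input is treated as an unbatched (C, H, W) image, so
# C=4 matches in_channels=4 and the call succeeds (with no batch dimension).
print(conv1(torch.randn(4, 4, 250)).shape)   # torch.Size([16, 2, 125])

# Second call: a 4-D input is read as (N, C, H, W), so C=1 != 4 and it raises.
conv1(torch.randn(128, 1, 1, 256))           # RuntimeError: ... but got 1 channels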

You’re passing a tensor of size [4, 4, 250] to nn.Conv2d(4, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)), i.e. in_channels=4 and out_channels=16. Is the first 4 supposed to be interpreted as a batch dimension or as the channel dimension?

Normally, when calling Conv2d, you’d pass a tensor of shape NxCxHxW, but yours has only 3 dimensions. What are you trying to do? There’s also a special case described in a “NOTE” in the docs for groups == in_channels (depthwise convolution): is that what you’re trying to do?

https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
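
If your data really is (batch, channels, length) = (N, 4, 250), as the x_train shape suggests, here’s a sketch of two common ways to feed it to a conv layer (the kernel/stride/padding values are illustrative, not a drop-in fix for the rest of your model):

import torch
import torch.nn as nn

x = torch.randn(4, 4, 250)  # one batch shaped like your x_train

# Option 1: treat it as a 1-D signal with 4 channels and use Conv1d.
conv1d = nn.Conv1d(4, 16, kernel_size=3, stride=2, padding=1)
print(conv1d(x).shape)                # torch.Size([4, 16, 125])

# Option 2: keep Conv2d by inserting a dummy height dimension: (N, C, 1, L).
conv2d = nn.Conv2d(4, 16, kernel_size=(1, 3), stride=(1, 2), padding=(0, 1))
print(conv2d(x.unsqueeze(2)).shape)   # torch.Size([4, 16, 1, 125])

Either way, you’d still need to fix the downstream shapes (nn.Linear(32, 1024) assumes the conv output ends in 32 features) and the fact that ConditionalRandomFields runs the whole feature extractor a second time on already-extracted features.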