Input size error

Below is the error for the code I’m trying to run. I’m not able to understand why this error occurs. Please help me debug and understand it.

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Input In [20], in <cell line: 41>()
     43 optimizer.zero_grad()
     45 # Reshape inputs to have the correct shape
     46 #inputs = torch.unsqueeze(inputs, dim=1)  # Add a channel dimension
     47 #inputs = inputs.reshape(16,4,2)
     48 
     49 # Forward pass
---> 50 outputs = model(inputs)
     52 # Calculate the loss
     53 loss = criterion(outputs, labels)

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [1], in AnomalyDetector.forward(self, x)
     90 def forward(self, x):
     91     # Extract features
---> 92     x = self.feature_extractor(x)
     94     # Apply self-attention
     95     x = self.self_attention(x)

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [1], in RelationAwareFeatureExtractor.forward(self, x)
     34 print(x.shape)
     35 # ConvNet layers
---> 36 x = self.conv1(x)
     37 x = F.relu(x)
     38 print(x.shape)

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File ~\anaconda3\lib\site-packages\torch\nn\modules\conv.py:457, in Conv2d.forward(self, input)
    456 def forward(self, input: Tensor) -> Tensor:
--> 457     return self._conv_forward(input, self.weight, self.bias)

File ~\anaconda3\lib\site-packages\torch\nn\modules\conv.py:453, in Conv2d._conv_forward(self, input, weight, bias)
    449 if self.padding_mode != 'zeros':
    450     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    451                     weight, bias, self.stride,
    452                     _pair(0), self.dilation, self.groups)
--> 453 return F.conv2d(input, weight, bias, self.stride,
    454                 self.padding, self.dilation, self.groups)

RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [16, 1000]

Feature and label shapes before splitting:
features:
Shape: torch.Size([94003, 1000])
Size: torch.Size([94003, 1000])
Length: 94003

labels:
Array Shape: (94003,)
Array Contents: [1. 1. 1. … 0. 0. 0.]

After splitting:

X_train shape is:
torch.Size([75202, 1000])

Y_train shape is:
torch.Size([75202])

X_test shape is:
torch.Size([18801, 1000])

Y_test shape is:
torch.Size([18801])

My code:

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = []
        for feature in features:
            if len(feature) >= 2:
                self.features.append(feature)
        self.labels = labels
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]
    
class RelationAwareFeatureExtractor(nn.Module):
    def __init__(self):
        super(RelationAwareFeatureExtractor, self).__init__()

        # ConvNet layers
        self.conv1 = nn.Conv3d(4, 64, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv3d(64, 128, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv3d(128, 256, kernel_size=3, stride=2, padding=1)
        self.fc1 = nn.Linear(256 * 14 * 14, 1024)
        self.fc2 = nn.Linear(1024, 512)      


    def forward(self, x):
        print(x.shape)
        # ConvNet layers
        x = self.conv1(x)
        x = F.relu(x)
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = x.view(-1, 256 * 14 * 14)
        x = x.view(-1, 1024)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

class SelfAttention(nn.Module):
    def __init__(self, hidden_size):
        super(SelfAttention, self).__init__()

        self.W = nn.Linear(hidden_size, hidden_size)
        self.v = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.W(x)
        x = x.squeeze(1)
        x = F.softmax(self.v(x), dim=1)
        return x

class ConditionalRandomFields(nn.Module):
    def __init__(self, hidden_size):
        super(ConditionalRandomFields, self).__init__()

        self.relation_aware_feature_extractor = RelationAwareFeatureExtractor()
        self.self_attention = SelfAttention(hidden_size)
        self.crf = nn.Linear(hidden_size, 2)

    def forward(self, x):
        x = self.relation_aware_feature_extractor(x)
        x = self.self_attention(x)
        x = self.crf(x)
        return x

class AnomalyDetector(nn.Module):
    def __init__(self):
        super(AnomalyDetector, self).__init__()

        # Feature extractor
        self.feature_extractor = RelationAwareFeatureExtractor()

        # Self-attention layer
        self.self_attention = SelfAttention(256)

        # Conditional random fields layer
        self.conditional_random_fields = ConditionalRandomFields(256)

    def forward(self, x):
        # Extract features
        x = self.feature_extractor(x)

        # Apply self-attention
        x = self.self_attention(x)

        # Compute the log-likelihood
        log_likelihood = self.conditional_random_fields(x)

        return log_likelihood


batch_size = 40
input_dim = 1000
hidden_dim = 50
output_dim = 128


# Define the train and test datasets
train_dataset = CustomDataset(x_train, y_train)
test_dataset = CustomDataset(x_test, y_test)

# Define the train loader
batch_size = 16  
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

num_epochs = 10

# Create an instance of the AnomalyDetector
model = AnomalyDetector()

# Define the loss function (negative log-likelihood)
criterion = nn.NLLLoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(num_epochs):
    for inputs, labels in train_loader:  # Iterating over training dataset
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        
        # Calculate the loss
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()

# After training, saving the model
torch.save(model.state_dict(), 'anomaly_detector.pth')

The error is raised by an nn.Conv2d module, which isn’t used in your model, so I guess you have posted the wrong code.
In any case, nn.Conv2d expects a 3-dimensional (unbatched) or 4-dimensional (batched) input, as seen here:

conv = nn.Conv2d(3, 16, 3, 1, 1)

# works with a batched input
batch_size = 2
channels = 3
h, w = 224, 224
x = torch.randn(batch_size, channels, h, w)
out = conv(x)
print(out.shape)
# torch.Size([2, 16, 224, 224])

# also works with an unbatched input
x = torch.randn(channels, h, w)
out = conv(x)
print(out.shape)
# torch.Size([16, 224, 224])

# fails with a 2D input
x = torch.randn(channels, h)
out = conv(x)
# RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [3, 224]

Yes, my bad… the posted code is wrong, but even with Conv2d in place of Conv3d I’m getting the same error. So do I need to reshape my input features before using them to build the model? As mentioned above, my input features have the following shapes:
X_train shape is:
torch.Size([75202, 1000])

Y_train shape is:
torch.Size([75202])

X_test shape is:
torch.Size([18801, 1000])

Y_test shape is:
torch.Size([18801])

Yes, 2D inputs are not accepted, as shown in my code snippet.
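
If each sample is a flat 1000-dimensional feature vector, one option is to add a channel dimension and use nn.Conv1d; another is to reshape each vector into a single-channel 2D grid for nn.Conv2d. A minimal sketch of both (the 25x40 factorization of 1000 is just an assumption for illustration):

import torch
import torch.nn as nn

x = torch.randn(16, 1000)    # [batch_size, features]

# Option 1: treat the features as a 1D signal with a single channel
conv1d = nn.Conv1d(1, 64, kernel_size=3, stride=2, padding=1)
out = conv1d(x.unsqueeze(1))           # input: [16, 1, 1000]
print(out.shape)
# torch.Size([16, 64, 500])

# Option 2: reshape into a single-channel 2D "image" (assumes 1000 = 25 * 40)
conv2d = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1)
out = conv2d(x.view(16, 1, 25, 40))    # input: [batch, channels, height, width]
print(out.shape)
# torch.Size([16, 64, 13, 20])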

After changing the inputs to the sizes below, I’m facing a channel error.

My input shapes:
Shape of x_train : torch.Size([1, 84602, 1000])
Shape of y_train : torch.Size([1, 84602, 1])
Shape of x_test : torch.Size([1, 9401, 1000])
Shape of y_test : torch.Size([1, 9401, 1])

torch.Size([1, 84602, 1000])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Input In [18], in <cell line: 24>()
     26 optimizer.zero_grad()
     28 # Reshape inputs to have the correct shape
     29 #inputs = torch.unsqueeze(inputs, dim=1)  # Add a channel dimension
     30 #inputs = inputs.reshape(16,4,2)
     31 
     32 # Forward pass
---> 33 outputs = model(inputs)
     35 # Calculate the loss
     36 loss = criterion(outputs, labels)

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [15], in AnomalyDetector.forward(self, x)
     87 def forward(self, x):
     88     # Extract features
---> 89     x = self.feature_extractor(x)
     91     # Apply self-attention
     92     x = self.self_attention(x)

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [15], in RelationAwareFeatureExtractor.forward(self, x)
     34 print(x.shape)
     35 # ConvNet layers
---> 36 x = self.conv1(x)
     37 x = F.relu(x)
     38 x = F.relu(self.conv2(x))

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File ~\anaconda3\lib\site-packages\torch\nn\modules\conv.py:457, in Conv2d.forward(self, input)
    456 def forward(self, input: Tensor) -> Tensor:
--> 457     return self._conv_forward(input, self.weight, self.bias)

File ~\anaconda3\lib\site-packages\torch\nn\modules\conv.py:453, in Conv2d._conv_forward(self, input, weight, bias)
    449 if self.padding_mode != 'zeros':
    450     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    451                     weight, bias, self.stride,
    452                     _pair(0), self.dilation, self.groups)
--> 453 return F.conv2d(input, weight, bias, self.stride,
    454                 self.padding, self.dilation, self.groups)

RuntimeError: Given groups=1, weight of size [64, 4, 3, 3], expected input[1, 1, 84602, 1000] to have 4 channels, but got 1 channels instead

Make sure you understand what the dimensions in your data represent and how each dimension should be processed by the model.
Initially you posted:

X_train shape is: torch.Size([75202, 1000])

while you are now saying:

Shape of x_train : torch.Size([1, 84602, 1000])

I don’t know why one dimension increased from 75k to 84k, but I assume this is the number of samples.
If so, note that the batch dimension is dim0 for nn.Conv2d, while you are now using an input of [1, 1, 84602, 1000].
This means you are using a single sample with a single input channel and a spatial size of height=84602, width=1000.
Are you sure you want to process the input this way and use only a single sample?

The error message is raised since your conv layer expects inputs with 4 channels, but the more important question is how you want to process the data in the first place.
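
If the 84602 dimension is indeed the number of samples, it should stay in dim0, and the 1000 features of each sample would then be split into the 4 channels (plus a spatial layout) that your first conv layer expects. A minimal sketch (the 4x25x10 split with 4 * 25 * 10 = 1000 is an assumption, not something derived from your data):

import torch
import torch.nn as nn

x = torch.randn(84602, 1000)     # [num_samples, features]

# keep the samples in dim0, split the features into 4 channels of 25x10
x = x.view(-1, 4, 25, 10)        # -> [84602, 4, 25, 10]

conv = nn.Conv2d(4, 64, kernel_size=3, stride=2, padding=1)
out = conv(x)
print(out.shape)
# torch.Size([84602, 64, 13, 5])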

I mistakenly sent the wrong size information.

The input shapes I’m currently working with are:

Shape of x_train features: torch.Size([75202, 4, 250])
Shape of y_train labels: torch.Size([75202, 1])
Shape of x_test features: torch.Size([18801, 4, 250])
Shape of y_test labels: torch.Size([18801, 1])

The error I’m currently facing is:


torch.Size([4, 4, 250])
torch.Size([16, 2, 125])
torch.Size([64, 1, 63])
torch.Size([128, 1, 32])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Input In [21], in <cell line: 22>()
     24 optimizer.zero_grad()
     26 # Forward pass
---> 27 outputs = model(inputs)
     29 # Calculate the loss
     30 loss = criterion(outputs, labels)

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [18], in AnomalyDetector.forward(self, x)
     93 def forward(self, x):
     94     # Extract features
---> 95     x = self.feature_extractor(x)
     97     # Apply self-attention
     98     x = x.permute(0, 2, 1) 

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [18], in RelationAwareFeatureExtractor.forward(self, x)
     42         print(x.shape)
     43 #         x = x.view(-1, 256 * 14 * 14)
     44 #         x = x.view(-1, 1024)
---> 45         x = F.relu(self.fc1(x))
     46         x = F.relu(self.fc2(x))
     47         return x

File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File ~\anaconda3\lib\site-packages\torch\nn\modules\linear.py:114, in Linear.forward(self, input)
    113 def forward(self, input: Tensor) -> Tensor:
--> 114     return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x32 and 50176x1024)

Model architecture:

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = []
        for feature in features:
            if len(feature) >= 2:
                self.features.append(feature)
        self.labels = labels
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]
    
class RelationAwareFeatureExtractor(nn.Module):
    def __init__(self):
        super(RelationAwareFeatureExtractor, self).__init__()

        # ConvNet layers
        self.conv1 = nn.Conv2d(4, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2 = nn.Conv2d(16, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv3 = nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.fc1 = nn.Linear(256*14*14*1, 1024)
        self.fc2 = nn.Linear(1024, 256)      


    def forward(self, x):
        print(x.shape)
        # ConvNet layers
        x = self.conv1(x)
        x = F.relu(x)
        print(x.shape)
        x = F.relu(self.conv2(x))
        print(x.shape)
        x = F.relu(self.conv3(x))
        print(x.shape)
#         x = x.view(-1, 256 * 14 * 14)
#         x = x.view(-1, 1024)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

class SelfAttention(nn.Module):
    def __init__(self, hidden_size):
        super(SelfAttention, self).__init__()

        self.W = nn.Linear(hidden_size, 1)
        #self.v = nn.Linear(hidden_size, 1)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        #x = x.unsqueeze(1)
        #x = x.permute(0, 2, 1)
        x = self.W(x).squeeze(2)
        #x = x.squeeze(1)
        #x = x.permute(0, 2, 1)
        x = F.softmax(x, dim=1).unsqueeze(2)
        return x

class ConditionalRandomFields(nn.Module):
    def __init__(self, hidden_size):
        super(ConditionalRandomFields, self).__init__()

        self.relation_aware_feature_extractor = RelationAwareFeatureExtractor()
        self.self_attention = SelfAttention(hidden_size)
        self.crf = nn.Linear(hidden_size, 2)

    def forward(self, x):
        x = self.relation_aware_feature_extractor(x)
        x = self.self_attention(x)
        x = self.crf(x)
        return x

class AnomalyDetector(nn.Module):
    def __init__(self):
        super(AnomalyDetector, self).__init__()

        # Feature extractor
        self.feature_extractor = RelationAwareFeatureExtractor()

        # Self-attention layer
        self.self_attention = SelfAttention(256)

        # Conditional random fields layer
        self.conditional_random_fields = ConditionalRandomFields(256)

    def forward(self, x):
        # Extract features
        x = self.feature_extractor(x)

        # Apply self-attention
        x = x.permute(0, 2, 1) 
        x = self.self_attention(x)
        x = x.permute(0, 2, 1)

        # Compute the log-likelihood
        log_likelihood = self.conditional_random_fields(x)

        return log_likelihood


batch_size = 40
input_dim = 4
hidden_dim = 50
output_dim = 124
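
For what it’s worth, the mat1 and mat2 error above is raised because fc1 is hard-coded to 256*14*14 = 50176 input features, while the flattened conv3 activation is much smaller (128 * 1 * 32 = 4096 for the printed shapes). A minimal sketch of the extractor that flattens the conv output and lets the first linear layer infer its input size via nn.LazyLinear (available since PyTorch 1.8); the batched [N, 4, 1, 250] input layout is an assumption for illustration:

import torch
import torch.nn as nn
import torch.nn.functional as F

class RelationAwareFeatureExtractor(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(4, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 64, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.flatten = nn.Flatten()      # [N, C, H, W] -> [N, C*H*W]
        self.fc1 = nn.LazyLinear(1024)   # in_features inferred on first forward
        self.fc2 = nn.Linear(1024, 256)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

# keep the batch in dim0 and add a height dim so the input is 4D
x = torch.randn(16, 4, 250).unsqueeze(2)   # -> [16, 4, 1, 250]
model = RelationAwareFeatureExtractor()
print(model(x).shape)
# torch.Size([16, 256])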