Getting an error in cross entropy loss

Hi Guys,

Could anyone help me solve this error? I am writing PyTorch code for the first time for my image classification task.

for epoch in range(1, 31):
    train_loss, valid_loss = [], []
    model.train()
    for data, target in train_loader:
        print(data, target)
        optimizer.zero_grad()
        output = model(data)
        print(output.shape)
        print(target.shape)
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

RuntimeError: invalid argument 3: only batches of spatial targets supported (3D tensors) but got targets of dimension: 1 at /pytorch/aten/src/THNN/generic/SpatialClassNLLCriterion.c:59

It's failing at the loss_function call. I have printed the shapes of the arguments to the loss function:

Output : torch.Size([10, 16, 56, 2])
Target : torch.Size([10])

Batch size is 10.

Based on the shape of your output, it looks like you are working on some segmentation task with 16 classes.
If that's the case, your target should have the shape [10, 56, 2].

In the usual multi-class classification use case, you would provide the output as [batch_size, nb_classes] and the target as [batch_size] containing the class indices.
Have a look at the docs for more shape information.
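As a quick sanity check, here is a minimal sketch (random tensors, not your actual data) of the shapes nn.CrossEntropyLoss expects for a plain classification setup:

import torch
import torch.nn as nn

# Random stand-ins for model output and labels: 10 samples, 2 classes.
batch_size, nb_classes = 10, 2
output = torch.randn(batch_size, nb_classes)          # [batch_size, nb_classes] logits
target = torch.randint(0, nb_classes, (batch_size,))  # [batch_size] class indices

criterion = nn.CrossEntropyLoss()
loss = criterion(output, target)
print(loss)  # works, since the shapes match the expected layout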

I am working on a binary classification problem. Is it the output shape or the target shape that is causing the issue?
I have printed the target tensor as well:
tensor([1, 0, 1, 0, 0, 1, 1, 0, 0, 1])
These are my labels for 10 images in my batch.

The output shape doesn't match your use case then; it should be [10, 2].
Could you explain the additional dimensions or post your model architecture so that we can have a look?

Here is the model architecture I am using:

class Model(torch.nn.Module):

    def __init__(self):
        super(Model, self).__init__()

        self.conv1 = torch.nn.Conv2d(3, 30, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv2 = torch.nn.Conv2d(30, 16, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        self.conv3 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()

        self.conv4 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()
        self.pool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv5 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU()
        self.conv6 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu6 = nn.ReLU()
        self.conv7 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu7 = nn.ReLU()
        self.conv8 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu8 = nn.ReLU()
        self.drop2D = nn.Dropout2d(p=0.5, inplace=False)
        self.fc1 = nn.Linear(56, 2)

    def forward(self, input):

        output = self.conv1(input)
        output = self.relu1(output)
        output = self.pool1(output)
        print(output.shape)

        output = self.conv2(output)
        output = self.relu2(output)
        print(output.shape)

        output = self.conv3(output)
        output = self.relu3(output)
        print(output.shape)

        output = self.conv4(output)
        output = self.relu4(output)
        output = self.pool2(output)
        print(output.shape)

        output = self.conv5(output)
        output = self.relu5(output)
        print(output.shape)

        output = self.conv6(output)
        output = self.relu6(output)
        print(output.shape)

        output = self.conv7(output)
        output = self.relu7(output)
        print(output.shape)

        output = self.conv8(output)
        output = self.relu8(output)
        print(output.shape)

        output = self.drop2D(output)
        print(output.shape)

        output = self.fc1(output)
        print(output.shape)
        return F.log_softmax(output)

model = Model()

Thanks for the code!
It looks like you are not flattening the activation before feeding it to the linear layer, which would be the usual use case.
Currently dim1 and dim2 are treated as “additional features” which is probably not your use case.
Set the number of input features of your last linear layer to:

self.fc1 = nn.Linear(16*56*56, 2)

and add this flattening operation in your forward:

output = self.drop2D(output)
output = output.view(output.size(0), -1)
output = self.fc1(output)
return F.log_softmax(output, 1)  # add the dim argument to log_softmax
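
As a side note, if you are unsure what the flattened feature size will be for your input resolution, one option is to run a dummy tensor through the conv/pool part once and read off the size. This is only a sketch; the nn.Sequential below and the 224x224 input size are assumptions, so replace them with your actual layers and image size:

import torch
import torch.nn as nn

# Assumed stand-in for the conv/pool part of the model up to the second pooling
# layer (the later 16->16 convs with padding=1 keep the shape unchanged).
conv_part = nn.Sequential(
    nn.Conv2d(3, 30, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(30, 16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
)

with torch.no_grad():
    dummy = torch.zeros(1, 3, 224, 224)                # assumed input size
    n_features = conv_part(dummy).view(1, -1).size(1)

print(n_features)  # 50176 here; use this value as in_features for fc1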

Thanks for the suggestions, but now I am getting the error below:

RuntimeError: size mismatch, m1: [10 x 4096], m2: [50176 x 2] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:266

Below is my entire code:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(torch.nn.Module):

    def __init__(self):
        super(Model, self).__init__()

        self.conv1 = torch.nn.Conv2d(3, 30, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv2 = torch.nn.Conv2d(30, 16, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        self.conv3 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()

        self.conv4 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()
        self.pool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv5 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU()
        self.conv6 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu6 = nn.ReLU()
        self.conv7 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu7 = nn.ReLU()
        self.conv8 = torch.nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.relu8 = nn.ReLU()
        self.drop2D = nn.Dropout2d(p=0.5, inplace=False)
        self.fc1 = nn.Linear(16*56*56, 2)

    def forward(self, input):

        output = self.conv1(input)
        output = self.relu1(output)
        output = self.pool1(output)
        print(output.shape)

        output = self.conv2(output)
        output = self.relu2(output)
        print(output.shape)

        output = self.conv3(output)
        output = self.relu3(output)
        print(output.shape)

        output = self.conv4(output)
        output = self.relu4(output)
        output = self.pool2(output)
        print(output.shape)

        output = self.conv5(output)
        output = self.relu5(output)
        print(output.shape)

        output = self.conv6(output)
        output = self.relu6(output)
        print(output.shape)

        output = self.conv7(output)
        output = self.relu7(output)
        print(output.shape)

        output = self.conv8(output)
        output = self.relu8(output)
        print(output.shape)

        output = self.drop2D(output)
        print(output.shape)

        output = output.view(output.size(0), -1)
        output = self.fc1(output)
        print(output.shape)
        return F.log_softmax(output, 1)  # add the dim argument to log_softmax

model = Model()

import torch.optim as optim
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay= 1e-6, momentum = 0.9, nesterov = True)

steps = 0
running_loss = 0
print_every = 10
train_loss, test_loss = [], []

for epoch in range(1, 31):
    train_loss, valid_loss = [], []
    model.train()
    model.cuda()
    for data, target in train_loader:
        print(data.shape)
        data, target = data.to("cuda"), target.to("cuda")
        optimizer.zero_grad()
        output = model(data)
        # print(output.shape)
        print(target.shape)
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

    model.eval()
    for data, target in test_loader:
        data, target = data.to("cuda"), target.to("cuda")  # move the eval batch to the GPU as well
        output = model(data)
        loss = loss_function(output, target)
        test_loss.append(loss.item())

Hey, it seems to run now after changing the linear layer to self.fc1 = nn.Linear(4096, 2) and adding output = output.view(output.size(0), -1) in forward. Could you please explain what exactly the output.view line of code does?

Thanks so much for this help.

I assumed you are using images of the shape [3, 224, 224] and set the number of input features accordingly (the two pooling layers reduce 224 to 56, giving an activation of [16, 56, 56], i.e. 16*56*56 = 50176 features). Apparently your images are smaller, which explains the lower number of input features.

The .view(output.size(0), -1) flattens the activation into a two dimensional tensor, such that all features will be used by the linear layer.
The last layer before your linear layer outputs an activation volume of shape [batch_size, 16, 16, 16], i.e. 16 channels and a spatial size of 16x16.
Since you would like to use all these features created by your conv net, you usually flatten this activation volume and feed it into a linear layer.
The linear layer(s) learn to predict the class based on these features.

In the first version of your model, the linear layer was applied only to the last dimension (the width of 56) and repeated over all other dimensions, which is why your output had the shape [10, 16, 56, 2] instead of [10, 2].
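
To make that concrete, here is a small toy sketch (using a random activation of shape [10, 16, 16, 16] as described above, not your real one) comparing a linear layer applied directly to the 4D activation with one applied after flattening:

import torch
import torch.nn as nn

# Toy illustration with a random activation volume, not your real data.
act = torch.randn(10, 16, 16, 16)    # [batch_size, channels, height, width]

# Without flattening, nn.Linear only consumes the last dimension (here 16,
# or 56 in your first model) and keeps the remaining dims as extra dimensions:
fc_last_dim = nn.Linear(16, 2)
print(fc_last_dim(act).shape)        # torch.Size([10, 16, 16, 2])

# After flattening, all 16*16*16 = 4096 features are used for each sample:
flat = act.view(act.size(0), -1)     # [10, 4096]
fc_all = nn.Linear(16 * 16 * 16, 2)
print(fc_all(flat).shape)            # torch.Size([10, 2])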

Have a look at Stanford’s CS231n for more information about CNNs.
