RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of dimension: 1

Hello, I’m trying to train this model:

class Perceptron(nn.Module):
    def __init__(self):
        super(Perceptron, self).__init__()
        self.fc = nn.Linear(28, 10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        output = self.fc(x)
        output = self.relu(x)
        return output

on the MNIST dataset, loaded as follows:
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True,
                                      transform=torchvision.transforms.Compose([
                                          torchvision.transforms.ToTensor(),
                                          torchvision.transforms.Normalize(
                                              (0.1307,), (0.3081,))
                                      ]))

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size_train,
                                          shuffle=True, num_workers=2, pin_memory=pin_mem)

But I get this error:
RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of dimension: 1

I’m using nn.CrossEntropyLoss()

Your model output shape is most likely wrong, since your architecture applies the linear and relu layers to a 4-dimensional input and would thus output [batch_size, 1, 28, 10], which I doubt is your intended use case.
Flatten the inputs first via x = x.view(x.size(0), -1) and change the in_features of the linear layer to 28*28.
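
For example, with a dummy batch shaped like the MNIST loader output (a minimal sketch of the suggested change, not your full model):

import torch
import torch.nn as nn

fc = nn.Linear(28 * 28, 10)     # in_features matches the flattened 28x28 image
x = torch.randn(20, 1, 28, 28)  # dummy batch shaped like the MNIST loader output
x = x.view(x.size(0), -1)       # [20, 1, 28, 28] -> [20, 784]
out = fc(x)
print(out.shape)                # torch.Size([20, 10])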

Thank you for your answer. I did that, and now I’m getting this error:
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

Could you post a minimal, executable code snippet showing this error, please? The view operation won’t detach the computation graph, so something else must be wrong in your code.
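
For reference, a small sketch with dummy tensors showing that view keeps the graph intact:

import torch
import torch.nn as nn

fc = nn.Linear(28 * 28, 10)
x = torch.randn(2, 1, 28, 28)
out = fc(x.view(x.size(0), -1))    # view keeps the tensor attached to the graph
out.mean().backward()
print(fc.weight.grad is not None)  # True: gradients flow through the view op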

In the snippet I’m trying to gather the gradients. I tried x.requires_grad_(), which solved the first error, but then I found that the param.grad values were None.

I don’t have access to your notebook as it’s restricted, so please post the code snippet here.

import torch
import random
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer, required
from scipy import stats
import seaborn as sns
import scipy
import torchvision.models as models
import copy
import sys
import matplotlib as mpl

seed = 142857
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU
np.random.seed(seed)              # NumPy module
random.seed(seed)                 # Python random module
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

batch_size_train = 20  # batch size
tr_split_len = 20      # total number of training samples to use

train_on_gpu = True
lr = 0.01
num_epochs = 30
cuda = torch.device('cuda')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

te_split_len = 10
batch_size_test = 1
pin_mem = True

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True,
                                      transform=torchvision.transforms.Compose([
                                          torchvision.transforms.ToTensor(),
                                          torchvision.transforms.Normalize(
                                              (0.1307,), (0.3081,))
                                      ]))

part_tr = torch.utils.data.random_split(trainset, [tr_split_len, len(trainset)-tr_split_len])[0]

trainloader = torch.utils.data.DataLoader(part_tr, batch_size=batch_size_train,
                                          shuffle=True, num_workers=2, pin_memory=pin_mem)

class Perceptron(nn.Module):
    def __init__(self):
        super(Perceptron, self).__init__()
        self.fc = nn.Linear(28 * 28, 10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = x.view(x.size(0), -1)
        output = self.fc(x)
        output = self.relu(x)
        return output

criterion1 = nn.CrossEntropyLoss()

net1 = Perceptron()

net1 = nn.DataParallel(net1)
net1.to(device)

optimizer1 = torch.optim.SGD(net1.parameters(), lr=lr, momentum=0)

for epoch in range(num_epochs):  # loop over the dataset multiple times

    epoch_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        if train_on_gpu:
            inputs, labels = inputs.to(device), labels.to(device)

        current_grads = []
        # zero the parameter gradients
        optimizer1.zero_grad()

        # forward + backward + optimize
        outputs = net1(inputs)
        loss = criterion(outputs, labels)

        loss.backward()

        # gather the flattened gradients of all parameters
        for param in net1.parameters():
            current_grads.append(param.grad.view(-1))
        current_grads = torch.cat(current_grads)
        grads.append(current_grads)

        optimizer1.step()
        epoch_loss += loss.item()

    print('epoch_loss', epoch, ':', epoch_loss)

You are passing x again to self.relu instead of output, which makes the output non-differentiable, since no trainable parameters were used to compute it.
After fixing this, as well as a few more issues that made your code non-executable (e.g. loss = criterion(...) should use criterion1, and the grads list is never defined), the code works fine for me.
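
For completeness, here is a sketch of the corrected module (using the imports from your snippet):

class Perceptron(nn.Module):
    def __init__(self):
        super(Perceptron, self).__init__()
        self.fc = nn.Linear(28 * 28, 10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = x.view(x.size(0), -1)   # flatten to [batch_size, 784]
        output = self.fc(x)
        output = self.relu(output)  # pass the linear output to relu, not x
        return output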


It worked, thank you!