RuntimeError: Expected target size [3, 2], got [3]

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class simpleLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(simpleLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
#         self.tlnn = TL_NN(input_size, hidden_size, num_layers, num_classes)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
#         self.lstm = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.shape[0], self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.shape[0], self.hidden_size).to(device)

        # forward propagate lstm
#         out = self.tlnn(x) # LSTM, TLNN
        out, (h_n, c_n) = self.lstm(x, (h0, c0))
        # output at last time point
        out = self.fc(out[:])
        out_prob = F.softmax(out, 1)
        return out, out_prob



Crosval_num = 5
Accuracy_cv, Accuracy_svc = [], []
Recall_cv, Recall_svc = [], []
Precise_cv, Precise_svc = [], []
F1_cv, F1_svc = [], []
Asm_cv, Asm2_cv = [], []
Xtrainval, ytrainval = np.concatenate((Xtrain, Xtest)), np.concatenate((ytrain, ytest))
Ntrain = Xtrainval.shape[0]
Index_trval = list(range(Xtrainval.shape[0]))
labda = 0.5
Accuracy_rnn, Recall_rnn, Precise_rnn, F1_rnn = [], [], [], []
for cv_i in range(Crosval_num):
    Index_val = list(range(cv_i*Ntrain//Crosval_num, (cv_i+1)*Ntrain//Crosval_num))
    Index_train = list(set(Index_trval).difference(set(Index_val)))
    Xtrain, Xtest = Xtrainval[Index_train,:], Xtrainval[Index_val,:]
    ytrain, ytest = ytrainval[Index_train], ytrainval[Index_val]
    ytrain[ytrain==-1] = 0
    ytest[ytest == -1] = 0
    Xtrain = torch.Tensor(Xtrain)
    Xtest = torch.Tensor(Xtest)
    ytrain, ytest = torch.LongTensor(ytrain), torch.LongTensor(ytest)
    
#     print(Xtrain, Xtest, ytrain, ytest)
    epochs = 100
    N, T = Xtrain.shape[0], Xtrain.shape[1]
#     print(N,T)
    batch_size = N//10
    time_step = T       # sequence length
    input_size = 28     # features per time step
    hidden_size = 28
    num_layers = 2
    num_classes = 2
    lr = 0.01           # learning rate
    model = simpleLSTM(input_size, hidden_size, num_layers, num_classes).to(device)
    # loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr)
    loss_all, accu_all, auc_all, f1_all, recall_all, precise_all = [], [], [], [], [], []
    for epoch in range(epochs):
        for i in range(1):
            sp_idx = np.random.randint(0, Xtrain.shape[0], batch_size)
            images = Xtrain[sp_idx, :, :].to(device)
            labels = ytrain[sp_idx].to(device)
            print(labels)
            images = images.reshape(-1, time_step, input_size)
            
            # forward pass
            outputs, _ = model(images)
#             print(outputs, labels)
#             labels = labels.long()
            print(outputs.shape, labels.shape)
            loss = criterion(outputs, labels)
    
            # backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_all.append(loss.detach().cpu().numpy())
            
#             print("\n\n This is Xtest")
            
            outputs, outputs_prob = model(Xtest.to(device))
#             print("Output", outputs, outputs_prob, _ )
#             print(outputs_prob.shape)
            _, predicted = torch.max(outputs_prob.data, 1)
            
            predicted_prob = outputs_prob[:, 1]

The error is raised at this step: loss = criterion(outputs, labels)

Based on the error message, I would guess your model output has three dimensions, [batch_size, seq_len, nb_classes], while your target has only a single dimension, [batch_size].
This won't work with nn.CrossEntropyLoss, since this loss function expects one target class index per output entry, i.e. a target whose shape matches the output shape with the class dimension removed. For a sequence output, the target would thus need the shape [batch_size, seq_len], and the output would need its class dimension in dim1, i.e. [batch_size, nb_classes, seq_len].
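As a minimal sketch of the two shape conventions nn.CrossEntropyLoss accepts (the tensor values here are random and just for illustration):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

# one label per sample: output [batch_size, nb_classes], target [batch_size]
output = torch.randn(3, 2)
target = torch.randint(0, 2, (3,))
loss = criterion(output, target)  # works

# one label per time step: output [batch_size, nb_classes, seq_len],
# target [batch_size, seq_len]
output = torch.randn(3, 2, 5)
target = torch.randint(0, 2, (3, 5))
loss = criterion(output, target)  # also works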

Note that:

out = self.fc(out[:])

is wrong, as indexing via [:] returns the full tensor (all time steps), not the last one, in case you wanted to get rid of the seq_len dimension.
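If you want a single prediction per sequence, one possible fix (a sketch, assuming the last time step should be used for classification) is to select it before the linear layer:

        # out has the shape [batch_size, seq_len, hidden_size] with batch_first=True;
        # keep only the last time step -> [batch_size, hidden_size]
        out = self.fc(out[:, -1, :])  # -> [batch_size, num_classes]

With this change the output shape is [batch_size, num_classes], which matches the [batch_size] target expected by nn.CrossEntropyLoss, and F.softmax(out, 1) then normalizes over the class dimension as intended.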
