Cross Entropy Loss: Target size and Output size mismatch

I have problem using Categorical Cross Entropy loss
Target data is imported from a numpy array containing label indices for 3 classes (0,1,2)

Dataset definition

class Tr_dataset(Dataset):
    def __init__(self, windowed_input, classification_target):
        self.windowed_input = windowed_input
        self.classification_target = classification_target
        
    def __len__(self):
        return len(self.windowed_input)
    
    def __getitem__(self, index):      
        x_input = self.windowed_input[index]
        x_target = self.classification_target[index]
        
        x_input_tensor = torch.Tensor(x_input)
        x_input_tensor= x_input_tensor.view(SEQUENCE_LENGTH, INPUT_SIZE)
        
        # Ground truth 
        x_target_tensor = torch.LongTensor(x_target)
        return x_input_tensor, x_target_tensor

Model

class classification_RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(classification_RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.rnn = nn.RNN(input_size = input_size, hidden_size = hidden_size,num_layers= num_layers, 
                          batch_first= True)
        
        self.out = nn.Linear(hidden_size, 3)

    def forward(self, x):

        # The first hidden layer is automatically initialized to zeros if not passed
        rnn_out, hidden = self.rnn(x)
        
        class_label = self.out(rnn_out)
        
        return class_label

Model relevant information
model = classification_RNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS)

Where:

# Model params
BATCH_SIZE = 12

# The sequence length of the windowed data input
SEQUENCE_LENGTH = 12

# This is actually the number of dimensions/ features in the input
INPUT_SIZE = 1 

# The number of features in the last hidden state (Which should definitely be one)
# This is also equal to the number of outpu time steps to predict?
HIDDEN_SIZE = 1

#The number of RNN layers to stack
NUM_LAYERS = 3

Since it is already getting too long: Partial Training loop:

for bi, (x_input, x_target) in enumerate(train_loader):
        model.train()
  
        x_input_batch, x_target_batch = x_input.to(device),  x_target.to(device)
    
        optimizer.zero_grad() 
    
        output_batch = model(x_input_batch)
        
        loss = criterion(output_batch, x_target_batch)

The 2 inputs for the criterion seem to have a size mismatch

I read in many places, I seem to be doing everything fine. But,
I get the error: ValueError: Expected target size (12, 3), got torch.Size([12, 1])