RuntimeError: The size of tensor a (51) must match the size of tensor b (43) at non-singleton dimension 1

This is my first project with neural networks, so please be nice (and maybe dumb it down a bit).

So I'm trying to make a chatbot and train it on a custom dataset, but when I feed the data into the model I get this error:
Traceback (most recent call last):
  File "/home/xamotex1000/Scripts/Machine Learning/LLM Virtual Assistant/Train.py", line 41, in <module>
    loss = criterion(outputs, y_train)
  File "/home/xamotex1000/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/xamotex1000/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/xamotex1000/.local/lib/python3.10/site-packages/torch/nn/modules/loss.py", line 535, in forward
    return F.mse_loss(input, target, reduction=self.reduction)
  File "/home/xamotex1000/.local/lib/python3.10/site-packages/torch/nn/functional.py", line 3338, in mse_loss
    expanded_input, expanded_target = torch.broadcast_tensors(input, target)
  File "/home/xamotex1000/.local/lib/python3.10/site-packages/torch/functional.py", line 76, in broadcast_tensors
    return _VF.broadcast_tensors(tensors)  # type: ignore[attr-defined]
RuntimeError: The size of tensor a (51) must match the size of tensor b (43) at non-singleton dimension 1

Here’s my code:

Train.py:

import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from ParseData import load_data
import nltk
from nltk.corpus import words

X_train, y_train = load_data("Dataset/Personality")

X_train = X_train.view(-1, 1)

if os.path.exists('simple_model.pth'):
    model = torch.load('simple_model.pth')
else:
    class SimpleModel(nn.Module):
        def __init__(self):
            super(SimpleModel, self).__init__()
            self.fc = nn.Linear(1, 1)

        def forward(self, x):
            return self.fc(x)
        
    model = torch.nn.Sequential(
        torch.nn.Linear(1, 51),
        torch.nn.Flatten(start_dim=1, end_dim=-1)
    )

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 1000
for epoch in range(num_epochs):
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

torch.save(model, 'simple_model.pth')

ParseData.py:

import os
import re
from sklearn import preprocessing
import torch
import nltk
from nltk.corpus import words
from torch.nn.utils.rnn import pad_sequence

def load_data(folder_path):
	prompts = []
	responses = []
	emotion_tags = []
	nltk.download("words")
	engwords = words.words()
    
	for root, dirs, files in os.walk(folder_path):
		for file in files:
			if file.endswith(".txt"):
				file_path = os.path.join(root, file)
				with open(file_path, "r", encoding="utf-8") as f:
					segments = f.read().strip().split(":")
					prompt = segments[1].strip().lower()
					response = segments[3].strip().lower()
					emotions = [e.strip() for e in segments[5].strip()[1:-1].split(",")]
					prompts.append(prompt)
					responses.append(response)
					emotion_tags.append(emotions)

	le = preprocessing.LabelEncoder()
	le.fit(engwords)

	tokenized_prompts = [sentence.split() for sentence in prompts]
	tokenized_responses = [sentence.split() for sentence in responses]
	vocab_prompts = set(word for sentence in tokenized_prompts for word in sentence)
	vocab_responses = set(word for sentence in tokenized_responses for word in sentence)
	word_to_idx_prompts = {word: idx for idx, word in enumerate(vocab_prompts)}
	word_to_idx_responses = {word: idx for idx, word in enumerate(vocab_responses)}
	indexed_prompts = [[word_to_idx_prompts[word] for word in sentence] for sentence in tokenized_prompts]
	indexed_responses = [[word_to_idx_responses[word] for word in sentence] for sentence in tokenized_responses]

	x = [torch.tensor(sentence, dtype=torch.float32) for sentence in indexed_prompts]
	y = [torch.tensor(sentence, dtype=torch.float32) for sentence in indexed_responses]
	x = pad_sequence(x, batch_first=True, padding_value=0)
	y = pad_sequence(y, batch_first=True, padding_value=0)
	return x, y

def load_string(string):
	prompt = string.strip().lower().split()

	le = preprocessing.LabelEncoder()
	le.fit(prompt)

	print(prompt)

	x = torch.tensor(le.transform(prompt), dtype=torch.float32)

	return x

any help is appreciated, thanks!

Your loss calculation fails in:

loss = criterion(outputs, y_train)

because outputs and y_train have different, incompatible shapes. PyTorch tries to broadcast the two tensors, but that fails as well:

import torch
import torch.nn as nn

criterion = nn.MSELoss()
outputs = torch.randn(1, 51)
y_train = torch.randn(1, 43)

loss = criterion(outputs, y_train)
# RuntimeError: The size of tensor a (51) must match the size of tensor b (43) at non-singleton dimension 1

Print the shapes of these tensors and try to narrow down where each dimension comes from.
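For example, right before the failing line (the shapes in the comments are what your code should produce; the exact sizes depend on your data):

print(X_train.shape)  # (num_tokens, 1) after the .view(-1, 1) reshape
print(outputs.shape)  # (num_tokens, 51) -- the 51 is hard-coded in Linear(1, 51)
print(y_train.shape)  # (num_responses, 43) -- 43 = length of the longest padded response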
Based on your code, the model is a single linear layer that always returns 51 output features (the nn.Linear(1, 51)), while the target's size in dim 1 comes from pad_sequence, i.e. the length of your longest tokenized response (43 here). You need to decide which shape is actually the right one and make the other side match.
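If you just want the shapes to line up, one option (a sketch only, assuming each padded prompt row should map to one padded response row, which means dropping the X_train.view(-1, 1) reshape) is to take both layer sizes from the data instead of hard-coding 51:

import torch.nn as nn
from ParseData import load_data

X_train, y_train = load_data("Dataset/Personality")
# X_train: (num_samples, max_prompt_len), y_train: (num_samples, max_response_len)

# map a whole padded prompt row to a whole padded response row,
# deriving both sizes from the data
model = nn.Linear(X_train.shape[1], y_train.shape[1])

outputs = model(X_train)               # (num_samples, max_response_len)
loss = nn.MSELoss()(outputs, y_train)  # shapes now agree

Keep in mind that regressing word indices with MSELoss won't give you a usable chatbot even once the shapes match; this only gets the training loop running so you can iterate from there.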