The train function isnt working(I am a newbie)

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F

def tg(str):
    return [ord(s) for s in str]

class QAModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(QAModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.linear = nn.Linear(hidden_dim, vocab_size)

    def forward(self, input):
        embed = self.embedding(torch.tensor(input, dtype=torch.long))
        lstm_out, _ = self.lstm(embed)
        output = F.softmax(self.linear(lstm_out), dim=-1)
        return output

def train(model, dataloader, optimizer, epoch):
    model.train()
    for i, (data, target) in enumerate(dataloader):
        optimizer.zero_grad()
        r = []
        for sentence in data:
            for letter in sentence:
                r.append(ord(letter))
        output: torch.Tensor = model(r)  
        output = output.view(output.size(0), -1)
        if not isinstance(target, torch.Tensor):
            ten = []
            for sentence in [tg(s) for s in target]:
                for letter in sentence:
                    ten.append(letter)
            target = torch.tensor(ten,dtype=torch.long)
                
        loss = nn.functional.cross_entropy(output.view(output.size(0),-1), target.view(target.size(0),-1)[:len(output)])
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print(f"Epoch {epoch}, Step {i}, Loss: {loss.item()}")

def evaluate(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in dataloader:
            output = model(data)
            _, predicted = output.max(1)
            correct += (predicted == target).sum().item()
            total += target.size(0)
    return 100 * correct / total

def main():
    vocab_size = 10000
    embedding_dim = 128
    hidden_dim = 256

    model = QAModel(vocab_size, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    dataloader = data.DataLoader(
        dataset=torch.load("data.pt"),
        batch_size=128,
        shuffle=True,
    )

    for epoch in range(10):
        train(model, dataloader, optimizer, epoch)
        accuracy = evaluate(model, dataloader)
        print(f"Epoch {epoch}, Accuracy: {accuracy}")

if __name__ == "__main__":
    main()

it just keeps erroring
The error:

Traceback (most recent call last):
  File "c:\Users\Bartu\Desktop\asistan\main.py", line 78, in <module>
    main()
  File "c:\Users\Bartu\Desktop\asistan\main.py", line 73, in main
    train(model, dataloader, optimizer, epoch)
  File "c:\Users\Bartu\Desktop\asistan\main.py", line 40, in train
    loss = nn.functional.cross_entropy(output.view(output.size(0),-1), target.view(target.size(0),-1)[:len(output)])
  File "C:\Users\Bartu\anaconda3\lib\site-packages\torch\nn\functional.py", line 3029, in cross_entropy
    return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: 0D or 1D target tensor expected, multi-target not supported

my data.pt file is generated like this

something = [
   ("How was your day","Well great thanks"),
]

torch.save(something,"data.pt")

Do you guys know how to fix this problem? (Some of the code is generated by ai)

You have a shape mismatch error in the loss calculation as seen here:

criterion = nn.CrossEntropyLoss()
batch_size = 2
nb_classes = 3

output = torch.randn(batch_size, nb_classes)
target = torch.randint(0, nb_classes, (batch_size,))

# works
loss = criterion(output, target)

# breaks as the shape is wrong
target = target.unsqueeze(1)
loss = criterion(output, target)
# RuntimeError: 0D or 1D target tensor expected, multi-target not supported

My code snippet and the docs explain the expected shapes. For a multi-class classification the model output should have the shape [batch_size, nb_classes] containing logits and the target should have the shape [batch_size] containing class indices in the range [0, nb_classes-1].

how do i implement this in my code

Check the shapes of the model output and target in your code and make sure they match the expected shapes given in my code snippets and docs.
E.g. if the target has an additional dimension, you could squeeze it.

1 Like