Inputting [250, 33], getting [8250, 3] output

Hi, I’m inputting a tensor of shape [250, 33] into my LSTM, which is being used for NLP multi-class classification. The inputs are sentences, each 33 words long, in batches of 250. I’m trying to get an output of shape [250, 3], i.e. a probability for each of the 3 classes for each sentence; however, I’m getting a prediction for each word instead.
As a workaround I’ve written a loop that sums the 33 per-word outputs of each sentence and divides by 33 to get one prediction per sentence, but it makes training incredibly slow and it definitely feels like a makeshift fix (there’s a small vectorized sketch of that step after the model code below).
Apologies if this is a relatively obvious fix; I’m really new to building AI models in general and this is my first attempt.
My question is: how do I get the output to be the probability of the 3 classes for each sentence in the batch?

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as op
import torchvision
from torch.utils.data import TensorDataset, DataLoader
from torchvision import transforms, datasets

class HateSpeechDetector(nn.Module):
    def __init__(self, device, vocabularySize, output, embedding, hidden, layers, dropProb=0.5):
        super(HateSpeechDetector, self).__init__()
        # Number of outputs (classes/categories)
        self.output = output
        # Number of layers in the LSTM
        self.numLayers = layers
        # Number of hidden neurons in each LSTM layer
        self.hiddenDimensions = hidden
        # Device being used by the model (CPU or GPU)
        self.device = device

        # Embedding layer finds correlations in words by converting word integers into vectors
        self.embedding = nn.Embedding(vocabularySize, embedding)
        # LSTM stores important data in memory, using it to help with future predictions
        self.lstm = nn.LSTM(embedding, hidden, layers, dropout=dropProb, batch_first=True)
        # Dropout randomly drops nodes, which helps prevent overfitting during training
        self.dropout = nn.Dropout(dropProb)
        # Several simple fully connected layers and a softmax output
        self.fc = nn.Linear(hidden, hidden)
        self.fc2 = nn.Linear(hidden, hidden)
        self.fc3 = nn.Linear(hidden, hidden)
        self.fc4 = nn.Linear(hidden, hidden)
        self.fc5 = nn.Linear(hidden, hidden)
        self.fc6 = nn.Linear(hidden, output)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x, hidden):
        batchSize = x.size(0)
        x = x.long()
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)
        # Flatten to (batch * sequence length, hidden), which is where the per-sentence shape is lost
        lstm_out = lstm_out.contiguous().view(-1, self.hiddenDimensions)
        out = self.dropout(lstm_out)
        out = self.fc6(out)
        out = self.softmax(out)

        # Makeshift fix: average every 33 per-word predictions into one prediction per sentence
        myTensor = torch.Tensor([0, 0, 0])
        newOut = torch.zeros(batchSize, self.output)
        count = 0
        row = 0
        for tensor in out:
            if count == 33:
                newOut[row] = myTensor / 33
                myTensor = torch.Tensor([0, 0, 0])
                row += 1
                count = 0
            myTensor += tensor
            count += 1
        return newOut, hidden

    def init_hidden(self, batchSize, device):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device),
                  weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device))
        return hidden
```
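
For reference, here is roughly what that averaging step in `forward()` does, written with tensor ops instead of the Python loop. This is just a sketch of my current workaround (the shapes and variable names are made up to match my setup), not the per-sentence output I actually want:

```python
import torch

# Illustrative shapes matching my setup: 250 sentences, 33 words each, 3 classes
batch_size, seq_len, num_classes = 250, 33, 3

# 'per_word' stands in for the softmax output of shape (batch * seq_len, classes)
per_word = torch.rand(batch_size * seq_len, num_classes)

# Reshape back to (batch, seq_len, classes) and average over the 33 words of each sentence
per_sentence = per_word.view(batch_size, seq_len, num_classes).mean(dim=1)  # -> (250, 3)
```

That avoids iterating in Python, but I assume averaging the per-word probabilities still isn’t the proper way to get one prediction per sentence, so any pointers would be appreciated.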

Thanks

Ash