Hi, I’m inputting a tensor of shape (250, 33) into my LSTM, which is being used for an NLP multi-class classification task. The inputs are sentences, all of size 33 words, in batches of 250. I’m trying to get an output of shape (250, 3), as this would be a probability for each class for each sentence; however, I’m getting a prediction for each word instead.
I’ve had to make a loop that adds every 33 word predictions together and divides by 33 to get a prediction per sentence, but it’s causing training to be incredibly slow, and it definitely seems like a really makeshift way of fixing the issue.
Apologies if this is a relatively obvious fix — I’m really new to AI building in general and this is my first ever attempt.
My question is: how would I get the output to be the probability of each of the 3 classes for each sentence in the batch?
> import torch > > import torch.nn as nn > > import torch.nn.functional as F > > import torch.optim as op > > import torchvision > > from torch.utils.data import TensorDataset, DataLoader > > from torchvision import transforms, datasets > > class HateSpeechDetector(nn.Module): > > def __init__(self, device, vocabularySize, output, embedding, hidden, layers, dropProb=0.5): > > super(HateSpeechDetector, self).__init__() > > #Number of outputs (Classes/Categories) > > self.output = output > > #Number of layers in the LSTM > > self.numLayers = layers > > #Number of hidden neurons in each LSTM layer > > self.hiddenDimensions = hidden > > #Device being used for by model (CPU or GPU) > > self.device = device > > > > #Embedding layer finds correlations in words by converting word integers into vectors > > self.embedding = nn.Embedding(vocabularySize, embedding) > > #LSTM stores important data in memory, using it to help with future predictions > > self.lstm = nn.LSTM(embedding,hidden,layers,dropout=dropProb,batch_first=True) > > #Dropout is used to randomly drop nodes. 
This helps to prevent overfitting of the model during training > > self.dropout = nn.Dropout(dropProb) > > #Establishing 4 simple layers and a sigmoid output > > self.fc = nn.Linear(hidden, hidden) > > self.fc2 = nn.Linear(hidden, hidden) > > self.fc3 = nn.Linear(hidden, hidden) > > self.fc4 = nn.Linear(hidden, hidden) > > self.fc5 = nn.Linear(hidden, hidden) > > self.fc6 = nn.Linear(hidden, output) > > self.softmax = nn.Softmax(dim=1) > > def forward(self, x, hidden): > > batchSize = x.size(0) > > x = x.long() > > embeds = self.embedding(x) > > lstm_out, hidden = self.lstm(embeds, hidden) > > lstm_out = lstm_out.contiguous().view(-1,self.hiddenDimensions) > > out = self.dropout(lstm_out) > > out = self.fc6(out) > > out = self.softmax(out) > > myTensor = torch.Tensor([0,0,0]) > > newOut = torch.zeros(batchSize, self.output) > > count = 0 > > row = 0 > > for tensor in out: > > if(count == 33): > > newOut[row] = myTensor/33 > > myTensor = torch.Tensor([0,0,0]) > > row += 1 > > count = 0 > > myTensor += tensor > > count += 1 > > return newOut, hidden > > def init_hidden(self, batchSize, device): > > weight = next(self.parameters()).data > > hidden = (weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device), weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device)) > > return hidden