Hi, I’m inputting a tensor of shape (250, 33) into my LSTM, which is being used for an NLP multi-class classification task. The inputs are sentences, all 33 words long, in batches of 250. I’m trying to get an output of shape (250, 3), as this would be the probability of each class for each sentence; however, I’m getting a prediction for each word instead.
I’ve had to write a loop that adds up every 33 word-level predictions and divides by 33 to get a prediction for a sentence, but it’s causing training to be incredibly slow, and it definitely seems like a really makeshift way of fixing the issue.
Apologies if this is a relatively obvious fix; I’m really new to AI building in general and this is my first ever attempt.
My question is: how would I get the output to be the probability of the 3 classes for each sentence in the batch?
> import torch
>
> import torch.nn as nn
>
> import torch.nn.functional as F
>
> import torch.optim as op
>
> import torchvision
>
> from torch.utils.data import TensorDataset, DataLoader
>
> from torchvision import transforms, datasets
>
class HateSpeechDetector(nn.Module):
    """LSTM text classifier producing one class-probability vector per sentence.

    Pipeline: word indices -> embedding -> (stacked) LSTM -> dropout ->
    linear projection to ``output`` classes -> softmax per word -> mean of
    the per-word probabilities over each sentence.
    """

    def __init__(self, device, vocabularySize, output, embedding, hidden, layers, dropProb=0.5):
        """Build the layers.

        Args:
            device: Device handle stored on the instance as ``self.device``.
                It is only stored here; this class never moves itself to it.
            vocabularySize: Number of rows in the embedding table.
            output: Number of classes predicted per sentence.
            embedding: Size of each word embedding vector.
            hidden: Number of hidden units in each LSTM layer.
            layers: Number of stacked LSTM layers.
            dropProb: Dropout probability used both between LSTM layers and
                on the LSTM output before the final projection.
        """
        super().__init__()

        # Number of outputs (classes/categories)
        self.output = output
        # Number of layers in the LSTM
        self.numLayers = layers
        # Number of hidden units in each LSTM layer
        self.hiddenDimensions = hidden
        # Device handle supplied by the caller (CPU or GPU)
        self.device = device

        # Embedding layer maps integer word ids to dense vectors
        self.embedding = nn.Embedding(vocabularySize, embedding)
        # batch_first=True: the LSTM consumes/produces (batch, seq_len, features)
        self.lstm = nn.LSTM(embedding, hidden, layers, dropout=dropProb, batch_first=True)
        # Dropout randomly zeroes activations during training to reduce overfitting
        self.dropout = nn.Dropout(dropProb)

        # Six linear layers are registered, but forward() only uses fc6.
        # fc..fc5 are kept so parameter names and the state_dict layout stay
        # the same as before.
        self.fc = nn.Linear(hidden, hidden)
        self.fc2 = nn.Linear(hidden, hidden)
        self.fc3 = nn.Linear(hidden, hidden)
        self.fc4 = nn.Linear(hidden, hidden)
        self.fc5 = nn.Linear(hidden, hidden)
        self.fc6 = nn.Linear(hidden, output)
        # Applied to a 2-D (rows, classes) tensor, so dim=1 normalises over classes
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x, hidden):
        """Return per-sentence class probabilities and the updated LSTM state.

        Args:
            x: Tensor of word indices with shape (batch, seq_len). It is cast
                to ``long`` before the embedding lookup.
            hidden: ``(h_0, c_0)`` tuple for the LSTM, e.g. from
                ``init_hidden``.

        Returns:
            A tuple ``(probs, hidden)`` where ``probs`` has shape
            (batch, output) and each row is the mean of that sentence's
            per-word softmax distributions, and ``hidden`` is the LSTM's
            final ``(h_n, c_n)``.
        """
        batchSize = x.size(0)

        embeds = self.embedding(x.long())
        lstm_out, hidden = self.lstm(embeds, hidden)

        # Flatten to (batch * seq_len, hidden) so the 2-D softmax(dim=1) acts on classes.
        flat = lstm_out.reshape(-1, self.hiddenDimensions)
        word_probs = self.softmax(self.fc6(self.dropout(flat)))

        # Regroup the rows by sentence and average over the word axis in one
        # vectorised step. This replaces the former Python loop, which
        # hard-coded 33 words / 3 classes, built its buffers on the CPU, and
        # never wrote the final sentence's row (it stayed all zeros).
        sentence_probs = word_probs.view(batchSize, -1, self.output).mean(dim=1)

        return sentence_probs, hidden

    def init_hidden(self, batchSize, device):
        """Create a zero-filled ``(h_0, c_0)`` state for the LSTM.

        Args:
            batchSize: Number of sequences in the batch.
            device: Device on which the state tensors are allocated.

        Returns:
            A tuple of two zero tensors, each of shape
            (numLayers, batchSize, hiddenDimensions), with the same dtype as
            the model's parameters.
        """
        # Borrow the dtype from an existing parameter so the state matches the model.
        weight = next(self.parameters())
        shape = (self.numLayers, batchSize, self.hiddenDimensions)
        return (
            torch.zeros(shape, dtype=weight.dtype, device=device),
            torch.zeros(shape, dtype=weight.dtype, device=device),
        )
Thanks
- Ash