Output shape batchsize x features x classes


I’m working on an NLP LSTM for sentence classification. I’m feeding in a 50x33 batch, which is batch_size x features, the features being the words of each sentence. The output I’m receiving has shape 50x33x3, which is essentially a prediction for every word in every sentence, when I’m aiming for a shape of 50x3, i.e. one overall prediction per sentence instead of per word.

How would I go about rectifying this?

Really new to AI, just a heads up.

class HateSpeechDetector(nn.Module):
    def __init__(self, device, vocabularySize, output, embedding, hidden, layers, dropProb=0.5):
        super(HateSpeechDetector, self).__init__()
        # Number of outputs (classes/categories)
        self.output = output
        # Number of layers in the LSTM
        self.numLayers = layers
        # Number of hidden neurons in each LSTM layer
        self.hiddenDimensions = hidden
        # Device being used by the model (CPU or GPU)
        self.device = device

        # Embedding layer finds correlations between words by converting word indices into vectors
        self.embedding = nn.Embedding(vocabularySize, embedding)
        # LSTM stores important data in memory, using it to help with future predictions
        self.lstm = nn.LSTM(embedding, hidden, layers, dropout=dropProb, batch_first=True)
        # Dropout randomly drops nodes, which helps prevent overfitting during training
        self.dropout = nn.Dropout(dropProb)

        # Six linear layers and a softmax output
        self.fc = nn.Linear(hidden, hidden)
        self.fc2 = nn.Linear(hidden, hidden)
        self.fc3 = nn.Linear(hidden, hidden)
        self.fc4 = nn.Linear(hidden, hidden)
        self.fc5 = nn.Linear(hidden, hidden)
        self.fc6 = nn.Linear(hidden, output)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x, hidden=None):
        batchSize = x.size(0)
        x = x.long()
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)

        out = self.dropout(lstm_out)
        out = self.fc(out)
        out = self.fc2(out)
        out = self.fc3(out)
        out = self.fc4(out)
        out = self.fc5(out)
        out = self.fc6(out)

        out = self.softmax(out)
        return out, hidden

    def init_hidden(self, batchSize, device):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device),
                  weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device))
        return hidden


You could use the last time step of the LSTM output, or alternatively reduce the temporal dimension, e.g. with a pooling layer.
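For example, with batch_first=True the LSTM output has shape [batch_size, seq_len, hidden], so indexing the last time step (or mean-pooling over dim 1) collapses it to [batch_size, hidden] before the linear layers. A minimal sketch with made-up sizes matching your post:

```python
import torch

batch_size, seq_len, hidden = 50, 33, 64
lstm_out = torch.randn(batch_size, seq_len, hidden)  # [50, 33, 64]

# Option 1: take the hidden state of the last time step
last_step = lstm_out[:, -1, :]   # -> [50, 64]

# Option 2: mean-pool over the temporal dimension
pooled = lstm_out.mean(dim=1)    # -> [50, 64]

print(last_step.shape, pooled.shape)
```

Either tensor can then be fed into your fc layers, which would give a final output of shape [50, 3] instead of [50, 33, 3].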

Also note that you are not using any non-linearities between the linear layers at the moment, so they collapse into a single linear transformation.
I’m not sure which criterion you are using, but if it’s nn.CrossEntropyLoss, you should also remove the last softmax operation, since that criterion applies log-softmax internally and expects raw logits.