Hello, I have a model that does sentiment analysis.
I want to add attention to the model in order to improve accuracy.
In the PyTorch tutorial, attention is applied on the decoder LSTM, but in my case the decoder is just a fully connected (FC) layer that predicts the sentiment. How do I add attention to it?
Here is my network:
import torch.nn as nn
class Intent_LSTM(nn.Module):
    """
    We are training the embedding layer along with the LSTM for sentiment analysis.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim,
                 n_layers, drop_prob=0.5):
        """
        Setting up the parameters.
        """
        super(Intent_LSTM, self).__init__()

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding layer and LSTM layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)

        # dropout layer to avoid overfitting
        self.dropout = nn.Dropout(0.5)

        # linear output layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """
        Perform a forward pass.
        """
        batch_size = x.size(0)
        x = x.long()
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds)

        # stack up LSTM outputs
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        out = self.dropout(lstm_out)
        out = self.fc(out)

        # reshape to be batch_size first
        out = out.view(batch_size, -1, self.output_size)
        out = out[:, -1, :]  # take the output at the last time step

        # return the logits for the last time step
        return out
Where exactly does the attention mechanism fit in this scenario, and what are its inputs?
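From what I have read so far, my guess is that attention would replace the "take the last time step" part: every LSTM output gets a score, the scores are softmaxed into weights, and the weighted sum (a context vector) is fed to the FC. Here is a rough sketch of what I imagine, assuming additive (Bahdanau-style) attention; the AttentionPooling class and its layer names are my own placeholders, and I am not sure this is the right placement:

import torch
import torch.nn as nn

class AttentionPooling(nn.Module):
    """Additive attention that pools LSTM outputs into a single
    context vector instead of just taking the last time step."""

    def __init__(self, hidden_dim):
        super(AttentionPooling, self).__init__()
        # score each time step (layer names are my own placeholders)
        self.attn = nn.Linear(hidden_dim, hidden_dim)
        self.v = nn.Linear(hidden_dim, 1, bias=False)

    def forward(self, lstm_out):
        # lstm_out: (batch, seq_len, hidden_dim)
        scores = self.v(torch.tanh(self.attn(lstm_out)))  # (batch, seq_len, 1)
        weights = torch.softmax(scores, dim=1)            # weights over time steps
        context = (weights * lstm_out).sum(dim=1)         # (batch, hidden_dim)
        return context, weights

So inside Intent_LSTM.forward, instead of reshaping and slicing the last step, I would do something like:

lstm_out, hidden = self.lstm(embeds)          # (batch, seq_len, hidden_dim)
context, weights = self.attention(lstm_out)   # (batch, hidden_dim)
out = self.fc(self.dropout(context))          # (batch, output_size)

Is this roughly the right idea, or do the attention inputs need to involve the hidden state as well?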