I want to build a sentiment classification model. So far I have a simple one-layer RNN (LSTM) model, which uses the last timestep of each sentence as a fixed vector representation for classification. I am using my own pre-trained word embeddings, and I apply zero padding (to the right) to all sentences. The problem is that with my current code, the LSTM processes all timesteps, even the zero-padded ones. How can I modify my code to handle variable-length inputs?
If I am not mistaken, PyTorch is able to handle inputs of variable length, but there is no example of this in the PyTorch tutorials.
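To make the problem concrete, here is a toy example (made-up shapes and random values, not my actual data): with right zero padding, the hidden state at the final timestep is not the hidden state after the last real token, because the LSTM keeps updating its state over the padded positions.

import torch
import torch.nn as nn
from torch.autograd import Variable

torch.manual_seed(0)
lstm = nn.LSTM(input_size=4, hidden_size=3, batch_first=True)

# one sentence with 3 real timesteps, right-padded with zeros to length 5
real = Variable(torch.randn(1, 3, 4))
padded = torch.cat([real, Variable(torch.zeros(1, 2, 4))], dim=1)

out, _ = lstm(padded)
print(out[:, 2, :])   # hidden state after the last real token
print(out[:, -1, :])  # what my model currently uses -- already affected by the padding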
train
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

word_manager = WordVectorsManager(corpus="glove.twitter", dim=300)
emb_matrix, word_indices = word_manager.get_embeddings()

vectorizer = Vectorizer(word_indices=word_indices, max_length=50)
dataset = SentimentDataset(transform=[vectorizer.vectorize])

BATCH_SIZE = 128
RNN_SIZE = 150

dataloader = DataLoader(dataset,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        num_workers=4)

model = RNNModel(emb_matrix, rnn_size=RNN_SIZE)
model.cuda()

loss_function = nn.CrossEntropyLoss()
# optimize only the parameters that require gradients (the embeddings are frozen)
parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = optim.Adam(parameters)

for epoch in range(100):
    running_loss = 0.0
    for i_batch, sample_batched in enumerate(dataloader):
        optimizer.zero_grad()

        inputs, labels = sample_batched
        inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
        if (i_batch + 1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, 100, i_batch + 1,
                     len(dataset) // BATCH_SIZE, loss.data[0]))
dataset
class SentimentDataset(Dataset):
    def __init__(self, transform):
        """
        Args:
            transform (list): a list of callables that apply transformations
                to the samples.
        """
        self.transform = transform
        # code ...
        self.label_encoder = preprocessing.LabelEncoder()
        self.label_encoder.fit(self.target)
        # code ...

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        """
        Args:
            index (int):
        Returns:
            (tuple):
                example (ndarray): vector representation of a training example
                label (int): the encoded class label
        Example:
            For an index where
            self.data[index] = ['super', 'eagles', 'coach', 'sunday', 'oliseh',
                                'meets', 'with', 'chelsea', "'", 's', 'victor',
                                'moses', 'in', 'london', '<url>']
            self.target[index] = "neutral"
            it returns
            example = [  533  3908  1387   649 38127  4118    40  1876    63   106  7959 11520
                          22   888     7     0     0     0     0     0     0     0     0     0
                           0     0     0     0     0     0     0     0     0     0     0     0
                           0     0     0     0     0     0     0     0     0     0     0     0
                           0     0]
            label = 1
        """
        example, label = self.data[index], self.target[index]

        for i, tsfrm in enumerate(self.transform):
            example = tsfrm(example)

        label = self.label_encoder.transform([label])[0]

        return example, label
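I assume the model will also need the real (unpadded) length of each example. One change I imagine making (just a sketch, assuming index 0 is used only for padding) is to also return the number of non-zero entries from __getitem__, so the DataLoader collates the lengths into a batch as well:

def __getitem__(self, index):
    example, label = self.data[index], self.target[index]
    for tsfrm in self.transform:
        example = tsfrm(example)
    label = self.label_encoder.transform([label])[0]
    # number of real tokens, assuming 0 is reserved for padding
    length = int((example != 0).sum())
    return example, label, length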
model
class RNNModel(nn.Module):
    def __init__(self, embeddings, rnn_size=150, classes=3):
        super(RNNModel, self).__init__()
        self.embedding = nn.Embedding(embeddings.shape[0], embeddings.shape[1])
        self.rnn = nn.LSTM(embeddings.shape[1], rnn_size, batch_first=True)
        self.linear = nn.Linear(rnn_size, classes)
        self.init_weights(embeddings)

    def init_weights(self, weights):
        # self.embedding.weight = nn.Parameter(torch.from_numpy(weights),
        #                                      requires_grad=False)
        self.embedding.weight.data.copy_(torch.from_numpy(weights))
        self.embedding.weight.requires_grad = False

    def forward(self, x):
        x = self.embedding(x)
        x, h = self.rnn(x)

        # use the hidden state of the last timestep as the sentence representation
        last = x[:, -1, :]

        x = self.linear(last)
        return x
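From what I have read, I suspect torch.nn.utils.rnn.pack_padded_sequence is what I need, but I have not managed to put it together. Below is roughly how I imagine the forward pass would change (an untested sketch; it assumes the real sequence lengths are passed in as a LongTensor, which means the training loop would call model(inputs, lengths)):

from torch.nn.utils.rnn import pack_padded_sequence

def forward(self, x, lengths):
    x = self.embedding(x)

    # pack_padded_sequence expects the batch sorted by decreasing length
    lengths, sort_idx = lengths.sort(descending=True)
    x = x[sort_idx]

    packed = pack_padded_sequence(x, lengths.tolist(), batch_first=True)
    _, (h_n, c_n) = self.rnn(packed)

    # h_n[-1] holds the hidden state at the last *real* timestep of each sequence
    last = h_n[-1]

    # restore the original batch order so the outputs line up with the labels
    _, unsort_idx = sort_idx.sort()
    last = last[unsort_idx]

    return self.linear(last)

Is this the right direction, or is there a simpler way to get the last valid hidden state of each sequence?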