Hi, I’m trying to create a custom LSTM cell with a modified forget gate. Before I can do that, I need to create add a working template of a standard LSTM layer and cell to my code and get it to run. But I’m getting an error with this.
As you can see, I’m working with sequences of max length 100, with 1000 unique integers in the sequences. It’s a binary classification.
I’m hitting the following error: "RuntimeError: size mismatch, m1: [1 x 10900], m2: [50 x 400] at /opt/conda/conda-bld/pytorch_1501971235237/work/pytorch-0.1.12/torch/lib/TH/generic/THTensorMath.c:1237
"
When I remove the custom LSTM code and replace
LSTM(embedding_dim,hidden_dim)
with
nn.LSTM(embedding_dim,hidden_dim)
There is no error.
The complete code (data and model included) is below. Intended for Anaconda python3. Do you have any suggestions?
from keras.datasets import imdb
import re
import unicodedata
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
import pickle
import math
# Get test dataset
top_words = 1000
max_review_length = 100
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
vocabLimit = 1000
max_sequence_len = 100
#### CUSTOM LSTM HERE
############################################################################################################
############################################################################################################
class LSTM(nn.Module):
"""
An implementation of Hochreiter & Schmidhuber:
'Long-Short Term Memory'
http://www.bioinf.jku.at/publications/older/2604.pdf
Special args:
dropout_method: one of
* pytorch: default dropout implementation
* gal: uses GalLSTM's dropout
* moon: uses MoonLSTM's dropout
* semeniuta: uses SemeniutaLSTM's dropout
"""
def __init__(self, input_size, hidden_size, bias=True, dropout=0.0, dropout_method='pytorch'):
super(LSTM, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.bias = bias
self.dropout = dropout
self.i2h = nn.Linear(input_size, 4 * hidden_size, bias=bias)
self.h2h = nn.Linear(hidden_size, 4 * hidden_size, bias=bias)
self.reset_parameters()
assert(dropout_method.lower() in ['pytorch', 'gal', 'moon', 'semeniuta'])
self.dropout_method = dropout_method
def sample_mask(self):
keep = 1.0 - self.dropout
self.mask = V(th.bernoulli(T(1, self.hidden_size).fill_(keep)))
def reset_parameters(self):
std = 1.0 / math.sqrt(self.hidden_size)
for w in self.parameters():
w.data.uniform_(-std, std)
def forward(self, x, hidden):
do_dropout = self.training and self.dropout > 0.0
h, c = hidden
h = h.view(h.size(1), -1)
c = c.view(c.size(1), -1)
x = x.view(x.size(1), -1)
# Linear mappings
preact = self.i2h(x) + self.h2h(h)
# activations
gates = preact[:, :3 * self.hidden_size].sigmoid()
g_t = preact[:, 3 * self.hidden_size:].tanh()
i_t = gates[:, :self.hidden_size]
f_t = gates[:, self.hidden_size:2 * self.hidden_size]
o_t = gates[:, -self.hidden_size:]
# cell computations
if do_dropout and self.dropout_method == 'semeniuta':
g_t = F.dropout(g_t, p=self.dropout, training=self.training)
c_t = th.mul(c, f_t) + th.mul(i_t, g_t)
if do_dropout and self.dropout_method == 'moon':
c_t.data.set_(th.mul(c_t, self.mask).data)
c_t.data *= 1.0/(1.0 - self.dropout)
h_t = th.mul(o_t, c_t.tanh())
# Reshape for compatibility
if do_dropout:
if self.dropout_method == 'pytorch':
F.dropout(h_t, p=self.dropout, training=self.training, inplace=True)
if self.dropout_method == 'gal':
h_t.data.set_(th.mul(h_t, self.mask).data)
h_t.data *= 1.0/(1.0 - self.dropout)
h_t = h_t.view(1, h_t.size(0), -1)
c_t = c_t.view(1, c_t.size(0), -1)
return h_t, (h_t, c_t)
############################################################################################################
############################################################################################################
class Model(torch.nn.Module) :
def __init__(self,embedding_dim,hidden_dim) :
super(Model,self).__init__()
self.hidden_dim = hidden_dim
self.embeddings = nn.Embedding(vocabLimit+1, embedding_dim)
self.lstm = LSTM(embedding_dim,hidden_dim)
self.linearOut = nn.Linear(hidden_dim,2)
def forward(self,inputs,hidden) :
x = self.embeddings(inputs).view(len(inputs),1,-1)
lstm_out,lstm_h = self.lstm(x,hidden)
x = lstm_out[-1]
x = self.linearOut(x)
x = F.log_softmax(x)
return x,lstm_h
def init_hidden(self) :
return (Variable(torch.zeros(1, 1, self.hidden_dim)),Variable(torch.zeros(1, 1, self.hidden_dim)))
model = Model(50,100)
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
epochs = 1
print('starting training')
for i in range(epochs) :
avg_loss = 0.0
for idx,lines in enumerate(X_train):
input_data = lines
input_data = Variable(torch.LongTensor(input_data))
target = int(y_train[idx])
target_data = Variable(torch.LongTensor([target]))
hidden = model.init_hidden()
y_pred,_ = model(input_data,hidden)
model.zero_grad()
loss = loss_function(y_pred,target_data)
avg_loss += loss.data[0]
if idx%10 == 0 or idx == 0:
print('epoch :%d iterations :%d loss :%g'%(i,idx,loss.data[0]))
loss.backward()
optimizer.step()
print('the average loss after completion of %d epochs is %g'%((i+1),(avg_loss/len(f))))