class CharLevelLanguageModel(torch.nn.Module):
def __init__(self,vocab_size,emb_dim,hidden_dim,batch_size):
super(CharLevelLanguageModel,self).__init__()
self.embedddings = torch.nn.Embedding(vocab_size,emb_dim,padding_idx=0)
self.lstm = torch.nn.LSTM(emb_dim,hidden_dim,1,batch_first=True)
self.linear = torch.nn.Linear(hidden_dim,vocab_size)
self.batch_size = batch_size
self.hidden_dim = hidden_dim
self.hidden_state = self.init_hidden()
def init_hidden(self):
return (Variable(torch.zeros(1,self.batch_size,self.hidden_dim)),Variable(torch.zeros(1,self.batch_size,self.hidden_dim)))
def forward(self,x):
embeds = self.embedddings(x)
output,self.hidden_state = self.lstm(embeds,self.hidden_state)
return F.log_softmax(self.linear(F.tanh(output[:,-1,:])))
train = data_utils.TensorDataset(torch.LongTensor(dataX),torch.LongTensor(dataY))
train_loader = data_utils.DataLoader(train,batch_size=100,drop_last=True)
model = CharLevelLanguageModel(len(char_to_id)+1,100,50,100)
criterion = torch.nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters())
losses = []
for i in range(3):
total_loss = torch.FloatTensor([0])
for batch_idx,train in enumerate(train_loader):
model.hidden_state = model.init_hidden()
x,y = Variable(train[0]),Variable(train[1])
y_pred = model(x)
#print()
loss = criterion(y_pred,y)
total_loss+=loss.data
loss.backward()
optimizer.zero_grad()
optimizer.step()
losses.append(total_loss)
Above is the training code.
model.eval()
testX = dataX[0:2]
testVar = Variable(torch.LongTensor(testX))
The above code throws inconsistent size error.
But if I run the below code:
testX = dataX[0]
testVar = Variable(torch.LongTensor(testX))
It executes successfully but it returns a size of (batch_size*output_labels). Each row is repeated batch_size number of times.