I‘m a beginner of Pytorch ,and I try to build a lstm acoustic model, I used merlin’s frontend to prepare data,but my result is not as good as keras or tensorflow,here is my code,Did I make any mistakes in building the model and training?
class LSTM(nn.Module):
def __init__(self, embedding_dim, hidden_dim, output_size):
super(LSTM, self).__init__()
self.fc1=nn.Linear(embedding_dim,hidden_dim)
self.fc2=nn.Linear(hidden_dim,hidden_dim)
self.hidden_dim = hidden_dim
self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=2,batch_first=True)
self.hidden2out = nn.Linear(hidden_dim, output_size)
self.dropout_layer = nn.Dropout(p=0.1)
def init_hidden(self, batch_size):
return (autograd.Variable(torch.randn(2, batch_size, self.hidden_dim)).cuda(),
autograd.Variable(torch.randn(2, batch_size, self.hidden_dim)).cuda())
def forward(self,input, lengths):
self.hidden = self.init_hidden(batch_size)
input1=torch.tanh(self.fc1(input))
input2=torch.tanh(self.fc2(input1))
packed_input = pack_padded_sequence(input2, lengths,batch_first=True)
outputs, (ht, ct) = self.lstm(packed_input, self.hidden)
opt,_=pad_packed_sequence(outputs,batch_first=True)
outputs=self.hidden2out(opt)
return outputs
model =LSTM(ins,ins,outs).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.002)
for epoch in range(25): # again, normally you would NOT do 300 epochs, it is toy data
L = 1
overall_loss = 0
for iteration in range(int(len(train_x.keys()) / batch_size) + 1):
x_batch, y_batch, utt_length_batch = get_batch(train_x, train_y,keys_list,iteration,batch_size)
if utt_length_batch == []:
continue
else:
L += 1
max_length_batch = max(utt_length_batch)
x_batch = data_utils.transform_data_to_3d_matrix(x_batch, max_length=max_length_batch, shuffle_data=False)
y_batch = data_utils.transform_data_to_3d_matrix(y_batch, max_length=max_length_batch, shuffle_data=False)
# for i in range(len(x_batch)):
# for s in range(len(x_batch[i])):
# temp_x_batch[s][i][:]=x_batch[i][s][:]
# temp_y_batch[s][i][:]=y_batch[i][s][:]
inputs = torch.from_numpy(x_batch).float().to(device)
tags = torch.from_numpy(y_batch).float().to(device)
# Also, we need to clear out the hidden state of the LSTM,
# detaching it from its history on the last instance.
model.zero_grad()
# Step 2. Get our inputs ready for the network, that is, turn them into
# Tensors of word indices.
# Step 3. Run our forward pass.
#output,hidden = model(inputs,utt_length_batch)
pred = model(torch.autograd.Variable(inputs), utt_length_batch)
loss=criterion(pred,tags)
loss.backward()
optimizer.step()
overall_loss += loss
print(overall_loss/L)