Hello,
I modified my LSTM-based network so that its input is a packed padded sequence, thinking that batching might be faster or better parallelized (I am a noob at optimization), and I also modified the training loop accordingly. But now my model is 3 times slower than before … any idea why that is?
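For reference, the change boils down to the usual pad_sequence / pack_padded_sequence / LSTM / pad_packed_sequence round trip, roughly like this (toy shapes and placeholder names, not my real data):

import torch
import torch.nn as nn

# two variable-length sequences with feature size 8 (placeholder numbers)
seqs = [torch.randn(5, 8), torch.randn(3, 8)]
lengths = torch.tensor([s.size(0) for s in seqs])            # [5, 3]
padded = nn.utils.rnn.pad_sequence(seqs, batch_first=True)   # (2, 5, 8)
packed = nn.utils.rnn.pack_padded_sequence(padded, lengths, batch_first=True, enforce_sorted=False)
lstm = nn.LSTM(8, 16, batch_first=True)
out, hidden = lstm(packed)                                    # LSTM consumes the packed batch
out, out_lengths = nn.utils.rnn.pad_packed_sequence(out, batch_first=True)   # back to a padded (2, 5, 16) tensor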
The RNN before:
class myLSTM(nn.Module):
    def __init__(self, pitch_size, pos_size, util_size, chord_size, hidden_size):
        super().__init__()
        self.input_size = pitch_size + pos_size + util_size + chord_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(self.input_size, hidden_size, batch_first=True)
        self.notes_layer = nn.Linear(hidden_size, pitch_size)
        self.pos_layer = nn.Linear(hidden_size, pos_size)
        self.utils_layer = nn.Linear(hidden_size, util_size - 1)
        self.tanh = nn.Tanh()
        self.tmp_pos = pitch_size + pos_size
        self.softmax = nn.LogSoftmax(dim=2)
        self.sigmoid = nn.Sigmoid()
        self.drop_layer = nn.Dropout(p=0.5)

    def forward(self, input, hidden=None):
        if hidden is None:
            out, hidden = self.lstm(input, hidden)
            out = self.drop_layer(self.sigmoid(out))
            out_notes = self.softmax(self.notes_layer(out))
            out_pos = self.sigmoid(self.pos_layer(out))
            out_utils = self.softmax(self.utils_layer(out))
            out = torch.cat((out_notes, out_pos, out_utils), 2)
        else:
            out, hidden = self.lstm(input, hidden)
            out = self.drop_layer(self.sigmoid(out))
            out_notes = self.softmax(self.notes_layer(out))
            out_pos = self.sigmoid(self.pos_layer(out))
            out_utils = self.softmax(self.utils_layer(out))
            out = torch.cat((out_notes, out_pos, out_utils), 2)
        return out, hidden
and after:
class myLSTM(nn.Module):
    def __init__(self, pitch_size, pos_size, util_size, chord_size, hidden_size):
        super().__init__()
        self.input_size = pitch_size + pos_size + util_size + chord_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(self.input_size, hidden_size, batch_first=True)
        self.notes_layer = nn.Linear(hidden_size, pitch_size)
        self.pos_layer = nn.Linear(hidden_size, pos_size)
        self.tempo_layer = nn.Linear(hidden_size, 1)
        self.utils_layer = nn.Linear(hidden_size, util_size - 1)
        self.tanh = nn.Tanh()
        self.tmp_pos = pitch_size + pos_size
        self.softmax = nn.LogSoftmax(dim=2)
        self.sigmoid = nn.Sigmoid()
        self.drop_layer = nn.Dropout(p=0.5)

    def forward(self, input, lengths, hidden=None):
        # pack the padded batch so the LSTM skips the padded timesteps
        input = nn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=True, enforce_sorted=False)
        if hidden is None:
            out, hidden = self.lstm(input, hidden)
            # unpack back to a padded tensor before the output layers
            out = nn.utils.rnn.pad_packed_sequence(out, batch_first=True, padding_value=-1)[0]
            out = self.drop_layer(self.sigmoid(out))
            out_notes = self.softmax(self.notes_layer(out))
            out_pos = self.sigmoid(self.pos_layer(out))
            out_utils = self.softmax(self.utils_layer(out))
            out = torch.cat((out_notes, out_pos, out_utils), 2)
        else:
            out, hidden = self.lstm(input, hidden)
            out = nn.utils.rnn.pad_packed_sequence(out, batch_first=True, padding_value=-1)[0]
            out = self.drop_layer(self.sigmoid(out))
            out_notes = self.softmax(self.notes_layer(out))
            out_pos = self.sigmoid(self.pos_layer(out))
            out_utils = self.softmax(self.utils_layer(out))
            out = torch.cat((out_notes, out_pos, out_utils), 2)
        return out, hidden
The training loop now looks something like this:
for iter in range(1, n_iters + 1):
    batch = np.random.randint(0, 10, 10)
    lengths = torch.as_tensor([dataSet[b]["inputTensor"].size(0) for b in batch], dtype=torch.int64, device='cpu')
    inputTensor = nn.utils.rnn.pad_sequence([dataSet[b]["inputTensor"] for b in batch], batch_first=True, padding_value=-1)
    target = [dataSet[b]["target"] for b in batch]
    optimizer.zero_grad()
    loss = 0
    output, hidden = model(inputTensor, lengths)
    for b in batch:
        # drop the padded timesteps of this sequence before computing the loss
        pads = max(lengths) - lengths[b]
        dim = output[b, :, :].size(0) - pads
        if pads == 0:
            masked_out = output[b, :, :]
        else:
            masked_out = output[b, :, :].view(-1)[:-pads * output[b, :, :].size(1)].reshape(dim, -1)
        ln = criterion(masked_out[:, 0:n_pitch], utilities.targetTensor(target[b][:, :n_pitch]))
        lp = pos_criterion(masked_out[:, n_pitch:n_pitch + n_pos], target[b][:, n_pitch:n_pitch + n_pos])
        lu = criterion(masked_out[:, n_pitch + n_pos:n_pitch + n_pos + n_util - 1], utilities.targetTensor(target[b][:, n_pitch + n_pos:]))
        loss += 5 * lp + ln + lu
        if iter == n_iters - 1:
            print("ln : %.3f, lp : %.3f, lu : %.3f" % (ln, lp, lu))
    loss.backward()
    optimizer.step()
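(For what it's worth, the view/reshape in the inner loop is just my way of removing the padded timesteps of each sequence; as far as I can tell it should be equivalent to simply slicing with the stored length:)

masked_out = output[b, :lengths[b], :]   # keep only the real, non-padded timesteps of sequence b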
Both versions of the code learn properly, but I was hoping for a speedup of something that is already pretty slow, not a further slowdown.