I was wondering whether there is an efficiency difference between using LSTM with step by step inputs, with a sequnce of inputs and packed batch. First of all, I wanted to compare the first two and wrote the following script. However, the resulting outputs differ from each other.
What may be the reason?
random.seed(260316)
torch.manual_seed(260316)
x_size = 4
hidden_size = 6
batch_size = 1
lstm = nn.LSTM(input_size=x_size,
hidden_size=hidden_size,
num_layers=1,
batch_first=True)
print("Creating the dataset")
seqs = []
num_seqs = 100
max_seqlen = 10
min_seqlen = 4
for _ in range(num_seqs):
seqlen = random.randint(min_seqlen, max_seqlen)
new = torch.rand(seqlen, x_size)
seqs.append(new)
print("Testing one by one")
"""
input(batch, seq_len, input_size)
h_0(num_layers * num_directions, batch, hidden_size)
c_0(num_layers * num_directions, batch, hidden_size)
"""
outputs1 = []
for seq in seqs:
output = []
h0 = Variable(torch.zeros(1, batch_size, hidden_size))
c0 = Variable(torch.zeros(1, batch_size, hidden_size))
hidden = (h0, c0)
for si in range(seq.size()[0]):
inp = Variable(seq[si]. view(batch_size, 1, x_size))
out, hidden = lstm(inp, hidden)
output.append(out)
outputs1.append(torch.cat(output, dim=1))
print("Testing as a sequence")
"""
input(batch, seq_len, input_size)
h_0(num_layers * num_directions, batch, hidden_size)
c_0(num_layers * num_directions, batch, hidden_size)
"""
outputs2 = []
for seq in seqs:
h0 = Variable(torch.zeros(1, batch_size, hidden_size))
c0 = Variable(torch.zeros(1, batch_size, hidden_size))
inp = Variable(seq.view(1, len(seq), x_size))
output, hidden = lstm(inp, hidden)
outputs2.append(output)