Hi, I am new to LSTM. I create a single LSTM network:
class LSTMNet(nn.Module):
def __init__(self):
super(LSTMNet, self).__init__()
self.lstm1 = nn.LSTMCell(1, 64) #(self, input_size, hidden_size, bias=True)
self.lstm2 = nn.LSTMCell(64, 1)
def forward(self, x):
length, batch_size, in_channels = x.size()
#initialisation
h_t1 = Variable(torch.zeros(length, batch_size, 64)).cuda()
c_t1 = Variable(torch.zeros(length, batch_size, 64)).cuda()
h_t2 = Variable(torch.zeros(length, batch_size, 1 )).cuda()
c_t2 = Variable(torch.zeros(length, batch_size, 1 )).cuda()
h_t1, c_t1 = self.lstm1(x, (h_t1,c_t1))
h_t2, c_t2 = self.lstm2(h_t1, (h_t2,c_t2))
y = h_t2
return y
This is my training code:
def run_lstm_example_sdg():
out_dir='/root/share/project/drone/results/xxx'
os.makedirs(out_dir, exist_ok=True)
# make training/validation set
length = 100
num_train = 200
num_valid = 5
in_channels = 1
out_channels = 1
train_input, train_target, valid_input, valid_target = gernerate_data(length, num_train, num_valid, in_channels, out_channels)
# build the model
net = LSTMNet()
net.cuda()
# optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01, ) #momentum=0.9, weight_decay=0.0001
#begin to train
num_iters = 300
for i in range(num_iters):
optimizer.zero_grad()
ys = net(train_input)
loss = nn.MSELoss()(ys, train_target)
loss.backward()
optimizer.step()
print('iter=%8d, train loss: %0.5f'%(i, loss.data.cpu().numpy()[0]))
My data. In particular, train data is torch.Size([100, 200, 1]), where sequence length=100, batch_size=200, dim=1. Train target dim is also dim=1.
def gernerate_data(length, num_train, num_valid, in_channels=1,out_channels=1):
assert(in_channels ==1)
assert(out_channels==1)
T = 20 #period
num = num_train+num_valid
data = np.empty((num, length+1), 'int64')
data[:] = np.array(range(length+1)) + np.random.randint(-4 * T, 4 * T, num).reshape(num, 1)
data = np.sin(data / T).astype('float32')
train_input = data[:num_train, :-1].transpose().reshape(length,num_train,in_channels )
train_target = data[:num_train, 1: ].transpose().reshape(length,num_train,out_channels)
valid_input = data[num_train:, :-1].transpose().reshape(length,num_valid,in_channels )
valid_target = data[num_train:, 1: ].transpose().reshape(length,num_valid,out_channels)
train_input = Variable(torch.from_numpy(train_input )).cuda() #torch.Size([100, 200, 1]); print(train_input.size())
train_target = Variable(torch.from_numpy(train_target)).cuda() #torch.Size([100, 200, 1]); print(train_target.size())
valid_input = Variable(torch.from_numpy(valid_input )).cuda() #torch.Size([100, 5, 1]); print(valid_input.size())
valid_target = Variable(torch.from_numpy(valid_target)).cuda() #torch.Size([100, 5, 1]); print(valid_target.size())
return train_input, train_target, valid_input, valid_target
the error is:
Traceback (most recent call last):
File "/root/share/project/drone/build/lstm-00/lstm_example-1.py", line 208, in <module>
run_lstm_example_sdg()
File "/root/share/project/drone/build/lstm-00/lstm_example-1.py", line 172, in run_lstm_example_sdg
optimizer.step()
File "/opt/anaconda3/lib/python3.6/site-packages/torch/optim/sgd.py", line 99, in step
p.data.add_(-group['lr'], d_p)
RuntimeError: invalid argument 3: sizes do not match at /opt/pytorch/8fbe003/pytorch/torch/lib/THC/generated/../generic/THCTensorMathPointwise.cu:271
Where did i go wrong? Thanks!