Hi everyone, I am stuck training an LSTM model. The input to the LSTM has the shape (seq_len, batch_size, input_size), which in my case is (5, 8, 2048). The following code shows the details of my model.
import torch
import torch.nn as nn

class MyModel(nn.Module):  # placeholder class name for this post
    def __init__(self):
        super().__init__()
        self.layer_size = 1
        self.batch_size = 8  # matches the (5, 8, 2048) input described above
        self.hidden_size = 512
        self.timestep = 3  # not used below
        self.hidden = self.init_hidden_lstm()
        self.lstm = nn.LSTM(2048, self.hidden_size, self.layer_size, batch_first=False)
        self.drop = nn.Dropout(0.2)
        self.fc1 = nn.Linear(2560, 512)  # 2560 = 5 timesteps * 512 hidden units
        self.fc2 = nn.Linear(512, 2)
        self.init_weight()
    def init_hidden_lstm(self):
        # Fresh (h0, c0) states of shape (num_layers, batch_size, hidden_size)
        return (torch.randn(self.layer_size, self.batch_size, self.hidden_size, requires_grad=True).cuda(),
                torch.randn(self.layer_size, self.batch_size, self.hidden_size, requires_grad=True).cuda())
    def init_weight(self):
        for name, param in self.lstm.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.5)
            elif 'weight' in name:
                nn.init.uniform_(param)
    def forward(self, in1, in2, in3, in4, in5):
        # in1..in5: features extracted from the CNN, each of shape (batch_size, 2048)
        self.hidden = repackage_hidden(self.hidden)
        x = torch.stack([in1, in2, in3, in4, in5], dim=0)  # (5, batch_size, 2048)
        out, self.hidden = self.lstm(x, self.hidden)
        out = out.permute(1, 0, 2)                        # (batch_size, 5, hidden_size)
        out = out.contiguous().view(self.batch_size, -1)  # (batch_size, 2560)
        o = self.fc1(out)
        o = self.drop(o)
        o = self.fc2(o)
        return o
def repackage_hidden(h):
    # Detach hidden states from their history so gradients do not flow across batches
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
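For completeness, here is a minimal sketch of how I drive the model to check the shape flow; the random tensors are just stand-ins for my real CNN features, and MyModel is the placeholder name used above:

model = MyModel().cuda()
feats = [torch.randn(8, 2048).cuda() for _ in range(5)]  # five per-timestep feature batches
logits = model(*feats)  # stacked internally into (5, 8, 2048)
print(logits.shape)     # torch.Size([8, 2])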
Currently, the training accuracy fluctuates between 0.48 and 0.55 from the beginning until the 60th epoch.
I have tried LSTMs with 2, 4, and 8 layers and different learning rates (1, 0.1, 0.01), but the same thing happens. If you can guess the reason, please reply. I appreciate your help!