Loss not decreasing in an LSTM I implemented myself

I have implemented an LSTM model on top of PyTorch's nn.Module, and the forward-propagation results of my model match the official implementation. However, when I use my own LSTM model for a binary classification task, the training loss stays around 0.69. My code is as follows:
# Custom internal structure of an LSTM cell: one cell layer with four gates and two outputs.
# The learnable parameters are the W matrices and b vectors; everything else is computed from them.

Custom LSTM_Cell

import math

import torch
import torch.nn as nn
from torch import Tensor
from torch.autograd import Variable
from torch.nn import Parameter, init


class LSTM_Cell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(LSTM_Cell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # parameters of the input gate
        self.w_ii = Parameter(Tensor(hidden_size, input_size))
        self.b_ii = Parameter(Tensor(hidden_size, 1))
        self.w_hi = Parameter(Tensor(hidden_size, hidden_size))
        self.b_hi = Parameter(Tensor(hidden_size, 1))

        # parameters of the forget gate
        self.w_if = Parameter(Tensor(hidden_size, input_size))
        self.b_if = Parameter(Tensor(hidden_size, 1))
        self.w_hf = Parameter(Tensor(hidden_size, hidden_size))
        self.b_hf = Parameter(Tensor(hidden_size, 1))

        # parameters of the output gate
        self.w_io = Parameter(Tensor(hidden_size, input_size))
        self.b_io = Parameter(Tensor(hidden_size, 1))
        self.w_ho = Parameter(Tensor(hidden_size, hidden_size))
        self.b_ho = Parameter(Tensor(hidden_size, 1))

        # parameters of the cell (candidate) memory
        self.w_ic = Parameter(Tensor(hidden_size, input_size))
        self.b_ic = Parameter(Tensor(hidden_size, 1))
        self.w_hc = Parameter(Tensor(hidden_size, hidden_size))
        self.b_hc = Parameter(Tensor(hidden_size, 1))
        self.reset_weights()

    def reset_weights(self):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            init.uniform_(weight, -stdv, stdv)

    # forward propagation of one LSTM cell for a single time step;
    # inputs, h and c are column-major: (input_size, batch) and (hidden_size, batch)
    def forward(self, inputs, h, c):
        # input gate output
        i = torch.sigmoid(self.w_ii @ inputs + self.b_ii + self.w_hi @ h + self.b_hi)
        # forget gate output
        f = torch.sigmoid(self.w_if @ inputs + self.b_if + self.w_hf @ h + self.b_hf)
        # output gate output
        o = torch.sigmoid(self.w_io @ inputs + self.b_io + self.w_ho @ h + self.b_ho)
        # candidate cell state
        g = torch.tanh(self.w_ic @ inputs + self.b_ic + self.w_hc @ h + self.b_hc)
        # cell memory of the current time step
        c_current = f * c + i * g
        # hidden state of the current time step
        h_current = o * torch.tanh(c_current)
        return h_current, c_current

    # initialization of hidden state and cell memory
    def init_hidden(self, batch_size, hidden_size):
        h_init = Variable(torch.randn(batch_size, hidden_size).t())
        c_init = Variable(torch.randn(batch_size, hidden_size).t())
        return h_init, c_init
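For completeness, this is roughly how I check a single step of this cell against the built-in torch.nn.LSTMCell (nn.LSTMCell stores the gate weights stacked in (i, f, g, o) order; the sizes below are just example values, and the snippet assumes the class and imports above):

# minimal sketch: copy this cell's weights into nn.LSTMCell and compare one step
input_size, hidden_size, batch = 14, 50, 4
cell = LSTM_Cell(input_size, hidden_size)
ref = nn.LSTMCell(input_size, hidden_size)

with torch.no_grad():
    # nn.LSTMCell stacks the gates as (input, forget, cell/candidate, output)
    ref.weight_ih.copy_(torch.cat([cell.w_ii, cell.w_if, cell.w_ic, cell.w_io], dim=0))
    ref.weight_hh.copy_(torch.cat([cell.w_hi, cell.w_hf, cell.w_hc, cell.w_ho], dim=0))
    ref.bias_ih.copy_(torch.cat([cell.b_ii, cell.b_if, cell.b_ic, cell.b_io], dim=0).squeeze(1))
    ref.bias_hh.copy_(torch.cat([cell.b_hi, cell.b_hf, cell.b_hc, cell.b_ho], dim=0).squeeze(1))

x = torch.randn(batch, input_size)
h0 = torch.randn(batch, hidden_size)
c0 = torch.randn(batch, hidden_size)

# the custom cell works on column vectors, so transpose in and out
h1, c1 = cell(x.t(), h0.t(), c0.t())
h_ref, c_ref = ref(x, (h0, c0))
print(torch.allclose(h1.t(), h_ref, atol=1e-5), torch.allclose(c1.t(), c_ref, atol=1e-5))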

Custom multi-layer LSTM model (layer_num >= 1)

class LSTM_layer(nn.Module):
    def __init__(self, input_size, hidden_size, layer_num):
        super(LSTM_layer, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = layer_num
        # list of LSTM cells, one per layer
        self._all_layers = []
        for i in range(self.num_layers):
            layer_name = 'cell{}'.format(i)
            if i == 0:
                # the first layer receives the raw input
                cell = LSTM_Cell(self.input_size, self.hidden_size)
            else:
                # every later layer receives the hidden output h of the previous layer
                cell = LSTM_Cell(self.hidden_size, self.hidden_size)
            cell.reset_weights()
            setattr(self, layer_name, cell)   # setattr registers the cell as an attribute of this module
            self._all_layers.append(cell)     # collect all cells of the stack in one list

    def forward(self, inputs):
        # internal state (hidden state, cell memory) of each layer
        internal_state = []
        outputs = []
        for t in range(inputs.size(0)):
            x_step = inputs[t].t()  # input of the current time step, shape (input_size, batch)
            for layer in range(self.num_layers):
                layer_name = 'cell{}'.format(layer)
                if t == 0:
                    batch_size = inputs[t].size()[0]
                    h, c = getattr(self, layer_name).init_hidden(batch_size=batch_size, hidden_size=self.hidden_size)
                    internal_state.append((h, c))
                (h, c) = internal_state[layer]
                x_step, c_new = getattr(self, layer_name)(x_step, h, c)
                internal_state[layer] = (x_step, c_new)
            outputs.append(x_step.t().unsqueeze(0))  # add a leading time dimension with unsqueeze(0)
        outputs = torch.cat(outputs, dim=0)  # concatenate the per-step outputs into one (seq_len, batch, hidden) tensor
        return outputs, (x_step, c_new)
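The layer should return one hidden state per time step; a rough shape check (the sizes here are just made up for illustration, assuming the classes above) looks like this:

layer = LSTM_layer(input_size=14, hidden_size=50, layer_num=2)
seq = torch.randn(7, 4, 14)                  # (seq_len, batch, input_size)
out, (h_last, c_last) = layer(seq)
print(out.shape)     # torch.Size([7, 4, 50]) -> one hidden state per time step
print(h_last.shape)  # torch.Size([50, 4])   -> column-vector layout used inside the cells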

# generic model: custom LSTM stack followed by a linear classifier
class my_model(nn.Module):
    def __init__(self, input_size, hidden_size, layer_num, output_size):
        super(my_model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.layer_num = layer_num
        # self.bidirection = bidirection
        self.output_size = output_size
        self.lstm = LSTM_layer(self.input_size, self.hidden_size, self.layer_num)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, inputs):
        r_out, (h_final, c_final) = self.lstm(inputs)
        # classify from the hidden state of the last time step
        # (also tried: self.linear(h_final.t()) and torch.sigmoid(self.linear(h_final.t())))
        out = self.linear(r_out[-1, :, :])
        return out
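Since I use nn.CrossEntropyLoss below, the model returns raw logits of shape (batch, 2) and the targets are class indices of shape (batch,), with no sigmoid/softmax applied. A small sanity check (with made-up sizes, assuming the classes above) would be:

model = my_model(14, 50, 1, 2)               # input_size, hidden_size, layer_num, output_size
x = torch.randn(7, 4, 14)                    # (seq_len, batch, input_size)
logits = model(x)                            # (4, 2) raw scores
target = torch.randint(0, 2, (4,))           # class indices 0 or 1
print(nn.CrossEntropyLoss()(logits, target))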

if __name__ == '__main__':
    # M, learning_rate, num_epochs and train_loader are defined elsewhere (not shown here)
    model = my_model(14, 50, 1, 2)  # input_size, hidden_size, layer_num, output_size
    # for name, parameter in model.named_parameters():
    #     print(name, parameter)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # optimize all parameters
    # optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    criterion = nn.CrossEntropyLoss()  # loss function (nn.MSELoss() was also tried)
    epoch = 0
    for epoch in range(num_epochs):
        for data in train_loader:
            img, target = data
            img = img.view(M, -1, 14)  # (seq_len, batch, input_size)
            target = target.squeeze(1)
            img = img.squeeze(0)
            img = Variable(img)
            target = Variable(target)
            out = model(img)
            loss = criterion(out, target)
            print_loss = loss.data.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        epoch += 1
        if epoch % 10 == 0:
            print('epoch: {}, loss: {:.4}'.format(epoch, print_loss))
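One more observation: 0.69 is about ln 2, i.e. the cross-entropy of always predicting 50/50 between two (roughly balanced) classes, so the network seems to be stuck at chance level:

import math
print(math.log(2))  # 0.6931..., the expected loss of a two-class classifier that has learned nothing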