Hello,
I’ve been trying to write a custom LSTM in PyTorch so that I can make further custom modifications to it. I based the layout of the custom LSTM class on an existing implementation (lstms.pth/lstm.py at master · seba-1511/lstms.pth · GitHub). But when I run it on the toy “Airline passenger prediction” dataset, the loss seems to be stuck!
Any comments would be very helpful!
Thanks in advance!
class SlowLSTM(nn.Module):
    """
    A pedagogic, single-step implementation of Hochreiter & Schmidhuber:
    'Long-Short Term Memory'
    http://www.bioinf.jku.at/publications/older/2604.pdf

    ``forward`` flattens every sample to one feature vector, applies a single
    LSTM cell update that starts from an all-zero hidden and cell state, and
    maps the resulting hidden state to one scalar through a linear head.

    NOTE: because the state always starts at zero and is never carried from
    one call to the next, the hidden-to-hidden weights and the forget gate
    cannot influence the output (and receive zero gradient). The module
    therefore behaves like a gated feed-forward layer over the flattened
    input window, not like a recurrent network unrolled over time.

    Args:
        input_size: number of features per sample *after* flattening.
        hidden_size: width of the hidden and cell state.
        bias: if True the four gate biases are trainable parameters;
            otherwise they are constant zero tensors.
        dropout: dropout probability applied to the hidden state before the
            linear head (active only in training mode).
    """

    def __init__(self, input_size: int, hidden_size: int, bias: bool = True, dropout: float = 0.0):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.dropout = dropout

        # input to hidden weights, stored as (hidden_size, input_size)
        self.w_xi = Parameter(th.empty(hidden_size, input_size))
        self.w_xf = Parameter(th.empty(hidden_size, input_size))
        self.w_xo = Parameter(th.empty(hidden_size, input_size))
        self.w_xc = Parameter(th.empty(hidden_size, input_size))

        # hidden to hidden weights
        self.w_hi = Parameter(th.empty(hidden_size, hidden_size))
        self.w_hf = Parameter(th.empty(hidden_size, hidden_size))
        self.w_ho = Parameter(th.empty(hidden_size, hidden_size))
        self.w_hc = Parameter(th.empty(hidden_size, hidden_size))

        # bias terms: trainable parameters if requested, otherwise constant
        # zeros. The constants are registered as non-persistent buffers so
        # they follow the module across devices/dtypes without adding keys
        # to the state_dict.
        for name in ("b_i", "b_f", "b_o", "b_c"):
            zeros = th.zeros(hidden_size)
            if bias:
                self.register_parameter(name, Parameter(zeros))
            else:
                self.register_buffer(name, zeros, persistent=False)

        # Called before the head is created, so ``fc`` keeps nn.Linear's
        # default initialisation.
        self.reset_parameters()
        self.fc = nn.Linear(self.hidden_size, 1)

    def reset_parameters(self):
        """Re-draw every registered parameter from U(-1/sqrt(H), 1/sqrt(H)).

        This includes the gate biases when ``bias=True`` and, if called after
        construction, the parameters of the linear head as well.
        """
        bound = 1.0 / math.sqrt(self.hidden_size)
        with th.no_grad():
            for param in self.parameters():
                param.uniform_(-bound, bound)

    def _cell_step(self, x, h, c):
        """Apply one LSTM cell update and return ``(h_t, c_t)``."""
        # Gate pre-activations followed by their non-linearities.
        i_t = th.sigmoid(th.mm(x, self.w_xi.t()) + th.mm(h, self.w_hi) + self.b_i)
        f_t = th.sigmoid(th.mm(x, self.w_xf.t()) + th.mm(h, self.w_hf) + self.b_f)
        o_t = th.sigmoid(th.mm(x, self.w_xo.t()) + th.mm(h, self.w_ho) + self.b_o)
        # Candidate cell content.
        g_t = th.tanh(th.mm(x, self.w_xc.t()) + th.mm(h, self.w_hc) + self.b_c)
        # New cell state and hidden state.
        c_t = th.mul(c, f_t) + th.mul(i_t, g_t)
        h_t = th.mul(o_t, th.tanh(c_t))
        return h_t, c_t

    def forward(self, x):
        """Run one cell update from a zero state and project to a scalar.

        Args:
            x: tensor whose first dimension is the batch; all remaining
                dimensions are flattened and must total ``input_size``.

        Returns:
            Tensor of shape ``(batch, 1, 1)``.

        NOTE(review): a loss target must have this same shape. A target of
        shape ``(batch, 1)`` would be broadcast against ``(batch, 1, 1)`` by
        element-wise losses such as MSELoss instead of being compared
        sample-by-sample - confirm against the training loop.
        """
        # reshape (not view) so non-contiguous inputs are accepted too.
        x = x.reshape(x.size(0), -1)
        batch = x.size(0)

        # Zero initial state, sized from the module itself (not from a
        # global) and created on the input's device/dtype.
        h = x.new_zeros(batch, self.hidden_size)
        c = x.new_zeros(batch, self.hidden_size)

        h_t, _ = self._cell_step(x, h, c)

        # Reshape for compatibility: (batch, 1, hidden)
        h_t = h_t.view(batch, 1, -1)
        if self.dropout > 0.0:
            h_t = F.dropout(h_t, p=self.dropout, training=self.training)
        return self.fc(h_t)

    def sample_mask(self):
        """Placeholder kept for interface compatibility; does nothing."""
        pass
"""## Training"""
# Model dimensions. input_size must equal the number of features per sample
# after SlowLSTM.forward flattens its input.
input_size = 4
hidden_size = 3  # 2
num_layers = 1   # not used by the statements in this section
num_classes = 1  # not used by the statements in this section

# Optimisation settings.
num_epochs = 20000
learning_rate = 1e-3  # 0.01

# Model, loss and optimiser.
lstm = SlowLSTM(input_size=input_size, hidden_size=hidden_size)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=lstm.parameters(), lr=learning_rate)