Hi, I am a kind of Newb in pytorch
What I’m trying to do is a time series prediction model.
After many trials and errors, I found the Keras code I wanted and tried to apply it to the pytorch.
The main point of the Keras model is set to stateful = True, so I also used the hidden state and cell state values of the previous mini-batch without initializing the values of the hidden state and cell state.
But my code does not drop the loss value at all, and the prediction value seems very strange. I have no idea what’s wrong. What I want to do is to start with the most basic model, so I set the value of look-ahead day and look-back day to 1.(It is the same for the Keras code.) Feature dimension is also 1.
So a shape of input_X should be (1, batch_size, 1)
Anyone, please take a look my code and point out why loss value does not reduce. The summary of keras model and pytorch code is below.
# Keras Model
def build_model(self):
# first add input to hidden1
self.model.add(LSTM(
units=120,
batch_input_shape=(672,1,1),
stateful=True)
self.model.add(Dropout(0.1))
# add dense layer with output dimension to get output for one time_step
self.model.add(Dense(units=1))
# Repeat for look_ahead steps to get outputs for look_ahead timesteps.
self.model.add(RepeatVector(1)
# add activation
self.model.add(Activation("linear"))
# compile model and print summary
self.model.compile(loss="MSE", optimizer=Adam(lr=0.02,decay= .99))
return self.model
And my pytorch code
Xtrain = torch.load('X_train.pt')
ytrain = torch.load('y_train.pt')
batch_size = 672
remained = len(Xtrain) % 672 # len(Xtrain) = 22846
new_len = len(Xtrain) - remained # new_len = 22176
# In a stateful network, you should only pass inputs with a number of samples that can be divided by the batch size
X_train = Xtrain[:new_len]
y_train = ytrain[:new_len]
# transpose for input shape
X_train = np.transpose(X_train, (1, 0, 2))
batch_X_train = X_train.reshape(-1,672,1)
y_train = np.transpose(y_train, (1, 0, 2))
batch_y_train = y_train.reshape(-1,672,1)
print(batch_X_train.shape) # (33, 672, 1)
print(batch_y_train.shape) # (33, 672, 1)
trainX_tensor = torch.FloatTensor(batch_X_train)
trainY_tensor = torch.FloatTensor(batch_y_train)
trainData = TensorDataset(trainX_tensor, trainY_tensor)
class LSTM(nn.Module):
def __init__(self, input_dim, hidden_size, out_dim, num_layers, batch_size, dropout, device):
super(LSTM, self).__init__()
self.batch_size = batch_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.dropout_p = dropout
self.device = device
self.lstm = nn.LSTM(input_dim, hidden_size, num_layers=num_layers)
self.fc1 = nn.Linear(hidden_size, hidden_size//2)
self.dropout = nn.Dropout(p = self.dropout_p)
self.fc2 = nn.Linear(hidden_size//2,out_dim)
def forward(self, x, hidden, cell):
lstm_out, (hidden, cell) = self.lstm(x, (hidden, cell))
out = self.fc1(lstm_out)
out = F.relu(out)
out = self.dropout(out)
out = self.fc2(out)
return out, hidden, cell
epoch = 5
input_dim = 1
out_dim = 1
hidden_size = 120
num_layers = 1
batch_size = 672
dropout = 0.1
learning_rate = .02
loss_function = nn.MSELoss()
model = LSTM(input_dim, hidden_size, out_dim, num_layers, batch_size, dropout, device).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=.99)
# initialize hidden and cell state
def hidden_state_init(stack_num, bsz, hidden):
hw = torch.empty(stack_num, bsz, hidden)
cw = torch.empty(stack_num, bsz, hidden)
h_state = torch.nn.init.xavier_uniform_(hw)
c_state = torch.nn.init.xavier_uniform_(cw)
return h_state,c_state
for epoch_num in range(epoch):
model.train()
y_prediction = []
loss_list = []
average_loss = 0
h_state, c_state=hidden_state_init(num_layers, batch_size,hidden_size)
for idx, data in enumerate(trainData):
X, y = data
batch_size = X.shape[0]
X = X.reshape(-1, batch_size, 1)
# For stateful
h_state = h_state.detach()
c_state = c_state.detach()
optimizer.zero_grad()
y_pred, h_state, c_state = model(X, h_state, c_state)
y_prediction.append(y_pred)
loss = loss_function(y_pred, y)
loss.backward()
optimizer.step()
average_loss += (loss.item() / 33)
loss_list.append(loss)
print(average_loss)
1.0038617777101921
1.0039293639587632
1.003980219364166
1.0042506131258877
1.0043293851794615
I’m not sure what I have to do now. Any suggestion would be really appreciated.