Hi all, a PyTorch newbie here.
I was trying to use a stacked LSTM model for time series analysis, and I wanted to batch my input. The input tensors are put into a DataLoader and moved to CUDA when I call
model(batch.to(device)), with the model moved to the GPU as well, but I still get an error telling me the input tensors are on the GPU while the hidden states are on the CPU.
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

input_dim = 1
hidden_dim = 64
num_layers = 3
output_dim = 1
num_epochs = 40
lr = 0.01
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = 'cpu'
print(f'***** The current device is {device} !!! *****')
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, bidirectional):
        super().__init__()
        # direction multiplier: 2 for a bidirectional LSTM, 1 otherwise
        self.d = 2 if bidirectional else 1
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True,
                            bidirectional=bidirectional, dropout=0.2)
        self.fc = nn.Linear(self.d * hidden_dim, output_dim)

    def forward(self, x):
        # NOTE: torch.zeros allocates on the CPU by default, so h0/c0 end up on
        # the CPU even when x has already been moved to the GPU
        h0 = torch.zeros(self.d * self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.d * self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        # use only the last time step for the prediction
        out = self.fc(out[:, -1, :])
        return out
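From the error I suspect forward() is where things go wrong: the initial states are allocated on the CPU while x is already on the GPU. Below is a minimal sketch of the fix I have in mind, creating h0/c0 directly on x.device (the requires_grad_/detach pair seems unnecessary for freshly created zeros, so I dropped it). Is this the right approach?

    def forward(self, x):
        # create the initial states on the same device/dtype as the input batch
        h0 = torch.zeros(self.d * self.num_layers, x.size(0), self.hidden_dim,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros(self.d * self.num_layers, x.size(0), self.hidden_dim,
                         device=x.device, dtype=x.dtype)
        out, (hn, cn) = self.lstm(x, (h0, c0))
        return self.fc(out[:, -1, :])

(As I understand it, nn.LSTM defaults the states to zeros on the input's device when no (h0, c0) tuple is passed, so simply calling self.lstm(x) should also work.)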
print(y_train.shape, type(y_train))
print(y_test.shape, type(y_test))
model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim,
             num_layers=num_layers, bidirectional=True).to(device)  # model moved to the GPU, as mentioned above
loss_fn = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model.parameters(), lr=lr)
from torch.utils.data import Dataset, DataLoader, random_split
train_dataloader = DataLoader(x_train, batch_size=32, drop_last=False)
test_dataloader = DataLoader(x_test, batch_size=32, drop_last=False)
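A side question while I'm at it: since the loss is computed against the full y_train, would it be cleaner to zip the targets into the batches with a TensorDataset, so each batch carries its own labels? A sketch of what I mean (assuming x_train and y_train are row-aligned tensors):

    from torch.utils.data import TensorDataset

    train_dataset = TensorDataset(x_train, y_train)  # pairs each input window with its target
    train_dataloader = DataLoader(train_dataset, batch_size=32, drop_last=False)
    for x_batch, y_batch in train_dataloader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        # ... per-batch forward and loss here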
# print(len(train_dataloader), len(test_dataloader))
board_train = np.zeros(num_epochs)
board_test = np.zeros(num_epochs)
# execute per epoch
for t in range(num_epochs):
    y_train_pred_lst = []
    for i, batch in tqdm(enumerate(train_dataloader), total=len(train_dataloader)):
        y_train_pred_batch = model(batch.to(device))  # batch moved to the GPU here
        y_train_pred_lst.append(y_train_pred_batch)
    y_train_pred = torch.cat(y_train_pred_lst)
    # print('Shape for y_train_pred:', y_train_pred.shape)
    # y_train must be on the same device as the predictions for the loss to work
    loss_train = loss_fn.to(device)(y_train_pred, y_train)
    print("Epoch ", t + 1, "MSE: ", loss_train.item())
    board_train[t] = loss_train.item()
    optimiser.zero_grad()
    loss_train.backward()
    optimiser.step()

    y_test_pred_lst = []
    for i, batch in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
        y_test_pred_batch = model(batch.to(device))
        y_test_pred_lst.append(y_test_pred_batch)
    y_test_pred = torch.cat(y_test_pred_lst)
    loss_test = loss_fn.to(device)(y_test_pred, y_test)
    board_test[t] = loss_test.item()

print('Training Completed!')
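One more thing I'm unsure about: the test pass above runs with gradients enabled and dropout active. Would wrapping it like this be the right way to evaluate (a sketch, assuming y_test still lives on the CPU, hence the .to(device))?

    model.eval()               # disable dropout for evaluation
    with torch.no_grad():      # no autograd graph needed for the test pass
        y_test_pred = torch.cat([model(batch.to(device)) for batch in test_dataloader])
        loss_test = loss_fn(y_test_pred, y_test.to(device))
    model.train()              # re-enable dropout before the next training epoch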