Hi everyone, I’m trying to create a simple RNN model in PyTorch and I’m at a loss here! How do you specify the output dimensions?
Based on the documentation, the last output dimension comes from the hidden size, not from anything I pass in: the output should have the shape (seq_len, batch, num_directions * hidden_size).
But even knowing this, I can't see how to make my simple model produce the output dimension I actually need, so training fails!
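For reference, here is a quick sanity check of the documented shape on a bare nn.RNN (the sizes are just placeholders I picked), and it does match what the docs say:

import torch
import torch.nn as nn

rnn_check = nn.RNN(input_size=1, hidden_size=32, num_layers=1)  # default batch_first=False
x = torch.randn(10, 1, 1)  # (seq_len=10, batch=1, input_size=1)
out, h = rnn_check(x)
print(out.shape)  # torch.Size([10, 1, 32]) = (seq_len, batch, num_directions * hidden_size)
print(h.shape)    # torch.Size([1, 1, 32])  = (num_layers * num_directions, batch, hidden_size)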
My own very simple example is given below:
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
%matplotlib inline

# let's create a generator that yields a fresh chunk of a sine wave each call
def dataset_gen(i, sequence_num=10):
    data = torch.linspace(i * np.pi, (i + 1) * np.pi, steps=sequence_num)
    data = torch.sin(data)
    yield data.unsqueeze(0).unsqueeze(2)  # shape: (batch=1, seq_len, features=1)
#%%
# define the model
class rnn(torch.nn.Module):
    def __init__(self, inputsize, hiddensize, outputsize, numlayers):
        super().__init__()
        self.rnn = nn.RNN(input_size=inputsize, hidden_size=hiddensize,
                          num_layers=numlayers, batch_first=True)

    def forward(self, x, hiddenstate):
        outputs, hiddenstate = self.rnn(x, hiddenstate)
        print('input: ', x.shape)
        print('output: ', outputs.shape)
        print('h: ', hiddenstate.shape)
        outputs = torch.sigmoid(outputs)  # F.sigmoid is deprecated
        return outputs, hiddenstate
inputsize = 1
outputsize = 1
hiddensize = 32
sequence_length = 10
num_layer = 1
batchsize = 1
# uni- or bidirectional (1 or 2)
direction = 1

model = rnn(inputsize, hiddensize, outputsize, num_layer)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# test
data = next(dataset_gen(0, sequence_length))
hiddenstate = torch.zeros(direction * num_layer, batchsize, hiddensize)
# (output, hidden) = model(data.to(device), hiddenstate.to(device))
# training
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def training(step, dataset, criterion, optimizer, model):
    model.train()
    hidden = torch.zeros((direction * num_layer, batchsize, hiddensize), device=device)
    for i in range(step):
        data = next(dataset(i, sequence_length))
        data = data.to(device)
        X = data[:, :-1, :]  # inputs: all points but the last
        Y = data[:, 1:, :]   # targets: the sequence shifted one step ahead
        output, hidden = model(X, hidden)
        hidden = hidden.detach()  # cut the graph so backprop doesn't reach into earlier iterations
        loss = criterion(output, Y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 10 == 0:
            print(f'{i+1}) loss: {loss.item()}')
            plt.plot(X.cpu().numpy().flatten(), 'r.')
            plt.plot(output.detach().cpu().numpy().flatten(), 'b.')
            plt.show()
    return model

training(3, dataset_gen, criterion, optimizer, model)
Running the code above produces the following output:
input: torch.Size([1, 9, 1])
output: torch.Size([1, 9, 32])
h: torch.Size([1, 1, 32])
1) loss: 0.12164192646741867
input: torch.Size([1, 9, 1])
output: torch.Size([1, 9, 32])
h: torch.Size([1, 1, 32])
input: torch.Size([1, 9, 1])
output: torch.Size([1, 9, 32])
h: torch.Size([1, 1, 32])
C:\Users\Testusr\Anaconda3\lib\site-packages\torch\nn\modules\loss.py:443: UserWarning: Using a target size (torch.Size([1, 9, 1])) that is different to the input size (torch.Size([1, 9, 32])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
The output shape is completely off: I get (1, 9, 32) where my target Y is (1, 9, 1), which is exactly what the MSELoss warning is complaining about.
Can anyone please help me understand what is going on here, and how I'm supposed to get an output of size outputsize = 1?
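My current guess is that I need to project the 32 hidden features down to outputsize with an nn.Linear layer, something like the sketch below, but I'm not sure this is the intended way (self.out is my own addition, not something from the docs):

class rnn(torch.nn.Module):
    def __init__(self, inputsize, hiddensize, outputsize, numlayers):
        super().__init__()
        self.rnn = nn.RNN(input_size=inputsize, hidden_size=hiddensize,
                          num_layers=numlayers, batch_first=True)
        # my guess at a fix: map hidden features (32) down to outputsize (1)
        self.out = nn.Linear(hiddensize, outputsize)

    def forward(self, x, hiddenstate):
        outputs, hiddenstate = self.rnn(x, hiddenstate)
        outputs = torch.sigmoid(self.out(outputs))  # would now be (batch, seq_len, outputsize)
        return outputs, hiddenstate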
Thanks in advance