I’m using a PyTorch Dataset, and I have:
def __getitem__(self, index):
    # Fresh random values every call, uniform in [-0.5, 1.5)
    random_tensor = torch.rand(self.seq_length, self.feature_dim) * 2
    random_tensor = random_tensor - 0.5
    return random_tensor, random_tensor * 2
So all I’m doing is learning how to multiply a tensor by 2.
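For context, the full Dataset is roughly the following sketch (RandomDoubleDataset, num_samples, seq_length, and feature_dim are placeholder names for illustration; the __getitem__ body is exactly the one above):

import torch
from torch.utils.data import Dataset

class RandomDoubleDataset(Dataset):
    def __init__(self, num_samples, seq_length, feature_dim):
        self.num_samples = num_samples
        self.seq_length = seq_length
        self.feature_dim = feature_dim

    def __len__(self):
        return self.num_samples

    def __getitem__(self, index):
        # Fresh random values every call, uniform in [-0.5, 1.5);
        # the target is just the input doubled
        random_tensor = torch.rand(self.seq_length, self.feature_dim) * 2
        random_tensor = random_tensor - 0.5
        return random_tensor, random_tensor * 2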
My model is:
class BaselineModel(nn.Module):
    def __init__(self, feature_dim=1, hidden_size=1, num_layers=2):
        super(BaselineModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=feature_dim,
                            hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, x, hidden):
        # With the default batch_first=False, lstm_out is (seq_len, batch, hidden_size)
        lstm_out, hidden = self.lstm(x, hidden)
        return lstm_out, hidden

    def init_hidden(self, batch_size):
        # Hard-coded to (num_layers=2, batch=1, hidden_size=1); batch_size is unused here
        hidden = torch.zeros(2, 1, 1)
        cell = torch.zeros(2, 1, 1)
        return (hidden, cell)
So basically, a simple one-cell, one-dimensional LSTM.
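To make sure I understand the shapes involved, I also ran this quick sanity check (the sizes here are placeholders; with the default batch_first=False, nn.LSTM wants input of shape (seq_len, batch, input_size) and h_0/c_0 each of shape (num_layers, batch, hidden_size)):

model = BaselineModel(feature_dim=1, hidden_size=1, num_layers=2)
x = torch.zeros(5, 13, 1)    # (seq_len=5, batch=13, feature_dim=1)
h0 = torch.zeros(2, 13, 1)   # (num_layers=2, batch=13, hidden_size=1)
c0 = torch.zeros(2, 13, 1)
out, (hn, cn) = model(x, (h0, c0))
print(out.shape)             # torch.Size([5, 13, 1])

My training loop is: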
model = BaselineModel(feature_dim=FEATURE_DIM, hidden_size=FEATURE_DIM)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
loss_fn = torch.nn.MSELoss(reduction='sum')
wandb.watch(model)

for epoch in range(250):
    model.train(True)  # Set model to training mode
    running_loss = 0.0
    for i, data in enumerate(data_loaders['train']):
        hidden = model.init_hidden(13)
        inputs = data[0]
        outputs = data[1]
        optimizer.zero_grad()
        pred, hidden = model(inputs, hidden)
        loss = loss_fn(pred, outputs)
        loss.backward()
        # Clip after backward() so there are actually gradients to clip
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        running_loss += loss.item()
    epoch_loss = running_loss / len(data_loaders['train'])
    wandb.log({"Training Loss": epoch_loss})
    print('Epoch: {}\tLoss: {:.4f}'.format(epoch, epoch_loss))
However, this doesn’t really decrease the loss: it starts at 0.9490 and after 250 epochs it’s still stuck around 0.92. I realize that an LSTM is overkill for this, but I want to get a VERY basic version working before moving on to my real sequence data.
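For reference, the rest of the setup is roughly this sketch (SEQ_LENGTH, BATCH_SIZE, the sample count, and the wandb project name are placeholder values, and RandomDoubleDataset is the placeholder name from the sketch above):

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import wandb

SEQ_LENGTH = 5    # placeholder
FEATURE_DIM = 1
BATCH_SIZE = 13

wandb.init(project="lstm-sanity")  # placeholder project name

train_set = RandomDoubleDataset(num_samples=130, seq_length=SEQ_LENGTH,
                                feature_dim=FEATURE_DIM)
# Note: DataLoader yields batches of shape (batch, seq_len, feature_dim),
# while nn.LSTM with the default batch_first=False expects
# (seq_len, batch, feature_dim)
data_loaders = {'train': DataLoader(train_set, batch_size=BATCH_SIZE)}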