Hello all. I am trying to train an encoder-decoder network to predict a float value at each timestep given an audio feature. My network seems to be training, but it only predicts down to -1 and my targets go down to -10. I am an amateur machine learning practitioner, just starting to grasp RNNs.
I assume I am making a mistake with my architecture or with my loss function. Can anybody take a look and let me know what I am doing wrong?
Inputs are of shape [1, 40, 121]
and targets are of shape [1, 121]
from torch import nn
import torch.nn.functional as F
import torch
device = 'cuda'
input_size = 40
hidden_size = 121
output_size = 1
class EncoderRNN(nn.Module):
def __init__(self, input_size, hidden_size):
super(EncoderRNN, self).__init__()
self.gru = nn.GRU(input_size, hidden_size, batch_first=False)
self.hidden = torch.zeros(1, bs, hidden_size).to("cuda" if torch.cuda.is_available() else "cpu")
def forward(self, input, hidden):
output = input.squeeze(dim=1)
output = output.view(121, -1, 40)
output, hidden = self.gru(output, hidden)
return output, hidden
class DecoderRNN(nn.Module):
def __init__(self, hidden_size, output_size):
super(DecoderRNN, self).__init__()
self.hidden_size = hidden_size
self.gru = nn.GRU(hidden_size, hidden_size, batch_first=False)
self.linear = nn.Linear(hidden_size, output_size)
def forward(self, input, hidden):
output = F.relu(input)
output, hidden = self.gru(output, hidden)
output = self.linear(output[0])
return output, hidden
class EDRNN(nn.Module):
def __init__(self, input_size, output_size):
super().__init__()
self.enc = EncoderRNN(input_size, hidden_size)
self.dec = DecoderRNN(hidden_size, output_size)
self = self.cuda() if CUDA else self
def forward(self, xb):
self.zero_grad()
enc_out, hidden = self.enc(x, self.enc.hidden)
dec_out, hidden = self.dec(enc_out, hidden)
return hidden.squeeze(dim=0)
def decode(self, x): # for inference
pass
model = EDRNN(input_size, output_size)
criterion = nn.L1Loss(reduction="none")
enc_optim = torch.optim.Adam(model.enc.parameters(), lr = 0.001)
dec_optim = torch.optim.Adam(model.dec.parameters(), lr = 0.001)
for epoch in range(0,100):
for x, y in train_dl:
model.zero_grad()
output = model(x)
loss = criterion(output, y.squeeze(dim=1))
loss.sum().backward()
enc_optim.step()
dec_optim.step()
print(loss)