Hello @ptrblck, thanks so much for this. I think I am implementing it differently, and maybe that's why I get an increasing loss instead. So will you have a separate loss for the CNN and RNN? Can you please demonstrate what you are saying with code? Here is my model. Note: it is an image regression problem:
def CNN(num_outputs):
    """Build an InceptionV3 feature backbone for regression.

    Loads the pretrained InceptionV3 (auxiliary classifier disabled) and
    swaps its final fully-connected head for a fresh linear layer that
    emits ``num_outputs`` values per image.
    """
    backbone = models.inception_v3(pretrained=True, aux_logits=False)
    in_features = backbone.fc.in_features
    backbone.fc = nn.Linear(in_features, num_outputs)
    return backbone
class BiGRU(nn.Module):
    """CNN feature extractor followed by a bidirectional GRU, for
    per-frame image regression.

    Input:  (batch, seq_len, C, H, W)
    Output: (batch, seq_len, output_dim)
    """

    def __init__(self, input_features, hidden_size, num_layers, output_dim,
                 seq_len, batch_size):
        super(BiGRU, self).__init__()
        # seq_len / batch_size are accepted for signature compatibility,
        # but forward() derives both from the input shape so the module
        # works for any batch size (e.g. a smaller final batch).
        self.input_features = input_features
        self.hidden_size = hidden_size
        self.cnn = CNN(input_features)
        # nn.GRU default layout is (seq, batch, feature).
        self.gru = nn.GRU(input_features, hidden_size, num_layers=num_layers,
                          bidirectional=True)
        # *2 because the GRU is bidirectional.
        self.out1 = nn.Linear(hidden_size * 2, output_dim)

    def forward(self, input):
        # BUG FIX: the original read globals (batch_size, seq_len, C, H, W,
        # input_features, hidden_size) instead of instance state, and —
        # critically — reshaped the batch-major (batch*seq, feat) CNN output
        # directly into a seq-major view, which scrambles frames across
        # sequences and makes the model unable to learn.
        batch, seq = input.size(0), input.size(1)

        # Fold batch and time so every frame goes through the CNN at once.
        feats = self.cnn(input.view(batch * seq, *input.shape[2:]))

        # Restore (batch, seq, feat), then permute to (seq, batch, feat)
        # as nn.GRU expects by default.
        feats = feats.view(batch, seq, self.input_features).permute(1, 0, 2)

        output, _ = self.gru(feats)        # (seq, batch, 2 * hidden)
        output = output.permute(1, 0, 2)   # back to (batch, seq, 2 * hidden)
        return self.out1(output)           # (batch, seq, output_dim)
# Hyperparameters (input_features, hidden_size, num_layers, output_dim,
# seq_len, batch_size, lr) are assumed to be defined earlier in the script.
model = BiGRU(input_features, hidden_size,num_layers,output_dim,seq_len,batch_size)
# MSE loss: this is a regression problem, so targets must be float tensors
# with the same shape (and dtype) as the model output.
loss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
So the input to the model has the shape (batch_size, seq_len, C, H, W), and the output has the shape (batch_size, seq_len, output_dim). But the model is not learning — the loss and error keep increasing.
Below is my Dataset code, which creates sliding sequences of images. These sequences are later grouped into batches with torch.utils.data.DataLoader(Dataset, batch_size = batch_size,shuffle = False,drop_last = False) to produce batches of shape (batch_size, seq_len, C, H, W).
class CustomImageDataset(Dataset):
    """Sliding-window image-sequence dataset.

    Item ``idx`` is the window of ``seq_length`` consecutive frames starting
    at row ``idx`` of ``img_labels``, so consecutive items overlap by
    ``seq_length - 1`` frames.

    Returns per item:
        images: tensor of shape (seq_length, C, H, W)
        labels: float32 tensor of shape (seq_length, num_targets)

    ``img_labels`` is assumed DataFrame-like: column 0 is the image
    filename, columns 1..n are the numeric regression targets — TODO
    confirm against the CSV that feeds this.
    """

    def __init__(self, img_labels, img_dir, seq_length, transform=None):
        self.img_labels = img_labels    # table: filename + target columns
        self.img_dir = img_dir          # directory prefix for column-0 filenames
        self.seq_length = seq_length    # frames per sample window
        self.transform = transform      # optional per-image transform

    def __len__(self):
        # Last valid window start is len - seq_length, so that
        # idx + seq_length never runs past the end of the table.
        return len(self.img_labels) - self.seq_length

    def __getitem__(self, idx):
        # BUG FIX: removed the debug print here — __getitem__ runs once per
        # window per epoch (often inside DataLoader workers), so printing
        # floods stdout and noticeably slows data loading.
        images = []
        labels = []
        for i in range(idx, idx + self.seq_length):
            img_path = os.path.join(self.img_dir, self.img_labels.iloc[i, 0])
            image = read_image(img_path)
            if self.transform:
                image = self.transform(image)
            images.append(image)
            # BUG FIX: cast targets explicitly to float32. torch.tensor()
            # on a raw pandas row slice can produce object/float64 dtypes,
            # which either raises or mismatches the float32 model output
            # under MSELoss.
            labels.append(
                torch.tensor(self.img_labels.iloc[i, 1:].to_numpy(dtype="float32"))
            )
        return torch.stack(images), torch.stack(labels)