I am trying to replicate this model from matlab but for some reason, my loss function does not decrease a lot and stays near 1.80 (initial value) - model
Here is my code of the model and the training-accuracy loop:
class go(nn.Module):
def __init__(self):
super(go, self).__init__()
self.model_go = torch.hub.load('pytorch/vision:v0.10.0', 'googlenet', pretrained=True)
def forward(self, x):
#feature extraction
self.output = create_feature_extractor(self.model_go, return_nodes={'avgpool':0}) #return 10th layer as in paper
self.val = self.output(x.float())['0']
#flatten
#self.output = torch.flatten(self.output)
return self.val
class lstm(nn.Module):
def __init__(self, input_size = 1024, hidden_size = 2000, num_layers = 2, bidirectional = True, n_class=2):
super(lstm, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.bidirectional = bidirectional
self.n_class = n_class
self.model_lstm = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers , bidirectional=self.bidirectional, batch_first=True)
self.hidden_state = None
self.output_layer = nn.Sequential(
nn.Linear(2 * self.hidden_size if self.bidirectional==True else self.hidden_size, self.n_class)
)
self.fc = nn.Sequential(
nn.Linear(self.n_class, self.n_class),
nn.Dropout(p=0.5, inplace=False)
)
def reset_hidden_state(self):
self.hidden_state = None
def forward(self, x):
lstm_output, hidden_state = self.model_lstm(x, self.hidden_state)
output = self.output_layer(lstm_output) #torch.Size([12, 30, 1024]) -> torch.Size([12, 30, 2])
output = output[:, -1, :] #12 x n_class
output = self.fc(output)
return output
#return torch.sigmoid(output)
modela = go().to('cpu')
modelb = lstm(n_class = len(new_categories_names)).to(device)
#modelb.load_state_dict(torch.load('/content/gdrive/My Drive/ColabNotebooks/human_activity_recognition/final_model1_train_test.pth'))
## Loss and optimizer
criterion = nn.CrossEntropyLoss()
#criterion = nn.BCELoss()
optimizer = optim.Adam(modelb.parameters(), lr=0.0001)
lr_scheduler = ReduceLROnPlateau(optimizer, mode='min',factor=0.5, patience=5,verbose=1)
for epoch in range(3):
modelb.train()
loss_epoch = 0
#iterate over the videos
for batch_idx, (_, _) in enumerate(zip(video_batches_train, label_batches_train)):
print(batch_idx)
inputs_train, targets_train = create_inputs(video_batches_train[batch_idx], label_batches_train[batch_idx], num_frames = 30) #get data -> torch.Size([12, 30, 3, 224, 224]), torch.Size([12, 6])
num_vids = inputs_train.shape[0]
# extract features of batch
features = []
#14 per batch
for input in inputs_train:
frms = []
#video's frames, 30 per video
for fr in input:
frms.append(modela(fr.unsqueeze(0).to('cpu'))) #torch.Size([1, 1024, 1, 1])
features.append(torch.cat(frms)) #torch.Size([30, 1024, 1, 1])
del frms
features = torch.stack(features) #torch.Size([12, 30, 1024, 1, 1])
del inputs_train
optimizer.zero_grad()
modelb.reset_hidden_state()
scores = modelb(features.view(num_vids, frames, -1).to(device)) #cuda, torch.Size([12, 30, 1024]) -> torch.Size([12, 2]),
del features
#loss = criterion(scores[:, -1, :],targets_train.max(1)[1].to(device))
loss = criterion(scores,targets_train.max(1)[1].to(device))
del targets_train
loss.backward()
optimizer.step()
loss_epoch += loss.item()
print(f"mini in epoch {epoch} :::: {loss.item()}")
#validation after each epoch
with torch.no_grad():
modelb.eval
features = []
#create test features
inputs_test, targets_test = create_inputs(video_batches_test[0], label_batches_test[0], num_frames = 30) #only 1 batch for validation
num_vids = inputs_test.shape[0]
for input_test in inputs_test:
frms = []
#video's frames, 40 per video
for fr in input_test:
frms.append(modela(fr.unsqueeze(0).to('cpu')))
features.append(torch.cat(frms))
del frms
features = torch.stack(features) #20batchesx(14x20x3x224x224)
del inputs_test
scores = modelb(features.view(num_vids, frames, -1).to(device)) #cuda
total = 100*(torch.nn.functional.softmax(scores).max(1)[1] == targets_test.to('cuda').max(1)[1]).sum()
print('Accuracy: ', total/targets_test.shape[0])
del scores, features, targets_test
#reshuffle after each epoch
x = np.array(video_batches_train).flatten()
y = np.array(label_batches_train).reshape((-1, label_batches_train[0].shape[1]))
x, y = shuffle(x, y)
video_batches_train, label_batches_train = create_batches(x, y, num_batches = 20)
del x,y
lr_scheduler.step(total)
del total
print(f"Loss in epoch {epoch} :::: {loss_epoch/len(video_batches_train)}")
The only difference is that my batch size is 12 instead of 16 and I am using 30 frames instead of 40 frames due to RAM issues.
Is there anything that I am missing?