I am using a pretrained resnet and adding layers on top of it , but the model keep giving the same scores after each epoch . Following is my code-
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import *
from convcap_resnet import resnet50, ResNet, rename_keys
#resnet=resnet50().cuda()
#resnet.eval()
#resnet.load_state_dict(rename_keys(torch.load("/content/drive/My Drive/triplet_resnet50_concat_3.25.pth")))
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.resnet=resnet50()
self.relu=nn.ReLU()
self.linear1=nn.Linear(2048,128)
self.linear2=nn.Linear(128,3)
self.softmax=nn.Softmax(1)
#self.resnet.train()
self.resnet.load_state_dict(rename_keys(torch.load("/content/drive/My Drive/triplet_resnet50_concat_3.25.pth")))
for p in self.resnet.parameters():
p.requires_grad=True
def forward(self, inputs):
image_output = self.resnet(inputs)[1]
x = self.linear1(image_output)
x = self.relu(x)
x = self.linear2(x)
x = self.softmax(x)
return x
import torch.optim as optim
from sklearn.metrics import f1_score,accuracy_score
import math,gc
model=Net()
model.train()
model.cuda()
criterion = nn.CrossEntropyLoss()#nn.NLLLoss(size_average=False)
optimizer = optim.Adam(model.parameters(), lr=1e-5)
bs=32
top=val_input_ids.shape[0]
model.train()
for epoch in range(10):
running_loss = 0.0
for i in tqdm_notebook(range(math.ceil(data_train.shape[0]/bs))):
inputs= torch.tensor(process_input_images(data_train.iloc[i*bs:min(i*bs+bs,train_input_ids.shape[0]),:])).permute([0,3,1,2]).float().cuda()
labels = torch.tensor(train_labels[i*bs:min(i*bs+bs,train_labels.shape[0])]).cuda()
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = model(inputs)
loss = criterion(outputs, labels)
#print(loss)
#model.zero_grad()
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
print(running_loss/(math.ceil(data_train.shape[0]/bs)))
torch.save(model.state_dict(),"/content/drive/My Drive/temp-2-{}.pt".format(epoch))
model.eval()
with torch.no_grad():
predictions=model(torch.tensor(process_input_images(data_val.iloc[0:2,:])).permute([0,3,1,2]).float().cuda()
for j in tqdm(range(math.ceil(data_val.shape[0]/bs))):
val_inputs=torch.tensor(process_input_images(data_val.iloc[j*bs:min(j*bs+bs,val_input_ids.shape[0]),:])).permute([0,3,1,2]).float().cuda()
predict = model(val_inputs)
predictions=torch.cat((predictions,predict),0)
predictions=predictions[2:]
print("val f1=",f1_score(pd.DataFrame(data_val.iloc[:,1],dtype='int'),np.argmax(predictions.detach().cpu().numpy(),axis=1),average='macro')," acc=",accuracy_score(pd.DataFrame(data_val.iloc[:,1],dtype='int'),np.argmax(predictions.detach().cpu().numpy(),axis=1)))
gc.collect()
model.train()
print('-'*10)
print('Finished Training')