No, previously I was not including that part of the ResNet in my training parameters. Now that I have added pretrain_tune, the first condition is never executed.
Unfortunately, I couldn't create a small executable version of the issue. I tried the following code, but I can't find the problem in it; maybe it is related to my dataset. Thanks a lot for all the help.
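As a quick sanity check (a minimal sketch, not part of the original script, assuming model is the testnet instance built below), one can print which parameters actually end up frozen:

for name, p in model.named_parameters():
    # Parameters matched by frozen_points should print requires_grad=False
    print(name, p.requires_grad)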
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
import os
import numpy as np
import random
import torchvision.models as models
# Seed everything for reproducibility
seed = 10
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
model = models.resnet152(pretrained=True)  # pretrained backbone; its early layers are reused in testnet below
sigmoid = nn.Sigmoid()
loss = nn.BCEWithLogitsLoss(reduction='mean')
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        # Collapse all dimensions except the batch dimension
        return x.view(x.size()[0], -1)
class testnet(nn.Module):
    def __init__(self):
        super(testnet, self).__init__()
        self.flat = Flatten()
        # First 7 children of resnet152, i.e. everything up to and including layer3 (ResNets/ResNeXts)
        self.Conv_pretrain = nn.Sequential(*list(model.children())[0:7])
        # layer3 output for a 224x224 input is 1024 x 14 x 14 = 200704 features
        self.linear1 = nn.Linear(200704, 1000)
        self.linear2 = nn.Linear(1000, 1)

    def forward(self, x):
        out1 = self.linear2(self.linear1(self.flat(self.Conv_pretrain(x))))
        # import pdb; pdb.set_trace()
        return out1
if __name__ == '__main__':
    all_ = 400
    inputs = torch.rand(all_, 3, 224, 224).float()
    labels = torch.LongTensor(all_).random_(0, 2).float().cuda()
    model = testnet().cuda().float()
    # Name prefixes of the submodules whose parameters should be frozen
    frozen_points = ['Conv_pretrain.0', 'Conv_pretrain.1']
    # ,'Conv_pretrain.2','Conv_pretrain.3','Conv_pretrain.4','Conv_pretrain.5','Conv_pretrain.6.0','Conv_pretrain.6.1','Conv_pretrain.6.2','Conv_pretrain.6.3','Conv_pretrain.6.4','Conv_pretrain.6.5','Conv_pretrain.6.6','Conv_pretrain.6.7','Conv_pretrain.6.8'
    trainables1 = []
    trainables2 = []
    not_trainables = []
    pretrain_tune = []
    for name, p in model.named_parameters():
        if name.split('.')[0] == 'Conv_pretrain':
            # Freeze a parameter if its name starts with one of the frozen prefixes
            # (the slice lengths 15 and 17 match 'Conv_pretrain.x' and 'Conv_pretrain.6.x')
            if name[0:15] in frozen_points or (name[0:17] in frozen_points and name[17] == '.'):
                p.requires_grad = False
                not_trainables.append(p)
            else:
                pretrain_tune.append(p)
        else:
            if name[0:7] == 'linear1':
                trainables1.append(p)
            elif name[0:7] == 'linear2':
                trainables2.append(p)
    # import pdb; pdb.set_trace()  # uncomment to inspect the four parameter buckets
    # One SGD parameter group per bucket, each with its own base lr
    optimizer = optim.SGD([{"params": trainables1, "lr": 0.001},
                           {"params": trainables2, "lr": 1e-4},
                           {"params": pretrain_tune, "lr": 1e-5}],
                          momentum=0.9, weight_decay=0.0001)
    # LambdaLR multiplies each group's base lr by whatever its lambda returns for the current epoch
    lambda1 = lambda epoch: 1.0 if epoch < 10 else (10 if epoch < 31 else 1)
    lambda2 = lambda epoch: 1
    lambda3 = lambda epoch: 1
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, [lambda1, lambda2, lambda3])
    batch = 8
    l = 0
    mean = 0
    while l < all_:
        optimizer.zero_grad()
        curr_input = inputs[l:l + batch].cuda()
        out = model(curr_input).squeeze()
        # print(sigmoid(out))
        print(sigmoid(out).cpu().detach().sum())
        # import pdb; pdb.set_trace()
        lossf = loss(out, labels[l:l + batch])
        lossf.backward()
        optimizer.step()
        l += batch
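Note that the loop above is a single pass over the data and never calls scheduler.step(). A minimal sketch of what the full epoch loop could look like (the epoch count of 40 is hypothetical; with the lambdas above, group 0's lr becomes 0.001 * 10 = 0.01 for epochs 10-30 and reverts afterwards):

for epoch in range(40):  # hypothetical number of epochs
    # ... run the batch loop above once per epoch ...
    scheduler.step()
    print(epoch, [g['lr'] for g in optimizer.param_groups])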