Background:
I’m trying to train on image pairs, which are views from different angles of the same object. I want to perform a binary classification task. I’m using 2 pretrained DenseNet-121, which are later connected by some FC layers and finally a sigmoid activation. This is clubbed with a BCELoss. Big pieces of code follow, I think Model code and training loop code should be helpful.
What I’ve tried/observed till now:
a) varying learning rate. from 1e-2 to 1e-4. No changes in loss even then, trained for 3 epochs for 1e-2, 1e-3, 1e-4.
b) outputs are always 0.5 from the network. The layers aren’t initialised to 0/1/(any constant) right? So is this the expected behavior?
Please help!
Model code:
class classifier(nn.Module):
def __init__(self):
super(classifier,self).__init__()
def get_densenet_model():
model = torch.hub.load('pytorch/vision:v0.6.0', 'densenet121', pretrained=True)
modules=list(model.children())[:-1]
model=nn.Sequential(*modules)
return model
self.frontal_model=get_densenet_model()
self.lateral_model=get_densenet_model()
LOG(2,"DenseNet121 models loaded")
self.frontal_fc1=nn.Linear(1024*8*8,1024*8)
self.lateral_fc1=nn.Linear(1024*8*8,1024*8)
self.frontal_fc2=nn.Linear(1024*8,512)
self.lateral_fc2=nn.Linear(1024*8,512)
self.final_fc1=nn.Linear(1024,512)
self.final_fc2=nn.Linear(512,128)
self.final_fc3=nn.Linear(128,1)
self.sigmoid=nn.Sigmoid()
def forward(self, frontal_img, lateral_img):
x=self.frontal_model(frontal_img)
#x=self.frontal_model.features(frontal_img)
y=self.lateral_model(lateral_img)
#y=self.lateral_model.features(lateral_img)
x=x.view(x.size(0),-1)
y=y.view(y.size(0),-1)
x=nn.functional.relu(self.frontal_fc1(x))
x=nn.functional.relu(self.frontal_fc2(x))
y=nn.functional.relu(self.lateral_fc1(y))
y=nn.functional.relu(self.lateral_fc2(y))
x=torch.cat((x,y),1)
x=nn.functional.relu(self.final_fc1(x))
x=nn.functional.relu(self.final_fc2(x))
x=nn.functional.relu(self.final_fc3(x))
x=self.sigmoid(x)
return x.squeeze()
Relevant to data loading:
class MyDataset(Dataset):
def __init__(self, image_dir, label_csv_path, frontal_lateral_path, transform=None, resize_=None):
image_path_list=[]
frontal_lateral_df=pd.read_csv(frontal_lateral_path)
LOG(2, "read frontal-lateral csv completed")
def get_full_path(file_):
return os.path.join(image_dir,file_+".png")
for i in range(len(frontal_lateral_df)):
image_path_list.append( (get_full_path(frontal_lateral_df['frontal'][i]),get_full_path(frontal_lateral_df['lateral'][i]) ))
label_df=pd.read_csv(label_csv_path)
LOG(2, "read label csv completed")
labels=label_df['normal']
labels=list(np.array(labels).astype(float))
self.data_paths = image_path_list
self.labels = labels
self.transform = transform
self.resize = resize_
LOG(2, "Dataset initialisation completed")
def __getitem__(self, index):
frontal = Image.open(self.data_paths[index][0])
frontal.convert('RGB')
if self.transform:
frontal = self.transform(frontal)
if self.resize:
frontal = self.resize(frontal)
lateral = Image.open(self.data_paths[index][1])
lateral.convert('RGB')
if self.transform:
lateral = self.transform(lateral)
if self.resize:
frontal = self.resize(lateral)
normal=self.labels[index]
return frontal, lateral, normal
def __len__(self):
return len(self.data_paths)
def get_data():
dataset = MyDataset(image_dir , labels_path, frontal_lateral_path, init_train_transform())#equalisation_stuff, resize_stuff)
# Creating data indices for training and validation splits:
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(VAL_TRAIN_SPLIT * dataset_size))
if SHUFFLE_DATASET :
np.random.seed(RANDOM_SEED)
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]
# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE,
sampler=train_sampler)
validation_loader = DataLoader(dataset, batch_size=BATCH_SIZE,
sampler=valid_sampler)
LOG(2, "Dataloaders initialisation completed")
return train_loader, validation_loader
class gray_2_rgb(object):
def __call__(self,img):
img = np.asanyarray(img)
return skimage.color.gray2rgb(img)
def init_train_transform():
transform = transforms.Compose([
gray_2_rgb(),
transforms.ToPILImage(),
transforms.Resize((256,256)),
# transforms.RandomCrop(224),
# transforms.RandomHorizontalFlip(),
transforms.ToTensor() ])
# transforms.Normalize((0.485, 0.456, 0.406),
# (0.229, 0.224, 0.225))]
#)
return transform
Training Loop:
def train(train_loader, validation_loader):
model= classifier()
if torch.cuda.is_available():
model.cuda()
model.train()
criterion = nn.BCELoss().cuda()
optimizer = optim.Adam(model.parameters(), lr=0.001)#, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
for epoch in tqdm(range(NUM_EPOCHS)):
running_loss=0.0
#print('epoch: '+str(epoch))
for data in train_loader:
frontal_images, lateral_images, labels = data
frontal_images, lateral_images, labels = frontal_images.to(device), lateral_images.to(device), labels.to(device)
#frontal_images, lateral_images, labels = data
optimizer.zero_grad()
outputs = model(frontal_images, lateral_images)
labels=labels.float().to(device)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
print('Epoch: {} Train_Loss: {:.4f}'.format(epoch, running_loss))
if(epoch % 5 == 0):
correct=0.0
total=0.0
with torch.no_grad():
val_loss=0
for frontal_images, lateral_images, labels in validation_loader:
outputs = model(frontal_images, lateral_images)
labels=labels.float().to(device)
loss = criterion(outputs, labels)
val_loss += loss.item()
total+=len(labels)
correct+=num_correct(outputs,labels)
print('Epoch: {} Val_Loss: {:.4f}'.format(epoch, val_loss))
print('Epoch: {} Val_Acc: {:.4f}'.format(epoch, correct/total))