While working on a Kaggle competition (a binary classification task), the GPU doesn't seem to be utilized at all (0%) during training. The GPU is utilized 100% for normal tensor operations, just not for this training loop. A few solutions were suggested in this post (credits to @ptrblck), but unfortunately I couldn't solve the issue. I am a beginner in the field and am looking for help.
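For context, by "normal tensor operations" I mean something like the small check below (just an illustrative snippet, not part of the competition code), which does keep the GPU near 100%:

import torch

# Illustrative check: pure GPU compute with no data loading keeps utilization near 100%
device = 'cuda' if torch.cuda.is_available() else 'cpu'
x = torch.randn(4096, 4096, device=device)
for _ in range(100):
    y = x @ x                # large matmuls run entirely on the GPU
torch.cuda.synchronize()     # wait for all kernels to finish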
Here’s the code:
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision import models, transforms
from tqdm import trange

df = pd.read_csv("../input/siim-isic-melanoma-classification/train.csv")
df.head(3)

# Keep only the image name and the target label as metadata
meta_data = df[['image_name', 'target']]
meta_data.head()
meta_data.to_csv('meta_data.csv', index=False)

path = "../input/siim-isic-melanoma-classification/jpeg/train/"
class Image_Pipeline(Dataset):
    def __init__(self, path_dir, csv_file, transform=None):
        self.df = pd.read_csv(csv_file)
        self.path = path_dir
        self.transform = transform

    def __getitem__(self, index):
        # Append the .jpg extension to the image name for this index only
        image_name = self.df.image_name.values[index] + '.jpg'
        image = Image.open(self.path + image_name).convert("RGB")
        #image = cv2.imread(os.path.join(self.path, image_name))
        label = torch.tensor(self.df.target.values[index], dtype=torch.long)
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def targets(self):
        label = torch.tensor(self.df.target.values, dtype=torch.float32)
        return label

    def __len__(self):
        return len(self.df)
batch_size = 16
val_pct = 0.2

# ImageNet normalization statistics, since we fine-tune a pretrained ResNet
get_transform = transforms.Compose([transforms.Resize((224, 224)),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                         std=[0.229, 0.224, 0.225])])

dataset = Image_Pipeline(path, 'meta_data.csv', transform=get_transform)
def split_train_val(dataset, valid_pct, batch):
    # Stratified split so the class imbalance is preserved in both subsets
    train_idx, valid_idx = train_test_split(np.arange(len(dataset.targets())),
                                            test_size=valid_pct,
                                            shuffle=True,
                                            stratify=dataset.targets())
    train_set = DataLoader(dataset, batch_size=batch, sampler=SubsetRandomSampler(train_idx))  # ,num_workers=4)
    val_set = DataLoader(dataset, batch_size=batch, sampler=SubsetRandomSampler(valid_idx))  # ,num_workers=4)
    print("Training batches: {} \nValidation batches: {}".format(len(train_set), len(val_set)))
    return train_set, val_set
traindata,validation = split_train_val(dataset,val_pct,batch_size)
# To verify that the dataset and the split train/val loaders cover roughly the same number of samples
# (the counts can differ slightly because the last batch of each loader may be partial)
print((len(traindata) * batch_size) + (len(validation) * batch_size), len(dataset))
device = ('cuda' if torch.cuda.is_available() else 'cpu')
model = models.resnet34(pretrained=True)

def freeze_till_last(model):
    # Freeze the pretrained backbone; only the new fc layer will be trained
    for param in model.parameters():
        param.requires_grad = False

freeze_till_last(model)

# Replace the final layer with a single-logit head for binary classification
incoming = model.fc.in_features
model.fc = nn.Linear(in_features=incoming, out_features=1)
model.fc.weight.requires_grad = True
model.fc.bias.requires_grad = True
import torch.optim as optim
from torch.optim import lr_scheduler
model.to(device)
def fit(model, traind, validation, epochs=1):  # loss_fn, optimizer, epochs=1):
    print(device)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    model.train()  # model is already on the device (moved once above), no need to move it per batch
    torch.cuda.synchronize()
    end = time.time()
    for epoch in trange(epochs):
        for data, label in traind:
            # Time spent since the previous batch finished (mostly data loading)
            print("elapsed time: {}".format(time.time() - end))
            torch.cuda.synchronize()
            data = data.to(device)
            label = label.to(device)
            optimizer.zero_grad()
            output = model(data)
            # view(-1) keeps this working for the last, possibly smaller, batch
            loss = loss_fn(output.view(-1), label.to(torch.float))
            loss.backward()
            optimizer.step()
            print("loss:{:.3f}".format(loss.item()))
            end = time.time()

arg = [model, traindata, validation]
fit(*arg)
Thank you in advance.