I’d recently copied coding from old Ebook and practice to work on pytouch lightning, but it seems to be buggy to me. I’m hope someone could help me on that
#!/usr/bin/python
import os
import torch
import torchvision
import torchmetrics
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from PIL import Image
from pytorch_lightning.callbacks import ModelCheckpoint
class LoadCancerDataset(torch.utils.data.Dataset):
def __init__(self, data_folder, img_idx, transform = torchvision.transforms.Compose([
torchvision.transforms.CenterCrop(32), torchvision.transforms.ToTensor() ]), dict_labels = {} ):
self.data_folder = data_folder
#self.list_image_files = [s for s in os.listdir(data_folder)]
self.list_image_files = img_idx
self.transform = transform
self.dict_labels = dict_labels
self.labels = [ dict_labels[img.split('.')[0]] for img in self.list_image_files ]
def __len__(self):
return len(self.list_image_files)
def __getitem__(self, idx):
image_name = os.path.join(data_folder, self.list_image_files[idx])
image = Image.open(image_name)
image = self.transform(image)
image_short_name = self.list_image_files[idx].split('.')[0]
label = self.dict_labels[image_short_name]
return image, label
class CNNImageClassifier(pl.LightningModule):
def __init__(self, learning_rate = 0.001): #learning too high : less loss but not converge, learning too low, more accuracy but take time to learn
super().__init__()
self.learning_rate = learning_rate
#input_size = (256, 3, 32, 32)
self.conv_layer1 = torch.nn.Conv2d(in_channels = 3, out_channels = 3, kernel_size = 3, stride = 1, padding = 1)
#output_size = (256, 3, 32, 32)
self.relu1 = torch.nn.ReLU()
#output_size = (256, 3, 32, 32)
self.pool = torch.nn.MaxPool2d(kernel_size = 2)
#output_size = (256, 3, 16, 16)
self.conv_layer2 = torch.nn.Conv2d(in_channels = 3, out_channels = 6, kernel_size = 3, stride = 1, padding = 1)
#output_size = (256, 6, 16, 16)
self.relu2 = torch.nn.ReLU()
self.fully_connected_1 = torch.nn.Linear(in_features = 16 * 16 * 6, out_features = 1000)
self.fully_connected_2 = torch.nn.Linear(in_features = 1000, out_features = 500)
self.fully_connected_3 = torch.nn.Linear(in_features = 500, out_features = 250)
self.fully_connected_4 = torch.nn.Linear(in_features = 250, out_features = 120)
self.fully_connected_5 = torch.nn.Linear(in_features = 120, out_features = 60)
self.fully_connected_6 = torch.nn.Linear(in_features = 60, out_features = 2)
self.loss = torch.nn.CrossEntropyLoss()
def forward(self, input):
output = self.conv_layer1(input)
output = self.relu1(output)
output = self.pool(output)
output = self.conv_layer2(output)
output = self.relu2(output)
#convert into one dimensional before transfer to Linear layer
output.view(-1, 6 * 16 * 16)
output = self.fully_connected_1(output)
output = self.fully_connected_2(output)
output = self.fully_connected_3(output)
output = self.fully_connected_4(output)
output = self.fully_connected_5(output)
output = self.fully_connected_6(output)
return output
#reduce loss & converge
def configure_optimizers(self):
params = self.parameters()
optimizer = torch.optim.Adam(params = params, lr = self.learning_rate)
return optimizer
def training_step(self, batch, batch_idx):
inputs, targets = batch
inputs, targets = inputs.to('cuda:0'), targets.to('cuda:0')
outputs = self(inputs)
train_accuracy = torchmetrics.functional.accuracy(task = 'binary', preds = outputs, target = targets) #multiclass, multilabel
loss = self.loss(outputs, targets)
self.log('train_accuracy:', train_accuracy, prog_bar = True)
self.log('train_loss:', loss)
return { 'train_accuracy' : train_accuracy, 'loss' : loss }
def test_step(self, batch, batch_idx):
inputs, targets = batch
inputs, targets = inputs.to('cuda:0'), targets.to('cuda:0')
outputs = self.forward(inputs)
test_accuracy = self.binary_accuracy(outputs, targets)
loss = self.loss(outputs, targets)
self.log('test_accuracy', test_accuracy)
return { 'test_accuracy' : test_accuracy, 'test_loss' : loss }
np.random.seed(0)
selected_image_list = []
train_imgs_orig = os.listdir('/home/pi/Downloads/histopathologic-cancer-detection/train')
for img in np.random.choice(train_imgs_orig, 1000): #10000
selected_image_list.append(img)
print('Length of images list:', len(selected_image_list))
np.random.seed(0)
np.random.shuffle(selected_image_list)
cancer_train_idx = selected_image_list[:1000] #8000
cancer_test_idx = selected_image_list[800:] #8000
print('Number of samples in training dataset:', len(cancer_train_idx))
print('Number of samples in testing dataset:', len(cancer_test_idx))
df_labels = pd.read_csv('/home/pi/Downloads/histopathologic-cancer-detection/train_labels.csv')
image_label_dict = {}
for img in selected_image_list:
#print('Image:', img.split('.')[0], '==', img.split('.')[1])
label_tuple = df_labels.loc[df_labels['id'] == img.split('.')[0]]
image_label_dict[label_tuple['id'].values[0]] = label_tuple['label'].values[0]
#print(label_tuple['id'].values[0], '==',label_tuple['label'].values[0])
#new_train_labels = pd.DataFrame({ 'id' : [], 'label' : [] })
id_list = []
label_list = []
for img in cancer_train_idx:
label_tuple = df_labels.loc[ df_labels['id'] == img.split('.')[0]]
id_list.append(label_tuple['id'].values[0])
label_list.append(label_tuple['label'].values[0])
new_train_labels = pd.DataFrame({ 'id' : id_list, 'label' : label_list })
new_train_labels.to_csv('train_labels.csv', index = False)
print('Image Label Dicts Length:', len(image_label_dict))
data_train_transforms = torchvision.transforms.Compose([
torchvision.transforms.CenterCrop(32),
torchvision.transforms.RandomHorizontalFlip(),
torchvision.transforms.RandomVerticalFlip(),
torchvision.transforms.ToTensor()
])
data_test_transforms = torchvision.transforms.Compose([
torchvision.transforms.CenterCrop(32),
torchvision.transforms.ToTensor()
])
data_folder = '/home/pi/Downloads/histopathologic-cancer-detection/train'
train_dataset = LoadCancerDataset(
data_folder, cancer_train_idx, transform = data_train_transforms, dict_labels = image_label_dict)
test_dataset = LoadCancerDataset(
data_folder, cancer_test_idx, transform = data_test_transforms, dict_labels = image_label_dict)
#batch_size = 256
batch_size = 16 #(Each will be added as 96)
workers = 2
train_dataloader = torch.utils.data.DataLoader(
train_dataset,
batch_size = batch_size,
num_workers = workers,
pin_memory = True
)
test_dataloader = torch.utils.data.DataLoader(
test_dataset,
batch_size = batch_size,
num_workers = workers,
pin_memory = True
)
if __name__ == '__main__':
checkpoint_callback = ModelCheckpoint()
model = CNNImageClassifier()
trainer = pl.Trainer(max_epochs = 500, progress_bar_refresh_rate = 50, gpus = -1)
trainer.fit(model, train_dataloaders = train_dataloader)
print('Model Saved Location:', checkpoint_callback.best_model_path)