Hi!, I am the one who is studying pytorch!
I am working on the rock/scissors/papers classification project for study.
While, I am doing it, I have no idea how to make it work at all…
My train dataset is consisted of generalized png image files, and my validation dataset is consisted of hand jpg images.
My code is below for it
First, dataset code
mport os
import numpy as np
import torch
import torch.nn as nn
import natsort
from skimage.transform import resize
from PIL import Image
from skimage.color import rgb2gray
import imageio
# Data Loader
class CustomDataset(torch.utils.data.Dataset):
def __init__(self, data_dir, transform=None):#fdir, pdir, sdir, transform=None):
# 0: Paper, 1: Rock, 2: Scissors
self.paper_dir = os.path.join(data_dir,'paper/')
self.rock_dir = os.path.join(data_dir,'rock/')
self.scissors_dir = os.path.join(data_dir,'scissors/')
self.transform = transform
lst_paper = os.listdir(self.paper_dir)
lst_rock = os.listdir(self.rock_dir)
lst_scissors = os.listdir(self.scissors_dir)
lst_paper = [f for f in lst_paper]
lst_rock = [f for f in lst_rock]
lst_scissors = [f for f in lst_scissors]
print(lst_paper)
self.lst_dir = [self.paper_dir] * len(lst_paper) + [self.rock_dir] * len(lst_rock) + [self.scissors_dir] * len(lst_scissors)
self.lst_prs = natsort.natsorted(lst_paper) + natsort.natsorted(lst_rock) + natsort.natsorted(lst_scissors)
def __len__(self):
return len(self.lst_prs)
def __getitem__(self, index):
self.img_dir = self.lst_dir[index]
self.img_name = self.lst_prs[index]
return [self.img_dir, self.img_name]
def custom_collate_fn(self, data):
inputImages = []
outputVectors = []
for sample in data:
prs_img = imageio.imread(os.path.join(sample[0] + sample[1]))
gray_img = rgb2gray(prs_img)
if gray_img.ndim == 2:
gray_img = gray_img[:, :, np.newaxis]
inputImages.append(gray_img.reshape(300, 300, 1))
# 0: Paper, 1: Rock, 2: Scissors
dir_split = sample[0].split('/')
if dir_split[-2] == 'paper':
outputVectors.append(np.array(0))
elif dir_split[-2] == 'rock':
outputVectors.append(np.array(1))
elif dir_split[-2] == 'scissors':
outputVectors.append(np.array(2))
data = {'input': inputImages, 'label': outputVectors}
if self.transform:
data = self.transform(data)
return data
class ToTensor(object):
def __call__(self, data):
label, input = data['label'], data['input']
input_tensor = torch.empty(len(input),300, 300)
label_tensor = torch.empty(len(input))
for i in range(len(input)):
input[i] = input[i].transpose((2, 0, 1)).astype(np.float32)
input_tensor[i] = torch.from_numpy(input[i])
label_tensor[i] = torch.from_numpy(label[i])
input_tensor = torch.unsqueeze(input_tensor, 1)
data = {'label': label_tensor.long(), 'input': input_tensor}
return data
And here’s train code
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from copy import copy
import warnings
warnings.filterwarnings('ignore')
num_train = len(os.listdir("./Dataset3/train/paper")) + len(os.listdir("./Dataset3/train/rock")) + len(os.listdir("./Dataset3/train/scissors"))
num_val = len(os.listdir("./Dataset3/validation/paper")) + len(os.listdir("./Dataset3/validation/rock")) + len(os.listdir("./Dataset3/validation/scissors"))
transform = transforms.Compose([ToTensor()])
dataset_train = CustomDataset("./Dataset3/train/", transform=transform)
loader_train = DataLoader(dataset_train, batch_size = 64, \
shuffle=True, collate_fn=dataset_train.custom_collate_fn, num_workers=1)
dataset_val = CustomDataset("./Dataset3/validation/", transform=transform)
loader_val = DataLoader(dataset_val, batch_size=64, \
shuffle=True, collate_fn=dataset_val.custom_collate_fn, num_workers=1)
print(len(dataset_train))
print(len(dataset_val))
print(len(loader_train))
print(len(loader_val), loader_val, type(loader_val))
print(type(dataset_val.custom_collate_fn), dataset_val.custom_collate_fn)
# Define Model
model = nn.Sequential(nn.Conv2d(1, 32, 2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(32, 64, 2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(64, 128, 2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(128, 256, 2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(256, 256, 2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(256, 128, 2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(128, 64, 2, padding=0),
nn.ReLU(),
nn.MaxPool2d(kernel_size=1),
torch.nn.Flatten(),
nn.Linear(64, 1024, bias = True),
nn.Dropout(0.75),
nn.Linear(1024, 3, bias = True),
)
soft = nn.Softmax(dim=1)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Current device:", device)
model.to(device)
# Define the loss
criterion = nn.CrossEntropyLoss().to(device)
# Define the optimizer
optim = torch.optim.Adam(model.parameters(), lr = 0.001)
best_epoch = 0
accuracy_save = np.array(0)
epochs = 10
for epoch in range(epochs):
model.train()
train_loss = []
correct_train = 0
correct_val = 0
correct_batch = 0
for batch, data in enumerate(loader_train, 1):
label = data['label'].to(device)
inputs = data['input'].to(device)
output = model(inputs)
label_pred = soft(output).argmax(1)
optim.zero_grad()
loss = criterion(output, label)
loss.backward()
optim.step()
correct_train += (label == label_pred).float().sum()
train_loss += [loss.item()]
accuracy_train = correct_train / num_train
correct_val = 0
accuracy_tmp = np.array(0)
with torch.no_grad():
model.eval()
val_loss = []
for batch, data in enumerate(loader_val, 1):
label_val = data['label'].to(device)
input_val = data['input'].to(device)
output_val = model(input_val)
label_val_pred = soft(output_val).argmax(1)
correct_val += (label_val == label_val_pred).float().sum()
loss = criterion(output_val, label_val)
val_loss += [loss.item()]
accuracy_val = correct_val / num_val
# Save the best model wrt val accuracy
accuracy_tmp = accuracy_val.cpu().numpy()
if accuracy_save < accuracy_tmp:
best_epoch = epoch
accuracy_save = accuracy_tmp.copy()
torch.save(model.state_dict(), 'param.data')
print(".......model updated (epoch = ", epoch+1, ")")
print("epoch: %04d / %04d | train loss: %.5f | train accuracy: %.4f | validation loss: %.5f | validation accuracy: %.4f" %
(epoch+1, epochs, np.mean(train_loss), accuracy_train, np.mean(val_loss), accuracy_val))
print("Model with the best validation accuracy is saved.")
print("Best epoch: ", best_epoch)
print("Best validation accuracy: ", accuracy_save)
print("Done.")
If I am running this code, "RuntimeError: size mismatch, m1: [64 x 1024], m2: [64 x 1024] at /opt/conda/conda-bld/pytorch_1579022060824/work/aten/src/THC/generic/THCTensorMathBlas.cu:290
" this error occurred
Can anyone help me what’s wrong in my code?
I am very confusing for now how to make it work…
As I said, is it matter to use traindata set as png and validation dataset as jpg?
Anyone take a look at it, and please tell me what’s wrong for now?
Thank you so much.