I am using a pre-trained model - Resnet18 - to identify dog breeds. Link to dataset: Dog-Breed
This is my code:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
from torchvision import datasets, models, transforms
import time
import os
from collections import Counter
import errno
import pandas
from sklearn.model_selection import StratifiedShuffleSplit
import cv2
LABELS_LOCATION = './dataset/labels.csv'
TRAIN_LOCATION = './dataset/train/'
TEST_LOCATION = './dataset/test/'
ROOT_PATH = './dataset/'
use_gpu = torch.cuda.is_available()
# Read CSV
def read_csv(csvf):
# print(pandas.read_csv(csvf).values)
data=pandas.read_csv(csvf).values
labels_dict = dict(data)
idz=list(labels_dict.keys())
clazz=list(labels_dict.values())
return labels_dict,idz,clazz
def create_dir(path_name):
try:
os.makedirs(path_name)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def class_hashmap(class_arr):
uniq_clazz = Counter(class_arr)
class_dict = {}
for i, j in enumerate(uniq_clazz):
class_dict[j] = i
return class_dict
labels, ids, class_names = read_csv(LABELS_LOCATION)
train_images = os.listdir(TRAIN_LOCATION)
class_numbers = class_hashmap(class_names)
data_transforms = {
'train': transforms.Scale(60),
}
images_len = len(train_images)
'''
Resizing the image to 224x224 so that we have same dimensionality while feeding the image to model
'''
resize_image = np.empty((60, 60))
resize = []
indexed_labels = []
for t_i in train_images:
# resize.append(transform.resize(io.imread(TRAIN_LOCATION+t_i), (60, 60, 3))) # (60,60) is the height and widht; 3 is the number of channels
resize.append(cv2.resize(cv2.imread(TRAIN_LOCATION+t_i), (224, 224)).reshape(3, 224, 224))
indexed_labels.append(class_numbers[labels[t_i.split('.')[0]]])
resize = np.asarray(resize)
print(resize.shape)
'''
Splitting the data into 7:1:2 (train:val:test) using stratification
'''
X = resize # numpy array of images [training data]
y = np.array(indexed_labels) # indexed labels for images [training labels]
sss = StratifiedShuffleSplit(n_splits=3, test_size=0.2, random_state=0)
sss.get_n_splits(X, y)
for train_index, test_index in sss.split(X, y):
X_temp, X_test = X[train_index], X[test_index] # split train into train and test [data]
y_temp, y_test = y[train_index], y[test_index] # labels
sss = StratifiedShuffleSplit(n_splits=3, test_size=0.123, random_state=0)
sss.get_n_splits(X_temp, y_temp)
for train_index, test_index in sss.split(X_temp, y_temp):
print("TRAIN:", train_index, "VAL:", test_index)
X_train, X_val = X[train_index], X[test_index] # training and validation data
y_train, y_val = y[train_index], y[test_index] # training and validation labels
# print(type(X_train),type(X_test),type(X_val))
# print(type(y_train),type(y_test),type(y_val))
batch_size = 500
learning_rate =0.001
'''
Data Loaders
'''
train = torch.utils.data.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=False)
val = torch.utils.data.TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
val_loader = torch.utils.data.DataLoader(val, batch_size=batch_size, shuffle=False)
test = torch.utils.data.TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)
# print(train_loader.size)
dataloaders = {
'train': train_loader,
'val': val_loader
}
print(dataloaders['train'])
def train_model(model, fc, criterionn, optimizer, scheduler, num_epochs=25):
"""
:param model:
:param criterion:
:param optimizer:
:param scheduler:
:param num_epochs:
:return:
"""
since = time.time()
best_model_wts = model.state_dict()
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
print(".......phase:", phase)
if phase == 'train':
scheduler.step()
model.train(True) # Set model to training mode
else:
model.train(False) # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for data in dataloaders[phase]:
print(".......... inside second for dataloaders")
# get the inputs
inputs, i_labels = data
print("........ input size", len(inputs))
print("......... labels size", len(i_labels))
# wrap them in Variable
if use_gpu:
inputs = Variable(inputs.cuda()).float()
i_labels = Variable(i_labels.cuda()).float().type(torch.cuda.LongTensor)
else:
inputs, i_labels = Variable(inputs).float(), Variable(i_labels).float().type(torch.LongTensor)
# zero the parameter gradients
optimizer.zero_grad()
# forward
outputs = model(inputs)
outputs = outputs.view(outputs.size(0), -1)
outputs = fc(outputs)
_, preds = torch.max(outputs.data, 1)
print("........ output size", outputs.size)
loss = criterionn(outputs, i_labels)
print("....... loss - ", loss)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.data[0]
running_corrects += torch.sum(preds == i_labels.data)
epoch_loss = running_loss / len(dataloaders[phase])
epoch_acc = running_corrects / len(dataloaders[phase])
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = model.state_dict()
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
return model
'''
any Pretrained model
'''
model_ft = models.resnet18(pretrained=True)
# freeze all model parameters
# for param in model_ft.parameters():
# param.requires_grad = False
# new final layer with 16 classes (from kaggle kernel)
print("...... total unique classes", len(class_numbers))
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, len(class_numbers))
print(".......model_ft_success.......")
if use_gpu:
model_ft = model_ft.cuda()
model_ft.fc = model_ft.fc.cuda()
if use_gpu:
criterion = nn.CrossEntropyLoss().cuda()
else:
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.fc.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
print(".....exp_lr_scheduler.......")
model_ft = train_model(model_ft, model_ft.fc, criterion, optimizer_ft, exp_lr_scheduler,
num_epochs=25)
I am getting this error:
THCudaCheck FAIL file=/pytorch/torch/lib/THC/generic/THCStorage.cu line=66 error=2 : out of memory
Traceback (most recent call last):
File "TL.py", line 247, in <module>
num_epochs=25)
File "TL.py", line 185, in train_model
loss.backward()
File "/home/venvs/pytorch/local/lib/python2.7/site-packages/torch/autograd/variable.py", line 156, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
File "/home/venvs/pytorch/local/lib/python2.7/site-packages/torch/autograd/__init__.py", line 98, in backward
variables, grad_variables, retain_graph)
RuntimeError: cuda runtime error (2) : out of memory at /pytorch/torch/lib/THC/generic/THCStorage.cu:66
What is it that I am missing here?