When I ran the following code on my ‘testing’ folder, it worked fine. However, when I ran it on my ‘training’ folder, the Python program crashed. The ‘testing’ folder contains about 60K images and the ‘training’ folder contains about 1 million images. Both folders have the following structure:
‘testing’
‘1’:
image1.jpg
image2.jpg
…
‘0’:
image1.jpg
image2.jpg
…
When I ran it on the ‘testing’ folder, I got the output I expected:
image_datasets is set up
dataloaders is set up
takes 6.8155
0
torch.Size([32,3,299,299])
takes:20.034827
50
torch.Size([32,3,299,299])
....
However, when I ran it on the ‘training’ folder (i.e., after replacing ‘testing’ with ‘training’ in the following code), I never even got the ‘image_datasets is set up’ message and Python got stuck. I think Python crashed, because the Task Manager showed its CPU and memory usage as 0.
Here is the code:
from __future__ import print_function, division
import torch.utils.data
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import torch.nn.functional as F
import time
import os
import pickle as pickle
import copy
from torchvision.datasets import DatasetFolder
# Switch matplotlib to interactive mode at import time.
# NOTE(review): nothing in the visible code draws a figure — confirm this is
# still needed.
plt.ion()
class MyDatasetFolder(DatasetFolder):
    """DatasetFolder variant that yields ``None`` for samples it cannot produce.

    A file that cannot be loaded or transformed does not abort iteration:
    ``__getitem__`` reports the problem on stdout and returns ``None``.
    Whoever consumes the samples (for example a DataLoader ``collate_fn``)
    must drop the ``None`` entries.
    """

    def __getitem__(self, index):
        """Return ``(sample, target)`` for ``index``, or ``None`` on failure.

        Args:
            index: Position of the wanted entry in ``self.samples``.

        Returns:
            A ``(sample, target)`` tuple with ``self.transform`` and
            ``self.target_transform`` applied when they are set, or ``None``
            if loading or transforming the sample raised an exception.
        """
        path, target = self.samples[index]

        # Step 1: load. Only ``Exception`` is caught so that KeyboardInterrupt
        # and SystemExit still stop the program instead of being swallowed.
        try:
            sample = self.loader(path)
        except Exception as err:
            print('{} can not be loaded'.format(path))
            print('error is {}'.format(err))
            return None

        # Step 2: transform. A failure here is reported separately so the
        # two kinds of bad files can be told apart in the output.
        try:
            if self.transform is not None:
                sample = self.transform(sample)
            if self.target_transform is not None:
                target = self.target_transform(target)
        except Exception as err:
            print('{} can not be transformed'.format(path))
            print('error is {}'.format(err))
            return None

        return sample, target
def myloader(path):
    """Open the image file at ``path`` and return it converted to RGB.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        The image returned by ``convert('RGB')`` on the opened file.
    """
    # PIL is imported lazily, inside the function. ImageFile is only needed
    # if ImageFile.LOAD_TRUNCATED_IMAGES is enabled to tolerate truncated
    # files (currently it is not).
    from PIL import Image, ImageFile

    # The conversion runs while the file handle is still open.
    with open(path, 'rb') as stream:
        return Image.open(stream).convert('RGB')
def my_collate_fn(data):
    """Collate a batch after discarding the samples that are ``None``.

    Args:
        data: List of samples for one batch. An entry is ``None`` when the
            dataset could not produce that sample.

    Returns:
        The result of ``default_collate`` on the remaining samples. If no
        sample remains, a pair ``(inputs, labels)`` of empty tensors
        (``labels`` is int64), so callers that unpack the batch keep working
        and can skip it by checking ``inputs.numel() == 0``.
    """
    valid = [sample for sample in data if sample is not None]
    if not valid:
        # default_collate indexes the first element of the batch and
        # therefore fails on an empty list; return an explicit empty batch.
        return torch.empty(0), torch.empty(0, dtype=torch.long)
    return torch.utils.data.dataloader.default_collate(valid)
def main(split='testing'):
    """Time how fast batches can be read from one image folder.

    Builds a ``MyDatasetFolder`` over ``./data/images/<split>``, wraps it in
    a shuffled ``DataLoader`` (batch size 32, 4 workers, ``my_collate_fn``)
    and iterates over it once. Every 50 batches it prints the time elapsed
    since the previous report, the batch index and the size of the input
    tensor.

    Args:
        split: Name of the sub-folder of ``./data/images`` to read, e.g.
            ``'testing'`` or ``'training'``. Defaults to ``'testing'``.
    """
    preprocess = transforms.Compose([
        transforms.Resize(300),
        transforms.CenterCrop(299),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    data_dir = os.path.join('.', 'data', 'images')
    batch_size = 32
    img_ext = ['.jpg', '.jpeg', '.JPEG', '.JPG', '.png', '.ppm', '.bmp',
               '.pgm', '.tif', '.gif', '.eps', '.icns', '.asp', '.svg',
               '.ico', '.im', '.msp', '.pcx', '.sgi', '.spider', '.tiff',
               '.webp', '.xbm', '.octet-stream']

    # Constructing the dataset is the step after which the first status
    # message is printed; on a very large folder nothing is shown until it
    # has finished.
    dataset = MyDatasetFolder(os.path.join(data_dir, split), myloader,
                              img_ext, preprocess)
    print('image_datasets is set up')

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                        num_workers=4, collate_fn=my_collate_fn)
    print('dataloaders is set up')

    last_time = time.time()
    for index, (inputs, labels) in enumerate(loader):
        # Report only every 50th batch (including the very first one).
        if index % 50 == 0:
            print('takes:{}'.format(time.time() - last_time))
            last_time = time.time()
            print(index)
            print(inputs.size())
# Entry-point guard: call main() only when this file is run as a script.
# NOTE(review): main() starts a DataLoader with worker processes; on platforms
# that start workers by re-importing this module, this guard presumably keeps
# them from re-running main() — confirm.
if __name__ == "__main__":
    main()