Customized dataset crashes Python on large training data

nimning · November 13, 2018, 2:01am

When I ran the following code on my ‘testing’ folder, it worked fine. However, when I ran it on my ‘training’ folder, the Python program crashed. The ‘testing’ folder contains about 60K images and the ‘training’ folder contains about 1 million images. Both folders have the following structure:

‘testing’
‘1’:
image1.jpg
image2.jpg
…
‘0’:
image1.jpg
image2.jpg
…

When I ran it on the ‘testing’ folder, I got the output I expected:

image_datasets is set up
dataloaders is set up
takes 6.8155
0
torch.Size([32,3,299,299])
takes:20.034827
50
torch.Size([32,3,299,299])
....

However, when I ran it on the ‘training’ folder (i.e., replacing ‘testing’ with ‘training’ in the following code), I could not even get the ‘image_datasets is set up’ message, and Python got stuck. I think Python crashed, as the Task Manager showed CPU and memory usage as 0.

Here is the code

from __future__ import print_function, division
import torch.utils.data
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import torch.nn.functional as F
import time
import os
import pickle as pickle
import copy
from torchvision.datasets import DatasetFolder 

# Turn on matplotlib's interactive mode (nothing in the visible script
# draws a figure, so this has no visible effect here).
plt.ion()

class MyDatasetFolder(DatasetFolder):
    """DatasetFolder variant that yields ``None`` for unusable samples.

    Instead of letting one unreadable file abort iteration, ``__getitem__``
    returns ``None``; the collate function used with the DataLoader is
    expected to drop those entries from the batch.
    """

    def __getitem__(self, index):
        """Load and transform the sample stored at ``index``.

        Args:
            index: Position of the sample in ``self.samples``.

        Returns:
            A ``(sample, target)`` tuple on success, or ``None`` when the
            file cannot be loaded or one of the transforms raises.
        """
        path, target = self.samples[index]

        # Catch ``Exception`` rather than using a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit still propagate and a stuck run can
        # be interrupted.
        try:
            sample = self.loader(path)
        except Exception:
            # Unreadable files are skipped silently (best effort).
            return None

        try:
            if self.transform is not None:
                sample = self.transform(sample)
            if self.target_transform is not None:
                target = self.target_transform(target)
        except Exception as err:
            print('{} can not be transformed'.format(path))
            print('error is {}'.format(err))
            return None

        return sample, target

def myloader(path):
    """Open the image file at ``path`` and return it converted to RGB.

    The file is opened in binary mode and closed again before returning;
    the conversion happens while the file handle is still open.
    """
    from PIL import Image, ImageFile
    # ImageFile.LOAD_TRUNCATED_IMAGES is left at its default, so truncated
    # files are not force-loaded.

    with open(path, 'rb') as handle:
        rgb_image = Image.open(handle).convert('RGB')
    return rgb_image

def my_collate_fn(data):
    """Collate a batch after discarding entries that are ``None``.

    Args:
        data: Iterable of samples; ``None`` entries mark samples that the
            dataset could not produce.

    Returns:
        Whatever ``default_collate`` builds from the remaining samples.
    """
    usable = [item for item in data if item is not None]
    collate = torch.utils.data.dataloader.default_collate
    return collate(usable)

def main():
    """Build the 'testing' dataset and DataLoader, then time a pass over it.

    Every 50th batch (starting with the first) prints the time elapsed since
    the previous report, the batch index, and the size of the input tensor.
    """
    model = models.inception_v3(pretrained = True)

    # Preprocessing applied to every image of the 'testing' split.
    testing_pipeline = transforms.Compose([
        transforms.Resize(300),
        transforms.CenterCrop(299),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    data_transforms = {'testing': testing_pipeline}

    data_dir = os.path.join('.','data','images')
    batch_size = 32
    # File extensions the dataset accepts when scanning the folder.
    img_ext = ['.jpg', '.jpeg', '.JPEG', '.JPG','.png','.ppm','.bmp','.pgm','.tif','.gif', '.eps', '.icns', '.asp', '.svg', '.ico', '.im', '.msp', '.pcx', '.sgi','.spider', '.tiff', '.webp','.xbm', '.octet-stream']

    image_datasets = {}
    for split in ['testing']:
        split_root = os.path.join(data_dir, split)
        image_datasets[split] = MyDatasetFolder(split_root, myloader, img_ext,
                                                data_transforms[split])
    print('image_datasets is set up')

    dataloaders = {}
    for split in ['testing']:
        dataloaders[split] = DataLoader(image_datasets[split],
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=4,
                                        collate_fn = my_collate_fn)
    print('dataloaders is set up')

    last_report = time.time()
    for batch_no, (inputs, labels) in enumerate(dataloaders['testing']):
        # Only report on every 50th batch.
        if batch_no % 50 != 0:
            continue
        print('takes:{}'.format(time.time() - last_report))
        last_report = time.time()
        print(batch_no)
        print(inputs.size())
# Run main() only when this file is executed as a script, not on import.
# NOTE(review): the DataLoader is created with num_workers=4; on platforms
# that spawn worker processes by re-importing this module (presumably the
# case here, given the Task Manager mention) this guard is needed - confirm.
if __name__ == "__main__":
    main()