DataLoader with num_workers>0: either RuntimeError or script starts from beginning

Hello,

I am trying to train a CNN on a GPU, with the DataLoader having num_workers>0.

I encountered the RuntimeError: An attempt has been made to start a new process before the current process has finished its bootstrapping phase.

According to the Windows FAQ (Windows FAQ — PyTorch 2.0 documentation), I need to protect the entry point of my script with an if clause:

if __name__ == '__main__':

But even with this guard, my script starts again from the beginning, i.e. it imports the libraries again.

Here is a simplified version of my code, that still shows the same issue:

import torch
from torch import nn
import torchvision.transforms as transforms
from torchvision.datasets import DatasetFolder
from torch.utils.data import random_split, DataLoader, Subset
import matplotlib.pyplot as plt
import time
import os
from PIL import Image

# Printed whenever this module's top level is executed, which makes it easy
# to see how many times the script is (re)run.
print('finished importing')

# Seed the global RNG and a dedicated generator with the same fixed value.
torch.manual_seed(1)
generator1 = torch.Generator().manual_seed(1)
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Work from the script's own directory. abspath() guards against __file__
# being a bare file name: its dirname would be '' and os.chdir('') raises
# FileNotFoundError.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

##### hyperparameters #####

# training settings
learn_rate = 5e-6
n_epochs = 100

# model settings
n_classes = 4
channels_by_layer = [8, 16, 32]
conv_kernel_size = 3

# data location (relative directory, with trailing slash) and worker count
data_filename = 'small_train_4class'
data_path = f'{data_filename}/'
n_workers = 4

##### defining dataloader and classes #####

class hgload(DatasetFolder):
    """Image dataset read from a directory tree of ``.png`` files.

    The data consists of image files, with the folder names being the
    class names.
    """

    def __init__(self, root_path, transform=None):
        """Index the ``.png`` files found under ``root_path``.

        Args:
            root_path: directory handed to ``find_classes`` and
                ``make_dataset``.
            transform: optional callable applied to every opened image.
        """
        # NOTE(review): DatasetFolder.__init__ is not called here; only the
        # attributes assigned below are set.
        self.data_dir = root_path
        self.transform = transform
        self.classes, self.class_to_ind = self.find_classes(self.data_dir)

        samples = self.make_dataset(
            self.data_dir, self.class_to_ind, extensions='.png'
        )
        # Transpose the sample rows into columns: column 0 is kept as the
        # image list, column 1 becomes the label tensor.
        columns = list(map(list, zip(*samples)))

        self.images = columns[0]
        self.labels = torch.tensor(columns[1]).long()

    def __len__(self):
        """Return the number of labels (one per indexed image)."""
        return len(self.labels)

    def __getitem__(self, index):
        """Open image ``index`` and return it together with its label.

        When a transform is set, the image is transformed and cast to float;
        otherwise the opened image is returned as is.
        """
        picture = Image.open(self.images[index])
        if self.transform:
            picture = self.transform(picture).float()
        return picture, self.labels[index]

# Dataset and loader are created at module level, i.e. every time this
# module's top level is executed.
transform = transforms.ToTensor()
dataset_transform = hgload(transform=transform, root_path=data_path)

raw_dl = DataLoader(dataset=dataset_transform, num_workers=n_workers, batch_size=32)

if __name__ == '__main__':
    # Print the index and the image/label shapes of the first three batches.
    for step, batch in enumerate(raw_dl):
        print(step)
        print(batch[0].shape)
        print(batch[1].shape)

        if step > 1:
            break

The above code gives me

finished importing
finished importing
finished importing
finished importing
finished importing
0
torch.Size([32, 1, 220, 220])
torch.Size([32])
1
torch.Size([32, 1, 220, 220])
torch.Size([32])
2
torch.Size([32, 1, 220, 220])
torch.Size([32])

I’d like to ask, what is the correct way of using multiple workers in DataLoader?

Any help would be greatly appreciated.

You are still executing operations in the global scope, so create a main function and move that code there. The if __name__ == '__main__' guard should then only call main().

1 Like

Thanks for your reply.

Could you please kindly give an example?

I have tried

def main():
    """Print the index and shapes of the first three batches of ``raw_dl``.

    ``raw_dl`` is the module-level DataLoader; it is not created here.
    """
    for step, batch in enumerate(raw_dl):
        print(step)
        print(batch[0].shape)
        print(batch[1].shape)

        if step > 1:
            break


if __name__ == '__main__':
    main()

and this

def main():
    """Create the DataLoader locally and print the first three batches.

    The dataset (``dataset_transform``) and ``n_workers`` still come from
    module level; only the loader itself is built inside this function.
    """
    loader = DataLoader(dataset=dataset_transform, num_workers=n_workers, batch_size=32)

    for step, batch in enumerate(loader):
        print(step)
        print(batch[0].shape)
        print(batch[1].shape)

        if step > 1:
            break


if __name__ == '__main__':
    main()

I also tried putting everything (except the imports and the class) under def main(), but I still get the same issue.