Runtime Error with DataLoader

Hi All,

I have a DataLoader that loads a line from a file with NumPy, then converts it to a torch Tensor, and whenever I run this with more than 1 worker, it gives me an error:

RuntimeError: DataLoader worker (pid 30141) exited unexpectedly with exit code 1.

However, whenever I run it with 0 workers, it works.

Are there any logs that show why the worker exited unexpectedly?

def get_line(filename, index):
    """Return the raw bytes of the line at 0-based position ``index``.

    The file is opened in binary mode and scanned from the start, so the
    returned value is a ``bytes`` object that still carries its line
    terminator (if the line has one).

    Returns ``None`` when no line has that position (for example when
    ``index`` is negative or past the last line).
    """
    with open(filename, "rb") as handle:
        position = 0
        for raw_line in handle:
            if position == index:
                return raw_line
            position += 1
    # Fell off the end of the file without reaching the requested position.
    return None
    
def count_line(filename):
    """Return the number of lines in ``filename``.

    The file is read in binary mode and streamed line by line, so it is
    never loaded into memory as a whole. An empty file yields ``0``; a
    final line without a trailing newline still counts as a line.

    Note: the count is the true number of lines (one more than the index
    of the last line), so every line is addressable by an index in
    ``range(count_line(filename))``.
    """
    with open(filename, "rb") as f:
        # Count iterations rather than remembering the last enumerate()
        # index, which would be one short of the real line count.
        return sum(1 for _ in f)

class LineDatasetDnn(Dataset):
    """Dataset pairing line ``idx`` of a features file with line ``idx`` of a labels file.

    Each line is parsed as space-separated numbers and returned as a
    float32 tensor.

    Samples are always returned as CPU tensors. Creating CUDA tensors
    inside ``__getitem__`` fails when the DataLoader uses worker processes
    (``num_workers > 0``) and is incompatible with ``pin_memory``; move the
    collated batch to the GPU in the training loop instead.
    """

    def __init__(self, features_filename, labels_filename):
        """Store both paths and take the dataset length from the features file."""
        self.features_filename = features_filename
        self.labels_filename = labels_filename
        # Only the features file is counted; the labels file is assumed to
        # have a matching line for every index.
        self.lines = count_line(self.features_filename)

    @staticmethod
    def _load_row(filename, idx):
        """Parse line ``idx`` of ``filename`` into a CPU float32 tensor.

        Raises:
            IndexError: if the file has no line at position ``idx``.
        """
        line = get_line(filename, idx)
        if line is None:
            # get_line returns None when the index is not present in the file.
            raise IndexError(f"{filename} has no line at index {idx}")
        values = np.fromstring(line, dtype=float, sep=' ')
        # NumPy parses to float64; convert to float32 for the model.
        return torch.from_numpy(values).float()

    def __getitem__(self, idx):
        """Return the ``(feature, label)`` pair of CPU float32 tensors for ``idx``."""
        feature = self._load_row(self.features_filename, idx)
        label = self._load_row(self.labels_filename, idx)
        return feature, label

    def __len__(self):
        """Return the number of samples recorded at construction time."""
        return self.lines

def get_concat_dataset_dnn(data_type):
    """Build one concatenated dataset for the ``data_type`` split.

    Every file name found in ``./ds_data/txt/dnn/<data_type>/input`` (in
    sorted order) is paired with the same-named file under
    ``./ds_data/txt/dnn/<data_type>/output``; each pair becomes one
    ``LineDatasetDnn`` and all of them are chained with ``ConcatDataset``.
    """
    input_dir = f"./ds_data/txt/dnn/{data_type}/input"
    output_dir = f"./ds_data/txt/dnn/{data_type}/output"

    per_file_datasets = []
    for name in sorted(os.listdir(input_dir)):
        features_path = f"{input_dir}/{name}"
        labels_path = f"{output_dir}/{name}"
        per_file_datasets.append(LineDatasetDnn(features_path, labels_path))

    return data.ConcatDataset(per_file_datasets)

# Iterate over shuffled mini-batches of the "train" split. Batches are
# assembled by transform_variable_dnn and loaded by 2 worker processes.
# (The loop body is not included in this snippet.)
for feature, label in data.DataLoader(get_concat_dataset_dnn("train"),
                                              batch_size=dnn_minibatch_size, shuffle=True, collate_fn=transform_variable_dnn,
                                              num_workers=2, pin_memory=pin_memory):
1 Like

Alright, I think I got it fixed.

Basically, inside Jupyter Notebooks the errors raised by the loader workers somehow aren’t displayed.

But if I download the notebook as a .py file and execute it, it shows what errors were happening.