Error when iterating DataLoader over my own dataset

I have written a simple PyTorch Dataset class that reads images and generates patches from them, so that I can build my own dataset. I'm using the PyTorch DataLoader, but when I try to iterate through the dataset it gives me this error:

    train()
    for i, data in enumerate(train_loader, 0):
    return _DataLoaderIter(self)
    self._put_indices()
    indices = next(self.sample_iter, None)
    in __iter__
    for idx in self.sampler:
    in __iter__
    return iter(range(len(self.data_source)))
    in __len__
    raise NotImplementedError
    NotImplementedError

This is my code:

def read_image(image_path, **kwargs):
    # cv2 requires int images or float between -1 and 1.
    image, metadata = read_dcm(image_path, dtype=np.int32, **kwargs)
    return image, metadata


def get_ROI(listP, views, patch_size, **kwargs):
   
    # ... patch extraction omitted from this post: it produces roisP / grtsP (positive
    # patches), roisN / grtsN (negative patches) and the counts num_pos / num_neg ...
    num_total = num_pos + num_neg

    print('num_pos', num_pos)
    print('num_neg', num_neg)
    print('num_total', num_total)

    rois = roisP + roisN
    grts = grtsP + grtsN

    """shuffle your dataset"""


    c = list(zip(rois, grts))
    random.shuffle(c)
    rois, grts = zip(*c)

    print('rois shape', np.shape(rois))
    print('grts shape', np.shape(grts))

    #
    # max_w = max(width)
    # max_l = max(length)
    # print('max_w',max_w)
    # print('max_l', max_l)

    return rois,grts,num_total,num_pos,num_neg


class UNetMammoMC(Dataset):
    """UNetMammoMC dataset."""

    def __init__(self, type_dataset, output_size, transform=None, compute_weights=True, **kwargs):
        """

        Parameters

        type_dataset: 'train val or test'
        output_size: patch size

        ==========

        """

        assert type_dataset in {'train', 'val', 'test'}, 'Choose one of `train`, `val` or `test`.'
        self.rois = []
        self.grts=[]
        self.total=0
        self.pos=0
        self.neg=0



        self.roi_w, self.roi_h=output_size,output_size

        self.annotations = cfg.ANNOTATIONS_MC_ROOT
        print(self.annotations)


        self.lists = cfg.LISTS_MC[type_dataset.upper()]
        print(self.lists)
        print(cfg.LISTS_MC.ROOT)
        self.pos_list = read_list(os.path.join(cfg.LISTS_MC.ROOT, self.lists))
        """Neg list has to be inserted"""


        print('self.pos_list', self.pos_list)  # image list

        self.classes = ['normal', 'has_lesion']

        self.idx_to_class = {0: 'normal', 1: 'has_lesion'}

        self.n_classes = len(self.classes)

        self.view = ['cl', 'cr', 'ml', 'mr']

        self.rois,self.grts,self.total, self.pos, self.neg=get_ROI(self.pos_list, self.view, output_size)

        print('Init: num total',self.total)

        self.transform=transform

        def __len__(self):

            return self.total

        def __getitem__(self, idx):

            if idx < self.total:
                sample = self.rois[idx]
                grt = self.grts[idx]
                # sample = torch.from_numpy(sample)
                # grt = torch.from_numpy(grt)

            return sample,grt


output_shape = 50



def load_dataset(is_train,output_shape):
    return UNetMammoMC(
        'train', output_shape, num_samples=10 if cfg.UNET.LIMIT_DATASET else False,transform=transforms.ToTensor())


def train(experiment_name='unet', resume=True):
    train_dataset=load_dataset(True,output_shape)

    width = train_dataset.roi_w
    height = train_dataset.roi_h
    num_cls = train_dataset.n_classes

    train_loader = DataLoader(train_dataset,#batch_size=cfg.UNET.BATCH_SIZE,
                              batch_size=2,
                              num_workers=cfg.NUM_WORKERS)
    for i, data in enumerate(train_loader, 0):
        #inputs, grts = data
        print('data')


if __name__ == '__main__':

    train()

Could you check in your Dataset if and how

    def __len__(self):

was implemented?
Also, check for typos in the function name or indentation errors.
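
For reference, the DataLoader's default sampler calls len(dataset) to build its index sequence, so a map-style Dataset has to define both __getitem__ and __len__ at class level. A minimal sketch (the names below are placeholders, not your actual data):

    from torch.utils.data import Dataset

    class MyDataset(Dataset):
        def __init__(self, samples, targets):
            self.samples = samples
            self.targets = targets

        def __getitem__(self, idx):
            # return a single sample/target pair
            return self.samples[idx], self.targets[idx]

        def __len__(self):
            # the default sampler uses this to build the index range
            return len(self.samples)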

I have just attached the code :slight_smile:

Ah yeah, I see. Thanks!
You can add code snippets by wrapping them in three backticks (```).
Let me format it for you and then we’ll have a look.

EDIT: It looks like both __getitem__ and __len__ are defined inside __init__.
Could you reduce their indentation by one level so that all three methods are at the same level in the class?
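
In other words, based on the code you posted, the class should end up roughly like this (only the indentation of the two methods changes, the bodies stay as they are):

    class UNetMammoMC(Dataset):
        """UNetMammoMC dataset."""

        def __init__(self, type_dataset, output_size, transform=None, compute_weights=True, **kwargs):
            # ... same setup code as in your post ...
            self.transform = transform

        def __len__(self):
            # now a method of the class, not of __init__
            return self.total

        def __getitem__(self, idx):
            # same indentation level as __init__
            sample = self.rois[idx]
            grt = self.grts[idx]
            return sample, grt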


Oh you are right, thanks. Now it works but it gives me another error :frowning:
      File "<string>", line 1, in <module>
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\multiprocessing\spawn.py", line 105, in spawn_main
        exitcode = _main(fd)
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\multiprocessing\spawn.py", line 114, in _main
        prepare(preparation_data)
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\multiprocessing\spawn.py", line 225, in prepare
        _fixup_main_from_path(data['init_main_from_path'])
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
        run_name="__mp_main__")
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\runpy.py", line 263, in run_path
        pkg_name=pkg_name, script_name=fname)
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\runpy.py", line 96, in _run_module_code
        mod_name, mod_spec, pkg_name, script_name)
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\runpy.py", line 85, in _run_code
        exec(code, run_globals)
      File "C:\Users\bened\Desktop\UNIVERSITA’\dottorato_ricerca_Nijmegen\breast-unet\lib\common\data\MCs_mammo\data_loader4.py", line 15, in <module>
        from torch.utils.data import Dataset
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\__init__.py", line 257, in <module>
        from .functional import *
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\functional.py", line 2, in <module>
    Traceback (most recent call last):
      File "C:/Users/bened/Desktop/UNIVERSITA’/dottorato_ricerca_Nijmegen/breast-unet/lib/common/data/MCs_mammo/data_loader4.py", line 509, in <module>
        train()
      File "C:/Users/bened/Desktop/UNIVERSITA’/dottorato_ricerca_Nijmegen/breast-unet/lib/common/data/MCs_mammo/data_loader4.py", line 502, in train
        for i, data in enumerate(train_loader, 0):
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\utils\data\dataloader.py", line 501, in __iter__
        return _DataLoaderIter(self)
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\utils\data\dataloader.py", line 289, in __init__
        w.start()
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\multiprocessing\process.py", line 105, in start
        self._popen = self._Popen(self)
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\multiprocessing\context.py", line 223, in _Popen
        return _default_context.get_context().Process._Popen(process_obj)
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\multiprocessing\context.py", line 322, in _Popen
        return Popen(process_obj)
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
        reduction.dump(process_obj, to_child)
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\multiprocessing\reduction.py", line 60, in dump
        ForkingPickler(file, protocol).dump(obj)
    BrokenPipeError: [Errno 32] Broken pipe
        import torch.nn.functional as F
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\nn\__init__.py", line 1, in <module>
        from .modules import *
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\nn\modules\__init__.py", line 1, in <module>
        from .module import Module
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\nn\modules\module.py", line 6, in <module>
        from ..backends.thnn import backend as thnn_backend
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\nn\backends\thnn.py", line 41, in <module>
        _initialize_backend()
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\nn\backends\thnn.py", line 23, in _initialize_backend
        from .._functions.rnn import RNN,
      File "C:\Users\bened\Miniconda3\envs\breast-manet-master2\lib\site-packages\torch\nn\_functions\rnn.py", line 4, in <module>
        from .. import functional as F
      File "<frozen importlib._bootstrap>", line 971, in _find_and_load
      File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
      File "<frozen importlib._bootstrap>", line 665, in _load_unlocked
      File "<frozen importlib._bootstrap_external>", line 674, in exec_module
      File "<frozen importlib._bootstrap_external>", line 771, in get_code
      File "<frozen importlib._bootstrap_external>", line 482, in _validate_bytecode_header
    MemoryError

It looks like you got a BrokenPipeError: [Errno 32] Broken pipe.
Could you reduce the number of workers in your DataLoader to zero and see if your code still breaks?
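
For a quick test, based on the DataLoader call you posted, that would look like this:

    train_loader = DataLoader(train_dataset,
                              batch_size=2,
                              num_workers=0)  # load data in the main process while debugging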

Also, are you using the if-clause protection? Since you are using Windows, multiprocessing should be guarded with:

import torch

def main():
    for i, data in enumerate(dataloader):
        # do something here
        pass

if __name__ == '__main__':
    main()

Have a look at the Windows FAQ for more info.


I’ve tried using num_workers=0 and it works now. Thank you very much for your help :-) !

Good to hear it’s working now.
However, I would assume you would get the proper error message when running your code with num_workers=0.
That setting should only be used for debugging purposes, since the data is then loaded in the main process, which might slow down your training.

Did you try to add the if-clause protection and use multiple workers?

Yes, I have added the if-clause protection and now it also works with num_workers > 0.

Now I have added one line of code to check the shape of the loaded data,
but the output is <built-in method size of Tensor object at 0x0000013B77CE7900>.
It seems like the data is not being loaded properly. Is it?

def train(experiment_name='unet', resume=True):
    train_dataset=load_dataset(True,output_shape)

    width = train_dataset.roi_w
    height = train_dataset.roi_h
    num_cls = train_dataset.n_classes

    print('width',width)

    train_loader = DataLoader(train_dataset,#batch_size=cfg.UNET.BATCH_SIZE,
                              batch_size=2,
                              num_workers=2)
    for i, data in enumerate(train_loader, 0):
        inputs, grts = data

            #plt.figure()
            #plt.imshow(inputs, cmap=plt.cm.gray)
            #plt.show()
        print(inputs.size)
        print(grts.size)
if __name__ == '__main__':

    train()

It is probably fine. tensor.size() is a method, so just add parentheses:

    print(inputs.size())
    print(grts.size())
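
As a side note, the shape attribute gives the same information without a method call:

    print(inputs.shape)
    print(grts.shape)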

Thank you, this solved my problem!