I am trying to iterate through a DataLoader, but I sometimes get a RuntimeError and don't know how to solve it.
my dataset:
class MyDataset(Dataset):
    """Dataset that reads image file names from a text file and loads
    one (image, label) pair per name from disk.

    Args:
        label_dir: directory containing one ``<name>.csv`` label file per image.
        image_dir: directory containing one ``<name>.jpg`` image per name.
        image_names: path to a text file with one image name per line.
        transform: optional callable applied to the ``{'image', 'label'}`` sample dict.
    """

    def __init__(self, label_dir, image_dir, image_names, transform=None):
        self.label_dir = label_dir
        self.image_dir = image_dir
        self.image_names = image_names
        self.transform = transform

    def __len__(self):
        # Use a context manager so the file handle is closed instead of leaked.
        with open(self.image_names) as f:
            return sum(1 for _ in f)

    def __getitem__(self, idx):
        img_name = ''
        with open(self.image_names) as f:
            for i, line in enumerate(f):
                if i == idx:
                    # rstrip('\n') instead of line[:-1]: does not chop the last
                    # character when the final line lacks a trailing newline.
                    img_name = line.rstrip('\n')
                    break  # no need to scan the rest of the file

        image = io.imread(self.image_dir + img_name + '.jpg')
        # .as_matrix() was removed from pandas; .to_numpy() is the replacement.
        # Forcing every label to float64 gives all samples a single dtype, which
        # fixes the collate RuntimeError ("Expected object of scalar type Double
        # but got scalar type Long") raised when default_collate tries to stack
        # labels of mixed integer/float dtypes into one batch.
        label = pd.read_csv(self.label_dir + img_name + '.csv').to_numpy(dtype='float64')

        sample = {'image': image, 'label': label}
        if self.transform:
            sample = self.transform(sample)
        return sample
This is how I try to iterate over it:
# Build the dataset from the name list and wrap it in a DataLoader that
# yields shuffled batches of four samples.
composed = transforms.Compose([ToTensor()])
dataset = MyDataset(label_dir='labels/',
                    image_dir='images/',
                    image_names='names.txt',
                    transform=composed)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Print the tensor shapes of each batch as a sanity check.
for batch_idx, batch in enumerate(dataloader):
    print(batch_idx, batch['image'].size(), batch['label'].size())
When I try to run the for-loop I get the following Error-message:
RuntimeError Traceback (most recent call last)
<ipython-input-246-2202a1a6f738> in <module>
----> 1 for i, data in enumerate(dataloader):
2 print(i, data['image'].size(), data['label'].size())
~\AppData\Local\conda\conda\envs\mt\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
558 if self.num_workers == 0: # same-process loading
559 indices = next(self.sample_iter) # may raise StopIteration
--> 560 batch = self.collate_fn([self.dataset[i] for i in indices])
561 if self.pin_memory:
562 batch = _utils.pin_memory.pin_memory_batch(batch)
~\AppData\Local\conda\conda\envs\mt\lib\site-packages\torch\utils\data\_utils\collate.py in default_collate(batch)
61 return batch
62 elif isinstance(batch[0], container_abcs.Mapping):
---> 63 return {key: default_collate([d[key] for d in batch]) for key in batch[0]}
64 elif isinstance(batch[0], tuple) and hasattr(batch[0], '_fields'): # namedtuple
65 return type(batch[0])(*(default_collate(samples) for samples in zip(*batch)))
~\AppData\Local\conda\conda\envs\mt\lib\site-packages\torch\utils\data\_utils\collate.py in <dictcomp>(.0)
61 return batch
62 elif isinstance(batch[0], container_abcs.Mapping):
---> 63 return {key: default_collate([d[key] for d in batch]) for key in batch[0]}
64 elif isinstance(batch[0], tuple) and hasattr(batch[0], '_fields'): # namedtuple
65 return type(batch[0])(*(default_collate(samples) for samples in zip(*batch)))
~\AppData\Local\conda\conda\envs\mt\lib\site-packages\torch\utils\data\_utils\collate.py in default_collate(batch)
41 storage = batch[0].storage()._new_shared(numel)
42 out = batch[0].new(storage)
---> 43 return torch.stack(batch, 0, out=out)
44 elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
45 and elem_type.__name__ != 'string_':
RuntimeError: Expected object of scalar type Double but got scalar type Long for sequence element 1 in sequence argument at position #1 'tensors'
The error only appears when I use a batch size larger than 1 for my DataLoader (note "sequence element 1" in the traceback — the failure happens while stacking the second sample onto the first). I would be grateful for any ideas on how to solve this problem.