hi,
I’m using data loader with dataset that I coded. here the dataset:
Dataset preparation
class dfDataset(Dataset):
def __init__(self, df, label_list, first_words=10000):
self.cur_df = df
self.label_list = label_list
self.first_words= first_words
def __len__(self):
return len(self.label_list)
def __getitem__(self, idx):
tmp = [i+1 for i in self.cur_df.iloc[idx, 1][:self.first_words]]
tmp =np.append( tmp,[0]*(self.first_n_byte-len(tmp)))
ret = dict()
ret["words"] = np.array(words)
ret["sig"] = np.array(np.append([i+1 for i in self.cur_df.iloc[idx, 0]], [0]*(sig_limit-len(self.cur_df.iloc[idx, 0]))))
ret["feature"] = np.array(self.cur_df.iloc[idx, 2])
ret["sig_len"] = np.array(len(self.cur_df.iloc[idx, 0]))
return ret, np.array([self.label_list[idx]])
I’ve got this error at some batch :
RuntimeError Traceback (most recent call last)
in ()
5 train_Loss.append(0)
----> 6 for step,batch_data in enumerate(dataloader):
7 i+=1
8 start = time.time()
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in next(self)
558 if self.num_workers == 0: # same-process loading
559 indices = next(self.sample_iter) # may raise StopIteration
–> 560 batch = self.collate_fn([self.dataset[i] for i in indices])
561 if self.pin_memory:
562 batch = _utils.pin_memory.pin_memory_batch(batch)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
66 elif isinstance(batch[0], container_abcs.Sequence):
67 transposed = zip(*batch)
—> 68 return [default_collate(samples) for samples in transposed]
69
70 raise TypeError((error_msg_fmt.format(type(batch[0]))))
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in (.0)
66 elif isinstance(batch[0], container_abcs.Sequence):
67 transposed = zip(*batch)
—> 68 return [default_collate(samples) for samples in transposed]
69
70 raise TypeError((error_msg_fmt.format(type(batch[0]))))
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
61 return batch
62 elif isinstance(batch[0], container_abcs.Mapping):
—> 63 return {key: default_collate([d[key] for d in batch]) for key in batch[0]}
64 elif isinstance(batch[0], tuple) and hasattr(batch[0], ‘_fields’): # namedtuple
65 return type(batch[0])(*(default_collate(samples) for samples in zip(*batch)))
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in (.0)
61 return batch
62 elif isinstance(batch[0], container_abcs.Mapping):
—> 63 return {key: default_collate([d[key] for d in batch]) for key in batch[0]}
64 elif isinstance(batch[0], tuple) and hasattr(batch[0], ‘_fields’): # namedtuple
65 return type(batch[0])(*(default_collate(samples) for samples in zip(*batch)))
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
50 raise TypeError(error_msg_fmt.format(elem.dtype))
51
—> 52 return default_collate([torch.from_numpy(b) for b in batch])
53 if elem.shape == (): # scalars
54 py_type = float if elem.dtype.name.startswith(‘float’) else int
/usr/local/lib/python3.6/dist-packages/torch/utils/data/utils/collate.py in default_collate(batch)
41 storage = batch[0].storage().new_shared(numel)
42 out = batch[0].new(storage)
—> 43 return torch.stack(batch, 0, out=out)
44 elif elem_type.module == ‘numpy’ and elem_type.name != 'str’
45 and elem_type.name != 'string’:
RuntimeError: Expected object of scalar type Double but got scalar type Long for sequence element 1 in sequence argument at position #1 ‘tensors’
I tracked down the specific batch that cause this error, but it looks o.k… any help?