Hello everyone,
I’ll try to make it simple with some code. I have some images, each image with a variable number of features associated with it. I’ve reduced the dimensionalities to simplify the example.
class test_dataset(Dataset):
    """Toy map-style dataset pairing each image with a list of feature dicts.

    Sample 0 carries two feature dicts and every other sample carries three,
    so the per-sample ``'features'`` lists deliberately differ in length.
    """

    def __init__(self):
        # Two random 3x3 "images", stacked along the first dimension.
        self.items = torch.rand(2, 3, 3)

    def __getitem__(self, i):
        """Return ``{'image': tensor, 'features': list of dicts}`` for sample *i*."""
        item_i = self.items[i, :, :]
        if i == 0:
            feature_i = [{'a': 5, 'b': 6}, {'a': 7, 'b': 8}]
        else:
            feature_i = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'a': 5, 'b': 6}]
        dict_item = {
            'image': item_i,
            'features': feature_i,
        }
        return dict_item

    def __len__(self):
        """Return the number of images (size of the first dimension of ``items``)."""
        return self.items.shape[0]
# No collate_fn is passed, so the DataLoader falls back to its default collate
# function, which merges the per-sample 'features' lists position by position
# instead of keeping one list per sample. Preserving variable-length lists
# requires supplying a custom collate_fn.
my_dataset = test_dataset()
my_dataloader = DataLoader(
    dataset=my_dataset,
    batch_size=2,
    num_workers=2,
    shuffle=False,
)
Image 0 is associated with 2 features (each with sub-features ‘a’ and ‘b’), and image 1 with 3 features (each with sub-features ‘a’ and ‘b’).
When running this:
# Walk over the batches produced by the loader and print both samples.
for it, data_dict in enumerate(my_dataloader):
    # extract data fields
    items, features = data_dict['image'], data_dict['features']
    # NOTE: with the default collate function, features[k] is the k-th feature
    # position gathered across the whole batch, not the feature list that
    # belongs to sample k.
    print('Item 0: ', items[0])
    print('Features of item 0: ', features[0])
    print('\n')
    print('Item 1: ', items[1])
    print('Features of item 1: ', features[1])
I get:
Item 0: tensor([[0.7595, 0.5402, 0.8044],
[0.5137, 0.4920, 0.4653],
[0.3604, 0.8673, 0.3601]])
Features of item 0: {'a': tensor([5, 1]), 'b': tensor([6, 2])}
Item 1: tensor([[0.3523, 0.6935, 0.8192],
[0.4869, 0.4361, 0.8284],
[0.6358, 0.2016, 0.8535]])
Features of item 1: {'a': tensor([7, 3]), 'b': tensor([8, 4])}
However I would expect:
Item 0: tensor([[0.7595, 0.5402, 0.8044],
[0.5137, 0.4920, 0.4653],
[0.3604, 0.8673, 0.3601]])
Features of item 0: [{'a': 5, 'b': 6}, {'a': 7, 'b': 8}]
Item 1: tensor([[0.3523, 0.6935, 0.8192],
[0.4869, 0.4361, 0.8284],
[0.6358, 0.2016, 0.8535]])
Features of item 1: [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'a': 5, 'b': 6}]
How can I solve this? I’ve tried using tuples instead of lists, but nothing changes.
Thank you.