Hello,
I’m trying to return a batch of images and numerical data from a dataloader. The images seem to be coming out correct. But it’s returning the batches of the whole data set.
class Inspection_Dataset(Dataset):
"""
df: Dataframe containing all categorical, numerical and image columns
numerical columns: list of numerical columns
cat_columns: list of categorical columns
image: column containing image file name
root_dir: column containing root directory
"""
def __init__(self, df, numerical_columns, cat_columns, image,
root_dir, label, transform = None):
self.df = df
self.numerical_columns = numerical_columns
self.cat_columns = cat_columns
self.image_column = image
self.root_dir = root_dir
self.label = label
self.transform = transform
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
idx = list(self.df.index)
#image
image = Image.open(os.path.join(self.df.loc[idx, self.root_dir].values[0],
self.df.loc[idx, self.image_column].values[0]))
image = self.transform(image)
#numerical columns
numericals = np.asarray(self.df.loc[idx, self.numerical_columns].values)
return image, numericals
train_data = Inspection_Dataset(train_sample,
numerical_columns = numerical_columns,
cat_columns = non_loca_cat_columns,
image = 'file',
root_dir = 'root',
label = 'target',
transform = train_transform)
train_loader = DataLoader(train_data, batch_size = 10, shuffle = True)
for image, numericals in train_loader:
break
print(image.size(), numericals.size())
From that final print statement, I get:
torch.Size([10,3,224,224]) torch.Size([10, 8345, 6])
I would think it should be:
torch.Size([10,3,224,224]) torch.Size([10, 6])
or
torch.Size([10,3,224,224]) torch.Size([10, 1, 6])
Does anyone see what I’m doing wrong?