I am trying to use the KAIST dataset for pedestrian detection. I have done the tutorials on the PyTorch website. I am new to machine learning/deep learning and have limited programming experience. I have designed a custom dataset class and dataloader based on the ’ DATA LOADING AND PROCESSING TUTORIAL’ on the PyTorch website. To attempt to get a better understanding, I am using a small amount of images and labels to feed into a neural network.
class dataset(Dataset):
def __init__(self, csv_file, root_dir, transform=None):
"""
Args:
csv_file (string): Path to the csv file with annotations.
root_dir (string): Directory with all the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.csv = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(self.csv)
def __getitem__(self, idx):
img_name = os.path.join(self.root_dir, self.csv.iloc[idx, 0])
image = io.imread(img_name)
labels = self.csv.iloc[idx, 18]
labels = labels.astype('float').reshape(-1, 1)
sample = {'image': image, 'labels': labels}
if self.transform:
sample = self.transform(sample)
return sample
csv_file='train/images/annotations.csv', root_dir='train/images/'
class ToTensor(object):
"""Convert ndarrays in sample to Tensors."""
def __call__(self, sample):
image, labels = sample['image'], sample['labels']
# swap color axis because
# numpy image: H x W x C
# torch image: C X H X W
image = image.transpose((2, 0, 1))
return {'image': torch.from_numpy(image),
'labels': torch.from_numpy(labels)}
transformed_dataset = dataset(csv_file='train/images/annotations.csv',
root_dir='train/images/',
transform=transforms.Compose([ToTensor()]))
for i in range(len(transformed_dataset)):
sample = transformed_dataset[i]
print(i, sample['image'].size(), sample['labels'].size())
if i == 3:
break
However, for some reason the dataloader will not output the batch size.
dataloader = DataLoader(transformed_dataset, batch_size=64)
loader = DataLoader(dataloader, batch_size=64)
for image, labels in dataloader:
print(sample['image'].shape, sample['labels'].shape)
torch.Size([3, 512, 640]) torch.Size([1, 1])
torch.Size([3, 512, 640]) torch.Size([1, 1])
torch.Size([3, 512, 640]) torch.Size([1, 1])
torch.Size([3, 512, 640]) torch.Size([1, 1])
torch.Size([3, 512, 640]) torch.Size([1, 1])
torch.Size([3, 512, 640]) torch.Size([1, 1])
torch.Size([3, 512, 640]) torch.Size([1, 1])
Any advice would be greatly appreciated.