I am working on a fruit classification problem. I'm loading the dataset using a PyTorch DataLoader, but I don't understand how it picks samples from the dataset. The lengths of the train set and test set (found using len(train_set) …) are reported as 836 and 208 respectively, but the lengths of the train loader and test loader are reported as 105 and 26 respectively. Why is that? I've used a batch size of 8, so I expected the length of the train loader and test loader to be 8. But clearly I'm wrong. Can anyone explain this, or direct me to a resource where it is already explained?
Below is the code that I'm using:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from skimage import io
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision
from torch.utils.data import DataLoader
from sklearn.metrics import f1_score
import numpy as np
class kinoo(Dataset):
    """Image-classification dataset driven by a CSV annotation file.

    Rows of the CSV are read positionally: column 0 holds an image path
    that is joined onto ``root_dir`` and column 1 holds a label that is
    converted with ``int``.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Read the annotation table and store the dataset settings.

        Args:
            csv_file: CSV source handed to ``pandas.read_csv``.
            root_dir: Directory prefixed to every image path from the CSV.
            transform: Optional callable applied to each loaded image.
        """
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of samples, one per annotation row."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index: Positional row index into the annotation table.

        Returns:
            A ``(image, label)`` tuple. ``image`` is whatever ``io.imread``
            returns, passed through ``transform`` when one is set; ``label``
            is a ``torch.tensor`` built from the integer in column 1.
        """
        relative_path = self.annotations.iloc[index, 0]
        sample = io.imread(os.path.join(self.root_dir, relative_path))
        target = torch.tensor(int(self.annotations.iloc[index, 1]))

        if not self.transform:
            return (sample, target)

        # The transform receives a copy of the freshly loaded image rather
        # than the object returned by io.imread itself.
        return (self.transform(sample.copy()), target)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# hyperparameters
in_channel = 3
learning_rate = 1e-3
batch_size = 8
num_epochs = 50

# transforms: convert to a tensor, normalize each channel, then resize to
# 640x640 (applied in this order to every image).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
    transforms.Resize(size=(640, 640)),
])

# load data
dataset = kinoo(
    csv_file='/content/drive/MyDrive/aza-exp/aza-fruit-regression-v2_csv/fruit_classificaiton.csv',
    root_dir='/content/drive/MyDrive/aza-exp/aza-fruit-regression-v2',
    transform=transform,
)

# Random 80% / 20% split of the samples into train and test subsets.
train_set, test_set = torch.utils.data.random_split(dataset, [0.8, 0.2])

# NOTE: len() of a DataLoader is the number of *batches*, not the number of
# samples. With the default drop_last=False that is
# ceil(len(dataset) / batch_size), e.g. 836 samples -> 105 batches and
# 208 samples -> 26 batches at batch_size = 8.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)
Now, when I query the lengths, I get the following output:
len(train_set)
836
len(test_set)
208
len(test_loader)
26
len(train_loader)
105