Hi, I have written a custom Dataset class to load my data, but when I compare the length of the DataLoader (times the batch size) against the length of the dataset, the numbers differ once a batch_size is specified.
import os

import cv2
import pandas as pd
import torch
from torch.utils import data
from torch.utils.data import DataLoader
from torchvision import transforms
class FlowerDataset(data.Dataset):
    def __init__(self, root_dir, text_file, transform=None, target_transform=None):
        self.root_dir = root_dir
        self.text_file = text_file
        # Each row of the text file holds a Windows-style path; keep the last
        # two components (class folder\file name) as the image name, and the
        # class folder alone as the label.
        paths = pd.read_csv(self.root_dir + self.text_file).iloc[:, 0]
        self.name = paths.apply(lambda x: '\\'.join(x.split('\\')[-2:]))
        self.label = paths.apply(lambda x: x.split('\\')[-2])
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.name)

    def __getitem__(self, index):
        # Images live in a folder named after the text file (e.g. "train"),
        # under root_dir.
        img_name = os.path.join(self.root_dir + self.text_file.split('.')[0],
                                self.name[index])
        label = self.label[index]
        img = cv2.imread(img_name)
        if self.transform is not None:
            img = self.transform(img)
        label = torch.tensor(int(label))
        return img, label

    def __repr__(self):
        return "{}({} {})".format(self.__class__.__name__,
                                  self.root_dir,
                                  self.transform)
# root_path is defined elsewhere and points at the dataset root.
a = FlowerDataset(root_path, '\\train.txt', transform=transforms.Compose([
    transforms.ToPILImage(mode='RGB'),
    transforms.Resize((224, 224)),
    transforms.ToTensor()
]))
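To rule out __getitem__ itself, one can pull a single item directly (index 0 is arbitrary):

img, label = a[0]
print(img.shape, label)  # expected: torch.Size([3, 224, 224]) and a scalar tensor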
data = DataLoader(a, batch_size=64, shuffle=True)
print(len(data) * 64, len(a))
>>> 6592 6551
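One thing that might be relevant: the first number is exactly what you would get if the loader rounded the number of batches up:

import math
print(math.ceil(len(a) / 64))       # 103 batches for 6551 samples
print(math.ceil(len(a) / 64) * 64)  # 6592, matching the output above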
data = DataLoader(a, batch_size=1, shuffle=True)
print(len(data), len(a))
>>> 6551 6551
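In case it helps, a sanity check one could run is to count how many samples the loader actually yields in one full pass (a minimal sketch reusing the dataset a from above):

loader = DataLoader(a, batch_size=64, shuffle=True)
total = 0
for imgs, labels in loader:
    total += imgs.size(0)  # the last batch may hold fewer than 64 samples
print(total, len(loader))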
Can anyone tell me why these numbers differ, and whether this is expected behaviour?