import torch
from torchvision import datasets
class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the image file path.

    Extends torchvision.datasets.ImageFolder: each item is the tuple the
    parent class returns for that index with the file path appended as
    the last element.
    """

    def __getitem__(self, index):
        """Return the parent's item for ``index`` plus the image path.

        This override is the method the dataloader calls to fetch a sample.
        """
        # what ImageFolder itself would hand back for this index
        base_item = super().__getitem__(index)
        # first field of the matching ``imgs`` entry is the file path
        image_path = self.imgs[index][0]
        # append the path without disturbing the original fields
        return base_item + (image_path,)
# EXAMPLE USAGE:
from torchvision import transforms

# instantiate the dataset and dataloader
data_dir = '/Users/Documents/images'
# ToTensor converts every PIL image to a tensor; without a transform the
# default collate function raises
# "TypeError: default_collate: batch must contain tensors ... found PIL.Image.Image"
dataset = ImageFolderWithPaths(data_dir, transform=transforms.ToTensor())  # our custom dataset
# DataLoader lives in torch.utils.data (torch.utils.DataLoader does not exist)
dataloader = torch.utils.data.DataLoader(dataset)

# iterate over data
for inputs, labels, paths in dataloader:
    # use the above variables freely
    print(inputs, labels, paths)
Could you check the root path again? /Users/Documents/Images might be missing the actual root folder. Are you sure your system contains a /Users folder in the system root folder?
Sorry, what do you mean by the root here? I am using a Mac and I ran pwd in the folder where the images are saved. Do I need to have the script in the same place as the images? The script is in the folder above.
No, the script doesn’t need to be in the same folder.
Note that the passed folder should contain subfolders with the images. The images should not be in the passed root folder. Did you make sure that’s the case?
Traceback (most recent call last):
File "file_location2.py", line 30, in <module>
for inputs, labels, paths in dataloader:
File "/Users/nubstech/opt/anaconda3/envs/Cells_Counting/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 345, in __next__
data = self._next_data()
File "/Users/nubstech/opt/anaconda3/envs/Cells_Counting/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 385, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/Users/nubstech/opt/anaconda3/envs/Cells_Counting/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
return self.collate_fn(data)
File "/Users/nubstech/opt/anaconda3/envs/Cells_Counting/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 79, in default_collate
return [default_collate(samples) for samples in transposed]
File "/Users/nubstech/opt/anaconda3/envs/Cells_Counting/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 79, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "/Users/nubstech/opt/anaconda3/envs/Cells_Counting/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 81, in default_collate
raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.Image.Image'>
In your transform, did you add a transforms.ToTensor() at the end of the composition?
Essentially, you must have forgotten to convert the PIL image to a Tensor.
With the standard ImageFolder class you would do something like this:
# Sketch of the usual ImageFolder + DataLoader pipeline.
# ``path_To_images`` is a placeholder for the root folder of your images.
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder

data = DataLoader(
    ImageFolder(
        path_To_images,
        transform=transforms.Compose([
            # ... your PIL-based transformations go here ...
            # ToTensor goes last: it turns the processed PIL image into a
            # tensor, which is what the DataLoader can batch
            transforms.ToTensor(),
        ]),
    )
)
Essentially, it loads the data using the PIL library and applies the PIL-based processing that you specify. The ToTensor() you add at the end of the Compose then takes the processed image and converts it into a tensor that is ready to be used as input for your network. The dataset returns the tensor and the class, and the DataLoader prepares the batches.
So wherever your pipeline loads images using PIL, just convert them to tensors (or any other data type mentioned in the error) and it should work.
Many thanks for your help. I have integrated the code into my training code now. However, when I try to save the image path to the CSV file, I get an error message:
Traceback (most recent call last):
File "train_filename.py", line 130, in <module>
fd.write( ','.join(map(str, path.detach().tolist())) + '\n')
NameError: name 'path' is not defined
Partial code for train_filename.py:
class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the image file path.

    Extends torchvision.datasets.ImageFolder: each item is the tuple the
    parent class returns for that index with the file path appended as
    the last element.
    """

    def __getitem__(self, index):
        """Return the parent's item for ``index`` plus the image path.

        This override is the method the dataloader calls to fetch a sample.
        """
        # what ImageFolder itself would hand back for this index
        base_item = super().__getitem__(index)
        # first field of the matching ``imgs`` entry is the file path
        image_path = self.imgs[index][0]
        # append the path without disturbing the original fields
        return base_item + (image_path,)
# main training loop
# Runs 50 epochs over ``loader_train``; for every batch it appends three CSV
# rows to preds_base_model.csv: predictions, targets, and image paths.
global_step = 0
best_test_error = 10000
for epoch in range(50):
    print("Epoch %d" % epoch)
    model.train()
    for images, paths in tqdm(loader_train):
        images = images.to(device)
        # look up each image's count by file name
        targets = torch.tensor([metadata['count'][os.path.split(path)[-1]] for path in paths])  # B
        targets = targets.float().to(device)
        output = model(images)  # B x 1 x 9 x 9 (analogous to a heatmap)
        preds = output.sum(dim=[1, 2, 3])  # predicted cell counts (vector of length B)
        print(preds)
        with open('preds_base_model.csv', 'a') as fd:
            fd.write(','.join(map(str, preds.detach().tolist())) + '\n')
            fd.write(','.join(map(str, targets.detach().tolist())) + '\n')
            # ``path`` only exists inside the list comprehension above, so
            # using it here raised NameError. The batch's paths are plain
            # strings (they are fed to os.path.split), so they are written
            # directly: no .detach()/.tolist() needed.
            fd.write(','.join(map(str, paths)) + '\n')
`path` here is supposed to be a string, right? Also, you need to have defined `path` somewhere in your code or passed it to your training function. Not defining `path` seems to be the problem here.
class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the image file path.

    Extends torchvision.datasets.ImageFolder: each item is the tuple the
    parent class returns for that index with the file path appended as
    the last element.
    """

    def __getitem__(self, index):
        """Return the parent's item for ``index`` plus the image path.

        This override is the method the dataloader calls to fetch a sample.
        """
        # what ImageFolder itself would hand back for this index
        base_item = super().__getitem__(index)
        # first field of the matching ``imgs`` entry is the file path
        image_path = self.imgs[index][0]
        # append the path without disturbing the original fields
        return base_item + (image_path,)
# main training loop
# Runs 50 epochs over ``loader_train``; for every batch it appends three CSV
# rows to preds_base_model.csv: predictions, targets, and image paths.
global_step = 0
best_test_error = 10000
for epoch in range(50):
    print("Epoch %d" % epoch)
    model.train()
    for images, paths in tqdm(loader_train):
        images = images.to(device)
        # look up each image's count by file name
        targets = torch.tensor([metadata['count'][os.path.split(path)[-1]] for path in paths])  # B
        targets = targets.float().to(device)
        output = model(images)  # B x 1 x 9 x 9 (analogous to a heatmap)
        preds = output.sum(dim=[1, 2, 3])  # predicted cell counts (vector of length B)
        print(preds)
        with open('preds_base_model.csv', 'a') as fd:
            fd.write(','.join(map(str, preds.detach().tolist())) + '\n')
            fd.write(','.join(map(str, targets.detach().tolist())) + '\n')
            # ``path`` only exists inside the list comprehension above, so
            # using it here raised NameError. The batch's paths are plain
            # strings (they are fed to os.path.split), so they are written
            # directly: no .detach()/.tolist() needed.
            fd.write(','.join(map(str, paths)) + '\n')