How do you load BMP images by implementing a custom Dataset class?

Below is my custom Dataset implementation:

import pathlib

import torch
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class BatchMaker(Dataset):
    def __init__(self, img_dir, img_extension, transform=None):  # Default method
        self.paths = list(pathlib.Path(img_dir).glob(f"*{img_extension}"))
        self.transform = transform

    def __len__(self):  # Default method
        return len(self.paths)

    def load_image(self, index: int):  # Custom method
        img_path = self.paths[index]
        return Image.open(img_path)

    def __getitem__(self, index: int):  # Default method
        img = self.load_image(index)
        if self.transform:
            # Stack the original and two augmented views, adding one
            # dimension: (c, h, w) -> (views, c, h, w)
            return torch.stack((transforms.ToTensor()(img), self.transform(img), self.transform(img)), 0)
        else:
            return transforms.ToTensor()(img)
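
For context, the dataset is consumed through a DataLoader roughly like this (a sketch: the directory, extension, op_size, and batch size are assumptions; the MeOH_* names match the traceback below, and contrastive_transformation is defined further down):

from torch.utils.data import DataLoader

MeOH_dataset = BatchMaker("data/MeOH", ".bmp", transform=contrastive_transformation(224))  # hypothetical path/size
MeOH_dataloader = DataLoader(MeOH_dataset, batch_size=4, shuffle=True)

MeOH_batch = next(iter(MeOH_dataloader))  # this call raises the error below for BMP files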

This works fine with PNG and JPEG images, but BMP images are causing problems. Below is the error I get while fetching BMP images.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[57], line 2
----> 2 MeOH_batch = next(iter(MeOH_dataloader))
      3 do_experiment(MeOH_batch)

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:634, in _BaseDataLoaderIter.__next__(self)
    631 if self._sampler_iter is None:
    632     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    633     self._reset()  # type: ignore[call-arg]
--> 634 data = self._next_data()
    635 self._num_yielded += 1
    636 if self._dataset_kind == _DatasetKind.Iterable and \
    637         self._IterableDataset_len_called is not None and \
    638         self._num_yielded > self._IterableDataset_len_called:

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:678, in _SingleProcessDataLoaderIter._next_data(self)
    676 def _next_data(self):
    677     index = self._next_index()  # may raise StopIteration
--> 678     data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    679     if self._pin_memory:
    680         data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py:51, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
     49         data = self.dataset.__getitems__(possibly_batched_index)
     50     else:
---> 51         data = [self.dataset[idx] for idx in possibly_batched_index]
     52 else:
     53     data = self.dataset[possibly_batched_index]

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py:51, in <listcomp>(.0)
     49         data = self.dataset.__getitems__(possibly_batched_index)
     50     else:
---> 51         data = [self.dataset[idx] for idx in possibly_batched_index]
     52 else:
     53     data = self.dataset[possibly_batched_index]

Cell In[45], line 33, in BatchMaker.__getitem__(self, index)
     31 img = self.load_image(index)
     32 if self.transform:
---> 33     return torch.stack((transforms.ToTensor()(img),self.transform(img),self.transform(img)),0) #Returns stack of images by adding one more dimension (c,h,w)->(img,c,h,w)
     34 else:
     35     return transforms.ToTensor()(img)

File /opt/conda/lib/python3.10/site-packages/torchvision/transforms/transforms.py:95, in Compose.__call__(self, img)
     93 def __call__(self, img):
     94     for t in self.transforms:
---> 95         img = t(img)
     96     return img

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/torchvision/transforms/transforms.py:547, in RandomApply.forward(self, img)
    545     return img
    546 for t in self.transforms:
--> 547     img = t(img)
    548 return img

File /opt/conda/lib/python3.10/site-packages/torchvision/transforms/transforms.py:95, in Compose.__call__(self, img)
     93 def __call__(self, img):
     94     for t in self.transforms:
---> 95         img = t(img)
     96     return img

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/torchvision/transforms/transforms.py:547, in RandomApply.forward(self, img)
    545     return img
    546 for t in self.transforms:
--> 547     img = t(img)
    548 return img

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/torchvision/transforms/transforms.py:1285, in ColorJitter.forward(self, img)
   1283     img = F.adjust_contrast(img, contrast_factor)
   1284 elif fn_id == 2 and saturation_factor is not None:
-> 1285     img = F.adjust_saturation(img, saturation_factor)
   1286 elif fn_id == 3 and hue_factor is not None:
   1287     img = F.adjust_hue(img, hue_factor)

File /opt/conda/lib/python3.10/site-packages/torchvision/transforms/functional.py:937, in adjust_saturation(img, saturation_factor)
    935     _log_api_usage_once(adjust_saturation)
    936 if not isinstance(img, torch.Tensor):
--> 937     return F_pil.adjust_saturation(img, saturation_factor)
    939 return F_t.adjust_saturation(img, saturation_factor)

File /opt/conda/lib/python3.10/site-packages/torchvision/transforms/_functional_pil.py:93, in adjust_saturation(img, saturation_factor)
     90     raise TypeError(f"img should be PIL Image. Got {type(img)}")
     92 enhancer = ImageEnhance.Color(img)
---> 93 img = enhancer.enhance(saturation_factor)
     94 return img

File /opt/conda/lib/python3.10/site-packages/PIL/ImageEnhance.py:36, in _Enhance.enhance(self, factor)
     25 def enhance(self, factor):
     26     """
     27     Returns an enhanced image.
     28 
   (...)
     34     :rtype: :py:class:`~PIL.Image.Image`
     35     """
---> 36     return Image.blend(self.degenerate, self.image, factor)

File /opt/conda/lib/python3.10/site-packages/PIL/Image.py:3340, in blend(im1, im2, alpha)
   3338 im1.load()
   3339 im2.load()
-> 3340 return im1._new(core.blend(im1.im, im2.im, alpha))

ValueError: image has wrong mode

The more I look at it, the more I realize this could be a transformation issue.
Here is my transformation:

def contrastive_transformation(op_size, s=1):
    contrast_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomResizedCrop((op_size, op_size)),
        transforms.RandomApply([get_color_distortion(s)]),
        transforms.RandomGrayscale(p=0.2),
        transforms.GaussianBlur(kernel_size=5),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return contrast_transforms

The error is raised by PIL, as you can see in the stack trace. Check which mode the BMP image uses and whether that mode is supported by the transformations. If not, you might need to convert it either to a supported mode in PIL or into a tensor.
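
For example, a minimal check and conversion could look like this (a sketch; sample.bmp is a placeholder for one of the failing files):

from PIL import Image

img = Image.open("sample.bmp")  # placeholder path
print(img.mode)  # BMP files are often opened as "P" (palette) or "L" (grayscale) rather than "RGB"

# ColorJitter's saturation/hue adjustments rely on PIL operations that
# expect an RGB image, so converting up front avoids the mode error:
if img.mode != "RGB":
    img = img.convert("RGB")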


Found a way: read the BMP images into NumPy arrays via cv2, then convert those arrays to PIL images and return them for further PyTorch processing.
Improved implementation:

import cv2  # in addition to the imports above

class BatchMaker(Dataset):
    def __init__(self, img_dir, img_extension, transform=None):  # Default method
        self.paths = list(pathlib.Path(img_dir).glob(f"*{img_extension}"))
        self.transform = transform

    def __len__(self):  # Default method
        return len(self.paths)

    def load_image(self, index: int):  # Custom method
        img_path = self.paths[index]
        np_img = cv2.imread(str(img_path))  # cv2 opens and decodes the image into a NumPy array
        np_img = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)  # cv2 loads BGR; flip to RGB so PIL colors match
        return Image.fromarray(np_img)  # return a PIL image for further PyTorch processing

    def __getitem__(self, index: int):  # Default method
        img = self.load_image(index)
        if self.transform:
            # Stack the original and two augmented views: (c, h, w) -> (views, c, h, w)
            return torch.stack((transforms.ToTensor()(img), self.transform(img), self.transform(img)), 0)
        else:
            return transforms.ToTensor()(img)

Is there a more optimal way to achieve this for a relatively large dataset?
Thanks.

I don’t know if fromarray triggers a copy, but if so, you might try to either pass the NumPy array with a supported dtype to the transformation or convert it manually to a tensor via torch.from_numpy (and normalize it manually if needed).
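
A sketch of that suggestion, skipping the PIL round trip entirely (load_image_as_tensor is a hypothetical helper, and the mean/std values are just the common ImageNet stats, shown as an example):

import cv2
import torch

def load_image_as_tensor(img_path):
    np_img = cv2.cvtColor(cv2.imread(str(img_path)), cv2.COLOR_BGR2RGB)  # BGR uint8 -> RGB
    # from_numpy wraps the array's memory instead of copying; permute
    # HWC -> CHW and scale to [0, 1] to match transforms.ToTensor()
    tensor = torch.from_numpy(np_img).permute(2, 0, 1).float().div_(255.0)
    # Optional manual normalization (ImageNet stats, as an example):
    # mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    # std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    # tensor = (tensor - mean) / std
    return tensor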
