How do I get data, transforms(data) from the dataloader at the same time?

class MyDataset(torch.utils.data.Dataset):
    """Wrap a dataset so every item also carries a transformed copy of its data.

    Indexing yields ``(data, tr_data, target, index)``, where ``data`` and
    ``target`` come from the wrapped dataset and ``tr_data`` is the result of
    calling ``transform`` on ``data``.
    """

    def __init__(self, dataset=None, transform=None):
        """Store the wrapped dataset and the optional extra transform.

        Args:
            dataset: Indexable collection whose items are ``(data, target)``
                pairs.
            transform: Optional callable applied to ``data`` in
                ``__getitem__``.
        """
        self.MNIST = dataset
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(data, tr_data, target, index)`` for the item at *index*."""
        data, target = self.MNIST[index]
        if self.transform is not None:
            # The transform receives whatever the wrapped dataset returns, so
            # it must accept that type (e.g. a tensor if the wrapped dataset
            # already applies ToTensor).
            tr_data = self.transform(data)
        else:
            # Without a transform, fall back to the untouched data so that
            # ``tr_data`` is always defined.
            tr_data = data
        return data, tr_data, target, index

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.MNIST)

# Base MNIST dataset: each item is already converted to a tensor and normalized.
train_dataset = datasets.MNIST(
    root='./data', train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]),
)

# Extra transform handed to MyDataset.
# NOTE: MyDataset applies this to the output of train_dataset, which is a
# torch.Tensor (not a PIL image) by the time RandomHorizontalFlip sees it.
trin_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

trainset = MyDataset(dataset=train_dataset, transform=trin_transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=24, shuffle=False, num_workers=2)

The error I get is: “TypeError: img should be PIL Image. Got <class 'torch.Tensor'>”

@ptrblck please check it.

Which line of code is raising this error, and how did you create self.MNIST as well as self.transform?

How do I share code?

You can wrap it into three backticks ``` and then post it here.
Also, there is a “Preformatted text” button in the reply field.

Thank you. I am trying to share the code. It would help to figure out where I am making the errors.

import os
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
import numpy as np
import matplotlib.pyplot as plt


class MyDataset(torch.utils.data.Dataset):
    """Dataset wrapper that returns both the original and a transformed sample.

    Each item is the tuple ``(data, tr_data, target, index)``: ``data`` and
    ``target`` are taken from the wrapped dataset, ``tr_data`` is
    ``transform(data)``, and ``index`` is the position that was requested.
    """

    def __init__(self, dataset=None, transform=None):
        """Keep references to the wrapped dataset and the optional transform.

        Args:
            dataset: Indexable collection of ``(data, target)`` pairs.
            transform: Optional callable applied to ``data`` on every access.
        """
        self.MNIST = dataset
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(data, tr_data, target, index)`` for position *index*."""
        data, target = self.MNIST[index]

        # ``data`` has already passed through any transform configured on the
        # wrapped dataset; ``self.transform`` is applied on top of that result.
        # With no transform configured, ``tr_data`` is simply ``data`` so the
        # name is never left unbound.
        tr_data = data if self.transform is None else self.transform(data)

        return data, tr_data, target, index

    def __len__(self):
        """Return the length of the wrapped dataset."""
        return len(self.MNIST)
    
    
    
# Base MNIST dataset: items come out as normalized tensors.
train_dataset = datasets.MNIST(
    root='./data', train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]),
)

# Extra transform applied by MyDataset on top of train_dataset's output.
# NOTE: because train_dataset already applies ToTensor + Normalize, the
# ToTensor/Normalize steps here run a second time on the same sample.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(p=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

trainset = MyDataset(dataset=train_dataset, transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=24, shuffle=False, num_workers=2)

# Use the built-in next(); iterator objects are not guaranteed to expose the
# Python 2 style ``.next()`` method.
data, tr_data, target, index = next(iter(trainloader))


def imshow(inp, title=None):
    """Show an image tensor with matplotlib after undoing the 0.5/0.5 scaling.

    Args:
        inp: Tensor to display; its axes are reordered with
            ``transpose((1, 2, 0))``, so it is assumed to be 3-D with the
            channel axis first -- TODO confirm against callers.
        title: Optional title passed to ``plt.title``.
    """
    # Reorder axes (0, 1, 2) -> (1, 2, 0) so the first axis becomes the last.
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.5])
    std = np.array([0.5])
    # Invert a Normalize((0.5,), (0.5,)) step, then clamp to the displayable range.
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
    
    
# Tile the transformed batch into a single image grid and display it.
out = torchvision.utils.make_grid(tr_data)
# Title with the batch labels themselves; indexing ``target`` by its own
# values would look up unrelated positions in the batch.
imshow(out, title=target.tolist())

Now I apply the same transformations directly on datasets.MNIST and check the difference between the output of my custom dataset and that of the built-in MNIST dataset (PyTorch). I get a difference of tensor(0.9961) for the first sample, even though the shuffle flag is False for both dataloaders.

# Reference pipeline: the flip is applied directly to the PIL image coming
# from MNIST, followed by a single ToTensor + Normalize.
new_dataset = datasets.MNIST(
    root='./data', train=True, download=True,
    transform=transforms.Compose([
        transforms.RandomHorizontalFlip(p=1),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]),
)

loader = torch.utils.data.DataLoader(new_dataset, batch_size=24, shuffle=False, num_workers=2)

# Use the built-in next(); iterator objects are not guaranteed to expose the
# Python 2 style ``.next()`` method.
data_new, target_new = next(iter(loader))

new_out = torchvision.utils.make_grid(data_new)

# Title with the batch labels themselves rather than indexing the label
# tensor by its own values.
imshow(new_out, title=target_new.tolist())

# Largest (signed) element-wise difference between the first sample of the
# reference pipeline and the first sample produced through MyDataset.
print(torch.max(data_new[0] - tr_data[0]))

I think the error comes from these two functions:
transforms.ToPILImage() and transforms.ToTensor()

Thanks for the code.
I’ve added the proper formatting to copy-paste the code and cannot reproduce the issue, as your code runs fine on my machine.

Thank you.
Please check the result of the last line of code,
print(torch.max(data_new[0]-tr_data[0])), when applying the same transformations in both dataset classes (MyDataset and datasets.MNIST). It should be zero, but I am getting tensor(0.9961).

Ah OK, I missed this and thought the code should reproduce the initial error.
The mismatch will most likely be created as you are transforming the data twice in your custom approach.
You are passing the transformations the first time when creating train_dataset and then another transformation to MyDataset, which will be applied again.

Thank you for the reply. But how could I solve the problem?

Apply the transformation only once.
I’m not sure, what your custom dataset is supposed to do besides applying another transformation, which will create this mismatch.

Okay, I will try it and let you know. I think I need to pass two transformations: a first transformation (ToTensor and normalisation, for the data only) and a second transformation (the data augmentation) as it is.

Do you know any better approach to tackle this issue?

Thank you. The error was caused by applying the normalisation a second time. :slight_smile: Thank you