How do I get data, transforms(data) from the dataloader at the same time?

TanmDL · April 30, 2020, 3:25am

class MyDataset(torch.utils.data.Dataset):
    """Wrap a dataset and return each sample alongside a transformed copy.

    Each item is the tuple ``(data, tr_data, target, index)``, where ``data``
    and ``target`` are exactly what the wrapped dataset returns for ``index``
    and ``tr_data`` is ``transform(data)``.

    Note that ``transform`` receives ``data`` as produced by the wrapped
    dataset, i.e. after any transform the wrapped dataset applies itself.

    Args:
        dataset: Indexable, sized collection yielding ``(data, target)`` pairs.
        transform: Optional callable applied to ``data``. When ``None``,
            ``tr_data`` is the untransformed ``data``.
    """

    def __init__(self, dataset=None, transform=None):
        self.MNIST = dataset
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(data, tr_data, target, index)`` for the given index."""
        data, target = self.MNIST[index]
        # Without a transform there is nothing to apply; fall back to the
        # original sample so the returned tuple is always well defined.
        tr_data = data if self.transform is None else self.transform(data)
        return data, tr_data, target, index

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.MNIST)

# Base MNIST training set; every image is converted to a tensor and normalized.
train_dataset = datasets.MNIST(
    root='./data', train=True, download=True,
    transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]))

# Second pipeline, handed to MyDataset and applied to each sample it reads
# from train_dataset.
# NOTE(review): train_dataset already yields tensors (ToTensor above), so
# RandomHorizontalFlip here is given a tensor rather than a PIL image --
# presumably the source of the TypeError quoted after this snippet.
trin_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

trainset = MyDataset(dataset=train_dataset, transform=trin_transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=24, shuffle=False, num_workers=2)

This raises the error: "TypeError: img should be PIL Image. Got <class 'torch.Tensor'>"

TanmDL · April 30, 2020, 3:58am

@ptrblck please check it.

ptrblck · April 30, 2020, 4:20am

Which line of code is raising this error, and how did you create self.MNIST as well as self.transform?

TanmDL · April 30, 2020, 4:46am

How do I share code?

ptrblck · April 30, 2020, 4:47am

You can wrap it into three backticks ``` and then post it here.
Also, there is a “Preformatted text” button in the reply field.

TanmDL · April 30, 2020, 4:48am

Thank you. I am trying to share the code. It would be better to figure out where I am making the errors.

TanmDL · April 30, 2020, 4:59am

import os
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
import numpy as np
import matplotlib.pyplot as plt


class MyDataset(torch.utils.data.Dataset):
    """Dataset wrapper that yields a sample and its transformed version.

    Indexing returns ``(data, tr_data, target, index)``: ``data`` and
    ``target`` come straight from the wrapped dataset, and ``tr_data`` is the
    result of calling ``transform`` on ``data``.

    ``transform`` is applied on top of whatever the wrapped dataset already
    returns, so any transform configured on the wrapped dataset has been
    applied to ``data`` before ``transform`` sees it.

    Args:
        dataset: Indexable, sized collection yielding ``(data, target)`` pairs.
        transform: Optional callable applied to ``data``. When ``None``,
            ``tr_data`` is the untransformed ``data``.
    """

    def __init__(self, dataset=None, transform=None):
        self.MNIST = dataset
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(data, tr_data, target, index)`` for the given index."""
        data, target = self.MNIST[index]

        if self.transform is not None:
            tr_data = self.transform(data)
        else:
            # No transform configured: return the sample unchanged instead of
            # failing on an unassigned ``tr_data``.
            tr_data = data

        return data, tr_data, target, index

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.MNIST)
    
    
    
# Base MNIST training set; every image is converted to a tensor and normalized.
train_dataset = datasets.MNIST(root='./data', train=True, download=True,
                               transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))]))

# Pipeline applied by MyDataset to each sample read from train_dataset.
# NOTE: the samples it receives have already gone through ToTensor and
# Normalize above, so ToTensor and Normalize are applied a second time here.
train_transform=transforms.Compose([transforms.ToPILImage(),transforms.RandomHorizontalFlip(p=1),
                              transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))])

trainset = MyDataset(dataset=train_dataset, transform = train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=24, shuffle=False, num_workers=2)

# Fetch the first batch. The builtin next() is used because iterators are not
# guaranteed to expose a .next() method.
data, tr_data, target, index = next(iter(trainloader))


def imshow(inp, title=None):
    """Undo the 0.5/0.5 normalization of an image tensor and display it.

    Args:
        inp: Tensor whose first axis is moved to the last position before
            plotting (channel-first to channel-last).
        title: Optional title for the plot; skipped when ``None``.
    """
    # Reorder axes (0, 1, 2) -> (1, 2, 0) so the first axis becomes the last.
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.5])
    std = np.array([0.5])
    # Invert Normalize((0.5,), (0.5,)) and clamp to the displayable range.
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
    
    
# Tile the transformed batch into a single image grid and show it.
out = torchvision.utils.make_grid(tr_data)
# Use the labels themselves as the title; indexing ``target`` by its own
# values would pick labels from unrelated positions in the batch.
imshow(out, title=target.tolist())

Now I apply the same transformations directly on datasets.MNIST and check the difference between the output of my custom dataset and that of the built-in MNIST dataset (PyTorch). I am getting a value of tensor(0.9961) (for the 1st sample), even though the shuffle flag of both dataloaders is False.

# Reference pipeline: flip, convert to tensor and normalize in a single pass
# applied directly by the built-in MNIST dataset.
new_dataset = datasets.MNIST(root='./data', train=True, download=True,
                       transform=transforms.Compose([transforms.RandomHorizontalFlip(p=1),transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))]))


loader = torch.utils.data.DataLoader(new_dataset, batch_size=24, shuffle=False, num_workers=2)

# Fetch the first batch with the builtin next() rather than a .next() method.
data_new, target_new = next(iter(loader))

new_out = torchvision.utils.make_grid(data_new)

# Use the labels themselves as the title; indexing ``target_new`` by its own
# values would pick labels from unrelated positions in the batch.
imshow(new_out, title=target_new.tolist())

# Largest signed element-wise difference between the first sample of each
# pipeline.
print(torch.max(data_new[0]-tr_data[0]))

TanmDL · April 30, 2020, 5:07am

I think the error is caused by these two functions:
transforms.ToPILImage() and transforms.ToTensor()

ptrblck · April 30, 2020, 5:20am

Thanks for the code.
I’ve added the proper formatting to copy-paste the code and cannot reproduce the issue, as your code runs fine on my machine.

TanmDL · April 30, 2020, 5:23am

Thank you.
Check the value printed by the last line of code,
print(torch.max(data_new[0]-tr_data[0])), when applying the same transformations in both dataset classes (MyDataset and datasets.MNIST). It should be zero, but I am getting tensor(0.9961).

ptrblck · April 30, 2020, 5:25am

Ah OK, I missed this and thought the code should reproduce the initial error.
The mismatch will most likely be created as you are transforming the data twice in your custom approach.
You are passing the transformations the first time when creating train_dataset and then another transformation to MyDataset, which will be applied again.

TanmDL · April 30, 2020, 5:26am

Thank you for the reply. But How could I solve the problem?

ptrblck · April 30, 2020, 5:27am

Apply the transformation only once.
I’m not sure, what your custom dataset is supposed to do besides applying another transformation, which will create this mismatch.

TanmDL · April 30, 2020, 5:30am

Okay, I am trying it and will let you know. I think I need to pass two transformations: a first transformation (ToTensor and normalisation, for data only) and a last transformation (the data augmentation), as it is.

TanmDL · April 30, 2020, 5:32am

Do you know any better approach to tackle this issue?

TanmDL · April 30, 2020, 5:39am

Thank you. The error was caused by the second normalisation. Thank you.