Imbalanced mnist dataset with random rotation

This is the following demo code I have used. But I got the error “TypeError: img should be PIL Image. Got <class ‘torch.Tensor’>”. How could I solve the problem?

class MyDataset(torch.utils.data.Dataset):

   def __init__(self, transform = None):
    #dset = get_dataset(dataset_name)
    data_dir='' 
    dataset = datasets.MNIST(data_dir, train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                   ]))    ##### dataset download from  torch.vision
    imbalanced_train_dataset = copy.deepcopy(dataset)
    imbalanced_train_dataset = copy.deepcopy(dataset)
     targets = imbalanced_train_dataset.targets
    classes, class_counts = np.unique(targets, return_counts=True)
    nb_classes = len(classes)
    imbal_class_counts = [1182,  391,  324,  344, 1000,  466,  935,  673,  272 , 369]
    class_indices = [np.where(targets == i)[0] for i in range(nb_classes)]
    imbal_class_indices = [class_idx[:class_count] for class_idx, class_count in zip(class_indices, imbal_class_counts)]
    imbal_class_indices = np.hstack(imbal_class_indices)
    self.target= imbalanced_train_dataset.targets[imbal_class_indices]
    self.data = imbalanced_train_dataset.data[imbal_class_indices]
    self.transform = transform

def __getitem__(self, index):
    self.data1 = self.data[index]
    self.target = self.target[index]
    if self.transform is not None:
        self.data2 = self.transform(self.data1)
        
    return self.data1, self.data2, self.target, index

def __len__(self):
    return len(self.data)

transform2=transforms.Compose([transforms.RandomRotation(30)])

new_dataset = MyDataset(transform = transform2)

dataloader = torch.utils.data.DataLoader(
new_dataset,
num_workers=1,
batch_size=60,
shuffle=True)

data1, data2, target, index = iter(dataloader).next()

Please format the code so it is more readable, use % ``` (without the %) at the beginning and end of the code.

This is an example how you could do your transforms:

my_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=5), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) 
    ])

please check now. I have formatted the code

I’m a little confused over your code, what is it you want to do?

It seems that you are writing a custom dataset but using datasets.MNIST, wouldn’t it be simpler to use torchvision to load MNIST and then change certain classes that are imbalanced in the way you want?

my_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=5), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) 
    ])

dataset = torchvision.datasets.MNIST('./data', train=True, download=True, transform=my_transforms)
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

And then perform calculations on imbalanced classes

Exactly what you have said. Then apply transformations on the imbalance data through the function “def getitem(self, index)” to get outputs like data1, transformation(data1), target and index. Somehow I have managed to solve the previous problem(transforms.randomrotation can not apply on tensor data) but now I am facing another issue. The output data1 is bytetensor which means dataloader may not apply on data1. I am sharing the code. Please check it.
class MyDataset(torch.utils.data.Dataset):
# new dataset class that allows to get the sample indices of mini-batch

def __init__(self, transform = None):
    #dset = get_dataset(dataset_name)
    data_dir='' 
    dataset = datasets.MNIST(data_dir, train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                   ]))    ##### dataset download from  torch.vision
    imbalanced_train_dataset = copy.deepcopy(dataset)
    imbalanced_train_dataset = copy.deepcopy(dataset)
    #### dataloader converting into numpy array
   # Get all training targets and count the number of class instances
    targets = imbalanced_train_dataset.targets
    classes, class_counts = np.unique(targets, return_counts=True)
    nb_classes = len(classes)
    print(class_counts)
   #class_prob = np.random.rand(nb_classes)
   #class_prob = class_prob/class_prob.sum()
   #imbal_class_counts = [int(a*b) for a,b in zip(class_counts,class_prob)]
    imbal_class_counts = [1182,  391,  324,  344, 1000,  466,  935,  673,  272 , 369]
    print(imbal_class_counts)
    print(np.array(imbal_class_counts).sum())
    class_indices = [np.where(targets == i)[0] for i in range(nb_classes)]
   ###### balanced dataset next two lines comments
    imbal_class_indices = [class_idx[:class_count] for class_idx, class_count in zip(class_indices, imbal_class_counts)]
    imbal_class_indices = np.hstack(imbal_class_indices)
   # Set target and data to dataset
    self.target= imbalanced_train_dataset.targets[imbal_class_indices]
    self.data = imbalanced_train_dataset.data[imbal_class_indices]
    print('#*50', self.data.type())
    self.transform = transforms.RandomRotation(30)

def __getitem__(self, index):
    data1 = self.data[index]
    target = self.target[index]
    if self.transform is not None:
        data2 = TF.to_pil_image(data1)
        data2 = self.transform(data2)
        data2 = TF.to_tensor(data2)
        
    return data1.unsqueeze(0), target, data2, index

def __len__(self):
    return len(self.data)

transform2=transforms.Compose([transforms.RandomRotation(10)
#transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

new_dataset = MyDataset(transform = transform2)

dataloader = torch.utils.data.DataLoader(
new_dataset,
num_workers=1,
batch_size=60,
shuffle=True)

data1, target, data2, index = iter(dataloader).next()
print(data1.size())
print(data2.size())
new_data1 = data1[0].squeeze().numpy()
new_data2 = data2[0].squeeze().numpy()
print(new_data2.shape)
print(new_data1.shape)
f1 = plt.figure(1)
plt.imshow(new_data1)
f2 = plt.figure(2)
plt.imshow(new_data2)
plt.show()
print(data1.type()) ### This output gives torch.ByteTensor not torch.FloatTensor (Which I am expecting from )
print(data2.type())
print(target.type())