"IndexError: too many indices for tensor of dimension 3"

When loading an RGB image in a custom dataset class:

My problem is that imag = Image.fromarray(imag) takes the original numpy image,
drops a dimension from it AND transposes the H and W.
I've tried writing imag = np.transpose(imag, (1, 0, 2)), and that part works just fine,
but the third dimension of 3 (for RGB) still gets dropped.
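
For example, this is what I see when I print the shapes (the 200x300 size is just an illustration, not my real image):

import numpy as np
from PIL import Image

arr = np.zeros((200, 300, 3), dtype=np.uint8)   # dummy H x W x 3 RGB image
pil_img = Image.fromarray(arr)

print(arr.shape)      # (200, 300, 3)
print(pil_img.size)   # (300, 200) -- PIL reports (width, height) and keeps the RGB channels in the mode, not in .size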

This is my code:

import pandas as pd
import numpy as np
import os
from skimage import io
from torch.utils.data import Dataset
from PIL import Image

class projectdata(Dataset):

    def __init__(self, csv_file, root_dir, transform=None):
        self.landmarks_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0])
        imag = io.imread(img_name) 
        landmarks = self.landmarks_frame.iloc[idx, 1:]
        landmarks = np.array([landmarks])
        landmarks = landmarks.astype('float').reshape(-1, 2)
        imag = np.transpose(imag, (1,0,2))
        imag = Image.fromarray(imag) 
        sample = {'image': imag, 'landmarks': landmarks}
        
        if self.transform:
            sample = self.transform(sample['image'])
        
        img = sample['image'] 
        label = sample['landmarks']
        return img, label        

The CNN expects a tensor of size [1, 3, 175, 175].
The image itself (before the dimension gets dropped) has 3 dimensions, and I'm transforming it to 4 dimensions.
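
In other words, the mismatch is between these two shapes (the unsqueeze below is only to illustrate where the extra batch dimension would go):

import torch

x = torch.randn(3, 175, 175)   # a single 3-dimensional image tensor
batch = x.unsqueeze(0)         # with a leading batch dimension added
print(x.shape, batch.shape)    # torch.Size([3, 175, 175]) torch.Size([1, 3, 175, 175])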

from projectd import projectdata #custom dataset class file

from torch.utils.data import DataLoader
from torchvision import transforms

transformer = transforms.Compose([
    transforms.Resize(1,3,175,175),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = projectdata(csv_file='face_landmarks.csv', root_dir='faces', transform=transformer)
train_loader = DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=0)

for epochs in range(2):
  running_loss = 0.0
  for i, data in enumerate(train_loader, 0):   # <-- this is where the error is raised

transforms.Resize expects only the target image size, not a full tensor shape with batch and channel dimensions, so change your resize transform to:

transformer = transforms.Compose([
    transforms.Resize((175, 175)),   # pass the size as a tuple; a bare int only resizes the shorter side
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = projectdata(csv_file='face_landmarks.csv', root_dir='faces', transform=transformer)
train_loader = DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=0)
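
With that pipeline each sample comes out as a [3, 175, 175] tensor; the leading 1 in [1, 3, 175, 175] is the batch dimension the DataLoader adds when it collates samples. A quick shape check (the zero-filled image below is just a stand-in):

import numpy as np
from PIL import Image
from torchvision import transforms

dummy = Image.fromarray(np.zeros((200, 300, 3), dtype=np.uint8))  # any RGB image works here
pipeline = transforms.Compose([
    transforms.Resize((175, 175)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
print(pipeline(dummy).shape)   # torch.Size([3, 175, 175])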

Also why aren’t you using PIL directly to load the image? That should work as well. This way you won’t have to transpose the dimensions of the image. Try changing the __getitem__() function to:

 def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0])
        imag = Image.open(img_name).convert('RGB')  # convert guards against grayscale/RGBA files
        landmarks = self.landmarks_frame.iloc[idx, 1:]
        landmarks = np.array([landmarks]).astype('float').reshape(-1, 2)

        if self.transform:
            imag = self.transform(imag)  # apply the transform to the image only

        return imag, landmarks

The modified __getitem__() function should work with the changed Resize transform I mentioned above.
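
If you want to sanity-check the whole pipeline end to end, something like this should print the shapes your CNN expects (assuming the face_landmarks.csv / faces layout from your post and the modified __getitem__() above):

from torch.utils.data import DataLoader
from torchvision import transforms
from projectd import projectdata

transformer = transforms.Compose([
    transforms.Resize((175, 175)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = projectdata(csv_file='face_landmarks.csv', root_dir='faces', transform=transformer)
train_loader = DataLoader(trainset, batch_size=1, shuffle=True, num_workers=0)

images, landmarks = next(iter(train_loader))
print(images.shape)     # torch.Size([1, 3, 175, 175])
print(landmarks.shape)  # torch.Size([1, N, 2]) where N is the number of landmarks per face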