Image Classification Dataloader issues

I’ve been successful using various predefined datasets such as the CIFAR10 but this is my first attempt at using a custom dataloader.

I am trying to do simple image classification of 32x332 CIFAR like images with a single int for labels.

The imagedata comes in a ndarray that was transformed from [batch__size, h, w, c] to [batch__size, c, h, w]. I’ve tried converting these to PIL images prior to see if that made a difference to no avail. I keep running into “AttributeError: Can’t get attribute ‘MyDataset’ on <module ‘main’ (built-in)>” now when i try viewing the images using an iterator or when feeding it into the NN.

Does anyone see anything obvious here that I am missing? Thank you and appreciate the support.

#Import and process clean labels
verified_labels = np.genfromtxt('../verified_labels.csv', delimiter=',', dtype="int64")

#debug size and shape
print(verified_labels.shape) 
print(type(verified_labels)) 

#get size of clean labels
n_verified_labels = verified_labels.size

#create empty ndarray for  clean images
verified_imgs = np.empty((n_verified_labels,32,32,3))

#load images into ndarray, convert to RGB using cv2
for i in range(n_verified_labels):
    temp_image = f'../images/{i+1:05d}.png'
    verified_imgs[i,:,:,:] = cv2.cvtColor(cv2.imread(temp_image),cv2.COLOR_BGR2RGB)

#Debug - get shape of array - should be [batch__size,h,w,c]
print(verified_imgs.shape) 
print(type(verified_imgs)) 

#transpose ndarray from 32x32x3 to 3x32x32 to match [batch__size, c, h, w]
verified_imgs = np.transpose(verified_imgs, (0, 3,1,2))

#verify shape of array - should be [batch__size, c, h, w]
print(verified_imgs.shape)  

##convert to tensor format
x_train = verified_imgs
#x_train = torch.Tensor(verified_imgs) #didn't work
#x_train = torch.from_numpy(verified_imgs) #didnt work

y_train = torch.Tensor(verified_labels)

#PyTorch Transform 
transform = transforms.ToTensor()
batch_size = 4

#custom data loader
class MyDataset(Dataset):
    def __init__(self, data, target, transform=None):
        self.data = data
        self.target = target
        self.transform = transform
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]
        
        if self.transform:
            x = self.transform(x)
            
        return x, y
    
    def __len__(self):
        return len(self.data)
    
#define testloaders

#trainset and trainloader
trainset = MyDataset(x_train, y_train, transform)
trainloader = DataLoader(
    trainset,
    batch_size=2,
    num_workers=1,
    shuffle=True,

#define NN - from CIFAR10 pyTorch example
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()

#define loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

#train neural network
# Number of times NN will loop and train over data ser - from CIFAR10 example
loops = 4

for epoch in range(loops):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

)```

Hi,

Could you post the complete error log?

Thanks

Hi, this is what I am getting.

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'MyDataset' on <module '__main__' (built-in)>```

Hi,

It looks like MyDataset class import has some problems. What you can do is? Create separate files for Net and MyDataset and try to import them to the main file, then it should work.

Thanks