CustomDataLoader is not subscriptable

BlueTurtle · May 23, 2020, 4:22pm

I am trying to run resnet50 and get “CreateDataset” is not subscriptable (where CreateDataset is the class name).

class CreateDataset():
    """Image dataset from the original question: loads one image per lookup.

    Keeps the ``file_name``, ``category_id`` and ``image_id`` columns of
    ``data`` and reads the image file from ``train_path`` on demand.

    NOTE(review): the class defines ``get_item`` rather than ``__getitem__``,
    so ``dataset[idx]`` is not supported; this matches the reported
    "'CreateDataset' object is not subscriptable" TypeError.
    """

    # NOTE(review): the default for `transforms` is the global
    # `train_transform` object itself, while the call sites in this post pass
    # `train_transform()` (the result of calling it) -- confirm which is meant.
    def __init__(self, data, transforms = train_transform):
        # NOTE(review): this stores the global `train_transform`, not the
        # `transforms` argument, so whatever the caller passes is ignored.
        self.transform = train_transform
        self.file_name = data["file_name"]
        self.category_id = data["category_id"]
        self.image_id = data["image_id"]
        
    def get_item(self, index: int):
        """Load the image at ``index`` and return it with its identifiers.

        Returns ``None`` when ``cv2.imread`` yields no image; otherwise
        returns the tuple ``(category_id, image, image_id)``.
        """
        file_name = self.file_name[index]
        
        # Path is built by plain string concatenation with the global train_path.
        image = cv2.imread(train_path + file_name, cv2.IMREAD_COLOR)
        # NOTE(review): a None sample is handed back to the caller as-is;
        # whatever batches the samples has to cope with it -- confirm.
        if image is None:
            return None
        
        # Convert BGR -> RGB, cast to float32, then divide by 255.0 in place.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        
        # Wrap the image id as a NumPy array and then as a torch tensor.
        image_id = self.image_id[index]
        image_id = np.asarray(image_id)
        image_id = torch.from_numpy(image_id)
        
        
        if self.transform:
            # The transform is invoked with the image as the keyword argument
            # `image`; `image` is then rebound to whatever it returns.
            image = {"image" : image,}
            image = self.transform(**image)
            
        # NOTE(review): `category_id` is never assigned in this method
        # (self.category_id is not indexed here), so this line depends on a
        # global of that name existing -- otherwise it raises NameError.
        return category_id, image, image_id
    
    def __len__(self):
        """Return the number of entries in the ``image_id`` column."""
        return len(self.image_id)

def collate_fn(batch):
    """Regroup a batch of per-sample tuples into one tuple per field.

    ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``; an empty
    batch yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Split `data` into two parts; train_size = 0.8 requests 80% for training.
# (train_test_split is presumably scikit-learn's -- its import is not shown.)
train_data, test_data = train_test_split(data, train_size = 0.8)

# One dataset per split. Note that train_transform / test_transform are
# *called* here and their return values are passed as the second argument.
train_dataset = CreateDataset(train_data, train_transform())
test_dataset = CreateDataset(test_data, test_transform())

There is model code here but I have omitted it as the error makes no reference to it.

# Training loop: iterates train_loader for `epochs` epochs and, every
# `print_every` steps, evaluates on test_loader and prints running metrics.
# `model`, `optimizer`, `criterion`, `device` and both loaders are defined in
# code that was omitted from the post.
epochs = 1
steps = 0
running_loss = 0
print_every = 10
train_losses, test_losses = [], []
for epoch in range(epochs):
    for image, image_id in train_loader:  # line reported in the tracebacks
        steps += 1
        # NOTE(review): `file_name` is not defined anywhere in the posted
        # snippet, and this assignment discards the image_id from the batch.
        image_id = file_name
        image_id = image_id.to(device)
        image = image.to(device)
    
        # Standard optimisation step: clear gradients, forward pass, loss,
        # backward pass, parameter update.
        optimizer.zero_grad()
        logps = model.forward(image)
        loss = criterion(logps, image_id)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        if steps % print_every == 0:
            test_loss = 0
            accuracy = 0
            # Switch to evaluation mode and disable gradient tracking while
            # scoring the held-out loader.
            model.eval()
            with torch.no_grad():
                for images, category in test_loader:
                    images, category = images.to(device), category.to(device)
                    logps = model.forward(images)
                    batch_loss = criterion(logps, category)
                    test_loss += batch_loss.item()
                    
                    # Presumably the model outputs log-probabilities, hence
                    # exp() before taking the top-1 class -- confirm.
                    ps = torch.exp(logps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == category.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
            # NOTE(review): the stored train loss is divided by
            # len(train_loader), while the printed one below is divided by
            # print_every -- the two figures will differ.
            train_losses.append(running_loss/len(train_loader))
            test_losses.append(test_loss/len(test_loader))                    
            # NOTE(review): `testloader` (no underscore) is not the name used
            # elsewhere in this snippet (`test_loader`); unless it is defined
            # in the omitted code this print raises NameError.
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {running_loss/print_every:.3f}.. "
                  f"Test loss: {test_loss/len(testloader):.3f}.. "
                  f"Test accuracy: {accuracy/len(testloader):.3f}")
            # Reset the accumulator and return to training mode.
            running_loss = 0
            model.train()
# Saves the whole model object (not just a state_dict) to disk.
torch.save(model, 'PlantDetectionResNet50.pth')

The error occurs at the line “for image, image_id in train_loader”

I have included the error below:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-10-a04ed18ad998> in <module>
      5 train_losses, test_losses = [], []
      6 for epoch in range(epochs):
----> 7     for image, image_id in train_loader:
      8         steps += 1
      9         image_id = file_name

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

TypeError: 'CreateDataset' object is not subscriptable

Many thanks,

BT

ptrblck · May 24, 2020, 8:39am

You have a small typo in the method definition.
Use __getitem__(self, index) instead of get_item, as the former is the special method Python calls when an object is indexed (e.g. dataset[idx]), which is what the DataLoader does.

BlueTurtle · May 24, 2020, 9:11am

Thanks for the feedback. It has solved one error but now I am getting the below error, from the same line.

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-13-a04ed18ad998> in <module>
      5 train_losses, test_losses = [], []
      6 for epoch in range(epochs):
----> 7     for image, image_id in train_loader:
      8         steps += 1
      9         image_id = file_name

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataset.py in __getitem__(self, index)
     23 
     24     def __getitem__(self, index):
---> 25         raise NotImplementedError
     26 
     27     def __add__(self, other):

NotImplementedError:

ptrblck · May 24, 2020, 9:15am

Try to derive the class from torch.utils.data.Dataset and check for typos again.
If you get stuck, please post the updated code snippet so that we can have another look.

BlueTurtle · May 25, 2020, 10:09pm

I bought a book on PyTorch, read it, and thought I had understood it. I also read more forum posts about similar errors but found nothing that helped. Other people seem to hit this problem with the __len__ method, but rarely with __getitem__ as I do. I did find a typo, as I had written get_item, but changing it to the correct __getitem__ did not solve the problem. I also renamed category_id to label while following someone else's class for the same Kaggle competition, hoping that would solve the problem, but alas it did not.

I also slightly tweaked my Dataset code to pass the category_ids (labels) into the CreateDataset class as well, which is why __init__ now takes an extra argument compared to the original post.

# Updated construction: the category_id column is passed separately as the
# labels, and the result of calling train_transform() is the third argument.
train_dataset = CreateDataset(train_data, train_data["category_id"], train_transform())

class CreateDataset(Dataset):
    """Revised dataset from the question: returns ``(image, label)`` pairs.

    ``data`` is indexed by the ``"file_name"`` column and ``labels`` via
    ``.values``, so both are presumably pandas objects -- confirm.
    """

    def __init__(self, data, labels, transforms = None):
        self.data = data
        # NOTE(review): this assigns the global `train_transform`, not the
        # `transforms` argument, so the object built by the caller with
        # train_transform() is ignored. The later error
        # "train_transform() got an unexpected keyword argument 'image'"
        # is consistent with self.transform being that plain function --
        # confirm against the definition of train_transform.
        self.transform = train_transform
        self.labels = labels
        
    def __len__(self):
        """Return ``len(self.data)``."""
        return len(self.data)
        
    def __getitem__(self, index):
        """Read the image for row ``index`` and return ``(image, label)``.

        Returns ``None`` instead of a pair when ``cv2.imread`` yields no image.
        """
        # Positional lookup through .values, independent of the index labels.
        file_name = self.data["file_name"].values[index]
        image = cv2.imread(train_path + file_name)
        # NOTE(review): a None sample is passed on to the DataLoader's
        # collation step unchanged -- confirm that this is handled.
        if image is None:
            return None
        # Convert BGR -> RGB, cast to float32, then divide by 255.0 in place.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        label = self.labels.values[index]
        
        if self.transform:
            # The transform is called with the image as keyword argument
            # `image`; `image` is then rebound to whatever it returns.
            image = {"image" : image,}
            image = self.transform(**image)   
            
        return image, label

Error…

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-63-d95c0074ab2b> in <module>
      5 train_losses, test_losses = [], []
      6 for epoch in range(epochs):
----> 7     for image, label in train_loader:
      8         steps += 1
      9         label = label.to(device)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataset.py in __getitem__(self, index)
     23 
     24     def __getitem__(self, index):
---> 25         raise NotImplementedError
     26 
     27     def __add__(self, other):

NotImplementedError:

Thanks.

BlueTurtle · May 25, 2020, 10:14pm

I changed the len function:

    def __len__(self):
        """Return the number of labels (previously ``len(self.data)``)."""
        return len(self.labels)

but now I get the error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-81-d95c0074ab2b> in <module>
      5 train_losses, test_losses = [], []
      6 for epoch in range(epochs):
----> 7     for image, label in train_loader:
      8         steps += 1
      9         label = label.to(device)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

<ipython-input-70-fedc0fe41c52> in __getitem__(self, index)
     22         if self.transform:
     23             image = {"image" : image,}
---> 24             image = self.transform(**image)
     25 
     26         return image, label

TypeError: train_transform() got an unexpected keyword argument 'image'

ptrblck · May 26, 2020, 2:54am

I don’t know how train_transform is defined, but your posted code works fine after removing the data loading part:

class CreateDataset(Dataset):
    """Minimal map-style dataset pairing each sample with its label.

    Args:
        data: Indexable collection of samples (tensor, list, ...).
        labels: Indexable collection of labels, aligned with ``data``.
        transform: Optional callable applied to a sample before it is
            returned. ``None`` (the default) returns the sample unchanged.
    """

    def __init__(self, data, labels, transform=None):
        self.data = data
        self.transform = transform
        self.labels = labels

    def __len__(self):
        """Return the number of samples in ``data``."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``, transformed if requested."""
        image = self.data[index]
        label = self.labels[index]
        # Apply the optional transform so that passing one is not silently
        # ignored; it is called positionally with the sample.
        if self.transform is not None:
            image = self.transform(image)
        return image, label
    
# Random stand-in tensors replace the file-based loading so the snippet is
# runnable without the original images.
fake_images = torch.randn(100, 3, 224, 224)
fake_labels = torch.randint(0, 10, (100,))
dataset = CreateDataset(data=fake_images, labels=fake_labels)

# Iterate in batches of five and show the label batch each time.
loader = DataLoader(dataset, batch_size=5)
for _, target in loader:
    print(target)

If you still get stuck, please post another executable code snippet, to reproduce the error.
Since I don’t have your data on my machine, I would have to remove some parts such as the data loading from the code.