CustomDataLoader is not subscriptable

I am trying to train a ResNet-50 and get the error “'CreateDataset' object is not subscriptable” (where CreateDataset is the name of my dataset class).

class CreateDataset():
    """Dataset-style wrapper that loads one image per call with OpenCV.

    Keeps the ``"file_name"``, ``"category_id"`` and ``"image_id"`` entries of
    ``data`` and reads the image file for a given index from ``train_path``.
    """

    # NOTE(review): the `transforms` argument is never read; self.transform is
    # always bound to the module-level name `train_transform`.
    def __init__(self, data, transforms = train_transform):
        self.transform = train_transform
        self.file_name = data["file_name"]
        self.category_id = data["category_id"]
        self.image_id = data["image_id"]
        
    # NOTE(review): indexing an object (``obj[i]``) is resolved through
    # ``__getitem__``; a method named ``get_item`` is not used for that.
    def get_item(self, index: int):
        file_name = self.file_name[index]
        
        # Path is built by plain string concatenation of train_path and the name.
        image = cv2.imread(train_path + file_name, cv2.IMREAD_COLOR)
        if image is None:
            # No image was loaded: the caller receives None instead of a sample.
            return None
        
        # Reorder channels BGR -> RGB, convert to float32, then divide in place
        # by 255 (presumably to map 8-bit pixel values into [0, 1] -- confirm).
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        
        # Wrap the stored image id as a NumPy array, then as a torch tensor.
        image_id = self.image_id[index]
        image_id = np.asarray(image_id)
        image_id = torch.from_numpy(image_id)
        
        
        if self.transform:
            # The transform is called with the image as keyword argument
            # ``image``; whatever it returns replaces ``image``.
            image = {"image" : image,}
            image = self.transform(**image)
            
        # NOTE(review): `category_id` is not assigned anywhere in this method
        # (only self.category_id exists).
        return category_id, image, image_id
    
    def __len__(self):
        # Length is taken from the image_id collection.
        return len(self.image_id)
def collate_fn(batch):
    """Regroup a batch of samples into one tuple per field position.

    For example ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``.
    An empty batch yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# 80% of `data` goes to the training split; the remainder is the test split.
train_data, test_data = train_test_split(data, train_size=0.8)

# Each split is wrapped in its own dataset, built with the result of calling
# the matching transform factory.
train_dataset = CreateDataset(train_data, transforms=train_transform())
test_dataset = CreateDataset(test_data, transforms=test_transform())

There is model code here but I have omitted it as the error makes no reference to it.

# Training script: runs `epochs` passes over train_loader and, every
# `print_every` optimisation steps, evaluates on test_loader and prints
# running statistics. The whole model object is saved at the end.
#
# `model`, `optimizer`, `criterion`, `device`, `train_loader` and
# `test_loader` are defined in code that is not shown here.
#
# NOTE(review): inside the batch loop `image_id` is overwritten with
# `file_name`, which is not defined in the code shown -- confirm what the
# training target is meant to be.
# NOTE(review): train_losses records running_loss / len(train_loader), while
# the printed train loss uses running_loss / print_every -- confirm which
# average is intended.
epochs = 1
steps = 0
running_loss = 0
print_every = 10
train_losses, test_losses = [], []
for epoch in range(epochs):
    for image, image_id in train_loader:
        steps += 1
        image_id = file_name
        image_id = image_id.to(device)
        image = image.to(device)

        # Standard optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        logps = model.forward(image)
        loss = criterion(logps, image_id)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if steps % print_every == 0:
            test_loss = 0
            accuracy = 0
            # Evaluation pass: eval mode, and no gradient tracking.
            model.eval()
            with torch.no_grad():
                for images, category in test_loader:
                    images, category = images.to(device), category.to(device)
                    logps = model.forward(images)
                    batch_loss = criterion(logps, category)
                    test_loss += batch_loss.item()

                    # exp() of the model output; presumably the model emits
                    # log-probabilities -- confirm against the model code.
                    ps = torch.exp(logps)
                    # Top-1 prediction per sample, compared with the targets.
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == category.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
            train_losses.append(running_loss/len(train_loader))
            test_losses.append(test_loss/len(test_loader))
            # Fixed: the loader is named `test_loader` everywhere else;
            # `testloader` was an undefined name here.
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {running_loss/print_every:.3f}.. "
                  f"Test loss: {test_loss/len(test_loader):.3f}.. "
                  f"Test accuracy: {accuracy/len(test_loader):.3f}")
            running_loss = 0
            # Back to training mode for the next batches.
            model.train()
torch.save(model, 'PlantDetectionResNet50.pth')

The error occurs at the line “for image, image_id in train_loader”.

I have included the error below:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-10-a04ed18ad998> in <module>
      5 train_losses, test_losses = [], []
      6 for epoch in range(epochs):
----> 7     for image, image_id in train_loader:
      8         steps += 1
      9         image_id = file_name

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

TypeError: 'CreateDataset' object is not subscriptable

Many thanks,

BT

You have a small typo in the method definition.
Use __getitem__(self, index) instead of get_item, as the former is the method Python calls when an object is indexed (e.g. dataset[idx]), which is what the DataLoader does.

1 Like

Thanks for the feedback. It has solved one error but now I am getting the below error, from the same line.

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-13-a04ed18ad998> in <module>
      5 train_losses, test_losses = [], []
      6 for epoch in range(epochs):
----> 7     for image, image_id in train_loader:
      8         steps += 1
      9         image_id = file_name

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataset.py in __getitem__(self, index)
     23 
     24     def __getitem__(self, index):
---> 25         raise NotImplementedError
     26 
     27     def __add__(self, other):

NotImplementedError: 

Try to derive the class from torch.utils.data.Dataset and check for typos again.
If you get stuck, please post the updated code snippet so that we can have another look.

I bought a book on PyTorch, read it, and thought I had understood it. I also read more forum posts about similar errors, but found nothing that helped. Other people had this problem with the __len__ method, but rarely with __getitem__ as I do. I did find a typo, as I had written get_item, but changing it to the correct __getitem__ did not solve the problem. I also renamed category_id to label, as I tried to follow someone else's class for the same Kaggle competition in the hope of solving the problem, but to no avail.

I also slightly tweaked my dataset code to pass the category_ids (labels) to the CreateDataset class as well, which is why __init__ now takes an extra argument compared to the original post.

# Build the training dataset from train_data, its "category_id" entries as the
# labels, and the result of calling train_transform().
train_dataset = CreateDataset(train_data, train_data["category_id"], train_transform())
class CreateDataset(Dataset):
    """Dataset that reads an image from disk and pairs it with its label.

    ``data`` must provide a ``"file_name"`` entry and ``labels`` the matching
    labels; both are accessed through ``.values[index]`` (presumably pandas
    objects -- confirm).
    """

    def __init__(self, data, labels, transforms = None):
        self.data = data
        # NOTE(review): this stores the module-level name `train_transform`,
        # not the `transforms` argument, so whatever the caller passes in is
        # ignored.
        self.transform = train_transform
        self.labels = labels
        
    def __len__(self):
        return len(self.data)
        
    def __getitem__(self, index):
        file_name = self.data["file_name"].values[index]
        # Path is built by plain string concatenation of train_path and the name.
        image = cv2.imread(train_path + file_name)
        if image is None:
            # No image was loaded: the caller receives None instead of a sample.
            return None
        # Reorder channels BGR -> RGB, convert to float32, then divide in place
        # by 255 (presumably to map 8-bit pixel values into [0, 1] -- confirm).
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        label = self.labels.values[index]
        
        if self.transform:
            # The transform is called with the image as keyword argument
            # ``image``; whatever it returns replaces ``image``.
            image = {"image" : image,}
            image = self.transform(**image)   
            
        return image, label
    

Error…

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-63-d95c0074ab2b> in <module>
      5 train_losses, test_losses = [], []
      6 for epoch in range(epochs):
----> 7     for image, label in train_loader:
      8         steps += 1
      9         label = label.to(device)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataset.py in __getitem__(self, index)
     23 
     24     def __getitem__(self, index):
---> 25         raise NotImplementedError
     26 
     27     def __add__(self, other):

NotImplementedError: 


Thanks.

I changed the len function:

    def __len__(self):
        return len(self.labels)

but now I get the error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-81-d95c0074ab2b> in <module>
      5 train_losses, test_losses = [], []
      6 for epoch in range(epochs):
----> 7     for image, label in train_loader:
      8         steps += 1
      9         label = label.to(device)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

<ipython-input-70-fedc0fe41c52> in __getitem__(self, index)
     22         if self.transform:
     23             image = {"image" : image,}
---> 24             image = self.transform(**image)
     25 
     26         return image, label

TypeError: train_transform() got an unexpected keyword argument 'image'


I don’t know how train_transform is defined, but your posted code works fine after removing the data loading part:

class CreateDataset(Dataset):
    """Minimal map-style dataset pairing each sample with its label.

    ``data`` and ``labels`` are indexed with the same integer position.
    ``transform`` is stored on the instance but not applied here.
    """

    def __init__(self, data, labels, transform=None):
        self.labels = labels
        self.transform = transform
        self.data = data

    def __getitem__(self, index):
        # Sample first, label second, returned as an (image, label) pair.
        return self.data[index], self.labels[index]

    def __len__(self):
        # The dataset size is the number of samples in ``data``.
        return len(self.data)
    
# Random stand-in inputs: a (100, 3, 224, 224) float tensor and 100 integer
# labels drawn from [0, 10).
fake_images = torch.randn(100, 3, 224, 224)
fake_labels = torch.randint(0, 10, (100,))
dataset = CreateDataset(data=fake_images, labels=fake_labels)

# Iterate over the dataset in batches of 5 and print each batch of targets.
loader = DataLoader(dataset, batch_size=5)
for data, target in loader:
    print(target)

If you still get stuck, please post another executable code snippet, to reproduce the error.
Since I don’t have your data on my machine, I would have to remove some parts such as the data loading from the code.