I am working on a multiclass classification problem using PyTorch. I created my custom data loader. I get an error that says IndexError: index 3810 is out of bounds for dimension 0 with size 3810
when I start training the model.
X_train has 10 columns and y_train has 1 column 9 classes(integer encoded from 0 to 8).
Length of dataset = 487680
The model trains on a small subset of dataset. I’ve found the max limit to be 3810 after which I get the index error mentioned above.
Here is my code.
class trainData(Dataset):
def __init__(self, x, y):
'Initialization'
self.x = x
self.y = y
def __len__(self):
'Denotes the total number of samples'
return len(self.x)
def __getitem__(self, index):
'Generates one sample of data'
return self.x[index], self.y[index]
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
train_data = trainData(torch.FloatTensor(X_train), torch.LongTensor(y_train))
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=False, num_workers=4)
for epoch in range(EPOCHS):
for X_batch, y_batch in train_loader:
X_batch, y_batch = X_batch.to(device), y_batch.to(device)
model.zero_grad()
y_pred = model(X_batch)
loss = criterion(y_pred, y_batch)
loss.backward()
optimizer.step()
if epoch % 100 == 0:
print(loss)
Running train_data.__len__()
returns 487680.
Following is the error I get when I start the training.
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-172-974210982f1a> in <module>()
3 for epoch in range(EPOCHS):
4
----> 5 for X_batch, y_batch in train_loader:
6 X_batch, y_batch = X_batch.to(device), y_batch.to(device)
7
/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
635 self.reorder_dict[idx] = batch
636 continue
--> 637 return self._process_next_batch(batch)
638
639 next = __next__ # Python 2 compatibility
/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _process_next_batch(self, batch)
656 self._put_indices()
657 if isinstance(batch, ExceptionWrapper):
--> 658 raise batch.exc_type(batch.exc_msg)
659 return batch
660
IndexError: Traceback (most recent call last):
File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
samples = collate_fn([dataset[i] for i in batch_indices])
File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in <listcomp>
samples = collate_fn([dataset[i] for i in batch_indices])
File "<ipython-input-165-8917f54b2949>", line 13, in __getitem__
return self.x[index], self.y[index]
IndexError: index 3810 is out of bounds for dimension 0 with size 3810
If I run train_data.__getitem__(3809)
, I get an output
(tensor([ 0.0471, -1.5022, -1.5161, 0.1334, -0.6350, 0.0935, 0.1073, -0.0268,
0.4136, 0.0583]), tensor(4))
However, If I run train_data.__getitem__(3810)
, I get an error
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-178-f115e994f119> in <module>()
----> 1 train_data.__getitem__(3810)
<ipython-input-165-8917f54b2949> in __getitem__(self, index)
11 def __getitem__(self, index):
12 'Generates one sample of data'
---> 13 return self.x[index], self.y[index]
IndexError: index 3810 is out of bounds for dimension 0 with size 3810
I am at loss as to why this is happening.