Loading data not implemented error

Hello, so I’m just trying to load up some numerical data with corresponding labels and I’m getting a not implemented error. I’m not sure what’s going on.

First, I made these classes so that I wouldn’t have to write this from scratch in the future:

class Train_Set(Dataset):
    """Training samples and encoded labels read from a CSV file.

    Column 0 of the file is used as the label; all remaining columns are
    used as the sample data.
    """

    def __init__(self, file_path):
        """Read the CSV and cache the sample, label and encoded-label arrays.

        Args:
            file_path: Passed straight to ``pd.read_csv``.
        """
        self.train_df = pd.read_csv(file_path)
        # Everything after the first column is sample data.
        self.data_arr = self.train_df.iloc[:, 1:].values
        # The first column holds the raw labels.
        self.label_arr = self.train_df.iloc[:, 0].values
        # One indicator column per distinct label value.
        self.enc_label_arr = pd.get_dummies(self.train_df.iloc[:, 0]).values

    def get_item(self, index):
        """Return the ``(sample, enc_label)`` tensors for row ``index``.

        NOTE(review): ``dataset[i]`` dispatches to ``__getitem__``, not to a
        method named ``get_item``, so indexing this dataset still reaches
        the base ``Dataset.__getitem__``.
        """
        sample = torch.Tensor(self.data_arr[index])
        enc_label = torch.Tensor(self.enc_label_arr[index])

        return (sample, enc_label)

    def __len__(self):
        """Return the number of rows of sample data."""
        return len(self.data_arr)

class Test_Set(Dataset):
    """Test samples and encoded labels read from a CSV file.

    Column 0 of the file is used as the label; all remaining columns are
    used as the sample data.
    """

    def __init__(self, file_path):
        """Read the CSV and cache the sample, label and encoded-label arrays.

        Args:
            file_path: Passed straight to ``pd.read_csv``.
        """
        self.test_df = pd.read_csv(file_path)
        # Everything after the first column is sample data.
        self.data_arr = self.test_df.iloc[:, 1:].values
        # The first column holds the raw labels.
        self.label_arr = self.test_df.iloc[:, 0].values
        # One indicator column per distinct label value.
        self.enc_label_arr = pd.get_dummies(self.test_df.iloc[:, 0]).values

    def get_item(self, index):
        """Return the ``(sample, enc_label)`` tensors for row ``index``.

        NOTE(review): ``dataset[i]`` dispatches to ``__getitem__``, not to a
        method named ``get_item``, so indexing this dataset still reaches
        the base ``Dataset.__getitem__``.
        """
        sample = torch.Tensor(self.data_arr[index])
        enc_label = torch.Tensor(self.enc_label_arr[index])

        return (sample, enc_label)

    def __len__(self):
        """Return the number of rows of sample data."""
        return len(self.data_arr)

When I start tinkering with this in a notebook, everything seems fine at first: I go through the usual steps, build a simple feed-forward network, and so on. But when I get to computing the loss, which I guess is when the actual data is first needed, this happens:

loss_items = []  # loss values sampled during training

for t in range(epochs):
    print(t)

    for i, (samples, labels) in enumerate(train_loader):

        outputs = model(samples)
        loss = loss_function(outputs, labels)

        # Record the loss only on every 1000th batch.
        if i % 1000 == 0:
            loss_items.append(loss.item())

        # Clear old gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

--------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
in ()
4 print(t)
5
----> 6 for i, (samples, labels) in enumerate(train_loader):
7
8 outputs = model(samples)

~/.local/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
312 if self.num_workers == 0: # same-process loading
313 indices = next(self.sample_iter) # may raise StopIteration
--> 314 batch = self.collate_fn([self.dataset[i] for i in indices])
315 if self.pin_memory:
316 batch = pin_memory_batch(batch)

~/.local/lib/python3.6/site-packages/torch/utils/data/dataloader.py in (.0)
312 if self.num_workers == 0: # same-process loading
313 indices = next(self.sample_iter) # may raise StopIteration
--> 314 batch = self.collate_fn([self.dataset[i] for i in indices])
315 if self.pin_memory:
316 batch = pin_memory_batch(batch)

~/.local/lib/python3.6/site-packages/torch/utils/data/dataset.py in __getitem__(self, index)
15
16 def __getitem__(self, index):
---> 17 raise NotImplementedError
18
19 def __len__(self):

NotImplementedError:

------------------------------------

So I made sure that get_item was not defined inside of __init__, but other than that, I’m pretty stumped.

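The method must be named __getitem__(self, index), not get_item(). The DataLoader fetches each sample with self.dataset[i], which calls __getitem__; because your class doesn’t define it, the base Dataset.__getitem__ runs instead and raises NotImplementedError: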

def __getitem__(self, index):
    """Return the ``(sample, enc_label)`` tensor pair for row ``index``."""
    features = torch.Tensor(self.data_arr[index])
    encoded = torch.Tensor(self.enc_label_arr[index])
    return features, encoded

Worked like a charm, whoops. Thanks