Hello,
The below custom data set worked and ran through my model just fine. The model was not doing well so I cut down on the types of photos from 10 to 3 and then the error popped up. I went back to 10 and it the data loader did just fine.
Why would a paired down data set throw the below error?
KeyError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 4305
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-366-d51e3bafe910> in <module>
----> 1 for image, label, policy, categorical_data in train_loader: #, numerical_data
2 print(f"""
3
4 image size is {image.shape}
5
~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
343
344 def __next__(self):
--> 345 data = self._next_data()
346 self._num_yielded += 1
347 if self._dataset_kind == _DatasetKind.Iterable and \
~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self)
383 def _next_data(self):
384 index = self._next_index() # may raise StopIteration
--> 385 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
386 if self._pin_memory:
387 data = _utils.pin_memory.pin_memory(data)
~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\utils\data\_utils\fetch.py in fetch(self, possibly_batched_index)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\utils\data\_utils\fetch.py in <listcomp>(.0)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
<ipython-input-362-57af70cbdb57> in __getitem__(self, idx)
26 idx = idx.tolist()
27
---> 28 label = self.image_frame.loc[idx, 'target']
29 pic = Path(self.image_frame.loc[idx,'location'])
30 img = Image.open(pic)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1492 except (KeyError, IndexError, AttributeError):
1493 pass
-> 1494 return self._getitem_tuple(key)
1495 else:
1496 # we by definition only have the 0th axis
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
866 def _getitem_tuple(self, tup):
867 try:
--> 868 return self._getitem_lowerdim(tup)
869 except IndexingError:
870 pass
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexing.py in _getitem_lowerdim(self, tup)
986 for i, key in enumerate(tup):
987 if is_label_like(key) or isinstance(key, tuple):
--> 988 section = self._getitem_axis(key, axis=i)
989
990 # we have yielded a scalar ?
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1911 # fall thru to straight lookup
1912 self._validate_key(key, axis)
-> 1913 return self._get_label(key, axis=axis)
1914
1915
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
139 raise IndexingError('no slices here, handle elsewhere')
140
--> 141 return self.obj._xs(label, axis=axis)
142
143 def _get_loc(self, key, axis=None):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3583 drop_level=drop_level)
3584 else:
-> 3585 loc = self.index.get_loc(key)
3586
3587 if isinstance(loc, np.ndarray):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 4305
Here is my data class:
'''
image class data set
'''
def __init__(self, data, transform = None):
'''
Args:
------------------------------------------------------------
data = dataframe
image = column in dataframe with absolute path to the image
label = column in dataframe that is the target classification variable
numerical_columns = numerical columns from data
categorical_columns = categorical columns from data
policy = ID variable
'''
self.image_frame = data
self.transform = transform
def __len__(self):
return len(self.image_frame)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
label = self.image_frame.loc[idx, 'target']
pic = Path(self.image_frame.loc[idx,'location'])
img = Image.open(pic)
policy = self.image_frame.loc[idx, 'policy']
#sample = {'image': img, 'policy': policy, 'label':label}
#numerical_data = self.image_frame.loc[idx, numerical_columns]
#numerical_data = torch.tensor(numerical_data, dtype = torch.float)
if self.transform:
image = self.transform(img)
for category in categorical_columns:
self.image_frame[category] = self.image_frame[category].astype('category')
self.image_frame[category] = self.image_frame[category].astype('category').cat.codes.values
#categorical_column_sizes = [len(self.image_frame[column].astype('category')) for column in categorical_columns]
#categorical_embedding_sizes = [(col_size, min(50, (col_size+1)//2)) for col_size in categorical_column_sizes]
categorical_data = self.image_frame.loc[idx, categorical_columns]
categorical_data = torch.tensor(categorical_data, dtype = torch.int64)
return image, label, policy, categorical_data #, numerical_data
This is the line that throws the error although running the model throws the error as well:
for image, label, policy, categorical_data in train_loader: #, numerical_data
print(f"""
image size is {image.shape}
categorical_data is {categorical_data.shape}
""")
break
#numeric size is {numerical_data.shape}