Hey guys, I need some help with a bizarre question. I have a standard Dataset to load in my data of 4297 instances. when I cal len(dataset) I get 4297. However, when I loop over my dataset instances, I get an index out of bound error. Out of curiosity, I commented out all my getitem code to see how far it would go, and it never stops. I need help!
here is my code:
import os
import pandas as pd
import torch
import cv2
import numpy as np
from torch.utils.data import Dataset
from torch import nn, optim
from torch.utils.data import DataLoader
class VideoDataset(Dataset):
def __init__(self, dataset='iemocap', clip_len=16, leave_one_out=None,emotions=None):
self.root_dir = 'data'
self.df = os.path.join('data','iemocap.csv')
self.clip_len = clip_len
self.resize_height = 128
self.resize_width = 171
self.crop_size = 112
if not os.path.isfile(self.df):
data = {"fnames":[], "labels":[]}
folder = self.root_dir
for label in sorted(os.listdir(self.root_dir)):
for fname in os.listdir(os.path.join(folder, label)):
data['fnames'].append(os.path.join(folder, label, fname))
data['labels'].append(label)
assert(len(data['fnames'])==len(data['labels']))
self.df = pd.DataFrame.from_dict(data)
self.df.to_csv(os.path.join('data','iemocap.csv'), index=False)
else:
self.df= pd.read_csv(os.path.join('data','iemocap.csv'))
if emotions != None:
total_emotions = set(self.df.labels)
#check
for e in emotions:
#print(e, total_emotions)
assert(e in total_emotions)
emotion_2_elim = list(total_emotions.difference(emotions))
idx = []
for e in emotion_2_elim:
index = self.df.loc[self.df['labels']==e].index
self.df = self.df.drop(index).reset_index(drop=True)
# reindexing
#self.df.index = range(len(self.df.fnames))
if type(leave_one_out) != None:
'''
1 = Ses01 F
2 = Ses01 M
3 = Ses02 F
4 = Ses02 M
5 = Ses03 F
6 = Ses03 M
7 = Ses04 F
8 = Ses04 M
9 = Ses05 F
10 = Ses05 M
'''
key = {1:'Ses01F',
2:'Ses01M',
3:'Ses02F',
4:'Ses02M',
5:'Ses03F',
6:'Ses03M',
7:'Ses04F',
8:'Ses04M',
9:'Ses05F',
10:'Ses05M'}
people_to_leave = set([key[i] for i in leave_one_out])
idx =[]
for i in self.df.index:
fname = self.df.iat[i,0]
fname = os.path.basename(fname)
session,*_, gender = fname.split('_')
speaker = session[0:-1]+gender[0]
if speaker in people_to_leave:
idx.append(i)
self.df = self.df.drop(idx).reset_index(drop=True)
#reindexing
#self.df.index = range(len(self.df.fnames))
self.label2index = {label:index for index,label in enumerate(sorted(set(self.df.labels)))}
self.label_array = np.array([self.label2index[label] for label in self.df.labels], dtype=int)
self.fnames = self.df.fnames
self.labels = self.df.labels
def __len__(self):
return len(self.df.index)
def __getitem__(self,index):
# Loading and preprocessing.
assert(index != len(self.df.index))
buffer = self.load_frames(self.fnames[index])
buffer = self.crop(buffer, self.clip_len, self.crop_size)
labels = np.array(self.label_array[index])
# if self.split == 'test':
# # Perform data augmentation
# buffer = self.randomflip(buffer)
buffer = self.normalize(buffer)
buffer = self.to_tensor(buffer)
return torch.from_numpy(buffer), torch.from_numpy(labels).long()
def load_frames(self, file_dir):
frames = sorted([os.path.join(file_dir, img) for img in os.listdir(file_dir)])
frame_count = len(frames)
buffer = np.empty((frame_count, self.resize_height, self.resize_width, 3), np.dtype('float32'))
for i, frame_name in enumerate(frames):
frame = np.array(cv2.imread(frame_name)).astype(np.float64)
buffer[i] = frame
return buffer
def crop(self, buffer, clip_len, crop_size):
# randomly select time index for temporal jittering
time_index = np.random.randint(buffer.shape[0] - clip_len)
# Randomly select start indices in order to crop the video
height_index = np.random.randint(buffer.shape[1] - crop_size)
width_index = np.random.randint(buffer.shape[2] - crop_size)
# Crop and jitter the video using indexing. The spatial crop is performed on
# the entire array, so each frame is cropped in the same location. The temporal
# jitter takes place via the selection of consecutive frames
buffer = buffer[time_index:time_index + clip_len,
height_index:height_index + crop_size,
width_index:width_index + crop_size, :]
return buffer
def normalize(self, buffer):
for i, frame in enumerate(buffer):
frame -= np.array([[[90.0, 98.0, 102.0]]])
buffer[i] = frame
return buffer
def to_tensor(self, buffer):
return buffer.transpose((3, 0, 1, 2))