Can someone explain how I would go about running extract_features.py for my own video (i.e. how I should update the Dataset module)? Furthermore, since I want to start from a raw video, I am also a bit unsure about how to convert a video into RGB frames / optical-flow frames.
I generally use the following dataset class for my video datasets. It reads the video one frame at a time, stacks the frames, and returns an array of shape (channels, num_frames, height, width).
Here is my implementation of the class…
class customVideoDataset(Dataset):
    """Video dataset over a ``root/<class_label>/<video_file>`` directory tree.

    Each item is a ``(channels, frames, height, width)`` float16 numpy array
    (frames decoded with OpenCV, converted BGR->RGB, resized square) together
    with an integer class index.
    """

    def __init__(self, path, frame_count, frame_size=32):
        """
        Args:
            path: root directory laid out as root/<class_label>/<video_file>.
            frame_count: number of frames to read per video; shorter videos
                are zero-padded up to this length.
            frame_size: side length each frame is resized to (default 32
                preserves the original behaviour).
        """
        self.videos = []
        self.labels = []
        self.frames = frame_count
        self.frame_size = frame_size
        folder = Path(path)
        for label in sorted(os.listdir(folder)):
            for fname in os.listdir(os.path.join(folder, label)):
                self.videos.append(os.path.join(folder, label, fname))
                self.labels.append(label)
        # Map each class-folder name to a stable integer index (sorted order).
        self.label2index = {label: index for index, label in enumerate(sorted(set(self.labels)))}
        self.label_array = numpy.array([self.label2index[label] for label in self.labels], dtype=int)

    def __getitem__(self, idx):
        """Return (stacked_frames, label) for the video at ``idx``.

        Reads up to ``self.frames`` frames. Uses zeros (not ``numpy.empty``)
        so that videos shorter than ``self.frames`` are padded with black
        frames instead of uninitialised memory.
        """
        video = cv2.VideoCapture(self.videos[idx])
        stacked_frames = numpy.zeros(
            shape=(self.frames, self.frame_size, self.frame_size, 3),
            dtype=numpy.dtype('float16'))  # frame layout: h, w, channels
        frame_count = 0
        while video.isOpened() and frame_count < self.frames:
            ret, frame = video.read()
            if not ret:  # end of stream or decode failure
                break
            # OpenCV decodes as BGR; convert to RGB before stacking.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (self.frame_size, self.frame_size))
            stacked_frames[frame_count] = frame
            frame_count += 1
        video.release()
        # (frames, h, w, channels) -> (channels, frames, h, w)
        stacked_frames = stacked_frames.transpose((3, 0, 1, 2))
        return stacked_frames, self.label_array[idx]

    def __len__(self):
        """Number of videos discovered at construction time."""
        return len(self.videos)
Well, if it is for classification, the layout is root_dir/class1, root_dir/class2, and so on.
I generally leave it in the default structure, i.e. The way it is once you download and extract.