# Dataset and loader: read_video returns the list of face frames extracted from a given input video.
from PIL import Image as PIL_Image
def read_video(mtcnn, path=None, num_frames=2):
    """Sample random frames from a video and return detected face crops.

    Opens the video at ``path``, samples up to ``num_frames`` distinct random
    frame indices, runs the ``mtcnn`` face detector on each frame, and collects
    every detected face tensor scaled to [0, 1] (division by 255).

    Parameters
    ----------
    mtcnn : callable
        Face detector; called with a PIL image, returns face crops or None.
        # NOTE(review): presumably facenet-pytorch MTCNN — confirm.
    path : path-like, optional
        Location of the video file.
    num_frames : int, default 2
        How many random frames to sample (clamped to the frame count).

    Returns
    -------
    list
        Face crops from all sampled frames; empty if the video is unreadable
        or no faces were found.
    """
    v_cap = cv2.VideoCapture(str(path))
    face_list = []
    try:
        frame_count = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0:
            # Unreadable/empty video: random.sample would raise otherwise.
            return face_list
        # Clamp so videos shorter than num_frames don't raise ValueError.
        frames_to_read = random.sample(range(frame_count),
                                       min(num_frames, frame_count))
        for fno in frames_to_read:
            # Seek only when the decoder isn't already positioned at fno;
            # seeking is expensive on some codecs.
            cur_pos = int(v_cap.get(cv2.CAP_PROP_POS_FRAMES))
            if cur_pos != fno:
                v_cap.set(cv2.CAP_PROP_POS_FRAMES, fno)
            success, frame = v_cap.read()
            if not success or frame is None:
                # Skip frames the decoder failed to produce instead of
                # crashing in cvtColor on a None frame.
                continue
            # OpenCV decodes BGR; the detector expects RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = PIL_Image.fromarray(frame.astype('uint8'))
            faces = mtcnn(frame)
            if faces is not None:
                for face in faces:
                    # Scale pixel values from [0, 255] to [0, 1].
                    face_list.append(face / 255)
    finally:
        # Always release the capture handle (original leaked it).
        v_cap.release()
    print(len(face_list))
    return face_list
class vidSet(Dataset):
    """Dataset yielding (face crops, label) pairs, one item per video file.

    Each item calls ``read_video`` to sample frames and detect faces with the
    module-level ``mtcnn`` detector. The label is hard-coded to 0.
    # NOTE(review): ``self.c = 2`` looks like the fastai two-class
    # convention — confirm against the training loop.
    """

    def __init__(self, videos_path):
        """Index every video under ``videos_path``.

        Parameters
        ----------
        videos_path : path-like
            Directory of videos; must support ``.ls()``
            (fastai Path extension — TODO confirm).
        """
        self.video_paths = videos_path.ls()  # list of video files
        self.root = videos_path              # dataset root directory
        self.c = 2                           # number of classes

    def __len__(self):
        """Number of videos in the dataset."""
        return len(self.video_paths)

    def __getitem__(self, idx):
        """Return (list of face tensors, label 0) for video ``idx``."""
        faces = read_video(mtcnn, path=self.video_paths[idx])
        # read_video always returns a list (never None), so the original
        # ``faces is None`` check could never fire; warn on the real
        # failure mode instead: no faces detected in any sampled frame.
        if not faces:
            print('None')
        print(idx)  # debug: which item was fetched
        return faces, 0