# Dataset and loader: read_video returns the list of face frames extracted from a given input video.
from PIL import Image as PIL_Image
def read_video(mtcnn, path=None, num_frames=2):
    """Sample random frames from a video and return detected face crops.

    Opens the video at ``path``, samples up to ``num_frames`` distinct random
    frame indices, runs the ``mtcnn`` face detector on each frame, and collects
    every detected face tensor scaled to [0, 1] (division by 255).

    Parameters
    ----------
    mtcnn : callable
        Face detector; called with a PIL image, returns face crops or None.
        # NOTE(review): presumably facenet-pytorch MTCNN — confirm.
    path : path-like, optional
        Location of the video file.
    num_frames : int, default 2
        How many random frames to sample (clamped to the frame count).

    Returns
    -------
    list
        Face crops from all sampled frames; empty if the video is unreadable
        or no faces were found.
    """
    v_cap = cv2.VideoCapture(str(path))
    face_list = []
    try:
        frame_count = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0:
            # Unreadable/empty video: random.sample would raise otherwise.
            return face_list
        # Clamp so videos shorter than num_frames don't raise ValueError.
        frames_to_read = random.sample(range(frame_count),
                                       min(num_frames, frame_count))
        for fno in frames_to_read:
            # Seek only when the decoder isn't already positioned at fno;
            # seeking is expensive on some codecs.
            cur_pos = int(v_cap.get(cv2.CAP_PROP_POS_FRAMES))
            if cur_pos != fno:
                v_cap.set(cv2.CAP_PROP_POS_FRAMES, fno)
            success, frame = v_cap.read()
            if not success or frame is None:
                # Skip frames the decoder failed to produce instead of
                # crashing in cvtColor on a None frame.
                continue
            # OpenCV decodes BGR; the detector expects RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = PIL_Image.fromarray(frame.astype('uint8'))
            faces = mtcnn(frame)
            if faces is not None:
                for face in faces:
                    # Scale pixel values from [0, 255] to [0, 1].
                    face_list.append(face / 255)
    finally:
        # Always release the capture handle (original leaked it).
        v_cap.release()
    print(len(face_list))
    return face_list
class vidSet(Dataset):
    """Dataset yielding (face crops, label) pairs, one item per video file.

    Each item calls ``read_video`` to sample frames and detect faces with the
    module-level ``mtcnn`` detector. The label is hard-coded to 0.
    # NOTE(review): ``self.c = 2`` looks like the fastai two-class
    # convention — confirm against the training loop.
    """

    def __init__(self, videos_path):
        """Index every video under ``videos_path``.

        Parameters
        ----------
        videos_path : path-like
            Directory of videos; must support ``.ls()``
            (fastai Path extension — TODO confirm).
        """
        self.video_paths = videos_path.ls()  # list of video files
        self.root = videos_path              # dataset root directory
        self.c = 2                           # number of classes

    def __len__(self):
        """Number of videos in the dataset."""
        return len(self.video_paths)

    def __getitem__(self, idx):
        """Return (list of face tensors, label 0) for video ``idx``."""
        faces = read_video(mtcnn, path=self.video_paths[idx])
        # read_video always returns a list (never None), so the original
        # ``faces is None`` check could never fire; warn on the real
        # failure mode instead: no faces detected in any sampled frame.
        if not faces:
            print('None')
        print(idx)  # debug: which item was fetched
        return faces, 0