Help regarding building my custom dataset

I appreciate it if someone can help me regarding what im doing wrong.

My dataset get the path of the main folder, and then based on the index goes to the corresponding subfolders that belong to that video and read all the frames and stack them to gather and the output is like CxFxHxW, where C is the channel number = 3 and F is number of all the frames for that video.
e.g. if a video has 20 frames with size of 300x300 in rgb, the output will be 3x20x300x300
im not shure if i should add the batch size now to make it 1x3x20x300x300 or later…

Now lets go to the dataloader.
I build my dataloader as follow:

import torch
from torch.utils import data
import os
import torch.utils.data as data
from PIL import Image
import os
import os.path
import torchvision.transforms as transforms

def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


def accimage_loader(path):
    import accimage
    try:
        return accimage.Image(path)
    except IOError:
        # Potentially a decoding problem, fall back to PIL.Image
        return pil_loader(path)


def default_loader(path):
    from torchvision import get_image_backend
    if get_image_backend() == 'accimage':
        return accimage_loader(path)
    else:
        return pil_loader(path)
    
    
class VideoDataset(data.Dataset):
    #     path is the path that all the video folders are there, meaning that for each video we have a folder that includes all the frame for that folder
    def __init__(self, path):
        filenames= os.listdir(path) # get all files' and folders' names in the current directory
        filenames.sort()
        NameOfAllTheVideos = []
        for filename in filenames: # loop through all the files and folders
            if os.path.isdir(os.path.join(os.path.abspath(PATH), filename)): # check whether the current object is a folder or not
                NameOfAllTheVideos.append(os.path.join(os.path.abspath(PATH), filename)) #create the address to read each video
        NameOfAllTheVideos.sort() #Name/Address of video folder that includes the frame inside of them
        JPEGFilesForEachVideo = {} #each key is the name of the folder and the value is the name of Frames
        for i in NameOfAllTheVideos:
            AllJPEGFiles = os.listdir(i + '/')
            AllJPEGFiles.sort()
            JPEGFilesForEachVideo[i] = AllJPEGFiles # give all the freames to that corresponding video
        self.JPEGFilesForEachVideo = JPEGFilesForEachVideo # so here i have the name/address of each video andthe name/address of their corresponding frames
    
    def __len__(self):
        return len(self.JPEGFilesForEachVideo)
    
    def __getitem__(self, index):
        # here o read the frames for that specific video that corresponds to index and save the frames in a tensor
        JPEGAddresses = list(self.JPEGFilesForOneVideo.keys())
        for FrameIndex in range(len(self.JPEGFilesForOneVideo[JPEGAddresses[index]])):
            JPEGPath = JPEGAddresses[index] + '/' +  self.JPEGFilesForOneVideo[JPEGAddresses[index]][FrameIndex]
            Frame = default_loader(JPEGPath)
            SIZE = [300,300]
            transform = transforms.Compose(
                [transforms.Resize(SIZE),
                 transforms.ToTensor(),
                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            FrameTransformed = torch.unsqueeze(transform(Frame),1)
            if FrameIndex == 0:
                FramesForOneVideo = FrameTransformed
            else:
                FramesForOneVideo =torch.cat((FramesForOneVideo, FrameTransformed), 1)
        return torch.unsqueeze(FramesForOneVideo,0) # so it will have the size BxCxFxDxD

if i give the path to it i will have:

PATH = '/home/alireza/Desktop/Datasets/VID/ILSVRC-New/Data/VID/train/'
Training = VideoDataset(PATH)
print(Training)
<__main__.VideoDataset object at 0x7fcff04fdd68>

How can i check to see if im doing it right or not?
I am not sure if it is right or not???

if i give it to Dataloader it gives me:

import torch
import numpy as np

from torch.utils.data import Dataset, DataLoader



train_loader = data.DataLoader(Training,
                          batch_size=2,
                          shuffle=False,
                          num_workers=2)
    
print(train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7fcff0c9da90>

Again please let me know how i can check its correctness…

but when i want to iterate through it, it gives me this error:


for epoch in range(1):
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        print(epoch, i, "inputs", inputs.data, "labels", labels.data)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-39-3e3a8304e478> in <module>()
     15 
     16 for epoch in range(1):
---> 17     for i, data in enumerate(train_loader, 0):
     18         # get the inputs
     19         inputs, labels = data

~/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
    284                 self.reorder_dict[idx] = batch
    285                 continue
--> 286             return self._process_next_batch(batch)
    287 
    288     next = __next__  # Python 2 compatibility

~/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _process_next_batch(self, batch)
    305         self._put_indices()
    306         if isinstance(batch, ExceptionWrapper):
--> 307             raise batch.exc_type(batch.exc_msg)
    308         return batch
    309 

AttributeError: Traceback (most recent call last):
  File "/home/alireza/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 57, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/alireza/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 57, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "<ipython-input-35-d5c268d5dcaa>", line 56, in __getitem__
    JPEGAddresses = list(self.JPEGFilesForOneVideo.keys())
AttributeError: 'VideoDataset' object has no attribute 'JPEGFilesForOneVideo'

I highly recommend to you to work with the video files itself.
First of all, you do not need a custom DataLoader you only need a custom Dataset. Check out this tutorial. I like to use cv2 module, just do conda install opencv or pip install cv2.

in the Dataset's __getitem__() method you can just read a frame like following

MyDataset(Dataset):
    '''Custom dataset-class for loading video-frames'''
    def __init__(self, num_videos, video_directory, videos):
        self.num_videos = num_videos
        self.video_directory = video_directory
        self.videos = videos

    def __getitem__(self, i):
        cap = cv2.VideoCapture(os.path.join(self.video_directory, self.videos[i]))
        
        # reading all frames into a big ndarray
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        num_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)

        frames = np.zeros((num_frames, h, w, 3))

        count = 0
        while count < num_frames:
            succesful, frame = cap.read()
            if not succesful:
                break
            else:
                frames[count] = frame

        labels = # get the labels for this video from somewhere
        return {'frames': frames, 'labels': labels}

This is how you can get all frames from video files into a numpy array, to convert each frame to a torch.image you will have to transpose its axis: frames.transpose(3, 0, 1, 2) where the new shape is (channels, num_frames, h, w) because torch.image needs channel before d, h, w, where d is the depth of your video aka the sequence_length if you will be using RNNs.

With the returned dictionary of MyDataset you can, while training, iterate the dataloader and call the outputs by their keys.

1 Like

Sorry for my mistake, i meant custom Dataset as well.

Can you please clarify what do you mean by labels?
is labels the GT labels for the corresponding video that im reading? and in what format i should have the labels? or is lables like the video index (name of the video)?

Also, is it necessary to have the labels in out put?

last but not least, do i need to convert the numpy to torch inside the MyDataset function?

UPDATE:
So I did it as you mentioned (lets not worry about the transpose right now, i can take care of it late).


class MyDataset(Dataset):
    '''Custom dataset-class for loading video-frames'''
    def __init__(self, VideosPath):
        Videos = os.listdir(VideosPath) # get all files' and folders' names in the current directory
        Videos.sort()
        self.num_videos = len(Videos)
        self.video_directory = VideosPath
        self.videos = Videos
    def __len__(self):
        return len(self.num_videos)
    
    def __getitem__(self, i):
        cap = cv2.VideoCapture(os.path.join(self.video_directory, self.videos[i]))
        
        # reading all frames into a big ndarray

        
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        num_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print(width,height,num_frames)
        frames = np.zeros((int(num_frames), int(height), int(width), 3))
        count = 0
        while count < num_frames:
            succesful, frame = cap.read()
        #     print(succesful)
            if not succesful:
                break
            else:
                frames[count] = frame

        frames = torch.Tensor(frames)

        labels = i # not sure what should be for lables, please let me know
        return {'frames': frames, 'labels': labels}

but when i wanna test it:


VideosPath = '/home/alireza/Desktop/Datasets/VID/ILSVRC-Old/Data/VID/snippets/train/ILSVRC2015_VID_train_0000/' # this videospath includes 1000 videos

Training = MyDataset(VideosPath)
print(Training)
None

I get none as the output of my print :frowning:

I also checked and the code inside the __getitem__ is correct,
Test check:

VideosPath = '/home/alireza/Desktop/Datasets/VID/ILSVRC-Old/Data/VID/snippets/train/ILSVRC2015_VID_train_0000/'
Videos = os.listdir(VideosPath) # get all files' and folders' names in the current directory
Videos.sort()
print('len:',len(Videos))
i= 0
cap = cv2.VideoCapture(os.path.join(VideosPath, Videos[i]))
print(cap)

len: 1000
<VideoCapture 0x7f193b976890>

But i don’t know why MyDataset is not working :confused:

(1) Labels are the ground truth you have for each frame or video. I would load it in the Dataset because you can pass the Dataset to DataLoader class and if you return both the input for your net and the ground truth for this input in Dataset’s __getitem__() method, it is easy:

for i, data in enumerate(dataloader):
    inputs, labels = data['frames'], data['gt']

(2) If you print the MyDataset you get a None? This seems weird. The __getitem__() method though is called when you index your object as following:

dataset = MyDataset(video_paths)
frames, labels = dataset[23] # here the method __getitem__() gets called with i = 23
1 Like

Thanks, I did it from scratch again and the dataset part is working fine :slight_smile:

1 Like