How do I get rid of the extra dimension in my DataLoader?

Hello, I am using resnet18, but my DataLoader returns batches with one extra dimension (I am assuming it is the frame dimension). My LightningDataModule looks like this:

import pytorch_lightning as pl
from torch.utils.data import DataLoader
from pytorchvideo.data import LabeledVideoDataset, make_clip_sampler
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    ShortSideScale,
    UniformTemporalSubsample,
)
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Lambda,
    RandomCrop,
    RandomHorizontalFlip,
)

class DataModule(pl.LightningDataModule):
    def __init__(
        self,
        train_paths,
        val_paths,
        clip_duration: int = 2,
        batch_size: int = 1,
        num_workers: int = 2,
        **kwargs
    ):
        super().__init__()
        self.train_paths = train_paths
        self.val_paths = val_paths
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.clip_duration = clip_duration
        self.num_labels = len({path[1] for path in train_paths._paths_and_labels})  # number of distinct labels (relies on a private attribute)
        for k, v in kwargs.items():
            setattr(self, k, v)

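        # transforms applied to the 'video' key of each clip dict;
        # UniformTemporalSubsample(8) keeps 8 frames per clip, so the
        # video tensor comes out as (channels, frames, height, width)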
        self.train_transforms = ApplyTransformToKey(
            key='video',
            transform=Compose(
                [
                    UniformTemporalSubsample(8),
                    Lambda(lambda x: x / 255.0),
                    Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)),
                    RandomShortSideScale(min_size=256, max_size=320),
                    RandomCrop(224),
                    RandomHorizontalFlip(p=0.5),
                ]
            )
        )
        self.val_transforms = ApplyTransformToKey(
            key='video',
            transform=Compose(
                [
                    UniformTemporalSubsample(8),
                    Lambda(lambda x: x / 255.0),
                    Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)),
                    ShortSideScale(256),
                    CenterCrop(224)
                ]
            )
        )

    def train_dataloader(self):
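        # LimitDataset is a custom wrapper (defined elsewhere in my code)
        # around the iterable LabeledVideoDataset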
        self.train_dataset = LimitDataset(
            LabeledVideoDataset(
                self.train_paths,
                clip_sampler=make_clip_sampler('random', self.clip_duration),
                decode_audio=False,
                transform=self.train_transforms,
            )
        )
        return DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

    def val_dataloader(self):
        self.val_dataset = LimitDataset(
            LabeledVideoDataset(
                self.val_paths,
                clip_sampler=make_clip_sampler('uniform', self.clip_duration),
                decode_audio=False,
                transform=self.val_transforms,
            )
        )
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

I am getting this error:

RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 3, 8, 224, 224]

How can I get rid of this extra dimension? I tried some solutions from the forum, such as calling squeeze and setting batch_size=None, but they did not help.

It’s unclear how your datasets are implemented and what exactly they return. However, based on the shape, I assume the DataLoader adds the batch dimension with a size of 1 (you are using batch_size=1), so the tensor is laid out as [batch, channels, frames, height, width]. You could squeeze this dimension, but only if your dataset already returns a batch of samples, which is uncommon.
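For illustration, here is a minimal sketch of both points, assuming the [batch, channels, frames, height, width] layout above; the frame-flattening in the second half is one common workaround for running a 2D model such as resnet18 on video clips, not something taken from your code:

import torch
from torchvision.models import resnet18

batch = torch.randn(1, 3, 8, 224, 224)  # stand-in for one batch['video'] from the DataLoader

# 1) Squeezing only makes sense if the dataset itself already returned
#    a pre-batched tensor (uncommon):
squeezed = batch.squeeze(0)  # [3, 8, 224, 224] -- 4D, but not an image batch resnet18 can use

# 2) The size-8 frame dimension also has to go for a 2D model; one common
#    workaround is folding the frames into the batch dimension and treating
#    each frame as a separate image:
b, c, t, h, w = batch.shape
frames = batch.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w)  # [8, 3, 224, 224]
model = resnet18()  # randomly initialized, just for the shape check
out = model(frames)  # per-frame logits: [8, 1000]
clip_logits = out.view(b, t, -1).mean(dim=1)  # e.g. average per-frame logits into one clip prediction: [1, 1000]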