Hello, I am using resnet18, but the batches coming out of my data loader have one extra dimension (I assume it is the frame/temporal dimension). The data module looks like this:
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from pytorchvideo.data import LabeledVideoDataset, make_clip_sampler
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    ShortSideScale,
    UniformTemporalSubsample,
)
from torchvision.transforms import CenterCrop, Compose, Lambda, RandomCrop, RandomHorizontalFlip

# LimitDataset is a small helper defined elsewhere in my code; it wraps the
# iterable LabeledVideoDataset so it has a fixed length per epoch.

class DataModule(pl.LightningDataModule):
    def __init__(
        self,
        train_paths,
        val_paths,
        clip_duration: int = 2,
        batch_size: int = 1,
        num_workers: int = 2,
        **kwargs
    ):
        super().__init__()
        self.train_paths = train_paths
        self.val_paths = val_paths
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.clip_duration = clip_duration
        self.num_labels = len({path[1] for path in train_paths._paths_and_labels})
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.train_transforms = ApplyTransformToKey(
            key='video',
            transform=Compose(
                [
                    UniformTemporalSubsample(8),  # keep 8 frames per clip
                    Lambda(lambda x: x / 255.0),
                    Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)),
                    RandomShortSideScale(min_size=256, max_size=320),
                    RandomCrop(224),
                    RandomHorizontalFlip(p=0.5),
                ]
            )
        )
        self.val_transforms = ApplyTransformToKey(
            key='video',
            transform=Compose(
                [
                    UniformTemporalSubsample(8),
                    Lambda(lambda x: x / 255.0),
                    Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)),
                    ShortSideScale(256),
                    CenterCrop(224),
                ]
            )
        )

    def train_dataloader(self):
        self.train_dataset = LimitDataset(
            LabeledVideoDataset(
                self.train_paths,
                clip_sampler=make_clip_sampler('random', self.clip_duration),
                decode_audio=False,
                transform=self.train_transforms,
            )
        )
        return DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

    def val_dataloader(self):
        self.val_dataset = LimitDataset(
            LabeledVideoDataset(
                self.val_paths,
                clip_sampler=make_clip_sampler('uniform', self.clip_duration),
                decode_audio=False,
                transform=self.val_transforms,
            )
        )
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers)
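For reference, this is roughly how I inspected a batch (a sketch; the 'video' key comes from ApplyTransformToKey above, and train_paths / val_paths are my real path objects):

dm = DataModule(train_paths, val_paths)
batch = next(iter(dm.train_dataloader()))
print(batch['video'].shape)
# torch.Size([1, 3, 8, 224, 224]) -> [batch, channels, frames, height, width]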
I am getting this error:
RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 3, 8, 224, 224]
How can I get rid of this extra dimension? I tried some solutions from the forum, such as calling squeeze and setting the batch size to None, but they did not help.
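To show why squeeze did not help (a minimal sketch; batch is one batch taken from the dataloader above, and the plain torchvision resnet18 here just stands in for my actual module):

import torch
from torchvision.models import resnet18

model = resnet18()      # plain backbone, stands in for my real model
x = batch['video']      # torch.Size([1, 3, 8, 224, 224])
x = x.squeeze()         # only size-1 dims are removed -> torch.Size([3, 8, 224, 224])
# model(x) still fails: resnet18's first conv now sees 8 "channels" (the frames)
# instead of 3, so the frame dimension is the real problem, not the batch dimension.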