I used this script to generate multi-clip input for my validation process:
class ActionDatasetMultiClips(ActionDataset):
    """Dataset variant that returns a list of clip tensors and matching targets.

    Reads ``self.sequences`` (mapping of sequence key to a list of per-frame
    dicts holding ``'image_path'``, ``'action_name'`` and ``'condition'``),
    ``self.temporal_transform``, ``self.spatial_transform``,
    ``self.action_name_to_id`` and ``self.condition_name_to_id``.
    These attributes are presumably set up by ``ActionDataset`` -- TODO confirm.
    """

    def __loading(self, path, frame_indices):
        """Load the frames of every clip and stack each clip into one tensor.

        Args:
            path: Sequence of image file paths belonging to one sequence.
            frame_indices: Iterable of clips, each an iterable of integer
                positions into ``path``.

        Returns:
            A list with one ``torch.stack(frames, 0)`` tensor per clip.

        Raises:
            ValueError: If a clip contains no index smaller than ``len(path)``.
        """
        num_frames = len(path)
        clips = []
        for clip_frame_indices in frame_indices:
            valid_indices = [i for i in clip_frame_indices if i < num_frames]
            if not valid_indices:
                raise ValueError(f"No valid frame indices for clip: {clip_frame_indices}")
            clip = []
            for i in valid_indices:
                # Image.open keeps the file open until the data is read; the
                # context manager releases the handle right after convert()
                # has produced an independent RGB copy.
                with Image.open(path[i]) as img:
                    clip.append(img.convert('RGB'))
            if self.spatial_transform is not None:
                clip = [self.spatial_transform(img) for img in clip]
            # torch.stack needs tensors, so spatial_transform presumably ends
            # with a to-tensor step -- TODO confirm.
            clips.append(torch.stack(clip, 0))
        return clips

    def __getitem__(self, idx):
        """Return ``(clips, targets)`` for the sequence at position ``idx``.

        Args:
            idx: Position of the sequence in the key order of ``self.sequences``.

        Returns:
            A tuple ``(clips, targets)``: ``clips`` is the list produced by
            ``__loading`` and ``targets`` holds one long tensor
            ``[action_name_id, condition_id]`` per clip.

        Raises:
            ValueError: If no frame index survives the temporal transform and
                the bounds filter.
        """
        sequence_key = list(self.sequences.keys())[idx]
        sequence_data = self.sequences[sequence_key]
        frame_paths = [data['image_path'] for data in sequence_data]

        # Start from every frame so the indices are defined even when no
        # temporal transform is configured (previously an UnboundLocalError).
        frame_indices = list(range(len(frame_paths)))
        if self.temporal_transform:
            frame_indices = self.temporal_transform(frame_indices)
            # Check the length first: an empty result must reach the
            # ValueError below instead of failing on frame_indices[0].
            if len(frame_indices) > 0 and isinstance(frame_indices[0], list):
                # NOTE(review): nested per-clip index lists are flattened into
                # one list, which is then passed as the only clip below, so
                # this method always yields exactly one clip per sequence.
                frame_indices = [i for sublist in frame_indices for i in sublist]

        frame_indices = [i for i in frame_indices if i < len(frame_paths)]
        if not frame_indices:
            raise ValueError(f"No valid frame indices for sequence {sequence_key}")

        clips = self.__loading(frame_paths, [frame_indices])

        # Labels are read from the first frame and shared by every clip.
        action_name = sequence_data[0]['action_name']
        condition = sequence_data[0]['condition']
        action_name_id = self.action_name_to_id[action_name]
        condition_id = self.condition_name_to_id[condition]
        targets = [
            torch.tensor([action_name_id, condition_id], dtype=torch.long)
            for _ in clips
        ]
        print(f"Number of clips: {len(clips)}")
        print(f"Number of targets: {len(targets)}")
        return clips, targets
And this is the `collate_fn` I use to flatten the input:
def collate_fn(batch):
    """Flatten per-sample clip/target lists and stack them into two tensors.

    Args:
        batch: Sequence of ``(clips, targets)`` pairs, where each element of
            the pair is an iterable of tensors.

    Returns:
        A tuple ``(clips, targets)``; each is the ``torch.stack(..., 0)`` of
        all tensors gathered across the batch, in sample order.
    """
    per_sample_clips, per_sample_targets = zip(*batch)
    print(f"Batch size: {len(batch)}")
    print(f"Number of clips in batch: {len(per_sample_clips)}")
    print(f"Number of targets in batch: {len(per_sample_targets)}")

    # Concatenate every sample's clips (then targets) into one flat list.
    flat_clips = []
    for sample_clips in per_sample_clips:
        flat_clips.extend(sample_clips)
    flat_targets = []
    for sample_targets in per_sample_targets:
        flat_targets.extend(sample_targets)
    print(f"Flattened clips: {len(flat_clips)}")
    print(f"Flattened targets: {len(flat_targets)}")

    # A new leading dimension indexes the flattened clips / targets.
    stacked_clips = torch.stack(flat_clips, 0)
    stacked_targets = torch.stack(flat_targets, 0)
    return stacked_clips, stacked_targets
I tried printing the output of the dataloader,
and this is what I got:
# Debug aid: inspect only the first batch produced by val_loader.
for batch_idx, batch in enumerate(val_loader):
    data, target = batch
    print(f"\nBatch {batch_idx}: Data Shape: {data.shape}, Target: {target}")
    break  # one batch is enough for a sanity check
Number of clips: 1
Number of targets: 1
Number of clips: 1
Number of targets: 1
Batch size: 2
Number of clips in batch: 2
Number of targets in batch: 2
Flattened clips: 2
Flattened targets: 2
Batch 0: Data Shape: torch.Size([2, 32, 3, 224, 224]), Target: tensor([[0, 1],
[1, 1]])
But when I run the validation, the following error occurs:
Traceback (most recent call last):
File ~\anaconda3\envs\cnnsvm\Lib\site-packages\spyder_kernels\customize\utils.py:209 in exec_encapsulate_locals
exec_fun(compile(code_ast, filename, "exec"), globals)
File d:\organized_files\transformer_exemple\train.py:164
val_loss, val_action_acc, val_condition_acc = validate_one_epoch(
File d:\organized_files\transformer_exemple\utils.py:85 in validate_one_epoch
for frames, labels in tqdm(dataloader, desc="Validation", leave=False):
ValueError: too many values to unpack (expected 2)