In my code, I need to augment a clip (an image sequence) during iteration. I use torch.Tensor as the data type because the docs recommend doing so. However, I found that it can have a negative impact on speed.
Here is my demo code, running on the latest PyTorch version:
from torch.utils.data import DataLoader, Dataset
import torch
from PIL import Image
from torchvision.transforms import v2
import copy
import time


class MyDatasets(Dataset):
    def __init__(self, data_type="tensor"):
        self.data_type = data_type

    def __len__(self):
        return 50

    def __getitem__(self, index):
        if self.data_type == "tensor":
            frame = torch.randn(3, 800, 1440)
            clip = frame[None, ...].repeat(40, 1, 1, 1)
        else:
            frame = Image.new("RGB", (1440, 800))
            clip = []
            for i in range(40):
                clip.append(copy.deepcopy(frame))
        # Horizontal flip:
        if isinstance(clip, torch.Tensor):
            clip = v2.functional.hflip(clip)
        else:
            clip = [v2.functional.hflip(img) for img in clip]
        # Resize:
        if isinstance(clip, torch.Tensor):
            clip = v2.functional.resize(clip, [720, 1280])
        else:
            clip = [v2.functional.resize(img, [720, 1280]) for img in clip]
        # Crop:
        if isinstance(clip, torch.Tensor):
            clip = v2.functional.crop(clip, 50, 50, 480, 800)
        else:
            clip = [v2.functional.crop(img, 50, 50, 480, 800) for img in clip]
        # To tensor:
        if isinstance(clip, list):
            clip = [v2.functional.to_tensor(img) for img in clip]
            clip = torch.stack(clip, dim=0)
        return clip
if __name__ == '__main__':
    # Tensor pipeline:
    dataset = MyDatasets("tensor")
    dataloader = DataLoader(dataset, batch_size=1, num_workers=4)
    begin_time = time.time()
    for data in dataloader:
        # print(data)
        pass
    print("Tensor Cost: ", time.time() - begin_time)

    # PIL pipeline:
    dataset = MyDatasets("pil")
    dataloader = DataLoader(dataset, batch_size=1, num_workers=4)
    begin_time = time.time()
    for data in dataloader:
        # print(data)
        pass
    print("PIL Cost: ", time.time() - begin_time)
Running this script, I get the following output:
Tensor Cost: 9.852455615997314
PIL Cost: 7.140314817428589
More complex augmentations may widen the gap further; for example, both branches of __getitem__ could be extended with one more op, as sketched below.
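Just to illustrate what I mean by "more complex" (this snippet is my illustration, not part of the benchmark above): v2.functional.adjust_brightness accepts both tensors and PIL images, so it drops into __getitem__ like the other ops:

        # Extra, hypothetical augmentation step: per-frame brightness change.
        if isinstance(clip, torch.Tensor):
            clip = v2.functional.adjust_brightness(clip, 1.5)
        else:
            clip = [v2.functional.adjust_brightness(img, 1.5) for img in clip]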
However, if I set num_workers=0 in the DataLoader, I get:
Tensor Cost: 11.182294607162476
PIL Cost: 24.352734088897705
I do not understand this behavior at all. Could anyone help me understand why it happens?
Thanks a lot.
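P.S. One guess from my side (not something the docs state): with num_workers=0, all transforms run in the main process, where tensor ops may use intra-op parallelism across cores. If that matters, pinning the thread count should change the numbers. A minimal check, assuming the same script as above:

import torch

# Pin intra-op parallelism to a single thread before benchmarking,
# to check how much of the tensor pipeline's speed comes from it.
torch.set_num_threads(1)

dataset = MyDatasets("tensor")
dataloader = DataLoader(dataset, batch_size=1, num_workers=0)
begin_time = time.time()
for data in dataloader:
    pass
print("Tensor Cost (1 thread): ", time.time() - begin_time)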