train_data=torch.utils.data.DataLoader
When I iterate over the train_data
object in a for loop, every iteration seems to give random data from train_data.
Is the order sequential or random? How can I fetch the data in sequential order?
Each sample is data = (video, caption).
It depends on the shuffle
argument used in the DataLoader
creation.
The default shuffle=False
setting yields the samples sequentially (the sample indices are passed to the dataset in increasing order); with shuffle=True the order is random.
Hi,
I used shuffle=False
as shown below:
# Training loader: batches of 10 samples, shuffling disabled, loading done
# with num_workers=0, and each batch assembled by the custom collate_fn.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=10,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_fn,
)
and wrote a collate_fn
to build the mini-batch tensors, but I am still getting random data every time I run it.
Could you post your collate_fn
so that I could execute your code, please?
def collate_fn(data):
    """Create mini-batch tensors from a list of (image, caption) tuples.

    The samples are ordered by caption length, longest first, before being
    batched. As a consequence the order of samples *inside* a returned batch
    can differ from the order in which the samples were passed in.

    Args:
        data: list of (image, caption) tuples. Each caption must support
            len() and slicing (presumably a 1-D tensor of variable length).

    Returns:
        images: the images merged into a single tensor, one entry per sample.
        targets: long tensor of shape (batch_size, max caption length);
            captions are right-padded with zeros.
        lengths: list with the unpadded length of each caption.
    """
    # sorted() instead of list.sort(): leave the caller's list untouched.
    batch = sorted(data, key=lambda x: len(x[1]), reverse=True)
    img, captions = zip(*batch)

    if all(isinstance(im, torch.Tensor) for im in img):
        # torch.Tensor(<tuple of tensors>) raises for multi-element tensors;
        # stacking adds the batch dimension and keeps each image intact.
        images = torch.stack(img, 0)
    else:
        # Non-tensor images (e.g. nested lists): keep the original conversion.
        images = torch.Tensor(img)

    lengths = [len(cap) for cap in captions]
    targets = torch.zeros(len(captions), max(lengths)).long()
    for i, cap in enumerate(captions):
        end = lengths[i]
        targets[i, :end] = cap[:end]
    return images, targets, lengths
ref: https://github.com/XiaoxiaoGuo/fashion-iq/blob/master/start_kit/data_loader.py
Thanks for the link to the code.
It works for me and the data is not shuffled:
def collate_fn(data):
    """Build mini-batch tensors from a list of 4-tuples.

    Args:
        data: list of (target_image, candidate_image, caption, meta) tuples.
            - target_image / candidate_image: tensors that can be stacked
              (all the same shape within the batch).
            - caption: 1-D tensor; variable length.
            - meta: passed through unchanged.

    Returns:
        target_images: target images stacked along a new leading dimension.
        candidate_images: candidate images stacked the same way.
        captions_out: long tensor of shape (batch_size, padded_length);
            captions are right-padded with zeros.
        lengths: list; valid length of each padded caption.
        meta: tuple with the per-sample meta entries, in batch order.
    """
    # Split the batch into its four columns; sample order is kept as given.
    targets, candidates, caps, meta = zip(*data)

    # Merge images (tuple of tensors -> one tensor with a batch dimension).
    target_images = torch.stack(targets, 0)
    candidate_images = torch.stack(candidates, 0)

    # Merge captions (tuple of 1-D tensors -> zero-padded 2-D tensor).
    lengths = [len(c) for c in caps]
    captions_out = torch.zeros(len(caps), max(lengths), dtype=torch.long)
    for row, n in enumerate(lengths):
        captions_out[row, :n] = caps[row][:n]

    return target_images, candidate_images, captions_out, lengths, meta
class MyDataset(Dataset):
    """Toy dataset with 10 samples whose values are derived from the index.

    Each item is (target_image, candidate_image, caption, meta), where the
    three tensors hold idx, idx + 0.1 and idx + 0.2, and meta is a dict of
    strings tagged with the index.
    """

    def __init__(self):
        pass

    def __len__(self):
        return 10

    def __getitem__(self, idx):
        # One float tensor holding the index; the other two are offsets of it.
        base = torch.tensor([idx]).float()
        tag = str(idx)
        meta = {
            'target': 't' + tag,
            'candidate': 'candidate' + tag,
            'caption': 'cap' + tag,
        }
        return base, base + 0.1, base + 0.2, meta
# Index the dataset directly to inspect individual samples.
dataset = MyDataset()
print(dataset[0])
# (tensor([0.]), tensor([0.1000]), tensor([0.2000]), {'target': 't0', 'candidate': 'candidate0', 'caption': 'cap0'})
print(dataset[1])
# (tensor([1.]), tensor([1.1000]), tensor([1.2000]), {'target': 't1', 'candidate': 'candidate1', 'caption': 'cap1'})

# Iterate in batches of two with shuffling disabled and print every field
# of each batch in the order collate_fn returns them.
loader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=False,
    collate_fn=collate_fn,
)
for target_images, candidate_images, captions_out, lengths, meta in loader:
    for field in (target_images, candidate_images, captions_out, lengths, meta):
        print(field)
# output
# tensor([[0.],
# [1.]])
# tensor([[0.1000],
# [1.1000]])
# tensor([[0],
# [1]])
# [1, 1]
# ({'target': 't0', 'candidate': 'candidate0', 'caption': 'cap0'}, {'target': 't1', 'candidate': 'candidate1', 'caption': 'cap1'})
# tensor([[2.],
# [3.]])
# tensor([[2.1000],
# [3.1000]])
# tensor([[2],
# [3]])
# [1, 1]
Yes, it’s working. I’ll check my code again. Thanks a lot.