Hello, I am having an issue with my data not being loaded in the same order all the time. As a result I am not getting the same accuracies every time. My code is the following:
import numpy as np
import torch
from torch.utils.data import SubsetRandomSampler
import random
from sklearn.model_selection import train_test_split
def load_hmdb():
dataset_path = os.getcwd() + "/HMDB51/Dataset/"
transform_train = transforms.Compose([
transforms.ToTensor(),
transforms.Resize((64, 64), antialias=True),
transforms.Normalize(mean=[0.3574, 0.3479, 0.3089], std=[0.1995, 0.1931, 0.1908])
])
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Resize((64, 64), antialias=True),
transforms.Normalize(mean=[0.3574, 0.3479, 0.3089], std=[0.1995, 0.1931, 0.1908])
])
train_set = ImageFolder(root=dataset_path + "train/images", transform=transform_train)
test_set = ImageFolder(root=dataset_path + "test/images", transform=transform_test)
return train_set, test_set
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
generator = torch.Generator().manual_seed(42)
np.random.seed(42)
random.seed(42)
train_set, test_set = load_hmdb()
labels = np.array(train_set.targets)
batch_size = 512
train_idx, val_idx= train_test_split(np.arange(len(labels)),
test_size=0.2,
shuffle=True,
stratify=labels,
random_state=42)
train_sampler = SubsetRandomSampler(train_idx, generator=generator)
val_sampler = SubsetRandomSampler(val_idx, generator=generator)
train_loader = torch.utils.data.DataLoader(train_set,
batch_size=batch_size,
sampler=train_sampler,
generator=generator,
shuffle=False,
num_workers=0)
val_loader = torch.utils.data.DataLoader(train_set,
batch_size=batch_size,
sampler=val_sampler,
generator=generator,
shuffle=False,
num_workers=0)
for data, label in val_loader:
print(label)
for data, label in val_loader:
print(label)
When I run this, the labels are not printed in the same order. I would imagine they would since I am using the same seeds and generators. Have I misunderstood how DataLoaders actually work or have I missed something which causes this behavior?