Thank you for the reply — I've used your code snippets.
import torch
from torch.utils.data import TensorDataset, DataLoader

# Wrap the integers 0..99 in a dataset so the DataLoader can shuffle them.
t = torch.arange(100)
ds = TensorDataset(t)

# Seed the shuffle explicitly: torch.manual_seed(0) returns the (seeded)
# default Generator, which the DataLoader then uses for sampling.  The
# generator's state is consumed as batches are drawn, so a second pass over
# the same DataLoader continues the random stream instead of restarting it —
# the two runs print different (but reproducible) orders.
dl = DataLoader(ds, batch_size=16, shuffle=True, generator=torch.manual_seed(0))

print("first run through dl")
for i, yy in enumerate(dl):
    if i < 3:
        print(yy[0][:5])

print("second run through dl")
for i, yy in enumerate(dl):
    if i < 3:
        print(yy[0][:5])
first run through dl
tensor([33, 70, 17, 63, 71])
tensor([90, 64, 11, 30, 91])
tensor([43, 31, 92, 94, 19])
second run through dl
tensor([15, 9, 50, 34, 51])
tensor([82, 70, 73, 13, 57])
tensor([89, 23, 36, 55, 84])
# Setting Seed "globally"
# Seed the global (default) RNG instead of handing the DataLoader its own
# generator; the shuffle order is then drawn from PyTorch's default stream.
torch.manual_seed(0)
dl = DataLoader(ds, batch_size=16, shuffle=True)

# Two full passes over the loader, printing the head of the first
# three batches of each pass.
for label in ("first run through dl", "second run through dl"):
    print(label)
    for batch_idx, batch in enumerate(dl):
        if batch_idx < 3:
            print(batch[0][:5])
first run through dl
tensor([63, 70, 43, 75, 77])
tensor([78, 6, 23, 66, 44])
tensor([31, 84, 24, 73, 54])
second run through dl
tensor([71, 54, 40, 70, 80])
tensor([29, 90, 96, 56, 89])
tensor([92, 53, 41, 60, 78])
Both results are reproducible. If I understand correctly, the two runs differ because, although they start from the same seed, each run draws a different stretch of samples from the generator's stream — and each of those stretches is itself reproducible.
Edit:
Played around a bit — that explains it. Thank you.
# One seed, three consecutive draws: each call to randn advances the
# generator, so the three tensors differ — but the whole sequence is
# reproducible from the seed.
torch.manual_seed(0)
x = torch.randn(5)
print(x)
for _ in range(2):
    print(torch.randn(5))
# Reproducible
tensor([ 1.5410, -0.2934, -2.1788, 0.5684, -1.0845])
tensor([-1.3986, 0.4033, 0.8380, -0.7193, -0.4033])
tensor([-0.5966, 0.1820, -0.8567, 1.1006, -1.0712])
# Re-seeding before every draw restarts the random stream, so each draw
# repeats the very first one — all three printed tensors are identical.
torch.manual_seed(0)
x = torch.randn(5)
print(x)
for _ in range(2):
    torch.manual_seed(0)
    print(torch.randn(5))
# Reproducible
tensor([ 1.5410, -0.2934, -2.1788, 0.5684, -1.0845])
tensor([ 1.5410, -0.2934, -2.1788, 0.5684, -1.0845])
tensor([ 1.5410, -0.2934, -2.1788, 0.5684, -1.0845])