Hi,
I've found that a forward pass over the same data can produce different results depending on the batch size. Specifically, I run the code below:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader

class SimpleNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.h1 = nn.Linear(16000, 1024)
        self.h2 = nn.Linear(1024, 512)
        self.h3 = nn.Linear(512, 256)

    def forward(self, data):
        x = self.h1(data)
        x = self.h2(x)
        o = self.h3(x)
        return o

class DummyDataset(Dataset):
    def __init__(self):
        self.data = np.random.randn(32, 16000)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

if __name__ == "__main__":
    seed = 1365
    num_workers = 0
    np.random.seed(seed)
    torch.random.manual_seed(seed)

    device = torch.device("cuda")
    model = SimpleNetwork()
    model.to(device)
    model.eval()

    dataset = DummyDataset()

    # Forward the first sample as part of a batch of 16
    dataloader_batch16 = DataLoader(dataset, batch_size=16, shuffle=False, num_workers=num_workers)
    for batch_idx, data in enumerate(dataloader_batch16):
        print("first data item for batch 16")
        data = data.float().to(device)
        print(data[0].abs().sum())
        data_o = model(data)
        print(data_o[0].abs().sum())
        break

    # Forward the same first sample alone, as a batch of 1
    dataloader_batch1 = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=num_workers)
    for batch_idx, data in enumerate(dataloader_batch1):
        print("first data item for batch 1")
        data = data.float().to(device)
        print(data.abs().sum())
        data_o = model(data)
        print(data_o.abs().sum())
        break
And I get:
first data item for batch 16
tensor(12728.1973, device='cuda:0')
tensor(43.8060, device='cuda:0', grad_fn=<SumBackward0>)
first data item for batch 1
tensor(12728.1973, device='cuda:0')
tensor(43.8059, device='cuda:0', grad_fn=<SumBackward0>)
As you can see, the input sums are identical, but the forward-pass output for the first data item differs between the two batch sizes (43.8060 vs. 43.8059). This only seems to happen on the GPU; if I run the same code on the CPU, the results match in this case (though I am not sure about other cases).
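For reference, here is an untested sketch of how the gap could be quantified elementwise instead of via the printed sums (it reuses `model`, `dataset`, and `device` from above; the `atol` value is just my guess at typical float32 rounding noise, not a verified bound):

    with torch.no_grad():
        # Same first sample, forwarded once inside a batch of 16 and once alone
        out16 = model(torch.from_numpy(dataset.data).float().to(device)[:16])[0]
        out1 = model(torch.from_numpy(dataset.data[0:1]).float().to(device))[0]
        print((out16 - out1).abs().max())              # largest elementwise gap
        print(torch.allclose(out16, out1, atol=1e-5))  # within float32-level tolerance?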
Any ideas on the reason?
Thanks