Hi, I’m training a model on the CelebA dataset as follows:
import time

import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CelebA(root='./data', split="train",
                                       download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                           shuffle=True, num_workers=128)

testset = torchvision.datasets.CelebA(root='./data', split="test",
                                      download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size,
                                          shuffle=False, num_workers=128)
Now my training loop looks like this:
for epoch in range(total_epochs):
    Train()
where
def Train():
    model.train()
    epoch_start_time = time.time()  # track how long this entire epoch takes
    total_time_used = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        batch_start_time = time.time()  # track how long this one batch takes
        data = data.to(device)  # device is a CUDA device
        optimizer.zero_grad()
        loss = compute_loss(data)
        loss.backward()
        optimizer.step()
        total_time_used += time.time() - batch_start_time  # add the time spent on this batch
    print(time.time() - epoch_start_time)  # the entire epoch's run time
    print(total_time_used)  # the sum of per-batch times
However, I’ve found that total_time_used != time.time() - epoch_start_time; the two differ by around 30 seconds.
I’m wondering why this is the case?
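In case it helps to narrow this down, here is a minimal sketch of how I could instrument the loop to see where the unaccounted time goes, assuming it is spent outside the timed region (waiting for the DataLoader to yield the next batch, and GPU work that is still queued when the timestamp is taken). fetch_time and compute_time are just illustrative names for this sketch:

fetch_time = 0.0    # time spent waiting for the DataLoader to produce a batch
compute_time = 0.0  # time spent inside the timed part of the loop body
fetch_start = time.time()
for batch_idx, (data, _) in enumerate(train_loader):
    fetch_time += time.time() - fetch_start  # gap before this iteration's body started

    batch_start_time = time.time()
    data = data.to(device)
    optimizer.zero_grad()
    loss = compute_loss(data)
    loss.backward()
    optimizer.step()
    torch.cuda.synchronize()  # wait for queued GPU kernels before reading the clock
    compute_time += time.time() - batch_start_time

    fetch_start = time.time()  # restart the clock for the next fetch

print(f"waiting on DataLoader: {fetch_time:.1f}s, timed loop body: {compute_time:.1f}s")

The torch.cuda.synchronize() call is there because CUDA kernels run asynchronously, so time.time() on its own can under-count the GPU work done inside the loop.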