Hi,
I have been reading through the posts that mention slow back prop and my undertanding is that backprop should take about 2x the forward pass. Mine takes 20x.
I am running a 3D U-Net on a (1,1,32,512,512) test image volume and I am consistently gettig these numbers (Titan RTX and pytroch 1.5.0 from pip):
Transferring images: 0.01
Forward: 0.00
Loss Calc: 0.32
Back prop: 7.59
Step: 0.00
Would anyone be able help point me in the right direction? I assume it is simply slower than expected?
Minimal example below.
Thanks!
Soren
device = torch.device(‘cuda:0’ if torch.cuda.is_available() else ‘cpu’)
net = UNet3D(n_channels=1, n_classes=1)
net.to(device=device)
test_size=(1,1,32,512,512)
dumin=torch.from_numpy( np.zeros(test_size,np.float32) )
dumGT=torch.from_numpy( np.zeros(test_size,np.float32) )
net.cuda()
summary(net, input_size=dumin.shape[1:])
criterion = torch.nn.BCELoss()
optimizer = optim.RMSprop(net.parameters(), lr=0.1, weight_decay=1e-8)
losses=[]
for epoch in range(5):
net.train()
print(epoch)
for indx,item in enumerate(range(100)):
#to GPU
torch.cuda.synchronize()
c = time.perf_counter()
true_masks = dumin.to(device=device) # item["seg"].to(device=device)
imgs = dumin.to(device=device) # item["data"].to(device=device)
torch.cuda.synchronize()
print(f"Transferring images: {time.perf_counter()-c:2.2f}")
#Cost
c = time.perf_counter()
masks_pred = net(imgs)
print(f"Forward: {time.perf_counter() - c:2.2f}")
c = time.perf_counter()
beloss = criterion(masks_pred, true_masks)
torch.cuda.synchronize()
print(f"Loss Calc: {time.perf_counter() - c:2.2f}")
#backprop
c = time.perf_counter()
optimizer.zero_grad()
beloss.backward()
torch.cuda.synchronize()
print(f"Back prop: {time.perf_counter() - c:2.2f}")
#update
torch.cuda.synchronize()
c = time.perf_counter()
optimizer.step()
torch.cuda.synchronize()
print(f"Step: {time.perf_counter() - c:2.2f}")