The use of DataLoader significantly slows down backward pass as compared to simple data usage. See example below:
import torch
import time
# Custom dataset
class Dataset(torch.utils.data.Dataset):
def __init__(self, input_x, input_y, input_t, labels):
self.input_x = input_x
self.input_y = input_y
self.input_t = input_t
self.labels = labels
self.input_x.requires_grad = True
self.input_y.requires_grad = True
self.input_t.requires_grad = True
self.labels.requires_grad = True
def __len__(self):
return self.labels.shape[0]
def __getitem__(self, idx):
return self.input_x[idx, :], self.input_y[idx, :], \
self.input_t[idx, :], self.labels[idx, :]
# Neural Network (NN)
net = torch.nn.Sequential(
torch.nn.Linear(3, 50),
torch.nn.Tanh(),
torch.nn.Linear(50, 50),
torch.nn.Tanh(),
torch.nn.Linear(50, 1)
)
# Number of samples
num_samples = 1200
# ### Without Dataloader ###
# Generate (x, y, t) input data and output data
x, y, t = torch.rand(num_samples, 1, requires_grad=True), torch.rand(num_samples, 1, requires_grad=True), torch.rand(num_samples, 1, requires_grad=True)
output_data = torch.sin(torch.pi * x) + torch.cos(torch.pi * y) + t
# Optimiser
optimiser = torch.optim.Adam(net.parameters(), lr=1e-3)
optimiser.zero_grad()
# Compute loss u_t - (u_xx + u_yy) , u = NN prediction
output_pred = net(torch.cat((x, y, t), dim=1))
u_t = torch.autograd.grad(output_pred.sum(), t, create_graph=True)
u_x = torch.autograd.grad(output_pred.sum(), x, create_graph=True)
u_y = torch.autograd.grad(output_pred.sum(), y, create_graph=True)
u_xx = torch.autograd.grad(u_x[0].sum(), x, create_graph=True)
u_yy = torch.autograd.grad(u_y[0].sum(), y, create_graph=True)
loss_pde = torch.mean((u_t[0] - (u_xx[0] + u_yy[0]))**2)
start_time = time.process_time()
loss_pde.backward()
end_time = time.process_time()
print(f"PDE loss backward time without Dataloader: {end_time - start_time}")
optimiser.step()
# Compute MSE loss
output_pred = net(torch.cat((x, y, t), dim=1))
loss_mse = torch.mean((output_data - output_pred)**2)
start_time = time.process_time()
loss_mse.backward()
end_time = time.process_time()
print(f"MSE loss backward time without Dataloader: {end_time - start_time}")
optimiser.step()
# ### With Dataloader ###
# Generate (x, y, t) input data and output data
x, y, t = torch.rand(num_samples, 1), torch.rand(num_samples, 1), torch.rand(num_samples, 1)
output_data = torch.sin(torch.pi * x) + torch.cos(torch.pi * y) + t
# Create custom dataset and dataloder with only one batch
dataset = Dataset(x, y, t, output_data)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=x.shape[0], shuffle=False)
for batch, (x_data, y_data, t_data, output_data) in enumerate(dataloader):
optimiser.zero_grad()
# Compute loss u_t - (u_xx + u_yy) , u = NN prediction
output_pred = net(torch.cat((x_data, y_data, t_data), dim=1))
u_t = torch.autograd.grad(output_pred.sum(), t, create_graph=True)
u_x = torch.autograd.grad(output_pred.sum(), x, create_graph=True)
u_y = torch.autograd.grad(output_pred.sum(), y, create_graph=True)
u_xx = torch.autograd.grad(u_x[0].sum(), x, create_graph=True)
u_yy = torch.autograd.grad(u_y[0].sum(), y, create_graph=True)
loss_pde = torch.mean((u_t[0] - (u_xx[0] + u_yy[0]))**2)
start_time = time.process_time()
loss_pde.backward()
end_time = time.process_time()
print(f"PDE loss backward time with Dataloader: {end_time - start_time}")
optimiser.step()
# Compute MSE loss
output_pred = net(torch.cat((x_data, y_data, t_data), dim=1))
loss_mse = torch.mean((output_data - output_pred)**2)
start_time = time.process_time()
loss_mse.backward()
end_time = time.process_time()
print(f"MSE loss backward time with Dataloader: {end_time - start_time}")
optimiser.step()
The time taken for the backward pass without DataLoader
is 1 order of magnitude lower than using DataLoader
. Is there any reason for the same? Is it recommended to use DataLoader
? If so, why?
Thanks in advance.