Hi,

Here are two regression programs that use the same data set and the same basic model (2 inputs and 1 output):

Program 1 :

```
import numpy as np
import torch
import time
# Define the model
def model(x):
    """Apply the linear model to a batch of inputs.

    Reads the module-level parameter tensors ``w`` and ``b``.

    Args:
        x: Input tensor that can be matrix-multiplied with ``w.t()``.

    Returns:
        The tensor ``x @ w.t() + b``.
    """
    return x @ w.t() + b
# MSE loss
def mse(t1, t2):
    """Return the mean squared error between two tensors.

    Args:
        t1: First tensor (e.g. predictions).
        t2: Second tensor (e.g. targets); must be subtractable from ``t1``.

    Returns:
        A scalar tensor: the sum of the squared element-wise differences
        divided by the number of elements in the difference.
    """
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()
def fit(num_epochs, model, loss_fn, w, b):
    """Train ``w`` and ``b`` in place with full-batch gradient descent.

    Every epoch performs a single forward/backward pass over the whole
    module-level ``inputs`` / ``targets`` tensors and then takes one step
    scaled by the module-level learning rate ``lr``.

    Args:
        num_epochs: Number of gradient-descent steps to perform.
        model: Callable mapping the inputs to predictions.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar
            tensor that supports ``backward()``.
        w: Weight tensor created with ``requires_grad=True``.
        b: Bias tensor created with ``requires_grad=True``.
    """
    for _ in range(num_epochs):
        preds = model(inputs)
        loss = loss_fn(preds, targets)
        loss.backward()
        # The parameter update itself must not be tracked by autograd.
        with torch.no_grad():
            w -= w.grad * lr
            b -= b.grad * lr
            # backward() accumulates into .grad, so reset it for the next epoch.
            w.grad.zero_()
            b.grad.zero_()
# Hyper-parameters and data ranges for the benchmark.
lr = 1e-3
nb_epochs = 1000
nb_data = 1000
min_x = 2.0
max_x = 3.0
min_y = 5.0
max_y = 9.0
# Two evenly spaced features; the target is simply their sum.
X = np.linspace(min_x, max_x, num=nb_data, dtype=np.float32)
Y = np.linspace(min_y, max_y, num=nb_data, dtype=np.float32)
inputs = np.stack((X, Y), axis=1)
targets = X + Y
targets = targets.reshape(targets.size, 1)
# Convert inputs and targets to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
# Weights and biases
w = torch.randn(1, 2, requires_grad=True)
b = torch.randn(1, requires_grad=True)
# perf_counter() is the clock intended for measuring elapsed intervals.
begin = time.perf_counter()
fit(nb_epochs, model, mse, w, b)
end = time.perf_counter()
print(f"Duration = {end-begin} s")
# Evaluation only: no autograd graph is needed for these forward passes.
with torch.no_grad():
    # Calculate loss
    preds = model(inputs)
    loss = mse(preds, targets)
    # Calculate a prediction
    pred_y = model(torch.tensor([[2.5, 6.0]]))
print(f"Loss = {loss.item()}")
print("predict ", pred_y.item(), " should be ===>",8.5 )
```

Program 2 :

```
import torch.nn as nn
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import time
# Define a utility function to train the model
def fit(num_epochs, model, loss_fn, opt):
    """Train ``model`` with mini-batch gradient descent.

    Iterates over the module-level ``train_dl`` data loader once per epoch
    and performs one optimizer step per mini-batch.

    Args:
        num_epochs: Number of full passes over ``train_dl``.
        model: Callable mapping a batch of inputs to predictions.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar
            tensor that supports ``backward()``.
        opt: Optimizer exposing ``step()`` and ``zero_grad()``.
    """
    for _ in range(num_epochs):
        for xb, yb in train_dl:
            # Generate predictions
            pred = model(xb)
            loss = loss_fn(pred, yb)
            # Perform gradient descent
            loss.backward()
            opt.step()
            # Clear the gradients so the next batch starts from zero.
            opt.zero_grad()
device = "cpu"
#device = "cuda:0"
# Hyper-parameters and data ranges for the benchmark.
lr = 1e-3
batch_size = 100
nb_epochs = 1000
nb_data = 1000
min_x = 2.0
max_x = 3.0
min_y = 5.0
max_y = 9.0
# Two evenly spaced features; the target is simply their sum.
X = np.linspace(min_x, max_x, num=nb_data, dtype=np.float32)
Y = np.linspace(min_y, max_y, num=nb_data, dtype=np.float32)
inputs = np.stack((X, Y), axis=1)
targets = X + Y
targets = targets.reshape(targets.size, 1)
inputs = torch.from_numpy(inputs).to(device)
targets = torch.from_numpy(targets).to(device)
train_ds = TensorDataset(inputs, targets)
# Define data loader
# With nb_data = 1000 and batch_size = 100 this yields 10 mini-batches per
# epoch, i.e. 10 optimizer steps per epoch.
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
# Define model, 2 inputs, 1 output
model = nn.Linear(2, 1).to(device)
# Define optimizer
opt = torch.optim.SGD(model.parameters(), lr=lr)
# Define loss function
loss_fn = F.mse_loss
# Train the model for some epochs
# CUDA kernels are launched asynchronously, so wait for pending GPU work
# before reading the clock; otherwise the measured interval is unreliable.
if inputs.is_cuda:
    torch.cuda.synchronize()
# perf_counter() is the clock intended for measuring elapsed intervals.
begin = time.perf_counter()
fit(nb_epochs, model, loss_fn, opt)
if inputs.is_cuda:
    torch.cuda.synchronize()
end = time.perf_counter()
print(f"Duration = {end-begin} s")
# Evaluation only: no autograd graph is needed for these forward passes.
with torch.no_grad():
    # Calculate final loss
    preds = model(inputs)
    loss = loss_fn(preds, targets)
    # Evaluate a prediction
    pred_y = model(torch.tensor([[2.5, 6.0]], device=device))
print(f"Loss = {loss.item()}")
print("predict ", pred_y.item(), " should be ===>",8.5 )
```

They use the same number of data points (1000) and the same number of epochs (1000).

When I run these programs on the same machine (Ubuntu 20.04, 32 GB RAM, Core i7, NVIDIA 2080, torch 1.12.1), here are the training durations (the **fit** function):

program 1 : 0.25s

program 2 (on CPU) : 8.5s

program 2 (on GPU) : 11.8s

Why is there such a big difference between program 1 and program 2? And for program 2, why is the GPU run slower than the CPU run?

Regards,

Philippe