I have a simple linear classification example using the breast cancer set from sklearn
. I understand the cost of moving data from CPU to GPU; therefore, I am only measuring the main loop.
The system specs: Windows 10 Pro, 12 Core, 256G RAM, RTX A5000.
PyTorch version: 1.9.1+cu111
I am using and running from PyCharm.
run on CPU: 1.5 seconds
run on GPU: 5.4 seconds
Taking the “save loss and accuracy” code out of the loop doesn’t do much.
Is there anything I can do to speed up the GPU code?
Thanks.
Here’s the sample code:
import time
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
dev = torch.device("cuda")
# dev = torch.device("cpu")
data = load_breast_cancer()
X_train, X_test, Y_train_cpu, Y_test_cpu = train_test_split(data.data, data.target, test_size=0.33)
N, D = X_train.shape
scalar = StandardScaler()
X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)
#
# PyTorch stuff
#
model = nn.Sequential(
nn.Linear(D, 1),
nn.Sigmoid()
)
model.to(dev)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters())
X_train = torch.from_numpy(X_train.astype(np.float32)).to(dev)
print(f'X_train on Cuda? {X_train.is_cuda}')
X_test = torch.from_numpy(X_test.astype(np.float32)).to(dev)
print(f'X_test on Cuda? {X_test.is_cuda}')
Y_train = torch.from_numpy(Y_train_cpu.astype(np.float32).reshape(-1, 1)).to(dev)
print(f'Y_train on Cuda? {Y_train.is_cuda}')
Y_test = torch.from_numpy(Y_test_cpu.astype(np.float32).reshape(-1, 1)).to(dev)
print(f'Y_test on Cuda? {Y_test.is_cuda}')
n_epochs = 1000
train_losses = np.zeros(n_epochs)
test_losses = np.zeros(n_epochs)
train_accuracies = np.zeros(n_epochs)
test_accuracies = np.zeros(n_epochs)
tic = time.perf_counter()
for it in range(n_epochs):
# zero the parameter gradients
optimizer.zero_grad()
# Forward pass
outputs = model(X_train)
loss = criterion(outputs, Y_train)
p_train = torch.round(outputs)
train_acc = torch.mean((Y_train == p_train).type(torch.float32))
# Backward and optimize
loss.backward()
optimizer.step()
# Get test loss
outputs_test = model(X_test)
lost_test = criterion(outputs_test, Y_test)
p_test = torch.round(outputs_test)
test_acc = torch.mean((Y_test == p_test).type(torch.float32))
# Save losses
train_losses[it] = loss.item()
test_losses[it] = lost_test.item()
train_accuracies[it] = train_acc
test_accuracies[it] = test_acc
if (it + 1) % 50 == 0:
print(f'Epoch {it + 1}/{n_epochs}, '
f'Train Loss: {loss.item():.4f}, acc: {train_acc:.4f}; '
f'Test Lost: {lost_test.item():.4f}, acc: {test_acc:.4f}')
toc = time.perf_counter()
print(f'Elapse time: {toc - tic:0.4f} seconds')
# Plot the train loss and test loss per iteration
plt.plot(train_losses, label='train loss')
plt.plot(test_losses, label='test loss')
plt.legend()
plt.show()
# Plot the train acc and test acc per iteration
plt.plot(train_accuracies, label='train acc')
plt.plot(test_accuracies, label='test acc')
plt.legend()
plt.show()