Hello!

I’d really appreciate any help on this.

I’m trying to understand why the GPU utilisation is under 2% with this training code. If possible, could you please tell me what parts of the code need to be changed to effectively utilise more of the GPU? Thank you so much!

```
# Training-loop fragment. It reads `self`, `torch` and `time` from the
# enclosing method/module, which are not part of this snippet.
print("Starting training ..")

# Always bind `model`, so the loop below also works when CUDA is not used
# (previously `model` only existed inside the `if self.use_cuda:` branch).
model = self.model
if self.use_cuda:
    print("Moving computations to cuda.")
    model = model.cuda()

# Build the optimizer only after the model sits on its final device; the
# PyTorch docs ask for the module to be moved before the optimizer is
# constructed.
optimizer = torch.optim.SGD(model.parameters(), lr=self.lr)
# reduction='none' yields one loss value per sample; they are summed below.
criterion = torch.nn.CosineEmbeddingLoss(reduction='none')

# initialize tracker for minimum validation loss
valid_loss_min = self.valid_loss_min_input

# NOTE(review): this range runs n_epochs + 1 epochs — confirm that the extra
# epoch is intended.
# NOTE(review): very low GPU utilisation is usually decided by how the
# DataLoader is configured (batch_size, num_workers, pin_memory), which is
# not visible in this fragment — TODO check `self.data_loaders['train']`.
for epoch in range(self.start_epochs, self.start_epochs + self.n_epochs + 1):
    # initialize variables to monitor training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    print("Starting epoch:", epoch)

    # train the model #
    model.train()
    start_time = time.time()
    start_batch_time = time.time()
    for batch_idx, (scalar_1, scalar_2, label, user_vec_raw, prod_vec_raw) in enumerate(self.data_loaders['train']):
        # Optional progress logging (disabled):
        # if batch_idx % 1000 == 0:
        #     print(f'train epoch {epoch}, batch {batch_idx}')
        #     print("Time taken", time.time()-start_batch_time)
        #     start_batch_time = time.time()

        # Move the batch to the GPU only when requested; on CPU the tensors
        # are used as delivered by the loader. non_blocking=True lets the
        # copy overlap with compute when the loader uses pinned memory and
        # is a plain synchronous copy otherwise.
        if self.use_cuda:
            label = label.cuda(non_blocking=True)
            user_vec_raw = user_vec_raw.cuda(non_blocking=True)
            prod_vec_raw = prod_vec_raw.cuda(non_blocking=True)

        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output_user_emb, output_prod_emb = model(user_vec_raw, prod_vec_raw)
        # calculate the per-sample batch loss and reduce it to a scalar once
        loss = criterion(output_user_emb, output_prod_emb, label.float())
        batch_loss = loss.sum()
        # backward pass: compute gradient of the loss with respect to model parameters
        batch_loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()

        # Running mean of the summed batch losses. `detach()` replaces the
        # deprecated `.data`; the value stays a tensor, so no host/device
        # synchronisation is forced on every step.
        train_loss = train_loss + ((1 / (batch_idx + 1)) * (batch_loss.detach() - train_loss))

    # validate the model #
    model.eval()
```