TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first. even though .cpu() is used

I used .cpu(), but I still get the error. Could you please guide me on how to fix this?

# Test the model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for i, sample_batched in enumerate(train_loader):
        #print(i, sample_batched['image'].size(),
        # sample_batched['landmarks'].size())
        
        images_batch, landmarks_batch = \
            sample_batched['image'], sample_batched['landmarks']
        
        images = images_batch
        labels = landmarks_batch.reshape(-1, 68 * 2)
        
        # Variable is a deprecated no-op wrapper since PyTorch 0.4; plain tensors work the same way
        images = Variable(images.float())
        labels = Variable(labels)
        
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        #_, predicted = torch.max(outputs.data, 1)
        #_, predicted = outputs.data
        print("Predicted", outputs.data.shape)
        
        outputs = outputs.cpu()
        images = images.cpu()
        
        if i == 3:  # visualize only the fourth batch
            plt.figure()
            show_landmarks_batch({'image': images, 'landmarks': outputs.data.reshape(-1, 68, 2)})
            plt.axis('off')
            plt.ioff()
            plt.show()
            show_landmarks_batch({'image': images, 'landmarks': labels.reshape(-1, 68, 2)})
            plt.axis('off')
            plt.ioff()
            plt.show()
            break

It uses this helper function:

# Helper function to show a batch
def show_landmarks_batch(sample_batched):
    """Show image with landmarks for a batch of samples."""
    images_batch, landmarks_batch = \
            sample_batched['image'], sample_batched['landmarks']
    batch_size = len(images_batch)
    im_size = images_batch.size(2)

    grid = utils.make_grid(images_batch)
    grid = grid.cpu()
    plt.imshow(grid.numpy().transpose((1, 2, 0)))
    #sample_batched = sample_batched.to('cpu')
    cpu_sample_batched = {}
    for k, v in sample_batched.items():
        cpu_sample_batched[k] = v.cpu()

    for i in range(batch_size):
        plt.scatter(landmarks_batch[i, :, 0].numpy() + i * im_size,
                    landmarks_batch[i, :, 1].numpy(),
                    s=10, marker='.', c='r')

        plt.title('Batch from dataloader')

The error is:

Predicted torch.Size([3, 136])

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-67-43119a6c95ec> in <module>
     34           plt.ioff()
     35           plt.show()
---> 36           show_landmarks_batch({'image': images, 'landmarks': labels.reshape(-1, 68, 2) })
     37           plt.axis('off')
     38           plt.ioff()

<ipython-input-65-a1ebb35aa3f1> in show_landmarks_batch(sample_batched)
     20 
     21     for i in range(batch_size):
---> 22         plt.scatter(landmarks_batch[i, :, 0].numpy() + i * im_size,
     23                     landmarks_batch[i, :, 1].numpy(),
     24                     s=10, marker='.', c='r')

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
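
For reference, the same error can be reproduced with a minimal snippet (assuming a machine with a CUDA-capable GPU): calling .numpy() directly on a GPU tensor fails, while copying it to host memory with .cpu() first works.

import torch

t = torch.zeros(3, device='cuda')  # tensor stored on the GPU
print(t.is_cuda)                   # True
# t.numpy()                        # would raise the TypeError above
print(t.cpu().numpy())             # works once the tensor is copied to host memory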

Please let me know if more details are needed.

For reference, this is my environment:

(base) mona@mona:~/research/Pytorch-Tutorials$ python
Python 3.7.6 (default, Jan  8 2020, 19:59:22) 
[GCC 7.3.0] :: Anaconda, Inc. on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> torch.__version__
'1.6.0'

Never mind, I figured it out: I had missed converting one of the variables. I added landmarks_batch = landmarks_batch.cpu() to the helper and now it works:

# Helper function to show a batch
def show_landmarks_batch(sample_batched):
    """Show image with landmarks for a batch of samples."""
    images_batch, landmarks_batch = \
            sample_batched['image'], sample_batched['landmarks']
    batch_size = len(images_batch)
    im_size = images_batch.size(2)

    grid = utils.make_grid(images_batch)
    grid = grid.cpu()
    plt.imshow(grid.numpy().transpose((1, 2, 0)))
    # move the landmarks back to the CPU before calling .numpy() on them
    landmarks_batch = landmarks_batch.cpu()

    for i in range(batch_size):
        plt.scatter(landmarks_batch[i, :, 0].numpy() + i * im_size,
                    landmarks_batch[i, :, 1].numpy(),
                    s=10, marker='.', c='r')

        plt.title('Batch from dataloader')
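
As a more defensive variant (just a sketch, not part of the original tutorial), the helper could move every tensor in the batch to the CPU up front, so the plotting code works no matter which device the caller left the tensors on. The helper name batch_to_cpu below is my own, hypothetical choice:

import torch

def batch_to_cpu(sample_batched):
    """Return a copy of the batch dict with every tensor detached and moved to host memory."""
    return {k: v.detach().cpu() if torch.is_tensor(v) else v
            for k, v in sample_batched.items()}

Calling show_landmarks_batch(batch_to_cpu({'image': images, 'landmarks': outputs.data.reshape(-1, 68, 2)})) would then be safe regardless of which device the tensors live on.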