I am trying to use automatic mixed precision (AMP), but I get an error saying that not all tensors are on the GPU. When I check the model and all the variables, everything seems to be on the GPU.
Any idea why it fails during the forward pass, and how to fix it?
(I removed a lot of code to make this the shortest example that still reproduces the error.)
(torch 1.10.2, python 3.6.9, T4 GPU)
import torch
# `torch.version` is a submodule; the version string lives in `torch.__version__`.
print(torch.__version__)
import os
import torch.nn as nn
from torch.optim import SGD
from torchvision import transforms,models
from torchvision.datasets import ImageFolder
from torch.cuda.amp import GradScaler, autocast
# Minimal mixed-precision fine-tuning script: loads an ImageFolder dataset,
# swaps the classification head of a pretrained Inception v3 for a 2-class
# layer, and trains it for 3 epochs with autocast + GradScaler.

dataset_path = '/my_source_path'  # under the path I have 'train' and 'test' folders
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device: ', device)

# torch.cuda.amp only applies to CUDA; disable it explicitly on a CPU fallback
# so autocast/GradScaler become no-ops.
use_amp = device.type == "cuda"

# NOTE(review): torchvision documents Inception v3 as expecting 299x299
# inputs, and Resize(int) only fixes the shorter edge -- confirm 224 is intended.
train_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
train_dataset = ImageFolder(root=os.path.join(dataset_path, 'train'), transform=train_transform)
# NOTE(review): no shuffle=True here -- confirm that is intended for training.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, num_workers=2)

# Replace the head BEFORE moving the model to the device. A freshly
# constructed nn.Linear lives on the CPU, so assigning it after `.to(device)`
# leaves `fc` on the CPU while the rest of the network is on the GPU, which
# makes the forward pass fail with a device-mismatch error.
model = models.inception_v3(pretrained=True, aux_logits=False)
model.fc = nn.Linear(2048, 2)
model = model.to(device)
# Check every parameter: looking only at the first one hides a stray CPU layer.
print('model on cuda: ', all(p.is_cuda for p in model.parameters()))

criterion = nn.CrossEntropyLoss()
# The optimizer is built after the model is on its final device.
optimizer = SGD(model.parameters(), lr=0.005, momentum=0.9)
scaler = GradScaler(enabled=use_amp)

for epoch in range(3):
    print('epoch: ', epoch+1)
    for inputs, labels in train_loader:
        # `.to(device)` is a no-op when the tensors are already on `device`,
        # so no availability check is needed here.
        inputs, labels = inputs.to(device), labels.to(device)
        print('inputs on cuda: ', inputs.is_cuda, ', type: ', inputs.dtype)
        print('labels on cuda: ', labels.is_cuda, ', type: ', labels.dtype)

        # Only the forward pass and the loss run under autocast; the backward
        # pass and optimizer step stay outside the context.
        with autocast(enabled=use_amp):
            output = model(inputs)
            print('output on cuda: ', output.is_cuda, ', type: ', output.dtype)
            loss = criterion(output, labels)

        # Scale the loss before backward; the scaler then steps the optimizer
        # and updates its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()