Hi,
I’m trying to train with AMP, but unfortunately I get the following error:
Input type (torch.cuda.HalfTensor) and weight type (torch.FloatTensor) should be the same
I’ve seen that others have had this problem, but only because they didn’t use autocast correctly, which is not the case here. I put together the following shorter script that reproduces the error:
(torch 1.10.2, python 3.6.9, T4 GPU)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD
from torchvision import transforms, models
import torchvision.datasets as datasets
from torch.cuda.amp import GradScaler, autocast
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device: ', device)
train_transform = transforms.Compose([
    transforms.Resize(32),
    transforms.RandomHorizontalFlip(p=.50),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
test_transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
traindata = datasets.CIFAR10(root='.', train=True, download=True, transform=train_transform)
train_loader = torch.utils.data.DataLoader(traindata, batch_size=96, shuffle=True)
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.005, momentum=0.9)
scaler = GradScaler()
for epoch in range(5):
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        with torch.cuda.amp.autocast():
            output = model(inputs)
            loss = criterion(output, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()
Plus, in my own code (which only differs in the dataloader, built with ImageFolder), if I use Inception instead of the small net above (with 2 outputs instead of 10):
model = models.inception_v3(pretrained=True, aux_logits=False).to(device)
model.fc = nn.Linear(2048, 2)
Then I get:
Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_addmm)
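For completeness, the Inception variant looks roughly like this (the ImageFolder root, image size, and batch size below are placeholders rather than my exact values, but the structure is the same):

import torch
import torch.nn as nn
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision import transforms, models, datasets
from torch.cuda.amp import GradScaler, autocast

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# placeholder transforms and dataset path, not my real ones
train_transform = transforms.Compose([
    transforms.Resize((299, 299)),  # inception_v3 expects 299x299 inputs
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
traindata = datasets.ImageFolder(root='./train', transform=train_transform)
train_loader = DataLoader(traindata, batch_size=32, shuffle=True)

model = models.inception_v3(pretrained=True, aux_logits=False).to(device)
model.fc = nn.Linear(2048, 2)  # replace the head for 2 classes, as in my real code

criterion = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.005, momentum=0.9)
scaler = GradScaler()

for epoch in range(5):
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        with autocast():
            output = model(inputs)
            loss = criterion(output, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()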