I have an implementation of LeNet in PyTorch. It works completely fine with losses like MSE and MAE, but when I implement cross entropy loss according to the documentation and this forum post (RuntimeError: multi-target not supported (newbie)) I get this error:
RuntimeError: “host_softmax” not implemented for ‘torch.cuda.LongTensor’. The only reference to this error that exists on the internet is here: https://stackoverflow.com/questions/51818225/pytorch-runtimeerror-host-softmax-not-implemented-for-torch-cuda-longtensor, and I’m not doing what he did wrong according to the comments. Since the error was in computing softmax, I tried switching from cross entropy loss to nll loss to get rid of it, and I got this error: ‘RuntimeError: nll_loss_forward is not implemented for type torch.cuda.LongTensor’
What stupid thing am I missing here? I’ve started at it and all the documentation and posts for 4 hours and still can’t see what I’m doing wrong.
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
out = self.conv1(x)
out = F.relu(out)
out = F.max_pool2d(out, 2)
out = F.relu(self.conv2(out))
out = F.max_pool2d(out, 2)
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
return out
def train(model, device, train_loader, optimizer):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device, dtype=torch.int64)
optimizer.zero_grad()
output = model(data)
loss = F.cross_entropy(output.argmax(dim=1, keepdim=True), target.argmax(dim=1, keepdim=True)).item()
loss.backward()
optimizer.step()
def accuracy(model, loader, device):
model.eval()
correct = 0
with torch.no_grad():
for (data, target) in loader:
data, target = data.to(device), target.to(device)
output = model(data)
_, predicted = output.max(1)
_, target = target.max(1)
correct += (predicted == target).sum().item()
return correct/len(loader.dataset)
def loss(model, loader, device):
model.eval()
loss = 0
with torch.no_grad():
for data, target in loader:
data, target = data.to(device), target.to(device, dtype=torch.int64)
output = model(data)
loss += F.cross_entropy(output.argmax(dim=1, keepdim=True), target.argmax(dim=1, keepdim=True)).item()
loss /= len(loader.dataset)
return loss
def main():
batch_size = 6000
test_batch_size = 1000
epochs = 50
device = torch.device('cuda')
train_data = torch.utils.data.TensorDataset(torch.load('trainImages.pt'), torch.load('trainLabels.pt'))
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)
test_data = torch.utils.data.TensorDataset(torch.load('testImages.pt'), torch.load('testLabels.pt'))
test_loader = torch.utils.data.DataLoader(test_data, batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=True)
model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
for epoch in range(1, epochs + 1):
train(model, device, train_loader, optimizer)
trainLoss = loss(model, train_loader, device)
testLoss = loss(model, test_loader, device)
trainAcc = accuracy(model, train_loader, device)
testAcc = accuracy(model, test_loader, device)
print(epoch, trainLoss, testLoss, trainAcc, testAcc)
main()