RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'target' in call to _thnn_nll_loss_forward

I am trying to train a multi-output image classification model (age and gender labels) but am stuck on this error. Here is my model. I have looked at other answers so far, but unfortunately I could not solve it.

class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        
        self.conv_layer = nn.Sequential(
            
            # Conv Layer block 1
            nn.Conv2d(3, 64, 1, stride = 1),
            nn.SELU(inplace=True),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, 7, stride = 1),
            nn.SELU(inplace=True),
            nn.MaxPool2d(3, 3),
            nn.BatchNorm2d(64),
            
            # Conv Layer block 2
            nn.Conv2d(64, 128, 1, stride = 1),
            nn.SELU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 128, 5, stride = 1),
            nn.SELU(inplace=True),
            nn.MaxPool2d(3, 3),
            nn.BatchNorm2d(128),
            
            # Conv Layer block 3
            nn.Conv2d(128, 256, 1, stride = 1),
            nn.SELU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, 3, stride = 2),
            nn.SELU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 384, 1, stride = 1),
            nn.SELU(inplace=True),
            nn.MaxPool2d(2,2),
            nn.BatchNorm2d(384))
        
        self.fc_layer1 = nn.Sequential(
            nn.Linear(384*5*5, 1024),
            nn.SELU(inplace=True),
            nn.AlphaDropout(0.5),
            nn.Linear(1024, 512),
            nn.SELU(inplace=True),
            nn.AlphaDropout(0.5),            
            nn.Linear(512, 8)
        )
        
        self.fc_layer2 = nn.Sequential(
            nn.Linear(384*5*5, 1024),
            nn.SELU(inplace=True),
            nn.AlphaDropout(0.5),
            nn.Linear(1024, 512),
            nn.SELU(inplace=True),
            nn.AlphaDropout(0.5),            
            nn.Linear(512, 2)
        )
        
        
        self.apply(weights_init)
        
    def forward(self, x):
        # conv layers
        x = self.conv_layer(x)
        
        # flatten
        x = x.view(x.size(0), -1)
        
        label1 = self.fc_layer1(x)
        label2 = torch.sigmoid(self.fc_layer2(x))
        
        return {'label1': label1, 'label2': label2}
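
A quick way to sanity-check the 384*5*5 flatten size is to push a dummy tensor through the conv stack. This is only a sketch: 210x210 is an assumed input size that happens to end in a 5x5 feature map, and it assumes weights_init is defined elsewhere.

net = Net()
with torch.no_grad():
    # 210x210 is an assumed input size (not from the original post)
    feat = net.conv_layer(torch.randn(1, 3, 210, 210))
print(feat.shape)  # expected: torch.Size([1, 384, 5, 5])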

The error is raised because it seems that the model’s output is on the GPU, while the target tensor is still on the CPU.
You might thus need to move the targets to the GPU before passing them to the criterion via:

target = target.to('cuda')
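
Or, more device-agnostically, define a device once and move the model output and targets to it. A self-contained sketch with random stand-in tensors (the shapes and criterion here are just placeholders):

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

criterion = nn.CrossEntropyLoss()
output = torch.randn(8, 8, device=device)      # stand-in for a model output on `device`
target = torch.randint(0, 8, (8,)).to(device)  # targets moved to the same device
loss = criterion(output, target)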

I already did that. My code was working when training on either age or gender individually, but when I try to run it as multi-output, it keeps showing this error.

Here is my CustomDataset

class Data(Dataset):
    
    def __init__(self, csv_file, root_dir, transform):
        self.csv_file = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        
    def __len__(self):
        return len(self.csv_file)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        img_name = os.path.join(self.root_dir, 
                                self.csv_file.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')
        label_age = self.csv_file.iloc[idx, 1]
        label_gender = self.csv_file.iloc[idx, 2]
        
        if self.transform:
            image = self.transform(image)
            
        return {
            'image': image,
            'label_age': label_age,
            'label_gender': label_gender
        }
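
Since __getitem__ returns a dict, the DataLoader's default collate_fn batches each field separately, so every batch is itself a dict of tensors. A minimal usage sketch (the csv/image paths and batch size are placeholders, and 210x210 is only an assumed size matching the model's 384*5*5 flatten):

from torch.utils.data import DataLoader
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((210, 210)),
    transforms.ToTensor(),
])
train_data = Data('train.csv', 'images/', transform)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

batch = next(iter(train_loader))
# batch['image']:        FloatTensor of shape [32, 3, 210, 210]
# batch['label_age']:    LongTensor of shape [32]
# batch['label_gender']: LongTensor of shape [32]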

And the training part

def train_model(model, criterion1, criterion2, optimizer, scheduler, n_epochs=10):
    """return trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.inf
    print('Training started')
    for epoch in range(1, n_epochs + 1):
        start_time = time.monotonic()
        train_loss = 0.0
        valid_loss = 0.0
        # train the model
        model.train()
        for idx, batch in enumerate(train_loader):
            # get data and move to GPU
            image, label1, label2 = batch['image'].to(device), batch['label'].to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            output = model(image)
            label1_hat = output['label1']
            label2_hat = output['label2']
            # calculate loss
            loss1 = criterion1(label1_hat, label1.squeeze().type(torch.LongTensor))
            loss2 = criterion2(label2_hat, label2.squeeze().type(torch.LongTensor))
            loss = loss1 + loss2
            # backprop
            loss.backward()
            # grad
            optimizer.step()
            scheduler.step()
            train_loss = train_loss + ((1 / (idx + 1)) * (loss.data - train_loss))
            if idx % 50 == 0:
                print('Epoch %d, Batch %d loss: %.6f' % (epoch, idx + 1, train_loss))
                
        # validate the model 
        model.eval()
        for idx, batch in enumerate(valid_loader):
            # get data and move to GPU
            image, label1, label2 = batch['image'].to('cuda'), batch['label_age'].to('cuda'), batch['label_gender'].to('cuda')
            output = model(image)
            label1_hat = output['label1']
            label2_hat = output['label2']
            # calculate loss
            loss1 = criterion1(label1_hat, label1.squeeze().type(torch.LongTensor))
            loss2 = criterion2(label2_hat, label2.squeeze().type(torch.LongTensor))
            loss = loss1 + loss2
            valid_loss = valid_loss + ((1 / (idx + 1)) * (loss.data - valid_loss))
            
        end_time = time.monotonic()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        print('#################################################')
        print(f'Epoch: {epoch} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        # print training/validation statistics
        for param_group in optimizer.param_groups:
            print("Current learning rate is: {}".format(param_group['lr']))
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch, train_loss, valid_loss))

        # save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            torch.save(model, 'model.pt')
            print('Validation loss decreased ({:.6f}) --> {:.6f}. Saving model ...'.format(valid_loss_min, valid_loss))
            valid_loss_min = valid_loss

    # return trained model
    return model

The training loop looks a bit wrong: you are only assigning batch['label'], which isn’t defined, and you are using device (what is device set to?), while the evaluation loop uses batch['label_age'].to('cuda') and batch['label_gender'].to('cuda'). You might want to check the device of all target tensors separately, for example as shown below.
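
For example (assuming device is defined, e.g. device = torch.device('cuda')):

image = batch['image'].to(device)
label1 = batch['label_age'].to(device)
label2 = batch['label_gender'].to(device)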

Sorry, my mistake.
Here is the full version. Everything seems okay, but it shows the same error.

def train_model(model, criterion1, criterion2, optimizer, scheduler, n_epochs=10):
    """return trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.inf
    print('Training started')
    for epoch in range(1, n_epochs + 1):
        start_time = time.monotonic()
        train_loss = 0.0
        valid_loss = 0.0
        # train the model
        model.train()
        for idx, batch in enumerate(train_loader):
            # get data and move to GPU
            image, label1, label2 = batch['image'].to('cuda'), batch['label_age'].to('cuda'), batch['label_gender'].to('cuda')
            # zero the parameter gradients
            optimizer.zero_grad()
            output = model(image)
            label1_hat = output['label1']
            label2_hat = output['label2']
            # calculate loss
            loss1 = criterion1(label1_hat, label1.squeeze().type(torch.LongTensor))
            loss2 = criterion2(label2_hat, label2.squeeze().type(torch.LongTensor))
            loss = loss1 + loss2
            # backprop
            loss.backward()
            # grad
            optimizer.step()
            scheduler.step()
            train_loss = train_loss + ((1 / (idx + 1)) * (loss.data - train_loss))
            if idx % 50 == 0:
                print('Epoch %d, Batch %d loss: %.6f' % (epoch, idx + 1, train_loss))
                
        # validate the model 
        model.eval()
        for idx, batch in enumerate(valid_loader):
            # get data and move to GPU
            image, label1, label2 = batch['image'].to('cuda'), batch['label_age'].to('cuda'), batch['label_gender'].to('cuda')
            output = model(image)
            label1_hat = output['label1']
            label2_hat = output['label2']
            # calculate loss
            loss1 = criterion1(label1_hat, label1.squeeze().type(torch.LongTensor))
            loss2 = criterion2(label2_hat, label2.squeeze().type(torch.LongTensor))
            loss = loss1 + loss2
            valid_loss = valid_loss + ((1 / (idx + 1)) * (loss.data - valid_loss))
            
        end_time = time.monotonic()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        print('#################################################')
        print(f'Epoch: {epoch} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        # print training/validation statistics
        for param_group in optimizer.param_groups:
            print("Current learning rate is: {}".format(param_group['lr']))
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch, train_loss, valid_loss))

        # save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            torch.save(model, 'model.pt')
            print('Validation loss decreased ({:.6f}) --> {:.6f}. Saving model ...'.format(valid_loss_min, valid_loss))
            valid_loss_min = valid_loss

    # return trained model
    return model

I also uploaded the code to GitHub, in case the full code is needed: github

Thanks for the update.
The error is caused by explicitly casting the targets to torch.LongTensor, which is a CPU tensor type.
Instead of using .type(torch.LongTensor), call label1.squeeze().long() (and likewise for label2) so the tensors stay on the GPU.
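
In other words, the loss computation could look like this; .long() only changes the dtype and keeps each tensor on whatever device it already lives on, while .type(torch.LongTensor) also moves it to the CPU:

loss1 = criterion1(label1_hat, label1.squeeze().long())
loss2 = criterion2(label2_hat, label2.squeeze().long())
loss = loss1 + loss2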

Thanks a lot. Finally, it worked. You saved my day :blush: