Expected object of scalar type Long but got scalar type Float for argument #2 'target'

Basically following the guide and made some minor adjustments.

I want to load in RGB images paired with binary masks.

If anyone could point me to some good examples of this. (Ones that don’t use .csv or other ‘label’-oriented files.)

Error:

Traceback (most recent call last):
  File "densenet/PyTorchAttempt2.py", line 340, in <module>
    model_ft, hist = train_model(model_ft, loaders, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))
  File "densenet/PyTorchAttempt2.py", line 188, in train_model
    loss = criterion(outputs, labels)
  File "venv/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "venv/lib/python3.6/site-packages/torch/nn/modules/loss.py", line 904, in forward
    ignore_index=self.ignore_index, reduction=self.reduction)
  File "venv/lib/python3.6/site-packages/torch/nn/functional.py", line 1970, in cross_entropy
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
  File "/venv/lib/python3.6/site-packages/torch/nn/functional.py", line 1790, in nll_loss
    ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'target'

Code:

from __future__ import print_function, division
import os
import torch
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models
from common import dataset as qc_ds
from PIL import Image
import time
import os
import copy
import torch.nn as nn
import torch.optim as optim


print("PyTorch Version: ",torch.__version__)
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "densenet"

# Number of classes in the dataset
num_classes = 1

# Batch size for training (change depending on how much memory you have)
batch_size = 8

# Number of epochs to train for
num_epochs = 15

# Flag for feature extracting. When False, we finetune the whole model,
#   when True we only update the reshaped layer params
feature_extract = True

class QCDataset(Dataset):

    def __init__(self, paths, transform=None):
        self.paths = paths
        self.transform = transform
        ds = qc_ds.Dataset(0, 0, 0, 0)
        (frontImages, frontMasks, sideImages, sideMasks) = ds.get_file_names(paths)
        self.frontImages = frontImages
        self.frontMasks = frontMasks

    def __len__(self):
        return len(self.frontImages)

    def __getitem__(self, idx):

        print('Getting ', idx, ' image')
        print(self.frontImages[idx])
        print(self.frontMasks[idx])
    #    image = io.imread(self.frontImages[idx])
    #    mask = io.imread(self.frontMasks[idx])
        image = Image.open(self.frontImages[idx]).convert('RGB')
        mask = Image.open(self.frontMasks[idx]).convert('L')

        image = self.transform(image)
        mask = self.transform(mask)

        print(image.shape)
        print(mask.shape)

        return (image, mask)

def show_ds(dataset):
    print('Plotting figure')

    plt.figure(figsize=(10, 20))

    for i in range(4):
        (image, mask) = dataset[i]

        plt.tight_layout()
        plt.axis('on')

        plt.subplot(5, 5, 1 * i + 1)
        plt.imshow(image)
        plt.title("Image")

        plt.subplot(5, 5, 2 * i + 2)
        plt.imshow(mask)
        plt.title("Actual Mask")

        plt.pause(2)  # pause a bit so that plots are updated

        if i == 4:
            plt.ioff()
            plt.show()
            break

    plt.show()

def getValDS():
    validationPath = '/Users/...'
    myPath4 = '/Users/....'
    valids = [myPath4, validationPath]

    data_transform = transforms.Compose([
        transforms.RandomSizedCrop(input_size),
      #  transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
      #  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    val_ds = QCDataset(paths=valids, transform=data_transform)

    return val_ds

def getDS():
    myPath1 = '/Users/...'
    myPath2 ='/Users/...'
    myPath3 = '/Users/...'
    myPath4 = '/Users/...'
    validationPath = '/Users/...'
    allPaths = [myPath1, myPath2, myPath3]
    valids = [myPath4]

    data_transform = transforms.Compose([
            transforms.RandomSizedCrop(input_size),
          #  transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
          #  transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
        ])

    tds = QCDataset(paths=allPaths, transform=data_transform)

    return tds

def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        #outputs = outputs.float()
                        print(outputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet18
        """
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "vgg":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        """ Densenet
        """
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxilary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size

# Initialize the model for this run
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)

# Print the model we just instantiated
print(model_ft)

tds = getDS()
valds = getValDS()

tds_loader = DataLoader(tds, batch_size=batch_size, shuffle=True, num_workers=4)
valds_loader = DataLoader(valds, batch_size=batch_size, shuffle=True, num_workers=4)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Send the model to GPU
model_ft = model_ft.to(device)

# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t",name)
else:
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            print("\t",name)

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

loaders = {}
loaders['train'] = tds_loader
loaders['val'] = valds_loader

# Train and evaluate
model_ft, hist = train_model(model_ft, loaders, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

Change:

labels = labels.to(device)

to

labels = labels.to(device=device, dtype=torch.int64)

CrossEntropyLoss expects targets to be class indices, and hence Long Tensors, but you gave it a Float Tensor.

9 Likes

After that change, I get the following error.

The size of labels is the following; torch.Size([8, 1, 224, 224])

  File "/Users/.../venv/lib/python3.6/site-packages/torch/nn/functional.py", line 1970, in cross_entropy
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
  File "/Users/.../venv/lib/python3.6/site-packages/torch/nn/functional.py", line 1790, in nll_loss
    ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: multi-target not supported at /Users/administrator/nightlies/pytorch-1.0.0/wheel_build_dirs/wheel_3.6/pytorch/aten/src/THNN/generic/ClassNLLCriterion.c:21
1 Like

It seems your target has a channel dimension of 1, which is not needed using nn.CrossEntropyLoss or nn.NLLLoss.
Try to squeeze this dimension using labels.squeeze(1).

3 Likes

bro, u saved me from going into a pit of rage and self loathing.
i was training my model and the labels were initially unit8, i changed them to int8 and passed them to CE Loss. I was thinking about why is there an error as the model was fine and tried various other tweaks to the model.
turns out it was the dtype of labels which was wrong, such a silly mistake (i wanna cringe by the very thought of that).

HI i am also getting this error even though i have squeeze the labels but canot solve this problem
RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 ‘target’ in call to _thnn_nll_loss2d_forward

Is there any suggestions for me.

here is the dataloader

def __init__(self, path, data_name, batchsize=1, steps=None, shuffle=False, transforms=None):
    self.x, self.y = self.load_data(path, data_name)
    self.transforms = transforms
    self.steps = steps
    if steps is not None:
        self.idx_mapping = np.random.randint(0, self.x.shape[0], steps*batchsize)
        self.steps = self.steps * batchsize

def __len__(self):
    return self.steps if self.steps is not None else self.x.shape[0]

def __getitem__(self, index):
    if self.steps is not None:
        index = self.idx_mapping[index]

    if self.transforms is not None:def __init__(self, path, data_name, batchsize=1, steps=None, shuffle=False, transforms=None):
    self.x, self.y = self.load_data(path, data_name)
    self.transforms = transforms
    self.steps = steps
    if steps is not None:
        self.idx_mapping = np.random.randint(0, self.x.shape[0], steps*batchsize)
        self.steps = self.steps * batchsize

def __len__(self):
    return self.steps if self.steps is not None else self.x.shape[0]

def __getitem__(self, index):
    if self.steps is not None:
        index = self.idx_mapping[index]

    if self.transforms is not None:
        x, y = self.transforms(images=self.x[None, index], segmentation_maps=self.y[None, index])
    else:
        x, y = self.x[None, index], self.y[None, index]

    x, y = x.astype('float32')[0]/255., y.astype('float32')[0]/255.
    x, y = ToTensor()(x), ToTensor()(y)

    return x, y
        x, y = self.transforms(images=self.x[None, index], segmentation_maps=self.y[None, index])
    else:
        x, y = self.x[None, index], self.y[None, index]

    x, y = x.astype('float32')[0]/255., y.astype('float32')[0]/255.
    x, y = ToTensor()(x), ToTensor()(y)

    return x, y
loss function

class BCE_soft(nn.BCELoss):
def init(self, beta=0.95):
super(BCE_soft, self).init()
self.beta = beta
def forward(self, predicted, target):
cross_entropy = F.nll_loss(predicted.log(), target, size_average=False)
soft_reed = -predicted * torch.log(predicted + 1e-8)
return self.beta * cross_entropy + (1 - self.beta) * torch.sum(soft_reed)

training part

    model.train()
    for step, (x, y) in enumerate(tqdm(train_loader, desc='[TRAIN] Epoch '+str(epoch+1)+'/'+str(args.epochs))):
        if step >= args.steps:
            break
        x = x.to(device).float()
        y = y.to(device).float()

        y = y.squeeze(1)

        # dim = input.dim(y)

        optimizer.zero_grad()
        output = model(x)

        # loss
        l = criterion(output, y)
        tot_soft += l.item()
        l.backward()
        optimizer.step()

The target tensor is expected to be a LongTensor, so you could call:

cross_entropy = F.nll_loss(predicted.log(), target.long(), size_average=False)

A huge thanks for this discussion! As a newbie I was very interesting to read

Thanks for your reply. I have changed target with target.log() but still getting the same error

File “/home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/nn/functional.py”, line 2117, in nll_loss
ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 ‘target’ in call to _thnn_nll_loss2d_forward

You would have to use target.long() to create the expected LongTensor (int64) not target.log() (note the n in long).

ok now i have remove .log() with target and add in the train model in loss part as you can see in the loss y.log(). but again this error is appearing

training

    model.train()
    for step, (x, y) in enumerate(tqdm(train_loader, desc='[TRAIN] Epoch '+str(epoch+1)+'/'+str(args.epochs))):
        if step >= args.steps:
            break
        x = x.to(device).float()
        #y = y.to(device, dtype=torch.int64)
        y = y.to(device).float()

        #y = y.squeeze(1)

        # dim = input.dim(y)

        optimizer.zero_grad()
        output = model(x)

        # loss
        l = criterion(output, y.log())
        tot_soft += l.item()
        l.backward()
        optimizer.step()

You have to use y.long(), not y.log(). Please note the n in long. It’s defining the long data type, which is a 64bit integer. It’s not the logarithm used via log() (without the n).

thanks after correction now getting weird error. I cannot understand

warnings.warn(warning.format(ret))
[TRAIN] Epoch 1/3: 0%| | 1/250 [00:00<02:24, 1.73it/s]
Traceback (most recent call last):
File “main.py”, line 58, in
main(args, CORE)
File “main.py”, line 48, in main
CORE.train(args)
File “/home/ali/BioNet_project/pytorch_version/utils.py”, line 89, in train
l.backward()
File “/home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/tensor.py”, line 198, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File “/home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/autograd/init.py”, line 100, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling cublasSgemv(handle, op, m, n, &alpha, a, lda, x, incx, &beta, y, incy) (gemv at /opt/conda/conda-bld/pytorch_1587428091666/work/aten/src/ATen/cuda/CUDABlas.cpp:318)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x4e (0x7fc15acaab5e in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #1: + 0xdb9fa7 (0x7fc15bc90fa7 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cuda.so)
frame #2: at::native::(anonymous namespace)::slow_conv_transpose2d_acc_grad_parameters_cuda_template(at::Tensor const&, at::Tensor const&, at::Tensor&, at::Tensor&, at::Tensor const&, at::Tensor const&, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, int) + 0xea6 (0x7fc15d583e86 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cuda.so)
frame #3: at::native::slow_conv_transpose2d_backward_cuda(at::Tensor const&, at::Tensor const&, at::Tensor const&, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, at::Tensor const&, at::Tensor const&, std::array<bool, 3ul>) + 0x323 (0x7fc15d588c93 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cuda.so)
frame #4: + 0xe1f64d (0x7fc15bcf664d in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cuda.so)
frame #5: + 0xe28007 (0x7fc15bcff007 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cuda.so)
frame #6: + 0x29e286e (0x7fc18486e86e in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #7: + 0xe23c87 (0x7fc182cafc87 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #8: torch::autograd::generated::SlowConvTranspose2DBackward::apply(std::vector<at::Tensor, std::allocatorat::Tensor >&&) + 0x516 (0x7fc1844b7c46 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #9: + 0x2ae8215 (0x7fc184974215 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #10: torch::autograd::Engine::evaluate_function(std::shared_ptrtorch::autograd::GraphTask&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x16f3 (0x7fc184971513 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #11: torch::autograd::Engine::thread_main(std::shared_ptrtorch::autograd::GraphTask const&, bool) + 0x3d2 (0x7fc1849722f2 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #12: torch::autograd::Engine::thread_init(int) + 0x39 (0x7fc18496a969 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_cpu.so)
frame #13: torch::autograd::python::PythonEngine::thread_init(int) + 0x38 (0x7fc187cb0c38 in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/libtorch_python.so)
frame #14: + 0xc819d (0x7fc18a72b19d in /home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/lib/…/…/…/…/./libstdc++.so.6)
frame #15: + 0x9609 (0x7fc1a3348609 in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #16: clone + 0x43 (0x7fc1a326f293 in /lib/x86_64-linux-gnu/libc.so.6)

You might be running out of memory, so could reduce the batch size and rerun the code.
If that doesn’t help, could you post an executable code snippet as well as the output of python -m torch.utils.collect_env?

I have reduced the batch size but it doesn’t work and here is the code snippet

OS: Ubuntu 20.04.2 LTS
GCC version: (Ubuntu 10.2.0-5ubuntu1~20.04) 10.2.0
CMake version: version 3.16.3

Python version: 3.6
Is CUDA available: Yes
CUDA runtime version: Could not collect
GPU models and configuration: GPU 0: NVIDIA GeForce GTX 1080 Ti
Nvidia driver version: 465.19.01
cuDNN version: Probably one of the following:
/usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5
/usr/lib/x86_64-linux-gnu/libcudnn.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.0.5
/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.0.5
/usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudnn.so.8.0.4
/usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudnn_adv_infer.so.8
/usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudnn_adv_train.so.8
/usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudnn_cnn_infer.so.8
/usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudnn_cnn_train.so.8
/usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudnn_ops_infer.so.8
/usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudnn_ops_train.so.8

Versions of relevant libraries:
[pip3] numpy==1.19.2
[pip3] torch==1.5.0
[pip3] torchsummary==1.5.1
[pip3] torchvision==0.6.0a0+82fd1c8
[conda] blas 1.0 mkl defaults
[conda] mkl 2020.2 256 defaults
[conda] mkl-service 2.3.0 py36he8ac12f_0 defaults
[conda] mkl_fft 1.3.0 py36h54f3939_0 defaults
[conda] mkl_random 1.1.1 py36h0573a6f_0 defaults
[conda] pytorch 1.5.0 py3.6_cuda10.1.243_cudnn7.6.3_0 pytorch
[conda] torchsummary 1.5.1 pypi_0 pypi
[conda] torchvision 0.6.0 py36_cu101 pytorch

i have used this device = torch.device(“cuda” if torch.cuda.is_available() else “cpu”)

now getting this error

File “main.py”, line 58, in
main(args, CORE)
File “main.py”, line 48, in main
CORE.train(args)
File “/home/ali/BioNet_project/pytorch_version/utils.py”, line 90, in train
l.backward()
File “/home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/tensor.py”, line 198, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File “/home/ali/anaconda3/envs/torch/lib/python3.6/site-packages/torch/autograd/init.py”, line 100, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: cuda runtime error (700) : an illegal memory access was encountered at /opt/conda/conda-bld/pytorch_1587428091666/work/aten/src/THCUNN/generic/SpatialClassNLLCriterion.cu:240

Could you update to the latest PyTorch release, please?
The 1.5.0 release was missing device assert statements and based in the illegal memory access in nn.NLLLoss I assume your target is just out of bounds.

Hi sorry for late response. I have reduced the batch size now using batch_size=1 and also update the lastest version of Pytorch as according to your suggestions but finally after all again got the error.

File “/home/ali/BioNet_project/pytorch_version/utils.py”, line 89, in train
tot_soft += l.item()
RuntimeError: CUDA error: device-side assert triggered