Can't pass LongTensor to custom model (expected scalar type Long but found Float)

Greetings,

I’m currently trying to train the following model:

import torch
from torch import nn
from torch.nn import Module

class IrisModel(Module):
    def __init__(self) -> None:
        super(IrisModel, self).__init__()
        self.inLayer = nn.Linear(in_features=4, out_features=5)
        self.inLayerReLU = nn.ReLU()
        self.midLayer = nn.Linear(in_features=5, out_features=24, bias=True)
        self.midLayerReLU = nn.ReLU()
        self.outLayer = nn.Linear(in_features=24, out_features=3, bias=True)
        self.outLayerSoftMax = nn.Softmax(dim=1)

    def forward(self, x):
        out = self.inLayer(x)
        out = self.inLayerReLU(out)
        out = self.midLayer(out)
        out = self.midLayerReLU(out)
        out = self.outLayer(out)
        out = self.outLayerSoftMax(out)
        return out

My training function is the following:

def train(model, train_in, train_out, loss_fn):
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    model.train()
    iteration = 1  # avoid shadowing the built-in iter()
    for x_in, y_out in zip(train_in, train_out):
        X = torch.LongTensor(x_in.tolist())
        y = torch.LongTensor(y_out.tolist())
        print(f"{X}, {X.type()}")
        print(f"{y}, {y.type()}")

        # compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # back propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f"Loss: {loss.item():>7f}")
        iter += 1

The training data is loaded as follows:

def load_data():
    iris_label_array = ["Iris-setosa","Iris-versicolor","Iris-virginica"]
    #Load data
    iris_data = np.loadtxt("./iris.csv", delimiter=",", dtype="str", skiprows=1)
    
    #Pre-process data
    for label_index, label in enumerate(iris_label_array):

        # associate a value to each type of iris
        iris_data[np.where(iris_data[:,4] == label), 4] = label_index
    
    iris_data = iris_data.astype("float32")

    # convert data to int64 by multiplying by 10
    iris_data[:, 0:3] = iris_data[:, 0:3]*10
    iris_data = iris_data.astype("int64")
    return iris_data

# Splitting Data for later evaluation
# 80% train and 20% Evaluation
data = load_data()
input_data = data[:,0:4]
output_data = data[:,4].reshape(-1, 1)
output_data = to_categorical(output_data) # from tensorflow.keras.utils import to_categorical

train_in, test_in, train_out, test_out = train_test_split(input_data, output_data, test_size=0.20)
Running this produces the following traceback:

Traceback (most recent call last):
  File "IrisBrevitas.py", line 125, in <module>
    train(iris_model, train_in, train_out, device, categorical_crossentropy)
  File "IrisBrevitas.py", line 86, in train
    pred = model(X)
  File "/home/jacopo/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "IrisBrevitas.py", line 46, in forward
    out = self.inLayer(x)
  File "/home/jacopo/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/jacopo/.local/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 103, in forward
    return F.linear(input, self.weight, self.bias)
  File "/home/jacopo/.local/lib/python3.8/site-packages/torch/nn/functional.py", line 1848, in linear
    return torch._C._nn.linear(input, weight, bias)
RuntimeError: expected scalar type Long but found Float

This is weird to me, as checking the types of X and y shows that both come out as torch.LongTensor. I'm not exactly sure what I'm doing wrong here; any tip is appreciated.

EDIT: this is my setup:

OS: Ubuntu 20.04.4 LTS (running within WSL2 in Windows 10)
Python version: 3.8.10
PyTorch version: 1.10.1+cu102

Hi Jacopo!

Leaving the details of the error message aside, the traceback is telling
you that the error is raised by the first Linear layer in your model.

The problem is that Linear layers expect floating-point inputs: their
weights are floating point (so that they can be optimized), and the
internal matmul() requires both of its arguments to have the same dtype.

So you need to pass a float() input into your model.
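
For example, a minimal sketch of the fix (with a stand-in row instead of
your actual x_in):

import torch

x_in = [51, 35, 14, 2]                       # stand-in integer feature row
X = torch.tensor(x_in, dtype=torch.float32)  # float input for the Linear layers
# equivalently: X = torch.LongTensor(x_in).float()

print(torch.nn.Linear(4, 5)(X))              # no dtype mismatch anymore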

As to the specific error message, something like:

RuntimeError: expected scalar type Float but found Long

would be more intuitive.

Furthermore, I cannot reproduce your error message:

>>> import torch
>>> torch.__version__
'1.10.2'
>>> torch.nn.Linear(3, 1)(torch.ones(2, 3).long())
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<path_to_pytorch_install>\torch\nn\modules\module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "<path_to_pytorch_install>\torch\nn\modules\linear.py", line 103, in forward
    return F.linear(input, self.weight, self.bias)
  File "<path_to_pytorch_install>\torch\nn\functional.py", line 1848, in linear
    return torch._C._nn.linear(input, weight, bias)
RuntimeError: expected scalar type Float but found Long

Could it be that your less-intuitive version of the error message is due
to the PyTorch version you are using?

I did find a GitHub issue, "nn.Conv3d throws incorrect error message," but
it does not appear to have been acted upon.

Perhaps @albanD has some further color on what is going on here.

Best.

K. Frank

Hi,

The error depends on what the Linear thinks is the "right common dtype".
That can indeed depend on a couple of things, like the order of the arguments.
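
For illustration, a minimal sketch of the mismatch (the exact dtype named
in the message can vary across versions, as seen above):

import torch

lin = torch.nn.Linear(3, 1)                  # weights are float32
x_long = torch.ones(2, 3, dtype=torch.long)  # int64 input

try:
    lin(x_long)
except RuntimeError as e:
    # Depending on the version, this reads "expected scalar type Float
    # but found Long" or the reverse.
    print(e)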

Hi both,

thank you for your replies. After @KFrank's remark I updated my PyTorch version to 1.10.2+cu102; the error from the Linear layer is now gone, but it has moved to the loss function. This does not surprise me, since I use the following function:

def categorical_crossentropy(pred : torch.Tensor, true: torch.Tensor) -> torch.Tensor:
    """ Calculates the categorical crossentropy loss function. 
    """
    return nn.NLLLoss()(torch.log(pred), true)

To be clear, I'm using this function because my goal is to port this model from Keras to PyTorch, and the original model used the categorical cross-entropy loss. When making the port, I followed the suggestion proposed in this thread. Of course, I'm not entirely sure that this is correct at this point.

EDIT: this is what the traceback now looks like:

Traceback (most recent call last):
  File "IrisBrevitas.py", line 132, in <module>
    train(iris_model, train_in, train_out, categorical_crossentropy)
  File "IrisBrevitas.py", line 94, in train
    loss = loss_fn(pred, y)
  File "IrisBrevitas.py", line 77, in categorical_crossentropy
    return nn.NLLLoss()(torch.log(pred), true)
  File "/home/jacopo/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/jacopo/.local/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 211, in forward
    return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
  File "/home/jacopo/.local/lib/python3.8/site-packages/torch/nn/functional.py", line 2532, in nll_loss
    return torch._C._nn.nll_loss_nd(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: expected scalar type Long but found Float

nn.NLLLoss expects the target to be a LongTensor containing class indices in the range [0, nb_classes-1], so you might need to use criterion(F.log_softmax(output, dim=1), target.long()).
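
Since to_categorical produces one-hot targets, that means converting them
back to class indices first. A minimal sketch (the tensors are stand-ins
for your pred and one-hot y):

import torch
import torch.nn.functional as F

pred = torch.randn(4, 3)              # logits for 4 samples, 3 classes
one_hot = torch.eye(3)[[0, 2, 1, 0]]  # stand-in for to_categorical output

target = one_hot.argmax(dim=1)        # one-hot -> class indices (int64)
loss = F.nll_loss(F.log_softmax(pred, dim=1), target)
print(loss.item())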

Hi everybody,

apologies for the late reply. In the end I managed to solve the problem by:

  • updating the PyTorch version to 1.10.2
  • changing the loss function to nn.CrossEntropyLoss()
  • loading the dataset correctly (at first I didn't quite understand how the model should be trained, but after some thought I figured it out)

Here’s the full code:

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from torch import nn
from torch.nn import Module, functional as F
import torch
import numpy as np

class IrisModel(Module):
    def __init__(self) -> None:
        super(IrisModel, self).__init__()
        self.inLayer = nn.Linear(in_features=4, out_features=8, bias=True)
        self.midLayer = nn.Linear(in_features=8, out_features=24, bias=True)
        self.outLayer = nn.Linear(in_features=24, out_features=3, bias=True)
    
    def forward(self, x):
        out = F.relu(self.inLayer(x))
        out = F.relu(self.midLayer(out))
        return self.outLayer(out)

def train(model, train_in, train_out, is_quantized = True):
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    epochs = 250
    if is_quantized:
        X = torch.from_numpy(train_in*10).type(torch.long).to(device)
        y = torch.from_numpy(train_out).type(torch.long).to(device)
    else:
        X = torch.from_numpy(train_in).type(torch.float).to(device)
        y = torch.from_numpy(train_out).type(torch.long).to(device)
    for epc in range(epochs):
        pred = model(X)
        loss = loss_fn(pred, y)
        # back propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f"[Epoch {epc + 1}] - Loss: {loss.item():10.8f}")

def test(model, test_in, test_out, is_quantized = True):
    model.eval()
    correct = 0
    with torch.no_grad():
        if is_quantized:
            X = torch.from_numpy(test_in*10).type(torch.long).to(device)
            y = torch.from_numpy(test_out).type(torch.long).to(device)
        else:
            X = torch.from_numpy(test_in).type(torch.float).to(device)
            y = torch.from_numpy(test_out).type(torch.long).to(device)
        pred = model(X)
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    correct /= len(test_out)
    print(f"Testing: accuracy {(100*correct):>0.1f}")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
iris_model = IrisModel().to(device)
is_quantized = False

data = load_iris() # sklearn.datasets.load_iris
train_in, test_in, train_out, test_out = train_test_split(data["data"], data["target"], test_size=0.20)
train(iris_model, train_in, train_out, is_quantized)
test(iris_model, test_in, test_out, is_quantized)

The is_quantized flag is there because my original goal was to create a quantized model with the Brevitas framework from Xilinx; it is not relevant for the regular model.

My only remaining question is why the loss function used for this type of classification problem differs between Keras and PyTorch (I remember reading that the two are computed from different inputs even though the names are similar), but I guess that's out of the scope of my problem.
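
For what it's worth, a minimal sketch of the correspondence (a sketch of
the comparison, not a statement of either library's internals): PyTorch's
nn.CrossEntropyLoss takes raw logits plus class indices and fuses
log_softmax with NLLLoss, while Keras' default categorical_crossentropy
takes probabilities plus one-hot targets; both compute the same
cross-entropy.

import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)           # raw model outputs, no softmax
target = torch.tensor([0, 2, 1, 0])  # class indices

# PyTorch style: logits + class indices
ce = torch.nn.CrossEntropyLoss()(logits, target)

# Keras style: probabilities + one-hot targets, -sum(one_hot * log(prob))
probs = F.softmax(logits, dim=1)
one_hot = F.one_hot(target, num_classes=3).float()
keras_style = -(one_hot * probs.log()).sum(dim=1).mean()

print(torch.allclose(ce, keras_style))  # True: same quantity, different interface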