Training an image classification model

I am attempting to create an image classification model on recognizing german traffic signs.
I am facing the error upon running the following line of code:

history = [evaluate(model, val_loader)]
history
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-90-2b3da6205248> in <module>
----> 1 history = [evaluate(model, val_loader)]
      2 history

<ipython-input-79-40cd6b45e3d3> in evaluate(model, val_loader)
      1 def evaluate(model, val_loader):
----> 2     outputs = [model.validation_step(batch) for batch in val_loader]
      3     return model.validation_epoch_end(outputs)
      4 
      5 def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):

<ipython-input-79-40cd6b45e3d3> in <listcomp>(.0)
      1 def evaluate(model, val_loader):
----> 2     outputs = [model.validation_step(batch) for batch in val_loader]
      3     return model.validation_epoch_end(outputs)
      4 
      5 def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):

<ipython-input-78-6cf0f50fffc5> in validation_step(self, batch)
      9         images, labels = batch
     10         out = self(images)                    # Generate predictions
---> 11         loss = F.cross_entropy(out, labels)   # Calculate loss
     12         acc = accuracy(out, labels)           # Calculate accuracy
     13         return {'val_loss': loss, 'val_acc': acc}

~\Anaconda3\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2019     if size_average is not None or reduce is not None:
   2020         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2021     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2022 
   2023 

~\Anaconda3\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   1836                          .format(input.size(0), target.size(0)))
   1837     if dim == 2:
-> 1838         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   1839     elif dim == 4:
   1840         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'target' in call to _thnn_nll_loss_forward

What I initially did was:
Step 1: Load the pickle files (train, test, validation)
step 2: Converted variables to numpy array, then normalized it in float type.
step 3: Then used torch.from_numpy to convert it into torch tensors.
step 4: Using utils.TensorDataset to create datasets which will be used later on to create loaders.

xtrainTensor = torch.from_numpy(xtrain_norm)
xtestTensor = torch.from_numpy(xtest_norm)
xvalidTensor = torch.from_numpy(xvalid_norm)

ytrainTensor = torch.from_numpy(ytrain).float()
ytestTensor = torch.from_numpy(ytest).float()
yvalidTensor = torch.from_numpy(yvalid).float()

import torch.utils.data as utils

#creating tensor datasets
train_dataset = utils.TensorDataset(xtrainTensor, ytrainTensor) # create your datset
test_dataset = utils.TensorDataset(xtestTensor, ytestTensor)
valid_dataset = utils.TensorDataset(xvalidTensor, yvalidTensor)

Here’s is My evaluate and fit method

def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase 
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    return history

Base model:

class ImageClassificationBase(nn.Module):
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'val_loss': loss, 'val_acc': acc}
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))

Here’s the neural net model with input_size = 33232 and output_size = 43:

class trafficsignalModel(ImageClassificationBase):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 16, 3, 1, padding=1)
    self.conv2 = nn.Conv2d(16, 32, 3, 1, padding=1)
    self.conv3 = nn.Conv2d(32, 64, 3, 1, padding=1)
    self.fc1 = nn.Linear(4*4*64, 500)
    self.dropout1 = nn.Dropout(0.5)
    self.fc2 = nn.Linear(500, 10)
  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.max_pool2d(x, 2, 2)
    x = F.relu(self.conv2(x))
    x = F.max_pool2d(x, 2, 2)
    x = F.relu(self.conv3(x))
    x = F.max_pool2d(x, 2, 2)
    x = x.view(-1, 4*4*64)
    x = F.relu(self.fc1(x))
    x = self.dropout1(x)
    x = self.fc2(x)
    return x

Please help, I have been stuck in this problem for quite sometime now.
Thank you.

F.cross_entropy needs labels to be long tensors. You have passed in a float tensor. So in the validation_step function add the code
labels = labels.long() to convert it into a long tensor

Hi,

I think the problem is that labels need to be in long dtype not float.

This may help the problem:
loss = F.cross_entropy(out, labels.long())
This post is related RuntimeError: Expected object of scalar type Long but got scalar type Float when using CrossEntropyLoss - #2 by ptrblck.

Bests

Oh that makes sense.
I made the changes, but I get this error:

RuntimeError: Input type (torch.cuda.DoubleTensor) and weight type (torch.cuda.FloatTensor) should be the same

How do I change my input type?

Add .float at the end or use dtype=torch.float as argument for above lines. Also, remove .float for labels, the real reason of first issue.