Runtime error: Training loop does not handle MSELoss

I am writing a training loop for my neural net, based on code that I know works for the testing module. However, in my model I want to use MSELoss instead of CrossEntropyLoss. When I change DEFAULT_LOSS accordingly, it throws the following error:

RuntimeError: The size of tensor a (5) must match the size of tensor b (32) at non-singleton dimension 1

The code is attached below. Does anyone know what I could change to make it work?

import torch

from collections import OrderedDict
from functools import partial
from torch.autograd import Variable
from torch.nn import CrossEntropyLoss, Module, MSELoss
from torch.optim import SGD, Adam

from .utils import add_metrics_to_log, get_loader, log_to_message, ProgressBar

# DEFAULT_LOSS = CrossEntropyLoss()  # takes (N, C) scores and (N,) long class indices
DEFAULT_LOSS = MSELoss()  # needs targets with the same shape as the predictions
# DEFAULT_OPTIMIZER = partial(SGD, lr=0.001, momentum=0.9)
DEFAULT_OPTIMIZER = partial(Adam, lr=0.001)


class FitModule(Module):
    """torch.nn.Module with Keras-style `fit` and `predict` helpers."""

    def fit(self,
            X,
            y,
            batch_size=32,
            epochs=1,
            verbose=1,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            initial_epoch=0,
            seed=None,
            loss=DEFAULT_LOSS,
            optimizer=DEFAULT_OPTIMIZER,
            metrics=None):
        """Trains the model similar to Keras' .fit(...) method

        # Arguments
            X: training data Tensor.
            y: target data Tensor.
            batch_size: integer. Number of samples per gradient update.
            epochs: integer, the number of times to iterate
                over the training data arrays.
            verbose: 0, 1. Verbosity mode.
                0 = silent, 1 = verbose.
            validation_split: float between 0 and 1:
                fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate
                the loss and any model metrics
                on this data at the end of each epoch.
            validation_data: (x_val, y_val) tuple on which to evaluate
                the loss and any model metrics
                at the end of each epoch. The model will not
                be trained on this data.
            shuffle: boolean, whether to shuffle the training data
                before each epoch.
            initial_epoch: epoch at which to start training
                (useful for resuming a previous training run)
            seed: random seed.
            optimizer: training optimizer; called with the model parameters
                to build the optimizer instance.
            loss: training loss, called as `loss(y_pred, y_true)`. The
                targets must have the shape and dtype this loss expects;
                MSELoss, for example, needs `y` to have the same shape as
                the model output.
            metrics: list of functions with signatures `metric(y_true, y_pred)`
                where y_true and y_pred are both Tensors

        # Returns
            list of OrderedDicts with training metrics
        """
        # `is not None` so that seed=0 is honoured as a valid seed.
        if seed is not None and seed >= 0:
            torch.manual_seed(seed)
        # Prepare validation data
        if validation_data:
            X_val, y_val = validation_data
        elif validation_split and 0. < validation_split < 1.:
            split = int(X.size()[0] * (1. - validation_split))
            X, X_val = X[:split], X[split:]
            y, y_val = y[:split], y[split:]
        else:
            X_val, y_val = None, None
        # Build DataLoaders
        train_data = get_loader(X, y, batch_size, shuffle)
        # Compile optimizer
        opt = optimizer(self.parameters())
        # Run training loop
        logs = []
        for t in range(initial_epoch, epochs):
            if verbose:
                print("Epoch {0} / {1}".format(t+1, epochs))
            # Setup logger
            if verbose:
                pb = ProgressBar(len(train_data))
            log = OrderedDict()
            epoch_loss = 0.0
            # Run batches
            for batch_i, batch_data in enumerate(train_data):
                # Get batch data
                X_batch, y_batch = batch_data[0], batch_data[1]
                # Backprop
                opt.zero_grad()
                y_batch_pred = self(X_batch)
                batch_loss = loss(y_batch_pred, y_batch)
                batch_loss.backward()
                opt.step()
                # Update status: running mean of the per-batch losses
                epoch_loss += batch_loss.item()
                log['loss'] = float(epoch_loss) / (batch_i + 1)
                if verbose:
                    pb.bar(batch_i, log_to_message(log))
            # Run metrics
            if metrics:
                y_train_pred = self.predict(X, batch_size)
                add_metrics_to_log(log, metrics, y, y_train_pred)
            if X_val is not None and y_val is not None:
                y_val_pred = self.predict(X_val, batch_size)
                # No gradients are needed for the validation loss.
                with torch.no_grad():
                    val_loss = loss(y_val_pred, y_val)
                log['val_loss'] = val_loss.item()
                if metrics:
                    add_metrics_to_log(log, metrics, y_val, y_val_pred, 'val_')
            logs.append(log)
            if verbose:
                pb.close(log_to_message(log))
        return logs

    def predict(self, X, batch_size=32):
        """Generates output predictions for the input samples.

        Computation is done in batches.

        # Arguments
            X: input data Tensor.
            batch_size: integer.

        # Returns
            prediction Tensor (an empty Tensor if X holds no samples).
        """
        # Build DataLoader
        data = get_loader(X, batch_size=batch_size)
        # Batch prediction
        r, n = 0, X.size()[0]
        y_pred = None
        with torch.no_grad():
            for batch_data in data:
                # Predict on batch
                y_batch_pred = self(batch_data[0])
                # Infer prediction shape from the first batch
                if y_pred is None:
                    y_pred = torch.zeros((n,) + y_batch_pred.size()[1:])
                # Add to prediction tensor
                y_pred[r : min(n, r + batch_size)] = y_batch_pred
                r += batch_size
        if y_pred is None:
            # No batches were produced, so there is no output shape to infer.
            return torch.zeros(0)
        return y_pred

Would you be able to share a notebook of a working example for the CrossEntropy case?

Hi Eduardo, I have attached all the code needed to run the program, including an example run script. To run the code with CrossEntropyLoss, uncomment the `DEFAULT_LOSS = CrossEntropyLoss()` line (remove the `#`) in the code above.

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from pytorch_fitmodule import FitModule
from sklearn.datasets import make_multilabel_classification

SEED = 1701

def print_title(s):
    """Print `s` framed above and below by a rule of '=' of the same length."""
    rule = "=" * len(s)
    banner = "\n\n{0}\n{1}\n{0}".format(rule, s)
    print(banner)

##### Generate training set #####
print_title("Generating data set")

n_feats, n_classes = 200, 5
X, y = make_multilabel_classification(
    n_samples=10000, n_features=n_feats, n_classes=n_classes, n_labels=0.01,
    length=50, allow_unlabeled=False, sparse=False, return_indicator='dense',
    return_distributions=False, random_state=SEED
)
# Collapse the dense label-indicator matrix to one class index per sample.
y = np.argmax(y, axis=1)
X = torch.from_numpy(X).float()
# Long class indices of shape (n_samples,): the target format CrossEntropyLoss
# takes. MSELoss instead needs float targets shaped like the model output,
# i.e. (n_samples, n_classes), e.g. F.one_hot(y, n_classes).float().
y = torch.from_numpy(y).long()

##### Define model #####
print_title("Building model")

class MLP(FitModule):
    """Perceptron with one hidden ReLU layer that outputs log-probabilities."""

    def __init__(self, n_feats, n_classes, hidden_size=50):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(n_feats, hidden_size)
        self.fc2 = nn.Linear(hidden_size, n_classes)

    def forward(self, x):
        # Normalise over the class (last) axis explicitly; calling
        # log_softmax without `dim` is deprecated and emits a warning.
        return F.log_softmax(self.fc2(F.relu(self.fc1(x))), dim=-1)

f = MLP(n_feats, n_classes)

##### Train model #####
print_title("Training model")

def accuracy(y_true, y_pred):
    """Fraction of samples whose highest-scoring class equals the true class index."""
    return np.mean(y_true.numpy() == np.argmax(y_pred.numpy(), axis=1))

f.fit(
    X, y, epochs=50, validation_split=0.15, seed=SEED, metrics=[accuracy]
)


from pytorch_fitmodule import FitModule


import sys
from functools import partial

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

##### Data utils #####

def get_loader(X, y=None, batch_size=1, shuffle=False):
    """Convert X and y Tensors to a DataLoader

    If y is None, use a dummy Tensor so that every batch is still an
    (X_batch, y_batch) pair.

    # Arguments
        X: input data Tensor; its first dimension indexes samples.
        y: target data Tensor with the same first dimension, or None.
        batch_size: integer. Number of samples per batch.
        shuffle: boolean, whether to reshuffle the samples on each pass.

    # Returns
        DataLoader over the paired (X, y) samples.
    """
    if y is None:
        # Zeros instead of uninitialised memory keeps the dummy deterministic.
        y = torch.zeros(X.size()[0])
    return DataLoader(TensorDataset(X, y), batch_size=batch_size, shuffle=shuffle)

##### Logging #####

def add_metrics_to_log(log, metrics, y_true, y_pred, prefix=''):
    """Evaluate each metric and store the result in `log`.

    Every function in `metrics` is called as `metric(y_true, y_pred)`; its
    value is saved under the key `prefix + metric.__name__`. The same `log`
    mapping is updated in place and returned.
    """
    for fn in metrics:
        score = fn(y_true, y_pred)
        key = prefix + fn.__name__
        log[key] = score
    return log

def log_to_message(log, precision=4):
    """Render `log` as 'key: value' pairs separated by four spaces.

    Each value is formatted as a fixed-point number with `precision` decimals.
    """
    template = "{0}: {1:." + str(precision) + "f}"
    pieces = [template.format(name, value) for name, value in log.items()]
    return "    ".join(pieces)

class ProgressBar(object):
    """Text progress bar written to stdout. Cheers @ajratner

    # Arguments
        n: integer, number of steps the bar covers.
        length: integer, width of the bar in characters.
    """

    def __init__(self, n, length=40):
        # Protect against division by zero: treat an empty run as one step
        self.n      = max(1, n)
        self.nf     = float(self.n)
        self.length = length
        # Precalculate the i values that should trigger a write operation
        self.ticks = set([round(i/100.0 * self.n) for i in range(101)])
        # Always redraw on the final step so the bar can reach 100%
        self.ticks.add(self.n-1)
        self.bar(0)

    def bar(self, i, message=""):
        """Assumes i ranges through [0, n-1]"""
        if i in self.ticks:
            frac = (i+1) / self.nf
            b = int(np.ceil(frac * self.length))
            # "\r" returns to the line start so the bar is redrawn in place
            sys.stdout.write("\r[{0}{1}] {2}%\t{3}".format(
                "="*b, " "*(self.length-b), int(100*frac), message
            ))
            sys.stdout.flush()

    def close(self, message=""):
        # Move the bar to 100% before closing
        self.bar(self.n-1)
        sys.stdout.write("{0}\n\n".format(message))
        sys.stdout.flush()

The Runtime error:
RuntimeError: The size of tensor a (5) must match the size of tensor b (32) at non-singleton dimension 1

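Please check the dimensions first: MSELoss expects the target to have the same shape as the prediction, but here the model outputs a (32, 5) batch while the targets are a (32,) vector of class indices, which is exactly the 5-vs-32 mismatch in the error message. Could you also tell me on which line you are getting this error?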