Loss Function Error with Autoencoder on MNIST

Hello,

I have tried implementing an autoencoder for MNIST, but the loss function does not seem to accept this type of network.

Code is as follows:

from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

# Command-line options for the training script.
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument(
    '--batch-size', metavar='N', type=int, default=64,
    help='input batch size for training (default: 64)',
)
parser.add_argument(
    '--test-batch-size', metavar='N', type=int, default=1000,
    help='input batch size for testing (default: 1000)',
)
parser.add_argument(
    '--epochs', metavar='N', type=int, default=10,
    help='number of epochs to train (default: 10)',
)
parser.add_argument(
    '--no-cuda', action='store_true', default=False,
    help='disables CUDA training',
)
parser.add_argument(
    '--seed', metavar='S', type=int, default=1,
    help='random seed (default: 1)',
)
parser.add_argument(
    '--log-interval', metavar='N', type=int, default=10,
    help='how many batches to wait before logging training status',
)
args = parser.parse_args()

# CUDA is used only when it was not disabled on the command line and a
# device is actually available.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed the default generator, and the CUDA one as well when running on GPU.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Extra DataLoader keyword arguments; only populated for CUDA runs.
if args.cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}




# Training split: downloaded on first use, converted to tensors and
# normalised with the fixed mean/std pair (0.1307, 0.3081).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
# Test split: same preprocessing as the training split. It is batched with
# --test-batch-size, which was previously parsed but never used (the
# training batch size was applied here instead).
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)





class Net(nn.Module):
    """Fully connected autoencoder over inputs flattened to 784 values.

    Layer widths are 784 -> 1000 -> 500 -> 250 -> 30 (encoder), then
    30 -> 250 -> 500 -> 1000 -> 784 (decoder), followed by a final
    784 -> 784 linear layer with no activation.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Encoder: 784 -> 1000 -> 500 -> 250 -> 30.
        self.hidden = nn.Linear(784, 1000)
        self.hidden2 = nn.Linear(1000, 500)
        self.hidden3 = nn.Linear(500, 250)
        self.hidden4 = nn.Linear(250, 30)
        # Decoder: 30 -> 250 -> 500 -> 1000 -> 784.
        self.hidden5 = nn.Linear(30, 250)
        self.hidden6 = nn.Linear(250, 500)
        self.hidden7 = nn.Linear(500, 1000)
        self.hidden8 = nn.Linear(1000, 784)

        # Final linear read-out; forward() applies no activation after it.
        self.out = nn.Linear(784, 784)

    def forward(self, x):
        """Return the reconstruction of ``x`` as a ``(batch, 784)`` tensor.

        ``x`` is reshaped with ``view(-1, 784)``, so any input whose element
        count is a multiple of 784 is accepted. Every hidden layer is followed
        by a sigmoid; dropout with p=0.1 is applied between consecutive hidden
        layers.
        """
        x = x.view(-1, 784)
        hidden_layers = (
            self.hidden, self.hidden2, self.hidden3, self.hidden4,
            self.hidden5, self.hidden6, self.hidden7, self.hidden8,
        )
        for index, layer in enumerate(hidden_layers):
            if index:
                # Tie dropout to the module's mode so that model.eval()
                # really disables it and model.train() really enables it;
                # the functional form does not track the module mode itself.
                x = F.dropout(x, 0.1, training=self.training)
            x = F.sigmoid(layer(x))
        x = self.out(x)
        return x

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features



# Instantiate the autoencoder and echo its layer structure.
model = Net()
print(model)

# Move the parameters to the GPU when CUDA was selected.
if args.cuda:
    model.cuda()

# Plain SGD (momentum disabled) over every model parameter.
optimizer = optim.SGD(model.parameters(), momentum=0, lr=0.01)

def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``train_loader`` and
    ``args``. ``epoch`` is only used in the progress message, which is printed
    every ``args.log_interval`` batches.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            # NOTE(review): on this branch the second element yielded by the
            # loader is discarded and `target` becomes another GPU copy of
            # `data`. Without CUDA, `target` stays as the loader yielded it,
            # so the two code paths do not give `target` the same contents.
            data, target = data.cuda(), data.cuda()
        target = Variable(target)
        data = Variable(data)
        optimizer.zero_grad()
        output = model(data)
        # NOTE(review): the loss is computed against `data`, not `target`.
        # The TypeError reported for this line shows the criterion expecting
        # a torch.LongTensor target but receiving a torch.FloatTensor.
        loss = F.cross_entropy(output, data)#F.nll_loss(output, data)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

def test():
    """Evaluate ``model`` on ``test_loader`` and print a loss/accuracy summary.

    Uses the module-level ``model``, ``test_loader`` and ``args``.
    """
    model.eval()
    test_loss = 0
    correct = 0
    for (data, target) in test_loader:
        if args.cuda:
            # NOTE(review): as in train(), this replaces `target` with a copy
            # of `data` on the CUDA path only.
            data, target = data.cuda(), data.cuda()
        # Only `target` is wrapped with volatile=True; `data` is not.
        target = Variable(target, volatile=True)
        data = Variable(data)
        output = model(data)
        # NOTE(review): same call shape as the failing line in train()
        # (loss against `data`), so the same type error is to be expected here.
        test_loss += F.cross_entropy(output, data).data[0]# F.nll_loss(output, target).data[0] #F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
        pred = output.data.max(1)[1] # get the index of the max log-probability
        # NOTE(review): `pred` is an arg-max over the 784 outputs, compared
        # with `target`; an accuracy figure looks like a leftover from a
        # classifier and may not be meaningful for a reconstruction - confirm.
        correct += pred.eq(target.data).cpu().sum()

    # NOTE(review): one loss value per batch is accumulated above, but the sum
    # is divided by the number of samples; if the loss is already a per-batch
    # mean this under-reports the average - confirm.
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


# Driver loop: epochs are numbered from 1 to args.epochs inclusive, and each
# training pass is followed by one evaluation pass.
for epoch in range(1, args.epochs + 1):
    train(epoch)
    test()

and the error I get is,

File "mymodelc.py", line 139, in train
loss = F.cross_entropy(output, data)#F.nll_loss(output, data)
File "/home/slava/dev/miniconda2/lib/python2.7/site-packages/torch/nn/functional.py", line 533, in cross_entropy
return nll_loss(log_softmax(input), target, weight, size_average)
File "/home/slava/dev/miniconda2/lib/python2.7/site-packages/torch/nn/functional.py", line 501, in nll_loss
return f(input, target)
File "/home/slava/dev/miniconda2/lib/python2.7/site-packages/torch/nn/_functions/thnn/auto.py", line 41, in forward
output, *self.additional_args)
TypeError: FloatClassNLLCriterion_updateOutput received an invalid combination of arguments - got (int, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, bool, NoneType, torch.FloatTensor), but expected (int state, torch.FloatTensor input, torch.LongTensor target, torch.FloatTensor output, bool sizeAverage, [torch.FloatTensor weights or None], torch.FloatTensor total_weight)

Thank you

You need to cast your target variable as a Long tensor. Right now it is a float tensor. Different loss functions require the input and target to be of different types. NLLCriterion needs target labels to be Long and input to be Float.

Look closely error says it got (“int, torch.FloatTensor, torch.FloatTensor …”) while it expected (int state, torch.FloatTensor input, torch.LongTensor target…)

Change the line
target = Variable(target)
to
target = target.long()
target = Variable(target)

Hope this helps! Read more about casting tensor to different type if this doesn’t work. Here - How to cast a tensor to another type?

I tried this and got

TypeError: FloatClassNLLCriterion_updateOutput received an invalid combination of arguments - got (int, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, bool, NoneType, torch.FloatTensor), but expected (int state, torch.FloatTensor input, torch.LongTensor target, torch.FloatTensor output, bool sizeAverage, [torch.FloatTensor weights or None], torch.FloatTensor total_weight)

then I tried changing forward to return x.long() as well

and now get,

Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "mymodelc.py", line 170, in <module>
train(epoch)
File "mymodelc.py", line 140, in train
loss = F.cross_entropy(output, data)#F.nll_loss(output, data)
File "/home/slava/dev/miniconda2/lib/python2.7/site-packages/torch/nn/functional.py", line 533, in cross_entropy
return nll_loss(log_softmax(input), target, weight, size_average)
File "/home/slava/dev/miniconda2/lib/python2.7/site-packages/torch/nn/functional.py", line 434, in log_softmax
return _functions.thnn.LogSoftmax()(input)
File "/home/slava/dev/miniconda2/lib/python2.7/site-packages/torch/nn/_functions/thnn/auto.py", line 110, in forward
self._backend = type2backend[type(input)]
File "/home/slava/dev/miniconda2/lib/python2.7/site-packages/torch/_thnn/__init__.py", line 15, in __getitem__
return self.backends[name].load()
KeyError: <class 'torch.LongTensor'>

Please let me know what could be the issue, it seems like an important thing to be able to train autoencoders. Thank you

First up, I understand that you’re training an autoencoder, so you want the loss between the data and the output. In that case, you need either to make the target the same as the data, or to use a different loss function. NLL loss is used for classification into n classes. What you need is probably a different loss function.

But, if you just want to run the code you gave me here, I think I found the error. It’s probably in the line

 loss = F.cross_entropy(output, data)

Loss takes in output and TARGET, not data. When you read from the data loader into (data, target), data stores the input data and target stores the ground-truth labels. The loss is calculated on the predicted labels (output) and the ground-truth labels (target).

So, that might be the error. So, including the change I mentioned in the previous answer, all the changes are -

target = target.long()
target = Variable(target)
loss = F.cross_entropy(output, target)

so you either somehow mask your target to be your data (but I doubt that’s possible), or else you use a different loss function.

Thank you very much. Indeed now the code runs.

However, I tried changing the loss function, with everything in the code being the same except now,

F.mse_loss(output, target)

and I get

AttributeError: ‘module’ object has no attribute ‘mse_loss’

but the documentation

http://pytorch.org/docs/master/nn.html

has

torch.nn.functional.mse_loss(input, target, size_average=True)[source]

Try doing

from torch import nn
criterion = nn.MSELoss()
loss = MSELoss(output,target)
loss.backward()
1 Like

I’m guessing you meant

    criterion = nn.MSELoss()
    loss = criterion(output,target)

But now it complains again about the type, even though I still have the target.long() statement earlier

TypeError: FloatMSECriterion_updateOutput received an invalid combination of arguments - got (int, torch.FloatTensor, torch.LongTensor, torch.FloatTensor, bool), but expected (int state, torch.FloatTensor input, torch.FloatTensor target, torch.FloatTensor output, bool sizeAverage)

for CrossEntropyLoss the target is long, for MSELoss, it is float.

1 Like

I tried now

    target = target.float()
    target = Variable(target)
    data = Variable(data)
    optimizer.zero_grad()
    
    output = model(data)
    criterion = nn.MSELoss()
    loss = criterion(output,target)

and get

RuntimeError: input and target have different number of elements: input[64 x 784] has 50176 elements, while target[64] has 64 elements at /b/wheel/pytorch-src/torch/lib/THNN/generic/MSECriterion.c:12

If you don’t read the documentation, it’s much harder to help you. NLLLoss and MSELoss take targets of different formats.

The error seems very strange to me; in the documentation, MSELoss expects tensors of the same dimensions. I made the target exactly the same thing as the input, and the output has exactly the same dimensions as the input, as it is an autoencoder. Why would MSELoss complain that the dimensions do not match?

Not that it practically matters, but since you mention the documentation, perhaps there is some deeper problem: why does it say that there is a function nn.functional.mse_loss, when trying to assign the loss with it gives me an error that nn.functional has no attribute 'mse_loss'?