[deep cluster] How to resolve a runtime error due to a tensor size mismatch?

I am trying to implement DeepCluster for unsupervised learning (https://github.com/facebookresearch/deepcluster) using PyTorch. My dataset consists of 28x28x1 images, and I have built a data loader for it. When I run this program, I get a runtime error:

RuntimeError: size mismatch, m1: [256 x 9216], m2: [4096 x 10] at /Users/soumith/miniconda2/conda-bld/pytorch_1532623076075/work/aten/src/TH/generic/THTensorMath.cpp:2070

These are my hyper-parameters:
batch_size=256
learning_rate = 0.05
num_epochs = 10

Below are the train function, the AlexNet model definition, and the main script:

def train(loader, model, crit, opt, epoch):
    """Training of the CNN.
        Args:
            loader (torch.utils.data.DataLoader): Data loader
            model (nn.Module): CNN
            crit (torch.nn): loss
            opt (torch.optim.SGD): optimizer for every parameter with
                                   requires_grad=True in model, except the top layer
            epoch (int)
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()
    forward_time = AverageMeter()
    backward_time = AverageMeter()

    # switch to train mode
    model.train()

    # create an optimizer for the last fc layer
    optimizer_tl = torch.optim.SGD(
        model.top_layer.parameters(),
        lr=args.lr,
        weight_decay=10**args.wd,
    )

    end = time.time()
    for i, (input_tensor, target) in enumerate(loader):
        data_time.update(time.time() - end)

        # save checkpoint
        n = len(loader) * epoch + i
        if n % args.checkpoints == 0:
            path = os.path.join(
                args.exp,
                'checkpoints',
                'checkpoint_' + str(n // args.checkpoints) + '.pth.tar',
            )
            if args.verbose:
                print('Save checkpoint at: {0}'.format(path))
            torch.save({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer' : opt.state_dict()
            }, path)

        input_var = torch.autograd.Variable(input_tensor)
        target_var = torch.autograd.Variable(target)

        output = model(input_var)
        loss = crit(output, target_var)

        # record loss
        losses.update(loss.item(), input_tensor.size(0))

        # compute gradient and do SGD step
        opt.zero_grad()
        optimizer_tl.zero_grad()
        loss.backward()
        opt.step()
        optimizer_tl.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.verbose and (i % 200) == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss: {loss.val:.4f} ({loss.avg:.4f})'
                  .format(epoch, i, len(loader), batch_time=batch_time,
                          data_time=data_time, loss=losses))

    return losses.avg

# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import math

import numpy as np
import torch
import torch.nn as nn

__all__ = [ 'AlexNet', 'alexnet']
 
# (number of filters, kernel size, stride, pad); 'M' marks a max-pooling layer
CFG = {
    '2012': [(96, 11, 4, 2), 'M', (256, 5, 1, 2), 'M', (384, 3, 1, 1), (384, 3, 1, 1), (256, 3, 1, 1), 'M']
}


class AlexNet(nn.Module):
    def __init__(self, features, num_classes, sobel):
        super(AlexNet, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(nn.Dropout(0.5),
                            nn.Linear(256 * 6 * 6, 4096),
                            nn.ReLU(inplace=True),
                            nn.Dropout(0.5),
                            nn.Linear(4096, 4096),
                            nn.ReLU(inplace=True))

        self.top_layer = nn.Linear(4096, num_classes)
        self._initialize_weights()

        if sobel:
            grayscale = nn.Conv2d(3, 1, kernel_size=1, stride=1, padding=0)
            grayscale.weight.data.fill_(1.0 / 3.0)
            grayscale.bias.data.zero_()
            sobel_filter = nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1)
            sobel_filter.weight.data[0, 0].copy_(
                torch.FloatTensor([[1, 0, -1], [2, 0, -2], [1, 0, -1]])
            )
            sobel_filter.weight.data[1, 0].copy_(
                torch.FloatTensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
            )
            sobel_filter.bias.data.zero_()
            self.sobel = nn.Sequential(grayscale, sobel_filter)
            for p in self.sobel.parameters():
                p.requires_grad = False
        else:
            self.sobel = None

    def forward(self, x):
        if self.sobel:
            x = self.sobel(x)
        x = self.features(x)
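        # flatten: with a 224x224 input, the conv stack ends at 256 feature
        # maps of size 6x6, i.e. 256 * 6 * 6 = 9216 values per sample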
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        if self.top_layer:
            x = self.top_layer(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                for i in range(m.out_channels):
                    m.weight.data[i].normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def make_layers_features(cfg, input_dim, bn):
    layers = []
    in_channels = input_dim
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=3, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v[0], kernel_size=v[1], stride=v[2], padding=v[3])
            if bn:
                layers += [conv2d, nn.BatchNorm2d(v[0]), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v[0]
    return nn.Sequential(*layers)


def alexnet(sobel=False, bn=True, out=1000):
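    # the feature extractor sees 2 channels when Sobel preprocessing is used
    # (the two Sobel responses) and 3 (RGB) channels when it is not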
    dim = 2 + int(not sobel)
    model = AlexNet(make_layers_features(CFG['2012'], dim, bn=bn), out, sobel)
    return model

import argparse
import os
import pickle
import time

import faiss
import numpy as np
from sklearn.metrics.cluster import normalized_mutual_info_score
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import clustering
import models
from util import AverageMeter, Logger, UnifLabelSampler

import warnings
warnings.filterwarnings('ignore')

parser = argparse.ArgumentParser(description='PyTorch Implementation of DeepCluster')

parser.add_argument('data', metavar='DIR', help='path to dataset')
parser.add_argument('--arch', '-a', type=str, metavar='ARCH',
                    choices=['alexnet', 'vgg16'], default='alexnet',
                    help='CNN architecture (default: alexnet)')
parser.add_argument('--sobel', action='store_true', help='Sobel filtering')
parser.add_argument('--clustering', type=str, choices=['Kmeans', 'PIC'],
                    default='Kmeans', help='clustering algorithm (default: Kmeans)')
parser.add_argument('--nmb_cluster', '--k', type=int, default=10000,
                    help='number of cluster for k-means (default: 10000)')
parser.add_argument('--lr', default=0.05, type=float,
                    help='learning rate (default: 0.05)')
parser.add_argument('--wd', default=-5, type=float,
                    help='weight decay pow (default: -5)')
parser.add_argument('--reassign', type=float, default=1.,
                    help="""how many epochs of training between two consecutive
                    reassignments of clusters (default: 1)""")
parser.add_argument('--workers', default=4, type=int,
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', type=int, default=200,
                    help='number of total epochs to run (default: 200)')
parser.add_argument('--start_epoch', default=0, type=int,
                    help='manual epoch number (useful on restarts) (default: 0)')
parser.add_argument('--batch', default=256, type=int,
                    help='mini-batch size (default: 256)')
parser.add_argument('--momentum', default=0.9, type=float, help='momentum (default: 0.9)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to checkpoint (default: None)')
parser.add_argument('--checkpoints', type=int, default=25000,
                    help='how many iterations between two checkpoints (default: 25000)')
parser.add_argument('--seed', type=int, default=31, help='random seed (default: 31)')
parser.add_argument('--exp', type=str, default='', help='path to exp folder')
parser.add_argument('--verbose', action='store_true', help='chatty')

args = parser.parse_args(args=["./MNIST",
                               "--arch", "alexnet",
                               "--clustering", "Kmeans",
                               "--nmb_cluster", "10",
                               "--epochs", "10",
                               "--batch", "256",
                               "--verbose"])

# fix random seeds
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)

# CNN
if args.verbose:
    print('Architecture: {}'.format(args.arch))
    
model = models.__dict__[args.arch](sobel=args.sobel)
fd = int(model.top_layer.weight.size()[1])
model.top_layer = None
model.features = torch.nn.DataParallel(model.features)
cudnn.benchmark = True

# create optimizer
optimizer = torch.optim.SGD(
    filter(lambda x: x.requires_grad, model.parameters()),
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=10**args.wd,
)

# define loss function
criterion = nn.CrossEntropyLoss()

# optionally resume from a checkpoint
if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        # remove top_layer parameters from checkpoint
        for key in checkpoint['state_dict']:
            if 'top_layer' in key:
                del checkpoint['state_dict'][key]
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})"
                .format(args.resume, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

# creating checkpoint repo
exp_check = os.path.join(args.exp, 'checkpoints')

if not os.path.isdir(exp_check):
    os.makedirs(exp_check)

# creating cluster assignments log
cluster_log = Logger(os.path.join(args.exp, 'clusters'))

# preprocessing of data
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

tra = [transforms.Resize(256),
       transforms.CenterCrop(224),
       transforms.ToTensor(),
       normalize]

# load the data
end = time.time()
dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra))
#dataset = datasets.MNIST(root = "./data", download = True)

if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end))
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.batch,
                                         num_workers=args.workers,
                                         pin_memory=True)

# clustering algorithm to use
deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster)

# training convnet with DeepCluster
for epoch in range(args.start_epoch, args.epochs):
    end = time.time()

    # remove head
    model.top_layer = None
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

    # get the features for the whole dataset
    features = compute_features(dataloader, model, len(dataset))

    # cluster the features
    clustering_loss = deepcluster.cluster(features, verbose=args.verbose)

    # assign pseudo-labels
    train_dataset = clustering.cluster_assign(deepcluster.images_lists,
                                              dataset.imgs)

    # uniformly sample per target
    sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)),
                               deepcluster.images_lists)

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch,
        num_workers=args.workers,
        sampler=sampler,
        pin_memory=True,
    )

    # set last fully connected layer
    mlp = list(model.classifier.children())
    # mlp.append(nn.ReLU(inplace=True))
    model.classifier = nn.Sequential(*mlp)
    model.top_layer = nn.Linear(fd, len(deepcluster.images_lists))
    model.top_layer.weight.data.normal_(0, 0.01)
    model.top_layer.bias.data.zero_()

    # train network with clusters as pseudo-labels
    end = time.time()
    loss = train(train_dataloader, model, criterion, optimizer, epoch)

    # print log
    if args.verbose:
        print('###### Epoch [{0}] ###### \n'
              'Time: {1:.3f} s\n'
              'Clustering loss: {2:.3f} \n'
              'ConvNet loss: {3:.3f}'
                .format(epoch + 1, time.time() - end, clustering_loss, loss))
        
        try:
            nmi = normalized_mutual_info_score(
            clustering.arrange_clustering(deepcluster.images_lists),
            clustering.arrange_clustering(cluster_log.data[-1])
            )
            
            print('NMI against previous assignment: {0:.3f}'.format(nmi))
            
        except IndexError:
            pass
        print('####################### \n')
    # save running checkpoint
    torch.save({'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer' : optimizer.state_dict()},
                os.path.join(args.exp, 'checkpoint.pth.tar'))

    # save cluster assignments
    cluster_log.log(deepcluster.images_lists)

Can someone help me understand what the issue is?

Hi,

DeepCluster expects the input to be an RGB image, so the channel count should be 3 instead of 1. If you look at the implementation of AlexNet in that repo, the forward method first applies a fixed 1x1 convolution that averages the RGB channels into a grayscale image and then applies a Sobel filter to the result.

Since in your case you are passing a single-channel image directly, that part of the code will throw a shape error. To fix it, you could either feed the network RGB (3-channel) images or remove the grayscale conversion from the Sobel block. A sketch of both options follows.
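For example, here is a minimal sketch of the first option, assuming your grayscale images are loaded as PIL images (which is what datasets.ImageFolder produces) and keeping the rest of the preprocessing from your script: replicate the single channel into three identical channels before ToTensor.

import torchvision.transforms as transforms

# ImageNet statistics, copied from the script above
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

tra = [transforms.Resize(256),
       transforms.CenterCrop(224),
       # replicate the grayscale channel into 3 identical channels, so the
       # 1x1 "grayscale" conv in the Sobel block (built for 3-channel input)
       # receives the shape it expects
       transforms.Grayscale(num_output_channels=3),
       transforms.ToTensor(),
       normalize]

For the second option, you would build the Sobel block as nn.Sequential(sobel_filter) without the grayscale layer; since sobel_filter is already nn.Conv2d(1, 2, ...), it accepts a single-channel input directly, and the rest of the network is unchanged because the block still outputs 2 channels.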

[Image: illustration of the DeepCluster pipeline. Source: A Visual Exploration of DeepCluster]

Hi, I am having problems setting up my environment to test this code (GitHub - facebookresearch/deepcluster: Deep Clustering for Unsupervised Learning of Visual Features). Can you help me try it?