Size mismatch, m1: [16 x 4096], m2: [1024 x 3] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:41

I’ve seen very similar posts, but I’m quite new to PyTorch and couldn’t figure this out. I’m getting a

RuntimeError: size mismatch, m1: [16 x 4096], m2: [1024 x 3] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:136

I know this is occurring because the input feature dimensions don't match, but I'm not sure how to fix it.

##################
# DATA FUNCTIONS #
##################

def channels_last_to_first(img):
    # (H, W, C) -> (C, H, W)
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)
    return img 

def preprocess_input(img, model): 
    # cv2 loads images as BGR; reverse the channel order to RGB 
    img = img[..., ::-1].astype('float32')
    model_min = model.input_range[0] ; model_max = model.input_range[1] 
    img_min = float(np.min(img)) ; img_max = float(np.max(img))
    img_range = img_max - img_min 
    model_range = model_max - model_min 
    img = (((img - img_min) * model_range) / img_range) + model_min 
    img[..., 0] -= model.mean[0] 
    img[..., 1] -= model.mean[1] 
    img[..., 2] -= model.mean[2] 
    img[..., 0] /= model.std[0] 
    img[..., 1] /= model.std[1] 
    img[..., 2] /= model.std[2] 
    return img

class CXRDataset(Dataset): 
    #
    def __init__(self, imgfiles, labels, resize=None, preprocess=None, transform=None): 
        self.imgfiles   = imgfiles
        self.labels     = labels 
        self.preprocess = preprocess
        self.resize     = resize
        self.transform  = transform
    #
    def __len__(self): 
        return len(self.imgfiles) 
    # 
    def __getitem__(self, i): 
        X = cv2.imread(self.imgfiles[i])
        if self.resize: X = self.resize(X)
        if self.transform: X = self.transform(image=X)['image']
        # preprocess while the image is still channels-last, since
        # preprocess_input indexes channels with img[..., c]
        if self.preprocess: X = self.preprocess(X) 
        X = channels_last_to_first(X)
        y = np.asarray(self.labels[i])
        return torch.from_numpy(X).type('torch.FloatTensor'), torch.from_numpy(y).type('torch.LongTensor')  

##########
# SCRIPT #
##########

print (">>CNNs for CXRs<<")
device = torch.device('cpu')  # a bare torch.device('cpu') call has no effect; keep a handle if you need it
torch.backends.cudnn.benchmark = True  # only relevant when running on a CUDA GPU

if not os.path.exists(save_dir): os.makedirs(save_dir) 

cxr_df = pd.read_csv(data_splits) 

split = 'split{}'.format(val_split)

train_df = cxr_df[cxr_df[split] == 'train'].reset_index(drop=True)
valid_df = cxr_df[cxr_df[split] == 'valid'].reset_index(drop=True)
print ('TRAIN : n = {}'.format(train_df.shape[0]))
print ('VALID : n = {}'.format(valid_df.shape[0]))

# Run model script 
print ('Loading pretrained model [{}] ...'.format(model)) 

# exec(open(load_model).read())
m = pretrainedmodels.__dict__['densenet121'](num_classes=1000, pretrained='imagenet') 
dim_feats = m.last_linear.in_features 
print(dim_feats)
m.last_linear = nn.Sequential(nn.Dropout(_dropout_p), nn.Linear(dim_feats, _nb_classes)) # Must change dropout_p and nb_classes in load_model python file
m.train() 

params = {'batch_size':  _batch_size, 
          'shuffle':     True, 
          'num_workers': 0} # use 0 workers on CPU, ~4 on GPU



# Set up preprocessing function with model 
pp = partial(preprocess_input, model=m) 

print ('Setting up data loaders ...')
train_images = [os.path.join(data_dir, _) for _ in train_df.fileName] 
train_set = CXRDataset(imgfiles=train_images,
                       labels=train_df.label,
                       preprocess=pp, 
                       transform=train_aug)
train_gen = DataLoader(train_set, **params) 

valid_images = [os.path.join(data_dir, _) for _ in valid_df.fileName] 
valid_set = CXRDataset(imgfiles=valid_images,
                       labels=valid_df.label,
                       preprocess=pp)
valid_gen = DataLoader(valid_set, **params) 
# Calculate inverse frequency weights based on training data distribution 
weights = [] 

weights.append(1. / len(np.where(train_df.label==0)[0]))
weights.append(1. / len(np.where(train_df.label==1)[0]))
weights.append(1. / len(np.where(train_df.label==2)[0]))

weights = np.asarray(weights) 
#weights *= args.nb_classes / float(np.sum(weights))

criterion = nn.CrossEntropyLoss(weight=torch.from_numpy(weights).type('torch.FloatTensor'))
optimizer = optim.Adam(m.parameters(), 
                       lr=_initial_lr,
                       weight_decay=_weight_decay)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', 
                                                 factor=_annealing_factor, 
                                                 patience=_lr_patience, 
                                                 threshold=_min_delta, 
                                                 threshold_mode='abs', 
                                                 verbose=True)

best_auc   = 0. 
stopping   = 0
num_epochs = 0 ; steps = 0 ; steps_per_epoch = _steps_per_epoch
start_time = datetime.datetime.now() 
print ('TRAINING : START')

while num_epochs < _max_epochs: 
    running_loss = 0.
    for i, data in enumerate(train_gen):
        batch, labels = data 
        optimizer.zero_grad()
        
        output = m(batch)
        loss = criterion(output, labels)
        loss.backward() 
        optimizer.step()
        running_loss += loss.item()
        steps += 1
        if steps % _verbosity == 0:  # print every _verbosity mini-batches
            print('epoch {epoch}, batch {batch} : loss = {train_loss:.4f}'.format(epoch=str(num_epochs + 1).zfill(3), batch=steps, train_loss=running_loss / _verbosity))
            running_loss = 0.
        if steps % steps_per_epoch == 0: 
            # Validate 
            with torch.no_grad():
                m = m.eval()
                val_loss = 0.
                val_y_pred = [] ; val_y_true = []
                for data in valid_gen: 
                    batch, labels = data  
                    # keep the batch 4-D (N, C, H, W); the conv layers cannot take flattened input
                    output = m(batch)
                    loss = criterion(output, labels)
                    val_loss += loss.item()
                    val_y_pred.extend(output.cpu().numpy())
                    val_y_true.extend(labels.numpy())
            val_y_pred = np.asarray(val_y_pred) 
            val_y_true = np.asarray(val_y_true) 
            val_loss /= float(len(valid_gen))
            val_auc_binary = roc_auc_score(val_y_true, val_y_pred[:,-1])
            print ('epoch {epoch} // VALIDATION : loss = {loss:.4f}, auc = {auc:.4f}'.format(epoch=str(num_epochs + 1).zfill(3), loss=val_loss, auc=val_auc_binary))
            scheduler.step(val_auc_binary)
            torch.save(m.state_dict(), os.path.join(save_dir, '{arch}_{epoch}-{val_loss:.4f}-{val_auc:.4f}.pth'.format(arch=model.upper(), epoch=str(num_epochs + 1).zfill(3), val_loss=val_loss, val_auc=val_auc_binary)))  # m is the nn.Module, so m.upper() would fail; use the model-name string
            m = m.train()
            # Early stopping
            if val_auc_binary > (best_auc + _min_delta): 
                best_auc = val_auc_binary
                stopping = 0 
            else: 
                stopping += 1 
            if stopping >= _stop_patience: 
                num_epochs = _max_epochs
                break 
            num_epochs += 1
            steps = 0 
print ('TRAINING : END') 
print ('Training took {}\n'.format(datetime.datetime.now() - start_time))

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-11-aa0dc90d0d4d> in <module>
     11         optimizer.zero_grad()
     12 
---> 13         output = m(batch)
     14         loss = criterion(output, labels)
     15         loss.backward()

~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

~/ckk_covid/lib/python3.5/site-packages/pretrainedmodels/models/torchvision_models.py in forward(self, input)
    197     def forward(self, input):
    198         x = self.features(input)
--> 199         x = self.logits(x)
    200         return x
    201 

~/ckk_covid/lib/python3.5/site-packages/pretrainedmodels/models/torchvision_models.py in logits(self, features)
    192         x = F.avg_pool2d(x, kernel_size=7, stride=1)
    193         x = x.view(x.size(0), -1)
--> 194         x = self.last_linear(x)
    195         return x
    196 

~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/container.py in forward(self, input)
     98     def forward(self, input):
     99         for module in self:
--> 100             input = module(input)
    101         return input
    102 

~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

~/ckk_covid/lib/python3.5/site-packages/torch/nn/modules/linear.py in forward(self, input)
     85 
     86     def forward(self, input):
---> 87         return F.linear(input, self.weight, self.bias)
     88 
     89     def extra_repr(self):

~/ckk_covid/lib/python3.5/site-packages/torch/nn/functional.py in linear(input, weight, bias)
   1368     if input.dim() == 2 and bias is not None:
   1369         # fused op is marginally faster
-> 1370         ret = torch.addmm(bias, input, weight.t())
   1371     else:
   1372         output = input.matmul(weight.t())

RuntimeError: size mismatch, m1: [16 x 4096], m2: [1024 x 3] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:136

This error seems to be raised in the self.last_linear layer, which creates this shape mismatch.
E.g. this code snippet would raise the same error:

input = torch.randn(16, 4096)
weight = torch.randn(1024, 3)
torch.matmul(input, weight)
> RuntimeError: size mismatch, m1: [16 x 4096], m2: [1024 x 3] at ..\aten\src\TH/generic/THTensorMath.cpp:41

Most likely the in_features of self.last_linear are wrongly defined and you would have to change them to 4096.
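
For example (a minimal sketch; the dropout probability and class count are placeholders for the _dropout_p and _nb_classes config values in your script):

import torch
import torch.nn as nn

# The error shows [16 x 4096] activations, so in_features must be 4096
head = nn.Sequential(nn.Dropout(0.5), nn.Linear(4096, 3))
features = torch.randn(16, 4096)  # same shape as m1 in the error message
print(head(features).shape)       # torch.Size([16, 3])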

Thanks for taking a look! I resized the images from 256 to 224, and this actually solved the error, but I don't understand why it worked. Do you know why?


Without seeing the model architecture, my guess is that you were flattening the activations at one point and did not use an adaptive pooling layer, which would relax the shape condition.
The original shape mismatch is a 4x increase in the activation size, so I'm wondering why changing the input from 256 to 224 would solve this issue.
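
For what it's worth, the F.avg_pool2d(x, kernel_size=7, stride=1) call visible in the traceback would explain both the 4x factor and the fix: densenet121 downsamples by a factor of 32, so a 256x256 input yields an 8x8 feature map that the fixed 7x7 pooling reduces to 2x2 (1024 * 2 * 2 = 4096 flattened features), while a 224x224 input yields 7x7, which pools to 1x1 (1024 features, matching Linear(1024, 3)). A sketch of the shape arithmetic (the feature shapes are assumptions based on densenet121's stride of 32):

import torch
import torch.nn.functional as F

# Stand-in densenet121 feature maps: the backbone's overall stride is 32
feat_256 = torch.randn(16, 1024, 8, 8)  # 256 / 32 = 8
feat_224 = torch.randn(16, 1024, 7, 7)  # 224 / 32 = 7

# pretrainedmodels pools with a fixed 7x7 kernel (see the traceback above)
print(F.avg_pool2d(feat_256, kernel_size=7, stride=1).flatten(1).shape)  # torch.Size([16, 4096])
print(F.avg_pool2d(feat_224, kernel_size=7, stride=1).flatten(1).shape)  # torch.Size([16, 1024])

# Adaptive pooling, as suggested above, always yields 1x1 and removes the size constraint
print(F.adaptive_avg_pool2d(feat_256, 1).flatten(1).shape)               # torch.Size([16, 1024])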