Size mismatch: RunTime error

import torch
import torch.nn as nn
from torch.autograd import Variable
from keras.models import *
from keras.layers import *
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, load_model
from keras import backend as K
K.set_image_dim_ordering('th')

import keras.backend as K
K.set_image_data_format('channels_last')
K.set_learning_phase(1)

torch.backends.cudnn.enabled = True
#print(the_model)

class PytorchToKeras(object):
    def __init__(self,pModel,kModel):
        super(PytorchToKeras,self)
        self.__source_layers = []
        self.__target_layers = []
        self.pModel = pModel
        self.kModel = kModel

        K.set_learning_phase(0)

    def __retrieve_k_layers(self):

        for i,layer in enumerate(self.kModel.layers):
            if len(layer.weights) > 0:
                self.__target_layers.append(i)

    def __retrieve_p_layers(self,input_size):

        input = torch.randn(input_size)

        input = Variable(input.unsqueeze(0))

        hooks = []

        def add_hooks(module):

            def hook(module, input, output):
                if hasattr(module,"weight"):
                    self.__source_layers.append(module)

            if not isinstance(module, nn.ModuleList) and not isinstance(module,nn.Sequential) and module != self.pModel:
                hooks.append(module.register_forward_hook(hook))

        self.pModel.apply(add_hooks)


        self.pModel(input)
        for hook in hooks:
            hook.remove()

    def convert(self,input_size):
        self.__retrieve_k_layers()
        self.__retrieve_p_layers(input_size)

        for i,(source_layer,target_layer) in enumerate(zip(self.__source_layers,self.__target_layers)):

            weight_size = len(source_layer.weight.data.size())

            transpose_dims = []

            for i in range(weight_size):
                transpose_dims.append(weight_size - i - 1)

            self.kModel.layers[target_layer].set_weights([source_layer.weight.data.numpy().transpose(transpose_dims), source_layer.bias.data.numpy()])

    def save_model(self,output_file):
        self.kModel.save(output_file)
    def save_weights(self,output_file):
        self.kModel.save_weights(output_file)



"""
We explicitly redefine the architecture since Keras has no predefined
"""


def basic_block(X, filters):
    X = ZeroPadding2D((1, 1))(X)
    X = Conv2D(filters, kernel_size=(3, 3), strides=(2, 2), use_bias=False)(X)
    X = Activation('relu')(X)
    X = ZeroPadding2D((1, 1))(X)
    X = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), use_bias=False)(X)

    return X


def convolutional_block(X, filters):
    X_shortcut = X

    X = basic_block(X, filters)

    ### Shortcut Path ###
    X_shortcut = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), use_bias=False)(X_shortcut)

    # Final step: Add shortcut value to main path
    X = Add()([X, X_shortcut])

    return X


def ResNet(input_shape=(224, 224, 3)):
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Zero Padding
    X = ZeroPadding2D((3, 3))(X_input)

    # Stage 1
    X = Conv2D(4, (7, 7), strides=(2, 2), use_bias=False)(X)
    X = Activation('relu')(X)
    X = ZeroPadding2D((1, 1))(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Stage 2
    X = basic_block(X, filters=4)

    # Stage 3
    X = convolutional_block(X, filters=6)
    X = convolutional_block(X, filters=98)
    X = convolutional_block(X, filters=160)

    # Stage 4
    X = AveragePooling2D((3, 3))(X)

    # Output Layer
    X = Flatten()(X)
    X = Dense(16, activation='softmax')(X)

    model = Model(inputs=X_input, output=X, name='HuaweiNet')

    return model

keras_model = ResNet()

path = "/neutrino/models/pretrained/huawei/huawei_0000_4_4_6_98_160.pth"

the_model = torch.load(path)

the_model.state_dict()

#Time to transfer weights

converter = PytorchToKeras(the_model,keras_model)
converter.convert((3,224,224))

#Save the weights of the converted keras model for later use
converter.save_weights("huaweinet.h5")

I have been getting this error.

Traceback (most recent call last):
  File "/tmp/pycharm_project_896/agents/pytorch2keras.py", line 155, in <module>
    converter.convert((3,224,224))
  File "/tmp/pycharm_project_896/agents/pytorch2keras.py", line 60, in convert
    self.__retrieve_p_layers(input_size)
  File "/tmp/pycharm_project_896/agents/pytorch2keras.py", line 54, in __retrieve_p_layers
    self.pModel(input)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 357, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/parallel/data_parallel.py", line 71, in forward
    return self.module(*inputs[0], **kwargs[0])
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 357, in __call__
    result = self.forward(*input, **kwargs)
  File "/tmp/pycharm_project_896/agents/training_agent.py", line 148, in forward
    x = self.fc(x)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 357, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/linear.py", line 55, in forward
    return F.linear(input, self.weight, self.bias)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/functional.py", line 835, in linear
    return torch.addmm(bias, input, weight.t())
RuntimeError: size mismatch at /pytorch/torch/lib/THC/generic/THCTensorMathBlas.cu:247

Can anyone help me?

Thanks in advance

I’m not sure, how your PyTorch model is defined as you’ve only posted the Keras model.
From the error message it seems you have a size mismatch in your linear layers somewhere.
If you run your model on the CPU, you will most likely get a better error message.

Thansk for replying. Found the error. Solved it. The PyTorch model is a custom pretrained model storing everything.

But I am getting the following error.
RuntimeError: can’t convert CUDA tensor to numpy (it doesn’t support GPU arrays). Use .cpu() to move the tensor to host memory first.

  File "/tmp/pycharm_project_896/agents/pytorch2keras.py", line 152, in <module>
    converter.convert((3,224,224))
  File "/tmp/pycharm_project_896/agents/pytorch2keras.py", line 72, in convert
    self.kModel.layers[target_layer].set_weights([source_layer.weight.data.numpy().transpose(transpose_dims), source_layer.bias.data.numpy()])
RuntimeError: can't convert CUDA tensor to numpy (it doesn't support GPU arrays). Use .cpu() to move the tensor to host memory first.

Any help is appreciated. Thank You

Hi, tensor on your GPU is not torch.tensor.
So you need to use .cpu() before you call .numpy().

Please check related topic for detail:

Shortly, it would be like:
... data.cpu().numpy()

I followed the topic. I wasn’t able to get the result I needed. It ended up giving me more errors. The code I am using to convert is as above. I have attached the error details too.

Could you please help me in adding the line as required? I am still new to PyTorch.

Thank You.