Transferring weights from Keras to PyTorch

Hi and sorry for bringing this post to life. I, too, want to test if my pytorch model behaves similarly to a keras one so I tried to transfer the weights using the “keras_to_pyt” function by chirag1992m. However, after a forward pass with the two models I get different results. I am very new to pytorch and keras so I am pretty sure I have a bug somewhere, I just don’t know where to look. I attach the script (the code provided by chirag1992m at the start of the post works fine - meaning the passes from the two models are almost identical).

import random
import numpy as np
import time
import keras
import keras.backend as K
import tensorflow.compat.v1.keras.backend as backend
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten
from keras.optimizers import Adam
from keras.callbacks import TensorBoard
import tensorflow as tf

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

class DQNAgent:
    def __init__(self):

        # Main model
        self.model = self.create_model()

     
    def create_model(self):
        model = Sequential()

        model.add(Conv2D(256, (3, 3), input_shape=(10,10,3)))  
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.2))

        model.add(Conv2D(256, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.2))

        model.add(Flatten())  
        model.add(Dense(64))

        model.add(Dense(9, activation='linear'))  
        model.compile(loss="mse", optimizer=Adam(lr=0.001), metrics=['accuracy'])
        return model

  
agent = DQNAgent()



class Flatten(nn.Module):
  def forward(self, x):
    N, C, H, W = x.size() # read in N, C, H, W
    #print('here???')
    #time.sleep(4)
    return x.view(N, -1)


def keras_to_pyt(km, pm):
    weight_dict = dict()
    for layer in km.layers:
        if type(layer) is keras.layers.convolutional.Conv2D:
            weight_dict[layer.get_config()['name'] + '.weight'] = np.transpose(layer.get_weights()[0], (3, 2, 0, 1))
            weight_dict[layer.get_config()['name'] + '.bias'] = layer.get_weights()[1]
        elif type(layer) is keras.layers.Dense:
            weight_dict[layer.get_config()['name'] + '.weight'] = np.transpose(layer.get_weights()[0], (1, 0))
            weight_dict[layer.get_config()['name'] + '.bias'] = layer.get_weights()[1]
    pyt_state_dict = pm.state_dict()

    pyt_state_dict['model.0.weight'] = torch.from_numpy(weight_dict['conv2d.weight'])
    pyt_state_dict['model.0.bias'] = torch.from_numpy(weight_dict['conv2d.bias'])
    pyt_state_dict['model.4.weight'] = torch.from_numpy(weight_dict['conv2d_1.weight'])
    pyt_state_dict['model.4.bias'] = torch.from_numpy(weight_dict['conv2d_1.bias'])
    pyt_state_dict['model.9.weight'] = torch.from_numpy(weight_dict['dense.weight'])
    pyt_state_dict['model.9.bias'] = torch.from_numpy(weight_dict['dense.bias'])
    pyt_state_dict['model.10.weight'] = torch.from_numpy(weight_dict['dense_1.weight'])
    pyt_state_dict['model.10.bias'] = torch.from_numpy(weight_dict['dense_1.bias'])
    
    pm.load_state_dict(pyt_state_dict)
    return pm


class DQN(nn.Module):

    def __init__(self,  outputs):
        super(DQN, self).__init__()
        
        self.model = nn.Sequential(
            nn.Conv2d(3, 256, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(p=0.2),
            nn.Conv2d(256, 256, kernel_size=3, stride=1),
            nn.ReLU(),            
            nn.MaxPool2d(2),
            nn.Dropout(p=0.2),
            Flatten(), 
            nn.Linear(256, 64),
            nn.Linear(64, outputs),
          )          
    
    def forward(self, x):         
        return self.model(x)
    
    
policy_net = DQN(9)

inp = np.random.normal(size=(1, 3, 10, 10)).astype(dtype=np.float32)

inp_pyt = torch.autograd.Variable(torch.from_numpy(inp.copy()).float())
inp_keras = np.transpose(inp.copy(), (0, 2, 3, 1))

keras_result = agent.model.predict(x=inp_keras, verbose=1)
pyt_res = policy_net(inp_pyt).data.numpy()

print('keras res= ', keras_result)
print('pyt res= ', pyt_res)

The given script prints the following:
keras res= [[ 0.05931383 -0.24000387 0.09934839 0.08949665 -0.02977627 0.06066565
0.37287465 0.09245875 -0.12169667]]
pyt res= [[ 0.13366437 -0.16113546 -0.03684063 0.28739393 0.09155414 0.08537194
-0.14583385 0.22953041 -0.07536763]]

I would appreciate any help, thanks!