I’ve trained a small size CNN binary classifier in Theano. To have a simpler code, I wanted to port the trained weights to PyTorch or numpy forward pass for predictions. The predictions by original Theano program are satisfying but the PyTorch forward pass predicted all the examples to one class.
Here is how I save trained weights in Theano using h5py:
layer0_w = layer0.W.get_value(borrow=True)
layer0_b = layer0.b.get_value(borrow=True)
layer1_w = layer1.W.get_value(borrow=True)
layer1_b = layer1.b.get_value(borrow=True)
layer2_w = layer2.W.get_value(borrow=True)
layer2_b = layer2.b.get_value(borrow=True)
sm_w = layer_softmax.W.get_value(borrow=True)
sm_b = layer_softmax.b.get_value(borrow=True)
h5_l0w = h5py.File('./model/layer0_w.h5', 'w')
h5_l0w.create_dataset('layer0_w', data=layer0_w)
h5_l0b = h5py.File('./model/layer0_b.h5', 'w')
h5_l0b.create_dataset('layer0_b', data=layer0_b)
h5_l1w = h5py.File('./model/layer1_w.h5', 'w')
h5_l1w.create_dataset('layer1_w', data=layer1_w)
h5_l1b = h5py.File('./model/layer1_b.h5', 'w')
h5_l1b.create_dataset('layer1_b', data=layer1_b)
h5_l2w = h5py.File('./model/layer2_w.h5', 'w')
h5_l2w.create_dataset('layer2_w', data=layer2_w)
h5_l2b = h5py.File('./model/layer2_b.h5', 'w')
h5_l2b.create_dataset('layer2_b', data=layer2_b)
h5_smw = h5py.File('./model/softmax_w.h5', 'w')
h5_smw.create_dataset('softmax_w', data=sm_w)
h5_smb = h5py.File('./model/softmax_b.h5', 'w')
h5_smb.create_dataset('softmax_b', data=sm_b)
Then load the weights to build a forward pass using Pytorch and Numpy:
import torch
import numpy as np
import torch.nn.functional as F
def model(data):
conv0_out = F.conv2d(input=np2var(data),
weight=np2var(layer0_w),
bias=np2var(layer0_b)
)
layer0_out = relu(var2np(conv0_out))
conv1_out = F.conv2d(input=np2var(layer0_out),
weight=np2var(layer1_w),
bias=np2var(layer1_b)
)
layer1_out = np.max(relu(var2np(conv1_out)), axis=2)
dense_out=relu(np.matmul(layer1_out, layer2_w) + layer2_b)
softmax_out = softmax(np.matmul(dense_out, softmax_w) + softmax_b)
return softmax_out
def relu(x):
return x * (x > 0)
def np2var(x):
return torch.autograd.Variable(torch.from_numpy(x))
def var2np(x):
return x.data.numpy()
def softmax(x):
e_x = np.exp(x - np.max(x))
return e_x / e_x.sum()
The input and kernel shapes for conv2d functions are the same for Theano and PyTorch, and network structures in two frameworks are the same. I couldn’t detect any errors step by step. What could go wrong here?