I’ve been trying to convert the vgg16 model trained on the places365 dataset from caffe to pytorch, and eventually tried MMdnn, which successfully read the caffe files, converted them to intermediate representations, and then to python code. However, when it tries to do the last step (load the model and weights together and save them as a single file) or when I try to load them myself, the following error is produced:
in main()
104 # )
105 # model = torch.nn.DataParallel(model).cuda()
→ 106 model = vggpy.KitModel(“vggpy.npy”)
107 model = torch.nn.DataParallel(model).cuda()
108~\Documents\Pytorch\COCO Autoencoder\places365\vggpy.py in init(self, weight_file)
26 _weights_dict = load_weights(weight_file)
27
—> 28 self.conv1_1 = self.__conv(2, name=‘conv1_1’, in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
29 self.conv1_2 = self.__conv(2, name=‘conv1_2’, in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
30 self.conv2_1 = self.__conv(2, name=‘conv2_1’, in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)~\Documents\Pytorch\COCO Autoencoder\places365\vggpy.py in _conv(dim, name, **kwargs)
124 layer.state_dict()[‘weight’].copy(torch.from_numpy(_weights_dict[name][‘weights’]))
125 if ‘bias’ in weights_dict[name]:
→ 126 layer.state_dict()[‘bias’].copy(torch.from_numpy(_weights_dict[name][‘bias’]))
127 return layer
128RuntimeError: output with shape [64] doesn’t match the broadcast shape [1, 1, 1, 64]
This is obviously happening on the first layer as it tries to load the bias terms, without anything being input to the model, but I’m not sure whether the problem is with the model shape or the weights, and how this might be modified to then work. The full code itself does appear accurate for a constructed version of VGG:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
_weights_dict = dict()
def load_weights(weight_file):
if weight_file == None:
return
try:
weights_dict = np.load(weight_file, allow_pickle=True).item()
except:
weights_dict = np.load(weight_file, allow_pickle=True, encoding='bytes').item()
return weights_dict
class KitModel(nn.Module):
def __init__(self, weight_file):
super(KitModel, self).__init__()
global _weights_dict
_weights_dict = load_weights(weight_file)
self.conv1_1 = self.__conv(2, name='conv1_1', in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv1_2 = self.__conv(2, name='conv1_2', in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv2_1 = self.__conv(2, name='conv2_1', in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv2_2 = self.__conv(2, name='conv2_2', in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv3_1 = self.__conv(2, name='conv3_1', in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv3_2 = self.__conv(2, name='conv3_2', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv3_3 = self.__conv(2, name='conv3_3', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv4_1 = self.__conv(2, name='conv4_1', in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv4_2 = self.__conv(2, name='conv4_2', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv4_3 = self.__conv(2, name='conv4_3', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv5_1 = self.__conv(2, name='conv5_1', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv5_2 = self.__conv(2, name='conv5_2', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.conv5_3 = self.__conv(2, name='conv5_3', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True)
self.fc6_1 = self.__dense(name = 'fc6_1', in_features = 25088, out_features = 4096, bias = True)
self.fc7_1 = self.__dense(name = 'fc7_1', in_features = 4096, out_features = 4096, bias = True)
self.fc8a_1 = self.__dense(name = 'fc8a_1', in_features = 4096, out_features = 365, bias = True)
def forward(self, x):
conv1_1_pad = F.pad(x, (1, 1, 1, 1))
conv1_1 = self.conv1_1(conv1_1_pad)
relu1_1 = F.relu(conv1_1)
conv1_2_pad = F.pad(relu1_1, (1, 1, 1, 1))
conv1_2 = self.conv1_2(conv1_2_pad)
relu1_2 = F.relu(conv1_2)
pool1_pad = F.pad(relu1_2, (0, 1, 0, 1), value=float('-inf'))
pool1, pool1_idx = F.max_pool2d(pool1_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False, return_indices=True)
conv2_1_pad = F.pad(pool1, (1, 1, 1, 1))
conv2_1 = self.conv2_1(conv2_1_pad)
relu2_1 = F.relu(conv2_1)
conv2_2_pad = F.pad(relu2_1, (1, 1, 1, 1))
conv2_2 = self.conv2_2(conv2_2_pad)
relu2_2 = F.relu(conv2_2)
pool2_pad = F.pad(relu2_2, (0, 1, 0, 1), value=float('-inf'))
pool2, pool2_idx = F.max_pool2d(pool2_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False, return_indices=True)
conv3_1_pad = F.pad(pool2, (1, 1, 1, 1))
conv3_1 = self.conv3_1(conv3_1_pad)
relu3_1 = F.relu(conv3_1)
conv3_2_pad = F.pad(relu3_1, (1, 1, 1, 1))
conv3_2 = self.conv3_2(conv3_2_pad)
relu3_2 = F.relu(conv3_2)
conv3_3_pad = F.pad(relu3_2, (1, 1, 1, 1))
conv3_3 = self.conv3_3(conv3_3_pad)
relu3_3 = F.relu(conv3_3)
pool3_pad = F.pad(relu3_3, (0, 1, 0, 1), value=float('-inf'))
pool3, pool3_idx = F.max_pool2d(pool3_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False, return_indices=True)
conv4_1_pad = F.pad(pool3, (1, 1, 1, 1))
conv4_1 = self.conv4_1(conv4_1_pad)
relu4_1 = F.relu(conv4_1)
conv4_2_pad = F.pad(relu4_1, (1, 1, 1, 1))
conv4_2 = self.conv4_2(conv4_2_pad)
relu4_2 = F.relu(conv4_2)
conv4_3_pad = F.pad(relu4_2, (1, 1, 1, 1))
conv4_3 = self.conv4_3(conv4_3_pad)
relu4_3 = F.relu(conv4_3)
pool4_pad = F.pad(relu4_3, (0, 1, 0, 1), value=float('-inf'))
pool4, pool4_idx = F.max_pool2d(pool4_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False, return_indices=True)
conv5_1_pad = F.pad(pool4, (1, 1, 1, 1))
conv5_1 = self.conv5_1(conv5_1_pad)
relu5_1 = F.relu(conv5_1)
conv5_2_pad = F.pad(relu5_1, (1, 1, 1, 1))
conv5_2 = self.conv5_2(conv5_2_pad)
relu5_2 = F.relu(conv5_2)
conv5_3_pad = F.pad(relu5_2, (1, 1, 1, 1))
conv5_3 = self.conv5_3(conv5_3_pad)
relu5_3 = F.relu(conv5_3)
pool5_pad = F.pad(relu5_3, (0, 1, 0, 1), value=float('-inf'))
pool5, pool5_idx = F.max_pool2d(pool5_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False, return_indices=True)
fc6_0 = pool5.view(pool5.size(0), -1)
fc6_1 = self.fc6_1(fc6_0)
relu6 = F.relu(fc6_1)
drop6 = F.dropout(input = relu6, p = 0.5, training = self.training, inplace = True)
fc7_0 = drop6.view(drop6.size(0), -1)
fc7_1 = self.fc7_1(fc7_0)
relu7 = F.relu(fc7_1)
drop7 = F.dropout(input = relu7, p = 0.5, training = self.training, inplace = True)
fc8a_0 = drop7.view(drop7.size(0), -1)
fc8a_1 = self.fc8a_1(fc8a_0)
prob = F.softmax(fc8a_1)
return prob
@staticmethod
def __dense(name, **kwargs):
layer = nn.Linear(**kwargs)
layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['weights']))
if 'bias' in _weights_dict[name]:
layer.state_dict()['bias'].copy_(torch.from_numpy(_weights_dict[name]['bias']))
return layer
@staticmethod
def __conv(dim, name, **kwargs):
if dim == 1: layer = nn.Conv1d(**kwargs)
elif dim == 2: layer = nn.Conv2d(**kwargs)
elif dim == 3: layer = nn.Conv3d(**kwargs)
else: raise NotImplementedError()
layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['weights']))
if 'bias' in _weights_dict[name]:
layer.state_dict()['bias'].copy_(torch.from_numpy(_weights_dict[name]['bias']))
return layer
Trying _weights_dict[name][‘bias’].flatten()
and _weights_dict[name][‘bias’].reshape(1,1,1,64)
didn’t help anything, either.