Hello Friends,
I was trying to convert a CapsuleNet based model written in pytorch into onnx.The following is the errro I got.
CapsuleNet(
(conv1): Conv2d(9, 256, kernel_size=(1, 1), stride=(1, 1))
(primarycaps): PrimaryCapsule(
(conv2d): Conv2d(256, 256, kernel_size=(1, 1), stride=(2, 2))
)
(digitcaps): DenseCapsule()
(decoder): Sequential(
(0): Linear(in_features=160, out_features=512, bias=True)
(1): ReLU(inplace)
(2): Linear(in_features=512, out_features=1024, bias=True)
(3): ReLU(inplace)
(4): Linear(in_features=1024, out_features=81, bias=True)
(5): Sigmoid()
)
(relu): ReLU()
)
Traceback (most recent call last):
File "capsulenet_conversion.py", line 95, in <module>
test()
File "capsulenet_conversion.py", line 92, in test
torch.onnx.export(model, dummy_input, 'file.onnx')
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/__init__.py", line 27, in export
return utils.export(*args, **kwargs)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 111, in export
_retain_param_name=_retain_param_name)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 313, in _export
_retain_param_name)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 237, in _model_to_graph
graph, torch_out = _trace_and_get_graph_from_model(model, args, training)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/onnx/utils.py", line 204, in _trace_and_get_graph_from_model
trace, torch_out = torch.jit.get_trace_graph(model, args, _force_outplace=True)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 219, in get_trace_graph
return LegacyTracedModule(f, _force_outplace, return_inputs)(*args, **kwargs)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 276, in forward
out = self.inner(*trace_inputs)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self._slow_forward(*input, **kwargs)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
result = self.forward(*input, **kwargs)
File "capsulenet_conversion.py", line 65, in forward
x = self.digitcaps(x)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self._slow_forward(*input, **kwargs)
File "/home/vijay/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 479, in _slow_forward
result = self.forward(*input, **kwargs)
File "/home/vijay/Documents/Vijay/Lidar/CapsNet-Pytorch/capsulelayers.py", line 54, in forward
x_hat = torch.squeeze(torch.matmul(self.weight, x[:, None, :, :, None]), dim=-1)
RuntimeError: The size of tensor a (1152) must match the size of tensor b (800) at non-singleton dimension 2
Please find the code of the conversion script given below:
"""
Pytorch implementation of CapsNet in paper Dynamic Routing Between Capsules.
The current version maybe only works for TensorFlow backend. Actually it will be straightforward to re-write to TF code.
Adopting to other backends should be easy, but I have not tested this.
Usage:
Launch `python CapsNet.py -h` for usage help
Result:
Validation accuracy > 99.6% after 50 epochs.
Speed: About 73s/epoch on a single GTX1070 GPU card and 43s/epoch on a GTX1080Ti GPU.
Author: Xifeng Guo, E-mail: `guoxifeng1990@163.com`, Github: `https://github.com/XifengGuo/CapsNet-Pytorch`
"""
import torch
from torch import nn
from torch.optim import Adam, lr_scheduler
from torch.autograd import Variable
from torchvision import transforms, datasets
from capsulelayers import DenseCapsule, PrimaryCapsule
class CapsuleNet(nn.Module):
"""
A Capsule Network on MNIST.
:param input_size: data size = [channels, width, height]
:param classes: number of classes
:param routings: number of routing iterations
Shape:
- Input: (batch, channels, width, height), optional (batch, classes) .
- Output:((batch, classes), (batch, channels, width, height))
"""
def __init__(self, input_size, classes, routings):
super(CapsuleNet, self).__init__()
self.input_size = input_size
self.classes = classes
self.routings = routings
# Layer 1: Just a conventional Conv2D layer
self.conv1 = nn.Conv2d(input_size[0], 256, kernel_size=1, stride=1, padding=0)
# Layer 2: Conv2D layer with `squash` activation, then reshape to [None, num_caps, dim_caps]
self.primarycaps = PrimaryCapsule(256, 256, 8, kernel_size=1, stride=2, padding=0)
# Layer 3: Capsule layer. Routing algorithm works here.
self.digitcaps = DenseCapsule(in_num_caps=32*6*6, in_dim_caps=8,
out_num_caps=classes, out_dim_caps=16, routings=routings)
# Decoder network.
self.decoder = nn.Sequential(
nn.Linear(16*classes, 512),
nn.ReLU(inplace=True),
nn.Linear(512, 1024),
nn.ReLU(inplace=True),
nn.Linear(1024, input_size[0] * input_size[1] * input_size[2]),
nn.Sigmoid()
)
self.relu = nn.ReLU()
def forward(self, x, y=None):
x = self.relu(self.conv1(x))
x = self.primarycaps(x)
x = self.digitcaps(x)
length = x.norm(dim=-1)
if y is None: # during testing, no label given. create one-hot coding using `length`
index = length.max(dim=1)[1]
y = Variable(torch.zeros(length.size()).scatter_(1, index.view(-1, 1).cpu().data, 1.).cuda())
reconstruction = self.decoder((x * y[:, :, None]).view(x.size(0), -1))
return length, reconstruction.view(-1, *self.input_size)
def test():
# model = ClassificationPointNet(10, 0.3, 3).to('cuda')
# model = model.cuda()
#dummy_input = torch.randn(64, 2, 3).to('cuda')
#x = model(dummy_input)
model = CapsuleNet(input_size=[9, 3, 3], classes=10, routings=3)
model.cuda()
print(model)
dummy_input = torch.randn(256, 9, 9, 9).to('cuda')
#torch.onnx.export(model, dummy_input, 'file.onnx')
torch.save(model.state_dict(), '/home/vijay/Documents/Vijay/Lidar/CapsNet-Pytorch/data/MNIST/processed/training.pt')
#device = torch.device("cuda")
state_dict = torch.load('/home/vijay/Documents/Vijay/Lidar/CapsNet-Pytorch/data/MNIST/processed/training.pt')
model.load_state_dict(state_dict)
model.to('cuda')
# print(tensor.device)
dummy_input = torch.randn(256, 9, 9,9).to('cuda')
torch.onnx.export(model, dummy_input, 'file.onnx')
if __name__ == '__main__':
test()