Hi everyone,
I have a problem with a network and despite all my efforts, I can’t find a solution:
Here’s an excerpt from the code:
The network is constructed as follows:
from __future__ import print_function
import torch
import random
import math
import time
from torch import nn, optim
class MyModule(nn.Module):
def __init__(self):
super(MyModule, self).__init__()
self.linears = nn.ModuleList([nn.Linear(14, 14) for i in range(12)]) # 12 inputs of 14 values in -> 14 out
self.extras = nn.Sequential(
nn.Linear(5, 15),
)
self.reseau = nn.Sequential(
nn.Linear(14*12 + 15, 780),
nn.PReLU(),
nn.Linear(780, 520),
nn.PReLU(),
nn.Linear(520, 160),
nn.Tanh(),
nn.Linear(160, 12),
)
def forward(self, x1,x2):
for i, l in enumerate(self.linears):
x1 = self.linears[i](x1)
x1 = x1.view(-1)
x2 = self.extras(x2)
x = torch.cat((x1, x2),dim=0)
x = self.reseau(x)
return x
model = MyModule()
model.cuda()
print(model)
x1 = torch.rand(12, 14)
x2 = torch.rand(5)
output = model(x1.cuda(),x2.cuda())
print(output)
Which gives at runtime:
MyModule(
(linears): ModuleList(
(0): Linear(in_features=14, out_features=14, bias=True)
(1): Linear(in_features=14, out_features=14, bias=True)
(2): Linear(in_features=14, out_features=14, bias=True)
(3): Linear(in_features=14, out_features=14, bias=True)
(4): Linear(in_features=14, out_features=14, bias=True)
(5): Linear(in_features=14, out_features=14, bias=True)
(6): Linear(in_features=14, out_features=14, bias=True)
(7): Linear(in_features=14, out_features=14, bias=True)
(8): Linear(in_features=14, out_features=14, bias=True)
(9): Linear(in_features=14, out_features=14, bias=True)
(10): Linear(in_features=14, out_features=14, bias=True)
(11): Linear(in_features=14, out_features=14, bias=True)
)
(extras): Sequential(
(0): Linear(in_features=5, out_features=15, bias=True)
)
(reseau): Sequential(
(0): Linear(in_features=183, out_features=780, bias=True) # 183 = 12*14 + 15
(1): PReLU()
(2): Linear(in_features=780, out_features=520, bias=True)
(3): PReLU()
(4): Linear(in_features=520, out_features=160, bias=True)
(5): Tanh()
(6): Linear(in_features=160, out_features=12, bias=True)
)
)
tensor([ 0.0053, -0.0556, 0.0377, 0.0156, -0.0305, -0.0137, -0.0470, -0.0658,
0.0241, -0.0616, 0.0356, 0.0459], device='cuda:0',
grad_fn=<AddBackward0>)
So far, so good. The network takes 14 values as input and has 14 values as output.
There is a second input (extras) which takes 5 values as input and a (5*3) = 15 values as output.
The two inputs are joined together with x = torch.cat((x1, x2),dim=0)
But there is one problem:
In my model, I have to multiply by 3 the outputs of each linear: the code is :
class MyModule(nn.Module):
def __init__(self):
super(MyModule, self).__init__()
self.linears = nn.ModuleList([nn.Linear(14, 14*3) for i in range(12)]) # 12 inputs of 14 values in -> 42 out
self.extras = nn.Sequential(
nn.Linear(5, 15),
)
self.reseau = nn.Sequential(
nn.Linear(14*12*3 + 15, 780), # 519 = 12*42 + 15
nn.PReLU(),
nn.Linear(780, 520),
nn.PReLU(),
nn.Linear(520, 160),
nn.Tanh(),
nn.Linear(160, 12),
)
def forward(self, x1,x2):
for i, l in enumerate(self.linears):
x1 = self.linears[i](x1)
x1 = x1.view(-1)
x2 = self.extras(x2)
x = torch.cat((x1, x2),dim=0)
x = self.reseau(x)
return x
model = MyModule()
model.cuda()
print(model)
x1 = torch.rand(12, 14)
x2 = torch.rand(5)
output = model(x1.cuda(),x2.cuda())
print(output)
MyModule(
(linears): ModuleList(
(0): Linear(in_features=14, out_features=42, bias=True)
(1): Linear(in_features=14, out_features=42, bias=True)
(2): Linear(in_features=14, out_features=42, bias=True)
(3): Linear(in_features=14, out_features=42, bias=True)
(4): Linear(in_features=14, out_features=42, bias=True)
(5): Linear(in_features=14, out_features=42, bias=True)
(6): Linear(in_features=14, out_features=42, bias=True)
(7): Linear(in_features=14, out_features=42, bias=True)
(8): Linear(in_features=14, out_features=42, bias=True)
(9): Linear(in_features=14, out_features=42, bias=True)
(10): Linear(in_features=14, out_features=42, bias=True)
(11): Linear(in_features=14, out_features=42, bias=True)
)
(extras): Sequential(
(0): Linear(in_features=5, out_features=15, bias=True)
)
(reseau): Sequential(
(0): Linear(in_features=519, out_features=780, bias=True) # 519 = 12*42 + 15
(1): PReLU()
(2): Linear(in_features=780, out_features=520, bias=True)
(3): PReLU()
(4): Linear(in_features=520, out_features=160, bias=True)
(5): Tanh()
(6): Linear(in_features=160, out_features=12, bias=True)
)
)
The pattern seems correct to me. Each linear of the input takes 14 values at the input and a (14*3) = 42 values at the output.
But I have the following error at runtime:
RuntimeError Traceback (most recent call last)
<ipython-input-2-04b30ff8f25f> in <module>
37 x2 = torch.rand(5)
38
---> 39 output = model(x1.cuda(),x2.cuda())
40
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
<ipython-input-2-04b30ff8f25f> in forward(self, x1, x2)
20 def forward(self, x1,x2):
21 for i, l in enumerate(self.linears):
---> 22 x1 = self.linears[i](x1)
23
24 x1 = x1.view(-1)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/linear.py in forward(self, input)
85
86 def forward(self, input):
---> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):
~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in linear(input, weight, bias)
1368 if input.dim() == 2 and bias is not None:
1369 # fused op is marginally faster
-> 1370 ret = torch.addmm(bias, input, weight.t())
1371 else:
1372 output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [12 x 42], m2: [14 x 42] at /tmp/pip-req-build-4baxydiv/aten/src/THC/generic/THCTensorMathBlas.cu:290
Would this multiplication of the linear output in the ModuleList be forbidden? I don’t think so. I don’t know how to solve this problem.
I hope an experienced contributor can give me a lead or a solution.
Thank you for your help.