Hi,
I loaded a pre-trained model with load_state_dict(), but when I run it on the test data the input dimensions become weird. I don't know whether the problem is in how the model is saved and loaded, or in how the test dataset is fed into the network. I'm using 8 GPUs.
Maybe it's a problem with how I'm using DataParallel?
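For context, my loading code looks roughly like the sketch below (simplified, not my exact code; the checkpoint path is a placeholder). Since the printout further down shows a DataParallel nested inside another DataParallel, I suspect I wrap the model once so the 'module.' keys match and then wrap it again before testing:

import torch
import torch.nn as nn

model1 = APXM_conv3()
model1 = nn.DataParallel(model1)                # first wrap, so keys like 'module.main.0.weight' match
model1.load_state_dict(torch.load('apxm.pth'))  # 'apxm.pth' is a placeholder path
model1 = nn.DataParallel(model1).cuda()         # second wrap -> DataParallel(DataParallel(APXM_conv3))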
My network structure is as follows:
class APXM_conv3(nn.Module):
    # expected input batch shape: (N, 3, 1024, 2048)
    def __init__(self, nc=3, ndf=8):
        super(APXM_conv3, self).__init__()  # could this be super(CNN, ...) or super(Net, ...)? what's the difference?
        self.main = nn.Sequential(
            # 1. first convolution: input is (nc=3) x 1024 x 2048
            nn.Conv2d(
                in_channels=nc,    # input channels = 3
                out_channels=ndf,  # output channels = 8
                kernel_size=4,
                stride=2,
                padding=1,         # halves height and width after conv2d
            ),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),  # activation function
            # 2. state size: (ndf=8) x 512 x 1024
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1),
            # nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
            # 3. state size: (ndf*2=16) x 256 x 512
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # 4. state size: (ndf*4=32) x 128 x 256
        )
        self.regressor = nn.Sequential(
            # what's nn.Dropout used for?
            torch.nn.Linear(ndf * 4 * 128 * 256, 256),
            # what's nn.BatchNorm1d used for?
            # what's ReLU used for here?
            torch.nn.LeakyReLU(),
            torch.nn.Linear(256, 1),
        )

    def forward(self, x):
        x = x.float()
        print('input shape: ', x.size())
        x = self.main(x)
        print('inputs after model main: ', x.size())
        x = x.view(x.size(0), -1)  # flatten to (N, C*H*W)
        print(x.size(1))
        # first fully connected layer: size changes from (N, flatten) to (N, 256)
        output = self.regressor(x)
        return output
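For reference, the in_features of the first Linear layer (ndf * 4 * 128 * 256 = 1048576) is hard-coded for a 1024 x 2048 input, so any other input size changes the flattened size. Below is a quick standalone sanity check I wrote (not part of the model), assuming the three stride-2, kernel-4, padding-1 convolutions above:

def flat_size(h, w, ndf=8, n_convs=3):
    # each Conv2d above: out = floor((in + 2*padding - kernel) / stride) + 1 = floor((in - 2) / 2) + 1
    for _ in range(n_convs):
        h = (h + 2 * 1 - 4) // 2 + 1
        w = (w + 2 * 1 - 4) // 2 + 1
    return (ndf * 4) * h * w

print(flat_size(1024, 2048))  # 32 * 128 * 256 = 1048576 -> matches the Linear layer
print(flat_size(1020, 2040))  # 32 * 127 * 255 = 1036320 -> what my 1020 x 2040 test images produce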
The loaded model and the runtime error are shown below:
DataParallel(
  (module): DataParallel(
    (module): APXM_conv3(
      (main): Sequential(
        (0): Conv2d(3, 8, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
        (1): LeakyReLU(negative_slope=0.2, inplace)
        (2): Conv2d(8, 16, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
        (3): LeakyReLU(negative_slope=0.2, inplace)
        (4): Conv2d(16, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
        (5): LeakyReLU(negative_slope=0.2, inplace)
      )
      (regressor): Sequential(
        (0): Linear(in_features=1048576, out_features=256, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
        (2): Linear(in_features=256, out_features=1, bias=True)
      )
    )
  )
)
intpus: torch.Size([50, 3, 1020, 2040]) scores: torch.Size([50, 1])
input shape: torch.Size([1, 3, 1020, 2040])
input shape: torch.Size([1, 3, 1020, 2040])
input shape: torch.Size([1, 3, 1020, 2040])
input shape: torch.Size([1, 3, 1020, 2040])
input shape: torch.Size([1, 3, 1020, 2040])
input shape: torch.Size([1, 3, 1020, 2040])
input shape: torch.Size([1, 3, 1020, 2040])
inputs after model main: torch.Size([1, 32, 127, 255])
inputs after model main: torch.Size([1, 32, 127, 255])
inputs after model main: torch.Size([1, 32, 127, 255])
inputs after model main: torch.Size([1, 32, 127, 255])
inputs after model main: torch.Size([1, 32, 127, 255])
inputs after model main: torch.Size([1, 32, 127, 255])
inputs after model main: torch.Size([1, 32, 127, 255])
Traceback (most recent call last):
  File "load_model_test.py", line 78, in <module>
    outputs = model1(inputs)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/parallel/data_parallel.py", line 114, in forward
    outputs = self.parallel_apply(replicas, inputs, kwargs)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/parallel/data_parallel.py", line 124, in parallel_apply
    return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/parallel/parallel_apply.py", line 65, in parallel_apply
    raise output
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/parallel/parallel_apply.py", line 41, in _worker
    output = module(*input, **kwargs)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/parallel/data_parallel.py", line 114, in forward
    outputs = self.parallel_apply(replicas, inputs, kwargs)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/parallel/data_parallel.py", line 124, in parallel_apply
    return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/parallel/parallel_apply.py", line 65, in parallel_apply
    raise output
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/parallel/parallel_apply.py", line 41, in _worker
    output = module(*input, **kwargs)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/project/xtmp/superresoluter/approximator/model1/apxm.py", line 61, in forward
    output = self.regressor(x)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/modules/container.py", line 91, in forward
    input = module(input)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/modules/linear.py", line 55, in forward
    return F.linear(input, self.weight, self.bias)
  File "/home/home2/leichen/.local/lib/python3.5/site-packages/torch/nn/functional.py", line 992, in linear
    return torch.addmm(bias, input, weight.t())
RuntimeError: size mismatch, m1: [1 x 1036320], m2: [1048576 x 256] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:249
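If I read the error right, the two numbers in the size mismatch are exactly the flattened size my 1020 x 2040 test images actually produce versus the in_features the regressor expects, which matches the sanity check above (quick arithmetic, using the layer shapes printed earlier):

print(32 * 127 * 255)  # 1036320 -> m1, flattened size after self.main for a 1020 x 2040 input
print(32 * 128 * 256)  # 1048576 -> m2, in_features of the first Linear layer

So is the root cause the 1020 x 2040 test images, the nested DataParallel wrapping, or both? Any pointers would be appreciated.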