I get different behavior between training and testing.
My code runs through the whole training stage without any error message.
However, when I switch to testing, the error below occurs.
At test time, the input data has the same dimensions as at training time.
Why would conv2d behave differently between the two stages, and how can I fix it?
Error message
Traceback (most recent call last):
  File "main.py", line 256, in <module>
    main()
  File "main.py", line 250, in main
    test()
  File "main.py", line 240, in test
    output = model(batch_x)
  File "/usr/local/lib/python3.5/dist-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "main.py", line 111, in forward
    out_conv1 = self.conv1(c_out)
  File "/usr/local/lib/python3.5/dist-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/torch/nn/modules/container.py", line 91, in forward
    input = module(input)
  File "/usr/local/lib/python3.5/dist-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/torch/nn/modules/conv.py", line 301, in forward
    self.padding, self.dilation, self.groups)
RuntimeError: expected stride to be a single integer value or a list of 1 values to match the convolution dimensions, but got stride=[2, 2]
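For reference, a minimal standalone sketch that reproduces the same error on this PyTorch version (0.4-era, judging by the traceback) when nn.Conv2d receives a 3-D tensor instead of a 4-D (N, C, H, W) batch; the layer parameters and shapes here are made up for illustration, and newer PyTorch versions report this situation differently:

import torch
import torch.nn as nn

layer = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1)
ok = layer(torch.randn(1, 3, 64, 64))  # 4-D (N, C, H, W) input: works
bad = layer(torch.randn(3, 64, 64))    # 3-D input: raises the stride=[2, 2] RuntimeError on 0.4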
Training code
# Imports used by the snippets below; `dataset` and `printProcess` are this
# project's own module/helper, and `Net` is defined elsewhere in main.py.
import os

import torch
import torch.nn as nn
import torch.optim as optim
from tensorboardX import SummaryWriter

import dataset

def train():
    EPOCH = 20
    BATCH_SIZE = 10
    model = Net()
    model.train()
    writer = SummaryWriter()
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-6)
    criterion = nn.L1Loss(size_average=False)
    ## Deal with data
    dset = dataset.DatasetIter('path',
                               'path',
                               'path')
    data_size = len(dset)
    loader = torch.utils.data.DataLoader(
        dset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=4)
    for k in range(EPOCH):
        for step, (batch_x, batch_y) in enumerate(loader):
            optimizer.zero_grad()
            output = model(batch_x)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()
            writer.add_scalar('loss', loss.data, k*(data_size/BATCH_SIZE) + step)  # tensorboard scalar
            printProcess(block, data_size, step, k, EPOCH, loss, BATCH_SIZE)  # project-specific progress printer
        ## Save checkpoint for each epoch
        check_str = 'checkpoint_{}.pt'.format(k)
        torch.save(model.state_dict(), check_str)
    ## Save final model
    torch.save(model.state_dict(), 'path.pt')
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
conv
def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1):
    if batchNorm:
        return nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.LeakyReLU(0.1, inplace=True)
        )
    else:
        return nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True),
            nn.LeakyReLU(0.1, inplace=True)
        )
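Net itself isn't shown, but the stride=[2, 2] in the traceback suggests conv1 is built with this helper using stride=2. A hypothetical instantiation (channel counts and input size made up) showing the shapes it expects and produces:

conv1 = conv(batchNorm=True, in_planes=3, out_planes=64, kernel_size=3, stride=2)
x = torch.randn(1, 3, 128, 128)  # 4-D (N, C, H, W) batch
print(conv1(x).shape)            # stride=2 halves H and W: torch.Size([1, 64, 64, 64])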
Testing code
def test():
    BATCH_SIZE = 1
    checkpoint_pytorch = 'path'
    if os.path.isfile(checkpoint_pytorch):
        checkpoint = torch.load(checkpoint_pytorch,
                                map_location=lambda storage, loc: storage.cuda(0))
    else:
        print('No checkpoint')
        return  # bail out here, otherwise `checkpoint` is undefined below
    model = Net()
    model.load_state_dict(checkpoint)
    model.cuda()
    model.eval()
    dset = dataset.DatasetIter('path',
                               'path',
                               'path')
    loader = torch.utils.data.DataLoader(
        dset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=1)
    err = 0
    ans = []
    for step, (batch_x, batch_y) in enumerate(loader):
        print('batch_x', batch_x.shape)
        output = model(batch_x)
        output = output.data.cpu().numpy()
        ans.append(output[0])
        print(output[0])
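Since the premise is that test inputs have the same dimensions as training inputs, a quick sanity check is to pull one batch from each loader and compare dimensionality, since nn.Conv2d expects 4-D (N, C, H, W) input. train_loader and test_loader are hypothetical names for the two DataLoaders constructed above:

# Sketch: compare one batch from each loader (train_loader/test_loader are
# hypothetical handles to the DataLoaders built in train() and test()).
train_x, _ = next(iter(train_loader))
test_x, _ = next(iter(test_loader))
print('train:', train_x.dim(), tuple(train_x.shape))  # expected: 4 dims, (N, C, H, W)
print('test: ', test_x.dim(), tuple(test_x.shape))    # 3 dims here would explain the error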