I am implementing a 3d deconvolution autoencoder. The issue I am having now is that the training(and testing) loss doesn’t seems to decrease and the output of the neural network is a zero matrix.
I wonder if my training process is right, i.e., declare a net, read in the data, change the data into variable type, feed the data into the net, calculating loss, backpropogate and update the weight.
this is my net:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv3d(1, 4, 5, padding=2)
self.conv2 = nn.Conv3d(4, 16, 5, padding=2)
self.enc1 = nn.Conv3d(16, 32, 5, stride=2, padding=2)
self.enc2 = nn.Conv3d(32, 64, 5, stride=2, padding=2)
self.enc3 = nn.Conv3d(64, 128, 5, stride=2, padding=2)
self.enc4 = nn.Conv3d(128, 256, 5, stride=2, padding=2)
self.dec4 = nn.ConvTranspose3d(256, 128, 5, stride=2, padding=2)
self.dec3 = nn.ConvTranspose3d(128, 64, 5, stride=2, padding=2, output_padding=1)
self.dec2 = nn.ConvTranspose3d(64, 32, 5, stride=2, padding=2, )
self.dec1 = nn.ConvTranspose3d(32, 16, 5, stride=2, padding=2)
self.conv3= nn.Conv3d(16, 8, 5, padding=2)
self.conv4= nn.Conv3d(8, 1, 5, padding=2)
def forward(self, x):
x=F.relu(self.conv1(x))
x=F.relu(self.conv2(x))
x=F.relu(self.enc1(x))
x=F.relu(self.enc2(x))
x=F.relu(self.enc3(x))
x=F.relu(self.enc4(x))
x=F.relu(self.dec4(x))
x=F.relu(self.dec3(x))
x=F.relu(self.dec2(x))
x=F.relu(self.dec1(x))
x=F.relu(self.conv3(x))
return x
and this is my training process
#define a net
net = Net()
#using Gpus
if torch.cuda.is_available():
net.cuda()
else:
print('cuda disabled')
print(net)
optimizer=optim.SGD(net.parameters(), lr=0.0001)
criterion=nn.MSELoss()
for i_batch, sample in enumerate(dataLoader):
print('read the data')
input,target=sample['tr'].type(torch.FloatTensor), sample['gt'].type(torch.FloatTensor)
if torch.cuda.is_available():
input, target=input.unsqueeze(1).cuda(), target.unsqueeze(1).cuda()
else:
input, target=input.unsqueeze(1), target.unsqueeze(1)
input, target=Variable(input), Variable(target)
optimizer.zero_grad()
print('put the data into net')
output=net(input)
loss = criterion(output, target)
loss = loss*10
print('back propagate')
loss.backward()
optimizer.step()
print('iter %d, training loss: mse %.4f' %(i_batch, loss))
if i_batch%50==0:
id_test=random.randint(0,100)
testsample=dataSet.__getitem__(id_test)
test_input=Variable(testsample['tr'].unsqueeze(0).unsqueeze(1).type(torch.FloatTensor).cuda(), volatile=True)
test_output=net(test_input)
save_result_path='/gdata/Deconv/testresult/test%d'%int(time.time())
save_blur_path='/gdata/Deconv/blurresult/blur%d'%int(time.time())
np.save(save_result_path,test_output.squeeze().cpu().data.numpy())
np.save(save_blur_path,test_input.squeeze().cpu().data.numpy())
print('save test sample id %d blur to %s, result to path %s'%(id_test, save_blur_path, save_result_path))