Hi @ptrblck
I am not using any dropout layers. The architecture I am using can be seen below:
ResNet3D(
(strConv1): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
(strConv2): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
(strConv3): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
(conv_block1_32): ConvBlock(
(conv1): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
(bn1): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv3d(32, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
(bn2): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv_block32_64): residualUnit(
(conv1): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
(bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
(bn2): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(convX): Conv3d(32, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1))
(bnX): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv_block64_128): residualUnit(
(conv1): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
(bn1): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
(bn2): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(convX): Conv3d(64, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1))
(bnX): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv_block128_256): residualUnit(
(conv1): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
(bn1): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
(bn2): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(convX): Conv3d(128, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1))
(bnX): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv1d): ConvBlock1D(
(conv1): Conv1d(1, 2, kernel_size=(3,), stride=(1,), padding=(1,))
(bn1): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(fc1): Linear(in_features=788480, out_features=256, bias=True)
(fc2): Linear(in_features=256, out_features=256, bias=True)
)
I am using the following weight initialization:
def weights_init(m):
    """Per-layer weight initializer; apply with ``net.apply(weights_init)``.

    Conv layers: Xavier-uniform weights scaled by the ReLU gain, zero bias.
    BatchNorm layers: weights ~ N(1.0, 0.02), zero bias.
    Linear layers: weights ~ N(0.0, 0.02), zero bias.
    """
    if isinstance(m, (nn.Conv3d, nn.Conv1d)):
        # BUG FIX: `isinstance(m, nn.Conv3d or nn.Conv1d)` evaluated the
        # boolean expression first, yielding just nn.Conv3d — so Conv1d
        # layers were never initialized. isinstance takes a *tuple* of types.
        torch.nn.init.xavier_uniform_(m.weight.data, init.calculate_gain('relu'))
        if m.bias is not None:  # conv layers may be built with bias=False
            m.bias.data.fill_(0)
    elif isinstance(m, (nn.BatchNorm3d, nn.BatchNorm1d)):
        # BatchNorm1d added: the model's ConvBlock1D contains one (bn1),
        # which the original branch skipped.
        m.weight.data.normal_(mean=1.0, std=0.02)
        m.bias.data.fill_(0)
    elif isinstance(m, nn.Linear):
        m.weight.data.normal_(0.0, 0.02)
        m.bias.data.fill_(0)
Just to clarify: how do I call eval() on only the batchnorm layers?
Net.conv_block32_64.bn1.eval()
Something like above?