Hey all,
I have the following custom convolutional module that i initialize the weights using nn.Parameters:
class DilatedConv(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size):
super(DilatedConv, self).__init__()
# Initialize kernel
self.kernel = torch.randn(out_channels, in_channels, kernel_size, kernel_size)
# Register the trainable parameters
self.weight = nn.Parameter(self.kernel)
self.bias = nn.Parameter(torch.randn(out_channels))
def forward(self, x, stride, padding, dilation):
# Do a functional call so we can
# use the same weights but different arguments
return F.conv2d(x, self.weight, bias=self.bias,
stride=stride, padding=padding, dilation=dilation
)
class SDCLayer(nn.Module):
def __init__(self, input_size, n_conv, kernel_sizes, n_kernels, dilations):
super(SDCLayer, self).__init__()
self.input_size = input_size
self.n_conv = n_conv
self.kernel_sizes = kernel_sizes
self.n_kernels = n_kernels
self.dilations = dilations
self.dilated_conv = DilatedConv(self.input_size, self.n_kernels, self.kernel_sizes)
self.elu = nn.ELU()
def weights_init_normal():
pass
def forward(self, x):
# The convolutions are sharing weights
# so just assign new values to it
# for each iteration
sdc = torch.tensor([]).to("cuda" if torch.cuda.is_available() else "cpu")
for i in range(0, self.n_conv):
x_d = self.dilated_conv(x, stride=1, padding='same', dilation=self.dilations[i])
sdc = torch.cat((sdc, x_d), dim=1)
sdc = self.elu(sdc)
return sdc
As you can see I’m initializing the weights with torch.randn, even though I think it’s causing me some problems, because my model ends up not learning.
I’ve found here that the problem could be wrong weight initialization, so I wanted to initialize the custom Conv layer correctly. How can I do this?
Thank you!