Hi… after training my model, I noticed that the conv layers' weights had converged to zero and effectively only the linear layers were doing any work. I changed the activation function and the kernel sizes, but nothing changed.
How can I fix this problem?
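For context, this is roughly how I noticed it: I print the mean absolute value of each conv kernel once per epoch (a minimal sketch; the training loop itself is omitted, and report_conv_weights is just a helper name I use here). After a few epochs all the conv entries sit near zero while fc1 and fc2 keep normal magnitudes.

import torch.nn as nn

# Minimal sketch of how I watch the conv weights during training.
# Called once per epoch; `model` is the Net instance defined below.
def report_conv_weights(model):
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            w = module.weight.detach()
            print(f"{name}: mean |w| = {w.abs().mean().item():.6f}")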
Here is the model itself:

import torch
import torch.nn as nn


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # First conv block: 1 -> 64 -> 32 -> 1 channels, spatial size preserved.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(11, 11), stride=1, padding=5),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=(1, 1), stride=1, padding=0),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 1, kernel_size=(7, 7), stride=1, padding=3),
            nn.BatchNorm2d(1),
            nn.ReLU()
        )
        # Second conv block, same structure; the final conv has no norm/activation.
        self.conv4 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(11, 11), stride=1, padding=5),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=(1, 1), stride=1, padding=0),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        self.conv6 = nn.Sequential(
            nn.Conv2d(32, 1, kernel_size=(7, 7), stride=1, padding=3)
        )
        self.fc1 = nn.Linear(1600, 64)
        self.fc2 = nn.Linear(64, 64)

    def forward(self, kr):
        out_conv1 = self.conv1(kr)
        out_conv2 = self.conv2(out_conv1)
        out_conv3 = self.conv3(out_conv2)
        out_conv4 = self.conv4(out_conv3)
        out_conv5 = self.conv5(out_conv4)
        out_conv6 = self.conv6(out_conv5)
        # Skip connection: add the input back onto the conv output.
        out_feedback = kr + out_conv6
        # Flatten the spatial dims and apply two linear layers per channel.
        out_linear = self.fc1(out_feedback.flatten(2))
        out_linear = self.fc2(out_linear)
        # Reshape the 64 output features back into an 8x8 map.
        x = out_linear.reshape([out_linear.shape[0], out_linear.shape[1], 8, 8])
        return x
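In case the input shape matters: a quick sanity check with random data, assuming square 1×40×40 inputs (any spatial size with H * W = 1600 would match fc1's in_features, but 40×40 is what I have in mind here):

import torch

net = Net()
dummy = torch.randn(4, 1, 40, 40)   # batch of 4 single-channel 40x40 images
out = net(dummy)
print(out.shape)                    # torch.Size([4, 1, 8, 8])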