Hi,
I’m training a CNN with a TripletMarginLoss and no fully connected layers. After a few epochs, when I print the output of the flatten layer, the values explode (e.g. 1e+23). What could be the reason for that, and how do I fix it?
I’m using a ReLU activation function and the Adam optimizer.
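For reference, this is roughly how I print those values: a minimal sketch using a forward hook on the last module of the Sequential (the Flatten), attached after the model instance shown further down is created. The hook is illustrative, not part of my model code:

def print_flatten_stats(module, inputs, output):
    # logs the magnitude of the flattened embedding on every forward pass
    print(f"flatten out: max={output.abs().max().item():.3e}, "
          f"mean L2 norm={output.norm(dim=1).mean().item():.3e}")

# model.model is the nn.Sequential; its last module is the Flatten
model.model[-1].register_forward_hook(print_flatten_stats)

The model itself: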
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.layers = []
        # stem: 1x1 conv mapping the input channels to 8
        self.layers.append(nn.LazyConv2d(out_channels=8, kernel_size=1, stride=1))
        for i in range(cfg.BLOCKS_NUMBER):  # cfg is my config module
            if i == 0:
                # first block: three 5x5 convs with 16 channels
                self.layers.append(nn.LazyConv2d(out_channels=16, kernel_size=5, padding=2, stride=1))
                self.layers.append(nn.ReLU())
                self.layers.append(nn.LazyConv2d(out_channels=16, kernel_size=5, padding=2, stride=1))
                self.layers.append(nn.ReLU())
                self.layers.append(nn.LazyConv2d(out_channels=16, kernel_size=5, padding=2, stride=1))
                self.layers.append(nn.ReLU())
            else:
                # remaining blocks: three 3x3 convs with 256 channels
                self.layers.append(nn.LazyConv2d(out_channels=256, kernel_size=3, padding=1, stride=1))
                self.layers.append(nn.ReLU())
                self.layers.append(nn.LazyConv2d(out_channels=256, kernel_size=3, padding=1, stride=1))
                self.layers.append(nn.ReLU())
                self.layers.append(nn.LazyConv2d(out_channels=256, kernel_size=3, padding=1, stride=1))
                self.layers.append(nn.ReLU())
            # each block ends with 2x2 max pooling
            self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2, padding=1))
        self.layers.append(nn.Flatten())
        self.model = nn.Sequential(*self.layers)

    def forward(self, anchors, positives, negatives):
        # the same backbone embeds all three triplet branches
        a = self.model(anchors)
        p = self.model(positives)
        n = self.model(negatives)
        return a, p, n
I also have parameter initialization as follows (the dummy forward pass materializes the LazyConv2d layers before init_weights, which is defined elsewhere, is applied):

model = Model()
model.to(cfg.DEVICE)

# dummy forward pass so the lazy layers materialize their weights
dummy = torch.randn((1, 1, 640, 560)).to(cfg.DEVICE)
output1, output2, output3 = model(dummy, dummy, dummy)

# init_weights is defined elsewhere; it must run after the dummy pass
model.apply(init_weights)
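For completeness, the training step uses the setup described at the top (TripletMarginLoss with the Adam optimizer). A minimal sketch; the margin, learning rate, epoch count, and loader name below are placeholders, not my exact values:

criterion = nn.TripletMarginLoss(margin=1.0)                # margin is a placeholder
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)   # lr is a placeholder

for epoch in range(num_epochs):                             # placeholder epoch count
    for anchors, positives, negatives in train_loader:      # placeholder loader name
        anchors = anchors.to(cfg.DEVICE)
        positives = positives.to(cfg.DEVICE)
        negatives = negatives.to(cfg.DEVICE)

        optimizer.zero_grad()
        a, p, n = model(anchors, positives, negatives)
        loss = criterion(a, p, n)
        loss.backward()
        optimizer.step()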