Hi, my model's weights are very small. After a few rounds of training, the weights of the Dense layers become NaN. The model is as follows:
class FemnistNet(nn.Module):
    """CNN for FEMNIST images.

    Two conv + max-pool stages, then two fully connected layers.
    Input (batch, 1, 28, 28) -> output (batch, 62) class scores.
    All weights are Xavier-uniform initialized; all biases start at zero.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 32 channels, spatial size preserved by padding=2.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2)
        th.nn.init.xavier_uniform_(self.conv1.weight)
        th.nn.init.zeros_(self.conv1.bias)
        # Halve spatial dims: 28x28 -> 14x14.
        self.pool1 = nn.MaxPool2d(2, stride=2)
        # Stage 2: 32 -> 64 channels, 14x14 preserved.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)
        th.nn.init.xavier_uniform_(self.conv2.weight)
        th.nn.init.zeros_(self.conv2.bias)
        # Halve spatial dims: 14x14 -> 7x7, so 64*7*7 = 3136 features.
        self.pool2 = nn.MaxPool2d(2, stride=2)
        # Dense head: 3136 -> 2048 -> 62 classes.
        self.fc1 = nn.Linear(3136, 2048)
        th.nn.init.xavier_uniform_(self.fc1.weight)
        th.nn.init.zeros_(self.fc1.bias)
        self.fc2 = nn.Linear(2048, 62)
        th.nn.init.xavier_uniform_(self.fc2.weight)
        th.nn.init.zeros_(self.fc2.bias)
The loss function:
def cross_entropy_with_logits(log_logits, targets, batch_size):
    """Cross-entropy between one-hot `targets` and softmax probabilities.

    NOTE(review): despite the parameter name, `log_logits` appears to hold
    softmax *probabilities* (values in [0, 1], see the printed outputs), not
    logits or log-probabilities — confirm against the caller.

    :param log_logits: (batch, classes) tensor of softmax probabilities.
    :param targets: (batch, classes) one-hot target tensor.
    :param batch_size: divisor used to average the summed loss.
    :return: scalar loss tensor.
    """
    eps = 1e-7
    # Clamp the probabilities into [eps, 1 - eps] BEFORE taking the log.
    # The original `th.log(log_logits + eps)` keeps the forward pass finite,
    # but its gradient is 1 / (p + eps): for the near-zero probabilities
    # shown above (1e-30 and smaller) that gradient is ~1e7, and repeated
    # updates of that magnitude are what drive the dense-layer weights to
    # NaN after a few rounds. Clamping bounds both the loss and its gradient.
    # NOTE(review): the original wrapped eps in PlaceHolder().on(...) —
    # presumably for a PySyft Plan; if this must trace inside a Plan,
    # verify plain float constants are accepted there.
    probs = th.clamp(log_logits, eps, 1.0 - eps)
    return -(targets * th.log(probs)).sum() / batch_size
Weights after the backward pass (showing only the bias of the last dense layer):
[ 5.45458124e-06, -6.65503027e-35, -2.37094606e-36, 1.70051595e-13,
-7.77373658e-27, -4.78792679e-16, -1.24183730e-23, 2.01462756e-22,
-1.02379297e-16, 0.00000000e+00, -5.03637692e-29, -3.39672110e-34,
-6.52705158e-14, -3.19289355e-30, -2.39906655e-18, -4.25391589e-19,
-7.19190271e-19, 4.56857399e-11, -1.61931250e-11, 9.96295127e-31,
-2.16641244e-34, -2.83646022e-24, -1.91510188e-28, -9.11931553e-17,
-1.41994249e-14, -3.38881626e-36, -2.84330431e-21, 4.06037340e-16,
-1.31172990e-22, -3.30603825e-27, -1.17125438e-08, -1.59902817e-20,
6.45043300e-19, -3.84334130e-41, -2.47538482e-29, -2.58246272e-30,
-2.32159731e-08, -6.50596764e-13, -2.33269431e-19, -5.78347022e-24,
2.17626087e-23, -1.28808571e-21, 4.14140864e-08, -8.95332291e-26,
-2.18990720e-40, -3.20712151e-17, -3.79200586e-24, -9.60719157e-29,
-1.80594768e-32, -7.07660494e-26, -5.45340643e-30, -2.54328147e-36,
-2.45641157e-23, -1.14967558e-11, -2.38640034e-15, -6.48526921e-09,
-4.32448234e-30, -1.04072511e-19, -1.38691548e-20, -1.75144742e-11,
-3.81153477e-27, -3.90521984e-39]
The softmax outputs (probabilities) are:
[[8.91658049e-20, 1.14400521e-31, 8.14788736e-37, ...,
8.42350423e-01, 4.60953036e-28, 1.59884736e-38],
[3.03554970e-07, 3.90156505e-35, 1.44614002e-42, ...,
4.95101995e-06, 9.51176279e-20, 5.43945630e-29],
[1.78984669e-14, 2.60397768e-33, 5.21620865e-37, ...,
1.53031971e-12, 6.81439928e-26, 6.16571324e-44],
...,
[1.50593080e-10, 5.01664850e-43, 0.00000000e+00, ...,
1.47817869e-10, 1.97249187e-24, 5.15677835e-42],
[9.32251513e-01, 7.34700785e-41, 8.18218173e-42, ...,
7.75332438e-08, 3.47624517e-29, 9.62692045e-42],
[4.13767328e-14, 3.97828634e-42, 0.00000000e+00, ...,
8.61779452e-15, 6.90098691e-31, 3.00006511e-38]]
After a few rounds, the same bias becomes:
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
Can anyone please point out what the issues are?