Hi, I am using a custom softmax
and loss
function, but my train/test accuracy and loss are constant. Can anyone please advise on what the issue might be?
Model
class FemnistNet(nn.Module):
    """CNN for the 62-class FEMNIST task.

    Architecture: conv(1->32, 5x5) -> maxpool -> conv(32->64, 5x5) -> maxpool
    -> fc(3136->2048) -> fc(2048->62) -> custom softmax.
    Conv and fc weights are Xavier-uniform initialized (biases keep their
    PyTorch defaults).
    """

    def __init__(self):
        super(FemnistNet, self).__init__()
        # padding=2 with a 5x5 kernel keeps the 28x28 spatial size ('same' padding);
        # each 2x2 pool then halves both spatial dimensions.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2)  # -> (batch, 32, 28, 28)
        th.nn.init.xavier_uniform_(self.conv1.weight)
        self.pool1 = nn.MaxPool2d(2, stride=2)  # -> (batch, 32, 14, 14)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)  # -> (batch, 64, 14, 14)
        th.nn.init.xavier_uniform_(self.conv2.weight)
        self.pool2 = nn.MaxPool2d(2, stride=2)  # -> (batch, 64, 7, 7)
        # 64 * 7 * 7 = 3136 flattened features feed the classifier head.
        self.fc1 = nn.Linear(3136, 2048)
        th.nn.init.xavier_uniform_(self.fc1.weight)
        self.fc2 = nn.Linear(2048, 62)
        th.nn.init.xavier_uniform_(self.fc2.weight)

    def my_softmax(self, x):
        """Row-wise softmax over dim 1, with the per-row max subtracted
        before exponentiation for numerical stability."""
        row_max = x.max(dim=1)[0].reshape(x.shape[0], 1)
        shifted = th.exp(x - row_max)
        return shifted / th.sum(shifted, dim=1, keepdim=True)

    def forward(self, x):
        """Run the network.

        Accepts any input reshapeable to (batch, 1, 28, 28) — e.g. flat
        784-vectors. Returns a tuple of (softmax probabilities over 62
        classes, post-ReLU fc1 activations, None placeholder).
        """
        imgs = x.view(-1, 1, 28, 28)
        h = self.pool1(th.nn.functional.relu(self.conv1(imgs)))
        h = self.pool2(th.nn.functional.relu(self.conv2(h)))
        h = h.flatten(start_dim=1)
        l1_activations = th.nn.functional.relu(self.fc1(h))
        softmax_input = self.fc2(l1_activations)
        probs = self.my_softmax(softmax_input)
        # Third slot is a placeholder kept for the caller's unpacking protocol.
        grad_self_ = None
        return probs, l1_activations, grad_self_
Loss function
def cross_entropy_with_logits(softmax_logits, targets, batches):
    """Cross-entropy averaged over `batch_size` (exposed as the third
    positional parameter).

    NOTE(review): despite the name, `softmax_logits` appears to be softmax
    *probabilities* (the model applies softmax in forward), not raw logits —
    confirm against the caller. `targets` is presumably a one-hot (or soft)
    label tensor of the same shape; verify. A small epsilon is added before
    the log to avoid log(0).
    """
    # Keep the epsilon wrapped via PlaceHolder so it stays on the same
    # (PySyft) plan/worker as the tensors it is combined with.
    eps = PlaceHolder().on(th.tensor(1e-7), wrap=False)
    log_probs = th.log(softmax_logits + eps)
    return -(targets * log_probs).sum() / batches
Round: 1 ---------train loss: tensor([963.06024170]) acc: tensor([0.]) gradient: tensor(3.11035156)
Round: 2 ---------train loss: tensor([983.20312500]) acc: tensor([0.]) gradient: tensor(0.)
Round: 3 ---------train loss: tensor([983.20312500]) acc: tensor([0.]) gradient: tensor(0.)
Round: 4 ---------train loss: tensor([983.20312500]) acc: tensor([0.]) gradient: tensor(0.)
Round: 5 ---------train loss: tensor([983.20312500]) acc: tensor([0.]) gradient: tensor(0.)
Round: 6 ---------train loss: tensor([983.20312500]) acc: tensor([0.]) gradient: tensor(0.)
Round: 7 ---------train loss: tensor([983.20312500]) acc: tensor([0.]) gradient: tensor(0.)
Round: 8 ---------train loss: tensor([983.20312500]) acc: tensor([0.]) gradient: tensor(0.)
Round: 9 ---------train loss: tensor([983.20312500]) acc: tensor([0.]) gradient: tensor(0.)
Here you can see that the train loss and accuracy are constant. Moreover, I have printed a few values of the gradient
of the last layer. After the first round it is constant, i.e., 0. Any pointers would be helpful. @ptrblck