Hey, I have a very simple NN with only one layer. I was going through someone's tutorial and they did not take the log in the negative log likelihood loss. This is on the flattened MNIST dataset. When I take the log and train, my model does not learn anything, while without the log it reaches 90% accuracy. How can the log be messing up the learning? Shouldn't it be the same?
Here is his loss function and my version:
```python
def nll(input, target):  # the tutorial's version
    return -input[range(target.shape[0]), target].mean()

def nll2(input, target):  # my version, with the extra log
    x = -input[:, target].mean()
    return torch.log(x)
```
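To make the difference concrete, here is a tiny toy comparison I put together (the numbers and variable names are made up, not from the tutorial). It suggests the two functions don't even compute the same quantity: `input[:, target]` grabs whole columns instead of one entry per row, and the final `torch.log` is applied to something already built from log-probabilities:

```python
import torch

# Toy batch (made-up numbers): 2 samples, 3 classes, already log-probabilities
logprobs = torch.log(torch.tensor([[0.7, 0.2, 0.1],
                                   [0.1, 0.8, 0.1]]))
target = torch.tensor([0, 1])

# nll-style: pick each row's target log-prob, negate, average
print(-logprobs[range(target.shape[0]), target].mean())  # ~0.2899

# nll2-style: input[:, target] selects columns 0 and 1 for *every* row,
# giving a 2x2 matrix, and torch.log is then applied on top of values
# that already came out of log_softmax
x = -logprobs[:, target].mean()
print(torch.log(x))  # ~0.1160, a completely different quantity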
```python
import gzip
import math
import pickle

import numpy as np
import torch

with gzip.open('data/mnist/mnist.pkl.gz', 'rb') as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')

# the pickle holds numpy arrays; convert to tensors for torch.mm
x_train, y_train, x_valid, y_valid = map(torch.tensor, (x_train, y_train, x_valid, y_valid))

weights = torch.randn(784, 10) / math.sqrt(784)  # scale before requires_grad_ so the division isn't in the graph
weights.requires_grad_()
bias = torch.zeros(10, requires_grad=True)

def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1)  # ???

def model(xb):
    return log_softmax(torch.mm(xb, weights) + bias)

bs = 64  # batch size
xb = x_train[0:bs]  # a mini-batch from x
preds = model(xb)  # predictions
preds, preds.shape

def acc(input, target):
    indices = torch.argmax(input, 1)
    return (indices == target).sum().float() / len(indices)
```
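As a sanity check on the `log_softmax` line (the one I marked `???`), I compared it against PyTorch's built-in on random data; as far as I understand `F.log_softmax`, the two should agree:

```python
import torch
import torch.nn.functional as F

# My own sanity check, not from the tutorial: the handwritten log_softmax
# should match PyTorch's built-in on random inputs
x = torch.randn(5, 10)
mine = x - x.exp().sum(-1).log().unsqueeze(-1)
print(torch.allclose(mine, F.log_softmax(x, dim=-1)))  # True
```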
```python
lr = 0.015
for i in range(5000):
    r = np.random.randint(0, 50000)
    ins = x_train[r:r+64]
    tars = y_train[r:r+64]
    outs = model(ins)
    loss = nll(outs, tars)  # ***** switching nll to nll2 here is what stops the model from learning
    if i % 10 == 0:
        print('loss: ', loss)
        print('accuracy: ', acc(outs, tars))
    loss.backward()
    with torch.no_grad():
        weights -= weights.grad * lr
        bias -= bias.grad * lr
        weights.grad.zero_()
        bias.grad.zero_()
```
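For reference, here is a quick cross-check sketch (my own, assuming `F.nll_loss` with its default mean reduction) showing the tutorial's `nll` matches PyTorch's built-in, which also expects log-probabilities as input:

```python
import torch
import torch.nn.functional as F

def nll(input, target):  # the tutorial's version from above
    return -input[range(target.shape[0]), target].mean()

# Random log-probabilities and targets, just for the comparison
outs = torch.randn(4, 10).log_softmax(-1)
tars = torch.randint(0, 10, (4,))
print(nll(outs, tars).item(), F.nll_loss(outs, tars).item())  # same value
```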