I have a code of a custom loss that I implemented on tensorflow that I would like to pass on Pytorch for technical reasons. However I can’t seem to make it work and I don’t know why.
The pytorch loss doesn’t seem to train the network
The aim is to compare raw linear output of a network (before softmax) with true probabilities.
The wanted loss is similar to the one in this video at 13:10
Any help would be appreciated
Tensorflow code:
import numpy as np
from scipy.special import softmax
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Softmax
from tensorflow.keras import Input, Model
delta = tf.Variable([[1.]], trainable=False)
main_input = Input(shape=(10,))
output = Dense(4, activation='linear')(main_input)
def custom_loss(delta):
def loss(y_true, y_pred):
y_pred_softmax = Softmax()(y_pred)
y_pred_softmax_clipped = K.clip(y_pred_softmax, 1e-8, 1 - 1e-8)
log_likelihood = y_true * K.log(y_pred_softmax_clipped)
return K.sum(-log_likelihood * delta)
return loss
model = Model(inputs=[main_input], outputs=output)
model.compile(optimizer=Adam(lr=0.01), loss=custom_loss(delta))
print(model.predict(np.ones((1,10))))
print(softmax(model.predict(np.ones((1,10)))[0]))
delta.assign([[1.0]])
model.fit(np.ones((1000,10), dtype='float'),np.asarray(1000*[[0.7, 0.3, 0.0, 0.0]], dtype='float'))
print(model.predict(np.ones((1,10))))
print(softmax(model.predict(np.ones((1,10)))[0]))
Pytorch code:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc = nn.Linear(10, 4)
def forward(self, x):
x = self.fc(x)
return x
def custom_loss(delta):
def loss(y_pred, y_true):
y_pred_softmax = nn.Softmax(dim=1)(y_pred)
y_pred_softmax_clipped = torch.clamp(y_pred, 1e-8, 1 - 1e-8)
log_likelihood = y_true * torch.log(y_pred_softmax_clipped)
return torch.sum(-log_likelihood * delta)
return loss
delta = 1
network = Net()
loss_function = custom_loss(delta)
optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=0.9)
optimizer.zero_grad()
x = torch.ones((1,10))
print(network(x))
print(nn.Softmax(dim=1)(network(x)), '\n')
n=1000
x = torch.ones((n,10))
target = torch.from_numpy(np.array(n*[[0.8, 0.2, 0.0, 0.0]]))
output = network(x)
loss = loss_function(output, target)
loss.backward()
optimizer.step()
print(loss, '\n')
x = torch.ones((1,10))
print(network(x))
print(nn.Softmax(dim=1)(network(x)))
example of outputs, either only one value become extremely big :
tensor([[-0.0858, 0.1533, -0.1739, 2.0263]], grad_fn=<AddmmBackward>)
tensor([[0.0873, 0.1109, 0.0800, 0.7218]], grad_fn=<SoftmaxBackward>)
tensor([[-8.5779e-02, 1.4366e+02, -1.7395e-01, 2.0263e+00]], grad_fn=<AddmmBackward>)
tensor([[0., 1., 0., 0.]], grad_fn=<SoftmaxBackward>)
Or the outputs doesn’t change at all:
tensor([[ 1.2120, -0.1411, -0.5820, -0.6478]], grad_fn=<AddmmBackward>)
tensor([[0.6327, 0.1635, 0.1052, 0.0985]], grad_fn=<SoftmaxBackward>)
tensor([[ 1.2120, -0.1411, -0.5820, -0.6478]], grad_fn=<AddmmBackward>)
tensor([[0.6327, 0.1635, 0.1052, 0.0985]], grad_fn=<SoftmaxBackward>)