I have defined a custom loss function, but the loss is not decreasing; it isn't even changing.
My loss function aims to minimize the inverse of the gap statistic, which is used to evaluate the clusters formed from my embeddings.
Here is a toy version of the code:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.datasets import make_blobs
from sklearn.cluster import OPTICS

def get_data():
    # Six (1000, 2) blob datasets, stacked to (1000, 2, 6) and flattened to 12000 floats.
    Xlist = []
    for i in range(6):
        X, _ = make_blobs(n_samples=1000, n_features=2)
        Xlist.append(X)
    dat = np.stack(np.array(Xlist), axis=2)
    dat = torch.from_numpy(np.reshape(dat, (-1))).float()
    return dat
def custom_loss_function(gap):
    # Sum of inverse gap values; stack so torch can sum a list of tensors.
    loss = torch.stack([1. / g for g in gap]).sum()
    return loss
def calculate_Wk(X):
    Sum_of_squared_distances = []
    for k in [10, 50, 150, 300, 500]:
        optics = OPTICS(min_samples=k).fit(X)
        Sum_of_squared_distances.append(sum(optics.core_distances_))
    sum_of_squared_dist = np.log(np.array(Sum_of_squared_distances))
    normalized_wcd = sum_of_squared_dist - np.max(sum_of_squared_dist)
    return normalized_wcd
def calculate_gap(output, x, Wks, s_prime):
    list_of_G = []
    Wks = torch.tensor(Wks)
    output = torch.tensor(output)
    x = np.reshape(x, (1000, 2, 6))
    for i, weight in enumerate(output):
        Wk = calculate_Wk(weight * x[:, :, i])
        Wk = torch.tensor(Wk)
        G = Wks - Wk
        # Pick the first k where G[j] >= G[j+1] + s'[j+1] holds.
        optimum = 0
        for j in range(len(G) - 1):
            if G[j] >= G[j + 1] + s_prime[j + 1]:
                optimum = j + 1
                break
        if optimum == 0:
            optimum = 1
        list_of_G.append(G[optimum])
    return list_of_G
def get_reference_data():
    # Reference distribution: Wk values from 20 uniform datasets.
    e = np.finfo(float).eps
    simulated_Wk = np.zeros((20, 5)) + e
    for i in range(20):
        X = np.random.uniform(0, 1, size=(1000, 2))
        simulated_Wk[i] = calculate_Wk(X)
    Wks = np.mean(simulated_Wk + e, axis=0)
    sks = np.std(simulated_Wk + e, axis=0) * np.sqrt(1 + 1 / 20)
    return Wks, sks
Wks, s_prime = get_reference_data()

in_features = 12000
out_features = 6
model = nn.Sequential(nn.Linear(in_features, out_features))
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
x = get_data()

for n in range(10):
    output = model(x)
    # This is the part where I calculate the gap statistic that I want to minimize.
    list_of_gaps = calculate_gap(output, x, Wks, s_prime)
    loss = custom_loss_function(list_of_gaps)
    loss = Variable(loss, requires_grad=True)
    loss.backward()
    optimizer.step()
The loss is not even changing; my model isn't learning anything. Can you help me out with this?
Also, another potential problem could be that you’re detaching the output of your model with
output1 = output.detach().numpy()
This will break the computation graph inside the model and probably explains why your model isn't learning: no gradients ever reach your parameters!
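To make the point concrete, here is a minimal toy sketch (not your model, just a throwaway linear layer) showing how .detach() cuts the graph: the detached tensor has no grad_fn, so nothing computed from it can backpropagate into the layer's weights.

import torch
import torch.nn as nn

layer = nn.Linear(4, 2)
inp = torch.randn(1, 4)

out = layer(inp)
print(out.grad_fn)         # e.g. <AddmmBackward0 ...>: still connected to the layer

cut = out.detach()
print(cut.grad_fn)         # None: the graph is cut here

loss = (cut ** 2).sum()
print(loss.requires_grad)  # False: loss.backward() would raise an error,
                           # so layer.weight can never receive a gradient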
Also, remember to clear the gradient cache of your parameters (via optimizer.zero_grad()), otherwise your gradients will accumulate across all epochs! You'll want to have something like this within your code:
for epoch in range(epochs):
    # lines to calculate loss
    optimizer.zero_grad()                      # clear current gradient values!
    loss = custom_loss_function(list_of_gaps)  # calculate loss
    loss.backward()                            # calculate gradients
    optimizer.step()                           # update parameters
What about my 2nd comment? Using the detach function will kill any gradients in your network, which is most likely the explanation as to why it's not learning.
Also, could you format your code by wrapping it in three backticks (```)? It makes it easier for people to read/copy!
After having a brief look through, it seems you're swapping between torch and numpy; moving back and forth between the two libraries breaks the gradient of any intermediate computations. I'd suggest removing all dependencies on numpy and using pure torch operations so autograd can track everything, as in the sketch below.
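As a rough illustration (using your shapes but only torch ops, and with calculate_Wk replaced by a hypothetical differentiable stand-in, since OPTICS itself isn't differentiable), the reshape/log steps can stay inside autograd like this:

import torch

def calculate_Wk_torch(X):
    # Hypothetical stand-in for your Wk: a differentiable dispersion measure per k.
    dists = torch.cdist(X, X)  # pairwise distances, tracked by autograd
    wk = torch.stack([dists.topk(k, largest=False).values.sum()
                      for k in (10, 50, 150)])
    wk = torch.log(wk)
    return wk - wk.max()

x = torch.randn(12000)
weights = torch.randn(6, requires_grad=True)  # stands in for the model output
x3d = torch.reshape(x, (1000, 2, 6))          # torch.reshape instead of np.reshape
Wk = calculate_Wk_torch(weights[0] * x3d[:, :, 0])
print(Wk.requires_grad)                       # True: still connected to `weights`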
Also, do you actually use the gradient of your input data (i.e. x)? If you do, make sure to enable grad for that data! You can add x.requires_grad_() before your loop.
So I'm using scikit-learn's OPTICS to calculate the clusters, and that requires the input x to be a NumPy array. I did add requires_grad_() like you said, but I have to detach before I send it to calculate_gap, or it gives me
RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.
The main issue is that the outputs of your model are being detached, so they have no connection to your model weights. Since your loss depends only on output and x (both of which are detached), it has no gradient with respect to your model parameters, which is why it's not decreasing!
You'll need to calculate your loss value without using the detach() method at all. Also, you don't need the loss = Variable(loss, requires_grad=True) line, I think; wrapping the loss like that just creates a fresh leaf tensor with no history. A quick way to check whether the loss is still attached to the graph is shown below.
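A quick diagnostic you could drop into your loop (it assumes the loss and model variables from your code above):

# Is the loss still attached to the graph that contains the model?
print(loss.grad_fn)  # None means the graph was cut somewhere (detach / .numpy() / torch.tensor(...))

if loss.requires_grad:
    loss.backward()
for name, p in model.named_parameters():
    # If every grad is still None after backward(), no signal reaches the weights.
    print(name, None if p.grad is None else p.grad.abs().sum().item())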
I will try to create a dummy function using torch to see if my loss is decreasing. If it is, I can go ahead and implement everything in torch. I want to know if that really is what is causing the issue.
I have completely removed the gap calculation and I'm doing a dummy mean to get G, which I pass to the loss function now. All my variables have requires_grad set to True, but the loss is still constant. Can you maybe try running the code as well?
def calculate_gap(output, x, Wks, s_prime):
    list_of_G = []
    Wks = torch.tensor(Wks)
    output = torch.tensor(output)
    x = torch.reshape(x, (1000, 2, 6))
    print(output)
    for i, weight in enumerate(output):
        Wk = weight * x[:, :, i]
        G = torch.mean(Wk)
    return G

def my_loss(g):
    loss = torch.square(1. / g)
    return loss
in_features = 12000
out_features = 6
model = nn.Sequential(nn.Linear(in_features, out_features))
optimizer = torch.optim.SGD(model.parameters(), lr=0.000001)
print(model)
x = get_data()

for n in range(10):
    x.requires_grad_(True)
    output = model(x)
    g = calculate_gap(output, x, Wks, s_prime)
    print(g.requires_grad)
    loss = my_loss(g)
    print(loss.requires_grad)
    loss = Variable(loss, requires_grad=True)
    print('LOSS', loss)
    loss.backward()
    optimizer.step()
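For what it's worth, the dummy loop should start moving once the two graph-breaking wrappers are removed: don't re-wrap output with torch.tensor(...), don't wrap the loss in Variable(...), and clear the gradients each step. A minimal corrected sketch (same shapes as above; Wks/s_prime are dropped since the dummy never uses them, and the learning rate is bumped so the change is visible):

def calculate_gap_dummy(output, x):
    x = torch.reshape(x, (1000, 2, 6))
    for i, weight in enumerate(output):
        g = torch.mean(weight * x[:, :, i])  # weight comes straight from model(x), so grad flows
    return g

model = nn.Sequential(nn.Linear(12000, 6))
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
x = get_data()

for n in range(10):
    optimizer.zero_grad()
    output = model(x)
    g = calculate_gap_dummy(output, x)
    loss = torch.square(1. / g)
    print('LOSS', loss.item(), 'attached:', loss.grad_fn is not None)
    loss.backward()
    optimizer.step()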