Optimizing biases for custom feature generation function

Hello everyone,

I created a custom function that generates features for univariate time-series datasets. I use these features in a Ridge Classification model afterwards. My custom function “generate_features” receives multiple inputs, but I only want to train the biases to improve the performance of my model. Below you can see my code so far. The code runs, but print(model.biases.grad) gives “None” and the biases are not updated. What did I do wrong?
For clarification: in each epoch the ridge classifier is trained with the new features that were calculated from the new biases. Don't be confused about why I don't use X_train: in this version I use predefined initialization variables for the feature creation instead of deriving them from X_train.

import numpy as np
import torch
import torch.nn as nn

from sklearn.linear_model import RidgeClassifierCV
from sklearn.utils.extmath import softmax
class RidgeClassifierCVwithProba(RidgeClassifierCV):
    """RidgeClassifierCV extended with a softmax-based ``predict_proba``.

    RidgeClassifierCV only exposes ``decision_function``; this subclass
    turns the binary decision scores into two-class pseudo-probabilities.
    """

    def predict_proba(self, X):
        """Return an (n_samples, 2) array of softmaxed (-score, score) pairs."""
        scores = self.decision_function(X)
        paired = np.column_stack((-scores, scores))
        return softmax(paired)

class features_ridge(torch.nn.Module):
    """Feature generator + ridge classifier where only ``biases`` is trainable.

    All matrices except ``biases`` are stored as frozen parameters
    (``requires_grad=False``) so they travel with the module on ``.to()`` /
    ``.cuda()`` but are never updated.

    NOTE(review): as written, gradients can NEVER reach ``self.biases``.
    ``predict_proba`` runs in scikit-learn/NumPy (outside autograd), and
    re-wrapping its result with ``torch.tensor(..., requires_grad=True)``
    creates a brand-new leaf tensor that is disconnected from the graph —
    this is why ``model.biases.grad`` prints ``None``. For the biases to be
    optimized, every operation between ``self.biases`` and the loss must be
    a differentiable torch op (e.g. replace the sklearn head with a
    torch-native linear/softmax layer).
    """

    def __init__(self, weight_matrix, dilation_matrix, biases_matrix, top_x_indices, padding_matrix, C_alpha_matrix, C_gamma_matrix):
        super().__init__()
        # Frozen inputs to the feature generator.
        self.weights = torch.nn.Parameter(torch.tensor(weight_matrix), requires_grad=False)
        self.dilation = torch.nn.Parameter(torch.tensor(dilation_matrix), requires_grad=False)
        self.top_x_indices = torch.nn.Parameter(torch.tensor(top_x_indices), requires_grad=False)
        self.padding = torch.nn.Parameter(torch.tensor(padding_matrix), requires_grad=False)
        self.C_alpha_matrix = torch.nn.Parameter(torch.tensor(C_alpha_matrix), requires_grad=False)
        self.C_gamma_matrix = torch.nn.Parameter(torch.tensor(C_gamma_matrix), requires_grad=False)
        # The only quantity we intend to train.
        self.biases = torch.nn.Parameter(torch.tensor(biases_matrix), requires_grad=True)
        self.classifier_ridge = RidgeClassifierCVwithProba()

    def forward(self, x):
        """Return (class probabilities, generated features) for the stored matrices.

        ``x`` is accepted for API compatibility but unused: features come from
        the predefined initialization matrices (see the author's note).
        """
        # BUG FIX: the original referenced the module-level global
        # ``top_x_indices`` here instead of the stored ``self.top_x_indices``.
        features = generate_features(
            weights=self.weights,
            dilations=self.dilation,
            biases=self.biases,
            n_jobs=-1,
            top_x_indices=self.top_x_indices,
            padding=self.padding,
            C_alpha_matrix=self.C_alpha_matrix,
            C_gamma_matrix=self.C_gamma_matrix,
        )
        probs = self.classifier_ridge.predict_proba(features)
        # Detaches from autograd: ``probs`` is a NumPy array, and
        # torch.tensor() makes a fresh leaf — see the class docstring.
        probs = torch.tensor(probs, requires_grad=True)

        return probs, features


# Cross-entropy over the probability-like outputs of the model.
loss_fn = torch.nn.CrossEntropyLoss()
# train_y: NumPy label array defined earlier in the file — TODO confirm.
target = torch.from_numpy(train_y).long()
model = features_ridge(weight_matrix = weight_matrix, dilation_matrix = dilation_matrix, biases_matrix = biases_matrix,top_x_indices = top_x_indices, padding_matrix = padding_matrix, C_alpha_matrix = C_alpha_matrix, C_gamma_matrix = C_gamma_matrix)
# Only the biases are meant to be optimized; all other matrices stay fixed.
optimizer = torch.optim.SGD([model.biases], lr =0.01)
num_epochs=2

for epoch in range(num_epochs):
    # NOTE(review): ``features`` must already exist before the first
    # iteration — presumably computed earlier in the file; verify. After the
    # first pass it is refreshed by forward() below.
    model.classifier_ridge.fit(features, train_y)
    output, features = model(X_train)
    loss = loss_fn(output, target)
    optimizer.zero_grad()
    loss.backward()
    # Prints None: forward() re-wraps the sklearn output in a brand-new
    # tensor, so the autograd graph never reaches model.biases and
    # optimizer.step() is a no-op.
    print(model.biases.grad)
    optimizer.step()

Many thanks before all,
Jannis