Translating keras.backend.gradients to PyTorch

I am trying to translate this Keras code to PyTorch:

from keras import backend as K
# scalar score: first unit of the first sample
y_hat = model.get_layer('fc_out').output[0, 0]
conv_output = model.get_layer(activation_layer).output

# gradient of that score w.r.t. the conv layer's activations
grads = K.gradients(y_hat, conv_output)[0]
grad_function = K.function([model.get_layer('input_1').input], [conv_output, grads])
output, grads_val = grad_function([img])

So far I have managed to get the output variable correctly, but not grads_val:

import torch

activation = {}

def get_activation(name):
    # forward hook: store the layer's output so it can be read after the forward pass
    def hook(module, input, output):
        activation[name] = output
    return hook

model.fc_out.register_forward_hook(get_activation('fc_out'))
model.last_swish.register_forward_hook(get_activation(activation_layer))
model.eval()
prediction = model(img)

y_hat = activation['fc_out']
output = activation[activation_layer]
grads_val = torch.autograd.grad(y_hat, output, grad_outputs=torch.ones_like(y_hat))

Any clue what I did wrong here? Thank you
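
For reference, here is a minimal, self-contained sketch of the hook + torch.autograd.grad pattern I believe the Keras snippet corresponds to. Note that torchvision's resnet18 and its layer4 block are only placeholders standing in for my model and its last_swish / fc_out layers:

import torch
import torchvision

# placeholder network: resnet18 stands in for my model
model = torchvision.models.resnet18()
model.eval()

activation = {}

def get_activation(name):
    def hook(module, input, output):
        activation[name] = output   # keep the graph-connected tensor, do not detach
    return hook

# hook the last conv block (stand-in for my 'last_swish' layer)
model.layer4.register_forward_hook(get_activation('conv'))

img = torch.randn(1, 3, 224, 224)
prediction = model(img)      # forward pass fills activation['conv']

y_hat = prediction[0, 0]     # scalar score, analogous to Keras output[0, 0]
conv_output = activation['conv']

# d y_hat / d conv_output; autograd.grad returns a tuple, take the first element
grads_val = torch.autograd.grad(y_hat, conv_output)[0]

print(conv_output.shape, grads_val.shape)   # both torch.Size([1, 512, 7, 7])

In this toy version grads_val comes out with the same shape as the hooked activation, which is what I expect from the Keras code as well.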