Creating custom PyTorch layers

I am trying to create a custom layer in PyTorch that performs the forward pass on data and computes the backward pass via finite differences. The custom layer is defined by a quantum circuit created in CUDA Quantum.

A simplified version of the code is here:

# !pip install cuda-quantum 


import cudaq 
from cudaq import spin 

import numpy as np

import torch
import torch.nn as nn
from torch.autograd import Function

cudaq.set_target("qpp-cpu")  
device = 'cpu'

torch.manual_seed(33)
np.random.seed(42)

class QuantumFunction(Function):
    
    """This class initializes the quantum circuit structure and defines the forward and the backward method."""

    def __init__(self, qubit_count, hamiltonian):
        
        """Define the quantum circuit in CUDA Quantum"""

        kernel, x, weights  = cudaq.make_kernel(list, list)  #x is the data input and weights are the parameters we want to optimise 

        self.kernel = kernel
        self.hamiltonian = hamiltonian

        qubits = kernel.qalloc(qubit_count)

        for i in range(qubit_count):
            kernel.ry(x[i] * weights[i], qubits[i])  # data * weight encoding - similar to w*x but without a bias

        kernel.rx(weights[2], qubits[0])
        kernel.rx(weights[3], qubits[1])

            
  
    def run(self, x_vals, weight_vals):
        
        """Execute the quantum circuit to output an expectation value"""
            
        N = x_vals.shape[0]  # Number of input samples  
    
        weight_vals = weight_vals.unsqueeze(0).repeat(N, 1)    #tile the weights so that we can run it for N data inputs 
        
        results = cudaq.observe(self.kernel, self.hamiltonian, x_vals, weight_vals).expectation()
        
        return results

    
    @staticmethod
    def forward(ctx, x_vals, weight_vals, quantum_circuit, shift):
        
        # Save shift and quantum_circuit in context to use in backward.
        ctx.shift = shift
        ctx.quantum_circuit = quantum_circuit

        # Calculate expectation value.
        exp_vals = ctx.quantum_circuit.run(x_vals, weight_vals)

        ctx.save_for_backward(x_vals, weight_vals)

        return exp_vals

    @staticmethod
    def backward(ctx, grad_output):
        """Backward pass computation via finite difference parameter shift"""

        x_vals, weight_vals = ctx.saved_tensors

        gradients = torch.zeros(len(weight_vals), device=device)

        for i in range(len(weight_vals)):
            shift_right = torch.clone(weight_vals)

            shift_right[i] += ctx.shift

            shift_left = torch.clone(weight_vals)

            shift_left[i] -= ctx.shift

            expectation_right = ctx.quantum_circuit.run(x_vals, shift_right)
            expectation_left = ctx.quantum_circuit.run(x_vals, shift_left)

            gradients[i] = (expectation_right - expectation_left) / (2 * ctx.shift)
            
        
        return None, gradients * grad_output.float(), None, None  # one gradient (or None) per input of forward
    
    
class QuantumLayer(nn.Module):
    """Encapsulates a quantum function into a quantum torch layer"""

    def __init__(self, qubit_count: int, hamiltonian, shift: torch.Tensor):
        super(QuantumLayer, self).__init__()
        
        self.quantum_circuit = QuantumFunction(qubit_count , hamiltonian)  
        self.shift = shift

    def forward(self, input):
        ans = QuantumFunction.apply(input, self.quantum_circuit, self.shift)

        return ans
    
    
qubit_count = 2
hamiltonian = spin.z(0)
N = 128  # number of data samples 
latent_dim = 2
no_of_weights = 4
shift = torch.tensor(np.pi / 2)

x_vals = torch.rand(N, latent_dim)
weight_vals = torch.rand(no_of_weights)  #parameters we want to optimize 

generator = QuantumLayer(qubit_count, hamiltonian, shift)

generator_optimizer = torch.optim.Adam(generator.parameters(), lr=0.001)



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 13
      9 weight_vals = torch.rand(no_of_weights)  #parameters we want to optimize 
     11 generator = QuantumLayer(qubit_count, hamiltonian, shift)
---> 13 generator_optimizer = torch.optim.Adam(generator.parameters(), lr=0.001)

File ~/.local/lib/python3.10/site-packages/torch/optim/adam.py:45, in Adam.__init__(self, params, lr, betas, eps, weight_decay, amsgrad, foreach, maximize, capturable, differentiable, fused)
     39     raise ValueError(f"Invalid weight_decay value: {weight_decay}")
     41 defaults = dict(lr=lr, betas=betas, eps=eps,
     42                 weight_decay=weight_decay, amsgrad=amsgrad,
     43                 maximize=maximize, foreach=foreach, capturable=capturable,
     44                 differentiable=differentiable, fused=fused)
---> 45 super().__init__(params, defaults)
     47 if fused:
     48     if differentiable:

File ~/.local/lib/python3.10/site-packages/torch/optim/optimizer.py:261, in Optimizer.__init__(self, params, defaults)
    259 param_groups = list(params)
    260 if len(param_groups) == 0:
--> 261     raise ValueError("optimizer got an empty parameter list")
    262 if not isinstance(param_groups[0], dict):
    263     param_groups = [{'params': param_groups}]

ValueError: optimizer got an empty parameter list

Questions:

  1. I am unsure how to register the weights such that they can be picked up by the optimizer.
  2. Does PyTorch have built-in finite difference backward methods?
  3. Does PyTorch have built-in gradient-free methods such as COBYLA?

Thanks.

Please look at this tutorial: PyTorch: Custom nn Modules — PyTorch Tutorials 2.2.0+cu121 documentation

Basically, if you want a learnable parameter that the optimizer can see, you have to register it as an nn.Parameter. For example, in your case I think you want something like self.shift = nn.Parameter(shift), maybe?
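To make that concrete, here is a rough sketch of how the circuit weights themselves could be registered inside QuantumLayer and passed through to the autograd Function (this exact wiring is my assumption, not tested):

class QuantumLayer(nn.Module):
    def __init__(self, qubit_count, hamiltonian, no_of_weights, shift):
        super().__init__()
        self.quantum_circuit = QuantumFunction(qubit_count, hamiltonian)
        # registering as nn.Parameter is what makes generator.parameters() non-empty
        self.weights = nn.Parameter(torch.rand(no_of_weights))
        self.shift = shift

    def forward(self, x):
        # pass the registered weights through to the autograd Function
        return QuantumFunction.apply(x, self.weights, self.quantum_circuit, self.shift)

Note that backward then has to return one gradient (or None) per input of forward, i.e. (grad_x_or_None, grad_weights, None, None).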

Does PyTorch have built-in finite difference backward methods?

Only for unit testing: it's available as torch.autograd.gradcheck (also re-exported in torch.testing._internal.common_utils). Generally, because we take partial derivatives one parameter at a time, a finite-difference backward costs O(number_of_parameters) extra evaluations, which is not typical of the workloads PyTorch is optimized for (neural nets). But since you are writing a custom autograd Function anyway, you can implement the backward as finite differences yourself, as you are already doing.
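For reference, a minimal sketch of how gradcheck is typically used to compare an analytic backward against a numerical finite-difference estimate (the toy Square Function below is purely illustrative):

import torch

class Square(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x ** 2

    @staticmethod
    def backward(ctx, grad_output):
        (x,) = ctx.saved_tensors
        return grad_output * 2 * x

# double precision and requires_grad are needed for a reliable numerical check
x = torch.randn(4, dtype=torch.double, requires_grad=True)
print(torch.autograd.gradcheck(Square.apply, (x,), eps=1e-6, atol=1e-4))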

Does PyTorch have built-in gradient-free methods such as COBYLA?

No. But there are packages such as nevergrad that implement gradient-free methods and are compatible with PyTorch.
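For example, a rough sketch of how nevergrad could drive the circuit weights gradient-free (the objective below reuses names from your code and is an assumption about what you actually want to minimise):

import numpy as np
import nevergrad as ng

def objective(weights):
    # evaluate the circuit for a candidate weight vector and return a scalar loss
    exp_val = generator.quantum_circuit.run(x_vals, torch.tensor(weights, dtype=torch.float32))
    return float(exp_val)

param = ng.p.Array(shape=(no_of_weights,)).set_bounds(-np.pi, np.pi)
optimizer = ng.optimizers.NGOpt(parametrization=param, budget=200)
recommendation = optimizer.minimize(objective)
print(recommendation.value)  # best weights found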

One other library you might look into is PennyLane, which integrates with PyTorch and is specifically designed for quantum programming and machine learning.
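For instance, here is a rough sketch of the same two-qubit circuit written as a PennyLane QNode with the Torch interface (the gate layout mirrors your kernel; default.qubit is just the bundled simulator):

import pennylane as qml
import torch

dev = qml.device("default.qubit", wires=2)

@qml.qnode(dev, interface="torch")
def circuit(x, weights):
    qml.RY(x[0] * weights[0], wires=0)
    qml.RY(x[1] * weights[1], wires=1)
    qml.RX(weights[2], wires=0)
    qml.RX(weights[3], wires=1)
    return qml.expval(qml.PauliZ(0))

weights = torch.rand(4, requires_grad=True)
out = circuit(torch.rand(2), weights)
out.backward()  # gradients flow back to `weights` automatically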