I’m implementing a neural network that includes quantum layers (using PennyLane’s PyTorch interface) to solve ODEs. I want to encode several points at once and obtain the ODE solution at each of them (one result per corresponding input) in a single run.
Currently I’m running a toy model where I try to learn $u(x) = \sin(x)$ by driving the residual $\frac{du}{dx} - \cos(x)$ to zero in the loss function.
The network structure is:
network structure and quantum circuit
class QuantumNetworkBaseAngleClass(nn.Module):
    """Variational quantum circuit exposed as a PyTorch module.

    The circuit rescales its inputs, angle-embeds them on ``n_qubits`` wires,
    applies ``n_layers`` of ``qml.StronglyEntanglingLayers`` and returns the
    Pauli-Z expectation value of every wire.

    Args:
        n_qubits: Number of wires of the simulated device; also the number of
            expectation values returned by the circuit.
        n_layers: Number of ``StronglyEntanglingLayers`` repetitions.
        x_scale_factor: Multiplier applied to the inputs after the bias has
            been subtracted.
        x_scale_bias: Offset subtracted from the inputs before scaling.
    """

    def __init__(self, n_qubits=8, n_layers=2, x_scale_factor=1.0, x_scale_bias=0.0):
        super().__init__()
        self.n_qubits = n_qubits
        self.n_layers = n_layers
        # Populated as a side effect of create_hybrid_network().
        self.weight_shapes = None
        self.x_scale_factor = x_scale_factor
        self.x_scale_bias = x_scale_bias
        self.q_layer, self.qnode = self.create_hybrid_network()
        # Initialize the weights of the quantum layer: overwrite whatever
        # TorchLayer drew with a uniform sample between 0 and pi.
        for param in self.q_layer.parameters():
            if param.requires_grad:  # Only initialize trainable parameters
                nn.init.uniform_(param, 0.0, torch.pi)

    def create_hybrid_network(self):
        """Build the QNode and wrap it in a ``qml.qnn.TorchLayer``.

        Also stores the weight-shape dictionary in ``self.weight_shapes``.

        Returns:
            A ``(torch_layer, qnode)`` tuple: the trainable Torch wrapper and
            the underlying QNode it evaluates.
        """
        dev = qml.device("default.qubit", wires=self.n_qubits)
        layer = qml.StronglyEntanglingLayers
        shape = layer.shape(n_layers=self.n_layers, n_wires=self.n_qubits)
        self.weight_shapes = {"weights": shape}

        # NOTE(review): `differentiable` does not appear to be a documented
        # QNode argument -- confirm the installed PennyLane version accepts it.
        @qml.qnode(dev, interface="torch", diff_method="backprop", differentiable=True, max_diff=2)
        def qnode(inputs, weights):
            # Affine rescaling of the raw inputs before they become rotation angles.
            scaled_inputs = (inputs - self.x_scale_bias) * self.x_scale_factor
            qml.AngleEmbedding(scaled_inputs, wires=range(self.n_qubits))
            layer(weights, wires=range(self.n_qubits))
            return [qml.expval(qml.PauliZ(wires=i)) for i in range(self.n_qubits)]

        return qml.qnn.TorchLayer(qnode, self.weight_shapes), qnode

    def forward(self, inputs):
        """Evaluate the quantum layer on ``inputs`` and return its output."""
        output = self.q_layer(inputs)
        return output
class QuantumNetworkAngleClass(nn.Module):
    """Hybrid model: linear layer -> tanh -> quantum layer -> linear layer.

    Args:
        n_qubits: Width of both linear layers and of the quantum layer.
        n_layers: Forwarded to ``QuantumNetworkBaseAngleClass``.
        x_scale_factor: Forwarded to ``QuantumNetworkBaseAngleClass``.
        x_scale_bias: Forwarded to ``QuantumNetworkBaseAngleClass``.

    Note:
        ``fc_in`` and ``fc_out`` are dense ``n_qubits x n_qubits`` layers, so
        every output component is a function of every input component. The
        diagonal of the input-output Jacobian is therefore a partial
        derivative taken with the other input components held fixed; it is
        not the derivative of a single-input function evaluated pointwise.
    """

    def __init__(self, n_qubits=8, n_layers=2, x_scale_factor=1.0, x_scale_bias=0.0):
        super().__init__()
        self.fc_in = nn.Linear(n_qubits, n_qubits)
        self.activation = nn.Tanh()
        self.q_layer = QuantumNetworkBaseAngleClass(
            n_qubits=n_qubits,
            n_layers=n_layers,
            x_scale_factor=x_scale_factor,
            x_scale_bias=x_scale_bias,
        )
        self.fc_out = nn.Linear(n_qubits, n_qubits)

    def forward(self, inputs):
        """Run ``inputs`` through the classical and quantum layers.

        A leading axis of size 1 is added before the first layer and removed
        again from the result.
        """
        out_fc_in = self.fc_in(inputs.unsqueeze(0))
        out_activation = self.activation(out_fc_in)
        out_q_layer = self.q_layer(out_activation)
        # The first squeeze drops the axis added above; the second is a no-op
        # unless the new leading axis also has size 1.
        out_fc_out = self.fc_out(out_q_layer).squeeze(0).squeeze(0)
        return out_fc_out
The gradients:
gradients
for epoch in range(self.epochs):
    # Per-epoch logs; one entry is appended per item yielded by the dataloader.
    u_out_arr = []
    d_u_dx_arr = []
    loss_single_batch_arr = []
    loss_single_batch_no_beta_arr = []
    for collocation_points in self.dataloader:
        self.opt.zero_grad(set_to_none=False)  # check if adding False makes any difference
        collocation_points = collocation_points.squeeze(-1)
        u_out = self.model(collocation_points)
        u_out_arr.append(u_out)  # for plotting and derivative calculation check

        d_u_dx = torch.zeros(self.n_points)
        for i in range(self.num_batches):
            # create_graph=True keeps the Jacobian differentiable so the loss
            # can backpropagate through the derivative term.
            jac_func = jacobian(self.model, collocation_points[i], create_graph=True, strict=True)
            # Only the main diagonal, d(output_k)/d(input_k), is kept. If the
            # model mixes input components (e.g. through a dense layer) the
            # discarded off-diagonal entries are non-zero and this diagonal is
            # a partial derivative rather than a total one.
            start = i * self.batch_size
            d_u_dx[start:start + self.batch_size] = torch.diag(jac_func, 0)
        d_u_dx_arr.append(d_u_dx)

        loss, loss_no_beta = self.loss_fn(collocation_points, u=u_out, d_u_dx=d_u_dx,
                                          target_fn=self.target_fn_dict)
        loss.backward()
        # Log a detached copy so the list does not keep every step's autograd
        # graph alive for the whole epoch.
        loss_single_batch_arr.append(loss.detach())
        # NOTE(review): loss_no_beta is stored as returned; if it is a tensor
        # attached to the graph it should probably be detached as well.
        loss_single_batch_no_beta_arr.append(loss_no_beta)
        self.opt.step()
When computing derivatives using `jacobian` (only the diagonal, because I need each output’s derivative with respect to the corresponding input):
- With `in_out_size=1`: the derivatives correctly correspond to the spatial derivatives.
- With `in_out_size=n_qubits`: the derivatives don’t match the expected spatial derivatives.
Question: Why does increasing input/output dimensions affect the derivative computation?
The same behavior is observed when I replace the quantum layer with `nn.Linear` or when I use `torch.gradient`, so I don’t think it’s due to the quantum circuit or to the way the derivative is computed.
results for the two cases:
Thanks in advance!