The problem is as the title says. I'm unsure why this is happening: when I print from the backward() method of my loss function, it prints the correct gradient tensor, but when I print the grad from the training loop (via the hook registered on the loss), it prints tensor(1.).
The code below leans heavily on the Qiskit library. The key value that feeds the gradient computation comes from state_fidelity(), which returns a scalar.
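To show the behaviour without any of the Qiskit machinery, here is a minimal sketch (ToyLoss and its hard-coded gradient are made up purely for demonstration) that prints the same pattern:

import torch

class ToyLoss(torch.autograd.Function):
    @staticmethod
    def forward(ctx, params):
        return params.sum()  # scalar loss, like the mean infidelity

    @staticmethod
    def backward(ctx, grad_output):
        grad = torch.tensor([[0.5, -0.5]])  # arbitrary stand-in for the real Jacobian
        print("inside backward:", grad)     # prints the full gradient, as expected
        return grad_output * grad

params = torch.randn(1, 2, requires_grad=True)
loss = ToyLoss.apply(params)
loss.register_hook(lambda g: print("hook on loss:", g))  # prints tensor(1.)
loss.backward()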
Example of some output (the loss line comes from the print in the training loop, the tensor(1.) from the register_hook on loss, and the nested list from the print(jacobian) inside backward()):
Training loss for batch 0: 0.008105693179143336
tensor(1.)
[[tensor(8.5487e-15), tensor(-2.8533e-14)]]
Training loss for batch 1: 0.568790329178134
tensor(1.)
[[tensor(0.), tensor(0.)]]
training loop:
epochs = 10
losses = []
loss_fn = SingleQubitGaussianLoss.apply

print(model)
model.train(True)
for epoch in range(epochs):
    print(f"\nEPOCH {epoch}")
    for i, data in enumerate(data_loader):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, inputs, labels)
        loss.register_hook(lambda grad: print(grad))  # this is the print that shows tensor(1.)
        if len(losses) < points:
            # first pass over the data: record baseline losses
            losses.append(loss.item())
            print(f"Training loss for batch {i}: {loss.item()}")
        else:
            # later passes: report the % change against the stored loss
            comp = (loss.item() - losses[i]) / losses[i] * 100
            losses[i] = loss.item()
            print(f"Training loss for batch {i}: {loss.item()}, {comp}% change")
        loss.backward()
        optimizer.step()
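The loop references a few names defined elsewhere in my script; roughly, the setup looks like this (the dataset contents and hyper-parameters below are placeholders, not my real data):

import torch
from torch.utils.data import DataLoader, TensorDataset

inits = torch.rand(32, 2)    # (theta, phi) angles describing the initial states
targets = torch.rand(32, 2)  # target statevector components
data_loader = DataLoader(TensorDataset(inits, targets), batch_size=8)

model = SingleQubitPulseModel()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
points = len(data_loader)    # number of per-batch losses kept in `losses`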
loss function:
import math
import torch
from qiskit.quantum_info import state_fidelity

# run_gaussian is my own wrapper around Qiskit's pulse simulation (defined elsewhere)

class SingleQubitGaussianLoss(torch.autograd.Function):
    # TODO somehow make duration a parameter
    @staticmethod
    def forward(ctx, params, inits, labels):
        ctx.save_for_backward(params)
        eval_inits = []
        eval_labels = []
        infidelity_list = []
        for pred, init, label in zip(params, inits, labels):
            amp, ln_sigma = pred
            amp = amp.item()
            ln_sigma = ln_sigma.item()
            # Bloch angles (theta, phi) -> statevector [cos(theta/2), e^{i*phi} sin(theta/2)]
            init_state = [torch.cos(init[0] / 2).item(),
                          torch.exp(init[1] * 1.j).item() * torch.sin(init[0] / 2).item()]
            eval_inits.append(init_state)
            job = run_gaussian(duration=128,
                               amp=process_amp(amp),
                               sigma=process_ln_width(ln_sigma),
                               init_state=init_state)
            sv = job.result().get_statevector()
            actual_sv = [label[0].item(), label[1].item(), 0]
            eval_labels.append(actual_sv)
            infidelity_list.append(1 - state_fidelity(sv, actual_sv))
        # stash everything backward() needs to rerun the simulations
        ctx.eval_inits = eval_inits
        ctx.eval_labels = eval_labels
        ctx.infidelity_list = infidelity_list
        # mean infidelity over the batch as a 0-dim double tensor
        return torch.DoubleTensor([sum(infidelity_list) / len(infidelity_list)])[0]
    @staticmethod
    def backward(ctx, grad_output):  # PyTorch passes the incoming gradient as the second argument
        h = 1e-3  # finite-difference step
        params, = ctx.saved_tensors
        jacobian = []
        for pred, init_state, actual_sv, infidelity in \
                zip(params, ctx.eval_inits, ctx.eval_labels, ctx.infidelity_list):
            amp, ln_sigma = pred
            amp = amp.item()
            ln_sigma = ln_sigma.item()
            # forward difference w.r.t. amp: rerun the simulation at amp + h
            amp_job = run_gaussian(duration=128,
                                   amp=process_amp(amp + h),
                                   sigma=process_ln_width(ln_sigma),
                                   init_state=init_state)
            amp_sv = amp_job.result().get_statevector()
            amp_infid = 1 - state_fidelity(actual_sv, amp_sv)
            grad_amp = (amp_infid - infidelity) / h
            # forward difference w.r.t. ln_sigma
            sigma_job = run_gaussian(duration=128,
                                     amp=process_amp(amp),
                                     sigma=process_ln_width(ln_sigma + h),
                                     init_state=init_state)
            sigma_sv = sigma_job.result().get_statevector()
            sigma_infid = 1 - state_fidelity(actual_sv, sigma_sv)
            grad_sigma = (sigma_infid - infidelity) / h
            jacobian.append([grad_amp, grad_sigma])
        print(jacobian)
        # chain rule: scale by the incoming gradient; no grads for inits or labels
        return grad_output * torch.DoubleTensor(jacobian), None, None
def process_amp(real: float, imag: float = 0):
    # TODO implement imaginary amps
    # logistic squash so the amplitude stays in (0, 1)
    return 1 / (1 + math.exp(-real))

def process_ln_width(ln_width: float):
    return ln_width
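The backward pass estimates each gradient with a one-sided forward difference, (f(x + h) − f(x)) / h, reusing the infidelity already computed in forward() as the base point. As a sanity check of that arithmetic, here is the same scheme applied to an ordinary differentiable function (toy example, no Qiskit involved):

import torch

def forward_diff_grad(f, x, h=1e-3):
    base = f(x)                      # one base evaluation, reused for every coordinate
    grad = torch.zeros_like(x)
    for j in range(x.numel()):
        xp = x.clone()
        xp.view(-1)[j] += h          # perturb a single coordinate
        grad.view(-1)[j] = (f(xp) - base) / h
    return grad

x = torch.tensor([0.3, -1.2])
print(forward_diff_grad(lambda v: (v ** 2).sum(), x))  # ≈ tensor([0.6, -2.4]); analytic gradient is 2*v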
model:
class SingleQubitPulseModel(torch.nn.Module):
    def __init__(self):
        super(SingleQubitPulseModel, self).__init__()
        # TODO find a better activation function; ReLU keeps producing 0 because amp is small
        self.linear1 = torch.nn.Linear(2, 64)
        self.relu1 = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(64, 64)
        self.relu2 = torch.nn.ReLU()
        self.linear3 = torch.nn.Linear(64, 2)
        self.relu3 = torch.nn.ReLU()

    def forward(self, x):
        x = self.linear1(x)
        x = self.relu1(x)
        x = self.linear2(x)
        x = self.relu2(x)
        x = self.linear3(x)
        x = self.relu3(x)
        return x
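For reference, a quick shape check of what the model hands to the loss function (the input values here are arbitrary); note that the final relu3 clamps both outputs to be non-negative, which is what the TODO above refers to:

model = SingleQubitPulseModel()
batch = torch.rand(4, 2)   # four (theta, phi) input pairs
out = model(batch)
print(out.shape)           # torch.Size([4, 2]): one (amp, ln_sigma) pair per sample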