Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Single gap GPSR #213

Merged
merged 12 commits into from
Jul 4, 2024
39 changes: 36 additions & 3 deletions docs/differentiation.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,32 @@ It uses the `torch` native automatic differentiation engine which tracks operati
The [adjoint differentiation mode](https://arxiv.org/abs/2009.02823) computes first-order gradients by only requiring at most three states in memory in `O(P)` time where `P` is the number of parameters in a circuit.

### Generalized Parameter-Shift rules (DiffMode.GPSR)
To be added.
The Generalized parameter shift rule (GPSR mode) is an extension of the well known [parameter shift rule (PSR)](https://arxiv.org/abs/1811.11184) algorithm [to arbitrary quantum operations](https://arxiv.org/abs/2108.01218). Indeed, PSR only works for quantum operations whose generator has a single gap in its eigenvalue spectrum, GPSR extending to multi-gap.

For this, we define the differentiable function as quantum expectation value

$$
f(x) = \left\langle 0\right|\hat{U}^{\dagger}(x)\hat{C}\hat{U}(x)\left|0\right\rangle
$$

where $\hat{U}(x)={\rm exp}{\left( -i\frac{x}{2}\hat{G}\right)}$ is the quantum evolution operator with generator $\hat{G}$ representing the structure of the underlying quantum circuit and $\hat{C}$ is the cost operator. Then using the eigenvalue spectrum $\left\{ \lambda_n\right\}$ of the generator $\hat{G}$ we calculate the full set of corresponding unique non-zero spectral gaps $\left\{ \Delta_s\right\}$ (differences between eigenvalues). It can be shown that the final expression of derivative of $f(x)$ is then given by the following expression:

$\begin{equation}
\frac{{\rm d}f\left(x\right)}{{\rm d}x}=\overset{S}{\underset{s=1}{\sum}}\Delta_{s}R_{s},
\end{equation}$

where $S$ is the number of unique non-zero spectral gaps and $R_s$ are real quantities that are solutions of a system of linear equations

$\begin{equation}
\begin{cases}
F_{1} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{1}\Delta_{s}}{2}\right)R_{s},\\
F_{2} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{2}\Delta_{s}}{2}\right)R_{s},\\
& ...\\
F_{S} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{M}\Delta_{s}}{2}\right)R_{s}.
\end{cases}
\end{equation}$

Here $F_s=f(x+\delta_s)-f(x-\delta_s)$ denotes the difference between values of functions evaluated at shifted arguments $x\pm\delta_s$.

### Example
```python exec="on" source="material-block" html="1"
Expand All @@ -23,7 +48,7 @@ batch_size = 1

ry = pyq.RY(0, param_name="x")
cnot = pyq.CNOT(1, 2)
ops = [ry, cnot]
ops = [ry]
n_qubits = 3
circ = pyq.QuantumCircuit(n_qubits, ops)

Expand All @@ -32,16 +57,24 @@ state = pyq.zero_state(n_qubits)

values_ad = {"x": torch.tensor([torch.pi / 2], requires_grad=True)}
values_adjoint = {"x": torch.tensor([torch.pi / 2], requires_grad=True)}
values_gpsr = {"x": torch.tensor([torch.pi / 2], requires_grad=True)}

exp_ad = pyq.expectation(circ, state, values_ad, obs, DiffMode.AD)
exp_adjoint = pyq.expectation(circ, state, values_adjoint, obs, DiffMode.ADJOINT)
exp_gpsr = pyq.expectation(circ, state, values_gpsr, obs, DiffMode.GPSR)

dfdx_ad = torch.autograd.grad(exp_ad, tuple(values_ad.values()), torch.ones_like(exp_ad))

dfdx_adjoint = torch.autograd.grad(
exp_adjoint, tuple(values_adjoint.values()), torch.ones_like(exp_adjoint)
)

assert len(dfdx_ad) == len(dfdx_adjoint)
dfdx_gpsr = torch.autograd.grad(
exp_gpsr, tuple(values_gpsr.values()), torch.ones_like(exp_gpsr)
)

assert len(dfdx_ad) == len(dfdx_adjoint) == len(dfdx_gpsr)
for i in range(len(dfdx_ad)):
assert torch.allclose(dfdx_ad[i], dfdx_adjoint[i])
assert torch.allclose(dfdx_ad[i], dfdx_gpsr[i])
```
5 changes: 4 additions & 1 deletion pyqtorch/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pyqtorch.adjoint import AdjointExpectation
from pyqtorch.analog import Observable
from pyqtorch.circuit import QuantumCircuit
from pyqtorch.gpsr import PSRExpectation
from pyqtorch.utils import DiffMode, inner_prod

logger = getLogger(__name__)
Expand Down Expand Up @@ -94,6 +95,8 @@ def expectation(
circuit, observable, state, values.keys(), *values.values()
)
elif diff_mode == DiffMode.GPSR:
raise NotImplementedError("To be added.")
return PSRExpectation.apply(
circuit, observable, state, values.keys(), *values.values()
)
else:
logger.error(f"Requested diff_mode '{diff_mode}' not supported.")
152 changes: 152 additions & 0 deletions pyqtorch/gpsr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from __future__ import annotations

from logging import getLogger
from typing import Any, Tuple

import torch
from torch import Tensor, no_grad
from torch.autograd import Function

import pyqtorch as pyq
from pyqtorch.analog import HamiltonianEvolution, Observable, Scale
from pyqtorch.circuit import QuantumCircuit
from pyqtorch.parametric import Parametric
from pyqtorch.utils import inner_prod, param_dict

logger = getLogger(__name__)


class PSRExpectation(Function):
r"""
Implementation of the generalized parameter shift rule.

Compared to the original parameter shift rule
which only works for quantum operations whose generator has a single gap
in its eigenvalue spectrum, GPSR works with arbitrary
generators of quantum operations.

For this, we define the differentiable function as quantum expectation value

$$
f(x) = \left\langle 0\right|\hat{U}^{\dagger}(x)\hat{C}\hat{U}(x)\left|0\right\rangle
$$

where $\hat{U}(x)={\rm exp}{\left( -i\frac{x}{2}\hat{G}\right)}$
is the quantum evolution operator with generator $\hat{G}$ representing the structure
of the underlying quantum circuit and $\hat{C}$ is the cost operator.
Then using the eigenvalue spectrum $\left\{ \lambda_n\right\}$ of the generator $\hat{G}$
we calculate the full set of corresponding unique non-zero spectral gaps
$\left\{ \Delta_s\right\}$ (differences between eigenvalues).
It can be shown that the final expression of derivative of $f(x)$
is then given by the following expression:

$\begin{equation}
\frac{{\rm d}f\left(x\right)}{{\rm d}x}=\overset{S}{\underset{s=1}{\sum}}\Delta_{s}R_{s},
\end{equation}$

where $S$ is the number of unique non-zero spectral gaps and $R_s$ are real quantities that
are solutions of a system of linear equations

$\begin{equation}
\begin{cases}
F_{1} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}
\left(\frac{\delta_{1}\Delta_{s}}{2}\right)R_{s},\\
F_{2} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}
\left(\frac{\delta_{2}\Delta_{s}}{2}\right)R_{s},\\
& ...\\
F_{S} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}
\left(\frac{\delta_{M}\Delta_{s}}{2}\right)R_{s}.
\end{cases}
\end{equation}$

Here $F_s=f(x+\delta_s)-f(x-\delta_s)$ denotes the difference between values
of functions evaluated at shifted arguments $x\pm\delta_s$.

Arguments:
circuit: A QuantumCircuit instance
observable: A hamiltonian.
state: A state in the form of [2 * n_qubits + [batch_size]]
param_names: A list of parameter names.
*param_values: A unpacked tensor of values for each parameter.
"""

@staticmethod
@no_grad()
def forward(
ctx: Any,
circuit: QuantumCircuit,
observable: Observable,
state: Tensor,
param_names: list[str],
*param_values: Tensor,
) -> Tensor:
ctx.circuit = circuit
ctx.observable = observable
ctx.param_names = param_names
ctx.state = state
values = param_dict(param_names, param_values)
ctx.out_state = circuit.run(state, values)
ctx.projected_state = observable.run(ctx.out_state, values)
ctx.save_for_backward(*param_values)
return inner_prod(ctx.out_state, ctx.projected_state).real

@staticmethod
def backward(ctx: Any, grad_out: Tensor) -> Tuple[None, ...]:
check_support_psr(ctx.circuit)
param_values = ctx.saved_tensors
values = param_dict(ctx.param_names, param_values)
grads_dict = {k: None for k in values.keys()}
shift = torch.tensor(torch.pi) / 2.0

for op in ctx.circuit.flatten():
if isinstance(op, Parametric) and isinstance(op.param_name, str):
spectrum = torch.linalg.eigvalsh(op.pauli).reshape(-1, 1)
spectral_gap = torch.unique(
torch.abs(torch.tril(spectrum - spectrum.T))
)
spectral_gap = spectral_gap[spectral_gap.nonzero()]
assert (
len(spectral_gap) == 1
), "PSRExpectation only works on single_gap for now."
chMoussa marked this conversation as resolved.
Show resolved Hide resolved

if values[op.param_name].requires_grad:
with no_grad():
copied_values = values.copy()
copied_values[op.param_name] += shift
f_plus = pyq.expectation(
ctx.circuit, ctx.state, copied_values, ctx.observable
)
copied_values[op.param_name] -= 2.0 * shift
f_min = pyq.expectation(
ctx.circuit, ctx.state, copied_values, ctx.observable
)

grad = (
spectral_gap
* (f_plus - f_min)
/ (4 * torch.sin(spectral_gap * shift / 2))
)
grad *= grad_out
if grads_dict[op.param_name] is not None:
grads_dict[op.param_name] += grad
else:
grads_dict[op.param_name] = grad
else:
logger.error(f"PSRExpectation does not support operation: {type(op)}.")
return (None, None, None, None, *grads_dict.values())


def check_support_psr(circuit: QuantumCircuit):
"""Checking that circuit has only compatible operations for PSR.

Args:
circuit (QuantumCircuit): Circuit to check.

Raises:
ValueError: When circuit contains Scale or HamiltonianEvolution.
"""
for op in circuit.operations:
if isinstance(op, Scale) or isinstance(op, HamiltonianEvolution):
raise ValueError(
f"PSR is not applicable as circuit contains an operation of type: {type(op)}."
)
1 change: 1 addition & 0 deletions pyqtorch/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class DiffMode(StrEnum):
"""An implementation of "Efficient calculation of gradients
in classical simulations of variational quantum algorithms",
Jones & Gacon, 2020"""

GPSR = "gpsr"
"""To be added."""

Expand Down
86 changes: 81 additions & 5 deletions tests/test_circuit.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,21 @@
from pyqtorch.noise import Noise
from pyqtorch.parametric import Parametric
from pyqtorch.primitive import Primitive
from pyqtorch.utils import GRADCHECK_ATOL, DensityMatrix, product_state
from pyqtorch.utils import (
GRADCHECK_ATOL,
DensityMatrix,
product_state,
)


def test_adjoint_diff() -> None:
# TODO add GPSR when multigap is implemented for this test
@pytest.mark.parametrize("n_qubits", [3, 4, 5])
def test_adjoint_diff(n_qubits: int) -> None:
rx = pyq.RX(0, param_name="theta_0")
cry = pyq.CPHASE(0, 1, param_name="theta_1")
rz = pyq.RZ(2, param_name="theta_2")
cnot = pyq.CNOT(1, 2)
ops = [rx, cry, rz, cnot]
n_qubits = 3
circ = pyq.QuantumCircuit(n_qubits, ops)
obs = pyq.QuantumCircuit(n_qubits, [pyq.Z(0)])

Expand Down Expand Up @@ -60,7 +65,7 @@ def test_adjoint_diff() -> None:

assert len(grad_ad) == len(grad_adjoint)
for i in range(len(grad_ad)):
assert torch.allclose(grad_ad[i], grad_adjoint[i])
assert torch.allclose(grad_ad[i], grad_adjoint[i], atol=GRADCHECK_ATOL)


@pytest.mark.parametrize("dtype", [torch.complex64, torch.complex128])
Expand Down Expand Up @@ -155,7 +160,7 @@ def test_adjoint_duplicate_params() -> None:
grad_adjoint = torch.autograd.grad(
exp_adjoint, tuple(values.values()), torch.ones_like(exp_adjoint)
)[0]
assert torch.allclose(grad_ad, grad_adjoint)
assert torch.allclose(grad_ad, grad_adjoint, atol=GRADCHECK_ATOL)


@pytest.mark.parametrize("fn", [pyq.X, pyq.Z, pyq.Y])
Expand Down Expand Up @@ -310,3 +315,74 @@ def test_sample_run() -> None:
assert torch.allclose(wf, product_state("1100"))
assert torch.allclose(pyq.QuantumCircuit(4, [pyq.I(0)]).run("1100"), wf)
assert "1100" in samples[0]


# TODO delete this test when first one up is
@pytest.mark.parametrize("n_qubits", [3, 4, 5])
def test_all_diff(n_qubits: int) -> None:
rx = pyq.RX(0, param_name="theta_0")
rz = pyq.RZ(2, param_name="theta_1")
cnot = pyq.CNOT(1, 2)
ops = [rx, rz, cnot]
circ = pyq.QuantumCircuit(n_qubits, ops)
obs = pyq.QuantumCircuit(n_qubits, [pyq.Z(0)])

theta_0_value = torch.pi / 2
theta_1_value = torch.pi

state = pyq.zero_state(n_qubits)

theta_0 = torch.tensor([theta_0_value], requires_grad=True)

theta_1 = torch.tensor([theta_1_value], requires_grad=True)

values = {"theta_0": theta_0, "theta_1": theta_1}

exp_ad = expectation(circ, state, values, obs, DiffMode.AD)
exp_adjoint = expectation(circ, state, values, obs, DiffMode.ADJOINT)
exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR)

grad_ad = torch.autograd.grad(
exp_ad, tuple(values.values()), torch.ones_like(exp_ad)
)

grad_adjoint = torch.autograd.grad(
exp_adjoint, tuple(values.values()), torch.ones_like(exp_adjoint)
)

grad_gpsr = torch.autograd.grad(
exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr)
)

assert len(grad_ad) == len(grad_adjoint) == len(grad_gpsr)

for i in range(len(grad_ad)):
assert torch.allclose(
grad_ad[i], grad_adjoint[i], atol=GRADCHECK_ATOL
) and torch.allclose(grad_ad[i], grad_gpsr[i], atol=GRADCHECK_ATOL)


@pytest.mark.xfail(raises=ValueError)
@pytest.mark.parametrize("gate_type", ["scale", "hamevo"])
def test_compatibility_gpsr(gate_type: str) -> None:

if gate_type == "scale":
seq_gate = pyq.Sequence([pyq.X(0)])
scale = pyq.Scale(seq_gate, "theta_0")
ops = [scale]
else:
hamevo = pyq.HamiltonianEvolution(pyq.Sequence([pyq.X(0)]), "theta_0", (0,))

ops = [hamevo]

circ = pyq.QuantumCircuit(1, ops)
obs = pyq.QuantumCircuit(1, [pyq.Z(0)])
state = pyq.zero_state(1)

param_value = torch.pi / 2
values = {"theta_0": torch.tensor([param_value], requires_grad=True)}
exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR)

grad_gpsr = torch.autograd.grad(
exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr)
)
Loading