"""
registry --- Molecule registry and factory functions
====================================================
Defines a registry of standard molecular benchmarks used in quantum
chemistry. Each entry provides a factory function that computes
molecular integrals and constructs a :class:`MolecularHamiltonian`.
The registry covers a range of system sizes from H2 (4 qubits) to
C2H4 (28 qubits), enabling systematic benchmarking of SQD/SKQD methods.
Constants
---------
MOLECULE_REGISTRY
Dictionary mapping lowercase molecule names to factory metadata.
Functions
---------
get_molecule
Instantiate a Hamiltonian for a named molecule.
list_molecules
Return sorted list of registered molecule names.
"""
from __future__ import annotations
import logging
from typing import Any
from qvartools.hamiltonians.integrals import _FCI_CONFIG_LIMIT
from qvartools.hamiltonians.molecular import (
MolecularHamiltonian,
compute_molecular_integrals,
)
__all__ = [
"MOLECULE_REGISTRY",
"get_molecule",
"get_molecule_info",
"list_molecules",
]
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Geometry definitions
# ---------------------------------------------------------------------------
_H2_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("H", (0.0, 0.0, 0.0)),
("H", (0.0, 0.0, 0.74)),
]
_LIH_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("Li", (0.0, 0.0, 0.0)),
("H", (0.0, 0.0, 1.6)),
]
_BEH2_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("Be", (0.0, 0.0, 0.0)),
("H", (0.0, 0.0, 1.33)),
("H", (0.0, 0.0, -1.33)),
]
# H2O geometry from parametric (OH=0.96 Å, angle=104.5°)
import math as _math
_H2O_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("O", (0.0, 0.0, 0.0)),
("H", (0.96, 0.0, 0.0)),
(
"H",
(
0.96 * _math.cos(_math.radians(104.5)),
0.96 * _math.sin(_math.radians(104.5)),
0.0,
),
),
]
_NH3_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("N", (0.0, 0.0, 0.0)),
("H", (0.0, -0.9377, -0.3816)),
("H", (0.8121, 0.4689, -0.3816)),
("H", (-0.8121, 0.4689, -0.3816)),
]
_N2_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("N", (0.0, 0.0, 0.0)),
("N", (0.0, 0.0, 1.0977)),
]
_CH4_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("C", (0.0, 0.0, 0.0)),
("H", (0.6276, 0.6276, 0.6276)),
("H", (0.6276, -0.6276, -0.6276)),
("H", (-0.6276, 0.6276, -0.6276)),
("H", (-0.6276, -0.6276, 0.6276)),
]
_C2H4_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("C", (0.0, 0.0, 0.6695)),
("C", (0.0, 0.0, -0.6695)),
("H", (0.0, 0.9289, 1.2321)),
("H", (0.0, -0.9289, 1.2321)),
("H", (0.0, 0.9289, -1.2321)),
("H", (0.0, -0.9289, -1.2321)),
]
_CO_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("C", (0.0, 0.0, 0.0)),
("O", (0.0, 0.0, 1.13)),
]
_HCN_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("H", (0.0, 0.0, 0.0)),
("C", (0.0, 0.0, 1.06)),
("N", (0.0, 0.0, 2.22)),
]
_C2H2_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("H", (0.0, 0.0, 0.0)),
("C", (0.0, 0.0, 1.06)),
("C", (0.0, 0.0, 2.26)),
("H", (0.0, 0.0, 3.32)),
]
_H2S_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("S", (0.0, 0.0, 0.0)),
("H", (1.34, 0.0, 0.0)),
("H", (-0.0497, 1.3391, 0.0)),
]
# ---------------------------------------------------------------------------
# Factory functions
# ---------------------------------------------------------------------------
def _make_h2(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create H2 Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_H2_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("H2", 4, "sto-3g", _H2_GEOMETRY, 0, 0)
return ham, info
def _make_lih(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create LiH Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_LIH_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("LiH", 12, "sto-3g", _LIH_GEOMETRY, 0, 0)
return ham, info
def _make_beh2(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create BeH2 Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_BEH2_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("BeH2", 14, "sto-3g", _BEH2_GEOMETRY, 0, 0)
return ham, info
def _make_h2o(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create H2O Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_H2O_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("H2O", 14, "sto-3g", _H2O_GEOMETRY, 0, 0)
return ham, info
def _make_nh3(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create NH3 Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_NH3_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("NH3", 16, "sto-3g", _NH3_GEOMETRY, 0, 0)
return ham, info
def _make_n2(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create N2 Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_N2_GEOMETRY, basis="cc-pvdz", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("N2", 20, "cc-pvdz", _N2_GEOMETRY, 0, 0)
return ham, info
def _make_ch4(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create CH4 Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_CH4_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("CH4", 18, "sto-3g", _CH4_GEOMETRY, 0, 0)
return ham, info
def _make_c2h4(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create C2H4 (ethylene) Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_C2H4_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("C2H4", 28, "sto-3g", _C2H4_GEOMETRY, 0, 0)
return ham, info
def _make_co(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create CO (carbon monoxide) Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_CO_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("CO", 20, "sto-3g", _CO_GEOMETRY, 0, 0)
return ham, info
def _make_hcn(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create HCN (hydrogen cyanide) Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_HCN_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("HCN", 22, "sto-3g", _HCN_GEOMETRY, 0, 0)
return ham, info
def _make_c2h2(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create C2H2 (acetylene) Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_C2H2_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("C2H2", 24, "sto-3g", _C2H2_GEOMETRY, 0, 0)
return ham, info
def _make_h2s(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create H2S (hydrogen sulfide) Hamiltonian and info dict.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)``.
"""
integrals = compute_molecular_integrals(
geometry=_H2S_GEOMETRY, basis="sto-3g", charge=0, spin=0
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("H2S", 26, "sto-3g", _H2S_GEOMETRY, 0, 0)
return ham, info
# ---------------------------------------------------------------------------
# CAS molecule geometries
# ---------------------------------------------------------------------------
# N₂ CAS uses 1.10 Å bond length (standard for CAS benchmarks in Flow-Guided-Krylov),
# distinct from the full-space N₂ which uses 1.0977 Å (NIST equilibrium).
_N2_CAS_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("N", (0.0, 0.0, 0.0)),
("N", (0.0, 0.0, 1.10)),
]
_CR2_GEOMETRY: list[tuple[str, tuple[float, float, float]]] = [
("Cr", (0.0, 0.0, 0.0)),
("Cr", (0.0, 0.0, 1.68)),
]
def _benzene_geometry() -> list[tuple[str, tuple[float, float, float]]]:
"""Regular hexagon benzene geometry (C-C = 1.40 A, C-H = 1.08 A)."""
import math
cc, ch = 1.40, 1.08
geom: list[tuple[str, tuple[float, float, float]]] = []
for i in range(6):
angle = math.pi / 3 * i
geom.append(("C", (cc * math.cos(angle), cc * math.sin(angle), 0.0)))
geom.append(
("H", ((cc + ch) * math.cos(angle), (cc + ch) * math.sin(angle), 0.0))
)
return geom
# ---------------------------------------------------------------------------
# CAS molecule factory functions
# ---------------------------------------------------------------------------
def _make_n2_cas(
nelecas: int, ncas: int, basis: str, device: str = "cpu"
) -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create N₂ CAS Hamiltonian with specified active space.
Parameters
----------
nelecas : int
Number of active electrons.
ncas : int
Number of active orbitals.
basis : str
Gaussian basis set name (e.g. ``"cc-pvdz"``).
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)`` where ``info_dict["is_cas"]`` is ``True``.
Raises
------
ImportError
If PySCF is not installed.
"""
use_casci = ncas >= 15
integrals = compute_molecular_integrals(
_N2_CAS_GEOMETRY, basis=basis, cas=(nelecas, ncas), casci=use_casci
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info(
f"N2-CAS({nelecas},{ncas})", 2 * ncas, basis, _N2_CAS_GEOMETRY, 0, 0
)
info["is_cas"] = True
return ham, info
def _make_cr2(
basis: str = "sto-3g",
cas: tuple[int, int] = (12, 12),
device: str = "cpu",
) -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create Cr₂ CAS Hamiltonian with fix_spin_(ss=0) for singlet.
Without fix_spin_, CASSCF converges to the septet (S=3) instead of the
singlet ground state. Uses ROHF fallback if RHF doesn't converge.
Auto-CASCI for large active spaces (ncas >= 15).
Parameters
----------
basis : str, optional
Gaussian basis set name (default ``"sto-3g"``).
cas : tuple of (int, int), optional
``(nelecas, ncas)`` active-space specification (default ``(12, 12)``).
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)`` where ``info_dict["is_cas"]`` is ``True``.
Raises
------
ImportError
If PySCF is not installed.
"""
import warnings
from math import comb as _comb
try:
from pyscf import ao2mo, fci, gto, mcscf, scf
except ImportError as exc:
raise ImportError("PySCF is required for Cr₂. pip install pyscf") from exc
nelecas, ncas = cas
mol = gto.M(
atom="Cr 0 0 0; Cr 0 0 1.68", basis=basis, spin=0, symmetry=True, verbose=0
)
mf = scf.RHF(mol)
mf.max_cycle = 300
mf.kernel()
if not mf.converged:
warnings.warn("RHF did not converge for Cr₂, trying ROHF.", stacklevel=2)
mf = scf.ROHF(mol)
mf.max_cycle = 300
mf.kernel()
if not mf.converged:
warnings.warn(
"ROHF also did not converge for Cr₂. Results may be unreliable.",
stacklevel=2,
)
# Estimate config count for auto-CASCI
n_alpha_cas = nelecas // 2
n_beta_cas = nelecas // 2
n_configs = _comb(ncas, n_alpha_cas) * _comb(ncas, n_beta_cas)
use_casci = ncas >= 15 or n_configs > _FCI_CONFIG_LIMIT
if use_casci:
mc = mcscf.CASCI(mf, ncas=ncas, nelecas=nelecas)
mc.fix_spin_(ss=0) # Enforce singlet (needed for CASCI too)
else:
mc = mcscf.CASSCF(mf, ncas=ncas, nelecas=nelecas)
mc.fix_spin_(ss=0) # Enforce singlet
# Linear molecules need non-symmetry FCI solver
if mol.symmetry and mol.topgroup in ("Dooh", "Coov"):
mc.fcisolver = fci.direct_spin1.FCISolver(mol)
if use_casci and n_configs > _FCI_CONFIG_LIMIT:
logger.info(
"Skipping FCI for Cr₂ CAS(%d,%d): %s configs.",
nelecas,
ncas,
f"{n_configs:,}",
)
else:
mc.kernel()
if not use_casci and not mc.converged:
warnings.warn(
f"CASSCF did not converge for Cr₂ CAS({nelecas},{ncas}).", stacklevel=2
)
h1e_cas, e_core = mc.h1e_for_cas()
active_mo = mc.mo_coeff[:, mc.ncore : mc.ncore + mc.ncas]
h2e_cas = ao2mo.full(mol, active_mo)
h2e_cas = ao2mo.restore(1, h2e_cas, ncas)
import numpy as np
from qvartools.hamiltonians.integrals import MolecularIntegrals
integrals = MolecularIntegrals(
h1e=np.asarray(h1e_cas, dtype=np.float64),
h2e=np.asarray(h2e_cas, dtype=np.float64),
nuclear_repulsion=float(e_core),
n_electrons=nelecas,
n_orbitals=ncas,
n_alpha=n_alpha_cas,
n_beta=n_beta_cas,
)
ham = MolecularHamiltonian(integrals, device=device)
name = (
"Cr2" if cas == (12, 12) and basis == "sto-3g" else f"Cr2-CAS({nelecas},{ncas})"
)
info = _build_info(name, 2 * ncas, basis, list(_CR2_GEOMETRY), 0, 0)
info["is_cas"] = True
return ham, info
def _make_benzene(device: str = "cpu") -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create Benzene CAS(6,15) Hamiltonian.
Uses CASCI (no orbital optimisation) because ncas >= 15.
Parameters
----------
device : str, optional
Torch device (default ``"cpu"``).
Returns
-------
tuple
``(hamiltonian, info_dict)`` where ``info_dict["is_cas"]`` is ``True``.
"""
geom = _benzene_geometry()
integrals = compute_molecular_integrals(
geom, basis="sto-3g", cas=(6, 15), casci=True
)
ham = MolecularHamiltonian(integrals, device=device)
info = _build_info("Benzene", 30, "sto-3g", geom, 0, 0)
info["is_cas"] = True
return ham, info
# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------
def _build_info(
name: str,
n_qubits: int,
basis: str,
geometry: list[tuple[str, tuple[float, float, float]]],
charge: int,
spin: int,
) -> dict[str, Any]:
"""Build a standardised molecule info dictionary.
Parameters
----------
name : str
Molecule name.
n_qubits : int
Number of qubits (spin-orbitals).
basis : str
Gaussian basis set.
geometry : list
Atomic geometry.
charge : int
Net molecular charge.
spin : int
Spin multiplicity minus one (2S).
Returns
-------
dict
Info dictionary with keys ``name``, ``n_qubits``, ``basis``,
``geometry``, ``charge``, ``spin``.
"""
return {
"name": name,
"n_qubits": n_qubits,
"basis": basis,
"geometry": geometry,
"charge": charge,
"spin": spin,
}
# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
MOLECULE_REGISTRY: dict[str, dict[str, Any]] = {
"h2": {
"factory": _make_h2,
"n_qubits": 4,
"description": "Hydrogen molecule (minimal basis)",
"basis": "sto-3g",
},
"lih": {
"factory": _make_lih,
"n_qubits": 12,
"description": "Lithium hydride",
"basis": "sto-3g",
},
"beh2": {
"factory": _make_beh2,
"n_qubits": 14,
"description": "Beryllium dihydride",
"basis": "sto-3g",
},
"h2o": {
"factory": _make_h2o,
"n_qubits": 14,
"description": "Water molecule",
"basis": "sto-3g",
},
"nh3": {
"factory": _make_nh3,
"n_qubits": 16,
"description": "Ammonia",
"basis": "sto-3g",
},
"n2": {
"factory": _make_n2,
"n_qubits": 20,
"description": "Nitrogen molecule (cc-pVDZ basis)",
"basis": "cc-pvdz",
},
"ch4": {
"factory": _make_ch4,
"n_qubits": 18,
"description": "Methane",
"basis": "sto-3g",
},
"c2h4": {
"factory": _make_c2h4,
"n_qubits": 28,
"description": "Ethylene (minimal basis)",
"basis": "sto-3g",
},
"co": {
"factory": _make_co,
"n_qubits": 20,
"description": "Carbon monoxide (STO-3G)",
"basis": "sto-3g",
},
"hcn": {
"factory": _make_hcn,
"n_qubits": 22,
"description": "Hydrogen cyanide (STO-3G)",
"basis": "sto-3g",
},
"c2h2": {
"factory": _make_c2h2,
"n_qubits": 24,
"description": "Acetylene (STO-3G)",
"basis": "sto-3g",
},
"h2s": {
"factory": _make_h2s,
"n_qubits": 26,
"description": "Hydrogen sulfide (STO-3G)",
"basis": "sto-3g",
},
# --- CAS active-space systems (24-58 qubits) ---
"n2-cas(10,12)": {
"factory": lambda device="cpu": _make_n2_cas(10, 12, "cc-pvdz", device),
"n_qubits": 24,
"description": "Nitrogen CAS(10,12) cc-pVDZ",
"basis": "cc-pvdz",
"is_cas": True,
},
"cr2": {
"factory": lambda device="cpu": _make_cr2("sto-3g", (12, 12), device),
"n_qubits": 24,
"description": "Chromium dimer CAS(12,12) STO-3G",
"basis": "sto-3g",
"is_cas": True,
},
"n2-cas(10,15)": {
"factory": lambda device="cpu": _make_n2_cas(10, 15, "cc-pvdz", device),
"n_qubits": 30,
"description": "Nitrogen CAS(10,15) cc-pVDZ",
"basis": "cc-pvdz",
"is_cas": True,
},
"benzene": {
"factory": _make_benzene,
"n_qubits": 30,
"description": "Benzene CAS(6,15) STO-3G",
"basis": "sto-3g",
"is_cas": True,
},
"n2-cas(10,17)": {
"factory": lambda device="cpu": _make_n2_cas(10, 17, "cc-pvdz", device),
"n_qubits": 34,
"description": "Nitrogen CAS(10,17) cc-pVDZ",
"basis": "cc-pvdz",
"is_cas": True,
},
"cr2-cas(12,18)": {
"factory": lambda device="cpu": _make_cr2("cc-pvdz", (12, 18), device),
"n_qubits": 36,
"description": "Chromium dimer CAS(12,18) cc-pVDZ",
"basis": "cc-pvdz",
"is_cas": True,
},
"n2-cas(10,20)": {
"factory": lambda device="cpu": _make_n2_cas(10, 20, "cc-pvtz", device),
"n_qubits": 40,
"description": "Nitrogen CAS(10,20) cc-pVTZ",
"basis": "cc-pvtz",
"is_cas": True,
},
"cr2-cas(12,20)": {
"factory": lambda device="cpu": _make_cr2("cc-pvdz", (12, 20), device),
"n_qubits": 40,
"description": "Chromium dimer CAS(12,20) cc-pVDZ",
"basis": "cc-pvdz",
"is_cas": True,
},
"n2-cas(10,26)": {
"factory": lambda device="cpu": _make_n2_cas(10, 26, "cc-pvtz", device),
"n_qubits": 52,
"description": "Nitrogen CAS(10,26) cc-pVTZ",
"basis": "cc-pvtz",
"is_cas": True,
},
"cr2-cas(12,26)": {
"factory": lambda device="cpu": _make_cr2("cc-pvdz", (12, 26), device),
"n_qubits": 52,
"description": "Chromium dimer CAS(12,26) cc-pVDZ",
"basis": "cc-pvdz",
"is_cas": True,
},
"cr2-cas(12,28)": {
"factory": lambda device="cpu": _make_cr2("cc-pvdz", (12, 28), device),
"n_qubits": 56,
"description": "Chromium dimer CAS(12,28) cc-pVDZ",
"basis": "cc-pvdz",
"is_cas": True,
},
"cr2-cas(12,29)": {
"factory": lambda device="cpu": _make_cr2("cc-pvdz", (12, 29), device),
"n_qubits": 58,
"description": "Chromium dimer CAS(12,29) cc-pVDZ",
"basis": "cc-pvdz",
"is_cas": True,
},
}
_MOLECULE_INFO_REGISTRY: dict[str, dict[str, Any]] = {
"h2": _build_info("H2", 4, "sto-3g", _H2_GEOMETRY, 0, 0),
"lih": _build_info("LiH", 12, "sto-3g", _LIH_GEOMETRY, 0, 0),
"beh2": _build_info("BeH2", 14, "sto-3g", _BEH2_GEOMETRY, 0, 0),
"h2o": _build_info("H2O", 14, "sto-3g", _H2O_GEOMETRY, 0, 0),
"nh3": _build_info("NH3", 16, "sto-3g", _NH3_GEOMETRY, 0, 0),
"n2": _build_info("N2", 20, "cc-pvdz", _N2_GEOMETRY, 0, 0),
"ch4": _build_info("CH4", 18, "sto-3g", _CH4_GEOMETRY, 0, 0),
"c2h4": _build_info("C2H4", 28, "sto-3g", _C2H4_GEOMETRY, 0, 0),
"co": _build_info("CO", 20, "sto-3g", _CO_GEOMETRY, 0, 0),
"hcn": _build_info("HCN", 22, "sto-3g", _HCN_GEOMETRY, 0, 0),
"c2h2": _build_info("C2H2", 24, "sto-3g", _C2H2_GEOMETRY, 0, 0),
"h2s": _build_info("H2S", 26, "sto-3g", _H2S_GEOMETRY, 0, 0),
# --- CAS active-space systems ---
"n2-cas(10,12)": {
**_build_info("N2-CAS(10,12)", 24, "cc-pvdz", list(_N2_CAS_GEOMETRY), 0, 0),
"is_cas": True,
},
"cr2": {
**_build_info("Cr2", 24, "sto-3g", list(_CR2_GEOMETRY), 0, 0),
"is_cas": True,
},
"n2-cas(10,15)": {
**_build_info("N2-CAS(10,15)", 30, "cc-pvdz", list(_N2_CAS_GEOMETRY), 0, 0),
"is_cas": True,
},
"benzene": {
**_build_info("Benzene", 30, "sto-3g", _benzene_geometry(), 0, 0),
"is_cas": True,
},
"n2-cas(10,17)": {
**_build_info("N2-CAS(10,17)", 34, "cc-pvdz", list(_N2_CAS_GEOMETRY), 0, 0),
"is_cas": True,
},
"cr2-cas(12,18)": {
**_build_info("Cr2-CAS(12,18)", 36, "cc-pvdz", list(_CR2_GEOMETRY), 0, 0),
"is_cas": True,
},
"n2-cas(10,20)": {
**_build_info("N2-CAS(10,20)", 40, "cc-pvtz", list(_N2_CAS_GEOMETRY), 0, 0),
"is_cas": True,
},
"cr2-cas(12,20)": {
**_build_info("Cr2-CAS(12,20)", 40, "cc-pvdz", list(_CR2_GEOMETRY), 0, 0),
"is_cas": True,
},
"n2-cas(10,26)": {
**_build_info("N2-CAS(10,26)", 52, "cc-pvtz", list(_N2_CAS_GEOMETRY), 0, 0),
"is_cas": True,
},
"cr2-cas(12,26)": {
**_build_info("Cr2-CAS(12,26)", 52, "cc-pvdz", list(_CR2_GEOMETRY), 0, 0),
"is_cas": True,
},
"cr2-cas(12,28)": {
**_build_info("Cr2-CAS(12,28)", 56, "cc-pvdz", list(_CR2_GEOMETRY), 0, 0),
"is_cas": True,
},
"cr2-cas(12,29)": {
**_build_info("Cr2-CAS(12,29)", 58, "cc-pvdz", list(_CR2_GEOMETRY), 0, 0),
"is_cas": True,
},
}
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
[docs]
def get_molecule(
name: str, device: str = "cpu"
) -> tuple[MolecularHamiltonian, dict[str, Any]]:
"""Create a Hamiltonian and info dict for a named molecule.
Looks up the molecule in :data:`MOLECULE_REGISTRY`, runs the PySCF
integral computation, and constructs a :class:`MolecularHamiltonian`.
Parameters
----------
name : str
Molecule name (case-insensitive). Must be a key in
:data:`MOLECULE_REGISTRY`.
device : str, optional
Torch device for the Hamiltonian (default ``"cpu"``).
Returns
-------
hamiltonian : MolecularHamiltonian
The molecular Hamiltonian ready for diagonalisation.
info : dict
Metadata dictionary with keys ``name``, ``n_qubits``, ``basis``,
``geometry``, ``charge``, ``spin``.
Raises
------
KeyError
If ``name`` is not found in the registry.
Examples
--------
>>> ham, info = get_molecule("H2")
>>> info["n_qubits"]
4
>>> ham.num_sites
4
"""
key = name.lower().strip()
if key not in MOLECULE_REGISTRY:
available = ", ".join(sorted(MOLECULE_REGISTRY.keys()))
raise KeyError(f"Unknown molecule {name!r}. Available: {available}")
entry = MOLECULE_REGISTRY[key]
factory = entry["factory"]
logger.info(
"Creating molecule %r (%d qubits, %s basis)",
key,
entry["n_qubits"],
entry["basis"],
)
return factory(device=device)
[docs]
def get_molecule_info(name: str) -> dict[str, Any]:
"""Return molecule metadata without constructing a Hamiltonian.
Parameters
----------
name : str
Molecule name (case-insensitive). Must be a key in
:data:`MOLECULE_REGISTRY`.
Returns
-------
dict
Metadata dictionary with keys ``name``, ``n_qubits``, ``basis``,
``geometry``, ``charge``, ``spin``.
Raises
------
KeyError
If ``name`` is not found in the registry.
"""
key = name.lower().strip()
if key not in MOLECULE_REGISTRY:
available = ", ".join(sorted(MOLECULE_REGISTRY.keys()))
raise KeyError(f"Unknown molecule {name!r}. Available: {available}")
if key not in _MOLECULE_INFO_REGISTRY:
raise KeyError(
f"Molecule {name!r} is in MOLECULE_REGISTRY but missing from "
f"_MOLECULE_INFO_REGISTRY. Both registries must be kept in sync."
)
# Return a copy to prevent callers from mutating global state
info = dict(_MOLECULE_INFO_REGISTRY[key])
if "geometry" in info:
info["geometry"] = list(info["geometry"])
return info
# Validate registry consistency at import time (survives python -O)
if set(MOLECULE_REGISTRY.keys()) != set(_MOLECULE_INFO_REGISTRY.keys()):
raise RuntimeError(
"MOLECULE_REGISTRY and _MOLECULE_INFO_REGISTRY are out of sync: "
f"missing in info: {set(MOLECULE_REGISTRY) - set(_MOLECULE_INFO_REGISTRY)}, "
f"extra in info: {set(_MOLECULE_INFO_REGISTRY) - set(MOLECULE_REGISTRY)}"
)
[docs]
def list_molecules() -> list[str]:
"""Return a sorted list of available molecule names.
Returns
-------
list of str
Registered molecule names in alphabetical order.
Examples
--------
>>> names = list_molecules()
>>> "h2" in names
True
"""
return sorted(MOLECULE_REGISTRY.keys())