Source code for qvartools.diag.selection.excitation_rank

"""
excitation_rank --- Excitation-rank and Hamming-distance utilities
=================================================================

Provides functions for computing excitation ranks, Hamming distances,
and bit-packing of binary configuration tensors.

Functions
---------
compute_excitation_rank
    Count the number of bit flips between a configuration and a reference.
compute_hamming_distance
    Hamming distance between two binary configuration tensors.
bitpack_configs
    Pack binary configuration tensors into int64 words for fast Hamming.
bitpacked_hamming
    Vectorized Hamming distance via popcount on bit-packed tensors.
"""

from __future__ import annotations

import torch

__all__ = [
    "compute_excitation_rank",
    "compute_hamming_distance",
    "bitpack_configs",
    "bitpacked_hamming",
]



[docs]
def compute_excitation_rank(config: torch.Tensor, reference: torch.Tensor) -> int:
    """Count the number of bit flips from a reference configuration.

    The excitation rank is the Hamming distance between the configuration
    and the reference, which for a fermionic system corresponds to the
    number of single-particle excitations (orbital substitutions).

    Parameters
    ----------
    config : torch.Tensor
        Binary configuration vector, shape ``(n_orbitals,)``.
    reference : torch.Tensor
        Reference configuration (e.g. Hartree-Fock), shape ``(n_orbitals,)``.

    Returns
    -------
    int
        Number of differing bits (excitation rank).

    Raises
    ------
    ValueError
        If ``config`` and ``reference`` have different shapes.

    Examples
    --------
    >>> ref = torch.tensor([1, 1, 0, 0])
    >>> cfg = torch.tensor([1, 0, 1, 0])
    >>> compute_excitation_rank(cfg, ref)
    2
    """
    if config.shape != reference.shape:
        raise ValueError(
            f"Shape mismatch: config {config.shape} vs reference {reference.shape}"
        )
    return int((config != reference).sum().item())




[docs]
def compute_hamming_distance(config1: torch.Tensor, config2: torch.Tensor) -> int:
    """Compute the Hamming distance between two binary configurations.

    Parameters
    ----------
    config1 : torch.Tensor
        First binary configuration, shape ``(n_orbitals,)``.
    config2 : torch.Tensor
        Second binary configuration, shape ``(n_orbitals,)``.

    Returns
    -------
    int
        Number of positions where the two configurations differ.

    Raises
    ------
    ValueError
        If ``config1`` and ``config2`` have different shapes.

    Examples
    --------
    >>> a = torch.tensor([1, 0, 1, 0])
    >>> b = torch.tensor([1, 1, 0, 0])
    >>> compute_hamming_distance(a, b)
    2
    """
    if config1.shape != config2.shape:
        raise ValueError(
            f"Shape mismatch: config1 {config1.shape} vs config2 {config2.shape}"
        )
    return int((config1 != config2).sum().item())




[docs]
def bitpack_configs(configs: torch.Tensor) -> torch.Tensor:
    """Pack binary configurations into int64 words for O(1) Hamming distance.

    Each configuration of ``n_orbitals`` binary values is packed into
    ``ceil(n_orbitals / 63)`` int64 words (63 bits per word to avoid sign
    bit issues).

    Parameters
    ----------
    configs : torch.Tensor
        Binary configuration matrix, shape ``(n_configs, n_orbitals)``
        with entries in ``{0, 1}``.

    Returns
    -------
    torch.Tensor
        Bit-packed tensor, shape ``(n_configs, n_words)``, dtype ``int64``.

    Examples
    --------
    >>> cfgs = torch.tensor([[1, 0, 1, 1], [0, 1, 0, 0]])
    >>> packed = bitpack_configs(cfgs)
    >>> packed.shape
    torch.Size([2, 1])
    """
    n_configs, n_orbitals = configs.shape
    bits_per_word = 63
    n_words = (n_orbitals + bits_per_word - 1) // bits_per_word

    packed = torch.zeros(n_configs, n_words, dtype=torch.int64, device=configs.device)

    for w in range(n_words):
        start = w * bits_per_word
        end = min(start + bits_per_word, n_orbitals)
        chunk = configs[:, start:end]

        shifts = torch.arange(end - start, dtype=torch.int64, device=configs.device)
        word_values = (chunk.to(torch.int64) << shifts.unsqueeze(0)).sum(dim=1)
        packed[:, w] = word_values

    return packed




[docs]
def bitpacked_hamming(
    packed: torch.Tensor, idx_a: torch.Tensor, idx_b: torch.Tensor
) -> torch.Tensor:
    """Compute vectorized Hamming distances via popcount on bit-packed configs.

    Parameters
    ----------
    packed : torch.Tensor
        Bit-packed configurations, shape ``(n_configs, n_words)``, dtype
        ``int64``, as returned by :func:`bitpack_configs`.
    idx_a : torch.Tensor
        Index tensor for the first set of configurations, shape ``(n_pairs,)``.
    idx_b : torch.Tensor
        Index tensor for the second set of configurations, shape ``(n_pairs,)``.

    Returns
    -------
    torch.Tensor
        Hamming distances, shape ``(n_pairs,)``, dtype ``int64``.

    Examples
    --------
    >>> cfgs = torch.tensor([[1, 0, 1, 1], [0, 1, 0, 0], [1, 1, 0, 0]])
    >>> packed = bitpack_configs(cfgs)
    >>> idx_a = torch.tensor([0, 0])
    >>> idx_b = torch.tensor([1, 2])
    >>> bitpacked_hamming(packed, idx_a, idx_b)
    tensor([4, 2])
    """
    xor_vals = packed[idx_a] ^ packed[idx_b]

    # Portable popcount via Kernighan-style bit counting on each word
    distances = torch.zeros(idx_a.shape[0], dtype=torch.int64, device=packed.device)
    for w in range(packed.shape[1]):
        word = xor_vals[:, w]
        # Use bit manipulation popcount: count set bits
        count = torch.zeros_like(word)
        remaining = word.clone()
        while True:
            mask = remaining != 0
            if not mask.any():
                break
            remaining = remaining & (remaining - 1)
            count = count + mask.to(torch.int64)
        distances = distances + count

    return distances