Source code for qvartools.diag.selection.bitstring

"""
Bitstring and basis-set utility functions for SKQD postprocessing.

Provides helpers for converting between bitstring and integer
representations, accumulating measurement results across Krylov steps,
filtering low-probability states, and computing basis-set overlap metrics.
"""

from __future__ import annotations

import numpy as np

# Public API of this module: the names exported by a star-import.
# Entries are listed in the same order as the functions are defined below
# (conversions, measurement-result helpers, basis-set analysis).
__all__ = [
    "bitstring_to_int",
    "int_to_bitstring",
    "get_basis_states_as_array",
    "calculate_cumulative_results",
    "filter_high_probability_states",
    "compute_basis_overlap",
    "estimate_ground_state_sparsity",
    "merge_basis_sets",
]


# ---------------------------------------------------------------------------
# Bitstring <-> integer conversions
# ---------------------------------------------------------------------------



[docs]
def bitstring_to_int(bitstring: str) -> int:
    """Interpret a string of binary digits as a base-2 integer.

    Parameters
    ----------
    bitstring : str
        Binary digits, most-significant bit first (e.g. ``"0110"``).

    Returns
    -------
    int
        The value obtained by parsing ``bitstring`` in base 2.

    Raises
    ------
    ValueError
        If ``bitstring`` is not a valid base-2 literal (for example an
        empty string or one containing digits other than 0 and 1).

    Examples
    --------
    >>> bitstring_to_int("0110")
    6
    >>> bitstring_to_int("0001")
    1
    """
    parsed = int(bitstring, base=2)
    return parsed




[docs]
def int_to_bitstring(value: int, num_bits: int) -> str:
    """Render an integer as binary digits, left-padded with zeros.

    Parameters
    ----------
    value : int
        Non-negative integer to render.
    num_bits : int
        Minimum width of the result; shorter renderings are padded with
        leading ``'0'`` characters.

    Returns
    -------
    str
        Binary representation of ``value``.  Its length is ``num_bits``
        whenever ``value`` fits in that many bits; wider values are not
        truncated.

    Examples
    --------
    >>> int_to_bitstring(6, 4)
    '0110'
    >>> int_to_bitstring(0, 3)
    '000'
    """
    # Nested replacement field builds the "0<width>b" format spec inline.
    return f"{value:0{num_bits}b}"



# ---------------------------------------------------------------------------
# Measurement-result helpers
# ---------------------------------------------------------------------------



[docs]
def get_basis_states_as_array(
    measurement_results: dict[str, int],
    num_qubits: int,  # noqa: ARG001 -- kept for API compatibility
) -> np.ndarray:
    """Return the measured basis states as a sorted array of integers.

    Only the keys of ``measurement_results`` are used; the counts are
    ignored.  Bitstrings that parse to the same integer (e.g. ``"01"`` and
    ``"001"``) collapse into a single entry.

    Parameters
    ----------
    measurement_results : dict of str to int
        Mapping from bitstring (e.g. ``"0110"``) to occurrence count.
    num_qubits : int
        Number of qubits.  Not used by the implementation; retained so
        existing callers keep working.

    Returns
    -------
    np.ndarray
        Ascending array of distinct basis-state integers, dtype ``int64``.

    Examples
    --------
    >>> results = {"10": 3, "01": 7}
    >>> get_basis_states_as_array(results, num_qubits=2)
    array([1, 2])
    """
    # Iterating a dict yields its keys, i.e. the bitstrings.
    distinct_states = {bitstring_to_int(key) for key in measurement_results}
    ordered = sorted(distinct_states)
    return np.array(ordered, dtype=np.int64)




[docs]
def calculate_cumulative_results(
    all_measurement_results: list[dict[str, int]],
) -> list[dict[str, int]]:
    """Build running totals of measurement counts over the Krylov steps.

    The entry for step *k* holds every bitstring seen in steps
    0, 1, ..., *k*, each mapped to the sum of its counts over those steps.

    Parameters
    ----------
    all_measurement_results : list of dict
        Per-step measurement dictionaries (bitstring -> count), in step
        order.  The input dictionaries are not modified.

    Returns
    -------
    list of dict
        One dictionary per input step.  Each is an independent object, so
        mutating one entry does not affect the others.

    Examples
    --------
    >>> step0 = {"00": 3, "01": 2}
    >>> step1 = {"01": 1, "10": 4}
    >>> cumulative = calculate_cumulative_results([step0, step1])
    >>> cumulative[0]
    {'00': 3, '01': 2}
    >>> cumulative[1]
    {'00': 3, '01': 3, '10': 4}
    """
    snapshots: list[dict[str, int]] = []

    for step_counts in all_measurement_results:
        # Start from a copy of the previous totals (or empty for step 0)
        # so every snapshot is its own dictionary.
        merged: dict[str, int] = dict(snapshots[-1]) if snapshots else {}
        for key, hits in step_counts.items():
            merged[key] = merged.get(key, 0) + hits
        snapshots.append(merged)

    return snapshots




[docs]
def filter_high_probability_states(
    measurement_results: dict[str, int],
    threshold: float = 0.0,
    max_states: int | None = None,
) -> dict[str, int]:
    """Filter measurement results to keep only high-probability states.

    Parameters
    ----------
    measurement_results : dict of str to int
        Mapping from bitstring to occurrence count.
    threshold : float, optional
        Minimum empirical probability for a state to be retained
        (default ``0.0``, i.e. keep all).
    max_states : int or None, optional
        If not ``None``, keep at most this many states (the highest-count
        states are preferred).

    Returns
    -------
    dict of str to int
        Filtered measurement dictionary.

    Examples
    --------
    >>> counts = {"00": 90, "01": 5, "10": 3, "11": 2}
    >>> filter_high_probability_states(counts, threshold=0.05)
    {'00': 90, '01': 5}
    """
    total_counts = sum(measurement_results.values())
    if total_counts == 0:
        return {}

    # Compute probabilities
    probs = {bs: count / total_counts for bs, count in measurement_results.items()}

    # Filter by threshold
    filtered = {
        bs: count for bs, count in measurement_results.items() if probs[bs] >= threshold
    }

    # Limit number of states
    if max_states is not None and len(filtered) > max_states:
        sorted_states = sorted(filtered.items(), key=lambda x: x[1], reverse=True)
        filtered = dict(sorted_states[:max_states])

    return filtered



# ---------------------------------------------------------------------------
# Basis-set analysis
# ---------------------------------------------------------------------------



[docs]
def compute_basis_overlap(
    basis1: np.ndarray,
    basis2: np.ndarray,
) -> float:
    """Return the fraction of ``basis1`` states that also occur in ``basis2``.

    Both inputs are reduced to sets first, so repeated states count once.
    The measure is not symmetric: it is normalised by the number of
    distinct states in ``basis1`` only.

    Parameters
    ----------
    basis1 : np.ndarray
        Reference basis, as an array of state integers.
    basis2 : np.ndarray
        Basis to compare against, as an array of state integers.

    Returns
    -------
    float
        Value in ``[0, 1]``; ``0.0`` if ``basis1`` contains no states.

    Examples
    --------
    >>> b1 = np.array([0, 1, 2, 3])
    >>> b2 = np.array([2, 3, 4, 5])
    >>> compute_basis_overlap(b1, b2)
    0.5
    """
    reference = set(basis1.tolist())
    candidates = set(basis2.tolist())

    if not reference:
        # Avoid dividing by zero when there is nothing to compare.
        return 0.0

    shared = reference.intersection(candidates)
    return len(shared) / len(reference)




[docs]
def estimate_ground_state_sparsity(
    ground_state: np.ndarray,
    threshold: float = 1e-6,
) -> dict[str, float]:
    """Estimate sparsity metrics of a ground-state wavefunction.

    Component probabilities ``|c_i|^2`` are computed from the amplitudes
    and, when their sum is positive, rescaled to sum to one before any
    metric is evaluated.  The input therefore does not need to be
    normalised.

    Parameters
    ----------
    ground_state : np.ndarray
        Ground-state wavefunction vector, shape ``(dim,)``.  An empty
        vector is accepted and yields all-zero metrics.
    threshold : float, optional
        A component is counted as significant when its normalised
        probability is strictly greater than this value
        (default ``1e-6``).

    Returns
    -------
    dict
        Sparsity metrics with keys:

        - ``"n_significant"`` : int -- number of components above
          ``threshold``.
        - ``"sparsity_ratio"`` : float -- fraction of Hilbert space
          with significant weight (``0.0`` for an empty vector).
        - ``"concentration"`` : float -- total probability weight in
          the top 10 % of components (always at least one component).
        - ``"total_dimension"`` : int -- total Hilbert-space dimension.

    Examples
    --------
    >>> psi = np.array([0.9, 0.1, 0.0, 0.0])
    >>> stats = estimate_ground_state_sparsity(psi, threshold=1e-4)
    >>> stats["n_significant"]
    2
    >>> estimate_ground_state_sparsity(np.array([]))["sparsity_ratio"]
    0.0
    """
    probs = np.abs(ground_state) ** 2
    prob_sum = probs.sum()
    if prob_sum > 0:
        # Skip normalisation for an all-zero (or empty) vector to avoid 0/0.
        probs = probs / prob_sum

    total_dimension = len(ground_state)
    n_significant = int(np.sum(probs > threshold))
    # An empty vector has no Hilbert space to be sparse in; report 0.0
    # instead of raising ZeroDivisionError.
    sparsity_ratio = n_significant / total_dimension if total_dimension else 0.0

    # Weight carried by the largest 10 % of components (at least one, so
    # small vectors still report their dominant component).
    sorted_probs = np.sort(probs)[::-1]
    n_top = max(1, len(sorted_probs) // 10)
    concentration = float(np.sum(sorted_probs[:n_top]))

    return {
        "n_significant": n_significant,
        "sparsity_ratio": float(sparsity_ratio),
        "concentration": concentration,
        "total_dimension": total_dimension,
    }




[docs]
def merge_basis_sets(*bases: np.ndarray) -> np.ndarray:
    """Combine any number of basis sets into one sorted array of unique states.

    Parameters
    ----------
    *bases : np.ndarray
        Zero or more 1-D arrays of basis-state integers.

    Returns
    -------
    np.ndarray
        Ascending array holding each state that appears in any input
        exactly once, dtype ``int64``.  Empty when no bases are given.

    Examples
    --------
    >>> b1 = np.array([0, 1, 2])
    >>> b2 = np.array([2, 3])
    >>> merge_basis_sets(b1, b2)
    array([0, 1, 2, 3])
    """
    # Convert to plain Python ints so set membership is by value.
    combined: set[int] = set().union(*(basis.tolist() for basis in bases))
    ordered = sorted(combined)
    return np.array(ordered, dtype=np.int64)