Source code for strawberrypy.postprocessing.correlations

import numpy as np
import glob
import os

from ..classes import Model
from ..finite_model import FiniteModel
from ..supercell import Supercell

from .pair_distance import get_pbc_distance_pairs, get_obc_distance_pairs
from .contractions import pbc_lattice_contraction, lattice_contraction
from .averages import average_over_radius
from .marker_io import load_marker


def _all_distances(sup_model: Model) -> tuple[np.ndarray, np.ndarray]:
    r"""Compute unique distances between a unit cell and all other cells in the supercell.

    The function returns a tuple containing a 1D array of unique distances and
    a 2D array of distances between all pairs of unit cells in the supercell.

    Parameters
    ----------
        sup_model : Model
            The supercell model for which to compute the distances.

    Returns
    -------
        unique_dist : np.ndarray
            A 1D array of unique distances between a unit cell and all the other cells in
            the supercell.
        distances : np.ndarray
            A 2D array of distances between all pairs of unit cells in the supercell.
    """
    if isinstance(sup_model, Supercell):
        distance_pairs = get_pbc_distance_pairs(sup_model)
    elif isinstance(sup_model, FiniteModel):
        distance_pairs = get_obc_distance_pairs(sup_model)

    # Indices are in terms of the orbitals of only one type. This is equivalent to taking
    #   the distances between the unit cell and all the other cells in the supercell
    distances = distance_pairs[:: sup_model.states_uc, :: sup_model.states_uc]
    unique_dist = np.sort(np.unique(np.round(distances[0, :], 7)))

    return unique_dist, np.round(distances, 7)


def _ivic_cell(
    sup_model: Model, unique_dist: np.ndarray, distances: np.ndarray
) -> np.ndarray:
    r"""Returns a 2D array where each row contains the indices of the two
    unit cells and the index of their distance in the `unique_dist` array.
    Expressed in terms of the orbitals of all types.

    Parameters
    ----------
        sup_model : Model
            The supercell model for which to compute the indices.
        unique_dist : np.ndarray
            1D array of unique distances between a unit cell and all other cells.
        distances : np.ndarray
            2D array of distances between all pairs of unit cells.

    Returns
    -------
        np.ndarray
            A 2D array of shape ``(N, 3)`` where N is the number of unit cell pairs.
            Each row contains ``(unit_cell_index_1, unit_cell_index_2, distance_index)``.
    """
    # Replace each element in the 2D array (distances) with the index in the 1D array
    #   (unique_dist) whose index value corresponds to the element value
    dist_indices = np.searchsorted(unique_dist, distances)

    # Express indices in terms of orbitals of only one type. This is equivalent to taking
    #   the indices of the unit cell pairs in the supercell
    row_idx, col_idx = np.indices(dist_indices.shape) * sup_model.states_uc
    ivic = np.column_stack((row_idx.ravel(), col_idx.ravel(), dist_indices.ravel()))

    return ivic


[docs] def correlation( sup_model: Model, directory: str | None = None, filenames: list[str] | str | None = None, is_bare_marker: bool = False, cutoff: float = 0.5, spinful: bool = False, to_normalize: bool = True, ) -> tuple[np.ndarray, np.ndarray]: r"""Calculate the spatial correlation function of local markers over disorder realizations. This function computes the mean correlation over multiple disorder realizations (seeds). It averages the markers over a specified cutoff radius and calculates the normalized or unnormalized correlation function versus distance. Parameters ---------- sup_model : Model The supercell model on which the markers were computed. directory : str Directory containing the marker data files. filenames : list of str or str Base filenames (prefix, without extension) of the data files. is_bare_marker : bool, optional Whether the marker data files contain the bare marker values (:python:`True`) or if they contain the marker values already averaged over the cutoff radius (:python:`False`). Default is :python:`False`. cutoff : float, optional Radius to consider for the local averaging (contraction) around each orbital if :python:`is_bare_marker == True`. Default is :python:`0.5`. spinful : bool, optional Whether the model and markers are spinful. If :python:`True`, computes the marker properly from spin-down and spin-up data columns. Default is :python:`False`. to_normalize : bool, optional Whether to normalize the correlation function by the variance. Default is :python:`True`. Returns ------- unique_dist : np.ndarray 1D array of unique distances. corr_r : np.ndarray 1D array containing the mean correlation value for each corresponding distance. """ if directory is None and filenames is None: raise ValueError("Either directory or filename must be provided.") elif directory is None and filenames is not None: # Process all the given files assuming their path is provided here file_list = filenames if isinstance(filenames, list) else [filenames] elif directory is not None and filenames is not None: # If filename is a string, we assume it's a common pattern to match files in the # directory, else the list of files is provided directly if isinstance(filenames, str): file_list = glob.glob(f"{directory}/{filenames}*.npz") else: file_list = [f"{directory}/{fname}" for fname in filenames] elif directory is not None and filenames is None: # Process all files in the directory file_list = [f for f in os.listdir(directory) if f.endswith(".npz")] # Assert all the files have the right extension to be loaded file_list = [f if f.endswith(".npz") else f + ".npz" for f in file_list] if not file_list: raise ValueError("No matching files found to process.") Lx = sup_model.Lx Ly = sup_model.Ly # Same type of orbitals (i.e. unit cells in the supercell) unique_dist, distances = _all_distances(sup_model) ivic = _ivic_cell(sup_model, unique_dist, distances) # Contraction is a list of lists, each sublist contains the indices of the orbitals # that are within the cutoff radius from the orbital of interest (which is the # index of the sublist in the list) # Ex. contraction = [[0, 1, 2], [1, 0, 3], ...] means that the orbitals 0, 1, and 2 # are the closest to orbital 0; orbitals 1, 0, and 3 are the closest to orbital 1, # etc. within the cutoff radius # All type of orbitals if isinstance(sup_model, Supercell): contraction = pbc_lattice_contraction(sup_model, cutoff=cutoff) elif isinstance(sup_model, FiniteModel): contraction = lattice_contraction(sup_model, cutoff=cutoff) corr_r = np.zeros(len(unique_dist)) n_dist = np.array([np.sum(np.isclose(distances[0, :], val)) for val in unique_dist]) if not spinful: for ctr, file in enumerate(file_list, start=1): print(f"Processing file: {file}", flush=True) c_i = load_marker("", file)[1] if is_bare_marker: marker_use = average_over_radius( sup_model, c_i, cutoff=cutoff, contraction=contraction ) else: marker_use = c_i # All type of orbitals marker0 = np.empty_like(marker_use) # Take the mean of the marker over the orbitals within unit cell in the # supercell and replace the values with the mean. for t in range(0, len(marker_use), sup_model.states_uc): mean_t = np.mean(marker_use[t : t + sup_model.states_uc]) marker0[t : t + sup_model.states_uc] = mean_t c_mean = np.mean(marker0[0 :: sup_model.states_uc]) c2_mean = np.mean(marker0[0 :: sup_model.states_uc] ** 2) if to_normalize: var_c = c2_mean - c_mean**2 else: var_c = 1.0 corr_r += np.bincount( ivic[:, 2], weights=(marker0[ivic[:, 0]] * marker0[ivic[:, 1]] - c_mean**2) / var_c, minlength=len(corr_r), ) elif spinful: for ctr, file in enumerate(file_list, start=1): print(f"Processing file: {file}", flush=True) c_i_p = load_marker("", file)[1][0] c_i_m = load_marker("", file)[1][1] if is_bare_marker: marker_use_p = average_over_radius( sup_model, c_i_p, cutoff=cutoff, contraction=contraction ) marker_use_m = average_over_radius( sup_model, c_i_m, cutoff=cutoff, contraction=contraction ) marker_use = np.mod(0.5 * (marker_use_p - marker_use_m), 2) marker_use = 1 - np.abs(1 - marker_use) else: marker_use = np.mod(0.5 * (c_i_p - c_i_m), 2) marker_use = 1 - np.abs(1 - marker_use) # All type of orbitals marker0 = np.empty_like(marker_use) # Take the mean of the marker over the orbitals within unit cell in the # supercell and replace the values with the mean. for t in range(0, len(marker_use), sup_model.states_uc): mean_t = np.mean(marker_use[t : t + sup_model.states_uc]) marker0[t : t + sup_model.states_uc] = mean_t c_mean = np.mean(marker0[0 :: sup_model.states_uc]) c2_mean = np.mean(marker0[0 :: sup_model.states_uc] ** 2) if to_normalize: var_c = c2_mean - c_mean**2 else: var_c = 1.0 corr_r += np.bincount( ivic[:, 2], weights=(marker0[ivic[:, 0]] * marker0[ivic[:, 1]] - c_mean**2) / var_c, minlength=len(corr_r), ) corr_r /= n_dist corr_r /= ctr corr_r /= Lx * Ly return unique_dist, corr_r