Source code for physical_validation.kinetic_energy

###########################################################################
#                                                                         #
#    physical_validation,                                                 #
#    a python package to test the physical validity of MD results         #
#                                                                         #
#    Written by Pascal T. Merz <pascal.merz@me.com>                       #
#               Michael R. Shirts <michael.shirts@colorado.edu>           #
#                                                                         #
#    Copyright (c) 2017-2021 University of Colorado Boulder               #
#              (c) 2012      The University of Virginia                   #
#                                                                         #
###########################################################################
"""
The `kinetic_energy` module is part of the physical_validation package, and
consists of checks of the kinetic energy distribution and its
equipartition.
"""
from typing import List, Optional, Tuple, Union

import numpy as np

from .data import ObservableData, SimulationData
from .util import kinetic_energy as util_kin


[docs]def distribution(
    data: SimulationData,
    strict: bool = False,
    verbosity: int = 2,
    screen: bool = False,
    filename: Optional[str] = None,
    bs_repetitions: int = 200,
    bootstrap_seed: Optional[int] = None,
    data_is_uncorrelated: bool = False,
) -> Union[float, Tuple[float, float]]:
    r"""Checks the distribution of a kinetic energy trajectory.

    Parameters
    ----------
    data
        Simulation data object
    strict
        If True, check full kinetic energy distribution via K-S test.
        Otherwise, check mean and width of kinetic energy distribution.
        Default: False
    verbosity
        Verbosity level, where 0 is quiet and 3 shows full details. Default: 2.
    screen
        Plot distributions on screen. Default: False.
    filename
        Plot distributions to `filename`. Default: None, no plotting to file.
    bs_repetitions
        Number of bootstrap samples used for error estimate (if strict=False).
        Default: 200.
    bootstrap_seed
        Sets the random number seed for bootstrapping (if strict=False).
        If set, bootstrapping will be reproducible.
        Default: `None`, bootstrapping is non-reproducible.
    data_is_uncorrelated
        Whether the provided data is uncorrelated. If this option
        is set, the equilibration, decorrelation and tail pruning
        of the trajectory is skipped. This can speed up the analysis,
        but note that if the provided data is correlated, the results
        of the physical validation checks might be invalid.

    Returns
    -------
    result
        If `strict=True`: The p value of the test.
        If `strict=False`: Distance of the estimated T(mu) and T(sigma) from
        the expected temperature, measured in standard deviations of the respective
        estimate.

    Notes
    -----
    Non-strict test
        If `strict = False` (the default), this function will estimate the mean and
        the standard deviation of the data. Analytically, a gamma distribution with
        shape :math:`k = N / 2` (with :math:`N` the number of degrees of freedom)
        and scale :math:`\theta = k_B T` (with :math:`T` the target temperature)
        is expected. The mean and the standard deviation of a gamma distribution
        are given by :math:`\mu = k\theta` and :math:`\sigma = \sqrt k \theta`.

        The standard error of the mean and standard deviation are estimated via
        bootstrap resampling. The function prints the analytically expected mean
        and variance as well as the fitted values and their error estimates. It
        also prints T(mu) and T(sigma), which are defined as the temperatures to
        which the estimated mean and standard deviation correspond, given the number
        of degrees of freedom :math:`N` in the system:

        .. math::
            T(\mu') = \frac{2 \mu'}{N k_B}

        .. math::
            T(\sigma') = \frac{\sqrt 2 \sigma'}{\sqrt N k_B}

        The return value is a tuple containing the distance of the estimated T(mu) and
        T(sigma) from the expected temperature, measured in standard deviations of the
        respective estimates.

    Strict test
        If `strict = True`, this function tests the hypothesis that a sample
        of kinetic energies is Maxwell-Boltzmann distributed given a specific
        target temperature and the number of degrees of freedom in the system,

        .. math::
            P(K) \sim K^{N/2-1} e^{-\beta K} \, .

        The test is performed using the Kolmogorov-Smirnov test provided by
        scipy.stats.kstest_. It returns the :math:`p`-value, measuring the
        likelihood that a sample at least as extreme as the one given is
        originating from the expected distribution.

        .. _scipy.stats.kstest: https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.stats.kstest.html

        .. note:: The Kolmogorov-Smirnov test is known to have two weaknesses.

           #. The test is more sensitive towards deviations around the center
              of the distribution than at its tails. We deem this to be acceptable
              for most MD applications, but be wary if yours is sensible to the
              kinetic distribution tails.
           #. The test is not valid if its parameters are guessed from the data
              set. Using the target temperature of the MD simulation as an input
              is therefore perfectly valid, but using the average temperature
              over the trajectory as an input to the test can potentially
              invalidate it.

    """
    data.raise_if_units_are_none(
        test_name="kinetic_energy.distribution",
        argument_name="data",
    )
    data.raise_if_ensemble_is_invalid(
        test_name="kinetic_energy.distribution",
        argument_name="data",
        check_pressure=False,
        check_mu=False,
    )
    data.raise_if_system_data_is_invalid(
        test_name="kinetic_energy.distribution",
        argument_name="data",
        check_full_system_data_only=True,
    )
    data.raise_if_observable_data_is_invalid(
        required_observables=["kinetic_energy"],
        test_name="kinetic_energy.distribution",
        argument_name="data",
    )

    ndof = (
        data.system.natoms * 3
        - data.system.nconstraints
        - data.system.ndof_reduction_tra
        - data.system.ndof_reduction_rot
    )

    if strict:
        return util_kin.check_distribution(
            kin=data.observables.kinetic_energy,
            temp=data.ensemble.temperature,
            ndof=ndof,
            kb=data.units.kb,
            verbosity=verbosity,
            screen=screen,
            filename=filename,
            ene_unit=data.units.energy_str,
            temp_unit=data.units.temperature_str,
            data_is_uncorrelated=data_is_uncorrelated,
        )
    else:
        return util_kin.check_mean_std(
            kin=data.observables.kinetic_energy,
            temp=data.ensemble.temperature,
            ndof=ndof,
            kb=data.units.kb,
            verbosity=verbosity,
            bs_repetitions=bs_repetitions,
            bootstrap_seed=bootstrap_seed,
            screen=screen,
            filename=filename,
            ene_unit=data.units.energy_str,
            temp_unit=data.units.temperature_str,
            data_is_uncorrelated=data_is_uncorrelated,
        )


[docs]def equipartition(
    data: SimulationData,
    strict: bool = False,
    molec_groups: Optional[List[np.ndarray]] = None,
    random_divisions: int = 0,
    random_groups: int = 0,
    random_division_seed: Optional[int] = None,
    verbosity: int = 2,
    screen: bool = False,
    filename: Optional[str] = None,
    bootstrap_seed: Optional[int] = None,
    data_is_uncorrelated: bool = False,
) -> Union[List[float], List[Tuple[float, float]]]:
    r"""Checks the equipartition of a simulation trajectory.

    Parameters
    ----------
    data
        Simulation data object
    strict
        If True, check full kinetic energy distribution via K-S test.
        Otherwise, check mean and width of kinetic energy distribution.
        Default: False
    molec_groups
        List of 1d arrays containing molecule indices defining groups. Useful to pre-define
        groups of molecules (e.g. solute / solvent, liquid mixture species, ...). If None,
        no pre-defined molecule groups will be tested. Default: None.

        *Note:* If an empty 1d array is found as last element in the list, the remaining
        molecules are collected in this array. This allows, for example, to only
        specify the solute, and indicate the solvent by giving an empty array.
    random_divisions
        Number of random division tests attempted. Default: 0 (random division tests off).
    random_groups
        Number of groups the system is randomly divided in. Default: 2.
    random_division_seed
        Seed making the random divisions reproducible.
        Default: None, random divisions not reproducible
    verbosity
        Verbosity level, where 0 is quiet and 3 very chatty. Default: 2.
    screen
        Plot distributions on screen. Default: False.
    filename
        Plot distributions to `filename`. Default: None, no plotting to file
    bootstrap_seed
        Sets the random number seed for bootstrapping (if strict=False).
        If set, bootstrapping will be reproducible.
        Default: None, bootstrapping is non-reproducible.
    data_is_uncorrelated
        Whether the provided data is uncorrelated. If this option
        is set, the equilibration, decorrelation and tail pruning
        of the trajectory is skipped. This can speed up the analysis,
        but note that if the provided data is correlated, the results
        of the physical validation checks might be invalid.

    Returns
    -------
    result
        If `strict=True`: The p value for every tests.
        If `strict=False`: Distance of the estimated T(mu) and T(sigma) from
        the expected temperature, measured in standard deviations of the
        respective estimate, for every test.

    Notes
    -----
    This function compares the kinetic energy between groups of degrees of
    freedom. Theoretically, the kinetic energy is expected (via the
    equipartition theorem) to be equally distributed over all degrees of
    freedom. In practice, deviations of temperature between groups of
    degrees of freedom up to several degrees K are routinely observed.
    Larger deviations can, however, hint to misbehaving simulations, such
    as, e.g., frozen degrees of freedom, lack of energy exchange between
    degrees of freedom, and transfer of heat from faster to slower degrees
    of freedom.

    Splitting of degrees of freedom is done both on a sub-molecular and on
    a molecular level. On a sub-molecular level, the degrees of freedom of
    a molecule can be partitioned into rigid-body contributions
    (translation of the center-of-mass, rotation around the
    center-of-mass) and intra-molecular contributions. On a molecular
    level, the single molecules of the system can be divided in groups,
    either by function (solute / solvent, different species of liquid
    mixtures, ...) or randomly.

    `check_equipartition()` partitions the kinetic energy of the entire
    system and, optionally, of predefined or randomly separated groups. It
    then computes either the mean and the standard deviation of each partition
    and compares them to the theoretically expected value (`strict=True`, the
    default), or it performs a Kolmogorov-Smirnov test of the distribution.
    See physical_validation.kinetic_energy.distribution for more detail on the
    checks.

    """
    if data.observables is None:
        # The equipartition test doesn't need observable input, but uses the
        # data structure to store information. Create it if it doesn't exist.
        data.observables = ObservableData()

    data.raise_if_units_are_none(
        test_name="kinetic_energy.equipartition",
        argument_name="data",
    )
    data.raise_if_ensemble_is_invalid(
        test_name="kinetic_energy.equipartition",
        argument_name="data",
        check_pressure=False,
        check_mu=False,
    )
    data.raise_if_system_data_is_invalid(
        test_name="kinetic_energy.equipartition",
        argument_name="data",
        check_full_system_data_only=False,
    )
    data.raise_if_trajectory_data_is_invalid(
        test_name="kinetic_energy.equipartition",
        argument_name="data",
    )

    (
        result,
        data.system.ndof_per_molecule,
        data.observables.kinetic_energy_per_molecule,
    ) = util_kin.check_equipartition(
        positions=data.trajectory["position"],
        velocities=data.trajectory["velocity"],
        masses=data.system.mass,
        molec_idx=data.system.molecule_idx,
        molec_nbonds=data.system.nconstraints_per_molecule,
        natoms=data.system.natoms,
        nmolecs=len(data.system.molecule_idx),
        temp=data.ensemble.temperature,
        kb=data.units.kb,
        strict=strict,
        ndof_reduction_tra=data.system.ndof_reduction_tra,
        ndof_reduction_rot=data.system.ndof_reduction_rot,
        molec_groups=molec_groups,
        random_divisions=random_divisions,
        random_groups=random_groups,
        random_division_seed=random_division_seed,
        ndof_molec=data.system.ndof_per_molecule,
        kin_molec=data.observables.kinetic_energy_per_molecule,
        verbosity=verbosity,
        screen=screen,
        filename=filename,
        ene_unit=data.units.energy_str,
        temp_unit=data.units.temperature_str,
        bootstrap_seed=bootstrap_seed,
        data_is_uncorrelated=data_is_uncorrelated,
    )

    return result