###########################################################################
# #
# physical_validation, #
# a python package to test the physical validity of MD results #
# #
# Written by Pascal T. Merz <pascal.merz@me.com> #
# Michael R. Shirts <michael.shirts@colorado.edu> #
# #
# Copyright (c) 2017-2021 University of Colorado Boulder #
# (c) 2012 The University of Virginia #
# #
###########################################################################
r"""
gromacs_parser.py
"""
import warnings
from typing import List, Optional, Union
import numpy as np
from ..util import error as pv_error
from ..util.gromacs_interface import GromacsInterface
from . import (
EnsembleData,
ObservableData,
SimulationData,
SystemData,
TrajectoryData,
UnitData,
parser,
)
from .trajectory_data import RectangularBox
[docs]class GromacsParser(parser.Parser):
"""
GromacsParser
"""
[docs] @staticmethod
def units() -> UnitData:
# Gromacs uses kJ/mol
return UnitData(
kb=8.314462435405199e-3,
energy_str="kJ/mol",
energy_conversion=1.0,
length_str="nm",
length_conversion=1.0,
volume_str="nm^3",
volume_conversion=1.0,
temperature_str="K",
temperature_conversion=1.0,
pressure_str="bar",
pressure_conversion=1.0,
time_str="ps",
time_conversion=1.0,
)
def __init__(
self, exe: Optional[str] = None, includepath: Union[str, List[str]] = None
):
r"""
Create a GromacsParser object
Parameters
----------
exe: str, optional
Path to a gmx executable (or simply the executable name, if it is in the path)
Default: Looks for `gmx`, then for `gmx_d` in the path. If neither is found, `exe` is
set to None, and any parsing including simulation trajectories (`edr`, `trr`
and `gro` arguments in `get_simulation_data()`) will fail.
includepath: str or List[str], optional
Path or list of paths to location(s) of topology file. Is used for the lookup of
`#include` statements in topologies.
Default: None - no additional topology location. Lookup will be restricted to current
directory and location of the `top` file given to `get_simulation_data()`,
plus any include locations added to the `mdp` file.
"""
super(GromacsParser, self).__init__()
self.__interface = GromacsInterface(exe=exe, includepath=includepath)
# gmx energy codes
self.__gmx_energy_names = {
"kinetic_energy": "Kinetic-En.",
"potential_energy": "Potential",
"total_energy": "Total-Energy",
"volume": "Volume",
"pressure": "Pressure",
"temperature": "Temperature",
"constant_of_motion": "Conserved-En.",
}
[docs] def get_simulation_data(
self,
mdp: Optional[str] = None,
top: Optional[str] = None,
edr: Optional[str] = None,
trr: Optional[str] = None,
gro: Optional[str] = None,
) -> SimulationData:
r"""
Parameters
----------
mdp: str, optional
A string pointing to a .mdp file
top: str, optional
A string pointing to a .top file
edr: str, optional
A string pointing to a .edr file
trr: str, optional
A string pointing to a .trr file
gro: str, optional
A string pointing to a .gro file (Note: if also trr is given, gro is ignored)
Returns
-------
result: SimulationData
A SimulationData filled with the results of the simulation as described by
the provided GROMACS files.
"""
result = SimulationData()
result.units = self.units()
# trajectories (might be used later for the box...)
trajectory_dict = None
if trr is not None:
if gro is not None:
warnings.warn("`trr` and `gro` given. Ignoring `gro`.")
trajectory_dict = self.__interface.read_trr(trr)
# check box shape
if trajectory_dict["box"].ndim == 2:
if (
trajectory_dict["box"] - np.diag(np.diag(trajectory_dict["box"]))
).any():
raise NotImplementedError("Triclinic boxes not implemented.")
else:
box = RectangularBox(np.diag(trajectory_dict["box"]))
elif trajectory_dict["box"].ndim == 3:
if np.array(
[b - np.diag(np.diag(b)) for b in trajectory_dict["box"]]
).any():
raise NotImplementedError("Triclinic boxes not implemented.")
else:
box = RectangularBox(
np.array([np.diag(b) for b in trajectory_dict["box"]])
)
else:
raise RuntimeError("Unknown box shape.")
trajectory_dict["box"] = box
elif gro is not None:
trajectory_dict = self.__interface.read_gro(gro)
# check box shape
if trajectory_dict["box"].ndim == 1:
if (
trajectory_dict["box"].size > 3
and (trajectory_dict["box"][3:]).any()
):
raise NotImplementedError("Triclinic boxes not implemented.")
else:
box = RectangularBox(trajectory_dict["box"][:3])
elif trajectory_dict["box"].ndim == 2:
if (
trajectory_dict["box"].shape[1] > 3
and (trajectory_dict["box"][:, 3:]).any()
):
raise NotImplementedError("Triclinic boxes not implemented.")
else:
box = RectangularBox(trajectory_dict["box"][:, :3])
else:
raise RuntimeError("Unknown box shape.")
trajectory_dict["box"] = box
# simulation parameters & system
if mdp is not None and top is not None:
mdp_options = self.__interface.read_mdp(mdp)
define = None
include = None
if "define" in mdp_options:
define = mdp_options["define"]
if "include" in mdp_options:
include = mdp_options["include"]
molecules = self.__interface.read_system_from_top(
top, define=define, include=include
)
if "dt" in mdp_options:
result.dt = float(mdp_options["dt"])
natoms = 0
mass = []
constraints_per_molec = []
angles = (
"constraints" in mdp_options
and mdp_options["constraints"] == "all-angles"
)
angles_h = (
angles
or "constraints" in mdp_options
and mdp_options["constraints"] == "h-angles"
)
bonds = (
angles_h
or "constraints" in mdp_options
and mdp_options["constraints"] == "all-bonds"
)
bonds_h = (
bonds
or "constraints" in mdp_options
and mdp_options["constraints"] == "h-bonds"
)
molecule_idx = []
next_molec = 0
molec_bonds = []
molec_bonds_constrained = []
for molecule in molecules:
natoms += molecule["nmolecs"] * molecule["natoms"]
for n in range(0, molecule["nmolecs"]):
molecule_idx.append(next_molec)
next_molec += molecule["natoms"]
mass.extend(molecule["mass"] * molecule["nmolecs"])
constraints = 0
constrained_bonds = []
all_bonds = molecule["bonds"] + molecule["bondsh"]
if molecule["settles"]:
constraints = 3
constrained_bonds = all_bonds
else:
if bonds:
constraints += molecule["nbonds"][0]
constrained_bonds.extend(molecule["bonds"])
if bonds_h:
constraints += molecule["nbonds"][1]
constrained_bonds.extend(molecule["bondsh"])
if angles:
constraints += molecule["nangles"][0]
if angles_h:
constraints += molecule["nangles"][1]
constraints_per_molec.extend([constraints] * molecule["nmolecs"])
molec_bonds.extend([all_bonds] * molecule["nmolecs"])
molec_bonds_constrained.extend(
[constrained_bonds] * molecule["nmolecs"]
)
system = SystemData()
system.natoms = natoms
system.mass = mass
system.molecule_idx = molecule_idx
system.nconstraints = np.sum(constraints_per_molec)
system.nconstraints_per_molecule = constraints_per_molec
system.ndof_reduction_tra = 3
system.ndof_reduction_rot = 0
if "comm-mode" in mdp_options:
if mdp_options["comm-mode"] == "linear":
system.ndof_reduction_tra = 3
elif mdp_options["comm-mode"] == "angular":
system.ndof_reduction_tra = 3
system.ndof_reduction_rot = 3
if mdp_options["comm-mode"] == "none":
system.ndof_reduction_tra = 0
system.bonds = molec_bonds
system.constrained_bonds = molec_bonds_constrained
result.system = system
if trajectory_dict is not None:
# now that we know the bonds, we can gather & save the trajectory
trajectory_dict["position"] = trajectory_dict["box"].gather(
trajectory_dict["position"], molec_bonds, molecule_idx
)
result.trajectory = TrajectoryData(
trajectory_dict["position"], trajectory_dict["velocity"]
)
thermostat = (
"tcoupl" in mdp_options
and mdp_options["tcoupl"]
and mdp_options["tcoupl"] != "no"
)
stochastic_dyn = "integrator" in mdp_options and mdp_options[
"integrator"
] in ["sd", "sd2", "bd"]
constant_temp = thermostat or stochastic_dyn
temperature = None
if constant_temp:
ref_t = [float(t) for t in mdp_options["ref-t"].split()]
if len(ref_t) == 1 or np.allclose(ref_t, [ref_t[0]] * len(ref_t)):
temperature = ref_t[0]
else:
raise pv_error.InputError(
"mdp",
"Ensemble definition ambiguous: Different t-ref values found.",
)
constant_press = (
"pcoupl" in mdp_options
and mdp_options["pcoupl"]
and mdp_options["pcoupl"] != "no"
)
volume = None
pressure = None
if constant_press:
ref_p = [float(p) for p in mdp_options["ref-p"].split()]
if len(ref_p) == 1 or np.allclose(ref_p, [ref_p[0]] * len(ref_p)):
pressure = ref_p[0]
else:
raise pv_error.InputError(
"mdp",
"Ensemble definition ambiguous: Different p-ref values found.",
)
else:
if trajectory_dict is not None:
box = trajectory_dict["box"].box[0]
# Different box shapes?
if box.ndim == 1:
volume = box[0] * box[1] * box[2]
elif box.ndim == 2:
volume = box[0, 0] * box[1, 1] * box[2, 2]
if constant_temp and constant_press:
ens = "NPT"
elif constant_temp:
ens = "NVT"
else:
ens = "NVE"
if ens == "NVE":
self.__gmx_energy_names["constant_of_motion"] = "Total-Energy"
else:
self.__gmx_energy_names["constant_of_motion"] = "Conserved-En."
result.ensemble = EnsembleData(
ens,
natoms=natoms,
volume=volume,
pressure=pressure,
temperature=temperature,
)
elif trajectory_dict is not None:
# we don't know the system, so we can't gather, but save it anyway
result.trajectory = TrajectoryData(
trajectory_dict["position"], trajectory_dict["velocity"]
)
if edr is not None:
observable_dict = self.__interface.get_quantities(
edr, list(self.__gmx_energy_names.values()), args=["-dp"]
)
# constant volume simulations don't write out the volume in .edr file
if (
observable_dict["Volume"] is None
and result.ensemble is not None
and result.ensemble.volume is not None
):
nframes = observable_dict["Pressure"].size
observable_dict["Volume"] = np.ones(nframes) * result.ensemble.volume
result.observables = ObservableData()
for key, gmxkey in self.__gmx_energy_names.items():
result.observables[key] = observable_dict[gmxkey]
return result