Source code for MDAnalysis.coordinates.LAMMPS

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#


"""LAMMPS DCD trajectory and DATA I/O  --- :mod:`MDAnalysis.coordinates.LAMMPS`
===============================================================================

Classes to read and write LAMMPS_ DCD binary trajectories, LAMMPS DATA files
and LAMMPS dump files.  Trajectories can be read regardless of system-endianness
as this is auto-detected.

LAMMPS can `write DCD`_ trajectories but unlike a `CHARMM trajectory`_
(which is often called a DCD even though CHARMM itself calls them
"trj") the time unit is not fixed to be the AKMA_ time unit (20 AKMA
is 0.978 picoseconds or 1 AKMA = 4.888821e-14 s) but can depend on
settings in LAMMPS. The most common case for biomolecular simulations
appears to be that the time step is recorded in femtoseconds (command
`units real`_ in the input file) and lengths in ångströms. Other cases
are unit-less Lennard-Jones time units.

This presents a problem for MDAnalysis because it cannot autodetect
the unit from the file. By default we are assuming that the unit for
length is the ångström and for the time is the femtosecond. If this is
not true then the user *should supply the appropriate units* in the
keywords *timeunit* and/or *lengthunit* to :class:`DCDWriter` and
:class:`~MDAnalysis.core.universe.Universe` (which then calls
:class:`DCDReader`).

Data file formats
-----------------

By default either the `atomic` or `full` atom styles are expected,
however this can be customised, see :ref:`atom_style_kwarg`.

Dump files
----------

The DumpReader expects ascii dump files written with the default
`LAMMPS dump format`_ of 'atom'


Example: Loading a LAMMPS simulation
------------------------------------

To load a LAMMPS simulation from a LAMMPS data file (using the
:class:`~MDAnalysis.topology.LAMMPSParser.DATAParser`) together with a
LAMMPS DCD with "*real*" provide the keyword *format="LAMMPS*"::

    >>> u = MDAnalysis.Universe("lammps.data", "lammps_real.dcd", format="LAMMPS")

If the trajectory uses *units nano* then use ::

    >>> u = MDAnalysis.Universe("lammps.data", "lammps_nano.dcd", format="LAMMPS",
    ...                          lengthunit="nm", timeunit="ns")

To scan through a trajectory to find a desirable frame and write to a LAMMPS
data file,

>>> for ts in u.trajectory:
...     # analyze frame
...     if take_this_frame == True:
...         with mda.Writer('frame.data') as W:
...             W.write(u.atoms)
...         break

Note
----
Lennard-Jones units are not implemented. See :mod:`MDAnalysis.units`
for other recognized values and the documentation for the LAMMPS
`units command`_.

See Also
--------

   For further discussion follow the reports for `Issue 84`_ and `Issue 64`_.

.. _LAMMPS: http://lammps.sandia.gov/
.. _write DCD: http://lammps.sandia.gov/doc/dump.html
.. _CHARMM trajectory: http://www.charmm.org/documentation/c36b1/dynamc.html#%20Trajectory
.. _AKMA: http://www.charmm.org/documentation/c36b1/usage.html#%20AKMA
.. _units real: http://lammps.sandia.gov/doc/units.html
.. _units command: http://lammps.sandia.gov/doc/units.html
.. _`Issue 64`: https://github.com/MDAnalysis/mdanalysis/issues/64
.. _`Issue 84`: https://github.com/MDAnalysis/mdanalysis/issues/84
.. _`LAMMPS dump format`: http://lammps.sandia.gov/doc/dump.html

Classes
-------

.. autoclass:: DCDReader
   :members:
   :inherited-members:
.. autoclass:: DCDWriter
   :members:
   :inherited-members:
.. autoclass:: DATAReader
   :members:
   :inherited-members:
.. autoclass:: DATAWriter
   :members:
   :inherited-members:
.. autoclass:: DumpReader
   :members:
   :inherited-members:

"""
import os
import numpy as np

from ..core.groups import requires
from ..lib import util, mdamath, distances
from ..lib.util import cached, store_init_arguments
from . import DCD
from .. import units
from ..topology.LAMMPSParser import DATAParser
from ..exceptions import NoDataError
from . import base

btype_sections = {'bond':'Bonds', 'angle':'Angles',
                  'dihedral':'Dihedrals', 'improper':'Impropers'}

[docs]class DCDWriter(DCD.DCDWriter):
    """Write a LAMMPS_ DCD trajectory.

    The units can be set from the constructor with the keyword
    arguments *timeunit* and *lengthunit*. The defaults are "fs" and
    "Angstrom". See :mod:`MDAnalysis.units` for other recognized
    values.
    """
    format = 'LAMMPS'
    multiframe = True
    flavor = 'LAMMPS'

    def __init__(self, *args, **kwargs):
        self.units = {'time': 'fs', 'length': 'Angstrom'}  # must be instance level
        self.units['time'] = kwargs.pop('timeunit', self.units['time'])
        self.units['length'] = kwargs.pop('lengthunit', self.units['length'])
        for unit_type, unit in self.units.items():
            try:
                if units.unit_types[unit] != unit_type:
                    raise TypeError("LAMMPS DCDWriter: wrong unit {0!r} for unit type {1!r}".format(unit, unit_type))
            except KeyError:
                errmsg = f"LAMMPS DCDWriter: unknown unit {unit}"
                raise ValueError(errmsg) from None
        super(DCDWriter, self).__init__(*args, **kwargs)


[docs]class DCDReader(DCD.DCDReader):
    """Read a LAMMPS_ DCD trajectory.

    The units can be set from the constructor with the keyword
    arguments *timeunit* and *lengthunit*. The defaults are "fs" and
    "Angstrom", corresponding to LAMMPS `units style`_ "**real**". See
    :mod:`MDAnalysis.units` for other recognized values.

    .. _units style: http://lammps.sandia.gov/doc/units.html
    """
    format = 'LAMMPS'
    flavor = 'LAMMPS'

    @store_init_arguments
    def __init__(self, dcdfilename, **kwargs):
        self.units = {'time': 'fs', 'length': 'Angstrom'}  # must be instance level
        self.units['time'] = kwargs.pop('timeunit', self.units['time'])
        self.units['length'] = kwargs.pop('lengthunit', self.units['length'])
        for unit_type, unit in self.units.items():
            try:
                if units.unit_types[unit] != unit_type:
                    raise TypeError("LAMMPS DCDReader: wrong unit {0!r} for unit type {1!r}".format(unit, unit_type))
            except KeyError:
                raise ValueError("LAMMPS DCDReader: unknown unit {0!r}".format(unit))
        super(DCDReader, self).__init__(dcdfilename, **kwargs)


[docs]class DATAReader(base.SingleFrameReaderBase):
    """Reads a single frame of coordinate information from a LAMMPS DATA file.

    .. versionadded:: 0.9.0
    .. versionchanged:: 0.11.0
       Frames now 0-based instead of 1-based
    """
    format = 'DATA'
    units = {'time': None, 'length': 'Angstrom', 'velocity': 'Angstrom/fs'}

    @store_init_arguments
    def __init__(self, filename, **kwargs):
        self.n_atoms = kwargs.pop('n_atoms', None)
        if self.n_atoms is None:  # this should be done by parsing DATA first
            raise ValueError("DATAReader requires n_atoms keyword")
        self.atom_style = kwargs.pop('atom_style', None)
        super(DATAReader, self).__init__(filename, **kwargs)

    def _read_first_frame(self):
        with DATAParser(self.filename) as p:
            self.ts = p.read_DATA_timestep(self.n_atoms, self._Timestep,
                                           self._ts_kwargs, self.atom_style)

        self.ts.frame = 0
        if self.convert_units:
            self.convert_pos_from_native(self.ts._pos)  # in-place !
            try:
                self.convert_velocities_from_native(self.ts._velocities)  # in-place !
            except AttributeError:
                pass

[docs]class DATAWriter(base.WriterBase):
    """Write out the current time step as a LAMMPS DATA file.

    This writer supports the sections Atoms, Masses, Velocities, Bonds,
    Angles, Dihedrals, and Impropers. This writer will write the header
    and these sections (if applicable). Atoms section is written in the
    "full" sub-style if charges are available or "molecular" sub-style
    if they are not. Molecule id is set to 0 for all atoms.

    Note
    ----
    This writer assumes "conventional" or "real" LAMMPS units where length
    is measured in Angstroms and velocity is measured in Angstroms per
    femtosecond. To write in different units, specify `lengthunit`

    If atom types are not already positive integers, the user must set them
    to be positive integers, because the writer will not automatically
    assign new types.

    To preserve numerical atom types when writing a selection, the Masses
    section will have entries for each atom type up to the maximum atom type.
    If the universe does not contain atoms of some type in
    {1, ... max(atom_types)}, then the mass for that type will be set to 1.

    In order to write bonds, each selected bond type must be explicitly set to
    an integer >= 1.

    """
    format = 'DATA'

    def __init__(self, filename, convert_units=True, **kwargs):
        """Set up a DATAWriter

        Parameters
        ----------
        filename : str
            output filename
        convert_units : bool, optional
            units are converted to the MDAnalysis base format; [``True``]
        """
        self.filename = util.filename(filename, ext='data')

        self.convert_units = convert_units

        self.units = {'time': 'fs', 'length': 'Angstrom'}
        self.units['length'] = kwargs.pop('lengthunit', self.units['length'])
        self.units['time'] = kwargs.pop('timeunit', self.units['time'])
        self.units['velocity'] = kwargs.pop('velocityunit',
                                 self.units['length']+'/'+self.units['time'])

    def _write_atoms(self, atoms):
        self.f.write('\n')
        self.f.write('Atoms\n')
        self.f.write('\n')

        try:
            charges = atoms.charges
        except (NoDataError, AttributeError):
            has_charges = False
        else:
            has_charges = True

        indices = atoms.indices + 1
        types = atoms.types.astype(np.int32)

        if self.convert_units:
            coordinates = self.convert_pos_to_native(atoms.positions, inplace=False)

        if has_charges:
            for index, atype, charge, coords in zip(indices, types, charges,
                    coordinates):
                self.f.write('{i:d} 0 {t:d} {c:f} {x:f} {y:f} {z:f}\n'.format(
                             i=index, t=atype, c=charge, x=coords[0],
                             y=coords[1], z=coords[2]))
        else:
            for index, atype, coords in zip(indices, types, coordinates):
                self.f.write('{i:d} 0 {t:d} {x:f} {y:f} {z:f}\n'.format(
                             i=index, t=atype, x=coords[0], y=coords[1],
                             z=coords[2]))

    def _write_velocities(self, atoms):
        self.f.write('\n')
        self.f.write('Velocities\n')
        self.f.write('\n')
        indices = atoms.indices + 1
        velocities = self.convert_velocities_to_native(atoms.velocities,
                                                       inplace=False)
        for index, vel in zip(indices, velocities):
            self.f.write('{i:d} {x:f} {y:f} {z:f}\n'.format(i=index, x=vel[0],
                y=vel[1], z=vel[2]))

    def _write_masses(self, atoms):
        self.f.write('\n')
        self.f.write('Masses\n')
        self.f.write('\n')
        mass_dict = {}
        max_type = max(atoms.types.astype(np.int32))
        for atype in range(1, max_type+1):
            # search entire universe for mass info, not just writing selection
            masses = set(atoms.universe.atoms.select_atoms(
                'type {:d}'.format(atype)).masses)
            if len(masses) == 0:
                mass_dict[atype] = 1.0
            else:
                mass_dict[atype] = masses.pop()
            if masses:
                raise ValueError('LAMMPS DATAWriter: to write data file, '+
                        'atoms with same type must have same mass')
        for atype, mass in mass_dict.items():
            self.f.write('{:d} {:f}\n'.format(atype, mass))

    def _write_bonds(self, bonds):
        self.f.write('\n')
        self.f.write('{}\n'.format(btype_sections[bonds.btype]))
        self.f.write('\n')
        for bond, i in zip(bonds, range(1, len(bonds)+1)):
            try:
                self.f.write('{:d} {:d} '.format(i, int(bond.type))+\
                        ' '.join((bond.atoms.indices + 1).astype(str))+'\n')
            except TypeError:
                errmsg = (f"LAMMPS DATAWriter: Trying to write bond, but bond "
                          f"type {bond.type} is not numerical.")
                raise TypeError(errmsg) from None

    def _write_dimensions(self, dimensions):
        """Convert dimensions to triclinic vectors, convert lengths to native
        units and then write the dimensions section
        """
        if self.convert_units:
            triv = self.convert_pos_to_native(mdamath.triclinic_vectors(
                                              dimensions),inplace=False)
        self.f.write('\n')
        self.f.write('{:f} {:f} xlo xhi\n'.format(0., triv[0][0]))
        self.f.write('{:f} {:f} ylo yhi\n'.format(0., triv[1][1]))
        self.f.write('{:f} {:f} zlo zhi\n'.format(0., triv[2][2]))
        if any([triv[1][0], triv[2][0], triv[2][1]]):
            self.f.write('{xy:f} {xz:f} {yz:f} xy xz yz\n'.format(
                xy=triv[1][0], xz=triv[2][0], yz=triv[2][1]))
        self.f.write('\n')

[docs]    @requires('types', 'masses')
    def write(self, selection, frame=None):
        """Write selection at current trajectory frame to file.

        The sections for Atoms, Masses, Velocities, Bonds, Angles,
        Dihedrals, and Impropers (if these are defined) are
        written. The Atoms section is written in the "full" sub-style
        if charges are available or "molecular" sub-style if they are
        not. Molecule id in atoms section is set to to 0.

        No other sections are written to the DATA file.
        As of this writing, other sections are not parsed into the topology
        by the :class:`DATAReader`.

        Note
        ----
        If the selection includes a partial fragment, then only the bonds,
        angles, etc. whose atoms are contained within the selection will be
        included.

        Parameters
        ----------
        selection : AtomGroup or Universe
            MDAnalysis AtomGroup (selection or Universe.atoms) or also Universe
        frame : int (optional)
            optionally move to frame number `frame`

        """
        u = selection.universe
        if frame is not None:
            u.trajectory[frame]
        else:
            frame = u.trajectory.ts.frame

        # make sure to use atoms (Issue 46)
        atoms = selection.atoms

        # check that types can be converted to ints if they aren't ints already
        try:
            atoms.types.astype(np.int32)
        except ValueError:
            errmsg = ("LAMMPS.DATAWriter: atom types must be convertible to "
                      "integers")
            raise ValueError(errmsg) from None

        try:
            velocities = atoms.velocities
        except (NoDataError, AttributeError):
            has_velocities = False
        else:
            has_velocities = True

        features = {}
        with util.openany(self.filename, 'w') as self.f:
            self.f.write('LAMMPS data file via MDAnalysis\n')
            self.f.write('\n')
            self.f.write('{:>12d}  atoms\n'.format(len(atoms)))

            attrs = [('bond', 'bonds'), ('angle', 'angles'),
                ('dihedral', 'dihedrals'), ('improper', 'impropers')]

            for btype, attr_name in attrs:
                features[btype] = atoms.__getattribute__(attr_name)
                self.f.write('{:>12d}  {}\n'.format(len(features[btype]),
                                                    attr_name))
                features[btype] = features[btype].atomgroup_intersection(
                                    atoms, strict=True)

            self.f.write('\n')
            self.f.write('{:>12d}  atom types\n'.format(max(atoms.types.astype(np.int32))))

            for btype, attr in features.items():
                self.f.write('{:>12d}  {} types\n'.format(len(attr.types()),
                                                          btype))

            self._write_dimensions(atoms.dimensions)

            self._write_masses(atoms)
            self._write_atoms(atoms)
            for attr in features.values():
                if attr is None or len(attr) == 0:
                    continue
                self._write_bonds(attr)

            if has_velocities:
                self._write_velocities(atoms)


[docs]class DumpReader(base.ReaderBase):
    """Reads the default `LAMMPS dump format`_

    Supports coordinates in the LAMMPS "unscaled" (x,y,z), "scaled" (xs,ys,zs),
    "unwrapped" (xu,yu,zu) and "scaled_unwrapped" (xsu,ysu,zsu) coordinate
    conventions (see https://docs.lammps.org/dump.html for more details).
    If `lammps_coordinate_convention='auto'` (default),
    one will be guessed. Guessing checks whether the coordinates fit each convention in the order "unscaled",
    "scaled", "unwrapped", "scaled_unwrapped" and whichever set of coordinates
    is detected first will be used. If coordinates are given in the scaled
    coordinate convention (xs,ys,zs) or scaled unwrapped coordinate convention
    (xsu,ysu,zsu) they will automatically be converted from their
    scaled/fractional representation to their real values.

    Supports both orthogonal and triclinic simulation box dimensions (for more
    details see https://docs.lammps.org/Howto_triclinic.html). In either case,
    MDAnalysis will always use ``(*A*, *B*, *C*, *alpha*, *beta*, *gamma*)``
    to represent the unit cell. Lengths *A*, *B*, *C* are in the MDAnalysis
    length unit (Å), and angles are in degrees.

    .. versionchanged:: 2.2.0
       Triclinic simulation boxes are supported.
       (Issue `#3383 <https://github.com/MDAnalysis/mdanalysis/issues/3383>`__)
    .. versionchanged:: 2.0.0
       Now parses coordinates in multiple lammps conventions (x,xs,xu,xsu)
    .. versionadded:: 0.19.0
    """
    format = 'LAMMPSDUMP'
    _conventions = ["auto", "unscaled", "scaled", "unwrapped",
                    "scaled_unwrapped"]
    _coordtype_column_names = {
        "unscaled": ["x", "y", "z"],
        "scaled": ["xs", "ys", "zs"],
        "unwrapped": ["xu", "yu", "zu"],
        "scaled_unwrapped": ["xsu", "ysu", "zsu"]
    }

    @store_init_arguments
    def __init__(self, filename, lammps_coordinate_convention="auto",
                 **kwargs):
        super(DumpReader, self).__init__(filename, **kwargs)

        root, ext = os.path.splitext(self.filename)
        if lammps_coordinate_convention in self._conventions:
            self.lammps_coordinate_convention = lammps_coordinate_convention
        else:
            option_string = "'" + "', '".join(self._conventions) + "'"
            raise ValueError("lammps_coordinate_convention="
                             f"'{lammps_coordinate_convention}'"
                             " is not a valid option. "
                             f"Please choose one of {option_string}")

        self._cache = {}

        self._reopen()

        self._read_next_timestep()

    def _reopen(self):
        self.close()
        self._file = util.anyopen(self.filename)
        self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
        self.ts.frame = -1

    @property
    @cached('n_atoms')
    def n_atoms(self):
        with util.anyopen(self.filename) as f:
            f.readline()
            f.readline()
            f.readline()
            n_atoms = int(f.readline())
        return n_atoms

    @property
    @cached('n_frames')
    def n_frames(self):
        # 2(timestep) + 2(natoms info) + 4(box info) + 1(atom header) + n_atoms
        lines_per_frame = self.n_atoms + 9
        offsets = []
        counter = 0
        with util.anyopen(self.filename) as f:
            line = True
            while line:
                if not counter % lines_per_frame:
                    offsets.append(f.tell())
                line = f.readline()
                counter += 1
        self._offsets = offsets[:-1]  # last is EOF
        return len(self._offsets)

[docs]    def close(self):
        if hasattr(self, '_file'):
            self._file.close()

    def _read_frame(self, frame):
        self._file.seek(self._offsets[frame])
        self.ts.frame = frame - 1  # gets +1'd in next

        return self._read_next_timestep()

    def _read_next_timestep(self):
        f = self._file
        ts = self.ts
        ts.frame += 1
        if ts.frame >= len(self):
            raise EOFError

        f.readline()  # ITEM TIMESTEP
        step_num = int(f.readline())
        ts.data['step'] = step_num

        f.readline()  # ITEM NUMBER OF ATOMS
        n_atoms = int(f.readline())
        if n_atoms != self.n_atoms:
            raise ValueError("Number of atoms in trajectory changed "
                             "this is not supported in MDAnalysis")

        triclinic = len(f.readline().split()) == 9  # ITEM BOX BOUNDS
        if triclinic:
            xlo_bound, xhi_bound, xy = map(float, f.readline().split())
            ylo_bound, yhi_bound, xz = map(float, f.readline().split())
            zlo, zhi, yz = map(float, f.readline().split())

            # converts orthogonal bounding box to the conventional format,
            # see https://docs.lammps.org/Howto_triclinic.html
            xlo = xlo_bound - min(0.0, xy, xz, xy + xz)
            xhi = xhi_bound - max(0.0, xy, xz, xy + xz)
            ylo = ylo_bound - min(0.0, yz)
            yhi = yhi_bound - max(0.0, yz)

            box = np.zeros((3, 3), dtype=np.float64)
            box[0] = xhi - xlo, 0.0, 0.0
            box[1] = xy, yhi - ylo, 0.0
            box[2] = xz, yz, zhi - zlo

            xlen, ylen, zlen, alpha, beta, gamma = mdamath.triclinic_box(*box)
        else:
            xlo, xhi = map(float, f.readline().split())
            ylo, yhi = map(float, f.readline().split())
            zlo, zhi = map(float, f.readline().split())
            xlen = xhi - xlo
            ylen = yhi - ylo
            zlen = zhi - zlo
            alpha = beta = gamma = 90.
        ts.dimensions = xlen, ylen, zlen, alpha, beta, gamma

        indices = np.zeros(self.n_atoms, dtype=int)

        atomline = f.readline()  # ITEM ATOMS etc
        attrs = atomline.split()[2:]  # attributes on coordinate line
        attr_to_col_ix = {x: i for i, x in enumerate(attrs)}
        convention_to_col_ix = {}
        for cv_name, cv_col_names in self._coordtype_column_names.items():
            try:
                convention_to_col_ix[cv_name] = [attr_to_col_ix[x] for x in cv_col_names]
            except KeyError:
                pass
        # this should only trigger on first read of "ATOM" card, after which it
        # is fixed to the guessed value. Auto proceeds unscaled -> scaled
        # -> unwrapped -> scaled_unwrapped
        if self.lammps_coordinate_convention == "auto":
            try:
                # this will automatically select in order of priority
                # unscaled, scaled, unwrapped, scaled_unwrapped
                self.lammps_coordinate_convention = list(convention_to_col_ix)[0]
            except IndexError:
                raise ValueError("No coordinate information detected")
        elif not self.lammps_coordinate_convention in convention_to_col_ix:
            raise ValueError(f"No coordinates following convention {self.lammps_coordinate_convention} found in timestep")

        coord_cols = convention_to_col_ix[self.lammps_coordinate_convention]

        ids = "id" in attr_to_col_ix
        for i in range(self.n_atoms):
            fields = f.readline().split()
            if ids:
                indices[i] = fields[attr_to_col_ix["id"]]
            ts.positions[i] = [fields[dim] for dim in coord_cols]

        order = np.argsort(indices)
        ts.positions = ts.positions[order]
        if (self.lammps_coordinate_convention.startswith("scaled")):
            # if coordinates are given in scaled format, undo that
            ts.positions = distances.transform_StoR(ts.positions,
                                                    ts.dimensions)

        return ts