Source code for MDAnalysis.coordinates.XYZ

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#


"""XYZ trajectory reader --- :mod:`MDAnalysis.coordinates.XYZ`
==============================================================

The :ref:`XYZ format <xyz-format>` is a loosely defined, simple
coordinate trajectory format. The implemented format definition was
taken from the `VMD xyzplugin`_ and is therefore compatible with VMD.

Note the following:

* Comments are not allowed in the XYZ file (we neither read nor write
  them to remain compatible with VMD).
* The atom name (first column) is ignored during reading.
* The coordinates are assumed to be space-delimited rather than fixed
  width (this may cause issues - see below).
* All fields to the right of the z-coordinate are ignored.
* The unitcell information is all zeros since this is not recorded in
  the XYZ format.

.. rubric:: Units

* Coordinates are in Angstroms.
* The length of a timestep can be set by passing the *dt* argument,
  it's assumed to be in ps (default: 1 ps).

There appears to be no rigid format definition so it is likely users
will need to tweak this class.

.. _xyz-format:

XYZ File format
---------------

Definition used by the :class:`XYZReader` and :class:`XYZWriter` (and
the `VMD xyzplugin`_ from whence the definition was taken)::

    [ comment line            ] !! NOT IMPLEMENTED !! DO NOT INCLUDE
    [ N                       ] # of atoms, required by this xyz reader plugin  line 1
    [ molecule name           ] name of molecule (can be blank)                 line 2
    atom1 x y z [optional data] atom name followed by xyz coords                line 3
    atom2 x y z [ ...         ] and (optionally) other data.
    ...
    atomN x y z [ ...         ]                                                 line N+2


Note
----
* comment lines not implemented (do not include them)
* molecule name: the line is required but the content is ignored
  at the moment
* optional data (after the coordinates) are presently ignored


.. Links
.. _`VMD xyzplugin`: http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

Classes
-------

"""
import itertools
import os
import errno
import numpy as np
import warnings
import logging
logger = logging.getLogger('MDAnalysis.coordinates.XYZ')

from . import base
from ..lib import util
from ..lib.util import cached, store_init_arguments
from ..exceptions import NoDataError
from ..version import __version__


[docs]class XYZWriter(base.WriterBase):
    """Writes an XYZ file

    The XYZ file format is not formally defined. This writer follows
    the VMD implementation for the molfile `xyzplugin`_.


    Notes
    -----
    By default, the XYZ writer will attempt to use the input
    :class:`~MDAnalysis.core.groups.AtomGroup` or
    :class:`~MDAnalysis.core.universe.Universe` ``elements`` record to assign
    atom names in the XYZ file. If the ``elements`` record is missing, then
    the ``name`` record will be used. In the event that neither of these are
    available, the atoms will all be named ``X``. Please see, the
    `User Guide`_ for more information on how to add topology attributes if
    you wish to add your own elements / atom names to a
    :class:`~MDAnalysis.core.universe.Universe`.


    .. Links

    .. _xyzplugin:
           http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html
    .. _User Guide:
           https://userguide.mdanalysis.org/examples/constructing_universe.html#Adding-topology-attributes


    .. versionchanged:: 1.0.0
       Use elements attribute instead of names attribute, if present.
    .. versionchanged:: 2.0.0
       Support for passing timestep to the writer was deprecated in 1.0 and
       has now been removed. As a consequence, custom names can no longer be
       passed to the writer, these should be added to the
       :class:`~MDAnalysis.core.universe.Universe`, or
       :class:`~MDAnalysis.core.groups.AtomGroup` before invoking the writer.
    """

    format = 'XYZ'
    multiframe = True
    # these are assumed!
    units = {'time': 'ps', 'length': 'Angstrom'}

    def __init__(self, filename, n_atoms=None, convert_units=True,
                 remark=None, **kwargs):
        """Initialize the XYZ trajectory writer

        Parameters
        ----------
        filename: str
            filename of trajectory file. If it ends with "gz" then the file
            will be gzip-compressed; if it ends with "bz2" it will be bzip2
            compressed.
        n_atoms: int (optional)
            Number of atoms in trajectory. By default assume that this is None
            and that this file is used to store several different models
            instead of a single trajectory. If a number is provided each
            written TimeStep has to contain the same number of atoms.
        convert_units : bool (optional)
            convert quantities to default MDAnalysis units of Angstrom upon
            writing  [``True``]
        remark: str (optional)
            single line of text ("molecule name"). By default writes MDAnalysis
            version and frame


        .. versionchanged:: 1.0.0
           Removed :code:`default_remark` variable (Issue #2692).
        .. versionchanged:: 2.0.0
           Due to the removal of timestep as an input for writing, the atoms
           parameter is no longer relevant and has been removed. If passing
           an empty universe, please use ``add_TopologyAttr`` to add in the
           required elements or names.
        """
        self.filename = filename
        self.remark = remark
        self.n_atoms = n_atoms
        self.convert_units = convert_units

        # can also be gz, bz2
        self._xyz = util.anyopen(self.filename, 'wt')

    def _get_atoms_elements_or_names(self, atoms):
        """Return a list of atom elements (if present) or fallback to atom names"""
        try:
            return atoms.atoms.elements
        except (AttributeError, NoDataError):
            try:
                return atoms.atoms.names
            except (AttributeError, NoDataError):
                wmsg = ("Input AtomGroup or Universe does not have atom "
                        "elements or names attributes, writer will default "
                        "atom names to 'X'")
                warnings.warn(wmsg)
                return itertools.cycle(('X',))

[docs]    def close(self):
        """Close the trajectory file and finalize the writing"""
        if self._xyz is not None:
            self._xyz.write("\n")
            self._xyz.close()
        self._xyz = None

[docs]    def write(self, obj):
        """Write object `obj` at current trajectory frame to file.

        Atom elements (or names) in the output are taken from the `obj` or
        default to the value of the `atoms` keyword supplied to the
        :class:`XYZWriter` constructor.

        Parameters
        ----------
        obj : Universe or AtomGroup
            The :class:`~MDAnalysis.core.groups.AtomGroup` or
            :class:`~MDAnalysis.core.universe.Universe` to write.


        .. versionchanged:: 2.0.0
           Deprecated support for Timestep argument has now been removed.
           Use AtomGroup or Universe as an input instead.
        """
        # prepare the Timestep and extract atom names if possible
        # (The way it is written it should be possible to write
        # trajectories with frames that differ in atom numbers
        # but this is not tested.)
        try:
            atoms = obj.atoms
        except AttributeError:
            errmsg = "Input obj is neither an AtomGroup or Universe"
            raise TypeError(errmsg) from None
        else:
            if hasattr(obj, 'universe'):
                # For AtomGroup and children (Residue, ResidueGroup, Segment)
                ts_full = obj.universe.trajectory.ts
                if ts_full.n_atoms == atoms.n_atoms:
                    ts = ts_full
                else:
                    # Only populate a time step with the selected atoms.
                    ts = ts_full.copy_slice(atoms.indices)
            elif hasattr(obj, 'trajectory'):
                # For Universe only --- get everything
                ts = obj.trajectory.ts
            # update atom names
            self.atomnames = self._get_atoms_elements_or_names(atoms)

        self._write_next_frame(ts)

    def _write_next_frame(self, ts=None):
        """
        Write coordinate information in *ts* to the trajectory

        .. versionchanged:: 1.0.0
           Print out :code:`remark` if present, otherwise use generic one 
           (Issue #2692).
           Renamed from `write_next_timestep` to `_write_next_frame`.
        """
        if ts is None:
            if not hasattr(self, 'ts'):
                raise NoDataError('XYZWriter: no coordinate data to write to '
                                  'trajectory file')
            else:
                ts = self.ts

        if self.n_atoms is not None:
            if self.n_atoms != ts.n_atoms:
                raise ValueError('n_atoms keyword was specified indicating '
                                 'that this should be a trajectory of the '
                                 'same model. But the provided TimeStep has a '
                                 'different number ({}) then expected ({})'
                                 ''.format(ts.n_atoms, self.n_atoms))
        else:
            if (not isinstance(self.atomnames, itertools.cycle) and
                len(self.atomnames) != ts.n_atoms):
                logger.info('Trying to write a TimeStep with unknown atoms. '
                            'Expected {} atoms, got {}. Try using "write" if you are '
                            'using "_write_next_frame" directly'.format(
                                len(self.atomnames), ts.n_atoms))
                self.atomnames = np.array([self.atomnames[0]] * ts.n_atoms)

        if self.convert_units:
            coordinates = self.convert_pos_to_native(
                ts.positions, inplace=False)
        else:
            coordinates = ts.positions

        # Write number of atoms
        self._xyz.write("{0:d}\n".format(ts.n_atoms))

        # Write remark
        if self.remark is None:
            remark = "frame {} | Written by MDAnalysis {} (release {})\n".format(
                ts.frame, self.__class__.__name__, __version__)

            self._xyz.write(remark)
        else:
            self._xyz.write(self.remark.strip() + "\n")

        # Write content
        for atom, (x, y, z) in zip(self.atomnames, coordinates):
            self._xyz.write("{0!s:>8}  {1:10.5f} {2:10.5f} {3:10.5f}\n"
                            "".format(atom, x, y, z))


[docs]class XYZReader(base.ReaderBase):
    """Reads from an XYZ file

    :Data:
        :attr:`ts`
          Timestep object containing coordinates of current frame

    :Methods:
        ``len(xyz)``
          return number of frames in xyz
        ``for ts in xyz:``
          iterate through trajectory

    .. Note: this can read both compressed (foo.xyz) and compressed
          (foo.xyz.bz2 or foo.xyz.gz) files; uncompression is handled
          on the fly and also reads streams via
          :class:`~MDAnalysis.lib.util.NamedStream`.

    The XYZ file format follows VMD's xyzplugin_ and is also described
    under :ref:`XYZ format <xyz-format>`.

    .. versionchanged:: 0.11.0
       Frames now 0-based instead of 1-based. Added *dt* and
       *time_offset* keywords (passed to :class:`Timestep`)
    """

    # Phil Fowler:
    # Validation: the geometric centre of 1284 atoms was calculated over
    # 500 frames using both MDAnalysis and a VMD Tcl script. There was
    # exact agreement (measured to three decimal places). bzipped and
    # gzipped versions of the XYZ file were also tested

    format = "XYZ"
    # these are assumed!
    units = {'time': 'ps', 'length': 'Angstrom'}
    _Timestep = base.Timestep

    @store_init_arguments
    def __init__(self, filename, **kwargs):
        super(XYZReader, self).__init__(filename, **kwargs)

        # the filename has been parsed to be either be foo.xyz or foo.xyz.bz2 by
        # coordinates::core.py so the last file extension will tell us if it is
        # bzipped or not
        root, ext = os.path.splitext(self.filename)
        self.xyzfile = util.anyopen(self.filename)
        self.compression = ext[1:] if ext[1:] != "xyz" else None
        self._cache = dict()

        self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
        # Haven't quite figured out where to start with all the self._reopen()
        # etc.
        # (Also cannot just use seek() or reset() because that would break
        # with urllib2.urlopen() streams)
        self._read_next_timestep()

    @property
    @cached('n_atoms')
    def n_atoms(self):
        """number of atoms in a frame"""
        with util.anyopen(self.filename) as f:
            n = f.readline()
        # need to check type of n
        return int(n)

    @property
    @cached('n_frames')
    def n_frames(self):
        try:
            return self._read_xyz_n_frames()
        except IOError:
            return 0

    def _read_xyz_n_frames(self):
        # the number of lines in the XYZ file will be 2 greater than the
        # number of atoms
        linesPerFrame = self.n_atoms + 2
        counter = 0
        offsets = []

        with util.anyopen(self.filename) as f:
            line = True
            while line:
                if not counter % linesPerFrame:
                    offsets.append(f.tell())
                line = f.readline()
                counter += 1

        # need to check this is an integer!
        n_frames = int(counter / linesPerFrame)
        self._offsets = offsets
        return n_frames

    def _read_frame(self, frame):
        self.xyzfile.seek(self._offsets[frame])
        self.ts.frame = frame - 1  # gets +1'd in next
        return self._read_next_timestep()

    def _read_next_timestep(self, ts=None):
        # check that the timestep object exists
        if ts is None:
            ts = self.ts

        f = self.xyzfile

        try:
            # we assume that there are only two header lines per frame
            f.readline()
            f.readline()
            # convert all entries at the end once for optimal speed
            tmp_buf = []
            for i in range(self.n_atoms):
                tmp_buf.append(f.readline().split()[1:4])
            ts.positions = tmp_buf
            ts.frame += 1
            return ts
        except (ValueError, IndexError) as err:
            raise EOFError(err) from None

    def _reopen(self):
        self.close()
        self.open_trajectory()

    def open_trajectory(self):
        if self.xyzfile is not None:
            raise IOError(
                errno.EALREADY, 'XYZ file already opened', self.filename)

        self.xyzfile = util.anyopen(self.filename)

        # reset ts
        ts = self.ts
        ts.frame = -1

        return self.xyzfile

[docs]    def Writer(self, filename, n_atoms=None, **kwargs):
        """Returns a XYZWriter for *filename* with the same parameters as this
        XYZ.

        Parameters
        ----------
        filename: str
            filename of the output trajectory
        n_atoms: int (optional)
            number of atoms. If none is given use the same number of atoms from
            the reader instance is used
        **kwargs:
            See :class:`XYZWriter` for additional kwargs

        Returns
        -------
        :class:`XYZWriter` (see there for more details)

        See Also
        --------
        :class:`XYZWriter`
        """
        if n_atoms is None:
            n_atoms = self.n_atoms
        return XYZWriter(filename, n_atoms=n_atoms, **kwargs)

[docs]    def close(self):
        """Close xyz trajectory file if it was open."""
        if self.xyzfile is None:
            return
        self.xyzfile.close()
        self.xyzfile = None