Source code for MDAnalysis.coordinates.GRO

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""
GRO file format --- :mod:`MDAnalysis.coordinates.GRO`
======================================================

Classes to read and write Gromacs_ GRO_ coordinate files; see the notes on the
`GRO format`_ which includes a conversion routine for the box.


Writing GRO files
-----------------

By default any written GRO files will renumber the atom ids to move sequentially
from 1.  This can be disabled, and instead the original atom ids kept, by
using the `reindex=False` keyword argument.  This is useful when writing a
subsection of a larger Universe while wanting to preserve the original
identities of atoms.

For example::

   >>> u = mda.Universe()`

   >>> u.atoms.write('out.gro', reindex=False)

   # OR
   >>> with mda.Writer('out.gro', reindex=False) as w:
   ...     w.write(u.atoms)


Classes
-------

.. autoclass:: GROReader
   :members:

.. autoclass:: GROWriter
   :members:


Developer notes: ``GROWriter`` format strings
---------------------------------------------

The :class:`GROWriter` class has a :attr:`GROWriter.fmt` attribute, which is a dictionary of different
strings for writing lines in ``.gro`` files.  These are as follows:

``n_atoms``
  For the first line of the gro file, supply the number of atoms in the system.
  E.g.::

      fmt['n_atoms'].format(42)

``xyz``
  An atom line without velocities.  Requires that the 'resid', 'resname',
  'name', 'index' and 'pos' keys be supplied.
  E.g.::

     fmt['xyz'].format(resid=1, resname='SOL', name='OW2', index=2, pos=(0.0, 1.0, 2.0))

``xyz_v``
  As above, but with velocities.  Needs an additional keyword 'vel'.

``box_orthorhombic``
  The final line of the gro file which gives box dimensions.  Requires
  the box keyword to be given, which should be the three cartesian dimensions.
  E.g.::

     fmt['box_orthorhombic'].format(box=(10.0, 10.0, 10.0))

``box_triclinic``
  As above, but for a non orthorhombic box. Requires the box keyword, but this
  time as a length 9 vector.  This is a flattened version of the (3,3) triclinic
  vector representation of the unit cell.  The rearrangement into the odd
  gromacs order is done automatically.


.. _Gromacs: http://www.gromacs.org
.. _GRO: http://manual.gromacs.org/current/online/gro.html
.. _GRO format: http://chembytes.wikidot.com/g-grofile

"""
import re

import itertools
import warnings

import numpy as np

from . import base
from .core import triclinic_box, triclinic_vectors
from ..exceptions import NoDataError
from ..lib import util
from .timestep import Timestep

"""unitcell dimensions (A, B, C, alpha, beta, gamma)

GRO::

  8.00170   8.00170   5.65806   0.00000   0.00000   0.00000   0.00000   4.00085   4.00085

PDB::

  CRYST1   80.017   80.017   80.017  60.00  60.00  90.00 P 1           1

XTC: c.trajectory.ts._unitcell::

  array([[ 80.00515747,   0.        ,   0.        ],
         [  0.        ,  80.00515747,   0.        ],
         [ 40.00257874,  40.00257874,  56.57218552]], dtype=float32)
"""
# unit cell line (from http://manual.gromacs.org/current/online/gro.html)
# v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y)
# 0     1     2      3     4     5     6    7     8
# This information now stored as _ts_order_x/y/z to keep DRY
_TS_ORDER_X = [0, 3, 4]
_TS_ORDER_Y = [5, 1, 6]
_TS_ORDER_Z = [7, 8, 2]

def _gmx_to_dimensions(box):
    # convert gromacs ordered box to [lx, ly, lz, alpha, beta, gamma] form
    x = box[_TS_ORDER_X]
    y = box[_TS_ORDER_Y]
    z = box[_TS_ORDER_Z]  # this ordering is correct! (checked it, OB)
    return triclinic_box(x, y, z)


[docs] class GROReader(base.SingleFrameReaderBase): """Reader for the Gromacs GRO structure format. .. note:: This Reader will only read the first frame present in a file. .. note:: GRO files with zeroed 3 entry unit cells (i.e. ``0.0 0.0 0.0``) are read as missing unit cell information. In these cases ``dimensions`` will be set to ``None``. .. versionchanged:: 0.11.0 Frames now 0-based instead of 1-based .. versionchanged:: 2.0.0 Reader now only parses boxes defined with 3 or 9 fields. Reader now reads a 3 entry zero unit cell (i.e. ``[0, 0, 0]``) as a being without dimension information (i.e. will the timestep dimension to ``None``). """ format = 'GRO' units = {'time': None, 'length': 'nm', 'velocity': 'nm/ps'} _Timestep = Timestep def _read_first_frame(self): with util.openany(self.filename, 'rt') as grofile: # Read first two lines to get number of atoms grofile.readline() self.n_atoms = n_atoms = int(grofile.readline()) self.ts = ts = self._Timestep(n_atoms, **self._ts_kwargs) # Always try, and maybe add them later velocities = np.zeros((n_atoms, 3), dtype=np.float32) missed_vel = False # and the third line to get the spacing between coords (cs) # (dependent upon the GRO file precision) first_atomline = grofile.readline() cs = first_atomline[25:].find('.') + 1 ts._pos[0] = [first_atomline[20 + cs * i:20 + cs * (i + 1)] for i in range(3)] try: velocities[0] = [first_atomline[20 + cs * i:20 + cs * (i + 1)] for i in range(3, 6)] except ValueError: # Remember that we got this error missed_vel = True # TODO: Handle missing unitcell? for pos, line in enumerate(grofile, start=1): # 2 header lines, 1 box line at end if pos == n_atoms: try: unitcell = np.float32(line.split()) except ValueError: # Try to parse floats with 5 digits if no spaces between values... unitcell = np.float32(re.findall(r"(\d+\.\d{5})", line)) break ts._pos[pos] = [line[20 + cs * i:20 + cs * (i + 1)] for i in range(3)] try: velocities[pos] = [line[20 + cs * i:20 + cs * (i + 1)] for i in range(3, 6)] except ValueError: # Remember that we got this error missed_vel = True if np.any(velocities): ts.velocities = velocities if missed_vel: warnings.warn("Not all velocities were present. " "Unset velocities set to zero.") self.ts.frame = 0 # 0-based frame number if len(unitcell) == 3: # special case: a b c --> (a 0 0) (b 0 0) (c 0 0) # see docstring above for format (!) # Treat empty 3 entry boxes as not having a unit cell if np.allclose(unitcell, [0., 0., 0.]): wmsg = ("Empty box [0., 0., 0.] found - treating as missing " "unit cell. Dimensions set to `None`.") warnings.warn(wmsg) self.ts.dimensions = None else: self.ts.dimensions = np.r_[unitcell, [90., 90., 90.]] elif len(unitcell) == 9: self.ts.dimensions = _gmx_to_dimensions(unitcell) else: # raise an error for wrong format errmsg = "GRO unitcell has neither 3 nor 9 entries." raise ValueError(errmsg) if self.convert_units: self.convert_pos_from_native(self.ts._pos) # in-place ! if self.ts.dimensions is not None: self.convert_pos_from_native(self.ts.dimensions[:3]) # in-place! if self.ts.has_velocities: # converts nm/ps to A/ps units self.convert_velocities_from_native(self.ts._velocities)
[docs] def Writer(self, filename, n_atoms=None, **kwargs): """Returns a CRDWriter for *filename*. Parameters ---------- filename: str filename of the output GRO file Returns ------- :class:`GROWriter` """ if n_atoms is None: n_atoms = self.n_atoms return GROWriter(filename, n_atoms=n_atoms, **kwargs)
[docs] class GROWriter(base.WriterBase): """GRO Writer that conforms to the Trajectory API. Will attempt to write the following information from the topology: - atom name (defaults to 'X') - resnames (defaults to 'UNK') - resids (defaults to '1') .. note:: The precision is hard coded to three decimal places. .. note:: When dimensions are missing (i.e. set to `None`), a zero width unit cell box will be written (i.e. [0.0, 0.0, 0.0]). .. versionchanged:: 0.11.0 Frames now 0-based instead of 1-based .. versionchanged:: 0.13.0 Now strictly writes positions with 3dp precision. and velocities with 4dp. Removed the `convert_dimensions_to_unitcell` method, use `Timestep.triclinic_dimensions` instead. Now now writes velocities where possible. .. versionchanged:: 0.18.0 Added `reindex` keyword argument to allow original atom ids to be kept. .. versionchanged:: 2.0.0 Raises a warning when writing timestep with missing dimension information (i.e. set to ``None``). """ format = 'GRO' units = {'time': None, 'length': 'nm', 'velocity': 'nm/ps'} gro_coor_limits = {'min': -999.9995, 'max': 9999.9995} #: format strings for the GRO file (all include newline); precision #: of 3 decimal places is hard-coded here. fmt = { 'n_atoms': "{0:5d}\n", # number of atoms # coordinates output format, see http://chembytes.wikidot.com/g-grofile 'xyz': "{resid:>5d}{resname:<5.5s}{name:>5.5s}{index:>5d}{pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}\n", # unitcell 'box_orthorhombic': "{box[0]:10.5f} {box[1]:9.5f} {box[2]:9.5f}\n", 'box_triclinic': "{box[0]:10.5f} {box[4]:9.5f} {box[8]:9.5f} {box[1]:9.5f} {box[2]:9.5f} {box[3]:9.5f} {box[5]:9.5f} {box[6]:9.5f} {box[7]:9.5f}\n" } fmt['xyz_v'] = fmt['xyz'][:-1] + "{vel[0]:8.4f}{vel[1]:8.4f}{vel[2]:8.4f}\n" def __init__(self, filename, convert_units=True, n_atoms=None, **kwargs): """Set up a GROWriter with a precision of 3 decimal places. Parameters ----------- filename : str output filename n_atoms : int (optional) number of atoms convert_units : bool (optional) units are converted to the MDAnalysis base format; [``True``] reindex : bool (optional) By default, all the atoms were reindexed to have a atom id starting from 1. [``True``] However, this behaviour can be turned off by specifying `reindex` ``=False``. Note ---- To use the reindex keyword, user can follow the two examples given below.:: u = mda.Universe() Usage 1:: u.atoms.write('out.gro', reindex=False) Usage 2:: with mda.Writer('out.gro', reindex=False) as w: w.write(u.atoms) """ self.filename = util.filename(filename, ext='gro', keep=True) self.n_atoms = n_atoms self.reindex = kwargs.pop('reindex', True) self.convert_units = convert_units # convert length and time to base units
[docs] def write(self, obj): """Write selection at current trajectory frame to file. Parameters ----------- obj : AtomGroup or Universe Note ---- The GRO format only allows 5 digits for *resid* and *atom number*. If these numbers become larger than 99,999 then this routine will chop off the leading digits. .. versionchanged:: 0.7.6 *resName* and *atomName* are truncated to a maximum of 5 characters .. versionchanged:: 0.16.0 `frame` kwarg has been removed .. versionchanged:: 2.0.0 Deprecated support for calling with Timestep has nwo been removed. Use AtomGroup or Universe as an input instead. """ # write() method that complies with the Trajectory API try: # make sure to use atoms (Issue 46) ag = obj.atoms # can write from selection == Universe (Issue 49) except AttributeError: errmsg = "Input obj is neither an AtomGroup or Universe" raise TypeError(errmsg) from None try: velocities = ag.velocities except NoDataError: has_velocities = False else: has_velocities = True # Check for topology information missing_topology = [] try: names = ag.names except (AttributeError, NoDataError): names = itertools.cycle(('X',)) missing_topology.append('names') try: resnames = ag.resnames except (AttributeError, NoDataError): resnames = itertools.cycle(('UNK',)) missing_topology.append('resnames') try: resids = ag.resids except (AttributeError, NoDataError): resids = itertools.cycle((1,)) missing_topology.append('resids') if not self.reindex: try: atom_indices = ag.ids except (AttributeError, NoDataError): atom_indices = range(1, ag.n_atoms+1) missing_topology.append('ids') else: atom_indices = range(1, ag.n_atoms + 1) if missing_topology: warnings.warn( "Supplied AtomGroup was missing the following attributes: " "{miss}. These will be written with default values. " "Alternatively these can be supplied as keyword arguments." "".format(miss=', '.join(missing_topology))) positions = ag.positions if self.convert_units: # Convert back to nm from Angstroms, # Not inplace because AtomGroup is not a copy positions = self.convert_pos_to_native(positions, inplace=False) if has_velocities: velocities = self.convert_velocities_to_native(velocities, inplace=False) # check if any coordinates are illegal # (checks the coordinates in native nm!) if not self.has_valid_coordinates(self.gro_coor_limits, positions): raise ValueError("GRO files must have coordinate values between " "{0:.3f} and {1:.3f} nm: No file was written." "".format(self.gro_coor_limits["min"], self.gro_coor_limits["max"])) with util.openany(self.filename, 'wt') as output_gro: # Header output_gro.write('Written by MDAnalysis\n') output_gro.write(self.fmt['n_atoms'].format(ag.n_atoms)) # Atom descriptions and coords # Dont use enumerate here, # all attributes could be infinite cycles! for atom_index, resid, resname, name in zip( range(ag.n_atoms), resids, resnames, names): truncated_atom_index = util.ltruncate_int(atom_indices[atom_index], 5) truncated_resid = util.ltruncate_int(resid, 5) if has_velocities: output_gro.write(self.fmt['xyz_v'].format( resid=truncated_resid, resname=resname, index=truncated_atom_index, name=name, pos=positions[atom_index], vel=velocities[atom_index], )) else: output_gro.write(self.fmt['xyz'].format( resid=truncated_resid, resname=resname, index=truncated_atom_index, name=name, pos=positions[atom_index] )) # Footer: box dimensions if (ag.dimensions is None or np.allclose(ag.dimensions[3:], [90., 90., 90.])): if ag.dimensions is None: wmsg = ("missing dimension - setting unit cell to zeroed " "box [0., 0., 0.]") warnings.warn(wmsg) box = np.zeros(3) else: box = self.convert_pos_to_native( ag.dimensions[:3], inplace=False) # orthorhombic cell, only lengths along axes needed in gro output_gro.write(self.fmt['box_orthorhombic'].format( box=box) ) else: try: # for AtomGroup/Universe tri_dims = obj.universe.coord.triclinic_dimensions except AttributeError: # for Timestep tri_dims = obj.triclinic_dimensions # full output box = self.convert_pos_to_native(tri_dims.flatten(), inplace=False) output_gro.write(self.fmt['box_triclinic'].format(box=box))