Source code for MDAnalysis.coordinates.GRO

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# MDAnalysis ---
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
# Released under the GNU Public Licence, v2 or any higher version
# Please cite your use of MDAnalysis in published work:
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787

GRO file format --- :mod:`MDAnalysis.coordinates.GRO`

Classes to read and write Gromacs_ GRO_ coordinate files; see the notes on the
`GRO format`_ which includes a conversion routine for the box.

Writing GRO files

By default any written GRO files will renumber the atom ids to move sequentially
from 1.  This can be disabled, and instead the original atom ids kept, by
using the `reindex=False` keyword argument.  This is useful when writing a
subsection of a larger Universe while wanting to preserve the original
identities of atoms.

For example::

   >>> u = mda.Universe()`

   >>> u.atoms.write('out.gro', reindex=False)

   # OR
   >>> with mda.Writer('out.gro', reindex=False) as w:
   ...     w.write(u.atoms)


.. autoclass:: Timestep

.. autoclass:: GROReader

.. autoclass:: GROWriter

Developer notes: ``GROWriter`` format strings

The :class:`GROWriter` class has a :attr:`GROWriter.fmt` attribute, which is a dictionary of different
strings for writing lines in ``.gro`` files.  These are as follows:

  For the first line of the gro file, supply the number of atoms in the system.


  An atom line without velocities.  Requires that the 'resid', 'resname',
  'name', 'index' and 'pos' keys be supplied.

     fmt['xyz'].format(resid=1, resname='SOL', name='OW2', index=2, pos=(0.0, 1.0, 2.0))

  As above, but with velocities.  Needs an additional keyword 'vel'.

  The final line of the gro file which gives box dimensions.  Requires
  the box keyword to be given, which should be the three cartesian dimensions.

     fmt['box_orthorhombic'].format(box=(10.0, 10.0, 10.0))

  As above, but for a non orthorhombic box. Requires the box keyword, but this
  time as a length 9 vector.  This is a flattened version of the (3,3) triclinic
  vector representation of the unit cell.  The rearrangement into the odd
  gromacs order is done automatically.

.. _Gromacs:
.. _GRO:
.. _GRO format:

from __future__ import absolute_import

import re
from six.moves import range, zip
from six import raise_from

import itertools
import warnings

import numpy as np

from . import base
from .core import triclinic_box, triclinic_vectors
from ..exceptions import NoDataError
from ..lib import util

[docs]class Timestep(base.Timestep): _ts_order_x = [0, 3, 4] _ts_order_y = [5, 1, 6] _ts_order_z = [7, 8, 2] def _init_unitcell(self): return np.zeros(9, dtype=np.float32) @property def dimensions(self): """unitcell dimensions (A, B, C, alpha, beta, gamma) GRO:: 8.00170 8.00170 5.65806 0.00000 0.00000 0.00000 0.00000 4.00085 4.00085 PDB:: CRYST1 80.017 80.017 80.017 60.00 60.00 90.00 P 1 1 XTC: c.trajectory.ts._unitcell:: array([[ 80.00515747, 0. , 0. ], [ 0. , 80.00515747, 0. ], [ 40.00257874, 40.00257874, 56.57218552]], dtype=float32) """ # unit cell line (from # v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y) # 0 1 2 3 4 5 6 7 8 # This information now stored as _ts_order_x/y/z to keep DRY x = self._unitcell[self._ts_order_x] y = self._unitcell[self._ts_order_y] z = self._unitcell[self._ts_order_z] # this ordering is correct! (checked it, OB) return triclinic_box(x, y, z) @dimensions.setter def dimensions(self, box): x, y, z = triclinic_vectors(box) np.put(self._unitcell, self._ts_order_x, x) np.put(self._unitcell, self._ts_order_y, y) np.put(self._unitcell, self._ts_order_z, z)
[docs]class GROReader(base.SingleFrameReaderBase): """Reader for the Gromacs GRO structure format. .. note:: This Reader will only read the first frame present in a file. .. versionchanged:: 0.11.0 Frames now 0-based instead of 1-based """ format = 'GRO' units = {'time': None, 'length': 'nm', 'velocity': 'nm/ps'} _Timestep = Timestep def _read_first_frame(self): with util.openany(self.filename, 'rt') as grofile: # Read first two lines to get number of atoms grofile.readline() self.n_atoms = n_atoms = int(grofile.readline()) self.ts = ts = self._Timestep(n_atoms, **self._ts_kwargs) # Always try, and maybe add them later velocities = np.zeros((n_atoms, 3), dtype=np.float32) missed_vel = False # and the third line to get the spacing between coords (cs) # (dependent upon the GRO file precision) first_atomline = grofile.readline() cs = first_atomline[25:].find('.') + 1 ts._pos[0] = [first_atomline[20 + cs * i:20 + cs * (i + 1)] for i in range(3)] try: velocities[0] = [first_atomline[20 + cs * i:20 + cs * (i + 1)] for i in range(3, 6)] except ValueError: # Remember that we got this error missed_vel = True for pos, line in enumerate(grofile, start=1): # 2 header lines, 1 box line at end if pos == n_atoms: try: unitcell = np.float32(line.split()) except ValueError: # Try to parse floats with 5 digits if no spaces between values... unitcell = np.float32(re.findall(r"(\d+\.\d{5})", line)) break ts._pos[pos] = [line[20 + cs * i:20 + cs * (i + 1)] for i in range(3)] try: velocities[pos] = [line[20 + cs * i:20 + cs * (i + 1)] for i in range(3, 6)] except ValueError: # Remember that we got this error missed_vel = True if np.any(velocities): ts.velocities = velocities if missed_vel: warnings.warn("Not all velocities were present. " "Unset velocities set to zero.") self.ts.frame = 0 # 0-based frame number if len(unitcell) == 3: # special case: a b c --> (a 0 0) (b 0 0) (c 0 0) # see Timestep.dimensions() above for format (!) self.ts._unitcell[:3] = unitcell elif len(unitcell) == 9: self.ts._unitcell[:] = unitcell # fill all else: # or maybe raise an error for wrong format?? warnings.warn("GRO unitcell has neither 3 nor 9 entries --- might be wrong.") self.ts._unitcell[:len(unitcell)] = unitcell # fill linearly ... not sure about this if self.convert_units: self.convert_pos_from_native(self.ts._pos) # in-place ! self.convert_pos_from_native(self.ts._unitcell) # in-place ! (all are lengths) if self.ts.has_velocities: # converts nm/ps to A/ps units self.convert_velocities_from_native(self.ts._velocities)
[docs] def Writer(self, filename, n_atoms=None, **kwargs): """Returns a CRDWriter for *filename*. Parameters ---------- filename: str filename of the output GRO file Returns ------- :class:`GROWriter` """ if n_atoms is None: n_atoms = self.n_atoms return GROWriter(filename, n_atoms=n_atoms, **kwargs)
[docs]class GROWriter(base.WriterBase): """GRO Writer that conforms to the Trajectory API. Will attempt to write the following information from the topology: - atom name (defaults to 'X') - resnames (defaults to 'UNK') - resids (defaults to '1') Note ---- The precision is hard coded to three decimal places .. versionchanged:: 0.11.0 Frames now 0-based instead of 1-based .. versionchanged:: 0.13.0 Now strictly writes positions with 3dp precision. and velocities with 4dp. Removed the `convert_dimensions_to_unitcell` method, use `Timestep.triclinic_dimensions` instead. Now now writes velocities where possible. .. versionchanged:: 0.18.0 Added `reindex` keyword argument to allow original atom ids to be kept. """ format = 'GRO' units = {'time': None, 'length': 'nm', 'velocity': 'nm/ps'} gro_coor_limits = {'min': -999.9995, 'max': 9999.9995} #: format strings for the GRO file (all include newline); precision #: of 3 decimal places is hard-coded here. fmt = { 'n_atoms': "{0:5d}\n", # number of atoms # coordinates output format, see 'xyz': "{resid:>5d}{resname:<5.5s}{name:>5.5s}{index:>5d}{pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}\n", # unitcell 'box_orthorhombic': "{box[0]:10.5f} {box[1]:9.5f} {box[2]:9.5f}\n", 'box_triclinic': "{box[0]:10.5f} {box[4]:9.5f} {box[8]:9.5f} {box[1]:9.5f} {box[2]:9.5f} {box[3]:9.5f} {box[5]:9.5f} {box[6]:9.5f} {box[7]:9.5f}\n" } fmt['xyz_v'] = fmt['xyz'][:-1] + "{vel[0]:8.4f}{vel[1]:8.4f}{vel[2]:8.4f}\n" def __init__(self, filename, convert_units=True, n_atoms=None, **kwargs): """Set up a GROWriter with a precision of 3 decimal places. Parameters ----------- filename : str output filename n_atoms : int (optional) number of atoms convert_units : bool (optional) units are converted to the MDAnalysis base format; [``True``] reindex : bool (optional) By default, all the atoms were reindexed to have a atom id starting from 1. [``True``] However, this behaviour can be turned off by specifying `reindex` ``=False``. Note ---- To use the reindex keyword, user can follow the two examples given below.:: u = mda.Universe() Usage 1:: u.atoms.write('out.gro', reindex=False) Usage 2:: with mda.Writer('out.gro', reindex=False) as w: w.write(u.atoms) """ self.filename = util.filename(filename, ext='gro', keep=True) self.n_atoms = n_atoms self.reindex = kwargs.pop('reindex', True) self.convert_units = convert_units # convert length and time to base units
[docs] def write(self, obj): """Write selection at current trajectory frame to file. Parameters ----------- obj : AtomGroup or Universe Note ---- The GRO format only allows 5 digits for *resid* and *atom number*. If these numbers become larger than 99,999 then this routine will chop off the leading digits. .. versionchanged:: 0.7.6 *resName* and *atomName* are truncated to a maximum of 5 characters .. versionchanged:: 0.16.0 `frame` kwarg has been removed .. deprecated:: 1.0.0 Deprecated calling with Timestep, use AtomGroup or Universe. To be removed in version 2.0. """ # write() method that complies with the Trajectory API try: # make sure to use atoms (Issue 46) ag_or_ts = obj.atoms # can write from selection == Universe (Issue 49) except AttributeError: if isinstance(obj, base.Timestep): warnings.warn( 'Passing a Timestep to write is deprecated, ' 'and will be removed in 2.0; ' 'use either an AtomGroup or Universe', DeprecationWarning) ag_or_ts = obj.copy() else: raise_from(TypeError("No Timestep found in obj argument"), None) try: velocities = ag_or_ts.velocities except NoDataError: has_velocities = False else: has_velocities = True # Check for topology information missing_topology = [] try: names = ag_or_ts.names except (AttributeError, NoDataError): names = itertools.cycle(('X',)) missing_topology.append('names') try: resnames = ag_or_ts.resnames except (AttributeError, NoDataError): resnames = itertools.cycle(('UNK',)) missing_topology.append('resnames') try: resids = ag_or_ts.resids except (AttributeError, NoDataError): resids = itertools.cycle((1,)) missing_topology.append('resids') if not self.reindex: try: atom_indices = ag_or_ts.ids except (AttributeError, NoDataError): atom_indices = range(1, ag_or_ts.n_atoms+1) missing_topology.append('ids') else: atom_indices = range(1, ag_or_ts.n_atoms + 1) if missing_topology: warnings.warn( "Supplied AtomGroup was missing the following attributes: " "{miss}. These will be written with default values. " "Alternatively these can be supplied as keyword arguments." "".format(miss=', '.join(missing_topology))) positions = ag_or_ts.positions if self.convert_units: # Convert back to nm from Angstroms, # Not inplace because AtomGroup is not a copy positions = self.convert_pos_to_native(positions, inplace=False) if has_velocities: velocities = self.convert_velocities_to_native(velocities, inplace=False) # check if any coordinates are illegal # (checks the coordinates in native nm!) if not self.has_valid_coordinates(self.gro_coor_limits, positions): raise ValueError("GRO files must have coordinate values between " "{0:.3f} and {1:.3f} nm: No file was written." "".format(self.gro_coor_limits["min"], self.gro_coor_limits["max"])) with util.openany(self.filename, 'wt') as output_gro: # Header output_gro.write('Written by MDAnalysis\n') output_gro.write(self.fmt['n_atoms'].format(ag_or_ts.n_atoms)) # Atom descriptions and coords # Dont use enumerate here, # all attributes could be infinite cycles! for atom_index, resid, resname, name in zip( range(ag_or_ts.n_atoms), resids, resnames, names): truncated_atom_index = util.ltruncate_int(atom_indices[atom_index], 5) truncated_resid = util.ltruncate_int(resid, 5) if has_velocities: output_gro.write(self.fmt['xyz_v'].format( resid=truncated_resid, resname=resname, index=truncated_atom_index, name=name, pos=positions[atom_index], vel=velocities[atom_index], )) else: output_gro.write(self.fmt['xyz'].format( resid=truncated_resid, resname=resname, index=truncated_atom_index, name=name, pos=positions[atom_index] )) # Footer: box dimensions if np.allclose(ag_or_ts.dimensions[3:], [90., 90., 90.]): box = self.convert_pos_to_native( ag_or_ts.dimensions[:3], inplace=False) # orthorhombic cell, only lengths along axes needed in gro output_gro.write(self.fmt['box_orthorhombic'].format( box=box) ) else: try: # for AtomGroup/Universe tri_dims = obj.universe.coord.triclinic_dimensions except AttributeError: # for Timestep tri_dims = obj.triclinic_dimensions # full output box = self.convert_pos_to_native(tri_dims.flatten(), inplace=False) output_gro.write(self.fmt['box_triclinic'].format(box=box))