Source code for MDAnalysis.topology.GROParser

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""
GRO topology parser
===================

Read a list of atoms from a GROMOS/Gromacs GRO coordinate file to
build a basic topology.

Atom types and masses are guessed.

See Also
--------
:mod:`MDAnalysis.coordinates.GRO`



Classes
-------

.. autoclass:: GROParser
   :members:
   :inherited-members:

"""
import numpy as np

from ..lib.util import openany
from ..core.topologyattrs import (
    Atomnames,
    Atomtypes,
    Atomids,
    Masses,
    Resids,
    Resnames,
    Resnums,
    Segids,
)
from ..core.topology import Topology
from .base import TopologyReaderBase, change_squash
from . import guessers


[docs]class GROParser(TopologyReaderBase):
    """Reads a Gromacs GRO file

    Reads the following attributes:
      - resids
      - resnames
      - atomids
      - atomnames

    Guesses the following attributes
      - atomtypes
      - masses
    """
    format = 'GRO'

[docs]    def parse(self, **kwargs):
        """Return the *Topology* object for this file"""
        # Gro has the following columns
        # resid, resname, name, index, (x,y,z)
        with openany(self.filename) as inf:
            next(inf)
            n_atoms = int(next(inf))

            # Allocate shizznizz
            resids = np.zeros(n_atoms, dtype=np.int32)
            resnames = np.zeros(n_atoms, dtype=object)
            names = np.zeros(n_atoms, dtype=object)
            indices = np.zeros(n_atoms, dtype=np.int32)

            for i, line in enumerate(inf):
                if i == n_atoms:
                    break
                try:
                    resids[i] = int(line[:5])
                    resnames[i] = line[5:10].strip()
                    names[i] = line[10:15].strip()
                    indices[i] = int(line[15:20])
                except (ValueError, TypeError):
                    errmsg = (
                    f"Couldn't read the following line of the .gro file:\n"
                    f"{line}")
                    raise IOError(errmsg) from None
        # Check all lines had names
        if not np.all(names):
            missing = np.where(names == '')
            raise IOError("Missing atom name on line: {0}"
                          "".format(missing[0][0] + 3))  # 2 header, 1 based

        # Fix wrapping of resids (if we ever saw a wrap)
        if np.any(resids == 0):
            # find places where resid hit zero again
            wraps = np.where(resids == 0)[0]
            # group these places together:
            # find indices of first 0 in each block of zeroes
            # 1) find large changes in index, (ie non sequential blocks)
            diff = np.diff(wraps) != 1
            # 2) make array of where 0-blocks start
            starts = np.hstack([wraps[0], wraps[1:][diff]])

            # remove 0 in starts, ie the first residue **can** be 0
            if starts[0] == 0:
                starts = starts[1:]

            # for each resid after a wrap, add 100k (5 digit wrap)
            for s in starts:
                resids[s:] += 100000

        # Guess types and masses
        atomtypes = guessers.guess_types(names)
        masses = guessers.guess_masses(atomtypes)

        residx, (new_resids, new_resnames) = change_squash(
                                (resids, resnames), (resids, resnames))

        # new_resids is len(residues)
        # so resindex 0 has resid new_resids[0]
        attrs = [
            Atomnames(names),
            Atomids(indices),
            Atomtypes(atomtypes, guessed=True),
            Resids(new_resids),
            Resnums(new_resids.copy()),
            Resnames(new_resnames),
            Masses(masses, guessed=True),
            Segids(np.array(['SYSTEM'], dtype=object))
        ]

        top = Topology(n_atoms=n_atoms, n_res=len(new_resids), n_seg=1,
                       attrs=attrs,
                       atom_resindex=residx,
                       residue_segindex=None)

        return top