Source code for MDAnalysis.coordinates.GMS

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
#

"""GAMESS trajectory reader --- :mod:`MDAnalysis.coordinates.GMS`
=================================================================

Resources: the GMS output format is a common output format for different
GAMESS distributions: US-GAMESS, Firefly (PC-GAMESS) and GAMESS-UK.

Current version was approbated with US-GAMESS & Firefly only.

There appears to be no rigid format definition so it is likely users
will need to tweak the :class:`GMSReader`.

.. autoclass:: GMSReader
   :members:

"""
import os
import errno
import re

from . import base
import MDAnalysis.lib.util as util
from MDAnalysis.lib.util import store_init_arguments


[docs]class GMSReader(base.ReaderBase):
    """Reads from an GAMESS output file

    :Data:
        ts
          Timestep object containing coordinates of current frame

    :Methods:
        ``len(out)``
          return number of frames in out
        ``for ts in out:``
          iterate through trajectory

    Note
    ----
    :class:`GMSReader` can read both uncompressed (``foo.out``) and
    compressed (``foo.out.bz2`` or ``foo.out.gz``) files;
    decompression is handled on the fly


    .. versionchanged:: 0.11.0
       Frames now 0-based instead of 1-based.
       Added `dt` and `time_offset` keywords (passed to :class:`Timestep`).

    """

    format = "GMS"

    # these are assumed!
    units = {'time': 'ps', 'length': 'Angstrom'}

    @store_init_arguments
    def __init__(self, outfilename, **kwargs):
        super(GMSReader, self).__init__(outfilename, **kwargs)

        # the filename has been parsed to be either b(g)zipped or not
        self.outfile = util.anyopen(self.filename)

        # note that, like for xtc and trr files, _n_atoms and _n_frames are used quasi-private variables
        # to prevent the properties being recalculated
        # this is because there is no indexing so the way it measures the number of frames is to read the whole file!
        self._n_atoms = None
        self._n_frames = None
        self._runtyp = None

        self.ts = self._Timestep(0) # need for properties initial calculations

        # update runtyp property
        self.runtyp
        if not self.runtyp in ['optimize', 'surface']:
            raise AttributeError('Wrong RUNTYP= '+self.runtyp)

        self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
        # update n_frames property
        self.n_frames

        # Read in the first timestep
        self._read_next_timestep()

    @property
    def runtyp(self):
        """RUNTYP property of the GAMESS run"""
        if self._runtyp is not None:   # return cached value
            return self._runtyp
        try:
            self._runtyp = self._determine_runtyp()
        except IOError:
            return 0
        else:
            return self._runtyp

    def _determine_runtyp(self):
        with util.openany(self.filename) as out:
            for line in out:
                m = re.match(r'^.*RUNTYP=([A-Z]+)\s+.*', line)
                if m is not None:
                    res = m.group(1).lower()
                    break

        return res

    @property
    def n_atoms(self):
        """number of atoms in a frame"""
        if self._n_atoms is not None:   # return cached value
            return self._n_atoms
        try:
            self._n_atoms = self._read_out_natoms()
        except IOError:
            return 0
        else:
            return self._n_atoms

    def _read_out_natoms(self):
        with util.openany(self.filename) as out:
            for line in out:
                m = re.match(r'\s*TOTAL NUMBER OF ATOMS\s*=\s*([0-9]+)\s*',line)
                if m is not None:
                    res = int(m.group(1))
                    break

        return res

    @property
    def n_frames(self):
        if self._n_frames is not None:   # return cached value
            return self._n_frames
        try:
            self._n_frames = self._read_out_n_frames()
        except IOError:
            return 0
        else:
            return self._n_frames

    def _read_out_n_frames(self):
        if self.runtyp == 'optimize':
            trigger = re.compile(b'^.NSERCH=.*')
        elif self.runtyp == 'surface':
            trigger = re.compile(b'^.COORD 1=.*')

        self._offsets = offsets = []
        with util.openany(self.filename, 'rb') as out:
            line = True
            while not line == b'':  # while not EOF
                line = out.readline()
                if re.match(trigger, line):
                    offsets.append(out.tell() - len(line))

        return len(offsets)

    def _read_frame(self, frame):
        self.outfile.seek(self._offsets[frame])
        self.ts.frame = frame - 1  # gets +1'd in _read_next
        return self._read_next_timestep()

    def _read_next_timestep(self, ts=None):
        # check that the timestep object exists
        if ts is None:
            ts = self.ts
        # check that the outfile object exists; if not reopen the trajectory
        if self.outfile is None:
            self.open_trajectory()
        x = []
        y = []
        z = []

        flag = 0
        counter = 0

        for line in self.outfile:
            if self.runtyp == 'optimize':
                if (flag == 0) and (re.match(r'^.NSERCH=.*', line) is not None):
                    flag = 1
                    continue
                if (flag == 1) and (re.match(r'^ COORDINATES OF ALL ATOMS ARE ',\
                    line) is not None):
                    flag = 2
                    continue
                if (flag == 2) and (re.match(r'^\s*-+\s*', line) is not None):
                    flag = 3
                    continue
                if flag == 3 and counter < self.n_atoms:
                    words = line.split()
                    x.append(float(words[2]))
                    y.append(float(words[3]))
                    z.append(float(words[4]))
                    counter += 1

            elif self.runtyp == 'surface':
                if (flag == 0) and (re.match(\
                        r'^.COORD 1=\s*([-]?[0-9]+\.[0-9]+).*', line) is not None):
                    flag = 1
                    continue
                if (flag == 1) and (re.match(\
                        r'^\s*HAS ENERGY VALUE\s*([-]?[0-9]+\.[0-9]+)\s*', line) is not None):
                    flag = 3
                    continue
                if flag == 3 and counter < self.n_atoms:
                    words = line.split()
                    x.append(float(words[1]))
                    y.append(float(words[2]))
                    z.append(float(words[3]))
                    counter += 1

            # stop when the cursor has reached the end of that block
            if counter == self._n_atoms:
                ts._x[:] = x # more efficient to do it this way to avoid re-creating the numpy arrays
                ts._y[:] = y
                ts._z[:] = z
                #print ts.frame
                ts.frame += 1
                return ts

        raise EOFError

    def _reopen(self):
        self.close()
        self.open_trajectory()

    def open_trajectory(self):
        if self.outfile is not None:
            raise IOError(errno.EALREADY, 'GMS file already opened', self.filename)
        if not os.path.exists(self.filename):
            # must check; otherwise might segmentation fault
            raise IOError(errno.ENOENT, 'GMS file not found', self.filename)

        self.outfile = util.anyopen(self.filename)

        # reset ts
        ts = self.ts
        ts.frame = -1
        return self.outfile

[docs]    def close(self):
        """Close out trajectory file if it was open."""
        if self.outfile is None:
            return
        self.outfile.close()
        self.outfile = None