Source code for MDAnalysis.coordinates.chemfiles

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Reading trajectory with Chemfiles --- :mod:`MDAnalysis.coordinates.chemfiles`
=============================================================================

Classes to read and write files using the `chemfiles`_ library. This library
provides C++ implementation of multiple formats readers and writers, the full
list if available `here <formats>`_.

.. _chemfiles: https://chemfiles.org
.. _formats: http://chemfiles.org/chemfiles/latest/formats.html

Classes
-------

.. autoclass:: ChemfilesReader

.. autoclass:: ChemfilesWriter

"""
from __future__ import absolute_import, division
from __future__ import print_function, unicode_literals

import numpy as np
from six import string_types
from distutils.version import LooseVersion
import warnings

from . import base, core

try:
    import chemfiles
except ImportError:
    HAS_CHEMFILES = False
else:
    HAS_CHEMFILES = True


MIN_CHEMFILES_VERSION = LooseVersion("0.9")
MAX_CHEMFILES_VERSION = LooseVersion("0.10")


def check_chemfiles_version():
    """Check an appropriate Chemfiles is available

    Returns True if a usable chemfiles version is available

    .. versionadded:: 1.0.0
    """
    if not HAS_CHEMFILES:
        warnings.warn(
            "No Chemfiles package found.  "
            "Try installing with 'pip install chemfiles'"
        )
        return False
    version = LooseVersion(chemfiles.__version__)
    wrong = version < MIN_CHEMFILES_VERSION or version >= MAX_CHEMFILES_VERSION
    if wrong:
        warnings.warn(
            "unsupported Chemfiles version {}, we need a version >{} and <{}"
            .format(version, MIN_CHEMFILES_VERSION, MAX_CHEMFILES_VERSION)
        )
    return not wrong


[docs]class ChemfilesReader(base.ReaderBase): """Coordinate reader using chemfiles. The file format to used is guessed based on the file extension. If no matching format is found, a ``ChemfilesError`` is raised. It is also possible to manually specify the format to use for a given file. .. versionadded:: 1.0.0 """ format = 'chemfiles' units = {'time': 'fs', 'length': 'Angstrom'} def __init__(self, filename, chemfiles_format="", **kwargs): """ Parameters ---------- filename : chemfiles.Trajectory or str the chemfiles object to read or filename to read chemfiles_format : str (optional) if *filename* was a string, use the given format name instead of guessing from the extension. The `list of supported formats <formats>`_ and the associated names is available in chemfiles documentation. **kwargs : dict General reader arguments. .. _formats: http://chemfiles.org/chemfiles/latest/formats.html """ if not check_chemfiles_version(): raise RuntimeError("Please install Chemfiles > {}" "".format(MIN_CHEMFILES_VERSION)) super(ChemfilesReader, self).__init__(filename, **kwargs) self._format = chemfiles_format self._cached_n_atoms = None self._open() self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs) self.next() @staticmethod def _format_hint(thing): """Can this Reader read *thing*""" # nb, filename strings can still get passed through if # format='CHEMFILES' is used return HAS_CHEMFILES and isinstance(thing, chemfiles.Trajectory) def _open(self): self._closed = False self._step = 0 self._frame = -1 if isinstance(self.filename, chemfiles.Trajectory): self._file = self.filename else: self._file = chemfiles.Trajectory(self.filename, 'r', self._format) def close(self): """close reader""" if not self._closed: self._file.close() self._closed = True @property def n_frames(self): """number of frames in trajectory""" return self._file.nsteps @property def n_atoms(self): """number of atoms in the first frame of the trajectory""" if self._cached_n_atoms is None: self._cached_n_atoms = len(self._file.read_step(0).atoms) return self._cached_n_atoms def _reopen(self): """reopen trajectory""" self.close() self._open() def _read_frame(self, i): """read frame i""" self._step = i return self._read_next_timestep() def _read_next_timestep(self, ts=None): """copy next frame into timestep""" if self._step >= self.n_frames: raise IOError('trying to go over trajectory limit') if ts is None: ts = self.ts self.ts = ts frame = self._file.read_step(self._step) self._frame_to_ts(frame, ts) ts.frame = frame.step self._step += 1 return ts def _frame_to_ts(self, frame, ts): """convert a chemfiles frame to a :class:`TimeStep`""" if len(frame.atoms) != self.n_atoms: raise IOError( "The number of atom changed in the trajectory. " "This is not supported by MDAnalysis." ) ts.dimensions[:] = frame.cell.lengths + frame.cell.angles ts.positions[:] = frame.positions[:] if frame.has_velocities(): ts.has_velocities = True ts.velocities[:] = frame.velocities[:] def Writer(self, filename, n_atoms=None, **kwargs): """Return writer for trajectory format""" if n_atoms is None: n_atoms = self.n_atoms return ChemfilesWriter(filename, n_atoms, **kwargs)
[docs]class ChemfilesWriter(base.WriterBase): """ Coordinate writer using chemfiles. The file format to used is guessed based on the file extension. If no matching format is found, a ``ChemfilesError`` is raised. It is also possible to manually specify the format to use for a given file. Chemfiles support writting to files with varying number of atoms if the underlying format support it. This is the case of most of text-based formats. .. versionadded:: 1.0.0 """ format = 'chemfiles' multiframe = True # chemfiles mostly[1] uses these units for the in-memory representation, # and convert into the file format units when writing. # # [1] mostly since some format don't have a specified unit # (XYZ for example), so then chemfiles just assume they are in A and fs. units = {'time': 'fs', 'length': 'Angstrom'} def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", topology=None, **kwargs): """ Parameters ---------- filename: str filename of trajectory file. n_atoms: int number of atoms in the trajectory to write. This value is not used and can vary during trajectory, if the underlying format support it mode : str (optional) file opening mode: use 'a' to append to an existing file or 'w' to create a new file chemfiles_format : str (optional) use the given format name instead of guessing from the extension. The `list of supported formats <formats>`_ and the associated names is available in chemfiles documentation. topology : Universe or AtomGroup (optional) use the topology from this :class:`~MDAnalysis.core.groups.AtomGroup` or :class:`~MDAnalysis.core.universe.Universe` to write all the timesteps to the file **kwargs : dict General writer arguments. .. _formats: http://chemfiles.org/chemfiles/latest/formats.html """ if not check_chemfiles_version(): raise RuntimeError("Please install Chemfiles > {}" "".format(MIN_CHEMFILES_VERSION)) self.filename = filename self.n_atoms = n_atoms if mode != "a" and mode != "w": raise IOError("Expected 'a' or 'w' as mode in ChemfilesWriter") self._file = chemfiles.Trajectory(self.filename, mode, chemfiles_format) self._closed = False if topology is not None: if isinstance(topology, string_types): self._file.set_topology(topology) else: topology = self._topology_to_chemfiles(topology, n_atoms) self._file.set_topology(topology) def close(self): """Close the trajectory file and finalize the writing""" if hasattr(self, "_closed") and not self._closed: self._file.close() self._closed = True def _write_next_frame(self, obj): """Write information associated with ``obj`` at current frame into trajectory. Topology for the output is taken from the `obj` or default to the value of the `topology` keyword supplied to the :class:`ChemfilesWriter` constructor. If `obj` contains velocities, and the underlying format supports it, the velocities are writen to the file. Writing forces is unsupported at the moment. Parameters ---------- obj : AtomGroup or Universe The :class:`~MDAnalysis.core.groups.AtomGroup` or :class:`~MDAnalysis.core.universe.Universe` to write. """ # TODO 2.0: Remove timestep logic if hasattr(obj, "atoms"): if hasattr(obj, 'universe'): # For AtomGroup and children (Residue, ResidueGroup, Segment) ts_full = obj.universe.trajectory.ts if ts_full.n_atoms == obj.atoms.n_atoms: ts = ts_full else: # Only populate a time step with the selected atoms. ts = ts_full.copy_slice(atoms.indices) elif hasattr(obj, 'trajectory'): # For Universe only --- get everything ts = obj.trajectory.ts else: if isinstance(obj, base.Timestep): ts = obj topology = None else: raise TypeError("No Timestep found in obj argument") frame = self._timestep_to_chemfiles(ts) frame.topology = self._topology_to_chemfiles(obj, len(frame.atoms)) self._file.write(frame) def _timestep_to_chemfiles(self, ts): """ Convert a Timestep to a chemfiles Frame """ # TODO: CONVERTERS? frame = chemfiles.Frame() frame.resize(ts.n_atoms) if ts.has_positions: frame.positions[:] = ts.positions[:] if ts.has_velocities: frame.add_velocities() frame.velocities[:] = ts.velocities[:] frame.cell = chemfiles.UnitCell(*ts.dimensions) return frame def _topology_to_chemfiles(self, obj, n_atoms): """ Convert an AtomGroup or an Universe to a chemfiles Topology """ topology = chemfiles.Topology() if not hasattr(obj, "atoms"): # use an empty topology topology.resize(n_atoms) return topology # (1) add all atoms to the topology residues = {} for atom in obj.atoms: name = getattr(atom, 'name', "") type = getattr(atom, 'type', name) chemfiles_atom = chemfiles.Atom(name, type) if hasattr(atom, 'altLoc'): chemfiles_atom["altloc"] = str(atom.altLoc) if hasattr(atom, 'segid'): chemfiles_atom["segid"] = str(atom.segid) if hasattr(atom, 'segindex'): chemfiles_atom["segindex"] = int(atom.segindex) if hasattr(atom, 'resid'): resname = getattr(atom, 'resname', "") if atom.resid not in residues.keys(): residues[atom.resid] = chemfiles.Residue(resname, atom.resid) residue = residues[atom.resid] atom_idx = len(topology.atoms) residue.atoms.append(atom_idx) if hasattr(atom, "record_type"): # set corresponding chemfiles residue property if atom.record_type == "ATOM": residue["is_standard_pdb"] = True else: residue["is_standard_pdb"] = False topology.atoms.append(chemfiles_atom) # (2) add residues to the topology for residue in residues.values(): topology.residues.append(residue) # (3) add bonds to the topology for bond in getattr(obj, "bonds", []): topology.add_bond(bond.atoms[0].ix, bond.atoms[1].ix) return topology