# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
AMBER PRMTOP topology parser
============================
Reads an AMBER top file to build the system.
Amber keywords are turned into the following attributes:
+----------------------------+----------------------+
| AMBER flag | MDAnalysis attribute |
+============================+======================+
| ATOM_NAME | names |
+----------------------------+----------------------+
| CHARGE | charges |
+----------------------------+----------------------+
| ATOMIC_NUMBER | elements |
+----------------------------+----------------------+
| MASS | masses |
+----------------------------+----------------------+
| BONDS_INC_HYDROGEN | bonds |
| BONDS_WITHOUT_HYDROGEN | |
+----------------------------+----------------------+
| ANGLES_INC_HYDROGEN | angles |
| ANGLES_WITHOUT_HYDROGEN | |
+----------------------------+----------------------+
| DIHEDRALS_INC_HYDROGEN | dihedrals / improper |
| DIHEDRALS_WITHOUT_HYDROGEN | |
+----------------------------+----------------------+
| ATOM_TYPE_INDEX | type_indices |
+----------------------------+----------------------+
| AMBER_ATOM_TYPE | types |
+----------------------------+----------------------+
| RESIDUE_LABEL | resnames |
+----------------------------+----------------------+
| RESIDUE_POINTER | residues |
+----------------------------+----------------------+
TODO:
Add support for Chamber-style topologies
More stringent tests
.. Note::
The Amber charge is converted to electron charges as used in
MDAnalysis and other packages. To get back Amber charges, multiply
by 18.2223.
Chamber-style Amber topologies (i.e. topologies generated via parmed
conversion of a CHARMM topology to an AMBER one) are not currently
supported. Support will likely be added in future MDAnalysis releases.
As of version 2.0.0, elements are no longer guessed if ATOMIC_NUMBER records
are missing. In those scenarios, if elements are necessary, users will have
to invoke the element guessers after parsing the topology file. Please see
:mod:`MDAnalysis.guessers` for more details.
.. _`PARM parameter/topology file specification`:
https://ambermd.org/FileFormats.php#topo.cntrl
Classes
-------
.. autoclass:: TOPParser
:members:
:inherited-members:
"""
import numpy as np
import itertools
from ..guesser.tables import Z2SYMB
from ..lib.util import openany, FORTRANReader
from .base import TopologyReaderBase, change_squash
from ..core.topology import Topology
from ..core.topologyattrs import (
Atomnames,
Atomtypes,
Atomids,
Charges,
Elements,
Masses,
Resnames,
Resids,
Resnums,
Segids,
ChainIDs,
AtomAttr,
Bonds,
Angles,
Dihedrals,
Impropers,
)
import warnings
import logging
logger = logging.getLogger("MDAnalysis.topology.TOPParser")
class TypeIndices(AtomAttr):
"""Numerical type of each Atom"""
attrname = "type_indices"
singular = "type_index"
level = "atom"
[docs]
class TOPParser(TopologyReaderBase):
"""Reads topology information from an AMBER top file.
Reads the following Attributes if in topology:
- Atomnames
- Charges
- Masses
- Elements
- Atomtypes
- Resnames
- Type_indices
- Bonds
- Angles
- Dihedrals (inc. impropers)
- ChainIDs (from %RESIDUE_CHAINID)
- Segids (from %RESIDUE_CHAINID)
The format is defined in `PARM parameter/topology file
specification`_. The reader tries to detect if it is a newer
(AMBER 12?) file format by looking for the flag "ATOMIC_NUMBER".
.. _`PARM parameter/topology file specification`:
https://ambermd.org/FileFormats.php#topo.cntrl
Additionally, the RESIDUE_CHAINID non-standard flag is supported. This
can be added with the `addPDB`_ command from parmed:
.. _`addPDB`: https://parmed.github.io/ParmEd/html/parmed.html#addpdb
Notes
-----
Elements are obtained from the atomic numbers (if present). If a given
input atomic number does not belong to an element (usually either -1 or 0),
the element will be assigned an empty record.
.. versionchanged:: 0.7.6
parses both amber10 and amber12 formats
.. versionchanged:: 0.19.0
parses bonds, angles, dihedrals, and impropers
.. versionchanged:: 1.0.0
warns users that chamber-style topologies are not currently supported
.. versionchanged:: 2.0.0
no longer guesses elements if missing
.. versionchanged:: 2.7.0
gets Segments and chainIDs from flag RESIDUE_CHAINID, when present
"""
format = ["TOP", "PRMTOP", "PARM7"]
[docs]
def parse(self, **kwargs):
"""Parse Amber PRMTOP topology file *filename*.
Returns
-------
A MDAnalysis Topology object
"""
# Sections that we grab as we parse the file
sections = {
"ATOM_NAME": (1, 20, self.parse_names, "name", 0),
"CHARGE": (1, 5, self.parse_charges, "charge", 0),
"ATOMIC_NUMBER": (1, 10, self.parse_elements, "elements", 0),
"MASS": (1, 5, self.parse_masses, "mass", 0),
"ATOM_TYPE_INDEX": (
1,
10,
self.parse_type_indices,
"type_indices",
0,
),
"AMBER_ATOM_TYPE": (1, 20, self.parse_types, "types", 0),
"RESIDUE_LABEL": (1, 20, self.parse_resnames, "resname", 11),
"RESIDUE_POINTER": (1, 10, self.parse_residx, "respoint", 11),
"BONDS_INC_HYDROGEN": (3, 10, self.parse_bonded, "bondh", 2),
"BONDS_WITHOUT_HYDROGEN": (3, 10, self.parse_bonded, "bonda", 3),
"ANGLES_INC_HYDROGEN": (4, 10, self.parse_bonded, "angh", 4),
"ANGLES_WITHOUT_HYDROGEN": (4, 10, self.parse_bonded, "anga", 5),
"DIHEDRALS_INC_HYDROGEN": (5, 10, self.parse_bonded, "dihh", 6),
"DIHEDRALS_WITHOUT_HYDROGEN": (
5,
10,
self.parse_bonded,
"diha",
7,
),
"RESIDUE_CHAINID": (1, 20, self.parse_chainids, "segids", 11),
}
attrs = {} # empty dict for attrs that we'll fill
# Open and check top validity
# Reading header info POINTERS
with openany(self.filename) as self.topfile:
header = next(self.topfile)
if not header.startswith("%VE"):
raise ValueError(
"{0} is not a valid TOP file. %VE Missing in header"
"".format(self.filename)
)
title = next(self.topfile).split()
if not (title[1] == "TITLE"):
# Raise a separate warning if Chamber-style TOP is detected
if title[1] == "CTITLE":
emsg = (
"{0} is detected as a Chamber-style TOP file. "
"At this time MDAnalysis does not support such "
"topologies".format(self.filename)
)
else:
emsg = (
"{0} is not a valid TOP file. "
"'TITLE' missing in header".format(self.filename)
)
raise ValueError(emsg)
while not header.startswith("%FLAG POINTERS"):
header = next(self.topfile)
next(self.topfile)
topremarks = [next(self.topfile).strip() for i in range(4)]
sys_info = [int(k) for i in topremarks for k in i.split()]
header = next(self.topfile)
# grab the next section title
next_section = header.split("%FLAG")[1].strip()
while next_section is not None:
try:
(num_per_record, per_line, func, name, sect_num) = (
sections[next_section]
)
except KeyError:
def next_getter():
return self.skipper()
else:
num = sys_info[sect_num] * num_per_record
numlines = num // per_line
if num % per_line != 0:
numlines += 1
attrs[name] = func(num_per_record, numlines)
def next_getter():
return next(self.topfile)
try:
line = next_getter()
# Capture case where section is empty w/ 1 empty line
if numlines == 0 and not line.strip():
line = next_getter()
except StopIteration:
next_section = None
else:
try:
next_section = line.split("%FLAG")[1].strip()
except IndexError:
errmsg = (
f"%FLAG section not found, formatting error "
f"for PARM7 file {self.filename} "
)
raise IndexError(errmsg) from None
# strip out a few values to play with them
n_atoms = len(attrs["name"])
resptrs = attrs.pop("respoint")
resptrs.append(n_atoms)
residx = np.zeros(n_atoms, dtype=np.int32)
for i, (x, y) in enumerate(zip(resptrs[:-1], resptrs[1:])):
residx[x:y] = i
n_res = len(attrs["resname"])
# Deal with recreating bonds and angle records here
attrs["bonds"] = Bonds(
[
i
for i in itertools.chain(
attrs.pop("bonda"), attrs.pop("bondh")
)
]
)
attrs["angles"] = Angles(
[i for i in itertools.chain(attrs.pop("anga"), attrs.pop("angh"))]
)
attrs["dihedrals"], attrs["impropers"] = self.parse_dihedrals(
attrs.pop("diha"), attrs.pop("dihh")
)
# Warn user if elements not in topology
if "elements" not in attrs:
msg = (
"ATOMIC_NUMBER record not found, elements attribute will "
"not be populated. If needed these can be guessed using "
"universe.guess_TopologyAttrs(to_guess=['elements'])."
)
logger.warning(msg)
warnings.warn(msg)
elif np.any(attrs["elements"].values == ""):
# only send out one warning that some elements are unknown
msg = (
"Unknown ATOMIC_NUMBER value found for some atoms, these "
"have been given an empty element record. If needed these "
"can be guessed using "
"universe.guess_TopologyAttrs(to_guess=['elements'])."
)
logger.warning(msg)
warnings.warn(msg)
# atom ids are mandatory
attrs["atomids"] = Atomids(np.arange(n_atoms) + 1)
attrs["resids"] = Resids(np.arange(n_res) + 1)
attrs["resnums"] = Resnums(np.arange(n_res) + 1)
# Amber's 'RESIDUE_CHAINID' is a by-residue attribute, turn it into
# a by-atom attribute when present. See PR #4007.
if "segids" in attrs and len(attrs["segids"]) == n_res:
segidx, (segids,) = change_squash(
(attrs["segids"],), (attrs["segids"],)
)
chainids = [attrs["segids"][r] for r in residx]
attrs["segids"] = Segids(segids)
attrs["ChainIDs"] = ChainIDs(chainids)
n_segs = len(segids)
else:
if "segids" in attrs:
msg = (
f"Number of residues ({n_res}) does not match number of "
f"%RESIDUE_CHAINID ({len(attrs['segids'])}). Skipping section."
)
logger.warning(msg)
warnings.warn(msg)
attrs["segids"] = Segids(np.array(["SYSTEM"], dtype=object))
segidx = None
n_segs = 1
top = Topology(
n_atoms,
n_res,
n_segs,
attrs=list(attrs.values()),
atom_resindex=residx,
residue_segindex=segidx,
)
return top
[docs]
def skipper(self):
"""TOPParser :class: helper function, skips lines of input parm7 file
until we find the next %FLAG entry and return that
Returns
-------
line : string
String containing the current line of the parm7 file
"""
line = next(self.topfile)
while not line.startswith("%FLAG"):
line = next(self.topfile)
return line
[docs]
def parse_names(self, num_per_record, numlines):
"""Extracts atoms names from parm7 file
Parameters
----------
num_per_record : int
The number of entries for each record in the section (unused input)
numlines : int
The number of lines to be parsed in current section
Returns
-------
attr : :class:`Atomnames`
A :class:`Atomnames` instance containing the names of each atom as
defined in the parm7 file
"""
vals = self.parsesection_mapper(numlines, lambda x: x)
attr = Atomnames(np.array(vals, dtype=object))
return attr
[docs]
def parse_resnames(self, num_per_record, numlines):
"""Extracts the names of each residue
Parameters
----------
num_per_record : int
The number of entries for each recrod in section (unused input)
numlines : int
The number of lines to be parsed in current section
Returns
-------
attr : :class:`Resnames`
A :class:`Resnames` instance containing the names of each residue
as defined in the parm7 file
"""
vals = self.parsesection_mapper(numlines, lambda x: x)
attr = Resnames(np.array(vals, dtype=object))
return attr
[docs]
def parse_charges(self, num_per_record, numlines):
"""Extracts the partial charges for each atom
Parameters
----------
num_per_record : int
The number of entries for each record in section (unused input)
numlines : int
The number of lines to be parsed in current section
Returns
-------
attr : :class:`Charges`
A :class:`Charges` instance containing the partial charges of each
atom as defined in the parm7 file
"""
vals = self.parsesection_mapper(numlines, lambda x: float(x))
charges = np.array(vals, dtype=np.float32)
charges /= 18.2223 # to electron charge units
attr = Charges(charges)
return attr
[docs]
def parse_masses(self, num_per_record, numlines):
"""Extracts the mass of each atom
Parameters
----------
num_per_record : int
The number of entries for each record in section (unused input)
numlines : int
The number of lines to be parsed in current section
Returns
-------
attr : :class:`Masses`
A :class:`Masses` instance containing the mass of each atom as
defined in the parm7 file
"""
vals = self.parsesection_mapper(numlines, lambda x: float(x))
attr = Masses(vals)
return attr
[docs]
def parse_elements(self, num_per_record, numlines):
"""Extracts the atomic numbers of each atom and converts to element type
Parameters
----------
num_per_record : int
The number of entries for each record in section(unused input)
numlines : int
The number of lines to be pasred in current section
Returns
-------
attr : :class:`Elements`
A :class:`Elements` instance containing the element of each atom
as defined in the parm7 file
Note
----
If the record contains unknown atomic numbers (e.g. <= 0), these will
be treated as unknown elements and assigned an empty string value. See
issues #2306 and #2651 for more details.
.. versionchanged:: 2.0.0
Unrecognised elements will now return a empty string. The parser
will no longer attempt to guess the element by default.
"""
vals = self.parsesection_mapper(
numlines, lambda x: Z2SYMB[int(x)] if int(x) > 0 else ""
)
attr = Elements(np.array(vals, dtype=object))
return attr
[docs]
def parse_types(self, num_per_record, numlines):
"""Extracts the force field atom types of each atom
Parameters
----------
num_per_record : int
The number of entries for each record in section (unused input)
numlines : int
The number of lines to be parsed in current section
Returns
-------
attr : :class:`Atomtypes`
A :class:`Atomtypes` instance containing the atom types for each
atom as defined in the parm7 file
"""
vals = self.parsesection_mapper(numlines, lambda x: x)
attr = Atomtypes(np.array(vals, dtype=object))
return attr
[docs]
def parse_type_indices(self, num_per_record, numlines):
"""Extracts the index of atom types of the each atom involved in Lennard
Jones (6-12) interactions.
Parameters
----------
num_per_record : int
The number of entries for each record in section (unused input)
numlines : int
The number of lines to be parsed in current section
Returns
-------
attr :class:`TypeIndices`
A :class:`TypeIndices` instance containing the LJ 6-12 atom type
index for each atom
"""
vals = self.parsesection_mapper(numlines, lambda x: int(x))
attr = TypeIndices(np.array(vals, dtype=np.int32))
return attr
[docs]
def parse_residx(self, num_per_record, numlines):
"""Extracts the residue pointers for each atom
Parameters
----------
num_per_record : int
The number of entries for each record in section (unused input)
numlines : int
The number of lines to be parsed in current section
Returns
-------
vals : list of int
A list of zero-formatted residue pointers for each atom
"""
vals = self.parsesection_mapper(numlines, lambda x: int(x) - 1)
return vals
[docs]
def parse_chunks(self, data, chunksize):
"""Helper function to parse AMBER PRMTOP bonds/angles.
Parameters
----------
data : list of int
Input list of the parm7 bond/angle section, zero-indexed
num_per_record : int
The number of entries for each record in the input list
Returns
-------
vals : list of int tuples
A list of tuples containing the atoms involved in a given bonded
interaction
Note
----
In the parm7 format this information is structured in the following
format: [ atoms 1:n, internal index ]
Where 1:n represent the ids of the n atoms involved in the bond/angle
and the internal index links to a given set of FF parameters.
Therefore, to extract the required information, we split out the list
into chunks of size num_per_record, and only extract the atom ids.
"""
vals = [
tuple(data[x : x + chunksize - 1])
for x in range(0, len(data), chunksize)
]
return vals
[docs]
def parse_bonded(self, num_per_record, numlines):
"""Extracts bond information from PARM7 format files
Parameters
----------
num_per_record : int
The number of entries for each record in section
numlines : int
The number of lines to be parsed for this section
Note
----
For the bond/angle sections of parm7 files, the atom numbers are set to
coordinate array index values. As detailed in `the specification`_,
to recover the actual atom number, one
should divide the values by 3 and add 1. Here, since we want to satisfy
zero-indexing, we only divide by 3.
.. _`the specification`: https://ambermd.org/FileFormats.php#topo.cntrl
"""
fields = self.parsesection_mapper(numlines, lambda x: int(x) // 3)
section = self.parse_chunks(fields, num_per_record)
return section
[docs]
def parsesection_mapper(self, numlines, mapper):
"""Parses FORTRAN formatted section, and returns a list of all entries
in each line
Parameters
----------
numlines : int
The number of lines to be parsed in this section
mapper : lambda operator
Operator to format entries in current section
Returns
-------
section : list
A list of all entries in a given parm7 section
"""
section = []
def get_fmt(file):
"""Skips '%COMMENT' lines until it gets the FORMAT specification
for the section."""
line = next(file)
if line[:7] == "%FORMAT":
return line[8:].split(")")[0]
elif line[:8] == "%COMMENT":
return get_fmt(file)
else:
raise ValueError(
"Invalid header line. Does not begin with either %FLAG, %FORMAT "
f"nor %COMMENT:\n{line}"
)
# There may be %COMMENT lines between %FLAG and %FORMAT statements. Skip them.
fmt = get_fmt(self.topfile)
x = FORTRANReader(fmt)
for i in range(numlines):
l = next(self.topfile)
for j in range(len(x.entries)):
val = l[x.entries[j].start : x.entries[j].stop].strip()
if val:
section.append(mapper(val))
return section
[docs]
def parse_dihedrals(self, diha, dihh):
"""Combines hydrogen and non-hydrogen containing AMBER dihedral lists
and extracts sublists for conventional dihedrals and improper angles
Parameters
----------
diha : list of tuples
The atom ids of dihedrals not involving hydrogens
dihh : list of tuples
The atom ids of dihedrals involving hydrogens
Returns
-------
dihedrals : :class:`Dihedrals`
A :class:`Dihedrals` instance containing a list of all unique
dihedrals as defined by the parm7 file
impropers : :class:`Impropers`
A :class:`Impropers` instance containing a list of all unique
improper dihedrals as defined by the parm7 file
Note
----
As detailed in `the specification`_, the dihedral sections
of parm7 files contain information about both conventional dihedrals
and impropers. The following must be accounted for:
1) If the fourth atom in a dihedral entry is given a negative value,
this indicates that it is an improper.
2) If the third atom in a dihedral entry is given a negative value,
this indicates that it 1-4 NB interactions are ignored for this
dihedrals. This could be due to the dihedral within a ring, or if it is
part of a multi-term dihedral definition or if it is an improper.
.. _`the specification`: https://ambermd.org/FileFormats.php#topo.cntrl
"""
improp = []
dihed = []
for i in itertools.chain(diha, dihh):
if i[3] < 0:
improp.append(i[:2] + (abs(i[2]),) + (abs(i[3]),))
elif i[2] < 0:
vals = i[:2] + (abs(i[2]),) + i[3:]
dihed.append(vals)
else:
dihed.append(i)
dihed = sorted(set(dihed))
dihedrals = Dihedrals(dihed)
impropers = Impropers(improp)
return dihedrals, impropers
[docs]
def parse_chainids(self, num_per_record: int, numlines: int):
"""Extracts the chainID of each residue
Parameters
----------
num_per_record : int
The number of entries for each record in section (unused input)
numlines : int
The number of lines to be parsed in current section
Returns
-------
attr : numpy array
A numpy array containing the chainID of each residue as defined in
the parm7 file
"""
vals = self.parsesection_mapper(numlines, lambda x: x)
attr = np.array(vals)
return attr