Source code for MDAnalysis.topology.MOL2Parser
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
MOL2 file format --- :mod:`MDAnalysis.coordinates.MOL2`
========================================================
Classes to read Tripos_ molecule structure format (MOL2_) coordinate
and topology files. Used by the DOCK_ docking code.
.. _MOL2: http://www.tripos.com/data/support/mol2.pdf
.. _Tripos: http://www.tripos.com/
.. _DOCK: http://dock.compbio.ucsf.edu/
Classes
-------
.. autoclass:: MOL2Parser
:members:
:inherited-members:
"""
from __future__ import absolute_import
import os
import numpy as np
from . import guessers
from ..lib.util import openany
from .base import TopologyReaderBase, squash_by
from ..core.topologyattrs import (
Atomids,
Atomnames,
Atomtypes,
Bonds,
Charges,
Masses,
Resids,
Resnums,
Resnames,
Segids,
)
from ..core.topology import Topology
[docs]class MOL2Parser(TopologyReaderBase):
"""Read topology from a Tripos_ MOL2_ file.
Create the following Attributes:
- Atomids
- Atomnames
- Atomtypes
- Charges
- Resids,
- Resnames
- Bonds
Guesses the following:
- masses
.. versionchanged:: 0.9
Now subclasses TopologyReaderBase
.. versionchanged:: 0.20.0
Allows for comments at the top of the file
Ignores status bit strings
"""
format = 'MOL2'
[docs] def parse(self, **kwargs):
"""Parse MOL2 file *filename* and return the dict `structure`.
Returns
-------
A MDAnalysis Topology object
"""
blocks = []
with openany(self.filename) as f:
for i, line in enumerate(f):
# found new molecules
if "@<TRIPOS>MOLECULE" in line:
if len(blocks):
break
blocks.append({"start_line": i, "lines": []})
if len(blocks):
blocks[-1]["lines"].append(line)
if not len(blocks):
raise ValueError("The mol2 file '{0}' needs to have at least one"
" @<TRIPOS>MOLECULE block".format(self.filename))
block = blocks[0]
sections = {}
cursor = None
for line in block["lines"]:
if "@<TRIPOS>" in line:
cursor = line.split("@<TRIPOS>")[1].strip().lower()
sections[cursor] = []
continue
elif line.startswith("#") or line == "\n":
continue
sections[cursor].append(line)
atom_lines, bond_lines = sections["atom"], sections["bond"]
if not len(atom_lines):
raise ValueError("The mol2 block ({0}:{1}) has no atoms".format(
os.path.basename(self.filename), block["start_line"]))
if not len(bond_lines):
raise ValueError("The mol2 block ({0}:{1}) has no bonds".format(
os.path.basename(self.filename), block["start_line"]))
ids = []
names = []
types = []
resids = []
resnames = []
charges = []
for a in atom_lines:
aid, name, x, y, z, atom_type, resid, resname, charge = a.split()[:9]
ids.append(aid)
names.append(name)
types.append(atom_type)
resids.append(resid)
resnames.append(resname)
charges.append(charge)
n_atoms = len(ids)
masses = guessers.guess_masses(types)
attrs = []
attrs.append(Atomids(np.array(ids, dtype=np.int32)))
attrs.append(Atomnames(np.array(names, dtype=object)))
attrs.append(Atomtypes(np.array(types, dtype=object)))
attrs.append(Charges(np.array(charges, dtype=np.float32)))
attrs.append(Masses(masses, guessed=True))
resids = np.array(resids, dtype=np.int32)
resnames = np.array(resnames, dtype=object)
residx, resids, (resnames,) = squash_by(
resids, resnames)
n_residues = len(resids)
attrs.append(Resids(resids))
attrs.append(Resnums(resids.copy()))
attrs.append(Resnames(resnames))
attrs.append(Segids(np.array(['SYSTEM'], dtype=object)))
bonds = []
bondorder = []
for b in bond_lines:
# bond_type can be: 1, 2, am, ar
bid, a0, a1, bond_type = b.split()[:4]
a0, a1 = int(a0) - 1, int(a1) - 1
bond = tuple(sorted([a0, a1]))
bondorder.append(bond_type)
bonds.append(bond)
attrs.append(Bonds(bonds, order=bondorder))
top = Topology(n_atoms, n_residues, 1,
attrs=attrs,
atom_resindex=residx)
return top