Source code for MDAnalysis.coordinates.MOL2
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""MOL2 file format --- :mod:`MDAnalysis.coordinates.MOL2`
========================================================
Classes to work with Tripos_ molecule structure format (MOL2_) coordinate and
topology files. Used, for instance, by the DOCK_ docking code.
Example for working with mol2 files
-----------------------------------
To open a mol2, remove all hydrogens and save as a new file, use the following::
u = Universe("Molecule.mol2")
gr = u.select_atoms("not name H*")
print(len(u.atoms), len(gr))
gr.write("Molecule_noh.mol2")
.. _MOL2: http://www.tripos.com/data/support/mol2.pdf
.. _Tripos: http://www.tripos.com/
.. _DOCK: http://dock.compbio.ucsf.edu/
See Also
--------
rdkit: rdkit_ is an open source cheminformatics toolkit with more mol2
functionality
.. _rdkit: http://www.rdkit.org/docs/GettingStartedInPython.html
Classes
-------
.. autoclass:: MOL2Reader
:members:
.. autoclass:: MOL2Writer
:members:
MOL2 format notes
-----------------
* MOL2 Format Specification: (http://www.tripos.com/data/support/mol2.pdf)
* Example file (http://www.tripos.com/mol2/mol2_format3.html)::
# Name: benzene
# Creating user name: tom
# Creation time: Wed Dec 28 00:18:30 1988
# Modifying user name: tom
# Modification time: Wed Dec 28 00:18:30 1988
@<TRIPOS>MOLECULE
benzene
12 12 1 0 0
SMALL
NO_CHARGES
@<TRIPOS>ATOM
1 C1 1.207 2.091 0.000 C.ar 1 BENZENE 0.000
2 C2 2.414 1.394 0.000 C.ar 1 BENZENE 0.000
3 C3 2.414 0.000 0.000 C.ar 1 BENZENE 0.000
4 C4 1.207 -0.697 0.000 C.ar 1 BENZENE 0.000
5 C5 0.000 0.000 0.000 C.ar 1 BENZENE 0.000
6 C6 0.000 1.394 0.000 C.ar 1 BENZENE 0.000
7 H1 1.207 3.175 0.000 H 1 BENZENE 0.000
8 H2 3.353 1.936 0.000 H 1 BENZENE 0.000
9 H3 3.353 -0.542 0.000 H 1 BENZENE 0.000
10 H4 1.207 -1.781 0.000 H 1 BENZENE 0.000
11 H5 -0.939 -0.542 0.000 H 1 BENZENE 0.000
12 H6 -0.939 1.936 0.000 H 1 BENZENE 0.000
@<TRIPOS>BOND
1 1 2 ar
2 1 6 ar
3 2 3 ar
4 3 4 ar
5 4 5 ar
6 5 6 ar
7 1 7 1
8 2 8 1
9 3 9 1
10 4 10 1
11 5 11 1
12 6 12 1
@<TRIPOS>SUBSTRUCTURE
1 BENZENE 1 PERM 0 **** **** 0 ROOT
"""
from __future__ import absolute_import
from six import raise_from
import numpy as np
from . import base
from ..lib import util
[docs]class MOL2Reader(base.ReaderBase):
"""Reader for MOL2 structure format.
.. versionchanged:: 0.11.0
Frames now 0-based instead of 1-based.
MOL2 now reuses the same Timestep object for every frame,
previously created a new instance of Timestep each frame.
.. versionchanged:: 0.20.0
Allows for comments at top of file.
Ignores status bit strings
"""
format = 'MOL2'
units = {'time': None, 'length': 'Angstrom'}
def __init__(self, filename, **kwargs):
"""Read coordinates from `filename`.
Parameters
----------
filename : str or NamedStream
name of the mol2 file or stream
"""
super(MOL2Reader, self).__init__(filename, **kwargs)
self.n_atoms = None
blocks = []
with util.openany(filename) as f:
for i, line in enumerate(f):
# found new molecules
if "@<TRIPOS>MOLECULE" in line:
blocks.append({"start_line": i, "lines": []})
if len(blocks):
blocks[-1]["lines"].append(line)
self.n_frames = len(blocks)
self.frames = blocks
sections, coords = self.parse_block(blocks[0])
self.n_atoms = len(coords)
self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
self.ts = self._read_frame(0)
def parse_block(self, block):
sections = {}
cursor = None
for line in block["lines"]:
if "@<TRIPOS>" in line:
cursor = line.split("@<TRIPOS>")[1].strip().lower()
sections[cursor] = []
continue
elif line.startswith("#") or line == "\n":
continue
sections[cursor].append(line)
atom_lines, bond_lines = sections["atom"], sections["bond"]
if not len(atom_lines):
raise Exception("The mol2 (starting at line {0}) block has no atoms"
"".format(block["start_line"]))
elif self.n_atoms is None:
# First time round, remember the number of atoms
self.n_atoms = len(atom_lines)
elif len(atom_lines) != self.n_atoms:
raise ValueError(
"MOL2Reader assumes that the number of atoms remains unchanged"
" between frames; the current "
"frame has {0}, the next frame has {1} atoms"
"".format(self.n_atoms, len(atom_lines)))
if not len(bond_lines):
raise Exception("The mol2 (starting at line {0}) block has no bonds"
"".format(block["start_line"]))
coords = np.zeros((self.n_atoms, 3), dtype=np.float32)
for i, a in enumerate(atom_lines):
aid, name, x, y, z, atom_type, resid, resname, charge = a.split()[:9]
#x, y, z = float(x), float(y), float(z)
coords[i, :] = x, y, z
return sections, coords
def _read_next_timestep(self, ts=None):
if ts is None:
ts = self.ts
else:
# TODO: cleanup _read_frame() to use a "free" Timestep
raise NotImplementedError("MOL2Reader cannot assign to a timestep")
frame = self.frame + 1
return self._read_frame(frame)
def _read_frame(self, frame):
unitcell = np.zeros(6, dtype=np.float32)
try:
block = self.frames[frame]
except IndexError:
raise_from(IOError("Invalid frame {0} for trajectory with length {1}"
"".format(frame, len(self))),
None)
sections, coords = self.parse_block(block)
for sect in ['molecule', 'substructure']:
try:
self.ts.data[sect] = sections[sect]
except KeyError:
pass
self.ts.positions = np.array(coords, dtype=np.float32)
self.ts.unitcell = unitcell
if self.convert_units:
# in-place !
self.convert_pos_from_native(self.ts._pos)
self.convert_pos_from_native(self.ts._unitcell[:3])
self.ts.frame = frame
return self.ts
def _reopen(self):
# Make frame think it's before start, so calling next
# reads first frame
self.ts.frame = -1
[docs]class MOL2Writer(base.WriterBase):
"""mol2 format writer
Write a file in the Tripos_ molecule structure format (MOL2_).
Note
----
:class:`MOL2Writer` can only be used to write out previously loaded MOL2 files.
If you're trying to convert, for example, a PDB file to MOL you should
use other tools, like rdkit_.
Here is an example how to use rdkit_ to convert a PDB to MOL::
from rdkit import Chem
mol = Chem.MolFromPDBFile("molecule.pdb", removeHs=False)
Chem.MolToMolFile(mol, "molecule.mol" )
MOL2 writer is currently not available for rdkit master. It requires SYBYL
atomtype generation.
See page 7 for list of SYBYL atomtypes
(http://tripos.com/tripos_resources/fileroot/pdfs/mol2_format2.pdf).
.. _rdkit: http://www.rdkit.org/docs/GettingStartedInPython.html
.. versionchanged:: 0.11.0
Frames now 0-based instead of 1-based
"""
format = 'MOL2'
multiframe = True
units = {'time': None, 'length': 'Angstrom'}
def __init__(self, filename, n_atoms=None, convert_units=True):
"""Create a new MOL2Writer
Parameters
----------
filename: str
name of output file
convert_units: bool (optional)
units are converted to the MDAnalysis base format; [``True``]
"""
self.filename = filename
self.convert_units = convert_units # convert length and time to base units
self.frames_written = 0
self.file = util.anyopen(self.filename, 'w') # open file on init
[docs] def encode_block(self, obj):
"""
Parameters
----------
obj : AtomGroup or Universe
"""
# Issue 2717
obj = obj.atoms
traj = obj.universe.trajectory
ts = traj.ts
try:
molecule = ts.data['molecule']
except KeyError:
raise_from(NotImplementedError(
"MOL2Writer cannot currently write non MOL2 data"),
None)
# Need to remap atom indices to 1 based in this selection
mapping = {a: i for i, a in enumerate(obj.atoms, start=1)}
# Grab only bonds between atoms in the obj
# ie none that extend out of it
bondgroup = obj.bonds.atomgroup_intersection(obj, strict=True)
bonds = sorted((b[0], b[1], b.order) for b in bondgroup)
bond_lines = ["@<TRIPOS>BOND"]
bond_lines.extend("{0:>5} {1:>5} {2:>5} {3:>2}"
"".format(bid,
mapping[atom1],
mapping[atom2],
order)
for bid, (atom1, atom2, order)in enumerate(
bonds, start=1))
bond_lines.append("\n")
bond_lines = "\n".join(bond_lines)
atom_lines = ["@<TRIPOS>ATOM"]
atom_lines.extend("{0:>4} {1:>4} {2:>13.4f} {3:>9.4f} {4:>9.4f}"
" {5:>4} {6} {7} {8:>7.4f}"
"".format(mapping[a],
a.name,
a.position[0],
a.position[1],
a.position[2],
a.type,
a.resid,
a.resname,
a.charge)
for a in obj.atoms)
atom_lines.append("\n")
atom_lines = "\n".join(atom_lines)
try:
substructure = ["@<TRIPOS>SUBSTRUCTURE\n"] + ts.data['substructure']
except KeyError:
substructure = ""
check_sums = molecule[1].split()
check_sums[0], check_sums[1] = str(len(obj.atoms)), str(len(bondgroup))
# prevent behavior change between repeated calls
# see gh-2678
molecule_0_store = molecule[0]
molecule_1_store = molecule[1]
molecule[1] = "{0}\n".format(" ".join(check_sums))
molecule.insert(0, "@<TRIPOS>MOLECULE\n")
return_val = ("".join(molecule) + atom_lines +
bond_lines + "".join(substructure))
molecule[0] = molecule_0_store
molecule[1] = molecule_1_store
return return_val
def _write_next_frame(self, obj):
"""Write a new frame to the MOL2 file.
Parameters
----------
obj : AtomGroup or Universe
.. versionchanged:: 1.0.0
Renamed from `write_next_timestep` to `_write_next_frame`.
"""
block = self.encode_block(obj)
self.file.writelines(block)