Source code for MDAnalysis.coordinates.PDBQT

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#


"""
PDBQT structure files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBQT`
===========================================================================

MDAnalysis reads coordinates from PDBQT_ files and additional optional
data such as B-factors, partial charge and AutoDock_ atom types.  It
is also possible to substitute a PDBQT file for a PSF file in order to
define the list of atoms (but no connectivity information will be
available in this case).

.. _PDBQT:
   http://autodock.scripps.edu/faqs-help/faq/what-is-the-format-of-a-pdbqt-file
.. _AutoDock:
   http://autodock.scripps.edu/
"""

import os
import errno
import itertools
import numpy as np
import warnings

from ..lib import util
from . import base


[docs] class PDBQTReader(base.SingleFrameReaderBase): """PDBQTReader that reads a PDBQT-formatted file, no frills. Records read: - CRYST1 for unitcell A,B,C, alpha,beta,gamm - ATOM. HETATM for x,y,z Original `PDB format documentation`_ with `AutoDOCK extensions`_ .. _PDB format documentation: http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html .. _AutoDOCK extensions: http://autodock.scripps.edu/faqs-help/faq/what-is-the-format-of-a-pdbqt-file ============= ============ =========== ============================================= COLUMNS DATA TYPE FIELD DEFINITION ============= ============ =========== ============================================= 1 - 6 Record name "CRYST1" 7 - 15 Real(9.3) a a (Angstroms). 16 - 24 Real(9.3) b b (Angstroms). 25 - 33 Real(9.3) c c (Angstroms). 34 - 40 Real(7.2) alpha alpha (degrees). 41 - 47 Real(7.2) beta beta (degrees). 48 - 54 Real(7.2) gamma gamma (degrees). 1 - 6 Record name "ATOM " 7 - 11 Integer serial Atom serial number. 13 - 16 Atom name Atom name. 17 Character altLoc Alternate location indicator. IGNORED 18 - 21 Residue name resName Residue name. 22 Character chainID Chain identifier. 23 - 26 Integer resSeq Residue sequence number. 27 AChar iCode Code for insertion of residues. IGNORED 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms. 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms. 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms. 55 - 60 Real(6.2) occupancy Occupancy. 61 - 66 Real(6.2) tempFactor Temperature factor. 67 - 70 LString(4) footnote Usually blank. IGNORED. 71 - 76 Real(6.4) partialChrg Gasteiger PEOE partial charge *q*. 79 - 80 LString(2) atomType AutoDOCK atom type *t*. ============= ============ =========== ============================================= We ignore torsion notation and just pull the partial charge and atom type columns:: COMPND NSC7810 REMARK 3 active torsions: REMARK status: ('A' for Active; 'I' for Inactive) REMARK 1 A between atoms: A7_7 and C22_23 REMARK 2 A between atoms: A9_9 and A11_11 REMARK 3 A between atoms: A17_17 and C21_21 ROOT 123456789.123456789.123456789.123456789.123456789.123456789.123456789.123456789. (column reference) ATOM 1 A1 INH I 1.054 3.021 1.101 0.00 0.00 0.002 A ATOM 2 A2 INH I 1.150 1.704 0.764 0.00 0.00 0.012 A ATOM 3 A3 INH I -0.006 0.975 0.431 0.00 0.00 -0.024 A ATOM 4 A4 INH I 0.070 -0.385 0.081 0.00 0.00 0.012 A ATOM 5 A5 INH I -1.062 -1.073 -0.238 0.00 0.00 0.002 A ATOM 6 A6 INH I -2.306 -0.456 -0.226 0.00 0.00 0.019 A ATOM 7 A7 INH I -2.426 0.885 0.114 0.00 0.00 0.052 A ATOM 8 A8 INH I -1.265 1.621 0.449 0.00 0.00 0.002 A ATOM 9 A9 INH I -1.339 2.986 0.801 0.00 0.00 -0.013 A ATOM 10 A10 INH I -0.176 3.667 1.128 0.00 0.00 0.013 A ENDROOT BRANCH 9 11 ATOM 11 A11 INH I -2.644 3.682 0.827 0.00 0.00 -0.013 A ATOM 12 A16 INH I -3.007 4.557 -0.220 0.00 0.00 0.002 A ATOM 13 A12 INH I -3.522 3.485 1.882 0.00 0.00 0.013 A ATOM 14 A15 INH I -4.262 5.209 -0.177 0.00 0.00 -0.024 A ATOM 15 A17 INH I -2.144 4.784 -1.319 0.00 0.00 0.052 A ATOM 16 A14 INH I -5.122 4.981 0.910 0.00 0.00 0.012 A ATOM 17 A20 INH I -4.627 6.077 -1.222 0.00 0.00 0.012 A ATOM 18 A13 INH I -4.749 4.135 1.912 0.00 0.00 0.002 A ATOM 19 A19 INH I -3.777 6.285 -2.267 0.00 0.00 0.002 A ATOM 20 A18 INH I -2.543 5.650 -2.328 0.00 0.00 0.019 A BRANCH 15 21 ATOM 21 C21 INH I -0.834 4.113 -1.388 0.00 0.00 0.210 C ATOM 22 O1 INH I -0.774 2.915 -1.581 0.00 0.00 -0.644 OA ATOM 23 O3 INH I 0.298 4.828 -1.237 0.00 0.00 -0.644 OA ENDBRANCH 15 21 ENDBRANCH 9 11 BRANCH 7 24 ATOM 24 C22 INH I -3.749 1.535 0.125 0.00 0.00 0.210 C ATOM 25 O2 INH I -4.019 2.378 -0.708 0.00 0.00 -0.644 OA ATOM 26 O4 INH I -4.659 1.196 1.059 0.00 0.00 -0.644 OA ENDBRANCH 7 24 TORSDOF 3 123456789.123456789.123456789.123456789.123456789.123456789.123456789.123456789. (column reference) .. versionchanged:: 0.11.0 Frames now 0-based instead of 1-based """ format = "PDBQT" units = {"time": None, "length": "Angstrom"} def _read_first_frame(self): coords = [] unitcell = np.zeros(6, dtype=np.float32) with util.openany(self.filename) as pdbfile: for line in pdbfile: # Should only break at the 'END' of a model definition # and prevent premature exit for a torsion termination # , eg, ENDBRANCH if line.startswith("END\n"): break if line.startswith("CRYST1"): # lengths x, y, z = np.float32( (line[6:15], line[15:24], line[24:33]) ) # angles A, B, G = np.float32( (line[33:40], line[40:47], line[47:54]) ) unitcell[:] = x, y, z, A, B, G if line.startswith(("ATOM", "HETATM")): # convert all entries at the end once for optimal speed coords.append([line[30:38], line[38:46], line[46:54]]) self.n_atoms = len(coords) self.ts = self._Timestep.from_coordinates(coords, **self._ts_kwargs) self.ts.dimensions = unitcell self.ts.frame = 0 # 0-based frame number if self.convert_units: # in-place ! self.convert_pos_from_native(self.ts._pos) if self.ts.dimensions is not None: self.convert_pos_from_native(self.ts.dimensions[:3])
[docs] def Writer(self, filename, **kwargs): """Returns a permissive (simple) PDBQTWriter for *filename*. Parameters ---------- filename : str filename of the output PDBQT file Returns ------- :class:`PDBQTWriter` """ return PDBQTWriter(filename, **kwargs)
[docs] class PDBQTWriter(base.WriterBase): """PDBQT writer that implements a subset of the PDB_ 3.2 standard and the PDBQT_ spec. .. _PDB: http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html .. _PDBQT: http://autodock.scripps.edu/faqs-help/faq/what-is-the-format-of-a-pdbqt-file .. versionchanged:: 2.6.0 Files are now written in `wt` mode, and keep extensions, allowing for files to be written under compressed formats """ fmt = { "ATOM": ( "ATOM {serial:5d} {name:<4.4s} {resName:<4.4s}" "{chainID:1.1s}{resSeq:4d}{iCode:1.1s}" " {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}" "{tempFactor:6.2f} {charge:< 1.3f} {element:<2.2s}\n" ), "REMARK": "REMARK {0}\n", "TITLE": "TITLE {0}\n", "CRYST1": ( "CRYST1{box[0]:9.3f}{box[1]:9.3f}{box[2]:9.3f}" "{ang[0]:7.2f}{ang[1]:7.2f}{ang[2]:7.2f} " "{spacegroup:<11s}{zvalue:4d}\n" ), } format = "PDBQT" units = {"time": None, "length": "Angstrom"} pdb_coor_limits = {"min": -999.9995, "max": 9999.9995} def __init__(self, filename, **kwargs): self.filename = util.filename(filename, ext="pdbqt", keep=True) self.pdb = util.anyopen(self.filename, "wt")
[docs] def close(self): self.pdb.close()
[docs] def write(self, selection, frame=None): """Write selection at current trajectory frame to file. Parameters ---------- selection : AtomGroup The selection to be written frame : int (optional) optionally move to frame index `frame` before writing; the default is to write the current trajectory frame Note ---- The last letter of the :attr:`~MDAnalysis.core.groups.Atom.segid` is used as the PDB chainID. .. versionchanged:: 0.11.0 Frames now 0-based instead of 1-based """ try: u = selection.universe except AttributeError: errmsg = "Input obj is neither an AtomGroup or Universe" raise TypeError(errmsg) from None if frame is not None: u.trajectory[frame] # advance to frame else: try: frame = u.trajectory.ts.frame except AttributeError: frame = 0 # should catch cases when we are analyzing a single PDB (?) atoms = selection.atoms # make sure to use atoms (Issue 46) coor = ( atoms.positions ) # can write from selection == Universe (Issue 49) # Check attributes attrs = {} missing_topology = [] for attr, dflt in ( ("altLocs", " "), ("charges", 0.0), ("icodes", " "), ("names", "X"), ("occupancies", 1.0), ("resids", 1), ("resnames", "UNK"), ("tempfactors", 0.0), ("types", " "), ): try: attrs[attr] = getattr(atoms, attr) except AttributeError: attrs[attr] = itertools.cycle((dflt,)) missing_topology.append(attr) # Order of preference: chainids -> segids -> blank string try: attrs["chainids"] = atoms.chainids except AttributeError: try: attrs["chainids"] = atoms.segids except AttributeError: attrs["chainids"] = itertools.cycle((" ",)) missing_topology.append("chainids") if missing_topology: warnings.warn( "Supplied AtomGroup was missing the following attributes: " "{miss}. These will be written with default values. " "".format(miss=", ".join(missing_topology)) ) # check if any coordinates are illegal (coordinates are already # in Angstroem per package default) if not self.has_valid_coordinates(self.pdb_coor_limits, coor): self.close() try: os.remove(self.filename) except OSError as err: if err.errno == errno.ENOENT: pass raise ValueError( "PDB files must have coordinate values between {0:.3f}" " and {1:.3f} Angstroem: No file was written." "".format( self.pdb_coor_limits["min"], self.pdb_coor_limits["max"] ) ) # Write title record # http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html line = "FRAME " + str(frame) + " FROM " + str(u.trajectory.filename) self.pdb.write(self.fmt["TITLE"].format(line)) # Write CRYST1 record # http://www.wwpdb.org/documentation/file-format-content/format32/sect8.html box = self.convert_dimensions_to_unitcell(u.trajectory.ts) self.pdb.write( self.fmt["CRYST1"].format( box=box[:3], ang=box[3:], spacegroup="P 1", zvalue=1 ) ) # Write atom records # http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html for serial, ( pos, name, resname, chainid, resid, icode, occupancy, tempfactor, charge, element, ) in enumerate( zip( coor, attrs["names"], attrs["resnames"], attrs["chainids"], attrs["resids"], attrs["icodes"], attrs["occupancies"], attrs["tempfactors"], attrs["charges"], attrs["types"], ), start=1, ): serial = util.ltruncate_int(serial, 5) # check for overflow here? resid = util.ltruncate_int(resid, 4) name = name[:4] if len(name) < 4: name = " " + name # customary to start in column 14 chainid = chainid.strip()[-1:] # take the last character self.pdb.write( self.fmt["ATOM"].format( serial=serial, name=name, resName=resname, chainID=chainid, resSeq=resid, iCode=icode, pos=pos, occupancy=occupancy, tempFactor=tempfactor, charge=charge, element=element, ) ) self.close()