Source code for MDAnalysis.topology.PQRParser
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
PQR topology parser
===================
Read atoms with charges from a PQR_ file (as written by PDB2PQR_). No
connectivity is deduced.
Note
----
The file format is described in :mod:`MDAnalysis.coordinates.PQR`.
Classes
-------
.. autoclass:: PQRParser
:members:
:inherited-members:
.. _PQR: https://apbs-pdb2pqr.readthedocs.io/en/latest/formats/pqr.html
.. _APBS: https://apbs-pdb2pqr.readthedocs.io/en/latest/apbs/index.html
.. _PDB2PQR: https://apbs-pdb2pqr.readthedocs.io/en/latest/pdb2pqr/index.html
.. _PDB: http://www.wwpdb.org/documentation/file-format
"""
from __future__ import absolute_import
import numpy as np
from . import guessers
from ..lib.util import openany
from ..core.topologyattrs import (
Atomids,
Atomnames,
Atomtypes,
Charges,
ICodes,
Masses,
Radii,
RecordTypes,
Resids,
Resnums,
Resnames,
Segids,
)
from ..core.topology import Topology
from .base import TopologyReaderBase, squash_by, change_squash
[docs]class PQRParser(TopologyReaderBase):
"""Parse atom information from PQR file *filename*.
Creates a MDAnalysis Topology with the following attributes
- Atomids
- Atomnames
- Charges
- Radii
- RecordTypes (ATOM/HETATM)
- Resids
- Resnames
- Segids
Guesses the following:
- atomtypes (if not present, Gromacs generated PQR files have these)
- masses
.. versionchanged:: 0.9.0
Read chainID from a PQR file and use it as segid (before we always used
'SYSTEM' as the new segid).
.. versionchanged:: 0.16.1
Now reads insertion codes and splits into new residues around these
.. versionchanged:: 0.18.0
Added parsing of Record types
Can now read PQR files from Gromacs, these provide atom type as last column
but don't have segids
"""
format = 'PQR'
[docs] @staticmethod
def guess_flavour(line):
"""Guess which variant of PQR format this line is
Parameters
----------
line : str
entire line of PQR file starting with ATOM/HETATM
Returns
-------
flavour : str
ORIGINAL / GROMACS / NO_CHAINID
.. versionadded:: 0.18.0
"""
fields = line.split()
if len(fields) == 11:
try:
float(fields[-1])
except ValueError:
flavour = 'GROMACS'
else:
flavour = 'ORIGINAL'
else:
flavour = 'NO_CHAINID'
return flavour
[docs] def parse(self, **kwargs):
"""Parse atom information from PQR file *filename*.
Returns
-------
A MDAnalysis Topology object
"""
record_types = []
serials = []
names = []
resnames = []
chainIDs = []
resids = []
icodes = []
charges = []
radii = []
elements = []
flavour = None
with openany(self.filename) as f:
for line in f:
if not line.startswith(("ATOM", "HETATM")):
continue
fields = line.split()
if flavour is None:
flavour = self.guess_flavour(line)
if flavour == 'ORIGINAL':
(recordName, serial, name, resName,
chainID, resSeq, x, y, z, charge,
radius) = fields
elif flavour == 'GROMACS':
(recordName, serial, name, resName,
resSeq, x, y, z, charge,
radius, element) = fields
chainID = "SYSTEM"
elements.append(element)
elif flavour == 'NO_CHAINID':
# files without the chainID
(recordName, serial, name, resName,
resSeq, x, y, z, charge, radius) = fields
chainID = "SYSTEM"
try:
resid = int(resSeq)
except ValueError:
# has icode present
resid = int(resSeq[:-1])
icode = resSeq[-1]
else:
icode = ''
record_types.append(recordName)
serials.append(serial)
names.append(name)
resnames.append(resName)
resids.append(resid)
icodes.append(icode)
charges.append(charge)
radii.append(radius)
chainIDs.append(chainID)
n_atoms = len(serials)
if not elements:
atomtypes = guessers.guess_types(names)
guessed_types = True
else:
atomtypes = elements
guessed_types = False
masses = guessers.guess_masses(atomtypes)
attrs = []
attrs.append(Atomids(np.array(serials, dtype=np.int32)))
attrs.append(Atomnames(np.array(names, dtype=object)))
attrs.append(Charges(np.array(charges, dtype=np.float32)))
attrs.append(Atomtypes(atomtypes, guessed=guessed_types))
attrs.append(Masses(masses, guessed=True))
attrs.append(RecordTypes(np.array(record_types, dtype=object)))
attrs.append(Radii(np.array(radii, dtype=np.float32)))
resids = np.array(resids, dtype=np.int32)
icodes = np.array(icodes, dtype=object)
resnames = np.array(resnames, dtype=object)
chainIDs = np.array(chainIDs, dtype=object)
residx, (resids, resnames, icodes, chainIDs) = change_squash(
(resids, resnames, icodes, chainIDs),
(resids, resnames, icodes, chainIDs))
n_residues = len(resids)
attrs.append(Resids(resids))
attrs.append(Resnums(resids.copy()))
attrs.append(Resnames(resnames))
attrs.append(ICodes(icodes))
segidx, chainIDs = squash_by(chainIDs)[:2]
n_segments = len(chainIDs)
attrs.append(Segids(chainIDs))
top = Topology(n_atoms, n_residues, n_segments,
attrs=attrs,
atom_resindex=residx,
residue_segindex=segidx)
return top