Source code for MDAnalysis.analysis.hole2.utils

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2020 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#


"""
HOLE Analysis --- :mod:`MDAnalysis.analysis.hole2.helper`
=========================================================

:Author: Lily Wang
:Year: 2020
:Copyright: GNU Public License v3

.. versionadded:: 1.0

Helper functions used in :mod:`MDAnalysis.analysis.hole2.hole`
"""
import logging
import tempfile
import subprocess
import os
import numpy as np
import errno

from ...lib import util
from ...exceptions import ApplicationError
from .templates import (SIMPLE2_RAD, IGNORE_RESIDUES, hole_input,
                        hole_lines, exe_err)

logger = logging.getLogger(__name__)


[docs] def write_simplerad2(filename="simple2.rad"): """Write the built-in radii in :data:`SIMPLE2_RAD` to `filename`. Does nothing if `filename` already exists. Parameters ---------- filename : str, optional output file name; the default is "simple2.rad" Returns ------- filename : str returns the name of the data file """ if not os.path.exists(filename): with open(filename, "w") as rad: rad.write(SIMPLE2_RAD + "\n") logger.debug("Created simple radii file {}".format(filename)) return filename
[docs] def check_and_fix_long_filename(filename, tmpdir=os.path.curdir, max_length=70, make_symlink=True): """Return a file name suitable for HOLE. HOLE is limited to filenames <= ``max_length``. This method 1. returns `filename` if HOLE can process it 2. returns a relative path (see :func:`os.path.relpath`) if that shortens the path sufficiently 3. creates a symlink to `filename` (:func:`os.symlink`) in a safe temporary directory and returns the path of the symlink. Parameters ---------- filename : str file name to be processed tmpdir : str, optional By default the temporary directory is created inside the current directory in order to keep that path name short. This can be changed with the `tmpdir` keyword (e.g. one can use "/tmp"). The default is the current directory :data:`os.path.curdir`. Returns ------- str path to the file that has a length less than ``max_length`` Raises ------ RuntimeError If none of the tricks for filename shortening worked. In this case, manually rename the file or recompile your version of HOLE. """ if len(filename) <= max_length: return filename msg = ('HOLE will not read {} ' 'because it has more than {} characters.') logger.debug(msg.format(filename, max_length)) # try a relative path new_name = os.path.relpath(filename) if len(new_name) <= max_length: msg = 'Using relative path: {} -> {}' logger.debug(msg.format(filename, new_name)) return new_name if make_symlink: # shorten path by creating a symlink inside a safe temp dir _, ext = os.path.splitext(filename) dirname = os.path.relpath(tempfile.mkdtemp(dir=tmpdir)) newname = os.path.join(dirname, os.path.basename(filename)) if len(newname) > max_length: fd, newname = tempfile.mkstemp(suffix=ext, dir=dirname) os.close(fd) os.unlink(newname) if len(newname) > max_length: newname = os.path.relpath(newname) if len(newname) <= max_length: os.symlink(filename, newname) msg = 'Using symlink: {} -> {}' logger.debug(msg.format(filename, newname)) return newname msg = 'Failed to shorten filename {}' raise RuntimeError(msg.format(filename))
def set_up_hole_input(pdbfile, infile_text=None, infile=None, sphpdb_file='hole.sph', vdwradii_file=None, tmpdir=os.path.curdir, sample=0.2, end_radius=22, cpoint=None, cvect=None, random_seed=None, ignore_residues=IGNORE_RESIDUES, output_level=0, dcd=None, dcd_iniskip=0, dcd_step=1): """ Generate HOLE input for the parameters. Parameters ---------- pdbfile : str The `filename` is used as input for HOLE in the "COORD" card of the input file. It specifies the name of a PDB coordinate file to be used. This must be in Brookhaven protein databank format or something closely approximating this. Both ATOM and HETATM records are read. infile_text: str, optional HOLE input text or template. If set to ``None``, the function will create the input text from the other parameters. Default: ``None`` infile: str, optional File to write the HOLE input text for later inspection. If set to ``None``, the input text is not written out. Default: ``None`` sphpdb_file : str, optional path to the HOLE sph file, a PDB-like file containing the coordinates of the pore centers. The coordinates are set to the sphere centres and the occupancies are the sphere radii. All centres are assigned the atom name QSS and residue name SPH and the residue number is set to the storage number of the centre. In VMD, sph objects are best displayed as "Points". Displaying .sph objects rather than rendered or dot surfaces can be useful to analyze the distance of particular atoms from the sphere-centre line. .sph files can be used to produce molecular graphical output from a hole run, by using the :program:`sph_process` program to read the .sph file. Default: 'hole.sph' vdwradii_file: str, optional path to the file specifying van der Waals radii for each atom. If set to ``None``, then a set of default radii, :data:`SIMPLE2_RAD`, is used (an extension of ``simple.rad`` from the HOLE distribution). Default: ``None`` tmpdir: str, optional The temporary directory that files can be symlinked to, to shorten the path name. HOLE can only read filenames up to a certain length. Default: current working directory sample : float, optional distance of sample points in Å. Specifies the distance between the planes used in the HOLE procedure. The default value should be reasonable for most purposes. However, if you wish to visualize a very tight constriction then specify a smaller value. This value determines how many points in the pore profile are calculated. Default: 0.2 end_radius : float, optional Radius in Å, which is considered to be the end of the pore. This keyword can be used to specify the radius above which the program regards a result as indicating that the end of the pore has been reached. This may need to be increased for large channels, or reduced for small channels. Default: 22.0 cpoint : array_like, 'center_of_geometry' or None, optional coordinates of a point inside the pore, e.g. ``[12.3, 0.7, 18.55]``. If set to ``None`` (the default) then HOLE's own search algorithm is used. ``cpoint`` specifies a point which lies within the channel. For simple channels (e.g. gramicidin), results do not show great sensitivity to the exact point taken. An easy way to produce an initial point is to use molecular graphics to find two atoms which lie either side of the pore and to average their coordinates. Or if the channel structure contains water molecules or counter ions then take the coordinates of one of these (and use the `ignore_residues` keyword to ignore them in the pore radius calculation). If this card is not specified, then HOLE (from version 2.2) attempts to guess where the channel will be. The procedure assumes the channel is reasonably symmetric. The initial guess on cpoint will be the centroid of all alpha carbon atoms (name 'CA' in pdb file). This is then refined by a crude grid search up to 5 Å from the original position. This procedure works most of the time but is far from infallible — results should be carefully checked (with molecular graphics) if it is used. Default: None cvect : array_like, optional Search direction, should be parallel to the pore axis, e.g. ``[0,0,1]`` for the z-axis. If this keyword is ``None`` (the default), then HOLE attempts to guess where the channel will be. The procedure assumes that the channel is reasonably symmetric. The guess will be either along the X axis (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the structure is not aligned on one of these axis the results will clearly be approximate. If a guess is used then results should be carefully checked. Default: None random_seed : int, optional integer number to start the random number generator. By default, :program:`hole` will use the time of the day. For reproducible runs (e.g., for testing) set ``random_seed`` to an integer. Default: ``None`` ignore_residues : array_like, optional sequence of three-letter residues that are not taken into account during the calculation; wildcards are *not* supported. Note that all residues must have 3 letters. Pad with space on the right-hand side if necessary. Default: {}. output_level : int, optional Determines the output of output in the ``outfile``. For automated processing, this must be < 3. 0: Full text output, 1: All text output given except "run in progress" (i.e., detailed contemporary description of what HOLE is doing). 2: Ditto plus no graph type output - only leaving minimum radius and conductance calculations. 3: All text output other than input card mirroring and error messages turned off. Default: 0 dcd : str, optional File name of DCD trajectory (must be supplied together with a matching PDB file `filename`) and then HOLE runs its analysis on each frame. It does multiple HOLE runs on positions taken from a CHARMM binary dynamics format DCD trajectory file. The ``dcd`` file must have exactly the same number of atoms in exactly the same order as the pdb file specified by ``pdbfile``. Note that if this option is used the pdb file is used as a template only - the coordinates are ignored. Note that structural parameters determined for each individual structure are written in a tagged format so that it is possible to extract the information from the text output file using a :program:`grep` command. The reading of the file can be controlled by the ``dcd_step`` keyword and/or setting ``dcd_iniskip`` to the number of frames to be skipped initially. dcd_step : int, optional step size for going through the trajectory (skips ``dcd_step-1`` frames). Default: 1 Returns ------- str input text to run HOLE .. versionadded:: 1.0 """.format(IGNORE_RESIDUES) short_filename = check_and_fix_long_filename(pdbfile, tmpdir=tmpdir) if vdwradii_file is not None: vdwradii_file = check_and_fix_long_filename(vdwradii_file, tmpdir=tmpdir) else: vdwradii_file = write_simplerad2() if dcd is not None: dcd = check_and_fix_long_filename(dcd, tmpdir=tmpdir) if infile_text is None: infile_text = hole_input residues = ' '.join(ignore_residues) infile_text = infile_text.format(filename=pdbfile, coordinates=short_filename, radius=vdwradii_file, sphpdb=sphpdb_file, sample=sample, end_radius=end_radius, ignore=residues, output_level=output_level) if random_seed is not None: random_seed = int(random_seed) infile_text += hole_lines['random_seed'].format(random_seed) logger.info("Fixed random number seed {} for reproducible " "runs.".format(random_seed)) if cpoint is not None: if isinstance(cpoint, str): infile_text += 'CPOINT ' + cpoint + '\n' else: infile_text += hole_lines['cpoint'].format(*cpoint) else: logger.info("HOLE will guess CPOINT") if cvect is not None: infile_text += hole_lines['cvect'].format(*cvect) else: logger.info("HOLE will guess CVECT") if dcd is not None: infile_text += hole_lines['dcd'].format(dcd=dcd, iniskip=dcd_iniskip, step=dcd_step) if infile is not None: with open(infile, 'w') as f: f.write(infile_text) msg = 'Wrote HOLE input file {} for inspection' logger.debug(msg.format(infile)) return infile_text
[docs] def run_hole(outfile, infile_text, executable): """Run the HOLE program. Parameters ---------- outfile: str Output file name infile_text: str HOLE input text (typically generated by :func:`set_up_hole_input`) executable: str HOLE executable Returns ------- str Output file name """ with open(outfile, 'w') as output: proc = subprocess.Popen(executable, stdin=subprocess.PIPE, stdout=output) stdout, stderr = proc.communicate(infile_text.encode('utf-8')) # check output in case of errors with open(outfile, 'r') as output: for line in output: if line.strip().startswith(('*** ERROR ***', 'ERROR')): proc.returncode = 255 break # die in case of error if proc.returncode != 0: msg = 'HOLE failure ({}). Check output {}' logger.fatal(msg.format(proc.returncode, outfile)) if stderr is not None: logger.fatal(stderr) raise ApplicationError(proc.returncode, msg.format(executable, outfile)) logger.info('HOLE finished. Output: {}'.format(outfile)) return outfile
[docs] def collect_hole(outfile='hole.out'): """Collect data from HOLE output Parameters ---------- outfile: str, optional HOLE output file to read. Default: 'hole.out' Returns ------- dict Dictionary of HOLE profiles as record arrays """ fmt = util.FORTRANReader('3F12') recarrays = {} with open(outfile, 'r') as output: toggle_read = False profile = 0 records = [] for line in output: line = line.rstrip() # preserve columns in FORTRAN output # multiple frames from dcd in? if line.startswith(" Starting calculation for position number"): fields = line.split() profile = int(fields[5]) records = [] logger.debug('Started reading profile {}'.format(profile)) continue # found data if line.startswith(' cenxyz.cvec'): toggle_read = True logger.debug('Started reading data') continue if toggle_read: if len(line.strip()) != 0: try: rxncoord, radius, cenlineD = fmt.read(line) except: msg = 'Problem parsing line: {}. Check output file {}' logger.exception(msg.format(line, outfile)) raise records.append((rxncoord, radius, cenlineD)) continue # end of data else: toggle_read = False names = ['rxn_coord', 'radius', 'cen_line_D'] recarr = np.rec.fromrecords(records, formats='f8, f8, f8', names=names) recarrays[profile] = recarr return recarrays
[docs] def create_vmd_surface(sphpdb='hole.sph', filename=None, sph_process='sph_process', sos_triangle='sos_triangle', dot_density=15): """Create VMD surface file from sphpdb file. Parameters ---------- sphpdb: str, optional sphpdb file to read. Default: 'hole.sph' filename: str, optional output VMD surface file. If ``None``, a temporary file is generated. Default: ``None`` sph_process: str, optional Executable for ``sph_process`` program. Default: 'sph_process' sos_triangle: str, optional Executable for ``sos_triangle`` program. Default: 'sos_triangle' dot_density: int, optional density of facets for generating a 3D pore representation. The number controls the density of dots that will be used. A sphere of dots is placed on each centre determined in the Monte Carlo procedure. The actual number of dots written is controlled by ``dot_density`` and the ``sample`` level of the original analysis. ``dot_density`` should be set between 5 (few dots per sphere) and 35 (many dots per sphere). Default: 15 Returns ------- str the output filename for the VMD surface """ fd, tmp_sos = tempfile.mkstemp(suffix=".sos", text=True) os.close(fd) sph_process_path = util.which(sph_process) if sph_process_path is None: raise OSError(errno.ENOENT, exe_err.format(name=sph_process, kw='sph_process')) base_path = os.path.dirname(sph_process_path) sos_triangle_path = util.which(sos_triangle) if sos_triangle_path is None: path = os.path.join(base_path, sos_triangle) sos_triangle_path = util.which(path) if sos_triangle_path is None: raise OSError(errno.ENOENT, exe_err.format(name=sos_triangle, kw='sos_triangle')) try: output = subprocess.check_output([sph_process_path, "-sos", "-dotden", str(dot_density), "-color", sphpdb, tmp_sos], stderr=subprocess.STDOUT) except subprocess.CalledProcessError as err: os.unlink(tmp_sos) logger.fatal("sph_process failed ({0})".format(err.returncode)) raise OSError(err.returncode, "sph_process failed") from None except: os.unlink(tmp_sos) raise if filename is None: fd, filename = tempfile.mkstemp(suffix=".sos", text=True) os.close(fd) try: # Could check: os.devnull if subprocess.DEVNULL not available (>3.3) # Suppress stderr messages of sos_triangle with open(tmp_sos) as sos, open(filename, "w") as triangles, \ open(os.devnull, 'w') as FNULL: subprocess.check_call( [sos_triangle_path, "-s"], stdin=sos, stdout=triangles, stderr=FNULL) except subprocess.CalledProcessError as err: logger.fatal("sos_triangle failed ({0})".format(err.returncode)) raise OSError(err.returncode, "sos_triangle failed") from None finally: os.unlink(tmp_sos) return filename