Source code for MDAnalysis.guesser.tables

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""
MDAnalysis topology tables
==========================

The module contains static lookup tables for atom typing etc. The
tables are dictionaries that are indexed by the element.

.. autodata:: atomelements
.. autodata:: masses
.. autodata:: vdwradii

The original raw data are stored as multi-line strings that are
translated into dictionaries with :func:`kv2dict`. In the future,
these tables might be moved into external data files; see
:func:`kv2dict` for explanation of the file format.

.. autofunction:: kv2dict

The raw tables are stored in the strings

.. autodata:: TABLE_ATOMELEMENTS
.. autodata:: TABLE_MASSES
.. autodata:: TABLE_VDWRADII
"""

from typing import Any


def kv2dict(s, convertor: Any = str):
    """Primitive ad-hoc parser of a key-value record list.

    * The string *s* should contain each key-value pair on a separate
      line (separated by newline). The first white space after the key
      separates key and value.
    * Empty lines are allowed.
    * Comment lines (starting with #) are allowed.
    * Leading and trailing whitespace is ignored.

    The *convertor* is a function that converts its single argument to
    a valid Python type. The default is :func:`str` but other
    possibilities are :func:`int` (for integers) or :func:`float` for
    floating point numbers.

    Parameters
    ----------
    s : str
        Text of the table, one ``key value`` record per line.
    convertor : callable, optional
        Called with the value text of each record (a :class:`str`); its
        return value is stored in the dictionary.

    Returns
    -------
    dict
        Mapping of each key to its converted value. If a key occurs more
        than once, the last record wins.

    Raises
    ------
    IndexError
        If a record consists of a key without a value. (The exception
        type is the one this function has always raised for such input;
        the message now names the offending line.)
    """
    d = {}
    for lineno, raw in enumerate(s.splitlines(), start=1):
        # Strip both ends: trailing blanks must not leak into the value,
        # otherwise the default ``str`` convertor would return e.g. "CA  ".
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        fields = line.split(None, 1)
        if len(fields) != 2:
            raise IndexError(
                "kv2dict: line {0} has a key but no value: {1!r}".format(
                    lineno, raw
                )
            )
        key, value = fields
        d[key] = convertor(value)
    return d
#: Plain-text table of hard-coded special atom names and the element each
#: one stands for; used for guessing atom types
#: with :func:`MDAnalysis.topology.core.guess_atom_element`.
TABLE_ATOMELEMENTS = """
# translation of atomnames to types/element
# based on CHARMM and AMBER usage with a little bit of GROMOS (and PROPKA)
# NOTE: CL might be ambiguous and is interpreted as chloride!

# --------- ------------------
# atomname   element
# --------- ------------------

# Bromide
BR           BR

# Calcium
CAL          CA
C0           CA
CA2+         CA

# Cesium
CES          CS

# Chloride
CLA          CL
CLAL         CL
CL           CL
CL-          CL

# Iodide
IOD          I

# Iron
FE           FE
FE2          FE

# Lithium
LIT          LI
LI           LI
LI+          LI
QL           LI

# Magnesium
MG           MG
MG2+         MG

# Noble gases
## XXX collides with NE, HE in Arg XXX
## XXX so we remove the noble gases XXX
##HE           HE
##NE           NE

# Potassium
K            K
POT          K
K+           K
QK           K

# Sodium
SOD          NA
NA           NA
NA+          NA
QN           NA

# Zink
ZN           ZN

# Copper
CU           CU

# Cesium
CS           CS
CS+          CS
CES          CS

# Cerium??
QC           CE

# Rubidium
RB           RB
QR           RB

# special carbons (Amber?)
BC           C
AC           C

# dummy atom types
MW           DUMMY

# other types are guessed from the name; see
# topology.core.guess_atom_elements()
"""

# NOTE(review): the doc comment on the raw table refers to
# ``guess_atom_element`` while the one below refers to ``guess_atom_type``;
# both references are kept as found -- confirm which function is current.
#: Dictionary with hard-coded special atom names (atom name -> element),
#: parsed from :data:`TABLE_ATOMELEMENTS`; used for guessing atom types
#: with :func:`MDAnalysis.topology.core.guess_atom_type`.
atomelements = kv2dict(TABLE_ATOMELEMENTS)

# fmt: off
elements = [
    'H', 'LI', 'BE', 'B', 'C', 'N', 'O', 'F',
    'NA', 'MG', 'AL', 'P', 'SI', 'S', 'CL', 'K',
]
# fmt: on

#: Plain-text table with atomic masses in u.
TABLE_MASSES = """
# masses for elements in atomic units (u)
# (taken from CHARMM and Gromacs atommass.dat)

#------------ -----------
# atomtype    mass
#------------ -----------
Ac            227.028
Al            26.981539
Am            243
Sb            121.757
Ar            39.948
As            74.92159
At            210
Ba            137.327
Bk            247
Be            9.012182
Bi            208.98037
Bh            264
B             10.811
BR            79.90400
Cd            112.411
CA            40.08000
Cf            251
C             12.01100
Ce            140.11600
CS            132.90000
CL            35.45000
Cr            51.9961
Co            58.9332
CU            63.54600
Cm            247
Db            262
Dy            162.5
Es            252
Er            167.26
Eu            151.965
Fm            257
F             18.99800
Fr            223
Gd            157.25
Ga            69.723
Ge            72.61
Au            196.96654
Hf            178.49
Hs            265
HE            4.00260
Ho            164.93032
H             1.00800
In            114.82
I             126.90450
Ir            192.22
FE            55.84700
Kr            83.8
La            138.9055
Lr            262
Pb            207.2
Li            6.941
Lu            174.967
MG            24.30500
Mn            54.93805
Mt            266
Md            258
Hg            200.59
Mo            95.94
N             14.00700
NA            22.98977
Nd            144.24
NE            20.17970
Np            237.048
Ni            58.6934
Nb            92.90638
No            259
Os            190.2
O             15.99900
Pd            106.42
P             30.97400
Pt            195.08
Pu            244
Po            209
K             39.10200
Pr            140.90765
Pm            145
Pa            231.0359
Ra            226.025
Rn            222
Re            186.207
Rh            102.9055
RB            85.46780
Ru            101.07
Rf            261
Sm            150.36
Sc            44.95591
Sg            263
Se            78.96
Si            28.0855
Ag            107.8682
Na            22.989768
Sr            87.62
S             32.06000
Ta            180.9479
Tc            98
Te            127.6
Tb            158.92534
Tl            204.3833
Th            232.0381
Tm            168.93421
Sn            118.71
Ti            47.88
W             183.85
U             238.0289
V             50.9415
Xe            131.29
Yb            173.04
Y             88.90585
ZN            65.37000
Zr            91.224
DUMMY         0.0
"""

# NOTE(review): the keys above mix all-upper-case and capitalised spellings
# (e.g. both "NA" and "Na" are present, "Li" but not "LI"); presumably the
# callers try more than one spelling -- confirm before normalising the table.
#: Dictionary table with atomic masses in u, indexed by the element from
#: :data:`atomelements`.
masses = kv2dict(TABLE_MASSES, convertor=float)

# The rows for arsenic, rubidium and mercury were previously keyed "AA", "RR"
# and "HH" (not element symbols), so those elements had no radius. The table
# is ordered by atomic number, which identifies the intended rows; the keys
# are now "AS", "RB" and "HG". The radii themselves are unchanged.
#: Plain-text table with vdw radii.
TABLE_VDWRADII = r"""
# Van der Waals radii taken from
# [1] Bondi, A. (1964). "Van der Waals Volumes and Radii".
#     J. Phys. Chem. 68 (3): 441-451. doi:10.1021/j100785a001.
# [2] Rowland and Taylor (1996). "Intermolecular Nonbonded Contact Distances in Organic Crystal Structures:
#                                 Comparison with Distances Expected from van der Waals Radii".
#     J. Phys. Chem., 1996, 100 (18), 7384.7391. doi:10.1021/jp953141+.
# [3] Mantina, et al. (2009). "Consistent van der Waals Radii for the Whole Main Group".
#     J. Phys. Chem. A, 2009, 113 (19), 5806-5812. doi:10.1021/jp8111556.
#------------ -----------
# atomtype    r_vdw
#------------ -----------
H             1.10
HE            1.40
LI            1.82
BE            1.53
B             1.92
C             1.70
N             1.55
O             1.52
F             1.47
NE            1.54
NA            2.27
MG            1.73
AL            1.84
SI            2.10
P             1.80
S             1.80
CL            1.75
AR            1.88
K             2.75
CA            2.31
NI            1.63
CU            1.40
ZN            1.39
GA            1.87
GE            2.11
AS            1.85
SE            1.90
BR            1.85
KR            2.02
RB            3.03
SR            2.49
PD            1.63
AG            1.72
CD            1.58
IN            1.93
SN            2.17
SB            2.06
TE            2.06
I             1.98
XE            2.16
CS            3.43
BA            2.68
PT            1.75
AU            1.66
HG            1.55
TL            1.96
PB            2.02
BI            2.07
PO            1.97
AT            2.02
RN            2.20
FR            3.48
RA            2.83
U             1.86
"""

#: Dictionary table with vdw radii, indexed by the element from
#: :data:`atomelements`.
#: .. SeeAlso:: :func:`MDAnalysis.topology.core.guess_bonds`
vdwradii = kv2dict(TABLE_VDWRADII, convertor=float)

# Atomic number -> element symbol (capitalised spelling), Z = 1 .. 118.
# fmt: off
Z2SYMB = {
    1: 'H', 2: 'He', 3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O',
    9: 'F', 10: 'Ne', 11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P',
    16: 'S', 17: 'Cl', 18: 'Ar', 19: 'K', 20: 'Ca', 21: 'Sc', 22: 'Ti',
    23: 'V', 24: 'Cr', 25: 'Mn', 26: 'Fe', 27: 'Co', 28: 'Ni', 29: 'Cu',
    30: 'Zn', 31: 'Ga', 32: 'Ge', 33: 'As', 34: 'Se', 35: 'Br', 36: 'Kr',
    37: 'Rb', 38: 'Sr', 39: 'Y', 40: 'Zr', 41: 'Nb', 42: 'Mo', 43: 'Tc',
    44: 'Ru', 45: 'Rh', 46: 'Pd', 47: 'Ag', 48: 'Cd', 49: 'In', 50: 'Sn',
    51: 'Sb', 52: 'Te', 53: 'I', 54: 'Xe', 55: 'Cs', 56: 'Ba', 57: 'La',
    58: 'Ce', 59: 'Pr', 60: 'Nd', 61: 'Pm', 62: 'Sm', 63: 'Eu', 64: 'Gd',
    65: 'Tb', 66: 'Dy', 67: 'Ho', 68: 'Er', 69: 'Tm', 70: 'Yb', 71: 'Lu',
    72: 'Hf', 73: 'Ta', 74: 'W', 75: 'Re', 76: 'Os', 77: 'Ir', 78: 'Pt',
    79: 'Au', 80: 'Hg', 81: 'Tl', 82: 'Pb', 83: 'Bi', 84: 'Po', 85: 'At',
    86: 'Rn', 87: 'Fr', 88: 'Ra', 89: 'Ac', 90: 'Th', 91: 'Pa', 92: 'U',
    93: 'Np', 94: 'Pu', 95: 'Am', 96: 'Cm', 97: 'Bk', 98: 'Cf', 99: 'Es',
    100: 'Fm', 101: 'Md', 102: 'No', 103: 'Lr', 104: 'Rf', 105: 'Db',
    106: 'Sg', 107: 'Bh', 108: 'Hs', 109: 'Mt', 110: 'Ds', 111: 'Rg',
    112: 'Cn', 113: 'Nh', 114: 'Fl', 115: 'Mc', 116: 'Lv', 117: 'Ts',
    118: 'Og',
}
# fmt: on

# Inverse of Z2SYMB: element symbol -> atomic number.
SYMB2Z = {v: k for k, v in Z2SYMB.items()}

# Conversion between SYBYL atom types and corresponding elements
# Tripos MOL2 file format:
# https://web.archive.org/web/*/http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf
# fmt: off
SYBYL2SYMB = {
    "H": "H", "H.spc": "H", "H.t3p": "H",
    "C.3": "C", "C.2": "C", "C.1": "C", "C.ar": "C", "C.cat": "C",
    "N.3": "N", "N.2": "N", "N.1": "N", "N.ar": "N",
    "N.am": "N", "N.pl3": "N", "N.4": "N",
    "O.3": "O", "O.2": "O", "O.co2": "O", "O.spc": "O", "O.t3p": "O",
    "S.3": "S", "S.2": "S", "S.O": "S", "S.O2": "S",
    "S.o": "S", "S.o2": "S",  # Non-standard but often found in the wild...
    "P.3": "P",
    "F": "F",
    "Li": "Li",
    "Na": "Na",
    "Mg": "Mg",
    "Al": "Al",
    "Si": "Si",
    "K": "K",
    "Ca": "Ca",
    "Cr.th": "Cr", "Cr.oh": "Cr",
    "Mn": "Mn",
    "Fe": "Fe",
    "Co.oh": "Co",
    "Cu": "Cu",
    "Cl": "Cl",
    "Br": "Br",
    "I": "I",
    "Zn": "Zn",
    "Se": "Se",
    "Mo": "Mo",
    "Sn": "Sn",
}
# fmt: on