Source code for isicle.adducts

import functools
import operator
import os
import pickle
import re

from rdkit import Chem

import isicle
from isicle.interfaces import WrapperInterface
from isicle.md import md
from isicle.utils import safelist


[docs] def _parse_file_ions(path): ''' Read adduct ions (eg. 'Na+', 'H+', 'Mg2+') from file. Parameters ---------- path : str Path to ions text file (.txt, etc.), each ion should be a new line. Returns ------- parsed_contents List of ions from given text file. ''' if path is None: raise TypeError('Path to file containing ions must be specified') if os.path.isfile(path) == True: contents = isicle.geometry._load_text(path) parsed_contents = [line.rstrip() for line in contents] return parsed_contents else: raise TypeError('Path to file is not valid')
[docs] def _parse_list_ions(ion_list): ''' Parse and categorize list of ions by ion type. Input ----- ion_list: list List of all ions to be considered, eg. ['K+','H-','H-Na+'] Strings in list must have notation 'IonCharge' eg. 'H+', not '+H' Complex ions should be arranged in order of desired operation. (i.e. 'H-K+' specifies deprotonatin first, potassium added second) Returns ---------- anions : list List of anions, default None cations : list List of cations, default None complex : list List of complex ions, default None ''' anions = [] cations = [] complex = [] ion_list = safelist(ion_list) for x in ion_list: if ('+' in x) and ('-' in x): complex.append(x) elif '+' in x: cations.append(x) elif '-' in x: anions.append(x) else: raise ValueError('Unrecognized ion specified: {}'.format(x)) return (cations, anions, complex)
[docs] def _parse_ions(ion_path=None, ion_list=None): ''' Parses supplied ion information to recognized format Params ------ ion_path: str Filepath to text file with an ion on each new line For more layout information, see: adducts._parse_file_ions() ion_list: list of str List of ions in format 'ioncharge' (eg. 'H+' or 'Na+') For more information on complex ions, see: adducts._parse_list_ions() Returns ------- Tuple of (cations, anions, complex ions) ''' if ion_path is not None: # Load ion file ion_list = _parse_file_ions(ion_path) if ion_list is not None: # Parse ion file return _parse_list_ions(ion_list) else: raise RuntimeError('No ions supplied to parse.')
[docs] def _check_atom_group(ion_atomic_num): ''' Checks periodic group atom belongs to Params ------ ion_atomic_num: Int Returns ------- Bool ''' return ion_atomic_num in [1, 3, 4, 11, 12, 19, 20, 37, 38, 55, 56, 87, 88]
[docs] def _filter_by_substructure_match(init_mol, unknown_valid_list): ''' Filters ions in supplied list that are a substructure match to the supplied mol Params ------ init_mol: RDKit mol object unknown_valid_list: list of str List of ions (eg. [`H-`]) ''' valid_list = [] for ion in unknown_valid_list: parsed_ion = re.findall('.+?[0-9]?[+-]', ion) subset = [ f"[{re.findall('(.+?)[0-9]?[-]', i)[0]}]" for i in parsed_ion if '-' in i] subset_mol = [Chem.MolFromSmiles(i, sanitize=False) for i in subset] substruc_check = [ i for i in subset_mol if init_mol.HasSubstructMatch(i)] if False in substruc_check: continue else: valid_list.append(ion) return valid_list
[docs] def get_nested_dict_values(d): for v in d.values(): if isinstance(v, dict): yield from get_nested_dict_values(v) else: yield v
[docs] class AdductEnsemble(dict): def __setitem__(self, key, value): self.__dict__[key] = value def __getitem__(self, key): return self.__dict__[key] def __repr__(self): return repr(self.__dict__) def __len__(self): return len(self.__dict__) def __delitem__(self, key): del self.__dict__[key]
[docs] def clear(self): return self.__dict__.clear()
[docs] def copy(self): return self.__dict__.copy()
[docs] def has_key(self, k): return k in self.__dict__
[docs] def update(self, *args, **kwargs): return self.__dict__.update(*args, **kwargs)
[docs] def keys(self): return self.__dict__.keys()
[docs] def values(self): return self.__dict__.values()
[docs] def get_structures(self): values = get_nested_dict_values(self.__dict__) return isicle.conformers.build_conformational_ensemble(values)
[docs] def build_adduct_ensemble(geometries): # Cast to conformational ensemble if not isinstance(geometries, isicle.conformers.ConformationalEnsemble): geometries = isicle.conformers.build_conformational_ensemble( geometries) geometries._check_attributes('ion') geometries._check_attributes('adductID') d = AdductEnsemble() for adduct in geometries: if adduct.ion not in d.keys(): d[adduct.ion] = {} d[adduct.ion][adduct.adductID] = adduct return d
[docs] def ionize(ion_method): ''' Selects a supported ionization method. Parameters ---------- ion_method : str Alias for ion method selection (e.g. explicit). Returns ------- program Wrapped functionality of the selected program. Must implement :class:`~isicle.interfaces.AdductInterface`. ''' ion_method_map = {'explicit': ExplicitIonizationWrapper, 'crest': CRESTIonizationWrapper} if ion_method.lower() in ion_method_map.keys(): return ion_method_map[ion_method.lower()]() else: raise ValueError( '{} not a supported ionization method.'.format(ion_method))
[docs] def write(IonizationWrapper, path, fmt): ''' Write mol objects to file. Parameters ---------- path : str Directory to write output files fmt : str Format in which to save the RDKit mol object (eg. 'mol2', 'pdb') ''' if path is not None and fmt is not None: for adduct in IonizationWrapper.adducts: isicle.geometry.Geometry.save(adduct.mol, os.path.join( path, '{}_{}'.format(adduct.basename, adduct.ion, adduct.adductID)), fmt) return elif path is not None: raise ( 'path passed to isicle.adducts.write; fmt flag must also be passed; data not written') elif fmt is not None: raise ( 'fmt is supplied to isicle.adducts.write; path flag must also be passed; data not saved') else: return
[docs] class ExplicitIonizationWrapper(WrapperInterface): ''' ''' _defaults = ('geom', 'adducts') _default_value = None def __init__(self, **kwargs): ''' Initialize :obj:`~isicle.adducts.ExplicitIonizationWrapper` instance. ''' self.__dict__.update(dict.fromkeys( self._defaults, self._default_value)) self.__dict__.update(**kwargs) if self.adducts is None: self.adducts = {}
[docs] def set_geometry(self, geom): ''' Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters ---------- geometry : :obj:`~isicle.geometry.Geometry` Molecule representation. ''' if self.__dict__.get('geom') is not None: pass elif self.__dict__.get('mol') is not None: self.geom = geom else: raise ValueError('Could not find self.mol or self.geom')
[docs] def _set_ions(self, ion_path=None, ion_list=None): ''' ''' cations, anions, complex = _parse_ions( ion_path=ion_path, ion_list=ion_list) # Cast to list safely self.adducts['cations'] = safelist(cations) self.adducts['anions'] = safelist(anions) self.adducts['complex'] = safelist(complex)
[docs] def _check_valid(self): ''' Perform substructure search. ''' self.adducts['anions'] = _filter_by_substructure_match( self.geom.mol, self.adducts['anions']) self.adducts['complex'] = _filter_by_substructure_match( self.geom.mol, self.adducts['complex'])
[docs] def configure(self, ion_path=None, ion_list=None): ''' ''' self._set_ions(ion_path=ion_path, ion_list=ion_list) self._check_valid()
[docs] def _modify_atom_dict(self, init_mol, ion_atomic_num, mode=None, include_Alkali_ne=False): ''' Downselect atom sites that can accept ions Parameters ---------- init_mol: RDKit mol object ion_atomic_num: Int Integer denoting the atomic number of the ion to be added or removed mode: str 'positive' for positive mode 'negative' for negative mode include_Alkaline_ne: Bool If False: removes alkali and alkaline metals from base atoms to be ionized (default) If True: Adducts are attempted to be formed at these atoms, in addition to other atoms. Returns ------- all_atom_dict defined as {<atom.rdkit.obj>:['C',3,2,1]} -atom.GetSymbol() returns atomic symbol e.g. 'C' -atom.GetTotalValence() returns total (explicit+implicit) valence -atom.GetDegree() returns number of directly-bonded neighbours indep. of bond order, dep. of Hs set to explicit -atom.GetTotalNumHs returns total (explicit+implicit) Hs on atom ''' pt = Chem.GetPeriodicTable() all_atom_dict = {atom.GetIdx(): [atom.GetSymbol(), atom.GetTotalValence() - atom.GetDegree(), atom.GetTotalNumHs(includeNeighbors=True), atom] for atom in init_mol.GetAtoms()} if mode == 'positive': atom_dict = {} for key, value in all_atom_dict.items(): # Check if atom atomic number is the same as the specified ion atom_atomic_num = pt.GetAtomicNumber(value[0]) if include_Alkali_ne == False: # Remove ion sites that are alkali or alkaline metals if _check_atom_group(atom_atomic_num): continue # Remove atoms that cannot accept a bond & do not have H to bump elif value[1] == 0 and value[2] == 0: continue atom_dict[key] = value return atom_dict elif mode == 'negative': atom_dict = {} for key, value in all_atom_dict.items(): # Check if ion atomic num is in list of neighbouring atomic nums nbrs = [nbr.GetAtomicNum() for nbr in value[3].GetNeighbors()] if ion_atomic_num in nbrs: atom_dict[key] = value else: continue return atom_dict elif mode == None: raise ValueError( 'Must supply an ionization mode to _modify_atom_dict')
[docs] def _subset_atom_dict(self, atom_dict, index_list=None, element_list=None): ''' Downselect atom sites to user supplied atom indices or element symbols Params ------ atom_dict: Dict Dictionary of format {<atom.rdkit.obj>:[atom Symbol, atom Total Valence, atom Degree, # bonded H atoms]} index_list: list list of integers denoting IUPAC atom indexes of base atoms to be ionized (eg. [0,1]) element_list: list list of atoms symbols denoting base atom types to be ionized (eg. ['O','N']) Returns ------- subset_atom_dict defined as {<atom.rdkit.obj>:['C',3,2,1]} -atom.GetSymbol() returns atomic symbol e.g. 'C' -atom.GetTotalValence() returns total (explicit+implicit) valence -atom.GetDegree() returns number of directly-bonded neighbours indep. of bond order, dep. of Hs set to explicit -atom.GetTotalNumHs returns total (explicit+implicit) Hs on atom ''' subset_atom_dict = {} if index_list is not None: for k, v in atom_dict.items(): if k in index_list: subset_atom_dict[k] = v if element_list is not None: for k, v in atom_dict.items(): if v[0] in element_list: subset_atom_dict[k] = v return subset_atom_dict
[docs] def _forcefield_selector(self, forcefield, mw): ''' Checks if user specified forcefield is compatible with supplied mol Parameters ---------- forcefield: str Specify "UFF" for Universal Force Field optimization by RDKit Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 Specify "MMFF94s" for the MMFF94 s variant mw: RDKit mol object Returns ------- RDKit forcefield optimization function that must be implemented ''' forcefield = forcefield.lower() if forcefield == 'uff': if Chem.rdForceFieldHelpers.UFFHasAllMoleculeParams(mw) is True: return Chem.AllChem.UFFOptimizeMolecule else: raise ValueError( 'UFF is not available for all atoms in molecule.') elif forcefield in ['mmff', 'mmff94', 'mmff94s']: if Chem.rdForceFieldHelpers.MMFFHasAllMoleculeParams(mw) is True: return Chem.rdForceFieldHelpers.MMFFOptimizeMolecule else: raise ValueError( 'specified MMFF is not available for all atoms in molecule.') else: raise ValueError( 'RDKit only supports UFF, MMFF, MMFF94, MMFF94s as forcefields.')
# TODO: update this
[docs] def _update_geometry_charge(self, geom): ''' ''' geom.__dict__.update(charge=geom.formal_charge)
[docs] def _add_ion(self, init_mol, base_atom_idx, ion_atomic_num, sanitize, forcefield, ff_iter): ''' Adds specified ion (by atomic num) to specified base atom (by atom's index). Parameters ---------- init_mol: RDKit mol object base_atom_idx: Int Integer denoting the the atom to which an ion will be added ion_atomic_num: Int Integer denoting the atomic number of the ion to be added sanitize: Bool Kekulize, check valencies, set aromaticity, conjugation and hybridization forcefield: str Specify "UFF" for Universal Force Field optimization by RDKit Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 Specify "MMFF94s" for the MMFF94 s variant ff_iter: Int Integer specifying the max iterations for the specified forcefield Returns _______ Ionized RDKit mol object ''' mw = Chem.RWMol(init_mol) atom_idx = mw.AddAtom(Chem.Atom(ion_atomic_num)) ba = mw.GetAtomWithIdx(base_atom_idx) fc = ba.GetFormalCharge() mw.AddBond(base_atom_idx, atom_idx, Chem.BondType.SINGLE) ba.SetFormalCharge(fc+1) mw.UpdatePropertyCache(strict=False) if sanitize is True: Chem.SanitizeMol(mw) if forcefield: tempff = self._forcefield_selector(forcefield, mw) if 'mmff94s' in forcefield: tempff(mw, mmffVariant=forcefield, maxIters=ff_iter) else: tempff(mw, maxIters=ff_iter) geom = isicle.geometry.Geometry( mol=mw, basename=self.geom.get_basename()) self._update_geometry_charge(geom) return geom
[docs] def _apply_add_ion(self, init_mol, atom_dict, ion_atomic_num, sanitize, forcefield, ff_iter): ''' Iteratively ionize all atoms in a supplied dictionary. Parameters ---------- init_mol: RDKit mol object atom_dict: Dict Dictionary of format {<atom.rdkit.obj>:[atom Symbol, atom Total Valence, atom Degree, # bonded H atoms]} ion_atomic_num: Int Integer denoting the atomic number of the ion to be added sanitize: Bool Kekulize, check valencies, set aromaticity, conjugation and hybridization forcefield: str Specify "UFF" for Universal Force Field optimization by RDKit Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 Specify "MMFF94s" for the MMFF94 s variant ff_iter: Int Integer specifying the max iterations for the specified forcefield Returns ------- Dictionary of style {base atom index: ionized RDKit mol object} ''' mol_dict = {} for key in atom_dict.keys(): mw = self._add_ion(init_mol, key, ion_atomic_num, sanitize, forcefield, ff_iter) # Format {atom_base_idx:<newmol>} mol_dict[key] = mw return mol_dict
[docs] def _positive_mode(self, init_mol, ion_atomic_num, batch, index_list, element_list, sanitize, forcefield, ff_iter, include_Alkali_ne): ''' Subsets atoms in supplied mol by specified index or elemental constraints, or feasibilty based upon supplied ion atomic number. Parameters ---------- init_mol: RDKit mol object ion_atomic_num: Int Integer denoting the atomic number of the ion to be added batch: Bool Whether adducts should be formed from all possible heavy atoms index_list: list list of integers denoting IUPAC atom indexes of base atoms to be ionized (eg. [0,1]) element_list: list list of atoms symbols denoting base atom types to be ionized (eg. ['O','N']) sanitize: Bool Kekulize, check valencies, set aromaticity, conjugation and hybridization forcefield: str Specify "UFF" for Universal Force Field optimization by RDKit Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 Specify "MMFF94s" for the MMFF94 s variant ff_iter: Int Integer specifying the max iterations for the specified forcefield include_Alkaline_ne: Bool If False: removes alkali and alkaline metals from base atoms to be ionized If True: Adducts are attempted to be formed at these atoms, in addition to other atoms. Returns ------- Dictionary of style {base atom index: ionized RDKit mol object} ''' atom_dict = self._modify_atom_dict( init_mol, ion_atomic_num, mode='positive', include_Alkali_ne=include_Alkali_ne) if index_list is not None: subset_atom_dict = self._subset_atom_dict( atom_dict, index_list=index_list) mol_dict = self._apply_add_ion(init_mol, subset_atom_dict, ion_atomic_num, sanitize, forcefield, ff_iter) elif element_list is not None: subset_atom_dict = self._subset_atom_dict( atom_dict, element_list=element_list) mol_dict = self._apply_add_ion(init_mol, subset_atom_dict, ion_atomic_num, sanitize, forcefield, ff_iter) elif batch == True: mol_dict = self._apply_add_ion( init_mol, atom_dict, ion_atomic_num, sanitize, forcefield, ff_iter) else: raise ValueError( 'Must provide a list of atom indexes, list of elements, or set batch=True.') return mol_dict
[docs] def _remove_ion(self, init_mol, base_atom_idx, ion_atomic_num, sanitize, forcefield, ff_iter): ''' Removes specified ion (by atomic num) from specified base atom (by atom's index). Parameters ---------- init_mol: RDKit mol object base_atom_idx: Int Integer denoting the the atom from which an ion will be removed from ion_atomic_num: Int Integer denoting the atomic number of the ion to be removed sanitize: Bool Kekulize, check valencies, set aromaticity, conjugation and hybridization forcefield: str Specify "UFF" for Universal Force Field optimization by RDKit Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 Specify "MMFF94s" for the MMFF94 s variant ff_iter: Int Integer specifying the max iterations for the specified forcefield Returns ------- Deionized RDKit mol object ''' mw = Chem.RWMol(init_mol) ba = mw.GetAtomWithIdx(base_atom_idx) fc = ba.GetFormalCharge() nbrs_dict = {nbr.GetAtomicNum(): nbr.GetIdx() for nbr in ba.GetNeighbors()} mw.RemoveAtom(nbrs_dict[ion_atomic_num]) ba.SetFormalCharge(fc-1) mw.UpdatePropertyCache(strict=False) if sanitize is True: Chem.SanitizeMol(mw) if forcefield: tempff = self._forcefield_selector(forcefield, mw) if 'mmff94s' in forcefield: tempff(mw, mmffVariant=forcefield, maxIters=ff_iter) else: tempff(mw, maxIters=ff_iter) geom = isicle.geometry.Geometry( mol=mw, basename=self.geom.get_basename()) self._update_geometry_charge(geom) return geom
[docs] def _apply_remove_ion(self, init_mol, atom_dict, ion_atomic_num, sanitize, forcefield, ff_iter): ''' Iteratively deionize all atoms in a supplied dictionary. Parameters ---------- init_mol: RDKit mol object atom_dict: Dict Dictionary of format {<atom.rdkit.obj>:[atom Symbol, atom Total Valence, atom Degree, # bonded H atoms]} ion_atomic_num: Int Integer denoting the atomic number of the ion to be removed sanitize: Bool Kekulize, check valencies, set aromaticity, conjugation and hybridization forcefield: str Specify "UFF" for Universal Force Field optimization by RDKit Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 Specify "MMFF94s" for the MMFF94 s variant ff_iter: Int Integer specifying the max iterations for the specified forcefield Returns ------- Dictionary of style {base atom index: deionized RDKit mol object} ''' mol_dict = {} for key, value in atom_dict.items(): mw = self._remove_ion( init_mol, key, ion_atomic_num, sanitize, forcefield, ff_iter) mol_dict[key] = mw return mol_dict
[docs] def _negative_mode(self, init_mol, ion_atomic_num, batch, index_list, element_list, sanitize, forcefield, ff_iter, include_Alkali_ne): ''' Subsets atoms in supplied mol by specified index or elemental constraints, or feasibilty based upon supplied ion atomic number. Parameters ---------- init_mol: RDKit mol object ion_atomic_num: Int Integer denoting the atomic number of the ion to be removed batch: Bool Whether adducts should be formed from all possible heavy atoms index_list: list list of integers denoting IUPAC atom indexes of base atoms to be deionized (eg. [0,1]) element_list: list list of atoms symbols denoting base atom types to be ionized (eg. ['O','N']) sanitize: Bool Kekulize, check valencies, set aromaticity, conjugation and hybridization forcefield: str Specify "UFF" for Universal Force Field optimization by RDKit Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 Specify "MMFF94s" for the MMFF94 s variant ff_iter: Int Integer specifying the max iterations for the specified forcefield include_Alkaline_ne: Bool If False: removes alkali and alkaline metals from base atoms to be deionized If True: Adducts are attempted to be formed at these atoms, in addition to other atoms. Returns ------- Dictionary of style {base atom index: deionized RDKit mol object} ''' atom_dict = self._modify_atom_dict( init_mol, ion_atomic_num, mode='negative', include_Alkali_ne=include_Alkali_ne) if index_list is not None: subset_atom_dict = self._subset_atom_dict( atom_dict, index_list=index_list) mol_dict = self._apply_remove_ion( init_mol, subset_atom_dict, ion_atomic_num, sanitize, forcefield, ff_iter) elif element_list is not None: subset_atom_dict = self._subset_atom_dict( atom_dict, element_list=element_list) mol_dict = self._apply_remove_ion( init_mol, subset_atom_dict, ion_atomic_num, sanitize, forcefield, ff_iter) elif batch == True: mol_dict = self._apply_remove_ion( init_mol, atom_dict, ion_atomic_num, sanitize, forcefield, ff_iter) else: raise ValueError( 'Must provide a list of atom indexes, list of elements, or set batch=True.') return mol_dict
[docs] def submit(self, batch=True, index_list=None, element_list=None, sanitize=True, forcefield='UFF', ff_iter=200, include_Alkali_ne=False): ''' Calls positive or negative ionization modes based upon supplied ion list. Parameters ---------- batch: Bool Whether adducts should be formed from all possible heavy atoms index_list: list list of integers denoting IUPAC atom indexes of base atoms to be ionized (eg. [0,1]) element_list: list list of atoms symbols denoting base atom types to be ionized (eg. [`O`,`N`]) sanitize: Bool Kekulize, check valencies, set aromaticity, conjugation and hybridization forcefield: str Specify `UFF` for Universal Force Field optimization by RDKit (default) Specify `MMFF` or `MMFF94` for Merck Molecular Force Field 94 Specify `MMFF94s` for the MMFF94 s variant ff_iter: Int Integer specifying the max iterations for the specified forcefield (200 default) include_Alkaline_ne: Bool If False: removes alkali and alkaline metals from base atoms to be ionized (default) If True: Adducts are attempted to be formed at these atoms, in addition to other atoms. Returns ------- Dictionary of style {base atom index: ionized RDKit mol object} ''' pt = Chem.GetPeriodicTable() anion_dict = {} cation_dict = {} complex_dict = {} if index_list is not None: index_list = safelist(index_list) if element_list is not None: element_list = safelist(element_list) for x in self.adducts['cations']: ion_atomic_num = pt.GetAtomicNumber( re.findall('(.+?)[0-9]?[+]', x)[0]) mol_dict = self._positive_mode(self.geom.mol, ion_atomic_num, batch, index_list, element_list, sanitize, forcefield, ff_iter, include_Alkali_ne) cation_dict[x] = list(mol_dict.values()) for x in self.adducts['anions']: ion_atomic_num = pt.GetAtomicNumber( re.findall('(.+?)[0-9]?[-]', x)[0]) mol_dict = self._negative_mode(self.geom.mol, ion_atomic_num, batch, index_list, element_list, sanitize, forcefield, ff_iter, include_Alkali_ne) anion_dict[x] = list(mol_dict.values()) for x in self.adducts['complex']: # This self-determined charge doesn't account for K(2+) or CO2- type adducts parsed = re.findall('.*?[+-]', x) # TODO strip whitespace geom_list = safelist(self.geom) for ion in parsed: ion_atomic_num = pt.GetAtomicNumber( re.findall('(.+?)[0-9]?[+-]', ion)[0]) if ('+') in ion: res = list(map(lambda geom: list(self._positive_mode(geom.mol, ion_atomic_num, batch, index_list, element_list, sanitize, forcefield, ff_iter, include_Alkali_ne).values()), geom_list)) complex_dict[x] = functools.reduce( operator.iconcat, res, []) elif ('-') in ion: res = list(map(lambda geom: list(self._negative_mode(geom.mol, ion_atomic_num, batch, index_list, element_list, sanitize, forcefield, ff_iter, include_Alkali_ne).values()), geom_list)) complex_dict[x] = functools.reduce( operator.iconcat, res, []) geom_list = complex_dict.get(x) # Combine ion dictionaries and index ensemble = [] for k, v in {**cation_dict, **anion_dict, **complex_dict}.items(): id = 0 for geom in v: geom.__dict__.update(ion=k, adductID=id) ensemble.append(geom) id += 1 self.adducts = build_adduct_ensemble(ensemble)
[docs] def finish(self): ''' Parse results. Returns ------- :obj:`~isicle.adducts.ExplicitIonizationWrapper` ''' return self
[docs] def run(self, geom, ion_path=None, ion_list=None, **kwargs): ''' Ionize geometry via with supplied geometry and file containing list of ions. Parameters ---------- geom : :obj:`~isicle.geometry.Geometry` Molecule representation. ion_path : str Filepath to text file containing ions with charge (eg. `H+`) to be considered Either ion_path or ion_list must be specified ion_list : list List of strings of adducts to be considered. Must be specifed in syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+` Either ion_path or ion_list must be specified **kwargs Keyword arguments to configure how ionization is run. See :meth:`~isicle.adducts.ExplicitIonizationWrapper.submit`. Returns ------- :obj:`~isicle.adducts.ExplicitIonizationWrapper` ''' # New instance self = ExplicitIonizationWrapper(**geom.__dict__) # Sets geom to self.geom self.set_geometry(geom) # Load specified ions by type # Validity check if negative ionization can be done self.configure(ion_path=ion_path, ion_list=ion_list) # Generate adducts self.submit(**kwargs) # Finish self.finish() return self
[docs] def get_structures(self): ''' Extract all structures from containing object as a conformational ensemble. Returns ------- :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. ''' return self.adducts.get_structures()
[docs] def get_adducts(self): ''' Extract all structures from containing object as an adduct ensemble. Returns ------- :obj:`~isicle.adducts.AdductEnsemble` Adduct ensemble. ''' return self.adducts
[docs] def save_pickle(self, path): ''' Pickle this class instance. Parameters ---------- path : str Path to save pickle object to. ''' with open(path, 'wb') as f: pickle.dump(self, f)
[docs] def save(self, path): ''' Save this class instance. Parameters ---------- path : str Path to save object to. ''' self.save_pickle(path)
[docs] class CRESTIonizationWrapper(WrapperInterface): ''' ''' _defaults = ('geom', 'adducts') _default_value = None def __init__(self, **kwargs): self.__dict__.update(dict.fromkeys( self._defaults, self._default_value)) self.__dict__.update(**kwargs) if self.adducts is None: self.adducts = {}
[docs] def set_geometry(self, geom): ''' Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters ---------- geom : :obj:`~isicle.geometry.Geometry` Molecule representation. ''' if self.__dict__.get('geom') is not None: pass elif self.__dict__.get('mol') is not None: self.geom = geom elif self.__dict__.get('xyz') is not None: self.geom = geom else: raise ValueError('Could not find self.xyz, self.mol, or self.geom')
[docs] def _set_ions(self, ion_path=None, ion_list=None): cations, anions, complex = _parse_ions( ion_path=ion_path, ion_list=ion_list) # cast to list safely self.adducts['cations'] = safelist(cations) self.adducts['anions'] = safelist(anions) self.adducts['complex'] = safelist(complex)
[docs] def _filter_supported_by_xtb(self, unknown_valid_list): ''' Filters ions by what is supported by xtb software. consult https://github.com/grimme-lab/crest/issues/2 for supported ions by xtb CREST xtb supports alkali and alkaline earth metals. ''' pt = Chem.GetPeriodicTable() valid_list = [] for ion in unknown_valid_list: parsed_ion = re.findall('(.+?)[0-9]?[+-]', ion) if len(parsed_ion) == 0: raise ValueError( f"Couldn't identify supplied ion {ion} in _filter_supported_by_xtb") else: parsed_num = [pt.GetAtomicNumber(i) for i in parsed_ion] group_check = [_check_atom_group(i) for i in parsed_num] if False in group_check: continue else: valid_list.append(ion) return valid_list
[docs] def _check_valid(self): ''' Performs substructure search and checks if supported by CREST documentation ''' self.adducts['cations'] = self._filter_supported_by_xtb( self.adducts['cations']) self.adducts['anions'] = self._filter_supported_by_xtb( self.adducts['anions']) self.adducts['complex'] = self._filter_supported_by_xtb( self.adducts['complex']) self.adducts['anions'] = _filter_by_substructure_match( self.geom.mol, self.adducts['anions']) self.adducts['complex'] = _filter_by_substructure_match( self.geom.mol, self.adducts['complex'])
[docs] def configure(self, ion_path=None, ion_list=None): self._set_ions(ion_path=ion_path, ion_list=ion_list) self._check_valid()
[docs] def _parse_ion_charge(self, ion): ''' ''' # TODO update with MSAC code for ion charge lookup table charge = re.findall('[0-9]', ion) if not charge: charge = 1 else: charge = int(charge[0]) return charge
# TODO: update this
[docs] def _update_geometry_charge(self, geom, adduct, ion_charge, mode): ''' ''' if mode == 'negative': charge = geom.formal_charge - ion_charge elif mode == 'positive': charge = geom.formal_charge + ion_charge adduct.__dict__.update(charge=charge)
[docs] def _positive_mode(self, geom, forcefield, ewin, cation, optlevel, dryrun, processes, solvation, ignore_topology): ''' Call isicle.md.md for specified geom and cation ''' output = md(geom, program='xtb', task='protonate', forcefield=forcefield, ewin=ewin, ion=cation, optlevel=optlevel, dryrun=dryrun, processes=processes, solvation=solvation, ignore_topology=ignore_topology) ion_charge = self._parse_ion_charge(cation) for adduct in output.geom: self._update_geometry_charge(geom, adduct, ion_charge, 'positive') return output.geom
[docs] def _negative_mode(self, geom, forcefield, ewin, anion, optlevel, dryrun, processes, solvation, ignore_topology): ''' Call isicle.md.md for specified geom and anion ''' output = md(geom, program='xtb', task='deprotonate', forcefield=forcefield, ewin=ewin, ion=anion, optlevel=optlevel, dryrun=dryrun, processes=processes, solvation=solvation, ignore_topology=ignore_topology) ion_charge = self._parse_ion_charge(anion) for adduct in output.geom: self._update_geometry_charge(geom, adduct, ion_charge, 'negative') return output.geom
[docs] def submit(self, forcefield='gfn2', ewin=30, optlevel='Normal', dryrun=False, processes=1, solvation=None, ignore_topology=False): ''' Call positive_mode and negative_mode to ionize according to parsed ion lists isicle.md.md returned by both modes to self.anion, self.cation, self.complex \ in form of {ion : xyz object [returned by xtb]} Input ----- see isicle.md.md for forcefield, ewin, optlevel, dryrun molecular_charge is net charge of structure pre-ionization, default neutral, 0 ''' anion_dict = {} cation_dict = {} complex_dict = {} for x in self.adducts['cations']: cation_dict[x] = self._positive_mode( self.geom, forcefield, ewin, x, optlevel, dryrun, processes, solvation, ignore_topology) for x in self.adducts['anions']: anion_dict[x] = self._negative_mode( self.geom, forcefield, ewin, x, optlevel, dryrun, processes, solvation, ignore_topology) for x in self.adducts['complex']: # This self-determined charge doesn't account for K(2+) or CO2- type adducts parsed = re.findall('.*?[+-]', x) # TODO strip whitespace geom_list = safelist(self.geom) for ion in parsed: if ('+') in ion: # complex_dict[x] = res = list(map(lambda geom: self._positive_mode(geom, forcefield, ewin, ion, optlevel, dryrun, processes, solvation, ignore_topology), geom_list)) complex_dict[x] = functools.reduce( operator.iconcat, res, []) elif ('-') in ion: res = list(map(lambda geom: self._negative_mode(geom, forcefield, ewin, ion, optlevel, dryrun, processes, solvation, ignore_topology), geom_list)) complex_dict[x] = functools.reduce( operator.iconcat, res, []) geom_list = complex_dict.get(x) # Combine ion dictionaries and index ensemble = [] for k, v in {**cation_dict, **anion_dict, **complex_dict}.items(): id = 0 for geom in v: geom.__dict__.update(ion=k, adductID=id) ensemble.append(geom) id += 1 self.adducts = build_adduct_ensemble(ensemble)
[docs] def finish(self): ''' Parse results. Returns ------- :obj:`~isicle.adducts.CRESTIonizationWrapper` ''' return self
[docs] def run(self, geom, ion_path=None, ion_list=None, **kwargs): ''' Ionize geometry via with supplied geometry and file containing list of ions. Parameters ---------- geom : :obj:`~isicle.geometry.Geometry` Molecule representation. ion_method : str Alias for ionaztion method selection (explicit). ion_list : list List of ion str. See :meth`~isicle.adducts.parse_ions`. **kwargs Keyword arguments to configure how ionization is run. See :meth:`~isicle.adducts.CRESTIonizationWrapper.generator` Returns ------- :obj:`~isicle.adducts.CRESTIonizationWrapper` ''' self = CRESTIonizationWrapper(**geom.__dict__) self.set_geometry(geom) # Load specified ions by type # Validity check if negative ionization can be done # Validity check if CREST supports the ions specified self.configure(ion_path=ion_path, ion_list=ion_list) # Generate adducts self.submit(**kwargs) # Compile various adducts into one dictionary: self.adducts self.finish() return self
[docs] def get_structures(self): ''' Extract all structures from containing object as a conformational ensemble. Returns ------- :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. ''' return self.adducts.get_structures()
[docs] def get_adducts(self): ''' Extract all structures from containing object as an adduct ensemble. Returns ------- :obj:`~isicle.adducts.AdductEnsemble` Adduct ensemble. ''' return self.adducts
[docs] def save_pickle(self, path): ''' Pickle this class instance. Parameters ---------- path : str Path to save pickle object to. ''' with open(path, 'wb') as f: pickle.dump(self, f)
[docs] def save(self, path): ''' Save this class instance. Parameters ---------- path : str Path to save object to. ''' self.save_pickle(path)