import numpy as np
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.SaltRemover import SaltRemover
from rdkit.Chem import AllChem
import isicle
from isicle.interfaces import GeometryInterface
[docs]
class Geometry(GeometryInterface):
"""
Molecule information, specialized for 3D representations. It is not
recommended to manipulate or retrieve attributes of this class without
using class functions.
"""
_defaults = [
"mol",
"basename",
"_energy",
"_shielding",
"_spin",
"_frequency",
"_molden",
"_charge",
"_connectivity",
"_formal_charge",
"_ccs",
]
_default_value = None
def __init__(self, **kwargs):
self.__dict__.update(dict.fromkeys(self._defaults, self._default_value))
self.__dict__.update(kwargs)
@property
def energy(self):
"""
Get total energy of the molecule.
Returns
-------
float
Total energy.
"""
return self._energy
@property
def orbital_energies(self):
"""
Get orbital energies of the molecule.
Returns
-------
:obj:`~pandas.DataFrame`
Orbital energies.
"""
return self._orbital_energies
@property
def shielding(self):
"""
Get orbital energies of the molecule.
Returns
-------
dict
Shielding values.
"""
return self.shielding
@property
def spin(self):
"""
Get spin couplings of the molecule.
Returns
-------
:obj:`~pandas.DataFrame`
Spin couplings.
"""
return self._spin
@property
def frequency(self):
"""
Get frequency values of the molecule.
Returns
-------
dict
Frequency values.
"""
return self._frequency
@property
def molden(self):
"""
Get Molden values of the molecule.
Returns
-------
dict
Frequency values.
"""
return self._molden
@property
def charge(self):
"""
Get per-atoms charges of the molecule.
Returns
-------
list : float
Per-atom charges.
"""
return self._charge
@property
def connectivity(self):
"""
Get molecule connectivity.
Returns
-------
list
Pairwise atom connectivity.
"""
return self._connectivity
@property
def formal_charge(self):
"""
Get formal charge of the molecule.
Returns
-------
int
Formal charge.
"""
return Chem.rdmolops.GetFormalCharge(self.to_mol())
@property
def ccs(self):
"""
Get CCS of the molecule.
Returns
-------
float
Collision cross section.
"""
return self._ccs
[docs]
def view(self):
"""
View internal rdkit mol object representation.
Returns
-------
:obj:`~rdkit.Chem.rdchem.Mol`
RDKit Mol instance.
"""
return self.to_mol()
[docs]
def get_basename(self):
"""
Returns a copy of this object's basename (original filename).
Returns
-------
str
Basename of original filepath.
"""
return self.basename
[docs]
def add___dict__(self, d, override=False):
"""
Accepts a dictionary of values and adds any non-conflicting
information to the attribute dictionary.
Parameters
----------
d : dict
Attribute dictionary.
override : bool
Singal whether to override existing attributes.
"""
if not override:
# Remove keys that are already present in attribute dictionary
[d.pop(k, None) for k in self.__dict__.keys()]
# Add to attributes
self.__dict__.update(d)
[docs]
def get___dict__(self):
"""
Return a copy of this object's attributes as a dictionary.
Returns
-------
dict
Instance attributes.
"""
return self.__dict__.copy()
def __copy__(self, **kwargs):
"""
Return hard copy of this class instance.
Parameters
----------
kwargs
Keyword arguments to update copy.
Returns
-------
:obj:`~isicle.geometry.Geometry`
Molecule representation.
"""
# Copy attributes
d = self.__dict__.copy()
# Safely copy mol if present
if "mol" in d:
d["mol"] = self.to_mol()
# Build self instance from scratch
instance = type(self)(**d)
# Update from kwargs
instance.__dict__.update(kwargs)
return instance
[docs]
def _is_embedded(self, mol):
"""
Check if molecule has embedded 3D coordinates.
Returns
-------
bool
Indication of 3D coordinate presence.
"""
try:
mol.GetConformer().Is3D()
return True
except:
return False
[docs]
def update_coordinates(self, other):
"""
Update atom coordinates using another geometry as reference.
Parameters
----------
other : :obj:`~isicle.geometry.Geometry`
Geometry with reference coordinates.
Returns
-------
:obj:`~isicle.geometry.Geometry`
Molecule representation.
"""
# Get copies of mol object
mol = self.to_mol()
mol_other = other.to_mol()
# Iterate atoms
for i in range(mol.GetNumAtoms()):
# Get coordinates to update
pos_other = mol_other.GetConformer().GetAtomPosition(i)
# Set coordinates
mol.GetConformer().SetAtomPosition(i, pos_other)
# Return fresh instance
return self.__copy__(mol=mol)
[docs]
def addHs(self):
"""
Add implicit hydrogens to molecule.
Returns
-------
:obj:`~isicle.geometry.Geometry`
Molecule representation.
"""
# Get copy of mol object
mol = self.to_mol()
# Add Hs with coordinates
mol = Chem.AddHs(mol, addCoords=True)
# Return new geometry instance
return self.__copy__(mol=mol)
[docs]
def initial_optimize(self, embed=False, forcefield="UFF", ff_iter=200):
"""
Initial molecule optimization by basic force field to establish rough
3D coordinates. Further optimization (molecular dynamics, density
functional theory) recommended.
Parameters
----------
embed: bool
Attempt molecule embedding with eigenvalues of distance matrix.
Failure results in seeding with random coordinates.
forcefield: str
Specify "UFF" for universal force field, "MMFF" or "MMFF94" for
Merck molecular force field 94, or "MMFF94s" for the MMFF94 s variant.
Returns
-------
:obj:`~isicle.geometry.Geometry`
Molecule representation.
"""
def _forcefield_selector(forcefield, mw):
"""
Checks if user specified forcefield is compatible with supplied mol
"""
forcefield = forcefield.lower()
if forcefield == "uff":
if Chem.rdForceFieldHelpers.UFFHasAllMoleculeParams(mw) is True:
return AllChem.UFFOptimizeMolecule
else:
raise ValueError("UFF is not available for all atoms in molecule.")
elif forcefield in ["mmff", "mmff94", "mmff94s"]:
if Chem.rdForceFieldHelpers.MMFFHasAllMoleculeParams(mw) is True:
return Chem.rdForceFieldHelpers.MMFFOptimizeMolecule
else:
raise ValueError(
"specified MMFF is not available for all atoms in molecule."
)
else:
raise ValueError(
"RDKit only supports UFF, MMFF, MMFF94, MMFF94s as forcefields."
)
# Get rdkit mol
mol = self.to_mol()
# Embed molecule 3D coordinates
if embed is True:
# Attempt embedding
res = AllChem.EmbedMolecule(mol)
if res == -1:
# Use random coordinates
res = AllChem.EmbedMolecule(mol, useRandomCoords=True)
if res == -1:
raise ValueError("Embedding failure.")
# Optimize according to supplied forcefield
if forcefield is not None:
# Check if embedded
if self._is_embedded(mol):
# Forcefield selection
if "mmff94s" in forcefield.lower():
_forcefield_selector(forcefield, mol)(
mol, mmffVariant=forcefield, maxIters=ff_iter
)
else:
_forcefield_selector(forcefield, mol)(mol, maxIters=ff_iter)
# Not embedded
else:
raise ValueError("Molecule must have embedded 3D coordinates.")
# Return copy with updated mol
return self.__copy__(mol=mol)
[docs]
def desalt(self, salts=None):
"""
Desalts RDKit mol object using Chem.SaltRemover module.
Parameters
----------
salts : str (optional)
Salt type to remove. Ex: 'Cl', 'Br', '[Na+]'. Default: None.
Returns
-------
:obj:`~isicle.geometry.Geometry`
Moleculerepresentation.
"""
# TODO: should this raise an error instead?
# If no salts given, skip desalting
if salts is None:
return self.__copy__()
remover = SaltRemover(defnFormat="smiles", defnData=salts)
# defnData="[Cl,Br,Na]" *sample definition of salts to be removed*
# add iterator for salts listed in config?
# set potential salts to be removed in a config file
mol, deleted = remover.StripMolWithDeleted(self.to_mol())
# using StripMolWithDeleted instead of StripMol
# add functionality to track removed salts
# atomno = res.GetNumAtoms
# if relevant to future use, returns atom count post desalting
return self.__copy__(mol=mol)
[docs]
def neutralize(self):
"""
Neutralizes RDKit mol object using neutralization reactions.
Returns
-------
:obj:`~isicle.geometry.Geometry`
Molecule representation.
"""
def _initialize_neutralisation_reactions():
patts = (
# Imidazoles
("[n+;H]", "n"),
# Amines
("[N+;!H0]", "N"),
# Carboxylic acids and alcohols
("[$([O-]);!$([O-][#7])]", "O"),
# Thiols
("[S-;X1]", "S"),
# Sulfonamides
("[$([N-;X2]S(=O)=O)]", "N"),
# Enamines
("[$([N-;X2][C,N]=C)]", "N"),
# Tetrazoles
("[n-]", "[nH]"),
# Sulfoxides
("[$([S-]=O)]", "S"),
# Amides
("[$([N-]C=O)]", "N"),
)
return [(Chem.MolFromSmarts(x), Chem.MolFromSmiles(y)) for x, y in patts]
reactions = _initialize_neutralisation_reactions()
mol = self.to_mol()
# TODO: `replaced` is unused
replaced = False
for i, (reactant, product) in enumerate(reactions):
while mol.HasSubstructMatch(reactant):
replaced = True
rms = AllChem.ReplaceSubstructs(mol, reactant, product)
mol = rms[0]
return self.__copy__(mol=mol)
[docs]
def tautomerize(self, return_all=False):
"""
Generate tautomers according to RDKit TautomerEnumerator() method.
Parameters
----------
return_all : boolean (optional)
If true, return all tautomers generated. Otherwise, only return
the most common. Default=False
Returns
-------
:obj:`~isicle.geometry.Geometry` or list of :obj:`~isicle.geometry.Geometry`
Tautomer(s) of starting structure.
"""
# source: https://rdkit.blogspot.com/2020/01/trying-out-new-tautomer.html
# Discuss noted double bond changes
enumerator = rdMolStandardize.TautomerEnumerator()
mol = self.to_mol()
res = [mol]
tauts = enumerator.Enumerate(mol)
smis = [Chem.MolToSmiles(x) for x in tauts]
s_smis = sorted((x, y) for x, y in zip(smis, tauts) if x != self.to_smiles())
res += [y for x, y in s_smis]
# Ensure res is a list of mol objects
if return_all:
new_geoms = []
for r in res:
geom = self.__copy__(mol=r)
new_geoms.append(geom)
return new_geoms
return self.__copy__(mol=res[0])
[docs]
def kekulize(self):
"""
Kekulizes the molecule if possible. Otherwise the molecule is not modified.
This is recommended for aromatic molecules undergoing explicit ionization.
Aromatic bonds are explicitly defined and aromatic flags are cleared.
"""
mol = Chem.KekulizeIfPossible(self.to_mol(), clearAromaticFlags=True)
return self.__copy__(mol=mol)
[docs]
def ionize(self, ion_path=None, ion_list=None, method="explicit", **kwargs):
"""
Ionize geometry, using specified list of ions and method of ionization.
Parameters
----------
ion_path : str
Filepath to text file containing ions with charge (eg. `H+`) to be
considered. Either ion_path or ion_list must be specified.
ion_list : list
List of strings of adducts to be considered. Must be specifed in
syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+`. Either ion_path
or ion_list must be specified
method : str
Method of ionization to be used, 'explicit' or 'crest' is accepted
ensembl : bool (optional)
Returns instead a list of adduct geometries
kwargs:
see :meth: `~isicle.adducts.ExplicitIonizationWrapper.submit` or
`~isicle.adducts.CRESTIonizationWrapper.submit`
for more options
Returns
-------
:obj:`~isicle.adducts.IonizationWrapper`
Ionization result.
"""
iw = isicle.adducts.ionize(method).run(
self.__copy__(), ion_path=ion_path, ion_list=ion_list, **kwargs
)
return iw
[docs]
def get_natoms(self):
"""
Calculate total number of atoms.
Returns
-------
int
Number of atoms.
"""
return Chem.Mol.GetNumAtoms(self.to_mol())
[docs]
def get_atom_indices(
self, atoms=["C", "H"], lookup={"C": 6, "H": 1, "N": 7, "O": 8, "F": 9, "P": 15}
):
"""
Extract indices of each atom from the internal geometry.
Parameters
----------
atoms : list of str
Atom types of interest.
lookup : dict
Mapping between atom symbol and atomic number.
Returns
-------
list of int
Atom indices.
"""
atoms = [lookup[x] for x in atoms]
idx = []
for a in self.mol.GetAtoms():
if a.GetAtomicNum() in atoms:
idx.append(a.GetIdx())
return idx
[docs]
def get_total_partial_charge(self):
"""
Sum the partial charge across all atoms.
Returns
-------
float
Total partial charge.
"""
mol = self.to_mol()
AllChem.ComputeGasteigerCharges(mol)
contribs = [
mol.GetAtomWithIdx(i).GetDoubleProp("_GasteigerCharge")
for i in range(mol.GetNumAtoms())
]
return np.nansum(contribs)
[docs]
def dft(self, backend="NWChem", **kwargs):
"""
Perform density functional theory calculations according to supplied task list
and configuration parameters for specified backend.
Parameters
----------
backend : str
Alias for backend selection (NWChem, ORCA).
kwargs
Keyword arguments supplied to selected program. See `run` method of relevant
wrapper for configuration parameters, e.g. :meth:`~isicle.qm.NWChemWrapper.run`.
Returns
-------
:obj:`~isicle.qm.{program}Wrapper`
Wrapper object containing relevant outputs from the simulation.
"""
return isicle.qm.dft(self, backend=backend, **kwargs)
[docs]
def md(self, backend="xtb", **kwargs):
"""
Optimize geometry or generate conformers or adducts using stated forcefield.
Parameters
----------
kwargs
Keyword arguments supplied to selected program. See `run` method of relevant
wrapper for configuration parameters, e.g. :meth:`~isicle.md.XTBWrapper.run`.
Returns
-------
:obj:`~isicle.md.{program}Wrapper`
Wrapper object containing relevant outputs from the simulation.
"""
return isicle.md.md(self, backend=backend, **kwargs)
[docs]
def to_mol(self):
"""
Returns :obj:`~rdkit.Chem.rdchem.Mol` instance for this Geometry.
Returns
-------
:obj:`~rdkit.Chem.rdchem.Mol`
RDKit Mol instance.
"""
return self.mol.__copy__()
[docs]
def to_smiles(self):
"""
Get SMILES for this structure.
Returns
-------
str
SMILES string.
"""
return Chem.MolToSmiles(self.to_mol())
[docs]
def to_inchi(self):
"""
Get InChI for this structure.
Returns
-------
str
InChI string.
"""
return Chem.MolToInchi(self.to_mol())
[docs]
def to_smarts(self):
"""
Get SMARTS for this structure.
Returns
-------
str
SMARTS string.
"""
return Chem.MolToSmarts(self.to_mol())
[docs]
def to_xyzblock(self):
"""
Get XYZ text for this structure.
Returns
-------
str
XYZ representation as string.
"""
return Chem.MolToXYZBlock(self.to_mol())
[docs]
def to_pdbblock(self):
"""
Get PDB text for this structure.
Returns
-------
str
PDB representation as string.
"""
return Chem.MolToPDBBlock(self.to_mol())
[docs]
def to_molblock(self):
"""
Get :obj:`~rdkit.Chem.rdchem.Mol` text for this structure.
Returns
-------
str
:obj:`~rdkit.Chem.rdchem.Mol` representation as string.
"""
return Chem.MolToMolBlock(self.to_mol())