import os
import pickle
from io import StringIO
import joblib
import pandas as pd
from rdkit import Chem
from rdkit.Chem import rdDetermineBonds
import isicle
[docs]
def _load_text(path: str):
    """
    Read a text file and return its lines, whitespace-stripped.

    Parameters
    ----------
    path : str
        Path to text file.

    Returns
    -------
    list
        One string per line in the file, with leading and trailing
        whitespace (including the newline) removed.

    """
    # Iterating the handle yields the same lines as ``readlines()``
    with open(path, "r") as handle:
        return [line.strip() for line in handle]
[docs]
def load_xyz(path, charge=None):
    """
    Load XYZ from file.

    Parameters
    ----------
    path : str
        Path to XYZ file.
    charge : int, optional
        Net molecular charge. When given, RDKit bond perception
        (``rdDetermineBonds.DetermineBonds``) is run on the loaded molecule
        using this charge. When omitted (default), no bond perception is
        performed and the molecule is used as parsed.

    Returns
    -------
    :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    Raises
    ------
    ValueError
        If RDKit returns no molecule for the given file.

    """
    # Parse file; fail with a clear message instead of passing ``None``
    # on to the ``Chem.Mol`` constructor
    raw_mol = Chem.MolFromXYZFile(path)
    _check_mol(raw_mol, path)

    # Create mol object
    mol = Chem.Mol(raw_mol)

    # Bond perception is opt-in: only attempted when a charge is supplied
    if charge is not None:
        rdDetermineBonds.DetermineBonds(mol, charge=charge)

    # Basename is the file name without directory or extension
    basename = os.path.splitext(os.path.basename(path))[0]

    # Initialize Geometry instance
    return isicle.geometry.Geometry(mol=mol, basename=basename)
[docs]
def _check_mol(mol, string_struct):
    """
    Raise an error if a mol object failed to generate.

    Parameters
    ----------
    mol : :obj:`~rdkit.Chem.rdchem.Mol`
        RDKit representation of molecule structure, or None on failure.
    string_struct : str
        Input used to initialize Mol object; included in the error message.

    Raises
    ------
    ValueError
        If `mol` is None.

    """
    # Anything other than None is accepted as a successful conversion
    if mol is not None:
        return

    message = "Could not convert structure to mol: {}".format(string_struct)
    raise ValueError(message)
[docs]
def _load_mol_from_file(path, func=None):
    """
    Load RDKit mol representation from file (pdb, mol, mol2).

    Parameters
    ----------
    path : str
        Path to supported file.
    func : callable
        Reader invoked as ``func(path, removeHs=False)``, e.g.
        ``Chem.MolFromMolFile``. For ``Chem.MolFromMolFile``,
        ``strictParsing=False`` is passed as well.

    Returns
    -------
    :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    Raises
    ------
    ValueError
        If the reader returns None.

    """
    # Hydrogens are always kept; strict parsing is disabled for MOL files
    reader_options = {"removeHs": False}
    if func == Chem.MolFromMolFile:
        reader_options["strictParsing"] = False
    mol = func(path, **reader_options)

    # Check result
    _check_mol(mol, path)

    # Basename is the file name without directory or extension
    stem, _ = os.path.splitext(os.path.basename(path))

    # Initialize geometry instance
    return isicle.geometry.Geometry(mol=mol, basename=stem)
[docs]
def load_mol(path):
    """
    Load a MOL file into a geometry instance.

    Parameters
    ----------
    path : str
        Path to mol file.

    Returns
    -------
    :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    """
    # Delegate to the shared file loader with the MOL reader
    reader = Chem.MolFromMolFile
    return _load_mol_from_file(path, func=reader)
[docs]
def load_mol2(path: str):
    """
    Load a MOL2 file into a geometry instance.

    Parameters
    ----------
    path : str
        Path to mol2 file.

    Returns
    -------
    :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    """
    # Delegate to the shared file loader with the MOL2 reader
    reader = Chem.MolFromMol2File
    return _load_mol_from_file(path, func=reader)
[docs]
def load_pdb(path):
    """
    Load a PDB file into a geometry instance.

    Parameters
    ----------
    path : str
        Path to PDB file.

    Returns
    -------
    :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    """
    # Delegate to the shared file loader with the PDB reader
    reader = Chem.MolFromPDBFile
    return _load_mol_from_file(path, func=reader)
[docs]
def _load_line_notation(path, func=None, force=False, string=False):
    """
    Load line notation representation (InChI, SMILES) from file or string.

    Parameters
    ----------
    path : str
        Path to file, or the line notation itself when `string` is set.
    func : callable
        Parser applied to the notation text, e.g. ``Chem.MolFromSmiles``.
    force : bool
        Indicate whether to force load input, ignoring errors. When True,
        the parser is called with ``sanitize=False`` and the property cache
        is updated non-strictly afterwards.
    string : bool
        If set, `path` is treated as the notation text rather than a file
        path, and the basename is the InChIKey of the resulting molecule.

    Returns
    -------
    :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    Raises
    ------
    ValueError
        If the parser (or hydrogen addition) yields None.

    """
    if string:
        # Input is the notation itself; basename is derived later
        text = path
        basename = None
    else:
        # Only the first line of the file is used
        text = _load_text(path)[0].strip()
        basename = os.path.splitext(os.path.basename(path))[0]

    # Forced loads skip sanitization; otherwise parse with parser defaults
    parser_options = {"sanitize": False} if force is True else {}
    mol = func(text, **parser_options)
    _check_mol(mol, text)
    if force is True:
        mol.UpdatePropertyCache(strict=False)

    # Add explicit hydrogens
    mol = Chem.AddHs(mol)
    _check_mol(mol, text)

    # Fall back to the InChIKey when no file name is available
    if basename is None:
        basename = Chem.MolToInchiKey(mol)

    # Initialize geometry instance
    return isicle.geometry.Geometry(mol=mol, basename=basename)
[docs]
def load_smiles(path, force=False):
    """
    Load SMILES from file or from a SMILES string.

    Parameters
    ----------
    path : str
        Path to file. If the extension does not contain "smi", the value
        is treated as the SMILES string itself.
    force : bool
        Indicate whether to force load input, ignoring errors.

    Returns
    -------
    :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    """
    extension = os.path.splitext(path)[-1].lower()

    # Anything without a "smi"-like extension is taken as a literal string
    is_literal = "smi" not in extension

    return _load_line_notation(
        path, func=Chem.MolFromSmiles, force=force, string=is_literal
    )
[docs]
def load_inchi(path, force=False):
    """
    Load InChI from file or from an InChI string.

    Parameters
    ----------
    path : str
        Path to file. If the value contains "inchi=" (case-insensitive),
        it is treated as the InChI string itself.
    force : bool
        Indicate whether to force load input, ignoring errors.

    Returns
    -------
    :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    """
    # An "InChI=" prefix marks a literal identifier rather than a file path
    is_literal = "inchi=" in path.lower()

    return _load_line_notation(
        path, func=Chem.MolFromInchi, force=force, string=is_literal
    )
[docs]
def load_pickle(path):
    """
    Load pickled file.

    Unpickling can execute arbitrary code, so only load files from
    trusted sources.

    Parameters
    ----------
    path : str
        Path to pickle.

    Returns
    -------
    data
        Previously pickled object instance.

    """
    # Pickles are binary; open accordingly
    with open(path, "rb") as handle:
        obj = pickle.load(handle)
    return obj
[docs]
def load_joblib(path):
    """
    Load joblib file.

    Parameters
    ----------
    path : str
        Path to joblib file.

    Returns
    -------
    data
        Previously saved object instance.

    """
    # Open in binary mode and hand the file object to joblib
    with open(path, "rb") as handle:
        obj = joblib.load(handle)
    return obj
[docs]
def _check_mol_obj(mol_obj):
    """
    Validate that the input is an RDKit Mol object.

    Parameters
    ----------
    mol_obj : object
        Candidate object to validate.

    Raises
    ------
    IOError
        If `mol_obj` is not an instance of ``Chem.Mol``.

    """
    if not isinstance(mol_obj, Chem.Mol):
        raise IOError("Not a valid RDKit Mol object passed.")
[docs]
def load_mol_obj(mol_obj):
    """
    Wrap an RDKit mol object in a geometry instance.

    Parameters
    ----------
    mol_obj : mol
        RDKit mol object

    Returns
    -------
    :obj:`~isicle.geometry.Geometry`
        Molecule representation, with the InChIKey of `mol_obj` as basename.

    Raises
    ------
    IOError
        If `mol_obj` is not an RDKit Mol object.

    """
    # Validate mol object
    _check_mol_obj(mol_obj)

    # No file name is available, so the InChIKey serves as basename
    inchi_key = Chem.MolToInchiKey(mol_obj)

    # Initialize geometry instance
    return isicle.geometry.Geometry(mol=mol_obj, basename=inchi_key)
[docs]
def load(path, **kwargs):
    """
    Reads in molecule information of the following supported file types:
    .smi, .inchi, .xyz, .mol, .mol2, .pkl, .joblib, .pdb. Direct loaders can
    also be used, see load_* functions for more information.

    Parameters
    ----------
    path : str or object
        Path to file with molecule information. A string that matches none
        of the known extensions is handed to :func:`load_smiles`; a string
        containing "inchi=" is handed to :func:`load_inchi`. Non-string
        input is handed to :func:`load_mol_obj`.
    kwargs
        Keyword arguments passed to format-specific loaders (XYZ, InChI and
        SMILES loaders only).

    Returns
    -------
    :obj:`~isicle.geometry.Geometry` or :obj:`~isicle.geometry.XYZGeometry`
        Molecule representation. For .pkl and .joblib, whatever object was
        stored in the file.

    Raises
    ------
    IOError
        If the SMILES fallback fails for a string input, or if a non-string
        input cannot be loaded as an RDKit mol object. The underlying error
        is attached as ``__cause__``.

    """
    if isinstance(path, str):
        path = path.strip()
        extension = os.path.splitext(path)[-1].lower()

        if extension == ".pkl":
            return load_pickle(path)

        # "mol2" must be tested before "mol", which it contains
        if "mol2" in extension:
            return load_mol2(path)

        if "mol" in extension:
            return load_mol(path)

        if extension == ".joblib":
            return load_joblib(path)

        if extension == ".xyz":
            return load_xyz(path, **kwargs)

        if extension == ".pdb":
            return load_pdb(path)

        if extension == ".inchi" or "inchi=" in path.lower():
            return load_inchi(path, **kwargs)

        # Last resort: treat as SMILES file or SMILES string. Catch only
        # ``Exception`` so KeyboardInterrupt/SystemExit propagate, and chain
        # the cause so the real failure is not hidden.
        try:
            return load_smiles(path, **kwargs)
        except Exception as exc:
            raise IOError("Extension {} not recognized.".format(extension)) from exc

    # Non-string input: expect an RDKit mol object
    try:
        return load_mol_obj(path)
    except Exception as exc:
        raise IOError("Not a valid RDKit mol object passed.") from exc
[docs]
def save_xyz(path, geom):
    """
    Write a molecule geometry to an XYZ file.

    Parameters
    ----------
    path : str
        Path to output file.
    geom : :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    Raises
    ------
    TypeError
        If `geom` is not an :obj:`~isicle.geometry.Geometry`.

    """
    # Only Geometry instances are accepted
    expected_type = isicle.geometry.Geometry
    if not isinstance(geom, expected_type):
        raise TypeError("Must be `isicle.geometry.Geometry` to save in XYZ format.")

    # Write the XYZ block produced by the geometry
    with open(path, "w") as xyz_file:
        contents = geom.to_xyzblock()
        xyz_file.write(contents)
[docs]
def save_joblib(path, data):
    """
    Save object as joblib file.

    Parameters
    ----------
    path : str
        Path to output file.
    data : object
        Arbitrary object instance.

    """
    # Open in binary mode and hand the file object to joblib
    with open(path, "wb") as handle:
        joblib.dump(data, handle)
[docs]
def save_pickle(path, data):
    """
    Save object as pickle file.

    Parameters
    ----------
    path : str
        Path to output file.
    data : object
        Arbitrary object instance.

    """
    # Pickles are binary; open accordingly
    with open(path, "wb") as handle:
        pickle.dump(data, handle)
[docs]
def save_mfj(path, geom):
    """
    Save molecule geometry as MFJ file. Must have energy and charge information.

    Parameters
    ----------
    path : str
        Path to output file.
    geom : :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    Raises
    ------
    TypeError
        If `geom` is not an :obj:`~isicle.geometry.Geometry`.
    KeyError
        If `geom.energy` or `geom.charge` is None.

    """
    # Check instance type
    if not isinstance(geom, isicle.geometry.Geometry):
        raise TypeError("Must be `isicle.geometry.Geometry` to save in MFJ format.")

    # Both energy and charge must be populated on the geometry
    if (geom.energy is None) or (geom.charge is None):
        raise KeyError("DFT energy calculation required. See `isicle.qm.dft`.")

    # Parse coordinates from the XYZ block, skipping its first two lines.
    # Raw string avoids the invalid "\s" escape warning.
    xyz = pd.read_csv(
        StringIO(geom.to_xyzblock()),
        skiprows=2,
        header=None,
        sep=r"\s+",
        names=["Atom", "x", "y", "z"],
    )

    # Append charges
    xyz["Charge"] = geom.charge

    # Load masses and merge on element symbol.
    # NOTE(review): pandas merges with an inner join by default, so atoms
    # whose symbol is missing from the mass table would be dropped -- confirm
    # the table covers every element expected here.
    masses = isicle.utils.atomic_masses()[["Symbol", "Mass"]]
    mfj = pd.merge(xyz, masses, left_on="Atom", right_on="Symbol")

    # Select output columns in file order and coerce to float
    mfj = mfj[["x", "y", "z", "Mass", "Charge"]].astype(float)

    # Header lines: output basename, then fixed fields and the atom count
    header = [
        os.path.splitext(os.path.basename(path))[0],
        "1",
        str(len(mfj.index)),
        "ang",
        "calc",
        "1.000",
    ]

    # Write to file: header followed by one tab-separated row per atom
    with open(path, "w") as f:
        f.write("\n".join(header) + "\n")
        for row in mfj.values:
            f.write("\t".join(str(x) for x in row) + "\n")
[docs]
def save_smiles(path, geom):
    """
    Write a molecule geometry to a SMILES file.

    Parameters
    ----------
    path : str
        Path to output file.
    geom : :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    Raises
    ------
    TypeError
        If `geom` is not an :obj:`~isicle.geometry.Geometry`.

    """
    # Only Geometry instances are accepted
    expected_type = isicle.geometry.Geometry
    if not isinstance(geom, expected_type):
        raise TypeError("Must be `isicle.geometry.Geometry` to save in SMILES format.")

    # Write the SMILES string produced by the geometry
    with open(path, "w") as smiles_file:
        contents = geom.to_smiles()
        smiles_file.write(contents)
[docs]
def save_inchi(path, geom):
    """
    Write a molecule geometry to an InChI file.

    Parameters
    ----------
    path : str
        Path to output file.
    geom : :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    Raises
    ------
    TypeError
        If `geom` is not an :obj:`~isicle.geometry.Geometry`.

    """
    # Only Geometry instances are accepted
    expected_type = isicle.geometry.Geometry
    if not isinstance(geom, expected_type):
        raise TypeError("Must be `isicle.geometry.Geometry` to save in InChI format.")

    # Write the InChI string produced by the geometry
    with open(path, "w") as inchi_file:
        contents = geom.to_inchi()
        inchi_file.write(contents)
[docs]
def save_mol(path, geom):
    """
    Write a molecule geometry to a MOL file.

    Parameters
    ----------
    path : str
        Path to output file.
    geom : :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    Raises
    ------
    TypeError
        If `geom` is not an :obj:`~isicle.geometry.Geometry`.

    """
    # Only Geometry instances are accepted
    expected_type = isicle.geometry.Geometry
    if not isinstance(geom, expected_type):
        raise TypeError("Must be `isicle.geometry.Geometry` to save in MOL format.")

    # RDKit writes the underlying mol object straight to disk
    mol = geom.mol
    Chem.MolToMolFile(mol, path)
[docs]
def save_csv(path, dataframe):
    """
    Write a `pandas.DataFrame` to a csv file.

    Parameters
    ----------
    path : str
        Path to output file.
    dataframe : :obj:`pandas.DataFrame`
        Dataframe of results

    """
    # Delegate to the dataframe's own writer with its default options
    dataframe.to_csv(path)
[docs]
def save_pdb(path, geom):
    """
    Write a molecule geometry to a PDB file.

    Parameters
    ----------
    path : str
        Path to output file.
    geom : :obj:`~isicle.geometry.Geometry`
        Molecule representation.

    Raises
    ------
    TypeError
        If `geom` is not an :obj:`~isicle.geometry.Geometry`.

    """
    # Only Geometry instances are accepted
    expected_type = isicle.geometry.Geometry
    if not isinstance(geom, expected_type):
        raise TypeError("Must be `isicle.geometry.Geometry` to save in PDB format.")

    # RDKit writes the underlying mol object straight to disk
    mol = geom.mol
    Chem.MolToPDBFile(mol, path)
[docs]
def save(path, data):
    """
    Save data, with the format detected from the path extension.

    Parameters
    ----------
    path : str
        Path to save file. Handled extensions are .pkl, .joblib, .mfj, .xyz,
        .pdb, .csv and .inchi, plus any extension containing "mol" or "smi".
    data : obj
        Object instance. Must be :obj:`~isicle.geometry.Geometry` for .xyz and .mfj.

    Returns
    -------
    object
        Whatever the selected format-specific writer returns.

    Raises
    ------
    IOError
        If the extension matches none of the supported formats.

    """
    # Determine format from extension (case-insensitive)
    extension = os.path.splitext(path)[-1].lower()

    # Pick the writer first, then invoke it once at the end
    if extension == ".pkl":
        writer = save_pickle
    elif extension == ".joblib":
        writer = save_joblib
    elif extension == ".mfj":
        writer = save_mfj
    elif "mol" in extension:
        writer = save_mol
    elif extension == ".xyz":
        writer = save_xyz
    elif extension == ".pdb":
        writer = save_pdb
    elif extension == ".csv":
        writer = save_csv
    elif "smi" in extension:
        writer = save_smiles
    elif extension == ".inchi":
        writer = save_inchi
    else:
        raise IOError("Extension {} not recognized.".format(extension))

    return writer(path, data)