Source code for isicle.conformers
import numpy as np
import pandas as pd
from statsmodels.stats.weightstats import DescrStatsW
from isicle.geometry import Geometry
from isicle.utils import TypedList, safelist
[docs]
def _function_selector(func):
"""
Selects a supported reduction function for reducing a set of conformers.
Parameters
----------
func : str
Alias for function selection (one of "boltzmann", "simple", "lowest",
or "threshold").
Returns
-------
func
Conformer reduction function.
"""
# Mapping between names and functions
func_map = {
"boltzmann": boltzmann,
"simple": simple_average,
"lowest": lowest_energy,
"threshold": energy_threshold,
}
# Check for function by name
if func.lower() in func_map:
return func_map[func.lower()]
# Not a named/implemented function
raise ValueError("{} not a supported reduction function.".format(func))
[docs]
def _energy_based(func):
"""
Checks whether function employs an energy-based reduction operation.
Parameters
----------
func : function
Conformer reduction function.
Returns
-------
bool
True if energy based, otherwise False.
"""
# Check if among energy-based functions
if func in [boltzmann, lowest_energy, energy_threshold]:
return True
# Not energy based
return False
[docs]
def reduce(value, func="boltzmann", **kwargs):
"""
Combine values according to indicated function.
Parameters
----------
value : :obj:`~numpy.array`
Array containing values that will be combined.
func : str
Alias for function selection (one of "boltzmann", "simple", "lowest",
or "threshold").
kwargs
Additional keyword arguments passed to `func`.
Returns
-------
:obj:`~pandas.DataFrame`
Result of reduction operation.
"""
# Select function
f = _function_selector(func)
# Energy-based method
if _energy_based(f):
energy = kwargs.pop("energy")
return f(value, energy, **kwargs)
# Other method
return f(value, **kwargs)
[docs]
def boltzmann(value, energy, index=None, atom=None):
"""
Combine values according to a Boltzmann-weighted average.
Parameters
----------
value : :obj:`~numpy.array`
Array containing values that will be combined.
energy : :obj:`~numpy.array`
Array containing energy values that correspond to entries in `value`.
index : None or :obj:`~numpy.array`
Index by which to group values for averaging.
atom : None or :obj:`~numpy.array`
Atom by which to group values for averaging.
Returns
-------
:obj:`~pandas.DataFrame`
Result of reduction operation.
"""
# Placeholder for index
if index is None:
index = np.full_like(value, -1)
# Placeholder for atom
if atom is None:
atom = np.full_like(value, -1)
# Initialize data frame
df = pd.DataFrame.from_dict(
{"value": value, "energy": energy, "index": index, "atom": atom}
)
# Result container
res = []
# Iterate over unique indices
for name, group in df.groupby(["index", "atom"]):
# Compute relative delta G
g = group["energy"] * 627.503
mn = g.min()
relG = g - mn
# Compute Boltzmann weighting factors
b = np.exp(-relG / 0.5924847535)
w = (b / b.sum()) * len(b)
# Compute weighted statistics
ws = DescrStatsW(group["value"], weights=w, ddof=0)
# Append to container
res.append([name[0], name[1], ws.mean, ws.std, len(group.index)])
# Initialize data frame
res = pd.DataFrame(res, columns=["index", "atom", "mean", "std", "n"])
# Drop index if not supplied
if np.all(index == -1):
return res.drop(columns=["index", "atom"]).iloc[0]
return res
[docs]
def simple_average(value, index=None, atom=None):
"""
Combine values according to a simple average.
Parameters
----------
value : :obj:`~numpy.array`
Array containing values that will be combined.
index : None or :obj:`~numpy.array`
Index by which to group values for averaging.
atom : None or :obj:`~numpy.array`
Atom by which to group values for averaging.
Returns
-------
:obj:`~pandas.DataFrame`
Result of reduction operation.
"""
# Placeholder for index
if index is None:
index = np.full_like(value, -1)
# Placeholder for atom
if atom is None:
atom = np.full_like(value, -1)
# Initialize data frame
df = pd.DataFrame.from_dict({"value": value, "index": index, "atom": atom})
# Average per unique index
res = df.groupby(["index", "atom"], as_index=False).agg(
{"value": ["mean", "std", "count"]}
)
# Rename columns
res.columns = ["index", "atom", "mean", "std", "n"]
# Drop indices if not supplied
if np.all(index == -1):
return res.drop(columns=["index", "atom"]).iloc[0]
return res
[docs]
def lowest_energy(value, energy, index=None, atom=None):
"""
Combine values according to lowest energy.
Parameters
----------
value : :obj:`~numpy.array`
Array containing values that will be combined.
energy : :obj:`~numpy.array`
Array containing energy values that correspond to entries in `value`.
index : None or :obj:`~numpy.array`
Index by which to group values for averaging.
atom : None or :obj:`~numpy.array`
Atom by which to group values for averaging.
Returns
-------
:obj:`~pandas.DataFrame`
Result of reduction operation.
"""
# Placeholder for index
if index is None:
index = np.full_like(value, -1)
# Placeholder for atom
if atom is None:
atom = np.full_like(value, -1)
# Initialize data frame
df = pd.DataFrame.from_dict(
{"value": value, "energy": energy, "index": index, "atom": atom}
)
# Take minimum energy per unique index
res = df.loc[df.groupby(["index", "atom"])["energy"].idxmin()]
# Drop indices if not supplied
if np.all(index == -1):
return res.drop(columns=["index", "atom"]).iloc[0]
return res
[docs]
def energy_threshold(value, energy, threshold=5, index=None, atom=None):
"""
Combine values with energy below a given threshold according to a simple
average.
Parameters
----------
value : :obj:`~numpy.array`
Array containing values that will be combined.
energy : :obj:`~numpy.array`
Array containing energy values that correspond to entries in `value`.
index : None or :obj:`~numpy.array`
Index by which to group values for averaging.
atom : None or :obj:`~numpy.array`
Atom by which to group values for averaging.
Returns
-------
:obj:`~pandas.DataFrame`
Result of reduction operation.
"""
# Placeholder for index
if index is None:
index = np.full_like(value, -1)
# Placeholder for atom
if atom is None:
atom = np.full_like(value, -1)
# Initialize data frame
df = pd.DataFrame.from_dict(
{"value": value, "energy": energy, "index": index, "atom": atom}
)
# Filter by energy
df = df.loc[df["energy"] <= threshold, :]
# Aggregate
res = df.groupby(["index", "atom"], as_index=False).agg(
{"value": ["mean", "std", "count"]}
)
# Rename columns
res.columns = ["index", "atom", "mean", "std", "n"]
# Drop indices if not supplied
if index is None:
return res.drop(columns=["index", "atom"]).iloc[0]
return res
[docs]
def transform(
value, m={"H": 1.0, "C": 1.0}, b={"H": 0.0, "C": 0.0}, index=None, atom=None
):
"""
Perform linear transformation with values using provided parameters.
Parameters
----------
value : :obj: `~numpy.array`
Array containing vales that will be transformed.
m : float or dict
Slope value
b : float or dict
Y-intercept value
index : None or :obj: `~numpy.array`
Index by which to group values for transforming.
atom : None or :obj: `~numpy.array`
Atom by which to group values for transforming.
Returns
-------
:obj: `~pandas.DataFrame`
Result of transformation operation.
"""
# Placeholder for index
if index is None:
index = np.full_like(value, -1)
# Placeholder for atom
if atom is None:
atom = np.full_like(value, -1)
# Initialize data frame
df = pd.DataFrame.from_dict({"value": value, "index": index, "atom": atom})
# Process with per-atom values
if isinstance(m, dict):
res = pd.DataFrame()
for idx in m:
part = df.loc[df["atom"] == idx].copy()
part["new_value"] = part["value"].apply(lambda x: m[idx] * x + b[idx])
res = pd.concat([res, part])
# Process with global values
else:
res = df.copy()
res["new_value"] = res["value"].apply(lambda x: m * x + b)
return res
[docs]
def build_conformational_ensemble(geometries):
"""
Create a conformational ensemble from a collection of geometries.
Parameters
----------
geometries : list of :obj:`~isicle.geometry.Geometry` or related subclass
Collection of geometry instances.
Returns
-------
:obj:`~isicle.conformers.ConformationalEnsemble`
Conformational ensemble.
"""
return ConformationalEnsemble(geometries)
[docs]
class ConformationalEnsemble(TypedList):
"""
Collection of :obj:`~isicle.geometry.Geometry`, or related subclass,
instances.
"""
def __init__(self, *args):
"""
Initialize :obj:`~isicle.conformers.ConformationalEnsemble` instance.
Parameters
----------
*args
Objects to comprise the conformational ensemble.
"""
super().__init__(Geometry, *args)
[docs]
def _check_attributes(self, attr):
"""
Check if all ensemble members have the supplied attribute.
Parameters
----------
attr : str
Attribute to check.
Raises
------
AttributeError
If all members do not have `attr`.
"""
if not all(hasattr(x, attr) for x in self):
raise AttributeError(
'"{}" not found for all conformational '
"ensemble members.".format(attr)
)
[docs]
def reduce(self, attr, func="boltzmann", **kwargs):
"""
Combine attribute values according to indicated function.
Parameters
----------
attr : str
Attribute that will be combined.
func : str
Alias for function selection (one of "boltzmann", "simple",
"lowest", or "threshold").
kwargs
Additional keyword arguments passed to `func`.
Returns
-------
:obj:`~pandas.DataFrame`
Result of reduction operation.
"""
# Modify energy attr to match provided attr
if attr.startswith("_"):
e_attr = "_energy"
else:
e_attr = "energy"
# Select reduction function
f = _function_selector(func)
# Check for primary attribute
self._check_attributes(attr)
# Check for energy attribute
if _energy_based(f):
self._check_attributes(e_attr)
# Extract (possibly nested) value attribute
value = [getattr(x, attr) for x in self]
# Check nested values
if isinstance(value[0], dict):
# Check index
if "index" in value[0]:
index = np.array([x["index"] for x in value]).flatten()
pad = int(len(index) / len(self))
else:
index = None
pad = 1
# Check atom
if "atom" in value[0]:
atom = np.array([x["atom"] for x in value]).flatten()
else:
atom = None
# Special case for CCS
if "mean" in value[0] and "std" in value[0]:
value = np.array([x["mean"] for x in value]).flatten()
elif "shielding" in value[0]:
value = np.array([x["shielding"] for x in value]).flatten()
else:
value = np.array([value.get(x) for x in value]).flatten()
# Not nested
else:
index = None
atom = None
pad = 1
# Extract energy attribute
if _energy_based(f):
energy = np.array([np.repeat(getattr(x, e_attr), pad) for x in self])
energy = energy.flatten()
# Exectue energy-based method
return f(value, energy, index=index, atom=atom, **kwargs)
# Execute other method
return f(value, index=index, atom=atom, **kwargs)
[docs]
def _apply_method(self, method, **kwargs):
"""
Process conformational ensemble members according to supplied method.
Parameters
----------
method : str
Method by which ensemble members will be processed.
kwargs
Keyword arguments passed to `method`.
Returns
-------
:obj:`~isicle.conformers.ConformationalEnsemble` or list
Result of operation, type depends on `method` return type.
"""
# Check for attribute
if not all(hasattr(x, method) for x in self):
raise AttributeError(
'"{}" not found for all conformational '
"ensemble members.".format(method)
)
# Apply method to collection
result = [getattr(x, method)(**kwargs) for x in self]
# Return ConformationalEnsemble if correct result type
try:
return ConformationalEnsemble(result)
# Return result as-is
except:
return result
[docs]
def _apply_function(self, func, **kwargs):
"""
Process conformational ensemble members according to supplied function.
Parameters
----------
func : function
Function by which ensemble members will be processed.
kwargs
Keyword arguments passed to `func`.
Returns
-------
:obj:`~isicle.conformers.ConformationalEnsemble` or list
Result of operation, type depends on `func` return type.
"""
# Apply method to collection
result = [func(x, **kwargs) for x in self]
# Return ConformationalEnsemble if correct result type
try:
return ConformationalEnsemble(result)
# Return result as-is
except:
return result
[docs]
def apply(self, func=None, method=None, **kwargs):
"""
Process conformational ensemble members according to supplied function
or method.
Parameters
----------
func : function
Function by which ensemble members will be processed.
method : str
Method by which ensemble members will be processed.
kwargs
Keyword arguments passed to `method`.
Returns
-------
:obj:`~isicle.conformers.ConformationalEnsemble` or list
Result of operation, type depends on `method` return type.
Raises
------
ValueError
If neither `func` nor `method` is supplied.
"""
# Apply function
if func is not None:
return self._apply_function(func, **kwargs)
# Apply method
if method is not None:
return self._apply_method(method, **kwargs)
raise ValueError("Must supply `func` or `method`.")
[docs]
def get_structures(self):
"""
Extract all structures from containing object as a conformational ensemble.
Returns
-------
:obj:`~isicle.conformers.ConformationalEnsemble`
Conformational ensemble.
"""
# Check for geom attribute
self._check_attributes("geom")
# Build and return
return build_conformational_ensemble([x.geom for x in self])