import pandas as pd
import numpy as np
import py3Dmol
from rdkit import Chem, DataStructs
from rdkit.Chem import Draw, rdFMCS, AllChem, rdFingerprintGenerator, MACCSkeys
from rdkit.Chem.Crippen import MolLogP
import matplotlib.colors as mcolors
from typing import Optional, Union
[docs]
class SAR:
def __init__(self, data: pd.DataFrame, smi_col: str = "smiles", act_col: str = "activity", units: str = "nM"):
self.atom_difference = None
self.smi_col = smi_col
self.act_col = act_col
# check if act_col is type float
if data[act_col].dtype == 'float':
# print(f"Column '{act_col}' is type float") # for debugging
pass
else:
# print(f"Converting column '{act_col}' to type float") # for debugging
data[act_col] = data[act_col].str.replace(',', '').astype(float)
# calculate and add pIC50 and append to data
act_list = data[act_col].tolist()
pIC = _calculate_pic50(act_list, units)
data['pIC50'] = pIC
self.data = data
# todo add different fp types?
[docs]
def calc_LiPE(self, smi_col: Optional[list] = None, act_col: Optional[list] = None):
"""
Calculate the Lipophilic Efficiency (LiPE) for a query molecule. The LiPE is calculated as LipE = pIC50 - LogP
:param smi_col: Optional[list]
Designate the smiles cols.
:param act_col: Optional[list]
Designate the activities cols.
:return:
"""
global smi_list
if smi_col is None:
smi_col = self.smi_col
if act_col is None:
act_col = self.act_col
data = self.data
smi_list = data[smi_col].tolist()
pIC50_list = data['pIC50'].tolist()
# calculate LogP
logp = []
for x in smi_list:
mol = Chem.MolFromSmiles(x)
calc_logp = MolLogP(mol)
logp.append(calc_logp)
# calculate LiPE
LiPE = []
for x, y in zip(pIC50_list, logp):
calc_LiPE = x - y
# result = np.round(calc_LiPE, decimals=3)
# LiPE.append(float(result))
LiPE.append(calc_LiPE)
data["LiPE"] = LiPE
self.data = data
return self.data
[docs]
def get_sali(self, smi_col: Optional[list] = None, bits: int = 1024, radius: int = 2, type: str = 'morgan'):
"""
Calculate the Structure-Activity Landscape Index (SALI) between a pair of molecules.
:param smi_col: Optional[list]
Designate the smiles cols.
:return:
"""
if smi_col is None:
smi_col = self.smi_col
data = self.data
smi_list = data[smi_col].tolist()
pIC50_list = data['pIC50'].tolist()
# todo fix
if type == 'morgan':
print(Warning("Currently only RDKFingerprint will be used by default"))
# add fp col
mol_list = [Chem.MolFromSmiles(x) for x in smi_list]
data['fp'] = [Chem.RDKFingerprint(x) for x in
mol_list] # todo a function to use different types of fingerprints
fp_list = data['fp'].tolist()
sal_list = []
for i, fp in enumerate(data.fp):
sim_list = DataStructs.BulkTanimotoSimilarity(fp, fp_list)
for j in range(0, i):
# get mol name
mol_name_1 = data.name.iloc[i]
mol_name_2 = data.name.iloc[j]
if pIC50_list[i] >= pIC50_list[j]:
mol_name_1, mol_name_2 = mol_name_1, mol_name_2
else:
mol_name_1, mol_name_2 = mol_name_1, mol_name_2
delta = abs(pIC50_list[i] - pIC50_list[j])
sim = sim_list[j]
sal_list.append(
[mol_name_1, mol_name_2, sim, delta / (1 - sim + 0.001), smi_list[i], smi_list[j], pIC50_list[i],
pIC50_list[j]])
sal_df = pd.DataFrame(sal_list,
columns=["mol_1", "mol_2", "tanimoto", "SALI", "smiles_1", "smiles_2", "pIC50_1",
"pIC50_2"])
return sal_df
[docs]
def highlight_cliffs(self, smi_list: Optional[list] = None, ncols: int = 2, subsize: tuple = (400, 400),
legend: list = None, highlight_color: str = None, radius: int = 0.3, SVG: bool = False,
savepath: str = None):
"""
Draw and highlight differing structures in 2D. For each molecule, the Maximum Common Substructure (MCS) is
identified. Only differing functional groups will be highlighted. Information is pulled from the pd.DataFrame
given when initializing class SAR().
:param smi_col: str
Column name containing SMILES strings.
:param ncols: int
Set the number of columns for drawn molecules.
:param subsize: tuple
Set the drawing size for each molecule.
:param legend: list
Set the legend for each moleucle in the grid.
:param highlight_color: str
Set the color style of the highlights. Names must be found in Matplotlib.
:param radius: int
Set the highlight radius.
:param SVG: bool
Whether to output image in SVG format. Defaults to False, giving a .png image.
:param savepath: str
Set the savepath for the image.
:return:
"""
# extract smi and convert to RDKit object
if smi_list is None:
smi_col = self.smi_col
smi_list = self.data[smi_col].tolist()
mol_list = [Chem.MolFromSmiles(smi) for smi in smi_list]
# checks
if len(mol_list) == 0:
raise ValueError("No valid molecules found from provided SMILES")
# if only one molecule, handle specially (can't find MCS)
if len(mol_list) == 1:
print("Only one molecule provided, no MCS can be calculated.")
img = Draw.MolsToGridImage(
mol_list,
molsPerRow=1,
subImgSize=(400, 400),
legends=[f"Molecule {i + 1}" for i in range(len(mol_list))]
)
if savepath:
img.save(savepath)
return img
# find mcs and create RDKit object
mcs = rdFMCS.FindMCS(mol_list)
mcs_mol = Chem.MolFromSmarts(mcs.smartsString)
# align mols
try:
ref = Chem.MolFromSmiles(smi_list[0])
AllChem.Compute2DCoords(ref)
for mol in mol_list:
AllChem.GenerateDepictionMatching2DStructure(mol, ref)
except Exception as e:
print(f"{e}\nCannot Align Molecules.")
# extract matching atoms
atom_match = []
atom_difference = []
for mol in mol_list:
match_atoms = mol.GetSubstructMatch(mcs_mol)
atom_match.append(match_atoms)
diff_atoms = [atom.GetIdx() for atom in mol.GetAtoms() if atom.GetIdx() not in match_atoms]
atom_difference.append(diff_atoms)
# save for view
self.atom_difference = atom_difference
if legend is None:
try:
legend = self.data['name'].tolist()
except Exception as e:
raise Exception(f"{e}\nCannot find 'name' column. No legend set!")
if highlight_color is None:
highlight_colors = None
else:
highlight = _color_to_rgb(highlight_color)
# give color palette to each mol
highlight_colors = [{atom_idx: highlight for atom_idx in atom_list} for atom_list in atom_difference]
# set highlight radius
opts = Draw.MolDrawOptions()
opts.highlightRadius = radius
# draw molecules
img = Draw.MolsToGridImage(
mols=mol_list,
molsPerRow=ncols,
subImgSize=subsize,
legends=legend,
highlightAtomLists=atom_difference,
highlightAtomColors=highlight_colors,
drawOptions=opts,
useSVG=SVG,
)
return img
[docs]
def output_cliffs(self, mols: Chem.Mol = None, savepath: str = None):
"""
Output the activity cliffs for molecule as .pml file for rendering in PyMOL.
:param mols: Chem.Mol
Input the RDKit molecule object to generate the activity cliffs.
:param savepath: str
Savepath for the .pml file.
:return:
"""
if mols is None:
mol = self.mols
else:
mol = mols
flat_indices = [idx for sublist in self.atom_difference for idx in sublist]
# check if indices found in query mol
flat_indices = [idx for idx in flat_indices if idx < mol.GetNumAtoms()]
with open(savepath, "w") as f:
# define color scheme
f.write("set_color Hydrophob, [46, 204, 113]\n")
f.write("set_color HDonor, [33, 150, 243]\n")
f.write("set_color HAcceptor, [244, 67, 54]\n")
f.write("set_color DualH, [255, 0, 255]\n")
f.write("set_color Aromatic, [255, 235, 59]\n")
# generate aromatic psudoatom points
aromatic_atom_indices = set()
ring_info = mol.GetRingInfo()
for x, ring in enumerate(ring_info.AtomRings()):
if all(mol.GetAtomWithIdx(idx).GetIsAromatic() for idx in ring):
if any(idx in flat_indices for idx in ring):
conf = mol.GetConformer()
coords = [conf.GetAtomPosition(idx) for idx in ring]
centroid = np.mean(coords, axis=0)
obj_name = f"Aromatic_{x}"
f.write(
f"pseudoatom {obj_name}, pos=[{centroid[0]:.3f}, {centroid[1]:.3f}, {centroid[2]:.3f}]\n")
f.write(f"show spheres, {obj_name}\n")
f.write(f"hide nonbonded, {obj_name}\n") # This removes the "plus" sign
f.write(f"set sphere_scale, 0.5, {obj_name}\n")
f.write(f"color Aromatic, {obj_name}\n")
f.write(f"set sphere_transparency, 0.0, {obj_name}\n")
for idx in ring:
aromatic_atom_indices.add(idx)
# generate atom points
for idx in flat_indices:
atom = mol.GetAtomWithIdx(idx)
symbol = atom.GetSymbol()
pos = conf.GetAtomPosition(idx)
donor = symbol in ['N', 'O', 'S'] and atom.GetTotalNumHs() > 0
acceptor = symbol in ['N', 'O'] and atom.GetFormalCharge() <= 0
color = None
label = ""
if donor and acceptor:
color, label = "DualH", "Dual"
elif donor:
color, label = "HDonor", "Donor"
elif acceptor:
color, label = "HAcceptor", "Acceptor"
elif idx not in aromatic_atom_indices and symbol in ['C', 'Cl', 'Br', 'I']:
color, label = "Hydrophob", "Hydrophobic"
if color:
obj_name = f"Pin_{label}_{idx}"
f.write(f"pseudoatom {obj_name}, pos=[{pos.x:.3f}, {pos.y:.3f}, {pos.z:.3f}]\n")
f.write(f"show spheres, {obj_name}\n")
f.write(f"color {color}, {obj_name}\n")
f.write(f"set sphere_transparency, 0.3, {obj_name}\n")
# f.write(f"hide nonbonded, {obj_name}\n") # This removes the "plus" sign
# Smaller spheres for atoms inside aromatic rings
scale = 0.7 if idx in aromatic_atom_indices else 1.0
f.write(f"set sphere_scale, {scale}, {obj_name}\n")
f.write("zoom all\n")
[docs]
def view_cliffs(self, mols: Union[Chem.Mol, list[Chem.Mol]] = None, protein_path: str = None,
window: tuple = (500, 500)):
"""
View the activity cliffs of molecules in py3Dmol. Only atoms not found using Maximum Common Substructure (MCS)
will be highlighted.
:param mols: Union[Chem.Mol, list[Chem.Mol]]
RDKit molecule object for rendering. Should be the same as the smiles given as the pd.DataFrame input when
initializing SAR.
:param protein_path: str
Filepath to the target protein structure.
:param window: tuple
Set the windows size of the visualization window.
:return:
"""
self.mols = mols
self.window = window
self.protein_path = protein_path
# dropdown menu
if isinstance(mols, Chem.Mol):
drop_options = [("Molecule 1", 0)]
elif mols is None:
raise ValueError(f"No valid RDKit molecules given!")
else:
drop_options = [(f"Molecule {i + 1}", i) for i in range(len(mols))]
import ipywidgets as widgets
dropdown = widgets.Dropdown(
options=drop_options,
value=0,
description="Select:",
style={"description_width": "initial"}
)
widgets.interact(self._render_cliffs, index=dropdown)
def _render_cliffs(self, index):
"""
Render molecules with fog effect on differing atoms
"""
if isinstance(self.mols, Chem.Mol):
mol = self.mols
else:
mol = self.mols[index]
mol_block = Chem.MolToMolBlock(mol)
viewer = py3Dmol.view(width=self.window[0], height=self.window[1])
viewer.setBackgroundColor("white")
viewer.addModel(mol_block, "mol")
viewer.setStyle({'stick': {'radius': 0.15, 'colorscheme': 'grayCarbon'}})
viewer.zoomTo()
# set protein
if self.protein_path:
with open(self.protein_path, 'r') as f:
pdb_data = f.read()
viewer.addModel(pdb_data, "pdb")
# protein style
viewer.setStyle({'model': -1}, {'cartoon': {'color': 'lightgray', 'opacity': 0.6},
'line': {'color': 'lightgray', 'opacity': 0.3}})
# set ligand
viewer.addModel(Chem.MolToPDBBlock(mol), "mol")
viewer.setStyle({'model': -1}, {'stick': {'radius': 0.15, 'colorscheme': 'grayCarbon'}})
flat_indices = [idx for sublist in self.atom_difference for idx in sublist]
# check if indices found in query mol
flat_indices = [idx for idx in flat_indices if idx < mol.GetNumAtoms()]
# track aromatic atoms
aromatic_atom_indices = set()
ring_info = mol.GetRingInfo()
for ring in ring_info.AtomRings():
# check if aromatic atom not in MCS
if all(mol.GetAtomWithIdx(idx).GetIsAromatic() for idx in ring):
if any(idx in flat_indices for idx in ring):
# calculate centroid
conf = mol.GetConformer()
coords = [conf.GetAtomPosition(idx) for idx in ring]
centroid = np.mean(coords, axis=0)
# add sphere
viewer.addSphere({
'center': {'x': centroid[0], 'y': centroid[1], 'z': centroid[2]},
'radius': 0.6,
'color': 'gold',
'opacity': 1.0
})
# tag aromatic atoms
for idx in ring: aromatic_atom_indices.add(idx)
# track other atoms
for idx in flat_indices:
atom = mol.GetAtomWithIdx(idx)
symbol = atom.GetSymbol()
# tag donor/acceptor
donor = symbol in ['N', 'O', 'S'] and atom.GetTotalNumHs() > 0
acceptor = symbol in ['N', 'O'] and atom.GetFormalCharge() <= 0
# use if halogens are listed as acceptors
# acceptor = symbol in ['N', 'O', 'F', 'Cl', 'Br', 'I'] and atom.GetFormalCharge() <= 0
# set color var
color = None
# donor/acceptor colorscheme
if donor and acceptor:
color = 'magenta'
elif donor:
color = 'blue'
elif acceptor:
color = 'red'
# hydrophobic colorscheme
elif idx not in aromatic_atom_indices:
if symbol in ['C', 'F', 'Cl', 'Br', 'I'] and not atom.GetIsAromatic():
# only use 'C' if halogens are going to be acceptors
# if symbol == 'C':
color = '#2ecc71'
else:
color = '#7f8c8d'
# apply style
if color:
viewer.addStyle({'model': -1, 'index': idx}, {
'sphere': {
'color': color,
'opacity': 0.7,
'radius': 0.7 if idx in aromatic_atom_indices else 1.0
}
})
viewer.zoomTo()
return viewer.show()
def _calculate_pic50(activity: Optional[list], units: str = "nM"):
# check values
for x in activity:
if np.any(x <= 0):
raise ValueError("Error! Input Activity must be greater than 0!")
pic50 = []
for x in activity:
if units == "nM":
converted_ic = 9 - np.log10(x)
elif units == "uM" or units == "µM":
converted_ic = 6 - np.log10(x)
elif units == "mM":
converted_ic = 3 - np.log10(x)
else:
raise ValueError(f"Cannot Convert {x}!")
# converted_ic = np.round(converted_ic, decimals=3)
pic50.append(converted_ic)
return pic50
# todo add additional fingerprints
def _calculate_fingerprint(mol, fp_type='morgan', radius=2, fpSize=1024):
"""
Calculates molecular fingerprints for various RDKit algorithms.
Parameters:
mol: RDKit Mol object
fp_type: Type of fingerprint ('morgan', 'rdkit', 'atompair', 'torsion', 'maccs')
radius: Radius for Morgan (ignored by others)
fpSize: Size of the bit vector (ignored by MACCS)
"""
# 1. Map string names to their respective Generator factory functions
generators = {
'morgan': lambda: rdFingerprintGenerator.GetMorganGenerator(radius=radius, fpSize=fpSize),
'rdkit': lambda: rdFingerprintGenerator.GetRDKitFPGenerator(fpSize=fpSize),
'atompair': lambda: rdFingerprintGenerator.GetAtomPairGenerator(fpSize=fpSize),
'torsion': lambda: rdFingerprintGenerator.GetTopologicalTorsionGenerator(fpSize=fpSize),
}
# 2. Handle MACCS Keys separately (they have a fixed size of 167 bits)
if fp_type.lower() == 'maccs':
return MACCSkeys.GenMACCSKeys(mol)
# 3. Get the generator and produce the fingerprint
if fp_type.lower() in generators:
gen = generators[fp_type.lower()]()
return gen.GetFingerprint(mol)
else:
raise ValueError(f"Unknown fingerprint type: {fp_type}. "
f"Choose from {list(generators.keys()) + ['maccs']}")
def _color_to_rgb(color_input):
"""Support function to convert Matplotlib colors to RGB for RDKit highlighting"""
try:
return mcolors.to_rgb(color_input)
except ValueError:
return f"Error: '{color_input}' is not a recognized color name or hex code."
if __name__ == "__main__":
import doctest
doctest.testmod()