Source code for pharmacophore.pharmacophore

import warnings
import collections
import pandas as pd
import numpy as np
from tqdm import tqdm
from rdkit import Chem
from rdkit.Chem import AllChem
from typing import Union, Optional

from pharmacophore.constants import feature_factory, FEATURES, FEATURE_COLORS, color_convert


[docs] class Pharmacophore: def __init__(self, features: Union[str, dict] = 'default'): """ Initialize the Pharmacophore class. :param features: Union[str, dict] Set features to use. Will default to 'default'. Can also accept RDKit features or a dictionary of features. """ self.sdf = None self.features = features self.features_list = None
[docs] def read_sdf(self, sdf_file: str = None, verbose: bool = False): """ Read sdf files. :param sdf_file: Str File path to .sdf file. :param verbose: Bool Output description when reading .sdf file. :return: list A list of molecules in Chem.Mol format. """ supplier = Chem.SDMolSupplier(sdf_file) # extract ROMols into a list mol_list = [] if verbose: for mol in tqdm(supplier, desc="Reading Molecules"): mol_list.append(mol) else: for mol in supplier: mol_list.append(mol) self.sdf = mol_list return mol_list
[docs] def feature_types(self, features: Optional[Union[str, dict]] = None): """ Output feature types that will be displayed in the pharmacophore model. This will default to 'default' settings. However, default RDKit features can also be used or a dictionary containing a dictionary of features with SMARTS string can be used. :param features: Optional[Union[str, dict]] The feature type to be used in the pharmacophore model. This will default to "default" settings. Using "rdkit" will set the model to default RDKit features. Custom features can be used by giving a dictionary with feature:list pair, where the list contains SMARTS string for a given feature. :return: """ # set instance variable if features is None: features = self.features global phrase if features == "default": phrase = f"Default features: \n('Donor', 'Acceptor', 'Aromatic', 'Hydrophobe')" elif features == "rdkit": phrase = f"Default features from RDKit: \n{feature_factory.GetFeatureFamilies()}" elif isinstance(features, dict): phrase = f"Custom features: \n{list(features.keys())}" return phrase
[docs] def to_df(self, mols: list = None, mol_name: list = None, features: Optional[Union[str, dict]] = None): """ From a list of containing Chem.MOl and a list containing molecule names, create a dataframe displaying matching features the molecules. Method will default to "default" settings. Users can use "rdkit" to set the model to default RDKit features, or give a dictionary with feature:list pair, where the list contains SMARTS string for a given feature. :param mols: list A list containing molecules in Chem.Mol format. :param mol_name: list A list containing names of molecules. :param features: Union[str, dict] The feature type to be used in the pharmacophore model. This will default to "default" settings. Using "rdkit" will set the model to default RDKit features. Custom features can be used by giving a dictionary with feature:list pair, where the list contains SMARTS string for a given feature. :return: """ global feat_factory # set instance variable if features is None: features = self.features molecule_feature_frequencies = [] # use default features if features == 'rdkit': feat_factory = feature_factory # get default features, count frequency, and append into list for mol in mols: feats = [feature.GetFamily() for feature in feat_factory.GetFeaturesForMol(mol)] feature_frequency = collections.Counter(feats) molecule_feature_frequencies.append(feature_frequency) # use default features elif features == 'default': feature_list = [] # get features from constant file, count pharmacophore type, append to list for mol in mols: feats = self._calc_pharmacophore(mol) for feat in feats: feature_list.append(feat[0]) feature_frequency = collections.Counter(feature_list) molecule_feature_frequencies.append(feature_frequency) feature_list = [] # reset list to avoid double counting # use custom features elif isinstance(features, dict): # reset instance variable self.features = features feature_list = [] # get features from constant file, count pharmacophore type, append to list for mol in mols: feats = self._calc_pharmacophore(mol, features) for feat in feats: feature_list.append(feat[0]) feature_frequency = collections.Counter(feature_list) molecule_feature_frequencies.append(feature_frequency) feature_list = [] # reset list to avoid double counting elif isinstance(features, dict) is False: raise ValueError("Only 'default', 'rdkit', or custom features as dictionary are supported!") # check if mol_name is given if mol_name is None: mol_name = [] for x in range(len(mols)): name = "Mol" + str(x) mol_name.append(name) # # for troubleshooting # print(molecule_feature_frequencies) # reformat table and rename columns to molecule list df = pd.DataFrame(molecule_feature_frequencies) feature_frequencies_df = df.transpose() feature_frequencies_df = feature_frequencies_df.fillna(0).astype(int) feature_frequencies_df = feature_frequencies_df.set_axis(mol_name, axis=1) return feature_frequencies_df
[docs] def calc_pharm(self, mol: Chem.Mol = None, features: Optional[Union[str, dict]] = None): """ Generate a list of pharmacophore features and position from a molecule. :param mol: Chem.Mol A molecule in ROMol format. :param features: str Designate the type of pharmacophore features to calculate from. Will default to "default" settings. This param can also accept "rdkit" to use RDKit default features or give a dictionary containing feature:list pair, where the list contains SMARTS string for a given feature. :return: list A list of pharmacophore features, matching atom index, and position from a molecule. """ global pharmacophore # set instance variable if features is None: features = self.features # calculate pharmacophore by rdkit or pharmacophore dict if features == 'rdkit': pharmacophore = self._calc_rdkit(mol) elif features == 'default': pharmacophore = self._calc_pharmacophore(mol) elif isinstance(features, dict): pharmacophore = self._calc_pharmacophore(mol, features) else: raise ValueError("Only 'default', 'rdkit', or custom features as dictionary are supported!") # set features to self self.features_list = pharmacophore return pharmacophore
[docs] def add_feats(self, mol: Chem.Mol = None, substruct: str = None, type: str = None): """ Optional method to add specific features if not present using default or RDKit rules. :param mol: Chem.Mol Main molecule in ROMol format. :param substruct: str A SMARTS string for specific substructure to match :param type: str Set the type of interaction for substructure. Only 'Donor', 'Acceptor', 'Aromatic', and 'Hydrophobe' is allowed! :return: """ # convert substruct smarts string into ROMol query = [Chem.MolFromSmarts(substruct)] # find matches matches = find_matches(mol, query) # check type if type == 'Donor' or type == 'Acceptor' or type == 'Aromatic' or type == 'Hydrophobe': pass else: raise ValueError(f"Type {type} not supported! Only 'Donor', 'Acceptor', 'Aromatic', and 'Hydrophobe' is " f"allowed!") # obtain specific data from each match for match in matches: result = [type, match[0], match[1][0], match[1][1], match[1][2]] # add custom feats to self.features_list self.features_list.append(result) return self.features_list
[docs] def output_features(self, feature_list: Optional[list] = None, savepath: str = None, type: str = "sphere", sphere_size: float = 0.7, transparency: float = 0.2, color: dict = None): """ Output features as a .pml format for visualization in PyMol. :param feature_list: Optional[list] A list containing features, corresponding atom number, and 3D position. Preferably genreated using the calc_pharm method. :param savepath: str = None Must be a file in .pml format. :param type: str = "Sphere" Set the pharmacophore representation. Can be "sphere" or "surface". If surface is used, transparency can be set for each pharmacophore. :param sphere_size: float = 0.5 Set size of spheres. :param transparency: float = 0.2 Set teh transparency of pharmacophore. Only works when the pharmacophore type is set to "surface". :param color: dict Set color for the pharmacophores. Must be given as Acceptor: color where color is a tuple for RGB or a string for a specific color to be translated into RGB format. :return: """ if feature_list is None: feature_list = self.features_list with open(savepath, "w") as f: # feat_factory = feature_factory # features = feat_factory.GetFeaturesForMol(mol) print(f"Number of features: {len(feature_list)}") # set feature colors if color is None: for feat in FEATURE_COLORS: f.write(f"set_color {feat}_color, {FEATURE_COLORS[feat]}\n") # if custom color, convert to RGB elif isinstance(color, dict): for shade in color: rgb = color_convert(color[shade]) # print(rgb) # for troubleshooting f.write(f"set_color {shade}_color, {rgb}\n") else: warnings.warn("Issue with color features!") # to give sequential numbering for each group: feature_counts = { "Acceptor": 0, "Donor": 0, "Hydrophobe": 0, "Aromatic": 0, "LumpedHydrophobe": 0, "PosIonizable": 0} # get features for feat in feature_list: feature = feat[0] # extract feature type feature_counts[feature] += 1 # give feature count count = feature_counts[feature] # get current count # get feature position pos_x = feat[2] pos_y = feat[3] pos_z = feat[4] f.write( # f"pseudoatom {feature}_{count}, pos=[{pos_x}, {pos_y}, {pos_z}], color={color_type}\n" f"pseudoatom {feature}_{count}, pos=[{pos_x}, {pos_y}, {pos_z}]\n" ) #todo add mol name in front of pharmacophre # set color and sphere size f.write("\n") if type == "sphere": f.write("show spheres, Acceptor_*\n") f.write("color acceptor_color, Acceptor_*\n") f.write("\n") f.write("show spheres, Donor_*\n") f.write("color donor_color, Donor_*\n") f.write("\n") f.write("show spheres, Hydrophobe_*\n") f.write("color hydrophobe_color, Hydrophobe_*\n") f.write("\n") f.write("show spheres, Aromatic_*\n") f.write("color aromatic_color, Aromatic_*\n") f.write("\n") f.write("show spheres, LumpedHydrophobe_*\n") f.write("color lumpedhydrophobe, LumpedHydrophobe_*\n") f.write("\n") f.write("show spheres, PosIonizable*\n") f.write("color posionizable, PosIonizable*\n") f.write("\n") f.write(f"set sphere_scale, {sphere_size}\n") # Adjust sphere size in PyMOL elif type == "surface": f.write("show surface, Acceptor_*\n") f.write("color acceptor_color, Acceptor_*\n") f.write(f"set transparency, {transparency}, Acceptor_*") f.write("\n") f.write("show surface, Donor_*\n") f.write("color donor_color, Donor_*\n") f.write(f"set transparency, {transparency}, Donor_*\n") f.write("\n") f.write("show surface, Hydrophobe_*\n") f.write("color hydrophobe_color, Hydrophobe_*\n") f.write(f"set transparency, {transparency}, Hydrophobe_*\n") f.write("\n") f.write("show surface, Aromatic_*\n") f.write("color aromatic_color, Aromatic_*\n") f.write(f"set transparency, {transparency}, Aromatic_*\n") f.write("\n") f.write("show surface, LumpedHydrophobe_*\n") f.write("color lumpedhydrophobe, LumpedHydrophobe_*\n") f.write(f"set transparency, {transparency}, LumpedHydrophobe_*\n") f.write("\n") f.write("show surface, PosIonizable*\n") f.write("color posionizable, PosIonizable*\n") f.write(f"set transparency, {transparency}, PosIonizable*\n") f.write("\n") else: raise ValueError("Issue with type {type}! Only 'sphere' or 'surface' allowed!") print(f"Feature visualization script written to {savepath}.")
def _calc_pharmacophore(self, mol: Chem.Mol = None, features: Optional[dict] = None): """ Calculate pharmacophore features from a molecule using dict from constants :param mol: Chem.Mol Input molecule in ROMol format. :param features: Optional[dict] Include custom features for calculating features. :return: List of pharmacophore type and centroid coordinates. """ global pharmacophore # read in custom feature dict if features is None: constant_feats = FEATURES else: constant_feats = features # hold matches matches = {} # Identify matches to feature dict for key, value in constant_feats.items(): try: query = [Chem.MolFromSmarts(smarts) for smarts in value] matches[key] = find_matches(mol, query, verbose=False) except: pass # remove duplicate SMARTS matches cleaned_matches = {} for key, value in matches.items(): unique_lists = [] for list in value: if list not in unique_lists: unique_lists.append(list) cleaned_matches[key] = unique_lists # create list containing pharmacophore and centroid coordinates pharmacophore = [] for key, value in cleaned_matches.items(): for match in value: # extarct feature type, atom match, and XYZ position p = [key, match[0], match[1][0], match[1][1], match[1][2]] pharmacophore.append(p) # remove duplicates for aromatic/hydrophobe set atom_indices = set() final_pharmacophore = [] # extract aromatic/hydrophobe matches by atom index adn remove hydrophobe for entry in pharmacophore: label, atom_indexes, *rest = entry index = frozenset(atom_indexes) # Convert atom indexes to a frozenset so order does not matter # Check if the atom indexes are already in the set if index not in atom_indices: final_pharmacophore.append(entry) atom_indices.add(index) # keep Aromatic elif label == 'Aromatic': final_pharmacophore = [e for e in final_pharmacophore if not (e[0] == 'Hydrophobe' and frozenset(e[1]) == index)] final_pharmacophore.append(entry) atom_indices.add(index) return final_pharmacophore def _calc_rdkit(self, mol: Chem.Mol = None): """ Calculate pharmacophore features from a molecule using default RDKit methods. :param mol: Chem.Mol input molecule in ROMol format. :return: List of pharmacophore type and centroid coordinates. """ global pharmacophore # load default RDKit feature factory file feat_factory = feature_factory # get list of features for molecule features = feat_factory.GetFeaturesForMol(mol) # store pharmacophore features as list pharmacophore = [] for feature in features: fam = feature.GetFamily() pos = feature.GetPos() atom_indices = feature.GetAtomIds() pharmacophore_item = [fam, atom_indices, pos[0], pos[1], pos[2]] pharmacophore.append(pharmacophore_item) return pharmacophore
# Support function to fix bond order of molecule
[docs] def fix_bond_order(mol: Chem.Mol, template_smi: str, savepath: str = None): """ Fix bond order for a given molecule. A template smiles string must be given. :param mol: Chem.Mol Molecule to fix. Must be in ROMol format. :param template_smi: str A smiles string for molecule to fix. Recommend canonical smiles string if possible. :param savepath: str Set the save path for the molecule. Output will be in sdf format. :return: """ # remove hydrogen if present mol_prep = Chem.RemoveHs(mol) # read template template = Chem.MolFromSmiles(template_smi) # assign bond orders to crystal ligand fixed_mol = AllChem.AssignBondOrdersFromTemplate(mol_prep, template) # Map atoms from the template to the docked pose template_mapping = mol_prep.GetSubstructMatch(template) if not template_mapping: raise ValueError("Failed to match template to docked pose.") # Transfer 3D coordinates from docked_pose to fixed_mol new_conformer = Chem.Conformer(fixed_mol.GetNumAtoms()) original_conformer = mol_prep.GetConformer() # set coordinates of heavy atoms to fixed_mol for new_idx, origin_idx in enumerate(template_mapping): pos = original_conformer.GetAtomPosition(origin_idx) new_conformer.SetAtomPosition(new_idx, pos) # Add 3D coordinates to the molecule fixed_mol.AddConformer(new_conformer, assignId=True) # save file Chem.MolToMolFile(fixed_mol, savepath)
[docs] def find_matches(mol: Chem.Mol = None, patterns: list[Chem.Mol] = None, verbose=True): """ Support function to visualize matches between query molecule and features. :param mol: Chem.Mol Query molecule. Must be in ROMol format. :param patterns: list[Chem.Mol] A list of molecules to match against the query molecule. :param verbose: Bool Output messages for matches. :return: """ matches = [] for pattern in patterns: matched = mol.GetSubstructMatches(pattern) # get centroid and coordinates for each match for m in matched: try: centroid = _compute_match_centroid(mol, m) matches.append([m, centroid]) except: pass # output statement if no matches if verbose is True: if len(matches) == 0: output_message = Chem.MolToSmarts(pattern) print(f"No Matches to {output_message}!") return matches return matches
def _compute_match_centroid(mol, matched_pattern): """ Support function to calculate centroid of matches between query molecule and features. :param mol: :param matched_pattern: :return: """ conf = mol.GetConformer() positions = [conf.GetAtomPosition(i) for i in matched_pattern] center = np.mean(positions, axis=0) # convert result to float center = center.tolist() return tuple(center) if __name__ == "__main__": import doctest doctest.testmod()