├── demo ├── demo_1020.smi ├── subtructure_filter_demo.xls ├── rules.json └── phgdh_demo_vina.ini ├── docs └── platform.jpg ├── secse ├── growing │ ├── mutation │ │ ├── rules_demo.db │ │ ├── __init__.py │ │ └── mutation.py │ ├── __init__.py │ ├── filter_parallel.sh │ ├── filter.py │ └── pains_smarts.json ├── utilities │ ├── Structure Filter_20211015_v1.12.xls │ ├── __init__.py │ ├── load_rules.py │ ├── open_filter.py │ ├── function_helper.py │ ├── substructure_filter.py │ ├── autogridGen.sh │ ├── check_rules.py │ ├── selectByLE.py │ ├── wash_mol.py │ ├── excel2db.py │ └── ring_tool.py ├── __init__.py ├── report │ ├── __init__.py │ ├── filter_sdf_by_titles.pl │ └── grow_path.py ├── evaluate │ ├── __init__.py │ ├── ligprep_glide.sh │ ├── glide_docking.py │ ├── proprep.py │ ├── ligprep_unidock.sh │ ├── ligprep_autodock_gpu.sh │ ├── ligprep_vina_parallel.sh │ ├── docking.py │ └── ligprep.py ├── scoring │ ├── __init__.py │ ├── chemprop_pre.sh │ ├── sampling.py │ ├── diversity_score.py │ ├── docking_score_prediction.py │ └── ranking.py ├── run_secse.py └── grow_processes.py ├── requirements.txt ├── README.md └── LICENSE.txt /demo/demo_1020.smi: -------------------------------------------------------------------------------- 1 | c1ccccc1 f1 2 | c1ccncc1 f2 3 | c1cncnc1 f3 -------------------------------------------------------------------------------- /docs/platform.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KeenThera/SECSE/HEAD/docs/platform.jpg -------------------------------------------------------------------------------- /demo/subtructure_filter_demo.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KeenThera/SECSE/HEAD/demo/subtructure_filter_demo.xls -------------------------------------------------------------------------------- /secse/growing/mutation/rules_demo.db: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/KeenThera/SECSE/HEAD/secse/growing/mutation/rules_demo.db -------------------------------------------------------------------------------- /secse/utilities/Structure Filter_20211015_v1.12.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KeenThera/SECSE/HEAD/secse/utilities/Structure Filter_20211015_v1.12.xls -------------------------------------------------------------------------------- /secse/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Lu Chong 5 | @file: __init__.py 6 | @time: 2021/11/17/10:41 7 | """ 8 | -------------------------------------------------------------------------------- /secse/report/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Lu Chong 5 | @file: __init__.py 6 | @time: 2021/8/17/11:38 7 | """ 8 | -------------------------------------------------------------------------------- /secse/evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Lu Chong 5 | @file: __init__.py 6 | @time: 2021/8/17/11:38 7 | """ 8 | -------------------------------------------------------------------------------- /secse/growing/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Lu Chong 5 | @file: __init__.py 6 | @time: 2021/8/17/11:22 7 | """ 8 | -------------------------------------------------------------------------------- /secse/scoring/__init__.py: 
def json_to_DB(in_json, out_db_path, table_name="G-001"):
    """Load a JSON rule file into a table of a SQLite database.

    The JSON file is read into a DataFrame with ``pandas.read_json`` and
    written with ``DataFrame.to_sql`` (DataFrame index included, pandas
    default ``if_exists`` behaviour, i.e. an existing table is an error).

    Args:
        in_json: Path (or any input accepted by ``pandas.read_json``) of the
            rule file.
        out_db_path: Path of the SQLite database file to create or extend.
        table_name: Name of the table that receives the rules. Defaults to
            ``"G-001"``, the value that used to be hard-coded.

    Returns:
        None. Failures while writing the table are logged, not raised.
    """
    df = pd.read_json(in_json)
    conn = sqlite3.connect(out_db_path)
    try:
        df.to_sql(table_name, conn)
    except Exception as e:
        # Writing is best-effort: report the problem (e.g. table already
        # exists) and let the caller continue.
        logger.error(e)
    finally:
        # Always release the database handle, even on non-Exception exits
        # such as KeyboardInterrupt.
        conn.close()
#! /bin/bash
# @author: Lu Chong
# @file: filter_parallel.sh
# @time: 2021/ 03/03/9:26
#
# Run the molecule filter script on every per-seed CSV of a generation and
# collect the rows marked PASS into <workdir>/filter.csv.
#
# Positional arguments:
#   $1  workdir  - generation working directory (must contain generation_split_by_seed/)
#   $2  gen      - generation identifier, passed through to the filter script
#   $3  config   - config file path, passed through to the filter script
#   $4  cpu_num  - number of parallel jobs
# Environment:
#   SECSE - installation root; the filter script is $SECSE/growing/filter.py

# SECONDS is bash's built-in timer; resetting it lets us report runtime at the end.
SECONDS=0
workdir=${1}
gen=${2}
config=${3}
cpu_num=${4}
script=$SECSE/growing/filter.py
files=tmp.txt
cd "${workdir}"/generation_split_by_seed || exit
# Build one ";"-separated job line per input CSV: file;workdir;gen;config
for i in *.csv; do
  echo "$i;$workdir;$gen;$config"
done >$files

mkdir -p ../filter_flag
# filter default
# Each job line is split into columns on ";" (-C) and handed to the filter script.
# NOTE(review): the command has no explicit {1} {2} ... placeholders, so this
# presumably relies on parallel appending the columns as arguments - confirm.
parallel --jobs "$cpu_num" -I {} -a ${files} -C ";" python "$script"
rm $files
cd "${workdir}"/filter_flag || exit
# Collect the PASS lines of every flag file, one grep per file.
for i in *.csv; do
  echo "$i" | parallel grep PASS
done >"${workdir}"/filter.csv
cd "${workdir}" || exit
#rm -r filter_flag/
# Remove intermediate files of this generation; filter_flag/ is kept.
rm -r generation_split_by_seed/ mutation.csv mutation.raw generation.raw
duration=$SECONDS
echo "Filter runtime: $((duration / 60)) minutes $((duration % 60)) seconds."
31 | -------------------------------------------------------------------------------- /demo/phgdh_demo_vina.ini: -------------------------------------------------------------------------------- 1 | [general] 2 | project_code = PHG 3 | workdir = /home/dachong/PHGDH/res/demo001/ 4 | fragments = /home/dachong/PHGDH/input/demo_1020.smi 5 | num_gen = 5 6 | num_per_gen = 200 7 | seed_per_gen = 10 8 | start_gen = 0 9 | cpu = 320 10 | gpu = 0 11 | rule_db = 0 12 | 13 | [docking] 14 | docking_program = Vina 15 | target = /home/dachong/PHGDH/input/PHGDH_6RJ3_for_vina.pdbqt 16 | x = 20.9 17 | y = -10.4 18 | z = 3.0 19 | box_size_x = 15 20 | box_size_y = 15 21 | box_size_z = 15 22 | rmsd = 2 23 | delta_score = -1.0 24 | score_cutoff = -9 25 | 26 | [prediction] 27 | mode = 2 28 | dl_per_gen = 100 29 | dl_score_cutoff = -9 30 | 31 | [properties] 32 | mw = 450 33 | logp_lower = 0.5 34 | logp_upper = 7 35 | chiral_center = 2 36 | heteroatom_ratio = 0.35 37 | rdkit_rotatable_bound_num = 5 38 | keen_rotatable_bound_num = 3 39 | rigid_body_num = 2 40 | hbd = 5 41 | hba = 10 42 | tpsa = 200 43 | lipinski_violation = 1 44 | qed = 0.5 45 | max_ring_size = 7 46 | max_ring_system_size = 3 47 | ring_system_count = 4 48 | bridged_site_count = 2 49 | spiro_site_count = 1 50 | fused_site_count = 3 51 | rdkit_sa_score = 5 52 | substructure_filter = 0 -------------------------------------------------------------------------------- /secse/utilities/open_filter.py: -------------------------------------------------------------------------------- 1 | # we use logs for generated molecule filter 2 | 3 | """ 4 | The user can define their own filter function as needed. 5 | The input parameter of the function is an rdkit mol object, 6 | and the return value is a boolean. If the molecule is needed, return true; 7 | if it is not needed, return false. 8 | The user can modify this Python script file according to their own requirements. 9 | 10 | The following code is just an example. 
def user_filter(mol):
    """Example user-defined filter based on an ESOL-style solubility estimate.

    The estimate is a linear model over Crippen logP, molecular weight,
    rotatable-bond count and aromatic proportion (aromatic atoms / all atoms).

    Args:
        mol: An RDKit molecule.

    Returns:
        bool: ``True`` when the estimated value is <= -4.5, ``False``
        otherwise. A molecule without atoms returns ``False``.
    """
    atom_count = mol.GetNumAtoms()
    if atom_count == 0:
        # The aromatic proportion is undefined for an empty molecule; reject
        # it instead of failing with ZeroDivisionError.
        return False

    mw = Descriptors.MolWt(mol)
    logp = Crippen.MolLogP(mol)
    rotors = Lipinski.NumRotatableBonds(mol)
    # Each match of the SMARTS "a" is one aromatic atom.
    ap = len(mol.GetSubstructMatches(Chem.MolFromSmarts("a"))) / atom_count

    # NOTE(review): these coefficients differ from the formula quoted in the
    # file header - presumably a refit from the cited reference; confirm.
    intercept = 0.16
    coef = {"logp": -0.63, "mw": -0.0062, "rotors": 0.066, "ap": -0.74}
    esol = intercept + coef["logp"] * logp + coef["mw"] * mw + coef["rotors"] * rotors + coef["ap"] * ap

    # NOTE(review): this keeps molecules whose estimate is at or below -4.5;
    # confirm that this direction is the intended one for the example.
    return esol <= -4.5
def shell_cmd_execute(cmd_lst, capture_mode="all"):
    """Join ``cmd_lst`` into one shell command line, run it and log the result.

    Args:
        cmd_lst: Sequence of strings; joined with single spaces and executed
            through the shell (``shell=True``).
        capture_mode: What to capture from the command:
            ``"all"``   - stdout and stderr merged; returned as a string.
            ``"error"`` - stdout discarded, stderr captured and returned.
            ``0``       - nothing captured; returns ``None``.

    Returns:
        The captured text for ``"all"`` / ``"error"``, otherwise ``None``.

    Raises:
        ValueError: If ``capture_mode`` is not one of the values above.
        Exception: If the command exits with a non-zero status; the original
            ``subprocess.CalledProcessError`` is chained as the cause.
    """
    # NOTE: the command is interpreted by the shell, so callers are
    # responsible for quoting any element that may contain spaces or
    # shell metacharacters.
    cmd = " ".join(cmd_lst)
    logger.info(f"Executing command:\n{cmd}")

    try:
        # Set subprocess options based on the capture_mode
        if capture_mode == "all":
            result = subprocess.run(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, shell=True, check=True
            )
            if result.stdout:
                logger.info("Command output:\n" + result.stdout)
            return result.stdout

        elif capture_mode == "error":
            result = subprocess.run(
                cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True, shell=True, check=True
            )
            # Only report stderr when the command actually wrote something,
            # mirroring the handling of stdout in the "all" branch.
            if result.stderr:
                logger.error("Captured stderr:\n" + result.stderr)
            return result.stderr

        elif capture_mode == 0:
            subprocess.run(cmd, shell=True, check=True)
            return None

        else:
            raise ValueError("Invalid capture_mode. Use 'all', 'error', or 0.")

    except subprocess.CalledProcessError as e:
        logger.error(f"Command failed with return code {e.returncode}.")
        if capture_mode in {"all", "error"}:
            # In "error" mode stdout went to DEVNULL, so e.output is None and
            # the diagnostic text lives in e.stderr; in "all" mode stderr was
            # merged into stdout, i.e. e.output.
            captured = e.stderr if capture_mode == "error" else e.output
            if captured:
                logger.error("Captured error:\n" + captured)
            else:
                logger.error("No error captured.")
        raise Exception(f"Error executing command: {cmd}") from e
df_het.z_coord.max() + extend 47 | z_min = df_het.z_coord.min() - extend 48 | 49 | x_size = x_max - x_min 50 | y_size = y_max - y_min 51 | z_size = z_max - z_min 52 | 53 | return x_center, y_center, z_center, x_size, y_size, z_size 54 | -------------------------------------------------------------------------------- /secse/scoring/chemprop_pre.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # -*- coding:utf-8 _*- 3 | # @author: Lu Chong 4 | # @file: chemprop_pre.sh 5 | # @time: 2021/10/27/16:32 6 | workdir=${1} 7 | train=${2} 8 | pre=${3} 9 | max_gen=${4} 10 | num_output=${5} 11 | seed=${6} 12 | model_dir=$workdir/prediction/models/ 13 | files=tmp.txt 14 | 15 | mkdir -p "$model_dir" 16 | 17 | # all data 18 | model="$model_dir"/G"$max_gen"_seed"$seed" 19 | chemprop train --data-path "$train" --task-type regression --save-dir \ 20 | "$model" --data-seed "$seed" --show-individual-scores --split-type random -qq 21 | 22 | # split files and prediction with CPU Parallelization 23 | split_dir=$workdir/prediction/pre_split_$max_gen 24 | mkdir -p "$split_dir" 25 | split -l 1000 -d "$pre" "$split_dir"/part --additional-suffix ".csv" 26 | 27 | pre_dir="$workdir"/prediction/pre_dir_$max_gen 28 | mkdir -p "$pre_dir" 29 | cd "$split_dir" || exit 30 | # add header 31 | sed -i "1i\\id,smiles" part*.csv 32 | for i in *.csv; do 33 | echo "$split_dir/$i;$pre_dir/$i" 34 | done >$files 35 | 36 | # run chemprop_predict 37 | parallel -I {} -a ${files} -C ";" chemprop predict --test-path {1} --preds-path {2} --smiles-columns smiles --model-paths "$model"/model_0/best.pt --accelerator cpu -qq 38 | 39 | # merge prediction 40 | cd "$workdir"/prediction || exit 41 | tail -n +2 -q "$pre_dir"/part*.csv >pre_G"$max_gen".csv 42 | 43 | # fetch top predicted compounds 44 | sort -nk3 -t, pre_G"$max_gen".csv >pre_G"$max_gen"_sorted.csv 45 | echo "id,smiles,pred score" >pre_G"$max_gen".csv 46 | head -n "$num_output" pre_G"$max_gen"_sorted.csv 
class StructureFilter:
    """Substructure filter driven by a spreadsheet of SMARTS rules.

    The spreadsheet must provide the columns ``Pattern`` (SMARTS), ``ID``
    and ``Max`` (maximum allowed number of matches). Rows with a missing
    value in any of these columns are ignored.
    """

    def __init__(self, filter_lst=FILTER_FILE):
        """Read the rule spreadsheet and compile every SMARTS pattern.

        Args:
            filter_lst: Path of the Excel file holding the rules.

        Raises:
            ValueError: If a pattern cannot be parsed as SMARTS. Without this
                check the failure would only surface later, while matching.
        """
        df = pd.read_excel(filter_lst, usecols=["Pattern", "ID", "Max"]).dropna()
        df["ID"] = df["ID"].astype(str)
        df = df.set_index("ID")
        df["Pattern_sma"] = df["Pattern"].apply(lambda x: Chem.MolFromSmarts(x))
        # MolFromSmarts yields None for an unparsable pattern.
        invalid_ids = df.index[df["Pattern_sma"].isna()].tolist()
        if invalid_ids:
            raise ValueError("Invalid SMARTS pattern for rule ID(s): " + ", ".join(invalid_ids))
        # {rule ID: {"Pattern_sma": compiled pattern, "Max": allowed count}}
        self.fdic = df[["Pattern_sma", "Max"]].T.to_dict()

    @staticmethod
    def _violates(mol, rule):
        """Return True when ``mol`` matches ``rule`` more often than allowed."""
        pattern = rule["Pattern_sma"]
        max_count = int(rule["Max"])
        if max_count == 0:
            # No match allowed at all: an existence test is enough.
            return mol.HasSubstructMatch(pattern)
        return len(mol.GetSubstructMatches(pattern)) > max_count

    def sfilter(self, mol):
        """Return the ID of the first violated rule, or ``"PASS"``."""
        for rule_id, rule in self.fdic.items():
            if self._violates(mol, rule):
                return rule_id
        return "PASS"

    def sfilter_all(self, mol):
        """Return the list of all violated rule IDs, or ``"PASS"`` if none."""
        res = [rule_id for rule_id, rule in self.fdic.items() if self._violates(mol, rule)]
        if not res:
            return "PASS"
        return res
#! /bin/bash
# -*- coding:utf-8 _*-
# @author: Yannan Yuan
# @file: ligprep_unidock.sh
# @time: 2024/3/11/17:00
#
# Prepare ligands from a SMILES file, dock them with the program named by
# $UNIDOCK and convert the resulting poses to PDB files.
#
# Positional arguments:
#   $1 workdir   $2 smi   $3 receptor
#   $4 x  $5 y  $6 z                      - box centre
#   $7 box_size_x  $8 box_size_y  $9 box_size_z
#   $10 cpu_num                           - number of parallel jobs
# Environment:
#   SECSE   - installation root ($SECSE/evaluate/ligprep.py is used)
#   UNIDOCK - docking executable

SECONDS=0
workdir=${1}
smi=${2}
receptor=${3}
x=${4}
y=${5}
z=${6}
box_size_x=${7}
box_size_y=${8}
box_size_z=${9}
cpu_num=${10}
script=$SECSE/evaluate/ligprep.py
split_dir=$workdir/docking_split
docking_dir=$workdir/docking_poses
lig_dir=$workdir/ligands_for_docking
pdb_dir=$workdir/pdb_files
sdf_dir=$workdir/sdf_files
# Not used below; the Vina variant of this script writes its config here.
conf=$workdir/vina_config.txt
cd "$workdir" || exit

# Remove a directory if it exists and create it again, empty.
create_clean_directory() {
  dir_name=$1
  if [ -d "$dir_name" ]; then
    echo "Directory $dir_name already exists, removing $dir_name ..."
    rm -rf "$dir_name"
  fi
  if mkdir "$dir_name"; then
    return 0
  else
    echo "Creating directory failed: $dir_name"
    return 1
  fi
}
for dir in "$split_dir" "$docking_dir" "$lig_dir" "$pdb_dir" "$sdf_dir"; do
  # Stop here: every later step needs these directories.
  create_clean_directory "$dir" || exit 1
done
# split by line
split -l 100 -d "$smi" "$split_dir"/part --additional-suffix ".smi"

# run ligprep
cd "$split_dir" || exit
find . -name "*smi" | parallel --jobs "$cpu_num" python "$script" "$workdir"

# run unidock
files=ligand_index.txt
cd "$lig_dir" || exit
for i in *pdbqt; do
  echo "$lig_dir/$i"
done >"$files"

# $UNIDOCK is left unquoted on purpose so that a value carrying extra
# options keeps working; all path and numeric arguments are quoted.
$UNIDOCK --receptor "$receptor" --ligand_index "$files" --dir "$docking_dir" \
  --center_x "$x" --center_y "$y" --center_z "$z" \
  --size_x "$box_size_x" --size_y "$box_size_y" --size_z "$box_size_z" \
  --exhaustiveness 128 --max_step 20 --refine_step 3 \
  --num_modes 3 --energy_range 3 --verbosity 2 >/dev/null
rm "$files"

find "$docking_dir" -name "*pdbqt" | parallel --jobs "$cpu_num" obabel -ipdbqt {} -O "$pdb_dir"/{/.}-dp.pdb -m &>/dev/null

duration=$SECONDS
echo "Docking runtime: $((duration / 60)) minutes $((duration % 60)) seconds."
28 | rm -rf "$dir_name" 29 | fi 30 | if mkdir "$dir_name"; then 31 | return 0 32 | else 33 | echo "Creating directory failed: $dir_name" 34 | return 1 35 | fi 36 | } 37 | for dir in "$split_dir" "$docking_dir" "$lig_dir" "$pdb_dir" "$sdf_dir"; do 38 | create_clean_directory "$dir" 39 | done 40 | 41 | # split by line 42 | split -l 100 -d "$smi" "$split_dir"/part --additional-suffix ".smi" 43 | 44 | # run ligprep 45 | cd "$split_dir" || exit 46 | find . -name "*smi" | parallel --jobs "$cpu_num" python "$script" "$workdir" 47 | 48 | # run autdock gpu 49 | cd "$lig_dir" || exit 50 | for i in *pdbqt; do 51 | echo "$lig_dir/$i;$docking_dir/${i%.*}" 52 | done >$files 53 | 54 | parallel --jobs "$gpu_num" -I {} -a ${files} -C ";" "$AUTODOCK_GPU/bin/autodock_gpu_128wi" --ffile "$receptor" --lfile {1} --resnam {2} --seed 12345 -D '$(({%}))' -x 0 -n 3 # >/dev/null 55 | #rm $files 56 | 57 | # covert dlg file to pdb 58 | cd "$docking_dir" || exit 59 | find . -name "*.dlg" | parallel "grep '^DOCKED' {} >{.}.tmp" 60 | find . -name "*.tmp" | parallel "cut -c9- {} >{.}.pdbqt" 61 | rm ./*.tmp 62 | 63 | sed -e "s/USER Estimated Free Energy of Binding =/REMARK/g" -i *pdbqt 64 | find "$docking_dir" -name "*pdbqt" | parallel --jobs "$cpu_num" obabel -ipdbqt {} -O "$pdb_dir"/{/.}-dp.pdb -m &>/dev/null 65 | 66 | duration=$SECONDS 67 | echo "Docking runtime: $((duration / 60)) minutes $((duration % 60)) seconds." 
#!/usr/bin/perl
# -*- coding:utf-8 _*-
# @author: Lu Chong
# @file: filter_sdf_by_titles.pl
# @time: 2023/06/15/17:58
#
# Copy to the output file every SDF record of the input file whose title
# (first non-blank line of the record) is listed in the title file.

use strict;
use warnings;

# Check command line arguments
my ($input, $title_file, $output) = @ARGV;
die "Usage: perl filter_sdf_by_titles.pl <input.sdf> <title_file> <output.sdf>\n"
    unless defined $input && defined $output;

# Open output file (3-argument form: the file name is never parsed as a mode)
open(my $out, ">", $output) or die "Cannot open $output for writing: $!\n";

# Read titles, one per line, surrounding whitespace ignored
my %titles;
if ($title_file) {
    open(my $fh, "<", $title_file) or die "Cannot open $title_file for reading: $!\n";
    while (my $title = <$fh>) {
        chomp($title);
        $title =~ s/^\s+|\s+$//g;
        # A blank line must not select whitespace-only residue between records.
        next if $title eq '';
        $titles{$title} = 1;
    }
    close($fh);
}

open(my $in, "<", $input) or die "Cannot open $input for reading: $!\n";

# Process input file
local $/ = '$$$$';    # Read one SDF record (terminated by $$$$) at a time

my $written = 0;

while (my $buffer = <$in>) {
    # Get the title of the current structure
    my $title = get_title($buffer);
    next unless exists $titles{$title};

    $written++;
    # Every record but the first of the input starts with the newline that
    # followed the previous $$$$; strip it from the first record written so
    # the output does not begin with an empty line.
    $buffer =~ s/^\n// if $written == 1;
    print $out $buffer;
}

# Add newline at the end of the output file
print $out "\n";

close($in);
close($out);

# Extract the first non-blank line of a record as the CT title,
# without trailing whitespace (e.g. a carriage return).
sub get_title {
    my ($ct) = @_;
    $ct =~ s/^\s+//;
    my ($title) = $ct =~ /^(.+)$/m;
    return '' unless defined $title;
    $title =~ s/\s+$//;
    return $title;
}
-name "*smi" | parallel --jobs "$cpu_num" python "$script" "$workdir" 49 | 50 | # write vina config file 51 | cat >"$conf" <$files 74 | 75 | # ignore Vina stdout 76 | parallel --jobs "$cpu_num" -I {} -a ${files} -C ";" "$VINA" --config "$conf" --ligand {1} --out {2} >/dev/null 77 | rm $files 78 | 79 | find "$docking_dir" -name "*pdbqt" | parallel --jobs "$cpu_num" obabel -ipdbqt {} -O "$pdb_dir"/{/.}-dp.pdb -m &>/dev/null 80 | 81 | duration=$SECONDS 82 | echo "Docking runtime: $((duration / 60)) minutes $((duration % 60)) seconds." 83 | -------------------------------------------------------------------------------- /secse/utilities/autogridGen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Zhenting Gao 4 | # Command 5 | # - autogridGen.sh pro.pdbqt grid.gpf 6 | # Update 7 | # - 2023/5/16 8 | # - This script is created for AutoDock Grid generation 9 | 10 | # Parameters 11 | pdbqtFile=$1 12 | gridInputFile=$2 #gpf file 13 | # Please download autogrid4 from https://autodock.scripps.edu/download-autodock4/ 14 | autogrid='/tools/docking/autodock/cpu/autogrid4' 15 | 16 | if [ ! -f ${autogrid} ]; then 17 | echo ${autogrid}" is needed but does not exist!" 18 | echo " 19 | - Please download autogrid4 from https://autodock.scripps.edu/download-autodock4/ 20 | - Modify this script at line 14 to set the correct path of autogrid4 21 | " 22 | exit 23 | fi 24 | 25 | if [ ! $# -eq 2 ]; then #Test input parameter 26 | echo 'autogridGen.sh protein.pdbqt gpfFile' 27 | echo 28 | echo "grid.gpf.example is created for your reference." 
def sample_by_rule_weight(gen, filter_df, workdir_now, max_sample=500000):
    """Draw a priority-weighted sample of the filtered molecules.

    Rows whose ``type`` is ``"G-002"`` are sampled separately from the
    remaining rows so that their share of the sample is bounded by a ratio
    that shrinks with the generation number (0.3 up to generation 3, 0.1 up
    to generation 7, 0.01 afterwards). The sample is also written to
    ``sampled.csv`` in ``workdir_now``.

    Args:
        gen: Generation number; also selects the weight column
            ``priority_gen_<gen>``.
        filter_df: DataFrame with at least the columns ``type`` and
            ``priority_gen_<gen>``.
        workdir_now: Directory that receives ``sampled.csv``.
        max_sample: Upper bound of the total sample size. Defaults to
            500000, the value that used to be hard-coded.

    Returns:
        pandas.DataFrame: The sampled rows.
    """
    weight_col = "priority_gen_" + str(gen)
    sample_size = min(filter_df.shape[0], max_sample)
    is_spacer = filter_df["type"] == "G-002"

    if is_spacer.any():
        # control ratio of ring with spacer based on different stage
        if gen <= 3:
            spacer_ratio = 0.3
        elif gen <= 7:
            spacer_ratio = 0.1
        else:
            spacer_ratio = 0.01

        # Split with the boolean mask rather than dropping by index label:
        # dropping labels would also remove non-spacer rows whenever the
        # index contains repeated labels.
        spacer_df = filter_df[is_spacer]
        common_df = filter_df[~is_spacer]

        spacer_df = spacer_df.sample(min(int(sample_size * spacer_ratio), spacer_df.shape[0]),
                                     replace=False,
                                     weights=weight_col)
        common_df = common_df.sample(min(int(sample_size * (1 - spacer_ratio)), common_df.shape[0]),
                                     replace=False,
                                     weights=weight_col)
        sampled_df = pd.concat([spacer_df, common_df], axis=0)
    else:
        logger.error("No cmpds generated from ring with spacer in the generation!")
        sampled_df = filter_df.sample(sample_size, replace=False, weights=weight_col)

    sampled_df.to_csv(os.path.join(workdir_now, "sampled.csv"), index=False)
    return sampled_df


def sample_by_similarity(gen, filter_df, workdir_now, num_per_gen,
                         ref_smi="O=C(C1=CC=C(C(C)NC(C2=CC(C3=CC=CC=C3)=NN2C)=O)C=C1)O"):
    """Keep the molecules most similar to a reference structure.

    A ``similarity`` column (Tanimoto similarity between the fingerprint of
    each ``smiles_gen_<gen>`` entry and that of ``ref_smi``) is added to
    ``filter_df`` in place; the ``num_per_gen`` rows with the largest value
    are returned and written to ``sampled.csv`` in ``workdir_now``.

    Args:
        gen: Generation number; selects the column ``smiles_gen_<gen>``.
        filter_df: DataFrame holding the candidate molecules. It gains a
            ``similarity`` column as a side effect.
        workdir_now: Directory that receives ``sampled.csv``.
        num_per_gen: Number of rows to keep.
        ref_smi: SMILES of the reference molecule.

    Returns:
        pandas.DataFrame: The selected rows.
    """
    ref_fp = cal_morgan_fp(ref_smi)
    filter_df["similarity"] = filter_df["smiles_gen_" + str(gen)].apply(
        lambda x: tanimoto_smi(cal_morgan_fp(x), ref_fp))
    sampled_df = filter_df.nlargest(num_per_gen, columns="similarity")
    sampled_df.to_csv(os.path.join(workdir_now, "sampled.csv"), index=False)
    return sampled_df
def cal_rmsd(parent, c):
    """Return the RMSD between *parent* and *c* over their maximum common substructure.

    The common substructure is found with ``rdFMCS.FindMCS`` (exact bond
    orders, complete rings only, 1 s timeout) and the deviation is computed
    from the first conformer of each molecule, without any alignment.

    Args:
        parent: Molecule with at least one conformer.
        c: Molecule with at least one conformer.

    Returns:
        The RMSD over the matched atoms, or ``-2`` when the two molecules
        share no common substructure.
    """
    mcs = rdFMCS.FindMCS([parent, c], threshold=1, completeRingsOnly=True, ringMatchesRingOnly=True,
                         bondCompare=rdFMCS.BondCompare.CompareOrderExact,
                         timeout=1).queryMol
    if mcs is None:  # no common substructure
        return -2
    p_match = parent.GetSubstructMatch(mcs)
    c_match = c.GetSubstructMatch(mcs)
    # An empty query matches nothing; without this guard the division below
    # raises ZeroDivisionError.
    if not p_match or not c_match:
        return -2

    # Fetch the conformers once instead of once per atom pair.
    p_conf = parent.GetConformer()
    c_conf = c.GetConformer()
    delta2 = 0.0
    for pi, ci in zip(p_match, c_match):
        delta2 += (p_conf.GetAtomPosition(pi) - c_conf.GetAtomPosition(ci)).LengthSq()
    return math.sqrt(delta2 / len(p_match))
def quote_ident(name: str) -> str:
    """Quote an identifier (table or column name) using SQLite's escaping rules."""
    return '"' + name.replace('"', '""') + '"'


def check_smarts(db_file: str, out_file: str):
    """Report reaction SMARTS whose atom-map numbers differ between the two sides.

    Every table and view in *db_file* that has a ``smarts`` column and a
    rule-id column (``Rule ID`` / ``RuleID`` / ``rule_id``, case-insensitive)
    is scanned. For each SMARTS containing ``>>`` the ``:<number>`` tags left
    and right of the arrow are compared; rules whose tag sets differ are
    written to *out_file* as CSV.

    Args:
        db_file: Path of the SQLite rule database.
        out_file: Path of the CSV report (overwritten).

    Tables that cannot be queried are skipped with a warning on stderr.
    """
    tag_pattern = re.compile(r":(\d+)")  # extracts the number of each ":<number>" tag
    id_aliases = ("ruleid", "rule_id")

    conn = sqlite3.connect(db_file)
    try:
        cur = conn.cursor()
        with open(out_file, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            # header row
            writer.writerow(["Table", "Rule ID", "SMARTS", "Left Tags", "Right Tags", "Only Left", "Only Right"])

            # all tables and views
            cur.execute("SELECT name FROM sqlite_master WHERE type IN ('table','view');")
            tables = [r[0] for r in cur.fetchall()]

            for t in tables:
                qt = quote_ident(t)
                try:
                    # table structure
                    cur.execute(f"PRAGMA table_info({qt});")
                    cols = [row[1] for row in cur.fetchall()]
                    lower_cols = [c.lower() for c in cols]

                    # locate the smarts column
                    if "smarts" not in lower_cols:
                        continue
                    qsmarts = quote_ident(cols[lower_cols.index("smarts")])

                    # locate the "Rule ID" column (case-insensitive, spaces ignored)
                    id_col = next((c for c in cols if c.lower().replace(" ", "") in id_aliases), None)
                    if not id_col:
                        continue
                    qid = quote_ident(id_col)

                    # rows that contain a reaction arrow
                    cur.execute(f"SELECT {qid}, {qsmarts} FROM {qt} WHERE {qsmarts} LIKE '%>>%';")
                    rows = cur.fetchall()
                except sqlite3.Error as err:
                    # Keep the best-effort scan, but do not hide the failure.
                    print(f"warning: skipped table {t}: {err}", file=sys.stderr)
                    continue

                for rid, smarts in rows:
                    # Non-text values cannot be split and never hold a reaction.
                    if not isinstance(smarts, str) or ">>" not in smarts:
                        continue
                    left, right = smarts.split(">>", 1)

                    left_tags = set(tag_pattern.findall(left))
                    right_tags = set(tag_pattern.findall(right))

                    # tag sets on the two sides disagree
                    if left_tags != right_tags:
                        only_left = sorted(left_tags - right_tags, key=int)
                        only_right = sorted(right_tags - left_tags, key=int)
                        writer.writerow([
                            t,
                            rid,
                            smarts,
                            " ".join(sorted(left_tags, key=int)),
                            " ".join(sorted(right_tags, key=int)),
                            " ".join(only_left),
                            " ".join(only_right)
                        ])
    finally:
        # Close the connection even if opening the report or a query fails.
        conn.close()
def workflow():
    """Select, per property bin, the molecule with the lowest ligand efficiency.

    Reads the SDF named by the module-level ``sdfFile``, computes
    ``LE = <prop4LE> / heavy-atom count``, removes duplicated IDs, splits the
    ``prop4LE`` range into ``binCount`` equal-width bins, keeps the row with
    the minimum LE of each bin and writes those rows to ``outputSdfFile``.

    Raises:
        SystemExit: with status 1 when a required column is missing or the
            property column cannot be converted to float.
    """
    # Read the SDF file into a DataFrame
    df = PandasTools.LoadSDF(sdfFile, removeHs=False)
    # loguru formats str.format-style: without a "{}" placeholder the extra
    # positional argument is dropped and the column list never gets logged.
    if idCol not in df.columns:
        logger.error('Molecule ID column name is not detected: {}', list(df.columns))
        raise SystemExit(1)
    if prop4LE not in df.columns:
        logger.error('Column name of the property for ligand efficiency calculation is not detected: {}',
                     list(df.columns))
        raise SystemExit(1)
    try:  # Set data type to float
        df[prop4LE] = df[prop4LE].astype(float)
    except (TypeError, ValueError) as err:
        # A non-numeric column would only fail later, in the LE division,
        # with a far less helpful message.
        logger.error('Property {} cannot be converted to float: {}', prop4LE, err)
        raise SystemExit(1)

    # Calculate the heavy atom count for each molecule
    df['HeavyAtomCount'] = df['ROMol'].apply(lambda x: x.GetNumHeavyAtoms())
    df['LE'] = df[prop4LE] / df['HeavyAtomCount']

    # Sort by prop4LE and remove duplicated rows
    df.sort_values([prop4LE], inplace=True, ascending=[True])
    df.drop_duplicates([idCol], inplace=True)

    # Bin the data into binCount equal-width, left-closed intervals. Letting
    # pandas derive the edges keeps the maximum inside the last bin and also
    # copes with all values being identical (zero-width range).
    df['bin'] = pd.cut(df[prop4LE], bins=binCount, right=False)

    # Sort by LE
    df.sort_values('LE', inplace=True)
    # Keep the row with minimum LE in each bin
    resultDf = df.drop_duplicates(['bin']).copy()
    # Sort the result dataframe by prop4LE
    resultDf.sort_values([prop4LE], inplace=True)
    # Write the output SDF file
    PandasTools.WriteSDF(resultDf, outputSdfFile,
                         molColName='ROMol', properties=list(resultDf))

    logger.info('The script took {:.2f} second!'.format(time.time() - startTime))
def radical_filter(smi):
    """Return True when *smi* is a valid SMILES without radical atoms.

    Args:
        smi: SMILES string to check.

    Returns:
        False when the SMILES cannot be parsed or any atom carries one or
        more unpaired electrons, True otherwise.
    """
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # An unparsable structure cannot pass a structure filter.
        return False
    # Reject any unpaired electron, not only the single-electron case.
    return all(atom.GetNumRadicalElectrons() == 0 for atom in mol.GetAtoms())
def write_to_json(df, filename):
    """Dump *df* to *filename* as an indented JSON list with one object per row."""
    export_options = {
        'orient': 'records',   # one JSON object per DataFrame row
        'force_ascii': False,  # write non-ASCII characters as-is, not as \u escapes
        'indent': 4,
    }
    df.to_json(filename, **export_options)
def add_prop(df, ref_smi):
    """Annotate each rule with the property change it causes on a reference molecule.

    Every reaction SMARTS in ``df['SMARTS']`` is applied to the molecule
    parsed from *ref_smi*; the first product is compared with the reference
    and the differences are stored in the columns ``ΔMW``, ``ΔFsp3``,
    ``ΔNR``, ``ΔlogP`` and ``ΔNCC``.

    Args:
        df: DataFrame with a ``SMARTS`` column; modified in place.
        ref_smi: SMILES of the reference molecule.

    Returns:
        The same DataFrame. Rules that fail (invalid SMARTS, no product,
        unparsable product) are logged and their row is left unfilled.
    """
    mol = Chem.MolFromSmiles(ref_smi)
    mol_weight_ref = CalcExactMolWt(mol)
    fsp3_ref = CalcFractionCSP3(mol)
    ring_num_ref = CalcNumRings(mol)
    logp_ref = Descriptors.MolLogP(mol)
    chiral_num_ref = len(FindMolChiralCenters(mol, includeUnassigned=True))

    for index, row in df.iterrows():
        sma = row['SMARTS']
        try:
            rxn = rdChemReactions.ReactionFromSmarts(sma)
            products = rxn.RunReactants((mol,))
            # Round-trip through SMILES to get a sanitized product molecule.
            new = Chem.MolFromSmiles(Chem.MolToSmiles(products[0][0]))
            df.at[index, 'ΔMW'] = CalcExactMolWt(new) - mol_weight_ref
            df.at[index, 'ΔFsp3'] = CalcFractionCSP3(new) - fsp3_ref
            df.at[index, 'ΔNR'] = CalcNumRings(new) - ring_num_ref
            # logP must come from the product; using the reference molecule
            # here made ΔlogP identically zero.
            df.at[index, 'ΔlogP'] = Descriptors.MolLogP(new) - logp_ref
            chiral_num = len(FindMolChiralCenters(new, includeUnassigned=True))
            df.at[index, 'ΔNCC'] = chiral_num - chiral_num_ref
        except Exception as e:
            logger.error(e)
            logger.error(sma)
    return df
None 80 | 81 | xls = pd.ExcelFile(excel_filename) 82 | for sheet_name in xls.sheet_names: 83 | if re.match(pattern, sheet_name): 84 | logger.info(f"Processing sheet: {sheet_name}") 85 | sheet_df = read_excel(excel_filename, sheet_name) 86 | # sheet_df = process_sheet(sheet_df, ref_smi='YourReferenceSMILES') 87 | 88 | if output_type == 'db': 89 | write_to_sqlite(sheet_df, sheet_name, db_name) 90 | else: 91 | if sheet_name == "G-002": 92 | new_df = sheet_df[['Rule ID', 'SMARTS', 'Spacer Priority', 'Ring Priority']] 93 | else: 94 | new_df = sheet_df[['Rule ID', 'SMARTS', 'Priority']] # Adjust columns as needed 95 | collect_df.append(new_df) 96 | logger.info(f"Finished processing {output_type} for {sheet_name}") 97 | 98 | if output_type == 'json': 99 | combined_df = pd.concat(collect_df, ignore_index=True) 100 | output_filename = f"{os.path.splitext(excel_filename)[0]}.json" 101 | write_to_json(combined_df, output_filename) 102 | logger.info("All sheets processed and JSON file created.") 103 | 104 | 105 | if __name__ == "__main__": 106 | excel_filename = input("Enter the Excel file name: ") 107 | output_type = input("Choose the output type (db for database, json for JSON file): ") 108 | if output_type not in ['db', 'json']: 109 | logger.error("Invalid output type. 
def main():
    """Parse the command line and config file, then run the growing workflow.

    Reads the ``general``, ``docking`` and ``prediction`` sections of the INI
    file given by ``--config``, sets up file logging and starts
    ``Grow(...).grow()`` with the keyword arguments required by the selected
    docking program.

    Returns:
        None. Configuration problems are logged and end the function early.
    """
    parser = argparse.ArgumentParser(description="SECSE")

    parser.add_argument("--config", help="path of config file", default=False)
    args = parser.parse_args()

    box = {}
    try:
        config = configparser.ConfigParser()
        # ConfigParser.read() silently skips unreadable files and returns the
        # list of files it parsed, so an empty result means "no config".
        if not args.config or not config.read(args.config):
            raise FileNotFoundError("config file not found or unreadable: {}".format(args.config))
        project_code = config.get("general", "project_code")
        workdir = config.get("general", "workdir")

        setup_logger(project_code, workdir)

        num_gen = config.getint("general", "num_gen")
        mols_smi = config.get("general", "fragments")

        num_per_gen = config.getint("general", "num_per_gen")
        start_gen = config.getint("general", "start_gen")
        docking_program = config.get("docking", "docking_program")
        cpu_num = config.getint("general", "cpu")
        gpu_num = config.getint("general", "gpu")
        rule_db = config.get("general", "rule_db")

        receptor = config.get("docking", "target")
        dl_mode = config.getint("prediction", "mode")

        program = docking_program.lower()
        # Only the Vina-style programs take an explicit docking box.
        if "vina" in program or "unidock" in program:
            for key in ("x", "y", "z", "box_size_x", "box_size_y", "box_size_z"):
                box[key] = config.getfloat("docking", key)

    except Exception as e:
        logger.error("Please check your input arguments.")
        # Say which option or section is wrong instead of discarding the cause.
        logger.error("{}: {}", type(e).__name__, e)
        return None

    # Same precedence as before: vina, glide, autodock-gpu, unidock.
    if "vina" in program:
        extra = box
    elif "glide" in program:
        extra = {}
    elif "autodock-gpu" in program:
        extra = {"gpu_num": gpu_num}
    elif "unidock" in program:
        extra = box
    else:
        logger.error("Please check your input docking program argument.")
        return None

    workflow = Grow(num_gen, mols_smi, workdir, num_per_gen, docking_program, receptor, start_gen, dl_mode,
                    args.config, cpu_num=cpu_num, rule_db=rule_db, project_code=project_code, **extra)
    workflow.grow()
def get_pre(workdir, max_gen, get_all=False):
    """Build the CSV of molecules to predict, excluding those already sent to docking.

    All work is done by shell pipelines handed to ``shell_cmd_execute``;
    the files are created under ``<workdir>/prediction``.

    Args:
        workdir: Project working directory.
        max_gen: Generation number used in the output file names and, when
            *get_all* is false, to pick the ``generation_<max_gen>`` folder.
        get_all: When true, collect every ``filter.csv`` and
            ``mols_for_docking.smi`` found under *workdir*; otherwise use only
            the files of generation *max_gen*.

    Returns:
        Path of the resulting ``*_for_pre_uniq.csv`` file, or ``None`` when the
        final filtering command raised.
    """
    pre_dir = os.path.join(workdir, "prediction")
    if get_all:
        pre_raw = os.path.join(pre_dir, "all_G" + str(max_gen) + "_for_pre.raw")
        pre_file = os.path.join(pre_dir, "all_G" + str(max_gen) + "_for_pre.csv")

        # For every filter.csv, skip its header line (FNR>1) and print the
        # fields NF-5 and NF-6 joined by a comma. The doubled braces are
        # literal here (this is not a format string).
        # NOTE(review): presumably those two fields are SMILES and ID - confirm
        # against the filter.csv layout.
        cmd_cat = ["find", workdir, "-name \"filter.csv\" |xargs awk -F, 'FNR>1{{print $(NF-5)\",\"$(NF-6)}}' >",
                   pre_raw]
        shell_cmd_execute(cmd_cat)
        # Keep only the first line for each distinct value of the second field.
        cmd_dedup = ["awk -F',' '!seen[$2]++'", pre_raw, ">", pre_file]
        shell_cmd_execute(cmd_dedup)

        # Second tab-separated column of every mols_for_docking.smi.
        drop_mols = os.path.join(pre_dir, "drop_ids.txt")
        mols_id_cat = ["find", workdir, "-name \"mols_for_docking.smi\" |xargs cut -f2 >", drop_mols]
        shell_cmd_execute(mols_id_cat)
        final_file = os.path.join(pre_dir, "all_G" + str(max_gen) + "_for_pre_uniq.csv")
    else:
        pre_file = os.path.join(pre_dir, "gen_" + str(max_gen) + "_for_pre.csv")
        # Same two-field extraction as above, for a single generation.
        cmd_cp = ["awk -F, 'NR>1{{print $(NF-5)\",\"$(NF-6)}}'",
                  os.path.join(workdir, "generation_" + str(max_gen), "filter.csv"), ">", pre_file]
        shell_cmd_execute(cmd_cp)

        drop_mols = os.path.join(pre_dir, "drop_ids_{}.txt".format(max_gen))
        mols_id_cat = ["cut -f2", os.path.join(workdir, "generation_" + str(max_gen), "mols_for_docking.smi"), ">",
                       drop_mols]
        shell_cmd_execute(mols_id_cat)
        final_file = os.path.join(pre_dir, "gen_" + str(max_gen) + "_for_pre_uniq.csv")

    try:
        # Drop every line of pre_file that contains, as a whole word, one of
        # the entries listed in drop_mols.
        cmd_drop = ["grep -wvf", drop_mols, pre_file, ">", final_file]
        shell_cmd_execute(cmd_drop)
    except:
        # NOTE(review): bare except; whether shell_cmd_execute raises on a
        # non-zero exit status is not visible here - confirm.
        final_file = None
    return final_file
def dock_by_py_vina(workdir, smi, receptor, cpu_num, x, y, z, box_size_x=20, box_size_y=20, box_size_z=20):
    """Dock with the AutoDock Vina shell script and merge the resulting poses."""
    # Arguments in the order they are handed to the script at VINA_SHELL.
    script_args = (workdir, smi, receptor, x, y, z, box_size_x, box_size_y, box_size_z, cpu_num)
    command = [str(VINA_SHELL)]
    command.extend(str(value) for value in script_args)
    shell_cmd_execute(command)
    # Program code 0 makes check_mols read the "REMARK VINA RESULT" score line.
    merged_sdf(workdir, 0)
def _read_docking_score(pdb_path, program):
    """Return the score text of the first scoring line in *pdb_path*, or None."""
    with open(pdb_path, "r") as pdb:
        for line in pdb:
            if program == 0 or program == 2:
                if line.startswith("REMARK VINA RESULT"):
                    return line.split(":")[1][:10].replace(" ", "")
            elif program == 1:
                if "kcal" in line:
                    return line.split("kcal")[0].replace(" ", "")[6:]
    return None


def check_mols(workdir, program):
    """Convert docked PDB poses to SDF records carrying the docking score.

    For every file in ``<workdir>/pdb_files`` the bond orders are restored
    from the matching input ligand in ``<workdir>/ligands_for_docking``,
    hydrogens are added with coordinates, and the pose is written to
    ``<workdir>/sdf_files`` followed by a ``docking score`` data item and the
    ``$$$$`` record terminator.

    Args:
        workdir: Directory holding ``pdb_files``, ``ligands_for_docking`` and
            ``sdf_files``.
        program: 0 or 2 to read the ``REMARK VINA RESULT`` line, 1 to read the
            first line containing ``kcal``.

    Poses that cannot be parsed, matched to their template or scored are
    logged and skipped, so no unterminated record reaches the merged SDF.
    """
    pdb_dir = os.path.join(workdir, "pdb_files")
    for pdb_name in os.listdir(pdb_dir):
        raw_id = pdb_name.rsplit("-dp", 1)[0]
        pdb_path = os.path.join(pdb_dir, pdb_name)
        # Swap only the extension; str.replace would also rewrite a "pdb"
        # that occurs inside the molecule ID.
        sdf_name = os.path.splitext(pdb_name)[0] + ".sdf"
        sdf_path = os.path.join(workdir, "sdf_files", sdf_name)
        raw_mol = Chem.SDMolSupplier(os.path.join(workdir, "ligands_for_docking", raw_id + ".sdf"))[0]
        mol = AllChem.MolFromPDBFile(pdb_path, removeHs=True)
        if not mol:
            continue
        if raw_mol is None:
            logger.error("Failed check (template ligand unreadable): {}", pdb_name)
            continue
        try:
            new = AllChem.AssignBondOrdersFromTemplate(raw_mol, mol)
        except ValueError:
            # loguru needs a "{}" placeholder, otherwise the name is dropped.
            logger.error("Failed check: {}", pdb_name)
            continue

        score = _read_docking_score(pdb_path, program)
        if score is None:
            logger.error("No docking score found: {}", pdb_name)
            continue

        new = Chem.AddHs(new, addCoords=True)
        Chem.MolToMolFile(new, sdf_path)
        with open(sdf_path, "a") as sdf:
            # The data header must name the field; the merged SDF is later
            # read back through its "docking score" property.
            sdf.write("\n".join(["> <docking score>", score, "\n$$$$\n"]))
elif args.program == "autodock-gpu": 119 | logger.info("Docking by Autodock-GPU with {} CPUs and {} GPUs...".format(args.cpu_num, args.gpu_num)) 120 | dock_by_py_autodock_gpu(args.workdir, args.mols_smi, args.receptor, args.cpu_num, args.gpu_num) 121 | else: 122 | logger.error("Please choose a docking program.") 123 | -------------------------------------------------------------------------------- /secse/utilities/ring_tool.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Lu Chong 5 | @file: ring tool.py 6 | @time: 2021/02/07/14:17 7 | """ 8 | from rdkit import Chem 9 | from loguru import logger 10 | 11 | 12 | def ring_site_count(ring_atoms, systems): 13 | site_count = [-1] # add -1 in case no ring site 14 | for ring_s in systems: 15 | ring_s = set(ring_s) 16 | count = 0 17 | for site in ring_atoms: 18 | site = set(site) 19 | if ring_s.intersection(site): 20 | count += 1 21 | site_count.append(count) 22 | return site_count 23 | 24 | 25 | class RingSystems(object): 26 | def __init__(self, mol): 27 | self.mol = mol 28 | self.ri = self.mol.GetRingInfo() 29 | self.atom_rings = self.ri.AtomRings() 30 | self.bond_rings = self.ri.BondRings() 31 | self.systems = self.ring_systems() 32 | 33 | def ring_systems(self): 34 | systems = [] 35 | for ring in self.atom_rings: 36 | ringAts = set(ring) 37 | nSystems = [] 38 | for system in systems: 39 | nInCommon = len(ringAts.intersection(system)) 40 | if nInCommon: 41 | ringAts = ringAts.union(system) 42 | else: 43 | nSystems.append(system) 44 | nSystems.append(ringAts) 45 | systems = nSystems 46 | return systems 47 | 48 | # ring size of each ring system 49 | def ring_systems_size(self): 50 | ring_sys_size = [] 51 | for ring_s in self.systems: 52 | ring_s = set(ring_s) 53 | size = 0 54 | for ring in self.atom_rings: 55 | ring = set(ring) 56 | if ring_s.intersection(ring): 57 | size += 1 58 | ring_sys_size.append(size) 59 | 
return ring_sys_size 60 | 61 | def get_spiro_atoms(self): 62 | spiro = [] 63 | spiro_atoms = set() 64 | for i in range(len(self.atom_rings)): 65 | atom_ring_i = set(self.atom_rings[i]) 66 | for j in range(i): 67 | atom_ring_j = set(self.atom_rings[j]) 68 | common_atoms = atom_ring_i.intersection(atom_ring_j) 69 | if len(common_atoms) == 1: 70 | atoms = [0] * len(self.mol.GetAtoms()) 71 | for a in common_atoms: 72 | atoms[a] += 1 73 | 74 | for idx in range(len(atoms)): 75 | if atoms[idx] == 1: 76 | spiro = (idx,) 77 | spiro_atoms.add(spiro) 78 | return spiro_atoms 79 | 80 | def get_fused_atoms(self): 81 | fused_atoms = set() 82 | 83 | for i in range(len(self.bond_rings)): 84 | bond_ring_i = set(self.bond_rings[i]) 85 | for j in range(i): 86 | bond_ring_j = set(self.bond_rings[j]) 87 | common_bonds = bond_ring_i.intersection(bond_ring_j) 88 | if len(common_bonds) == 1: 89 | atoms = [0] * len(self.mol.GetAtoms()) 90 | fused_unit = () 91 | 92 | for b in common_bonds: 93 | atoms[self.mol.GetBondWithIdx(b).GetBeginAtomIdx()] += 1 94 | atoms[self.mol.GetBondWithIdx(b).GetEndAtomIdx()] += 1 95 | for idx in range(len(atoms)): 96 | if atoms[idx] == 1: 97 | fused_unit += (idx,) 98 | fused_atoms.add(fused_unit) 99 | 100 | return fused_atoms 101 | 102 | def get_bridged_atoms(self): 103 | bridged_atoms = set() 104 | 105 | for i in range(len(self.bond_rings)): 106 | bond_ring_i = set(self.bond_rings[i]) 107 | for j in range(i): 108 | bond_ring_j = set(self.bond_rings[j]) 109 | common_bonds = bond_ring_i.intersection(bond_ring_j) 110 | 111 | if len(common_bonds) > 1: 112 | atoms = [0] * len(self.mol.GetAtoms()) 113 | bridged_unit = () 114 | for b in common_bonds: 115 | atoms[self.mol.GetBondWithIdx(b).GetBeginAtomIdx()] += 1 116 | atoms[self.mol.GetBondWithIdx(b).GetEndAtomIdx()] += 1 117 | for idx in range(len(atoms)): 118 | if atoms[idx] == 1: 119 | bridged_unit += (idx,) 120 | bridged_atoms.add(bridged_unit) 121 | return bridged_atoms 122 | 123 | def spiro_site_count(self): 124 
| return ring_site_count(self.get_spiro_atoms(), self.systems) 125 | 126 | def bridged_site_count(self): 127 | return ring_site_count(self.get_bridged_atoms(), self.systems) 128 | 129 | def fused_site_count(self): 130 | return ring_site_count(self.get_fused_atoms(), self.systems) 131 | 132 | def ring_system_count_filter(self, num=4): 133 | return len(self.systems) <= num 134 | 135 | def largest_ring_system_size_filter(self, num=3): 136 | return max(self.ring_systems_size() + [-1]) <= num 137 | 138 | def largest_spiro_site_filter(self, num=1): 139 | return max(self.spiro_site_count()) <= num 140 | 141 | def largest_fused_site_filter(self, num=3): 142 | return max(self.fused_site_count()) <= num 143 | 144 | def largest_bridged_site_filter(self, num=2): 145 | return max(self.bridged_site_count()) <= num 146 | 147 | def bridged_atom_is_aromatic_filter(self): 148 | bridged_atoms = self.get_bridged_atoms() 149 | for atom_cubic in bridged_atoms: 150 | for atom_idx in atom_cubic: 151 | atom = self.mol.GetAtomWithIdx(atom_idx) 152 | if atom.GetIsAromatic(): 153 | return False 154 | return True 155 | 156 | def ring_check(self, rssc, bsc, ssc, fsc, rsc): 157 | return all([self.largest_ring_system_size_filter(rssc), 158 | self.largest_bridged_site_filter(bsc), 159 | self.largest_spiro_site_filter(ssc), 160 | self.largest_fused_site_filter(fsc), 161 | self.ring_system_count_filter(rsc), 162 | self.bridged_atom_is_aromatic_filter()]) 163 | 164 | 165 | if __name__ == '__main__': 166 | mol = Chem.MolFromSmiles("C1(C2)CC(CC3)NC3CC2C1") 167 | ringcheck = RingSystems(mol) 168 | logger.info("Not Pass Filter" if not ringcheck.ring_check() else "Pass Filter") 169 | logger.info(ringcheck.ring_systems_size()) 170 | -------------------------------------------------------------------------------- /secse/evaluate/ligprep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Liu Shien 5 | 
@file: ligprep.py 6 | @time: 2021/4/1/16:28 7 | @modify: 2022/3/1/12:04 8 | @modify: 2023/5/5/14:22 9 | """ 10 | import argparse 11 | import os 12 | import sys 13 | import rdkit 14 | from loguru import logger 15 | from rdkit import Chem 16 | from rdkit.Chem import AllChem 17 | from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions 18 | from rdkit.Chem.MolStandardize import rdMolStandardize 19 | from rdkit.Chem import rdDistGeom 20 | from rdkit.Chem import rdMolAlign 21 | from openbabel import pybel 22 | from openbabel import openbabel as ob 23 | 24 | sys.path.append(os.getenv("SECSE")) 25 | from utilities.wash_mol import charge_mol 26 | 27 | rdkit.RDLogger.DisableLog("rdApp.*") 28 | 29 | 30 | def setero(mol, onlyUnassigned=True): 31 | if onlyUnassigned: 32 | opts = StereoEnumerationOptions(tryEmbedding=True) 33 | else: 34 | opts = StereoEnumerationOptions(tryEmbedding=True, onlyUnassigned=False) 35 | isomers = tuple(EnumerateStereoisomers(mol, options=opts)) 36 | res = [] 37 | if len(isomers) > 1: 38 | for idx, tmp in enumerate(isomers): 39 | name = tmp.GetProp("_Name") + "-CC" + str(idx) 40 | tmp.SetProp("_Name", name) 41 | res.append(tmp) 42 | return res 43 | else: 44 | return list(isomers) 45 | 46 | 47 | def tau(mol, can=True): 48 | params = rdMolStandardize.CleanupParameters() 49 | params.tautomerRemoveSp3Stereo = False 50 | params.tautomerRemoveBondStereo = False 51 | params.maxTautomers = 1000 52 | params.maxTransforms = 10000 53 | enumerator = rdMolStandardize.TautomerEnumerator(params) 54 | try: 55 | canon = enumerator.Canonicalize(mol) 56 | except Exception as e: 57 | logger.error(e) 58 | return [mol] 59 | 60 | if can: 61 | return [canon] 62 | csmi = Chem.MolToSmiles(canon) 63 | res = [canon] 64 | tauts = enumerator.Enumerate(mol) 65 | smis = [Chem.MolToSmiles(x) for x in tauts] 66 | stpl = sorted((x, y) for x, y in zip(smis, tauts) if x != csmi) 67 | res += [y for x, y in stpl] 68 | 69 | new = [] 70 | for idx, tmp 
in enumerate(res): 71 | name = tmp.GetProp("_Name") + "-CT" + str(idx) 72 | tmp.SetProp("_Name", name) 73 | new.append(tmp) 74 | 75 | return new 76 | 77 | 78 | def to_3D(mol): 79 | mol = Chem.AddHs(mol) 80 | AllChem.EmbedMolecule(mol, useExpTorsionAnglePrefs=True, useBasicKnowledge=True, maxAttempts=10000, 81 | useRandomCoords=True) 82 | if mol.GetNumConformers() > 0: 83 | AllChem.UFFOptimizeMolecule(mol, 200, 10.0, -1) 84 | return mol 85 | else: 86 | return None 87 | 88 | 89 | def gen_minimized_3D(path, rdmol, numConformer=1, rms_cutoff=1, addH=True): 90 | name = rdmol.GetProp("_Name") 91 | sdf_path = os.path.join(path, name + ".sdf") 92 | writer = Chem.SDWriter(sdf_path) 93 | if addH: 94 | rdmol = Chem.AddHs(rdmol, addCoords=True) 95 | 96 | param = rdDistGeom.ETKDGv2() 97 | param.pruneRmsThresh = rms_cutoff 98 | cids = rdDistGeom.EmbedMultipleConfs(rdmol, 50, param) 99 | mp = AllChem.MMFFGetMoleculeProperties(rdmol, mmffVariant='MMFF94s') 100 | AllChem.MMFFOptimizeMoleculeConfs(rdmol, numThreads=0, mmffVariant='MMFF94s') 101 | res = [] 102 | for cid in cids: 103 | ff = AllChem.MMFFGetMoleculeForceField(rdmol, mp, confId=cid) 104 | # ff.Initialize() 105 | ff.Minimize() 106 | e = ff.CalcEnergy() 107 | res.append((cid, e)) 108 | sorted_res = sorted(res, key=lambda x: x[1]) 109 | rdMolAlign.AlignMolConformers(rdmol) 110 | if len(sorted_res) > numConformer: 111 | selected = numConformer 112 | else: 113 | selected = len(sorted_res) 114 | # new = Chem.Mol(rdmol) 115 | # new.RemoveAllConformers() 116 | # min_conf = rdmol.GetConformer(sorted_res[0][0]) 117 | # new.AddConformer(min_conf) 118 | for i in range(selected): 119 | cid = sorted_res[i][0] 120 | writer.write(rdmol, cid) 121 | writer.close() 122 | 123 | return sdf_path 124 | 125 | 126 | def ionization(smi_string): 127 | return charge_mol(smi_string) 128 | 129 | 130 | def sdf2pdbqt(sdf_path): 131 | path = os.path.dirname(sdf_path) 132 | name = os.path.basename(sdf_path).split(".")[0] 133 | num = 0 134 | for mol in 
pybel.readfile("sdf", sdf_path): 135 | mol.write("pdbqt", "{}.pdbqt".format(os.path.join(path, name)), overwrite=True) 136 | num += 1 137 | 138 | return num == 1 139 | 140 | 141 | class LigPrep: 142 | def __init__(self, infile, workdir): 143 | self.infile = infile 144 | self.workdir = workdir 145 | self.mol_dict = {} 146 | 147 | def parse_infile(self): 148 | with open(self.infile, "r") as inf: 149 | for line in inf: 150 | tmp = line.strip().split() 151 | if len(tmp) < 2: 152 | continue 153 | smi = tmp[0] 154 | id1 = tmp[1] 155 | smi = ionization(smi) 156 | 157 | mol = Chem.MolFromSmiles(smi) 158 | if mol is None: 159 | continue 160 | mol.SetProp("_Name", id1) 161 | self.mol_dict[id1] = mol 162 | 163 | def process(self, des): 164 | dirc_name = "ligands_for_" + des 165 | path = os.path.join(self.workdir, dirc_name) 166 | os.makedirs(path, exist_ok=True) 167 | 168 | self.parse_infile() 169 | for gid in self.mol_dict: 170 | mol = self.mol_dict[gid] 171 | mystereo = setero(mol) 172 | 173 | mytau = [] 174 | for stereo in mystereo: 175 | tmp = tau(stereo) 176 | mytau += tmp 177 | 178 | for newmol in mytau: 179 | if newmol is not None: 180 | try: 181 | if des == 'docking': 182 | sdf_path = gen_minimized_3D(path, newmol) 183 | sdf2pdbqt(sdf_path) 184 | if des == 'shape': 185 | gen_minimized_3D(path, newmol, 10) 186 | except Exception as e: 187 | logger.error(e) 188 | continue 189 | 190 | 191 | if __name__ == '__main__': 192 | parser = argparse.ArgumentParser(description="LigPrep @dalong") 193 | parser.add_argument("workdir", help="Workdir") 194 | parser.add_argument("mols_smi", help="Seed fragments") 195 | parser.add_argument("--mode", 196 | help="1: prepare pdbqt file for docking input; 2: prepare sdf file for shape based screening.", 197 | type=int, default=1) 198 | 199 | args = parser.parse_args() 200 | lig = LigPrep(args.mols_smi, args.workdir) 201 | lig.parse_infile() 202 | if args.mode == 1: 203 | lig.process(des="docking") 204 | elif args.mode == 2: 205 | 
lig.process(des="shape") 206 | -------------------------------------------------------------------------------- /secse/report/grow_path.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Lu Chong 5 | @file: grow_path.py 6 | @time: 2021/01/19/13:42 7 | """ 8 | import os 9 | import sys 10 | import time 11 | import argparse 12 | import numpy as np 13 | import pandas as pd 14 | from rdkit import Chem 15 | from rdkit.Chem import Descriptors 16 | from rdkit.Chem.rdMolDescriptors import CalcExactMolWt 17 | from pandarallel import pandarallel 18 | import configparser 19 | from loguru import logger 20 | 21 | sys.path.append(os.getenv("SECSE")) 22 | from scoring.ranking import read_dock_file 23 | from utilities.function_helper import shell_cmd_execute 24 | 25 | pandarallel.initialize(verbose=0) 26 | SELECT_SDF_SHELL = os.path.join(os.getenv("SECSE"), "report", "filter_sdf_by_titles.pl") 27 | 28 | 29 | def cal_mutation_dic(workdir, max_gen): 30 | mut_dic_all = dict() 31 | 32 | while max_gen > 0: 33 | mut_file = os.path.join(workdir, "generation_" + str(max_gen), "filter.csv") 34 | logger.info(mut_file) 35 | with open(mut_file, "r") as f: 36 | lines = f.readlines() 37 | lines = [i.strip().split(",") for i in lines] 38 | mut_dic = {i[-6].split("-dp")[0].split("-C")[0]: [i[0], i[1].split("-dp")[0].split("-C")[0], i[-5], i[-4]] for i 39 | in lines} 40 | mut_dic_all["gen" + str(max_gen)] = mut_dic 41 | max_gen -= 1 42 | return mut_dic_all 43 | 44 | 45 | def merge_multi_generation(workdir, max_gen, file_path, dl_mode, config_path): 46 | df_lst = [pd.read_csv(os.path.join(workdir, "generation_" + str(i), 47 | "docked_gen_" + str(i) + ".csv")) for i in range(1, max_gen + 1)] 48 | if dl_mode == 2: 49 | dl_df = read_dock_file(os.path.join(workdir, "generation_{}_pre".format(max_gen), 50 | "docking_outputs_with_score.sdf")) 51 | dl_df["le_ln"] = dl_df.apply( 52 | lambda x: 
x["docking score"] / Chem.MolFromSmiles(x["smiles"]).GetNumHeavyAtoms(), 53 | axis=1) 54 | dl_df.columns = [i.lower() for i in list(dl_df.columns)] 55 | dl_df = dl_df.drop(columns=["molecule"]) 56 | dl_df = dl_df.reindex(columns=df_lst[0].columns) 57 | config = configparser.ConfigParser() 58 | config.read(config_path) 59 | score_cutoff = config.getfloat("prediction", "dl_score_cutoff") 60 | dl_df = dl_df[dl_df["docking score"] < score_cutoff] 61 | df_lst.append(dl_df) 62 | 63 | final_df = pd.concat(df_lst, axis=0).drop_duplicates(subset=["smiles"]) 64 | final_df.to_csv(file_path, index=False) 65 | return final_df 66 | 67 | 68 | def grow_path(mut_dic_all, mut_id): 69 | mut_id = mut_id.split("-dp")[0].split("-C")[0] 70 | try: 71 | gen_mol = int(mut_id.split("_")[-3]) 72 | except IndexError: 73 | logger.error(f"Index error: {mut_id}") 74 | return None 75 | mut_info_lst = [] 76 | 77 | while gen_mol > 0: 78 | mut_info = mut_dic_all["gen" + str(gen_mol)][mut_id] 79 | if "." in mut_info[2]: 80 | gen_mol -= 1 81 | continue 82 | mut_info_lst.append(mut_info) 83 | mut_id = mut_info[1] 84 | gen_mol -= 1 85 | return mut_info_lst 86 | 87 | 88 | def add_prop(merged_df_path): 89 | merged_df = pd.read_csv(merged_df_path) 90 | raw_cols = list(merged_df.columns) 91 | merged_df["mol"] = merged_df["smiles"].apply(Chem.MolFromSmiles) 92 | # check charge 93 | merged_df["charge flag"] = merged_df["mol"].apply(charge_filter) 94 | merged_df = merged_df[merged_df["charge flag"]] 95 | # add MW, logP 96 | merged_df["MW"] = merged_df["mol"].apply(CalcExactMolWt) 97 | merged_df["LogP"] = merged_df["mol"].apply(Descriptors.MolLogP) 98 | new_cols = ["smiles", "MW", "LogP"] + raw_cols[1:] 99 | return merged_df[new_cols] 100 | 101 | 102 | def charge_filter(mol): 103 | negative_charge = Chem.MolFromSmarts("[*-1]") 104 | positive_charge = Chem.MolFromSmarts("[*+1]") 105 | nc = len(mol.GetSubstructMatches(negative_charge)) 106 | pc = len(mol.GetSubstructMatches(positive_charge)) 107 | npc = nc + pc 
108 | if npc <= 1: 109 | return True 110 | elif npc == 2: 111 | if nc <= 1: 112 | return True 113 | return False 114 | 115 | 116 | def grep_sdf(workdir, merge_file): 117 | merged_sdf = os.path.join(workdir, "merged_all.sdf") 118 | selected_sdf = os.path.join(workdir, "selected.sdf") 119 | ids_txt = os.path.join(workdir, "seleted_ids.txt") 120 | # merge all sdf 121 | cmd_merge = ["find", workdir, "-name \"docking_outputs_with_score.sdf\" | xargs cat >", merged_sdf] 122 | shell_cmd_execute(cmd_merge) 123 | # create ids 124 | df = pd.read_csv(merge_file) 125 | ids = list(set(df["id"].apply(lambda x: x.split("-dp")[0]))) 126 | # write ids 127 | with open(ids_txt, "w") as ids_out: 128 | [ids_out.write(i + "\n") for i in ids] 129 | # subset sdf 130 | cmd_filter_sdf = ["perl", SELECT_SDF_SHELL, merged_sdf, ids_txt, selected_sdf] 131 | shell_cmd_execute(cmd_filter_sdf) 132 | # remove temporary file 133 | # os.remove(ids_txt) 134 | os.remove(merged_sdf) 135 | 136 | 137 | def write_growth(config_path: str, max_gen: int, dl_mode: int): 138 | config = configparser.ConfigParser() 139 | config.read(config_path) 140 | workdir = config.get("general", "workdir") 141 | now = str(int(time.time())) 142 | file_path = os.path.join(workdir, "merged_docked_best_" + now + ".csv") 143 | merge_multi_generation(workdir, max_gen, file_path, dl_mode, config_path) 144 | 145 | new_file = file_path.replace(".csv", "_tmp.csv") 146 | final_file = file_path.replace(".csv", "_with_grow_path.csv") 147 | 148 | mut_dic_all = cal_mutation_dic(workdir, max_gen) 149 | with open(file_path, 'r') as raw: 150 | header = raw.readline().strip().split(",") 151 | path_header = list(zip(["smi_gen_", "id_gen_", "rxn_gen_", "partner_gen_"] * max_gen, 152 | np.repeat(list(range(max_gen)), 4).astype(str))) 153 | header += ["".join(i) for i in path_header] 154 | new_header = ",".join(header) + "\n" 155 | with open(new_file, "w") as new: 156 | new.write(new_header) 157 | for line in raw.readlines(): 158 | line = 
line.strip().split(",") 159 | mol_id = line[1] 160 | # find grow path per line 161 | mut_info_lst = grow_path(mut_dic_all, mol_id) 162 | if mut_info_lst is None: 163 | continue 164 | mut_info_lst.reverse() 165 | mut_info_lst = list(np.concatenate(mut_info_lst)) 166 | new_line = ",".join(line + mut_info_lst) 167 | 168 | # fill empty columns 169 | cols = new_header.count(",") 170 | new_line += "," * (cols - new_line.count(",")) + "\n" 171 | new.write(new_line) 172 | 173 | grow_df = add_prop(new_file) 174 | grow_df.to_csv(final_file, index=False) 175 | grep_sdf(workdir, final_file) 176 | # logger.info("\n", "*" * 100) 177 | logger.info(f"Output file: {final_file}") 178 | # logger.info("*" * 100) 179 | 180 | # remove temporary files 181 | os.remove(file_path) 182 | os.remove(new_file) 183 | 184 | 185 | if __name__ == '__main__': 186 | parser = argparse.ArgumentParser(description="SCESE -- find path") 187 | parser.add_argument("config_path", help="config file path", type=str) 188 | parser.add_argument("max_gen", help="Max number of generation.", type=int) 189 | parser.add_argument("dl_mode", 190 | help="Mode of deep learning modeling, 0: not use, 1: modeling per generation, 2: modeling overall after all the generation") 191 | args = parser.parse_args() 192 | write_growth(args.config_path, args.max_gen, args.dl_mode) 193 | -------------------------------------------------------------------------------- /secse/scoring/ranking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Lu Chong 5 | @file: ranking.py 6 | @time: 2020/11/04/13:35 7 | """ 8 | import pandas as pd 9 | from rdkit.Chem import PandasTools 10 | from scoring.diversity_score import * 11 | import numpy as np 12 | import os 13 | import configparser 14 | from pandarallel import pandarallel 15 | from loguru import logger 16 | 17 | pandarallel.initialize(verbose=0) 18 | rdkit.RDLogger.DisableLog("rdApp.*") 19 
| 20 | 21 | def read_dock_file(sdf): 22 | # assign new id for duplicates, with suffix -1, -2, ... 23 | sdf_df = PandasTools.LoadSDF(sdf, smilesName='smiles', molColName='Molecule')[ 24 | ["ID", "Molecule", "smiles", "docking score"]] 25 | sdf_df["docking score"] = sdf_df["docking score"].astype(float) 26 | sdf_df = sdf_df.sort_values(by="docking score", ascending=True) 27 | name_groups = sdf_df.groupby("ID")["ID"] 28 | suffix = name_groups.cumcount() + 1 29 | repeats = name_groups.transform("size") 30 | sdf_df["ID"] = np.where(repeats > 1, sdf_df['ID'] + "-dp" + suffix.map(str), sdf_df["ID"]) 31 | return sdf_df 32 | 33 | 34 | def clean_id(raw_id, gen): 35 | new_id = raw_id 36 | # if "GEN_" + str(gen) in raw_id: 37 | if "-C" in new_id: 38 | new_id = new_id.rsplit("-C", 1)[0] 39 | # elif new_id.count("dp") > 1: 40 | # new_id = new_id.rsplit("-dp", 1)[0] 41 | # elif new_id.count("-C") > 1: 42 | # new_id = new_id.rsplit("-C", 1)[0] 43 | return new_id 44 | 45 | 46 | class Ranking(object): 47 | def __init__(self, sdf, gen, config_file): 48 | self.sdf = sdf 49 | self.gen = gen 50 | 51 | config = configparser.ConfigParser() 52 | config.read(config_file) 53 | self.docking_score_cutoff = config.getfloat("docking", "score_cutoff") 54 | self.RMSD = config.getfloat("docking", "rmsd") 55 | self.delta_docking_score = config.getfloat("docking", "delta_score") 56 | 57 | self.docked_df = pd.DataFrame(None) 58 | self.diff = None 59 | self.score_min = None 60 | self.winner = None 61 | self.final_df = None 62 | self.keep_mols = None 63 | 64 | self.load_sdf() 65 | self.ranking_flag = True 66 | if self.gen > 0: 67 | if self.filter_rmsd_docking_score(): 68 | self.cal_le_rank() 69 | else: 70 | self.ranking_flag = False 71 | logger.info("No molecule left, stopping generation.") 72 | elif self.gen == 0: 73 | self.cal_le_rank() 74 | 75 | self.size = min(config.getint("general", "seed_per_gen"), self.docked_df.shape[0]) 76 | 77 | def load_sdf(self): 78 | raw_df = PandasTools.LoadSDF(self.sdf, 
smilesName='smiles', molColName='Molecule')[ 79 | ["ID", "Molecule", "smiles", "docking score"]] 80 | raw_df["docking score"] = raw_df["docking score"].astype(float) 81 | raw_df = raw_df.sort_values(by="docking score", ascending=True) 82 | 83 | raw_df.columns = [i.lower() for i in list(raw_df.columns)] 84 | 85 | self.docked_df = raw_df[["smiles", "id", "docking score", "molecule"]].copy() 86 | # assign new id for duplicates, with suffix -1, -2, ... 87 | name_groups = self.docked_df.groupby("id")["id"] 88 | suffix = name_groups.cumcount() + 1 89 | repeats = name_groups.transform("size") 90 | self.docked_df["id_raw"] = self.docked_df["id"].copy() 91 | self.docked_df["id"] = np.where(repeats > 1, self.docked_df['id'] + "-dp" + suffix.map(str), 92 | self.docked_df["id"]) 93 | 94 | logger.info("{} cmpds after evaluate".format(self.docked_df.shape[0])) 95 | 96 | def load_parents_sdf(self): 97 | gen = str(self.gen - 1) 98 | read_dock_file(os.path.join(os.path.dirname(os.path.dirname(self.sdf)), "generation_" + gen, 99 | "docking_outputs_with_score.sdf")) 100 | 101 | def mols_score_below_cutoff(self): 102 | self.docking_score_cutoff = min(self.docking_score_cutoff, 103 | self.docked_df["docking score"].astype(float).quantile(0.01)) 104 | logger.info("The evaluate score cutoff is: {}".format(self.docking_score_cutoff)) 105 | self.keep_mols = self.docked_df[self.docked_df["docking score"].astype(float) <= self.docking_score_cutoff] 106 | self.final_df = pd.concat([self.keep_mols, self.winner]).drop_duplicates(subset="id") 107 | cols = list(self.final_df.columns) 108 | cols = [i + "_gen_" + str(self.gen) for i in cols] 109 | self.final_df.columns = cols 110 | logger.info("{} final seeds.".format(self.final_df.shape[0])) 111 | 112 | def filter_rmsd_docking_score(self): 113 | last_sdf = self.sdf.replace("generation_" + str(self.gen), "generation_" + str(self.gen - 1)) 114 | last_df = read_dock_file(last_sdf).set_index("ID") 115 | mut_df = 
pd.read_csv(os.path.join(os.path.dirname(self.sdf), "filter.csv"), low_memory=False) 116 | parent_dic = dict(zip(mut_df["id_gen_" + str(self.gen)], zip(mut_df["id_gen_" + str(self.gen - 1)], 117 | mut_df["type"]))) 118 | self.docked_df["id_find_parent"] = self.docked_df["id_raw"].apply(lambda x: clean_id(x, self.gen)) 119 | 120 | # calculate RMSD: parent from last generation 121 | def cal_rmsd_docked(row): 122 | # do not care rmsd for the first generation 123 | if self.gen == 1: 124 | return -1 125 | # do not care rmsd except for Grow type 126 | if "G" not in parent_dic[row["id_find_parent"]][1]: 127 | return -2 128 | return cal_rmsd(last_df.loc[parent_dic[row["id_find_parent"]][0]]["Molecule"], row["molecule"]) 129 | 130 | # calculate RMSD only for Type Grow mutation, assign -1 for other mutation 131 | self.docked_df["rmsd"] = self.docked_df.apply(cal_rmsd_docked, axis=1) 132 | # calculate change of evaluate score after growing 133 | self.docked_df["delta_docking_score"] = self.docked_df.apply(lambda x: float(x["docking score"]) - float( 134 | last_df.loc[parent_dic[x["id_find_parent"]][0]]["docking score"]), axis=1) 135 | 136 | # keep same binding mode (RMSD < 2A and delta evaluate score < -0.3) or 137 | # find a better binding mode (delta evaluate score < -1.2kcal ) 138 | logger.info("{} cmpds before RMSD/Docking Score filter".format(self.docked_df.shape[0])) 139 | self.docked_df = self.docked_df[(self.docked_df["delta_docking_score"] <= self.delta_docking_score) | ( 140 | (self.docked_df["rmsd"] <= self.RMSD) & (self.docked_df["delta_docking_score"] <= -0.2))] 141 | rest_cmpds = self.docked_df.shape[0] 142 | logger.info("{} cmpds after RMSD/Docking Score filter".format(rest_cmpds)) 143 | if rest_cmpds == 0: 144 | return False 145 | return True 146 | 147 | def cal_le_rank(self): 148 | # calculate ln LE and fitness rank 149 | self.docked_df["le_ln"] = self.docked_df.apply( 150 | lambda x: x["docking score"] / (1 + np.log(x["molecule"].GetNumHeavyAtoms())), 151 | 
axis=1) 152 | self.diff = self.docked_df["le_ln"].max() - self.docked_df["le_ln"].min() 153 | self.score_min = self.docked_df["le_ln"].min() 154 | self.docked_df["fitness"] = 1 - ((self.docked_df["le_ln"] - self.score_min) / self.diff) 155 | self.docked_df["fitness"] = self.docked_df["fitness"].fillna(-1) 156 | self.docked_df["fitness_rank"] = self.docked_df["fitness"].rank(ascending=False) 157 | self.docked_df["fitness_rank"] = self.docked_df["fitness_rank"].fillna(-1) 158 | # drop molecule columns 159 | self.docked_df = self.docked_df.drop(columns=["molecule", "id_raw"]) 160 | 161 | def roulette_selection(self): 162 | self.winner = self.docked_df.sample(n=self.size, weights="fitness") 163 | 164 | def tournament_selection(self): 165 | # random sample 3 molecules the one with smallest evaluate score win, repeat until get 20% of original data 166 | win_lst = [] 167 | if self.size == 1: 168 | self.winner = self.docked_df.copy() 169 | pool = self.docked_df.copy() 170 | for i in range(int(self.size)): 171 | winner = pool.sample(min(10, pool.shape[0])).nsmallest(1, "le_ln", keep="first") 172 | win_lst.append(winner) 173 | pool = pool.drop(winner.index) 174 | 175 | self.winner = pd.concat(win_lst) 176 | -------------------------------------------------------------------------------- /secse/growing/mutation/mutation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.getenv("SECSE")) 5 | import copy 6 | import sqlite3 7 | import pandas as pd 8 | from loguru import logger 9 | import rdkit 10 | from pandarallel import pandarallel 11 | from rdkit import Chem 12 | from rdkit.Chem import rdChemReactions 13 | from utilities.wash_mol import get_bridged_atoms, neutralize_atoms 14 | from utilities.load_rules import json_to_DB 15 | from utilities.function_helper import shell_cmd_execute 16 | 17 | rdkit.RDLogger.DisableLog("rdApp.*") 18 | 19 | RULE_DB = os.path.join(os.getenv("SECSE"), 
"growing/mutation/rules_demo.db") 20 | 21 | 22 | class Mutation: 23 | 24 | def __init__(self, num, workdir, rule_db=RULE_DB): 25 | # self.load_reaction() 26 | self.workdir = workdir 27 | self.rule_db = rule_db 28 | # self.load_buildingblock(num=num) 29 | self.rules_dict = {} 30 | self.load_common_rules() 31 | self.load_spacer_rings_rules() 32 | 33 | # drop unwanted rules where Priority < 0 34 | self.rules_dict = {k: v for k, v in self.rules_dict.items() if int(v[1]) > 0} 35 | self.out_product_smiles = [] 36 | self.input_smiles = None 37 | self.mol = None 38 | 39 | def load_common_rules(self, tables=None): 40 | if tables is None: 41 | tables = ['B-001', 42 | 'G-001', 'G-003', 'G-004', 'G-005', 'G-006', 'G-007', 43 | 'M-001', 'M-002', 'M-003', 'M-004', 'M-005', 'M-006', 'M-007', 'M-008', 'M-009', 'M-010' 44 | ] 45 | rules_dict = {} 46 | for table in tables: 47 | try: 48 | sql = 'select * from "{0}"'.format(table) 49 | conn = sqlite3.connect(self.rule_db) 50 | conn.row_factory = sqlite3.Row 51 | c = conn.cursor() 52 | c.execute(sql) 53 | rs = c.fetchall() 54 | for row in rs: 55 | row = dict(row) 56 | rules_dict[row["Rule ID"]] = (rdChemReactions.ReactionFromSmarts(row["SMARTS"]), row['Priority']) 57 | except sqlite3.OperationalError: 58 | logger.error("No rule class: ", table) 59 | pass 60 | self.rules_dict.update(rules_dict) 61 | 62 | def load_spacer_rings_rules(self): 63 | rules_dict = {} 64 | try: 65 | sql = 'select * from "{}"'.format("G-002") 66 | conn = sqlite3.connect(self.rule_db) 67 | conn.row_factory = sqlite3.Row 68 | c = conn.cursor() 69 | c.execute(sql) 70 | rs = c.fetchall() 71 | for row in rs: 72 | row = dict(row) 73 | pri = int(row['Spacer Priority']) * int(row['Ring Priority']) 74 | rules_dict[row["Rule ID"]] = (rdChemReactions.ReactionFromSmarts(row["SMARTS"]), str(pri)) 75 | self.rules_dict.update(rules_dict) 76 | except sqlite3.OperationalError: 77 | logger.error("No rule class: G-002") 78 | 79 | # set smiles 80 | def load_mol(self, input_smiles): 
81 | self.clean() 82 | self.input_smiles = input_smiles 83 | # uncharged each atom 84 | self.mol = Chem.MolFromSmiles(self.input_smiles) 85 | assert self.mol, "Can not read smiles" 86 | if self.input_smiles.count("-") + self.input_smiles.count("+") > 0: 87 | self.mol = neutralize_atoms(self.mol) 88 | # self.input_smiles = Chem.MolToSmiles(self.mol) 89 | 90 | def reaction(self, rxn, react, item, partner, priority): 91 | try: 92 | products = rxn.RunReactants(react) 93 | uniq = set() 94 | for mol_tuple in products: 95 | Chem.SanitizeMol(mol_tuple[0]) 96 | # enumerator = rdMolStandardize.TautomerEnumerator() 97 | # canon = enumerator.Canonicalize(mol_tuple[0]) 98 | # smi = Chem.MolToSmiles(Chem.RemoveHs(canon), isomericSmiles=True, kekuleSmiles=False) 99 | smi = Chem.MolToSmiles(Chem.RemoveHs(mol_tuple[0]), isomericSmiles=True, kekuleSmiles=False) 100 | uniq.add(smi) 101 | for smi in uniq: 102 | self.out_product_smiles.append((smi, item, partner, priority)) 103 | except Exception as e: 104 | # logger.error(e) 105 | pass 106 | 107 | # add 2021.1.7 108 | # modify 2021.01.14 109 | def single_point_mutate(self): 110 | mol = self.spiro_atom_label() 111 | for item in self.rules_dict: 112 | rxn = self.rules_dict[item][0] 113 | priority = self.rules_dict[item][1] 114 | if mol.HasSubstructMatch(rxn.GetReactantTemplate(0)): 115 | self.reaction(rxn, (mol,), item, "", priority) 116 | self.protected_atom_label_remove() 117 | return self.out_product_smiles 118 | 119 | def spiro_atom_label(self): 120 | mol = copy.deepcopy(self.mol) 121 | ri = mol.GetRingInfo() 122 | 123 | # spiro_ sma = '[*r3,*r4,*r5,*r6;R2X4$([*,*,*,*](@[r3,r4,r5,r6,r7])(@[r3,r4,r5,r6,r7])(@[r3,r4,r5,r6, 124 | # r7])@[r3,r4,r5,r6,r7])]' 125 | spiro_sma = '[x4]' 126 | spiro_atoms = mol.GetSubstructMatches(Chem.MolFromSmarts(spiro_sma)) 127 | 128 | res = set() 129 | for ring in ri.AtomRings(): 130 | for spi in spiro_atoms: 131 | tmp = set(spi).intersection(set(ring)) 132 | if tmp: 133 | res = res.union(ring) 134 | 135 
| for index in res: 136 | mol.GetAtomWithIdx(index).SetProp('_protected', '1') 137 | 138 | self.mol = mol 139 | return mol 140 | 141 | def bridged_atom_label(self): 142 | 143 | mol = self.mol 144 | brigded_atoms = get_bridged_atoms(mol) 145 | ri = mol.GetRingInfo() 146 | res = set() 147 | for ring in ri.AtomRings(): 148 | for bri in brigded_atoms: 149 | tmp = set(bri).intersection(set(ring)) 150 | if tmp: 151 | res = res.union(ring) 152 | 153 | for index in res: 154 | mol.GetAtomWithIdx(index).SetProp('_protected', '1') 155 | 156 | self.mol = mol 157 | return mol 158 | 159 | def protected_atom_label_remove(self): 160 | mol = self.mol 161 | for idx in range(len(mol.GetAtoms())): 162 | if mol.GetAtomWithIdx(idx).HasProp('_protected'): 163 | mol.GetAtomWithIdx(idx).ClearProp('_protected') 164 | self.mol = mol 165 | return mol 166 | 167 | def clean(self): 168 | self.input_smiles = None 169 | self.out_product_smiles = [] 170 | 171 | 172 | def mutation_df(df: pd.DataFrame, workdir, cpu_num, gen=1, rule_db=None, project_code="GEN"): 173 | workdir = os.path.join(workdir, "generation_" + str(gen)) 174 | 175 | if rule_db is None: 176 | mutation = Mutation(5000, workdir) 177 | else: 178 | mutation = Mutation(5000, workdir, rule_db=rule_db) 179 | 180 | def mutation_per_row(mut: Mutation, smi): 181 | # mutation for each seed molecule 182 | try: 183 | mut.load_mol(smi) 184 | except AssertionError: 185 | return None 186 | mut.single_point_mutate() 187 | return mut.out_product_smiles 188 | 189 | mut_df = df.copy() 190 | if mut_df.shape[0] == 1: 191 | mut_df["smiles_gen_" + str(gen)] = mut_df["smiles_gen_" + str(gen - 1)].apply( 192 | lambda x: mutation_per_row(mutation, x)) 193 | else: 194 | pandarallel.initialize(verbose=0, nb_workers=cpu_num) 195 | mut_df["smiles_gen_" + str(gen)] = mut_df["smiles_gen_" + str(gen - 1)].parallel_apply( 196 | lambda x: mutation_per_row(mutation, x)) 197 | mut_df = mut_df.dropna(subset=["smiles_gen_" + str(gen)]).reset_index(drop=True) 198 | n = 1 
199 | mut_path = os.path.join(workdir, "mutation") 200 | with open(mut_path + ".raw", "w") as f: 201 | header = list(mut_df.columns[:-1]) + ["smiles_gen_" + str(gen), "id_gen_" + str(gen), 202 | "reaction_id_gen_" + str(gen), "partner_gen_" + str(gen), 203 | "priority_gen_" + str(gen)] 204 | for i in mut_df.values.tolist(): 205 | last_gen_info = list(map(str, i[:-1])) 206 | # keep parent mol 207 | f.write(",".join(last_gen_info + [last_gen_info[0], last_gen_info[1].split("-dp")[0].split("-C")[0], 208 | "Na-Na-Na", "", "3"]) + "\n") 209 | # write mutation mols 210 | for info in i[-1]: 211 | info = list(map(str, info)) 212 | new_line = last_gen_info + [info[0]] + [ 213 | project_code.upper() + "_" + str(gen) + "_M_" + str(n).zfill(9)] + info[1:] 214 | f.write(",".join(new_line) + "\n") 215 | n += 1 216 | # drop duplicates product smiles by awk 217 | cmd_dedup = ["awk -F',' '!seen[$(NF-4)]++'", mut_path + ".raw ", ">", mut_path + ".csv"] 218 | shell_cmd_execute(cmd_dedup) 219 | 220 | return header 221 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # **SECSE** 2 | 3 | ---------------------------- 4 | 5 | ### SECSE: _**S**ystemic **E**volutionary **C**hemical **S**pace **E**xplorer_ 6 | 7 | ![plot](docs/platform.jpg) 8 | 9 | Chemical space exploration is a major task of the hit-finding process during the pursuit of novel chemical entities. 10 | Compared with other screening technologies, computational _de novo_ design has become a popular approach to overcome the 11 | limitation of current chemical libraries. Here, we reported a _de novo_ design platform named systemic evolutionary 12 | chemical space explorer (SECSE). The platform was conceptually inspired by fragment-based drug design, that miniaturized 13 | a “lego-building” process within the pocket of a certain target. 
The key to virtual hits generation was then turned into 14 | a computational search problem. To enhance search and optimization, human intelligence and deep learning were 15 | integrated. SECSE has the potential in finding novel and diverse small molecules that are attractive starting points for 16 | further validation. 17 | 18 | ### Tutorials and Usage 19 | 20 | ---------------------------- 21 | 22 | 1. Setting up dependencies 23 | python ~=3.9, perl ~=5.32 24 | ```bash 25 | conda create --name secse -c conda-forge parallel tqdm biopandas openbabel chemprop xlrd=2 pandarallel rdkit=2024.09.1 loguru tensorboard 26 | conda activate secse 27 | ``` 28 | 2. Installing from source 29 | ```bash 30 | git clone https://github.com/KeenThera/SECSE.git 31 | ``` 32 | 3. Setting Environment Variables 33 | `export SECSE=/absolute/path/to/SECSE` 34 | I'm using AutoDock Vina for docking: 35 | [(download here)](https://github.com/ccsb-scripps/AutoDock-Vina/releases) 36 | `export VINA=/absolute/path/to/AutoDockVINA` 37 | I'm using AutoDock GPU: (adgpu-v1.5.3_linux_ocl_128wi) 38 | [(download here)](https://github.com/ccsb-scripps/AutoDock-GPU/releases) 39 | `export AUTODOCK_GPU=/absolute/path/to/AutoDockGPU` 40 | I'm using [Glide](https://www.schrodinger.com/products/glide) for docking (additional installation & license 41 | required): 42 | `export SCHRODINGER=/absolute/path/to/SCHRODINGER` 43 | I'm using [Uni-Dock](https://github.com/dptech-corp/Uni-Dock) for docking (requires a GPU): 44 | [compile from Uni-Dock source code](https://github.com/dptech-corp/Uni-Dock/tree/main/unidock#building-from-source) (recommended), or [download here](https://github.com/dptech-corp/Uni-Dock/releases/download/1.1.0/unidock-1.1.0-cuda120-linux-x86_64) and add `export UNIDOCK=/absolute/path/to/UNIDOCK` 45 | 4. Giving execution permissions to the SECSE directory 46 | `chmod -R +x /absolute/path/to/SECSE` 47 | 5. Input fragments: a tab-separated _.smi_ file without a header. See demo [here](demo/demo_1020.smi).
48 | 6. Parameters in config file: 49 | 50 | [general] 51 | 52 | - _project_code_, project identifier, which will be prefixed to each generated molecule ID, type=str 53 | - _workdir_, working directory, created if it does not exist, otherwise overwritten, type=str 54 | - _fragments_, file path to seed fragments, smi format, type=str 55 | - _num_per_gen_, number of molecules generated each generation, type=int 56 | - _seed_per_gen_, number of selected seed molecules per generation, default=1000, type=int 57 | - _start_gen_, number of the starting generation; to resume generation, please specify 'start_gen' as 58 | the number corresponding to the last **completed generation** in your previous run, default=0, type=int 59 | - _num_gen_, number of growing generations, the final generation number will be the sum of start_gen and num_gen, 60 | type=int 61 | 62 | - _cpu_, maximum number of CPUs to use, type=int 63 | - _gpu_, maximum number of GPUs to use for AutoDock GPU, type=int 64 | - _rule_db_, path to customized rules in json format, input 0 to use the default rules, default=0 65 | 66 | [docking] 67 | - _docking_program_, name of docking program, AutoDock-Vina (input vina) or AutoDock-GPU (input autodock-gpu) or 68 | Glide (input glide), default=vina, type=str 69 | - _target_, protein PDBQT if using AutoDock Vina; grid map files descriptor fld file if using AutoDock GPU; Grid file if 70 | using Glide, type=str 71 | - _RMSD_, docking pose RMSD cutoff between children and parent, default=2, type=float 72 | - _delta_score_, decreased docking score cutoff between children and parent, default=-1.0, type=float 73 | - _score_cutoff_, default=-9, type=float 74 | 75 | Parameters when docking by AutoDock Vina: 76 | 77 | - _x_, Docking box x, type=float 78 | - _y_, Docking box y, type=float 79 | - _z_, Docking box z, type=float 80 | - _box_size_x_, Docking box size x, default=20, type=float 81 | - _box_size_y_, Docking box size y, default=20, type=float 82 | - _box_size_z_, Docking box size z,
default=20, type=float 83 | 84 | [prediction] 85 | 86 | - _mode_, mode of deep learning modeling, 0: not used, 1: modeling per generation, 2: modeling overall after all the 87 | generations, default=0, type=int 88 | - _dl_per_gen_, top N predicted molecules for docking, default=100, type=int 89 | - _dl_score_cutoff_, default=-9, type=float 90 | 91 | [properties] 92 | 93 | - _mw_, molecular weight cutoff, default=450, type=int 94 | - _logp_lower_, minimum of logP, default=0.5, type=float 95 | - _logp_upper_, maximum of logP, default=7, type=float 96 | - _chiral_center_, maximum number of chiral centers, default=2, type=int 97 | - _heteroatom_ratio_, maximum of heteroatom ratio, default=0.35, type=float 98 | - _rdkit_rotatable_bound_num_, maximum number of rotatable bonds calculated from 99 | rdkit.rdMolDescriptors.CalcNumRotatableBonds, default=5, type=int 100 | - _keen_rotatable_bound_num_, maximum number of rotatable bonds defined by KEEN ( 101 | SMARTS: "[C^3!D1;!$(C(F)(F)F)]-!@[!Br!F!Cl!I!H3&!$(*#*)!D1;!$([!Br!F!Cl!I](F)(F)F)]"), default=3, type=int 102 | - _rigid_body_num_, maximum number of rigid bodies defined by KEEN ( 103 | SMARTS: "[C^3!D1;!$(C(F)(F)F);!R;!$(C=O(N));!$(NC(=O));!$(C(=O)O);!$(C(=O)O)]-!@[!Br!F!Cl!I!H3&!$(*#*)!
104 | D1;!$([!Br!F!Cl!I](F)(F)F);!R;!$(C=O([N,O]));!$(NC(=O));!$(C(=O)O)]"), default=2, type=int 105 | - _hbd_, maximum number of hydrogen bond donors calculated by rdkit.rdMolDescriptors.CalcNumHBD, default=5, type=int 106 | - _hba_, maximum number of hydrogen bond acceptors calculated by rdkit.rdMolDescriptors.CalcNumHBA, default=10, type=int 107 | - _tpsa_, maximum of topological polar surface area calculated by rdkit.Chem.Descriptors.TPSA, default=200, 108 | type=float 109 | - _lipinski_violation_, maximum number of violations of Lipinski rule of five calculated by RDKit, default=1, 110 | type=int 111 | - _qed_, QED (calculated by rdkit.Chem.QED.qed) cutoff value, default=0.5, type=float 112 | - _max_ring_size_, maximum of ring size, default=7, type=int 113 | - _max_ring_system_size_, maximum of ring system member size in one ring system, default=3, type=int 114 | - _ring_system_count_, maximum of separated ring system count, default=4, type=int 115 | - _bridged_site_count_, maximum of bridged ring site count, default=2, type=int 116 | - _spiro_site_count_, maximum of spiro ring site count, default=1, type=int 117 | - _fused_site_count_, maximum of fused ring site count, default=3, type=int 118 | - _rdkit_sa_score_, synthetic accessibility score (calculated by RDKit) cutoff, default=5, type=float 119 | - _substructure_filter_, file containing the customized unwanted substructure SMARTS in "*.xls" format, set the 120 | value to 0 if you do not have any additional unwanted substructures. PAINS filters are already included by default. The file 121 | should include columns for **`Pattern`**, **`ID`**, and **`Max`**, where the **`ID`** should be unique for each SMARTS. You can 122 | refer to the example file [subtructure_filter_demo.xls](demo/subtructure_filter_demo.xls), default=0, type=string 123 | 124 | Config file of a demo case [phgdh_demo_vina.ini](demo/phgdh_demo_vina.ini) 125 | Customized rule json template [rules.json](demo/rules.json).
Rule ID should be in the form G-001-XXXX, like 126 | G-001-0001, G-001-0002, G-001-0003 ... 127 | 128 | 7. Run SECSE 129 | `python $SECSE/run_secse.py --config /absolute/path/to/config` 130 | Please input the **absolute path** of the config file here. 131 | 8. Output files 132 | - merged_docked_best_timestamp_with_grow_path.csv: selected molecules and growing path 133 | - selected.sdf: 3D conformers of all selected molecules 134 | 135 | ### Dependencies 136 | 137 | ------- 138 | GNU Parallel installation 139 | 140 | - CentOS / RHEL 141 | `sudo yum install parallel` 142 | - Ubuntu / Debian 143 | `sudo apt-get install parallel` 144 | - From source: https://www.gnu.org/software/parallel/ 145 | 146 | python ~=3.12, perl ~=5.32 147 | 148 | numpy~=1.26.4, pandas~=2.2.2, xlrd~=2.0.1, pandarallel~=1.6.5, tqdm~=4.67.0, biopandas~=0.5.1, openbabel~=3.1.1, rdkit~ 149 | =2024.09, chemprop~=2.1, pytorch~=2.5.1+cu117, tensorboard~=2.18.0 150 | 151 | Linux server with CPUs only also works. 152 | 153 | ### Citation 154 | 155 | ------- 156 | Lu, C.; Liu, S.; Shi, W.; Yu, J.; Zhou, Z.; Zhang, X.; Lu, X.; Cai, F.; Xia, N.; Wang, Y. Systemic Evolutionary Chemical 157 | Space Exploration For Drug Discovery. J Cheminform 14, 19 (2022). 158 | 159 | https://doi.org/10.1186/s13321-022-00598-4 160 | 161 | ### License 162 | 163 | ------- 164 | SECSE is released under [Apache License, Version 2.0](LICENSE.txt). 
165 | 166 | The project is being actively developed, if you have any questions or suggestions, please contact: 167 | wang_yikai@keenthera.com or luchong121@outlook.com 168 | -------------------------------------------------------------------------------- /secse/growing/filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Lu Chong 5 | @file: filter.py 6 | @time: 2020/11/16/13:14 7 | """ 8 | import argparse 9 | import os 10 | import sys 11 | import time 12 | import configparser 13 | import rdkit 14 | import rdkit.Chem as Chem 15 | from rdkit.Chem.rdMolDescriptors import CalcExactMolWt, CalcNumHBD, CalcNumHBA, CalcNumRotatableBonds 16 | from rdkit.Chem import Descriptors, AllChem 17 | from rdkit.Chem import QED 18 | from rdkit.Chem import RDConfig 19 | import json 20 | from loguru import logger 21 | 22 | sys.path.append(os.getenv("SECSE")) 23 | from utilities.ring_tool import RingSystems 24 | from utilities.substructure_filter import StructureFilter 25 | from utilities.wash_mol import wash_mol, neutralize, charge_mol, get_keen_rotatable_bound_num, get_rigid_body_num 26 | from utilities.open_filter import user_filter 27 | 28 | sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score')) 29 | import sascorer 30 | 31 | rdkit.RDLogger.DisableLog("rdApp.*") 32 | 33 | 34 | class Filter: 35 | def __init__(self, gen, config_path): 36 | 37 | self.gen = int(gen) 38 | self.input_smiles = None 39 | self.mol = None 40 | self.pains_smarts = None 41 | 42 | config = configparser.ConfigParser() 43 | config.read(config_path) 44 | 45 | substructure_filter_file = config.get("properties", "substructure_filter") 46 | if substructure_filter_file == "0": 47 | self.strutFilter = StructureFilter() 48 | else: 49 | # logger.info("Use additional substructure filter patters.") 50 | self.strutFilter = StructureFilter(substructure_filter_file) 51 | 52 | self.MW = 
config.getfloat("properties", "mw") 53 | self.logP_lower = config.getfloat("properties", "logp_lower") 54 | self.logP_upper = config.getfloat("properties", "logp_upper") 55 | self.chiral_center = config.getint("properties", "chiral_center") 56 | self.heteroatom_ratio = config.getfloat("properties", "heteroatom_ratio") 57 | self.rdkit_rotatable_bound_num = config.getint("properties", "rdkit_rotatable_bound_num") 58 | self.keen_rotatable_bound_num = config.getint("properties", "keen_rotatable_bound_num") 59 | self.rigid_body_num = config.getint("properties", "rigid_body_num") 60 | self.hbd = config.getint("properties", "hbd") 61 | self.hba = config.getint("properties", "hba") 62 | self.tpsa = config.getfloat("properties", "tpsa") 63 | self.lipinski_violation = config.getint("properties", "lipinski_violation") 64 | self.qed = config.getfloat("properties", "qed") 65 | self.max_ring_size = config.getint("properties", "max_ring_size") 66 | self.max_ring_system_size = config.getint("properties", "max_ring_system_size") 67 | self.ring_system_count = config.getint("properties", "ring_system_count") 68 | self.bridged_site_count = config.getint("properties", "bridged_site_count") 69 | self.spiro_site_count = config.getint("properties", "spiro_site_count") 70 | self.fused_site_count = config.getint("properties", "fused_site_count") 71 | self.rdkit_sa_score = config.getint("properties", "rdkit_sa_score") 72 | 73 | def load_mol(self, input_smiles): 74 | self.clean() 75 | self.input_smiles = input_smiles 76 | self.mol = Chem.MolFromSmiles(self.input_smiles) 77 | 78 | # uncharged each atom 79 | if self.input_smiles.count("-") + self.input_smiles.count("+") > 0: 80 | self.mol, self.input_smiles = neutralize(self.input_smiles) 81 | 82 | if self.mol is None: 83 | self.input_smiles = wash_mol(self.input_smiles) 84 | self.mol = Chem.MolFromSmiles(self.input_smiles) 85 | if self.mol is None: 86 | self.input_smiles = "C" 87 | self.mol = Chem.MolFromSmiles(self.input_smiles) 88 | 89 | def 
clean(self): 90 | self.input_smiles = None 91 | self.mol = None 92 | 93 | def pp_filter(self): 94 | """ 95 | property filter 96 | """ 97 | violation_counter = 0 98 | 99 | mw = CalcExactMolWt(self.mol) 100 | if mw > self.MW: 101 | yield "MW" 102 | if mw > 500: 103 | violation_counter += 1 104 | if self.gen > 3: 105 | if 81 > mw: 106 | yield "MW" 107 | 108 | mol_hbd = CalcNumHBD(self.mol) 109 | if mol_hbd > self.hbd: 110 | yield "HBD" 111 | if mol_hbd > 5: 112 | violation_counter += 1 113 | 114 | mol_hba = CalcNumHBA(self.mol) 115 | if mol_hba > self.hba: 116 | yield "HBA" 117 | if mol_hba > 10: 118 | violation_counter += 1 119 | 120 | logp = Descriptors.MolLogP(self.mol) 121 | if logp < self.logP_lower or logp > self.logP_upper: 122 | yield "cLogP" 123 | if logp > 5: 124 | violation_counter += 1 125 | 126 | if violation_counter > self.lipinski_violation: 127 | yield "Lipinski Violation" 128 | 129 | if Descriptors.TPSA(self.mol) > self.tpsa: 130 | yield "TPSA" 131 | 132 | if CalcNumRotatableBonds(self.mol) > self.rdkit_rotatable_bound_num: 133 | yield "RDKit Rotatable Bonds" 134 | 135 | if get_keen_rotatable_bound_num(self.mol) > self.keen_rotatable_bound_num: 136 | # rotatable bound customized @dalong 137 | yield "Keen Rotatable Bounds" 138 | if get_rigid_body_num(self.mol) > self.rigid_body_num: 139 | # rotatable bound customized @dalong 140 | yield "Rigid Body" 141 | yield "PASS" 142 | 143 | def load_pains_filter(self): 144 | # read smarts for pains 145 | with open(os.path.join(os.getenv("SECSE"), 'growing/pains_smarts.json')) as f: 146 | data = json.load(f) 147 | pains_smarts = dict((k, Chem.MolFromSmarts(v)) for k, v in data.items()) 148 | self.pains_smarts = pains_smarts 149 | 150 | def alert_filter(self): 151 | self.load_pains_filter() 152 | for name in self.pains_smarts: 153 | sma = self.pains_smarts[name] 154 | if self.mol.HasSubstructMatch(sma): 155 | yield "PAINS" 156 | yield "PASS" 157 | 158 | def substructure_filter(self): 159 | yield 
self.strutFilter.sfilter(self.mol) 160 | 161 | def ring_system_filter(self): 162 | ring_sys = RingSystems(self.mol) 163 | if ring_sys.ring_check(self.max_ring_system_size, self.bridged_site_count, self.spiro_site_count, 164 | self.fused_site_count, self.ring_system_count): 165 | yield "PASS" 166 | yield "RS" 167 | 168 | def custom_filter(self): 169 | # add Chiral center filter, cycle size less than 7, remove 3 continues hetero-atom 170 | chiral_tags = Chem.FindMolChiralCenters(self.mol, includeUnassigned=True, useLegacyImplementation=True) 171 | # the maximum number of chiral center <= 3 172 | if len(chiral_tags) > self.chiral_center: 173 | yield "CC" 174 | 175 | chiral_atom_list = set([x[0] for x in chiral_tags]) 176 | rings = self.mol.GetRingInfo().AtomRings() 177 | 178 | if rings: 179 | # the maximum of ring size <= 7 180 | mol_max_ring_size = max([len(x) for x in rings]) 181 | if mol_max_ring_size > self.max_ring_size: 182 | yield "max ring size" 183 | 184 | if len(chiral_tags) == 3: 185 | # 3 CCs should not in the same ring 186 | for ring in rings: 187 | if len(set(ring).intersection(chiral_atom_list)) >= 3: 188 | yield "chiral center in one ring >2" 189 | yield "PASS" 190 | 191 | def heteroatom_filter(self): 192 | hetero_ratio = Chem.rdMolDescriptors.CalcNumHeteroatoms(self.mol) / self.mol.GetNumHeavyAtoms() 193 | if hetero_ratio > self.heteroatom_ratio: 194 | yield "heteroatom_ratio" 195 | else: 196 | yield "PASS" 197 | 198 | def charge_filter(self): 199 | negative_charge = Chem.MolFromSmarts("[*-1]") 200 | positive_charge = Chem.MolFromSmarts("[*+1]") 201 | charged_smi = charge_mol(self.input_smiles) 202 | mol = Chem.MolFromSmiles(charged_smi) 203 | if mol is None: 204 | mol = self.mol 205 | nc = len(mol.GetSubstructMatches(negative_charge)) 206 | pc = len(mol.GetSubstructMatches(positive_charge)) 207 | npc = nc + pc 208 | if npc <= 1: 209 | yield "PASS" 210 | elif npc == 2: 211 | if nc <= 1: 212 | yield "PASS" 213 | else: 214 | yield "Charge" 215 | else: 
216 | yield "Charge" 217 | 218 | def similarity_filter(self): 219 | fp = AllChem.GetMorganFingerprintAsBitVect(self.mol, 2, 512) 220 | 221 | def QED_filter(self): 222 | if QED.qed(self.mol) >= self.qed: 223 | yield "PASS" 224 | else: 225 | yield "QED" 226 | 227 | def SA_filter(self): 228 | sa_score = sascorer.calculateScore(self.mol) 229 | if sa_score <= self.rdkit_sa_score: 230 | yield "PASS" 231 | else: 232 | yield "SA score" 233 | 234 | def my_filter(self): 235 | if self.gen > 3: 236 | tag = user_filter(self.mol) 237 | if tag: 238 | yield "PASS" 239 | else: 240 | yield "CUSTOM" 241 | else: 242 | yield "PASS" 243 | 244 | 245 | def mol_filter(molfilter: Filter, smi): 246 | molfilter.load_mol(smi) 247 | pass_filter = [molfilter.pp_filter(), 248 | molfilter.custom_filter(), 249 | molfilter.charge_filter(), 250 | molfilter.heteroatom_filter(), 251 | molfilter.substructure_filter(), 252 | molfilter.ring_system_filter(), 253 | molfilter.alert_filter(), 254 | molfilter.QED_filter(), 255 | molfilter.SA_filter(), 256 | molfilter.my_filter() 257 | ] 258 | for i in pass_filter: 259 | res = next(i) 260 | if res != "PASS": 261 | return res 262 | return "PASS" 263 | 264 | 265 | def file_filter(file_path, workdir, gen, config): 266 | molsfilter = Filter(gen, config) 267 | with open(file_path, "r") as inf: 268 | with open(os.path.join(workdir, "filter_flag", os.path.basename(file_path)), "w") as outf: 269 | for line in inf.readlines(): 270 | line = line.strip() 271 | smi = line.split(",")[-5] 272 | flag = mol_filter(molsfilter, smi) 273 | new_line = line + "," + flag + "\n" 274 | outf.write(new_line) 275 | 276 | 277 | if __name__ == '__main__': 278 | parser = argparse.ArgumentParser(description="Filter per file") 279 | parser.add_argument("file_path", help="File path") 280 | parser.add_argument("workdir", help="Workdir") 281 | parser.add_argument("gen", help="generation number") 282 | parser.add_argument("config", help="Configuration file") 283 | args = parser.parse_args() 284 | 
time1 = time.time() 285 | file_filter(args.file_path, args.workdir, args.gen, args.config) 286 | 287 | time2 = time.time() 288 | # mfilter = Filter() 289 | # mfilter.load_mol("C12CCCC3(CCCCC3)C1C4C5C(CC(C(C6CCCC7C6C8C9C(C%10CCC9C%10)C7C8)CCC%11)C%11C5)C2C4") 290 | # logger.info(next(mfilter.ring_system_filter())) 291 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2021 Suzhou Keen Therapeutics Co., Ltd. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /secse/grow_processes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 _*- 3 | """ 4 | @author: Lu Chong 5 | @file: grow_processes.py 6 | @time: 2021/11/17/13:49 7 | """ 8 | import csv 9 | import shutil 10 | import os 11 | import pandas as pd 12 | import rdkit 13 | import configparser 14 | from evaluate.glide_docking import dock_by_glide 15 | from growing.mutation.mutation import mutation_df 16 | from scoring.ranking import Ranking 17 | from scoring.diversity_score import clustering 18 | from scoring.docking_score_prediction import prepare_files 19 | from scoring.sampling import sample_by_similarity, sample_by_rule_weight 20 | from evaluate.docking import dock_by_py_vina, dock_by_py_autodock_gpu, dock_by_unidock 21 | from report.grow_path import write_growth 22 | from utilities.load_rules import json_to_DB 23 | from utilities.function_helper import shell_cmd_execute 24 | import time 25 | from loguru import logger 26 | 27 | 
# Silence RDKit's "rdApp.*" log channels for the whole process.
rdkit.RDLogger.DisableLog("rdApp.*")


class Grow(object):
    """Drive the iterative fragment-growing workflow.

    The starting generation is docked and ranked first.  Every following
    generation is produced by mutating the previous winners, filtering the
    products, optionally sampling/clustering them, and docking and ranking
    the survivors again.  The run ends through :meth:`check_growing`, which
    writes the growth report and raises ``SystemExit``.
    """

    def __init__(self, generation, mols_smi, workdir, num_per_gen, docking_program, receptor, start_gen, dl_mode,
                 config_path, cpu_num=0, gpu_num=1, rule_db=0, project_code="GEN", x=0, y=0, z=0, box_size_x=0,
                 box_size_y=0, box_size_z=0):
        """Store the run configuration and resolve the rule database.

        Args:
            generation: number of growing rounds to run after the starting
                generation (converted with ``int``).
            mols_smi: path of the input SMILES file.  When resuming it is
                replaced by ``mols_for_docking.smi`` of ``start_gen``.
            workdir: root directory holding one ``generation_<n>`` folder
                per generation.
            num_per_gen: number of molecules wanted per generation.  It is
                compared with row counts and divided, so it must be numeric.
            docking_program: docking backend name; lower-cased here and
                matched by substring in :meth:`docking_sh`.
            receptor: target handed to the docking backends.
            start_gen: generation to start from; a value above 0 resumes
                from that generation's folder.
            dl_mode: deep-learning mode.  This class treats 1 as "predict
                in every generation" and 2 as "predict once after the last
                generation"; it resets the value to 0 to switch it off.
            config_path: path of the configuration file read by ranking,
                filtering, prediction and reporting.
            cpu_num: CPU count forwarded to docking, mutation, filtering
                and clustering.
            gpu_num: GPU count forwarded to AutoDock-GPU docking.
            rule_db: ``0``/``"0"`` for no explicit rule database, a path
                ending in ``json`` (converted to ``<workdir>/rules.db``) or
                a path ending in ``db`` (used as is).
            project_code: forwarded to ``mutation_df``.
            x, y, z: docking box centre forwarded to Vina / UniDock.
            box_size_x, box_size_y, box_size_z: docking box size forwarded
                to Vina / UniDock.

        Raises:
            Exception: if ``rule_db`` is none of the accepted forms.
        """
        self.mols_smi = mols_smi
        self.total_generation = int(generation)
        self.workdir = workdir
        self.num_per_gen = num_per_gen
        self.cpu_num = cpu_num
        self.gpu_num = gpu_num

        self.target = receptor
        self.x = x
        self.y = y
        self.z = z
        self.box_size_x = box_size_x
        self.box_size_y = box_size_y
        self.box_size_z = box_size_z

        # Per-run working state, filled in while growing.  Initialised
        # before the resume logic so that it cannot clobber the resumed
        # working directory (the original reset workdir_now to None later).
        self.lig_sdf = None
        self.winner_df = None
        self.winner_path = None
        self._generation_dir = None
        self._filter_df = None
        self._dock_df = None
        self._sampled_df = None
        self.workdir_now = None

        self.start_gen = start_gen  # generation the run was started from
        self.gen = start_gen  # generation currently being processed
        # Resume from breakpoint: reuse the docking input of that generation.
        if self.gen > 0:
            self.workdir_now = os.path.join(self.workdir, "generation_{}".format(self.gen))
            self.mols_smi = os.path.join(self.workdir_now, "mols_for_docking.smi")

        self.docking_program = docking_program.lower()
        self.dl_mode = dl_mode
        self.config_path = config_path

        rule_db = str(rule_db)
        if rule_db == "0":
            self.rule_db = None
        elif rule_db.endswith("json"):
            # JSON rules are converted into a database inside the workdir.
            os.makedirs(self.workdir, exist_ok=True)
            self.rule_db = os.path.join(self.workdir, "rules.db")
            json_to_DB(rule_db, self.rule_db)
        elif rule_db.endswith("db"):
            self.rule_db = rule_db
        else:
            raise Exception("Please check your input rule file.")
        self.project_code = project_code

        self._GROWING_STATE_LIST = ["GROWING", "BROKEN", "STOP"]
        self.growing_flag = self._GROWING_STATE_LIST[0]

    def _generation_path(self, *parts):
        """Return a path below the folder of the current generation (``self.gen``)."""
        return os.path.join(self.workdir, "generation_" + str(self.gen), *parts)

    def docking_sh(self, step):
        """Dock ``self.mols_smi`` inside ``self.workdir_now`` with the configured backend.

        The backend is chosen by substring match on ``self.docking_program``
        in the order vina, glide, autodock-gpu, unidock.  Afterwards
        ``self.lig_sdf`` points at ``docking_outputs_with_score.sdf`` in the
        working directory.

        Args:
            step: step label used in the log messages.
        """
        start = time.time()
        os.makedirs(self.workdir_now, exist_ok=True)

        if "vina" in self.docking_program:
            self.docking_vina(step)
        elif "glide" in self.docking_program:
            self.docking_glide(step)
        elif "autodock-gpu" in self.docking_program:
            self.docking_autodock_gpu(step)
        elif "unidock" in self.docking_program:
            self.docking_unidock(step)
        else:
            # Previously this case was skipped without any trace; control
            # flow is unchanged, but the misconfiguration is now visible.
            logger.warning("Unknown docking program '{}': no docking was run.".format(self.docking_program))

        # ranking and find top fragments
        self.lig_sdf = os.path.join(self.workdir_now, "docking_outputs_with_score.sdf")
        end = time.time()
        logger.info("Docking time cost: {} min.".format(round((end - start) / 60, 2)))

    def docking_autodock_gpu(self, step):
        """Dock the current molecules with AutoDock-GPU."""
        logger.info("Step {}: Docking with AutoDock GPU ...".format(step))
        dock_by_py_autodock_gpu(self.workdir_now, self.mols_smi, self.target, self.cpu_num, self.gpu_num)

    def docking_vina(self, step):
        """Dock the current molecules with AutoDock Vina using the configured box."""
        logger.info("Step {}: Docking with Autodock Vina ...".format(step))
        dock_by_py_vina(self.workdir_now, self.mols_smi, self.target, self.cpu_num, self.x, self.y, self.z,
                        self.box_size_x, self.box_size_y, self.box_size_z)

    def docking_glide(self, step):
        """Dock the current molecules with Glide."""
        logger.info("Step {}: Docking with Glide ...".format(step))
        # set different docking precision for different generation:
        # "SP" for generation 0, "HTVS" for every later generation
        dock_mode = "SP" if self.gen < 1 else "HTVS"
        dock_by_glide(self.workdir_now, self.mols_smi, self.target, self.gen, dock_mode, self.cpu_num)

    def docking_unidock(self, step):
        """Dock the current molecules with UniDock using the configured box."""
        logger.info("Step {}: Docking with UniDock ...".format(step))
        dock_by_unidock(self.workdir_now, self.mols_smi, self.target, self.cpu_num, self.x, self.y, self.z,
                        self.box_size_x, self.box_size_y, self.box_size_z)

    def ranking_docked_mols(self, step=2):
        """Rank the docked molecules and store the winners of this generation.

        On success the docked table is written to ``docked_gen_<n>.csv`` and
        the winners to ``best_fragment_gen_<n>.smi`` (tab separated) in the
        generation folder, and ``self.winner_df`` is updated.  If the
        ``Ranking`` object reports ``ranking_flag`` as false, the run is
        marked as broken and :meth:`check_growing` ends it.

        Args:
            step: step label used in the log message.
        """
        logger.info("Step {}: Ranking docked molecules...".format(str(step)))
        gen_tag = str(self.gen)
        ranking = Ranking(sdf=self.lig_sdf, gen=self.gen, config_file=self.config_path)
        if ranking.ranking_flag:
            ranking.docked_df.to_csv(self._generation_path("docked_gen_" + gen_tag + ".csv"), index=False)
            ranking.tournament_selection()
            # merge mols whose evaluate score below the cutoff
            ranking.mols_score_below_cutoff()
            self.winner_df = ranking.final_df
            # generate smi file
            self.winner_path = self._generation_path("best_fragment_gen_" + gen_tag + ".smi")
            id_col = "id_gen_" + gen_tag
            smiles_col = "smiles_gen_" + gen_tag
            # keep only the part of the id before the first tab
            self.winner_df[id_col] = self.winner_df[id_col].apply(lambda x: x.split("\t")[0])
            self.winner_df[[smiles_col, id_col]].to_csv(self.winner_path, sep="\t", index=False,
                                                        quoting=csv.QUOTE_NONE)
        else:
            self.growing_flag = self._GROWING_STATE_LIST[1]
        # No-op while the state is still "GROWING".
        self.check_growing()

    def dl_pre(self, step):
        """Train the docking-score model, then dock the top predicted molecules.

        If ``prepare_files`` returns no prediction file, prediction is
        switched off (``self.dl_mode = 0``) and nothing else happens.
        Otherwise ``chemprop_pre.sh`` is run, the selected molecules are
        docked in ``generation_<n>_pre`` and, when ``dl_mode`` is 1, their
        docking output is appended to the SDF of the current generation.

        Args:
            step: step label used in the log messages.
        """
        logger.info("Step {}.1: Building deep learning models...".format(str(step)))

        train, pre = prepare_files(self.gen, self.workdir, self.dl_mode)
        if pre is None:
            logger.info("Skipping docking score prediction as all molecules have been docked.")
            self.dl_mode = 0
            return
        # NOTE(review): os.getenv("SECSE") is None when the variable is unset,
        # which makes os.path.join raise TypeError here.
        dl_shell = os.path.join(os.getenv("SECSE"), "scoring", "chemprop_pre.sh")
        config = configparser.ConfigParser()
        config.read(self.config_path)

        dl_select_num = config.get("prediction", "dl_per_gen")
        dl_cmd = [dl_shell, self.workdir, train, pre, str(self.gen), dl_select_num, "22"]
        shell_cmd_execute(dl_cmd, 0)
        # docking top predicted compounds
        self.workdir_now = os.path.join(self.workdir, "generation_{}_pre".format(self.gen))
        self.mols_smi = os.path.join(self.workdir_now, "mols_for_docking_pred.smi")
        self.docking_sh(str(step) + ".2")

        # merge results to the current generation if prediction per generation
        if self.dl_mode == 1:
            self.lig_sdf = self._generation_path("docking_outputs_with_score.sdf")
            # The ">>" token presumably relies on shell_cmd_execute running
            # the joined command through a shell - confirm in function_helper.
            merge_cmd = ["cat", os.path.join(self.workdir_now, "docking_outputs_with_score.sdf"), ">>", self.lig_sdf]
            shell_cmd_execute(merge_cmd)
            self.workdir_now = self._generation_path()

    def check_growing(self):
        """Act on ``self.growing_flag``.

        ``GROWING`` does nothing.  ``BROKEN`` writes the growth report up to
        the previous generation and exits.  ``STOP`` writes the report for
        the current generation and exits.

        Raises:
            SystemExit: for the ``BROKEN`` and ``STOP`` states.
        """
        growing, broken, stop = self._GROWING_STATE_LIST
        if self.growing_flag == growing:
            # still growing
            return
        if self.growing_flag == broken:
            # broken and report generated molecules
            if self.dl_mode == 2:
                self.dl_mode = 0
            write_growth(self.config_path, self.gen - 1, self.dl_mode)
            raise SystemExit(
                "Note: Calculations are only performed from the generation {} to the generation {} out of the preset generations.".format(
                    self.start_gen, self.gen - 1))
        if self.growing_flag == stop:
            # regular finish and stop the program
            write_growth(self.config_path, self.gen, self.dl_mode)
            raise SystemExit(
                "Finish the calculation from the generation {} to the generation {}".format(self.start_gen, self.gen))

    def _mutate_seeds(self):
        """Mutate the current winners, de-duplicate the products, split them by seed; return the CSV header."""
        logger.info("Step 1: Mutation")
        self._generation_dir = os.path.join(self.workdir_now, "generation_split_by_seed")
        self.winner_df = self.winner_df.reset_index(drop=True)
        header = mutation_df(self.winner_df, self.workdir, self.cpu_num, self.gen, self.rule_db, self.project_code)
        generation_path = os.path.join(self.workdir_now, "generation")

        cmd_cat = ["cat", os.path.join(self.workdir_now, "mutation.csv"), ">", generation_path + ".raw"]
        shell_cmd_execute(cmd_cat)
        # keep the first row for each value of the fifth-from-last CSV field
        cmd_dedup = ["awk -F',' '!seen[$(NF-4)]++'", generation_path + ".raw", ">", generation_path + ".csv"]
        shell_cmd_execute(cmd_dedup)
        os.makedirs(self._generation_dir, exist_ok=True)
        # write every row to "<generation_dir>/<second field>.csv"
        cmd_split = ["awk -F, '{print>\"" + self._generation_dir + "/\"$2\".csv\"}'", generation_path + ".csv"]
        shell_cmd_execute(cmd_split)
        return header

    def _filter_mutants(self, header):
        """Run the parallel filter script and load ``filter.csv`` into ``self._filter_df``.

        Ends the run through :meth:`check_growing` when the filter output is empty.
        """
        logger.info("Step 2: Applying filter to all mutated molecules.")
        time1 = time.time()
        cmd_filter = [os.path.join(os.getenv("SECSE"), "growing", "filter_parallel.sh"), self.workdir_now,
                      str(self.gen), self.config_path, str(self.cpu_num)]
        shell_cmd_execute(cmd_filter)
        time2 = time.time()
        logger.info("Filter runtime: {:.2f} min.".format((time2 - time1) / 60))

        filter_csv = os.path.join(self.workdir_now, "filter.csv")
        try:
            self._filter_df = pd.read_csv(filter_csv, header=None)
        except pd.errors.EmptyDataError:
            self.growing_flag = self._GROWING_STATE_LIST[1]
            logger.info("No molecules met the filter criteria. Please adjust your configuration.")
            self.check_growing()  # raises SystemExit for the BROKEN state

        self._filter_df.columns = header + ["flag"]
        # "type" is the first two dash-separated parts of the reaction id
        self._filter_df["type"] = self._filter_df["reaction_id_gen_" + str(self.gen)].apply(
            lambda x: "-".join(x.split("-")[:2]))
        self._filter_df.to_csv(filter_csv, index=False)

    def _select_for_docking(self):
        """Choose the molecules to dock (``self._dock_df``) and write ``mols_for_docking.smi``."""
        gen_tag = str(self.gen)
        # do not sample or clustering if generated molecules less than wanted size
        if self._filter_df.shape[0] <= self.num_per_gen:
            self._dock_df = self._filter_df
            self._dock_df.to_csv(os.path.join(self.workdir_now, "sampled.csv"), index=False)
        else:
            logger.info("Step 3: Sampling")
            self._sampled_df = sample_by_rule_weight(self.gen, self._filter_df, self.workdir_now)
            logger.info("Step 4: Clustering")
            num_clusters = int(self.num_per_gen / 5) + 1
            self._sampled_df = clustering(self._sampled_df, "smiles_gen_" + gen_tag, self.gen, self.cpu_num,
                                          num_clusters)

            # sample enough mol: per cluster, take the rows with the smallest
            # distance to the cluster centre
            per_cluster = int(self.num_per_gen / num_clusters) + 1
            by_distance = self._sampled_df.sort_values("cluster_center_dis_gen_" + gen_tag)
            self._dock_df = by_distance.groupby("cluster_center_gen_" + gen_tag).head(per_cluster)

        # write file for evaluate
        self.mols_smi = os.path.join(self.workdir_now, "mols_for_docking.smi")
        self._dock_df[["smiles_gen_" + gen_tag, "id_gen_" + gen_tag]].to_csv(self.mols_smi, index=False,
                                                                             header=False, sep="\t")

    def grow(self):
        """Run the whole workflow; always ends by raising ``SystemExit`` via :meth:`check_growing`.

        Starting generation: 1. evaluate; 2. ranking.
        Next generations: 1. copy the best mols from last generation as seed;
        2. mutation; 3. filter; 4. sampling; 5. clustering; 6. evaluate;
        7. ranking.
        """
        logger.info(f"Input fragment file: {self.mols_smi}")
        logger.info(f"Target grid file: {self.target}")
        logger.info(f"Workdir: {self.workdir}")
        logger.info(f"Generation {self.gen} ...")
        self.workdir_now = self._generation_path()
        step = 1
        self.docking_sh(step)
        step += 1
        if self.gen > 2 and self.dl_mode == 1:
            # Prediction stays best-effort here, but the failure is now
            # logged and KeyboardInterrupt/SystemExit are no longer swallowed.
            try:
                self.dl_pre(step)
                step += 1
            except Exception as err:
                logger.warning("Docking score prediction failed and was skipped: {}".format(err))
        self.ranking_docked_mols(step)

        for _ in range(self.total_generation):
            self.gen += 1
            logger.info(f"Generation {self.gen} ...")
            self.workdir_now = self._generation_path()
            # start every generation from an empty folder
            if os.path.exists(self.workdir_now):
                shutil.rmtree(self.workdir_now)
            os.makedirs(self.workdir_now, exist_ok=True)
            self.winner_df.to_csv(os.path.join(self.workdir_now, "seed_fragments.smi"), sep="\t", index=False,
                                  quoting=csv.QUOTE_NONE)

            header = self._mutate_seeds()
            self._filter_mutants(header)
            self._select_for_docking()

            # evaluate
            step = 5
            self.docking_sh(step)
            # run deep learning model, when ( dl_mode is 1) & (not all generated compounds were docked)
            if (self.dl_mode == 1) and (self._filter_df.shape[0] > self._dock_df.shape[0]):
                step += 1
                self.dl_pre(step)
            # ranking
            step += 1
            self.ranking_docked_mols(step)

        # dl_mode 2: predict once, after the last generation
        if self.dl_mode == 2:
            step += 1
            self.dl_pre(step)

        self.growing_flag = self._GROWING_STATE_LIST[2]
        self.check_growing()
"[#6](-[#1])-[#7](-[#6](-[#1])-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[$([#1]),$([#6](-[#1])-[#1])])-[#6](-[#1])-[$([#1]),$([#6]-[#1])])-[#1])-[#1]", 19 | "thiaz_ene_A(128)": "[#6]-1(=[#6](-[$([#1]),$([#6](-[#1])-[#1]),$([#6]=[#8])])-[#16]-[#6](-[#7]-1-[$([#1]),$([#6]-[#1]),$([#6]:[#6])])=[#7;!R])-[$([#6](-[#1])-[#1]),$([#6]:[#6])]", 20 | "pyrrole_A(118)": "n2(-[#6]:1:[!#1]:[#6]:[#6]:[#6]:[#6]:1)c(cc(c2-[#6;X4])-[#1])-[#6;X4]", 21 | "catechol_A(92)": "c:1:c:c(:c(:c:c:1)-[#8;H1])-[#8;H1]", 22 | "ene_five_het_B(90)": "[#6]-1(=[#6])-[#6](-[#7]=[#6]-[#16]-1)=[#8]", 23 | "imine_one_fives(89)": "[#6]-1=[!#1]-[!#6&!#1]-[#6](-[#6]-1=[!#6&!#1;!R])=[#8]", 24 | "ene_five_het_C(85)": "[#6]-1(-[#6](-[#6]=[#6]-[!#6&!#1]-1)=[#6])=[!#6&!#1]", 25 | "hzone_pipzn(79)": "CN1[C;H2][C;H2]N(N=[C;H1][#6]=,:[#6])[C;H2][C;H2]1", 26 | "keto_keto_beta_A(68)": "c:1-2:c(:c:c:c:c:1)-[#6](=[#8])-[#6;X4]-[#6]-2=[#8]", 27 | "hzone_pyrrol(64)": "Cn1cccc1C=NN", 28 | "ene_one_ene_A(57)": "[#6]=!@[#6](-[!#1])-@[#6](=!@[!#6&!#1])-@[#6](=!@[#6])-[!#1]", 29 | "cyano_ene_amine_A(56)": "N#CC=C(N)C(C#N)C#N", 30 | "ene_five_one_A(55)": "c:1-2:c(:c:c:c:c:1)-[#6](=[#8])-[#6](=[#6])-[#6]-2=[#8]", 31 | "cyano_pyridone_A(54)": "N#Cc1ccc[#7;H1]c1=S", 32 | "anil_alk_ene(51)": "c:1:c:c-2:c(:c:c:1)-[#6]-3-[#6](-[#6]-[#7]-2)-[#6]-[#6]=[#6]-3", 33 | "amino_acridine_A(46)": "c:1:c:2:c(:c:c:c:1):n:c:3:c(:c:2-[#7]):c:c:c:c:3", 34 | "ene_five_het_D(46)": "[#6]-1(=[#6])-[#6](=[#8])-[#7]-[#7]-[#6]-1=[#8]", 35 | "thiophene_amino_Aa(45)": "[H]N([H])c1sc([!#1])c([!#1])c1C=O", 36 | "ene_five_het_E(44)": "[#7]-[#6]=!@[#6]-2-[#6](=[#8])-c:1:c:c:c:c:c:1-[!#6&!#1]-2", 37 | "sulfonamide_A(43)": "NS(=O)(=O)c1cc([F,Cl,Br,I])cc([F,Cl,Br,I])c1O", 38 | "thio_ketone(43)": "[#6]-[#6](=[#16])-[#6]", 39 | "sulfonamide_B(41)": "[H]N(c1ccc([O;H1])cc1)S(=O)=O", 40 | "anil_no_alk(40)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[$([#8]),$([#7]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#7](-[#1])-[#1]", 41 | "thiophene_amino_Ab(40)": 
"[$([#1]),$([#6](-[#1])-[#1]),$([#6]:[#6])]-c:1:c(:c(:c(:s:1)-[#7](-[#1])-[#6](=[#8])-[#6])-[#6](=[#8])-[#8])-[$([#6]:1:[#6]:[#6]:[#6]:[#6]:[#6]:1),$([#6]:1:[#16]:[#6]:[#6]:[#6]:1)]", 42 | "het_pyridiniums_A(39)": "[H]c1c([$([N]),$([H])])ccc2ccc[n+]([$([O;X1]),$([C;H3]),$([#6][#6]:[#6]),$([#6][#6][#8]),$([#6][#6](C)=[#8]),$([#6][#6](N)=[#8]),$([#6][#6][#6])])c12", 43 | "anthranil_one_A(38)": "CC(=O)c1ccccc1[#7;H1][!$([#6]=[#8])]", 44 | "cyano_imine_A(37)": "[#7;H1][#7]=[#6](-[#6]#[#7])-[#6]=[!#6&!#1;!R]", 45 | "diazox_sulfon_A(36)": "[#7](-c:1:c:c:c:c:c:1)-[#16](=[#8])(=[#8])-[#6]:2:[#6]:[#6]:[#6]:[#6]:3:[#7]:[$([#8]),$([#16])]:[#7]:[#6]:2:3", 46 | "hzone_anil_di_alk(35)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])=[#7]-[#7]-[$([#6](=[#8])-[#6](-[#1])(-[#1])-[#16]-[#6]:[#7]),$([#6](=[#8])-[#6](-[#1])(-[#1])-[!#1]:[!#1]:[#7]),$([#6](=[#8])-[#6]:[#6]-[#8]-[#1]),$([#6]:[#7]),$([#6](-[#1])(-[#1])-[#6](-[#1])-[#8]-[#1])])-[#1])-[#1]", 47 | "rhod_sat_A(33)": "[#7]-1-[#6](=[#16])-[#16]-[#6;X4]-[#6]-1=[#8]", 48 | "hzone_enamin(30)": "[#7][#7]=[#6][#6](-[$([#1]),$([#6])])=[#6]([#6])-!@[$([#7]),$([#8])]", 49 | "pyrrole_B(29)": "[#6;X4]c1ccc([#6]:[#6])n1c2ccccc2", 50 | "thiophene_hydroxy(28)": "s1ccc(c1)-[#8;H1]", 51 | "cyano_pyridone_B(27)": "[!#6][#6]1=,:[#7][#6]([#6])=,:[#6](C#N)[#6](=O)[#7]1", 52 | "imine_one_sixes(27)": "[#6]-1(-[#6](=[#8])-[#7]-[#6](=[#8])-[#7]-[#6]-1=[#8])=[#7]", 53 | "dyes5A(27)": "[#6]=,:[#6]:[#7]([#6])~[#6]:[#6]=,:[#6][#6]~[#6]:[#7]", 54 | "naphth_amino_A(25)": "c1cc2cccc3[#7][#6]=,:[#7]c(c1)c23", 55 | "naphth_amino_B(25)": "[C;X4]1[N;H1]c3cccc2cccc([N;H1]1)c23", 56 | "ene_one_ester(24)": "[#6]-[#8]-[#6](=[#8])-[#6](-[#7][#6])=[#6]-[#6](-[#6])=[#8]", 57 | "thio_dibenzo(23)": "S=[#6]1[#6]=,:[#6][!#6,!#6][#6]=,:[#6]1", 58 | "cyano_cyano_A(23)": "[#6](-[#6]#[#7])(-[#6]#[#7])-[#6](-[$([#6]#[#7]),$([#6]=[#7])])-[#6]#[#7]", 59 | "hzone_acyl_naphthol(22)": 
"[H]c2c([H])c([H])c1c([H])c(C(=O)NN=C)c(O)c([H])c1c2[H]", 60 | "het_65_A(21)": "O=Cc1cnn2c([#8;H1])ccnc12", 61 | "imidazole_A(19)": "n:1:c(:n(:c(:c:1-c:2:c:c:c:c:c:2)-c:3:c:c:c:c:c:3)-[#1])-[#6]:[!#1]", 62 | "ene_cyano_A(19)": "[#6](-[#6]#[#7])(-[#6]#[#7])=[#6]-c:1:c:c:c:c:c:1", 63 | "anthranil_acid_A(19)": "C=NNc1ccccc1C(=O)[#8;H1]", 64 | "dyes3A(19)": "[#6]-,:[#6]:[#7+]=,:[#6][#6]=[#6][#7][#6;X4]", 65 | "dhp_bis_amino_CN(19)": "[#6]=,:[#6]C1C(C#N)=C(N)SC(N)=C1C#N", 66 | "het_6_tetrazine(18)": "[#7]~[#6]:1:[#7]:[#7]:[#6](:[$([#7]),$([#6]-[#1]),$([#6]-[#7]-[#1])]:[$([#7]),$([#6]-[#7])]:1)-[$([#7]-[#1]),$([#8]-[#6](-[#1])-[#1])]", 67 | "ene_one_hal(17)": "[#6]-[#6]=[#6](-[F,Cl,Br,I])-[#6](=[#8])-[#6]", 68 | "cyano_imine_B(17)": "N#CC(C#N)=NNc1ccccc1", 69 | "thiaz_ene_B(17)": "[#6]NC(=O)-!@[#6]1=,:[#6]([$([N]),$(NC(=O)[#6]:[#6])])[#7]([$([#6;H2]-[#6;H1]=[#6;H2]),$([#6]=,:[#6])])[#6](=S)[#16]1", 70 | "ene_rhod_B(16)": "[H]C([$([#6]-[#35]),$([#6]:[#6](-[#1]):[#6](-[F,Cl,Br,I]):[#6]:[#6]-[F,Cl,Br,I]),$([#6]:[#6](-[#1]):[#6](-[#1]):[#6]-[#16]-[#6](-[#1])-[#1]),$([#6]:[#6]:[#6]:[#6]:[#6]:[#6]:[#6]:[#6]:[#6]:[#6]-[#8]-[#6;H2]),$([#6]:1:[#6](-[#6;H2]):[#7](-[#6;H2]):[#6](-[#6;H2]):[#6]:1)])=C1SC(=O)[N]C1=O", 71 | "thio_carbonate_A(15)": "[#7,#8]c2ccc1oc(=[#8,#16])sc1c2", 72 | "anil_di_alk_furan_A(15)": "[#7](-[#6](-[#1])-[#1])(-[#6](-[#1])-[#1])-c:1:c(:c(:c(:o:1)-[#6]=[#7]-[#7](-[#1])-[#6]=[!#6&!#1])-[#1])-[#1]", 73 | "ene_five_het_F(15)": "O=[#6]2[#6](=!@[#6]c1ccccc1)Sc3ccccc23", 74 | "anil_di_alk_F(14)": "c:1:c:c(:c:c:c:1-[#6;X4]-c:2:c:c:c(:c:c:2)-[#7](-[$([#1]),$([#6;X4])])-[$([#1]),$([#6;X4])])-[#7](-[$([#1]),$([#6;X4])])-[$([#1]),$([#6;X4])]", 75 | "hzone_anil(14)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#1])-[#1])-[#1])-[#1])-[#6]=[#7]-[#7]-[#1]", 76 | "het_5_pyrazole_OH(14)": "c1(nn(c(c1-[$([#1]),$([#6]-[#1])])-[#8]-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#1])-[#1])-[#1])-[#6;X4]", 77 | "het_thio_666_A(13)": 
"c:2(:c:1-[#16]-c:3:c(-[#7](-c:1:c(:c(:c:2-[#1])-[#1])-[#1])-[$([#1]),$([#6](-[#1])(-[#1])-[#1]),$([#6](-[#1])(-[#1])-[#6]-[#1])]):c(:c(~[$([#1]),$([#6]:[#6])]):c(:c:3-[#1])-[$([#1]),$([#7](-[#1])-[#1]),$([#8]-[#6;X4])])~[$([#1]),$([#7](-[#1])-[#6;X4]),$([#6]:[#6])])-[#1]", 78 | "styrene_A(13)": "[#6]-2-[#6]-c:1:c(:c:c:c:c:1)-[#6](-c:3:c:c:c:c:c-2:3)=[#6]-[#6]", 79 | "ene_rhod_C(13)": "[#16]-1-[#6](=[#7]-[#6]:[#6])-[#7](-[$([#1]),$([#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#8]),$([#6]:[#6])])-[#6](=[#8])-[#6]-1=[#6](-[#1])-[$([#6]:[#6]:[#6]-[#17]),$([#6]:[!#6&!#1])]", 80 | "dhp_amino_CN_A(13)": "[#7](-[#1])(-[#1])-[#6]-1=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#6](=[#6](-[#6]=[#6])-[#8]-1)-[#6](-[#1])-[#1]", 81 | "cyano_imine_C(12)": "[#8]=[#16](=[#8])-[#6](-[#6]#[#7])=[#7]-[#7]-[#1]", 82 | "thio_urea_A(12)": "c:1:c:c:c:c:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:2:c:c:c:c:c:2", 83 | "thiophene_amino_B(12)": "c:1:c(:c:c:c:c:1)-[#7](-[#1])-c:2:c(:c(:c(:s:2)-[$([#6]=[#8]),$([#6]#[#7]),$([#6](-[#8]-[#1])=[#6])])-[#7])-[$([#6]#[#7]),$([#6](:[#7]):[#7])]", 84 | "keto_keto_beta_B(12)": "[#6;X4]-1-[#6](=[#8])-[#7]-[#7]-[#6]-1=[#8]", 85 | "keto_phenone_A(11)": "c:1:c-3:c(:c:c:c:1)-[#6]:2:[#7]:[!#1]:[#6]:[#6]:[#6]:2-[#6]-3=[#8]", 86 | "cyano_pyridone_C(11)": "[#6]-1(-[#6](=[#6](-[#6]#[#7])-[#6](~[#8])~[#7]~[#6]-1~[#8])-[#6](-[#1])-[#1])=[#6](-[#1])-[#6]:[#6]", 87 | "thiaz_ene_C(11)": "[#6]-1(=[#6](-!@[#6]=[#7])-[#16]-[#6](-[#7]-1)=[#8])-[$([F,Cl,Br,I]),$([#7+](:[#6]):[#6])]", 88 | "hzone_thiophene_A(11)": "c:1:2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1]):[!#6&!#1]:[#6](:[#6]:2-[#6](-[#1])=[#7]-[#7](-[#1])-[$([#6]:1:[#7]:[#6]:[#6](-[#1]):[#16]:1),$([#6]:[#6](-[#1]):[#6]-[#1]),$([#6]:[#7]:[#6]:[#7]:[#6]:[#7]),$([#6]:[#7]:[#7]:[#7]:[#7])])-[$([#1]),$([#8]-[#1]),$([#6](-[#1])-[#1])]", 89 | "ene_quin_methide(10)": 
"[!#1]:[!#1]-[#6](-[$([#1]),$([#6]#[#7])])=[#6]-1-[#6]=:[#6]-[#6](=[$([#8]),$([#7;!R])])-[#6]=:[#6]-1", 90 | "het_thio_676_A(10)": "c:1:c:c-2:c(:c:c:1)-[#6]-[#6](-c:3:c(-[#16]-2):c(:c(-[#1]):c(:c:3-[#1])-[$([#1]),$([#8]),$([#16;X2]),$([#6;X4]),$([#7](-[$([#1]),$([#6;X4])])-[$([#1]),$([#6;X4])])])-[#1])-[#7](-[$([#1]),$([#6;X4])])-[$([#1]),$([#6;X4])]", 91 | "ene_five_het_G(10)": "[#6]-1(=[#6])-[#6](-[#7,#16,#8][#6](-[!#1])=[#7]-1)=[#8]", 92 | "acyl_het_A(9)": "[#7+](:[!#1]:[!#1]:[!#1])-[!#1]=[#8]", 93 | "anil_di_alk_G(9)": "[#6;X4]-[#7](-[#6;X4])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6]2=:[#7][#6]:[#6]:[!#1]2)-[#1])-[#1]", 94 | "dhp_keto_A(9)": "[#7]-1(-[$([#6;X4]),$([#1])])-[#6]=:[#6](-[#6](=[#8])-[#6]:[#6]:[#6])-[#6](-[#6])-[#6](=[#6]-1-[#6](-[#1])(-[#1])-[#1])-[$([#6]=[#8]),$([#6]#[#7])]", 95 | "thio_urea_B(9)": "c:1:c:c:c:c:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:2:c:c:c:c:c:2", 96 | "anil_alk_bim(9)": "c:1:3:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:c:c:c:c:c:2)-[#1]):n:c(-[#1]):n:3-[#6]", 97 | "imine_imine_A(9)": "c:1:c:c-2:c(:c:c:1)-[#7]=[#6]-[#6]-2=[#7;!R]", 98 | "thio_urea_C(9)": "c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7]-[#7](-[#1])-[#6](=[#8])-[#6]-2:[!#1]:[!#6&!#1]:[#6]:[#6]-2", 99 | "imine_one_fives_B(9)": "[#7;!R]=[#6]-2-[#6](=[#8])-c:1:c:c:c:c:c:1-[#16]-2", 100 | "dhp_amino_CN_B(9)": "[$([#7](-[#1])-[#1]),$([#8]-[#1])]-[#6]-2=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-c:1:c(:n(-[#6]):n:c:1)-[#8]-2", 101 | "anil_OC_no_alk_A(8)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:n:c:1-[#1])-[#8]-c:2:c:c:c:c:c:2)-[#1])-[#1]", 102 | "het_thio_66_one(8)": "[#6](=[#8])-[#6]-1=[#6]-[#7]-c:2:c(-[#16]-1):c:c:c:c:2", 103 | "styrene_B(8)": "c:1:c:c-2:c(:c:c:1)-[#6](-c:3:c(-[$([#16;X2]),$([#6;X4])]-2):c:c:c(:c:3)-[$([#1]),$([#17]),$([#6;X4])])=[#6]-[#6]", 104 | "het_thio_5_A(8)": "[#6](-[#1])(-[#1])-[#16;X2]-c:1:n:c(:c(:n:1-!@[#6](-[#1])-[#1])-c:2:c:c:c:c:c:2)-[#1]", 105 | 
"anil_di_alk_ene_A(8)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6]-2=[#6](-[#1])-c:1:c(:c:c:c:c:1)-[#16;X2]-c:3:c-2:c:c:c:c:3", 106 | "ene_rhod_D(8)": "[#16]-1-[#6](=!@[#7]-[$([#1]),$([#7](-[#1])-[#6]:[#6])])-[#7](-[$([#1]),$([#6]:[#7]:[#6]:[#6]:[#16])])-[#6](=[#8])-[#6]-1=[#6](-[#1])-[#6]:[#6]-[$([#17]),$([#8]-[#6]-[#1])]", 107 | "ene_rhod_E(8)": "[#16]-1-[#6](=[#8])-[#7]-[#6](=[#16])-[#6]-1=[#6](-[#1])-[#6]:[#6]", 108 | "anil_OH_alk_A(8)": "c:1:c(:c:c:c:c:1)-[#6](-[#1])(-[#1])-[#7](-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#1])-[#1])-[#1]", 109 | "pyrrole_C(8)": "n1(-[#6;X4])c(c(-[#1])c(c1-[#6]:[#6])-[#1])-[#6](-[#1])-[#1]", 110 | "thio_urea_D(8)": "c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7]-[#7](-[#1])-c:2:c:c:c:c:c:2", 111 | "thiaz_ene_D(8)": "[#7](-c:1:c:c:c:c:c:1)-c2[n+]c(cs2)-c:3:c:c:c:c:c:3", 112 | "ene_rhod_F(8)": "n:1:c:c:c(:c:1-[#6](-[#1])-[#1])-[#6](-[#1])=[#6]-2-[#6](=[#8])-[#7]-[#6](=[!#6&!#1])-[#7]-2", 113 | "thiaz_ene_E(8)": "[#6]-1(=[#6](-[#6](-[#1])(-[#6])-[#6])-[#16]-[#6](-[#7]-1-[$([#1]),$([#6](-[#1])-[#1])])=[#8])-[#16]-[#6;R]", 114 | "het_65_B(7)": "[!#1]:1:[!#1]-2:[!#1](:[!#1]:[!#1]:[!#1]:1)-[#7](-[#1])-[#7](-[#6]-2=[#8])-[#6]", 115 | "keto_keto_beta_C(7)": "c:1:c:c-2:c(:c:c:1)-[#6](=[#6](-[#6]-2=[#8])-[#6])-[#8]-[#1]", 116 | "het_66_A(7)": "c:2:c:c:1:n:n:c(:n:c:1:c:c:2)-[#6](-[#1])(-[#1])-[#6]=[#8]", 117 | "thio_urea_E(7)": "c:1:c:c:c:c:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:n:c:c:c:c:2", 118 | "thiophene_amino_C(7)": "[#6](-[#1])-[#6](-[#1])(-[#1])-c:1:c(:c(:c(:s:1)-[#7](-[#1])-[#6](=[#8])-[#6]-[#6]-[#6]=[#8])-[$([#6](=[#8])-[#8]),$([#6]#[#7])])-[#6](-[#1])-[#1]", 119 | "hzone_phenone(7)": 
"[#6](-c:1:c(:c(:c(:c:c:1-[#1])-[$([#6;X4]),$([#1])])-[#1])-[#1])(-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[$([#1]),$([#17])])-[#1])-[#1])=[$([#7]-[#8]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]),$([#7]-[#8]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]),$([#7]-[#7](-[#1])-[#6](=[#7]-[#1])-[#7](-[#1])-[#1]),$([#6](-[#1])-[#7])]", 120 | "ene_rhod_G(7)": "[#8](-[#1])-[#6](=[#8])-c:1:c:c(:c:c:c:1)-[#6]:[!#1]:[#6]-[#6](-[#1])=[#6]-2-[#6](=[!#6&!#1])-[#7]-[#6](=[!#6&!#1])-[!#6&!#1]-2", 121 | "ene_cyano_B(7)": "[#6]-1(=[#6]-[#6](-c:2:c:c(:c(:n:c-1:2)-[#7](-[#1])-[#1])-[#6]#[#7])=[#6])-[#6]#[#7]", 122 | "dhp_amino_CN_C(7)": "[#7](-[#1])(-[#1])-[#6]-1=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#6](=[#6](-[#6]:[#6])-[#8]-1)-[#6]#[#7]", 123 | "het_5_A(7)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#7]=[#6](-[#6]=[#8])-[#6;X4]-[#6]-2=[#8]", 124 | "ene_five_het_H(6)": "[#7]-1=[#6]-[#6](-[#6](-[#7]-1)=[#16])=[#6]", 125 | "thio_amide_A(6)": "c1(coc(c1-[#1])-[#6](=[#16])-[#7]-2-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[!#1]-[#6](-[#1])(-[#1])-[#6]-2(-[#1])-[#1])-[#1]", 126 | "ene_cyano_C(6)": "[#6]=[#6](-[#6]#[#7])-[#6](=[#7]-[#1])-[#7]-[#7]", 127 | "hzone_furan_A(6)": "c:1(:c(:c(:c(:o:1)-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#6](-[$([#1]),$([#6](-[#1])-[#1])])=[#7]-[#7](-[#1])-c:2:n:c:c:s:2", 128 | "anil_di_alk_H(6)": "c:1(:c(:c(:c(:c(:c:1-[#7](-[#1])-[#16](=[#8])(=[#8])-[#6]:2:[#6]:[!#1]:[#6]:[#6]:[#6]:2)-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#1]", 129 | "het_65_C(6)": "n2c1ccccn1c(c2-[$([#6](-[!#1])=[#6](-[#1])-[#6]:[#6]),$([#6]:[#8]:[#6])])-[#7]-[#6]:[#6]", 130 | "thio_urea_F(6)": "[#6]-1-[#7](-[#1])-[#7](-[#1])-[#6](=[#16])-[#7]-[#7]-1-[#1]", 131 | "ene_five_het_I(6)": "c:1(:c:c:c:o:1)-[#6](-[#1])=!@[#6]-3-[#6](=[#8])-c:2:c:c:c:c:c:2-[!#6&!#1]-3", 132 | "keto_keto_gamma(5)": "[#8]=[#6]-1-[#6;X4]-[#6]-[#6](=[#8])-c:2:c:c:c:c:c-1:2", 133 | "quinone_B(5)": 
"c:1:c:c-2:c(:c:c:1)-[#6](-c3cccc4noc-2c34)=[#8]", 134 | "het_6_pyridone_OH(5)": "[#8](-[#1])-c:1:n:c(:c:c:c:1)-[#8]-[#1]", 135 | "hzone_naphth_A(5)": "c:1:2:c(:c(:c(:c(:c:1:c(:c(:c(:c:2-[#1])-[#1])-[#6]=[#7]-[#7](-[#1])-[$([#6]:[#6]),$([#6]=[#16])])-[#1])-[#1])-[#1])-[#1])-[#1]", 136 | "thio_ester_A(5)": "[#6]-1=[#6](-[#16]-[#6](-[#6]=[#6]-1)=[#16])-[#7]", 137 | "ene_misc_A(5)": "[#6]-1=[#6]-[#6](-[#8]-[#6]-1-[#8])(-[#8])-[#6]", 138 | "cyano_pyridone_D(5)": "[#8]=[#6]-1-[#6](=[#6]-[#6](=[#7]-[#7]-1)-[#6]=[#8])-[#6]#[#7]", 139 | "het_65_Db(5)": "C3=CN1C(=NC(=C1-[#7]-[#6])-c:2:c:c:c:c:n:2)C=C3", 140 | "het_666_A(5)": "[#7]N-2-c:1:c:c:c:c:c:1-[#6](=[#7])-c:3:c-2:c:c:c:c:3", 141 | "diazox_sulfon_B(5)": "c:1:c(:c:c:c:c:1)-[#7]-2-[#6](-[#1])-[#6](-[#1])-[#7](-[#6](-[#1])-[#6]-2-[#1])-[#16](=[#8])(=[#8])-c:3:c:c:c:c:4:n:s:n:c:3:4", 142 | "anil_NH_alk_A(5)": "c:1(:c(:c-2:c(:c(:c:1-[#1])-[#1])-[#7](-[#6](-[#7]-2-[#1])=[#8])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])-[#1]", 143 | "sulfonamide_C(5)": "c:1(:c(:c-3:c(:c(:c:1-[#7](-[#1])-[#16](=[#8])(=[#8])-c:2:c:c:c(:c:c:2)-[!#6&!#1])-[#1])-[#8]-[#6](-[#8]-3)(-[#1])-[#1])-[#1])-[#1]", 144 | "het_thio_N_55(5)": "[#6](-[#1])-[#6]:2:[#7]:[#7](-c:1:c:c:c:c:c:1):[#16]:3:[!#6&!#1]:[!#1]:[#6]:[#6]:2:3", 145 | "keto_keto_beta_D(5)": "[#8]=[#6]-[#6]=[#6](-[#1])-[#8]-[#1]", 146 | "ene_rhod_H(5)": "[#7]-1-2-[#6](=[#7]-[#6](=[#8])-[#6](=[#7]-1)-[#6](-[#1])-[#1])-[#16]-[#6](=[#6](-[#1])-[#6]:[#6])-[#6]-2=[#8]", 147 | "imine_ene_A(5)": "[#6]:[#6]-[#6](-[#1])=[#6](-[#1])-[#6](-[#1])=[#7]-[#7](-[#6;X4])-[#6;X4]", 148 | "het_thio_656a(5)": "c:1:3:c(:c:c:c:c:1):c:2:n:n:c(-[#16]-[#6](-[#1])(-[#1])-[#6]=[#8]):n:c:2:n:3-[#6](-[#1])(-[#1])-[#6](-[#1])=[#6](-[#1])-[#1]", 149 | "pyrrole_D(5)": "n1(-[#6])c(c(-[#1])c(c1-[#6](-[#1])(-[#1])-[#7](-[#1])-[#6](=[#16])-[#7]-[#1])-[#1])-[#1]", 150 | "pyrrole_E(5)": "n2(-[#6]:1:[!#1]:[!#6&!#1]:[!#1]:[#6]:1-[#1])c(c(-[#1])c(c2-[#6;X4])-[#1])-[#6;X4]", 151 | "thio_urea_G(5)": 
"c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7]-[#7](-[#1])-[#6]([#7;R])[#7;R]", 152 | "anisol_A(5)": "c:1(:c(:c(:c(:c(:c:1-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#8]-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[$([#7](-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1]),$([#6](-[#1])(-[#6](-[#1])-[#1])-[#7](-[#1])-[#6](=[#16])-[#7]-[#1])])-[#1])-[#8]-[#6](-[#1])-[#1]", 153 | "pyrrole_F(5)": "n2(-[#6]:1:[#6](-[#6]#[#7]):[#6]:[#6]:[!#6&!#1]:1)c(c(-[#1])c(c2)-[#1])-[#1]", 154 | "dhp_amino_CN_D(5)": "[#7](-[#1])(-[#1])-[#6]-2=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-c:1:c(:c:c:s:1)-[#8]-2", 155 | "thiazole_amine_A(4)": "[#7](-[#1])-c:1:n:c(:c:s:1)-c:2:c:n:c(-[#7](-[#1])-[#1]):s:2", 156 | "het_6_imidate_A(4)": "[#7]=[#6]-1-[#7](-[#1])-[#6](=[#6](-[#7]-[#1])-[#7]=[#7]-1)-[#7]-[#1]", 157 | "anil_OC_no_alk_B(4)": "c:1:c(:c:2:c(:c:c:1):c:c:c:c:2)-[#8]-c:3:c(:c(:c(:c(:c:3-[#1])-[#1])-[#7]-[#1])-[#1])-[#1]", 158 | "styrene_C(4)": "c:1:c:c-2:c(:c:c:1)-[#6]-[#16]-c3c(-[#6]-2=[#6])ccs3", 159 | "azulene(4)": "c:2:c:c:c:1:c(:c:c:c:1):c:c:2", 160 | "furan_acid_A(4)": "c:1(:c(:c(:c(:o:1)-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#8]-[#6]:[#6])-[#1])-[#6](=[#8])-[#8]-[#1]", 161 | "cyano_pyridone_E(4)": "[!#1]:[#6]-[#6]-1=[#6](-[#1])-[#6](=[#6](-[#6]#[#7])-[#6](=[#8])-[#7]-1-[#1])-[#6]:[#8]", 162 | "anil_alk_thio(4)": "[#6]-1-3=[#6](-[#6](-[#7]-c:2:c:c:c:c:c-1:2)(-[#6])-[#6])-[#16]-[#16]-[#6]-3=[!#1]", 163 | "anil_di_alk_I(4)": "c:1(:c(:c(:c(:c(:c:1-[#7](-[#1])-[#6](=[#8])-c:2:c:c:c:c:c:2)-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#1]", 164 | "het_thio_6_furan(4)": "[#6](-[#1])(-[#1])-[#16;X2]-c:1:n:n:c(:c(:n:1)-c:2:c(:c(:c(:o:2)-[#1])-[#1])-[#1])-c:3:c(:c(:c(:o:3)-[#1])-[#1])-[#1]", 165 | "anil_di_alk_ene_B(4)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6]-2=[#6]-c:1:c(:c:c:c:c:1)-[#6]-2(-[#1])-[#1]", 166 | "imine_one_B(4)": "[#7](-[#1])(-c:1:c:c:c:c:c:1)-[#7]=[#6](-[#6](=[#8])-[#6](-[#1])-[#1])-[#7](-[#1])-[$([#7]-[#1]),$([#6]:[#6])]", 167 | 
"anil_OC_alk_A(4)": "c:1:2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1]):o:c:3:c(-[#1]):c(:c(-[#8]-[#6](-[#1])-[#1]):c(:c:2:3)-[#1])-[#7](-[#1])-[#6](-[#1])-[#1]", 168 | "ene_five_het_J(4)": "[#16]=[#6]-1-[#7](-[#1])-[#6]=[#6]-[#6]-2=[#6]-1-[#6](=[#8])-[#8]-[#6]-2=[#6]-[#1]", 169 | "pyrrole_G(4)": "n2(-c:1:c(:c:c(:c(:c:1)-[#1])-[$([#7](-[#1])-[#1]),$([#6]:[#7])])-[#1])c(c(-[#1])c(c2-[#1])-[#1])-[#1]", 170 | "ene_five_het_K(4)": "n1(-[#6])c(c(-[#1])c(c1-[#6](-[#1])=[#6]-2-[#6](=[#8])-[!#6&!#1]-[#6]=:[!#1]-2)-[#1])-[#1]", 171 | "cyano_ene_amine_B(4)": "[#6]=[#6]-[#6](-[#6]#[#7])(-[#6]#[#7])-[#6](-[#6]#[#7])=[#6]-[#7](-[#1])-[#1]", 172 | "thio_ester_B(4)": "[#6]:[#6]-[#6](=[#16;X1])-[#16;X2]-[#6](-[#1])-[$([#6](-[#1])-[#1]),$([#6]:[#6])]", 173 | "ene_five_het_L(4)": "[#8]=[#6]-3-[#6](=!@[#6](-[#1])-c:1:c:n:c:c:1)-c:2:c:c:c:c:c:2-[#7]-3", 174 | "hzone_thiophene_B(4)": "c:1(:c(:c(:c(:s:1)-[#1])-[#1])-[$([#1]),$([#6](-[#1])-[#1])])-[#6](-[#1])=[#7]-[#7](-[#1])-c:2:c:c:c:c:c:2", 175 | "dhp_amino_CN_E(4)": "[#6](-[#1])(-[#1])-[#16;X2]-[#6]-1=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#6](-[#6]#[#7])-[#6](=[#8])-[#7]-1", 176 | "het_5_B(4)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#7]=[#6](-[#7](-[#1])-[#6]=[#8])-[#6](-[#1])(-[#1])-[#6]-2=[#8]", 177 | "imine_imine_B(3)": "[#6]:[#6]-[#6](-[#1])=[#6](-[#1])-[#6](-[#1])=[#7]-[#7]=[#6]", 178 | "thiazole_amine_B(3)": "c:1(:c:c:c(:c:c:1)-[#6](-[#1])-[#1])-c:2:c(:s:c(:n:2)-[#7](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#1]", 179 | "imine_ene_one_A(3)": "[#6]-2(-[#6]=[#7]-c:1:c:c:c:c:c:1-[#7]-2)=[#6](-[#1])-[#6]=[#8]", 180 | "diazox_A(3)": "[#8](-c:1:c:c:c:c:c:1)-c:3:c:c:2:n:o:n:c:2:c:c:3", 181 | "ene_one_A(3)": "[!#1]:1:[!#1]:[!#1]:[!#1](:[!#1]:[!#1]:1)-[#6](-[#1])=[#6](-[#1])-[#6](-[#7]-c:2:c:c:c:3:c(:c:2):c:c:c(:n:3)-[#7](-[#6])-[#6])=[#8]", 182 | "anil_OC_no_alk_C(3)": "[#7](-[#1])(-[#1])-c:1:c(:c:c:c:n:1)-[#8]-[#6](-[#1])(-[#1])-[#6]:[#6]", 183 | "thiazol_SC_A(3)": "[#6]-[#16;X2]-c:1:n:c(:c:s:1)-[#1]", 184 | "het_666_B(3)": 
"c:1:c-3:c(:c:c:c:1)-[#7](-c:2:c:c:c:c:c:2-[#8]-3)-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1]", 185 | "furan_A(3)": "c:1(:c(:c(:c(:o:1)-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])(-[#8]-[#1])-[#6]#[#6]-[#6;X4]", 186 | "colchicine_A(3)": "[#6]-1(-[#6](=[#6]-[#6]=[#6]-[#6]=[#6]-1)-[#7]-[#1])=[#7]-[#6]", 187 | "thiophene_C(3)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])=[#6]-[#6](=[#8])-c:1:c(-[#16;X2]):s:c(:c:1)-[$([#6]#[#7]),$([#6]=[#8])]", 188 | "anil_OC_alk_B(3)": "c:1:3:c(:c:c:c:c:1)-[#7]-2-[#6](=[#8])-[#6](=[#6](-[F,Cl,Br,I])-[#6]-2=[#8])-[#7](-[#1])-[#6]:[#6]:[#6]:[#6](-[#8]-[#6](-[#1])-[#1]):[#6]:[#6]:3", 189 | "het_thio_66_A(3)": "c:1-2:c(:c:c:c:c:1)-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7]=[#6]-2-[#16;X2]-[#6](-[#1])(-[#1])-[#6](=[#8])-c:3:c:c:c:c:c:3", 190 | "rhod_sat_B(3)": "[#7]-2(-c:1:c:c:c:c:c:1-[#6](-[#1])-[#1])-[#6](=[#16])-[#7](-[#6](-[#1])(-[#1])-[!#1]:[!#1]:[!#1]:[!#1]:[!#1])-[#6](-[#1])(-[#1])-[#6]-2=[#8]", 191 | "ene_rhod_I(3)": "[#7]-2(-[#6](-[#1])-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](=[#6](-[#1])-c:1:c:c:c:c(:c:1)-[Br])-[#6]-2=[#8]", 192 | "keto_thiophene(3)": "c:1(:c(:c:2:c(:s:1):c:c:c:c:2)-[#6](-[#1])-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1]", 193 | "imine_imine_C(3)": "[#7](-[#6](-[#1])-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])=[#7]-[#6](-[#6](-[#1])-[#1])=[#7]-[#7](-[#6](-[#1])-[#1])-[#6]:[#6]", 194 | "het_65_pyridone_A(3)": "[#6]:2(:[#6](-[#6](-[#1])-[#1]):[#6]-1:[#6](-[#7]=[#6](-[#7](-[#6]-1=[!#6&!#1;X1])-[#6](-[#1])-[$([#6](=[#8])-[#8]),$([#6]:[#6])])-[$([#1]),$([#16]-[#6](-[#1])-[#1])]):[!#6&!#1;X2]:2)-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1]", 195 | "thiazole_amine_C(3)": "c:1(:n:c(:c(-[#1]):s:1)-[!#1]:[!#1]:[!#1](-[$([#8]-[#6](-[#1])-[#1]),$([#6](-[#1])-[#1])]):[!#1]:[!#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:c(-[#1]):c(:c(-[#1]):o:2)-[#1]", 196 | "het_thio_pyr_A(3)": "n:1:c(:c(:c(:c(:c:1-[#16]-[#6]-[#1])-[#6]#[#7])-c:2:c:c:c(:c:c:2)-[#8]-[#6](-[#1])-[#1])-[#1])-[#6]:[#6]", 197 | "melamine_A(3)": 
"c:1:4:c(:n:c(:n:c:1-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:c(:c(:c(:o:2)-[#1])-[#1])-[#1])-[#7](-[#1])-c:3:c:c(:c(:c:c:3-[$([#1]),$([#6](-[#1])-[#1]),$([#16;X2]),$([#8]-[#6]-[#1]),$([#7;X3])])-[$([#1]),$([#6](-[#1])-[#1]),$([#16;X2]),$([#8]-[#6]-[#1]),$([#7;X3])])-[$([#1]),$([#6](-[#1])-[#1]),$([#16;X2]),$([#8]-[#6]-[#1]),$([#7;X3])]):c:c:c:c:4", 198 | "anil_NH_alk_B(3)": "[#7](-[#1])(-[#6]:1:[#6]:[#6]:[!#1]:[#6]:[#6]:1)-c:2:c:c:c(:c:c:2)-[#7](-[#1])-[#6]-[#1]", 199 | "rhod_sat_C(3)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#6](=[#7]-[#6]=[#8])-[#16]-[#6](-[#1])(-[#1])-[#6]-2=[#8]", 200 | "thiophene_amino_D(3)": "[#6]=[#6]-[#6](=[#8])-[#7]-c:1:c(:c(:c(:s:1)-[#6](=[#8])-[#8])-[#6]-[#1])-[#6]#[#7]", 201 | "anil_OC_alk_C(3)": "[$([#1]),$([#6](-[#1])-[#1])]-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:n:c:c:n:2", 202 | "het_thio_65_A(3)": "[#6](-[#1])(-[#1])-[#16;X2]-c3nc1c(n(nc1-[#6](-[#1])-[#1])-c:2:c:c:c:c:c:2)nn3", 203 | "het_thio_656b(3)": "[#6]-[#6](=[#8])-[#6](-[#1])(-[#1])-[#16;X2]-c:3:n:n:c:2:c:1:c(:c(:c(:c(:c:1:n(:c:2:n:3)-[#1])-[#1])-[#1])-[#1])-[#1]", 204 | "thiazole_amine_D(3)": "s:1:c(:[n+](-[#6](-[#1])-[#1]):c(:c:1-[#1])-[#6])-[#7](-[#1])-c:2:c:c:c:c:c:2[$([#6](-[#1])-[#1]),$([#6]:[#6])]", 205 | "thio_urea_H(3)": "[#6]-2(=[#16])-[#7](-[#6](-[#1])(-[#1])-c:1:c:c:c:o:1)-[#6](=[#7]-[#7]-2-[#1])-[#6]:[#6]", 206 | "cyano_pyridone_F(3)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#6](=[#8])-[#6](=[#6]-[#6](=[#7]-2)-[#6]#[#7])-[#6]#[#7]", 207 | "rhod_sat_D(3)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#6](=[#8])-[#16]-[#6](-[#1])(-[#6](-[#1])(-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]:[#6])-[#6]-2=[#8]", 208 | "ene_rhod_J(3)": "[#6](-[#1])(-[#1])-[#7]-2-[#6](=[$([#16]),$([#7])])-[!#6&!#1]-[#6](=[#6]-1-[#6](=[#6](-[#1])-[#6]:[#6]-[#7]-1-[#6](-[#1])-[#1])-[#1])-[#6]-2=[#8]", 209 | "imine_phenol_A(3)": "[#6]=[#7;!R]-c:1:c:c:c:c:c:1-[#8]-[#1]", 210 | "thio_carbonate_B(3)": "[#8]=[#6]-2-[#16]-c:1:c(:c(:c:c:c:1)-[#8]-[#6](-[#1])-[#1])-[#8]-2", 211 | "het_thio_N_5A(3)": 
"[#7]=[#6]-1-[#7]=[#6]-[#7]-[#16]-1", 212 | "het_thio_N_65A(3)": "[#7]-2-[#16]-[#6]-1=[#6](-[#6]:[#6]-[#7]-[#6]-1)-[#6]-2=[#16]", 213 | "anil_di_alk_J(3)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])=[#7]-[#7]=[#6](-[#6])-[#6]:[#6])-[#1])-[#1]", 214 | "pyrrole_H(3)": "n1-2cccc1-[#6]=[#7](-[#6])-[#6]-[#6]-2", 215 | "ene_cyano_D(3)": "[#6](-[#6]#[#7])(-[#6]#[#7])=[#6](-[#16])-[#16]", 216 | "cyano_cyano_B(3)": "[#6]-1(-[#6]#[#7])(-[#6]#[#7])-[#6](-[#1])(-[#6](=[#8])-[#6])-[#6]-1-[#1]", 217 | "ene_five_het_M(3)": "[#6]-1=:[#6]-[#6](-[#6](-[$([#8]),$([#16])]-1)=[#6]-[#6]=[#8])=[#8]", 218 | "cyano_ene_amine_C(3)": "[#6]:[#6]-[#6](=[#8])-[#7](-[#1])-[#6](=[#8])-[#6](-[#6]#[#7])=[#6](-[#1])-[#7](-[#1])-[#6]:[#6]", 219 | "thio_urea_I(3)": "c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#7]=[#6]-c:2:c:n:c:c:2", 220 | "dhp_amino_CN_F(3)": "[#7](-[#1])(-[#1])-[#6]-2=[#6](-[#6]#[#7])-[#6](-[#1])(-c:1:c:c:c:s:1)-[#6](=[#6](-[#6](-[#1])-[#1])-[#8]-2)-[#6](=[#8])-[#8]-[#6]", 221 | "anthranil_acid_B(3)": "c:1:c-3:c(:c:c(:c:1)-[#6](=[#8])-[#7](-[#1])-c:2:c(:c:c:c:c:2)-[#6](=[#8])-[#8]-[#1])-[#6](-[#7](-[#6]-3=[#8])-[#6](-[#1])-[#1])=[#8]", 222 | "diazox_B(3)": "[Cl]-c:2:c:c:1:n:o:n:c:1:c:c:2", 223 | "thio_aldehyd_A(3)": "[#6]-[#6](=[#16])-[#1]", 224 | "thio_amide_B(2)": "[#6;X4]-[#7](-[#1])-[#6](-[#6]:[#6])=[#6](-[#1])-[#6](=[#16])-[#7](-[#1])-c:1:c:c:c:c:c:1", 225 | "imidazole_B(2)": "[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#16]-[#6](-[#1])(-[#1])-c1cn(cn1)-[#1]", 226 | "thiazole_amine_E(2)": "[#8]=[#6]-[#7](-[#1])-c:1:c(-[#6]:[#6]):n:c(-[#6](-[#1])(-[#1])-[#6]#[#7]):s:1", 227 | "thiazole_amine_F(2)": "[#6](-[#1])-[#7](-[#1])-c:1:n:c(:c:s:1)-c2cnc3n2ccs3", 228 | "thio_ester_C(2)": "[#7]-1-[#6](=[#8])-[#6](=[#6](-[#6])-[#16]-[#6]-1=[#16])-[#1]", 229 | "ene_one_B(2)": "[#6](-[#16])(-[#7])=[#6](-[#1])-[#6]=[#6](-[#1])-[#6]=[#8]", 230 | "quinone_C(2)": 
"[#8]=[#6]-3-c:1:c(:c:c:c:c:1)-[#6]-2=[#6](-[#8]-[#1])-[#6](=[#8])-[#7]-c:4:c-2:c-3:c:c:c:4", 231 | "keto_naphthol_A(2)": "c:1:2:c:c:c:c(:c:1:c(:c:c:c:2)-[$([#8]-[#1]),$([#7](-[#1])-[#1])])-[#6](-[#6])=[#8]", 232 | "thio_amide_C(2)": "[#6](-[#1])(-c:1:c:c:c:c:c:1)(-c:2:c:c:c:c:c:2)-[#6](=[#16])-[#7]-[#1]", 233 | "phthalimide_misc(2)": "[#7]-2(-[#6](=[#8])-c:1:c(:c(:c(:c(:c:1-[#1])-[#6](=[#8])-[#8]-[#1])-[#1])-[#1])-[#6]-2=[#8])-c:3:c(:c:c(:c(:c:3)-[#1])-[#8])-[#1]", 234 | "sulfonamide_D(2)": "c:1:c:c(:c:c:c:1-[#7](-[#1])-[#16](=[#8])=[#8])-[#7](-[#1])-[#16](=[#8])=[#8]", 235 | "anil_NH_alk_C(2)": "[#6](-[#1])-[#7](-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6]-[#1]", 236 | "het_65_E(2)": "s1c(c(c-2c1-[#7](-[#1])-[#6](-[#6](=[#6]-2-[#1])-[#6](=[#8])-[#8]-[#1])=[#8])-[#7](-[#1])-[#1])-[#6](=[#8])-[#7]-[#1]", 237 | "hzide_naphth(2)": "c:2(:c:1:c(:c(:c(:c(:c:1:c(:c(:c:2-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#7](-[#1])-[#6]=[#8])-[#1])-[#1])-[#1]", 238 | "anisol_B(2)": "[#6](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-[#8]-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])(-[#1])-[#7](-[#1])-[#6;X4])-[#1]", 239 | "thio_carbam_ene(2)": "[#6]-1=[#6]-[#7]-[#6](-[#16]-[#6;X4]-1)=[#16]", 240 | "thio_amide_D(2)": "[#6](-[#7](-[#6]-[#1])-[#6]-[#1]):[#6]-[#7](-[#1])-[#6](=[#16])-[#6]-[#1]", 241 | "het_65_Da(2)": "n2nc(c1cccc1c2-[#6])-[#6]", 242 | "thiophene_D(2)": "s:1:c(:c(-[#1]):c(:c:1-[#6](=[#8])-[#7](-[#1])-[#7]-[#1])-[#8]-[#6](-[#1])-[#1])-[#1]", 243 | "het_thio_6_ene(2)": "[#6]-1:[#6]-[#7]=[#6]-[#6](=[#6]-[#7]-[#6])-[#16]-1", 244 | "cyano_keto_A(2)": "[#6](-[#1])(-[#1])-[#6](-[#1])(-[#6]#[#7])-[#6](=[#8])-[#6]", 245 | "anthranil_acid_C(2)": "c2(c(-[#7](-[#1])-[#1])n(-c:1:c:c:c:c:c:1-[#6](=[#8])-[#8]-[#1])nc2-[#6]=[#8])-[$([#6]#[#7]),$([#6]=[#16])]", 246 | "naphth_amino_C(2)": "c:2:c:1:c:c:c:c-3:c:1:c(:c:c:2)-[#7](-[#7]=[#6]-3)-[#1]", 247 | "naphth_amino_D(2)": "c:2:c:1:c:c:c:c-3:c:1:c(:c:c:2)-[#7]-[#7]=[#7]-3", 248 | "thiazole_amine_G(2)": 
"c1csc(n1)-[#7]-[#7]-[#16](=[#8])=[#8]", 249 | "het_66_B(2)": "c:1:c:c:c:2:c(:c:1):n:c(:n:c:2)-[#7](-[#1])-[#6]-3=[#7]-[#6](-[#6]=[#6]-[#7]-3-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]", 250 | "coumarin_A(2)": "c:1-3:c(:c(:c(:c(:c:1)-[#8]-[#6]-[#1])-[#1])-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](=[#8])-[#8]-3", 251 | "anthranil_acid_D(2)": "c:12:c(:c:c:c:n:1)c(c(-[#6](=[#8])~[#8;X1])s2)-[#7](-[#1])-[#1]", 252 | "het_66_C(2)": "c:1:2:n:c(:c(:n:c:1:[#6]:[#6]:[#6]:[!#1]:2)-[#6](-[#1])=[#6](-[#8]-[#1])-[#6])-[#6](-[#1])=[#6](-[#8]-[#1])-[#6]", 253 | "thiophene_amino_E(2)": "c1csc(c1-[#7](-[#1])-[#1])-[#6](-[#1])=[#6](-[#1])-c2cccs2", 254 | "het_6666_A(2)": "c:2:c:c:1:n:c:3:c(:n:c:1:c:c:2):c:c:c:4:c:3:c:c:c:c:4", 255 | "sulfonamide_E(2)": "[#6]:[#6]-[#7](-[#1])-[#16](=[#8])(=[#8])-[#7](-[#1])-[#6]:[#6]", 256 | "anil_di_alk_K(2)": "c:1:c:c(:c:c:c:1-[#7](-[#1])-[#1])-[#7](-[#6;X3])-[#6;X3]", 257 | "het_5_C(2)": "[#7]-2=[#6](-c:1:c:c:c:c:c:1)-[#6](-[#1])(-[#1])-[#6](-[#8]-[#1])(-[#6](-[#9])(-[#9])-[#9])-[#7]-2-[$([#6]:[#6]:[#6]:[#6]:[#6]:[#6]),$([#6](=[#16])-[#6]:[#6]:[#6]:[#6]:[#6]:[#6])]", 258 | "ene_six_het_B(2)": "c:1:c(:c:c:c:c:1)-[#6](=[#8])-[#6](-[#1])=[#6]-3-[#6](=[#8])-[#7](-[#1])-[#6](=[#8])-[#6](=[#6](-[#1])-c:2:c:c:c:c:c:2)-[#7]-3-[#1]", 259 | "steroid_A(2)": "[#8]=[#6]-4-[#6]-[#6]-[#6]-3-[#6]-2-[#6](=[#8])-[#6]-[#6]-1-[#6]-[#6]-[#6]-[#6]-1-[#6]-2-[#6]-[#6]-[#6]-3=[#6]-4", 260 | "het_565_A(2)": "c:1:2:c:3:c(:c(-[#8]-[#1]):c(:c:1:c(:c:n:2-[#6])-[#6]=[#8])-[#1]):n:c:n:3", 261 | "thio_imine_ium(2)": "[#6;X4]-[#7+](-[#6;X4]-[#8]-[#1])=[#6]-[#16]-[#6]-[#1]", 262 | "anthranil_acid_E(2)": "[#6]-3(=[#8])-[#6](=[#6](-[#1])-[#7](-[#1])-c:1:c:c:c:c:c:1-[#6](=[#8])-[#8]-[#1])-[#7]=[#6](-c:2:c:c:c:c:c:2)-[#8]-3", 263 | "hzone_furan_B(2)": "c:1(:c(:c(:c(:o:1)-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#6](-[$([#1]),$([#6](-[#1])-[#1])])=[#7]-[#7](-[#1])-c:2:c:c:n:c:c:2", 264 | "thiophene_E(2)": 
"c:1(:c(:c(:c(:s:1)-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#6](-[$([#1]),$([#6](-[#1])-[#1])])-[#6](=[#8])-[#7](-[#1])-c:2:n:c:c:s:2", 265 | "ene_misc_B(2)": "[#6]:[#6]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#6]=[#8])-[#7]-2-[#6](=[#8])-[#6]-1(-[#1])-[#6](-[#1])(-[#1])-[#6]=[#6]-[#6](-[#1])(-[#1])-[#6]-1(-[#1])-[#6]-2=[#8]", 266 | "het_thio_5_B(2)": "[#6]-1(-[#6]=[#8])(-[#6]:[#6])-[#16;X2]-[#6]=[#7]-[#7]-1-[#1]", 267 | "thiophene_amino_F(2)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:s:1)-[#7](-[#1])-[#6](=[#8])-c:2:c:c:c:c:c:2)-[#6]#[#7])-[#6]:3:[!#1]:[!#1]:[!#1]:[!#1]:[!#1]:3", 268 | "anil_OC_alk_D(2)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:c:c:c:c:c:2-[$([#6](-[#1])-[#1]),$([#8]-[#6](-[#1])-[#1])]", 269 | "tert_butyl_A(2)": "[#6](-[#1])(-[#1])(-[#1])-[#6](-[#6](-[#1])(-[#1])-[#1])(-[#6](-[#1])(-[#1])-[#1])-c:1:c(:c:c(:c(:c:1-[#1])-[#6](-[#6](-[#1])(-[#1])-[#1])(-[#6](-[#1])(-[#1])-[#1])-[#6](-[#1])(-[#1])-[#1])-[#8]-[#6](-[#1])-[#7])-[#1]", 270 | "thio_urea_J(2)": "c:1(:c(:o:c:c:1)-[#6]-[#1])-[#6]=[#7]-[#7](-[#1])-[#6](=[#16])-[#7]-[#1]", 271 | "het_thio_65_B(2)": "[#7](-[#1])-c1nc(nc2nnc(n12)-[#16]-[#6])-[#7](-[#1])-[#6]", 272 | "coumarin_B(2)": "c:1-2:c(:c:c:c:c:1-[#6](-[#1])(-[#1])-[#6](-[#1])=[#6](-[#1])-[#1])-[#6](=[#6](-[#6](=[#8])-[#7](-[#1])-[#6]:[#6])-[#6](=[#8])-[#8]-2)-[#1]", 273 | "thio_urea_K(2)": "[#6]-2(=[#16])-[#7]-1-[#6]:[#6]-[#7]=[#7]-[#6]-1=[#7]-[#7]-2-[#1]", 274 | "thiophene_amino_G(2)": "[#6]:[#6]:[#6]:[#6]:[#6]:[#6]-c:1:c:c(:c(:s:1)-[#7](-[#1])-[#6](=[#8])-[#6])-[#6](=[#8])-[#8]-[#1]", 275 | "anil_NH_alk_D(2)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c:c:1-[#7](-[#1])-[#6](-[#1])(-[#6])-[#6](-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#1]", 276 | "het_thio_5_C(2)": "[#16]=[#6]-2-[#7](-[#1])-[#7]=[#6](-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1])-[#8]-2", 277 | "thio_keto_het(2)": "[#16]=[#6]-c:1:c:c:c:2:c:c:c:c:n:1:2", 278 | "het_thio_N_5B(2)": 
"[#6]~1~[#6](~[#7]~[#7]~[#6](~[#6](-[#1])-[#1])~[#6](-[#1])-[#1])~[#7]~[#16]~[#6]~1", 279 | "quinone_D(2)": "[#6]-1(-[#6]=:[#6]-[#6]=:[#6]-[#6]-1=[!#6&!#1])=[!#6&!#1]", 280 | "anil_di_alk_furan_B(2)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:1:c(-[#1]):c(:c(:o:1)-[#6](-[#1])=[#6]-[#6]#[#7])-[#1]", 281 | "ene_six_het_C(2)": "[#8]=[#6]-1-[#6]:[#6]-[#6](-[#1])(-[#1])-[#7]-[#6]-1=[#6]-[#1]", 282 | "het_55_A(2)": "[#6]:[#6]-[#7]:2:[#7]:[#6]:1-[#6](-[#1])(-[#1])-[#16;X2]-[#6](-[#1])(-[#1])-[#6]:1-[#6]:2-[#7](-[#1])-[#6](=[#8])-[#6](-[#1])=[#6]-[#1]", 283 | "het_thio_65_C(2)": "n:1:c(:n(:c:2:c:1:c:c:c:c:2)-[#6](-[#1])-[#1])-[#16]-[#6](-[#1])(-[#1])-[#6](=[#8])-[#7](-[#1])-[#7]=[#6](-[#1])-[#6](-[#1])=[#6]-[#1]", 284 | "hydroquin_A(2)": "c:1(:c:c(:c(:c:c:1)-[#8]-[#1])-[#6](=!@[#6]-[#7])-[#6]=[#8])-[#8]-[#1]", 285 | "anthranil_acid_F(2)": "c:1(:c:c(:c(:c:c:1)-[#7](-[#1])-[#6](=[#8])-[#6]:[#6])-[#6](=[#8])-[#8]-[#1])-[#8]-[#1]", 286 | "pyrrole_I(2)": "n2(-[#6](-[#1])-[#1])c-1c(-[#6]:[#6]-[#6]-1=[#8])cc2-[#6](-[#1])-[#1]", 287 | "thiophene_amino_H(2)": "[#6](-[#1])-[#7](-[#1])-c:1:c(:c(:c(:s:1)-[#6]-[#1])-[#6]-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]:[#6]", 288 | "imine_one_fives_C(2)": "[#6]:[#6]-[#7;!R]=[#6]-2-[#6](=[!#6&!#1])-c:1:c:c:c:c:c:1-[#7]-2", 289 | "keto_phenone_zone_A(2)": "c:1:c:c:c:c:c:1-[#6](=[#8])-[#7](-[#1])-[#7]=[#6]-3-c:2:c:c:c:c:c:2-c:4:c:c:c:c:c-3:4", 290 | "dyes7A(2)": "c:1:c(:c:c:c:c:1)-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])=[#6](-[#1])-[#6]=!@[#6](-[#1])-[#6](-[#1])=[#6]-[#6]=@[#7]-c:2:c:c:c:c:c:2", 291 | "het_pyridiniums_B(2)": "[#6]:1:2:[!#1]:[#7+](:[!#1]:[#6](:[!#1]:1:[#6]:[#6]:[#6]:[#6]:2)-[*])~[#6]:[#6]", 292 | "het_5_D(2)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#7]=[#6](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#16]-[#6])-[#6]-2=[#8]", 293 | "thiazole_amine_H(1)": "c:1:c:c:c(:c:c:1-[#7](-[#1])-c2nc(c(-[#1])s2)-c:3:c:c:c(:c:c:3)-[#6](-[#1])(-[#6]-[#1])-[#6]-[#1])-[#6](=[#8])-[#8]-[#1]", 294 | "thiazole_amine_I(1)": 
"[#6](-[#1])(-[#1])-[#7](-[#1])-[#6]=[#7]-[#7](-[#1])-c1nc(c(-[#1])s1)-[#6]:[#6]", 295 | "het_thio_N_5C(1)": "[#6]:[#6]-[#7](-[#1])-[#6](=[#8])-c1c(snn1)-[#7](-[#1])-[#6]:[#6]", 296 | "sulfonamide_F(1)": "[#8]=[#16](=[#8])(-[#6]:[#6])-[#7](-[#1])-c1nc(cs1)-[#6]:[#6]", 297 | "thiazole_amine_J(1)": "[#8]=[#16](=[#8])(-[#6]:[#6])-[#7](-[#1])-[#7](-[#1])-c1nc(cs1)-[#6]:[#6]", 298 | "het_65_F(1)": "s2c:1:n:c:n:c(:c:1c(c2-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#7]-[#7]=[#6]-c3ccco3", 299 | "keto_keto_beta_E(1)": "[#6](=[#8])-[#6](-[#1])=[#6](-[#8]-[#1])-[#6](-[#8]-[#1])=[#6](-[#1])-[#6](=[#8])-[#6]", 300 | "ene_five_one_B(1)": "c:2(:c:1-[#6](-[#6](-[#6](-c:1:c(:c(:c:2-[#1])-[#1])-[#1])(-[#1])-[#1])=[#8])=[#6](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1]", 301 | "keto_keto_beta_zone(1)": "[#6]:[#6]-[#7](-[#1])-[#7]=[#6](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#6](-[#1])-[#1])=[#7]-[#7](-[#1])-[#6]:[#6]", 302 | "thio_urea_L(1)": "[#6;X4]-[#16;X2]-[#6](=[#7]-[!#1]:[!#1]:[!#1]:[!#1])-[#7](-[#1])-[#7]=[#6]", 303 | "het_thio_urea_ene(1)": "[#6]-1(=[#7]-[#7](-[#6](-[#16]-1)=[#6](-[#1])-[#6]:[#6])-[#6]:[#6])-[#6]=[#8]", 304 | "cyano_amino_het_A(1)": "c:1(:c(:c:2:c(:n:c:1-[#7](-[#1])-[#1]):c:c:c(:c:2-[#7](-[#1])-[#1])-[#6]#[#7])-[#6]#[#7])-[#6]#[#7]", 305 | "tetrazole_hzide(1)": "[!#1]:1:[!#1]:[!#1]:[!#1](:[!#1]:[!#1]:1)-[#6](-[#1])=[#6](-[#1])-[#6](-[#7](-[#1])-[#7](-[#1])-c2nnnn2-[#6])=[#8]", 306 | "imine_naphthol_A(1)": "c:1:2:c(:c(:c(:c(:c:1:c(:c(:c(:c:2-[#1])-[#1])-[#6](=[#7]-[#6]:[#6])-[#6](-[#1])-[#1])-[#8]-[#1])-[#1])-[#1])-[#1])-[#1]", 307 | "misc_anisole_A(1)": "c:1(:c(:c:2:c(:c(:c:1-[#8]-[#6](-[#1])-[#1])-[#1]):c(:c(:c(:c:2-[#7](-[#1])-[#6](-[#1])(-[#1])-[#1])-[#1])-c:3:c(:c(:c(:c(:c:3-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1])-[#1])-[#8]-[#6](-[#1])-[#1]", 308 | "het_thio_665(1)": "c:1:c:c-2:c(:c:c:1)-[#16]-c3c(-[#7]-2)cc(s3)-[#6](-[#1])-[#1]", 309 | "anil_di_alk_L(1)": 
"c:1:c:c:c-2:c(:c:1)-[#6](-[#6](-[#7]-2-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7]-4-[#6](-c:3:c:c:c:c:c:3-[#6]-4=[#8])=[#8])(-[#1])-[#1])(-[#1])-[#1]", 310 | "colchicine_B(1)": "c:1(:c:c:c(:c:c:1)-[#6]-3=[#6]-[#6](-c2cocc2-[#6](=[#6]-3)-[#8]-[#1])=[#8])-[#16]-[#6](-[#1])-[#1]", 311 | "misc_aminoacid_A(1)": "[#6;X4]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#6](-[#1])(-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#16]-[#6](-[#1])(-[#1])-[#1])-[#6](=[#8])-[#8]-[#1])-[#1])-[#1]", 312 | "imidazole_amino_A(1)": "n:1:c(:n(:c(:c:1-c:2:c:c:c:c:c:2)-c:3:c:c:c:c:c:3)-[#7]=!@[#6])-[#7](-[#1])-[#1]", 313 | "phenol_sulfite_A(1)": "[#6](-c:1:c:c:c(:c:c:1)-[#8]-[#1])(-c:2:c:c:c(:c:c:2)-[#8]-[#1])-[#8]-[#16](=[#8])=[#8]", 314 | "het_66_D(1)": "c:2:c:c:1:n:c(:c(:n:c:1:c:c:2)-[#6](-[#1])(-[#1])-[#6](=[#8])-[#6]:[#6])-[#6](-[#1])(-[#1])-[#6](=[#8])-[#6]:[#6]", 315 | "misc_anisole_B(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:2:c:c:c(-[#6](-[#1])-[#1])c:c:2", 316 | "tetrazole_A(1)": "[#6](-[#1])(-[#1])-c1nnnn1-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#1])-[#1])-[#1]", 317 | "het_65_G(1)": "[#6]-2(=[#7]-c1c(c(nn1-[#6](-[#6]-2(-[#1])-[#1])=[#8])-[#7](-[#1])-[#1])-[#7](-[#1])-[#1])-[#6]", 318 | "misc_trityl_A(1)": "[#6](-[#6]:[#6])(-[#6]:[#6])(-[#6]:[#6])-[#16]-[#6]:[#6]-[#6](=[#8])-[#8]-[#1]", 319 | "misc_pyridine_OC(1)": "[#8]=[#6](-c:1:c(:c(:n:c(:c:1-[#1])-[#8]-[#6](-[#1])(-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]", 320 | "het_6_hydropyridone(1)": "[#7]-1=[#6](-[#7](-[#6](-[#6](-[#6]-1(-[#1])-[#6]:[#6])(-[#1])-[#1])=[#8])-[#1])-[#7]-[#1]", 321 | "misc_stilbene(1)": "[#6]-1(=[#6](-[#6](-[#6](-[#6](-[#6]-1(-[#1])-[#1])(-[#1])-[#6](=[#8])-[#6])(-[#1])-[#6](=[#8])-[#8]-[#1])(-[#1])-[#1])-[#6]:[#6])-[#6]:[#6]", 322 | "misc_imidazole(1)": 
"[#6](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[Cl])-[#1])-[#1])(-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[Cl])-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-c3nc(c(n3-[#6](-[#1])(-[#1])-[#1])-[#1])-[#1]", 323 | "anil_NH_no_alk_A(1)": "n:1:c(:c(:c(:c(:c:1-[#1])-[#7](-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6]:[#6]", 324 | "het_6_imidate_B(1)": "[#7](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#8]-[#1])-[#6]-2=[#6](-[#8]-[#6](-[#7]=[#7]-2)=[#7])-[#7](-[#1])-[#1]", 325 | "anil_alk_B(1)": "[#7](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1]", 326 | "styrene_anil_A(1)": "c:1:c:c-3:c(:c:c:1)-c:2:c:c:c(:c:c:2-[#6]-3=[#6](-[#1])-[#6])-[#7](-[#1])-[#1]", 327 | "misc_aminal_acid(1)": "c:1:c:c-2:c(:c:c:1)-[#7](-[#6](-[#8]-[#6]-2)(-[#6](=[#8])-[#8]-[#1])-[#6](-[#1])-[#1])-[#6](=[#8])-[#6](-[#1])-[#1]", 328 | "anil_no_alk_D(1)": "n:1:c(:c(:c(:c(:c:1-[#7](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#6](-[#1])-[#1])-[#7](-[#1])-[#1]", 329 | "anil_alk_C(1)": "[#7](-[#1])(-c:1:c:c:c:c:c:1)-[#6](-[#6])(-[#6])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1]", 330 | "misc_anisole_C(1)": "[#7](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#1])-[#8]-[#6]-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])(-[#1])-[#1])-[#6]:[#6]", 331 | "het_465_misc(1)": "c:1-2:c:c-3:c(:c:c:1-[#8]-[#6]-[#8]-2)-[#6]-[#6]-3", 332 | "anthranil_acid_G(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](=[#8])-[#8]-[#1])-[#7](-[#1])-[#6]:[#6]", 333 | "anil_di_alk_M(1)": "c:1(:c:4:c(:n:c(:c:1-[#6](-[#1])(-[#1])-[#7]-3-c:2:c(:c(:c(:c(:c:2-[#6](-[#1])(-[#1])-[#6]-3(-[#1])-[#1])-[#1])-[#1])-[#1])-[#1])-[#1]):c(:c(:c(:c:4-[#1])-[#1])-[#1])-[#1])-[#1]", 334 | "anthranil_acid_H(1)": 
"c:1:c(:c2:c(:c:c:1)c(c(n2-[#1])-[#6]:[#6])-[#6]:[#6])-[#6](=[#8])-[#8]-[#1]", 335 | "thio_urea_M(1)": "[#6]:[#6]-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-c:1:c(:c(:c(:c(:c:1-[F,Cl,Br,I])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1]", 336 | "thiazole_amine_K(1)": "n:1:c3:c(:c:c2:c:1nc(s2)-[#7])sc(n3)-[#7]", 337 | "het_thio_5_imine_A(1)": "[#7]=[#6]-1-[#16]-[#6](=[#7])-[#7]=[#6]-1", 338 | "thio_amide_E(1)": "c:1:c(:n:c:c:c:1)-[#6](=[#16])-[#7](-[#1])-c:2:c(:c:c:c:c:2)-[#8]-[#6](-[#1])-[#1]", 339 | "het_thio_676_B(1)": "c:1-2:c(:c(:c(:c(:c:1-[#6](-c:3:c(-[#16]-[#6]-2(-[#1])-[#1]):c(:c(-[#1]):c(:c:3-[#1])-[#1])-[#1])-[#8]-[#6]:[#6])-[#1])-[#1])-[#1])-[#1]", 340 | "sulfonamide_G(1)": "[#6](-[#1])(-[#1])(-[#1])-c:1:c(:c(:c(:c(:n:1)-[#7](-[#1])-[#16](-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])(=[#8])=[#8])-[#1])-[#1])-[#1]", 341 | "thio_thiomorph_Z(1)": "[#6](=[#8])(-[#7]-1-[#6]-[#6]-[#16]-[#6]-[#6]-1)-c:2:c(:c(:c(:c(:c:2-[#16]-[#6](-[#1])-[#1])-[#1])-[#1])-[#1])-[#1]", 342 | "naphth_ene_one_A(1)": "c:1:c:c:3:c:2:c(:c:1)-[#6](-[#6]=[#6](-c:2:c:c:c:3)-[#8]-[#6](-[#1])-[#1])=[#8]", 343 | "naphth_ene_one_B(1)": "c:1-3:c:2:c(:c(:c:c:1)-[#7]):c:c:c:c:2-[#6](-[#6]=[#6]-3-[#6](-[F])(-[F])-[F])=[#8]", 344 | "amino_acridine_A(1)": "c:1:c:c:c:c:2:c:1:c:c:3:c(:n:2):n:c:4:c(:c:3-[#7]):c:c:c:c:4", 345 | "keto_phenone_B(1)": "c:1:c-3:c(:c:c:c:1)-[#6]-2=[#7]-[!#1]=[#6]-[#6]-[#6]-2-[#6]-3=[#8]", 346 | "hzone_acid_A(1)": "c:1-3:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](=[#7]-[#7](-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#6](=[#8])-[#8]-[#1])-[#1])-[#1])-c:4:c-3:c(:c(:c(:c:4-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1]", 347 | "sulfonamide_H(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#1])-[#1])-[#1])-[#1])-[#16](=[#8])(=[#8])-[#7](-[#1])-c:2:n:n:c(:c(:c:2-[#1])-[#1])-[#1]", 348 | "het_565_indole(1)": 
"c2(c(-[#1])n(-[#6](-[#1])-[#1])c:3:c(:c(:c:1n(c(c(c:1:c2:3)-[#1])-[#1])-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1]", 349 | "pyrrole_J(1)": "c1(c-2c(c(n1-[#6](-[#8])=[#8])-[#6](-[#1])-[#1])-[#16]-[#6](-[#1])(-[#1])-[#16]-2)-[#6](-[#1])-[#1]", 350 | "pyrazole_amino_B(1)": "s1ccnc1-c2c(n(nc2-[#1])-[#1])-[#7](-[#1])-[#1]", 351 | "pyrrole_K(1)": "c1(c(c(c(n1-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#6](=[#8])-[#8]-[#1]", 352 | "anthranil_acid_I(1)": "c:1:2(:c(:c(:c(:o:1)-[#6])-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]:[#6](-[#1]):[#6](-[#1]):[#6](-[#1]):[#6](-[#1]):[#6]:2-[#6](=[#8])-[#8]-[#1]", 353 | "thio_amide_F(1)": "[!#1]:[#6]-[#6](=[#16])-[#7](-[#1])-[#7](-[#1])-[#6]:[!#1]", 354 | "ene_one_C(1)": "[#6]-1(=[#8])-[#6](-[#6](-[#6]#[#7])=[#6](-[#1])-[#7])-[#6](-[#7])-[#6]=[#6]-1", 355 | "het_65_H(1)": "c2(c-1n(-[#6](-[#6]=[#6]-[#7]-1)=[#8])nc2-c3cccn3)-[#6]#[#7]", 356 | "cyano_imine_D(1)": "[#8]=[#6]-1-[#6](=[#7]-[#7]-[#6]-[#6]-1)-[#6]#[#7]", 357 | "cyano_misc_A(1)": "c:2(:c:1:c:c:c:c:c:1:n:n:c:2)-[#6](-[#6]:[#6])-[#6]#[#7]", 358 | "ene_misc_C(1)": "c:1:c:c-2:c(:c:c:1)-[#6]=[#6]-[#6](-[#7]-2-[#6](=[#8])-[#7](-[#1])-c:3:c:c(:c(:c:c:3)-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]", 359 | "het_66_E(1)": "c:2:c:c:1:n:c(:c(:n:c:1:c:c:2)-c:3:c:c:c:c:c:3)-c:4:c:c:c:c:c:4-[#8]-[#1]", 360 | "keto_keto_beta_F(1)": "[#6](-[#1])(-[#1])-[#6](-[#8]-[#1])=[#6](-[#6](=[#8])-[#6](-[#1])-[#1])-[#6](-[#1])-[#6]#[#6]", 361 | "misc_naphthimidazole(1)": "c:1:c:4:c(:c:c2:c:1nc(n2-[#1])-[#6]-[#8]-[#6](=[#8])-c:3:c:c(:c:c(:c:3)-[#7](-[#1])-[#1])-[#7](-[#1])-[#1]):c:c:c:c:4", 362 | "naphth_ene_one_C(1)": "c:2(:c:1:c:c:c:c-3:c:1:c(:c:c:2)-[#6]=[#6]-[#6]-3=[#7])-[#7]", 363 | "keto_phenone_C(1)": "c:2(:c:1:c:c:c:c:c:1:c-3:c(:c:2)-[#6](-c:4:c:c:c:c:c-3:4)=[#8])-[#8]-[#1]", 364 | "coumarin_C(1)": "[#6]-2(-[#6]=[#7]-c:1:c:c(:c:c:c:1-[#8]-2)-[Cl])=[#8]", 365 | "thio_est_cyano_A(1)": 
"[#6]-1=[#6]-[#7](-[#6](-c:2:c-1:c:c:c:c:2)(-[#6]#[#7])-[#6](=[#16])-[#16])-[#6]=[#8]", 366 | "het_65_imidazole(1)": "c2(nc:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])n2-[#6])-[#7](-[#1])-[#6](-[#7](-[#1])-c:3:c(:c:c:c:c:3-[#1])-[#1])=[#8]", 367 | "anthranil_acid_J(1)": "[#7](-[#1])(-[#6]:[#6])-c:1:c(-[#6](=[#8])-[#8]-[#1]):c:c:c(:n:1)-[#6]:[#6]", 368 | "colchicine_het(1)": "c:1-3:c(:c:c:c:c:1)-[#16]-[#6](=[#7]-[#7]=[#6]-2-[#6]=[#6]-[#6]=[#6]-[#6]=[#6]-2)-[#7]-3-[#6](-[#1])-[#1]", 369 | "ene_misc_D(1)": "c:1-2:c(:c(:c(:c(:c:1-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](=[#6](-[#6])-[#16]-[#6]-2(-[#1])-[#1])-[#6]", 370 | "indole_3yl_alk_B(1)": "c:12:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])c(c(-[#6]:[#6])n2-!@[#6]:[#6])-[#6](-[#1])-[#1]", 371 | "anil_OH_no_alk_A(1)": "[#7](-[#1])(-[#1])-c:1:c:c:c(:c:c:1-[#8]-[#1])-[#16](=[#8])(=[#8])-[#8]-[#1]", 372 | "thiazole_amine_L(1)": "s:1:c:c:c(:c:1-[#1])-c:2:c:s:c(:n:2)-[#7](-[#1])-[#1]", 373 | "pyrazole_amino_A(1)": "c1c(-[#7](-[#1])-[#1])nnc1-c2c(-[#6](-[#1])-[#1])oc(c2-[#1])-[#1]", 374 | "het_thio_N_5D(1)": "n1nscc1-c2nc(no2)-[#6]:[#6]", 375 | "anil_alk_indane(1)": "c:1(:c:c-3:c(:c:c:1)-[#7]-[#6]-4-c:2:c:c:c:c:c:2-[#6]-[#6]-3-4)-[#6;X4]", 376 | "anil_di_alk_N(1)": "c:1-2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#6](=[#6](-[#1])-[#6]-3-[#6](-[#6]#[#7])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#7]-2-3)-[#1]", 377 | "het_666_C(1)": "c:2-3:c(:c:c:1:c:c:c:c:c:1:c:2)-[#7](-[#6](-[#1])-[#1])-[#6](=[#8])-[#6](=[#7]-3)-[#6]:[#6]-[#7](-[#1])-[#6](-[#1])-[#1]", 378 | "ene_one_D(1)": "[#6](-[#8]-[#1]):[#6]-[#6](=[#8])-[#6](-[#1])=[#6](-[#6])-[#6]", 379 | "anil_di_alk_indol(1)": "c:1:2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1]):c(:c(-[#1]):n:2-[#1])-[#16](=[#8])=[#8]", 380 | "anil_no_alk_indol_A(1)": "c:1:2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#1])-[#1])-[#1]):c(:c(-[#1]):n:2-[#6](-[#1])-[#1])-[#1]", 381 | "dhp_amino_CN_G(1)": 
"[#16;X2]-1-[#6]=[#6](-[#6]#[#7])-[#6](-[#6])(-[#6]=[#8])-[#6](=[#6]-1-[#7](-[#1])-[#1])-[$([#6]=[#8]),$([#6]#[#7])]", 382 | "anil_di_alk_dhp(1)": "[#7]-2-[#6]=[#6](-[#6]=[#8])-[#6](-c:1:c:c:c(:c:c:1)-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#6]~3=[#6]-2~[#7]~[#6](~[#16])~[#7]~[#6]~3~[#7]", 383 | "anthranil_amide_A(1)": "c:1:c(:c:c:c:c:1)-[#6](=[#8])-[#7](-[#1])-c:2:c(:c:c:c:c:2)-[#6](=[#8])-[#7](-[#1])-[#7](-[#1])-c:3:n:c:c:s:3", 384 | "hzone_anthran_Z(1)": "c:1:c:2:c(:c:c:c:1):c(:c:3:c(:c:2):c:c:c:c:3)-[#6]=[#7]-[#7](-[#1])-c:4:c:c:c:c:c:4", 385 | "ene_one_amide_A(1)": "c:1:c(:c:c:c:c:1)-[#6](-[#1])-[#7]-[#6](=[#8])-[#6](-[#7](-[#1])-[#6](-[#1])-[#1])=[#6](-[#1])-[#6](=[#8])-c:2:c:c:c(:c:c:2)-[#8]-[#6](-[#1])-[#1]", 386 | "het_76_A(1)": "s:1:c(:c(-[#1]):c(:c:1-[#6]-3=[#7]-c:2:c:c:c:c:c:2-[#6](=[#7]-[#7]-3-[#1])-c:4:c:c:n:c:c:4)-[#1])-[#1]", 387 | "thio_urea_N(1)": "o:1:c(:c(-[#1]):c(:c:1-[#6](-[#1])(-[#1])-[#7](-[#1])-[#6](=[#16])-[#7](-[#6]-[#1])-[#6](-[#1])(-[#1])-c:2:c:c:c:c:c:2)-[#1])-[#1]", 388 | "anil_di_alk_coum(1)": "c:1:c(:c:c:c:c:1)-[#7](-[#6]-[#1])-[#6](-[#1])-[#6](-[#1])-[#6](-[#1])-[#7](-[#1])-[#6](=[#8])-[#6]-2=[#6](-[#8]-[#6](-[#6](=[#6]-2-[#6](-[#1])-[#1])-[#1])=[#8])-[#6](-[#1])-[#1]", 389 | "ene_one_amide_B(1)": "c2-3:c:c:c:1:c:c:c:c:c:1:c2-[#6](-[#1])-[#6;X4]-[#7]-[#6]-3=[#6](-[#1])-[#6](=[#8])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]", 390 | "het_thio_656c(1)": "c:1:c(:c:c:c:c:1)-[#6]-4=[#7]-[#7]:2:[#6](:[#7+]:c:3:c:2:c:c:c:c:3)-[#16]-[#6;X4]-4", 391 | "het_5_ene(1)": "[#6]-2(=[#8])-[#6](=[#6](-[#6](-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#7]=[#6](-c:1:c:c:c:c:c:1)-[#8]-2", 392 | "thio_imide_A(1)": "c:1:c(:c:c:c:c:1)-[#7]-2-[#6](=[#8])-[#6](=[#6](-[#1])-[#6]-2=[#8])-[#16]-c:3:c:c:c:c:c:3", 393 | "dhp_amidine_A(1)": "[#7]-1(-[#1])-[#7]=[#6](-[#7]-[#1])-[#16]-[#6](=[#6]-1-[#6]:[#6])-[#6]:[#6]", 394 | "thio_urea_O(1)": 
"c:1(:c(:c-3:c(:c(:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])-c:2:c(:c(:c(:o:2)-[#6]-[#1])-[#1])-[#1])-[#1])-[#8]-[#6](-[#8]-3)(-[#1])-[#1])-[#1])-[#1]", 395 | "anil_di_alk_O(1)": "c:1(:c(:c(:c(:c(:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-c:2:c:c:c:c:c:2)-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#1]", 396 | "thio_urea_P(1)": "[#8]=[#6]-!@n:1:c:c:c-2:c:1-[#7](-[#1])-[#6](=[#16])-[#7]-2-[#1]", 397 | "het_pyraz_misc(1)": "[#6](-[F])(-[F])-[#6](=[#8])-[#7](-[#1])-c:1:c(-[#1]):n(-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#8]-[#6](-[#1])(-[#1])-[#6]:[#6]):n:c:1-[#1]", 398 | "diazox_C(1)": "[#7]-2=[#7]-[#6]:1:[#7]:[!#6&!#1]:[#7]:[#6]:1-[#7]=[#7]-[#6]:[#6]-2", 399 | "diazox_D(1)": "[#6]-2(-[#1])(-[#8]-[#1])-[#6]:1:[#7]:[!#6&!#1]:[#7]:[#6]:1-[#6](-[#1])(-[#8]-[#1])-[#6]=[#6]-2", 400 | "misc_cyclopropane(1)": "[#6]-1(-[#6](-[#1])(-[#1])-[#6]-1(-[#1])-[#1])(-[#6](=[#8])-[#7](-[#1])-c:2:c:c:c(:c:c:2)-[#8]-[#6](-[#1])(-[#1])-[#8])-[#16](=[#8])(=[#8])-[#6]:[#6]", 401 | "imine_ene_one_B(1)": "[#6]-1:[#6]-[#6](=[#8])-[#6]=[#6]-1-[#7]=[#6](-[#1])-[#7](-[#6;X4])-[#6;X4]", 402 | "coumarin_D(1)": "c:1:c:c(:c:c-2:c:1-[#6](=[#6](-[#1])-[#6](=[#8])-[#8]-2)-c:3:c:c:c:c:c:3)-[#8]-[#6](-[#1])(-[#1])-[#6]:[#8]:[#6]", 403 | "misc_furan_A(1)": "c:1:c(:o:c(:c:1-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#7]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#8]-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#8]-c:2:c:c-3:c(:c:c:2)-[#8]-[#6](-[#8]-3)(-[#1])-[#1]", 404 | "rhod_sat_E(1)": "[#7]-4(-c:1:c:c:c:c:c:1)-[#6](=[#8])-[#16]-[#6](-[#1])(-[#7](-[#1])-c:2:c:c:c:c:3:c:c:c:c:c:2:3)-[#6]-4=[#8]", 405 | "rhod_sat_imine_A(1)": "[#7]-3(-[#6](=[#8])-c:1:c:c:c:c:c:1)-[#6](=[#7]-c:2:c:c:c:c:c:2)-[#16]-[#6](-[#1])(-[#1])-[#6]-3=[#8]", 406 | "rhod_sat_F(1)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#6](=[#8])-[#16]-[#6](-[#1])(-[#1])-[#6]-2=[#16]", 407 | "het_thio_5_imine_B(1)": "[#7]-1(-[#6](-[#1])-[#1])-[#6](=[#16])-[#7](-[#6]:[#6])-[#6](=[#7]-[#6]:[#6])-[#6]-1=[#7]-[#6]:[#6]", 408 | 
"het_thio_5_imine_C(1)": "[#16]-1-[#6](=[#7]-[#7]-[#1])-[#16]-[#6](=[#7]-[#6]:[#6])-[#6]-1=[#7]-[#6]:[#6]", 409 | "ene_five_het_N(1)": "[#6]-2(=[#8])-[#6](=[#6](-[#1])-c:1:c(:c:c:c(:c:1)-[F,Cl,Br,I])-[#8]-[#6](-[#1])-[#1])-[#7]=[#6](-[#16]-[#6](-[#1])-[#1])-[#16]-2", 410 | "thio_carbam_A(1)": "[#6](-[#1])(-[#1])-[#16]-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6]", 411 | "misc_anilide_A(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])-[#1])-[#7](-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6])-[#1])-[#7](-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6]", 412 | "misc_anilide_B(1)": "c:1(:c(:c:c(:c:c:1-[#6])-[Br])-[#6])-[#7](-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]-[#6]-[#6]", 413 | "mannich_B(1)": "c:1-2:c(:c:c:c(:c:1-[#8]-[#6](-[#1])(-[#1])-[#7](-[#6]:[#6]-[#8]-[#6](-[#1])-[#1])-[#6]-2(-[#1])-[#1])-[#1])-[#1]", 414 | "mannich_catechol_A(1)": "c:1-2:c(:c(:c(:c(:c:1-[#8]-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6]-2(-[#1])-[#1])-[#1])-[#8])-[#8])-[#1]", 415 | "anil_alk_D(1)": "[#7](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]", 416 | "het_65_I(1)": "n:1:2:c:c:c(:c:c:1:c:c(:c:2-[#6](=[#8])-[#6]:[#6])-[#6]:[#6])-[#6](~[#8])~[#8]", 417 | "misc_urea_A(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#6](=[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#6](-[#6;X4])(-[#6;X4])-[#7](-[#1])-[#6](=[#8])-[#7](-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6]", 418 | "imidazole_C(1)": "[#6]-3(-[#1])(-n:1:c(:n:c(:c:1-[#1])-[#1])-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[Br])-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-c:4:c-3:c(:c(:c(:c:4-[#1])-[#1])-[#1])-[#1]", 419 | "styrene_imidazole_A(1)": "[#6](=[#6](-[#1])-[#6](-[#1])(-[#1])-n:1:c(:n:c(:c:1-[#1])-[#1])-[#1])(-[#6]:[#6])-[#6]:[#6]", 420 | "thiazole_amine_M(1)": 
"c:1(:n:c(:c(-[#1]):s:1)-c:2:c:c:n:c:c:2)-[#7](-[#1])-[#6]:[#6]-[#6](-[#1])-[#1]", 421 | "misc_pyrrole_thiaz(1)": "c:1(:n:c(:c(-[#1]):s:1)-c:2:c:c:c:c:c:2)-[#6](-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7]-[#6](-[#1])(-[#1])-c:3:c:c:c:n:3-[#1]", 422 | "pyrrole_L(1)": "n:1(-[#1]):c(:c(-[#6](-[#1])-[#1]):c(:c:1-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#6](=[#8])-[#8]-[#6](-[#1])-[#1]", 423 | "het_thio_65_D(1)": "c:2(:n:c:1:c(:c(:c:c(:c:1-[#1])-[F,Cl,Br,I])-[#1]):n:2-[#1])-[#16]-[#6](-[#1])(-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]:[#6]", 424 | "ene_misc_E(1)": "c:1(:c(:c-2:c(:c(:c:1-[#8]-[#6](-[#1])-[#1])-[#1])-[#6]=[#6]-[#6](-[#1])-[#16]-2)-[#1])-[#8]-[#6](-[#1])-[#1]", 425 | "thio_cyano_A(1)": "[#7]-1(-[#1])-[#6](=[#16])-[#6](-[#1])(-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#6](=[#6]-1-[#6]:[#6])-[#1]", 426 | "cyano_amino_het_B(1)": "n:1:c(:c(:c(:c(:c:1-[#16;X2]-c:2:c:c:c:c:c:2-[#7](-[#1])-[#1])-[#6]#[#7])-c:3:c:c:c:c:c:3)-[#6]#[#7])-[#7](-[#1])-[#1]", 427 | "cyano_pyridone_G(1)": "[#7]-2(-c:1:c:c:c(:c:c:1)-[#8]-[#6](-[#1])-[#1])-[#6](=[#8])-[#6](=[#6]-[#6](=[#7]-2)-n:3:c:n:c:c:3)-[#6]#[#7]", 428 | "het_65_J(1)": "o:1:c(:c:c:2:c:1:c(:c(:c(:c:2-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](~[#8])~[#8]", 429 | "ene_one_yne_A(1)": "[#6]#[#6]-[#6](=[#8])-[#6]#[#6]", 430 | "anil_OH_no_alk_B(1)": "c:2(:c:1:c(:c(:c(:c(:c:1:c(:c(:c:2-[#8]-[#1])-[#6]=[#8])-[#1])-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#1]", 431 | "hzone_acyl_misc_A(1)": "c:1(:c(:c(:c(:o:1)-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#7]=[#6](-[$([#1]),$([#6](-[#1])-[#1])])-c:2:c:c:c:c(:c:2)-[*]-[*]-[*]-c:3:c:c:c:o:3", 432 | "thiophene_F(1)": "[#16](=[#8])(=[#8])-[#7](-[#1])-c:1:c(:c(:c(:s:1)-[#6]-[#1])-[#6]-[#1])-[#6](=[#8])-[#7]-[#1]", 433 | "anil_OC_alk_E(1)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#8]-[#1])-[#6](-[#1])-[#1]", 434 | 
"anil_OC_alk_F(1)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#6]=[#8])-[#16]", 435 | "het_65_K(1)": "n1nnnc2cccc12", 436 | "het_65_L(1)": "c:1-2:c(-[#1]):s:c(:c:1-[#6](=[#8])-[#7]-[#7]=[#6]-2-[#7](-[#1])-[#1])-[#6]=[#8]", 437 | "coumarin_E(1)": "c:1-3:c(:c:2:c(:c:c:1-[Br]):o:c:c:2)-[#6](=[#6]-[#6](=[#8])-[#8]-3)-[#1]", 438 | "coumarin_F(1)": "c:1-3:c(:c:c:c:c:1)-[#6](=[#6](-[#6](=[#8])-[#7](-[#1])-c:2:n:o:c:c:2-[Br])-[#6](=[#8])-[#8]-3)-[#1]", 439 | "coumarin_G(1)": "c:1-2:c(:c:c(:c:c:1-[F,Cl,Br,I])-[F,Cl,Br,I])-[#6](=[#6](-[#6](=[#8])-[#7](-[#1])-[#1])-[#6](=[#7]-[#1])-[#8]-2)-[#1]", 440 | "coumarin_H(1)": "c:1-3:c(:c:c:c:c:1)-[#6](=[#6](-[#6](=[#8])-[#7](-[#1])-c:2:n:c(:c:s:2)-[#6]:[#16]:[#6]-[#1])-[#6](=[#8])-[#8]-3)-[#1]", 441 | "het_thio_67_A(1)": "[#6](-[#1])(-[#1])-[#16;X2]-c:2:n:n:c:1-[#6]:[#6]-[#7]=[#6]-[#8]-c:1:n:2", 442 | "sulfonamide_I(1)": "[#16](=[#8])(=[#8])(-c:1:c:n(-[#6](-[#1])-[#1]):c:n:1)-[#7](-[#1])-c:2:c:n(:n:c:2)-[#6](-[#1])(-[#1])-[#6]:[#6]-[#8]-[#6](-[#1])-[#1]", 443 | "het_65_mannich(1)": "c:1-2:c(:c(:c(:c(:c:1-[#8]-[#6](-[#1])(-[#1])-[#8]-2)-[#6](-[#1])(-[#1])-[#7]-3-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6]-3)-[#1])-[#1])-[#1]", 444 | "anil_alk_A(1)": "[#6](-[#1])(-[#1])-[#8]-[#6]:[#6]-[#6](-[#1])(-[#1])-[#7](-[#1])-c:2:c(:c(:c:1:n(:c(:n:c:1:c:2-[#1])-[#1])-[#6]-[#1])-[#1])-[#1]", 445 | "het_5_inium(1)": "[#7]-4(-c:1:c:c:c:c:c:1)-[#6](=[#7+](-c:2:c:c:c:c:c:2)-[#6](=[#7]-c:3:c:c:c:c:c:3)-[#7]-4)-[#1]", 446 | "anil_di_alk_P(1)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:2:c:c:c:1:s:c(:n:c:1:c:2)-[#16]-[#6](-[#1])-[#1]", 447 | "thio_urea_Q(1)": "c:1:2:c(:c(:c(:c(:c:1:c(:c(-[#1]):c(:c:2-[#1])-[#1])-[#6](-[#6](-[#1])-[#1])=[#7]-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6]:[#6]:[#6])-[#1])-[#1])-[#1])-[#1]", 448 | "thio_pyridine_A(1)": "[#6]:1(:[#7]:[#6](:[#7]:[!#1]:[#7]:1)-c:2:c(:c(:c(:o:2)-[#1])-[#1])-[#1])-[#16]-[#6;X4]", 449 | "melamine_B(1)": 
"n:1:c(:n:c(:n:c:1-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#7](-[#6]-[#1])-[#6]=[#8]", 450 | "misc_phthal_thio_N(1)": "c:1(:n:s:c(:n:1)-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7]-[#6](=[#8])-c:2:c:c:c:c:c:2-[#6](=[#8])-[#8]-[#1])-c:3:c:c:c:c:c:3", 451 | "hzone_acyl_misc_B(1)": "n:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#7]=[#6](-[#1])-c:2:c:c:c:c:c:2-[#8]-[#6](-[#1])(-[#1])-[#6](=[#8])-[#8]-[#1]", 452 | "tert_butyl_B(1)": "[#6](-[#1])(-[#1])(-[#1])-[#6](-[#6](-[#1])(-[#1])-[#1])(-[#6](-[#1])(-[#1])-[#1])-c:1:c(:c(:c(:c(:c:1-[#8]-[#1])-[#6](-[#6](-[#1])(-[#1])-[#1])(-[#6](-[#1])(-[#1])-[#1])-[#6](-[#1])(-[#1])-[#1])-[#1])-[#6](-[#1])(-[#1])-c:2:c:c:c(:c(:c:2-[#1])-[#1])-[#8]-[#1])-[#1]", 453 | "diazox_E(1)": "[#7](-[#1])(-[#1])-c:1:c(-[#7](-[#1])-[#1]):c(:c(-[#1]):c:2:n:o:n:c:1:2)-[#1]", 454 | "anil_NH_no_alk_B(1)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-[#7](-[#1])-[#16](=[#8])=[#8])-[#1])-[#7](-[#1])-[#6](-[#1])-[#1])-[F,Cl,Br,I])-[#1]", 455 | "anil_no_alk_A(1)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-[#7]=[#6]-2-[#6](=[#6]~[#6]~[#6]=[#6]-2)-[#1])-[#1])-[#1])-[#1])-[#1]", 456 | "anil_no_alk_B(1)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-n:2:c:c:c:c:2)-[#1])-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1]", 457 | "thio_ene_amine_A(1)": "[#16]=[#6]-[#6](-[#6](-[#1])-[#1])=[#6](-[#6](-[#1])-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]", 458 | "het_55_B(1)": "[#6]-1:[#6]-[#8]-[#6]-2-[#6](-[#1])(-[#1])-[#6](=[#8])-[#8]-[#6]-1-2", 459 | "cyanamide_A(1)": "[#8]-[#6](=[#8])-[#6](-[#1])(-[#1])-[#16;X2]-[#6](=[#7]-[#6]#[#7])-[#7](-[#1])-c:1:c:c:c:c:c:1", 460 | "ene_one_one_A(1)": "[#8]=[#6]-[#6]-1=[#6](-[#16]-[#6](=[#6](-[#1])-[#6])-[#16]-1)-[#6]=[#8]", 461 | "ene_six_het_D(1)": "[#8]=[#6]-1-[#7]-[#7]-[#6](=[#7]-[#6]-1=[#6]-[#1])-[!#1]:[!#1]", 462 | "ene_cyano_E(1)": "[#8]=[#6]-[#6](-[#1])=[#6](-[#6]#[#7])-[#6]", 463 | "ene_cyano_F(1)": 
"[#8](-[#1])-[#6](=[#8])-c:1:c(:c(:c(:c(:c:1-[#8]-[#1])-[#1])-c:2:c(-[#1]):c(:c(:o:2)-[#6](-[#1])=[#6](-[#6]#[#7])-c:3:n:c:c:n:3)-[#1])-[#1])-[#1]", 464 | "hzone_furan_C(1)": "c:1:c(:c:c:c:c:1)-[#7](-c:2:c:c:c:c:c:2)-[#7]=[#6](-[#1])-[#6]:3:[#6](:[#6](:[#6](:[!#1]:3)-c:4:c:c:c:c(:c:4)-[#6](=[#8])-[#8]-[#1])-[#1])-[#1]", 465 | "anil_no_alk_C(1)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-c:2:c(-[#1]):c(:c(-[#6](-[#1])-[#1]):o:2)-[#6]=[#8])-[#1])-[#1]", 466 | "hzone_acid_D(1)": "[#8](-[#1])-[#6](=[#8])-c:1:c:c:c(:c:c:1)-[#7]-[#7]=[#6](-[#1])-[#6]:2:[#6](:[#6](:[#6](:[!#1]:2)-c:3:c:c:c:c:c:3)-[#1])-[#1]", 467 | "hzone_furan_E(1)": "[#8](-[#1])-[#6](=[#8])-c:1:c:c:c:c(:c:1)-[#6]:[!#1]:[#6]-[#6]=[#7]-[#7](-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#8]", 468 | "het_6_pyridone_NH2(1)": "[#8](-[#1])-[#6]:1:[#6](:[#6]:[!#1]:[#6](:[#7]:1)-[#7](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](=[#8])-[#8]", 469 | "imine_one_fives_D(1)": "[#6]-1(=[!#6&!#1])-[#6](-[#7]=[#6]-[#16]-1)=[#8]", 470 | "pyrrole_M(1)": "n2(-c:1:c:c:c:c:c:1)c(c(-[#1])c(c2-[#6]=[#7]-[#8]-[#1])-[#1])-[#1]", 471 | "pyrrole_N(1)": "n2(-[#6](-[#1])-c:1:c(:c(:c:c(:c:1-[#1])-[#1])-[#1])-[#1])c(c(-[#1])c(c2-[#6]-[#1])-[#1])-[#6]-[#1]", 472 | "pyrrole_O(1)": "n1(-[#6](-[#1])-[#1])c(c(-[#6](=[#8])-[#6])c(c1-[#6]:[#6])-[#6])-[#6](-[#1])-[#1]", 473 | "ene_cyano_G(1)": "n1(-[#6])c(c(-[#1])c(c1-[#6](-[#1])=[#6](-[#6]#[#7])-c:2:n:c:c:s:2)-[#1])-[#1]", 474 | "sulfonamide_J(1)": "n3(-c:1:c:c:c:c:c:1-[#7](-[#1])-[#16](=[#8])(=[#8])-c:2:c:c:c:s:2)c(c(-[#1])c(c3-[#1])-[#1])-[#1]", 475 | "misc_pyrrole_benz(1)": "n2(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#6](-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#8]-[#6]:[#6])c(c(-[#1])c(c2-[#1])-[#1])-[#1]", 476 | "thio_urea_R(1)": "c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7]-[#7](-[#1])-[#6](-[#1])=[#6](-[#1])-[#6]=[#8]", 477 | "ene_one_one_B(1)": "[#6]-1(-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6]-[#6](-[#1])(-[#1])-[#6]-1=[#8])=[#6](-[#7]-[#1])-[#6]=[#8]", 
478 | "dhp_amino_CN_H(1)": "[#7](-[#1])(-[#1])-[#6]-1=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#16]-[#6;X4]-[#16]-1", 479 | "het_66_anisole(1)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-c:2:c:c:n:c:3:c(:c:c:c(:c:2:3)-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1]", 480 | "thiazole_amine_N(1)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#7](-[#1])-c:2:n:c(:c:s:2)-c:3:c:c:c(:c:c:3)-[#8]-[#6](-[#1])-[#1]", 481 | "het_pyridiniums_C(1)": "[#6]~1~3~[#7](-[#6]:[#6])~[#6]~[#6]~[#6]~[#6]~1~[#6]~2~[#7]~[#6]~[#6]~[#6]~[#7+]~2~[#7]~3", 482 | "het_5_E(1)": "[#7]-3(-c:2:c:1:c:c:c:c:c:1:c:c:c:2)-[#7]=[#6](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6]-3=[#8]" 483 | } --------------------------------------------------------------------------------