├── demo
    ├── demo_1020.smi
    ├── subtructure_filter_demo.xls
    ├── rules.json
    └── phgdh_demo_vina.ini
├── docs
    └── platform.jpg
├── secse
    ├── growing
    │   ├── mutation
    │   │   ├── rules_demo.db
    │   │   ├── __init__.py
    │   │   └── mutation.py
    │   ├── __init__.py
    │   ├── filter_parallel.sh
    │   ├── filter.py
    │   └── pains_smarts.json
    ├── utilities
    │   ├── Structure Filter_20211015_v1.12.xls
    │   ├── __init__.py
    │   ├── load_rules.py
    │   ├── open_filter.py
    │   ├── function_helper.py
    │   ├── substructure_filter.py
    │   ├── autogridGen.sh
    │   ├── check_rules.py
    │   ├── selectByLE.py
    │   ├── wash_mol.py
    │   ├── excel2db.py
    │   └── ring_tool.py
    ├── __init__.py
    ├── report
    │   ├── __init__.py
    │   ├── filter_sdf_by_titles.pl
    │   └── grow_path.py
    ├── evaluate
    │   ├── __init__.py
    │   ├── ligprep_glide.sh
    │   ├── glide_docking.py
    │   ├── proprep.py
    │   ├── ligprep_unidock.sh
    │   ├── ligprep_autodock_gpu.sh
    │   ├── ligprep_vina_parallel.sh
    │   ├── docking.py
    │   └── ligprep.py
    ├── scoring
    │   ├── __init__.py
    │   ├── chemprop_pre.sh
    │   ├── sampling.py
    │   ├── diversity_score.py
    │   ├── docking_score_prediction.py
    │   └── ranking.py
    ├── run_secse.py
    └── grow_processes.py
├── requirements.txt
├── README.md
└── LICENSE.txt


/demo/demo_1020.smi:
--------------------------------------------------------------------------------
1 | c1ccccc1	f1
2 | c1ccncc1	f2
3 | c1cncnc1	f3


--------------------------------------------------------------------------------
/docs/platform.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KeenThera/SECSE/HEAD/docs/platform.jpg


--------------------------------------------------------------------------------
/demo/subtructure_filter_demo.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KeenThera/SECSE/HEAD/demo/subtructure_filter_demo.xls


--------------------------------------------------------------------------------
/secse/growing/mutation/rules_demo.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KeenThera/SECSE/HEAD/secse/growing/mutation/rules_demo.db


--------------------------------------------------------------------------------
/secse/utilities/Structure Filter_20211015_v1.12.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KeenThera/SECSE/HEAD/secse/utilities/Structure Filter_20211015_v1.12.xls


--------------------------------------------------------------------------------
/secse/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python  
2 | # -*- coding:utf-8 _*-
3 | """ 
4 | @author: Lu Chong
5 | @file: __init__.py
6 | @time: 2021/11/17/10:41
7 | """
8 | 


--------------------------------------------------------------------------------
/secse/report/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python  
2 | # -*- coding:utf-8 _*-
3 | """ 
4 | @author: Lu Chong
5 | @file: __init__.py
6 | @time: 2021/8/17/11:38
7 | """
8 | 


--------------------------------------------------------------------------------
/secse/evaluate/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python  
2 | # -*- coding:utf-8 _*-
3 | """ 
4 | @author: Lu Chong
5 | @file: __init__.py
6 | @time: 2021/8/17/11:38
7 | """
8 | 


--------------------------------------------------------------------------------
/secse/growing/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python  
2 | # -*- coding:utf-8 _*-
3 | """ 
4 | @author: Lu Chong
5 | @file: __init__.py
6 | @time: 2021/8/17/11:22
7 | """
8 | 


--------------------------------------------------------------------------------
/secse/scoring/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python  
2 | # -*- coding:utf-8 _*-
3 | """ 
4 | @author: Lu Chong
5 | @file: __init__.py
6 | @time: 2021/8/17/11:23
7 | """
8 | 


--------------------------------------------------------------------------------
/secse/utilities/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python  
2 | # -*- coding:utf-8 _*-
3 | """ 
4 | @author: Lu Chong
5 | @file: __init__.py
6 | @time: 2021/8/17/11:22
7 | """
8 | 


--------------------------------------------------------------------------------
/secse/growing/mutation/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python  
2 | # -*- coding:utf-8 _*-
3 | """ 
4 | @author: Lu Chong
5 | @file: __init__.py
6 | @time: 2021/8/17/11:40
7 | """
8 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy~=1.20.3
 2 | pandas~=1.3.3
 3 | pandarallel~=1.5.2
 4 | SECSE~=0.1
 5 | tqdm~=4.62.2
 6 | biopandas~=0.2.9
 7 | openbabel~=3.1.1
 8 | rdkit~=2021.03.5
 9 | chemprop~=1.3.1
10 | xlrd~=2.0.1
11 | 


--------------------------------------------------------------------------------
/secse/utilities/load_rules.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python  
 2 | # -*- coding:utf-8 _*-
 3 | """ 
 4 | @author: Lu Chong
 5 | @file: load_rules.py
 6 | @time: 2022/2/28/09:52
 7 | """
 8 | 
 9 | import sqlite3
10 | import pandas as pd
11 | from loguru import logger
12 | 
13 | 
def json_to_DB(in_json, out_db_path):
    """Load rules from a JSON file into a SQLite database.

    The JSON input is read with ``pandas.read_json`` and written with
    ``DataFrame.to_sql`` to the table ``G-001`` of the database at
    ``out_db_path``.

    Args:
        in_json: JSON input accepted by ``pandas.read_json`` (e.g. a file path).
        out_db_path: Path of the SQLite database file to write to.

    Returns:
        None. Errors raised while writing the table are logged, not re-raised.
    """
    df = pd.read_json(in_json)
    conn = sqlite3.connect(out_db_path)
    try:
        df.to_sql("G-001", conn)
    except Exception as e:
        # Best effort by design: report the problem and carry on.
        logger.error(e)
    finally:
        # Release the database handle on every exit path.
        conn.close()
22 | 


--------------------------------------------------------------------------------
/demo/rules.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "Rule ID": "G-001-0020",
 4 |         "SMARTS": "[c,CR0;!H0:1]>>[*:1]C1OC(NC1)=O",
 5 |         "Priority": 3
 6 |     },
 7 |     {
 8 |         "Rule ID": "G-001-0028",
 9 |         "SMARTS": "[c,CR0,n,N,O,S;!H0:1]>>[*:1]c1ccccc1",
10 |         "Priority": 3
11 |     },
12 |     {
13 |         "Rule ID": "G-001-0063",
14 |         "SMARTS": "[c,CR0,n,N,O,S;!H0:1]>>[*:1]c1ocnc1",
15 |         "Priority": 3
16 |     },
17 |     {
18 |         "Rule ID": "G-001-0069",
19 |         "SMARTS": "[c,CR0,n,N,O,S;!H0:1]>>[*:1]c1n[nH]cc1",
20 |         "Priority": 3
21 |     }
22 | ]


--------------------------------------------------------------------------------
/secse/growing/filter_parallel.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | # @author: Lu Chong
 3 | # @file: filter_parallel.sh
 4 | # @time: 2021/ 03/03/9:26
 5 | 
 6 | SECONDS=0
 7 | workdir=${1}
 8 | gen=${2}
 9 | config=${3}
10 | cpu_num=${4}
11 | script=$SECSE/growing/filter.py
12 | files=tmp.txt
13 | cd "${workdir}"/generation_split_by_seed || exit
14 | for i in *.csv; do
15 |   echo "$i;$workdir;$gen;$config"
16 | done >$files
17 | 
18 | mkdir -p ../filter_flag
19 | # filter default
20 | parallel --jobs "$cpu_num" -I {} -a ${files} -C ";" python "$script"
21 | rm $files
22 | cd "${workdir}"/filter_flag || exit
23 | for i in *.csv; do
24 |   echo "$i" | parallel grep PASS
25 | done >"${workdir}"/filter.csv
26 | cd "${workdir}" || exit
27 | #rm -r filter_flag/
28 | rm -r generation_split_by_seed/ mutation.csv mutation.raw generation.raw
29 | duration=$SECONDS
30 | echo "Filter runtime: $((duration / 60)) minutes $((duration % 60)) seconds."
31 | 


--------------------------------------------------------------------------------
/demo/phgdh_demo_vina.ini:
--------------------------------------------------------------------------------
 1 | [general]
 2 | project_code = PHG
 3 | workdir = /home/dachong/PHGDH/res/demo001/
 4 | fragments = /home/dachong/PHGDH/input/demo_1020.smi
 5 | num_gen = 5
 6 | num_per_gen = 200
 7 | seed_per_gen = 10
 8 | start_gen = 0
 9 | cpu = 320
10 | gpu = 0
11 | rule_db = 0
12 | 
13 | [docking]
14 | docking_program = Vina
15 | target = /home/dachong/PHGDH/input/PHGDH_6RJ3_for_vina.pdbqt
16 | x = 20.9
17 | y = -10.4
18 | z = 3.0
19 | box_size_x = 15
20 | box_size_y = 15
21 | box_size_z = 15
22 | rmsd = 2
23 | delta_score = -1.0
24 | score_cutoff = -9
25 | 
26 | [prediction]
27 | mode = 2
28 | dl_per_gen = 100
29 | dl_score_cutoff = -9
30 | 
31 | [properties]
32 | mw = 450
33 | logp_lower = 0.5
34 | logp_upper = 7
35 | chiral_center = 2
36 | heteroatom_ratio = 0.35
37 | rdkit_rotatable_bound_num = 5
38 | keen_rotatable_bound_num = 3
39 | rigid_body_num = 2
40 | hbd = 5
41 | hba = 10
42 | tpsa = 200
43 | lipinski_violation = 1
44 | qed = 0.5
45 | max_ring_size = 7
46 | max_ring_system_size = 3
47 | ring_system_count = 4
48 | bridged_site_count = 2
49 | spiro_site_count = 1
50 | fused_site_count = 3
51 | rdkit_sa_score = 5
52 | substructure_filter = 0


--------------------------------------------------------------------------------
/secse/utilities/open_filter.py:
--------------------------------------------------------------------------------
 1 | # We use the estimated LogS (aqueous solubility) to filter generated molecules
 2 | 
 3 | """
 4 | The user can define their own filter function as needed.
 5 | The input parameter of the function is an rdkit mol object,
 6 | and the return value is a boolean. If the molecule is needed, return true;
 7 | if it is not needed, return false.
 8 | The user can modify this Python script file according to their own requirements.
 9 | 
10 | The following code is just an example.
11 | LogS = 0.26 -  0.74 LogP - 0.0066 MW + 0.0034 RB - 0.42 AP  (reference form; user_filter below uses different coefficients)
12 | ref :https://practicalcheminformatics.blogspot.com/2023/06/
13 | getting-real-with-molecular-property.html
14 | 
15 | """
16 | from rdkit import Chem
17 | from rdkit.Chem import Descriptors, Crippen, Lipinski
18 | from loguru import logger
19 | 
20 | 
def user_filter(mol):
    """Example user filter based on a linear solubility estimate.

    The estimate is ``0.16`` plus a weighted sum of Crippen LogP, molecular
    weight, the number of rotatable bonds, and the fraction of atoms matching
    the SMARTS ``a``.

    Args:
        mol: RDKit molecule to evaluate.

    Returns:
        True when the estimate is less than or equal to -4.5, otherwise False.
    """
    aromatic_atom = Chem.MolFromSmarts("a")
    descriptors = {
        "logp": Crippen.MolLogP(mol),
        "mw": Descriptors.MolWt(mol),
        "rotors": Lipinski.NumRotatableBonds(mol),
        "ap": len(mol.GetSubstructMatches(aromatic_atom)) / mol.GetNumAtoms(),
    }
    weights = {"logp": -0.63, "mw": -0.0062, "rotors": 0.066, "ap": -0.74}

    # Accumulate in a fixed order: intercept, logp, mw, rotors, ap.
    esol = 0.16
    for key in ("logp", "mw", "rotors", "ap"):
        esol = esol + weights[key] * descriptors[key]

    return esol <= -4.5
34 | 


--------------------------------------------------------------------------------
/secse/evaluate/ligprep_glide.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | mols=${1}
 3 | workdir=${2}
 4 | target=${3}
 5 | generation=${4}
 6 | docking_precision=${5}
 7 | cpu_num=${6}
 8 | #docking_precision=SP
 9 | #docking_precision=XP
10 | #docking_precision=HTVS
11 | ligprep_in=ligprep_gen_$generation.inp
12 | glide_in=glide_gen_$generation.in
13 | glide_mae=ligprep_gen_$generation.maegz
14 | 
15 | cd "$workdir" || exit
16 | 
17 | # LigPreparation
18 | echo "Run ligprep ..."
19 | 
20 | cat >"$ligprep_in" <<EOF
21 | INPUT_FILE_NAME $mols
22 | OUT_MAE $glide_mae
23 | FORCE_FIELD 16
24 | IONIZATION  2
25 | PH  7.2
26 | PH_THRESHOLD  1.0
27 | EPIK  yes
28 | DETERMINE_CHIRALITIES no
29 | IGNORE_CHIRALITIES  no
30 | NUM_STEREOISOMERS 8
31 | EOF
32 | 
33 | "${SCHRODINGER}/ligprep" -inp $ligprep_in -HOST "localhost:$cpu_num" -TMPDIR "$workdir" -WAIT
34 | 
35 | # generate glide input file
36 | cat >"$glide_in" <<EOF
37 | FORCEFIELD  OPLS3e
38 | GRIDFILE  $target
39 | LIGANDFILE  $glide_mae
40 | POSTDOCK_NPOSE  3
41 | PRECISION $docking_precision
42 | POSE_OUTTYPE  ligandlib_sd
43 | COMPRESS_POSES  FALSE
44 | EOF
45 | 
46 | if [ "$generation" -le 1 ]; then
47 |   # add parameter for fragments docking, add constrains
48 |   cat >>"$glide_in" <<EOF
49 | EXPANDED_SAMPLING   True
50 | MAXKEEP   50000
51 | MAXREF   1200
52 | SCORING_CUTOFF   500.0
53 | EOF
54 | fi
55 | 
56 | echo "Run glide ..."
57 | 
58 | # Docking
59 | "${SCHRODINGER}/glide" "$glide_in" -OVERWRITE -adjust -HOST "localhost:$cpu_num" -TMPDIR "$workdir" -WAIT
60 | 
61 | echo "Finshing docking of genration $generation !"
62 | 


--------------------------------------------------------------------------------
/secse/evaluate/glide_docking.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python  
 2 | # -*- coding:utf-8 _*-
 3 | """ 
 4 | @author: Lu Chong
 5 | @file: glide_docking.py
 6 | @time: 2021/11/19/10:05
 7 | """
 8 | import os
 9 | from loguru import logger
10 | from utilities.function_helper import shell_cmd_execute
11 | 
# Shell wrapper that runs LigPrep and Glide; located through the SECSE env var.
GLIDE_SHELL = os.path.join(os.getenv("SECSE"), "evaluate", "ligprep_glide.sh")


def dock_by_glide(workdir, mols_smi, target, gen, dock_mode, cpu_num):
    """Run the Glide wrapper script and rewrite its SDF output.

    After the shell script finishes, ``glide_gen_<gen>_lib.sdf`` in ``workdir``
    is copied to ``docking_outputs_with_score.sdf`` with these changes:

    * the ``r_i_glide_gscore`` field is renamed to ``docking score``;
    * for every other ``> <...>`` header, the header and the two lines that
      follow it are dropped (this assumes single-line values followed by a
      blank line).

    Args:
        workdir: Directory passed to the script and holding the SDF files.
        mols_smi: Ligand file passed to the script.
        target: Docking target passed to the script.
        gen: Generation number (converted to ``str`` for the command line).
        dock_mode: Docking precision passed to the script.
        cpu_num: CPU count (converted to ``str`` for the command line).
    """
    shell_cmd_execute([GLIDE_SHELL, mols_smi, workdir, target, str(gen), dock_mode, str(cpu_num)])

    raw_sdf = os.path.join(workdir, f"glide_gen_{gen}_lib.sdf")
    scored_sdf = os.path.join(workdir, "docking_outputs_with_score.sdf")

    expect_score = False  # the next line is the value of the gscore field
    lines_to_skip = 0     # remaining lines of an unwanted data field
    with open(raw_sdf, "r") as source, open(scored_sdf, "w") as target_sdf:
        for record_line in source:
            if record_line.startswith("> <r_i_glide_gscore>"):
                expect_score = True
            elif expect_score:
                expect_score = False
                target_sdf.write("> <docking score>\n{}\n".format(record_line.strip()))
            elif record_line.startswith("> <"):
                lines_to_skip = 2
            elif lines_to_skip > 0:
                lines_to_skip -= 1
            else:
                target_sdf.write(record_line)
43 | 


--------------------------------------------------------------------------------
/secse/utilities/function_helper.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python  
 2 | # -*- coding:utf-8 _*-
 3 | """ 
 4 | @author: Lu Chong
 5 | @file: function_helper.py
 6 | @time: 2022/10/13/16:36
 7 | """
 8 | import subprocess
 9 | from loguru import logger
10 | 
11 | 
def shell_cmd_execute(cmd_lst, capture_mode="all"):
    """Run a command through the shell and log what it printed.

    The items of ``cmd_lst`` are joined with single spaces and executed with
    ``shell=True``; nothing is escaped, so shell syntax inside the items is
    interpreted by the shell and arguments containing spaces must be quoted by
    the caller.

    Args:
        cmd_lst: Iterable of strings forming the command line.
        capture_mode: ``"all"`` captures stdout with stderr merged into it,
            ``"error"`` discards stdout and captures stderr, ``0`` captures
            nothing.

    Returns:
        The captured text for ``"all"`` and ``"error"``, ``None`` for ``0``.

    Raises:
        ValueError: If ``capture_mode`` is not one of the values above.
        Exception: If the command exits with a non-zero status; the original
            ``subprocess.CalledProcessError`` is attached as ``__cause__``.
    """
    cmd = " ".join(cmd_lst)
    logger.info(f"Executing command:\n{cmd}")

    try:
        # Set subprocess options based on the capture_mode
        if capture_mode == "all":
            result = subprocess.run(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, shell=True, check=True
            )
            if result.stdout:
                logger.info("Command output:\n" + result.stdout)
            return result.stdout

        elif capture_mode == "error":
            result = subprocess.run(
                cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True, shell=True, check=True
            )
            # Do not emit an ERROR record when the command wrote nothing.
            if result.stderr:
                logger.error("Captured stderr:\n" + result.stderr)
            return result.stderr

        elif capture_mode == 0:
            subprocess.run(cmd, shell=True, check=True)
            return None

        else:
            raise ValueError("Invalid capture_mode. Use 'all', 'error', or 0.")

    except subprocess.CalledProcessError as e:
        logger.error(f"Command failed with return code {e.returncode}.")
        if capture_mode in {"all", "error"}:
            # "all" merges stderr into stdout; "error" only captures stderr, so
            # e.output (an alias of stdout) is None there.
            captured = e.stdout if capture_mode == "all" else e.stderr
            logger.error("Captured error:\n" + captured if captured else "No error captured.")
        raise Exception(f"Error executing command: {cmd}") from e
45 | 


--------------------------------------------------------------------------------
/secse/evaluate/proprep.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python  
 2 | # -*- coding:utf-8 _*-
 3 | """ 
 4 | @author: Lu Chong
 5 | @file: proprep.py
 6 | @time: 2021/9/10/14:14
 7 | 
 8 | prepare the protein file (pdbqt format)
 9 | """
10 | import os
11 | import subprocess
12 | from loguru import logger
13 | from biopandas.pdb import PandasPdb
14 | 
15 | 
def clean(code, chain=None, adfr_bin=r"C:\Program Files (x86)\ADFRsuite-1.0\bin"):
    """Fetch a PDB entry, write a cleaned copy and run ADFR ``prepare_receptor``.

    The entry is fetched with ``PandasPdb().fetch_pdb`` and written to
    ``<code>_clean.pdb`` in the current directory with only the ``ATOM`` and
    ``OTHERS`` records. ``prepare_receptor.bat`` from ``adfr_bin`` is then run
    on that file.

    Args:
        code: PDB code passed to ``fetch_pdb``.
        chain: If given, only ``ATOM`` rows with this ``chain_id`` are kept.
        adfr_bin: Directory containing ``prepare_receptor.bat``. Defaults to
            the Windows installation path that was previously hard-coded.

    Returns:
        None. A non-zero exit status of ``prepare_receptor`` is logged together
        with its stderr instead of being discarded.
    """
    ppdb = PandasPdb().fetch_pdb(code)
    if chain is not None:
        ppdb.df['ATOM'] = ppdb.df['ATOM'][ppdb.df['ATOM'].chain_id == chain]
    name = code + "_clean.pdb"
    ppdb.to_pdb(path=name,
                records=['ATOM', 'OTHERS'],
                gz=False,
                append_newline=True)

    exe = os.path.join(adfr_bin, "prepare_receptor.bat")
    result = subprocess.run([exe, "-r", name,
                             "-A", "hydrogens", '-w'], stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        # Output is captured as bytes; decode leniently for the log only.
        stderr_text = result.stderr.decode(errors="replace")
        logger.error(f"prepare_receptor failed with return code {result.returncode}:\n{stderr_text}")
33 | 
34 | 
def boxinfo(code, resn, extend=6):
    """Compute a box around one hetero residue of a PDB entry.

    Args:
        code: PDB code passed to ``PandasPdb().fetch_pdb``.
        resn: ``residue_name`` of the ``HETATM`` rows to enclose.
        extend: Padding added on both sides of every axis.

    Returns:
        Tuple ``(x_center, y_center, z_center, x_size, y_size, z_size)`` where
        each center is the mean coordinate of the selected rows and each size
        is the padded maximum minus the padded minimum.
    """
    hetatm = PandasPdb().fetch_pdb(code).df['HETATM']
    ligand = hetatm[hetatm.residue_name == resn]

    centers = []
    sizes = []
    for column in ("x_coord", "y_coord", "z_coord"):
        coords = ligand[column]
        centers.append(coords.mean())
        upper = coords.max() + extend
        lower = coords.min() - extend
        sizes.append(upper - lower)

    return (*centers, *sizes)
54 | 


--------------------------------------------------------------------------------
/secse/scoring/chemprop_pre.sh:
--------------------------------------------------------------------------------
#! /bin/bash
# -*- coding:utf-8 _*-
# @author: Lu Chong
# @file: chemprop_pre.sh
# @time: 2021/10/27/16:32
#
# Train a chemprop regression model, predict scores for a candidate file in
# 1000-line chunks with GNU parallel, and write the first <num_output> rows
# of the score-sorted predictions as input for the next docking round.
#
# Usage: chemprop_pre.sh <workdir> <train> <pre> <max_gen> <num_output> <seed>
#   workdir     base directory; results go to <workdir>/prediction
#   train       training data passed to "chemprop train --data-path"
#   pre         file to predict on; split into chunks, each given an
#               "id,smiles" header line
#   max_gen     used in model, directory and output file names
#   num_output  number of sorted rows to keep
#   seed        passed as --data-seed and used in the model directory name
workdir=${1}
train=${2}
pre=${3}
max_gen=${4}
num_output=${5}
seed=${6}
model_dir=$workdir/prediction/models/
files=tmp.txt

mkdir -p "$model_dir"

# all data
model="$model_dir"/G"$max_gen"_seed"$seed"
chemprop train --data-path "$train" --task-type regression --save-dir \
  "$model" --data-seed "$seed" --show-individual-scores --split-type random -qq

# split files and prediction with CPU Parallelization
split_dir=$workdir/prediction/pre_split_$max_gen
mkdir -p "$split_dir"
split -l 1000 -d "$pre" "$split_dir"/part --additional-suffix ".csv"

pre_dir="$workdir"/prediction/pre_dir_$max_gen
mkdir -p "$pre_dir"
cd "$split_dir" || exit
# add header
sed -i "1i\\id,smiles" part*.csv
# One job per line: "<chunk to predict>;<prediction output path>".
# The job file is created inside split_dir and is not removed afterwards.
for i in *.csv; do
  echo "$split_dir/$i;$pre_dir/$i"
done >$files

# run chemprop_predict
# {1} and {2} are the two ";"-separated columns of each job line.
parallel -I {} -a ${files} -C ";" chemprop predict --test-path {1} --preds-path {2} --smiles-columns smiles --model-paths "$model"/model_0/best.pt --accelerator cpu -qq

# merge prediction
# tail -n +2 skips the first line of every chunk; -q suppresses file name headers.
cd "$workdir"/prediction || exit
tail -n +2 -q "$pre_dir"/part*.csv >pre_G"$max_gen".csv

# fetch top predicted compounds
# Numeric ascending sort keyed on the third comma-separated field onwards, so
# the smallest values come first (presumably better scores; confirm with caller).
sort -nk3 -t, pre_G"$max_gen".csv >pre_G"$max_gen"_sorted.csv
# pre_G<max_gen>.csv is overwritten here with a header plus the kept rows.
echo "id,smiles,pred score" >pre_G"$max_gen".csv
head -n "$num_output" pre_G"$max_gen"_sorted.csv >>pre_G"$max_gen".csv
#rm ../pre_G"$max_gen"_sorted.csv

# write mols for next round of docking
# Output rows are "<column 2><TAB><column 1>" of the kept rows, header skipped.
pre_docking_dir=$workdir/generation_"$max_gen"_pre
mkdir -p "$pre_docking_dir"
tail -n+2 pre_G"$max_gen".csv | awk -F, '{print $2"\t"$1}' >"$pre_docking_dir"/mols_for_docking_pred.smi
53 | 


--------------------------------------------------------------------------------
/secse/utilities/substructure_filter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python  
 2 | # -*- coding:utf-8 _*-
 3 | """ 
 4 | @author: Lu Chong
 5 | @file: substructure_filter.py 
 6 | @time: 2021/02/08/14:13
 7 | """
 8 | import os
 9 | import pandas as pd
10 | from rdkit import Chem
11 | from loguru import logger
12 | 
# Default rule table shipped with SECSE. When the SECSE environment variable is
# unset the path becomes relative instead of raising TypeError at import time,
# so the module can still be imported and used with a custom filter file.
FILTER_FILE = os.path.join(os.getenv("SECSE", ""), "utilities", "Structure Filter_20211015_v1.12.xls")


class StructureFilter:
    """Substructure filter driven by a table of SMARTS patterns.

    ``fdic`` maps each rule ID (as ``str``) to a dict with the compiled pattern
    under ``"Pattern_sma"`` and the allowed number of matches under ``"Max"``.
    """

    def __init__(self, filter_lst=FILTER_FILE):
        """Read the rule table and compile its SMARTS patterns.

        Args:
            filter_lst: Excel file with the columns ``Pattern``, ``ID`` and
                ``Max``; rows with a missing value in any of them are dropped.

        Raises:
            ValueError: If a pattern cannot be parsed as SMARTS.
        """
        df = pd.read_excel(filter_lst, usecols=["Pattern", "ID", "Max"]).dropna()
        df["ID"] = df["ID"].astype(str)
        df = df.set_index("ID")
        df["Pattern_sma"] = df["Pattern"].apply(lambda x: Chem.MolFromSmarts(x))
        # MolFromSmarts yields None for unparsable input; report it here rather
        # than failing later in the middle of a filtering run.
        invalid = [rule_id for rule_id, patt in df["Pattern_sma"].items() if patt is None]
        if invalid:
            raise ValueError("Invalid SMARTS pattern for rule ID(s): " + ", ".join(invalid))
        self.fdic = df[["Pattern_sma", "Max"]].T.to_dict()

    @staticmethod
    def _violates(mol, rule):
        """Return True when ``mol`` matches ``rule`` more often than allowed."""
        pattern = rule["Pattern_sma"]
        max_count = int(rule["Max"])
        if max_count == 0:
            # A single match is enough, so skip enumerating all matches.
            return mol.HasSubstructMatch(pattern)
        return len(mol.GetSubstructMatches(pattern)) > max_count

    def sfilter(self, mol):
        """Return the ID of the first violated rule, or ``"PASS"`` if none is."""
        for rule_id, rule in self.fdic.items():
            if self._violates(mol, rule):
                return rule_id
        return "PASS"

    def sfilter_all(self, mol):
        """Return the list of all violated rule IDs, or ``"PASS"`` if none is."""
        hits = [rule_id for rule_id, rule in self.fdic.items() if self._violates(mol, rule)]
        return hits if hits else "PASS"


if __name__ == '__main__':
    sf = StructureFilter()
    tmol = Chem.MolFromSmiles("CC(Cc1ncccn1)(c2ncccc2)C")
    logger.info(sf.sfilter(tmol))
57 | 


--------------------------------------------------------------------------------
/secse/evaluate/ligprep_unidock.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | # -*- coding:utf-8 _*-
 3 | # @author: Yannan Yuan
 4 | # @file: ligprep_unidock.sh
 5 | # @time: 2024/3/11/17:00
 6 | 
 7 | SECONDS=0
 8 | workdir=${1}
 9 | smi=${2}
10 | receptor=${3}
11 | x=${4}
12 | y=${5}
13 | z=${6}
14 | box_size_x=${7}
15 | box_size_y=${8}
16 | box_size_z=${9}
17 | cpu_num=${10}
18 | script=$SECSE/evaluate/ligprep.py
19 | split_dir=$workdir/docking_split
20 | docking_dir=$workdir/docking_poses
21 | lig_dir=$workdir/ligands_for_docking
22 | pdb_dir=$workdir/pdb_files
23 | sdf_dir=$workdir/sdf_files
24 | conf=$workdir/vina_config.txt
25 | cd "$workdir" || exit
26 | create_clean_directory() {
27 |   dir_name=$1
28 |   if [ -d "$dir_name" ]; then
29 |     echo "Directory $dir_name already exists, removing $dir_name ..."
30 |     rm -rf "$dir_name"
31 |   fi
32 |   if mkdir "$dir_name"; then
33 |     return 0
34 |   else
35 |     echo "Creating directory failed: $dir_name"
36 |     return 1
37 |   fi
38 | }
39 | for dir in "$split_dir" "$docking_dir" "$lig_dir" "$pdb_dir" "$sdf_dir"; do
40 |   create_clean_directory "$dir"
41 | done
42 | # split by line
43 | split -l 100 -d "$smi" "$split_dir"/part --additional-suffix ".smi"
44 | 
45 | # run ligprep
46 | cd "$split_dir" || exit
47 | find . -name "*smi" | parallel --jobs "$cpu_num" python "$script" "$workdir"
48 | 
49 | # run unidock
50 | files=ligand_index.txt
51 | cd "$lig_dir" || exit
52 | for i in *pdbqt; do
53 |   echo "$lig_dir/$i"
54 | done >$files
55 | 
56 | $UNIDOCK --receptor $receptor --ligand_index $files --dir $docking_dir \
57 |     --center_x $x --center_y $y --center_z $z \
58 |     --size_x $box_size_x --size_y $box_size_y --size_z $box_size_z \
59 |     --exhaustiveness 128 --max_step 20 --refine_step 3 \
60 |     --num_modes 3 --energy_range 3 --verbosity 2 >/dev/null
61 | rm $files
62 | 
63 | find "$docking_dir" -name "*pdbqt" | parallel --jobs "$cpu_num" obabel -ipdbqt {} -O "$pdb_dir"/{/.}-dp.pdb -m &>/dev/null
64 | 
65 | duration=$SECONDS
66 | echo "Docking runtime: $((duration / 60)) minutes $((duration % 60)) seconds."
67 | 


--------------------------------------------------------------------------------
/secse/evaluate/ligprep_autodock_gpu.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | # -*- coding:utf-8 _*-
 3 | # @author: Lu Chong
 4 | # @file: ligprep_autodock_gpu.sh
 5 | # @time: 2022/2/16/15:25
 6 | 
 7 | SECONDS=0
 8 | workdir=${1}
 9 | smi=${2}
10 | receptor=${3}
11 | cpu_num=${4}
12 | gpu_num=${5}
13 | 
14 | files=tmp.txt
15 | script=$SECSE/evaluate/ligprep.py
16 | split_dir=$workdir/docking_split
17 | docking_dir=$workdir/docking_poses
18 | lig_dir=$workdir/ligands_for_docking
19 | pdb_dir=$workdir/pdb_files
20 | sdf_dir=$workdir/sdf_files
21 | 
22 | cd "$workdir" || exit
23 | 
24 | create_clean_directory() {
25 |   dir_name=$1
26 |   if [ -d "$dir_name" ]; then
27 |     echo "Directory $dir_name already exists, removing $dir_name ..."
28 |     rm -rf "$dir_name"
29 |   fi
30 |   if mkdir "$dir_name"; then
31 |     return 0
32 |   else
33 |     echo "Creating directory failed: $dir_name"
34 |     return 1
35 |   fi
36 | }
37 | for dir in "$split_dir" "$docking_dir" "$lig_dir" "$pdb_dir" "$sdf_dir"; do
38 |   create_clean_directory "$dir"
39 | done
40 | 
41 | # split by line
42 | split -l 100 -d "$smi" "$split_dir"/part --additional-suffix ".smi"
43 | 
44 | # run ligprep
45 | cd "$split_dir" || exit
46 | find . -name "*smi" | parallel --jobs "$cpu_num" python "$script" "$workdir"
47 | 
48 | # run autdock gpu
49 | cd "$lig_dir" || exit
50 | for i in *pdbqt; do
51 |   echo "$lig_dir/$i;$docking_dir/${i%.*}"
52 | done >$files
53 | 
54 | parallel --jobs "$gpu_num" -I {} -a ${files} -C ";" "$AUTODOCK_GPU/bin/autodock_gpu_128wi" --ffile "$receptor" --lfile {1} --resnam {2} --seed 12345 -D '$(({%}))' -x 0 -n 3 # >/dev/null
55 | #rm $files
56 | 
57 | # covert dlg file to pdb
58 | cd "$docking_dir" || exit
59 | find . -name "*.dlg" | parallel "grep '^DOCKED' {} >{.}.tmp"
60 | find . -name "*.tmp" | parallel "cut -c9- {} >{.}.pdbqt"
61 | rm ./*.tmp
62 | 
63 | sed -e "s/USER    Estimated Free Energy of Binding    =/REMARK/g" -i *pdbqt
64 | find "$docking_dir" -name "*pdbqt" | parallel --jobs "$cpu_num" obabel -ipdbqt {} -O "$pdb_dir"/{/.}-dp.pdb -m &>/dev/null
65 | 
66 | duration=$SECONDS
67 | echo "Docking runtime: $((duration / 60)) minutes $((duration % 60)) seconds."
68 | 


--------------------------------------------------------------------------------
/secse/report/filter_sdf_by_titles.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # -*- coding:utf-8 _*-
 3 | # @author: Lu Chong
 4 | # @file: filter_sdf_by_titles.pl
 5 | # @time: 2023/06/15/17:58
 6 | 
 7 | use strict;
 8 | use warnings;
 9 | 
# Copy the structures of an SD file whose title appears in a list of titles.
#
# Arguments: <input> SD file, <title_file> one title per line, <output> SD file.

# Check command line arguments
my ($input, $title_file, $output) = @ARGV;
die "Usage: perl filter_sdf_by_titles.pl <input> <title_file> <output>\n"
    unless defined $input && defined $output;

# Open output file (three-argument form: the name is never parsed as a mode)
open(my $out, ">", $output) or die "Cannot open $output for writing: $!\n";

# Read titles
my %titles;
if ($title_file) {
    open(my $fh, "<", $title_file) or die "Cannot open $title_file for reading: $!\n";
    while (my $title = <$fh>) {
        chomp($title);
        $title =~ s/^\s+|\s+$//g;
        # A blank line must not register the empty title, otherwise the
        # fragment after the last record separator would be copied as well.
        next unless length $title;
        $titles{$title} = 1;
    }
    close($fh);
}

open(my $in, "<", $input) or die "Cannot open $input for reading: $!\n";

# Process input file
local $/ = '$$$$'; # Set the input record separator

my $matched = 0;

while (my $buffer = <$in>) {
    # Get the title of the current structure
    my $title = get_title($buffer);
    next unless exists $titles{$title};

    $matched++;
    # Every record but the first of the file starts with the line break that
    # followed the previous separator. Strip it from the first record written;
    # for later records it terminates the preceding separator line.
    $buffer =~ s/^\r?\n// if $matched == 1;
    print $out $buffer;
}

# Add newline at the end of the output file
print $out "\n";

close($in);
close($out) or die "Cannot close $output: $!\n";

# Extract the first non-blank line as the CT title, without trailing
# whitespace (so titles from files with CRLF line endings still match).
sub get_title {
    my ($ct) = @_;
    $ct =~ s/^\s+//;
    my ($title) = $ct =~ /^(.+)$/m;
    # Test definedness, not truth, so that a title of "0" is preserved.
    return '' unless defined $title;
    $title =~ s/\s+$//;
    return $title;
}
75 | 


--------------------------------------------------------------------------------
/secse/evaluate/ligprep_vina_parallel.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | # -*- coding:utf-8 _*-
 3 | # @author: Lu Chong
 4 | # @file: ligprep_vina_parallel.sh
 5 | # @time: 2021/9/8/09:52
 6 | 
 7 | SECONDS=0
 8 | workdir=${1}
 9 | smi=${2}
10 | receptor=${3}
11 | x=${4}
12 | y=${5}
13 | z=${6}
14 | box_size_x=${7}
15 | box_size_y=${8}
16 | box_size_z=${9}
17 | cpu_num=${10}
18 | files=tmp.txt
19 | script=$SECSE/evaluate/ligprep.py
20 | split_dir=$workdir/docking_split
21 | docking_dir=$workdir/docking_poses
22 | lig_dir=$workdir/ligands_for_docking
23 | pdb_dir=$workdir/pdb_files
24 | sdf_dir=$workdir/sdf_files
25 | conf=$workdir/vina_config.txt
26 | cd "$workdir" || exit
27 | create_clean_directory() {
28 |   dir_name=$1
29 |   if [ -d "$dir_name" ]; then
30 |     echo "Directory $dir_name already exists, removing $dir_name ..."
31 |     rm -rf "$dir_name"
32 |   fi
33 |   if mkdir "$dir_name"; then
34 |     return 0
35 |   else
36 |     echo "Creating directory failed: $dir_name"
37 |     return 1
38 |   fi
39 | }
40 | for dir in "$split_dir" "$docking_dir" "$lig_dir" "$pdb_dir" "$sdf_dir"; do
41 |   create_clean_directory "$dir"
42 | done
43 | # split by line
44 | split -l 100 -d "$smi" "$split_dir"/part --additional-suffix ".smi"
45 | 
46 | # run ligprep
47 | cd "$split_dir" || exit
48 | find . -name "*smi" | parallel --jobs "$cpu_num" python "$script" "$workdir"
49 | 
50 | # write vina config file
51 | cat >"$conf" <<EOF
52 | receptor = $receptor
53 | center_x =  $x
54 | center_y =  $y
55 | center_z =  $z
56 | 
57 | size_x = $box_size_x
58 | size_y = $box_size_y
59 | size_z = $box_size_z
60 | 
61 | seed = 12345
62 | cpu = 1
63 | num_modes = 3
64 | energy_range = 3
65 | exhaustiveness = 16
66 | verbosity = 0
67 | EOF
68 | 
69 | # run vina
70 | cd "$lig_dir" || exit
71 | for i in *pdbqt; do
72 |   echo "$lig_dir/$i;$docking_dir/$i"
73 | done >$files
74 | 
75 | # ignore Vina stdout
76 | parallel --jobs "$cpu_num" -I {} -a ${files} -C ";" "$VINA" --config "$conf" --ligand {1} --out {2} >/dev/null
77 | rm $files
78 | 
79 | find "$docking_dir" -name "*pdbqt" | parallel --jobs "$cpu_num" obabel -ipdbqt {} -O "$pdb_dir"/{/.}-dp.pdb -m &>/dev/null
80 | 
81 | duration=$SECONDS
82 | echo "Docking runtime: $((duration / 60)) minutes $((duration % 60)) seconds."
83 | 


--------------------------------------------------------------------------------
/secse/utilities/autogridGen.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Zhenting Gao
 4 | # Command
 5 | # - autogridGen.sh pro.pdbqt grid.gpf
 6 | # Update
 7 | # - 2023/5/16
 8 | #  - This script is created for AutoDock Grid generation
 9 | 
10 | # Parameters
pdbqtFile=$1
gridInputFile=$2 #gpf file
# Please download autogrid4 from https://autodock.scripps.edu/download-autodock4/
autogrid='/tools/docking/autodock/cpu/autogrid4'

# The autogrid4 binary must exist at the hard-coded path above.
if [ ! -f "${autogrid}" ]; then
    echo "${autogrid} is needed but does not exist!"
    echo "
    - Please download autogrid4 from https://autodock.scripps.edu/download-autodock4/
    - Modify this script at line 14 to set the correct path of autogrid4
    "
    exit 1
fi

# Exactly two arguments are required; otherwise print usage and write a sample gpf.
if [ ! $# -eq 2 ]; then #Test input parameter
    echo 'autogridGen.sh protein.pdbqt gpfFile'
    echo
    echo "grid.gpf.example is created for your reference."
    echo "
npts 70 70 70
spacing 0.375
gridcenter    17.510   29.510   32.520
" > grid.gpf.example
    cat grid.gpf.example
    exit 1
fi

# Fail early with a clear message instead of producing a gpf with empty fields.
for required in "$pdbqtFile" "$gridInputFile"; do
    if [ ! -f "$required" ]; then
        echo "Input file not found: $required"
        exit 1
    fi
done

# Copy the three user-supplied settings verbatim from the input gpf file.
npts=$(grep npts "${gridInputFile}")
spacing=$(grep spacing "${gridInputFile}")
gridcenter=$(grep gridcenter "${gridInputFile}")
echo "$pdbqtFile"
# Strip directory and extension; the dot is escaped so only a literal
# ".pdbqt" / ".gpf" suffix is removed.
prefix=$(basename "${pdbqtFile}" | sed -e 's/\.pdbqt$//')
gpfPrefix=$(basename "${gridInputFile}" | sed -e 's/\.gpf$//')
# Write the full parameter file consumed by autogrid4.
cat >"${gpfPrefix}_production.gpf" <<EOF
${npts}
gridfld ${prefix}.maps.fld # grid_data_file
${spacing}
receptor_types A C HD N NA OA SA        # receptor atom types
ligand_types A Br C Cl F HD N NA OA S SA # ligand atom types
receptor ${pdbqtFile}   # macromolecule
${gridcenter}
smooth 0.5                           # store minimum energy w/in rad(A)
map ${prefix}.A.map
map ${prefix}.Br.map
map ${prefix}.C.map
map ${prefix}.Cl.map
map ${prefix}.F.map
map ${prefix}.HD.map
map ${prefix}.N.map
map ${prefix}.NA.map
map ${prefix}.OA.map
map ${prefix}.S.map
map ${prefix}.SA.map
elecmap ${prefix}.e.map    # electrostatic potential map
dsolvmap ${prefix}.d.map              # desolvation potential map
dielectric -0.1465                   # <0, AD4 distance-dep.diel;>0, constant
EOF

# The script's exit status is autogrid4's exit status.
"${autogrid}" -p "${gpfPrefix}_production.gpf" -l "${gpfPrefix}_production.glg"
69 | 


--------------------------------------------------------------------------------
/secse/scoring/sampling.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python  
 2 | # -*- coding:utf-8 _*-
 3 | """ 
 4 | @author: Lu Chong
 5 | @file: sampling.py
 6 | @time: 2022/2/8/10:25
 7 | """
 8 | import os
 9 | import pandas as pd
10 | from loguru import logger
11 | 
12 | from scoring.diversity_score import cal_morgan_fp, tanimoto_smi
13 | 
14 | 
def sample_by_rule_weight(gen, filter_df, workdir_now):
    """Draw a priority-weighted sample of molecules and save it as ``sampled.csv``.

    Rows whose ``type`` is ``"G-002"`` are sampled separately from all other
    rows so that their share of the sample is capped at a generation-dependent
    ratio (0.3 up to generation 3, 0.1 up to generation 7, 0.01 afterwards).
    When no ``"G-002"`` row is present the whole frame is sampled at once.

    :param gen: generation number; selects the ``priority_gen_<gen>`` weight column
    :param filter_df: DataFrame with a ``type`` column and the weight column
    :param workdir_now: directory that receives ``sampled.csv``
    :return: the sampled DataFrame
    """
    max_total = 500000
    weight_col = "priority_gen_" + str(gen)
    out_csv = os.path.join(workdir_now, "sampled.csv")

    if "G-002" not in list(filter_df["type"]):
        logger.error("No cmpds generated from ring with spacer in the generation!")
        sampled_df = filter_df.sample(min(filter_df.shape[0], max_total), replace=False,
                                      weights=weight_col)
        sampled_df.to_csv(out_csv, index=False)
        return sampled_df

    # Split into the G-002 pool and everything else (dropped by index label).
    spacer_pool = filter_df[filter_df["type"] == "G-002"]
    common_pool = filter_df.drop(spacer_pool.index, axis=0)

    # Share of the sample reserved for G-002 rows shrinks in later generations.
    spacer_ratio = 0.3 if gen <= 3 else (0.1 if gen <= 7 else 0.01)

    budget = min(filter_df.shape[0], max_total)
    n_spacer = min(int(budget * spacer_ratio), spacer_pool.shape[0])
    n_common = min(int(budget * (1 - spacer_ratio)), common_pool.shape[0])

    # The G-002 pool is sampled first, then the remaining rows.
    spacer_pick = spacer_pool.sample(n_spacer, replace=False, weights=weight_col)
    common_pick = common_pool.sample(n_common, replace=False, weights=weight_col)

    sampled_df = pd.concat([spacer_pick, common_pick], axis=0)
    sampled_df.to_csv(out_csv, index=False)
    return sampled_df
46 | 
47 | 
def sample_by_similarity(gen, filter_df, workdir_now, num_per_gen,
                         ref_smi="O=C(C1=CC=C(C(C)NC(C2=CC(C3=CC=CC=C3)=NN2C)=O)C=C1)O"):
    """Keep the molecules most similar to a reference and save them as ``sampled.csv``.

    A ``similarity`` column (Tanimoto similarity of Morgan fingerprints against
    ``ref_smi``) is added to ``filter_df`` in place.

    :param gen: generation number; selects the ``smiles_gen_<gen>`` column
    :param filter_df: DataFrame of candidate molecules (modified in place)
    :param workdir_now: directory that receives ``sampled.csv``
    :param num_per_gen: number of top-scoring rows to keep
    :param ref_smi: SMILES of the reference molecule
    :return: the ``num_per_gen`` rows with the largest similarity
    """
    reference_fp = cal_morgan_fp(ref_smi)

    def similarity_to_reference(smiles):
        return tanimoto_smi(cal_morgan_fp(smiles), reference_fp)

    smiles_col = "smiles_gen_" + str(gen)
    filter_df["similarity"] = filter_df[smiles_col].apply(similarity_to_reference)

    top_hits = filter_df.nlargest(num_per_gen, columns="similarity")
    top_hits.to_csv(os.path.join(workdir_now, "sampled.csv"), index=False)
    return top_hits
56 | 


--------------------------------------------------------------------------------
/secse/scoring/diversity_score.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python  
 2 | # -*- coding:utf-8 _*-
 3 | """ 
 4 | @author: Lu Chong
 5 | @file: diversity_score.py 
 6 | @time: 2020/11/18/9:47
 7 | """
 8 | import math
 9 | import numpy as np
10 | import pandas as pd
11 | import rdkit
12 | from rdkit.Chem import AllChem, rdFMCS, rdShapeHelpers
13 | from rdkit import Chem
14 | from pandarallel import pandarallel
15 | from loguru import logger
16 | 
def cal_morgan_fp(smi):
    """Return a 512-bit Morgan fingerprint (radius 2) for a SMILES string.

    A SMILES that RDKit cannot parse is replaced by methane ("C"), so a
    fingerprint is always returned.
    """
    molecule = Chem.MolFromSmiles(smi) or Chem.MolFromSmiles("C")
    return AllChem.GetMorganFingerprintAsBitVect(molecule, 2, 512)
23 | 
24 | 
def tanimoto_smi(fp1, fp2):
    """Return the Tanimoto similarity of two RDKit fingerprints."""
    similarity = rdkit.DataStructs.cDataStructs.TanimotoSimilarity(fp1, fp2)
    return similarity
27 | 
28 | 
def tanimoto_shape(ref, mol):
    """Return shape similarity, i.e. one minus the shape Tanimoto distance."""
    distance = rdShapeHelpers.ShapeTanimotoDist(ref, mol)
    return 1 - distance
31 | 
32 | 
def protrude_shape(ref, mol):
    """Return one minus the shape protrude distance between ``ref`` and ``mol``."""
    distance = rdShapeHelpers.ShapeProtrudeDist(ref, mol)
    return 1 - distance
35 | 
36 | 
def clustering(df: pd.DataFrame, smi, gen, cpu_num, k=500):
    """Assign every molecule to one of up to ``k`` iteratively chosen cluster centers.

    :param df: molecules to cluster; it is re-indexed into a copy, so the
        caller's frame is not modified
    :param smi: name of the column holding SMILES strings
    :param gen: generation number, used as suffix of the two result columns
    :param cpu_num: number of pandarallel workers
    :param k: maximum number of cluster centers
    :return: the re-indexed frame with ``cluster_center_gen_<gen>`` (index of
        the center with the highest similarity) and
        ``cluster_center_dis_gen_<gen>`` (that highest similarity) added

    Note: the ``dis`` names below hold Tanimoto *similarities*, not distances.
    """
    # Work on a 0..n-1 index so labels and positions coincide.
    df = df.reset_index(drop=True)
    pandarallel.initialize(verbose=0, nb_workers=cpu_num)
    df["fp2"] = df[smi].parallel_apply(cal_morgan_fp)
    df = df.dropna(subset=["fp2"])
    # The first center is picked at random.
    c = df["fp2"].sample(1)
    c_next = c.index[0]
    c = c.iloc[0]
    c_lst = []  # centers chosen after the first one; not read again in this function
    dis = np.zeros(df.shape[0])  # running sum of (masked) similarities to all centers so far
    dis_dic = dict()  # center index -> unmasked similarity of every molecule to that center
    for i in range(k):
        new_dis = np.array(df["fp2"].apply(lambda x: tanimoto_smi(c, x)))
        dis_dic[c_next] = new_dis.copy()
        # Mask molecules whose similarity to this center is 0.6 or more: the huge
        # value keeps them from being chosen as a cluster center in later loops.
        new_dis[new_dis >= 0.6] = 999999999
        dis += new_dis
        # Stop once every molecule has been masked by at least one center.
        if np.min(dis) >= 999999999:
            break
        else:
            # Next center: the unmasked molecule with the smallest summed
            # similarity to the centers chosen so far.
            c_next = np.argmin(dis)
            c = df["fp2"].iloc[c_next]
            c_lst.append(c_next)

    # One column per center; for each molecule keep the center with the largest similarity.
    df_cluster = pd.DataFrame(dis_dic)
    df["cluster_center_gen_" + str(gen)] = df_cluster.parallel_apply(lambda x: x.nlargest(1).index[0], axis=1)
    df["cluster_center_dis_gen_" + str(gen)] = df_cluster.parallel_apply(lambda x: x.nlargest(1).iloc[0], axis=1)
    df = df.drop(columns="fp2")
    return df
66 | 
67 | 
def cal_rmsd(parent, c):
    """Return the RMSD between two molecules over their maximum common substructure.

    Coordinates come from the default conformer of each molecule; no alignment
    is performed.

    :param parent: RDKit molecule with a conformer
    :param c: RDKit molecule with a conformer
    :return: the RMSD over the matched atoms, or ``-2`` when the molecules have
        no usable common substructure
    """
    mcs = rdFMCS.FindMCS([parent, c], threshold=1, completeRingsOnly=True, ringMatchesRingOnly=True,
                         bondCompare=rdFMCS.BondCompare.CompareOrderExact,
                         timeout=1).queryMol
    if mcs is None:  # no common substructure
        return -2
    p_match = parent.GetSubstructMatch(mcs)
    c_match = c.GetSubstructMatch(mcs)
    # An empty match would divide by zero below, and a match missing on one
    # side only would silently report an RMSD of 0.0; treat both as "no match".
    if not p_match or len(p_match) != len(c_match):
        return -2

    parent_conf = parent.GetConformer()
    child_conf = c.GetConformer()
    delta2 = 0.0
    for pi, ci in zip(p_match, c_match):
        delta2 += (parent_conf.GetAtomPosition(pi) - child_conf.GetAtomPosition(ci)).LengthSq()
    return math.sqrt(delta2 / len(p_match))
83 | 


--------------------------------------------------------------------------------
/secse/utilities/check_rules.py:
--------------------------------------------------------------------------------
 1 | import sqlite3
 2 | import re
 3 | import csv
 4 | import os
 5 | import sys
 6 | 
 7 | def quote_ident(name: str) -> str:
 8 |     """用 SQLite 规则安全转义标识符（表名/列名）。"""
 9 |     return '"' + name.replace('"', '""') + '"'
10 | 
def check_smarts(db_file: str, out_file: str):
    """Report reaction SMARTS whose atom-map tags differ between both sides.

    Every table and view that has a ``smarts`` column and a rule-id column
    ("Rule ID", "RuleID" or "rule_id", matched case-insensitively) is scanned.
    For each SMARTS containing ``>>``, the ``:<number>`` tags on the left and
    right of the first ``>>`` are compared; rules whose tag sets differ are
    written to ``out_file`` as CSV rows.

    :param db_file: path of the SQLite database to inspect
    :param out_file: path of the CSV report (overwritten)

    A table that cannot be queried is skipped with a warning on stderr so the
    remaining tables are still checked.
    """
    tag_pattern = re.compile(r":(\d+)")  # atom-map numbers such as ":1"
    header = ["Table", "Rule ID", "SMARTS", "Left Tags", "Right Tags", "Only Left", "Only Right"]

    conn = sqlite3.connect(db_file)
    try:
        cur = conn.cursor()
        with open(out_file, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(header)

            # All tables and views in the database.
            cur.execute("SELECT name FROM sqlite_master WHERE type IN ('table','view');")
            tables = [r[0] for r in cur.fetchall()]

            for t in tables:
                qt = quote_ident(t)
                try:
                    # Column names of this table.
                    cur.execute(f"PRAGMA table_info({qt});")
                    cols = [row[1] for row in cur.fetchall()]
                    lower_cols = [c.lower() for c in cols]

                    if "smarts" not in lower_cols:
                        continue
                    qsmarts = quote_ident(cols[lower_cols.index("smarts")])

                    # First column that looks like a rule id (case-insensitive).
                    id_col = next(
                        (c for c in cols if c.lower().replace(" ", "") in ("ruleid", "rule_id")),
                        None,
                    )
                    if not id_col:
                        continue
                    qid = quote_ident(id_col)

                    # Only rows whose SMARTS contains ">>".
                    cur.execute(f"SELECT {qid}, {qsmarts} FROM {qt} WHERE {qsmarts} LIKE '%>>%';")
                    rows = cur.fetchall()
                except sqlite3.Error as err:
                    # Best effort: report the problem and carry on with the next table.
                    print(f"warning: skipping {t!r}: {err}", file=sys.stderr)
                    continue

                for rid, smarts in rows:
                    # Ignore NULL/empty and non-text cells instead of abandoning the table.
                    if not isinstance(smarts, str) or ">>" not in smarts:
                        continue
                    left, right = smarts.split(">>", 1)

                    left_tags = set(tag_pattern.findall(left))
                    right_tags = set(tag_pattern.findall(right))

                    # Only mismatching rules are reported.
                    if left_tags != right_tags:
                        only_left = sorted(left_tags - right_tags, key=int)
                        only_right = sorted(right_tags - left_tags, key=int)
                        writer.writerow([
                            t,
                            rid,
                            smarts,
                            " ".join(sorted(left_tags, key=int)),
                            " ".join(sorted(right_tags, key=int)),
                            " ".join(only_left),
                            " ".join(only_right),
                        ])
    finally:
        # Close the connection even when opening the report or a query fails.
        conn.close()
78 | 
79 | 
if __name__ == "__main__":
    # Exactly two positional arguments are expected: input database and output CSV.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("用法: python check_rules.py <input.db> <output.csv>")
        sys.exit(1)

    db_file, out_file = cli_args

    # Refuse to run on a database path that does not exist.
    if not os.path.exists(db_file):
        print(f"错误: 输入数据库文件不存在 -> {db_file}")
        sys.exit(1)

    # Any failure is reported and turned into a non-zero exit status.
    try:
        check_smarts(db_file, out_file)
        print(f"✅ 检查完成，结果已保存到 {out_file}")
    except Exception as e:
        print(f"❌ 处理失败: {e}")
        sys.exit(1)
98 | 
99 | 


--------------------------------------------------------------------------------
/secse/utilities/selectByLE.py:
--------------------------------------------------------------------------------
  1 | #!/tools/miniconda3/envs/cadd/bin/python
  2 | '''
  3 | This script will calculate ligand efficiency of a user specified property in a SDF file, bin the list by the property, select the rows with highest LE and save into a new SDF file
  4 | ## Author: zhentgpicasa@gmail.com
  5 | ## Revision History:
  6 | - 2024/3/17
  7 |   - Fist version
  8 | '''
  9 | 
 10 | import argparse
 11 | import os.path
 12 | import pandas as pd
 13 | from rdkit.Chem import rdMolDescriptors
 14 | from rdkit.Chem import AllChem
 15 | import os
 16 | import time
 17 | import pandas as pd
 18 | from rdkit.Chem import PandasTools
 19 | import numpy as np
 20 | from loguru import logger
 21 | 
# Wall-clock start, used by workflow() to report the total runtime.
startTime = time.time()
'''
if RDkit is installed as an virtual environment other than 'base' environment, within Jupyter some paths are not included, and thus rdkit will not be imported, so the missing path need to be added manually
Zhenting has tracked this bug on 2/21/2020.
'''
# Append "<prefix>Library\bin" to PATH, where <prefix> is the part of the
# os module's file path that precedes the first "lib".
pythonPath = os.__file__.split("lib")[0]
os.environ['PATH'] = os.environ[
                         'PATH'] + os.pathsep + pythonPath + r'Library\bin' + os.pathsep

# import click

# NOTE: the command line is parsed at import time, not inside workflow().
parser = argparse.ArgumentParser(
    description='Calculate ligand efficiency of a user specified property in a SDF file, bin the list by the property, select the rows with highest LE and save into a new SDF file')
parser.add_argument('-i', required=True, help='SDF input file')
parser.add_argument('-o', required=True, help='SDF output file')
parser.add_argument('-p', required=False,
                    help='Property for ligand efficiency calculation', default='docking score')
parser.add_argument('-d', default='ID', required=False,
                    help='Molecule ID column name')
parser.add_argument('-b', type=int, default=100,
                    required=False, help='Bin count')
args = parser.parse_args()

# Module-level settings read by workflow().
prop4LE = args.p  # property column used for the ligand-efficiency calculation
idCol = args.d  # molecule ID column
sdfFile = args.i  # input SDF path
outputSdfFile = args.o  # output SDF path
binCount = args.b  # number of bins over the property range
 50 | 
 51 | 
 52 | def workflow():
 53 |     # Read the SDF file into a DataFrame
 54 |     df = PandasTools.LoadSDF(sdfFile, removeHs=False)
 55 |     if idCol not in df.columns:
 56 |         logger.info('Molecule ID column name is not detected', df.columns)
 57 |         quit()
 58 |     if prop4LE not in df.columns:
 59 |         logger.info('Column name of the property for ligand efficiency calculation is not detected', df.columns)
 60 |         quit()
 61 |     try:  # Set data type to float
 62 |         df[prop4LE] = df[prop4LE].astype(float)
 63 |     except:
 64 |         '''Do nothing'''
 65 | 
 66 |     # Calculate the heavy atom count for each molecule
 67 |     df['HeavyAtomCount'] = df['ROMol'].apply(lambda x: x.GetNumHeavyAtoms())
 68 |     df['LE'] = df[prop4LE] / df['HeavyAtomCount']
 69 | 
 70 |     # Sort by prop4LE and remove duplicated rows
 71 |     df.sort_values([prop4LE], inplace=True, ascending=[True])
 72 |     df.drop_duplicates([idCol], inplace=True)
 73 | 
 74 |     # Calculate the range and step size
 75 |     min_value = df[prop4LE].min()
 76 |     max_value = df[prop4LE].max()
 77 |     range_of_values = max_value - min_value
 78 |     step_size = range_of_values / binCount
 79 | 
 80 |     # Create an array of bins
 81 |     bins = list(np.arange(min_value, max_value + step_size, step_size))
 82 | 
 83 |     # Bin the data into intervals
 84 |     df['bin'] = pd.cut(df[prop4LE], bins=bins, right=False)
 85 | 
 86 |     # Sort by LE
 87 |     df.sort_values('LE', inplace=True)
 88 |     # Keep the row with minimum LE in each bin
 89 |     resultDf = df.drop_duplicates(['bin']).copy()
 90 |     # Sort the result dataframe by prop4LE
 91 |     resultDf.sort_values([prop4LE], inplace=True)
 92 |     # Write the output SDF file
 93 |     PandasTools.WriteSDF(resultDf, outputSdfFile,
 94 |                          molColName='ROMol', properties=list(resultDf))
 95 | 
 96 |     logger.info('The script took {:.2f} second!'.format(time.time() - startTime))
 97 | 
 98 | 
# Run the selection only when the file is executed as a script.
if __name__ == '__main__':
    workflow()
101 | 


--------------------------------------------------------------------------------
/secse/utilities/wash_mol.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding:utf-8 _*-
  3 | """
  4 | @author: Lu Chong
  5 | @file: wash_mol.py
  6 | @time: 2021/02/08/14:13
  7 | """
  8 | 
  9 | import random
 10 | from openbabel import openbabel
 11 | from openbabel import pybel
 12 | from rdkit import Chem
 13 | from loguru import logger
 14 | 
 15 | 
 16 | def wash_mol(smi):
 17 |     ob_conversion = openbabel.OBConversion()
 18 |     ob_conversion.SetInAndOutFormats("smi", "can")
 19 |     ob_mol = openbabel.OBMol()
 20 |     ob_conversion.ReadString(ob_mol, smi)
 21 |     ob_conversion.Convert()
 22 |     res = ob_conversion.WriteString(ob_mol).strip()
 23 |     return res
 24 | 
 25 | 
def retreat_aromatic_nitrogen(smi):
    """Put one explicit hydrogen on a randomly chosen ``[nr5]`` nitrogen per ring.

    The SMILES is parsed without sanitization. For every ring that contains at
    least one atom matching the SMARTS ``[nr5]``, one of those atoms is picked
    at random and its explicit hydrogen count is set to 1.

    :param smi: input SMILES string
    :return: SMILES of the modified molecule
    """
    mol = Chem.MolFromSmiles(smi, sanitize=False)
    # Sanitization was skipped, so property cache and ring info are set up here.
    mol.UpdatePropertyCache()
    Chem.GetSymmSSSR(mol)
    ri = mol.GetRingInfo()
    aromatic_n_atoms = mol.GetSubstructMatches(Chem.MolFromSmarts('[nr5]'))
    res = set()  # atom indices that will receive the explicit hydrogen
    for ring in ri.AtomRings():
        # Collect the matched nitrogens that belong to this ring.
        n_at_ring = set()
        for n_atom in aromatic_n_atoms:
            tmp = set(n_atom).intersection(set(ring))
            if tmp:
                n_at_ring = n_at_ring.union(n_atom)
        if n_at_ring:
            # One nitrogen per ring; using a set means an atom picked for two
            # rings is only recorded once.
            res.add(random.choice(list(n_at_ring)))
    for index in res:
        atom = mol.GetAtomWithIdx(index)
        atom.SetNumExplicitHs(1)

    return Chem.MolToSmiles(mol)
 46 | 
 47 | 
 48 | def neutralize(smi):
 49 |     mol = Chem.MolFromSmiles(smi)
 50 |     if mol is None:
 51 |         smi = wash_mol(smi)
 52 |         mol = Chem.MolFromSmiles(smi)
 53 |         if mol is None:
 54 |             return "C"
 55 |     new_mol = neutralize_atoms(mol)
 56 |     return new_mol, Chem.MolToSmiles(new_mol)
 57 | 
 58 | 
 59 | def neutralize_atoms(mol):
 60 |     pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
 61 |     at_matches = mol.GetSubstructMatches(pattern)
 62 |     at_matches_list = [y[0] for y in at_matches]
 63 |     if len(at_matches_list) > 0:
 64 |         for at_idx in at_matches_list:
 65 |             atom = mol.GetAtomWithIdx(at_idx)
 66 |             chg = atom.GetFormalCharge()
 67 |             hcount = atom.GetTotalNumHs()
 68 |             atom.SetFormalCharge(0)
 69 |             atom.SetNumExplicitHs(hcount - chg)
 70 |             atom.UpdatePropertyCache()
 71 |     return mol
 72 | 
 73 | 
 74 | def charge_mol(smi):
 75 |     mol = pybel.readstring("smi", smi)
 76 |     mol.removeh()
 77 |     mol.OBMol.AddHydrogens(False, True, 7.4)
 78 |     # mol.OBMol.CorrectForPH(7.4)
 79 |     charged_smi = mol.write("can", None, overwrite=False).strip()
 80 |     return charged_smi
 81 | 
 82 | 
 83 | def radical_filter(smi):
 84 |     mol = Chem.MolFromSmiles(smi)
 85 |     for a in mol.GetAtoms():
 86 |         if a.GetNumRadicalElectrons() == 1:
 87 |             return False
 88 |     return True
 89 | 
 90 | 
 91 | def get_bridged_atoms(mol):
 92 |     ri = mol.GetRingInfo()
 93 |     bond_rings = ri.BondRings()
 94 |     bridged_atoms = set()
 95 | 
 96 |     for i in range(len(bond_rings)):
 97 |         bond_ring_i = set(bond_rings[i])
 98 |         for j in range(i):
 99 |             bond_ring_j = set(bond_rings[j])
100 |             common_bonds = bond_ring_i.intersection(bond_ring_j)
101 | 
102 |             if len(common_bonds) > 1:
103 |                 atoms = [0] * len(mol.GetAtoms())
104 |                 bridged_unit = ()
105 |                 for b in common_bonds:
106 |                     atoms[mol.GetBondWithIdx(b).GetBeginAtomIdx()] += 1
107 |                     atoms[mol.GetBondWithIdx(b).GetEndAtomIdx()] += 1
108 |                 for idx in range(len(atoms)):
109 |                     if atoms[idx] == 1:
110 |                         bridged_unit += (idx,)
111 |                 bridged_atoms.add(bridged_unit)
112 |     return bridged_atoms
113 | 
114 | 
def get_keen_rotatable_bound_num(mol):
    """Count the substructure matches of the rotatable-bond SMARTS in ``mol``.

    :param mol: RDKit molecule
    :return: number of matches
    """
    pattern = '[C^3!D1;!$(C(F)(F)F)]-!@[!Br!F!Cl!I!H3&!$(*#*)!D1;!$([!Br!F!Cl!I](F)(F)F)]'
    query = Chem.MolFromSmarts(pattern)
    matches = mol.GetSubstructMatches(query)
    return len(matches)
120 | 
121 | 
def get_rigid_body_num(mol):
    """Count the substructure matches of the SMARTS pattern below in ``mol``.

    :param mol: RDKit molecule
    :return: number of matches
    """
    query = Chem.MolFromSmarts(
        "[C^3!D1;!$(C(F)(F)F);!R;!$(C=O(N));!$(NC(=O));!$(C(=O)O);!$(C(=O)O)]-!@[!Br!F!Cl!I!H3&!$(*#*)!D1;!$([!Br!F!Cl!I](F)(F)F);!R;!$(C=O([N,O]));!$(NC(=O));!$(C(=O)O)]")
    matches = mol.GetSubstructMatches(query)
    return len(matches)
126 | 


--------------------------------------------------------------------------------
/secse/utilities/excel2db.py:
--------------------------------------------------------------------------------
  1 | import sqlite3
  2 | import pandas as pd
  3 | import json
  4 | import re
  5 | import os
  6 | from rdkit import Chem
  7 | from rdkit.Chem import Descriptors, rdChemReactions
  8 | from rdkit.Chem.rdMolDescriptors import CalcExactMolWt, CalcFractionCSP3, CalcNumRings
  9 | from rdkit.Chem import FindMolChiralCenters
 10 | from loguru import logger
 11 | 
 12 | 
 13 | def read_excel(filename, sheet_name):
 14 |     """Read an Excel sheet into a DataFrame."""
 15 |     return pd.read_excel(filename, sheet_name=sheet_name)
 16 | 
 17 | 
 18 | def write_to_json(df, filename):
 19 |     """Write a DataFrame to a JSON file."""
 20 |     df.to_json(filename, orient='records', force_ascii=False, indent=4)
 21 | 
 22 | 
 23 | def write_to_sqlite(df, table, db_path):
 24 |     """Write a DataFrame to an SQLite database."""
 25 |     with sqlite3.connect(db_path) as conn:
 26 |         df.to_sql(table, conn, if_exists='replace', index=False)
 27 | 
 28 | 
 29 | def test_rxn(sma):
 30 |     """Test if a SMARTS string can be converted to an RDKit reaction."""
 31 |     try:
 32 |         rdChemReactions.ReactionFromSmarts(sma)
 33 |     except Exception as e:
 34 |         logger.error(f"Error processing SMARTS: {sma}\n{e}")
 35 | 
 36 | 
 37 | def add_prop(df, ref_smi):
 38 |     mol = Chem.MolFromSmiles(ref_smi)
 39 |     mol_weight_ref = CalcExactMolWt(mol)
 40 |     fsp3_ref = CalcFractionCSP3(mol)
 41 |     ring_num_ref = CalcNumRings(mol)
 42 |     logp_ref = Descriptors.MolLogP(mol)
 43 |     chiral_num_ref = len(FindMolChiralCenters(mol, includeUnassigned=True))
 44 | 
 45 |     for index, row in df.iterrows():
 46 |         sma = row['SMARTS']
 47 |         try:
 48 |             rxn = rdChemReactions.ReactionFromSmarts(sma)
 49 |             products = rxn.RunReactants((mol,))
 50 |             new = Chem.MolFromSmiles(Chem.MolToSmiles(products[0][0]))
 51 |             mol_weight = CalcExactMolWt(new)
 52 |             df.at[index, 'ΔMW'] = mol_weight - mol_weight_ref
 53 |             fsp3 = CalcFractionCSP3(new)
 54 |             df.at[index, 'ΔFsp3'] = fsp3 - fsp3_ref
 55 |             ring_num = CalcNumRings(new)
 56 |             df.at[index, 'ΔNR'] = ring_num - ring_num_ref
 57 |             logp = Descriptors.MolLogP(mol)
 58 |             df.at[index, 'ΔlogP'] = logp - logp_ref
 59 |             chiral_num = len(FindMolChiralCenters(new, includeUnassigned=True))
 60 |             df.at[index, 'ΔNCC'] = chiral_num - chiral_num_ref
 61 |         except Exception as e:
 62 |             logger.error(e)
 63 |             logger.error(sma)
 64 |     return df
 65 | 
 66 | 
 67 | def process_sheet(sheet_df, ref_smi):
 68 |     """Process a sheet DataFrame to calculate properties and test reactions."""
 69 |     for sma in sheet_df['SMARTS']:
 70 |         test_rxn(sma)
 71 |     add_prop(sheet_df, ref_smi)
 72 |     return sheet_df
 73 | 
 74 | 
 75 | def main(excel_filename, output_type):
 76 |     """Main function to convert Excel to DB or JSON based on user input."""
 77 |     pattern = r'^[A-Za-z]-\d{3}$'
 78 |     collect_df = []
 79 |     db_name = f"{os.path.splitext(excel_filename)[0]}.db" if output_type == 'db' else None
 80 | 
 81 |     xls = pd.ExcelFile(excel_filename)
 82 |     for sheet_name in xls.sheet_names:
 83 |         if re.match(pattern, sheet_name):
 84 |             logger.info(f"Processing sheet: {sheet_name}")
 85 |             sheet_df = read_excel(excel_filename, sheet_name)
 86 |             # sheet_df = process_sheet(sheet_df, ref_smi='YourReferenceSMILES')
 87 | 
 88 |             if output_type == 'db':
 89 |                 write_to_sqlite(sheet_df, sheet_name, db_name)
 90 |             else:
 91 |                 if sheet_name == "G-002":
 92 |                     new_df = sheet_df[['Rule ID', 'SMARTS', 'Spacer Priority', 'Ring Priority']]
 93 |                 else:
 94 |                     new_df = sheet_df[['Rule ID', 'SMARTS', 'Priority']]  # Adjust columns as needed
 95 |                 collect_df.append(new_df)
 96 |             logger.info(f"Finished processing {output_type} for {sheet_name}")
 97 | 
 98 |     if output_type == 'json':
 99 |         combined_df = pd.concat(collect_df, ignore_index=True)
100 |         output_filename = f"{os.path.splitext(excel_filename)[0]}.json"
101 |         write_to_json(combined_df, output_filename)
102 |         logger.info("All sheets processed and JSON file created.")
103 | 
104 | 
if __name__ == "__main__":
    # Both settings are asked for interactively.
    excel_filename = input("Enter the Excel file name: ")
    output_type = input("Choose the output type (db for database, json for JSON file): ")
    if output_type in ('db', 'json'):
        main(excel_filename, output_type)
    else:
        logger.error("Invalid output type. Please choose 'db' for database or 'json' for JSON file.")
112 | 


--------------------------------------------------------------------------------
/secse/run_secse.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding:utf-8 _*-
  3 | """
  4 | @author: Lu Chong
  5 | @file: run_secse.py
  6 | @time: 2020/11/02/13:35
  7 | """
  8 | import argparse
  9 | import time
 10 | import configparser
 11 | from loguru import logger
 12 | from datetime import datetime
 13 | from pathlib import Path
 14 | 
 15 | from grow_processes import Grow
 16 | from report.grow_path import write_growth
 17 | 
 18 | 
 19 | def setup_logger(project_code, work_directory):
 20 |     # Ensure work_directory is a Path object for compatibility
 21 |     if not isinstance(work_directory, Path):
 22 |         work_directory = Path(work_directory)
 23 | 
 24 |     # timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 25 |     log_file_path = work_directory / f'{project_code}.log'
 26 |     error_file_path = work_directory / f'{project_code}_error.log'
 27 | 
 28 |     logger.add(log_file_path, rotation="10 MB", backtrace=True, diagnose=True, level="INFO", mode='a')
 29 |     logger.add(error_file_path, rotation="5 MB", backtrace=True, diagnose=True, level="ERROR", mode='a')
 30 |     return logger
 31 | 
 32 | 
 33 | def main():
 34 |     parser = argparse.ArgumentParser(description="SECSE")
 35 | 
 36 |     parser.add_argument("--config", help="path of config file", default=False)
 37 |     args = parser.parse_args()
 38 | 
 39 |     try:
 40 |         # config file given
 41 |         config = configparser.ConfigParser()
 42 |         config.read(args.config)
 43 |         project_code = config.get("general", "project_code")
 44 |         workdir = config.get("general", "workdir")
 45 | 
 46 |         setup_logger(project_code, workdir)
 47 | 
 48 |         num_gen = config.getint("general", "num_gen")
 49 |         mols_smi = config.get("general", "fragments")
 50 | 
 51 |         num_per_gen = config.getint("general", "num_per_gen")
 52 |         start_gen = config.getint("general", "start_gen")
 53 |         docking_program = config.get("docking", "docking_program")
 54 |         cpu_num = config.getint("general", "cpu")
 55 |         gpu_num = config.getint("general", "gpu")
 56 |         rule_db = config.get("general", "rule_db")
 57 | 
 58 |         receptor = config.get("docking", "target")
 59 |         dl_mode = config.getint("prediction", "mode")
 60 |         if "vina" in docking_program.lower() or "unidock" in docking_program.lower():
 61 |             x = config.getfloat("docking", "x")
 62 |             y = config.getfloat("docking", "y")
 63 |             z = config.getfloat("docking", "z")
 64 |             box_size_x = config.getfloat("docking", "box_size_x")
 65 |             box_size_y = config.getfloat("docking", "box_size_y")
 66 |             box_size_z = config.getfloat("docking", "box_size_z")
 67 | 
 68 | 
 69 |     except Exception as e:
 70 |         logger.error("Please check your input arguments.")
 71 |         return None
 72 | 
 73 |     if "vina" in docking_program.lower():
 74 |         workflow = Grow(num_gen, mols_smi, workdir, num_per_gen, docking_program, receptor, start_gen, dl_mode,
 75 |                         args.config, cpu_num=cpu_num, rule_db=rule_db, project_code=project_code, x=x, y=y, z=z,
 76 |                         box_size_x=box_size_x, box_size_y=box_size_y, box_size_z=box_size_z)
 77 |     elif "glide" in docking_program.lower():
 78 |         workflow = Grow(num_gen, mols_smi, workdir, num_per_gen, docking_program, receptor, start_gen, dl_mode,
 79 |                         args.config, cpu_num=cpu_num, rule_db=rule_db, project_code=project_code)
 80 |     elif "autodock-gpu" in docking_program.lower():
 81 |         workflow = Grow(num_gen, mols_smi, workdir, num_per_gen, docking_program, receptor, start_gen, dl_mode,
 82 |                         args.config, cpu_num=cpu_num, gpu_num=gpu_num, rule_db=rule_db, project_code=project_code)
 83 |     elif "unidock" in docking_program.lower():
 84 |         workflow = Grow(num_gen, mols_smi, workdir, num_per_gen, docking_program, receptor, start_gen, dl_mode,
 85 |                         args.config, cpu_num=cpu_num, rule_db=rule_db, project_code=project_code, x=x, y=y, z=z,
 86 |                         box_size_x=box_size_x, box_size_y=box_size_y, box_size_z=box_size_z)
 87 |     else:
 88 |         logger.error("Please check your input docking program argument.")
 89 |         return None
 90 |     workflow.grow()
 91 | 
 92 | 
 93 | if __name__ == '__main__':
 94 |     time1 = time.time()
 95 |     logger.info(
 96 |         "\n"
 97 |         + "*" * 88 + "\n"
 98 |                      "      ____    _____    ____   ____    _____ \n"
 99 |                      "     / ___|  | ____|  / ___| / ___|  | ____|\n"
100 |                      "     \\___ \\  |  _|   | |     \\___ \\  |  _|  \n"
101 |                      "      ___) | | |___  | |___   ___) | | |___ \n"
102 |                      "     |____/  |_____|  \\____| |____/  |_____| v1.3\n"
103 |         + "*" * 88
104 |     )
105 | 
106 |     try:
107 |         main()
108 |     except SystemExit as err:
109 |         logger.info(f"Program exited with status: {err}")
110 |     except KeyboardInterrupt:
111 |         logger.info("Program interrupted by user")
112 |     except Exception as e:
113 |         logger.error("An unexpected error occurred", exc_info=True)
114 | 
115 |     time2 = time.time()
116 |     logger.info("Time consumption (total): {} hours".format(round((time2 - time1) / 3600, 2)))
117 | 


--------------------------------------------------------------------------------
/secse/scoring/docking_score_prediction.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python  
  2 | # -*- coding:utf-8 _*-
  3 | """ 
  4 | @author: Lu Chong
  5 | @file: docking_score_prediction.py
  6 | @time: 2021/10/27/14:26
  7 | """
  8 | import argparse
  9 | 
 10 | from openbabel import openbabel
 11 | import pandas as pd
 12 | import os
 13 | import rdkit
 14 | from rdkit import Chem
 15 | from rdkit.Chem import PandasTools
 16 | from rdkit.Chem.MolStandardize import rdMolStandardize
 17 | from tqdm import tqdm
 18 | from utilities.function_helper import shell_cmd_execute
 19 | from loguru import logger
 20 | rdkit.RDLogger.DisableLog("rdApp.*")
 21 | 
 22 | 
 23 | def get_train(sdf, dock):
 24 |     g = PandasTools.LoadSDF(sdf, molColName='Molecule')
 25 | 
 26 |     g_smi = pd.read_csv(dock, sep="\t", header=None)
 27 |     g_smi.columns = ["Smiles", "ID"]
 28 |     g_smi = g_smi.drop_duplicates(subset="ID")
 29 |     g_smi = g_smi.set_index("ID")
 30 | 
 31 |     g = g[["ID", "Molecule", "docking score"]]
 32 |     g["docking score"] = g["docking score"].astype(float)
 33 |     g = g.sort_values("docking score", ascending=True)
 34 | 
 35 |     g["Smiles"] = g["ID"].apply(lambda x: g_smi.loc[x.rsplit("-C", 1)[0]][0])
 36 |     g_new = g.sort_values(by="docking score", ascending=True).drop_duplicates(subset="Smiles", keep="first")
 37 | 
 38 |     smi = g_new["Smiles"].apply(lambda x: neutralize(x))
 39 |     g_new["Smiles"] = smi
 40 |     g_new = g_new.drop_duplicates(subset="Smiles", keep="first")
 41 |     return g_new
 42 | 
 43 | 
 44 | def get_pre(workdir, max_gen, get_all=False):
 45 |     pre_dir = os.path.join(workdir, "prediction")
 46 |     if get_all:
 47 |         pre_raw = os.path.join(pre_dir, "all_G" + str(max_gen) + "_for_pre.raw")
 48 |         pre_file = os.path.join(pre_dir, "all_G" + str(max_gen) + "_for_pre.csv")
 49 | 
 50 |         cmd_cat = ["find", workdir, "-name \"filter.csv\" |xargs awk -F, 'FNR>1{{print $(NF-5)\",\"$(NF-6)}}' >",
 51 |                    pre_raw]
 52 |         shell_cmd_execute(cmd_cat)
 53 |         cmd_dedup = ["awk -F',' '!seen[$2]++'", pre_raw, ">", pre_file]
 54 |         shell_cmd_execute(cmd_dedup)
 55 | 
 56 |         drop_mols = os.path.join(pre_dir, "drop_ids.txt")
 57 |         mols_id_cat = ["find", workdir, "-name \"mols_for_docking.smi\" |xargs cut -f2  >", drop_mols]
 58 |         shell_cmd_execute(mols_id_cat)
 59 |         final_file = os.path.join(pre_dir, "all_G" + str(max_gen) + "_for_pre_uniq.csv")
 60 |     else:
 61 |         pre_file = os.path.join(pre_dir, "gen_" + str(max_gen) + "_for_pre.csv")
 62 |         cmd_cp = ["awk -F, 'NR>1{{print $(NF-5)\",\"$(NF-6)}}'",
 63 |                   os.path.join(workdir, "generation_" + str(max_gen), "filter.csv"), ">", pre_file]
 64 |         shell_cmd_execute(cmd_cp)
 65 | 
 66 |         drop_mols = os.path.join(pre_dir, "drop_ids_{}.txt".format(max_gen))
 67 |         mols_id_cat = ["cut -f2", os.path.join(workdir, "generation_" + str(max_gen), "mols_for_docking.smi"), ">",
 68 |                        drop_mols]
 69 |         shell_cmd_execute(mols_id_cat)
 70 |         final_file = os.path.join(pre_dir, "gen_" + str(max_gen) + "_for_pre_uniq.csv")
 71 | 
 72 |     try:
 73 |         cmd_drop = ["grep -wvf", drop_mols, pre_file, ">", final_file]
 74 |         shell_cmd_execute(cmd_drop)
 75 |     except:
 76 |         final_file = None
 77 |     return final_file
 78 | 
 79 | 
 80 | def neutralize(smi):
 81 |     mol = Chem.MolFromSmiles(smi)
 82 |     if mol is None:
 83 |         smi = wash_mol(smi)
 84 |         mol = Chem.MolFromSmiles(smi)
 85 |         if mol is None:
 86 |             return "C"
 87 |     uc = rdMolStandardize.Uncharger()
 88 |     return Chem.MolToSmiles(uc.uncharge(mol))
 89 | 
 90 | 
 91 | def wash_mol(smi):
 92 |     ob_conversion = openbabel.OBConversion()
 93 |     ob_conversion.SetInAndOutFormats("smi", "can")
 94 |     ob_mol = openbabel.OBMol()
 95 |     ob_conversion.ReadString(ob_mol, smi)
 96 |     ob_conversion.Convert()
 97 |     res = ob_conversion.WriteString(ob_mol).strip()
 98 |     return res
 99 | 
100 | 
def prepare_files(max_gen, workdir, dl_mode):
    """Prepare the training and prediction files for the deep-learning step.

    :param max_gen: latest generation number.
    :param workdir: project working directory; outputs go to ``<workdir>/prediction``.
    :param dl_mode: 1 = use only generation ``max_gen``; 2 = merge generations
        1..``max_gen``.
    :return: ``(train_csv_path, prediction_csv_path)``; ``None`` for any other
        ``dl_mode``.
    """
    pre_dir = os.path.join(workdir, "prediction")
    os.makedirs(pre_dir, exist_ok=True)

    def pre_train_per_gen(gen):
        # build and persist the training table of a single generation
        sdf = os.path.join(workdir, "generation_{}/docking_outputs_with_score.sdf".format(gen))
        dock = os.path.join(workdir, "generation_{}/mols_for_docking.smi".format(gen))
        table = get_train(sdf, dock)[['Smiles', 'docking score']]
        table.to_csv(os.path.join(pre_dir, "train_G{}.csv".format(gen)), index=False)
        return table

    if dl_mode == 1:
        # current generation only
        pre_train_per_gen(max_gen)
        train = os.path.join(pre_dir, "train_G{}.csv".format(max_gen))
        return train, get_pre(workdir, max_gen, False)

    if dl_mode == 2:
        # every generation, merged and reduced to the best score per SMILES
        cum_path = os.path.join(pre_dir, "train_G" + str(max_gen) + "_all.csv")
        per_gen = [pre_train_per_gen(i) for i in tqdm(range(1, max_gen + 1))]
        merged = pd.concat(per_gen, axis=0)
        merged = merged.sort_values(by="docking score", ascending=True)
        merged = merged.drop_duplicates(subset="Smiles", keep="first")
        merged.to_csv(cum_path, index=False)
        return cum_path, get_pre(workdir, max_gen, True)

    return None
134 | 
135 | 
if __name__ == '__main__':
    # Command-line entry point: prepare training/prediction files for one run.
    parser = argparse.ArgumentParser(description="SECSE -- Prepare Data for Deep Learning")
    parser.add_argument("max_gen", help="Max number of generation.", type=int)
    parser.add_argument("workdir", help="Workdir")
    # dl_mode is a required positional, so argparse never uses a default for it.
    parser.add_argument("dl_mode",
                        help="Mode of deep learning modeling, 1: modeling per generation, 2: modeling overall after all the generation",
                        type=int)
    args = parser.parse_args()
    prepare_files(args.max_gen, args.workdir, args.dl_mode)
145 | 


--------------------------------------------------------------------------------
/secse/evaluate/docking.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python  
  2 | # -*- coding:utf-8 _*-
  3 | """ 
  4 | @author: Lu Chong
  5 | @file: docking.py
  6 | @time: 2021/9/6/11:22
  7 | """
  8 | import argparse
  9 | import os
 10 | import shutil
 11 | import glob
 12 | import sys
 13 | from loguru import logger
 14 | from rdkit import Chem
 15 | from rdkit.Chem import AllChem
 16 | from utilities.function_helper import shell_cmd_execute
 17 | 
# Add the directory named by the SECSE environment variable to the import path.
# NOTE(review): `utilities.function_helper` is imported above, before this line runs,
# so that import must already be resolvable by other means - confirm.
sys.path.append(os.getenv("SECSE"))

# Paths of the shell drivers, one per docking program, under $SECSE/evaluate.
# NOTE(review): os.getenv returns None when SECSE is unset, which makes
# os.path.join raise TypeError at import time.
VINA_SHELL = os.path.join(os.getenv("SECSE"), "evaluate", "ligprep_vina_parallel.sh")
AUTODOCK_GPU_SHELL = os.path.join(os.getenv("SECSE"), "evaluate", "ligprep_autodock_gpu.sh")
UNIDOCK_SHELL = os.path.join(os.getenv("SECSE"), "evaluate", "ligprep_unidock.sh")
 23 | 
 24 | 
 25 | def dock_by_py_vina(workdir, smi, receptor, cpu_num, x, y, z, box_size_x=20, box_size_y=20, box_size_z=20):
 26 |     cmd = list(map(str, [VINA_SHELL, workdir, smi, receptor, x, y, z, box_size_x, box_size_y, box_size_z, cpu_num]))
 27 |     shell_cmd_execute(cmd)
 28 |     merged_sdf(workdir, 0)
 29 | 
 30 | 
 31 | def dock_by_py_autodock_gpu(workdir, smi, receptor, cpu_num, gpu_num):
 32 |     cmd = list(map(str, [AUTODOCK_GPU_SHELL, workdir, smi, receptor, cpu_num, gpu_num]))
 33 |     shell_cmd_execute(cmd)
 34 |     merged_sdf(workdir, 1)
 35 | 
 36 | 
 37 | def dock_by_unidock(workdir, smi, receptor, cpu_num, x, y, z, box_size_x=20, box_size_y=20, box_size_z=20):
 38 |     if not os.environ.get("UNIDOCK"):
 39 |         os.environ["UNIDOCK"] = "unidock"
 40 |     cmd = list(map(str, [UNIDOCK_SHELL, workdir, smi, receptor, x, y, z, box_size_x, box_size_y, box_size_z, cpu_num]))
 41 |     shell_cmd_execute(cmd)
 42 |     for res_file in glob.glob(os.path.join(workdir, "pdb_files", "*.pdb")):
 43 |         new_name = os.path.basename(res_file).replace("_out", "")
 44 |         os.rename(res_file, os.path.join(workdir, "pdb_files", new_name))
 45 |     merged_sdf(workdir, 2)
 46 | 
 47 | 
 48 | def merged_sdf(workdir, program):
 49 |     # modify output sdf
 50 |     check_mols(workdir, program)
 51 |     out_sdf = os.path.join(workdir, "docking_outputs_with_score.sdf")
 52 |     cmd_cat = ["find", os.path.join(workdir, "sdf_files"), "-name \"*sdf\" | xargs -n 100 cat >", out_sdf]
 53 |     shell_cmd_execute(cmd_cat)
 54 |     # remove temporary files
 55 |     shutil.rmtree(os.path.join(workdir, "pdb_files"))
 56 |     shutil.rmtree(os.path.join(workdir, "ligands_for_docking"))
 57 |     shutil.rmtree(os.path.join(workdir, "docking_poses"))
 58 |     shutil.rmtree(os.path.join(workdir, "docking_split"))
 59 | 
 60 | 
 61 | def check_mols(workdir, program):
 62 |     files = os.listdir(os.path.join(workdir, "pdb_files"))
 63 |     for i in files:
 64 |         raw_id = i.rsplit("-dp", 1)[0]
 65 |         pdb_path = os.path.join(workdir, "pdb_files", i)
 66 |         sdf_path = os.path.join(workdir, "sdf_files", i.replace("pdb", "sdf"))
 67 |         raw_mol = Chem.SDMolSupplier(os.path.join(workdir, "ligands_for_docking", raw_id + ".sdf"))[0]
 68 |         mol = AllChem.MolFromPDBFile(pdb_path, removeHs=True)
 69 |         if mol:
 70 |             try:
 71 |                 new = AllChem.AssignBondOrdersFromTemplate(raw_mol, mol)
 72 |             except ValueError:
 73 |                 logger.error("Failed check: ", i)
 74 |                 continue
 75 |             new = Chem.AddHs(new, addCoords=True)
 76 |             Chem.MolToMolFile(new, sdf_path)
 77 |             if program == 0 or program == 2:
 78 |                 with open(pdb_path, "r") as pdb:
 79 |                     for line in pdb.readlines():
 80 |                         if line.startswith("REMARK VINA RESULT"):
 81 |                             score = line.split(":")[1][:10].replace(" ", "")
 82 |                             with open(sdf_path, "a") as sdf:
 83 |                                 newline = "\n".join(["> <docking score>", score, "\n$$$$\n"])
 84 |                                 sdf.write(newline)
 85 |             elif program == 1:
 86 |                 with open(pdb_path, "r") as pdb:
 87 |                     for line in pdb.readlines():
 88 |                         if "kcal" in line:
 89 |                             score = line.split("kcal")[0].replace(" ", "")[6:]
 90 |                             with open(sdf_path, "a") as sdf:
 91 |                                 newline = "\n".join(["> <docking score>", score, "\n$$$$\n"])
 92 |                                 sdf.write(newline)
 93 | 
 94 | 
 95 | if __name__ == '__main__':
 96 |     parser = argparse.ArgumentParser(description="Run Open-source Docking Program for SMILES Format.")
 97 |     parser.add_argument("program", help="Name of docking program, input vina or autodock-gpu", type=str)
 98 |     parser.add_argument("workdir", help="Workdir")
 99 |     parser.add_argument("mols_smi", help="Seed fragments")
100 |     parser.add_argument("receptor", help="Target PDBQT")
101 | 
102 |     parser.add_argument("cpu_num", help="Number of CPU cores")
103 | 
104 |     parser.add_argument("--gpu_num", help="Number of GPUs")
105 |     parser.add_argument("--x", help="Docking box x", type=float)
106 |     parser.add_argument("--y", help="Docking box y", type=float)
107 |     parser.add_argument("--z", help="Docking box z", type=float)
108 | 
109 |     parser.add_argument("--box_size_x", help="Docking box size x, default 20", type=float, default=20)
110 |     parser.add_argument("--box_size_y", help="Docking box size y, default 20", type=float, default=20)
111 |     parser.add_argument("--box_size_z", help="Docking box size z, default 20", type=float, default=20)
112 | 
113 |     args = parser.parse_args()
114 |     if args.program == "vina":
115 |         logger.info("Docking by Autodock Vina with {} CPUs...".format(args.cpu_num))
116 |         dock_by_py_vina(args.workdir, args.mols_smi, args.receptor, args.cpu_num, args.x, args.y, args.z,
117 |                         args.box_size_x, args.box_size_y, args.box_size_z)
118 |     elif args.program == "autodock-gpu":
119 |         logger.info("Docking by Autodock-GPU with {} CPUs and {} GPUs...".format(args.cpu_num, args.gpu_num))
120 |         dock_by_py_autodock_gpu(args.workdir, args.mols_smi, args.receptor, args.cpu_num, args.gpu_num)
121 |     else:
122 |         logger.error("Please choose a docking program.")
123 | 


--------------------------------------------------------------------------------
/secse/utilities/ring_tool.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python  
  2 | # -*- coding:utf-8 _*-
  3 | """
  4 | @author: Lu Chong
  5 | @file: ring tool.py
  6 | @time: 2021/02/07/14:17
  7 | """
  8 | from rdkit import Chem
  9 | from loguru import logger
 10 | 
 11 | 
 12 | def ring_site_count(ring_atoms, systems):
 13 |     site_count = [-1]  # add -1 in case no ring site
 14 |     for ring_s in systems:
 15 |         ring_s = set(ring_s)
 16 |         count = 0
 17 |         for site in ring_atoms:
 18 |             site = set(site)
 19 |             if ring_s.intersection(site):
 20 |                 count += 1
 21 |         site_count.append(count)
 22 |     return site_count
 23 | 
 24 | 
 25 | class RingSystems(object):
 26 |     def __init__(self, mol):
 27 |         self.mol = mol
 28 |         self.ri = self.mol.GetRingInfo()
 29 |         self.atom_rings = self.ri.AtomRings()
 30 |         self.bond_rings = self.ri.BondRings()
 31 |         self.systems = self.ring_systems()
 32 | 
 33 |     def ring_systems(self):
 34 |         systems = []
 35 |         for ring in self.atom_rings:
 36 |             ringAts = set(ring)
 37 |             nSystems = []
 38 |             for system in systems:
 39 |                 nInCommon = len(ringAts.intersection(system))
 40 |                 if nInCommon:
 41 |                     ringAts = ringAts.union(system)
 42 |                 else:
 43 |                     nSystems.append(system)
 44 |             nSystems.append(ringAts)
 45 |             systems = nSystems
 46 |         return systems
 47 | 
 48 |     # ring size of each ring system
 49 |     def ring_systems_size(self):
 50 |         ring_sys_size = []
 51 |         for ring_s in self.systems:
 52 |             ring_s = set(ring_s)
 53 |             size = 0
 54 |             for ring in self.atom_rings:
 55 |                 ring = set(ring)
 56 |                 if ring_s.intersection(ring):
 57 |                     size += 1
 58 |             ring_sys_size.append(size)
 59 |         return ring_sys_size
 60 | 
 61 |     def get_spiro_atoms(self):
 62 |         spiro = []
 63 |         spiro_atoms = set()
 64 |         for i in range(len(self.atom_rings)):
 65 |             atom_ring_i = set(self.atom_rings[i])
 66 |             for j in range(i):
 67 |                 atom_ring_j = set(self.atom_rings[j])
 68 |                 common_atoms = atom_ring_i.intersection(atom_ring_j)
 69 |                 if len(common_atoms) == 1:
 70 |                     atoms = [0] * len(self.mol.GetAtoms())
 71 |                     for a in common_atoms:
 72 |                         atoms[a] += 1
 73 | 
 74 |                     for idx in range(len(atoms)):
 75 |                         if atoms[idx] == 1:
 76 |                             spiro = (idx,)
 77 |                     spiro_atoms.add(spiro)
 78 |         return spiro_atoms
 79 | 
 80 |     def get_fused_atoms(self):
 81 |         fused_atoms = set()
 82 | 
 83 |         for i in range(len(self.bond_rings)):
 84 |             bond_ring_i = set(self.bond_rings[i])
 85 |             for j in range(i):
 86 |                 bond_ring_j = set(self.bond_rings[j])
 87 |                 common_bonds = bond_ring_i.intersection(bond_ring_j)
 88 |                 if len(common_bonds) == 1:
 89 |                     atoms = [0] * len(self.mol.GetAtoms())
 90 |                     fused_unit = ()
 91 | 
 92 |                     for b in common_bonds:
 93 |                         atoms[self.mol.GetBondWithIdx(b).GetBeginAtomIdx()] += 1
 94 |                         atoms[self.mol.GetBondWithIdx(b).GetEndAtomIdx()] += 1
 95 |                     for idx in range(len(atoms)):
 96 |                         if atoms[idx] == 1:
 97 |                             fused_unit += (idx,)
 98 |                     fused_atoms.add(fused_unit)
 99 | 
100 |         return fused_atoms
101 | 
102 |     def get_bridged_atoms(self):
103 |         bridged_atoms = set()
104 | 
105 |         for i in range(len(self.bond_rings)):
106 |             bond_ring_i = set(self.bond_rings[i])
107 |             for j in range(i):
108 |                 bond_ring_j = set(self.bond_rings[j])
109 |                 common_bonds = bond_ring_i.intersection(bond_ring_j)
110 | 
111 |                 if len(common_bonds) > 1:
112 |                     atoms = [0] * len(self.mol.GetAtoms())
113 |                     bridged_unit = ()
114 |                     for b in common_bonds:
115 |                         atoms[self.mol.GetBondWithIdx(b).GetBeginAtomIdx()] += 1
116 |                         atoms[self.mol.GetBondWithIdx(b).GetEndAtomIdx()] += 1
117 |                     for idx in range(len(atoms)):
118 |                         if atoms[idx] == 1:
119 |                             bridged_unit += (idx,)
120 |                     bridged_atoms.add(bridged_unit)
121 |         return bridged_atoms
122 | 
123 |     def spiro_site_count(self):
124 |         return ring_site_count(self.get_spiro_atoms(), self.systems)
125 | 
126 |     def bridged_site_count(self):
127 |         return ring_site_count(self.get_bridged_atoms(), self.systems)
128 | 
129 |     def fused_site_count(self):
130 |         return ring_site_count(self.get_fused_atoms(), self.systems)
131 | 
132 |     def ring_system_count_filter(self, num=4):
133 |         return len(self.systems) <= num
134 | 
135 |     def largest_ring_system_size_filter(self, num=3):
136 |         return max(self.ring_systems_size() + [-1]) <= num
137 | 
138 |     def largest_spiro_site_filter(self, num=1):
139 |         return max(self.spiro_site_count()) <= num
140 | 
141 |     def largest_fused_site_filter(self, num=3):
142 |         return max(self.fused_site_count()) <= num
143 | 
144 |     def largest_bridged_site_filter(self, num=2):
145 |         return max(self.bridged_site_count()) <= num
146 | 
147 |     def bridged_atom_is_aromatic_filter(self):
148 |         bridged_atoms = self.get_bridged_atoms()
149 |         for atom_cubic in bridged_atoms:
150 |             for atom_idx in atom_cubic:
151 |                 atom = self.mol.GetAtomWithIdx(atom_idx)
152 |                 if atom.GetIsAromatic():
153 |                     return False
154 |         return True
155 | 
156 |     def ring_check(self, rssc, bsc, ssc, fsc, rsc):
157 |         return all([self.largest_ring_system_size_filter(rssc),
158 |                     self.largest_bridged_site_filter(bsc),
159 |                     self.largest_spiro_site_filter(ssc),
160 |                     self.largest_fused_site_filter(fsc),
161 |                     self.ring_system_count_filter(rsc),
162 |                     self.bridged_atom_is_aromatic_filter()])
163 | 
164 | 
if __name__ == '__main__':
    # Small manual check of the ring filters on a demo molecule.
    mol = Chem.MolFromSmiles("C1(C2)CC(CC3)NC3CC2C1")
    ringcheck = RingSystems(mol)
    # Pass the thresholds explicitly, using the same values as the per-filter defaults.
    passed = ringcheck.ring_check(rssc=3, bsc=2, ssc=1, fsc=3, rsc=4)
    logger.info("Not Pass Filter" if not passed else "Pass Filter")
    logger.info(ringcheck.ring_systems_size())
170 | 


--------------------------------------------------------------------------------
/secse/evaluate/ligprep.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python  
  2 | # -*- coding:utf-8 _*-
  3 | """ 
  4 | @author: Liu Shien
  5 | @file: ligprep.py
  6 | @time: 2021/4/1/16:28
  7 | @modify: 2022/3/1/12:04
  8 | @modify: 2023/5/5/14:22
  9 | """
 10 | import argparse
 11 | import os
 12 | import sys
 13 | import rdkit
 14 | from loguru import logger
 15 | from rdkit import Chem
 16 | from rdkit.Chem import AllChem
 17 | from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
 18 | from rdkit.Chem.MolStandardize import rdMolStandardize
 19 | from rdkit.Chem import rdDistGeom
 20 | from rdkit.Chem import rdMolAlign
 21 | from openbabel import pybel
 22 | from openbabel import openbabel as ob
 23 | 
 24 | sys.path.append(os.getenv("SECSE"))
 25 | from utilities.wash_mol import charge_mol
 26 | 
 27 | rdkit.RDLogger.DisableLog("rdApp.*")
 28 | 
 29 | 
 30 | def setero(mol, onlyUnassigned=True):
 31 |     if onlyUnassigned:
 32 |         opts = StereoEnumerationOptions(tryEmbedding=True)
 33 |     else:
 34 |         opts = StereoEnumerationOptions(tryEmbedding=True, onlyUnassigned=False)
 35 |     isomers = tuple(EnumerateStereoisomers(mol, options=opts))
 36 |     res = []
 37 |     if len(isomers) > 1:
 38 |         for idx, tmp in enumerate(isomers):
 39 |             name = tmp.GetProp("_Name") + "-CC" + str(idx)
 40 |             tmp.SetProp("_Name", name)
 41 |             res.append(tmp)
 42 |         return res
 43 |     else:
 44 |         return list(isomers)
 45 | 
 46 | 
 47 | def tau(mol, can=True):
 48 |     params = rdMolStandardize.CleanupParameters()
 49 |     params.tautomerRemoveSp3Stereo = False
 50 |     params.tautomerRemoveBondStereo = False
 51 |     params.maxTautomers = 1000
 52 |     params.maxTransforms = 10000
 53 |     enumerator = rdMolStandardize.TautomerEnumerator(params)
 54 |     try:
 55 |         canon = enumerator.Canonicalize(mol)
 56 |     except Exception as e:
 57 |         logger.error(e)
 58 |         return [mol]
 59 | 
 60 |     if can:
 61 |         return [canon]
 62 |     csmi = Chem.MolToSmiles(canon)
 63 |     res = [canon]
 64 |     tauts = enumerator.Enumerate(mol)
 65 |     smis = [Chem.MolToSmiles(x) for x in tauts]
 66 |     stpl = sorted((x, y) for x, y in zip(smis, tauts) if x != csmi)
 67 |     res += [y for x, y in stpl]
 68 | 
 69 |     new = []
 70 |     for idx, tmp in enumerate(res):
 71 |         name = tmp.GetProp("_Name") + "-CT" + str(idx)
 72 |         tmp.SetProp("_Name", name)
 73 |         new.append(tmp)
 74 | 
 75 |     return new
 76 | 
 77 | 
 78 | def to_3D(mol):
 79 |     mol = Chem.AddHs(mol)
 80 |     AllChem.EmbedMolecule(mol, useExpTorsionAnglePrefs=True, useBasicKnowledge=True, maxAttempts=10000,
 81 |                           useRandomCoords=True)
 82 |     if mol.GetNumConformers() > 0:
 83 |         AllChem.UFFOptimizeMolecule(mol, 200, 10.0, -1)
 84 |         return mol
 85 |     else:
 86 |         return None
 87 | 
 88 | 
 89 | def gen_minimized_3D(path, rdmol, numConformer=1, rms_cutoff=1, addH=True):
 90 |     name = rdmol.GetProp("_Name")
 91 |     sdf_path = os.path.join(path, name + ".sdf")
 92 |     writer = Chem.SDWriter(sdf_path)
 93 |     if addH:
 94 |         rdmol = Chem.AddHs(rdmol, addCoords=True)
 95 | 
 96 |     param = rdDistGeom.ETKDGv2()
 97 |     param.pruneRmsThresh = rms_cutoff
 98 |     cids = rdDistGeom.EmbedMultipleConfs(rdmol, 50, param)
 99 |     mp = AllChem.MMFFGetMoleculeProperties(rdmol, mmffVariant='MMFF94s')
100 |     AllChem.MMFFOptimizeMoleculeConfs(rdmol, numThreads=0, mmffVariant='MMFF94s')
101 |     res = []
102 |     for cid in cids:
103 |         ff = AllChem.MMFFGetMoleculeForceField(rdmol, mp, confId=cid)
104 |         # ff.Initialize()
105 |         ff.Minimize()
106 |         e = ff.CalcEnergy()
107 |         res.append((cid, e))
108 |     sorted_res = sorted(res, key=lambda x: x[1])
109 |     rdMolAlign.AlignMolConformers(rdmol)
110 |     if len(sorted_res) > numConformer:
111 |         selected = numConformer
112 |     else:
113 |         selected = len(sorted_res)
114 |     # new = Chem.Mol(rdmol)
115 |     # new.RemoveAllConformers()
116 |     # min_conf = rdmol.GetConformer(sorted_res[0][0])
117 |     # new.AddConformer(min_conf)
118 |     for i in range(selected):
119 |         cid = sorted_res[i][0]
120 |         writer.write(rdmol, cid)
121 |     writer.close()
122 | 
123 |     return sdf_path
124 | 
125 | 
def ionization(smi_string):
    """Return ``charge_mol(smi_string)``.

    Thin wrapper around ``utilities.wash_mol.charge_mol``; presumably the result
    is a SMILES string in a charged/ionized form - confirm against that helper.
    """
    return charge_mol(smi_string)
128 | 
129 | 
def sdf2pdbqt(sdf_path):
    """Convert an SDF file to a PDBQT file stored next to it.

    The output name is the SDF base name up to its first ``.`` plus ``.pdbqt``.
    Every molecule read overwrites the same output file.

    :return: ``True`` only if the SDF file contained exactly one molecule.
    """
    folder, filename = os.path.split(sdf_path)
    stem = filename.split(".")[0]
    pdbqt_path = "{}.pdbqt".format(os.path.join(folder, stem))

    count = 0
    for count, mol in enumerate(pybel.readfile("sdf", sdf_path), start=1):
        mol.write("pdbqt", pdbqt_path, overwrite=True)

    return count == 1
139 | 
140 | 
class LigPrep:
    """Prepare 3D ligand files from a whitespace-separated ``SMILES ID`` file.

    :param infile: path of the input file.
    :param workdir: directory in which ``ligands_for_<des>`` is created.
    """

    def __init__(self, infile, workdir):
        self.infile = infile
        self.workdir = workdir
        # molecule ID -> parsed molecule
        self.mol_dict = {}

    def parse_infile(self):
        """Fill ``mol_dict`` from the input file.

        Lines with fewer than two fields and SMILES that fail to parse after
        ``ionization`` are skipped.
        """
        with open(self.infile, "r") as handle:
            for record in handle:
                fields = record.strip().split()
                if len(fields) < 2:
                    continue
                smiles, mol_id = fields[0], fields[1]
                mol = Chem.MolFromSmiles(ionization(smiles))
                if mol is not None:
                    mol.SetProp("_Name", mol_id)
                    self.mol_dict[mol_id] = mol

    def process(self, des):
        """Parse the input and write one ligand file set per stereoisomer/tautomer.

        :param des: ``'docking'`` writes one minimized conformer as SDF and
            converts it to PDBQT; ``'shape'`` writes up to 10 conformers as SDF.
        """
        path = os.path.join(self.workdir, "ligands_for_" + des)
        os.makedirs(path, exist_ok=True)

        self.parse_infile()
        for mol in self.mol_dict.values():
            variants = []
            for stereo in setero(mol):
                variants += tau(stereo)

            for newmol in variants:
                if newmol is None:
                    continue
                # a failure on one variant must not stop the rest of the batch
                try:
                    if des == 'docking':
                        sdf2pdbqt(gen_minimized_3D(path, newmol))
                    elif des == 'shape':
                        gen_minimized_3D(path, newmol, 10)
                except Exception as e:
                    logger.error(e)
189 | 
190 | 
if __name__ == '__main__':
    # Command-line entry point for ligand preparation.
    parser = argparse.ArgumentParser(description="LigPrep @dalong")
    parser.add_argument("workdir", help="Workdir")
    parser.add_argument("mols_smi", help="Seed fragments")
    parser.add_argument("--mode",
                        help="1: prepare pdbqt file for docking input; 2: prepare sdf file for shape based screening.",
                        type=int, default=1)

    args = parser.parse_args()
    lig = LigPrep(args.mols_smi, args.workdir)
    # process() parses the input file itself, so it is not parsed here as well
    # (doing both would read and ionize every molecule twice).
    if args.mode == 1:
        lig.process(des="docking")
    elif args.mode == 2:
        lig.process(des="shape")
206 | 


--------------------------------------------------------------------------------
/secse/report/grow_path.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python  
  2 | # -*- coding:utf-8 _*-
  3 | """ 
  4 | @author: Lu Chong
  5 | @file: grow_path.py 
  6 | @time: 2021/01/19/13:42
  7 | """
  8 | import os
  9 | import sys
 10 | import time
 11 | import argparse
 12 | import numpy as np
 13 | import pandas as pd
 14 | from rdkit import Chem
 15 | from rdkit.Chem import Descriptors
 16 | from rdkit.Chem.rdMolDescriptors import CalcExactMolWt
 17 | from pandarallel import pandarallel
 18 | import configparser
 19 | from loguru import logger
 20 | 
 21 | sys.path.append(os.getenv("SECSE"))
 22 | from scoring.ranking import read_dock_file
 23 | from utilities.function_helper import shell_cmd_execute
 24 | 
# Initialise pandarallel once at import time, with verbose=0.
pandarallel.initialize(verbose=0)
# Perl helper under $SECSE/report; judging by its name it selects SDF records by title.
# NOTE(review): os.getenv returns None when SECSE is unset, which makes
# os.path.join raise TypeError at import time.
SELECT_SDF_SHELL = os.path.join(os.getenv("SECSE"), "report", "filter_sdf_by_titles.pl")
 27 | 
 28 | 
 29 | def cal_mutation_dic(workdir, max_gen):
 30 |     mut_dic_all = dict()
 31 | 
 32 |     while max_gen > 0:
 33 |         mut_file = os.path.join(workdir, "generation_" + str(max_gen), "filter.csv")
 34 |         logger.info(mut_file)
 35 |         with open(mut_file, "r") as f:
 36 |             lines = f.readlines()
 37 |         lines = [i.strip().split(",") for i in lines]
 38 |         mut_dic = {i[-6].split("-dp")[0].split("-C")[0]: [i[0], i[1].split("-dp")[0].split("-C")[0], i[-5], i[-4]] for i
 39 |                    in lines}
 40 |         mut_dic_all["gen" + str(max_gen)] = mut_dic
 41 |         max_gen -= 1
 42 |     return mut_dic_all
 43 | 
 44 | 
 45 | def merge_multi_generation(workdir, max_gen, file_path, dl_mode, config_path):
 46 |     df_lst = [pd.read_csv(os.path.join(workdir, "generation_" + str(i),
 47 |                                        "docked_gen_" + str(i) + ".csv")) for i in range(1, max_gen + 1)]
 48 |     if dl_mode == 2:
 49 |         dl_df = read_dock_file(os.path.join(workdir, "generation_{}_pre".format(max_gen),
 50 |                                             "docking_outputs_with_score.sdf"))
 51 |         dl_df["le_ln"] = dl_df.apply(
 52 |             lambda x: x["docking score"] / Chem.MolFromSmiles(x["smiles"]).GetNumHeavyAtoms(),
 53 |             axis=1)
 54 |         dl_df.columns = [i.lower() for i in list(dl_df.columns)]
 55 |         dl_df = dl_df.drop(columns=["molecule"])
 56 |         dl_df = dl_df.reindex(columns=df_lst[0].columns)
 57 |         config = configparser.ConfigParser()
 58 |         config.read(config_path)
 59 |         score_cutoff = config.getfloat("prediction", "dl_score_cutoff")
 60 |         dl_df = dl_df[dl_df["docking score"] < score_cutoff]
 61 |         df_lst.append(dl_df)
 62 | 
 63 |     final_df = pd.concat(df_lst, axis=0).drop_duplicates(subset=["smiles"])
 64 |     final_df.to_csv(file_path, index=False)
 65 |     return final_df
 66 | 
 67 | 
 68 | def grow_path(mut_dic_all, mut_id):
 69 |     mut_id = mut_id.split("-dp")[0].split("-C")[0]
 70 |     try:
 71 |         gen_mol = int(mut_id.split("_")[-3])
 72 |     except IndexError:
 73 |         logger.error(f"Index error: {mut_id}")
 74 |         return None
 75 |     mut_info_lst = []
 76 | 
 77 |     while gen_mol > 0:
 78 |         mut_info = mut_dic_all["gen" + str(gen_mol)][mut_id]
 79 |         if "." in mut_info[2]:
 80 |             gen_mol -= 1
 81 |             continue
 82 |         mut_info_lst.append(mut_info)
 83 |         mut_id = mut_info[1]
 84 |         gen_mol -= 1
 85 |     return mut_info_lst
 86 | 
 87 | 
 88 | def add_prop(merged_df_path):
 89 |     merged_df = pd.read_csv(merged_df_path)
 90 |     raw_cols = list(merged_df.columns)
 91 |     merged_df["mol"] = merged_df["smiles"].apply(Chem.MolFromSmiles)
 92 |     # check charge
 93 |     merged_df["charge flag"] = merged_df["mol"].apply(charge_filter)
 94 |     merged_df = merged_df[merged_df["charge flag"]]
 95 |     # add MW, logP
 96 |     merged_df["MW"] = merged_df["mol"].apply(CalcExactMolWt)
 97 |     merged_df["LogP"] = merged_df["mol"].apply(Descriptors.MolLogP)
 98 |     new_cols = ["smiles", "MW", "LogP"] + raw_cols[1:]
 99 |     return merged_df[new_cols]
100 | 
101 | 
def charge_filter(mol):
    """Tell whether a molecule passes the formal-charge rule.

    Atoms matching ``[*-1]`` and ``[*+1]`` are counted. The molecule passes
    with at most one such atom in total, or with exactly two of which no more
    than one is negative.

    :param mol: RDKit molecule
    :return: ``True`` when the molecule passes, ``False`` otherwise
    """
    anion_query = Chem.MolFromSmarts("[*-1]")
    cation_query = Chem.MolFromSmarts("[*+1]")
    n_negative = len(mol.GetSubstructMatches(anion_query))
    n_positive = len(mol.GetSubstructMatches(cation_query))
    n_charged = n_negative + n_positive

    if n_charged > 2:
        return False
    if n_charged == 2:
        # two charged atoms are tolerated unless both are negative
        return n_negative <= 1
    return True
114 | 
115 | 
def grep_sdf(workdir, merge_file):
    """Write ``selected.sdf`` with the poses of the molecules in ``merge_file``.

    All ``docking_outputs_with_score.sdf`` files found under ``workdir`` are
    concatenated, the ids of the merged table (``-dp`` suffix removed) are
    written to a text file, and the perl script referenced by
    ``SELECT_SDF_SHELL`` is run on both. The concatenated SDF is deleted
    afterwards; the id list is left in place.

    :param workdir: project working directory
    :param merge_file: CSV file with an ``id`` column
    """
    merged_sdf = os.path.join(workdir, "merged_all.sdf")
    selected_sdf = os.path.join(workdir, "selected.sdf")
    ids_txt = os.path.join(workdir, "seleted_ids.txt")

    # concatenate every docking output below workdir
    shell_cmd_execute(["find", workdir, "-name \"docking_outputs_with_score.sdf\" | xargs cat >", merged_sdf])

    # collect the unique ids, one per line
    table = pd.read_csv(merge_file)
    unique_ids = list(set(table["id"].apply(lambda raw: raw.split("-dp")[0])))
    with open(ids_txt, "w") as ids_out:
        for mol_id in unique_ids:
            ids_out.write(mol_id + "\n")

    # subset the concatenated SDF by id
    shell_cmd_execute(["perl", SELECT_SDF_SHELL, merged_sdf, ids_txt, selected_sdf])

    # only the concatenated SDF is temporary
    os.remove(merged_sdf)
135 | 
136 | 
def write_growth(config_path: str, max_gen: int, dl_mode: int):
    """Write the final table of selected molecules together with their growth path.

    Steps: merge the docked molecules of all generations, append to every row
    the chain of mutations that produced it (oldest generation first), filter
    by charge and add MW / LogP via ``add_prop``, write
    ``merged_docked_best_<timestamp>_with_grow_path.csv`` and extract the
    matching poses via ``grep_sdf``. Intermediate CSV files are removed.

    Rows for which ``grow_path`` returns ``None`` are not written.

    :param config_path: config file; ``[general] workdir`` is read from it
    :param max_gen: number of the last generation
    :param dl_mode: deep-learning mode forwarded to ``merge_multi_generation``
    """
    config = configparser.ConfigParser()
    config.read(config_path)
    workdir = config.get("general", "workdir")
    now = str(int(time.time()))
    file_path = os.path.join(workdir, "merged_docked_best_" + now + ".csv")
    merge_multi_generation(workdir, max_gen, file_path, dl_mode, config_path)

    # Derive sibling names from the stem so that a ".csv" occurring elsewhere
    # in the path (e.g. in workdir) is left untouched.
    stem = os.path.splitext(file_path)[0]
    new_file = stem + "_tmp.csv"
    final_file = stem + "_with_grow_path.csv"

    mut_dic_all = cal_mutation_dic(workdir, max_gen)
    path_fields = ("smi_gen_", "id_gen_", "rxn_gen_", "partner_gen_")
    with open(file_path, 'r') as raw, open(new_file, "w") as new:
        header = raw.readline().strip().split(",")
        # four path columns per generation, numbered 0 .. max_gen - 1
        header += [field + str(gen) for gen in range(max_gen) for field in path_fields]
        new_header = ",".join(header) + "\n"
        new.write(new_header)
        cols = new_header.count(",")  # loop invariant: separators per full row

        for line in raw:
            fields = line.strip().split(",")
            # NOTE(review): the second CSV column is taken to be the molecule id
            mut_info_lst = grow_path(mut_dic_all, fields[1])
            if mut_info_lst is None:
                continue
            mut_info_lst.reverse()
            # flatten the per-generation records; unlike np.concatenate this
            # also works when the path is empty
            path_cells = [cell for step in mut_info_lst for cell in step]
            new_line = ",".join(fields + path_cells)

            # fill empty columns
            new_line += "," * (cols - new_line.count(",")) + "\n"
            new.write(new_line)

    grow_df = add_prop(new_file)
    grow_df.to_csv(final_file, index=False)
    grep_sdf(workdir, final_file)
    logger.info(f"Output file: {final_file}")

    # remove temporary files
    os.remove(file_path)
    os.remove(new_file)
183 | 
184 | 
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="SCESE -- find path")
    parser.add_argument("config_path", help="config file path", type=str)
    parser.add_argument("max_gen", help="Max number of generation.", type=int)
    # dl_mode must be parsed as int: merge_multi_generation tests ``dl_mode == 2``,
    # which is never true for the string "2" that argparse yields by default.
    parser.add_argument("dl_mode", type=int,
                        help="Mode of deep learning modeling, 0: not use, 1: modeling per generation, 2: modeling overall after all the generation")
    args = parser.parse_args()
    write_growth(args.config_path, args.max_gen, args.dl_mode)
193 | 


--------------------------------------------------------------------------------
/secse/scoring/ranking.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python  
  2 | # -*- coding:utf-8 _*-
  3 | """ 
  4 | @author: Lu Chong
  5 | @file: ranking.py 
  6 | @time: 2020/11/04/13:35
  7 | """
  8 | import pandas as pd
  9 | from rdkit.Chem import PandasTools
 10 | from scoring.diversity_score import *
 11 | import numpy as np
 12 | import os
 13 | import configparser
 14 | from pandarallel import pandarallel
 15 | from loguru import logger
 16 | 
# Initialise pandarallel once at import time (verbose=0).
pandarallel.initialize(verbose=0)
# Disable RDKit log output matching "rdApp.*".
# NOTE(review): `rdkit` is not imported by name in this module; presumably it
# arrives through the star-import from scoring.diversity_score -- confirm.
rdkit.RDLogger.DisableLog("rdApp.*")
 19 | 
 20 | 
 21 | def read_dock_file(sdf):
 22 |     # assign new id for duplicates, with suffix -1, -2, ...
 23 |     sdf_df = PandasTools.LoadSDF(sdf, smilesName='smiles', molColName='Molecule')[
 24 |         ["ID", "Molecule", "smiles", "docking score"]]
 25 |     sdf_df["docking score"] = sdf_df["docking score"].astype(float)
 26 |     sdf_df = sdf_df.sort_values(by="docking score", ascending=True)
 27 |     name_groups = sdf_df.groupby("ID")["ID"]
 28 |     suffix = name_groups.cumcount() + 1
 29 |     repeats = name_groups.transform("size")
 30 |     sdf_df["ID"] = np.where(repeats > 1, sdf_df['ID'] + "-dp" + suffix.map(str), sdf_df["ID"])
 31 |     return sdf_df
 32 | 
 33 | 
 34 | def clean_id(raw_id, gen):
 35 |     new_id = raw_id
 36 |     # if "GEN_" + str(gen) in raw_id:
 37 |     if "-C" in new_id:
 38 |         new_id = new_id.rsplit("-C", 1)[0]
 39 |     # elif new_id.count("dp") > 1:
 40 |     #     new_id = new_id.rsplit("-dp", 1)[0]
 41 |     # elif new_id.count("-C") > 1:
 42 |     #     new_id = new_id.rsplit("-C", 1)[0]
 43 |     return new_id
 44 | 
 45 | 
 46 | class Ranking(object):
 47 |     def __init__(self, sdf, gen, config_file):
 48 |         self.sdf = sdf
 49 |         self.gen = gen
 50 | 
 51 |         config = configparser.ConfigParser()
 52 |         config.read(config_file)
 53 |         self.docking_score_cutoff = config.getfloat("docking", "score_cutoff")
 54 |         self.RMSD = config.getfloat("docking", "rmsd")
 55 |         self.delta_docking_score = config.getfloat("docking", "delta_score")
 56 | 
 57 |         self.docked_df = pd.DataFrame(None)
 58 |         self.diff = None
 59 |         self.score_min = None
 60 |         self.winner = None
 61 |         self.final_df = None
 62 |         self.keep_mols = None
 63 | 
 64 |         self.load_sdf()
 65 |         self.ranking_flag = True
 66 |         if self.gen > 0:
 67 |             if self.filter_rmsd_docking_score():
 68 |                 self.cal_le_rank()
 69 |             else:
 70 |                 self.ranking_flag = False
 71 |                 logger.info("No molecule left, stopping generation.")
 72 |         elif self.gen == 0:
 73 |             self.cal_le_rank()
 74 | 
 75 |         self.size = min(config.getint("general", "seed_per_gen"), self.docked_df.shape[0])
 76 | 
 77 |     def load_sdf(self):
 78 |         raw_df = PandasTools.LoadSDF(self.sdf, smilesName='smiles', molColName='Molecule')[
 79 |             ["ID", "Molecule", "smiles", "docking score"]]
 80 |         raw_df["docking score"] = raw_df["docking score"].astype(float)
 81 |         raw_df = raw_df.sort_values(by="docking score", ascending=True)
 82 | 
 83 |         raw_df.columns = [i.lower() for i in list(raw_df.columns)]
 84 | 
 85 |         self.docked_df = raw_df[["smiles", "id", "docking score", "molecule"]].copy()
 86 |         # assign new id for duplicates, with suffix -1, -2, ...
 87 |         name_groups = self.docked_df.groupby("id")["id"]
 88 |         suffix = name_groups.cumcount() + 1
 89 |         repeats = name_groups.transform("size")
 90 |         self.docked_df["id_raw"] = self.docked_df["id"].copy()
 91 |         self.docked_df["id"] = np.where(repeats > 1, self.docked_df['id'] + "-dp" + suffix.map(str),
 92 |                                         self.docked_df["id"])
 93 | 
 94 |         logger.info("{} cmpds after evaluate".format(self.docked_df.shape[0]))
 95 | 
 96 |     def load_parents_sdf(self):
 97 |         gen = str(self.gen - 1)
 98 |         read_dock_file(os.path.join(os.path.dirname(os.path.dirname(self.sdf)), "generation_" + gen,
 99 |                                     "docking_outputs_with_score.sdf"))
100 | 
101 |     def mols_score_below_cutoff(self):
102 |         self.docking_score_cutoff = min(self.docking_score_cutoff,
103 |                                         self.docked_df["docking score"].astype(float).quantile(0.01))
104 |         logger.info("The evaluate score cutoff is: {}".format(self.docking_score_cutoff))
105 |         self.keep_mols = self.docked_df[self.docked_df["docking score"].astype(float) <= self.docking_score_cutoff]
106 |         self.final_df = pd.concat([self.keep_mols, self.winner]).drop_duplicates(subset="id")
107 |         cols = list(self.final_df.columns)
108 |         cols = [i + "_gen_" + str(self.gen) for i in cols]
109 |         self.final_df.columns = cols
110 |         logger.info("{} final seeds.".format(self.final_df.shape[0]))
111 | 
112 |     def filter_rmsd_docking_score(self):
113 |         last_sdf = self.sdf.replace("generation_" + str(self.gen), "generation_" + str(self.gen - 1))
114 |         last_df = read_dock_file(last_sdf).set_index("ID")
115 |         mut_df = pd.read_csv(os.path.join(os.path.dirname(self.sdf), "filter.csv"), low_memory=False)
116 |         parent_dic = dict(zip(mut_df["id_gen_" + str(self.gen)], zip(mut_df["id_gen_" + str(self.gen - 1)],
117 |                                                                      mut_df["type"])))
118 |         self.docked_df["id_find_parent"] = self.docked_df["id_raw"].apply(lambda x: clean_id(x, self.gen))
119 | 
120 |         # calculate RMSD: parent from last generation
121 |         def cal_rmsd_docked(row):
122 |             # do not care rmsd for the first generation
123 |             if self.gen == 1:
124 |                 return -1
125 |             # do not care rmsd except for Grow type
126 |             if "G" not in parent_dic[row["id_find_parent"]][1]:
127 |                 return -2
128 |             return cal_rmsd(last_df.loc[parent_dic[row["id_find_parent"]][0]]["Molecule"], row["molecule"])
129 | 
130 |         # calculate RMSD only for Type Grow mutation, assign -1 for other mutation
131 |         self.docked_df["rmsd"] = self.docked_df.apply(cal_rmsd_docked, axis=1)
132 |         # calculate change of evaluate score after growing
133 |         self.docked_df["delta_docking_score"] = self.docked_df.apply(lambda x: float(x["docking score"]) - float(
134 |             last_df.loc[parent_dic[x["id_find_parent"]][0]]["docking score"]), axis=1)
135 | 
136 |         # keep same binding mode (RMSD < 2A and delta evaluate score < -0.3) or
137 |         # find a better binding mode (delta evaluate score < -1.2kcal )
138 |         logger.info("{} cmpds before RMSD/Docking Score filter".format(self.docked_df.shape[0]))
139 |         self.docked_df = self.docked_df[(self.docked_df["delta_docking_score"] <= self.delta_docking_score) | (
140 |                 (self.docked_df["rmsd"] <= self.RMSD) & (self.docked_df["delta_docking_score"] <= -0.2))]
141 |         rest_cmpds = self.docked_df.shape[0]
142 |         logger.info("{} cmpds after RMSD/Docking Score filter".format(rest_cmpds))
143 |         if rest_cmpds == 0:
144 |             return False
145 |         return True
146 | 
147 |     def cal_le_rank(self):
148 |         # calculate ln LE and fitness rank
149 |         self.docked_df["le_ln"] = self.docked_df.apply(
150 |             lambda x: x["docking score"] / (1 + np.log(x["molecule"].GetNumHeavyAtoms())),
151 |             axis=1)
152 |         self.diff = self.docked_df["le_ln"].max() - self.docked_df["le_ln"].min()
153 |         self.score_min = self.docked_df["le_ln"].min()
154 |         self.docked_df["fitness"] = 1 - ((self.docked_df["le_ln"] - self.score_min) / self.diff)
155 |         self.docked_df["fitness"] = self.docked_df["fitness"].fillna(-1)
156 |         self.docked_df["fitness_rank"] = self.docked_df["fitness"].rank(ascending=False)
157 |         self.docked_df["fitness_rank"] = self.docked_df["fitness_rank"].fillna(-1)
158 |         # drop molecule columns
159 |         self.docked_df = self.docked_df.drop(columns=["molecule", "id_raw"])
160 | 
161 |     def roulette_selection(self):
162 |         self.winner = self.docked_df.sample(n=self.size, weights="fitness")
163 | 
164 |     def tournament_selection(self):
165 |         # random sample 3 molecules the one with smallest evaluate score win, repeat until get 20% of original data
166 |         win_lst = []
167 |         if self.size == 1:
168 |             self.winner = self.docked_df.copy()
169 |         pool = self.docked_df.copy()
170 |         for i in range(int(self.size)):
171 |             winner = pool.sample(min(10, pool.shape[0])).nsmallest(1, "le_ln", keep="first")
172 |             win_lst.append(winner)
173 |             pool = pool.drop(winner.index)
174 | 
175 |         self.winner = pd.concat(win_lst)
176 | 


--------------------------------------------------------------------------------
/secse/growing/mutation/mutation.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | 
  4 | sys.path.append(os.getenv("SECSE"))
  5 | import copy
  6 | import sqlite3
  7 | import pandas as pd
  8 | from loguru import logger
  9 | import rdkit
 10 | from pandarallel import pandarallel
 11 | from rdkit import Chem
 12 | from rdkit.Chem import rdChemReactions
 13 | from utilities.wash_mol import get_bridged_atoms, neutralize_atoms
 14 | from utilities.load_rules import json_to_DB
 15 | from utilities.function_helper import shell_cmd_execute
 16 | 
# Disable RDKit log output matching "rdApp.*".
rdkit.RDLogger.DisableLog("rdApp.*")

# Default rule database, located relative to the SECSE environment variable.
# os.getenv returns None when SECSE is unset, which os.path.join rejects.
RULE_DB = os.path.join(os.getenv("SECSE"), "growing/mutation/rules_demo.db")
 20 | 
 21 | 
 22 | class Mutation:
 23 | 
 24 |     def __init__(self, num, workdir, rule_db=RULE_DB):
 25 |         # self.load_reaction()
 26 |         self.workdir = workdir
 27 |         self.rule_db = rule_db
 28 |         # self.load_buildingblock(num=num)
 29 |         self.rules_dict = {}
 30 |         self.load_common_rules()
 31 |         self.load_spacer_rings_rules()
 32 | 
 33 |         # drop unwanted rules where Priority < 0
 34 |         self.rules_dict = {k: v for k, v in self.rules_dict.items() if int(v[1]) > 0}
 35 |         self.out_product_smiles = []
 36 |         self.input_smiles = None
 37 |         self.mol = None
 38 | 
 39 |     def load_common_rules(self, tables=None):
 40 |         if tables is None:
 41 |             tables = ['B-001',
 42 |                       'G-001', 'G-003', 'G-004', 'G-005', 'G-006', 'G-007',
 43 |                       'M-001', 'M-002', 'M-003', 'M-004', 'M-005', 'M-006', 'M-007', 'M-008', 'M-009', 'M-010'
 44 |                       ]
 45 |         rules_dict = {}
 46 |         for table in tables:
 47 |             try:
 48 |                 sql = 'select * from "{0}"'.format(table)
 49 |                 conn = sqlite3.connect(self.rule_db)
 50 |                 conn.row_factory = sqlite3.Row
 51 |                 c = conn.cursor()
 52 |                 c.execute(sql)
 53 |                 rs = c.fetchall()
 54 |                 for row in rs:
 55 |                     row = dict(row)
 56 |                     rules_dict[row["Rule ID"]] = (rdChemReactions.ReactionFromSmarts(row["SMARTS"]), row['Priority'])
 57 |             except sqlite3.OperationalError:
 58 |                 logger.error("No rule class: ", table)
 59 |                 pass
 60 |         self.rules_dict.update(rules_dict)
 61 | 
 62 |     def load_spacer_rings_rules(self):
 63 |         rules_dict = {}
 64 |         try:
 65 |             sql = 'select * from "{}"'.format("G-002")
 66 |             conn = sqlite3.connect(self.rule_db)
 67 |             conn.row_factory = sqlite3.Row
 68 |             c = conn.cursor()
 69 |             c.execute(sql)
 70 |             rs = c.fetchall()
 71 |             for row in rs:
 72 |                 row = dict(row)
 73 |                 pri = int(row['Spacer Priority']) * int(row['Ring Priority'])
 74 |                 rules_dict[row["Rule ID"]] = (rdChemReactions.ReactionFromSmarts(row["SMARTS"]), str(pri))
 75 |             self.rules_dict.update(rules_dict)
 76 |         except sqlite3.OperationalError:
 77 |             logger.error("No rule class: G-002")
 78 | 
 79 |     # set smiles
 80 |     def load_mol(self, input_smiles):
 81 |         self.clean()
 82 |         self.input_smiles = input_smiles
 83 |         # uncharged each atom
 84 |         self.mol = Chem.MolFromSmiles(self.input_smiles)
 85 |         assert self.mol, "Can not read smiles"
 86 |         if self.input_smiles.count("-") + self.input_smiles.count("+") > 0:
 87 |             self.mol = neutralize_atoms(self.mol)
 88 |             # self.input_smiles = Chem.MolToSmiles(self.mol)
 89 | 
 90 |     def reaction(self, rxn, react, item, partner, priority):
 91 |         try:
 92 |             products = rxn.RunReactants(react)
 93 |             uniq = set()
 94 |             for mol_tuple in products:
 95 |                 Chem.SanitizeMol(mol_tuple[0])
 96 |                 # enumerator = rdMolStandardize.TautomerEnumerator()
 97 |                 # canon = enumerator.Canonicalize(mol_tuple[0])
 98 |                 # smi = Chem.MolToSmiles(Chem.RemoveHs(canon), isomericSmiles=True, kekuleSmiles=False)
 99 |                 smi = Chem.MolToSmiles(Chem.RemoveHs(mol_tuple[0]), isomericSmiles=True, kekuleSmiles=False)
100 |                 uniq.add(smi)
101 |             for smi in uniq:
102 |                 self.out_product_smiles.append((smi, item, partner, priority))
103 |         except Exception as e:
104 |             # logger.error(e)
105 |             pass
106 | 
107 |     # add 2021.1.7
108 |     # modify 2021.01.14
109 |     def single_point_mutate(self):
110 |         mol = self.spiro_atom_label()
111 |         for item in self.rules_dict:
112 |             rxn = self.rules_dict[item][0]
113 |             priority = self.rules_dict[item][1]
114 |             if mol.HasSubstructMatch(rxn.GetReactantTemplate(0)):
115 |                 self.reaction(rxn, (mol,), item, "", priority)
116 |         self.protected_atom_label_remove()
117 |         return self.out_product_smiles
118 | 
119 |     def spiro_atom_label(self):
120 |         mol = copy.deepcopy(self.mol)
121 |         ri = mol.GetRingInfo()
122 | 
123 |         # spiro_ sma = '[*r3,*r4,*r5,*r6;R2X4$([*,*,*,*](@[r3,r4,r5,r6,r7])(@[r3,r4,r5,r6,r7])(@[r3,r4,r5,r6,
124 |         # r7])@[r3,r4,r5,r6,r7])]'
125 |         spiro_sma = '[x4]'
126 |         spiro_atoms = mol.GetSubstructMatches(Chem.MolFromSmarts(spiro_sma))
127 | 
128 |         res = set()
129 |         for ring in ri.AtomRings():
130 |             for spi in spiro_atoms:
131 |                 tmp = set(spi).intersection(set(ring))
132 |                 if tmp:
133 |                     res = res.union(ring)
134 | 
135 |         for index in res:
136 |             mol.GetAtomWithIdx(index).SetProp('_protected', '1')
137 | 
138 |         self.mol = mol
139 |         return mol
140 | 
141 |     def bridged_atom_label(self):
142 | 
143 |         mol = self.mol
144 |         brigded_atoms = get_bridged_atoms(mol)
145 |         ri = mol.GetRingInfo()
146 |         res = set()
147 |         for ring in ri.AtomRings():
148 |             for bri in brigded_atoms:
149 |                 tmp = set(bri).intersection(set(ring))
150 |                 if tmp:
151 |                     res = res.union(ring)
152 | 
153 |         for index in res:
154 |             mol.GetAtomWithIdx(index).SetProp('_protected', '1')
155 | 
156 |         self.mol = mol
157 |         return mol
158 | 
159 |     def protected_atom_label_remove(self):
160 |         mol = self.mol
161 |         for idx in range(len(mol.GetAtoms())):
162 |             if mol.GetAtomWithIdx(idx).HasProp('_protected'):
163 |                 mol.GetAtomWithIdx(idx).ClearProp('_protected')
164 |         self.mol = mol
165 |         return mol
166 | 
167 |     def clean(self):
168 |         self.input_smiles = None
169 |         self.out_product_smiles = []
170 | 
171 | 
def mutation_df(df: pd.DataFrame, workdir, cpu_num, gen=1, rule_db=None, project_code="GEN"):
    """Mutate every seed molecule of ``df`` and write the products of generation ``gen``.

    For each seed a line repeating the seed itself (reaction id ``Na-Na-Na``,
    priority ``3``) is written, followed by one line per product with a new id
    ``<PROJECT_CODE>_<gen>_M_<9-digit counter>``. The raw file
    ``generation_<gen>/mutation.raw`` is then de-duplicated on the product
    SMILES column with awk into ``mutation.csv``.

    :param df: seeds; must contain ``smiles_gen_<gen-1>``
    :param workdir: project working directory
    :param cpu_num: number of pandarallel workers (used when more than one seed)
    :param gen: generation being produced
    :param rule_db: optional rule database path; the ``Mutation`` default is used when ``None``
    :param project_code: prefix of the generated ids (upper-cased)
    :return: column names of the written rows
    """
    gen_dir = os.path.join(workdir, "generation_" + str(gen))
    parent_col = "smiles_gen_" + str(gen - 1)
    child_col = "smiles_gen_" + str(gen)

    extra = {} if rule_db is None else {"rule_db": rule_db}
    mutator = Mutation(5000, gen_dir, **extra)

    def products_of(smi):
        # mutation for each seed molecule; unreadable SMILES give None
        try:
            mutator.load_mol(smi)
        except AssertionError:
            return None
        mutator.single_point_mutate()
        return mutator.out_product_smiles

    seeds = df.copy()
    if seeds.shape[0] == 1:
        seeds[child_col] = seeds[parent_col].apply(products_of)
    else:
        pandarallel.initialize(verbose=0, nb_workers=cpu_num)
        seeds[child_col] = seeds[parent_col].parallel_apply(products_of)
    seeds = seeds.dropna(subset=[child_col]).reset_index(drop=True)

    counter = 1
    out_base = os.path.join(gen_dir, "mutation")
    with open(out_base + ".raw", "w") as raw_out:
        header = list(seeds.columns[:-1]) + [child_col, "id_gen_" + str(gen),
                                             "reaction_id_gen_" + str(gen), "partner_gen_" + str(gen),
                                             "priority_gen_" + str(gen)]
        for record in seeds.values.tolist():
            parent_fields = [str(value) for value in record[:-1]]
            products = record[-1]
            # keep parent mol
            parent_id = parent_fields[1].split("-dp")[0].split("-C")[0]
            parent_row = parent_fields + [parent_fields[0], parent_id, "Na-Na-Na", "", "3"]
            raw_out.write(",".join(parent_row) + "\n")
            # write mutation mols
            for product in products:
                cells = [str(value) for value in product]
                new_id = project_code.upper() + "_" + str(gen) + "_M_" + str(counter).zfill(9)
                raw_out.write(",".join(parent_fields + [cells[0], new_id] + cells[1:]) + "\n")
                counter += 1

    # drop duplicates product smiles by awk
    shell_cmd_execute(["awk -F',' '!seen[$(NF-4)]++'", out_base + ".raw ", ">", out_base + ".csv"])

    return header
221 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # **SECSE**
  2 | 
  3 | ----------------------------
  4 | 
  5 | ### SECSE: _**S**ystemic **E**volutionary **C**hemical **S**pace **E**xplorer_
  6 | 
  7 | ![plot](docs/platform.jpg)
  8 | 
  9 | Chemical space exploration is a major task of the hit-finding process during the pursuit of novel chemical entities.
 10 | Compared with other screening technologies, computational _de novo_ design has become a popular approach to overcome the
 11 | limitation of current chemical libraries. Here, we reported a _de novo_ design platform named systemic evolutionary
 12 | chemical space explorer (SECSE). The platform was conceptually inspired by fragment-based drug design, that miniaturized
 13 | a “lego-building” process within the pocket of a certain target. The key to virtual hits generation was then turned into
 14 | a computational search problem. To enhance search and optimization, human intelligence and deep learning were
 15 | integrated. SECSE has the potential in finding novel and diverse small molecules that are attractive starting points for
 16 | further validation.
 17 | 
 18 | ### Tutorials and Usage
 19 | 
 20 | ----------------------------
 21 | 
 22 | 1. Setting up dependencies  
 23 |    python ~=3.9, perl ~=5.32
 24 |     ```bash
 25 |     conda create --name secse -c conda-forge parallel tqdm biopandas openbabel chemprop xlrd=2 pandarallel rdkit=2024.09.1 loguru tensorboard
 26 |     conda activate secse
 27 |    ```
 28 | 2. Installing from source
 29 |     ```bash
 30 |     git clone https://github.com/KeenThera/SECSE.git 
 31 |    ```
 32 | 3. Setting Environment Variables  
 33 |    `export SECSE=/absolute/path/to/SECSE`  
 34 |    I'm using AutoDock Vina for docking:
 35 |    [(download here)](https://github.com/ccsb-scripps/AutoDock-Vina/releases)  
 36 |    `export VINA=/absolute/path/to/AutoDockVINA`  
 37 |    I'm using AutoDock GPU: (adgpu-v1.5.3_linux_ocl_128wi)
 38 |    [(download here)](https://github.com/ccsb-scripps/AutoDock-GPU/releases)  
 39 |    `export AUTODOCK_GPU=/absolute/path/to/AutoDockGPU`  
 40 |    I'm using [Glide](https://www.schrodinger.com/products/glide) for docking (additional installation & license
 41 |    required):  
 42 |    `export SCHRODINGER=/absolute/path/to/SCHRODINGER`  
 43 |    I'm using [Uni-Dock](https://github.com/dptech-corp/Uni-Dock) for docking (need GPU):  
 44 |    [compile from Uni-Dock source code](https://github.com/dptech-corp/Uni-Dock/tree/main/unidock#building-from-source) (recommended), or [download here](https://github.com/dptech-corp/Uni-Dock/releases/download/1.1.0/unidock-1.1.0-cuda120-linux-x86_64) and add `export UNIDOCK=/absolute/path/to/UNIDOCK`
 45 | 4. Giving execution permissions to the SECSE directory  
 46 |    `chmod -R +x /absolute/path/to/SECSE`
 47 | 5. Input fragments: a tab separated _.smi_ file without header. See demo [here](demo/demo_1020.smi).
 48 | 6. Parameters in config file:
 49 | 
 50 |    [general]
 51 | 
 52 |     - _project_code_, project identifier, which will be prefixed to each generated molecule ID, type=str
 53 |     - _workdir_, working directory, create if not exists, otherwise overwrite, type=str
 54 |     - _fragments_, file path to seed fragments, smi format, type=str
 55 |     - _num_per_gen_, number of molecules generated each generation, type=int
 56 |     - _seed_per_gen_, number of selected seed molecules per generation, default=1000, type=int
 57 |     - _start_gen_, number of the starting generation; if you want to resume the generation, please specify the 'start_gen' as
 58 |       the number corresponding to the last **completed generation** in your previous run, default=0, type=int
 59 |     - _num_gen_, number of growing generations, the final generation number will be the sum of start_gen and num_gen,
 60 |       type=int
 61 |     
 62 |     - _cpu_, number of max invoke CPUs, type=int
 63 |     - _gpu_, number of max invoke GPU for AutoDock GPU, type=int
 64 |     - _rule_db_, path to customized rule in json format, input 0 if use default rule, default=0
 65 | 
 66 |    [docking]
 67 |     - _docking_program_, name of docking program, AutoDock-Vina (input vina) or AutoDock-GPU (input autodock-gpu) or
 68 |       Glide (input glide) , default=vina, type=str
 69 |     - _target_, protein PDBQT if use AutoDock Vina; grid map files descriptor fld file if AutoDock GPU; Grid file if
 70 |       choose Glide, type=str
 71 |     - _RMSD_, docking pose RMSD cutoff between children and parent, default=2, type=float
 72 |     - _delta_score_, decreased docking score cutoff between children and parent, default=-1.0, type=float
 73 |     - _score_cutoff_, default=-9, type=float
 74 | 
 75 |    Parameters when docking by AutoDock Vina:
 76 | 
 77 |     - _x_, Docking box x, type=float
 78 |     - _y_, Docking box y, type=float
 79 |     - _z_, Docking box z, type=float
 80 |     - _box_size_x_, Docking box size x, default=20, type=float
 81 |     - _box_size_y_, Docking box size y, default=20, type=float
 82 |     - _box_size_z_, Docking box size z, default=20, type=float
 83 | 
 84 |    [prediction]
 85 | 
 86 |     - _mode_, mode of deep learning modeling, 0: not use, 1: modeling per generation, 2: modeling overall after all the
 87 |       generation, default=0, type=int
 88 |     - _dl_per_gen_, top N predicted molecules for docking, default=100, type=int
 89 |     - _dl_score_cutoff_, default=-9, type=float
 90 | 
 91 |    [properties]
 92 | 
 93 |     - _mw_, molecular weights cutoff, default=450, type=int
 94 |     - _logp_lower_, minimum of logP, default=0.5, type=float
 95 |     - _logp_upper_, maximum of logP, default=7, type=float
 96 |     - _chiral_center_, maximum of chiral center,default=2, type=int
 97 |     - _heteroatom_ratio_, maximum of heteroatom ratio, default=0.35, type=float
 98 |     - _rdkit_rotatable_bound_num_, maximum number of rotatable bonds calculated from
 99 |       rdkit.rdMolDescriptors.CalcNumRotatableBonds, default=5, type=int
100 |     - _keen_rotatable_bound_num_, maximum number of rotatable bonds defined by KEEN (
101 |       SMARTS: "[C^3!D1;!$(C(F)(F)F)]-!@[!Br!F!Cl!I!H3&!$(*#*)!D1;!$([!Br!F!Cl!I](F)(F)F)]"), default=3, type=int
102 |     - _rigid_body_num_, maximum of rigid body defined by KEEN (
103 |       SMARTS: "[C^3!D1;!$(C(F)(F)F);!R;!$(C=O(N));!$(NC(=O));!$(C(=O)O);!$(C(=O)O)]-!@[!Br!F!Cl!I!H3&!$(*#*)!
104 |       D1;!$([!Br!F!Cl!I](F)(F)F);!R;!$(C=O([N,O]));!$(NC(=O));!$(C(=O)O)]"), default=2, type=int
105 |     - _hbd_, maximum of hydrogen bond donor calculated by rdkit.rdMolDescriptors.CalcNumHBD, default=5, type=int
106 |     - _hba_, maximum of hydrogen bond acceptor calculated by rdkit.rdMolDescriptors.CalcNumHBA, default=10, type=int
107 |     - _tpsa_, maximum of topological polar surface area calculated by rdkit.Chem.Descriptors.TPSA, default=200,
108 |       type=float
109 |     - _lipinski_violation_, maximum number of violations of Lipinski's rule of five calculated by RDKit, default=1,
110 |       type=int
111 |     - _qed_, QED (calculated by rdkit.Chem.QED.qed) cutoff value, default=0.5, type=float
112 |     - _max_ring_size_, maximum of ring size, default=7, type=int
113 |     - _max_ring_system_size_, maximum of ring system member size in one ring system, default=3, type=int
114 |     - _ring_system_count_, maximum of separated ring system count, default=4, type=int
115 |     - _bridged_site_count_, maximum of bridged ring site count, default=2, type=int
116 |     - _spiro_site_count_, maximum of spiro ring site count, default=1, type=int
117 |     - _fused_site_count_, maximum of fused ring site count, default=3, type=int
118 |     - _rdkit_sa_score_, synthetic accessibility score (calculated by RDKit) cutoff, default=5, type=float
119 |     - _substructure_filter_, file containing the customized unwanted substructure SMARTS in "*.xls" format; set the
120 |       value to 0 if you do not have any additional unwanted substructures. PAINS filters are already included by default. The file
121 |       should include columns for **`Pattern`**,  **`ID`**, and **`Max`**, where the **`ID`** should be unique for each SMARTS. You can
122 |       refer to the example file [subtructure_filter_demo.xls](demo/subtructure_filter_demo.xls), default=0, type=string
123 | 
124 |    Config file of a demo case [phgdh_demo_vina.ini](demo/phgdh_demo_vina.ini)  
125 |    Customized rule json template [rules.json](demo/rules.json). Rule ID should be in the form G-001-XXXX, like
126 |    G-001-0001, G-001-0002, G-001-0003 ...
127 | 
128 | 7. Run SECSE  
129 |    `python $SECSE/run_secse.py --config /absolute/path/to/config`  
130 |    Please input the **absolute path** of the config file here.
131 | 8. Output files
132 |     - merged_docked_best_timestamp_with_grow_path.csv: selected molecules and growing path
133 |     - selected.sdf: 3D conformers of all selected molecules
134 | 
135 | ### Dependencies
136 | 
137 | -------
138 | GNU Parallel installation
139 | 
140 | - CentOS / RHEL  
141 |   `sudo yum install parallel`
142 | - Ubuntu / Debian  
143 |   `sudo apt-get install parallel`
144 | - From source: https://www.gnu.org/software/parallel/
145 | 
146 | python ~=3.12, perl ~=5.32
147 | 
148 | numpy~=1.26.4, pandas~=2.2.2, xlrd~=2.0.1, pandarallel~=1.6.5, tqdm~=4.67.0, biopandas~=0.5.1, openbabel~=3.1.1,
149 | rdkit~=2024.09, chemprop~=2.1, pytorch~=2.5.1+cu117, tensorboard~=2.18.0
150 | 
151 | Linux server with CPUs only also works.
152 | 
153 | ### Citation
154 | 
155 | -------
156 | Lu, C.; Liu, S.; Shi, W.; Yu, J.; Zhou, Z.; Zhang, X.; Lu, X.; Cai, F.; Xia, N.; Wang, Y. Systemic Evolutionary Chemical
157 | Space Exploration For Drug Discovery. J Cheminform 14, 19 (2022).
158 | 
159 | https://doi.org/10.1186/s13321-022-00598-4
160 | 
161 | ### License
162 | 
163 | -------
164 | SECSE is released under [Apache License, Version 2.0](LICENSE.txt).
165 | 
166 | The project is being actively developed. If you have any questions or suggestions, please contact:
167 | wang_yikai@keenthera.com or luchong121@outlook.com
168 | 


--------------------------------------------------------------------------------
/secse/growing/filter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python  
  2 | # -*- coding:utf-8 _*-
  3 | """ 
  4 | @author: Lu Chong
  5 | @file: filter.py 
  6 | @time: 2020/11/16/13:14
  7 | """
  8 | import argparse
  9 | import os
 10 | import sys
 11 | import time
 12 | import configparser
 13 | import rdkit
 14 | import rdkit.Chem as Chem
 15 | from rdkit.Chem.rdMolDescriptors import CalcExactMolWt, CalcNumHBD, CalcNumHBA, CalcNumRotatableBonds
 16 | from rdkit.Chem import Descriptors, AllChem
 17 | from rdkit.Chem import QED
 18 | from rdkit.Chem import RDConfig
 19 | import json
 20 | from loguru import logger
 21 | 
 22 | sys.path.append(os.getenv("SECSE"))
 23 | from utilities.ring_tool import RingSystems
 24 | from utilities.substructure_filter import StructureFilter
 25 | from utilities.wash_mol import wash_mol, neutralize, charge_mol, get_keen_rotatable_bound_num, get_rigid_body_num
 26 | from utilities.open_filter import user_filter
 27 | 
 28 | sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
 29 | import sascorer
 30 | 
 31 | rdkit.RDLogger.DisableLog("rdApp.*")
 32 | 
 33 | 
 34 | class Filter:
 35 |     def __init__(self, gen, config_path):
 36 | 
 37 |         self.gen = int(gen)
 38 |         self.input_smiles = None
 39 |         self.mol = None
 40 |         self.pains_smarts = None
 41 | 
 42 |         config = configparser.ConfigParser()
 43 |         config.read(config_path)
 44 | 
 45 |         substructure_filter_file = config.get("properties", "substructure_filter")
 46 |         if substructure_filter_file == "0":
 47 |             self.strutFilter = StructureFilter()
 48 |         else:
 49 |             # logger.info("Use additional substructure filter patters.")
 50 |             self.strutFilter = StructureFilter(substructure_filter_file)
 51 | 
 52 |         self.MW = config.getfloat("properties", "mw")
 53 |         self.logP_lower = config.getfloat("properties", "logp_lower")
 54 |         self.logP_upper = config.getfloat("properties", "logp_upper")
 55 |         self.chiral_center = config.getint("properties", "chiral_center")
 56 |         self.heteroatom_ratio = config.getfloat("properties", "heteroatom_ratio")
 57 |         self.rdkit_rotatable_bound_num = config.getint("properties", "rdkit_rotatable_bound_num")
 58 |         self.keen_rotatable_bound_num = config.getint("properties", "keen_rotatable_bound_num")
 59 |         self.rigid_body_num = config.getint("properties", "rigid_body_num")
 60 |         self.hbd = config.getint("properties", "hbd")
 61 |         self.hba = config.getint("properties", "hba")
 62 |         self.tpsa = config.getfloat("properties", "tpsa")
 63 |         self.lipinski_violation = config.getint("properties", "lipinski_violation")
 64 |         self.qed = config.getfloat("properties", "qed")
 65 |         self.max_ring_size = config.getint("properties", "max_ring_size")
 66 |         self.max_ring_system_size = config.getint("properties", "max_ring_system_size")
 67 |         self.ring_system_count = config.getint("properties", "ring_system_count")
 68 |         self.bridged_site_count = config.getint("properties", "bridged_site_count")
 69 |         self.spiro_site_count = config.getint("properties", "spiro_site_count")
 70 |         self.fused_site_count = config.getint("properties", "fused_site_count")
 71 |         self.rdkit_sa_score = config.getint("properties", "rdkit_sa_score")
 72 | 
 73 |     def load_mol(self, input_smiles):
 74 |         self.clean()
 75 |         self.input_smiles = input_smiles
 76 |         self.mol = Chem.MolFromSmiles(self.input_smiles)
 77 | 
 78 |         # uncharged each atom
 79 |         if self.input_smiles.count("-") + self.input_smiles.count("+") > 0:
 80 |             self.mol, self.input_smiles = neutralize(self.input_smiles)
 81 | 
 82 |         if self.mol is None:
 83 |             self.input_smiles = wash_mol(self.input_smiles)
 84 |             self.mol = Chem.MolFromSmiles(self.input_smiles)
 85 |             if self.mol is None:
 86 |                 self.input_smiles = "C"
 87 |                 self.mol = Chem.MolFromSmiles(self.input_smiles)
 88 | 
 89 |     def clean(self):
 90 |         self.input_smiles = None
 91 |         self.mol = None
 92 | 
 93 |     def pp_filter(self):
 94 |         """
 95 |         property filter
 96 |         """
 97 |         violation_counter = 0
 98 | 
 99 |         mw = CalcExactMolWt(self.mol)
100 |         if mw > self.MW:
101 |             yield "MW"
102 |         if mw > 500:
103 |             violation_counter += 1
104 |         if self.gen > 3:
105 |             if 81 > mw:
106 |                 yield "MW"
107 | 
108 |         mol_hbd = CalcNumHBD(self.mol)
109 |         if mol_hbd > self.hbd:
110 |             yield "HBD"
111 |         if mol_hbd > 5:
112 |             violation_counter += 1
113 | 
114 |         mol_hba = CalcNumHBA(self.mol)
115 |         if mol_hba > self.hba:
116 |             yield "HBA"
117 |         if mol_hba > 10:
118 |             violation_counter += 1
119 | 
120 |         logp = Descriptors.MolLogP(self.mol)
121 |         if logp < self.logP_lower or logp > self.logP_upper:
122 |             yield "cLogP"
123 |         if logp > 5:
124 |             violation_counter += 1
125 | 
126 |         if violation_counter > self.lipinski_violation:
127 |             yield "Lipinski Violation"
128 | 
129 |         if Descriptors.TPSA(self.mol) > self.tpsa:
130 |             yield "TPSA"
131 | 
132 |         if CalcNumRotatableBonds(self.mol) > self.rdkit_rotatable_bound_num:
133 |             yield "RDKit Rotatable Bonds"
134 | 
135 |         if get_keen_rotatable_bound_num(self.mol) > self.keen_rotatable_bound_num:
136 |             # rotatable bound customized @dalong
137 |             yield "Keen Rotatable Bounds"
138 |         if get_rigid_body_num(self.mol) > self.rigid_body_num:
139 |             # rotatable bound customized @dalong
140 |             yield "Rigid Body"
141 |         yield "PASS"
142 | 
143 |     def load_pains_filter(self):
144 |         # read smarts for pains
145 |         with open(os.path.join(os.getenv("SECSE"), 'growing/pains_smarts.json')) as f:
146 |             data = json.load(f)
147 |         pains_smarts = dict((k, Chem.MolFromSmarts(v)) for k, v in data.items())
148 |         self.pains_smarts = pains_smarts
149 | 
150 |     def alert_filter(self):
151 |         self.load_pains_filter()
152 |         for name in self.pains_smarts:
153 |             sma = self.pains_smarts[name]
154 |             if self.mol.HasSubstructMatch(sma):
155 |                 yield "PAINS"
156 |         yield "PASS"
157 | 
158 |     def substructure_filter(self):
159 |         yield self.strutFilter.sfilter(self.mol)
160 | 
161 |     def ring_system_filter(self):
162 |         ring_sys = RingSystems(self.mol)
163 |         if ring_sys.ring_check(self.max_ring_system_size, self.bridged_site_count, self.spiro_site_count,
164 |                                self.fused_site_count, self.ring_system_count):
165 |             yield "PASS"
166 |         yield "RS"
167 | 
168 |     def custom_filter(self):
169 |         # add Chiral center filter, cycle size less than 7, remove 3 continues hetero-atom
170 |         chiral_tags = Chem.FindMolChiralCenters(self.mol, includeUnassigned=True, useLegacyImplementation=True)
171 |         # the maximum number of chiral center <= 3
172 |         if len(chiral_tags) > self.chiral_center:
173 |             yield "CC"
174 | 
175 |         chiral_atom_list = set([x[0] for x in chiral_tags])
176 |         rings = self.mol.GetRingInfo().AtomRings()
177 | 
178 |         if rings:
179 |             # the maximum of ring size <= 7
180 |             mol_max_ring_size = max([len(x) for x in rings])
181 |             if mol_max_ring_size > self.max_ring_size:
182 |                 yield "max ring size"
183 | 
184 |             if len(chiral_tags) == 3:
185 |                 # 3 CCs should not in the same ring
186 |                 for ring in rings:
187 |                     if len(set(ring).intersection(chiral_atom_list)) >= 3:
188 |                         yield "chiral center in one ring >2"
189 |         yield "PASS"
190 | 
191 |     def heteroatom_filter(self):
192 |         hetero_ratio = Chem.rdMolDescriptors.CalcNumHeteroatoms(self.mol) / self.mol.GetNumHeavyAtoms()
193 |         if hetero_ratio > self.heteroatom_ratio:
194 |             yield "heteroatom_ratio"
195 |         else:
196 |             yield "PASS"
197 | 
198 |     def charge_filter(self):
199 |         negative_charge = Chem.MolFromSmarts("[*-1]")
200 |         positive_charge = Chem.MolFromSmarts("[*+1]")
201 |         charged_smi = charge_mol(self.input_smiles)
202 |         mol = Chem.MolFromSmiles(charged_smi)
203 |         if mol is None:
204 |             mol = self.mol
205 |         nc = len(mol.GetSubstructMatches(negative_charge))
206 |         pc = len(mol.GetSubstructMatches(positive_charge))
207 |         npc = nc + pc
208 |         if npc <= 1:
209 |             yield "PASS"
210 |         elif npc == 2:
211 |             if nc <= 1:
212 |                 yield "PASS"
213 |             else:
214 |                 yield "Charge"
215 |         else:
216 |             yield "Charge"
217 | 
218 |     def similarity_filter(self):
219 |         fp = AllChem.GetMorganFingerprintAsBitVect(self.mol, 2, 512)
220 | 
221 |     def QED_filter(self):
222 |         if QED.qed(self.mol) >= self.qed:
223 |             yield "PASS"
224 |         else:
225 |             yield "QED"
226 | 
227 |     def SA_filter(self):
228 |         sa_score = sascorer.calculateScore(self.mol)
229 |         if sa_score <= self.rdkit_sa_score:
230 |             yield "PASS"
231 |         else:
232 |             yield "SA score"
233 | 
234 |     def my_filter(self):
235 |         if self.gen > 3:
236 |             tag = user_filter(self.mol)
237 |             if tag:
238 |                 yield "PASS"
239 |             else:
240 |                 yield "CUSTOM"
241 |         else:
242 |             yield "PASS"
243 | 
244 | 
def mol_filter(molfilter: Filter, smi):
    """Run every filter stage on one SMILES and report the first failure.

    :param molfilter: reusable ``Filter`` instance; ``smi`` is loaded into it.
    :param smi: SMILES string of the molecule to check.
    :return: ``"PASS"`` if the first flag of every stage is ``"PASS"``,
        otherwise the first non-``"PASS"`` flag in stage order.
    """
    molfilter.load_mol(smi)
    # All stage generators are created up front; each is only advanced once.
    stages = (
        molfilter.pp_filter(),
        molfilter.custom_filter(),
        molfilter.charge_filter(),
        molfilter.heteroatom_filter(),
        molfilter.substructure_filter(),
        molfilter.ring_system_filter(),
        molfilter.alert_filter(),
        molfilter.QED_filter(),
        molfilter.SA_filter(),
        molfilter.my_filter(),
    )
    for stage in stages:
        verdict = next(stage)
        if verdict == "PASS":
            continue
        return verdict
    return "PASS"
263 | 
264 | 
def file_filter(file_path, workdir, gen, config):
    """Filter every molecule listed in ``file_path`` and write the flags.

    Each non-empty input line is a comma-separated record whose SMILES is the
    fifth field counted from the end.  The line is copied to
    ``<workdir>/filter_flag/<basename of file_path>`` with the filter verdict
    appended as an extra trailing field.

    :param file_path: input file with one comma-separated record per line.
    :param workdir: directory that already contains a ``filter_flag`` folder.
    :param gen: generation number forwarded to ``Filter``.
    :param config: path of the configuration file forwarded to ``Filter``.
    """
    molsfilter = Filter(gen, config)
    out_path = os.path.join(workdir, "filter_flag", os.path.basename(file_path))
    with open(file_path, "r") as inf, open(out_path, "w") as outf:
        # stream the input instead of loading the whole file into memory
        for raw_line in inf:
            line = raw_line.strip()
            if not line:
                # blank lines carry no record; indexing them would raise
                continue
            smi = line.split(",")[-5]
            flag = mol_filter(molsfilter, smi)
            outf.write(line + "," + flag + "\n")
275 | 
276 | 
if __name__ == '__main__':
    # Command-line entry point: filter the molecules of one input file.
    parser = argparse.ArgumentParser(description="Filter per file")
    parser.add_argument("file_path", help="File path")
    parser.add_argument("workdir", help="Workdir")
    parser.add_argument("gen", help="generation number")
    parser.add_argument("config", help="Configuration file")
    args = parser.parse_args()
    file_filter(args.file_path, args.workdir, args.gen, args.config)
291 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
  1 | Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2021 Suzhou Keen Therapeutics Co., Ltd.
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/secse/grow_processes.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python  
  2 | # -*- coding:utf-8 _*-
  3 | """ 
  4 | @author: Lu Chong
  5 | @file: grow_processes.py
  6 | @time: 2021/11/17/13:49
  7 | """
  8 | import csv
  9 | import shutil
 10 | import os
 11 | import pandas as pd
 12 | import rdkit
 13 | import configparser
 14 | from evaluate.glide_docking import dock_by_glide
 15 | from growing.mutation.mutation import mutation_df
 16 | from scoring.ranking import Ranking
 17 | from scoring.diversity_score import clustering
 18 | from scoring.docking_score_prediction import prepare_files
 19 | from scoring.sampling import sample_by_similarity, sample_by_rule_weight
 20 | from evaluate.docking import dock_by_py_vina, dock_by_py_autodock_gpu, dock_by_unidock
 21 | from report.grow_path import write_growth
 22 | from utilities.load_rules import json_to_DB
 23 | from utilities.function_helper import shell_cmd_execute
 24 | import time
 25 | from loguru import logger
 26 | 
 27 | rdkit.RDLogger.DisableLog("rdApp.*")
 28 | 
 29 | 
 30 | class Grow(object):
 31 |     def __init__(self, generation, mols_smi, workdir, num_per_gen, docking_program, receptor, start_gen, dl_mode,
 32 |                  config_path, cpu_num=0, gpu_num=1, rule_db=0, project_code="GEN", x=0, y=0, z=0, box_size_x=0,
 33 |                  box_size_y=0, box_size_z=0):
 34 | 
 35 |         self.mols_smi = mols_smi
 36 |         self.total_generation = int(generation)
 37 |         self.workdir = workdir
 38 |         self.num_per_gen = num_per_gen
 39 |         self.cpu_num = cpu_num
 40 |         self.gpu_num = gpu_num
 41 | 
 42 |         self.target = receptor
 43 |         self.x = x
 44 |         self.y = y
 45 |         self.z = z
 46 |         self.box_size_x = box_size_x
 47 |         self.box_size_y = box_size_y
 48 |         self.box_size_z = box_size_z
 49 | 
 50 |         self.start_gen = start_gen  # record start
 51 |         self.gen = start_gen  # generation num for now
 52 |         # Resume from breakpoint
 53 |         if self.gen > 0:
 54 |             self.workdir_now = os.path.join(self.workdir, "generation_{}".format(self.gen))
 55 |             self.mols_smi = os.path.join(self.workdir_now, "mols_for_docking.smi")
 56 | 
 57 |         self.docking_program = docking_program.lower()
 58 |         self.dl_mode = dl_mode
 59 | 
 60 |         self.config_path = config_path
 61 | 
 62 |         rule_db = str(rule_db)
 63 |         if rule_db in [0, "0"]:
 64 |             self.rule_db = None
 65 |         elif rule_db.endswith("json"):
 66 |             os.makedirs(self.workdir, exist_ok=True)
 67 |             self.rule_db = os.path.join(self.workdir, "rules.db")
 68 |             json_to_DB(rule_db, self.rule_db)
 69 |         elif rule_db.endswith("db"):
 70 |             self.rule_db = rule_db
 71 |         else:
 72 |             raise Exception("Please check your input rule file.")
 73 |         self.project_code = project_code
 74 | 
 75 |         self.lig_sdf = None
 76 |         self.winner_df = None
 77 |         self.winner_path = None
 78 |         self._generation_dir = None
 79 |         self._filter_df = None
 80 |         self._dock_df = None
 81 |         self._sampled_df = None
 82 |         self.workdir_now = None
 83 | 
 84 |         self._GROWING_STATE_LIST = ["GROWING", "BROKEN", "STOP"]
 85 |         self.growing_flag = self._GROWING_STATE_LIST[0]
 86 | 
 87 |     def docking_sh(self, step):
 88 |         start = time.time()
 89 |         os.makedirs(self.workdir_now, exist_ok=True)
 90 | 
 91 |         if "vina" in self.docking_program:
 92 |             self.docking_vina(step)
 93 |         elif "glide" in self.docking_program:
 94 |             self.docking_glide(step)
 95 |         elif "autodock-gpu" in self.docking_program:
 96 |             self.docking_autodock_gpu(step)
 97 |         elif "unidock" in self.docking_program:
 98 |             self.docking_unidock(step)
 99 | 
100 |         # ranking and find top fragments
101 |         self.lig_sdf = os.path.join(self.workdir_now, "docking_outputs_with_score.sdf")
102 |         end = time.time()
103 |         logger.info("Docking time cost: {} min.".format(round((end - start) / 60, 2)))
104 | 
105 |     def docking_autodock_gpu(self, step):
106 |         logger.info("Step {}: Docking with AutoDock GPU ...".format(step))
107 |         dock_by_py_autodock_gpu(self.workdir_now, self.mols_smi, self.target, self.cpu_num, self.gpu_num)
108 | 
109 |     def docking_vina(self, step):
110 |         logger.info("Step {}: Docking with Autodock Vina ...".format(step))
111 |         dock_by_py_vina(self.workdir_now, self.mols_smi, self.target, self.cpu_num, self.x, self.y, self.z,
112 |                         self.box_size_x, self.box_size_y, self.box_size_z)
113 | 
114 |     def docking_glide(self, step):
115 |         logger.info("Step {}: Docking with Glide ...".format(step))
116 |         # set different docking precision for different generation
117 |         if self.gen < 1:
118 |             dock_mode = "SP"
119 |         else:
120 |             dock_mode = "HTVS"
121 |         dock_by_glide(self.workdir_now, self.mols_smi, self.target, self.gen, dock_mode, self.cpu_num)
122 | 
123 |     def docking_unidock(self, step):
124 |         logger.info("Step {}: Docking with UniDock ...".format(step))
125 |         dock_by_unidock(self.workdir_now, self.mols_smi, self.target, self.cpu_num, self.x, self.y, self.z,
126 |                         self.box_size_x, self.box_size_y, self.box_size_z)
127 | 
128 |     def ranking_docked_mols(self, step=2):
129 |         logger.info("Step {}: Ranking docked molecules...".format(str(step)))
130 |         ranking = Ranking(sdf=self.lig_sdf, gen=self.gen, config_file=self.config_path)
131 |         if ranking.ranking_flag:
132 |             ranking.docked_df.to_csv(
133 |                 os.path.join(self.workdir, "generation_" + str(self.gen), "docked_gen_" + str(self.gen) + ".csv"),
134 |                 index=False)
135 |             ranking.tournament_selection()
136 |             # merge mols whose evaluate score below the cutoff
137 |             ranking.mols_score_below_cutoff()
138 |             self.winner_df = ranking.final_df
139 |             # generate smi file
140 |             self.winner_path = os.path.join(self.workdir, "generation_" + str(self.gen),
141 |                                             "best_fragment_gen_" + str(self.gen) + ".smi")
142 |             self.winner_df["id_gen_" + str(self.gen)] = self.winner_df["id_gen_" + str(self.gen)].apply(
143 |                 lambda x: x.split("\t")[0])
144 |             self.winner_df[["smiles_gen_" + str(self.gen), "id_gen_" + str(self.gen)]].to_csv(self.winner_path,
145 |                                                                                               sep="\t",
146 |                                                                                               index=False,
147 |                                                                                               quoting=csv.QUOTE_NONE)
148 |         else:
149 |             self.growing_flag = self._GROWING_STATE_LIST[1]
150 |         self.check_growing()
151 | 
152 |     def dl_pre(self, step):
153 |         logger.info("Step {}.1: Building deep learning models...".format(str(step)))
154 | 
155 |         train, pre = prepare_files(self.gen, self.workdir, self.dl_mode)
156 |         if pre is None:
157 |             logger.info("Skipping docking score prediction as all molecules have been docked.")
158 |             self.dl_mode = 0
159 |             return
160 |         dl_shell = os.path.join(os.getenv("SECSE"), "scoring", "chemprop_pre.sh")
161 |         config = configparser.ConfigParser()
162 |         config.read(self.config_path)
163 | 
164 |         dl_select_num = config.get("prediction", "dl_per_gen")
165 |         dl_cmd = [dl_shell, self.workdir, train, pre, str(self.gen), dl_select_num, "22"]
166 |         shell_cmd_execute(dl_cmd, 0)
167 |         # docking top predicted compounds
168 |         self.workdir_now = os.path.join(self.workdir, "generation_{}_pre".format(self.gen))
169 |         self.mols_smi = os.path.join(self.workdir_now, "mols_for_docking_pred.smi")
170 |         self.docking_sh(str(step) + ".2")
171 | 
172 |         # merge results to the current generation if prediction per generation
173 |         if self.dl_mode == 1:
174 |             self.lig_sdf = os.path.join(self.workdir, "generation_{}".format(self.gen),
175 |                                         "docking_outputs_with_score.sdf")
176 |             merge_cmd = ["cat", os.path.join(self.workdir_now, "docking_outputs_with_score.sdf"), ">>", self.lig_sdf]
177 |             shell_cmd_execute(merge_cmd)
178 |             self.workdir_now = os.path.join(self.workdir, "generation_{}".format(self.gen))
179 | 
180 |     def check_growing(self):
181 |         if self.growing_flag == self._GROWING_STATE_LIST[0]:
182 |             # still growing
183 |             pass
184 |         elif self.growing_flag == self._GROWING_STATE_LIST[1]:
185 |             # broken and report generated molecules
186 |             if self.dl_mode == 2:
187 |                 self.dl_mode = 0
188 |             write_growth(self.config_path, self.gen - 1, self.dl_mode)
189 |             raise SystemExit(
190 |                 "Note: Calculations are only performed from the generation {} to the generation {} out of the preset generations.".format(
191 |                     self.start_gen, self.gen - 1))
192 |         elif self.growing_flag == self._GROWING_STATE_LIST[2]:
193 |             # regular finsh and stop the program
194 |             write_growth(self.config_path, self.gen, self.dl_mode)
195 |             raise SystemExit(
196 |                 "Finish the calculation from the generation {} to the generation {}".format(self.start_gen, self.gen))
197 | 
198 |     def grow(self):
199 |         logger.info(f"Input fragment file: {self.mols_smi}")
200 |         logger.info(f"Target grid file: {self.target}")
201 |         logger.info(f"Workdir: {self.workdir}")
202 |         logger.info(f"Generation {self.gen} ...")
203 |         # generation 0 : 1.evaluate; 2.ranking
204 |         self.workdir_now = os.path.join(self.workdir, "generation_" + str(self.gen))
205 |         step = 1
206 |         self.docking_sh(step)
207 |         step += 1
208 |         if self.gen > 2 and self.dl_mode == 1:
209 |             try:
210 |                 self.dl_pre(step)
211 |                 step += 1
212 |             except:
213 |                 pass
214 |         self.ranking_docked_mols(step)
215 | 
216 |         # next generations: 1.copy the best mols from last generation as seed; 2.mutation; 3.filter; 4. sampling;
217 |         #                   5.clustering; 6.evaluate; 7.ranking
218 |         for g in range(1, self.total_generation + 1):
219 |             self.gen += 1
220 |             logger.info(f"Generation {self.gen} ...")
221 |             self.workdir_now = os.path.join(self.workdir, "generation_" + str(self.gen))
222 |             if os.path.exists(self.workdir_now):
223 |                 shutil.rmtree(self.workdir_now)
224 |             os.makedirs(self.workdir_now, exist_ok=True)
225 |             self.winner_df.to_csv(os.path.join(self.workdir_now, "seed_fragments.smi"), sep="\t", index=False,
226 |                                   quoting=csv.QUOTE_NONE)
227 |             # mutation
228 |             logger.info("Step 1: Mutation")
229 | 
230 |             self._generation_dir = os.path.join(self.workdir_now, "generation_split_by_seed")
231 |             self.winner_df = self.winner_df.reset_index(drop=True)
232 |             header = mutation_df(self.winner_df, self.workdir, self.cpu_num, self.gen, self.rule_db, self.project_code)
233 |             generation_path = os.path.join(self.workdir_now, "generation")
234 | 
235 |             cmd_cat = ["cat", os.path.join(self.workdir_now, "mutation.csv"), ">", generation_path + ".raw"]
236 |             shell_cmd_execute(cmd_cat)
237 |             cmd_dedup = ["awk -F',' '!seen[$(NF-4)]++'", generation_path + ".raw", ">", generation_path + ".csv"]
238 |             shell_cmd_execute(cmd_dedup)
239 |             if not os.path.exists(self._generation_dir):
240 |                 os.mkdir(self._generation_dir)
241 |             cmd_split = ["awk -F, '{print>\"" + self._generation_dir + "/\"$2\".csv\"}'", generation_path + ".csv"]
242 |             shell_cmd_execute(cmd_split)
243 |             # filter
244 |             logger.info("Step 2: Applying filter to all mutated molecules.")
245 |             time1 = time.time()
246 |             cmd_filter = [os.path.join(os.getenv("SECSE"), "growing", "filter_parallel.sh"), self.workdir_now,
247 |                           str(self.gen), self.config_path, str(self.cpu_num)]
248 |             shell_cmd_execute(cmd_filter)
249 |             time2 = time.time()
250 |             logger.info("Filter runtime: {:.2f} min.".format((time2 - time1) / 60))
251 | 
252 |             # do not sample or clustering if generated molecules less than wanted size
253 |             try:
254 |                 self._filter_df = pd.read_csv(os.path.join(self.workdir_now, "filter.csv"), header=None)
255 |             except pd.errors.EmptyDataError:
256 |                 self.growing_flag = self._GROWING_STATE_LIST[1]
257 |                 logger.info("No molecules met the filter criteria. Please adjust your configuration.")
258 |                 self.check_growing()
259 | 
260 |             self._filter_df.columns = header + ["flag"]
261 |             self._filter_df["type"] = self._filter_df["reaction_id_gen_" + str(self.gen)].apply(
262 |                 lambda x: "-".join(x.split("-")[:2]))
263 |             self._filter_df.to_csv(os.path.join(self.workdir_now, "filter.csv"), index=False)
264 |             if self._filter_df.shape[0] <= self.num_per_gen:
265 |                 self._dock_df = self._filter_df
266 |                 self._dock_df.to_csv(os.path.join(self.workdir_now, "sampled.csv"), index=False)
267 |             else:
268 |                 # sampling
269 |                 logger.info("Step 3: Sampling")
270 |                 self._sampled_df = sample_by_rule_weight(self.gen, self._filter_df, self.workdir_now)
271 |                 # self._sampled_df = sample_by_similarity(self.gen, self._filter_df, self.workdir_now, self.num_per_gen)
272 |                 logger.info("Step 4: Clustering")
273 |                 # clustering
274 |                 num_clusters = int(self.num_per_gen / 5) + 1
275 |                 self._sampled_df = clustering(self._sampled_df, "smiles_gen_" + str(self.gen), self.gen, self.cpu_num,
276 |                                               num_clusters)
277 | 
278 |                 # sample enough mol
279 |                 self._dock_df = self._sampled_df.sort_values("cluster_center_dis_gen_" + str(self.gen)).groupby(
280 |                     "cluster_center_gen_" + str(self.gen)).head(int(self.num_per_gen / num_clusters) + 1)
281 | 
282 |             # write file for evaluate
283 |             self.mols_smi = os.path.join(self.workdir_now, "mols_for_docking.smi")
284 |             self._dock_df[["smiles_gen_" + str(self.gen), "id_gen_" + str(self.gen)]].to_csv(self.mols_smi, index=False,
285 |                                                                                              header=False, sep="\t")
286 | 
287 |             # evaluate
288 |             step = 5
289 |             self.docking_sh(step)
290 |             # run deep learning model, when ( dl_mode is 1) & (not all generated compounds were docked)
291 |             if (self.dl_mode == 1) and (self._filter_df.shape[0] > self._dock_df.shape[0]):
292 |                 step += 1
293 |                 self.dl_pre(step)
294 |             # ranking
295 |             step += 1
296 |             self.ranking_docked_mols(step)
297 | 
298 |         if self.dl_mode == 2:
299 |             step += 1
300 |             self.dl_pre(step)
301 | 
302 |         self.growing_flag = self._GROWING_STATE_LIST[2]
303 |         self.check_growing()
304 | 


--------------------------------------------------------------------------------
/secse/growing/pains_smarts.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "ene_six_het_A(483)": "[#6]-1(-[#6](~[!#6&!#1]~[#6]-[!#6&!#1]-[#6]-1=[!#6&!#1])~[!#6&!#1])=[#6;!R]",
  3 |   "hzone_phenol_A(479)": "c:1:c:c(:c(:c:c:1)-[#6]=[#7]-[#7])-[O;H1]",
  4 |   "anil_di_alk_A(478)": "[C;H2]N([C;H2])c1cc([$([H]),$([C;H2]),$([O][C;H2][C;H2])])c(N)c([H])c1",
  5 |   "indol_3yl_alk(461)": "n:1(c(c(c:2:c:1:c:c:c:c:2-[H])-[C;D4]-[H])-[$([C;H2]),$([C]=,:[!C]),$([C;H1][N]),$([C;H1]([C;H2])[N;H1][C;H2]),$([C;H1]([C;H2])[C;H2][N;H1][C;H2])])-[$([H]),$([C;H2])]",
  6 |   "quinone_A(370)": "[!#6&!#1]=[#6]-1-[#6]=,:[#6]-[#6](=[!#6&!#1])-[#6]=,:[#6]-1",
  7 |   "azo_A(324)": "[#7;!R]=[#7]",
  8 |   "imine_one_A(321)": "[#6]-[#6](=[!#6&!#1;!R])-[#6](=[!#6&!#1;!R])-[$([#6]),$([#16](=[#8])=[#8])]",
  9 |   "mannich_A(296)": "[#7]-[C;X4]-c1ccccc1-[O;H1]",
 10 |   "anil_di_alk_B(251)": "c:1:c:c(:c:c:c:1-[#7](-[#6;X4])-[#6;X4])-[#6]=[#6]",
 11 |   "anil_di_alk_C(246)": "c:1:c:c(:c:c:c:1-[#8]-[#6;X4])-[#7](-[#6;X4])-[$([#1]),$([#6;X4])]",
 12 |   "ene_rhod_A(235)": "[#7]-1-[#6](=[#16])-[#16]-[#6](=[#6])-[#6]-1=[#8]",
 13 |   "hzone_phenol_B(215)": "c:1(:c:c:c(:c:c:1)-[#6]=[#7]-[#7])-[#8]-[#1]",
 14 |   "ene_five_hetA1(201A)": "[#6]-1(=[#6])-[#6]=[#7]-[#7,#8,#16]-[#6]-1=[#8]",
 15 |   "ene_five_het_A(201)": "[#6]-1(=[#6])-[#6]=[#7]-[!#6&!#1]-[#6]-1=[#8]",
 16 |   "anil_di_alk_D(198)": "c:1:c:c(:c:c:c:1-[#7](-[#6;X4])-[#6;X4])-[#6;X4]-[$([#8]-[#1]),$([#6]=[#6]-[#1]),$([#7]-[#6;X4])]",
 17 |   "imine_one_isatin(189)": "[#8]=[#6]-2-[#6](=!@[#7]-[#7])-c:1:c:c:c:c:c:1-[#7]-2",
 18 |   "anil_di_alk_E(186)": "[#6](-[#1])-[#7](-[#6](-[#1])-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[$([#1]),$([#6](-[#1])-[#1])])-[#6](-[#1])-[$([#1]),$([#6]-[#1])])-[#1])-[#1]",
 19 |   "thiaz_ene_A(128)": "[#6]-1(=[#6](-[$([#1]),$([#6](-[#1])-[#1]),$([#6]=[#8])])-[#16]-[#6](-[#7]-1-[$([#1]),$([#6]-[#1]),$([#6]:[#6])])=[#7;!R])-[$([#6](-[#1])-[#1]),$([#6]:[#6])]",
 20 |   "pyrrole_A(118)": "n2(-[#6]:1:[!#1]:[#6]:[#6]:[#6]:[#6]:1)c(cc(c2-[#6;X4])-[#1])-[#6;X4]",
 21 |   "catechol_A(92)": "c:1:c:c(:c(:c:c:1)-[#8;H1])-[#8;H1]",
 22 |   "ene_five_het_B(90)": "[#6]-1(=[#6])-[#6](-[#7]=[#6]-[#16]-1)=[#8]",
 23 |   "imine_one_fives(89)": "[#6]-1=[!#1]-[!#6&!#1]-[#6](-[#6]-1=[!#6&!#1;!R])=[#8]",
 24 |   "ene_five_het_C(85)": "[#6]-1(-[#6](-[#6]=[#6]-[!#6&!#1]-1)=[#6])=[!#6&!#1]",
 25 |   "hzone_pipzn(79)": "CN1[C;H2][C;H2]N(N=[C;H1][#6]=,:[#6])[C;H2][C;H2]1",
 26 |   "keto_keto_beta_A(68)": "c:1-2:c(:c:c:c:c:1)-[#6](=[#8])-[#6;X4]-[#6]-2=[#8]",
 27 |   "hzone_pyrrol(64)": "Cn1cccc1C=NN",
 28 |   "ene_one_ene_A(57)": "[#6]=!@[#6](-[!#1])-@[#6](=!@[!#6&!#1])-@[#6](=!@[#6])-[!#1]",
 29 |   "cyano_ene_amine_A(56)": "N#CC=C(N)C(C#N)C#N",
 30 |   "ene_five_one_A(55)": "c:1-2:c(:c:c:c:c:1)-[#6](=[#8])-[#6](=[#6])-[#6]-2=[#8]",
 31 |   "cyano_pyridone_A(54)": "N#Cc1ccc[#7;H1]c1=S",
 32 |   "anil_alk_ene(51)": "c:1:c:c-2:c(:c:c:1)-[#6]-3-[#6](-[#6]-[#7]-2)-[#6]-[#6]=[#6]-3",
 33 |   "amino_acridine_A(46)": "c:1:c:2:c(:c:c:c:1):n:c:3:c(:c:2-[#7]):c:c:c:c:3",
 34 |   "ene_five_het_D(46)": "[#6]-1(=[#6])-[#6](=[#8])-[#7]-[#7]-[#6]-1=[#8]",
 35 |   "thiophene_amino_Aa(45)": "[H]N([H])c1sc([!#1])c([!#1])c1C=O",
 36 |   "ene_five_het_E(44)": "[#7]-[#6]=!@[#6]-2-[#6](=[#8])-c:1:c:c:c:c:c:1-[!#6&!#1]-2",
 37 |   "sulfonamide_A(43)": "NS(=O)(=O)c1cc([F,Cl,Br,I])cc([F,Cl,Br,I])c1O",
 38 |   "thio_ketone(43)": "[#6]-[#6](=[#16])-[#6]",
 39 |   "sulfonamide_B(41)": "[H]N(c1ccc([O;H1])cc1)S(=O)=O",
 40 |   "anil_no_alk(40)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[$([#8]),$([#7]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#7](-[#1])-[#1]",
 41 |   "thiophene_amino_Ab(40)": "[$([#1]),$([#6](-[#1])-[#1]),$([#6]:[#6])]-c:1:c(:c(:c(:s:1)-[#7](-[#1])-[#6](=[#8])-[#6])-[#6](=[#8])-[#8])-[$([#6]:1:[#6]:[#6]:[#6]:[#6]:[#6]:1),$([#6]:1:[#16]:[#6]:[#6]:[#6]:1)]",
 42 |   "het_pyridiniums_A(39)": "[H]c1c([$([N]),$([H])])ccc2ccc[n+]([$([O;X1]),$([C;H3]),$([#6][#6]:[#6]),$([#6][#6][#8]),$([#6][#6](C)=[#8]),$([#6][#6](N)=[#8]),$([#6][#6][#6])])c12",
 43 |   "anthranil_one_A(38)": "CC(=O)c1ccccc1[#7;H1][!$([#6]=[#8])]",
 44 |   "cyano_imine_A(37)": "[#7;H1][#7]=[#6](-[#6]#[#7])-[#6]=[!#6&!#1;!R]",
 45 |   "diazox_sulfon_A(36)": "[#7](-c:1:c:c:c:c:c:1)-[#16](=[#8])(=[#8])-[#6]:2:[#6]:[#6]:[#6]:[#6]:3:[#7]:[$([#8]),$([#16])]:[#7]:[#6]:2:3",
 46 |   "hzone_anil_di_alk(35)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])=[#7]-[#7]-[$([#6](=[#8])-[#6](-[#1])(-[#1])-[#16]-[#6]:[#7]),$([#6](=[#8])-[#6](-[#1])(-[#1])-[!#1]:[!#1]:[#7]),$([#6](=[#8])-[#6]:[#6]-[#8]-[#1]),$([#6]:[#7]),$([#6](-[#1])(-[#1])-[#6](-[#1])-[#8]-[#1])])-[#1])-[#1]",
 47 |   "rhod_sat_A(33)": "[#7]-1-[#6](=[#16])-[#16]-[#6;X4]-[#6]-1=[#8]",
 48 |   "hzone_enamin(30)": "[#7][#7]=[#6][#6](-[$([#1]),$([#6])])=[#6]([#6])-!@[$([#7]),$([#8])]",
 49 |   "pyrrole_B(29)": "[#6;X4]c1ccc([#6]:[#6])n1c2ccccc2",
 50 |   "thiophene_hydroxy(28)": "s1ccc(c1)-[#8;H1]",
 51 |   "cyano_pyridone_B(27)": "[!#6][#6]1=,:[#7][#6]([#6])=,:[#6](C#N)[#6](=O)[#7]1",
 52 |   "imine_one_sixes(27)": "[#6]-1(-[#6](=[#8])-[#7]-[#6](=[#8])-[#7]-[#6]-1=[#8])=[#7]",
 53 |   "dyes5A(27)": "[#6]=,:[#6]:[#7]([#6])~[#6]:[#6]=,:[#6][#6]~[#6]:[#7]",
 54 |   "naphth_amino_A(25)": "c1cc2cccc3[#7][#6]=,:[#7]c(c1)c23",
 55 |   "naphth_amino_B(25)": "[C;X4]1[N;H1]c3cccc2cccc([N;H1]1)c23",
 56 |   "ene_one_ester(24)": "[#6]-[#8]-[#6](=[#8])-[#6](-[#7][#6])=[#6]-[#6](-[#6])=[#8]",
 57 |   "thio_dibenzo(23)": "S=[#6]1[#6]=,:[#6][!#6,!#6][#6]=,:[#6]1",
 58 |   "cyano_cyano_A(23)": "[#6](-[#6]#[#7])(-[#6]#[#7])-[#6](-[$([#6]#[#7]),$([#6]=[#7])])-[#6]#[#7]",
 59 |   "hzone_acyl_naphthol(22)": "[H]c2c([H])c([H])c1c([H])c(C(=O)NN=C)c(O)c([H])c1c2[H]",
 60 |   "het_65_A(21)": "O=Cc1cnn2c([#8;H1])ccnc12",
 61 |   "imidazole_A(19)": "n:1:c(:n(:c(:c:1-c:2:c:c:c:c:c:2)-c:3:c:c:c:c:c:3)-[#1])-[#6]:[!#1]",
 62 |   "ene_cyano_A(19)": "[#6](-[#6]#[#7])(-[#6]#[#7])=[#6]-c:1:c:c:c:c:c:1",
 63 |   "anthranil_acid_A(19)": "C=NNc1ccccc1C(=O)[#8;H1]",
 64 |   "dyes3A(19)": "[#6]-,:[#6]:[#7+]=,:[#6][#6]=[#6][#7][#6;X4]",
 65 |   "dhp_bis_amino_CN(19)": "[#6]=,:[#6]C1C(C#N)=C(N)SC(N)=C1C#N",
 66 |   "het_6_tetrazine(18)": "[#7]~[#6]:1:[#7]:[#7]:[#6](:[$([#7]),$([#6]-[#1]),$([#6]-[#7]-[#1])]:[$([#7]),$([#6]-[#7])]:1)-[$([#7]-[#1]),$([#8]-[#6](-[#1])-[#1])]",
 67 |   "ene_one_hal(17)": "[#6]-[#6]=[#6](-[F,Cl,Br,I])-[#6](=[#8])-[#6]",
 68 |   "cyano_imine_B(17)": "N#CC(C#N)=NNc1ccccc1",
 69 |   "thiaz_ene_B(17)": "[#6]NC(=O)-!@[#6]1=,:[#6]([$([N]),$(NC(=O)[#6]:[#6])])[#7]([$([#6;H2]-[#6;H1]=[#6;H2]),$([#6]=,:[#6])])[#6](=S)[#16]1",
 70 |   "ene_rhod_B(16)": "[H]C([$([#6]-[#35]),$([#6]:[#6](-[#1]):[#6](-[F,Cl,Br,I]):[#6]:[#6]-[F,Cl,Br,I]),$([#6]:[#6](-[#1]):[#6](-[#1]):[#6]-[#16]-[#6](-[#1])-[#1]),$([#6]:[#6]:[#6]:[#6]:[#6]:[#6]:[#6]:[#6]:[#6]:[#6]-[#8]-[#6;H2]),$([#6]:1:[#6](-[#6;H2]):[#7](-[#6;H2]):[#6](-[#6;H2]):[#6]:1)])=C1SC(=O)[N]C1=O",
 71 |   "thio_carbonate_A(15)": "[#7,#8]c2ccc1oc(=[#8,#16])sc1c2",
 72 |   "anil_di_alk_furan_A(15)": "[#7](-[#6](-[#1])-[#1])(-[#6](-[#1])-[#1])-c:1:c(:c(:c(:o:1)-[#6]=[#7]-[#7](-[#1])-[#6]=[!#6&!#1])-[#1])-[#1]",
 73 |   "ene_five_het_F(15)": "O=[#6]2[#6](=!@[#6]c1ccccc1)Sc3ccccc23",
 74 |   "anil_di_alk_F(14)": "c:1:c:c(:c:c:c:1-[#6;X4]-c:2:c:c:c(:c:c:2)-[#7](-[$([#1]),$([#6;X4])])-[$([#1]),$([#6;X4])])-[#7](-[$([#1]),$([#6;X4])])-[$([#1]),$([#6;X4])]",
 75 |   "hzone_anil(14)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#1])-[#1])-[#1])-[#1])-[#6]=[#7]-[#7]-[#1]",
 76 |   "het_5_pyrazole_OH(14)": "c1(nn(c(c1-[$([#1]),$([#6]-[#1])])-[#8]-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#1])-[#1])-[#1])-[#6;X4]",
 77 |   "het_thio_666_A(13)": "c:2(:c:1-[#16]-c:3:c(-[#7](-c:1:c(:c(:c:2-[#1])-[#1])-[#1])-[$([#1]),$([#6](-[#1])(-[#1])-[#1]),$([#6](-[#1])(-[#1])-[#6]-[#1])]):c(:c(~[$([#1]),$([#6]:[#6])]):c(:c:3-[#1])-[$([#1]),$([#7](-[#1])-[#1]),$([#8]-[#6;X4])])~[$([#1]),$([#7](-[#1])-[#6;X4]),$([#6]:[#6])])-[#1]",
 78 |   "styrene_A(13)": "[#6]-2-[#6]-c:1:c(:c:c:c:c:1)-[#6](-c:3:c:c:c:c:c-2:3)=[#6]-[#6]",
 79 |   "ene_rhod_C(13)": "[#16]-1-[#6](=[#7]-[#6]:[#6])-[#7](-[$([#1]),$([#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#8]),$([#6]:[#6])])-[#6](=[#8])-[#6]-1=[#6](-[#1])-[$([#6]:[#6]:[#6]-[#17]),$([#6]:[!#6&!#1])]",
 80 |   "dhp_amino_CN_A(13)": "[#7](-[#1])(-[#1])-[#6]-1=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#6](=[#6](-[#6]=[#6])-[#8]-1)-[#6](-[#1])-[#1]",
 81 |   "cyano_imine_C(12)": "[#8]=[#16](=[#8])-[#6](-[#6]#[#7])=[#7]-[#7]-[#1]",
 82 |   "thio_urea_A(12)": "c:1:c:c:c:c:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:2:c:c:c:c:c:2",
 83 |   "thiophene_amino_B(12)": "c:1:c(:c:c:c:c:1)-[#7](-[#1])-c:2:c(:c(:c(:s:2)-[$([#6]=[#8]),$([#6]#[#7]),$([#6](-[#8]-[#1])=[#6])])-[#7])-[$([#6]#[#7]),$([#6](:[#7]):[#7])]",
 84 |   "keto_keto_beta_B(12)": "[#6;X4]-1-[#6](=[#8])-[#7]-[#7]-[#6]-1=[#8]",
 85 |   "keto_phenone_A(11)": "c:1:c-3:c(:c:c:c:1)-[#6]:2:[#7]:[!#1]:[#6]:[#6]:[#6]:2-[#6]-3=[#8]",
 86 |   "cyano_pyridone_C(11)": "[#6]-1(-[#6](=[#6](-[#6]#[#7])-[#6](~[#8])~[#7]~[#6]-1~[#8])-[#6](-[#1])-[#1])=[#6](-[#1])-[#6]:[#6]",
 87 |   "thiaz_ene_C(11)": "[#6]-1(=[#6](-!@[#6]=[#7])-[#16]-[#6](-[#7]-1)=[#8])-[$([F,Cl,Br,I]),$([#7+](:[#6]):[#6])]",
 88 |   "hzone_thiophene_A(11)": "c:1:2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1]):[!#6&!#1]:[#6](:[#6]:2-[#6](-[#1])=[#7]-[#7](-[#1])-[$([#6]:1:[#7]:[#6]:[#6](-[#1]):[#16]:1),$([#6]:[#6](-[#1]):[#6]-[#1]),$([#6]:[#7]:[#6]:[#7]:[#6]:[#7]),$([#6]:[#7]:[#7]:[#7]:[#7])])-[$([#1]),$([#8]-[#1]),$([#6](-[#1])-[#1])]",
 89 |   "ene_quin_methide(10)": "[!#1]:[!#1]-[#6](-[$([#1]),$([#6]#[#7])])=[#6]-1-[#6]=:[#6]-[#6](=[$([#8]),$([#7;!R])])-[#6]=:[#6]-1",
 90 |   "het_thio_676_A(10)": "c:1:c:c-2:c(:c:c:1)-[#6]-[#6](-c:3:c(-[#16]-2):c(:c(-[#1]):c(:c:3-[#1])-[$([#1]),$([#8]),$([#16;X2]),$([#6;X4]),$([#7](-[$([#1]),$([#6;X4])])-[$([#1]),$([#6;X4])])])-[#1])-[#7](-[$([#1]),$([#6;X4])])-[$([#1]),$([#6;X4])]",
 91 |   "ene_five_het_G(10)": "[#6]-1(=[#6])-[#6](-[#7,#16,#8][#6](-[!#1])=[#7]-1)=[#8]",
 92 |   "acyl_het_A(9)": "[#7+](:[!#1]:[!#1]:[!#1])-[!#1]=[#8]",
 93 |   "anil_di_alk_G(9)": "[#6;X4]-[#7](-[#6;X4])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6]2=:[#7][#6]:[#6]:[!#1]2)-[#1])-[#1]",
 94 |   "dhp_keto_A(9)": "[#7]-1(-[$([#6;X4]),$([#1])])-[#6]=:[#6](-[#6](=[#8])-[#6]:[#6]:[#6])-[#6](-[#6])-[#6](=[#6]-1-[#6](-[#1])(-[#1])-[#1])-[$([#6]=[#8]),$([#6]#[#7])]",
 95 |   "thio_urea_B(9)": "c:1:c:c:c:c:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:2:c:c:c:c:c:2",
 96 |   "anil_alk_bim(9)": "c:1:3:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:c:c:c:c:c:2)-[#1]):n:c(-[#1]):n:3-[#6]",
 97 |   "imine_imine_A(9)": "c:1:c:c-2:c(:c:c:1)-[#7]=[#6]-[#6]-2=[#7;!R]",
 98 |   "thio_urea_C(9)": "c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7]-[#7](-[#1])-[#6](=[#8])-[#6]-2:[!#1]:[!#6&!#1]:[#6]:[#6]-2",
 99 |   "imine_one_fives_B(9)": "[#7;!R]=[#6]-2-[#6](=[#8])-c:1:c:c:c:c:c:1-[#16]-2",
100 |   "dhp_amino_CN_B(9)": "[$([#7](-[#1])-[#1]),$([#8]-[#1])]-[#6]-2=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-c:1:c(:n(-[#6]):n:c:1)-[#8]-2",
101 |   "anil_OC_no_alk_A(8)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:n:c:1-[#1])-[#8]-c:2:c:c:c:c:c:2)-[#1])-[#1]",
102 |   "het_thio_66_one(8)": "[#6](=[#8])-[#6]-1=[#6]-[#7]-c:2:c(-[#16]-1):c:c:c:c:2",
103 |   "styrene_B(8)": "c:1:c:c-2:c(:c:c:1)-[#6](-c:3:c(-[$([#16;X2]),$([#6;X4])]-2):c:c:c(:c:3)-[$([#1]),$([#17]),$([#6;X4])])=[#6]-[#6]",
104 |   "het_thio_5_A(8)": "[#6](-[#1])(-[#1])-[#16;X2]-c:1:n:c(:c(:n:1-!@[#6](-[#1])-[#1])-c:2:c:c:c:c:c:2)-[#1]",
105 |   "anil_di_alk_ene_A(8)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6]-2=[#6](-[#1])-c:1:c(:c:c:c:c:1)-[#16;X2]-c:3:c-2:c:c:c:c:3",
106 |   "ene_rhod_D(8)": "[#16]-1-[#6](=!@[#7]-[$([#1]),$([#7](-[#1])-[#6]:[#6])])-[#7](-[$([#1]),$([#6]:[#7]:[#6]:[#6]:[#16])])-[#6](=[#8])-[#6]-1=[#6](-[#1])-[#6]:[#6]-[$([#17]),$([#8]-[#6]-[#1])]",
107 |   "ene_rhod_E(8)": "[#16]-1-[#6](=[#8])-[#7]-[#6](=[#16])-[#6]-1=[#6](-[#1])-[#6]:[#6]",
108 |   "anil_OH_alk_A(8)": "c:1:c(:c:c:c:c:1)-[#6](-[#1])(-[#1])-[#7](-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#1])-[#1])-[#1]",
109 |   "pyrrole_C(8)": "n1(-[#6;X4])c(c(-[#1])c(c1-[#6]:[#6])-[#1])-[#6](-[#1])-[#1]",
110 |   "thio_urea_D(8)": "c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7]-[#7](-[#1])-c:2:c:c:c:c:c:2",
111 |   "thiaz_ene_D(8)": "[#7](-c:1:c:c:c:c:c:1)-c2[n+]c(cs2)-c:3:c:c:c:c:c:3",
112 |   "ene_rhod_F(8)": "n:1:c:c:c(:c:1-[#6](-[#1])-[#1])-[#6](-[#1])=[#6]-2-[#6](=[#8])-[#7]-[#6](=[!#6&!#1])-[#7]-2",
113 |   "thiaz_ene_E(8)": "[#6]-1(=[#6](-[#6](-[#1])(-[#6])-[#6])-[#16]-[#6](-[#7]-1-[$([#1]),$([#6](-[#1])-[#1])])=[#8])-[#16]-[#6;R]",
114 |   "het_65_B(7)": "[!#1]:1:[!#1]-2:[!#1](:[!#1]:[!#1]:[!#1]:1)-[#7](-[#1])-[#7](-[#6]-2=[#8])-[#6]",
115 |   "keto_keto_beta_C(7)": "c:1:c:c-2:c(:c:c:1)-[#6](=[#6](-[#6]-2=[#8])-[#6])-[#8]-[#1]",
116 |   "het_66_A(7)": "c:2:c:c:1:n:n:c(:n:c:1:c:c:2)-[#6](-[#1])(-[#1])-[#6]=[#8]",
117 |   "thio_urea_E(7)": "c:1:c:c:c:c:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:n:c:c:c:c:2",
118 |   "thiophene_amino_C(7)": "[#6](-[#1])-[#6](-[#1])(-[#1])-c:1:c(:c(:c(:s:1)-[#7](-[#1])-[#6](=[#8])-[#6]-[#6]-[#6]=[#8])-[$([#6](=[#8])-[#8]),$([#6]#[#7])])-[#6](-[#1])-[#1]",
119 |   "hzone_phenone(7)": "[#6](-c:1:c(:c(:c(:c:c:1-[#1])-[$([#6;X4]),$([#1])])-[#1])-[#1])(-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[$([#1]),$([#17])])-[#1])-[#1])=[$([#7]-[#8]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]),$([#7]-[#8]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]),$([#7]-[#7](-[#1])-[#6](=[#7]-[#1])-[#7](-[#1])-[#1]),$([#6](-[#1])-[#7])]",
120 |   "ene_rhod_G(7)": "[#8](-[#1])-[#6](=[#8])-c:1:c:c(:c:c:c:1)-[#6]:[!#1]:[#6]-[#6](-[#1])=[#6]-2-[#6](=[!#6&!#1])-[#7]-[#6](=[!#6&!#1])-[!#6&!#1]-2",
121 |   "ene_cyano_B(7)": "[#6]-1(=[#6]-[#6](-c:2:c:c(:c(:n:c-1:2)-[#7](-[#1])-[#1])-[#6]#[#7])=[#6])-[#6]#[#7]",
122 |   "dhp_amino_CN_C(7)": "[#7](-[#1])(-[#1])-[#6]-1=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#6](=[#6](-[#6]:[#6])-[#8]-1)-[#6]#[#7]",
123 |   "het_5_A(7)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#7]=[#6](-[#6]=[#8])-[#6;X4]-[#6]-2=[#8]",
124 |   "ene_five_het_H(6)": "[#7]-1=[#6]-[#6](-[#6](-[#7]-1)=[#16])=[#6]",
125 |   "thio_amide_A(6)": "c1(coc(c1-[#1])-[#6](=[#16])-[#7]-2-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[!#1]-[#6](-[#1])(-[#1])-[#6]-2(-[#1])-[#1])-[#1]",
126 |   "ene_cyano_C(6)": "[#6]=[#6](-[#6]#[#7])-[#6](=[#7]-[#1])-[#7]-[#7]",
127 |   "hzone_furan_A(6)": "c:1(:c(:c(:c(:o:1)-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#6](-[$([#1]),$([#6](-[#1])-[#1])])=[#7]-[#7](-[#1])-c:2:n:c:c:s:2",
128 |   "anil_di_alk_H(6)": "c:1(:c(:c(:c(:c(:c:1-[#7](-[#1])-[#16](=[#8])(=[#8])-[#6]:2:[#6]:[!#1]:[#6]:[#6]:[#6]:2)-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#1]",
129 |   "het_65_C(6)": "n2c1ccccn1c(c2-[$([#6](-[!#1])=[#6](-[#1])-[#6]:[#6]),$([#6]:[#8]:[#6])])-[#7]-[#6]:[#6]",
130 |   "thio_urea_F(6)": "[#6]-1-[#7](-[#1])-[#7](-[#1])-[#6](=[#16])-[#7]-[#7]-1-[#1]",
131 |   "ene_five_het_I(6)": "c:1(:c:c:c:o:1)-[#6](-[#1])=!@[#6]-3-[#6](=[#8])-c:2:c:c:c:c:c:2-[!#6&!#1]-3",
132 |   "keto_keto_gamma(5)": "[#8]=[#6]-1-[#6;X4]-[#6]-[#6](=[#8])-c:2:c:c:c:c:c-1:2",
133 |   "quinone_B(5)": "c:1:c:c-2:c(:c:c:1)-[#6](-c3cccc4noc-2c34)=[#8]",
134 |   "het_6_pyridone_OH(5)": "[#8](-[#1])-c:1:n:c(:c:c:c:1)-[#8]-[#1]",
135 |   "hzone_naphth_A(5)": "c:1:2:c(:c(:c(:c(:c:1:c(:c(:c(:c:2-[#1])-[#1])-[#6]=[#7]-[#7](-[#1])-[$([#6]:[#6]),$([#6]=[#16])])-[#1])-[#1])-[#1])-[#1])-[#1]",
136 |   "thio_ester_A(5)": "[#6]-1=[#6](-[#16]-[#6](-[#6]=[#6]-1)=[#16])-[#7]",
137 |   "ene_misc_A(5)": "[#6]-1=[#6]-[#6](-[#8]-[#6]-1-[#8])(-[#8])-[#6]",
138 |   "cyano_pyridone_D(5)": "[#8]=[#6]-1-[#6](=[#6]-[#6](=[#7]-[#7]-1)-[#6]=[#8])-[#6]#[#7]",
139 |   "het_65_Db(5)": "C3=CN1C(=NC(=C1-[#7]-[#6])-c:2:c:c:c:c:n:2)C=C3",
140 |   "het_666_A(5)": "[#7]N-2-c:1:c:c:c:c:c:1-[#6](=[#7])-c:3:c-2:c:c:c:c:3",
141 |   "diazox_sulfon_B(5)": "c:1:c(:c:c:c:c:1)-[#7]-2-[#6](-[#1])-[#6](-[#1])-[#7](-[#6](-[#1])-[#6]-2-[#1])-[#16](=[#8])(=[#8])-c:3:c:c:c:c:4:n:s:n:c:3:4",
142 |   "anil_NH_alk_A(5)": "c:1(:c(:c-2:c(:c(:c:1-[#1])-[#1])-[#7](-[#6](-[#7]-2-[#1])=[#8])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])-[#1]",
143 |   "sulfonamide_C(5)": "c:1(:c(:c-3:c(:c(:c:1-[#7](-[#1])-[#16](=[#8])(=[#8])-c:2:c:c:c(:c:c:2)-[!#6&!#1])-[#1])-[#8]-[#6](-[#8]-3)(-[#1])-[#1])-[#1])-[#1]",
144 |   "het_thio_N_55(5)": "[#6](-[#1])-[#6]:2:[#7]:[#7](-c:1:c:c:c:c:c:1):[#16]:3:[!#6&!#1]:[!#1]:[#6]:[#6]:2:3",
145 |   "keto_keto_beta_D(5)": "[#8]=[#6]-[#6]=[#6](-[#1])-[#8]-[#1]",
146 |   "ene_rhod_H(5)": "[#7]-1-2-[#6](=[#7]-[#6](=[#8])-[#6](=[#7]-1)-[#6](-[#1])-[#1])-[#16]-[#6](=[#6](-[#1])-[#6]:[#6])-[#6]-2=[#8]",
147 |   "imine_ene_A(5)": "[#6]:[#6]-[#6](-[#1])=[#6](-[#1])-[#6](-[#1])=[#7]-[#7](-[#6;X4])-[#6;X4]",
148 |   "het_thio_656a(5)": "c:1:3:c(:c:c:c:c:1):c:2:n:n:c(-[#16]-[#6](-[#1])(-[#1])-[#6]=[#8]):n:c:2:n:3-[#6](-[#1])(-[#1])-[#6](-[#1])=[#6](-[#1])-[#1]",
149 |   "pyrrole_D(5)": "n1(-[#6])c(c(-[#1])c(c1-[#6](-[#1])(-[#1])-[#7](-[#1])-[#6](=[#16])-[#7]-[#1])-[#1])-[#1]",
150 |   "pyrrole_E(5)": "n2(-[#6]:1:[!#1]:[!#6&!#1]:[!#1]:[#6]:1-[#1])c(c(-[#1])c(c2-[#6;X4])-[#1])-[#6;X4]",
151 |   "thio_urea_G(5)": "c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7]-[#7](-[#1])-[#6]([#7;R])[#7;R]",
152 |   "anisol_A(5)": "c:1(:c(:c(:c(:c(:c:1-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#8]-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[$([#7](-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1]),$([#6](-[#1])(-[#6](-[#1])-[#1])-[#7](-[#1])-[#6](=[#16])-[#7]-[#1])])-[#1])-[#8]-[#6](-[#1])-[#1]",
153 |   "pyrrole_F(5)": "n2(-[#6]:1:[#6](-[#6]#[#7]):[#6]:[#6]:[!#6&!#1]:1)c(c(-[#1])c(c2)-[#1])-[#1]",
154 |   "dhp_amino_CN_D(5)": "[#7](-[#1])(-[#1])-[#6]-2=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-c:1:c(:c:c:s:1)-[#8]-2",
155 |   "thiazole_amine_A(4)": "[#7](-[#1])-c:1:n:c(:c:s:1)-c:2:c:n:c(-[#7](-[#1])-[#1]):s:2",
156 |   "het_6_imidate_A(4)": "[#7]=[#6]-1-[#7](-[#1])-[#6](=[#6](-[#7]-[#1])-[#7]=[#7]-1)-[#7]-[#1]",
157 |   "anil_OC_no_alk_B(4)": "c:1:c(:c:2:c(:c:c:1):c:c:c:c:2)-[#8]-c:3:c(:c(:c(:c(:c:3-[#1])-[#1])-[#7]-[#1])-[#1])-[#1]",
158 |   "styrene_C(4)": "c:1:c:c-2:c(:c:c:1)-[#6]-[#16]-c3c(-[#6]-2=[#6])ccs3",
159 |   "azulene(4)": "c:2:c:c:c:1:c(:c:c:c:1):c:c:2",
160 |   "furan_acid_A(4)": "c:1(:c(:c(:c(:o:1)-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#8]-[#6]:[#6])-[#1])-[#6](=[#8])-[#8]-[#1]",
161 |   "cyano_pyridone_E(4)": "[!#1]:[#6]-[#6]-1=[#6](-[#1])-[#6](=[#6](-[#6]#[#7])-[#6](=[#8])-[#7]-1-[#1])-[#6]:[#8]",
162 |   "anil_alk_thio(4)": "[#6]-1-3=[#6](-[#6](-[#7]-c:2:c:c:c:c:c-1:2)(-[#6])-[#6])-[#16]-[#16]-[#6]-3=[!#1]",
163 |   "anil_di_alk_I(4)": "c:1(:c(:c(:c(:c(:c:1-[#7](-[#1])-[#6](=[#8])-c:2:c:c:c:c:c:2)-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#1]",
164 |   "het_thio_6_furan(4)": "[#6](-[#1])(-[#1])-[#16;X2]-c:1:n:n:c(:c(:n:1)-c:2:c(:c(:c(:o:2)-[#1])-[#1])-[#1])-c:3:c(:c(:c(:o:3)-[#1])-[#1])-[#1]",
165 |   "anil_di_alk_ene_B(4)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6]-2=[#6]-c:1:c(:c:c:c:c:1)-[#6]-2(-[#1])-[#1]",
166 |   "imine_one_B(4)": "[#7](-[#1])(-c:1:c:c:c:c:c:1)-[#7]=[#6](-[#6](=[#8])-[#6](-[#1])-[#1])-[#7](-[#1])-[$([#7]-[#1]),$([#6]:[#6])]",
167 |   "anil_OC_alk_A(4)": "c:1:2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1]):o:c:3:c(-[#1]):c(:c(-[#8]-[#6](-[#1])-[#1]):c(:c:2:3)-[#1])-[#7](-[#1])-[#6](-[#1])-[#1]",
168 |   "ene_five_het_J(4)": "[#16]=[#6]-1-[#7](-[#1])-[#6]=[#6]-[#6]-2=[#6]-1-[#6](=[#8])-[#8]-[#6]-2=[#6]-[#1]",
169 |   "pyrrole_G(4)": "n2(-c:1:c(:c:c(:c(:c:1)-[#1])-[$([#7](-[#1])-[#1]),$([#6]:[#7])])-[#1])c(c(-[#1])c(c2-[#1])-[#1])-[#1]",
170 |   "ene_five_het_K(4)": "n1(-[#6])c(c(-[#1])c(c1-[#6](-[#1])=[#6]-2-[#6](=[#8])-[!#6&!#1]-[#6]=:[!#1]-2)-[#1])-[#1]",
171 |   "cyano_ene_amine_B(4)": "[#6]=[#6]-[#6](-[#6]#[#7])(-[#6]#[#7])-[#6](-[#6]#[#7])=[#6]-[#7](-[#1])-[#1]",
172 |   "thio_ester_B(4)": "[#6]:[#6]-[#6](=[#16;X1])-[#16;X2]-[#6](-[#1])-[$([#6](-[#1])-[#1]),$([#6]:[#6])]",
173 |   "ene_five_het_L(4)": "[#8]=[#6]-3-[#6](=!@[#6](-[#1])-c:1:c:n:c:c:1)-c:2:c:c:c:c:c:2-[#7]-3",
174 |   "hzone_thiophene_B(4)": "c:1(:c(:c(:c(:s:1)-[#1])-[#1])-[$([#1]),$([#6](-[#1])-[#1])])-[#6](-[#1])=[#7]-[#7](-[#1])-c:2:c:c:c:c:c:2",
175 |   "dhp_amino_CN_E(4)": "[#6](-[#1])(-[#1])-[#16;X2]-[#6]-1=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#6](-[#6]#[#7])-[#6](=[#8])-[#7]-1",
176 |   "het_5_B(4)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#7]=[#6](-[#7](-[#1])-[#6]=[#8])-[#6](-[#1])(-[#1])-[#6]-2=[#8]",
177 |   "imine_imine_B(3)": "[#6]:[#6]-[#6](-[#1])=[#6](-[#1])-[#6](-[#1])=[#7]-[#7]=[#6]",
178 |   "thiazole_amine_B(3)": "c:1(:c:c:c(:c:c:1)-[#6](-[#1])-[#1])-c:2:c(:s:c(:n:2)-[#7](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#1]",
179 |   "imine_ene_one_A(3)": "[#6]-2(-[#6]=[#7]-c:1:c:c:c:c:c:1-[#7]-2)=[#6](-[#1])-[#6]=[#8]",
180 |   "diazox_A(3)": "[#8](-c:1:c:c:c:c:c:1)-c:3:c:c:2:n:o:n:c:2:c:c:3",
181 |   "ene_one_A(3)": "[!#1]:1:[!#1]:[!#1]:[!#1](:[!#1]:[!#1]:1)-[#6](-[#1])=[#6](-[#1])-[#6](-[#7]-c:2:c:c:c:3:c(:c:2):c:c:c(:n:3)-[#7](-[#6])-[#6])=[#8]",
182 |   "anil_OC_no_alk_C(3)": "[#7](-[#1])(-[#1])-c:1:c(:c:c:c:n:1)-[#8]-[#6](-[#1])(-[#1])-[#6]:[#6]",
183 |   "thiazol_SC_A(3)": "[#6]-[#16;X2]-c:1:n:c(:c:s:1)-[#1]",
184 |   "het_666_B(3)": "c:1:c-3:c(:c:c:c:1)-[#7](-c:2:c:c:c:c:c:2-[#8]-3)-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1]",
185 |   "furan_A(3)": "c:1(:c(:c(:c(:o:1)-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])(-[#8]-[#1])-[#6]#[#6]-[#6;X4]",
186 |   "colchicine_A(3)": "[#6]-1(-[#6](=[#6]-[#6]=[#6]-[#6]=[#6]-1)-[#7]-[#1])=[#7]-[#6]",
187 |   "thiophene_C(3)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])=[#6]-[#6](=[#8])-c:1:c(-[#16;X2]):s:c(:c:1)-[$([#6]#[#7]),$([#6]=[#8])]",
188 |   "anil_OC_alk_B(3)": "c:1:3:c(:c:c:c:c:1)-[#7]-2-[#6](=[#8])-[#6](=[#6](-[F,Cl,Br,I])-[#6]-2=[#8])-[#7](-[#1])-[#6]:[#6]:[#6]:[#6](-[#8]-[#6](-[#1])-[#1]):[#6]:[#6]:3",
189 |   "het_thio_66_A(3)": "c:1-2:c(:c:c:c:c:1)-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7]=[#6]-2-[#16;X2]-[#6](-[#1])(-[#1])-[#6](=[#8])-c:3:c:c:c:c:c:3",
190 |   "rhod_sat_B(3)": "[#7]-2(-c:1:c:c:c:c:c:1-[#6](-[#1])-[#1])-[#6](=[#16])-[#7](-[#6](-[#1])(-[#1])-[!#1]:[!#1]:[!#1]:[!#1]:[!#1])-[#6](-[#1])(-[#1])-[#6]-2=[#8]",
191 |   "ene_rhod_I(3)": "[#7]-2(-[#6](-[#1])-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](=[#6](-[#1])-c:1:c:c:c:c(:c:1)-[Br])-[#6]-2=[#8]",
192 |   "keto_thiophene(3)": "c:1(:c(:c:2:c(:s:1):c:c:c:c:2)-[#6](-[#1])-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1]",
193 |   "imine_imine_C(3)": "[#7](-[#6](-[#1])-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])=[#7]-[#6](-[#6](-[#1])-[#1])=[#7]-[#7](-[#6](-[#1])-[#1])-[#6]:[#6]",
194 |   "het_65_pyridone_A(3)": "[#6]:2(:[#6](-[#6](-[#1])-[#1]):[#6]-1:[#6](-[#7]=[#6](-[#7](-[#6]-1=[!#6&!#1;X1])-[#6](-[#1])-[$([#6](=[#8])-[#8]),$([#6]:[#6])])-[$([#1]),$([#16]-[#6](-[#1])-[#1])]):[!#6&!#1;X2]:2)-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1]",
195 |   "thiazole_amine_C(3)": "c:1(:n:c(:c(-[#1]):s:1)-[!#1]:[!#1]:[!#1](-[$([#8]-[#6](-[#1])-[#1]),$([#6](-[#1])-[#1])]):[!#1]:[!#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:c(-[#1]):c(:c(-[#1]):o:2)-[#1]",
196 |   "het_thio_pyr_A(3)": "n:1:c(:c(:c(:c(:c:1-[#16]-[#6]-[#1])-[#6]#[#7])-c:2:c:c:c(:c:c:2)-[#8]-[#6](-[#1])-[#1])-[#1])-[#6]:[#6]",
197 |   "melamine_A(3)": "c:1:4:c(:n:c(:n:c:1-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:c(:c(:c(:o:2)-[#1])-[#1])-[#1])-[#7](-[#1])-c:3:c:c(:c(:c:c:3-[$([#1]),$([#6](-[#1])-[#1]),$([#16;X2]),$([#8]-[#6]-[#1]),$([#7;X3])])-[$([#1]),$([#6](-[#1])-[#1]),$([#16;X2]),$([#8]-[#6]-[#1]),$([#7;X3])])-[$([#1]),$([#6](-[#1])-[#1]),$([#16;X2]),$([#8]-[#6]-[#1]),$([#7;X3])]):c:c:c:c:4",
198 |   "anil_NH_alk_B(3)": "[#7](-[#1])(-[#6]:1:[#6]:[#6]:[!#1]:[#6]:[#6]:1)-c:2:c:c:c(:c:c:2)-[#7](-[#1])-[#6]-[#1]",
199 |   "rhod_sat_C(3)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#6](=[#7]-[#6]=[#8])-[#16]-[#6](-[#1])(-[#1])-[#6]-2=[#8]",
200 |   "thiophene_amino_D(3)": "[#6]=[#6]-[#6](=[#8])-[#7]-c:1:c(:c(:c(:s:1)-[#6](=[#8])-[#8])-[#6]-[#1])-[#6]#[#7]",
201 |   "anil_OC_alk_C(3)": "[$([#1]),$([#6](-[#1])-[#1])]-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:n:c:c:n:2",
202 |   "het_thio_65_A(3)": "[#6](-[#1])(-[#1])-[#16;X2]-c3nc1c(n(nc1-[#6](-[#1])-[#1])-c:2:c:c:c:c:c:2)nn3",
203 |   "het_thio_656b(3)": "[#6]-[#6](=[#8])-[#6](-[#1])(-[#1])-[#16;X2]-c:3:n:n:c:2:c:1:c(:c(:c(:c(:c:1:n(:c:2:n:3)-[#1])-[#1])-[#1])-[#1])-[#1]",
204 |   "thiazole_amine_D(3)": "s:1:c(:[n+](-[#6](-[#1])-[#1]):c(:c:1-[#1])-[#6])-[#7](-[#1])-c:2:c:c:c:c:c:2[$([#6](-[#1])-[#1]),$([#6]:[#6])]",
205 |   "thio_urea_H(3)": "[#6]-2(=[#16])-[#7](-[#6](-[#1])(-[#1])-c:1:c:c:c:o:1)-[#6](=[#7]-[#7]-2-[#1])-[#6]:[#6]",
206 |   "cyano_pyridone_F(3)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#6](=[#8])-[#6](=[#6]-[#6](=[#7]-2)-[#6]#[#7])-[#6]#[#7]",
207 |   "rhod_sat_D(3)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#6](=[#8])-[#16]-[#6](-[#1])(-[#6](-[#1])(-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]:[#6])-[#6]-2=[#8]",
208 |   "ene_rhod_J(3)": "[#6](-[#1])(-[#1])-[#7]-2-[#6](=[$([#16]),$([#7])])-[!#6&!#1]-[#6](=[#6]-1-[#6](=[#6](-[#1])-[#6]:[#6]-[#7]-1-[#6](-[#1])-[#1])-[#1])-[#6]-2=[#8]",
209 |   "imine_phenol_A(3)": "[#6]=[#7;!R]-c:1:c:c:c:c:c:1-[#8]-[#1]",
210 |   "thio_carbonate_B(3)": "[#8]=[#6]-2-[#16]-c:1:c(:c(:c:c:c:1)-[#8]-[#6](-[#1])-[#1])-[#8]-2",
211 |   "het_thio_N_5A(3)": "[#7]=[#6]-1-[#7]=[#6]-[#7]-[#16]-1",
212 |   "het_thio_N_65A(3)": "[#7]-2-[#16]-[#6]-1=[#6](-[#6]:[#6]-[#7]-[#6]-1)-[#6]-2=[#16]",
213 |   "anil_di_alk_J(3)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])=[#7]-[#7]=[#6](-[#6])-[#6]:[#6])-[#1])-[#1]",
214 |   "pyrrole_H(3)": "n1-2cccc1-[#6]=[#7](-[#6])-[#6]-[#6]-2",
215 |   "ene_cyano_D(3)": "[#6](-[#6]#[#7])(-[#6]#[#7])=[#6](-[#16])-[#16]",
216 |   "cyano_cyano_B(3)": "[#6]-1(-[#6]#[#7])(-[#6]#[#7])-[#6](-[#1])(-[#6](=[#8])-[#6])-[#6]-1-[#1]",
217 |   "ene_five_het_M(3)": "[#6]-1=:[#6]-[#6](-[#6](-[$([#8]),$([#16])]-1)=[#6]-[#6]=[#8])=[#8]",
218 |   "cyano_ene_amine_C(3)": "[#6]:[#6]-[#6](=[#8])-[#7](-[#1])-[#6](=[#8])-[#6](-[#6]#[#7])=[#6](-[#1])-[#7](-[#1])-[#6]:[#6]",
219 |   "thio_urea_I(3)": "c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#7]=[#6]-c:2:c:n:c:c:2",
220 |   "dhp_amino_CN_F(3)": "[#7](-[#1])(-[#1])-[#6]-2=[#6](-[#6]#[#7])-[#6](-[#1])(-c:1:c:c:c:s:1)-[#6](=[#6](-[#6](-[#1])-[#1])-[#8]-2)-[#6](=[#8])-[#8]-[#6]",
221 |   "anthranil_acid_B(3)": "c:1:c-3:c(:c:c(:c:1)-[#6](=[#8])-[#7](-[#1])-c:2:c(:c:c:c:c:2)-[#6](=[#8])-[#8]-[#1])-[#6](-[#7](-[#6]-3=[#8])-[#6](-[#1])-[#1])=[#8]",
222 |   "diazox_B(3)": "[Cl]-c:2:c:c:1:n:o:n:c:1:c:c:2",
223 |   "thio_aldehyd_A(3)": "[#6]-[#6](=[#16])-[#1]",
224 |   "thio_amide_B(2)": "[#6;X4]-[#7](-[#1])-[#6](-[#6]:[#6])=[#6](-[#1])-[#6](=[#16])-[#7](-[#1])-c:1:c:c:c:c:c:1",
225 |   "imidazole_B(2)": "[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#16]-[#6](-[#1])(-[#1])-c1cn(cn1)-[#1]",
226 |   "thiazole_amine_E(2)": "[#8]=[#6]-[#7](-[#1])-c:1:c(-[#6]:[#6]):n:c(-[#6](-[#1])(-[#1])-[#6]#[#7]):s:1",
227 |   "thiazole_amine_F(2)": "[#6](-[#1])-[#7](-[#1])-c:1:n:c(:c:s:1)-c2cnc3n2ccs3",
228 |   "thio_ester_C(2)": "[#7]-1-[#6](=[#8])-[#6](=[#6](-[#6])-[#16]-[#6]-1=[#16])-[#1]",
229 |   "ene_one_B(2)": "[#6](-[#16])(-[#7])=[#6](-[#1])-[#6]=[#6](-[#1])-[#6]=[#8]",
230 |   "quinone_C(2)": "[#8]=[#6]-3-c:1:c(:c:c:c:c:1)-[#6]-2=[#6](-[#8]-[#1])-[#6](=[#8])-[#7]-c:4:c-2:c-3:c:c:c:4",
231 |   "keto_naphthol_A(2)": "c:1:2:c:c:c:c(:c:1:c(:c:c:c:2)-[$([#8]-[#1]),$([#7](-[#1])-[#1])])-[#6](-[#6])=[#8]",
232 |   "thio_amide_C(2)": "[#6](-[#1])(-c:1:c:c:c:c:c:1)(-c:2:c:c:c:c:c:2)-[#6](=[#16])-[#7]-[#1]",
233 |   "phthalimide_misc(2)": "[#7]-2(-[#6](=[#8])-c:1:c(:c(:c(:c(:c:1-[#1])-[#6](=[#8])-[#8]-[#1])-[#1])-[#1])-[#6]-2=[#8])-c:3:c(:c:c(:c(:c:3)-[#1])-[#8])-[#1]",
234 |   "sulfonamide_D(2)": "c:1:c:c(:c:c:c:1-[#7](-[#1])-[#16](=[#8])=[#8])-[#7](-[#1])-[#16](=[#8])=[#8]",
235 |   "anil_NH_alk_C(2)": "[#6](-[#1])-[#7](-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6]-[#1]",
236 |   "het_65_E(2)": "s1c(c(c-2c1-[#7](-[#1])-[#6](-[#6](=[#6]-2-[#1])-[#6](=[#8])-[#8]-[#1])=[#8])-[#7](-[#1])-[#1])-[#6](=[#8])-[#7]-[#1]",
237 |   "hzide_naphth(2)": "c:2(:c:1:c(:c(:c(:c(:c:1:c(:c(:c:2-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#7](-[#1])-[#6]=[#8])-[#1])-[#1])-[#1]",
238 |   "anisol_B(2)": "[#6](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-[#8]-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])(-[#1])-[#7](-[#1])-[#6;X4])-[#1]",
239 |   "thio_carbam_ene(2)": "[#6]-1=[#6]-[#7]-[#6](-[#16]-[#6;X4]-1)=[#16]",
240 |   "thio_amide_D(2)": "[#6](-[#7](-[#6]-[#1])-[#6]-[#1]):[#6]-[#7](-[#1])-[#6](=[#16])-[#6]-[#1]",
241 |   "het_65_Da(2)": "n2nc(c1cccc1c2-[#6])-[#6]",
242 |   "thiophene_D(2)": "s:1:c(:c(-[#1]):c(:c:1-[#6](=[#8])-[#7](-[#1])-[#7]-[#1])-[#8]-[#6](-[#1])-[#1])-[#1]",
243 |   "het_thio_6_ene(2)": "[#6]-1:[#6]-[#7]=[#6]-[#6](=[#6]-[#7]-[#6])-[#16]-1",
244 |   "cyano_keto_A(2)": "[#6](-[#1])(-[#1])-[#6](-[#1])(-[#6]#[#7])-[#6](=[#8])-[#6]",
245 |   "anthranil_acid_C(2)": "c2(c(-[#7](-[#1])-[#1])n(-c:1:c:c:c:c:c:1-[#6](=[#8])-[#8]-[#1])nc2-[#6]=[#8])-[$([#6]#[#7]),$([#6]=[#16])]",
246 |   "naphth_amino_C(2)": "c:2:c:1:c:c:c:c-3:c:1:c(:c:c:2)-[#7](-[#7]=[#6]-3)-[#1]",
247 |   "naphth_amino_D(2)": "c:2:c:1:c:c:c:c-3:c:1:c(:c:c:2)-[#7]-[#7]=[#7]-3",
248 |   "thiazole_amine_G(2)": "c1csc(n1)-[#7]-[#7]-[#16](=[#8])=[#8]",
249 |   "het_66_B(2)": "c:1:c:c:c:2:c(:c:1):n:c(:n:c:2)-[#7](-[#1])-[#6]-3=[#7]-[#6](-[#6]=[#6]-[#7]-3-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]",
250 |   "coumarin_A(2)": "c:1-3:c(:c(:c(:c(:c:1)-[#8]-[#6]-[#1])-[#1])-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](=[#8])-[#8]-3",
251 |   "anthranil_acid_D(2)": "c:12:c(:c:c:c:n:1)c(c(-[#6](=[#8])~[#8;X1])s2)-[#7](-[#1])-[#1]",
252 |   "het_66_C(2)": "c:1:2:n:c(:c(:n:c:1:[#6]:[#6]:[#6]:[!#1]:2)-[#6](-[#1])=[#6](-[#8]-[#1])-[#6])-[#6](-[#1])=[#6](-[#8]-[#1])-[#6]",
253 |   "thiophene_amino_E(2)": "c1csc(c1-[#7](-[#1])-[#1])-[#6](-[#1])=[#6](-[#1])-c2cccs2",
254 |   "het_6666_A(2)": "c:2:c:c:1:n:c:3:c(:n:c:1:c:c:2):c:c:c:4:c:3:c:c:c:c:4",
255 |   "sulfonamide_E(2)": "[#6]:[#6]-[#7](-[#1])-[#16](=[#8])(=[#8])-[#7](-[#1])-[#6]:[#6]",
256 |   "anil_di_alk_K(2)": "c:1:c:c(:c:c:c:1-[#7](-[#1])-[#1])-[#7](-[#6;X3])-[#6;X3]",
257 |   "het_5_C(2)": "[#7]-2=[#6](-c:1:c:c:c:c:c:1)-[#6](-[#1])(-[#1])-[#6](-[#8]-[#1])(-[#6](-[#9])(-[#9])-[#9])-[#7]-2-[$([#6]:[#6]:[#6]:[#6]:[#6]:[#6]),$([#6](=[#16])-[#6]:[#6]:[#6]:[#6]:[#6]:[#6])]",
258 |   "ene_six_het_B(2)": "c:1:c(:c:c:c:c:1)-[#6](=[#8])-[#6](-[#1])=[#6]-3-[#6](=[#8])-[#7](-[#1])-[#6](=[#8])-[#6](=[#6](-[#1])-c:2:c:c:c:c:c:2)-[#7]-3-[#1]",
259 |   "steroid_A(2)": "[#8]=[#6]-4-[#6]-[#6]-[#6]-3-[#6]-2-[#6](=[#8])-[#6]-[#6]-1-[#6]-[#6]-[#6]-[#6]-1-[#6]-2-[#6]-[#6]-[#6]-3=[#6]-4",
260 |   "het_565_A(2)": "c:1:2:c:3:c(:c(-[#8]-[#1]):c(:c:1:c(:c:n:2-[#6])-[#6]=[#8])-[#1]):n:c:n:3",
261 |   "thio_imine_ium(2)": "[#6;X4]-[#7+](-[#6;X4]-[#8]-[#1])=[#6]-[#16]-[#6]-[#1]",
262 |   "anthranil_acid_E(2)": "[#6]-3(=[#8])-[#6](=[#6](-[#1])-[#7](-[#1])-c:1:c:c:c:c:c:1-[#6](=[#8])-[#8]-[#1])-[#7]=[#6](-c:2:c:c:c:c:c:2)-[#8]-3",
263 |   "hzone_furan_B(2)": "c:1(:c(:c(:c(:o:1)-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#6](-[$([#1]),$([#6](-[#1])-[#1])])=[#7]-[#7](-[#1])-c:2:c:c:n:c:c:2",
264 |   "thiophene_E(2)": "c:1(:c(:c(:c(:s:1)-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#6](-[$([#1]),$([#6](-[#1])-[#1])])-[#6](=[#8])-[#7](-[#1])-c:2:n:c:c:s:2",
265 |   "ene_misc_B(2)": "[#6]:[#6]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#6]=[#8])-[#7]-2-[#6](=[#8])-[#6]-1(-[#1])-[#6](-[#1])(-[#1])-[#6]=[#6]-[#6](-[#1])(-[#1])-[#6]-1(-[#1])-[#6]-2=[#8]",
266 |   "het_thio_5_B(2)": "[#6]-1(-[#6]=[#8])(-[#6]:[#6])-[#16;X2]-[#6]=[#7]-[#7]-1-[#1]",
267 |   "thiophene_amino_F(2)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:s:1)-[#7](-[#1])-[#6](=[#8])-c:2:c:c:c:c:c:2)-[#6]#[#7])-[#6]:3:[!#1]:[!#1]:[!#1]:[!#1]:[!#1]:3",
268 |   "anil_OC_alk_D(2)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-c:2:c:c:c:c:c:2-[$([#6](-[#1])-[#1]),$([#8]-[#6](-[#1])-[#1])]",
269 |   "tert_butyl_A(2)": "[#6](-[#1])(-[#1])(-[#1])-[#6](-[#6](-[#1])(-[#1])-[#1])(-[#6](-[#1])(-[#1])-[#1])-c:1:c(:c:c(:c(:c:1-[#1])-[#6](-[#6](-[#1])(-[#1])-[#1])(-[#6](-[#1])(-[#1])-[#1])-[#6](-[#1])(-[#1])-[#1])-[#8]-[#6](-[#1])-[#7])-[#1]",
270 |   "thio_urea_J(2)": "c:1(:c(:o:c:c:1)-[#6]-[#1])-[#6]=[#7]-[#7](-[#1])-[#6](=[#16])-[#7]-[#1]",
271 |   "het_thio_65_B(2)": "[#7](-[#1])-c1nc(nc2nnc(n12)-[#16]-[#6])-[#7](-[#1])-[#6]",
272 |   "coumarin_B(2)": "c:1-2:c(:c:c:c:c:1-[#6](-[#1])(-[#1])-[#6](-[#1])=[#6](-[#1])-[#1])-[#6](=[#6](-[#6](=[#8])-[#7](-[#1])-[#6]:[#6])-[#6](=[#8])-[#8]-2)-[#1]",
273 |   "thio_urea_K(2)": "[#6]-2(=[#16])-[#7]-1-[#6]:[#6]-[#7]=[#7]-[#6]-1=[#7]-[#7]-2-[#1]",
274 |   "thiophene_amino_G(2)": "[#6]:[#6]:[#6]:[#6]:[#6]:[#6]-c:1:c:c(:c(:s:1)-[#7](-[#1])-[#6](=[#8])-[#6])-[#6](=[#8])-[#8]-[#1]",
275 |   "anil_NH_alk_D(2)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c:c:1-[#7](-[#1])-[#6](-[#1])(-[#6])-[#6](-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#1]",
276 |   "het_thio_5_C(2)": "[#16]=[#6]-2-[#7](-[#1])-[#7]=[#6](-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1])-[#8]-2",
277 |   "thio_keto_het(2)": "[#16]=[#6]-c:1:c:c:c:2:c:c:c:c:n:1:2",
278 |   "het_thio_N_5B(2)": "[#6]~1~[#6](~[#7]~[#7]~[#6](~[#6](-[#1])-[#1])~[#6](-[#1])-[#1])~[#7]~[#16]~[#6]~1",
279 |   "quinone_D(2)": "[#6]-1(-[#6]=:[#6]-[#6]=:[#6]-[#6]-1=[!#6&!#1])=[!#6&!#1]",
280 |   "anil_di_alk_furan_B(2)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:1:c(-[#1]):c(:c(:o:1)-[#6](-[#1])=[#6]-[#6]#[#7])-[#1]",
281 |   "ene_six_het_C(2)": "[#8]=[#6]-1-[#6]:[#6]-[#6](-[#1])(-[#1])-[#7]-[#6]-1=[#6]-[#1]",
282 |   "het_55_A(2)": "[#6]:[#6]-[#7]:2:[#7]:[#6]:1-[#6](-[#1])(-[#1])-[#16;X2]-[#6](-[#1])(-[#1])-[#6]:1-[#6]:2-[#7](-[#1])-[#6](=[#8])-[#6](-[#1])=[#6]-[#1]",
283 |   "het_thio_65_C(2)": "n:1:c(:n(:c:2:c:1:c:c:c:c:2)-[#6](-[#1])-[#1])-[#16]-[#6](-[#1])(-[#1])-[#6](=[#8])-[#7](-[#1])-[#7]=[#6](-[#1])-[#6](-[#1])=[#6]-[#1]",
284 |   "hydroquin_A(2)": "c:1(:c:c(:c(:c:c:1)-[#8]-[#1])-[#6](=!@[#6]-[#7])-[#6]=[#8])-[#8]-[#1]",
285 |   "anthranil_acid_F(2)": "c:1(:c:c(:c(:c:c:1)-[#7](-[#1])-[#6](=[#8])-[#6]:[#6])-[#6](=[#8])-[#8]-[#1])-[#8]-[#1]",
286 |   "pyrrole_I(2)": "n2(-[#6](-[#1])-[#1])c-1c(-[#6]:[#6]-[#6]-1=[#8])cc2-[#6](-[#1])-[#1]",
287 |   "thiophene_amino_H(2)": "[#6](-[#1])-[#7](-[#1])-c:1:c(:c(:c(:s:1)-[#6]-[#1])-[#6]-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]:[#6]",
288 |   "imine_one_fives_C(2)": "[#6]:[#6]-[#7;!R]=[#6]-2-[#6](=[!#6&!#1])-c:1:c:c:c:c:c:1-[#7]-2",
289 |   "keto_phenone_zone_A(2)": "c:1:c:c:c:c:c:1-[#6](=[#8])-[#7](-[#1])-[#7]=[#6]-3-c:2:c:c:c:c:c:2-c:4:c:c:c:c:c-3:4",
290 |   "dyes7A(2)": "c:1:c(:c:c:c:c:1)-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])=[#6](-[#1])-[#6]=!@[#6](-[#1])-[#6](-[#1])=[#6]-[#6]=@[#7]-c:2:c:c:c:c:c:2",
291 |   "het_pyridiniums_B(2)": "[#6]:1:2:[!#1]:[#7+](:[!#1]:[#6](:[!#1]:1:[#6]:[#6]:[#6]:[#6]:2)-[*])~[#6]:[#6]",
292 |   "het_5_D(2)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#7]=[#6](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#16]-[#6])-[#6]-2=[#8]",
293 |   "thiazole_amine_H(1)": "c:1:c:c:c(:c:c:1-[#7](-[#1])-c2nc(c(-[#1])s2)-c:3:c:c:c(:c:c:3)-[#6](-[#1])(-[#6]-[#1])-[#6]-[#1])-[#6](=[#8])-[#8]-[#1]",
294 |   "thiazole_amine_I(1)": "[#6](-[#1])(-[#1])-[#7](-[#1])-[#6]=[#7]-[#7](-[#1])-c1nc(c(-[#1])s1)-[#6]:[#6]",
295 |   "het_thio_N_5C(1)": "[#6]:[#6]-[#7](-[#1])-[#6](=[#8])-c1c(snn1)-[#7](-[#1])-[#6]:[#6]",
296 |   "sulfonamide_F(1)": "[#8]=[#16](=[#8])(-[#6]:[#6])-[#7](-[#1])-c1nc(cs1)-[#6]:[#6]",
297 |   "thiazole_amine_J(1)": "[#8]=[#16](=[#8])(-[#6]:[#6])-[#7](-[#1])-[#7](-[#1])-c1nc(cs1)-[#6]:[#6]",
298 |   "het_65_F(1)": "s2c:1:n:c:n:c(:c:1c(c2-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#7]-[#7]=[#6]-c3ccco3",
299 |   "keto_keto_beta_E(1)": "[#6](=[#8])-[#6](-[#1])=[#6](-[#8]-[#1])-[#6](-[#8]-[#1])=[#6](-[#1])-[#6](=[#8])-[#6]",
300 |   "ene_five_one_B(1)": "c:2(:c:1-[#6](-[#6](-[#6](-c:1:c(:c(:c:2-[#1])-[#1])-[#1])(-[#1])-[#1])=[#8])=[#6](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1]",
301 |   "keto_keto_beta_zone(1)": "[#6]:[#6]-[#7](-[#1])-[#7]=[#6](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#6](-[#1])-[#1])=[#7]-[#7](-[#1])-[#6]:[#6]",
302 |   "thio_urea_L(1)": "[#6;X4]-[#16;X2]-[#6](=[#7]-[!#1]:[!#1]:[!#1]:[!#1])-[#7](-[#1])-[#7]=[#6]",
303 |   "het_thio_urea_ene(1)": "[#6]-1(=[#7]-[#7](-[#6](-[#16]-1)=[#6](-[#1])-[#6]:[#6])-[#6]:[#6])-[#6]=[#8]",
304 |   "cyano_amino_het_A(1)": "c:1(:c(:c:2:c(:n:c:1-[#7](-[#1])-[#1]):c:c:c(:c:2-[#7](-[#1])-[#1])-[#6]#[#7])-[#6]#[#7])-[#6]#[#7]",
305 |   "tetrazole_hzide(1)": "[!#1]:1:[!#1]:[!#1]:[!#1](:[!#1]:[!#1]:1)-[#6](-[#1])=[#6](-[#1])-[#6](-[#7](-[#1])-[#7](-[#1])-c2nnnn2-[#6])=[#8]",
306 |   "imine_naphthol_A(1)": "c:1:2:c(:c(:c(:c(:c:1:c(:c(:c(:c:2-[#1])-[#1])-[#6](=[#7]-[#6]:[#6])-[#6](-[#1])-[#1])-[#8]-[#1])-[#1])-[#1])-[#1])-[#1]",
307 |   "misc_anisole_A(1)": "c:1(:c(:c:2:c(:c(:c:1-[#8]-[#6](-[#1])-[#1])-[#1]):c(:c(:c(:c:2-[#7](-[#1])-[#6](-[#1])(-[#1])-[#1])-[#1])-c:3:c(:c(:c(:c(:c:3-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1])-[#1])-[#8]-[#6](-[#1])-[#1]",
308 |   "het_thio_665(1)": "c:1:c:c-2:c(:c:c:1)-[#16]-c3c(-[#7]-2)cc(s3)-[#6](-[#1])-[#1]",
309 |   "anil_di_alk_L(1)": "c:1:c:c:c-2:c(:c:1)-[#6](-[#6](-[#7]-2-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7]-4-[#6](-c:3:c:c:c:c:c:3-[#6]-4=[#8])=[#8])(-[#1])-[#1])(-[#1])-[#1]",
310 |   "colchicine_B(1)": "c:1(:c:c:c(:c:c:1)-[#6]-3=[#6]-[#6](-c2cocc2-[#6](=[#6]-3)-[#8]-[#1])=[#8])-[#16]-[#6](-[#1])-[#1]",
311 |   "misc_aminoacid_A(1)": "[#6;X4]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#6](-[#1])(-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#16]-[#6](-[#1])(-[#1])-[#1])-[#6](=[#8])-[#8]-[#1])-[#1])-[#1]",
312 |   "imidazole_amino_A(1)": "n:1:c(:n(:c(:c:1-c:2:c:c:c:c:c:2)-c:3:c:c:c:c:c:3)-[#7]=!@[#6])-[#7](-[#1])-[#1]",
313 |   "phenol_sulfite_A(1)": "[#6](-c:1:c:c:c(:c:c:1)-[#8]-[#1])(-c:2:c:c:c(:c:c:2)-[#8]-[#1])-[#8]-[#16](=[#8])=[#8]",
314 |   "het_66_D(1)": "c:2:c:c:1:n:c(:c(:n:c:1:c:c:2)-[#6](-[#1])(-[#1])-[#6](=[#8])-[#6]:[#6])-[#6](-[#1])(-[#1])-[#6](=[#8])-[#6]:[#6]",
315 |   "misc_anisole_B(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:2:c:c:c(-[#6](-[#1])-[#1])c:c:2",
316 |   "tetrazole_A(1)": "[#6](-[#1])(-[#1])-c1nnnn1-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#1])-[#1])-[#1]",
317 |   "het_65_G(1)": "[#6]-2(=[#7]-c1c(c(nn1-[#6](-[#6]-2(-[#1])-[#1])=[#8])-[#7](-[#1])-[#1])-[#7](-[#1])-[#1])-[#6]",
318 |   "misc_trityl_A(1)": "[#6](-[#6]:[#6])(-[#6]:[#6])(-[#6]:[#6])-[#16]-[#6]:[#6]-[#6](=[#8])-[#8]-[#1]",
319 |   "misc_pyridine_OC(1)": "[#8]=[#6](-c:1:c(:c(:n:c(:c:1-[#1])-[#8]-[#6](-[#1])(-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]",
320 |   "het_6_hydropyridone(1)": "[#7]-1=[#6](-[#7](-[#6](-[#6](-[#6]-1(-[#1])-[#6]:[#6])(-[#1])-[#1])=[#8])-[#1])-[#7]-[#1]",
321 |   "misc_stilbene(1)": "[#6]-1(=[#6](-[#6](-[#6](-[#6](-[#6]-1(-[#1])-[#1])(-[#1])-[#6](=[#8])-[#6])(-[#1])-[#6](=[#8])-[#8]-[#1])(-[#1])-[#1])-[#6]:[#6])-[#6]:[#6]",
322 |   "misc_imidazole(1)": "[#6](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[Cl])-[#1])-[#1])(-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[Cl])-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-c3nc(c(n3-[#6](-[#1])(-[#1])-[#1])-[#1])-[#1]",
323 |   "anil_NH_no_alk_A(1)": "n:1:c(:c(:c(:c(:c:1-[#1])-[#7](-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6]:[#6]",
324 |   "het_6_imidate_B(1)": "[#7](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#8]-[#1])-[#6]-2=[#6](-[#8]-[#6](-[#7]=[#7]-2)=[#7])-[#7](-[#1])-[#1]",
325 |   "anil_alk_B(1)": "[#7](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1]",
326 |   "styrene_anil_A(1)": "c:1:c:c-3:c(:c:c:1)-c:2:c:c:c(:c:c:2-[#6]-3=[#6](-[#1])-[#6])-[#7](-[#1])-[#1]",
327 |   "misc_aminal_acid(1)": "c:1:c:c-2:c(:c:c:1)-[#7](-[#6](-[#8]-[#6]-2)(-[#6](=[#8])-[#8]-[#1])-[#6](-[#1])-[#1])-[#6](=[#8])-[#6](-[#1])-[#1]",
328 |   "anil_no_alk_D(1)": "n:1:c(:c(:c(:c(:c:1-[#7](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#6](-[#1])-[#1])-[#7](-[#1])-[#1]",
329 |   "anil_alk_C(1)": "[#7](-[#1])(-c:1:c:c:c:c:c:1)-[#6](-[#6])(-[#6])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1]",
330 |   "misc_anisole_C(1)": "[#7](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#1])-[#8]-[#6]-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])(-[#1])-[#1])-[#6]:[#6]",
331 |   "het_465_misc(1)": "c:1-2:c:c-3:c(:c:c:1-[#8]-[#6]-[#8]-2)-[#6]-[#6]-3",
332 |   "anthranil_acid_G(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](=[#8])-[#8]-[#1])-[#7](-[#1])-[#6]:[#6]",
333 |   "anil_di_alk_M(1)": "c:1(:c:4:c(:n:c(:c:1-[#6](-[#1])(-[#1])-[#7]-3-c:2:c(:c(:c(:c(:c:2-[#6](-[#1])(-[#1])-[#6]-3(-[#1])-[#1])-[#1])-[#1])-[#1])-[#1])-[#1]):c(:c(:c(:c:4-[#1])-[#1])-[#1])-[#1])-[#1]",
334 |   "anthranil_acid_H(1)": "c:1:c(:c2:c(:c:c:1)c(c(n2-[#1])-[#6]:[#6])-[#6]:[#6])-[#6](=[#8])-[#8]-[#1]",
335 |   "thio_urea_M(1)": "[#6]:[#6]-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-c:1:c(:c(:c(:c(:c:1-[F,Cl,Br,I])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1]",
336 |   "thiazole_amine_K(1)": "n:1:c3:c(:c:c2:c:1nc(s2)-[#7])sc(n3)-[#7]",
337 |   "het_thio_5_imine_A(1)": "[#7]=[#6]-1-[#16]-[#6](=[#7])-[#7]=[#6]-1",
338 |   "thio_amide_E(1)": "c:1:c(:n:c:c:c:1)-[#6](=[#16])-[#7](-[#1])-c:2:c(:c:c:c:c:2)-[#8]-[#6](-[#1])-[#1]",
339 |   "het_thio_676_B(1)": "c:1-2:c(:c(:c(:c(:c:1-[#6](-c:3:c(-[#16]-[#6]-2(-[#1])-[#1]):c(:c(-[#1]):c(:c:3-[#1])-[#1])-[#1])-[#8]-[#6]:[#6])-[#1])-[#1])-[#1])-[#1]",
340 |   "sulfonamide_G(1)": "[#6](-[#1])(-[#1])(-[#1])-c:1:c(:c(:c(:c(:n:1)-[#7](-[#1])-[#16](-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#8]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])(=[#8])=[#8])-[#1])-[#1])-[#1]",
341 |   "thio_thiomorph_Z(1)": "[#6](=[#8])(-[#7]-1-[#6]-[#6]-[#16]-[#6]-[#6]-1)-c:2:c(:c(:c(:c(:c:2-[#16]-[#6](-[#1])-[#1])-[#1])-[#1])-[#1])-[#1]",
342 |   "naphth_ene_one_A(1)": "c:1:c:c:3:c:2:c(:c:1)-[#6](-[#6]=[#6](-c:2:c:c:c:3)-[#8]-[#6](-[#1])-[#1])=[#8]",
343 |   "naphth_ene_one_B(1)": "c:1-3:c:2:c(:c(:c:c:1)-[#7]):c:c:c:c:2-[#6](-[#6]=[#6]-3-[#6](-[F])(-[F])-[F])=[#8]",
344 |   "amino_acridine_A(1)": "c:1:c:c:c:c:2:c:1:c:c:3:c(:n:2):n:c:4:c(:c:3-[#7]):c:c:c:c:4",
345 |   "keto_phenone_B(1)": "c:1:c-3:c(:c:c:c:1)-[#6]-2=[#7]-[!#1]=[#6]-[#6]-[#6]-2-[#6]-3=[#8]",
346 |   "hzone_acid_A(1)": "c:1-3:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](=[#7]-[#7](-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#6](=[#8])-[#8]-[#1])-[#1])-[#1])-c:4:c-3:c(:c(:c(:c:4-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#1]",
347 |   "sulfonamide_H(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#1])-[#1])-[#1])-[#1])-[#16](=[#8])(=[#8])-[#7](-[#1])-c:2:n:n:c(:c(:c:2-[#1])-[#1])-[#1]",
348 |   "het_565_indole(1)": "c2(c(-[#1])n(-[#6](-[#1])-[#1])c:3:c(:c(:c:1n(c(c(c:1:c2:3)-[#1])-[#1])-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1]",
349 |   "pyrrole_J(1)": "c1(c-2c(c(n1-[#6](-[#8])=[#8])-[#6](-[#1])-[#1])-[#16]-[#6](-[#1])(-[#1])-[#16]-2)-[#6](-[#1])-[#1]",
350 |   "pyrazole_amino_B(1)": "s1ccnc1-c2c(n(nc2-[#1])-[#1])-[#7](-[#1])-[#1]",
351 |   "pyrrole_K(1)": "c1(c(c(c(n1-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#6](=[#8])-[#8]-[#1]",
352 |   "anthranil_acid_I(1)": "c:1:2(:c(:c(:c(:o:1)-[#6])-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]:[#6](-[#1]):[#6](-[#1]):[#6](-[#1]):[#6](-[#1]):[#6]:2-[#6](=[#8])-[#8]-[#1]",
353 |   "thio_amide_F(1)": "[!#1]:[#6]-[#6](=[#16])-[#7](-[#1])-[#7](-[#1])-[#6]:[!#1]",
354 |   "ene_one_C(1)": "[#6]-1(=[#8])-[#6](-[#6](-[#6]#[#7])=[#6](-[#1])-[#7])-[#6](-[#7])-[#6]=[#6]-1",
355 |   "het_65_H(1)": "c2(c-1n(-[#6](-[#6]=[#6]-[#7]-1)=[#8])nc2-c3cccn3)-[#6]#[#7]",
356 |   "cyano_imine_D(1)": "[#8]=[#6]-1-[#6](=[#7]-[#7]-[#6]-[#6]-1)-[#6]#[#7]",
357 |   "cyano_misc_A(1)": "c:2(:c:1:c:c:c:c:c:1:n:n:c:2)-[#6](-[#6]:[#6])-[#6]#[#7]",
358 |   "ene_misc_C(1)": "c:1:c:c-2:c(:c:c:1)-[#6]=[#6]-[#6](-[#7]-2-[#6](=[#8])-[#7](-[#1])-c:3:c:c(:c(:c:c:3)-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]",
359 |   "het_66_E(1)": "c:2:c:c:1:n:c(:c(:n:c:1:c:c:2)-c:3:c:c:c:c:c:3)-c:4:c:c:c:c:c:4-[#8]-[#1]",
360 |   "keto_keto_beta_F(1)": "[#6](-[#1])(-[#1])-[#6](-[#8]-[#1])=[#6](-[#6](=[#8])-[#6](-[#1])-[#1])-[#6](-[#1])-[#6]#[#6]",
361 |   "misc_naphthimidazole(1)": "c:1:c:4:c(:c:c2:c:1nc(n2-[#1])-[#6]-[#8]-[#6](=[#8])-c:3:c:c(:c:c(:c:3)-[#7](-[#1])-[#1])-[#7](-[#1])-[#1]):c:c:c:c:4",
362 |   "naphth_ene_one_C(1)": "c:2(:c:1:c:c:c:c-3:c:1:c(:c:c:2)-[#6]=[#6]-[#6]-3=[#7])-[#7]",
363 |   "keto_phenone_C(1)": "c:2(:c:1:c:c:c:c:c:1:c-3:c(:c:2)-[#6](-c:4:c:c:c:c:c-3:4)=[#8])-[#8]-[#1]",
364 |   "coumarin_C(1)": "[#6]-2(-[#6]=[#7]-c:1:c:c(:c:c:c:1-[#8]-2)-[Cl])=[#8]",
365 |   "thio_est_cyano_A(1)": "[#6]-1=[#6]-[#7](-[#6](-c:2:c-1:c:c:c:c:2)(-[#6]#[#7])-[#6](=[#16])-[#16])-[#6]=[#8]",
366 |   "het_65_imidazole(1)": "c2(nc:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])n2-[#6])-[#7](-[#1])-[#6](-[#7](-[#1])-c:3:c(:c:c:c:c:3-[#1])-[#1])=[#8]",
367 |   "anthranil_acid_J(1)": "[#7](-[#1])(-[#6]:[#6])-c:1:c(-[#6](=[#8])-[#8]-[#1]):c:c:c(:n:1)-[#6]:[#6]",
368 |   "colchicine_het(1)": "c:1-3:c(:c:c:c:c:1)-[#16]-[#6](=[#7]-[#7]=[#6]-2-[#6]=[#6]-[#6]=[#6]-[#6]=[#6]-2)-[#7]-3-[#6](-[#1])-[#1]",
369 |   "ene_misc_D(1)": "c:1-2:c(:c(:c(:c(:c:1-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](=[#6](-[#6])-[#16]-[#6]-2(-[#1])-[#1])-[#6]",
370 |   "indole_3yl_alk_B(1)": "c:12:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])c(c(-[#6]:[#6])n2-!@[#6]:[#6])-[#6](-[#1])-[#1]",
371 |   "anil_OH_no_alk_A(1)": "[#7](-[#1])(-[#1])-c:1:c:c:c(:c:c:1-[#8]-[#1])-[#16](=[#8])(=[#8])-[#8]-[#1]",
372 |   "thiazole_amine_L(1)": "s:1:c:c:c(:c:1-[#1])-c:2:c:s:c(:n:2)-[#7](-[#1])-[#1]",
373 |   "pyrazole_amino_A(1)": "c1c(-[#7](-[#1])-[#1])nnc1-c2c(-[#6](-[#1])-[#1])oc(c2-[#1])-[#1]",
374 |   "het_thio_N_5D(1)": "n1nscc1-c2nc(no2)-[#6]:[#6]",
375 |   "anil_alk_indane(1)": "c:1(:c:c-3:c(:c:c:1)-[#7]-[#6]-4-c:2:c:c:c:c:c:2-[#6]-[#6]-3-4)-[#6;X4]",
376 |   "anil_di_alk_N(1)": "c:1-2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#6](=[#6](-[#1])-[#6]-3-[#6](-[#6]#[#7])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#7]-2-3)-[#1]",
377 |   "het_666_C(1)": "c:2-3:c(:c:c:1:c:c:c:c:c:1:c:2)-[#7](-[#6](-[#1])-[#1])-[#6](=[#8])-[#6](=[#7]-3)-[#6]:[#6]-[#7](-[#1])-[#6](-[#1])-[#1]",
378 |   "ene_one_D(1)": "[#6](-[#8]-[#1]):[#6]-[#6](=[#8])-[#6](-[#1])=[#6](-[#6])-[#6]",
379 |   "anil_di_alk_indol(1)": "c:1:2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1]):c(:c(-[#1]):n:2-[#1])-[#16](=[#8])=[#8]",
380 |   "anil_no_alk_indol_A(1)": "c:1:2:c(:c(:c(:c(:c:1-[#1])-[#1])-[#7](-[#1])-[#1])-[#1]):c(:c(-[#1]):n:2-[#6](-[#1])-[#1])-[#1]",
381 |   "dhp_amino_CN_G(1)": "[#16;X2]-1-[#6]=[#6](-[#6]#[#7])-[#6](-[#6])(-[#6]=[#8])-[#6](=[#6]-1-[#7](-[#1])-[#1])-[$([#6]=[#8]),$([#6]#[#7])]",
382 |   "anil_di_alk_dhp(1)": "[#7]-2-[#6]=[#6](-[#6]=[#8])-[#6](-c:1:c:c:c(:c:c:1)-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#6]~3=[#6]-2~[#7]~[#6](~[#16])~[#7]~[#6]~3~[#7]",
383 |   "anthranil_amide_A(1)": "c:1:c(:c:c:c:c:1)-[#6](=[#8])-[#7](-[#1])-c:2:c(:c:c:c:c:2)-[#6](=[#8])-[#7](-[#1])-[#7](-[#1])-c:3:n:c:c:s:3",
384 |   "hzone_anthran_Z(1)": "c:1:c:2:c(:c:c:c:1):c(:c:3:c(:c:2):c:c:c:c:3)-[#6]=[#7]-[#7](-[#1])-c:4:c:c:c:c:c:4",
385 |   "ene_one_amide_A(1)": "c:1:c(:c:c:c:c:1)-[#6](-[#1])-[#7]-[#6](=[#8])-[#6](-[#7](-[#1])-[#6](-[#1])-[#1])=[#6](-[#1])-[#6](=[#8])-c:2:c:c:c(:c:c:2)-[#8]-[#6](-[#1])-[#1]",
386 |   "het_76_A(1)": "s:1:c(:c(-[#1]):c(:c:1-[#6]-3=[#7]-c:2:c:c:c:c:c:2-[#6](=[#7]-[#7]-3-[#1])-c:4:c:c:n:c:c:4)-[#1])-[#1]",
387 |   "thio_urea_N(1)": "o:1:c(:c(-[#1]):c(:c:1-[#6](-[#1])(-[#1])-[#7](-[#1])-[#6](=[#16])-[#7](-[#6]-[#1])-[#6](-[#1])(-[#1])-c:2:c:c:c:c:c:2)-[#1])-[#1]",
388 |   "anil_di_alk_coum(1)": "c:1:c(:c:c:c:c:1)-[#7](-[#6]-[#1])-[#6](-[#1])-[#6](-[#1])-[#6](-[#1])-[#7](-[#1])-[#6](=[#8])-[#6]-2=[#6](-[#8]-[#6](-[#6](=[#6]-2-[#6](-[#1])-[#1])-[#1])=[#8])-[#6](-[#1])-[#1]",
389 |   "ene_one_amide_B(1)": "c2-3:c:c:c:1:c:c:c:c:c:1:c2-[#6](-[#1])-[#6;X4]-[#7]-[#6]-3=[#6](-[#1])-[#6](=[#8])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]",
390 |   "het_thio_656c(1)": "c:1:c(:c:c:c:c:1)-[#6]-4=[#7]-[#7]:2:[#6](:[#7+]:c:3:c:2:c:c:c:c:3)-[#16]-[#6;X4]-4",
391 |   "het_5_ene(1)": "[#6]-2(=[#8])-[#6](=[#6](-[#6](-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#7]=[#6](-c:1:c:c:c:c:c:1)-[#8]-2",
392 |   "thio_imide_A(1)": "c:1:c(:c:c:c:c:1)-[#7]-2-[#6](=[#8])-[#6](=[#6](-[#1])-[#6]-2=[#8])-[#16]-c:3:c:c:c:c:c:3",
393 |   "dhp_amidine_A(1)": "[#7]-1(-[#1])-[#7]=[#6](-[#7]-[#1])-[#16]-[#6](=[#6]-1-[#6]:[#6])-[#6]:[#6]",
394 |   "thio_urea_O(1)": "c:1(:c(:c-3:c(:c(:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])-c:2:c(:c(:c(:o:2)-[#6]-[#1])-[#1])-[#1])-[#1])-[#8]-[#6](-[#8]-3)(-[#1])-[#1])-[#1])-[#1]",
395 |   "anil_di_alk_O(1)": "c:1(:c(:c(:c(:c(:c:1-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-c:2:c:c:c:c:c:2)-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#1]",
396 |   "thio_urea_P(1)": "[#8]=[#6]-!@n:1:c:c:c-2:c:1-[#7](-[#1])-[#6](=[#16])-[#7]-2-[#1]",
397 |   "het_pyraz_misc(1)": "[#6](-[F])(-[F])-[#6](=[#8])-[#7](-[#1])-c:1:c(-[#1]):n(-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#8]-[#6](-[#1])(-[#1])-[#6]:[#6]):n:c:1-[#1]",
398 |   "diazox_C(1)": "[#7]-2=[#7]-[#6]:1:[#7]:[!#6&!#1]:[#7]:[#6]:1-[#7]=[#7]-[#6]:[#6]-2",
399 |   "diazox_D(1)": "[#6]-2(-[#1])(-[#8]-[#1])-[#6]:1:[#7]:[!#6&!#1]:[#7]:[#6]:1-[#6](-[#1])(-[#8]-[#1])-[#6]=[#6]-2",
400 |   "misc_cyclopropane(1)": "[#6]-1(-[#6](-[#1])(-[#1])-[#6]-1(-[#1])-[#1])(-[#6](=[#8])-[#7](-[#1])-c:2:c:c:c(:c:c:2)-[#8]-[#6](-[#1])(-[#1])-[#8])-[#16](=[#8])(=[#8])-[#6]:[#6]",
401 |   "imine_ene_one_B(1)": "[#6]-1:[#6]-[#6](=[#8])-[#6]=[#6]-1-[#7]=[#6](-[#1])-[#7](-[#6;X4])-[#6;X4]",
402 |   "coumarin_D(1)": "c:1:c:c(:c:c-2:c:1-[#6](=[#6](-[#1])-[#6](=[#8])-[#8]-2)-c:3:c:c:c:c:c:3)-[#8]-[#6](-[#1])(-[#1])-[#6]:[#8]:[#6]",
403 |   "misc_furan_A(1)": "c:1:c(:o:c(:c:1-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#7]-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#8]-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#8]-c:2:c:c-3:c(:c:c:2)-[#8]-[#6](-[#8]-3)(-[#1])-[#1]",
404 |   "rhod_sat_E(1)": "[#7]-4(-c:1:c:c:c:c:c:1)-[#6](=[#8])-[#16]-[#6](-[#1])(-[#7](-[#1])-c:2:c:c:c:c:3:c:c:c:c:c:2:3)-[#6]-4=[#8]",
405 |   "rhod_sat_imine_A(1)": "[#7]-3(-[#6](=[#8])-c:1:c:c:c:c:c:1)-[#6](=[#7]-c:2:c:c:c:c:c:2)-[#16]-[#6](-[#1])(-[#1])-[#6]-3=[#8]",
406 |   "rhod_sat_F(1)": "[#7]-2(-c:1:c:c:c:c:c:1)-[#6](=[#8])-[#16]-[#6](-[#1])(-[#1])-[#6]-2=[#16]",
407 |   "het_thio_5_imine_B(1)": "[#7]-1(-[#6](-[#1])-[#1])-[#6](=[#16])-[#7](-[#6]:[#6])-[#6](=[#7]-[#6]:[#6])-[#6]-1=[#7]-[#6]:[#6]",
408 |   "het_thio_5_imine_C(1)": "[#16]-1-[#6](=[#7]-[#7]-[#1])-[#16]-[#6](=[#7]-[#6]:[#6])-[#6]-1=[#7]-[#6]:[#6]",
409 |   "ene_five_het_N(1)": "[#6]-2(=[#8])-[#6](=[#6](-[#1])-c:1:c(:c:c:c(:c:1)-[F,Cl,Br,I])-[#8]-[#6](-[#1])-[#1])-[#7]=[#6](-[#16]-[#6](-[#1])-[#1])-[#16]-2",
410 |   "thio_carbam_A(1)": "[#6](-[#1])(-[#1])-[#16]-[#6](=[#16])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6]",
411 |   "misc_anilide_A(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])-[#1])-[#7](-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6])-[#1])-[#7](-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6]",
412 |   "misc_anilide_B(1)": "c:1(:c(:c:c(:c:c:1-[#6])-[Br])-[#6])-[#7](-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]-[#6]-[#6]",
413 |   "mannich_B(1)": "c:1-2:c(:c:c:c(:c:1-[#8]-[#6](-[#1])(-[#1])-[#7](-[#6]:[#6]-[#8]-[#6](-[#1])-[#1])-[#6]-2(-[#1])-[#1])-[#1])-[#1]",
414 |   "mannich_catechol_A(1)": "c:1-2:c(:c(:c(:c(:c:1-[#8]-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6]-2(-[#1])-[#1])-[#1])-[#8])-[#8])-[#1]",
415 |   "anil_alk_D(1)": "[#7](-[#1])(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#6](-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]",
416 |   "het_65_I(1)": "n:1:2:c:c:c(:c:c:1:c:c(:c:2-[#6](=[#8])-[#6]:[#6])-[#6]:[#6])-[#6](~[#8])~[#8]",
417 |   "misc_urea_A(1)": "c:1(:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#6](=[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1])-[#6](-[#6;X4])(-[#6;X4])-[#7](-[#1])-[#6](=[#8])-[#7](-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6]",
418 |   "imidazole_C(1)": "[#6]-3(-[#1])(-n:1:c(:n:c(:c:1-[#1])-[#1])-[#1])-c:2:c(:c(:c(:c(:c:2-[#1])-[Br])-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-c:4:c-3:c(:c(:c(:c:4-[#1])-[#1])-[#1])-[#1]",
419 |   "styrene_imidazole_A(1)": "[#6](=[#6](-[#1])-[#6](-[#1])(-[#1])-n:1:c(:n:c(:c:1-[#1])-[#1])-[#1])(-[#6]:[#6])-[#6]:[#6]",
420 |   "thiazole_amine_M(1)": "c:1(:n:c(:c(-[#1]):s:1)-c:2:c:c:n:c:c:2)-[#7](-[#1])-[#6]:[#6]-[#6](-[#1])-[#1]",
421 |   "misc_pyrrole_thiaz(1)": "c:1(:n:c(:c(-[#1]):s:1)-c:2:c:c:c:c:c:2)-[#6](-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7]-[#6](-[#1])(-[#1])-c:3:c:c:c:n:3-[#1]",
422 |   "pyrrole_L(1)": "n:1(-[#1]):c(:c(-[#6](-[#1])-[#1]):c(:c:1-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])-[#1])-[#6](=[#8])-[#8]-[#6](-[#1])-[#1]",
423 |   "het_thio_65_D(1)": "c:2(:n:c:1:c(:c(:c:c(:c:1-[#1])-[F,Cl,Br,I])-[#1]):n:2-[#1])-[#16]-[#6](-[#1])(-[#1])-[#6](=[#8])-[#7](-[#1])-[#6]:[#6]",
424 |   "ene_misc_E(1)": "c:1(:c(:c-2:c(:c(:c:1-[#8]-[#6](-[#1])-[#1])-[#1])-[#6]=[#6]-[#6](-[#1])-[#16]-2)-[#1])-[#8]-[#6](-[#1])-[#1]",
425 |   "thio_cyano_A(1)": "[#7]-1(-[#1])-[#6](=[#16])-[#6](-[#1])(-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#6](=[#6]-1-[#6]:[#6])-[#1]",
426 |   "cyano_amino_het_B(1)": "n:1:c(:c(:c(:c(:c:1-[#16;X2]-c:2:c:c:c:c:c:2-[#7](-[#1])-[#1])-[#6]#[#7])-c:3:c:c:c:c:c:3)-[#6]#[#7])-[#7](-[#1])-[#1]",
427 |   "cyano_pyridone_G(1)": "[#7]-2(-c:1:c:c:c(:c:c:1)-[#8]-[#6](-[#1])-[#1])-[#6](=[#8])-[#6](=[#6]-[#6](=[#7]-2)-n:3:c:n:c:c:3)-[#6]#[#7]",
428 |   "het_65_J(1)": "o:1:c(:c:c:2:c:1:c(:c(:c(:c:2-[#1])-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#6](~[#8])~[#8]",
429 |   "ene_one_yne_A(1)": "[#6]#[#6]-[#6](=[#8])-[#6]#[#6]",
430 |   "anil_OH_no_alk_B(1)": "c:2(:c:1:c(:c(:c(:c(:c:1:c(:c(:c:2-[#8]-[#1])-[#6]=[#8])-[#1])-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#1]",
431 |   "hzone_acyl_misc_A(1)": "c:1(:c(:c(:c(:o:1)-[$([#1]),$([#6](-[#1])-[#1])])-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#7]=[#6](-[$([#1]),$([#6](-[#1])-[#1])])-c:2:c:c:c:c(:c:2)-[*]-[*]-[*]-c:3:c:c:c:o:3",
432 |   "thiophene_F(1)": "[#16](=[#8])(=[#8])-[#7](-[#1])-c:1:c(:c(:c(:s:1)-[#6]-[#1])-[#6]-[#1])-[#6](=[#8])-[#7]-[#1]",
433 |   "anil_OC_alk_E(1)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#8]-[#1])-[#6](-[#1])-[#1]",
434 |   "anil_OC_alk_F(1)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-[#6](-[#1])(-[#6]=[#8])-[#16]",
435 |   "het_65_K(1)": "n1nnnc2cccc12",
436 |   "het_65_L(1)": "c:1-2:c(-[#1]):s:c(:c:1-[#6](=[#8])-[#7]-[#7]=[#6]-2-[#7](-[#1])-[#1])-[#6]=[#8]",
437 |   "coumarin_E(1)": "c:1-3:c(:c:2:c(:c:c:1-[Br]):o:c:c:2)-[#6](=[#6]-[#6](=[#8])-[#8]-3)-[#1]",
438 |   "coumarin_F(1)": "c:1-3:c(:c:c:c:c:1)-[#6](=[#6](-[#6](=[#8])-[#7](-[#1])-c:2:n:o:c:c:2-[Br])-[#6](=[#8])-[#8]-3)-[#1]",
439 |   "coumarin_G(1)": "c:1-2:c(:c:c(:c:c:1-[F,Cl,Br,I])-[F,Cl,Br,I])-[#6](=[#6](-[#6](=[#8])-[#7](-[#1])-[#1])-[#6](=[#7]-[#1])-[#8]-2)-[#1]",
440 |   "coumarin_H(1)": "c:1-3:c(:c:c:c:c:1)-[#6](=[#6](-[#6](=[#8])-[#7](-[#1])-c:2:n:c(:c:s:2)-[#6]:[#16]:[#6]-[#1])-[#6](=[#8])-[#8]-3)-[#1]",
441 |   "het_thio_67_A(1)": "[#6](-[#1])(-[#1])-[#16;X2]-c:2:n:n:c:1-[#6]:[#6]-[#7]=[#6]-[#8]-c:1:n:2",
442 |   "sulfonamide_I(1)": "[#16](=[#8])(=[#8])(-c:1:c:n(-[#6](-[#1])-[#1]):c:n:1)-[#7](-[#1])-c:2:c:n(:n:c:2)-[#6](-[#1])(-[#1])-[#6]:[#6]-[#8]-[#6](-[#1])-[#1]",
443 |   "het_65_mannich(1)": "c:1-2:c(:c(:c(:c(:c:1-[#8]-[#6](-[#1])(-[#1])-[#8]-2)-[#6](-[#1])(-[#1])-[#7]-3-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#6]:[#6]-3)-[#1])-[#1])-[#1]",
444 |   "anil_alk_A(1)": "[#6](-[#1])(-[#1])-[#8]-[#6]:[#6]-[#6](-[#1])(-[#1])-[#7](-[#1])-c:2:c(:c(:c:1:n(:c(:n:c:1:c:2-[#1])-[#1])-[#6]-[#1])-[#1])-[#1]",
445 |   "het_5_inium(1)": "[#7]-4(-c:1:c:c:c:c:c:1)-[#6](=[#7+](-c:2:c:c:c:c:c:2)-[#6](=[#7]-c:3:c:c:c:c:c:3)-[#7]-4)-[#1]",
446 |   "anil_di_alk_P(1)": "[#6](-[#1])(-[#1])-[#7](-[#6](-[#1])-[#1])-c:2:c:c:c:1:s:c(:n:c:1:c:2)-[#16]-[#6](-[#1])-[#1]",
447 |   "thio_urea_Q(1)": "c:1:2:c(:c(:c(:c(:c:1:c(:c(-[#1]):c(:c:2-[#1])-[#1])-[#6](-[#6](-[#1])-[#1])=[#7]-[#7](-[#1])-[#6](=[#16])-[#7](-[#1])-[#6]:[#6]:[#6])-[#1])-[#1])-[#1])-[#1]",
448 |   "thio_pyridine_A(1)": "[#6]:1(:[#7]:[#6](:[#7]:[!#1]:[#7]:1)-c:2:c(:c(:c(:o:2)-[#1])-[#1])-[#1])-[#16]-[#6;X4]",
449 |   "melamine_B(1)": "n:1:c(:n:c(:n:c:1-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#7](-[#6]-[#1])-[#6]=[#8]",
450 |   "misc_phthal_thio_N(1)": "c:1(:n:s:c(:n:1)-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](-[#1])(-[#1])-[#7]-[#6](=[#8])-c:2:c:c:c:c:c:2-[#6](=[#8])-[#8]-[#1])-c:3:c:c:c:c:c:3",
451 |   "hzone_acyl_misc_B(1)": "n:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#7]=[#6](-[#1])-c:2:c:c:c:c:c:2-[#8]-[#6](-[#1])(-[#1])-[#6](=[#8])-[#8]-[#1]",
452 |   "tert_butyl_B(1)": "[#6](-[#1])(-[#1])(-[#1])-[#6](-[#6](-[#1])(-[#1])-[#1])(-[#6](-[#1])(-[#1])-[#1])-c:1:c(:c(:c(:c(:c:1-[#8]-[#1])-[#6](-[#6](-[#1])(-[#1])-[#1])(-[#6](-[#1])(-[#1])-[#1])-[#6](-[#1])(-[#1])-[#1])-[#1])-[#6](-[#1])(-[#1])-c:2:c:c:c(:c(:c:2-[#1])-[#1])-[#8]-[#1])-[#1]",
453 |   "diazox_E(1)": "[#7](-[#1])(-[#1])-c:1:c(-[#7](-[#1])-[#1]):c(:c(-[#1]):c:2:n:o:n:c:1:2)-[#1]",
454 |   "anil_NH_no_alk_B(1)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-[#7](-[#1])-[#16](=[#8])=[#8])-[#1])-[#7](-[#1])-[#6](-[#1])-[#1])-[F,Cl,Br,I])-[#1]",
455 |   "anil_no_alk_A(1)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-[#7]=[#6]-2-[#6](=[#6]~[#6]~[#6]=[#6]-2)-[#1])-[#1])-[#1])-[#1])-[#1]",
456 |   "anil_no_alk_B(1)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-n:2:c:c:c:c:2)-[#1])-[#6](-[#1])-[#1])-[#6](-[#1])-[#1])-[#1]",
457 |   "thio_ene_amine_A(1)": "[#16]=[#6]-[#6](-[#6](-[#1])-[#1])=[#6](-[#6](-[#1])-[#1])-[#7](-[#6](-[#1])-[#1])-[#6](-[#1])-[#1]",
458 |   "het_55_B(1)": "[#6]-1:[#6]-[#8]-[#6]-2-[#6](-[#1])(-[#1])-[#6](=[#8])-[#8]-[#6]-1-2",
459 |   "cyanamide_A(1)": "[#8]-[#6](=[#8])-[#6](-[#1])(-[#1])-[#16;X2]-[#6](=[#7]-[#6]#[#7])-[#7](-[#1])-c:1:c:c:c:c:c:1",
460 |   "ene_one_one_A(1)": "[#8]=[#6]-[#6]-1=[#6](-[#16]-[#6](=[#6](-[#1])-[#6])-[#16]-1)-[#6]=[#8]",
461 |   "ene_six_het_D(1)": "[#8]=[#6]-1-[#7]-[#7]-[#6](=[#7]-[#6]-1=[#6]-[#1])-[!#1]:[!#1]",
462 |   "ene_cyano_E(1)": "[#8]=[#6]-[#6](-[#1])=[#6](-[#6]#[#7])-[#6]",
463 |   "ene_cyano_F(1)": "[#8](-[#1])-[#6](=[#8])-c:1:c(:c(:c(:c(:c:1-[#8]-[#1])-[#1])-c:2:c(-[#1]):c(:c(:o:2)-[#6](-[#1])=[#6](-[#6]#[#7])-c:3:n:c:c:n:3)-[#1])-[#1])-[#1]",
464 |   "hzone_furan_C(1)": "c:1:c(:c:c:c:c:1)-[#7](-c:2:c:c:c:c:c:2)-[#7]=[#6](-[#1])-[#6]:3:[#6](:[#6](:[#6](:[!#1]:3)-c:4:c:c:c:c(:c:4)-[#6](=[#8])-[#8]-[#1])-[#1])-[#1]",
465 |   "anil_no_alk_C(1)": "[#7](-[#1])(-[#1])-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-c:2:c(-[#1]):c(:c(-[#6](-[#1])-[#1]):o:2)-[#6]=[#8])-[#1])-[#1]",
466 |   "hzone_acid_D(1)": "[#8](-[#1])-[#6](=[#8])-c:1:c:c:c(:c:c:1)-[#7]-[#7]=[#6](-[#1])-[#6]:2:[#6](:[#6](:[#6](:[!#1]:2)-c:3:c:c:c:c:c:3)-[#1])-[#1]",
467 |   "hzone_furan_E(1)": "[#8](-[#1])-[#6](=[#8])-c:1:c:c:c:c(:c:1)-[#6]:[!#1]:[#6]-[#6]=[#7]-[#7](-[#1])-[#6](=[#8])-[#6](-[#1])(-[#1])-[#8]",
468 |   "het_6_pyridone_NH2(1)": "[#8](-[#1])-[#6]:1:[#6](:[#6]:[!#1]:[#6](:[#7]:1)-[#7](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6](=[#8])-[#8]",
469 |   "imine_one_fives_D(1)": "[#6]-1(=[!#6&!#1])-[#6](-[#7]=[#6]-[#16]-1)=[#8]",
470 |   "pyrrole_M(1)": "n2(-c:1:c:c:c:c:c:1)c(c(-[#1])c(c2-[#6]=[#7]-[#8]-[#1])-[#1])-[#1]",
471 |   "pyrrole_N(1)": "n2(-[#6](-[#1])-c:1:c(:c(:c:c(:c:1-[#1])-[#1])-[#1])-[#1])c(c(-[#1])c(c2-[#6]-[#1])-[#1])-[#6]-[#1]",
472 |   "pyrrole_O(1)": "n1(-[#6](-[#1])-[#1])c(c(-[#6](=[#8])-[#6])c(c1-[#6]:[#6])-[#6])-[#6](-[#1])-[#1]",
473 |   "ene_cyano_G(1)": "n1(-[#6])c(c(-[#1])c(c1-[#6](-[#1])=[#6](-[#6]#[#7])-c:2:n:c:c:s:2)-[#1])-[#1]",
474 |   "sulfonamide_J(1)": "n3(-c:1:c:c:c:c:c:1-[#7](-[#1])-[#16](=[#8])(=[#8])-c:2:c:c:c:s:2)c(c(-[#1])c(c3-[#1])-[#1])-[#1]",
475 |   "misc_pyrrole_benz(1)": "n2(-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#6](=[#8])-[#7](-[#1])-[#6](-[#1])(-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#8]-[#6]:[#6])c(c(-[#1])c(c2-[#1])-[#1])-[#1]",
476 |   "thio_urea_R(1)": "c:1(:c:c:c:c:c:1)-[#7](-[#1])-[#6](=[#16])-[#7]-[#7](-[#1])-[#6](-[#1])=[#6](-[#1])-[#6]=[#8]",
477 |   "ene_one_one_B(1)": "[#6]-1(-[#6](=[#8])-[#6](-[#1])(-[#1])-[#6]-[#6](-[#1])(-[#1])-[#6]-1=[#8])=[#6](-[#7]-[#1])-[#6]=[#8]",
478 |   "dhp_amino_CN_H(1)": "[#7](-[#1])(-[#1])-[#6]-1=[#6](-[#6]#[#7])-[#6](-[#1])(-[#6]:[#6])-[#16]-[#6;X4]-[#16]-1",
479 |   "het_66_anisole(1)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#1])-[#1])-[#7](-[#1])-c:2:c:c:n:c:3:c(:c:c:c(:c:2:3)-[#8]-[#6](-[#1])-[#1])-[#8]-[#6](-[#1])-[#1]",
480 |   "thiazole_amine_N(1)": "[#6](-[#1])(-[#1])-[#8]-c:1:c(:c(:c(:c(:c:1-[#1])-[#1])-[#8]-[#6](-[#1])-[#1])-[#1])-[#7](-[#1])-c:2:n:c(:c:s:2)-c:3:c:c:c(:c:c:3)-[#8]-[#6](-[#1])-[#1]",
481 |   "het_pyridiniums_C(1)": "[#6]~1~3~[#7](-[#6]:[#6])~[#6]~[#6]~[#6]~[#6]~1~[#6]~2~[#7]~[#6]~[#6]~[#6]~[#7+]~2~[#7]~3",
482 |   "het_5_E(1)": "[#7]-3(-c:2:c:1:c:c:c:c:c:1:c:c:c:2)-[#7]=[#6](-[#6](-[#1])-[#1])-[#6](-[#1])(-[#1])-[#6]-3=[#8]"
483 | }


--------------------------------------------------------------------------------