├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── MCTS_node.py ├── README.md ├── Rollout_policies.py ├── Tree.py ├── UCT_policies.py ├── biological_scoring.py ├── calculate_organisms.py ├── calculate_rule_sets_similarity.py ├── change_config.py ├── chemical_compounds_state.py ├── chemical_scoring.py ├── chemistry_choices.md ├── compound.py ├── compound_scoring.py ├── config.py ├── convert_to_SBML.py ├── data ├── base_config.py ├── compounds_to_add │ └── TPA_to_add.csv ├── golden_dataset.csv ├── name_structure_toxicity.csv ├── sinks │ ├── bsubtilis_iYO844_sink_reduced_rp_ready.csv │ ├── detectable_metabolites_uncommented.csv │ ├── ecoli_core_sink_reduced_rp_ready.csv │ ├── ecoli_iJO1366_sink_reduced_rp_ready.csv │ └── ecoli_iML1515_sink_reduced_rp_ready.csv └── supplement_finder │ ├── data │ └── metanetx_extracted_inchikeys.json.tar.gz │ └── tree_for_testing │ ├── TPA │ └── pickles │ │ └── tree_end_search.pkl.tar.gz │ └── morphine │ └── pickles │ └── tree_end_search.pkl.tar.gz ├── document_all_options.md ├── expected_results ├── deoxiviolacein_1.json ├── deoxiviolacein_2.json ├── deoxiviolacein_3.json ├── deoxiviolacein_4.json ├── deoxiviolacein_best.json ├── deoxiviolacein_full_scope.json ├── deoxiviolacein_full_tree_for_MCTS.json ├── deoxiviolacein_iteration_12.json ├── deoxiviolacein_iteration_15.json ├── deoxiviolacein_iteration_82.json ├── deoxiviolacein_iteration_85.json ├── pickles │ └── tree_end_search.pkl.tar.gz ├── results.csv └── tree.log ├── move.py ├── organisms.py ├── pathway.py ├── pathway_scoring.py ├── pyproject.toml ├── representation.py ├── rewarding.py ├── rule_sets_examples.py ├── rule_sets_similarity.py ├── setup.py ├── supplement_finder.py ├── tests ├── data │ ├── rules_mixed_subset.tsv │ ├── rules_r10_subset.tsv │ ├── rules_r2_subset.tsv │ ├── state_BOPG_BSAB_GPRL.pkl │ └── tree_pipecolate_test.pkl ├── generated_jsons │ ├── .gitignore │ └── .gitkeep ├── test_Filters.py ├── test_MCTS_node.py ├── test_Standardizer.py ├── test_Tree.py ├── test_Utils.py ├── test_cli.py ├── test_compound.py ├── test_moves.py ├── test_state.py └── tree_test.pkl ├── tox.ini ├── tree_viewer.py └── utilities ├── chemtools ├── Filters.py ├── Sequences.py ├── Standardizer.py └── Utils.py └── reactor ├── Core.py ├── Utils.py └── cli.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Usual stuff 2 | .DS_Store 3 | __pycache__ 4 | *.egg-info 5 | 6 | # Data 7 | data/*/*.log 8 | data/*/*.pkl 9 | 10 | # Test data 11 | tests/generated_jsons/pipecolate_iteration_0.json 12 | 13 | # IDE 14 | .vscode 15 | .idea -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## Unreleased 2 | 3 | ### Feat 4 | 5 | - enables execution without fire timeout 6 | - **Tree**: refine debug logging 7 | 8 | ### Fix 9 | 10 | - **compound**: use standardisation timeout ti new compounds 11 | - **Tree**: timeout arguments as int 12 | - further restrict rdkit version (reproducibility issue #21) 13 | 14 | ### Refactor 15 | 16 | - **Tree**: remove unused code 17 | 18 | ## 1.0.1 (2024-06-20) 19 | 20 | ### Fix 21 | 22 | - **DATA_PATH**: fix typo 23 | - **Tree**: import missing pre-parsed organisms 24 | 25 | ### Refactor 26 | 27 | - **Tree**: sweep imports 28 | - **calculate_organisms**: clean organism data files generation 29 | - **calculate_organisms**: remove unused imports 30 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Mathilde Koch, INRA 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include chemistry_choices.md 2 | include data/compounds_to_add/* 3 | include data/sinks/* 4 | include data/supplement_finder/* 5 | include data/golden_dataset.csv 6 | include data/name_structure_toxicity.csv 7 | include expected_results/* 8 | include tests/* 9 | include utilities/* 10 | include README.md 11 | -------------------------------------------------------------------------------- /Rollout_policies.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Defines the Rollout policies. 4 | Usage is : move = RolloutPolicy.select_best_move(available_moves) 5 | Remarks: 6 | - various policies have been tested on toy examples on a Jupyter notebook during implementation 7 | """ 8 | 9 | from math import sqrt, log 10 | import random 11 | 12 | class Rollout_policy(object): 13 | """ 14 | Defines rollout policy. 15 | From a list of moves, select the one that should be used for rollout. 16 | This is the base object, subclasses necessitate a policy function. 17 | """ 18 | def __init__(self, policy_type, description = "Default Rollout Policy"): 19 | self.policy_type = policy_type 20 | self.description = description 21 | 22 | def select_best_move(self, available_moves): 23 | try: 24 | move = self.policy(available_moves) 25 | return(move) 26 | except IndexError: 27 | return(None) 28 | 29 | def __str__(self): 30 | return("Policy type: {} \nDescription: {}".format(self.policy_type, self.description)) 31 | 32 | class Rollout_policy_first(Rollout_policy): 33 | """ 34 | Defines rollout policy. 
35 | Always returns the first element: first compound, first rule
36 | """
37 | def __init__(self):
38 | description = "Always select the first compound_rule combination"
39 | Rollout_policy.__init__(self, policy_type = "First found combination", description = description)
40 | self.name = "Rollout_policy_first"
41 | self.policy = self.policy()
42 | 
43 | def policy(self):
44 | # CODE IT
45 | def select_best_inside(available_moves):
46 | move = available_moves[0]
47 | return(move)
48 | return(select_best_inside)
49 | 
50 | class Rollout_policy_chemical_best(Rollout_policy):
51 | """
52 | Defines rollout policy.
53 | Always returns the best chemical move
54 | """
55 | def __init__(self):
56 | description = "Always select the move with the highest chemical score"
57 | Rollout_policy.__init__(self, policy_type = "Best Chemical", description = description)
58 | self.policy = self.best_chemical_policy()
59 | self.name = "Rollout_policy_chemical_best"
60 | 
61 | def best_chemical_policy(self):
62 | # CODE IT
63 | def select_best_inside(available_moves):
64 | current_best = available_moves[0]
65 | current_best_score = current_best.chemical_score
66 | for element in available_moves:
67 | chemical_score = element.chemical_score
68 | if chemical_score > current_best_score:
69 | current_best_score = chemical_score
70 | current_best = element
71 | return(current_best)
72 | return(select_best_inside)
73 | 
74 | class Rollout_policy_biological_best(Rollout_policy):
75 | """
76 | Defines rollout policy.
77 | Always returns the best biological move
78 | """
79 | def __init__(self):
80 | description = "Always select the move with the highest biological score"
81 | Rollout_policy.__init__(self, policy_type = "Best Biological", description = description)
82 | self.policy = self.best_biological_policy()
83 | self.name = "Rollout_policy_biological_best"
84 | 
85 | def best_biological_policy(self):
86 | # CODE IT
87 | def select_best_inside(available_moves):
88 | current_best = available_moves[0]
89 | current_best_score = current_best.biological_score
90 | for element in available_moves:
91 | biological_score = element.biological_score
92 | if biological_score > current_best_score:
93 | current_best_score = biological_score
94 | current_best = element
95 | return(current_best)
96 | return(select_best_inside)
97 | 
98 | class Rollout_policy_biochemical_addition_best(Rollout_policy):
99 | """
100 | Defines rollout policy.
101 | Always returns the best biochemical (addition of scores) move
102 | """
103 | def __init__(self):
104 | description = "Select the highest Biochemical addition score"
105 | Rollout_policy.__init__(self, policy_type = "Best Biochemical addition", description = description)
106 | self.policy = self.best_biochemical_policy()
107 | self.name = "Rollout_policy_biochemical_addition_best"
108 | 
109 | def best_biochemical_policy(self):
110 | # CODE IT
111 | def select_best_inside(available_moves):
112 | current_best = available_moves[0]
113 | current_best_score = current_best.biological_score + current_best.chemical_score
114 | for element in available_moves:
115 | biological_score = element.biological_score
116 | chemical_score = element.chemical_score
117 | if biological_score + chemical_score > current_best_score:
118 | current_best_score = biological_score + chemical_score
119 | current_best = element
120 | return(current_best)
121 | return(select_best_inside)
122 | 
123 | class Rollout_policy_biochemical_multiplication_best(Rollout_policy):
124 | """
125 | Defines rollout policy.
126 | Always returns the best biochemical (multiplication of scores) move 127 | """ 128 | def __init__(self): 129 | description = "Select the highest Biochemical multiplication score" 130 | Rollout_policy.__init__(self, policy_type = "Best Biochemical multiplication", description = description) 131 | self.policy = self.best_biochemical_policy() 132 | self.name = "Rollout_policy_biochemical_multiplication_best" 133 | 134 | def best_biochemical_policy(self): 135 | # CODE IT 136 | def select_best_inside(available_moves): 137 | current_best = available_moves[0] 138 | current_best_score = current_best.biological_score * current_best.chemical_score 139 | for element in available_moves: 140 | biological_score = element.biological_score 141 | chemical_score = element.chemical_score 142 | if biological_score * chemical_score > current_best_score: 143 | current_best_score = biological_score * chemical_score 144 | current_best = element 145 | return(current_best) 146 | return(select_best_inside) 147 | 148 | class Rollout_policy_random_uniform(Rollout_policy): 149 | """ 150 | Random sampling of the move amongst available moves 151 | """ 152 | def __init__(self): 153 | description = "Random selection - no scoring involved" 154 | Rollout_policy.__init__(self, policy_type = "Random sampling", description = description) 155 | self.policy = self.policy() 156 | self.name = "Rollout_policy_random_uniform" 157 | 158 | def policy(self): 159 | # CODE IT 160 | def select_best_inside(available_moves): 161 | index = random.randrange(0, len(available_moves)) 162 | move = available_moves[index] 163 | return(move) 164 | return(select_best_inside) 165 | 166 | class Rollout_policy_random_uniform_on_chem_score(Rollout_policy): 167 | """ 168 | Random sampling of the move amongst available moves, weighted by chemical score 169 | """ 170 | def __init__(self): 171 | description = "Random selection - uniform sampling from chemical weights" 172 | Rollout_policy.__init__(self, policy_type = "Chemical uniform sampling", description = description) 173 | self.policy = self.policy() 174 | self.name = "Rollout_policy_random_uniform_on_chem_score" 175 | 176 | def policy(self): 177 | # CODE IT 178 | def select_best_inside(available_moves): 179 | pop, cum, cum_w = [], [], 0 180 | for move in available_moves: 181 | pop.append(move) 182 | cum_w = cum_w + move.chemical_score 183 | cum.append(cum_w) 184 | move = random.choices(pop, cum_weights=cum, k=1)[0] 185 | return(move) 186 | return(select_best_inside) 187 | 188 | class Rollout_policy_random_uniform_on_bio_score(Rollout_policy): 189 | """ 190 | Random sampling of the move amongst available moves, weighted by biological score 191 | """ 192 | def __init__(self): 193 | description = "Random selection - uniform sampling from biological weights" 194 | Rollout_policy.__init__(self, policy_type = "Biological uniform sampling", description = description) 195 | self.policy = self.policy() 196 | self.name = "Rollout_policy_random_uniform_on_bio_score" 197 | def policy(self): 198 | # CODE IT 199 | def select_best_inside(available_moves): 200 | pop, cum, cum_w = [], [], 0 201 | 202 | for move in available_moves: 203 | pop.append(move) 204 | cum_w = cum_w + move.biological_score 205 | cum.append(cum_w) 206 | move = random.choices(pop, cum_weights=cum, k=1)[0] 207 | return(move) 208 | return(select_best_inside) 209 | 210 | class Rollout_policy_random_uniform_on_biochemical_addition_score(Rollout_policy): 211 | """ 212 | Random sampling of the move amongst available moves, weighted by biochemical 
(addition) score 213 | """ 214 | def __init__(self): 215 | description = "Random selection - uniform sampling from added biochemical weights" 216 | Rollout_policy.__init__(self, policy_type = "Biochemical addition uniform sampling", description = description) 217 | self.policy = self.policy() 218 | self.name = "Rollout_policy_random_uniform_on_biochemical_addition_score" 219 | 220 | def policy(self): 221 | # CODE IT 222 | def select_best_inside(available_moves): 223 | pop, cum, cum_w = [], [], 0 224 | 225 | for move in available_moves: 226 | pop.append(move) 227 | cum_w = cum_w + move.biological_score + move.chemical_score 228 | cum.append(cum_w) 229 | move = random.choices(pop, cum_weights=cum, k=1)[0] 230 | return(move) 231 | return(select_best_inside) 232 | 233 | class Rollout_policy_random_uniform_on_biochemical_multiplication_score(Rollout_policy): 234 | """ 235 | Random sampling of the move amongst available moves, weighted by biochemical (multiplication) score 236 | """ 237 | def __init__(self): 238 | description = "Random selection - uniform sampling from multiplied biochemical weights" 239 | Rollout_policy.__init__(self, policy_type = "Biochemical uniform sampling", description = description) 240 | self.policy = self.policy() 241 | self.name = "Rollout_policy_random_uniform_on_biochemical_multiplication_score" 242 | 243 | def policy(self): 244 | # CODE IT 245 | def select_best_inside(available_moves): 246 | pop, cum, cum_w = [], [], 0 247 | 248 | for move in available_moves: 249 | pop.append(move) 250 | cum_w = cum_w + move.biological_score * move.chemical_score 251 | cum.append(cum_w) 252 | move = random.choices(pop, cum_weights=cum, k=1)[0] 253 | return(move) 254 | return(select_best_inside) 255 | -------------------------------------------------------------------------------- /UCT_policies.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines the UCT (Upper Confidence Tree) policies. 3 | It it the formula that allows for balancing between exploration and exploitation when selecting children in the Tree. 4 | Implements a number of different policies. 5 | Policies are Subclasses of UCT_policy Class. 6 | They need to have a attribute function that does the calculation. See examples if you want to develop your own. 7 | """ 8 | 9 | from math import sqrt, log 10 | 11 | 12 | class UCT_policy(object): 13 | """ 14 | Defines UCT_policies objects. 15 | They take a node and return the best child according to this policy. 16 | Only subclasses of this object can work as there is no default calculation function. 17 | """ 18 | def __init__(self, parameters = {"UCTK": 2}, policy_type = 'Classical', function = None): 19 | self.parameters = parameters 20 | self.policy_type = policy_type 21 | 22 | def calculate(self, node, top_n = 1): 23 | s = sorted(node.children, key = lambda c: self.function(c, parent_visits = node.visits)) 24 | s = s[-top_n] 25 | return s 26 | 27 | def __str__(self): 28 | return("Policy type: {} \nFormula: {}".format(self.policy_type, self.formula)) 29 | 30 | 31 | class Classical_UCT(UCT_policy): 32 | """ 33 | This class implements the most basic UCT functions. 34 | Only uses number of visits as a criteria. 35 | It is the Classical UCT formula where no additionnal expert knowledge is inputed. 
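To make the exploration/exploitation balance concrete, a worked example with purely hypothetical numbers (UCTK = 1000, the default parameter): for a child with average_score = 0.5, visited 3 times under a parent visited 100 times,
value = 0.5 + sqrt(1000 * log(100 + 1) / (3 + 1)) ≈ 0.5 + 34.0 ≈ 34.5,
so with such a large UCTK the exploration term dominates until visit counts grow substantially.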
36 | """ 37 | def __init__(self, parameters = {"UCTK": 1000}): 38 | UCT_policy.__init__(self, policy_type = "Classical") 39 | self.parameters = parameters 40 | self.formula = "mean_score + sqrt({}*log(N + 1)/(n+1))".format(parameters["UCTK"]) 41 | self.function = self.simple_UCT_formula(self.parameters) 42 | 43 | def simple_UCT_formula(self, parameters): 44 | UCTK = parameters["UCTK"] 45 | def simple_formula_inside(c, parent_visits): 46 | value = c.average_score + sqrt(UCTK*log(parent_visits +1)/(c.visits + 1)) 47 | return(value) 48 | return(simple_formula_inside) 49 | 50 | class Classical_UCT_RAVE(UCT_policy): 51 | """ 52 | This class implements UCT based on visit count and RAVE. 53 | RAVE stands for Rapid Action Value Estimation: 54 | - it adds another score based on usage of identical moves elsewhere in the Tree 55 | - this is ponderated by the number of visits: as visits increase, the actual score of the node becomes more important than this initila estimation. 56 | """ 57 | def __init__(self, parameters = {"UCTK": 1000, "k_rave": 100}): 58 | UCT_policy.__init__(self, policy_type = "Classical_RAVE") 59 | self.parameters = parameters 60 | self.formula = "(1-b) mean_score + b rave_score + sqrt({}*log(N + 1)/(n+1)) with b = sqrt({}/(3N + {}))".format(parameters["UCTK"], parameters["k_rave"], parameters["k_rave"]) 61 | self.function = self.RAVE_formula(parameters = self.parameters) 62 | 63 | def RAVE_formula(self, parameters): 64 | UCTK = parameters["UCTK"] 65 | k_rave = parameters["k_rave"] 66 | def simple_formula_inside(c, parent_visits): 67 | b = sqrt(k_rave/(3*parent_visits + k_rave)) 68 | value = c.average_score *(1-b) + b * c.move.RAVE_average_score + sqrt(UCTK*log(parent_visits +1)/(c.visits + 1)) 69 | return(value) 70 | return(simple_formula_inside) 71 | 72 | class Classical_UCT_with_bias(UCT_policy): 73 | """ 74 | This class implements UCT based on visits and progressive bias. 75 | Progressive bias works by 76 | - giving an initial value to a node (based on expert knowledge for example) 77 | - this importance decreases as the node gets visited and this initial estimation's importance decreases in favor of actual rollouts. 
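A hypothetical illustration of how the bias term bias_k * progressive_bias / (n + 1) fades: with bias_k = 1 and progressive_bias = 0.8, an unvisited child (n = 0) receives a bonus of 0.8, which shrinks to 0.8 / 8 = 0.1 after 7 visits, so the expert prior gradually gives way to the observed rollout average.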
78 | """ 79 | def __init__(self, parameters = {"UCTK": 1000, "bias_k": 1}): 80 | UCT_policy.__init__(self, policy_type = "Classical") 81 | self.parameters = parameters 82 | self.formula = "mean_score + sqrt({}*log(N + 1)/(n+1)) + {} * progressive_bias/(n+1)".format(parameters["UCTK"], parameters["bias_k"]) 83 | self.function = self.simple_UCT_formula(self.parameters) 84 | 85 | def simple_UCT_formula(self, parameters): 86 | UCTK = parameters["UCTK"] 87 | bias_k = parameters["bias_k"] 88 | def simple_formula_inside(c, parent_visits): 89 | value = c.average_score + sqrt(UCTK*log(parent_visits +1)/(c.visits + 1))+ bias_k * c.progressive_bias/(c.visits + 1) 90 | return(value) 91 | return(simple_formula_inside) 92 | 93 | class Nature_UCT(UCT_policy): 94 | """ 95 | This class implements the formula used in the following Nature paper :(https://doi.org/10.1038/nature25978) 96 | Planning chemical syntheses with deep neural networks and symbolic AI 97 | It is identical to the Chemical Scoring UCT (Chemical_UCT_1) 98 | """ 99 | def __init__(self, parameters = {"UCTK": 3}): 100 | UCT_policy.__init__(self, policy_type = "Nature Symbolic IA") 101 | self.parameters = parameters 102 | self.formula = "mean_score + {} * P * sqrt(N/(n+1))".format(parameters["UCTK"]) 103 | self.function = self.Nature_UCT_formula(self.parameters) 104 | 105 | def Nature_UCT_formula(self, parameters): 106 | UCTK = parameters["UCTK"] 107 | def simple_formula_inside(c, parent_visits): 108 | chem_P = c.move.chemical_score 109 | value = c.average_score + UCTK * chem_P *sqrt(parent_visits/(c.visits + 1)) 110 | return(value) 111 | return(simple_formula_inside) 112 | 113 | class Biochemical_UCT_1(UCT_policy): 114 | """ 115 | This class implements a simple biochemical score UCT. 116 | The selection is guided by a product of chemical and biological score. 117 | """ 118 | def __init__(self, parameters = {"UCTK": 3}): 119 | UCT_policy.__init__(self, policy_type = "Biochemical multiplication") 120 | self.parameters = parameters 121 | self.formula = "mean_score + {} * P_c * B * sqrt(N/(n+1))".format(parameters["UCTK"]) 122 | self.function = self.Biochemical_UCT_formula(self.parameters) 123 | 124 | def Biochemical_UCT_formula(self, parameters): 125 | UCTK = parameters["UCTK"] 126 | def simple_formula_inside(c, parent_visits): 127 | chem_P = c.move.chemical_score 128 | b_score = c.move.biological_score 129 | value = c.average_score + UCTK * chem_P * b_score *sqrt(parent_visits/(c.visits + 1)) 130 | return(value) 131 | return(simple_formula_inside) 132 | 133 | class Biological_UCT_1(UCT_policy): 134 | """ 135 | This class implements a simple biological score UCT. 136 | The selection is guided by Biological score only. 137 | """ 138 | def __init__(self, parameters = {"UCTK": 3}): 139 | UCT_policy.__init__(self, policy_type = "Biological score only") 140 | self.parameters = parameters 141 | self.formula = "mean_score + {} * B * sqrt(N/(n+1))".format(parameters["UCTK"]) 142 | self.function = self.Biological_UCT_formula(self.parameters) 143 | 144 | def Biological_UCT_formula(self, parameters): 145 | UCTK = parameters["UCTK"] 146 | def simple_formula_inside(c, parent_visits): 147 | b_score = c.move.biological_score 148 | value = c.average_score + UCTK * b_score *sqrt(parent_visits/(c.visits + 1)) 149 | return(value) 150 | return(simple_formula_inside) 151 | 152 | class Chemical_UCT_1(UCT_policy): 153 | """ 154 | This class implements a simple chemical score UCT. 155 | The selection is guided by Chemical score only. 
156 | """ 157 | def __init__(self, parameters = {"UCTK": 3}): 158 | UCT_policy.__init__(self, policy_type = "Chemical multiplication") 159 | self.parameters = parameters 160 | self.formula = "mean_score + {} * P_c * sqrt(N/(n+1))".format(parameters["UCTK"]) 161 | self.function = self.Chemical_UCT_formula(self.parameters) 162 | 163 | # @staticmethod 164 | def Chemical_UCT_formula(self, parameters): 165 | UCTK = parameters["UCTK"] 166 | def simple_formula_inside(c, parent_visits): 167 | chem_P = c.move.chemical_score 168 | value = c.average_score + UCTK * chem_P *sqrt(parent_visits/(c.visits + 1)) 169 | return(value) 170 | return(simple_formula_inside) 171 | 172 | class Biochemical_UCT_1_with_RAVE(UCT_policy): 173 | """ 174 | This class implements a biochemical score UCT with RAVE augmentation. 175 | RAVE stands for Rapid Action Value Estimation: 176 | - it adds another score based on usage of identical moves elsewhere in the Tree 177 | - this is ponderated by the number of visits: as visits increase, the actual score of the node becomes more important than this initila estimation. 178 | """ 179 | def __init__(self, parameters = {"UCTK": 3, "k_rave": 100}): 180 | UCT_policy.__init__(self, policy_type = "Biochemical multiplication with RAVE") 181 | self.parameters = parameters 182 | self.formula = "(1-b) mean_score + b rave_score + {} * P_c * B * sqrt(N/(n+1)) with b = sqrt({}/(3N + {})".format(parameters["UCTK"], parameters["k_rave"], parameters["k_rave"]) 183 | self.function = self.Biochemical_UCT_RAVE_formula(self.parameters) 184 | 185 | def Biochemical_UCT_RAVE_formula(self, parameters): 186 | UCTK = parameters["UCTK"] 187 | k_rave = parameters["k_rave"] 188 | def simple_formula_inside(c, parent_visits): 189 | b = sqrt(k_rave/(3*parent_visits + k_rave)) 190 | b_score = c.move.biological_score 191 | chem_P = c.move.chemical_score 192 | value = c.average_score * (1-b) + b * c.move.RAVE_average_score + UCTK * chem_P * b_score *sqrt(parent_visits/(c.visits + 1)) 193 | return(value) 194 | return(simple_formula_inside) 195 | 196 | class Biochemical_UCT_with_progressive_bias(UCT_policy): 197 | """ 198 | This class implements a biochemical score UCT and progressive bias. 199 | Progressive bias works by 200 | - giving an initial value to a node (based on expert knowledge for example) 201 | - this importance decreases as the node gets visited and this initial estimation's importance decreases in favor of actual rollouts. 202 | """ 203 | def __init__(self, parameters = {"UCTK": 3, "bias_k": 1}): 204 | UCT_policy.__init__(self, policy_type = "Biochemical with progressive bias") 205 | self.parameters = parameters 206 | self.formula = "mean_score + {} * bias/(n+1) + {} * P_c * B * sqrt(N/(n+1))".format(parameters["bias_k"], parameters["UCTK"]) 207 | self.function = self.Biochemical_UCT_with_bias_formula(parameters) 208 | 209 | # @staticmethod 210 | def Biochemical_UCT_with_bias_formula(self, parameters): 211 | UCTK = parameters["UCTK"] 212 | bias_k = parameters["bias_k"] 213 | def simple_formula_inside(c, parent_visits): 214 | chem_P = c.move.chemical_score 215 | b_score = c.move.biological_score 216 | bias = c.progressive_bias 217 | value = c.average_score + bias_k * bias/(c.visits +1) + UCTK * chem_P * b_score *sqrt(parent_visits/(c.visits + 1)) 218 | return(value) 219 | return(simple_formula_inside) 220 | 221 | class Biochemical_UCT_with_toxicity(UCT_policy): 222 | """ 223 | This class implements a biochemical score UCT combined with toxicity bias. 
224 | the formula is identical to the Biochemical_UCT_with_progressive_bias, the bias being the node's toxicity. 225 | """ 226 | def __init__(self, parameters = {"UCTK": 3, "bias_k": 1}): 227 | UCT_policy.__init__(self, policy_type = "Biochemical with toxicity") 228 | self.parameters = parameters 229 | self.formula = "mean_score + {} * toxicity/(n+1) + {} * P_c * B * sqrt(N/(n+1))".format(parameters["bias_k"], parameters["UCTK"]) 230 | self.function = self.Biochemical_UCT_with_toxicity_formula(parameters) 231 | 232 | def Biochemical_UCT_with_toxicity_formula(self, parameters): 233 | UCTK = parameters["UCTK"] 234 | bias_k = parameters["bias_k"] 235 | def simple_formula_inside(c, parent_visits): 236 | chem_P = c.move.chemical_score 237 | b_score = c.move.biological_score 238 | toxicity = c.toxicity 239 | value = c.average_score + bias_k * toxicity/(c.visits +1) + UCTK * chem_P * b_score *sqrt(parent_visits/(c.visits + 1)) 240 | return(value) 241 | return(simple_formula_inside) 242 | -------------------------------------------------------------------------------- /biological_scoring.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines the biological scoring function. 3 | Necessitates random for random scoring, and all rule sets for biological scoring. 4 | """ 5 | 6 | import random 7 | from rule_sets_examples import * 8 | from rule_sets_similarity import * 9 | 10 | class BiologicalScoring(object): 11 | """ 12 | Defines Biological Scorer object. 13 | Returns the biological score associated to a reaction rule. 14 | """ 15 | def __init__(self, scoring_function): 16 | self.scoring_function = scoring_function 17 | self.name = "Random" 18 | 19 | def __repr__(self): 20 | return(self.name) 21 | 22 | def calculate(self, rule): 23 | score = self.scoring_function(rule) 24 | return(score) 25 | 26 | def pseudo_random(rule): 27 | score = random.uniform(0, 10) 28 | return(score) 29 | 30 | class BiologicalScoringOrganism(BiologicalScoring): 31 | """ 32 | Defines Biological Scorer object from an organism with predefined scores. 33 | Inverted converts a penalty to a score. 34 | This will be analysed more in depth when biological score will evolve. 35 | """ 36 | def __init__(self, rules_dictionnary, inverted = False, name = "None"): 37 | BiologicalScoring.__init__(self, scoring_function = None) 38 | self.scoring_function = self.assign_from_dict(rules_dictionnary, inverted) 39 | self.name = name 40 | 41 | def __repr__(self): 42 | return(self.name) 43 | 44 | def assign_from_dict(self, rules_dictionnary, inverted): 45 | rules_dictionnary = rules_dictionnary 46 | def simple_assign_inside(rule): 47 | score = rules_dictionnary[rule]["biological_score"] 48 | # Inverted if to use penalties instead of scors. 
49 | # if inverted: 50 | # try: 51 | # return(1/score) 52 | # except ZeroDivisionError: 53 | # return(33) 54 | # else: 55 | return(score) 56 | return(simple_assign_inside) 57 | 58 | 59 | RandomBiologicalScorer = BiologicalScoring(scoring_function = pseudo_random) 60 | BiologicalFullScoringRetroH = BiologicalScoringOrganism(rules_dictionnary= full_rules_retro_H, name = "full_rules_retro_H") 61 | BiologicalFullScoringFwdH = BiologicalScoringOrganism(rules_dictionnary= full_rules_forward_H, name = "full_rules_forward_H") 62 | BiologicalFullScoringRetroNoH = BiologicalScoringOrganism(rules_dictionnary= full_rules_retro_no_H, name = "full_rules_retro_no_H") 63 | BiologicalFullScoringFwdNoH = BiologicalScoringOrganism(rules_dictionnary= full_rules_forward_no_H, name = "full_rules_forward_no_H") 64 | 65 | full_H = full_rules_retro_H 66 | full_H.update(full_rules_forward_H) 67 | BiologicalFullScoringH = BiologicalScoringOrganism(rules_dictionnary= full_H, name = "full_rules_retro_H") 68 | -------------------------------------------------------------------------------- /calculate_organisms.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module loads calculates organisms 3 | - standardises compounds within the organism 4 | - saves them as pickles that can be laoded by RP3 5 | """ 6 | 7 | # General utilities 8 | import logging 9 | import os 10 | import csv 11 | import sys 12 | import argparse 13 | 14 | from config import DATA_PATH 15 | 16 | # RP3 specific objects 17 | from compound import Compound 18 | from chemical_compounds_state import ChemicalCompoundState 19 | from utilities.reactor.Utils import ChemConversionError 20 | 21 | 22 | def __run__(): 23 | def import_organism_from_csv(csv_file, add_Hs=True): 24 | with open(csv_file) as csv_handle: 25 | dict_reader = csv.DictReader(csv_handle, delimiter=",") 26 | compound_list = [] 27 | for row in dict_reader: 28 | name = row["name"] 29 | inchi = row["inchi"] 30 | if inchi is None or inchi == "None" or inchi == "": 31 | pass 32 | else: 33 | try: 34 | if name.startswith("InChI"): 35 | compound = Compound( 36 | InChI=inchi, 37 | heavy_standardisation=True, 38 | force_add_H=add_Hs, 39 | ) 40 | else: 41 | compound = Compound( 42 | InChI=inchi, 43 | name=name, 44 | heavy_standardisation=True, 45 | force_add_H=add_Hs, 46 | ) 47 | if not compound.in_list(compound_list, main_layer=False): 48 | compound_list.append(compound) 49 | except ChemConversionError: 50 | logging.error( 51 | "For compound {} with inchi {}: error ChemConversionError".format( 52 | name, inchi 53 | ) 54 | ) 55 | organism = ChemicalCompoundState(compound_list, main_layer=False) 56 | return organism 57 | 58 | # Calculate with H ======================================================== 59 | logging.info("Calculating organisms with H...") 60 | 61 | # Test organism 62 | compound_1 = Compound( 63 | "[H+]", name="1", heavy_standardisation=True, force_add_H=True 64 | ) 65 | compound_6 = Compound( 66 | "[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]", 67 | force_add_H=True, 68 | name="6", 69 | heavy_standardisation=True, 70 | ) 71 | compound_3459 = Compound( 72 | "[H][O][C](=[O])[C](=[O])[C]([H])([H])[C]([H])([O][H])[C]([H])([O][H])[C]([H])([H])[H]", 73 | name="3459", 74 | 
heavy_standardisation=True, 75 | force_add_H=True, 76 | ) 77 | test_organism = ChemicalCompoundState( 78 | state_name="Test", compound_list=[compound_1, compound_6, compound_3459] 79 | ) 80 | 81 | # Load real organisms 82 | detectable_cmpds = import_organism_from_csv( 83 | f"{SINK_DATA_PATH}/detectable_metabolites_uncommented.csv", add_Hs=True 84 | ) 85 | iML1515_chassis = import_organism_from_csv( 86 | f"{SINK_DATA_PATH}/ecoli_iML1515_sink_reduced_rp_ready.csv", add_Hs=True 87 | ) 88 | core_ecoli = import_organism_from_csv( 89 | f"{SINK_DATA_PATH}/ecoli_core_sink_reduced_rp_ready.csv", add_Hs=True 90 | ) 91 | iJO1366_chassis = import_organism_from_csv( 92 | f"{SINK_DATA_PATH}/ecoli_iJO1366_sink_reduced_rp_ready.csv", add_Hs=True 93 | ) 94 | bsubtilis = import_organism_from_csv( 95 | f"{SINK_DATA_PATH}/bsubtilis_iYO844_sink_reduced_rp_ready.csv", add_Hs=True 96 | ) 97 | 98 | # Save organisms 99 | test_organism.save(file_name="Test_organism_H", folder_address=ORGANISMS_DATA_PATH) 100 | detectable_cmpds.save( 101 | file_name="detectable_cmpds_H", folder_address=ORGANISMS_DATA_PATH 102 | ) 103 | iML1515_chassis.save( 104 | file_name="iML1515_chassis_H", folder_address=ORGANISMS_DATA_PATH 105 | ) 106 | core_ecoli.save(file_name="core_ecoli_H", folder_address=ORGANISMS_DATA_PATH) 107 | iJO1366_chassis.save( 108 | file_name="iJO1366_chassis_H", folder_address=ORGANISMS_DATA_PATH 109 | ) 110 | bsubtilis.save(file_name="bsubtilis_H", folder_address=ORGANISMS_DATA_PATH) 111 | 112 | # Calculate without H ===================================================== 113 | logging.info("Calculating organisms without H...") 114 | 115 | # Test organism 116 | compound_1 = Compound( 117 | "[H+]", name="1", heavy_standardisation=True, force_add_H=False 118 | ) 119 | compound_6 = Compound( 120 | "[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]", 121 | force_add_H=False, 122 | name="6", 123 | heavy_standardisation=True, 124 | ) 125 | compound_3459 = Compound( 126 | "[H][O][C](=[O])[C](=[O])[C]([H])([H])[C]([H])([O][H])[C]([H])([O][H])[C]([H])([H])[H]", 127 | name="3459", 128 | heavy_standardisation=True, 129 | force_add_H=False, 130 | ) 131 | test_organism = ChemicalCompoundState( 132 | state_name="Test", compound_list=[compound_1, compound_6, compound_3459] 133 | ) 134 | 135 | # Load real organisms 136 | detectable_cmpds = import_organism_from_csv( 137 | f"{SINK_DATA_PATH}/detectable_metabolites_uncommented.csv", add_Hs=True 138 | ) 139 | iML1515_chassis = import_organism_from_csv( 140 | f"{SINK_DATA_PATH}/ecoli_iML1515_sink_reduced_rp_ready.csv", add_Hs=False 141 | ) 142 | core_ecoli = import_organism_from_csv( 143 | f"{SINK_DATA_PATH}/ecoli_core_sink_reduced_rp_ready.csv", add_Hs=False 144 | ) 145 | iJO1366_chassis = import_organism_from_csv( 146 | f"{SINK_DATA_PATH}/ecoli_iJO1366_sink_reduced_rp_ready.csv", add_Hs=False 147 | ) 148 | bsubtilis = import_organism_from_csv( 149 | f"{SINK_DATA_PATH}/bsubtilis_iYO844_sink_reduced_rp_ready.csv", add_Hs=False 150 | ) 151 | 152 | # Save organisms 153 | test_organism.save( 154 | file_name="Test_organism_noH", folder_address=ORGANISMS_DATA_PATH 155 | ) 156 | detectable_cmpds.save( 157 | file_name="detectable_cmpds_noH", folder_address=ORGANISMS_DATA_PATH 158 | ) 159 | iML1515_chassis.save( 160 | 
file_name="iML1515_chassis_noH", folder_address=ORGANISMS_DATA_PATH 161 | ) 162 | core_ecoli.save(file_name="core_ecoli_noH", folder_address=ORGANISMS_DATA_PATH) 163 | iJO1366_chassis.save( 164 | file_name="iJO1366_chassis_noH", folder_address=ORGANISMS_DATA_PATH 165 | ) 166 | bsubtilis.save(file_name="bsubtilis_noH", folder_address=ORGANISMS_DATA_PATH) 167 | 168 | return 0 169 | 170 | 171 | if __name__ == "__main__": 172 | d = "Formatting organisms in a RP3 compatible format" 173 | parser = argparse.ArgumentParser(description=d) 174 | parser.add_argument( 175 | "--terminal", 176 | help="Default logger is logs_organisms_set_up, switch to terminal if specified", 177 | action="store_true", 178 | default=False, 179 | ) 180 | args = parser.parse_args() 181 | 182 | # Sink data path 183 | global SINK_DATA_PATH 184 | SINK_DATA_PATH = f"{DATA_PATH}/sinks" 185 | assert os.path.exists( 186 | SINK_DATA_PATH 187 | ), f"Sink data path {SINK_DATA_PATH} does not exist" 188 | 189 | # Organisms data path 190 | global ORGANISMS_DATA_PATH 191 | ORGANISMS_DATA_PATH = f"{DATA_PATH}/organisms" 192 | if not os.path.exists(ORGANISMS_DATA_PATH): 193 | os.mkdir(ORGANISMS_DATA_PATH) 194 | 195 | if args.terminal is True: 196 | logging.basicConfig( 197 | stream=sys.stderr, 198 | level=logging.INFO, 199 | datefmt="%d/%m/%Y %H:%M:%S", 200 | format="%(asctime)s -- %(levelname)s -- %(message)s", 201 | ) 202 | else: 203 | logging.basicConfig( 204 | stream=open( 205 | "{}/{}.log".format(ORGANISMS_DATA_PATH, "logs_organisms_set_up"), "w" 206 | ), 207 | level=logging.INFO, 208 | datefmt="%d/%m/%Y %H:%M:%S", 209 | format="%(asctime)s -- %(levelname)s -- %(message)s", 210 | ) 211 | print( 212 | f"By default, logs are saved in {ORGANISMS_DATA_PATH}/logs_organisms_set_up.log. Please use --terminal to redirect to sys.stderr" 213 | ) 214 | __run__() 215 | -------------------------------------------------------------------------------- /change_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | The aim of this script is to change configuration file from command line. 3 | It takes as input the base config file from data. 4 | """ 5 | 6 | import argparse 7 | import re 8 | import os 9 | 10 | 11 | def __cli(): 12 | """ 13 | Command line interface. 
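An illustrative invocation (hypothetical values and output folder), run from the repository root:
    python change_config.py --biosensor true --add_Hs true --DB_time 5 --folder_to_save /tmp/rp3_run
This reads data/base_config.py and writes the modified copy to /tmp/rp3_run/config.py.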
14 | """ 15 | 16 | d = "Arguments to change the config file before running a Tree" 17 | parser = argparse.ArgumentParser(description=d) 18 | # Logs and saving information 19 | parser.add_argument( 20 | "--DB_CACHE", type=lambda x: (str(x).lower() == "true"), default=False 21 | ) 22 | parser.add_argument( 23 | "--DB_REPLACE", type=lambda x: (str(x).lower() == "true"), default=False 24 | ) 25 | parser.add_argument("--DB_time", default=1, type=float) 26 | parser.add_argument( 27 | "--biosensor", type=lambda x: (str(x).lower() == "true"), default=False 28 | ) 29 | parser.add_argument( 30 | "--use_cache", type=lambda x: (str(x).lower() == "true"), default=False 31 | ) 32 | parser.add_argument( 33 | "--add_Hs", type=lambda x: (str(x).lower() == "true"), default=False 34 | ) 35 | parser.add_argument( 36 | "--use_transpositions", type=lambda x: (str(x).lower() == "true"), default=False 37 | ) 38 | parser.add_argument( 39 | "--use_transpositions_depth", 40 | type=lambda x: (str(x).lower() == "true"), 41 | default=False, 42 | ) 43 | parser.add_argument( 44 | "--folder_to_save", default=os.path.dirname(os.path.abspath(__file__)) 45 | ) 46 | args = parser.parse_args() 47 | 48 | def change_dB_setting( 49 | DB_CACHE, 50 | DB_REPLACE, 51 | DB_time, 52 | biosensor, 53 | use_cache, 54 | add_Hs, 55 | use_transpositions, 56 | use_transpositions_depth, 57 | folder_to_save, 58 | ): 59 | with open( 60 | "{}/data/base_config.py".format(os.path.dirname(os.path.abspath(__file__))), 61 | "r", 62 | ) as file_original: 63 | whole_text = file_original.read() 64 | with open("{}/config.py".format(folder_to_save), "w") as replacement_text: 65 | # Changing DB_cache 66 | if DB_CACHE: 67 | if "DB_CACHE = True" not in whole_text: 68 | whole_text = whole_text.replace( 69 | "DB_CACHE = False", "DB_CACHE = True" 70 | ) 71 | else: 72 | if "DB_CACHE = False" not in whole_text: 73 | whole_text = whole_text.replace( 74 | "DB_CACHE = True", "DB_CACHE = False" 75 | ) 76 | # Changing DB replace 77 | if DB_REPLACE: 78 | if "DB_REPLACE = True" not in whole_text: 79 | whole_text = whole_text.replace( 80 | "DB_REPLACE = False", "DB_REPLACE = True" 81 | ) 82 | else: 83 | if "DB_REPLACE = False" not in whole_text: 84 | whole_text = whole_text.replace( 85 | "DB_REPLACE = True", "DB_REPLACE = False" 86 | ) 87 | # Changing DB_time: 88 | whole_text = re.sub( 89 | "DB_time = \d+.\d+", "DB_time = {}".format(DB_time), whole_text 90 | ) 91 | 92 | # Changing running mode from biosensor to retrosynthesis 93 | if biosensor: 94 | if "biosensor = True" not in whole_text: 95 | whole_text = whole_text.replace( 96 | "biosensor = False", "biosensor = True" 97 | ) 98 | whole_text = whole_text.replace( 99 | "retrosynthesis = True", "retrosynthesis = False" 100 | ) 101 | else: 102 | if "biosensor = False" not in whole_text: 103 | whole_text = whole_text.replace( 104 | "biosensor = True", "biosensor = False" 105 | ) 106 | whole_text = whole_text.replace( 107 | "retrosynthesis = False", "retrosynthesis = True" 108 | ) 109 | # Changing use_cache 110 | if use_cache: 111 | if "use_cache = True" not in whole_text: 112 | whole_text = whole_text.replace( 113 | "use_cache = False", "use_cache = True" 114 | ) 115 | else: 116 | if "use_cache = False" not in whole_text: 117 | whole_text = whole_text.replace( 118 | "use_cache = True", "use_cache = False" 119 | ) 120 | 121 | # Hydrogen handling: 122 | if add_Hs: 123 | if "add_Hs = True" not in whole_text: 124 | whole_text = whole_text.replace("add_Hs = False", "add_Hs = True") 125 | else: 126 | if "add_Hs = False" not in 
whole_text: 127 | whole_text = whole_text.replace("add_Hs = True", "add_Hs = False") 128 | 129 | # Changing use_transpositions 130 | if use_transpositions: 131 | if "use_transpositions = True" not in whole_text: 132 | whole_text = whole_text.replace( 133 | "use_transpositions = False", "use_transpositions = True" 134 | ) 135 | else: 136 | if "use_transpositions = False" not in whole_text: 137 | whole_text = whole_text.replace( 138 | "use_transpositions = True", "use_transpositions = False" 139 | ) 140 | # Changing use_transpositions_depth 141 | if use_transpositions_depth: 142 | if "use_transpositions_depth = True" not in whole_text: 143 | whole_text = whole_text.replace( 144 | "use_transpositions_depth = False", 145 | "use_transpositions_depth = True", 146 | ) 147 | else: 148 | if "use_transpositions_depth = False" not in whole_text: 149 | whole_text = whole_text.replace( 150 | "use_transpositions_depth = True", 151 | "use_transpositions_depth = False", 152 | ) 153 | replacement_text.write(whole_text) 154 | 155 | change_dB_setting( 156 | DB_CACHE=args.DB_CACHE, 157 | DB_REPLACE=args.DB_REPLACE, 158 | DB_time=args.DB_time, 159 | biosensor=args.biosensor, 160 | use_cache=args.use_cache, 161 | add_Hs=args.add_Hs, 162 | use_transpositions=args.use_transpositions, 163 | use_transpositions_depth=args.use_transpositions_depth, 164 | folder_to_save=args.folder_to_save, 165 | ) 166 | 167 | 168 | if __name__ == "__main__": 169 | __cli() 170 | -------------------------------------------------------------------------------- /chemical_scoring.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines the chemical scoring functions 3 | """ 4 | 5 | # General utility packages 6 | import random 7 | import itertools # For all permutations when IDing the best products 8 | import numpy as np # Allows for simpler calculations on lists 9 | import logging 10 | 11 | # Chemistry packages 12 | from rdkit import DataStructs # For similarity computation 13 | 14 | def list_product(combination): 15 | """ 16 | Calculates the product of all elements from the list. 17 | Remark: deprecated, use geometric mean instead. 18 | """ 19 | score = 1 20 | for tanimoto in combination: 21 | score = score * tanimoto 22 | return(score) 23 | 24 | def combine_products(product_list, product_list_bis, max_combination = 1000): 25 | """ 26 | Calculates all possible combinations of products (native and query products). 27 | Limited to 1000 combinations, knowing that combinations behave as n! with n the number of products. 28 | """ 29 | combinations = [(x,product_list_bis) for x in itertools.permutations(product_list,len(product_list_bis))] 30 | if len(combinations) > max_combination: 31 | combinations = combinations[0:max_combination] 32 | return(combinations) 33 | 34 | def list_geometric_mean(combination): 35 | """ 36 | Calculates the geometric mean of the array. 37 | """ 38 | a = np.array(combination) 39 | return a.prod()**(1.0/len(a)) 40 | 41 | def tanimoto_product_calc(native_products_ecfp, query_products_ecfp, verbose = False): 42 | all_scores = [] 43 | if len(native_products_ecfp) != len(query_products_ecfp): 44 | # Reject rules that do not produce the same number of compounds. 
45 | logging.debug("Rule does not generate the same number of products: native is {} and new is {}".format(len(native_products_ecfp), len(query_products_ecfp))) 46 | return(-1) 47 | combinations = combine_products(product_list = native_products_ecfp, product_list_bis = query_products_ecfp) 48 | score_list = [] 49 | for combination in combinations: 50 | tanimoto_combination = [] 51 | native, query = combination[0], combination[1] 52 | for i in range(len(native)): 53 | tanimoto = DataStructs.cDataStructs.TanimotoSimilarity(native[i], query[i]) 54 | tanimoto_combination.append(tanimoto) 55 | score_list.append(list_geometric_mean(tanimoto_combination)) 56 | if verbose: 57 | logging.debug("Score list length is {} and scores {}".format(len(score_list), score_list)) 58 | return(max(score_list)) 59 | 60 | class ChemicalScoring(object): 61 | logger = logging.getLogger(__name__) 62 | """ 63 | Defines Chemical Scorer objects. 64 | """ 65 | def __init__(self, scoring_function, name = "ChemicalScoring"): 66 | self.scoring_function = scoring_function 67 | self.scoring_warning = True 68 | self.name = name 69 | 70 | def calculate(self, compound, products = None, rule = None, original_substrates_list = None, original_products_list_list = None): 71 | if original_substrates_list == [None] and (original_products_list_list is None or original_products_list_list == [None]): 72 | if self.scoring_warning: 73 | self.scoring_warning = False 74 | self.logger.warning("Not using chemical scoring for {}. Default is set to 1".format(self.name)) 75 | return(1) 76 | score, warning = self.scoring_function(compound, products, rule, original_substrates_list, original_products_list_list) 77 | if not warning is None: 78 | self.logger.debug(warning) 79 | return(score) 80 | 81 | def pseudo_random(compound, products, rule, original_substrates_list = None, original_products_list_list = None): 82 | """ 83 | Was used during development. 84 | """ 85 | warning = None 86 | if compound.InChIKey == "NBBJYMSMWIIQGU-UHFFFAOYSA-N": 87 | if rule == "MNXR94682_MNXM821": 88 | score = 0.99 89 | elif rule == "MNXR117465_MNXM821": 90 | score = 0.88 91 | else: 92 | score = random.uniform(0,0.75) 93 | elif compound.InChIKey == "DNIAPMSPPWPWGF-UHFFFAOYSA-N": 94 | if rule == "MNXR95713_MNXM90191": 95 | score = 0.80 96 | elif rule == "MNXR103108_MNXM90191": 97 | score = 0.76 98 | else: 99 | score = random.uniform(0,0.75) 100 | else: 101 | score = random.uniform(0,0.75) 102 | return(score, warning) 103 | 104 | def substrate_calculation(compound, products = None, rule = None, original_substrates_list = None, original_products_list_list = None): 105 | """ 106 | If the original_substrates_list is none, it means chemical scoring is not implemented and scoring should eb neutral: 1 in mutiplication. 
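Otherwise, the score is the best Tanimoto similarity between the query compound's ECFP and the ECFPs of the rule's native substrates. A rough stand-alone sketch of that comparison with plain RDKit (illustrative only; the real code works on Compound objects and pre-computed fingerprints):

    from rdkit import Chem, DataStructs
    from rdkit.Chem import AllChem

    # Morgan fingerprints matching the radius = 2, 1024-bit settings used elsewhere in the project
    query = AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles("CCO"), radius=2, nBits=1024)
    native = AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles("CC(=O)O"), radius=2, nBits=1024)
    score = DataStructs.TanimotoSimilarity(query, native)  # value in [0, 1]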
107 | """
108 | if original_substrates_list is None:
109 | warning = "Score is set to 1 for cmp {} and rule {}".format(compound, rule)
110 | tanimoto = 1
111 | else:
112 | tanimoto = 0
113 | for native_substrate in original_substrates_list:
114 | query_substrate = compound._get_ECFP()
115 | tanimoto_this = DataStructs.cDataStructs.TanimotoSimilarity(query_substrate, native_substrate)
116 | warning = None
117 | tanimoto = max(tanimoto, tanimoto_this)
118 | return(tanimoto, warning)
119 | 
120 | def substrate_and_product_calculation(compound, products, rule, original_substrates_list = None, original_products_list_list = None):
121 | """
122 | If the original_substrates_list is None, it means chemical scoring is not implemented and the score should be neutral: 1 in multiplication.
123 | """
124 | warning = None
125 | if original_substrates_list is None:
126 | warning = "Score is set to 1 for cmp {} and rule {}".format(compound, rule)
127 | tanimoto = 1
128 | return(tanimoto, warning)
129 | else:
130 | tanimoto = 0
131 | for i in range(len(original_substrates_list)):
132 | native_substrate = original_substrates_list[i]
133 | query_substrate = compound._get_ECFP()
134 | tanimoto_substrate = DataStructs.cDataStructs.TanimotoSimilarity(query_substrate, native_substrate)
135 | warning = None
136 | query_products_ecfp = []
137 | for prod in products:
138 | query_products_ecfp.append(prod._get_ECFP())
139 | prod_result = tanimoto_product_calc(original_products_list_list[i], query_products_ecfp, verbose = False)
140 | if prod_result == -1:
141 | warning = "Number of product issue with rule {} and products {}".format(rule, products)
142 | tanimoto = max(tanimoto, tanimoto_substrate * prod_result)
143 | return(tanimoto, warning)
144 | 
145 | def constant_scorer(compound, products, rule, original_substrates_list = None, original_products_list_list = None):
146 | warning = None
147 | return(1, warning)
148 | 
149 | RandomChemicalScorer = ChemicalScoring(scoring_function = pseudo_random, name = "RandomChemicalScorer")
150 | SubstrateChemicalScorer = ChemicalScoring(scoring_function = substrate_calculation, name = "SubstrateChemicalScorer")
151 | SubandprodChemicalScorer = ChemicalScoring(scoring_function = substrate_and_product_calculation, name = "SubandprodChemicalScorer")
152 | ConstantChemicalScorer = ChemicalScoring(scoring_function = constant_scorer, name = "ConstantChemicalScorer")
153 | # Chemical scoring utilities. Taken from similarity.
154 | -------------------------------------------------------------------------------- /chemistry_choices.md: --------------------------------------------------------------------------------
1 | The aim of this file is to document the precise chemoinformatics choices made while fixing bugs, and the reasoning behind them.
2 | 
3 | # Chemical rule application.
4 | 
5 | When a rule applies to a substrate and after standardisation produces this substrate again (S -> S + I), the rule is deleted as this is not biological.
6 | This is corrected at the compound stage.
7 | 
8 | # Compound equality: either main layer or full inchikey
9 | Choice: usually the less stringent comparison (main layer only) is used for the chassis; a short sketch of this comparison is given below.
10 | 
11 | # Moves generating duplicate compounds:
12 | - Only unique compounds are conserved.
13 | - Logs will say it is merged (while conserving the number of compounds in the stoichiometry dictionary).
14 | 
15 | 
16 | # Moves generating the same compounds:
17 | 
18 | Keep the move with the highest score. In practice, the synonyms (transformation IDs) of the other moves generating the same compounds are also kept.
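The duplicate and merging rules above all rest on how two compounds are compared: either on the full InChIKey or only on its first (connectivity) block, the latter being the less stringent option used for the chassis. A minimal sketch of that check, assuming compounds are represented here by plain InChIKey strings (the actual code compares Compound objects):

```python
def same_compound(inchikey_a: str, inchikey_b: str, main_layer_only: bool = True) -> bool:
    """Compare two compounds by InChIKey, optionally restricted to the connectivity (main) layer."""
    if main_layer_only:
        # The first hyphen-separated block (14 characters) encodes the molecular skeleton.
        return inchikey_a.split("-")[0] == inchikey_b.split("-")[0]
    return inchikey_a == inchikey_b
```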
19 | 
20 | # History of the state.
21 | 
22 | Keeping a history of visited compounds (excluding the organism's compounds).
23 | - Refuse moves that generate compounds present in the history, to avoid loops.
24 | 
25 | # Refusing rules that produce a different number of compounds than the original.
26 | 
27 | This can happen when a rule learned on two molecules fires because their subgroups both appear within a single, much bigger molecule during the retrosynthetic search.
28 | It is unrealistic to expect an enzyme to work this way.
29 | -------------------------------------------------------------------------------- /compound_scoring.py: --------------------------------------------------------------------------------
1 | """
2 | Defines the compound scoring function.
3 | Currently implements toxicity in E. coli, based on data from EcoliTox.
4 | """
5 | 
6 | # General use packages
7 | import random
8 | import numpy as np
9 | import sys
10 | import csv
11 | import math
12 | import logging
13 | from rdkit.Chem import DataStructs
14 | from rdkit import Chem
15 | 
16 | from config import *
17 | 
18 | 
19 | class CompoundScoring(object):
20 | """
21 | Defines Compound Scorer object.
22 | """
23 | logger = logging.getLogger(__name__)
24 | def __init__(self, scoring_function = None):
25 | if scoring_function is None:
26 | pass
27 | else:
28 | self.scoring_function = scoring_function
29 | 
30 | def __repr__(self):
31 | """
32 | Name the used scorer.
33 | Raises an error if the class is not properly instantiated.
34 | """
35 | return(self.name)
36 | 
37 | def calculate(self, compound):
38 | score = self.scoring_function(compound)
39 | return(score)
40 | 
41 | def pseudo_random(compound):
42 | score = random.uniform(0, 10)
43 | return(score)
44 | 
45 | 
46 | class ToxicityScoring(CompoundScoring):
47 | """
48 | Returns the log toxicity value of a compound.
49 | The data is stored in a CSV file, tab-delimited, with columns "name", "InChI" and "toxicity".
50 | This can easily be changed to other data with a similar formatting.
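For example, a minimal input file has the following layout (columns separated by tabs; the toxicity value below is made up purely to show the format):

    name	InChI	toxicity
    methanol	InChI=1S/CH4O/c1-2/h2H,1H3	42.0

Toxicity values must be positive, since the model is fitted on log(toxicity).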
51 | """ 52 | def __init__(self, toxicity_data = "{}/name_structure_toxicity.csv".format(DATA_PATH)): 53 | CompoundScoring.__init__(self) 54 | self.scoring_function = self.scoring_function() 55 | self.name = "ToxicityScoring" 56 | self.fit_model(toxicity_data) 57 | 58 | def calculate_ECFP(self,inchi): 59 | rdmol = Chem.inchi.MolFromInchi(inchi, sanitize=False) 60 | # rd_mol = standardize_chemical(rdmol, add_hs=False, heavy = True, rm_stereo=True) 61 | ECFP= Chem.AllChem.GetMorganFingerprintAsBitVect(rdmol, radius = 2, nBits=1024, useFeatures = False, useChirality = False) 62 | return(ECFP) 63 | 64 | def select_current_best_model(self, X, y, 65 | models_number = 10, 66 | verbose = False): 67 | 68 | trained_model_list = [] 69 | # Training all models 70 | for i in range(models_number): 71 | X_train, y_train = X, y 72 | other_MLP = MLPRegressor(hidden_layer_sizes = (10, 100,100, 20), solver ="adam", max_iter=20000, 73 | early_stopping = True, learning_rate = "adaptive") 74 | other_MLP.fit(X_train, y_train.flatten()) 75 | trained_model_list.append(other_MLP) 76 | 77 | big_MLP = MLPRegressor(hidden_layer_sizes = (100,100, 20),solver ="adam", max_iter=20000, 78 | early_stopping = True, learning_rate = "adaptive") 79 | big_MLP.fit(X_train, y_train.flatten()) 80 | trained_model_list.append(big_MLP) 81 | 82 | 83 | medium_MLP = MLPRegressor(hidden_layer_sizes = (40, 10), solver ="adam", max_iter=20000, 84 | early_stopping = True, learning_rate = "adaptive") 85 | medium_MLP.fit(X_train, y_train.flatten()) 86 | trained_model_list.append(medium_MLP) 87 | 88 | small_MLP = MLPRegressor(hidden_layer_sizes = (10), solver ="adam", max_iter=20000, 89 | early_stopping = True, learning_rate = "adaptive") 90 | small_MLP.fit(X_train, y_train.flatten()) 91 | trained_model_list.append(small_MLP) 92 | 93 | # Evaluating all 94 | all_scores = [] 95 | for i in range(len(trained_model_list)): 96 | selected_mdoel = trained_model_list[i] 97 | y_pred = selected_mdoel.predict(X) 98 | score = sklearn.metrics.r2_score(y, y_pred) 99 | all_scores.append(score) 100 | 101 | try: 102 | best_index = all_scores.index(max(all_scores)) 103 | best_score = all_scores[best_index] 104 | except ValueError: 105 | best_index = 0 106 | best_model = trained_model_list[best_index] 107 | return(best_model, best_score) 108 | 109 | def fit_model(self,toxicity_data): 110 | y = [] 111 | X = None 112 | # Loading data 113 | with open(toxicity_data, "r") as file_hdl: 114 | reader = csv.DictReader(file_hdl, delimiter = '\t') 115 | for row in reader: 116 | y.append(math.log(float(row["toxicity"]))) 117 | arr = np.zeros((1,)) 118 | fp = self.calculate_ECFP(row["InChI"]) 119 | DataStructs.ConvertToNumpyArray(fp, arr) 120 | arr = np.reshape(arr, (1, 1024)) 121 | if X is None: 122 | X = arr 123 | else: 124 | X = np.concatenate((X, arr), axis = 0) 125 | self.log_loading = "Loaded {} compounds from {}".format(len(y), toxicity_data) 126 | y = np.array(y) 127 | # Fitting mdoel: 128 | best_model, score = self.select_current_best_model(X, y, models_number = 10) 129 | y_pred = best_model.predict(X) 130 | score = sklearn.metrics.r2_score(y, y_pred) 131 | self.log_score = "The toxicity model has a R2 score of {} on itself".format(round(score, 2)) 132 | self.model = best_model 133 | 134 | def scoring_function(self): 135 | # CODE IT 136 | def compound_scoring(compound): 137 | ECFP = compound._get_ECFP() 138 | arr = np.zeros((1,)) 139 | DataStructs.ConvertToNumpyArray(ECFP, arr) 140 | arr = np.reshape(arr, (1, 1024)) 141 | y_pred = self.model.predict(arr) 142 | 
return(y_pred) 143 | return(compound_scoring) 144 | 145 | 146 | RandomCompoundScorer = CompoundScoring(scoring_function = pseudo_random) 147 | if use_toxicity: 148 | toxicity_scorer = ToxicityScoring() 149 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """ 2 | the aim of this file is to store configuration parameters, notably for the DB. 3 | It replaces what I previously wanted to define as 'global' 4 | """ 5 | try: 6 | from rp3_dcache.Manager import Manager # In house module 7 | from rp3_dcache.Utils import make_document_id, as_document, rdmols_from_document 8 | dcache_installed = True 9 | except ModuleNotFoundError: 10 | dcache_installed = False 11 | import logging 12 | import os 13 | 14 | # Files and addresses configurations - should not be modified: 15 | global DATA_PATH 16 | DATA_PATH = "{}/data".format(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | global add_Hs 19 | add_Hs = True 20 | hydrogen_config = "Using explicit hydrogens : {}".format(add_Hs) 21 | 22 | # Database for storing results configuration 23 | global DB_CACHE 24 | global DB_REPLACE 25 | DB_CACHE = False and dcache_installed 26 | DB_REPLACE = False and dcache_installed 27 | DB_time = 0 28 | if DB_CACHE: 29 | global CACHE_MGR 30 | if add_Hs: 31 | CACHE_MGR = Manager(replace=DB_REPLACE, collection = "results_with_H") 32 | else: 33 | CACHE_MGR = Manager(replace=DB_REPLACE, collection = "results_without_H") 34 | CACHE_MGR.connect() 35 | DB_config = "Setting the DB from config file: Installed package: {}. Using cache DB: {}; Replacing results: {}".format(dcache_installed, DB_CACHE, DB_REPLACE) 36 | elif dcache_installed: 37 | DB_config = "Setting the DB from config file: Installed package: {}. Using cache DB: {}; Replacing results: {}".format(dcache_installed, DB_CACHE, DB_REPLACE) 38 | else: 39 | DB_config = "Setting the DB from config file: Installed package: {}".format(dcache_installed) 40 | 41 | # Mode for using RP3: retrosynthesis or biosensor. QSAR might be implemented one day. 42 | global retrosynthesis 43 | global biosensor 44 | retrosynthesis = True 45 | biosensor = False 46 | tree_mode_config = "Using retrosynthesis: {} - using biosensor {}".format(retrosynthesis, biosensor) 47 | 48 | # Configuring local cache. Could be replaced by a proper caching system one day. 49 | global home_made_cache 50 | home_made_cache = {} 51 | 52 | global use_cache 53 | use_cache = False 54 | 55 | cache_config = "Initialising an empty cache: {}; Using it: {}".format(home_made_cache, use_cache) 56 | 57 | # MCTS parameters for configuration 58 | 59 | global transposition_table 60 | global use_transpositions 61 | global use_transpositions_depth 62 | 63 | transposition_table = {} 64 | use_transpositions = False 65 | use_transpositions_depth = False 66 | 67 | transposition_table_config = "Using transposition tables: {}. With depth: {}".format(use_transpositions, use_transpositions_depth) 68 | 69 | # For toxicity, using log(IC50) as penaly when below 0. 
70 | global use_toxicity 71 | try: 72 | import sklearn 73 | from sklearn.neural_network import MLPRegressor 74 | sklearn_here = True 75 | except ModuleNotFoundError: 76 | toxicity_config = "Toxicity will not be enabled because sklearn is not installed" 77 | sklearn_here = False 78 | use_toxicity = False 79 | use_toxicity = use_toxicity and sklearn_here 80 | -------------------------------------------------------------------------------- /convert_to_SBML.py: -------------------------------------------------------------------------------- 1 | """ 2 | Converts pathways under json format to SBML format 3 | """ 4 | 5 | # General utilities 6 | import sys 7 | import logging 8 | import csv 9 | import copy 10 | import json 11 | import pickle 12 | import libsbml 13 | from hashlib import md5 14 | import os 15 | import argparse 16 | 17 | # RP3 specific objects 18 | from compound import Compound 19 | from move import Move 20 | 21 | def _nameToSbmlId(name): 22 | IdStream = [] 23 | count = 0 24 | end = len(name) 25 | if '0' <= name[count] and name[count] <= '9': 26 | IdStream.append('_') 27 | for count in range(0, end): 28 | if (('0' <= name[count] and name[count] <= '9') or 29 | ('a' <= name[count] and name[count] <= 'z') or 30 | ('A' <= name[count] and name[count] <= 'Z')): 31 | IdStream.append(name[count]) 32 | else: 33 | IdStream.append('_') 34 | Id = ''.join(IdStream) 35 | if Id[len(Id) - 1] != '_': 36 | return Id 37 | return Id[:-1] 38 | 39 | def add_specy(sbml_model, 40 | chemId = 'Id_cmpound', 41 | smiles = "smilescomppoun", 42 | inchi = "inchicompounds", 43 | inchiKey = "inchiKeycomppoun", 44 | name = "compounds_name", 45 | in_sink = False): 46 | 47 | spe = sbml_model.createSpecies() 48 | spe.setCompartment("cytoplasm") 49 | spe.setHasOnlySubstanceUnits(False) 50 | spe.setBoundaryCondition(False) 51 | spe.setConstant(False) 52 | spe.setInitialConcentration(1.0) 53 | clean_id = str(chemId)+'__64__'+str("cytoplasm") 54 | clean_id = clean_id.replace('-', '_') # No - in name 55 | metaid = _nameToSbmlId(md5(str(name).encode('utf-8')).hexdigest()) 56 | spe.setMetaId(metaid) 57 | spe.setName(name) 58 | if in_sink: 59 | annotation = ''' 60 | ''' 62 | annotation += ''' 63 | 64 | 65 | '''+str(smiles or '')+''' 66 | '''+str(inchi or '')+''' 67 | '''+str(inchiKey or '')+''' 68 | '''+ str(True)+''' 69 | 70 | ''' 71 | annotation += ''' 72 | 73 | ''' 74 | else: 75 | annotation = ''' 76 | ''' 78 | annotation += ''' 79 | 80 | 81 | '''+str(smiles or '')+''' 82 | '''+str(inchi or '')+''' 83 | '''+str(inchiKey or '')+''' 84 | 85 | ''' 86 | annotation += ''' 87 | 88 | ''' 89 | spe.setAnnotation(annotation) 90 | return(sbml_model) 91 | 92 | def add_reaction(sbml_model, 93 | reacId = 'Id_reac', 94 | ec = "Test_ec", 95 | rule_id = "rule_id", 96 | biological_score = "biological_score", 97 | chemical_score = "chemical_score", 98 | reactant_stoechio = {}, 99 | product = "product_name", 100 | reaction_smiles = "reaction_smiles", 101 | diameter = "diameter"): 102 | reac = sbml_model.createReaction() 103 | 104 | reac_fbc = reac.getPlugin('fbc') 105 | reac_fbc.setUpperFluxBound('B_999999') 106 | reac_fbc.setLowerFluxBound('B_0') 107 | #reactions 108 | reac.setId(reacId) 109 | reac.setSBOTerm(185) 110 | reac.setReversible(True) 111 | reac.setFast(False) 112 | metaid = _nameToSbmlId(md5(str(reacId).encode('utf-8')).hexdigest()) 113 | reac.setMetaId(metaid) 114 | #reactants_dict 115 | for reactant in reactant_stoechio.keys(): 116 | chemId = reactant 117 | spe = reac.createReactant() 118 | clean_id = 
str(chemId)+'__64__'+str("cytoplasm") 119 | clean_id = clean_id.replace('-', '_') # No - in name 120 | spe.setSpecies(clean_id) 121 | spe.setConstant(True) 122 | try: 123 | stoechio = reactant_stoechio[reactant] 124 | except KeyError: 125 | stoechio = 1 126 | spe.setStoichiometry(stoechio) 127 | #products_dict 128 | if not product is None: 129 | pro = reac.createProduct() 130 | clean_id = str(product)+'__64__'+str("cytoplasm") 131 | clean_id = clean_id.replace('-', '_') # No - in name 132 | pro.setSpecies(clean_id) 133 | pro.setConstant(True) 134 | pro.setStoichiometry(1) 135 | #annotation 136 | annotation = ''' 137 | ''' 139 | 140 | annotation += ''' 141 | 142 | 143 | '''+str(reaction_smiles or '')+''' 144 | '''+str(rule_id or '')+''' 145 | '''+str(ec)+''' 146 | 147 | 148 | 149 | 150 | 151 | 152 | ''' 153 | reac.setAnnotation(annotation) 154 | return(sbml_model) 155 | 156 | 157 | def convert_json_to_SBML(json_file, modelID = "test", folder_to_save = 'temp'): 158 | # Set up the empty model 159 | smbl_namespace = libsbml.SBMLNamespaces(3,1) 160 | smbl_namespace.addPkgNamespace('fbc',2) 161 | smbl_namespace.addPkgNamespace('groups',2) 162 | document = libsbml.SBMLDocument(smbl_namespace) 163 | sbml_model = document.createModel() 164 | sbml_model.getPlugin('fbc') 165 | sbml_model.getPlugin('groups') 166 | sbml_model.setId(modelID) 167 | sbml_model.setName(modelID) 168 | sbml_model.setTimeUnits('second') 169 | sbml_model.setExtentUnits('mole') 170 | sbml_model.setSubstanceUnits('mole') 171 | # Could implement units, currently removed from the model 172 | # Should have it in a seperate function 173 | compartment = sbml_model.createCompartment() 174 | compartment.setId("cytoplasm") 175 | target_node = None 176 | for node in json_file["elements"]["nodes"]: 177 | if node["data"]["type"] == "compound": 178 | sbml_model = add_specy(sbml_model, 179 | chemId = node["data"]["id"], 180 | smiles = node["data"]["SMILES"], 181 | inchi = node["data"]["InChI"], 182 | inchiKey = node["data"]["id"], 183 | name = ",".join(node["data"]["Names"]), 184 | in_sink = node["data"]["inSink"] == 1) 185 | if node["data"]["isSource"] == 1: 186 | logging.info("Target node is {}".format(node["data"]["id"])) 187 | target_node = node 188 | for element in sbml_model.getListOfSpecies(): 189 | logging.debug(element) 190 | for node in json_file["elements"]["nodes"]: 191 | if node["data"]["type"] == "reaction": 192 | try: 193 | reactant_stoechio = node["data"]["Stoechiometry"] 194 | except KeyError: 195 | reactant_stoechio = {} 196 | sbml_model = add_reaction(sbml_model, 197 | reacId = node["data"]["id"], 198 | ec = ','.join(node["data"]["EC number"]), 199 | rule_id = ','.join(node["data"]["Rule ID"]), 200 | biological_score = node["data"]["Score"], 201 | chemical_score = node["data"]["ChemicalScore"], 202 | reactant_stoechio = reactant_stoechio, 203 | product = node["data"]["id"].split("-RR")[0], 204 | reaction_smiles = node["data"]["Reaction SMILES"], 205 | diameter = node["data"]["Diameter"]) 206 | sbml_model = add_reaction(sbml_model, 207 | reacId = "production", 208 | ec = 'NA', 209 | rule_id = 'NA', 210 | biological_score = 'NA', 211 | chemical_score = 'NA', 212 | reactant_stoechio = {target_node["data"]["id"]: 1}, 213 | product = None, 214 | reaction_smiles = 'NA', 215 | diameter = 'NA') 216 | 217 | document.setModel(sbml_model) 218 | libsbml.writeSBMLToFile(document,'{}/{}.xml'.format(folder_to_save, modelID)) 219 | pass 220 | 221 | 222 | def __cli(): 223 | def define_folder_to_save(folder): 224 | if folder is None: 225 | 
folder_to_save = os.path.join('debugging_results', args.c_name) 226 | else: 227 | folder_to_save = folder 228 | if not os.path.exists(folder_to_save): 229 | os.makedirs(folder_to_save, exist_ok=True) 230 | return folder_to_save 231 | d = "Command line interface to convert json files to SBML files" 232 | parser = argparse.ArgumentParser(description=d) 233 | # Logs and saving information 234 | """Command line interface to convert json files to SBML files""" 235 | parser.add_argument("--verbose", help="Default logger is INFO, switch to DEBUG is specified", 236 | dest='verbose', action='store_true', default=False) 237 | parser.add_argument("--log_file", help="Default logger is stderr, switch to log_file if specified", 238 | default=None) 239 | parser.add_argument("--folder_to_save", 240 | help="Folder to store results. Default: temp", 241 | default="temp") 242 | parser.add_argument("--json_convert", 243 | help="File to convert", 244 | default="deoxi_07_no_H/deoxiviolacein_iteration_85.json") 245 | parser.add_argument("--file_name", help = 'File name if name changes.', default = None) 246 | args = parser.parse_args() 247 | # Setting up the logs 248 | if args.verbose: 249 | logging_level = logging.DEBUG 250 | else: 251 | logging_level = logging.INFO 252 | if args.log_file is None: 253 | logging.basicConfig(stream=sys.stderr, 254 | level=logging_level, 255 | datefmt='%d/%m/%Y %H:%M:%S', 256 | format='%(asctime)s -- %(levelname)s -- %(message)s') 257 | else: 258 | if not "log" in args.log_file: 259 | log_file = "log_" + args.log_file 260 | else: 261 | log_file = args.log_file 262 | log_writer = open("{}/{}".format(folder_to_save, log_file), "w") 263 | logging.basicConfig(stream=log_writer, 264 | level=logging_level, 265 | datefmt='%d/%m/%Y %H:%M:%S', 266 | format='%(asctime)s -- %(levelname)s -- %(message)s') 267 | 268 | folder_to_save = define_folder_to_save(args.folder_to_save) 269 | # Choosing file 270 | if args.file_name is None: 271 | model_ID = args.json_convert.split("/")[-1].split(".json")[0] 272 | else: 273 | model_ID = args.file_name 274 | pathway_to_test = json.load(open(args.json_convert, "r")) 275 | convert_json_to_SBML(pathway_to_test, model_ID, folder_to_save = folder_to_save) 276 | 277 | if __name__ == "__main__": 278 | __cli() 279 | -------------------------------------------------------------------------------- /data/base_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | the aim of this file is to store configuration parameters, notably for the DB. 
3 | It replaces what I previously wanted to define as 'global' 4 | """ 5 | try: 6 | from rp3_dcache.Manager import Manager # In house module 7 | from rp3_dcache.Utils import make_document_id, as_document, rdmols_from_document 8 | dcache_installed = True 9 | except ModuleNotFoundError: 10 | dcache_installed = False 11 | import logging 12 | import os 13 | 14 | # Files and addresses configurations - should not be modified: 15 | global DATA_PATH 16 | DATA_PATH = "{}/data".format(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | global add_Hs 19 | add_Hs = True 20 | hydrogen_config = "Using explicit hydrogens : {}".format(add_Hs) 21 | 22 | # Database for storing results configuration 23 | global DB_CACHE 24 | global DB_REPLACE 25 | DB_CACHE = False and dcache_installed 26 | DB_REPLACE = False and dcache_installed 27 | DB_time = 0 28 | if DB_CACHE: 29 | global CACHE_MGR 30 | if add_Hs: 31 | CACHE_MGR = Manager(replace=DB_REPLACE, collection = "results_with_H") 32 | else: 33 | CACHE_MGR = Manager(replace=DB_REPLACE, collection = "results_without_H") 34 | CACHE_MGR.connect() 35 | DB_config = "Setting the DB from config file: Installed package: {}. Using cache DB: {}; Replacing results: {}".format(dcache_installed, DB_CACHE, DB_REPLACE) 36 | elif dcache_installed: 37 | DB_config = "Setting the DB from config file: Installed package: {}. Using cache DB: {}; Replacing results: {}".format(dcache_installed, DB_CACHE, DB_REPLACE) 38 | else: 39 | DB_config = "Setting the DB from config file: Installed package: {}".format(dcache_installed) 40 | 41 | # Mode for using RP3: retrosynthesis or biosensor. QSAR might be implemented one day. 42 | global retrosynthesis 43 | global biosensor 44 | retrosynthesis = True 45 | biosensor = False 46 | tree_mode_config = "Using retrosynthesis: {} - using biosensor {}".format(retrosynthesis, biosensor) 47 | 48 | # Configuring local cache. Could be replaced by a proper caching system one day. 49 | global home_made_cache 50 | home_made_cache = {} 51 | 52 | global use_cache 53 | use_cache = True 54 | 55 | cache_config = "Initialising an empty cache: {}; Using it: {}".format(home_made_cache, use_cache) 56 | 57 | # MCTS parameters for configuration 58 | 59 | global transposition_table 60 | global use_transpositions 61 | global use_transpositions_depth 62 | 63 | transposition_table = {} 64 | use_transpositions = False 65 | use_transpositions_depth = False 66 | 67 | transposition_table_config = "Using transposition tables: {}. With depth: {}".format(use_transpositions, use_transpositions_depth) 68 | 69 | # For toxicity, using log(IC50) as penaly when below 0. 
70 | global use_toxicity 71 | try: 72 | import sklearn 73 | from sklearn.neural_network import MLPRegressor 74 | sklearn_here = True 75 | except ModuleNotFoundError: 76 | toxicity_config = "Toxicity will not be enabled because sklearn is not installed" 77 | sklearn_here = False 78 | use_toxicity = False 79 | use_toxicity = use_toxicity and sklearn_here 80 | -------------------------------------------------------------------------------- /data/compounds_to_add/TPA_to_add.csv: -------------------------------------------------------------------------------- 1 | name,inchi 2 | MNXM162174,"InChI=1S/C8H10/c1-7-3-5-8(2)6-4-7/h3-6H,1-2H3" 3 | -------------------------------------------------------------------------------- /data/golden_dataset.csv: -------------------------------------------------------------------------------- 1 | name inchi file_to_add 2 | 1,4-Butanediol InChI=1S/C4H10O2/c5-3-1-2-4-6/h5-6H,1-4H2 3 | 2,3-amino-1,3-propanediol InChI=1S/C3H9NO2/c4-3(1-5)2-6/h3,5-6H,1-2,4H2 4 | 2,5-DHBA InChI=1S/C7H6O4/c8-4-1-2-6(9)5(3-4)7(10)11/h1-3,8-9H,(H,10,11) 5 | 3-methylbutanol InChI=1S/C5H12O/c1-5(2)3-4-6/h5-6H,3-4H2,1-2H3 6 | N-methylpyrrolinium InChI=1S/C5H10N/c1-6-4-2-3-5-6/h4H,2-3,5H2,1H3/q+1 7 | benzyl_alcohol InChI=1S/C7H8O/c8-6-7-4-2-1-3-5-7/h1-5,8H,6H2 8 | caroten InChI=1S/C40H56/c1-31(19-13-21-33(3)25-27-37-35(5)23-15-29-39(37,7)8)17-11-12-18-32(2)20-14-22-34(4)26-28-38-36(6)24-16-30-40(38,9)10/h11-14,17-22,25-28H,15-16,23-24,29-30H2,1-10H3/b12-11+,19-13+,20-14+,27-25+,28-26+,31-17+,32-18+,33-21+,34-22+ 9 | cis,cis-muconate InChI=1S/C6H6O4/c7-5(8)3-1-2-4-6(9)10/h1-4H,(H,7,8)(H,9,10)/p-2/b3-1-,4-2- 10 | violacein InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+ 11 | glutaric_acid InChI=1S/C5H8O4/c6-4(7)2-1-3-5(8)9/h1-3H2,(H,6,7)(H,8,9) 12 | mesaconic_acid InChI=1S/C5H6O4/c1-3(5(8)9)2-4(6)7/h2H,1H3,(H,6,7)(H,8,9)/b3-2+ 13 | naringenin InChI=1S/C15H12O5/c16-9-3-1-8(2-4-9)13-7-12(19)15-11(18)5-10(17)6-14(15)20-13/h1-6,13,16-18H,7H2 14 | p-hydroxystyrene InChI=1S/C8H8O/c1-2-7-3-5-8(9)6-4-7/h2-6,9H,1H2 15 | piceatannol InChI=1S/C14H12O4/c15-11-5-10(6-12(16)8-11)2-1-9-3-4-13(17)14(18)7-9/h1-8,15-18H/b2-1+ 16 | protopanaxadiol InChI=1S/C30H52O3/c1-19(2)10-9-14-30(8,33)20-11-16-29(7)25(20)21(31)18-23-27(5)15-13-24(32)26(3,4)22(27)12-17-28(23,29)6/h10,20-25,31-33H,9,11-18H2,1-8H3/t20-,21+,22-,23+,24-,25-,27-,28+,29+,30+/m0/s1 17 | TPA InChI=1S/C8H6O4/c9-7(10)5-1-2-6(4-3-5)8(11)12/h1-4H,(H,9,10)(H,11,12) clean_data/compounds_to_add/TPA_to_add.csv 18 | vanillin InChI=1S/C8H8O3/c1-11-8-4-6(5-9)2-3-7(8)10/h2-5,10H,1H3 19 | lycopene InChI=1S/C40H56/c1-33(2)19-13-23-37(7)27-17-31-39(9)29-15-25-35(5)21-11-12-22-36(6)26-16-30-40(10)32-18-28-38(8)24-14-20-34(3)4/h11-12,15-22,25-32H,13-14,23-24H2,1-10H3/b12-11+,25-15+,26-16+,31-17+,32-18+,35-21+,36-22+,37-27+,38-28+,39-29+,40-30+ 20 | pinocembrin InChI=1S/C15H12O4/c16-10-6-11(17)15-12(18)8-13(19-14(15)7-10)9-4-2-1-3-5-9/h1-7,13,16-17H,8H2/t13-/m0/s1 21 | styrene InChI=1S/C8H8/c1-2-8-6-4-3-5-7-8/h2-7H,1H2 22 | -------------------------------------------------------------------------------- /data/sinks/ecoli_core_sink_reduced_rp_ready.csv: -------------------------------------------------------------------------------- 1 | "name","inchi" 2 | "2-Oxoglutarate","InChI=1S/C5H6O5/c6-3(5(9)10)1-2-4(7)8/h1-2H2,(H,7,8)(H,9,10)" 3 | "3-Phospho-D-glycerate","InChI=1S/C3H7O7P/c4-2(3(5)6)1-10-11(7,8)9/h2,4H,1H2,(H,5,6)(H2,7,8,9)" 4 | "3-Phospho-D-glyceroyl 
phosphate","InChI=1S/C3H8O10P2/c4-2(1-12-14(6,7)8)3(5)13-15(9,10)11/h2,4H,1H2,(H2,6,7,8)(H2,9,10,11)" 5 | "6-Phospho-D-gluconate","InChI=1S/C6H13O10P/c7-2(1-16-17(13,14)15)3(8)4(9)5(10)6(11)12/h2-5,7-10H,1H2,(H,11,12)(H2,13,14,15)" 6 | "6-phospho-D-glucono-1,5-lactone","InChI=1S/C6H11O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-5,7-9H,1H2,(H2,11,12,13)" 7 | "ADP C10H12N5O10P2","InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20)" 8 | "AMP C10H12N5O7P","InChI=1S/C10H14N5O7P/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(22-10)1-21-23(18,19)20/h2-4,6-7,10,16-17H,1H2,(H2,11,12,13)(H2,18,19,20)" 9 | "ATP C10H12N5O13P3","InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)" 10 | "Acetaldehyde","InChI=1S/C2H4O/c1-2-3/h2H,1H3" 11 | "Acetate","InChI=1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)" 12 | "Acetyl phosphate","InChI=1S/C2H5O5P/c1-2(3)7-8(4,5)6/h1H3,(H2,4,5,6)" 13 | "Acetyl-CoA","InChI=1S/C23H38N7O17P3S/c1-12(31)51-7-6-25-14(32)4-5-26-21(35)18(34)23(2,3)9-44-50(41,42)47-49(39,40)43-8-13-17(46-48(36,37)38)16(33)22(45-13)30-11-29-15-19(24)27-10-28-20(15)30/h10-11,13,16-18,22,33-34H,4-9H2,1-3H3,(H,25,32)(H,26,35)(H,39,40)(H,41,42)(H2,24,27,28)(H2,36,37,38)" 14 | "Alpha-D-Ribose 5-phosphate","InChI=1S/C5H11O8P/c6-3-2(1-12-14(9,10)11)13-5(8)4(3)7/h2-8H,1H2,(H2,9,10,11)" 15 | "Ammonium","InChI=1S/H3N/h1H3" 16 | "CO2 CO2","InChI=1S/CO2/c2-1-3" 17 | "Cis-Aconitate","InChI=1S/C6H6O6/c7-4(8)1-3(6(11)12)2-5(9)10/h1H,2H2,(H,7,8)(H,9,10)(H,11,12)" 18 | "Citrate","InChI=1S/C6H8O7/c7-3(8)1-6(13,5(11)12)2-4(9)10/h13H,1-2H2,(H,7,8)(H,9,10)(H,11,12)" 19 | "Coenzyme A","InChI=1S/C21H36N7O16P3S/c1-21(2,16(31)19(32)24-4-3-12(29)23-5-6-48)8-41-47(38,39)44-46(36,37)40-7-11-15(43-45(33,34)35)14(30)20(42-11)28-10-27-13-17(22)25-9-26-18(13)28/h9-11,14-16,20,30-31,48H,3-8H2,1-2H3,(H,23,29)(H,24,32)(H,36,37)(H,38,39)(H2,22,25,26)(H2,33,34,35)" 20 | "D-Erythrose 4-phosphate","InChI=1S/C4H9O7P/c5-1-3(6)4(7)2-11-12(8,9)10/h1,3-4,6-7H,2H2,(H2,8,9,10)" 21 | "D-Fructose 1,6-bisphosphate","InChI=1S/C6H14O12P2/c7-4-3(1-16-19(10,11)12)18-6(9,5(4)8)2-17-20(13,14)15/h3-5,7-9H,1-2H2,(H2,10,11,12)(H2,13,14,15)" 22 | "D-Fructose 6-phosphate","InChI=1S/C6H13O9P/c7-2-6(10)5(9)4(8)3(15-6)1-14-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13)" 23 | "D-Glucose 6-phosphate","InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)" 24 | "D-Glycerate 2-phosphate","InChI=1S/C3H7O7P/c4-1-2(3(5)6)10-11(7,8)9/h2,4H,1H2,(H,5,6)(H2,7,8,9)" 25 | "D-Lactate","InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)" 26 | "D-Ribulose 5-phosphate","InChI=1S/C5H11O8P/c6-1-3(7)5(9)4(8)2-13-14(10,11)12/h4-6,8-9H,1-2H2,(H2,10,11,12)" 27 | "D-Xylulose 5-phosphate","InChI=1S/C5H11O8P/c6-1-3(7)5(9)4(8)2-13-14(10,11)12/h4-6,8-9H,1-2H2,(H2,10,11,12)" 28 | "Dihydroxyacetone phosphate","InChI=1S/C3H7O6P/c4-1-3(5)2-9-10(6,7)8/h4H,1-2H2,(H2,6,7,8)" 29 | "Ethanol","InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3" 30 | "Formate","InChI=1S/CH2O2/c2-1-3/h1H,(H,2,3)" 31 | "Fumarate","InChI=1S/C4H4O4/c5-3(6)1-2-4(7)8/h1-2H,(H,5,6)(H,7,8)" 32 | "Glyceraldehyde 3-phosphate","InChI=1S/C3H7O6P/c4-1-3(5)2-9-10(6,7)8/h1,3,5H,2H2,(H2,6,7,8)" 33 | "Glyoxylate","InChI=1S/C2H2O3/c3-1-2(4)5/h1H,(H,4,5)" 34 | "H+","InChI=1S/p+1" 35 | "H2O H2O","InChI=1S/H2O/h1H2" 36 | 
"Isocitrate","InChI=1S/C6H8O7/c7-3(8)1-2(5(10)11)4(9)6(12)13/h2,4,9H,1H2,(H,7,8)(H,10,11)(H,12,13)" 37 | "L-Glutamate","InChI=1S/C5H9NO4/c6-3(5(9)10)1-2-4(7)8/h3H,1-2,6H2,(H,7,8)(H,9,10)" 38 | "L-Glutamine","InChI=1S/C5H10N2O3/c6-3(5(9)10)1-2-4(7)8/h3H,1-2,6H2,(H2,7,8)(H,9,10)" 39 | "L-Malate","InChI=1S/C4H6O5/c5-2(4(8)9)1-3(6)7/h2,5H,1H2,(H,6,7)(H,8,9)" 40 | "Nicotinamide adenine dinucleotide","InChI=1S/C21H27N7O14P2/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(32)14(30)11(41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(31)20(40-10)27-3-1-2-9(4-27)18(23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2,(H5-,22,23,24,25,33,34,35,36,37)/p+1" 41 | "Nicotinamide adenine dinucleotide - reduced","InChI=1S/C21H29N7O14P2/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(32)14(30)11(41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(31)20(40-10)27-3-1-2-9(4-27)18(23)33/h1,3-4,7-8,10-11,13-16,20-21,29-32H,2,5-6H2,(H2,23,33)(H,34,35)(H,36,37)(H2,22,24,25)" 42 | "Nicotinamide adenine dinucleotide phosphate","InChI=1S/C21H28N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1-4,7-8,10-11,13-16,20-21,29-31H,5-6H2,(H7-,22,23,24,25,32,33,34,35,36,37,38,39)/p+1" 43 | "Nicotinamide adenine dinucleotide phosphate - reduced","InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)" 44 | "O2 O2","InChI=1S/O2/c1-2" 45 | "Oxaloacetate","InChI=1S/C4H4O5/c5-2(4(8)9)1-3(6)7/h1H2,(H,6,7)(H,8,9)" 46 | "Phosphate","InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)" 47 | "Phosphoenolpyruvate","InChI=1S/C3H5O6P/c1-2(3(4)5)9-10(6,7)8/h1H2,(H,4,5)(H2,6,7,8)" 48 | "Pyruvate","InChI=1S/C3H4O3/c1-2(4)3(5)6/h1H3,(H,5,6)" 49 | "Sedoheptulose 7-phosphate","InChI=1S/C7H15O10P/c8-1-3(9)5(11)7(13)6(12)4(10)2-17-18(14,15)16/h4-8,10-13H,1-2H2,(H2,14,15,16)" 50 | "Succinate","InChI=1S/C4H6O4/c5-3(6)1-2-4(7)8/h1-2H2,(H,5,6)(H,7,8)" 51 | "Succinyl-CoA","InChI=1S/C25H40N7O19P3S/c1-25(2,20(38)23(39)28-6-5-14(33)27-7-8-55-16(36)4-3-15(34)35)10-48-54(45,46)51-53(43,44)47-9-13-19(50-52(40,41)42)18(37)24(49-13)32-12-31-17-21(26)29-11-30-22(17)32/h11-13,18-20,24,37-38H,3-10H2,1-2H3,(H,27,33)(H,28,39)(H,34,35)(H,43,44)(H,45,46)(H2,26,29,30)(H2,40,41,42)" 52 | "Ubiquinol-8","InChI=1S/C49H76O4/c1-36(2)20-13-21-37(3)22-14-23-38(4)24-15-25-39(5)26-16-27-40(6)28-17-29-41(7)30-18-31-42(8)32-19-33-43(9)34-35-45-44(10)46(50)48(52-11)49(53-12)47(45)51/h20,22,24,26,28,30,32,34,50-51H,13-19,21,23,25,27,29,31,33,35H2,1-12H3" 53 | "Ubiquinone-8","InChI=1S/C49H74O4/c1-36(2)20-13-21-37(3)22-14-23-38(4)24-15-25-39(5)26-16-27-40(6)28-17-29-41(7)30-18-31-42(8)32-19-33-43(9)34-35-45-44(10)46(50)48(52-11)49(53-12)47(45)51/h20,22,24,26,28,30,32,34H,13-19,21,23,25,27,29,31,33,35H2,1-12H3" 54 | -------------------------------------------------------------------------------- /data/supplement_finder/data/metanetx_extracted_inchikeys.json.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/data/supplement_finder/data/metanetx_extracted_inchikeys.json.tar.gz -------------------------------------------------------------------------------- /data/supplement_finder/tree_for_testing/TPA/pickles/tree_end_search.pkl.tar.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/data/supplement_finder/tree_for_testing/TPA/pickles/tree_end_search.pkl.tar.gz -------------------------------------------------------------------------------- /data/supplement_finder/tree_for_testing/morphine/pickles/tree_end_search.pkl.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/data/supplement_finder/tree_for_testing/morphine/pickles/tree_end_search.pkl.tar.gz -------------------------------------------------------------------------------- /document_all_options.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | The aim of this file is to document all options available to run the MCTS and where to find them. 4 | More details are in the attached paper at https://doi.org/10.1101/800474, especially in the Appendix. 5 | 6 | ### Global configuration options 7 | 8 | - where: in the config.py file 9 | - how: either by modifying the config.py file by hand or by running change_config.py with its argparser (recommended) 10 | 11 | - DB_CACHE: uses the MongoDB cache when activated 12 | - DB_REPLACE: replaces data in the Mongo DB cache when activated 13 | - DB_time: time cut-off for storing in the DB: stored only if above the cut-off, otherwise the rule is applied by Python 14 | - use_cache: dictionary for caching results within the script. Highly recommended. 15 | - retrosynthesis: performs a retrosynthetic search; biosensor: performs a biosensor search 16 | Both cannot be activated at the same time. 17 | The main difference is how the state is considered successful: all compounds have to be found for retrosynthesis, and only 1 for biosensors 18 | - add_Hs: explicit hydrogens. Recommended to be False for faster calculations. 19 | - use_transpositions and transposition depth: not stable. Allow for sharing of information between nodes with the same chemical state but at different places in the tree, as done in doi:10.1007/BF03192151. 20 | 21 | ### Tree search configuration: 22 | 23 | - stop_at_first_result: stops once a single pathway is found. 24 | - c_name, c_smiles, c_inchi: information on the chemical compound of interest 25 | - fire_timeout, standardisation_timeout: time allowed for firing a rule/standardising a compound 26 | - organism_name: which model to use for production of compounds 27 | - complementary_sink: csv file containing compounds to add to the sink. If organism is None, this is the full sink. 28 | - representation: how to print results in logs 29 | - itermax: maximum number of iterations allowed for running the Tree search 30 | - parallel: not possible to use at the moment due to a workaround for RDKit rule application. Aimed at parallelising rollouts. 31 | - expansion_width: maximum number of children per node 32 | - time_budget: time allowed for running the tree search. The search will stop at the end of the iteration that exceeds this allotted time 33 | - max_depth: maximum depth of the Tree (also the maximum number of pathway steps) 34 | - minimal_visit_counts: minimal number of times a node has to be rolled out before its siblings can be expanded 35 | - UCT_policy: defines the UCT policy to use, i.e. the way to rank the children of a node. Allows various biases and scoring considerations. 
36 | - UCTK: the constant defining the exploration/exploitation parameter in the UCT formula (see the sketch after this list) 37 | - bias_k: if progressive bias is used, defines the weight of the progressive bias in the UCT formula 38 | - k_rave: if RAVE is used, how to weight the RAVE. Roughly, for visits below this value RAVE values lead the UCT, and above it rollout values lead. 39 | - use_RAVE: moves have scores each time they are used throughout the Tree, adapting the RAVE (Rapid Action Value Estimation) principle to the whole tree and not just rollouts. 40 | - penalty: penalty when no compound of the state belongs to the organism 41 | - full_state_reward: reward when all compounds of the state belong to the organism 42 | - pathway_scoring: how to score a pathway when it is found. 43 | - Rollout_policy: how to select moves for the Rollout: randomly, weighting by which scores. Many options. 44 | - max_rollout: maximum length of the rollout (it also stops when max_depth is reached) 45 | - chemical_scoring: choose the way to chemically score reactions (considering only substrates or both substrates and products). Possibility to use ConstantChemicalScorer which always returns 1. 46 | - biological_score_cut_off: cuts off with biological score at the specified level 47 | - substrate_only_score_cut_off: cuts off with substrate similarity only score BEFORE applying the rule at the specified level 48 | - chemical_score_cut_off: cuts off with specified chemical score AFTER applying the rule at the specified level 49 | - virtual_visits: start nodes at virtual_visits values, to avoid having too much variability at initial Monte Carlo simulations. 50 | - progressive_bias_strategy: policy for the progressive bias (untested) 51 | - progressive_widening: add a child to nodes visited more than len(nodes)^2 (untested) 52 | - diameter: specify the diameters (as a list) to use 53 | - EC_filter: allow only certain EC subclasses 54 | - small: development archive 55 | - seed: for reproducibility 56 | - tree_to_complete: if restarting the search from another tree. 
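To make the role of UCTK, bias_k and virtual_visits more concrete, below is a minimal, illustrative sketch of a UCT-style selection score. It is not the exact formula implemented in UCT_policies.py; the function and variable names are invented for this example.

```python
from math import sqrt, log

def uct_score(child_average_reward, child_visits, parent_visits,
              UCTK=20.0, bias_k=0.0, progressive_bias=0.0, virtual_visits=0):
    """Toy UCT-style score: exploitation + exploration + optional progressive bias."""
    visits = child_visits + virtual_visits           # virtual visits damp early stochasticity
    exploration = UCTK * sqrt(log(parent_visits + 1) / (visits + 1))
    bias = bias_k * progressive_bias / (visits + 1)  # progressive bias decays as the node is visited
    return child_average_reward + exploration + bias

# During selection, the child of the current node with the highest score is descended into.
```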
57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /expected_results/deoxiviolacein_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "elements": { 3 | "nodes": [ 4 | { 5 | "data": { 6 | "SMILES": "NC(Cc1c[nH]c2ccccc12)C(=O)O", 7 | "inSink": 1, 8 | "isSource": 0, 9 | "InChI": "InChI=1S/C11H12N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,9,13H,5,12H2,(H,14,15)", 10 | "Names": [ 11 | "QIVBCDIJIAJPQS-UHFFFAOYSA-N", 12 | "L-Tryptophan" 13 | ], 14 | "id": "QIVBCDIJIAJPQS-UHFFFAOYSA-N", 15 | "type": "compound", 16 | "Rule ID": null, 17 | "EC number": null, 18 | "Reaction SMILES": null, 19 | "Diameter": null, 20 | "Score": null, 21 | "Iteration": null 22 | } 23 | }, 24 | { 25 | "data": { 26 | "SMILES": "N=C(Cc1c[nH]c2ccccc12)C(=O)O", 27 | "inSink": 0, 28 | "isSource": 0, 29 | "InChI": "InChI=1S/C11H10N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,12-13H,5H2,(H,14,15)", 30 | "Names": [ 31 | "LKYWXXAVLLVJAS-UHFFFAOYSA-N" 32 | ], 33 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N", 34 | "type": "compound", 35 | "Rule ID": null, 36 | "EC number": null, 37 | "Reaction SMILES": null, 38 | "Diameter": null, 39 | "Score": null, 40 | "Iteration": null 41 | } 42 | }, 43 | { 44 | "data": { 45 | "SMILES": "N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12", 46 | "inSink": 0, 47 | "isSource": 0, 48 | "InChI": "InChI=1S/C22H18N4O4/c23-19(21(27)28)17(13-9-25-15-7-3-1-5-11(13)15)18(20(24)22(29)30)14-10-26-16-8-4-2-6-12(14)16/h1-10,17-18,23-26H,(H,27,28)(H,29,30)", 49 | "Names": [ 50 | "CKBGWXPNAUCVQQ-UHFFFAOYSA-N" 51 | ], 52 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N", 53 | "type": "compound", 54 | "Rule ID": null, 55 | "EC number": null, 56 | "Reaction SMILES": null, 57 | "Diameter": null, 58 | "Score": null, 59 | "Iteration": null 60 | } 61 | }, 62 | { 63 | "data": { 64 | "SMILES": "O=O", 65 | "inSink": 1, 66 | "isSource": 0, 67 | "InChI": "InChI=1S/O2/c1-2", 68 | "Names": [ 69 | "MYMOFIZGZYHOMD-UHFFFAOYSA-N", 70 | "O2 O2" 71 | ], 72 | "id": "MYMOFIZGZYHOMD-UHFFFAOYSA-N", 73 | "type": "compound", 74 | "Rule ID": null, 75 | "EC number": null, 76 | "Reaction SMILES": null, 77 | "Diameter": null, 78 | "Score": null, 79 | "Iteration": null 80 | } 81 | }, 82 | { 83 | "data": { 84 | "SMILES": "O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12", 85 | "inSink": 0, 86 | "isSource": 0, 87 | "InChI": "InChI=1S/C21H15N3O2/c25-21(26)20-14(15-10-22-17-7-3-1-5-12(15)17)9-19(24-20)16-11-23-18-8-4-2-6-13(16)18/h1-11,22-24H,(H,25,26)", 88 | "Names": [ 89 | "SFLGFRJGKHRRID-UHFFFAOYSA-N" 90 | ], 91 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N", 92 | "type": "compound", 93 | "Rule ID": null, 94 | "EC number": null, 95 | "Reaction SMILES": null, 96 | "Diameter": null, 97 | "Score": null, 98 | "Iteration": null 99 | } 100 | }, 101 | { 102 | "data": { 103 | "SMILES": "O=C1NC(c2c[nH]c3ccccc23)=CC1=C1C(=O)Nc2ccccc21", 104 | "inSink": 0, 105 | "isSource": 1, 106 | "InChI": "InChI=1S/C20H13N3O2/c24-19-13(18-12-6-2-4-8-16(12)22-20(18)25)9-17(23-19)14-10-21-15-7-3-1-5-11(14)15/h1-10,21H,(H,22,25)(H,23,24)", 107 | "Names": [ 108 | "deoxiviolacein", 109 | "OJUJNNKCVPCATE-UHFFFAOYSA-N" 110 | ], 111 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N", 112 | "type": "compound", 113 | "Rule ID": null, 114 | "EC number": null, 115 | "Reaction SMILES": null, 116 | "Diameter": null, 117 | "Score": null, 118 | "Iteration": null 119 | } 120 | }, 121 | { 122 | "data": { 123 | "SMILES": null, 124 | "inSink": null, 125 | "isSource": null, 126 | "InChI": null, 127 | 
"Names": null, 128 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1", 129 | "type": "reaction", 130 | "Rule ID": [ 131 | "RR-02-8907c369787578b3-16-F", 132 | "RR-02-8907c369787578b3-14-F", 133 | "RR-02-8907c369787578b3-12-F", 134 | "RR-02-8907c369787578b3-10-F" 135 | ], 136 | "EC number": [ 137 | "1.14.13.224" 138 | ], 139 | "Reaction SMILES": "O=C1NC(c2c[nH]c3ccccc23)=CC1=C1C(=O)Nc2ccccc21>>O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12.O=O", 140 | "Diameter": 16, 141 | "Score": 1.0, 142 | "ChemicalScore": 1.0, 143 | "Iteration": 1, 144 | "Stoechiometry": { 145 | "SFLGFRJGKHRRID-UHFFFAOYSA-N": 1, 146 | "MYMOFIZGZYHOMD-UHFFFAOYSA-N": 1 147 | } 148 | } 149 | }, 150 | { 151 | "data": { 152 | "SMILES": null, 153 | "inSink": null, 154 | "isSource": null, 155 | "InChI": null, 156 | "Names": null, 157 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2", 158 | "type": "reaction", 159 | "Rule ID": [ 160 | "RR-02-74068b9f6b2efdc1-16-F", 161 | "RR-02-74068b9f6b2efdc1-14-F", 162 | "RR-02-74068b9f6b2efdc1-12-F", 163 | "RR-02-74068b9f6b2efdc1-10-F" 164 | ], 165 | "EC number": [ 166 | "" 167 | ], 168 | "Reaction SMILES": "O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12>>N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12", 169 | "Diameter": 16, 170 | "Score": 1.0, 171 | "ChemicalScore": 1.0, 172 | "Iteration": 2, 173 | "Stoechiometry": { 174 | "CKBGWXPNAUCVQQ-UHFFFAOYSA-N": 1 175 | } 176 | } 177 | }, 178 | { 179 | "data": { 180 | "SMILES": null, 181 | "inSink": null, 182 | "isSource": null, 183 | "InChI": null, 184 | "Names": null, 185 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3", 186 | "type": "reaction", 187 | "Rule ID": [ 188 | "RR-02-47e9577f4cb98f97-16-F", 189 | "RR-02-47e9577f4cb98f97-14-F", 190 | "RR-02-47e9577f4cb98f97-12-F", 191 | "RR-02-47e9577f4cb98f97-10-F" 192 | ], 193 | "EC number": [ 194 | "1.21.98" 195 | ], 196 | "Reaction SMILES": "N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12>>N=C(Cc1c[nH]c2ccccc12)C(=O)O.N=C(Cc1c[nH]c2ccccc12)C(=O)O", 197 | "Diameter": 16, 198 | "Score": 1.0, 199 | "ChemicalScore": 1.0, 200 | "Iteration": 3, 201 | "Stoechiometry": { 202 | "LKYWXXAVLLVJAS-UHFFFAOYSA-N": 2 203 | } 204 | } 205 | }, 206 | { 207 | "data": { 208 | "SMILES": null, 209 | "inSink": null, 210 | "isSource": null, 211 | "InChI": null, 212 | "Names": null, 213 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4", 214 | "type": "reaction", 215 | "Rule ID": [ 216 | "RR-02-0c9c5a5559e132c7-16-F", 217 | "RR-02-0c9c5a5559e132c7-14-F", 218 | "RR-02-0c9c5a5559e132c7-12-F", 219 | "RR-02-bbedd3c9b9124d30-10-F" 220 | ], 221 | "EC number": [ 222 | "1.3.3.10", 223 | "1.4.3", 224 | "1.4.3.-" 225 | ], 226 | "Reaction SMILES": "N=C(Cc1c[nH]c2ccccc12)C(=O)O>>NC(Cc1c[nH]c2ccccc12)C(=O)O", 227 | "Diameter": 16, 228 | "Score": 0.453552175675181, 229 | "ChemicalScore": 1.0, 230 | "Iteration": 4, 231 | "Stoechiometry": { 232 | "QIVBCDIJIAJPQS-UHFFFAOYSA-N": 1 233 | } 234 | } 235 | } 236 | ], 237 | "edges": [ 238 | { 239 | "data": { 240 | "target": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1", 241 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N", 242 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N" 243 | } 244 | }, 245 | { 246 | "data": { 247 | "target": "SFLGFRJGKHRRID-UHFFFAOYSA-N", 248 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1", 249 | "id": 
"SFLGFRJGKHRRID-UHFFFAOYSA-N_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1" 250 | } 251 | }, 252 | { 253 | "data": { 254 | "target": "MYMOFIZGZYHOMD-UHFFFAOYSA-N", 255 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1", 256 | "id": "MYMOFIZGZYHOMD-UHFFFAOYSA-N_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1" 257 | } 258 | }, 259 | { 260 | "data": { 261 | "target": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2", 262 | "source": "SFLGFRJGKHRRID-UHFFFAOYSA-N", 263 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2_=>_SFLGFRJGKHRRID-UHFFFAOYSA-N" 264 | } 265 | }, 266 | { 267 | "data": { 268 | "target": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N", 269 | "source": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2", 270 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N_=>_SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2" 271 | } 272 | }, 273 | { 274 | "data": { 275 | "target": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3", 276 | "source": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N", 277 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3_=>_CKBGWXPNAUCVQQ-UHFFFAOYSA-N" 278 | } 279 | }, 280 | { 281 | "data": { 282 | "target": "LKYWXXAVLLVJAS-UHFFFAOYSA-N", 283 | "source": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3", 284 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N_=>_CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3" 285 | } 286 | }, 287 | { 288 | "data": { 289 | "target": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4", 290 | "source": "LKYWXXAVLLVJAS-UHFFFAOYSA-N", 291 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4_=>_LKYWXXAVLLVJAS-UHFFFAOYSA-N" 292 | } 293 | }, 294 | { 295 | "data": { 296 | "target": "QIVBCDIJIAJPQS-UHFFFAOYSA-N", 297 | "source": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4", 298 | "id": "QIVBCDIJIAJPQS-UHFFFAOYSA-N_=>_LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4" 299 | } 300 | } 301 | ] 302 | } 303 | } -------------------------------------------------------------------------------- /expected_results/deoxiviolacein_iteration_15.json: -------------------------------------------------------------------------------- 1 | { 2 | "elements": { 3 | "nodes": [ 4 | { 5 | "data": { 6 | "SMILES": "NC(Cc1c[nH]c2ccccc12)C(=O)O", 7 | "inSink": 1, 8 | "isSource": 0, 9 | "InChI": "InChI=1S/C11H12N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,9,13H,5,12H2,(H,14,15)", 10 | "Names": [ 11 | "QIVBCDIJIAJPQS-UHFFFAOYSA-N", 12 | "L-Tryptophan" 13 | ], 14 | "id": "QIVBCDIJIAJPQS-UHFFFAOYSA-N", 15 | "type": "compound", 16 | "Rule ID": null, 17 | "EC number": null, 18 | "Reaction SMILES": null, 19 | "Diameter": null, 20 | "Score": null, 21 | "Iteration": null 22 | } 23 | }, 24 | { 25 | "data": { 26 | "SMILES": "N=C(Cc1c[nH]c2ccccc12)C(=O)O", 27 | "inSink": 0, 28 | "isSource": 0, 29 | "InChI": "InChI=1S/C11H10N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,12-13H,5H2,(H,14,15)", 30 | "Names": [ 31 | "LKYWXXAVLLVJAS-UHFFFAOYSA-N" 32 | ], 33 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N", 34 | "type": "compound", 35 | "Rule ID": null, 36 | "EC number": null, 37 | "Reaction SMILES": null, 38 | "Diameter": null, 39 | "Score": null, 40 | "Iteration": null 41 | } 42 | }, 43 | { 44 | "data": { 45 | "SMILES": "N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12", 46 | "inSink": 0, 47 | "isSource": 0, 48 | "InChI": 
"InChI=1S/C22H18N4O4/c23-19(21(27)28)17(13-9-25-15-7-3-1-5-11(13)15)18(20(24)22(29)30)14-10-26-16-8-4-2-6-12(14)16/h1-10,17-18,23-26H,(H,27,28)(H,29,30)", 49 | "Names": [ 50 | "CKBGWXPNAUCVQQ-UHFFFAOYSA-N" 51 | ], 52 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N", 53 | "type": "compound", 54 | "Rule ID": null, 55 | "EC number": null, 56 | "Reaction SMILES": null, 57 | "Diameter": null, 58 | "Score": null, 59 | "Iteration": null 60 | } 61 | }, 62 | { 63 | "data": { 64 | "SMILES": "O=O", 65 | "inSink": 1, 66 | "isSource": 0, 67 | "InChI": "InChI=1S/O2/c1-2", 68 | "Names": [ 69 | "MYMOFIZGZYHOMD-UHFFFAOYSA-N", 70 | "O2 O2" 71 | ], 72 | "id": "MYMOFIZGZYHOMD-UHFFFAOYSA-N", 73 | "type": "compound", 74 | "Rule ID": null, 75 | "EC number": null, 76 | "Reaction SMILES": null, 77 | "Diameter": null, 78 | "Score": null, 79 | "Iteration": null 80 | } 81 | }, 82 | { 83 | "data": { 84 | "SMILES": "O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12", 85 | "inSink": 0, 86 | "isSource": 0, 87 | "InChI": "InChI=1S/C21H15N3O2/c25-21(26)20-14(15-10-22-17-7-3-1-5-12(15)17)9-19(24-20)16-11-23-18-8-4-2-6-13(16)18/h1-11,22-24H,(H,25,26)", 88 | "Names": [ 89 | "SFLGFRJGKHRRID-UHFFFAOYSA-N" 90 | ], 91 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N", 92 | "type": "compound", 93 | "Rule ID": null, 94 | "EC number": null, 95 | "Reaction SMILES": null, 96 | "Diameter": null, 97 | "Score": null, 98 | "Iteration": null 99 | } 100 | }, 101 | { 102 | "data": { 103 | "SMILES": "O=C1NC(c2c[nH]c3ccccc23)=CC1=C1C(=O)Nc2ccccc21", 104 | "inSink": 0, 105 | "isSource": 1, 106 | "InChI": "InChI=1S/C20H13N3O2/c24-19-13(18-12-6-2-4-8-16(12)22-20(18)25)9-17(23-19)14-10-21-15-7-3-1-5-11(14)15/h1-10,21H,(H,22,25)(H,23,24)", 107 | "Names": [ 108 | "deoxiviolacein", 109 | "OJUJNNKCVPCATE-UHFFFAOYSA-N" 110 | ], 111 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N", 112 | "type": "compound", 113 | "Rule ID": null, 114 | "EC number": null, 115 | "Reaction SMILES": null, 116 | "Diameter": null, 117 | "Score": null, 118 | "Iteration": null 119 | } 120 | }, 121 | { 122 | "data": { 123 | "SMILES": null, 124 | "inSink": null, 125 | "isSource": null, 126 | "InChI": null, 127 | "Names": null, 128 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1", 129 | "type": "reaction", 130 | "Rule ID": [ 131 | "RR-02-8907c369787578b3-16-F", 132 | "RR-02-8907c369787578b3-14-F", 133 | "RR-02-8907c369787578b3-12-F", 134 | "RR-02-8907c369787578b3-10-F" 135 | ], 136 | "EC number": [ 137 | "1.14.13.224" 138 | ], 139 | "Reaction SMILES": "O=C1NC(c2c[nH]c3ccccc23)=CC1=C1C(=O)Nc2ccccc21>>O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12.O=O", 140 | "Diameter": 16, 141 | "Score": 1.0, 142 | "ChemicalScore": 1.0, 143 | "Iteration": 1, 144 | "Stoechiometry": { 145 | "SFLGFRJGKHRRID-UHFFFAOYSA-N": 1, 146 | "MYMOFIZGZYHOMD-UHFFFAOYSA-N": 1 147 | } 148 | } 149 | }, 150 | { 151 | "data": { 152 | "SMILES": null, 153 | "inSink": null, 154 | "isSource": null, 155 | "InChI": null, 156 | "Names": null, 157 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2", 158 | "type": "reaction", 159 | "Rule ID": [ 160 | "RR-02-74068b9f6b2efdc1-16-F", 161 | "RR-02-74068b9f6b2efdc1-14-F", 162 | "RR-02-74068b9f6b2efdc1-12-F", 163 | "RR-02-74068b9f6b2efdc1-10-F" 164 | ], 165 | "EC number": [ 166 | "" 167 | ], 168 | "Reaction SMILES": "O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12>>N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12", 169 | "Diameter": 16, 170 | "Score": 1.0, 171 | "ChemicalScore": 1.0, 172 | "Iteration": 2, 173 | "Stoechiometry": { 174 | 
"CKBGWXPNAUCVQQ-UHFFFAOYSA-N": 1 175 | } 176 | } 177 | }, 178 | { 179 | "data": { 180 | "SMILES": null, 181 | "inSink": null, 182 | "isSource": null, 183 | "InChI": null, 184 | "Names": null, 185 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3", 186 | "type": "reaction", 187 | "Rule ID": [ 188 | "RR-02-47e9577f4cb98f97-16-F", 189 | "RR-02-47e9577f4cb98f97-14-F", 190 | "RR-02-47e9577f4cb98f97-12-F", 191 | "RR-02-47e9577f4cb98f97-10-F" 192 | ], 193 | "EC number": [ 194 | "1.21.98" 195 | ], 196 | "Reaction SMILES": "N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12>>N=C(Cc1c[nH]c2ccccc12)C(=O)O.N=C(Cc1c[nH]c2ccccc12)C(=O)O", 197 | "Diameter": 16, 198 | "Score": 1.0, 199 | "ChemicalScore": 1.0, 200 | "Iteration": 3, 201 | "Stoechiometry": { 202 | "LKYWXXAVLLVJAS-UHFFFAOYSA-N": 2 203 | } 204 | } 205 | }, 206 | { 207 | "data": { 208 | "SMILES": null, 209 | "inSink": null, 210 | "isSource": null, 211 | "InChI": null, 212 | "Names": null, 213 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4", 214 | "type": "reaction", 215 | "Rule ID": [ 216 | "RR-02-0c9c5a5559e132c7-16-F", 217 | "RR-02-0c9c5a5559e132c7-14-F", 218 | "RR-02-0c9c5a5559e132c7-12-F", 219 | "RR-02-bbedd3c9b9124d30-10-F" 220 | ], 221 | "EC number": [ 222 | "1.3.3.10", 223 | "1.4.3", 224 | "1.4.3.-" 225 | ], 226 | "Reaction SMILES": "N=C(Cc1c[nH]c2ccccc12)C(=O)O>>NC(Cc1c[nH]c2ccccc12)C(=O)O", 227 | "Diameter": 16, 228 | "Score": 0.453552175675181, 229 | "ChemicalScore": 1.0, 230 | "Iteration": 4, 231 | "Stoechiometry": { 232 | "QIVBCDIJIAJPQS-UHFFFAOYSA-N": 1 233 | } 234 | } 235 | } 236 | ], 237 | "edges": [ 238 | { 239 | "data": { 240 | "target": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1", 241 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N", 242 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N" 243 | } 244 | }, 245 | { 246 | "data": { 247 | "target": "SFLGFRJGKHRRID-UHFFFAOYSA-N", 248 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1", 249 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1" 250 | } 251 | }, 252 | { 253 | "data": { 254 | "target": "MYMOFIZGZYHOMD-UHFFFAOYSA-N", 255 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1", 256 | "id": "MYMOFIZGZYHOMD-UHFFFAOYSA-N_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1" 257 | } 258 | }, 259 | { 260 | "data": { 261 | "target": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2", 262 | "source": "SFLGFRJGKHRRID-UHFFFAOYSA-N", 263 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2_=>_SFLGFRJGKHRRID-UHFFFAOYSA-N" 264 | } 265 | }, 266 | { 267 | "data": { 268 | "target": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N", 269 | "source": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2", 270 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N_=>_SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2" 271 | } 272 | }, 273 | { 274 | "data": { 275 | "target": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3", 276 | "source": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N", 277 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3_=>_CKBGWXPNAUCVQQ-UHFFFAOYSA-N" 278 | } 279 | }, 280 | { 281 | "data": { 282 | "target": "LKYWXXAVLLVJAS-UHFFFAOYSA-N", 283 | "source": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3", 284 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N_=>_CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3" 285 | } 286 | }, 287 | { 288 
| "data": { 289 | "target": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4", 290 | "source": "LKYWXXAVLLVJAS-UHFFFAOYSA-N", 291 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4_=>_LKYWXXAVLLVJAS-UHFFFAOYSA-N" 292 | } 293 | }, 294 | { 295 | "data": { 296 | "target": "QIVBCDIJIAJPQS-UHFFFAOYSA-N", 297 | "source": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4", 298 | "id": "QIVBCDIJIAJPQS-UHFFFAOYSA-N_=>_LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4" 299 | } 300 | } 301 | ] 302 | } 303 | } -------------------------------------------------------------------------------- /expected_results/pickles/tree_end_search.pkl.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/expected_results/pickles/tree_end_search.pkl.tar.gz -------------------------------------------------------------------------------- /expected_results/results.csv: -------------------------------------------------------------------------------- 1 | parameter,value 2 | stop_at_first_result,False 3 | c_name,deoxiviolacein 4 | c_smiles, 5 | c_inchi,"InChI=1S/C20H13N3O2/c24-19-13(18-12-6-2-4-8-16(12)22-20(18)25)9-17(23-19)14-10-21-15-7-3-1-5-11(14)15/h1-10,21H,(H,22,25)(H,23,24)/b18-13+" 6 | fire_timeout,1 7 | organism_name,ecoli 8 | complementary_sink, 9 | itermax,1000 10 | expansion_width,10 11 | time_budget,7200 12 | max_depth,7 13 | minimal_visit_counts,1 14 | UCT_policy,Biochemical_UCT_1 15 | UCTK,20.0 16 | bias_k,0.0 17 | k_rave,0.0 18 | use_RAVE,False 19 | penalty,-1 20 | full_state_reward,2 21 | Rollout_policy,Rollout_policy_random_uniform_on_biochemical_multiplication_score 22 | max_rollout,3 23 | chemical_scoring,SubandprodChemicalScorer 24 | biological_score_cut_off,0.1 25 | substrate_only_score_cut_off,0.7 26 | chemical_score_cut_off,0.7 27 | virtual_visits,0 28 | progressive_bias_strategy,0 29 | progressive_widening,False 30 | diameter,"[10, 12, 14, 16]" 31 | EC_filter, 32 | tree_to_complete, 33 | found_pathways,4 34 | TIME_EXECUTION,3.58 35 | STOP_REASON,iteration 36 | NUMBER_ITERATION,999 37 | -------------------------------------------------------------------------------- /move.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains the move class that contains: 3 | - compound it applies to 4 | - rsmart 5 | - rid 6 | - set (becuase a signle rule cna generate multiple product sets) 7 | - biological_score 8 | - chemical_score 9 | """ 10 | 11 | # General utilities 12 | import logging 13 | import csv 14 | 15 | class Move(object): 16 | """ 17 | Basic move object. At the moment will have only attributes, no function per say. 
18 | """ 19 | 20 | logger = logging.getLogger(__name__) 21 | def __init__(self, 22 | rsmart, 23 | rid, 24 | compound_id, 25 | rsmiles = None, 26 | set_number = 0, 27 | chemical_score = 0, 28 | chemical_substrate_score = 0, 29 | biological_score = 0, 30 | product_list = [], 31 | EC_number = ["EC: None"], 32 | compound_index = 0, stoechiometry = {}): 33 | self.rsmart = rsmart 34 | if rsmiles is None: 35 | self.rsmiles = self.rsmart 36 | else: 37 | self.rsmiles = rsmiles 38 | self.rid = rid 39 | self.compound_id = compound_id 40 | self.set_number = set_number 41 | self.chemical_score = chemical_score 42 | self.chemical_substrate_score = chemical_substrate_score 43 | self.biological_score = biological_score 44 | self.EC_numbers = EC_number 45 | self.product_list = product_list 46 | self.name = "{}-{}-{}".format(self.compound_id, self.rid, str(self.set_number)) 47 | self.synonyms = [self.rid] 48 | self.RAVE_visits = 0 49 | self.RAVE_total_score = 0 50 | self.RAVE_average_score = 0 51 | self.stoechiometry = stoechiometry 52 | 53 | def set_set_number(self, set_number): 54 | self.set_number = set_number 55 | self.name = "{}-{}-{}".format(self.compound_id, self.rid, str(self.set_number)) 56 | 57 | def set_rsmart(self, rsmart): 58 | self.rsmart = rsmart 59 | 60 | def set_rsmiles(self, rsmiles): 61 | self.rsmiles = rsmiles 62 | 63 | def calculate_rsmiles(self, substrate): 64 | """ 65 | Smiles of the actual transformation that is happening between the substrate and the products 66 | """ 67 | sub_smiles = "{}".format(substrate.csmiles) 68 | prod_smiles = ".".join([prod.csmiles for prod in self.full_product_list()]) 69 | self.rsmiles = "{}>>{}".format(sub_smiles, prod_smiles) 70 | 71 | def set_chemical_score(self, chemical_score): 72 | self.chemical_score = chemical_score 73 | 74 | def set_chemical_substrate_score(self, chemical_substrate_score): 75 | self.chemical_substrate_score = chemical_substrate_score 76 | 77 | def delete_intermediate_chemical_score(self): 78 | del self.original_substrates_list 79 | del self.original_products_list_list 80 | 81 | def set_intermediate_chemical_score(self, original_substrates_list, original_products_list_list): 82 | self.original_substrates_list = original_substrates_list 83 | self.original_products_list_list = original_products_list_list 84 | 85 | def set_id(self, id): 86 | self.id = id 87 | 88 | def set_EC_numbers(self, EC_numbers): 89 | self.EC_numbers = EC_numbers 90 | 91 | def set_biological_score(self, biological_score): 92 | self.biological_score = biological_score 93 | 94 | def set_product_list(self, product_list): 95 | self.product_list = product_list 96 | 97 | def set_stoechiometry(self, stoechiometry): 98 | self.stoechiometry = stoechiometry 99 | 100 | def __repr__(self): 101 | return self.name 102 | 103 | def print_all_attributes(self): 104 | text = "For move {}, attributes are: rid: {}, cid: {} \n".format(self.name, self.rid, self.compound_id) 105 | text_next = "set: {}, chem_score: {}, bio score: {} \n".format(self.set_number, self.chemical_score, self.biological_score) 106 | text_last = "product_list: {}, stoechiometry: {} \n".format(self.product_list, self.stoechiometry) 107 | text_appendix = "EC numbers are {}".format(self.EC_numbers) 108 | return (text + text_next + text_last + text_appendix) 109 | 110 | def full_product_list(self): 111 | full_list = [] 112 | ordered_product_list = sorted(self.product_list, key = lambda item: self.stoechiometry[item.InChIKey]) 113 | for product in ordered_product_list: 114 | for i in 
range(self.stoechiometry[product.InChIKey]): 115 | full_list.append(product) 116 | return full_list 117 | 118 | def _calculate_simles_from_move(self): 119 | sub = "{}".format() 120 | 121 | def clone(self): 122 | cloned_move = Move( 123 | rsmart=self.rsmart, 124 | rid=self.rid, 125 | compound_id=self.compound_id, 126 | set_number=self.set_number, 127 | chemical_score=self.chemical_score, 128 | biological_score=self.biological_score, 129 | product_list=self.product_list, 130 | EC_number=self.EC_numbers, 131 | stoechiometry=self.stoechiometry, 132 | ) 133 | try: 134 | cloned_move.set_intermediate_chemical_score( 135 | self.original_substrates_list, 136 | self.original_products_list_list, 137 | ) 138 | except AttributeError: 139 | pass 140 | return cloned_move 141 | 142 | def add_synonym(self, move): 143 | """ 144 | Adds a synonym to this move. 145 | (When another move was deemed equal to current move (self)) 146 | """ 147 | if move.rid not in self.synonyms: 148 | self.synonyms.append(move.rid) 149 | for EC in move.EC_numbers: 150 | if EC not in self.EC_numbers: 151 | self.EC_numbers.append(EC) 152 | if self.biological_score * self.chemical_score < move.biological_score * move.chemical_score: 153 | self.biological_score = move.biological_score 154 | self.chemical_score = move.chemical_score 155 | self.stoechiometry = move.stoechiometry 156 | 157 | def eq_full_inchi_key(self, other): 158 | """ 159 | Tow moves are identical if they 160 | - apply to the same compound 161 | - generate the same products 162 | """ 163 | compound_eq = (self.compound_id == other.compound_id) 164 | products_eq = len(self.product_list) == len(other.product_list) 165 | for product in self.product_list: 166 | products_eq = products_eq and (product.in_list(other.product_list, main_layer = False)) 167 | return(compound_eq and products_eq) 168 | 169 | def eq_main_layer(self, other): 170 | """ 171 | Tow moves are identical if they 172 | - apply to the same compound 173 | - generate the same products 174 | """ 175 | compound_eq = (self.compound_id == other.compound_id) 176 | products_eq = len(self.product_list) == len(other.product_list) 177 | for product in self.product_list: 178 | products_eq = products_eq and (product.in_list(other.product_list, main_layer = True)) 179 | return(compound_eq and products_eq) 180 | 181 | def in_list(self, list_moves, main_layer = False): 182 | in_list = False 183 | for move_in_list in list_moves: 184 | if main_layer: 185 | equality = self.eq_main_layer(move_in_list) 186 | if equality: 187 | in_list = True 188 | move_in_list.add_synonym(self) 189 | break 190 | else: 191 | equality = self.eq_full_inchi_key(move_in_list) 192 | if equality: 193 | in_list = True 194 | move_in_list.add_synonym(self) 195 | break 196 | return(in_list) 197 | 198 | def update(self, result, visit_number = 1): 199 | """ 200 | Values are used only for RAVE implementation. 201 | """ 202 | self.RAVE_visits = self.RAVE_visits + visit_number 203 | self.RAVE_total_score = self.RAVE_total_score + result * visit_number 204 | self.RAVE_average_score = self.RAVE_total_score/self.RAVE_visits 205 | -------------------------------------------------------------------------------- /organisms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines organisms as chemical_compounds_state objects. 3 | Unpickled after calculation when setting up RP3. 
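Illustrative use, a minimal sketch: the names below are the states unpickled at the bottom of this module,
and pathway.py imports Test_organism_H the same way.
    from organisms import Test_organism_H, ecoli_chassis_H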
4 | """ 5 | 6 | # General utilities 7 | import logging 8 | import pickle 9 | import os 10 | import csv 11 | import sys 12 | 13 | from config import * 14 | 15 | # RP3 specific objects 16 | from compound import Compound, unpickle 17 | from chemical_compounds_state import ChemicalCompoundState 18 | from rdkit.Chem import AllChem 19 | from utilities.reactor.Utils import standardize_chemical, standardize_results, handle_results, ChemConversionError 20 | from utilities.reactor.cli import worker_match, worker_fire, RuleConversionError 21 | 22 | 23 | class NotReady(Exception): 24 | """Raised when organisms or rules have not been caculated in advance""" 25 | 26 | def __init__(self, msg = "Not Ready. Need to run set-up scripts"): 27 | self._msg = msg 28 | 29 | def __str__(self): 30 | return self._msg 31 | 32 | 33 | def import_organism_from_csv(csv_file, add_Hs=True): 34 | with open(csv_file) as csv_handle: 35 | dict_reader = csv.DictReader(csv_handle, delimiter=",") 36 | compound_list = [] 37 | for row in dict_reader: 38 | name = row["name"] 39 | inchi = row["inchi"] 40 | if inchi is None or inchi == "None" or inchi == "": 41 | pass 42 | else: 43 | try: 44 | if name.startswith("InChI"): 45 | compound = Compound( 46 | InChI=inchi, heavy_standardisation=True, force_add_H=add_Hs 47 | ) 48 | else: 49 | compound = Compound( 50 | InChI=inchi, 51 | name=name, 52 | heavy_standardisation=True, 53 | force_add_H=add_Hs, 54 | ) 55 | if not compound.in_list(compound_list, main_layer = False): 56 | compound_list.append(compound) 57 | except ChemConversionError as e: 58 | logging.error("For compound {} with inchi {}: error ChemConversionError".format(name, inchi)) 59 | organism = ChemicalCompoundState(compound_list, main_layer = False) 60 | # organism.set_main_layer(True) 61 | return(organism) 62 | 63 | 64 | organisms_data_path = "{}/organisms".format(DATA_PATH) 65 | if not os.path.exists(organisms_data_path): 66 | os.mkdir(organisms_data_path) 67 | 68 | if not os.path.exists(organisms_data_path + '/state_iML1515_chassis_H.pkl'): 69 | logging.error("Please run calculate_organisms script") 70 | raise NotReady 71 | 72 | 73 | Test_organism_H = unpickle(file_name = "{}".format('Test_organism_H'), type = 'state', folder_address = organisms_data_path) 74 | ecoli_chassis_H = unpickle(file_name = "{}".format('iML1515_chassis_H'), type = 'state', folder_address = organisms_data_path) 75 | detectable_cmpds_H = unpickle(file_name = "{}".format('detectable_cmpds_H'), type = 'state', folder_address = organisms_data_path) 76 | core_ecoli_H = unpickle(file_name = "{}".format('core_ecoli_H'), type = 'state', folder_address = organisms_data_path) 77 | bsubtilis_H = unpickle(file_name = "{}".format('bsubtilis_H'), type = 'state', folder_address = organisms_data_path) 78 | iJO1366_chassis_H = unpickle(file_name = "{}".format('iJO1366_chassis_H'), type = 'state', folder_address = organisms_data_path) 79 | 80 | 81 | Test_organism_noH = unpickle(file_name = "{}".format('Test_organism_noH'), type = 'state', folder_address = organisms_data_path) 82 | ecoli_chassis_noH = unpickle(file_name = "{}".format('iML1515_chassis_noH'), type = 'state', folder_address = organisms_data_path) 83 | detectable_cmpds_noH = unpickle(file_name = "{}".format('detectable_cmpds_noH'), type = 'state', folder_address = organisms_data_path) 84 | core_ecoli_noH = unpickle(file_name = "{}".format('core_ecoli_noH'), type = 'state', folder_address = organisms_data_path) 85 | bsubtilis_noH = unpickle(file_name = "{}".format('bsubtilis_noH'), type = 'state', 
folder_address = organisms_data_path) 86 | iJO1366_chassis_noH = unpickle(file_name = "{}".format('iJO1366_chassis_noH'), type = 'state', folder_address = organisms_data_path) 87 | -------------------------------------------------------------------------------- /pathway.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains the pathway objects for visualisation and export 3 | """ 4 | 5 | # General utilities 6 | import logging 7 | import csv 8 | import copy 9 | import json 10 | import pickle 11 | # RP3 specific objects 12 | from compound import Compound 13 | from move import Move 14 | from chemical_compounds_state import ChemicalCompoundState 15 | from organisms import Test_organism_H 16 | 17 | 18 | class Pathway(object): 19 | """ 20 | Pathway object. 21 | Has methods for quick visualisation as well as export to json (for visualisation and treatment) 22 | Also has cloning and compound addition 23 | """ 24 | logger = logging.getLogger(__name__) 25 | 26 | def __init__(self, first_iteration = -1, target = None, compounds = [], moves = [], 27 | file_to_save = "temporary_pathway_json", main_layer = True, 28 | organism = Test_organism_H, edges = [], nodes_compounds = [], nodes_transformations = []): 29 | """ 30 | Initialising a pathway object. 31 | A compound has an ID and a dict with chemical structures 32 | A reaction links 2 compounds and has a smart, scores etc 33 | self.compounds is a dictionnary of ID: chemical_struct_of_compound 34 | Remarks: 35 | - a pathway can only be defined for a fully solved Node (ie: in the Tree, not in rollout) 36 | - it needs to verify at each step what products are formed 37 | as those could have been deleted in the tree search (already in state) 38 | """ 39 | self.first_iteration = first_iteration 40 | self.target = target 41 | self.organism = organism 42 | self.main_layer = main_layer 43 | self.compounds = compounds 44 | self.moves = moves 45 | self.file_to_save = file_to_save 46 | self.nodes_compounds = nodes_compounds 47 | self.nodes_transformations = nodes_transformations 48 | self.edges = edges 49 | self.pathway_as_dict = None 50 | 51 | def __eq__(self, other): 52 | """ 53 | Two pathways are identical if their compounds and moves are identical 54 | """ 55 | node_compounds_equal = len(self.nodes_compounds) == len(other.nodes_compounds) 56 | node_trasnfo_equal = len(self.nodes_transformations) == len(other.nodes_transformations) 57 | node_edges_equal = len(self.edges) == len(other.edges) 58 | compounds_equal = len(self.compounds) == len(other.compounds) 59 | if compounds_equal: 60 | for compound in self.compounds: 61 | in_other = compound.in_list(other.compounds, main_layer = True) 62 | if not in_other: 63 | compounds_equal = False 64 | break 65 | moves_equal = len(self.moves) == len(other.moves) 66 | if moves_equal: 67 | for move in self.moves: 68 | in_other = move.in_list(other.moves, main_layer = True) 69 | if not in_other: 70 | moves_equal = False 71 | break 72 | equality = compounds_equal and moves_equal and node_compounds_equal and node_trasnfo_equal and node_edges_equal 73 | return (equality) 74 | 75 | def __repr__(self): 76 | """ 77 | Print list of compoudns and list of moves 78 | """ 79 | rep = 'Compound \n' 80 | for compound in self.compounds: 81 | rep = rep + str(compound) + "\n" 82 | rep = rep + 'Edges \n' 83 | for edge in self.edges: 84 | rep = rep + edge["data"]["id"] + "\n" 85 | return(rep) 86 | 87 | def all_attributes_with_nodes(self): 88 | """ 89 | Print list of compounds and list of moves 90 
| """ 91 | rep = 'Compound \n' 92 | for compound in self.compounds: 93 | rep = rep + str(compound) + "\n" 94 | rep = rep + 'Edges \n' 95 | for edge in self.edges: 96 | rep = rep + edge["data"]["id"] + "\n" 97 | for node_cp in self.nodes_compounds: 98 | rep = rep + node_cp["data"]["id"] + "\n" 99 | for node_tf in self.nodes_transformations: 100 | rep = rep + node_tf["data"]["id"] + "\n" 101 | return(rep) 102 | 103 | def set_file_to_save(self, file_to_save): 104 | self.file_to_save = file_to_save 105 | 106 | def set_main_layer(self, main_layer): 107 | self.main_layer = main_layer 108 | 109 | def set_first_iteration(self, first_iteration): 110 | self.first_iteration = first_iteration 111 | 112 | def clone(self): 113 | """ Cloning """ 114 | duplicated_pathway = Pathway( 115 | first_iteration=self.first_iteration, 116 | organism=self.organism, 117 | main_layer=self.main_layer, 118 | target=self.target, 119 | compounds=[cmp.clone() for cmp in self.compounds], 120 | moves=[mv.clone() for mv in self.moves], 121 | edges=copy.deepcopy(self.edges), 122 | nodes_compounds=copy.deepcopy(self.nodes_compounds), 123 | nodes_transformations=copy.deepcopy(self.nodes_transformations), 124 | ) 125 | return duplicated_pathway 126 | 127 | def save(self, file_name = None, folder_address = "pickled_data"): 128 | if file_name is None: 129 | base_name = self.file_to_save 130 | file_saving = open('{}/pathway_{}.pkl'.format(folder_address, file_name), 'wb') 131 | pickle.dump(self, file_saving) 132 | 133 | def add_compound(self, compound, in_sink = None, is_source = 0): 134 | """ 135 | Adding a compound object to the pathway. 136 | """ 137 | if is_source: 138 | self.target = compound 139 | if not compound.in_list(self.compounds, main_layer = self.main_layer): 140 | self.compounds.append(compound) 141 | if in_sink is None: 142 | if self.organism.compound_in_state(compound): 143 | in_sink = 1 144 | else: 145 | in_sink = 0 146 | data_dict = { 147 | 'SMILES': compound.csmiles, 148 | 'inSink':in_sink, 149 | 'isSource': is_source, 150 | 'InChI': compound.InChI, 151 | 'Names': compound.synonyms_names, # If I want synonyms, keep them 152 | 'id': compound.InChIKey, 153 | 'type': 'compound', 154 | 'Rule ID': None, 155 | 'EC number': None, 156 | 'Reaction SMILES': None, 157 | 'Diameter': None, 158 | 'Score': None, 159 | 'Iteration': None 160 | } 161 | self.nodes_compounds.append({"data": data_dict}) 162 | else: 163 | self.logger.warning("Compound {} is already in compounds".format(compound)) 164 | 165 | def clean_up(self, move, depth): 166 | str = "{}-{}-{}-{}".format(move.compound_id, move.rid, move.set_number, depth) 167 | return(str) 168 | 169 | def add_reaction(self, move, depth = 1): 170 | """ 171 | Adding a reaction to the pathway. 
172 | """ 173 | if not move.in_list(self.moves): 174 | self.moves.append(move) 175 | move_compound_id_present = False 176 | for cp in self.compounds: 177 | for sym in cp.synonyms_names: 178 | if sym == move.compound_id: 179 | move_compound_id_present = True 180 | move_compound_ID = cp.InChIKey 181 | break 182 | if not move_compound_id_present: 183 | self.logger.warning("Trying to add move {} when compound {} is not in the pathway".format(move, move.compound_id)) 184 | 185 | for product in move.product_list: 186 | if not product.in_list(self.compounds): 187 | # Adding the products of the pathway 188 | self.add_compound(product, in_sink = None, is_source = 0) 189 | 190 | cleaned_up_moved = self.clean_up(move, depth) 191 | try: 192 | diameter = int(move.rid.split("-")[3]) 193 | except: 194 | diameter = 42 195 | data_dict = { 196 | "SMILES": None, 197 | "inSink": None, 198 | "isSource": None, 199 | "InChI": None, 200 | "Names": None, 201 | "id": cleaned_up_moved, 202 | "type": "reaction", 203 | "Rule ID": move.synonyms, 204 | "EC number": move.EC_numbers, 205 | "Reaction SMILES": move.rsmiles, 206 | "Diameter": diameter, 207 | "Score": move.biological_score, 208 | "ChemicalScore": move.chemical_score, 209 | "Iteration": depth, 210 | "Stoechiometry": move.stoechiometry 211 | } 212 | self.nodes_transformations.append({"data": data_dict}) 213 | # Adding all the edges: 214 | # from compound to reaction (move as target, compound as source) 215 | # From reactions to compound (move as source, product as target) 216 | data_dict = { 217 | "target" : cleaned_up_moved, 218 | "source" : move_compound_ID, 219 | "id" : "{}_=>_{}".format(cleaned_up_moved, move.compound_id) 220 | } 221 | self.edges.append({"data": data_dict}) 222 | for product in move.product_list: 223 | data_dict = { 224 | "target" : product.name, 225 | "source" : cleaned_up_moved, 226 | "id" : "{}_=>_{}".format(product.name, cleaned_up_moved) 227 | } 228 | self.edges.append({"data": data_dict}) 229 | else: 230 | self.logger.debug("Move {} is already in moves".format(move)) 231 | 232 | def jsonify_scope_viewer(self): 233 | """ 234 | Use scope viewer to visualise pathways before the DBTL advances more. 235 | THe json file is a dict composed of one item called elements. 236 | The elements values is a dict composed of "nodes" and "edges" 237 | Nodes is a list of compounds, or reactions, with: 238 | """ 239 | if self.pathway_as_dict is None: 240 | self.nodes_compounds.reverse() 241 | self.pathway_as_dict = {"elements": {"nodes": self.nodes_compounds + self.nodes_transformations, 242 | "edges": self.edges}} 243 | with open(self.file_to_save, "w") as json_handler: 244 | json.dump(self.pathway_as_dict, json_handler, indent = 2) 245 | 246 | def export_as_json_dict(self): 247 | """ 248 | To export as a dict without needing to read and write the json. 249 | """ 250 | if self.pathway_as_dict is None: 251 | self.nodes_compounds.reverse() 252 | self.pathway_as_dict = {"elements": {"nodes": self.nodes_compounds + self.nodes_transformations, 253 | "edges": self.edges}} 254 | return(self.pathway_as_dict) 255 | 256 | 257 | def __cli(): 258 | """Command line interface. 
Was actually used to make quick 259 | tests before implementing them in the testing file""" 260 | logging.basicConfig( 261 | stream=sys.stderr, level=logging.INFO, 262 | datefmt='%d/%m/%Y %H:%M:%S', 263 | format='%(asctime)s -- %(levelname)s -- %(message)s' 264 | ) 265 | logging.warning("CLI is not available for Pathway") 266 | 267 | 268 | if __name__ == "__main__": 269 | __cli() 270 | -------------------------------------------------------------------------------- /pathway_scoring.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines the pathway scoring functions. 3 | Can take as inputs both Pathway objects and json dictionnaries exported from Pathways. 4 | """ 5 | 6 | import random 7 | import numpy as np 8 | import json 9 | import os 10 | # RP3 - specific objects 11 | from pathway import Pathway 12 | 13 | 14 | def geo_mean(iterable): 15 | a = np.array(iterable) 16 | return a.prod()**(1.0/len(a)) 17 | 18 | # def geo_mean_overflow(iterable): 19 | # a = np.log(iterable) 20 | # return np.exp(a.sum()/len(a)) 21 | 22 | class PathwayScoring(object): 23 | """ 24 | Defines Pathway Scorer object. 25 | """ 26 | def __init__(self, scoring_function = None, scoring_json_function = None): 27 | if scoring_function is None: 28 | pass 29 | else: 30 | self.scoring_function = scoring_function 31 | if scoring_json_function is None: 32 | pass 33 | else: 34 | self.scoring_json_function = scoring_json_function 35 | 36 | def __repr__(self): 37 | """ 38 | Name the used scorer. 39 | Raises an error is the class is not properly instantiated 40 | """ 41 | return(self.name) 42 | 43 | def calculate(self, pathway): 44 | score = self.scoring_function(pathway) 45 | return(score) 46 | 47 | def calculate_json(self, pathway): 48 | score = self.scoring_json_function(pathway) 49 | return(score) 50 | 51 | def pseudo_random(pathway): 52 | score = random.uniform(0, 10) 53 | return(score) 54 | 55 | class ConstantPathwayScoring(PathwayScoring): 56 | """ 57 | Returns a constant reward, whichever the pathway. 58 | """ 59 | def __init__(self, reward = 10): 60 | PathwayScoring.__init__(self) 61 | self.reward = reward 62 | self.scoring_function = self.scoring_function() 63 | self.scoring_json_function = self.scoring_json_function() 64 | self.name = "ConstantPathwayScoring of {}".format(reward) 65 | 66 | def set_reward(self,reward): 67 | # For changing the reward of the object 68 | self.reward = reward 69 | self.scoring_function = self.scoring_function() 70 | self.scoring_json_function = self.scoring_json_function() 71 | 72 | def scoring_function(self): 73 | def pathway_scoring(pathway): 74 | return(self.reward) 75 | return(pathway_scoring) 76 | 77 | def scoring_json_function(self): 78 | def pathway_scoring(pathway): 79 | return(self.reward) 80 | return(pathway_scoring) 81 | 82 | class BiologicalPathwayScoring(PathwayScoring): 83 | """ 84 | Returns the geometric mean of biological scores in the Pathway. 
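For example, a two-step pathway whose reaction nodes carry biological scores 0.5 and 0.8 scores sqrt(0.5 * 0.8) ≈ 0.63, so a single poorly scored step penalises the whole pathway.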
85 | """ 86 | def __init__(self): 87 | PathwayScoring.__init__(self) 88 | self.scoring_function = self.scoring_function() 89 | self.scoring_json_function = self.scoring_json_function() 90 | self.name = "BiologicalPathwayScoring" 91 | 92 | def scoring_function(self): 93 | def pathway_scoring(pathway): 94 | scores = [] 95 | for move in pathway.nodes_transformations: 96 | scores.append(move["data"]["Score"]) 97 | return(geo_mean(scores)) 98 | return(pathway_scoring) 99 | 100 | def scoring_json_function(self): 101 | def pathway_scoring(pathway): 102 | scores = [] 103 | for move in pathway["elements"]["nodes"]: 104 | if move["data"]["type"] == "reaction": 105 | scores.append(move["data"]["Score"]) 106 | return(geo_mean(scores)) 107 | return(pathway_scoring) 108 | 109 | class ChemicalPathwayScoring(PathwayScoring): 110 | """ 111 | Returns the geometric mean of chemical scores in the Pathway. 112 | """ 113 | def __init__(self): 114 | PathwayScoring.__init__(self) 115 | self.scoring_function = self.scoring_function() 116 | self.scoring_json_function = self.scoring_json_function() 117 | self.name = "ChemicalPathwayScoring" 118 | 119 | def scoring_function(self): 120 | def pathway_scoring(pathway): 121 | scores = [] 122 | for move in pathway.nodes_transformations: 123 | scores.append(move["data"]["ChemicalScore"]) 124 | return(geo_mean(scores)) 125 | return(pathway_scoring) 126 | 127 | def scoring_json_function(self): 128 | def pathway_scoring(pathway): 129 | scores = [] 130 | for move in pathway["elements"]["nodes"]: 131 | if move["data"]["type"] == "reaction": 132 | scores.append(move["data"]["ChemicalScore"]) 133 | return(geo_mean(scores)) 134 | return(pathway_scoring) 135 | 136 | class BiochemicalPathwayScoring(PathwayScoring): 137 | """ 138 | Returns the geometric mean of biochemical scores in the Pathway. 
139 | """ 140 | def __init__(self): 141 | PathwayScoring.__init__(self) 142 | self.scoring_function = self.scoring_function() 143 | self.scoring_json_function = self.scoring_json_function() 144 | self.name = "ChemicalPathwayScoring" 145 | 146 | def scoring_function(self): 147 | def pathway_scoring(pathway): 148 | scores = [] 149 | for move in pathway.nodes_transformations: 150 | scores.append(move["data"]["ChemicalScore"] * move["data"]["Score"]) 151 | return(geo_mean(scores)) 152 | return(pathway_scoring) 153 | 154 | def scoring_json_function(self): 155 | def pathway_scoring(pathway): 156 | scores = [] 157 | for move in pathway["elements"]["nodes"]: 158 | if move["data"]["type"] == "reaction": 159 | scores.append(move["data"]["Score"] * move["data"]["ChemicalScore"]) 160 | return(geo_mean(scores)) 161 | return(pathway_scoring) 162 | 163 | RandomPathwayScorer = PathwayScoring(scoring_function = pseudo_random) 164 | constant_pathway_scoring = ConstantPathwayScoring(reward = 10) 165 | null_pathway_scoring = ConstantPathwayScoring(reward = 0) 166 | biological_pathway_scoring = BiologicalPathwayScoring() 167 | chemical_pathway_scoring = ChemicalPathwayScoring() 168 | biochemical_pathway_scoring = BiochemicalPathwayScoring() 169 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.commitizen] 2 | name = "cz_conventional_commits" 3 | version = "1.1.0" 4 | version_provider = "commitizen" 5 | tag_format = "$version" 6 | version_type = "semver2" -------------------------------------------------------------------------------- /representation.py: -------------------------------------------------------------------------------- 1 | """ 2 | The aim of this file is to define a representation class for tree printing. 3 | It is useful to switch between the 2 for terminal or text file output. 4 | """ 5 | 6 | class Representation(object): 7 | """ Contains all things necessary for representing my nodes and trees""" 8 | def __init__(self, delimiter = "|", color = "red", printing_solved = "- solved"): 9 | self.delimiter = delimiter # Delimiter between nodes 10 | if color == "red": 11 | self.color_begin = '\033[91m' 12 | self.color_end = '\033[0m' 13 | elif color == "": 14 | self.color_begin = '' 15 | self.color_end = '' 16 | else: 17 | raise NotImplementedError 18 | self.printing_solved = printing_solved 19 | 20 | Test_representation = Representation(delimiter = "|", color = "red", printing_solved = "") 21 | Test_to_file = Representation(delimiter = "|", color = "", printing_solved = "- solved") 22 | -------------------------------------------------------------------------------- /rewarding.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines the possible rewards for rollout. 3 | Can be augmented for more complex policies using simialr scheme as Rollout or UCT policies. 4 | Is defined through CLI in the Tree script. 5 | """ 6 | 7 | class RolloutRewards(object): 8 | """ 9 | Defines penalty and rewards for the rollout if it's in the chasis. 
10 | """ 11 | def __init__(self, penalty, full_state_reward): 12 | self.penalty = penalty 13 | self.full_state_reward = full_state_reward 14 | 15 | def __repr__(self): 16 | """Reward representation is its values""" 17 | return("Penalty is {} and full state reward is {}".format(self.penalty, self.full_state_reward)) 18 | 19 | Basic_Rollout_Reward = RolloutRewards(penalty = -1, full_state_reward = 2) 20 | -------------------------------------------------------------------------------- /rule_sets_examples.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains the rules examples that will be used throughout the tests. 3 | The aim is to 4 | """ 5 | 6 | import logging 7 | import csv 8 | import os 9 | 10 | rule_10_subset_address = "{}/tests/data/rules_r10_subset.tsv".format(os.path.dirname(__file__)) 11 | applicable_rules_10_dict = {} 12 | with open(rule_10_subset_address, "r") as csv_file: 13 | fieldnames = ["Rule_ID", "Reaction_ID", "Diameter", "Direction", "Rule_order", "Rule_SMARTS", "Substrate_ID", "Substrate_SMILES", "Product_IDs", "Product_SMILES", "Rule_SMILES", "Rule_SMARTS_lite"] 14 | csv_reader = csv.DictReader(csv_file, delimiter = '\t', fieldnames = fieldnames) 15 | next(csv_reader) # skip first line 16 | for element in csv_reader: 17 | applicable_rules_10_dict[element["Rule_ID"]] = {"Rule_SMARTS": element["Rule_SMARTS"], 18 | "biological_score": 1, 19 | "EC_number": ["EC: None"], 20 | "Rule_SMILES": element["Rule_SMILES"]} 21 | 22 | 23 | rule_2_subset_address = "{}/tests/data/rules_r2_subset.tsv".format(os.path.dirname(__file__)) 24 | applicable_rules_2_dict = {} 25 | with open(rule_2_subset_address, "r") as csv_file: 26 | fieldnames = ["Rule_ID", "Reaction_ID", "Diameter", "Direction", "Rule_order", "Rule_SMARTS", "Substrate_ID", "Substrate_SMILES", "Product_IDs", "Product_SMILES", "Rule_SMILES", "Rule_SMARTS_lite"] 27 | csv_reader = csv.DictReader(csv_file, delimiter = '\t', fieldnames = fieldnames) 28 | next(csv_reader) # skip first line 29 | for element in csv_reader: 30 | applicable_rules_2_dict[element["Rule_ID"]] = {"Rule_SMARTS": element["Rule_SMARTS"], 31 | "biological_score": 1, 32 | "EC_number": ["EC: None"], 33 | "Rule_SMILES": element["Rule_SMILES"]} 34 | 35 | 36 | rule_mixed_subset_address = "{}/tests/data/rules_mixed_subset.tsv".format(os.path.dirname(__file__)) 37 | applicable_rules_mixed_dict = {} 38 | with open(rule_mixed_subset_address, "r") as csv_file: 39 | fieldnames = ["Rule_ID", "Reaction_ID", "Diameter", "Direction", "Rule_order", "Rule_SMARTS", "Substrate_ID", "Substrate_SMILES", "Product_IDs", "Product_SMILES", "Rule_SMILES", "Rule_SMARTS_lite"] 40 | csv_reader = csv.DictReader(csv_file, delimiter = '\t', fieldnames = fieldnames) 41 | next(csv_reader) # skip first line 42 | for element in csv_reader: 43 | applicable_rules_mixed_dict[element["Rule_ID"]] = {"Rule_SMARTS": element["Rule_SMARTS"], 44 | "biological_score": 1, 45 | "EC_number": ["EC: None"], 46 | "Rule_SMILES": element["Rule_SMILES"]} 47 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name="rp3", 8 | version="0.0", 9 | author="Mathilde Koch", 10 | author_email="mathilde.koch@inra.fr", 11 | description="Perform retrosynthesis with Monte-Carlo Tree Search algorithm", 12 | 
long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/brsynth/RetroPath3", 15 | packages=find_packages(), 16 | python_requires=">=3.6", 17 | include_package_data=True, 18 | ) 19 | -------------------------------------------------------------------------------- /supplement_finder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Find supplements to complete a Tree. 3 | Read argparser for details of arguments. 4 | Principle is to identify compounds needed to complete chemical states. 5 | """ 6 | 7 | # General utilities 8 | import os 9 | import sys 10 | import time 11 | import signal 12 | import datetime 13 | import logging 14 | import argparse 15 | import pickle 16 | import json 17 | 18 | import random 19 | 20 | from Tree import Tree 21 | 22 | def unpickle(file_name, type = "tree", folder_address = "pickled_data"): 23 | with open('{}/{}_{}.pkl'.format(folder_address, type, file_name), 'rb') as input: 24 | return(pickle.load(input)) 25 | 26 | 27 | def run(tree, number_suggestions, rescued_states, folder_to_save, database = None): 28 | potential_supplements = {} 29 | # Extracting all potential supplements from the Tree. 30 | nodes_to_treat = [tree.root_node] 31 | while nodes_to_treat != []: 32 | node = nodes_to_treat[0] 33 | del nodes_to_treat[0] 34 | state = node.state 35 | supplement = state.GetSupplement_from_InChI_Keys() 36 | if not supplement is None: 37 | if supplement.InChIKey in potential_supplements.keys(): 38 | potential_supplements[supplement.InChIKey]["rescued_states"] = potential_supplements[supplement.InChIKey]["rescued_states"] + 1 39 | else: 40 | information_to_keep = {"structure": supplement.csmiles, 41 | "name_from_MCTS": supplement.name, 42 | "synonyms_names": supplement.synonyms_names, 43 | "rescued_states":1} 44 | potential_supplements[supplement.InChIKey] = information_to_keep 45 | if node.terminal: 46 | pass 47 | else: 48 | for child in node.children: 49 | nodes_to_treat.append(child) 50 | logging.info("Potential supplements without filtering: {}".format(len(potential_supplements.keys()))) 51 | # Sorting according to number of rescued states 52 | sorted_supplements = [suppl for suppl, value in sorted(potential_supplements.items(), key=lambda item: item[1]["rescued_states"], reverse=True) if value["rescued_states"] >= rescued_states] 53 | logging.info("Potential supplements after filtering with {} rescued states: {}".format(rescued_states, len(sorted_supplements))) 54 | 55 | # Filtering according to presence in a database of interest 56 | if database is None: 57 | supplements_of_interest = sorted_supplements 58 | logging.warning("Not checking availability within a Database of interest") 59 | else: 60 | supplements_of_interest = [] 61 | for element in sorted_supplements: 62 | if element in database.keys(): 63 | logging.info("Element {} (with {} pathways) is in database ({})".format(element, potential_supplements[element], database[element])) 64 | supplements_of_interest.append(element) 65 | # Filtering accoridng to maximal number of allwoed suggestions 66 | if len(supplements_of_interest) > number_suggestions: 67 | supplements_of_interest = supplements_of_interest[0:number_suggestions] 68 | logging.info("Keeping {} potential supplements".format(number_suggestions)) 69 | assert len(supplements_of_interest) == number_suggestions 70 | else: 71 | logging.info("Keeping all supplements as there are only {} ({} allowed)".format(len(supplements_of_interest), 
number_suggestions)) 72 | 73 | # Extracting pathways 74 | for supplement_to_extract in supplements_of_interest: 75 | # setting up search 76 | found_pathways = 0 77 | folder_to_save_pathways = "{}/{}".format(folder_to_save, supplement_to_extract.split("-")[0]) 78 | if not os.path.exists(folder_to_save_pathways): 79 | os.mkdir(folder_to_save_pathways) 80 | # searching 81 | tree.set_folder_to_save(folder_to_save_pathways) 82 | nodes_to_treat = [tree.root_node] 83 | while nodes_to_treat != []: 84 | node = nodes_to_treat[0] 85 | del nodes_to_treat[0] 86 | state = node.state 87 | supplement = state.GetSupplement_from_InChI_Keys() 88 | if not supplement is None: 89 | if supplement.InChIKey == supplement_to_extract: 90 | found_pathways = found_pathways + 1 91 | found_pathway = tree.extract_pathway_from_bottom(node, iteration=found_pathways) 92 | if node.terminal: 93 | pass 94 | else: 95 | for child in node.children: 96 | nodes_to_treat.append(child) 97 | logging.info("Extract {} pathways for {}".format(found_pathways, supplement_to_extract)) 98 | 99 | def __cli(): 100 | """ 101 | Command line interface. 102 | """ 103 | 104 | d = "Arguments for supplement finder. Find compounds that can complete a Tree and be suppelmented to media." 105 | parser = argparse.ArgumentParser(description=d) 106 | parser.add_argument("--tree_to_complete", help="Tree to find supplements to", default="end_search") 107 | parser.add_argument("--folder_tree_to_complete", help="Tree to find supplements to", default=None) 108 | 109 | parser.add_argument("--number_suggestions", default = 20, 110 | help = "Maximum number of suggestions returned") 111 | parser.add_argument("--rescued_states", default = 1, 112 | help = "Minimum number of times the compound must complete states") 113 | parser.add_argument("--folder_to_save", default="testing_supplement_finder") 114 | parser.add_argument("--terminal", help="Default logger is within the new folder_to_save, switch to terminal if specified", 115 | action='store_true', default=False) 116 | parser.add_argument("--database_address", default=None, 117 | help = "Address of a database to check availability. Json format required. Keys are inchikeys. 
Values are names, but could be cost or any metric of interest") 118 | 119 | args = parser.parse_args() 120 | folder_to_save = args.folder_to_save 121 | if not os.path.exists(folder_to_save): 122 | os.makedirs(folder_to_save, exist_ok=True) 123 | 124 | if args.terminal is True: 125 | logging.basicConfig( 126 | stream = sys.stderr, 127 | level=logging.INFO, 128 | datefmt='%d/%m/%Y %H:%M:%S', 129 | format='%(asctime)s -- %(levelname)s -- %(message)s' 130 | ) 131 | else: 132 | logging.basicConfig( 133 | stream = open("{}/{}.log".format(folder_to_save, "supplement_finder"), "w"), 134 | level=logging.INFO, 135 | datefmt='%d/%m/%Y %H:%M:%S', 136 | format='%(asctime)s -- %(levelname)s -- %(message)s' 137 | ) 138 | completed_tree = unpickle(file_name=args.tree_to_complete, 139 | type='tree', 140 | folder_address="{}/pickles".format(args.folder_tree_to_complete)) 141 | if args.database_address is None: 142 | database = None 143 | else: 144 | with open(args.database_address, "r") as json_file: 145 | database = json.load(json_file) 146 | 147 | run(completed_tree, number_suggestions = args.number_suggestions, 148 | rescued_states =args.rescued_states, folder_to_save = args.folder_to_save, 149 | database = database) 150 | 151 | 152 | if __name__ == "__main__": 153 | __cli() 154 | -------------------------------------------------------------------------------- /tests/data/state_BOPG_BSAB_GPRL.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/tests/data/state_BOPG_BSAB_GPRL.pkl -------------------------------------------------------------------------------- /tests/data/tree_pipecolate_test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/tests/data/tree_pipecolate_test.pkl -------------------------------------------------------------------------------- /tests/generated_jsons/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitkeep 3 | !.gitignore -------------------------------------------------------------------------------- /tests/generated_jsons/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/tests/generated_jsons/.gitkeep -------------------------------------------------------------------------------- /tests/test_Filters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import pytest 3 | 4 | from utilities.chemtools.Filters import Filters 5 | from rdkit.Chem import MolFromSmiles, MolToSmiles 6 | from rdkit.Chem import MolFromInchi, MolToInchi 7 | 8 | 9 | def test_init(): 10 | assert Filters() 11 | 12 | def test_copy_properties(): 13 | # TODO: add some tests here 14 | pass 15 | 16 | def test_keep_biggest(): 17 | mol = Filters.keep_biggest(MolFromSmiles('CCCC.CC')) 18 | assert MolToSmiles(mol) == 'CCCC' 19 | mol = Filters.keep_biggest(MolFromSmiles('CCCCC.CC.[H].CCC')) 20 | assert MolToSmiles(mol) == 'CCCCC' 21 | mol = Filters.keep_biggest(MolFromInchi('InChI=1S/C5H12N2O2.C4H7NO4/c6-3-1-2-4(7)5(8)9;5-2(4(8)9)1-3(6)7/h4H,1-3,6-7H2,(H,8,9);2H,1,5H2,(H,6,7)(H,8,9)/t4-;2-/m00/s1')) 22 | assert MolToInchi(mol) == 'InChI=1S/C4H7NO4/c5-2(4(8)9)1-3(6)7/h2H,1,5H2,(H,6,7)(H,8,9)/t2-/m0/s1' 23 | mol = 
Filters.keep_biggest(MolFromInchi('InChI=1S/Mo.4O/q;;;2*-1')) 24 | assert MolToInchi(mol) == 'InChI=1S/Mo' 25 | 26 | def test_commute_inchi(): 27 | inchi = 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)/p-1' 28 | mol = Filters.commute_inchi(MolFromInchi(inchi)) 29 | assert MolToInchi(mol) == inchi 30 | 31 | def test_remove_isotope(): 32 | mol = Filters.remove_isotope(MolFromSmiles('c1cc[14cH]cc1')) 33 | assert MolToSmiles(mol) == ('c1ccccc1') 34 | 35 | def test_neutralise_charge(): 36 | mol = Filters.neutralise_charge(MolFromSmiles('CC(C(=O)[O-])O')) 37 | assert MolToSmiles(mol) == ('CC(O)C(=O)O') 38 | 39 | def test_add_hydrogen(): 40 | mol = Filters.add_hydrogen(MolFromSmiles('CC(O)C(=O)O')) 41 | assert MolToSmiles(mol) == '[H]OC(=O)C([H])(O[H])C([H])([H])[H]' 42 | mol = Filters.add_hydrogen(MolFromSmiles('CC(C(=O)[O-])O')) 43 | assert MolToSmiles(mol) == '[H]OC([H])(C(=O)[O-])C([H])([H])[H]' 44 | 45 | def test_kekulize(): 46 | mol = Filters.kekulize(MolFromSmiles('c1ccccc1')) 47 | assert MolToSmiles(mol) == 'C1=CC=CC=C1' 48 | 49 | def test_remove_stereo(): 50 | mol = Filters.remove_stereo(MolFromSmiles('C[C@@H](C(=O)[O-])O')) 51 | assert MolToSmiles(mol) == 'CC(O)C(=O)[O-]' 52 | mol = Filters.remove_stereo(MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+')) 53 | assert MolToSmiles(mol) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=CC1=C1C(O)=Nc2ccccc21' 54 | mol = Filters.commute_inchi(mol) # Expected to change tautomerism 55 | assert MolToSmiles(mol) == 'O=C1NC(C2=CNC3=C2C=C(O)C=C3)=CC1=C1C(=O)NC2=CC=CC=C21' 56 | -------------------------------------------------------------------------------- /tests/test_Standardizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import pytest 3 | 4 | import inspect 5 | from utilities.chemtools.Standardizer import Standardizer 6 | from utilities.chemtools.Sequences import sequence_tunable 7 | from rdkit.Chem import MolFromSmiles, MolToSmiles 8 | from rdkit.Chem import MolFromInchi, MolToInchi 9 | 10 | def test_init(): 11 | def sequence_dummy(mol): 12 | return mol 13 | assert Standardizer() 14 | assert Standardizer(sequence_fun=sequence_dummy) 15 | assert Standardizer(sequence_fun=sequence_dummy, params=dict()) 16 | 17 | def test_sequence_minimal(): 18 | # Violacein 19 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+') 20 | ans = Standardizer().compute(mol) 21 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+' 22 | assert MolToSmiles(ans) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=C/C1=C1\\C(O)=Nc2ccccc21' 23 | # L-Lactate 24 | mol = MolFromInchi('') 25 | 26 | def test_sequence_rr_legacy(): 27 | # Violacein 28 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+') 29 | ans = Standardizer(sequence_fun='sequence_rr_legacy').compute(mol) 30 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+' 31 | assert MolToSmiles(ans) == '[H]OC1=NC(C2=C([H])N([H])C3=C2C([H])=C(O[H])C([H])=C3[H])=C([H])/C1=C1\\C(O[H])=NC2=C([H])C([H])=C([H])C([H])=C21' 32 | 
33 | def test_sequence_tunable(): 34 | # Check default arguments 35 | args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations = inspect.getfullargspec(sequence_tunable) 36 | default_params = dict(zip(args[-len(defaults):], defaults)) 37 | assert default_params == { 38 | 'OP_REMOVE_ISOTOPE':True, 39 | 'OP_NEUTRALISE_CHARGE': True, 40 | 'OP_REMOVE_STEREO': False, 41 | 'OP_COMMUTE_INCHI': False, 42 | 'OP_KEEP_BIGGEST': True, 43 | 'OP_ADD_HYDROGEN': True, 44 | 'OP_KEKULIZE': True, 45 | 'OP_NEUTRALISE_CHARGE_LATE': True 46 | } 47 | # Violacein, default parameter 48 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+') 49 | ans = Standardizer(sequence_fun='sequence_tunable').compute(mol) 50 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+' 51 | assert MolToSmiles(ans) == '[H]OC1=NC(C2=C([H])N([H])C3=C2C([H])=C(O[H])C([H])=C3[H])=C([H])/C1=C1\\C(O[H])=NC2=C([H])C([H])=C([H])C([H])=C21' 52 | # Violacein, strip stereo 53 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+') 54 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_REMOVE_STEREO': True}).compute(mol) 55 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)' 56 | assert MolToSmiles(ans) == '[H]OC1=C([H])C2=C(C([H])=C1[H])N([H])C([H])=C2C1=C([H])C(=C2C(=O)N([H])C3=C([H])C([H])=C([H])C([H])=C23)C(=O)N1[H]' 57 | # Violacien, implicit Hs 58 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+') 59 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_ADD_HYDROGEN': False}).compute(mol) 60 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+' 61 | assert MolToSmiles(ans) == 'OC1=CC2=C(C=C1)NC=C2C1=C/C(=C2/C3=CC=CC=C3N=C2O)C(O)=N1' 62 | # Violacien, no kekulerization 63 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+') 64 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_KEKULIZE': False}).compute(mol) 65 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+' 66 | assert MolToSmiles(ans) == '[H]OC1=NC(c2c([H])n([H])c3c([H])c([H])c(O[H])c([H])c23)=C([H])/C1=C1\\C(O[H])=Nc2c([H])c([H])c([H])c([H])c21' 67 | # Violacien, strip stereo & implicit Hs & no kekulerization 68 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+') 69 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_REMOVE_STEREO': True, 'OP_ADD_HYDROGEN': False, 'OP_KEKULIZE': False}).compute(mol) 70 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)' 71 | assert 
MolToSmiles(ans) == 'O=C1NC(c2c[nH]c3ccc(O)cc23)=CC1=C1C(=O)Nc2ccccc21' 72 | # Lactate, default parameter 73 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O') 74 | ans = Standardizer(sequence_fun='sequence_tunable').compute(mol) 75 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)/t2-/m0/s1' 76 | assert MolToSmiles(ans) == '[H]OC(=O)[C@@]([H])(O[H])C([H])([H])[H]' 77 | # L-lactate, implicit Hs 78 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O') 79 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_ADD_HYDROGEN': False}).compute(mol) 80 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)/t2-/m0/s1' 81 | assert MolToSmiles(ans) == 'C[C@H](O)C(=O)O' 82 | # L-lactate, no stereo 83 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O') 84 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_REMOVE_STEREO': True}).compute(mol) 85 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)' 86 | assert MolToSmiles(ans) == '[H]OC(=O)C([H])(O[H])C([H])([H])[H]' 87 | # L-lactate, no charge neutralisation 88 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O') 89 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_NEUTRALISE_CHARGE': False, 'OP_NEUTRALISE_CHARGE_LATE': False}).compute(mol) 90 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)/p-1/t2-/m0/s1' 91 | assert MolToSmiles(ans) == '[H]O[C@]([H])(C(=O)[O-])C([H])([H])[H]' 92 | # L-lactate, implicit Hs & no stereo 93 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O') 94 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_ADD_HYDROGEN': False, 'OP_REMOVE_STEREO': True}).compute(mol) 95 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)' 96 | assert MolToSmiles(ans) == 'CC(O)C(=O)O' 97 | -------------------------------------------------------------------------------- /tests/test_Tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | Aim: test compound features 3 | """ 4 | 5 | # General utility packages 6 | import random 7 | import pickle 8 | 9 | # RP3 specific objects 10 | from compound import Compound, unpickle 11 | from chemical_compounds_state import ChemicalCompoundState 12 | from representation import Test_representation, Test_to_file 13 | from organisms import detectable_cmpds_H, Test_organism_H 14 | from organisms import detectable_cmpds_noH 15 | from rewarding import Basic_Rollout_Reward 16 | from MCTS_node import MCTS_node 17 | from UCT_policies import Biochemical_UCT_1, Nature_UCT, Classical_UCT_RAVE, Classical_UCT_with_bias, Classical_UCT 18 | from rule_sets_examples import applicable_rules_mixed_dict, applicable_rules_10_dict 19 | from Tree import Tree 20 | from rule_sets_similarity import get_rules_and_score, full_rules_forward_H, full_rules_retro_H, full_rules_forward_no_H, full_rules_retro_no_H 21 | 22 | 23 | 24 | random.seed(42) 25 | 26 | 27 | class TestTree(object): 28 | def test_equality_statement_not_expanded(self): 29 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]" 30 | compound = Compound(csmile, name = "821") 31 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 32 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 33 | 34 | test_Tree = Tree(root_state = state, itermax = 100) 35 | test_Tree_bis = Tree(root_state = state_bis, itermax = 100) 36 | assert test_Tree == 
test_Tree_bis 37 | 38 | def test_equality_statement_expanded(self): 39 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]" 40 | compound = Compound(csmile, name = "821") 41 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 42 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 43 | 44 | test_Tree = Tree(root_state = state, itermax = 100) 45 | test_Tree_bis = Tree(root_state = state_bis, itermax = 100) 46 | test_Tree.run_search() 47 | test_Tree_bis.run_search() 48 | assert test_Tree == test_Tree_bis 49 | 50 | def test_equality_statement_expanded_differnet_iter(self): 51 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]" 52 | compound = Compound(csmile, name = "821") 53 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 54 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 55 | 56 | test_Tree = Tree(root_state = state, itermax = 100) 57 | test_Tree_bis = Tree(root_state = state_bis, itermax = 1000) 58 | test_Tree.run_search() 59 | test_Tree_bis.run_search() 60 | assert test_Tree != test_Tree_bis 61 | 62 | def test_equality_statement_expanded_false(self): 63 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]" 64 | compound = Compound(csmile, name = "821") 65 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 66 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 67 | 68 | test_Tree = Tree(root_state = state, itermax = 100) 69 | test_Tree_bis = Tree(root_state = state_bis, itermax = 100) 70 | test_Tree.run_search() 71 | assert test_Tree != test_Tree_bis 72 | 73 | def test_equality_statement_expanded_states(self): 74 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]" 75 | compound = Compound(csmile, name = "821") 76 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 77 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 78 | 79 | test_Tree = Tree(root_state = state, itermax = 100, available_rules = applicable_rules_mixed_dict) 80 | test_Tree_bis = Tree(root_state = state_bis, itermax = 500, available_rules = applicable_rules_mixed_dict) 81 | test_Tree.run_search() 82 | test_Tree_bis.run_search() 83 | different_trees = test_Tree != test_Tree_bis 84 | same_states = test_Tree.equality_visited_states(test_Tree_bis) 85 | assert different_trees and same_states 86 | 87 | def test_equality_statement_expanded_states_other_policies(self): 88 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]" 89 | compound = Compound(csmile, name = "821") 90 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 91 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 92 | 93 | test_Tree = Tree(root_state = state, itermax = 100) 94 | test_Tree_bis = Tree(root_state = state_bis, itermax = 1000, UCT_policy = "Nature_UCT") 95 | test_Tree.run_search() 96 | 
test_Tree_bis.run_search() 97 | different_trees = test_Tree != test_Tree_bis 98 | same_states = test_Tree.equality_visited_states(test_Tree_bis) 99 | assert different_trees and same_states 100 | 101 | def test_pickling_unpickling(self, tmpdir): 102 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]" 103 | compound = Compound(csmile, name = "821") 104 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 105 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 106 | 107 | test_Tree = Tree(root_state = state, itermax = 10000, parallel = False, 108 | Rollout_policy = "Rollout_policy_first", 109 | UCT_policy = "Biochemical_UCT_1") 110 | test_Tree.run_search() 111 | test_Tree.save("test", folder_address = tmpdir) 112 | loaded_tree = unpickle(file_name = 'test', type = 'tree', folder_address = tmpdir) 113 | assert test_Tree == loaded_tree 114 | 115 | def test_pickling_unpickling_differ(self, tmpdir): 116 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]" 117 | compound = Compound(csmile, name = "821") 118 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 119 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised 120 | 121 | test_Tree = Tree(root_state = state, itermax = 10000, parallel = False, 122 | Rollout_policy = "Rollout_policy_first", 123 | UCT_policy = "Biochemical_UCT_1") 124 | test_Tree.run_search() 125 | test_Tree.save("test", folder_address = tmpdir) 126 | test_Tree.run_search() 127 | loaded_tree = unpickle(file_name = 'test', type = 'tree', folder_address = tmpdir) 128 | assert test_Tree != loaded_tree 129 | 130 | def test_biosensor(self): 131 | organism = detectable_cmpds_H 132 | inchi = "InChI=1S/C6H11NO2/c8-6(9)5-3-1-2-4-7-5/h5,7H,1-4H2,(H,8,9)" 133 | compound = Compound(InChI = inchi, name = "pipecolate") 134 | present_in_state_detectable = organism.compound_in_state(compound) 135 | if present_in_state_detectable: 136 | logging.warning("Removed compound from the detectable set to force enzymatic detection") 137 | organism.remove_cmpd_from_state(compound) 138 | rules, biological_scoring = get_rules_and_score(full_rules_forward_H = full_rules_forward_H, 139 | full_rules_retro_H = full_rules_retro_H, 140 | full_rules_forward_no_H = full_rules_forward_no_H, 141 | full_rules_retro_no_H = full_rules_retro_no_H, 142 | add_Hs = True, 143 | retro = False, 144 | diameters = [10, 12, 14, 16], 145 | small = False, 146 | c_name = None, 147 | filtering_EC = ["1.5.3.7", "1.5.3"]) 148 | state = ChemicalCompoundState([compound]) # state is not sanitised 149 | test_Tree = Tree(root_state = state, itermax = 1000, parallel = False, 150 | Rollout_policy = "Rollout_policy_first", 151 | UCT_policy = "Biochemical_UCT_1", available_rules = rules, organism = organism, 152 | biological_scorer = biological_scoring, 153 | folder_to_save = "tests/generated_jsons") 154 | test_Tree.run_search() 155 | loaded_tree = unpickle(file_name = 'pipecolate_test', type = 'tree', folder_address = "tests/data") 156 | same_states = test_Tree.equality_visited_states(loaded_tree) 157 | assert same_states 158 | -------------------------------------------------------------------------------- /tests/test_Utils.py: -------------------------------------------------------------------------------- 
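The tests below exercise the three reactor helpers in the order they are normally chained after firing a rule: standardize_chemical cleans a single RDKit molecule, standardize_results standardises the tuple of product sets returned by a rule application (also reporting the indices that failed), and handle_results converts the standardised products into InChIKeys, InChIs and SMILES. A minimal sketch of that chain, using only the call signatures asserted in these tests (the single lactate product is an illustrative, hypothetical input):

from rdkit import Chem
from utilities.reactor.Utils import standardize_results, handle_results

raw_products = ((Chem.MolFromSmiles('CC(O)C(=O)O'),),)  # one product set containing one product
std_products, failed_indices = standardize_results(raw_products, add_hs=True, rm_stereo=True)
inchikeys, inchis, smiles = handle_results(list_list_rdmol=std_products)  # parallel lists, one entry per product set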
1 | import rdkit 2 | from rdkit import Chem 3 | from rdkit.Chem import AllChem 4 | import pytest 5 | 6 | 7 | from utilities.reactor.Utils import standardize_chemical, standardize_results, handle_results 8 | 9 | 10 | class TestBasic2(object): 11 | 12 | def test_standardize_chemical_1(self): 13 | rdmol = Chem.MolFromSmiles('[H][O][C](=[O])[C]([H])([O][H])[C]([H])([H])[H]') 14 | rdmol_std_1 = standardize_chemical(rdmol, add_hs=False) 15 | assert Chem.MolToSmiles(rdmol_std_1) == 'CC(O)C(=O)O' 16 | rdmol_std_2 = standardize_chemical(rdmol, add_hs=True) 17 | assert Chem.MolToSmiles(rdmol_std_2, allHsExplicit=True) == '[H][O][C](=[O])[C]([H])([O][H])[C]([H])([H])[H]' 18 | 19 | def test_standardize_chemical_2(self): 20 | # Data 21 | violacein_smiles = 'OC1=NC(=C\\C1=C1/C(O)=NC2=CC=CC=C12)C1=CNC2=C1C=C(O)C=C2' 22 | violacein_mol = Chem.MolFromSmiles(violacein_smiles, sanitize=False) 23 | # Test simplest case 24 | std_mol_1 = standardize_chemical(violacein_mol, add_hs=False, rm_stereo=False) 25 | assert Chem.MolToSmiles(std_mol_1) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=C/C1=C1\\C(O)=Nc2ccccc21' 26 | # Test adding Hs 27 | std_mol_2 = standardize_chemical(violacein_mol, add_hs=True, rm_stereo=False) 28 | assert Chem.MolToSmiles(std_mol_2) == '[H]OC1=NC(c2c([H])n([H])c3c([H])c([H])c(O[H])c([H])c23)=C([H])/C1=C1\\C(O[H])=Nc2c([H])c([H])c([H])c([H])c21' 29 | # Test removing stereo 30 | std_mol_3 = standardize_chemical(violacein_mol, add_hs=False, rm_stereo=True) 31 | assert Chem.MolToSmiles(std_mol_3) == 'O=C1NC(c2c[nH]c3ccc(O)cc23)=CC1=C1C(=O)Nc2ccccc21' 32 | # Test adding Hs + removing stereo 33 | std_mol_4 = standardize_chemical(violacein_mol, add_hs=True, rm_stereo=True) 34 | assert Chem.MolToSmiles(std_mol_4) == '[H]Oc1c([H])c([H])c2c(c1[H])c(C1=C([H])C(=C3C(=O)N([H])c4c([H])c([H])c([H])c([H])c43)C(=O)N1[H])c([H])n2[H]' 35 | 36 | def test_standardize_chemical_3(self): 37 | # Data 38 | wrong_smiles = '[H]OC(=O)C([H])([H])C([H])([H])C([H])(N=C(O[H])C([H])([H])C([H])([H])C([H])(N=C(O[H])C([H])(OP(=O)(O[H])OC([H])([H])C([H])(O[H])C([H])(O[H])C([H])(O[H])C([H])([H])n1c2nc(=O)nc(O[H])c-2c([H])c2c([H])c([H])c(OP(=O)(OC([H])([H])C(C([H])([H])[H])(C([H])([H])[H])C([H])(O[H])C(=NC([H])([H])C([H])([H])C(=NC([H])([H])C([H])([H])SC(=O)C([H])([H])C([H])([H])C([H])([H])C([H])(C(=C([H])[H])C([H])([H])[H])C([H])([H])C(=O)O[H])O[H])O[H])OP(=O)(O[H])OC([H])([H])C3([H])OC([H])(n4[c]([H])n([H])[c]5[c](N([H])[H])[n][c]([H])[n][c]54)C([H])(O[H])C3([H])OP(=O)(O[H])O[H])c([H])c21)C([H])([H])[H])C(=O)O[H])C(=O)O[H]' 39 | # Test 40 | wrong_mol = Chem.MolFromSmiles(wrong_smiles, sanitize=False) 41 | with pytest.raises(Exception): 42 | standardize_chemical(wrong_mol) 43 | 44 | def test_standardize_results_1(self): 45 | tuple_tuple_raw = (( 46 | Chem.MolFromSmiles('[H][O][C](=[O])[C]([H])([O][P](=[O])([O][H])[O][H])[C]([H])([H])[H]'), 47 | Chem.MolFromSmiles('[H][N]=[c]1[n][c]([O][H])[c]2[n][c]([H])[n]([C]3([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][H])[C]([H])([O][H])[C]3([H])[O][H])[c]2[n]1[H]') 48 | ),( 49 | Chem.MolFromInchi('InChI=1S/C5H6N5O/c6-5-9-3-2(4(11)10-5)7-1-8-3/h1H,9H2,(H,7,8)(H2,6,10,11)') 50 | )) 51 | tuple_tuple_rdmol, tuple_index_failed = standardize_results(tuple_tuple_raw, add_hs=True, rm_stereo=True) 52 | assert len(tuple_tuple_rdmol) == 1 53 | assert tuple_index_failed == [1] 54 | 55 | def test_handle_result(self): 56 | tuple_raw = ( 57 | Chem.MolFromSmiles('[H][O][C](=[O])[C]([H])([O][P](=[O])([O][H])[O][H])[C]([H])([H])[H]'), 58 | 
Chem.MolFromSmiles('[H][N]=[c]1[n][c]([O][H])[c]2[n][c]([H])[n]([C]3([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][H])[C]([H])([O][H])[C]3([H])[O][H])[c]2[n]1[H]') 59 | ) 60 | tuple_tuple_rdmol, tuple_tuple_failed = standardize_results(tuple_tuple_rdmol=(tuple_raw,), add_hs=True, rm_stereo=True) 61 | inchikeys, inchis, smiles = handle_results(list_list_rdmol=tuple_tuple_rdmol) 62 | # Check number products 63 | assert len(inchikeys) == len(inchis) == len(smiles) == 1 # Only one set of result 64 | assert len(inchikeys[0]) == len(inchis[0]) == len(smiles[0]) == 2 # 2 products 65 | # Check Inchikeys 66 | assert inchikeys[0][0] == 'CSZRNWHGZPKNKY-UHFFFAOYSA-N' 67 | assert inchikeys[0][1] == 'QGWNDRXFNXRZMB-UHFFFAOYSA-N' 68 | # Check Inchis 69 | assert inchis[0][0] == 'InChI=1S/C3H7O6P/c1-2(3(4)5)9-10(6,7)8/h2H,1H3,(H,4,5)(H2,6,7,8)' 70 | assert inchis[0][1] == 'InChI=1S/C10H15N5O11P2/c11-10-13-7-4(8(18)14-10)12-2-15(7)9-6(17)5(16)3(25-9)1-24-28(22,23)26-27(19,20)21/h2-3,5-6,9,16-17H,1H2,(H,22,23)(H2,19,20,21)(H3,11,13,14,18)' 71 | # Check SMILES #1 72 | assert smiles[0][0] == '[H]OC(=O)C([H])(OP(=O)(O[H])O[H])C([H])([H])[H]' 73 | rdmol = Chem.MolFromSmiles(smiles[0][0]) 74 | rdmol = Chem.AddHs(rdmol) 75 | assert Chem.MolToSmiles(rdmol, allHsExplicit=True) == '[H][O][C](=[O])[C]([H])([O][P](=[O])([O][H])[O][H])[C]([H])([H])[H]' 76 | # Check SMILES #2 77 | assert smiles[0][1] == '[H]N=c1nc(O[H])c2nc([H])n(C3([H])OC([H])(C([H])([H])OP(=O)(O[H])OP(=O)(O[H])O[H])C([H])(O[H])C3([H])O[H])c2n1[H]' 78 | rdmol = Chem.MolFromSmiles(smiles[0][1]) 79 | rdmol = Chem.AddHs(rdmol) 80 | assert Chem.MolToSmiles(rdmol, allHsExplicit=True) == '[H][N]=[c]1[n][c]([O][H])[c]2[n][c]([H])[n]([C]3([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][H])[C]([H])([O][H])[C]3([H])[O][H])[c]2[n]1[H]' 81 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test FireBurner class 3 | """ 4 | 5 | import rdkit 6 | from rdkit import Chem 7 | import pytest 8 | import multiprocessing 9 | 10 | 11 | from utilities.reactor.cli import RuleBurner, RuleConversionError, ChemConversionError 12 | 13 | 14 | # Data for tests 15 | substate_inchi = 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)' 16 | reaction_smarts = '([#8&v2:1](-[#6&v4:2](-[#6&v4:3](-[#8&v2:4]-[#1&v1:5])=[#8&v2:6])(-[#6&v4:7](-[#1&v1:8])(-[#1&v1:9])-[#1&v1:10])-[#1&v1:11])-[#1&v1:12])>>([#15&v5](=[#8&v2])(-[#8&v2]-[#1&v1])(-[#8&v2]-[#1&v1])-[#8&v2:1]-[#6&v4:2](-[#6&v4:3](-[#8&v2:4]-[#1&v1:5])=[#8&v2:6])(-[#6&v4:7](-[#1&v1:8])(-[#1&v1:9])-[#1&v1:10])-[#1&v1:11].[#7&v3](=[#6&v4]1:[#7&v3]:[#6&v4](-[#8&v2]-[#1&v1]):[#6&v4]2:[#7&v3]:[#6&v4](-[#1&v1]):[#7&v3](-[#6&v4]3(-[#1&v1])-[#8&v2]-[#6&v4](-[#6&v4](-[#8&v2]-[#15&v5](=[#8&v2])(-[#8&v2]-[#1&v1])-[#8&v2]-[#15&v5](-[#8&v2]-[#1&v1:12])(=[#8&v2])-[#8&v2]-[#1&v1])(-[#1&v1])-[#1&v1])(-[#1&v1])-[#6&v4](-[#8&v2]-[#1&v1])(-[#1&v1])-[#6&v4]-3(-[#8&v2]-[#1&v1])-[#1&v1]):[#6&v4]:2:[#7&v3]:1-[#1&v1])-[#1&v1])' 17 | tuple_product_inchikeys = ('CSZRNWHGZPKNKY-UHFFFAOYSA-N', 'QGWNDRXFNXRZMB-UHFFFAOYSA-N') 18 | tuple_product_smiles = ('[H][O][C](=[O])[C]([H])([O][P](=[O])([O][H])[O][H])[C]([H])([H])[H]', '[H][N]=[c]1[n][c]([O][H])[c]2[n][c]([H])[n]([C]3([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][H])[C]([H])([O][H])[C]3([H])[O][H])[c]2[n]1[H]') 19 | tuple_product_inchis = 
('InChI=1S/C3H7O6P/c1-2(3(4)5)9-10(6,7)8/h2H,1H3,(H,4,5)(H2,6,7,8)', 'InChI=1S/C10H15N5O11P2/c11-10-13-7-4(8(18)14-10)12-2-15(7)9-6(17)5(16)3(25-9)1-24-28(22,23)26-27(19,20)21/h2-3,5-6,9,16-17H,1H2,(H,22,23)(H2,19,20,21)(H3,11,13,14,18)') 20 | 21 | 22 | def dummy_worker(**kwargs): 23 | import time 24 | time.sleep(1) 25 | 26 | 27 | def test_init(): 28 | # Empty is OK 29 | rb = RuleBurner(rsmarts_list=[], inchi_list=[]) # Empty is OK 30 | rb.compute() 31 | 32 | 33 | def test_run_with_timeout(): 34 | rb = RuleBurner(rsmarts_list=[], inchi_list=[]) 35 | with pytest.raises(multiprocessing.context.TimeoutError): 36 | rb._run_with_timeout(dummy_worker, None, timeout=0) 37 | rb._run_with_timeout(dummy_worker, None, timeout=2) 38 | 39 | 40 | def test_jsonify(): 41 | rb = RuleBurner(rsmarts_list=[], inchi_list=[]) 42 | assert rb._jsonify(rsmarts='', inchi='', rid='RID', cid='CID').replace('\n', '') == """{ "rule_id": "RID", "substrate_id": "CID", "fire_timed_out": null, "fire_exec_time": null}""" 43 | 44 | 45 | def test_compute(): 46 | # Wrong reaction depiction 47 | rb = RuleBurner(rsmarts_list=['DUMMY'], inchi_list=[]) 48 | with pytest.raises(RuleConversionError): 49 | rb.compute() 50 | # Wrong chemical depiction 51 | rb = RuleBurner(rsmarts_list=[reaction_smarts], inchi_list=['DUMMY']) 52 | with pytest.raises(ChemConversionError): 53 | rb.compute() 54 | # Timeout should be logged 55 | rb = RuleBurner(rsmarts_list=[reaction_smarts], inchi_list=[substate_inchi], fire_timeout=0) 56 | rb.compute() 57 | assert ''.join(rb._json).find('"fire_timed_out": true') 58 | # OK 59 | rb = RuleBurner(rsmarts_list=[reaction_smarts], inchi_list=[substate_inchi]) 60 | rb.compute() 61 | assert ''.join(rb._json).find('InChI=1S/C3H7O6P/c1-2(3(4)5)9-10(6,7)8/h2H,1H3,(H,4,5)(H2,6,7,8)') 62 | assert ''.join(rb._json).find('InChI=1S/C10H15N5O11P2/c11-10-13-7-4(8(18)14-10)12-2-15(7)9-6(17)5(16)3(25-9)1-24-28(22,23)26-27(19,20)21/h2-3,5-6,9,16-17H,1H2,(H,22,23)(H2,19,20,21)(H3,11,13,14,18)') 63 | -------------------------------------------------------------------------------- /tests/test_moves.py: -------------------------------------------------------------------------------- 1 | """ 2 | Aim: test compound features 3 | """ 4 | 5 | # RP3 objects 6 | from compound import Compound 7 | from move import Move 8 | 9 | class TestMove(object): 10 | """ 11 | Testing moves - should be fast 12 | """ 13 | def test_cloning(self): 14 | move = Move(rsmart = "rsmart", 15 | rid = "rid", 16 | compound_id= "compound_id") 17 | cloned_move = move.clone() 18 | different_python_object = (id(move) != id(cloned_move)) 19 | identical_move_object = move.eq_full_inchi_key(cloned_move) 20 | assert (different_python_object and identical_move_object) 21 | 22 | def test_equality_true(self): 23 | compound_1 = Compound("[H+]") 24 | compound_6 = Compound("[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]") 25 | compound_2345 = Compound("[H][C](=[O])[C]([H])=[C]([H])[H]") 26 | move = Move(rsmart = "rsmart", 27 | rid = "rid", 28 | compound_id= "compound_id", 29 | product_list = [compound_1, compound_6], 30 | set_number = 5) 31 | move_bis = Move(rsmart = "rsmart", 32 | rid = "rid", 33 | compound_id= "compound_id", 34 | product_list = [compound_6, compound_1]) 35 | 36 | assert 
move.eq_full_inchi_key(move_bis) 37 | 38 | def test_equality_false(self): 39 | compound_1 = Compound("[H+]") 40 | compound_6 = Compound("[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]") 41 | compound_2345 = Compound("[H][C](=[O])[C]([H])=[C]([H])[H]") 42 | move = Move(rsmart = "rsmart", 43 | rid = "rid", 44 | compound_id= "compound_id", 45 | product_list = [compound_1, compound_6]) 46 | move_bis = Move(rsmart = "rsmart", 47 | rid = "rid", 48 | compound_id= "compound_id", 49 | product_list = [compound_6, compound_1, compound_2345]) 50 | move_ter = Move(rsmart = "rsmart", 51 | rid = "rid", 52 | compound_id= "compound_id_2", 53 | product_list = [compound_6, compound_1]) 54 | 55 | assert move != move_bis and move != move_ter and move_bis != move_ter 56 | 57 | def test_rave_update(self): 58 | compound_1 = Compound("[H+]") 59 | compound_6 = Compound("[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]") 60 | move = Move(rsmart = "rsmart", 61 | rid = "rid", 62 | compound_id= "compound_id", 63 | product_list = [compound_1, compound_6]) 64 | 65 | move.update(5, visit_number = 10) 66 | move.update(0.2, 10) 67 | assert move.RAVE_total_score == 52 68 | assert move.RAVE_visits == 20 69 | 70 | # def more_compelx_tests_wthi_compouns 71 | -------------------------------------------------------------------------------- /tests/tree_test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/tests/tree_test.pkl -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 -------------------------------------------------------------------------------- /tree_viewer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains the tree objects for visualisation and export 3 | """ 4 | 5 | # General utility packages 6 | import logging 7 | import csv 8 | import copy 9 | import json 10 | import sys 11 | 12 | # RP3 specific objects 13 | from compound import Compound 14 | from move import Move 15 | from chemical_compounds_state import ChemicalCompoundState 16 | from MCTS_node import MCTS_node 17 | # General configuration 18 | from config import * 19 | 20 | class Tree_viewer(object): 21 | """ 22 | Tree_viewer object. 23 | Has methods for quick visualisation as well as export to json 24 | """ 25 | logger = logging.getLogger(__name__) 26 | 27 | def __init__(self, 28 | file_to_save = "temporary_tree_viewer_json"): 29 | """ 30 | Initialising a tree viewer object. 
31 | A Node has: 32 | - level 33 | - scores (total and average) 34 | - visits 35 | - terminal 36 | - root 37 | - a chemical state 38 | - the id will be the chemical state and a number to id it 39 | - whether it has a solved child 40 | A Move: 41 | - Biological score 42 | - Chemical score 43 | - EC numbers 44 | - compound ID it applies to 45 | - smarts 46 | - name 47 | An edge links both 48 | """ 49 | # Where to save the json 50 | self.file_to_save = file_to_save 51 | # For tree viewer json 52 | self.nodes_nodes = [] 53 | self.nodes_transformations = [] 54 | self.edges = [] 55 | 56 | def set_file_to_save(self, file_to_save): 57 | self.file_to_save = file_to_save 58 | 59 | def add_node(self, node): 60 | """ 61 | Adding a node object to the tree. 62 | """ 63 | if node.terminal: 64 | terminal = 1 65 | else: 66 | terminal = 0 67 | if node.move is None: 68 | root = 1 69 | else: 70 | root = 0 71 | node_dict = { 72 | 'type': 'node', 73 | 'id': "node_{}".format(node.id), 74 | 'level': node.level, 75 | 'root': root, 76 | 'terminal': terminal, 77 | 'Names': str(node.state), # If I want synonyms, keep them 78 | 'average_score': node.average_score, 79 | 'total_score': node.total_score, 80 | 'visits': node.visits, 81 | 'solved_child': node.has_a_solved_child 82 | } 83 | self.nodes_nodes.append({"data": node_dict}) 84 | 85 | if node.move is not None: 86 | move_to_child = { 87 | "target" : "move_{}".format(node.move.id), 88 | "source" : "node_{}".format(node.id), 89 | "id" : "{}_=>_{}".format("move_{}".format(node.move.id), "node_{}".format(node.id)) 90 | } 91 | self.edges.append({"data": move_to_child}) 92 | if use_transpositions: 93 | parent_nodes = transposition_table[node.parent.hash] 94 | for parent in parent_nodes: 95 | parent_to_move = { 96 | "target" : "node_{}".format(parent.id), 97 | "source" : "move_{}".format(node.move.id), 98 | "id" : "{}_=>_{}".format("node_{}".format(parent.id), "move_{}".format(node.move.id)) 99 | } 100 | self.edges.append({"data": parent_to_move}) 101 | else: 102 | parent_to_move = { 103 | "target" : "node_{}".format(node.parent.id), 104 | "source" : "move_{}".format(node.move.id), 105 | "id" : "{}_=>_{}".format("node_{}".format(node.parent.id), "move_{}".format(node.move.id)) 106 | } 107 | self.edges.append({"data": parent_to_move}) 108 | biological_score = node.move.biological_score 109 | try: 110 | diameter = int(node.move.rid.split("-")[3]) 111 | except Exception: 112 | diameter = 42 113 | move_dict = { 114 | 'type': 'move', 115 | 'id': "move_{}".format(node.move.id), 116 | "Rule ID": node.move.synonyms, 117 | "EC number": node.move.EC_numbers, 118 | "Reaction SMILES": node.move.rsmiles, 119 | "Diameter": diameter, 120 | "Score": biological_score, 121 | "ChemicalScore": node.move.chemical_score, 122 | "Name": node.move.name 123 | } 124 | self.nodes_transformations.append({"data": move_dict}) 125 | 126 | def jsonify_tree_viewer(self): 127 | """ 128 | Use the scope viewer to visualise pathways before the DBTL cycle advances further. 129 | The json file is a dict composed of one item called elements. 130 | The elements value is a dict composed of "nodes" and "edges". 131 | Nodes is a list of compound and move entries; edges link moves to their parent and child nodes. 132 | """ 133 | pathway_as_dict = {"elements": {"nodes": self.nodes_nodes + self.nodes_transformations, 134 | "edges": self.edges}} 135 | with open(self.file_to_save, "w") as json_handler: 136 | json.dump(pathway_as_dict, json_handler, indent = 2) 137 | 138 | 139 | def __cli(): 140 | """Command line interface. 
Was actually used to make quick 141 | tests before implementing them in the testing file""" 142 | print("CLI is not available for this module - tree viewing is automatically generated by Tree module") 143 | 144 | 145 | if __name__ == "__main__": 146 | __cli() 147 | -------------------------------------------------------------------------------- /utilities/chemtools/Filters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Set of filters to be used for chemical standardisation 4 | 5 | @author: Baudoin Delepine, 2016-2017 6 | @author: Thomas Duigou, 2018-2019 7 | """ 8 | 9 | from copy import deepcopy 10 | from rdkit.Chem import AddHs, GetMolFrags, Kekulize, MolToInchi, MolFromInchi, MolFromSmarts, MolFromSmiles, RemoveStereochemistry, MolToSmiles, RemoveHs 11 | from rdkit.Chem.AllChem import Compute2DCoords, ReplaceSubstructs 12 | from rdkit.Chem.Descriptors import MolWt 13 | 14 | 15 | class Filters(object): 16 | """Set of filters to be used for chemical standardization. 17 | """ 18 | 19 | @classmethod 20 | def _copy_properties(cls, mol_from, mol_to): 21 | """Copy properties from a RDKit compound to another one. 22 | 23 | :param mol_from: RDKit Mol source object 24 | :param mol_to: RDKit Mol target object 25 | 26 | Warning: aside from the chemical's name, all private properties are lost. 27 | """ 28 | # NB: the name is stored in its default location which is "_Name" and 29 | # is a private property. 30 | property_list = mol_from.GetPropNames(includePrivate=False) 31 | if mol_from.HasProp('_Name'): # TD: If _Name is set always save name 32 | property_list.append("_Name") 33 | for property_name in property_list: 34 | mol_to.SetProp(property_name, mol_from.GetProp(property_name)) 35 | 36 | @classmethod 37 | def keep_biggest(cls, mol_in): 38 | """Strip small fragments from compound. 39 | 40 | Returns a new compound where only the "biggest" fragment is conserved 41 | according to (i) the number of non-Hs atoms and, if there is a tie, then 42 | according to (ii) the molecular weight. 43 | 44 | :param mol_in: RDKit Mol 45 | :return mol_out: new RDKit Mol having only one connected component 46 | """ 47 | def count_non_hs_atom(mol): 48 | ans = 0 49 | for atm in mol.GetAtoms(): 50 | if atm.GetAtomicNum() != 1: 51 | ans += 1 52 | return ans 53 | # Remove "other" molecules 54 | molfrag = GetMolFrags(mol_in, asMols=True, sanitizeFrags=False) 55 | mol_out = mol_in 56 | if len(molfrag) > 1: 57 | accepted_nbr_atm = 0 # flag number of atoms in fragment 58 | accepted_mw = 0 # flag the molecular weight of the biggest fragment 59 | for f in molfrag: 60 | nbr_atm = count_non_hs_atom(f) 61 | if nbr_atm > accepted_nbr_atm or (nbr_atm == accepted_nbr_atm and MolWt(f) > accepted_mw): 62 | accepted_nbr_atm = nbr_atm 63 | accepted_mw = MolWt(f) 64 | mol_out = f # keep only the biggest fragment 65 | cls._copy_properties(mol_in, mol_out) # save the name and stuff 66 | return mol_out 67 | 68 | @classmethod 69 | def commute_inchi(cls, mol_in): 70 | """Convert RDKit compound back and forth to InChi. 71 | 72 | Returns a new compound after the initial one has been converted 73 | back and forth to InChi. 74 | 75 | :param mol_in: RDKit Mol 76 | :return mol_out: RDKit Mol 77 | """ 78 | inchi = MolToInchi(mol_in, logLevel=None) # this is talkative... 
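        # Note on the round trip below: exporting to InChI and re-importing is what
        # normalises tautomer and proton positions; sanitization is skipped at import
        # (sanitize=False) and left to the calling sequence in Sequences.py.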
79 | mol_out = MolFromInchi(inchi, sanitize=False, removeHs=False, 80 | logLevel=None, treatWarningAsError=False) 81 | if not mol_out: 82 | raise ValueError("Failed InChi validity filter.") 83 | # Copy the properties 84 | cls._copy_properties(mol_in, mol_out) 85 | return mol_out 86 | 87 | @classmethod 88 | def remove_isotope(cls, mol_in): 89 | """Strip all isotope information. 90 | 91 | Returns a new compound. 92 | 93 | :param mol_in: RDKit Mol 94 | :return mol_out: RDKit Mol 95 | """ 96 | mol_out = deepcopy(mol_in) # copy it, just for consistency with other filters 97 | for atm in mol_out.GetAtoms(): 98 | atm.SetIsotope(0) 99 | if not mol_out: 100 | raise ValueError("Failed isotope removing filter.") 101 | return mol_out 102 | 103 | @staticmethod 104 | def _rules_rdkit(): 105 | patts = ( 106 | ('[n+;H]', 'n'), # Imidazoles 107 | ('[N+;!H0]', 'N'), # Amines 108 | ('[$([O-]);!$([O-][#7])]', 'O'), # Carboxylic acids and alcohols 109 | ('[S-;X1]', 'S'), # Thiols 110 | ('[$([N-;X2]S(=O)=O)]', 'N'), # Sulfonamides 111 | ('[$([N-;X2][C,N]=C)]', 'N'), # Enamines 112 | ('[n-]', '[nH]'), # Tetrazoles 113 | ('[$([S-]=O)]', 'S'), # Sulfoxides 114 | ('[$([N-]C=O)]', 'N'), # Amides 115 | ) 116 | return [(MolFromSmarts(x), MolFromSmiles(y, False)) for x, y in patts] 117 | 118 | @staticmethod 119 | def _rules_molvs(): 120 | """Rules to neutralize compounds. Inspired by molvs.""" 121 | ans = {} 122 | # Neutralizable positive charge (with hydrogens attached) 123 | # ans["pos_h"] = Chem.MolFromSmarts('[+!H0!$(*~[-])]') 124 | ans["pos_h"] = MolFromSmarts('[+!H0]') 125 | # Non-neutralizable positive charge (no hydrogens attached) 126 | # ans["pos_quat"] = Chem.MolFromSmarts('[+H0!$(*~[-])]') 127 | # Negative charge, not bonded to a positive charge with no hydrogens 128 | # ans["neg"] = Chem.MolFromSmarts('[-!$(*~[+H0])]') 129 | ans["neg"] = MolFromSmarts('[-]') 130 | # Negative oxygen bonded to [C,P,S]=O, negative aromatic nitrogen? 131 | # ans["neg_acid"] = Chem.MolFromSmarts('[$([O-][C,P,S]=O),$([n-]1nnnc1),$(n1[n-]nnc1)]') 132 | return ans 133 | 134 | @classmethod 135 | def _neutralise_charge_method1(cls, mol_in, rules=None): 136 | """Neutralise charges according to a set of predefined rules. 137 | 138 | From: 139 | http://www.rdkit.org/docs/Cookbook.html#neutralizing-charged-molecules 140 | """ 141 | # Fallback to default rules if none are provided; a custom `rules` argument is expected to be a function returning (pattern, replacement) pairs, like _rules_rdkit 142 | if rules is None: 143 | fun_rules = cls._rules_rdkit 144 | else: 145 | fun_rules = rules 146 | # Check if the rules are already initialised, cached as an attribute of the function 147 | if not hasattr(fun_rules, "rules"): 148 | fun_rules.rules = fun_rules() 149 | # Apply rules 150 | # Better to use ReplaceSubstructs than RunReactant: the latter would give 151 | # several products (or we would need to use HasSubstructMatch anyway). 152 | for reactant, product in fun_rules.rules: 153 | while mol_in.HasSubstructMatch(reactant): 154 | rms = ReplaceSubstructs(mol_in, reactant, product) 155 | mol_in = rms[0] 156 | mol_in.UpdatePropertyCache() 157 | return mol_in 158 | 159 | @classmethod 160 | def _neutralise_charge_method2(cls, mol_in): 161 | """Neutralise charges as much as possible by playing on hydrogens. 162 | 163 | You should sanitize the compounds after this operation. 
164 | 165 | From: 166 | http://molvs.readthedocs.io/en/latest/_modules/molvs/charge.html 167 | """ 168 | mol_out = deepcopy(mol_in) # copy it, just for consistency with other operations 169 | mol_out.UpdatePropertyCache(strict=False) # recompute implicit valence 170 | # Check if rules are already initialised as an attribute 171 | if not hasattr(cls._rules_molvs, "rules"): 172 | cls._rules_molvs.rules = cls._rules_molvs() 173 | # Get atom ids for matches 174 | p = [x[0] for x in mol_out.GetSubstructMatches(cls._rules_molvs.rules['pos_h'])] 175 | # q = [x[0] for x in cc.GetSubstructMatches(cls._rules_molvs.rules['pos_quat'])] 176 | n = [x[0] for x in mol_out.GetSubstructMatches(cls._rules_molvs.rules['neg'])] 177 | # a = [x[0] for x in cc.GetSubstructMatches(cls._rules_molvs.rules['neg_acid'])] 178 | # Neutralize negative charges 179 | # if q: 180 | # # Surplus negative charges more than non-neutralizable positive charges 181 | # neg_surplus = len(n) - len(q) 182 | # if a and neg_surplus > 0: 183 | # # zwitterion with more negative charges than quaternary positive centres 184 | # while neg_surplus > 0 and a: 185 | # # Add hydrogen to first negative acid atom, increase formal charge 186 | # # Until quaternary positive == negative total or no more negative acid 187 | # atom = cc.GetAtomWithIdx(a.pop(0)) 188 | # atom.SetNumExplicitHs(atom.GetNumExplicitHs() + 1) 189 | # atom.SetFormalCharge(atom.GetFormalCharge() + 1) 190 | # neg_surplus -= 1 191 | # Finish of neutralization of negative charges (we don't care for zwitterion) 192 | for atom in [mol_out.GetAtomWithIdx(x) for x in n]: 193 | while atom.GetFormalCharge() < 0: 194 | atom.SetNumExplicitHs(atom.GetNumExplicitHs() + 1) 195 | atom.SetFormalCharge(atom.GetFormalCharge() + 1) 196 | # Neutralize positive charges 197 | for atom in [mol_out.GetAtomWithIdx(x) for x in p]: 198 | # Remove hydrogen and reduce formal charge until neutral or no more hydrogens 199 | while atom.GetFormalCharge() > 0 and atom.GetTotalNumHs() > 0: 200 | atom.SetFormalCharge(atom.GetFormalCharge() - 1) 201 | if atom.GetNumExplicitHs() > 0: 202 | atom.SetNumExplicitHs(atom.GetNumExplicitHs() - 1) 203 | return mol_out 204 | 205 | @classmethod 206 | def neutralise_charge(cls, mol_in): 207 | """Neutralise charges. 208 | 209 | :param mol_in: RDKit Mol 210 | :return mol_out: RDKit Mol 211 | """ 212 | return cls._neutralise_charge_method1(mol_in) 213 | # return cls._neutralise_charge_method2(mol_in) 214 | 215 | @classmethod 216 | def add_hydrogen(cls, mol_in, addCoords=True): 217 | """Explicit all hydrogens. 218 | 219 | :param mol_in: RDKit Mol 220 | :param addCoords: Add coordinate to added Hs, bool 221 | :return mol_out: RDKit Mol 222 | """ 223 | return AddHs(mol_in, explicitOnly=False, addCoords=addCoords) 224 | 225 | @classmethod 226 | def remove_hydrogen(cls, mol_in, addCoords=True): 227 | """Implicit all hydrogens. 228 | 229 | :param mol_in: RDKit Mol 230 | :param addCoords: Add coordinate to added Hs, bool 231 | :return mol_out: RDKit Mol 232 | """ 233 | return RemoveHs(mol_in, explicitOnly=False, addCoords=addCoords) 234 | 235 | @classmethod 236 | def kekulize(cls, mol_in): 237 | """Kekulize compound. 238 | 239 | :param mol_in: RDKit Mol 240 | :return mol_out: RDKit Mol 241 | """ 242 | mol_out = deepcopy(mol_in) 243 | Kekulize(mol_out, clearAromaticFlags=True) 244 | return mol_out 245 | 246 | @classmethod 247 | def remove_stereo(cls, mol_in): 248 | """Wild stereo removal. 
249 | 250 | Warning: a back and forth Inchi export/import is needed to normalise tautomers 251 | 252 | :param mol_in: RDKit mol 253 | :return mol_out: RDKit mol 254 | """ 255 | mol_out = deepcopy(mol_in) 256 | RemoveStereochemistry(mol_out) 257 | return mol_out 258 | -------------------------------------------------------------------------------- /utilities/chemtools/Sequences.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Sequences of filters to be used for standardization.""" 3 | 4 | 5 | from utilities.chemtools.Filters import Filters 6 | from rdkit.Chem import Cleanup, SanitizeMol, SanitizeFlags 7 | from rdkit.Chem.AllChem import AssignStereochemistry 8 | 9 | 10 | def sequence_rr_legacy(mol): 11 | """Sequence of filters applied for the first version of RetroRules. 12 | """ 13 | F = Filters() 14 | Cleanup(mol) 15 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False) 16 | AssignStereochemistry(mol, cleanIt=True, force=True, flagPossibleStereoCenters=True) # Fix bug TD201904.01 17 | mol = F.remove_isotope(mol) 18 | mol = F.neutralise_charge(mol) 19 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False) 20 | mol = F.keep_biggest(mol) 21 | mol = F.add_hydrogen(mol, addCoords=True) 22 | mol = F.kekulize(mol) 23 | return mol 24 | 25 | 26 | def sequence_tunable( 27 | mol, 28 | OP_REMOVE_ISOTOPE=True, OP_NEUTRALISE_CHARGE=True, 29 | OP_REMOVE_STEREO=False, OP_COMMUTE_INCHI=False, 30 | OP_KEEP_BIGGEST=True, OP_ADD_HYDROGEN=True, 31 | OP_KEKULIZE=True, OP_NEUTRALISE_CHARGE_LATE=True 32 | ): 33 | """Tunable sequence of filters for standardization. 34 | 35 | Operations will be made in the following order: 36 | 1 RDKit Cleanup -- always 37 | 2 RDKit SanitizeMol -- always 38 | 3 Remove isotope -- optional (default: True) 39 | 4 Neutralise charges -- optional (default: True) 40 | 5 RDKit SanitizeMol -- if 3 or 4 41 | 6 Remove stereo -- optional (default: False) 42 | 7 Commute Inchi -- if 6 or optional (default: False) 43 | 8 Keep biggest -- optional (default: True) 44 | 9 RDKit SanitizeMol -- if any (6, 7, 8) 45 | 10 Add hydrogens -- optional (default: True) 46 | 11 Kekulize -- optional (default: True) 47 | """ 48 | F = Filters() 49 | # Always perform the basics.. 
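    # The calls below run unconditionally, whatever the OP_* flags (steps 1 and 2 of
    # the docstring, plus stereo perception): Cleanup applies RDKit's standard
    # normalisations, SanitizeMol recomputes valences and aromaticity, and
    # AssignStereochemistry flags possible stereocenters.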
50 | Cleanup(mol) 51 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False) 52 | AssignStereochemistry(mol, cleanIt=True, force=True, flagPossibleStereoCenters=True) # Fix bug TD201904.01 53 | # 54 | if OP_REMOVE_ISOTOPE: 55 | mol = F.remove_isotope(mol) 56 | if OP_NEUTRALISE_CHARGE: 57 | mol = F.neutralise_charge(mol) 58 | if any([OP_REMOVE_ISOTOPE, OP_NEUTRALISE_CHARGE]): # re-sanitize if step 3 or 4 was applied 59 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False) 60 | # 61 | if OP_REMOVE_STEREO: 62 | mol = F.remove_stereo(mol) 63 | OP_COMMUTE_INCHI = True 64 | if OP_COMMUTE_INCHI: 65 | mol = F.commute_inchi(mol) 66 | if OP_KEEP_BIGGEST: 67 | mol = F.keep_biggest(mol) 68 | if any([OP_REMOVE_STEREO, OP_COMMUTE_INCHI, OP_KEEP_BIGGEST]): 69 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False) 70 | # 71 | if OP_NEUTRALISE_CHARGE_LATE: 72 | mol = F.neutralise_charge(mol) 73 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False) 74 | # 75 | if OP_ADD_HYDROGEN: 76 | mol = F.add_hydrogen(mol, addCoords=True) 77 | if OP_KEKULIZE: 78 | mol = F.kekulize(mol) 79 | # 80 | return mol 81 | -------------------------------------------------------------------------------- /utilities/chemtools/Standardizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Standardize chemicals 4 | 5 | This is basically a rework of the standardizer.py written by Baudoin Delepine 6 | at INRA. 7 | 8 | @author: Baudoin Delepine, 2016-2017 9 | @author: Thomas Duigou, 2018-2019 10 | """ 11 | 12 | from utilities.chemtools import Sequences 13 | from utilities.chemtools.Filters import Filters 14 | from rdkit.Chem import SanitizeMol, SanitizeFlags 15 | from rdkit.Chem.AllChem import AssignStereochemistry 16 | 17 | class Standardizer(object): 18 | """Handle standardization of compound(s) through user-defined "filters". 19 | """ 20 | 21 | def __call__(self, mol): 22 | """Calling the Standardizer class like a function is the same 23 | as calling its "compute" method. 
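        A minimal usage sketch (illustrative parameter values):
            Standardizer(sequence_fun='sequence_tunable', params={'OP_REMOVE_STEREO': True})(rdmol)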
24 | 25 | From: 26 | https://github.com/mcs07/MolVS/blob/master/molvs/standardize.py 27 | """ 28 | return self.compute(mol) 29 | 30 | def __init__(self, sequence_fun=None, params=None): 31 | """Set up parameters for the standardization. 32 | :param sequence_fun: standardization sequence to use, either a callable or the name of a function from Sequences 33 | :param params: dict of keyword arguments forwarded to the sequence function 34 | """ 35 | # Function to be used for standardizing compounds 36 | # Add your own function as a class method 37 | if sequence_fun is None: 38 | self.sequence_fun = self.sequence_minimal 39 | elif callable(sequence_fun): # Guess: sequence_fun is the function itself 40 | self.sequence_fun = sequence_fun 41 | elif type(sequence_fun) == str: 42 | self.sequence_fun = getattr(Sequences, sequence_fun) # Guess: sequence_fun is the name of the function 43 | # Arguments to be passed to any custom standardization function 44 | self._params = params if params else None 45 | 46 | def sequence_minimal(self, mol): 47 | """Minimal standardization.""" 48 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False) 49 | AssignStereochemistry(mol, cleanIt=True, force=True, flagPossibleStereoCenters=True) # Fix bug TD201904.01 50 | return mol 51 | 52 | def compute(self, mol): 53 | """Do the job.""" 54 | if self._params is None: 55 | return self.sequence_fun(mol) 56 | else: 57 | return self.sequence_fun(mol, **self._params) 58 | -------------------------------------------------------------------------------- /utilities/chemtools/Utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Starting a new toolbox to handle chemical compounds 4 | """ 5 | 6 | from rdkit.Chem import MolFromSmiles, MolFromInchi, MolToSmiles, MolToInchi, MolToInchiKey, AddHs 7 | 8 | 9 | def convert_depiction(idepic, itype='smiles', otype={'inchikey'}): 10 | """Convert chemical depiction to other types of depictions 11 | 12 | :param idepic: string depiction to be converted, str 13 | :param itype: type of depiction provided as input, str 14 | :param otype: types of depiction to be generated, {"", "", ..} 15 | :return odepic: generated depictions, {"otype1": "odepic1", ..} 16 | 17 | Usage example: 18 | - convert_depiction(idepic='CCO', otype={'inchi', 'smiles', 'inchikey'}) 19 | - convert_depiction(idepic='InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3', itype='inchi', otype={'inchi', 'smiles', 'inchikey'}) 20 | """ 21 | # Import (if needed) 22 | if itype == 'smiles': 23 | rdmol = MolFromSmiles(idepic, sanitize=True) 24 | elif itype == 'inchi': 25 | rdmol = MolFromInchi(idepic, sanitize=True) 26 | else: 27 | raise NotImplementedError('"{}" is not a valid input type'.format(itype)) 28 | if rdmol is None: # Check import 29 | raise Exception('Import error from depiction "{}" of type "{}"'.format(idepic, itype)) 30 | 31 | # Export 32 | odepic = dict() 33 | for item in otype: 34 | if item == 'smiles': 35 | odepic[item] = MolToSmiles(rdmol) # MolToSmiles is tricky, one may want to check the possible options.. 
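            # In recent RDKit releases MolToSmiles returns canonical, isomeric SMILES by
            # default; options such as isomericSmiles=False can be passed if stereo
            # information should be dropped from the output.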
36 | elif item == 'inchi': 37 | odepic[item] = MolToInchi(rdmol) 38 | elif item == 'inchikey': 39 | odepic[item] = MolToInchiKey(rdmol) 40 | else: 41 | raise NotImplementedError('"{}" is not a valid output type'.format(item)) 42 | 43 | return odepic 44 | -------------------------------------------------------------------------------- /utilities/reactor/Core.py: -------------------------------------------------------------------------------- 1 | """ 2 | Core code for firing rules 3 | """ 4 | 5 | 6 | class RuleMatchError(Exception): 7 | """Raised when something went wrong when matching a rule.""" 8 | 9 | def __init__(self, msg): 10 | self._msg = msg 11 | 12 | def __str__(self): 13 | return "RULE-MATCH-ERROR: {}".format(self._msg) 14 | 15 | 16 | class RuleFireError(Exception): 17 | """Raised when something went wrong when firing a rule.""" 18 | 19 | def __init__(self, msg): 20 | self._msg = msg 21 | 22 | def __str__(self): 23 | return "RULE-FIRE-ERROR: {}".format(self._msg) 24 | 25 | 26 | class RuleBurnerCore(object): 27 | """Apply one rule on one chemical.""" 28 | 29 | def __init__(self, rd_rule, rd_mol): 30 | """Apply one rule on one chemical. 31 | 32 | Notice: no standardization is made on input chemicals and rules. 33 | 34 | :param rd_rule: RDKit reaction object, reaction rule to apply 35 | :param rd_mol: RDKit mol object, chemical 36 | 37 | """ 38 | # Internal settings 39 | USE_CHIRALITY_IN_MATCH = False # Default value anyway for substructure matching 40 | # Input 41 | self._rd_rule = rd_rule 42 | self._rd_mol = rd_mol 43 | 44 | def match(self): 45 | """Check if the left reaction side matches the chemical. 46 | 47 | returns: bool, True if there is a match, else False 48 | """ 49 | try: 50 | for reactant in self._rd_rule.GetReactants(): 51 | if self._rd_mol.HasSubstructMatch(reactant): 52 | return True 53 | return False 54 | except Exception as e: 55 | raise RuleMatchError(e) from e 56 | 57 | def fire(self): 58 | """Fire the rule on the chemical. 59 | 60 | returns: tuple of tuples, results for each possible application. 61 | """ 62 | try: 63 | return self._rd_rule.RunReactants((self._rd_mol,)) 64 | except Exception as e: 65 | raise RuleFireError(e) from e 66 | -------------------------------------------------------------------------------- /utilities/reactor/Utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Set of methods to handle reaction I/Os 3 | """ 4 | 5 | 6 | import copy 7 | import rdkit 8 | import logging 9 | 10 | from rdkit import Chem 11 | from rdkit.Chem import MolToInchiKey 12 | from rdkit import RDLogger 13 | from utilities.chemtools.Standardizer import Standardizer 14 | 15 | 16 | RD_LOGGER = RDLogger.logger() 17 | RD_LOGGER.setLevel(RDLogger.CRITICAL) # Silence most of the RDKit complaints 18 | 19 | 20 | class ChemConversionError(Exception): 21 | """Raised when something went wrong during chemical conversion to RDKit mol object.""" 22 | 23 | def __init__(self, msg): 24 | self._msg = msg 25 | 26 | def __str__(self): 27 | return "CHEM-CONVERSION-ERROR: {}".format(self._msg) 28 | 29 | 30 | def wild_stereo_removal(rdmol): 31 | """Wild stereo removal using back and forth Inchi depiction. 
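    Stereo flags are cleared on a deep copy, then the mol is rebuilt from its InChI
    string so that the tautomer representation is re-normalised (same rationale as
    Filters.remove_stereo, which warns that an InChI round trip is needed).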
32 | 33 | :param rdmol: RDKit mol 34 | :returns rdmol_new: newly generated RDKit mol 35 | """ 36 | tmp_rdmol = copy.deepcopy(rdmol) 37 | Chem.RemoveStereochemistry(tmp_rdmol) 38 | return Chem.MolFromInchi(Chem.MolToInchi(tmp_rdmol)) 39 | 40 | 41 | def standardize_chemical_archive(rdmol, add_hs=True, rm_stereo=True): 42 | """Standardize a chemical using RDKit sanitize method. 43 | 44 | :param rdmol: RDKit mol object 45 | :param add_hs: append Hs, bool (default: True) 46 | :param rm_stereo: remove stereo, bool (default: True) 47 | :returns rdmol: RDKit mol object 48 | """ 49 | try: 50 | Chem.SanitizeMol(rdmol) 51 | if rm_stereo: # Important: do this before adding Hs (else re-add Hs) 52 | rdmol = wild_stereo_removal(rdmol) 53 | if add_hs: 54 | rdmol = Chem.AddHs(rdmol) 55 | else: 56 | rdmol = Chem.RemoveHs(rdmol) 57 | return rdmol 58 | except Exception as e: 59 | logging.warning(e) 60 | raise e 61 | 62 | 63 | def standardize_chemical(rdmol, add_hs=True, rm_stereo=True, heavy=False): 64 | """Standardize a chemical using RDKit sanitize method. 65 | 66 | :param rdmol: RDKit mol object 67 | :param add_hs: append Hs, bool (default: True) 68 | :param rm_stereo: remove stereo, bool (default: True) 69 | :param heavy: perform custom in depth standardization (default: False) 70 | :returns rdmol: RDKit mol object 71 | """ 72 | # if not rm_stereo: 73 | # logging.warning("Stereo not handled at the time being.") 74 | # raise ChemConversionError("Stereo not handled at the time being.") 75 | simple_standardisation = { 76 | 'OP_REMOVE_ISOTOPE': False, 77 | 'OP_NEUTRALISE_CHARGE': False, 78 | 'OP_REMOVE_STEREO': rm_stereo, 79 | 'OP_COMMUTE_INCHI': True, 80 | 'OP_KEEP_BIGGEST': False, 81 | 'OP_ADD_HYDROGEN': add_hs, 82 | 'OP_KEKULIZE': False, 83 | 'OP_NEUTRALISE_CHARGE_LATE': True 84 | } 85 | heavy_standardisation = { 86 | 'OP_REMOVE_ISOTOPE': True, 87 | 'OP_NEUTRALISE_CHARGE': True, 88 | 'OP_REMOVE_STEREO': rm_stereo, 89 | 'OP_COMMUTE_INCHI': True, 90 | 'OP_KEEP_BIGGEST': True, 91 | 'OP_ADD_HYDROGEN': add_hs, 92 | 'OP_KEKULIZE': False, 93 | 'OP_NEUTRALISE_CHARGE_LATE': True 94 | } 95 | 96 | try: 97 | if heavy: 98 | rdmol = Standardizer(sequence_fun='sequence_tunable', params=heavy_standardisation).compute(rdmol) 99 | logging.debug("Performing heavy standardisation for compound {}".format(MolToInchiKey(rdmol))) 100 | else: 101 | rdmol = Standardizer(sequence_fun='sequence_tunable', params=simple_standardisation).compute(rdmol) 102 | return rdmol 103 | except Exception as e: 104 | logging.warning(e) 105 | raise e 106 | 107 | 108 | def standardize_results(tuple_tuple_rdmol, add_hs=True, rm_stereo=True): 109 | """Perform sanitization and remove duplicates from reaction rule results. 
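    Duplicate product sets are detected on their dot-joined, sorted InChIKeys:
    only the first occurrence of a given set is kept in the returned list.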
110 | 111 | :param tuple_tuple_rdmol: tuple of tuple of RDKit Mol 112 | :param add_hs: append Hs, bool (default: True) 113 | :param rm_stereo: remove stereo, bool (default: True) 114 | :returns list_list_std: list of list of standardized RDKit Mol 115 | :returns list_idx_tuple_failed: list of index of tuples that failed the standardization 116 | """ 117 | uniq_depics = set() 118 | list_list_std = list() 119 | list_idx_tuple_failed = list() 120 | 121 | for idx_tuple, tuple_rdmol in enumerate(tuple_tuple_rdmol): 122 | try: 123 | list_std = list() 124 | list_inchikeys = list() 125 | # Standardize 126 | for rdmol in tuple_rdmol: 127 | for rd_frag in Chem.GetMolFrags(rdmol, asMols=True, sanitizeFrags=False): 128 | list_std.append(standardize_chemical(rd_frag, add_hs=add_hs, rm_stereo=rm_stereo)) 129 | # Get Inchikeys 130 | for rdmol in list_std: 131 | inchikey = Chem.MolToInchiKey(rdmol) 132 | if inchikey: 133 | list_inchikeys.append(inchikey) 134 | else: 135 | msg = 'Product conversion to InChIKey raised an empty string' 136 | logging.warning(ChemConversionError(msg)) 137 | raise ChemConversionError(msg) 138 | # Get unique depiction 139 | depic = '.'.join(sorted(list_inchikeys)) 140 | # Stoer only if unique depiction never met 141 | if depic not in uniq_depics: 142 | uniq_depics.add(depic) 143 | list_list_std.append(list_std) 144 | except ChemConversionError as e: 145 | logging.warning("{}".format(e)) 146 | list_idx_tuple_failed.append(idx_tuple) 147 | raise e 148 | except Exception as e: 149 | logging.warning("Cannot handle a tuple of result, skipped") 150 | logging.warning("{}".format(e)) 151 | list_idx_tuple_failed.append(idx_tuple) 152 | 153 | return list_list_std, list_idx_tuple_failed 154 | 155 | 156 | def handle_results(list_list_rdmol): 157 | """Generate InchiKey, Inchi and SMILES from results. 158 | 159 | :param list_list_rdmol: list of list of RDKit Mol 160 | :returns list_list_inchikeys: list of list of InchiKeys 161 | :returns list_list_inchis: list of list of Inchis 162 | :returns list_list_smiles: list of list of SMILES 163 | """ 164 | list_list_inchikeys = list() 165 | list_list_inchis = list() 166 | list_list_smiles = list() 167 | 168 | for list_rdmol in list_list_rdmol: 169 | try: 170 | list_inchikeys = list() 171 | list_inchis = list() 172 | list_smiles = list() 173 | list_std = list() 174 | for rdmol in list_rdmol: 175 | # Get & check depictions 176 | inchikey = Chem.MolToInchiKey(rdmol) # DEBUG: this part could be optimized 177 | inchi = Chem.MolToInchi(rdmol) 178 | smiles = Chem.MolToSmiles(rdmol) 179 | if not all([inchikey, inchi, smiles]): 180 | raise ChemConversionError("Chemical conversion error") 181 | # Store if we reach there 182 | list_inchikeys.append(inchikey) 183 | list_inchis.append(inchi) 184 | list_smiles.append(smiles) 185 | # Store if we reach the end 186 | list_list_inchikeys.append(list_inchikeys) 187 | list_list_inchis.append(list_inchis) 188 | list_list_smiles.append(list_smiles) 189 | except ChemConversionError as e: 190 | logging.warning("{}".format(e)) 191 | raise e 192 | except Exception as e: 193 | logging.warning("Cannot handle a tuple of result, skipped") 194 | logging.warning("{}".format(e)) 195 | return list_list_inchikeys, list_list_inchis, list_list_smiles # Quick but dirty 196 | --------------------------------------------------------------------------------