├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── MCTS_node.py
├── README.md
├── Rollout_policies.py
├── Tree.py
├── UCT_policies.py
├── biological_scoring.py
├── calculate_organisms.py
├── calculate_rule_sets_similarity.py
├── change_config.py
├── chemical_compounds_state.py
├── chemical_scoring.py
├── chemistry_choices.md
├── compound.py
├── compound_scoring.py
├── config.py
├── convert_to_SBML.py
├── data
│   ├── base_config.py
│   ├── compounds_to_add
│   │   └── TPA_to_add.csv
│   ├── golden_dataset.csv
│   ├── name_structure_toxicity.csv
│   ├── sinks
│   │   ├── bsubtilis_iYO844_sink_reduced_rp_ready.csv
│   │   ├── detectable_metabolites_uncommented.csv
│   │   ├── ecoli_core_sink_reduced_rp_ready.csv
│   │   ├── ecoli_iJO1366_sink_reduced_rp_ready.csv
│   │   └── ecoli_iML1515_sink_reduced_rp_ready.csv
│   └── supplement_finder
│       ├── data
│       │   └── metanetx_extracted_inchikeys.json.tar.gz
│       └── tree_for_testing
│           ├── TPA
│           │   └── pickles
│           │       └── tree_end_search.pkl.tar.gz
│           └── morphine
│               └── pickles
│                   └── tree_end_search.pkl.tar.gz
├── document_all_options.md
├── expected_results
│   ├── deoxiviolacein_1.json
│   ├── deoxiviolacein_2.json
│   ├── deoxiviolacein_3.json
│   ├── deoxiviolacein_4.json
│   ├── deoxiviolacein_best.json
│   ├── deoxiviolacein_full_scope.json
│   ├── deoxiviolacein_full_tree_for_MCTS.json
│   ├── deoxiviolacein_iteration_12.json
│   ├── deoxiviolacein_iteration_15.json
│   ├── deoxiviolacein_iteration_82.json
│   ├── deoxiviolacein_iteration_85.json
│   ├── pickles
│   │   └── tree_end_search.pkl.tar.gz
│   ├── results.csv
│   └── tree.log
├── move.py
├── organisms.py
├── pathway.py
├── pathway_scoring.py
├── pyproject.toml
├── representation.py
├── rewarding.py
├── rule_sets_examples.py
├── rule_sets_similarity.py
├── setup.py
├── supplement_finder.py
├── tests
│   ├── data
│   │   ├── rules_mixed_subset.tsv
│   │   ├── rules_r10_subset.tsv
│   │   ├── rules_r2_subset.tsv
│   │   ├── state_BOPG_BSAB_GPRL.pkl
│   │   └── tree_pipecolate_test.pkl
│   ├── generated_jsons
│   │   ├── .gitignore
│   │   └── .gitkeep
│   ├── test_Filters.py
│   ├── test_MCTS_node.py
│   ├── test_Standardizer.py
│   ├── test_Tree.py
│   ├── test_Utils.py
│   ├── test_cli.py
│   ├── test_compound.py
│   ├── test_moves.py
│   ├── test_state.py
│   └── tree_test.pkl
├── tox.ini
├── tree_viewer.py
└── utilities
    ├── chemtools
    │   ├── Filters.py
    │   ├── Sequences.py
    │   ├── Standardizer.py
    │   └── Utils.py
    └── reactor
        ├── Core.py
        ├── Utils.py
        └── cli.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Usual stuff
2 | .DS_Store
3 | __pycache__
4 | *.egg-info
5 |
6 | # Data
7 | data/*/*.log
8 | data/*/*.pkl
9 |
10 | # Test data
11 | tests/generated_jsons/pipecolate_iteration_0.json
12 |
13 | # IDE
14 | .vscode
15 | .idea
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## Unreleased
2 |
3 | ### Feat
4 |
5 | - enables execution without fire timeout
6 | - **Tree**: refine debug logging
7 |
8 | ### Fix
9 |
10 | - **compound**: apply standardisation timeout to new compounds
11 | - **Tree**: timeout arguments as int
12 | - further restrict rdkit version (reproducibility issue #21)
13 |
14 | ### Refactor
15 |
16 | - **Tree**: remove unused code
17 |
18 | ## 1.0.1 (2024-06-20)
19 |
20 | ### Fix
21 |
22 | - **DATA_PATH**: fix typo
23 | - **Tree**: import missing pre-parsed organisms
24 |
25 | ### Refactor
26 |
27 | - **Tree**: sweep imports
28 | - **calculate_organisms**: clean organism data files generation
29 | - **calculate_organisms**: remove unused imports
30 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Mathilde Koch, INRA
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include chemistry_choices.md
2 | include data/compounds_to_add/*
3 | include data/sinks/*
4 | include data/supplement_finder/*
5 | include data/golden_dataset.csv
6 | include data/name_structure_toxicity.csv
7 | include expected_results/*
8 | include tests/*
9 | include utilities/*
10 | include README.md
11 |
--------------------------------------------------------------------------------
/Rollout_policies.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Defines the Rollout policies.
4 | Usage: move = rollout_policy.select_best_move(available_moves)
5 | Remarks:
6 | - various policies have been tested on toy examples in a Jupyter notebook during implementation
7 | """
8 |
9 | from math import sqrt, log
10 | import random
11 |
12 | class Rollout_policy(object):
13 | """
14 | Defines rollout policy.
15 | From a list of moves, select the one that should be used for rollout.
16 |     This is the base class; subclasses must provide a policy function.
17 | """
18 | def __init__(self, policy_type, description = "Default Rollout Policy"):
19 | self.policy_type = policy_type
20 | self.description = description
21 |
22 | def select_best_move(self, available_moves):
23 | try:
24 | move = self.policy(available_moves)
25 | return(move)
26 | except IndexError:
27 | return(None)
28 |
29 | def __str__(self):
30 | return("Policy type: {} \nDescription: {}".format(self.policy_type, self.description))
31 |
32 | class Rollout_policy_first(Rollout_policy):
33 | """
34 | Defines rollout policy.
35 | Always returns the first element: first compound, first rule
36 | """
37 | def __init__(self):
38 | description = "Always select the first compound_rule combination"
39 | Rollout_policy.__init__(self, policy_type = "First found combination", description = description)
40 | self.name = "Rollout_policy_first"
41 | self.policy = self.policy()
42 |
43 | def policy(self):
44 |         # Returns a closure that always picks the first available move
45 | def select_best_inside(available_moves):
46 | move = available_moves[0]
47 | return(move)
48 | return(select_best_inside)
49 |
50 | class Rollout_policy_chemical_best(Rollout_policy):
51 | """
52 | Defines rollout policy.
53 | Always returns the best chemical move
54 | """
55 | def __init__(self):
56 | description = "Always select the move with the highest chemical score"
57 | Rollout_policy.__init__(self, policy_type = "Best Chemical", description = description)
58 | self.policy = self.best_chemical_policy()
59 | self.name = "Rollout_policy_chemical_best"
60 |
61 | def best_chemical_policy(self):
62 |         # Returns a closure that picks the move with the highest chemical score
63 | def select_best_inside(available_moves):
64 | current_best = available_moves[0]
65 | current_best_score = current_best.chemical_score
66 | for element in available_moves:
67 | chemical_score = element.chemical_score
68 | if chemical_score > current_best_score:
69 | current_best_score = chemical_score
70 | current_best = element
71 | return(current_best)
72 | return(select_best_inside)
73 |
74 | class Rollout_policy_biological_best(Rollout_policy):
75 | """
76 | Defines rollout policy.
77 | Always returns the best biological move
78 | """
79 | def __init__(self):
80 | description = "Always select the move with the highest biological score"
81 | Rollout_policy.__init__(self, policy_type = "Best Biological", description = description)
82 | self.policy = self.best_biological_policy()
83 | self.name = "Rollout_policy_biological_best"
84 |
85 | def best_biological_policy(self):
86 |         # Returns a closure that picks the move with the highest biological score
87 | def select_best_inside(available_moves):
88 | current_best = available_moves[0]
89 | current_best_score = current_best.biological_score
90 | for element in available_moves:
91 |                 biological_score = element.biological_score
92 | if biological_score > current_best_score:
93 | current_best_score = biological_score
94 | current_best = element
95 | return(current_best)
96 | return(select_best_inside)
97 |
98 | class Rollout_policy_biochemical_addition_best(Rollout_policy):
99 | """
100 | Defines rollout policy.
101 | Always returns the best biochemical (addition of scores) move
102 | """
103 | def __init__(self):
104 | description = "Select the highest Biochemical addition score"
105 | Rollout_policy.__init__(self, policy_type = "Best Biochemical addition", description = description)
106 | self.policy = self.best_biochemical_policy()
107 | self.name = "Rollout_policy_biochemical_addition_best"
108 |
109 | def best_biochemical_policy(self):
110 |         # Returns a closure that picks the move with the highest biological + chemical score
111 | def select_best_inside(available_moves):
112 | current_best = available_moves[0]
113 | current_best_score = current_best.biological_score + current_best.chemical_score
114 | for element in available_moves:
115 | biological_score = element.biological_score
116 | chemical_score = element.chemical_score
117 | if biological_score + chemical_score > current_best_score:
118 | current_best_score = biological_score + chemical_score
119 | current_best = element
120 | return(current_best)
121 | return(select_best_inside)
122 |
123 | class Rollout_policy_biochemical_multiplication_best(Rollout_policy):
124 | """
125 | Defines rollout policy.
126 | Always returns the best biochemical (multiplication of scores) move
127 | """
128 | def __init__(self):
129 | description = "Select the highest Biochemical multiplication score"
130 | Rollout_policy.__init__(self, policy_type = "Best Biochemical multiplication", description = description)
131 | self.policy = self.best_biochemical_policy()
132 | self.name = "Rollout_policy_biochemical_multiplication_best"
133 |
134 | def best_biochemical_policy(self):
135 |         # Returns a closure that picks the move with the highest biological * chemical score
136 | def select_best_inside(available_moves):
137 | current_best = available_moves[0]
138 | current_best_score = current_best.biological_score * current_best.chemical_score
139 | for element in available_moves:
140 | biological_score = element.biological_score
141 | chemical_score = element.chemical_score
142 | if biological_score * chemical_score > current_best_score:
143 | current_best_score = biological_score * chemical_score
144 | current_best = element
145 | return(current_best)
146 | return(select_best_inside)
147 |
148 | class Rollout_policy_random_uniform(Rollout_policy):
149 | """
150 | Random sampling of the move amongst available moves
151 | """
152 | def __init__(self):
153 | description = "Random selection - no scoring involved"
154 | Rollout_policy.__init__(self, policy_type = "Random sampling", description = description)
155 | self.policy = self.policy()
156 | self.name = "Rollout_policy_random_uniform"
157 |
158 | def policy(self):
159 |         # Returns a closure that picks a move uniformly at random
160 | def select_best_inside(available_moves):
161 | index = random.randrange(0, len(available_moves))
162 | move = available_moves[index]
163 | return(move)
164 | return(select_best_inside)
165 |
166 | class Rollout_policy_random_uniform_on_chem_score(Rollout_policy):
167 | """
168 | Random sampling of the move amongst available moves, weighted by chemical score
169 | """
170 | def __init__(self):
171 | description = "Random selection - uniform sampling from chemical weights"
172 | Rollout_policy.__init__(self, policy_type = "Chemical uniform sampling", description = description)
173 | self.policy = self.policy()
174 | self.name = "Rollout_policy_random_uniform_on_chem_score"
175 |
176 | def policy(self):
177 |         # Returns a closure that samples a move weighted by its chemical score
178 | def select_best_inside(available_moves):
179 | pop, cum, cum_w = [], [], 0
180 | for move in available_moves:
181 | pop.append(move)
182 | cum_w = cum_w + move.chemical_score
183 | cum.append(cum_w)
184 | move = random.choices(pop, cum_weights=cum, k=1)[0]
185 | return(move)
186 | return(select_best_inside)
187 |
188 | class Rollout_policy_random_uniform_on_bio_score(Rollout_policy):
189 | """
190 | Random sampling of the move amongst available moves, weighted by biological score
191 | """
192 | def __init__(self):
193 | description = "Random selection - uniform sampling from biological weights"
194 | Rollout_policy.__init__(self, policy_type = "Biological uniform sampling", description = description)
195 | self.policy = self.policy()
196 | self.name = "Rollout_policy_random_uniform_on_bio_score"
197 | def policy(self):
198 |         # Returns a closure that samples a move weighted by its biological score
199 | def select_best_inside(available_moves):
200 | pop, cum, cum_w = [], [], 0
201 |
202 | for move in available_moves:
203 | pop.append(move)
204 | cum_w = cum_w + move.biological_score
205 | cum.append(cum_w)
206 | move = random.choices(pop, cum_weights=cum, k=1)[0]
207 | return(move)
208 | return(select_best_inside)
209 |
210 | class Rollout_policy_random_uniform_on_biochemical_addition_score(Rollout_policy):
211 | """
212 | Random sampling of the move amongst available moves, weighted by biochemical (addition) score
213 | """
214 | def __init__(self):
215 | description = "Random selection - uniform sampling from added biochemical weights"
216 | Rollout_policy.__init__(self, policy_type = "Biochemical addition uniform sampling", description = description)
217 | self.policy = self.policy()
218 | self.name = "Rollout_policy_random_uniform_on_biochemical_addition_score"
219 |
220 | def policy(self):
221 |         # Returns a closure that samples a move weighted by its biological + chemical score
222 | def select_best_inside(available_moves):
223 | pop, cum, cum_w = [], [], 0
224 |
225 | for move in available_moves:
226 | pop.append(move)
227 | cum_w = cum_w + move.biological_score + move.chemical_score
228 | cum.append(cum_w)
229 | move = random.choices(pop, cum_weights=cum, k=1)[0]
230 | return(move)
231 | return(select_best_inside)
232 |
233 | class Rollout_policy_random_uniform_on_biochemical_multiplication_score(Rollout_policy):
234 | """
235 | Random sampling of the move amongst available moves, weighted by biochemical (multiplication) score
236 | """
237 | def __init__(self):
238 | description = "Random selection - uniform sampling from multiplied biochemical weights"
239 |         Rollout_policy.__init__(self, policy_type = "Biochemical multiplication uniform sampling", description = description)
240 | self.policy = self.policy()
241 | self.name = "Rollout_policy_random_uniform_on_biochemical_multiplication_score"
242 |
243 | def policy(self):
244 |         # Returns a closure that samples a move weighted by its biological * chemical score
245 | def select_best_inside(available_moves):
246 | pop, cum, cum_w = [], [], 0
247 |
248 | for move in available_moves:
249 | pop.append(move)
250 | cum_w = cum_w + move.biological_score * move.chemical_score
251 | cum.append(cum_w)
252 | move = random.choices(pop, cum_weights=cum, k=1)[0]
253 | return(move)
254 | return(select_best_inside)
255 |
--------------------------------------------------------------------------------
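
A minimal usage sketch for the rollout policies defined above (not part of the repository). FakeMove is a hypothetical stand-in for the project's Move objects, mocking only the chemical_score and biological_score attributes that the policies read.

from collections import namedtuple

from Rollout_policies import (
    Rollout_policy_chemical_best,
    Rollout_policy_random_uniform_on_bio_score,
)

# Stand-in for the project's Move objects: only the scores read by the policies.
FakeMove = namedtuple("FakeMove", ["name", "chemical_score", "biological_score"])

moves = [
    FakeMove("rule_A", chemical_score=0.30, biological_score=0.90),
    FakeMove("rule_B", chemical_score=0.85, biological_score=0.40),
]

best_chem = Rollout_policy_chemical_best()
print(best_chem.select_best_move(moves).name)    # rule_B (highest chemical score)

bio_sampler = Rollout_policy_random_uniform_on_bio_score()
print(bio_sampler.select_best_move(moves).name)  # rule_A or rule_B, weighted by biological score
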
/UCT_policies.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines the UCT (Upper Confidence Tree) policies.
3 | It is the formula that balances exploration and exploitation when selecting children in the Tree.
4 | Implements a number of different policies.
5 | Policies are subclasses of the UCT_policy class.
6 | They need to have a function attribute that does the calculation. See the examples if you want to develop your own.
7 | """
8 |
9 | from math import sqrt, log
10 |
11 |
12 | class UCT_policy(object):
13 | """
14 | Defines UCT_policies objects.
15 | They take a node and return the best child according to this policy.
16 | Only subclasses of this object can work as there is no default calculation function.
17 | """
18 | def __init__(self, parameters = {"UCTK": 2}, policy_type = 'Classical', function = None):
19 | self.parameters = parameters
20 | self.policy_type = policy_type
21 |
22 | def calculate(self, node, top_n = 1):
23 | s = sorted(node.children, key = lambda c: self.function(c, parent_visits = node.visits))
24 | s = s[-top_n]
25 | return s
26 |
27 | def __str__(self):
28 | return("Policy type: {} \nFormula: {}".format(self.policy_type, self.formula))
29 |
30 |
31 | class Classical_UCT(UCT_policy):
32 | """
33 |     This class implements the most basic UCT function.
34 |     It only uses the number of visits as a criterion.
35 |     It is the classical UCT formula, where no additional expert knowledge is injected.
36 | """
37 | def __init__(self, parameters = {"UCTK": 1000}):
38 | UCT_policy.__init__(self, policy_type = "Classical")
39 | self.parameters = parameters
40 | self.formula = "mean_score + sqrt({}*log(N + 1)/(n+1))".format(parameters["UCTK"])
41 | self.function = self.simple_UCT_formula(self.parameters)
42 |
43 | def simple_UCT_formula(self, parameters):
44 | UCTK = parameters["UCTK"]
45 | def simple_formula_inside(c, parent_visits):
46 | value = c.average_score + sqrt(UCTK*log(parent_visits +1)/(c.visits + 1))
47 | return(value)
48 | return(simple_formula_inside)
49 |
50 | class Classical_UCT_RAVE(UCT_policy):
51 | """
52 | This class implements UCT based on visit count and RAVE.
53 | RAVE stands for Rapid Action Value Estimation:
54 |     - it adds another score based on the usage of identical moves elsewhere in the Tree
55 |     - this is weighted by the number of visits: as visits increase, the actual score of the node becomes more important than this initial estimate.
56 | """
57 | def __init__(self, parameters = {"UCTK": 1000, "k_rave": 100}):
58 | UCT_policy.__init__(self, policy_type = "Classical_RAVE")
59 | self.parameters = parameters
60 | self.formula = "(1-b) mean_score + b rave_score + sqrt({}*log(N + 1)/(n+1)) with b = sqrt({}/(3N + {}))".format(parameters["UCTK"], parameters["k_rave"], parameters["k_rave"])
61 | self.function = self.RAVE_formula(parameters = self.parameters)
62 |
63 | def RAVE_formula(self, parameters):
64 | UCTK = parameters["UCTK"]
65 | k_rave = parameters["k_rave"]
66 | def simple_formula_inside(c, parent_visits):
67 | b = sqrt(k_rave/(3*parent_visits + k_rave))
68 | value = c.average_score *(1-b) + b * c.move.RAVE_average_score + sqrt(UCTK*log(parent_visits +1)/(c.visits + 1))
69 | return(value)
70 | return(simple_formula_inside)
71 |
72 | class Classical_UCT_with_bias(UCT_policy):
73 | """
74 | This class implements UCT based on visits and progressive bias.
75 | Progressive bias works by
76 |     - giving an initial value to a node (based on expert knowledge, for example)
77 |     - this initial estimate loses importance as the node gets visited, in favor of actual rollouts.
78 | """
79 | def __init__(self, parameters = {"UCTK": 1000, "bias_k": 1}):
80 | UCT_policy.__init__(self, policy_type = "Classical")
81 | self.parameters = parameters
82 | self.formula = "mean_score + sqrt({}*log(N + 1)/(n+1)) + {} * progressive_bias/(n+1)".format(parameters["UCTK"], parameters["bias_k"])
83 | self.function = self.simple_UCT_formula(self.parameters)
84 |
85 | def simple_UCT_formula(self, parameters):
86 | UCTK = parameters["UCTK"]
87 | bias_k = parameters["bias_k"]
88 | def simple_formula_inside(c, parent_visits):
89 | value = c.average_score + sqrt(UCTK*log(parent_visits +1)/(c.visits + 1))+ bias_k * c.progressive_bias/(c.visits + 1)
90 | return(value)
91 | return(simple_formula_inside)
92 |
93 | class Nature_UCT(UCT_policy):
94 | """
95 |     This class implements the formula used in the following Nature paper (https://doi.org/10.1038/nature25978):
96 |     Planning chemical syntheses with deep neural networks and symbolic AI.
97 |     It is identical to the Chemical Scoring UCT (Chemical_UCT_1).
98 | """
99 | def __init__(self, parameters = {"UCTK": 3}):
100 |         UCT_policy.__init__(self, policy_type = "Nature Symbolic AI")
101 | self.parameters = parameters
102 | self.formula = "mean_score + {} * P * sqrt(N/(n+1))".format(parameters["UCTK"])
103 | self.function = self.Nature_UCT_formula(self.parameters)
104 |
105 | def Nature_UCT_formula(self, parameters):
106 | UCTK = parameters["UCTK"]
107 | def simple_formula_inside(c, parent_visits):
108 | chem_P = c.move.chemical_score
109 | value = c.average_score + UCTK * chem_P *sqrt(parent_visits/(c.visits + 1))
110 | return(value)
111 | return(simple_formula_inside)
112 |
113 | class Biochemical_UCT_1(UCT_policy):
114 | """
115 | This class implements a simple biochemical score UCT.
116 | The selection is guided by a product of chemical and biological score.
117 | """
118 | def __init__(self, parameters = {"UCTK": 3}):
119 | UCT_policy.__init__(self, policy_type = "Biochemical multiplication")
120 | self.parameters = parameters
121 | self.formula = "mean_score + {} * P_c * B * sqrt(N/(n+1))".format(parameters["UCTK"])
122 | self.function = self.Biochemical_UCT_formula(self.parameters)
123 |
124 | def Biochemical_UCT_formula(self, parameters):
125 | UCTK = parameters["UCTK"]
126 | def simple_formula_inside(c, parent_visits):
127 | chem_P = c.move.chemical_score
128 | b_score = c.move.biological_score
129 | value = c.average_score + UCTK * chem_P * b_score *sqrt(parent_visits/(c.visits + 1))
130 | return(value)
131 | return(simple_formula_inside)
132 |
133 | class Biological_UCT_1(UCT_policy):
134 | """
135 | This class implements a simple biological score UCT.
136 | The selection is guided by Biological score only.
137 | """
138 | def __init__(self, parameters = {"UCTK": 3}):
139 | UCT_policy.__init__(self, policy_type = "Biological score only")
140 | self.parameters = parameters
141 | self.formula = "mean_score + {} * B * sqrt(N/(n+1))".format(parameters["UCTK"])
142 | self.function = self.Biological_UCT_formula(self.parameters)
143 |
144 | def Biological_UCT_formula(self, parameters):
145 | UCTK = parameters["UCTK"]
146 | def simple_formula_inside(c, parent_visits):
147 | b_score = c.move.biological_score
148 | value = c.average_score + UCTK * b_score *sqrt(parent_visits/(c.visits + 1))
149 | return(value)
150 | return(simple_formula_inside)
151 |
152 | class Chemical_UCT_1(UCT_policy):
153 | """
154 | This class implements a simple chemical score UCT.
155 | The selection is guided by Chemical score only.
156 | """
157 | def __init__(self, parameters = {"UCTK": 3}):
158 | UCT_policy.__init__(self, policy_type = "Chemical multiplication")
159 | self.parameters = parameters
160 | self.formula = "mean_score + {} * P_c * sqrt(N/(n+1))".format(parameters["UCTK"])
161 | self.function = self.Chemical_UCT_formula(self.parameters)
162 |
163 | # @staticmethod
164 | def Chemical_UCT_formula(self, parameters):
165 | UCTK = parameters["UCTK"]
166 | def simple_formula_inside(c, parent_visits):
167 | chem_P = c.move.chemical_score
168 | value = c.average_score + UCTK * chem_P *sqrt(parent_visits/(c.visits + 1))
169 | return(value)
170 | return(simple_formula_inside)
171 |
172 | class Biochemical_UCT_1_with_RAVE(UCT_policy):
173 | """
174 | This class implements a biochemical score UCT with RAVE augmentation.
175 | RAVE stands for Rapid Action Value Estimation:
176 |     - it adds another score based on the usage of identical moves elsewhere in the Tree
177 |     - this is weighted by the number of visits: as visits increase, the actual score of the node becomes more important than this initial estimate.
178 | """
179 | def __init__(self, parameters = {"UCTK": 3, "k_rave": 100}):
180 | UCT_policy.__init__(self, policy_type = "Biochemical multiplication with RAVE")
181 | self.parameters = parameters
182 |         self.formula = "(1-b) mean_score + b rave_score + {} * P_c * B * sqrt(N/(n+1)) with b = sqrt({}/(3N + {}))".format(parameters["UCTK"], parameters["k_rave"], parameters["k_rave"])
183 | self.function = self.Biochemical_UCT_RAVE_formula(self.parameters)
184 |
185 | def Biochemical_UCT_RAVE_formula(self, parameters):
186 | UCTK = parameters["UCTK"]
187 | k_rave = parameters["k_rave"]
188 | def simple_formula_inside(c, parent_visits):
189 | b = sqrt(k_rave/(3*parent_visits + k_rave))
190 | b_score = c.move.biological_score
191 | chem_P = c.move.chemical_score
192 | value = c.average_score * (1-b) + b * c.move.RAVE_average_score + UCTK * chem_P * b_score *sqrt(parent_visits/(c.visits + 1))
193 | return(value)
194 | return(simple_formula_inside)
195 |
196 | class Biochemical_UCT_with_progressive_bias(UCT_policy):
197 | """
198 | This class implements a biochemical score UCT and progressive bias.
199 | Progressive bias works by
200 |     - giving an initial value to a node (based on expert knowledge, for example)
201 |     - this initial estimate loses importance as the node gets visited, in favor of actual rollouts.
202 | """
203 | def __init__(self, parameters = {"UCTK": 3, "bias_k": 1}):
204 | UCT_policy.__init__(self, policy_type = "Biochemical with progressive bias")
205 | self.parameters = parameters
206 | self.formula = "mean_score + {} * bias/(n+1) + {} * P_c * B * sqrt(N/(n+1))".format(parameters["bias_k"], parameters["UCTK"])
207 | self.function = self.Biochemical_UCT_with_bias_formula(parameters)
208 |
209 | # @staticmethod
210 | def Biochemical_UCT_with_bias_formula(self, parameters):
211 | UCTK = parameters["UCTK"]
212 | bias_k = parameters["bias_k"]
213 | def simple_formula_inside(c, parent_visits):
214 | chem_P = c.move.chemical_score
215 | b_score = c.move.biological_score
216 | bias = c.progressive_bias
217 | value = c.average_score + bias_k * bias/(c.visits +1) + UCTK * chem_P * b_score *sqrt(parent_visits/(c.visits + 1))
218 | return(value)
219 | return(simple_formula_inside)
220 |
221 | class Biochemical_UCT_with_toxicity(UCT_policy):
222 | """
223 | This class implements a biochemical score UCT combined with toxicity bias.
224 |     The formula is identical to Biochemical_UCT_with_progressive_bias, the bias being the node's toxicity.
225 | """
226 | def __init__(self, parameters = {"UCTK": 3, "bias_k": 1}):
227 | UCT_policy.__init__(self, policy_type = "Biochemical with toxicity")
228 | self.parameters = parameters
229 | self.formula = "mean_score + {} * toxicity/(n+1) + {} * P_c * B * sqrt(N/(n+1))".format(parameters["bias_k"], parameters["UCTK"])
230 | self.function = self.Biochemical_UCT_with_toxicity_formula(parameters)
231 |
232 | def Biochemical_UCT_with_toxicity_formula(self, parameters):
233 | UCTK = parameters["UCTK"]
234 | bias_k = parameters["bias_k"]
235 | def simple_formula_inside(c, parent_visits):
236 | chem_P = c.move.chemical_score
237 | b_score = c.move.biological_score
238 | toxicity = c.toxicity
239 | value = c.average_score + bias_k * toxicity/(c.visits +1) + UCTK * chem_P * b_score *sqrt(parent_visits/(c.visits + 1))
240 | return(value)
241 | return(simple_formula_inside)
242 |
--------------------------------------------------------------------------------
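
A minimal child-selection sketch for the UCT policies above (not part of the repository). The SimpleNamespace objects are hypothetical stand-ins for MCTS node instances, mocking only the visits, average_score, children and move.chemical_score attributes that the formulas read.

from types import SimpleNamespace

from UCT_policies import Classical_UCT, Chemical_UCT_1


def child(name, visits, average_score, chemical_score):
    # Stand-in for an MCTS node: only the attributes used by the UCT formulas.
    return SimpleNamespace(
        name=name,
        visits=visits,
        average_score=average_score,
        move=SimpleNamespace(chemical_score=chemical_score),
    )


parent = SimpleNamespace(
    visits=50,
    children=[
        child("well_explored", visits=40, average_score=0.60, chemical_score=0.2),
        child("barely_tried", visits=2, average_score=0.40, chemical_score=0.9),
    ],
)

# Classical UCT only trades off mean score against visit counts.
print(Classical_UCT(parameters={"UCTK": 2}).calculate(parent).name)
# Chemical_UCT_1 also multiplies the exploration term by the move's chemical score.
print(Chemical_UCT_1(parameters={"UCTK": 3}).calculate(parent).name)
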
/biological_scoring.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines the biological scoring function.
3 | Requires random for random scoring, and all rule sets for biological scoring.
4 | """
5 |
6 | import random
7 | from rule_sets_examples import *
8 | from rule_sets_similarity import *
9 |
10 | class BiologicalScoring(object):
11 | """
12 | Defines Biological Scorer object.
13 | Returns the biological score associated to a reaction rule.
14 | """
15 | def __init__(self, scoring_function):
16 | self.scoring_function = scoring_function
17 | self.name = "Random"
18 |
19 | def __repr__(self):
20 | return(self.name)
21 |
22 | def calculate(self, rule):
23 | score = self.scoring_function(rule)
24 | return(score)
25 |
26 | def pseudo_random(rule):
27 | score = random.uniform(0, 10)
28 | return(score)
29 |
30 | class BiologicalScoringOrganism(BiologicalScoring):
31 | """
32 | Defines Biological Scorer object from an organism with predefined scores.
33 | Inverted converts a penalty to a score.
34 |     This will be analysed in more depth when the biological score evolves.
35 | """
36 | def __init__(self, rules_dictionnary, inverted = False, name = "None"):
37 | BiologicalScoring.__init__(self, scoring_function = None)
38 | self.scoring_function = self.assign_from_dict(rules_dictionnary, inverted)
39 | self.name = name
40 |
41 | def __repr__(self):
42 | return(self.name)
43 |
44 | def assign_from_dict(self, rules_dictionnary, inverted):
45 |         # rules_dictionnary is captured by the closure below
46 | def simple_assign_inside(rule):
47 | score = rules_dictionnary[rule]["biological_score"]
48 |             # Inverted is used to work with penalties instead of scores.
49 | # if inverted:
50 | # try:
51 | # return(1/score)
52 | # except ZeroDivisionError:
53 | # return(33)
54 | # else:
55 | return(score)
56 | return(simple_assign_inside)
57 |
58 |
59 | RandomBiologicalScorer = BiologicalScoring(scoring_function = pseudo_random)
60 | BiologicalFullScoringRetroH = BiologicalScoringOrganism(rules_dictionnary= full_rules_retro_H, name = "full_rules_retro_H")
61 | BiologicalFullScoringFwdH = BiologicalScoringOrganism(rules_dictionnary= full_rules_forward_H, name = "full_rules_forward_H")
62 | BiologicalFullScoringRetroNoH = BiologicalScoringOrganism(rules_dictionnary= full_rules_retro_no_H, name = "full_rules_retro_no_H")
63 | BiologicalFullScoringFwdNoH = BiologicalScoringOrganism(rules_dictionnary= full_rules_forward_no_H, name = "full_rules_forward_no_H")
64 |
65 | full_H = dict(full_rules_retro_H)  # copy so that full_rules_retro_H is not mutated by the update below
66 | full_H.update(full_rules_forward_H)
67 | BiologicalFullScoringH = BiologicalScoringOrganism(rules_dictionnary= full_H, name = "full_rules_H")
68 |
--------------------------------------------------------------------------------
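
A minimal sketch of the organism-based scorer above, with a toy rules dictionary: the rule IDs are taken from elsewhere in the code base, while the scores are invented for illustration. BiologicalScoringOrganism only requires the {rule: {"biological_score": value}} layout; note that importing biological_scoring also loads the full rule sets at module level, which requires the rule data to be available.

from biological_scoring import BiologicalScoringOrganism

# Hypothetical scores attached to two rule IDs that appear elsewhere in the code base.
toy_rules = {
    "MNXR94682_MNXM821": {"biological_score": 0.92},
    "MNXR117465_MNXM821": {"biological_score": 0.41},
}

scorer = BiologicalScoringOrganism(rules_dictionnary=toy_rules, name="toy_scorer")
print(scorer)                                 # toy_scorer
print(scorer.calculate("MNXR94682_MNXM821"))  # 0.92
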
/calculate_organisms.py:
--------------------------------------------------------------------------------
1 | """
2 | This module calculates organisms:
3 | - standardises compounds within the organism
4 | - saves them as pickles that can be loaded by RP3
5 | """
6 |
7 | # General utilities
8 | import logging
9 | import os
10 | import csv
11 | import sys
12 | import argparse
13 |
14 | from config import DATA_PATH
15 |
16 | # RP3 specific objects
17 | from compound import Compound
18 | from chemical_compounds_state import ChemicalCompoundState
19 | from utilities.reactor.Utils import ChemConversionError
20 |
21 |
22 | def __run__():
23 | def import_organism_from_csv(csv_file, add_Hs=True):
24 | with open(csv_file) as csv_handle:
25 | dict_reader = csv.DictReader(csv_handle, delimiter=",")
26 | compound_list = []
27 | for row in dict_reader:
28 | name = row["name"]
29 | inchi = row["inchi"]
30 | if inchi is None or inchi == "None" or inchi == "":
31 | pass
32 | else:
33 | try:
34 | if name.startswith("InChI"):
35 | compound = Compound(
36 | InChI=inchi,
37 | heavy_standardisation=True,
38 | force_add_H=add_Hs,
39 | )
40 | else:
41 | compound = Compound(
42 | InChI=inchi,
43 | name=name,
44 | heavy_standardisation=True,
45 | force_add_H=add_Hs,
46 | )
47 | if not compound.in_list(compound_list, main_layer=False):
48 | compound_list.append(compound)
49 | except ChemConversionError:
50 | logging.error(
51 | "For compound {} with inchi {}: error ChemConversionError".format(
52 | name, inchi
53 | )
54 | )
55 | organism = ChemicalCompoundState(compound_list, main_layer=False)
56 | return organism
57 |
58 | # Calculate with H ========================================================
59 | logging.info("Calculating organisms with H...")
60 |
61 | # Test organism
62 | compound_1 = Compound(
63 | "[H+]", name="1", heavy_standardisation=True, force_add_H=True
64 | )
65 | compound_6 = Compound(
66 | "[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]",
67 | force_add_H=True,
68 | name="6",
69 | heavy_standardisation=True,
70 | )
71 | compound_3459 = Compound(
72 | "[H][O][C](=[O])[C](=[O])[C]([H])([H])[C]([H])([O][H])[C]([H])([O][H])[C]([H])([H])[H]",
73 | name="3459",
74 | heavy_standardisation=True,
75 | force_add_H=True,
76 | )
77 | test_organism = ChemicalCompoundState(
78 | state_name="Test", compound_list=[compound_1, compound_6, compound_3459]
79 | )
80 |
81 | # Load real organisms
82 | detectable_cmpds = import_organism_from_csv(
83 | f"{SINK_DATA_PATH}/detectable_metabolites_uncommented.csv", add_Hs=True
84 | )
85 | iML1515_chassis = import_organism_from_csv(
86 | f"{SINK_DATA_PATH}/ecoli_iML1515_sink_reduced_rp_ready.csv", add_Hs=True
87 | )
88 | core_ecoli = import_organism_from_csv(
89 | f"{SINK_DATA_PATH}/ecoli_core_sink_reduced_rp_ready.csv", add_Hs=True
90 | )
91 | iJO1366_chassis = import_organism_from_csv(
92 | f"{SINK_DATA_PATH}/ecoli_iJO1366_sink_reduced_rp_ready.csv", add_Hs=True
93 | )
94 | bsubtilis = import_organism_from_csv(
95 | f"{SINK_DATA_PATH}/bsubtilis_iYO844_sink_reduced_rp_ready.csv", add_Hs=True
96 | )
97 |
98 | # Save organisms
99 | test_organism.save(file_name="Test_organism_H", folder_address=ORGANISMS_DATA_PATH)
100 | detectable_cmpds.save(
101 | file_name="detectable_cmpds_H", folder_address=ORGANISMS_DATA_PATH
102 | )
103 | iML1515_chassis.save(
104 | file_name="iML1515_chassis_H", folder_address=ORGANISMS_DATA_PATH
105 | )
106 | core_ecoli.save(file_name="core_ecoli_H", folder_address=ORGANISMS_DATA_PATH)
107 | iJO1366_chassis.save(
108 | file_name="iJO1366_chassis_H", folder_address=ORGANISMS_DATA_PATH
109 | )
110 | bsubtilis.save(file_name="bsubtilis_H", folder_address=ORGANISMS_DATA_PATH)
111 |
112 | # Calculate without H =====================================================
113 | logging.info("Calculating organisms without H...")
114 |
115 | # Test organism
116 | compound_1 = Compound(
117 | "[H+]", name="1", heavy_standardisation=True, force_add_H=False
118 | )
119 | compound_6 = Compound(
120 | "[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]",
121 | force_add_H=False,
122 | name="6",
123 | heavy_standardisation=True,
124 | )
125 | compound_3459 = Compound(
126 | "[H][O][C](=[O])[C](=[O])[C]([H])([H])[C]([H])([O][H])[C]([H])([O][H])[C]([H])([H])[H]",
127 | name="3459",
128 | heavy_standardisation=True,
129 | force_add_H=False,
130 | )
131 | test_organism = ChemicalCompoundState(
132 | state_name="Test", compound_list=[compound_1, compound_6, compound_3459]
133 | )
134 |
135 | # Load real organisms
136 | detectable_cmpds = import_organism_from_csv(
137 |         f"{SINK_DATA_PATH}/detectable_metabolites_uncommented.csv", add_Hs=False
138 | )
139 | iML1515_chassis = import_organism_from_csv(
140 | f"{SINK_DATA_PATH}/ecoli_iML1515_sink_reduced_rp_ready.csv", add_Hs=False
141 | )
142 | core_ecoli = import_organism_from_csv(
143 | f"{SINK_DATA_PATH}/ecoli_core_sink_reduced_rp_ready.csv", add_Hs=False
144 | )
145 | iJO1366_chassis = import_organism_from_csv(
146 | f"{SINK_DATA_PATH}/ecoli_iJO1366_sink_reduced_rp_ready.csv", add_Hs=False
147 | )
148 | bsubtilis = import_organism_from_csv(
149 | f"{SINK_DATA_PATH}/bsubtilis_iYO844_sink_reduced_rp_ready.csv", add_Hs=False
150 | )
151 |
152 | # Save organisms
153 | test_organism.save(
154 | file_name="Test_organism_noH", folder_address=ORGANISMS_DATA_PATH
155 | )
156 | detectable_cmpds.save(
157 | file_name="detectable_cmpds_noH", folder_address=ORGANISMS_DATA_PATH
158 | )
159 | iML1515_chassis.save(
160 | file_name="iML1515_chassis_noH", folder_address=ORGANISMS_DATA_PATH
161 | )
162 | core_ecoli.save(file_name="core_ecoli_noH", folder_address=ORGANISMS_DATA_PATH)
163 | iJO1366_chassis.save(
164 | file_name="iJO1366_chassis_noH", folder_address=ORGANISMS_DATA_PATH
165 | )
166 | bsubtilis.save(file_name="bsubtilis_noH", folder_address=ORGANISMS_DATA_PATH)
167 |
168 | return 0
169 |
170 |
171 | if __name__ == "__main__":
172 | d = "Formatting organisms in a RP3 compatible format"
173 | parser = argparse.ArgumentParser(description=d)
174 | parser.add_argument(
175 | "--terminal",
176 | help="Default logger is logs_organisms_set_up, switch to terminal if specified",
177 | action="store_true",
178 | default=False,
179 | )
180 | args = parser.parse_args()
181 |
182 | # Sink data path
183 | global SINK_DATA_PATH
184 | SINK_DATA_PATH = f"{DATA_PATH}/sinks"
185 | assert os.path.exists(
186 | SINK_DATA_PATH
187 | ), f"Sink data path {SINK_DATA_PATH} does not exist"
188 |
189 | # Organisms data path
190 | global ORGANISMS_DATA_PATH
191 | ORGANISMS_DATA_PATH = f"{DATA_PATH}/organisms"
192 | if not os.path.exists(ORGANISMS_DATA_PATH):
193 | os.mkdir(ORGANISMS_DATA_PATH)
194 |
195 | if args.terminal is True:
196 | logging.basicConfig(
197 | stream=sys.stderr,
198 | level=logging.INFO,
199 | datefmt="%d/%m/%Y %H:%M:%S",
200 | format="%(asctime)s -- %(levelname)s -- %(message)s",
201 | )
202 | else:
203 | logging.basicConfig(
204 | stream=open(
205 | "{}/{}.log".format(ORGANISMS_DATA_PATH, "logs_organisms_set_up"), "w"
206 | ),
207 | level=logging.INFO,
208 | datefmt="%d/%m/%Y %H:%M:%S",
209 | format="%(asctime)s -- %(levelname)s -- %(message)s",
210 | )
211 | print(
212 | f"By default, logs are saved in {ORGANISMS_DATA_PATH}/logs_organisms_set_up.log. Please use --terminal to redirect to sys.stderr"
213 | )
214 | __run__()
215 |
--------------------------------------------------------------------------------
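
The sink CSVs read by import_organism_from_csv are plain comma-separated files whose header must provide the "name" and "inchi" columns used by the DictReader; rows with an empty or "None" InChI are skipped. A minimal sketch of writing such a file (file name and compounds chosen for illustration):

import csv

# Hypothetical sink entries; the first "inchi" value is the standard InChI for pyruvic acid.
rows = [
    {"name": "pyruvate", "inchi": "InChI=1S/C3H4O3/c1-2(4)3(5)6/h1H3,(H,5,6)"},
    {"name": "skipped_compound", "inchi": "None"},  # empty or "None" InChI rows are ignored
]

with open("data/sinks/my_custom_sink.csv", "w", newline="") as handle:
    writer = csv.DictWriter(handle, fieldnames=["name", "inchi"])
    writer.writeheader()
    writer.writerows(rows)

# calculate_organisms.py only loads the sink files hard-coded in __run__, so a new sink
# also needs a matching import_organism_from_csv(...) call and .save(...) there, then:
#     python calculate_organisms.py --terminal
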
/change_config.py:
--------------------------------------------------------------------------------
1 | """
2 | The aim of this script is to change the configuration file from the command line.
3 | It takes as input the base config file from the data folder.
4 | """
5 |
6 | import argparse
7 | import re
8 | import os
9 |
10 |
11 | def __cli():
12 | """
13 | Command line interface.
14 | """
15 |
16 | d = "Arguments to change the config file before running a Tree"
17 | parser = argparse.ArgumentParser(description=d)
18 | # Logs and saving information
19 | parser.add_argument(
20 | "--DB_CACHE", type=lambda x: (str(x).lower() == "true"), default=False
21 | )
22 | parser.add_argument(
23 | "--DB_REPLACE", type=lambda x: (str(x).lower() == "true"), default=False
24 | )
25 | parser.add_argument("--DB_time", default=1, type=float)
26 | parser.add_argument(
27 | "--biosensor", type=lambda x: (str(x).lower() == "true"), default=False
28 | )
29 | parser.add_argument(
30 | "--use_cache", type=lambda x: (str(x).lower() == "true"), default=False
31 | )
32 | parser.add_argument(
33 | "--add_Hs", type=lambda x: (str(x).lower() == "true"), default=False
34 | )
35 | parser.add_argument(
36 | "--use_transpositions", type=lambda x: (str(x).lower() == "true"), default=False
37 | )
38 | parser.add_argument(
39 | "--use_transpositions_depth",
40 | type=lambda x: (str(x).lower() == "true"),
41 | default=False,
42 | )
43 | parser.add_argument(
44 | "--folder_to_save", default=os.path.dirname(os.path.abspath(__file__))
45 | )
46 | args = parser.parse_args()
47 |
48 | def change_dB_setting(
49 | DB_CACHE,
50 | DB_REPLACE,
51 | DB_time,
52 | biosensor,
53 | use_cache,
54 | add_Hs,
55 | use_transpositions,
56 | use_transpositions_depth,
57 | folder_to_save,
58 | ):
59 | with open(
60 | "{}/data/base_config.py".format(os.path.dirname(os.path.abspath(__file__))),
61 | "r",
62 | ) as file_original:
63 | whole_text = file_original.read()
64 | with open("{}/config.py".format(folder_to_save), "w") as replacement_text:
65 | # Changing DB_cache
66 | if DB_CACHE:
67 | if "DB_CACHE = True" not in whole_text:
68 | whole_text = whole_text.replace(
69 | "DB_CACHE = False", "DB_CACHE = True"
70 | )
71 | else:
72 | if "DB_CACHE = False" not in whole_text:
73 | whole_text = whole_text.replace(
74 | "DB_CACHE = True", "DB_CACHE = False"
75 | )
76 | # Changing DB replace
77 | if DB_REPLACE:
78 | if "DB_REPLACE = True" not in whole_text:
79 | whole_text = whole_text.replace(
80 | "DB_REPLACE = False", "DB_REPLACE = True"
81 | )
82 | else:
83 | if "DB_REPLACE = False" not in whole_text:
84 | whole_text = whole_text.replace(
85 | "DB_REPLACE = True", "DB_REPLACE = False"
86 | )
87 | # Changing DB_time:
88 | whole_text = re.sub(
89 |                 r"DB_time = \d+\.\d+", "DB_time = {}".format(DB_time), whole_text
90 | )
91 |
92 | # Changing running mode from biosensor to retrosynthesis
93 | if biosensor:
94 | if "biosensor = True" not in whole_text:
95 | whole_text = whole_text.replace(
96 | "biosensor = False", "biosensor = True"
97 | )
98 | whole_text = whole_text.replace(
99 | "retrosynthesis = True", "retrosynthesis = False"
100 | )
101 | else:
102 | if "biosensor = False" not in whole_text:
103 | whole_text = whole_text.replace(
104 | "biosensor = True", "biosensor = False"
105 | )
106 | whole_text = whole_text.replace(
107 | "retrosynthesis = False", "retrosynthesis = True"
108 | )
109 | # Changing use_cache
110 | if use_cache:
111 | if "use_cache = True" not in whole_text:
112 | whole_text = whole_text.replace(
113 | "use_cache = False", "use_cache = True"
114 | )
115 | else:
116 | if "use_cache = False" not in whole_text:
117 | whole_text = whole_text.replace(
118 | "use_cache = True", "use_cache = False"
119 | )
120 |
121 | # Hydrogen handling:
122 | if add_Hs:
123 | if "add_Hs = True" not in whole_text:
124 | whole_text = whole_text.replace("add_Hs = False", "add_Hs = True")
125 | else:
126 | if "add_Hs = False" not in whole_text:
127 | whole_text = whole_text.replace("add_Hs = True", "add_Hs = False")
128 |
129 | # Changing use_transpositions
130 | if use_transpositions:
131 | if "use_transpositions = True" not in whole_text:
132 | whole_text = whole_text.replace(
133 | "use_transpositions = False", "use_transpositions = True"
134 | )
135 | else:
136 | if "use_transpositions = False" not in whole_text:
137 | whole_text = whole_text.replace(
138 | "use_transpositions = True", "use_transpositions = False"
139 | )
140 | # Changing use_transpositions_depth
141 | if use_transpositions_depth:
142 | if "use_transpositions_depth = True" not in whole_text:
143 | whole_text = whole_text.replace(
144 | "use_transpositions_depth = False",
145 | "use_transpositions_depth = True",
146 | )
147 | else:
148 | if "use_transpositions_depth = False" not in whole_text:
149 | whole_text = whole_text.replace(
150 | "use_transpositions_depth = True",
151 | "use_transpositions_depth = False",
152 | )
153 | replacement_text.write(whole_text)
154 |
155 | change_dB_setting(
156 | DB_CACHE=args.DB_CACHE,
157 | DB_REPLACE=args.DB_REPLACE,
158 | DB_time=args.DB_time,
159 | biosensor=args.biosensor,
160 | use_cache=args.use_cache,
161 | add_Hs=args.add_Hs,
162 | use_transpositions=args.use_transpositions,
163 | use_transpositions_depth=args.use_transpositions_depth,
164 | folder_to_save=args.folder_to_save,
165 | )
166 |
167 |
168 | if __name__ == "__main__":
169 | __cli()
170 |
--------------------------------------------------------------------------------
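
A hedged invocation sketch for the script above (flag values chosen for illustration): it rewrites data/base_config.py into a config.py written to the requested folder.

import os
import subprocess
import sys

here = os.path.dirname(os.path.abspath(__file__))
subprocess.run(
    [
        sys.executable, os.path.join(here, "change_config.py"),
        "--biosensor", "true",     # switches retrosynthesis off and biosensor on
        "--add_Hs", "true",
        "--DB_time", "5",
        "--folder_to_save", here,  # where the generated config.py is written
    ],
    check=True,
)
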
/chemical_scoring.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines the chemical scoring functions
3 | """
4 |
5 | # General utility packages
6 | import random
7 | import itertools # For all permutations when IDing the best products
8 | import numpy as np # Allows for simpler calculations on lists
9 | import logging
10 |
11 | # Chemistry packages
12 | from rdkit import DataStructs # For similarity computation
13 |
14 | def list_product(combination):
15 | """
16 | Calculates the product of all elements from the list.
17 | Remark: deprecated, use geometric mean instead.
18 | """
19 | score = 1
20 | for tanimoto in combination:
21 | score = score * tanimoto
22 | return(score)
23 |
24 | def combine_products(product_list, product_list_bis, max_combination = 1000):
25 | """
26 | Calculates all possible combinations of products (native and query products).
27 |     Limited to max_combination (1000 by default), since the number of combinations grows as n! where n is the number of products.
28 | """
29 | combinations = [(x,product_list_bis) for x in itertools.permutations(product_list,len(product_list_bis))]
30 | if len(combinations) > max_combination:
31 | combinations = combinations[0:max_combination]
32 | return(combinations)
33 |
34 | def list_geometric_mean(combination):
35 | """
36 | Calculates the geometric mean of the array.
37 | """
38 | a = np.array(combination)
39 | return a.prod()**(1.0/len(a))
40 |
41 | def tanimoto_product_calc(native_products_ecfp, query_products_ecfp, verbose = False):
42 | all_scores = []
43 | if len(native_products_ecfp) != len(query_products_ecfp):
44 | # Reject rules that do not produce the same number of compounds.
45 | logging.debug("Rule does not generate the same number of products: native is {} and new is {}".format(len(native_products_ecfp), len(query_products_ecfp)))
46 | return(-1)
47 | combinations = combine_products(product_list = native_products_ecfp, product_list_bis = query_products_ecfp)
48 | score_list = []
49 | for combination in combinations:
50 | tanimoto_combination = []
51 | native, query = combination[0], combination[1]
52 | for i in range(len(native)):
53 | tanimoto = DataStructs.cDataStructs.TanimotoSimilarity(native[i], query[i])
54 | tanimoto_combination.append(tanimoto)
55 | score_list.append(list_geometric_mean(tanimoto_combination))
56 | if verbose:
57 | logging.debug("Score list length is {} and scores {}".format(len(score_list), score_list))
58 | return(max(score_list))
59 |
60 | class ChemicalScoring(object):
61 |     """
62 |     Defines Chemical Scorer objects.
63 |     """
64 |     logger = logging.getLogger(__name__)
65 | def __init__(self, scoring_function, name = "ChemicalScoring"):
66 | self.scoring_function = scoring_function
67 | self.scoring_warning = True
68 | self.name = name
69 |
70 | def calculate(self, compound, products = None, rule = None, original_substrates_list = None, original_products_list_list = None):
71 | if original_substrates_list == [None] and (original_products_list_list is None or original_products_list_list == [None]):
72 | if self.scoring_warning:
73 | self.scoring_warning = False
74 | self.logger.warning("Not using chemical scoring for {}. Default is set to 1".format(self.name))
75 | return(1)
76 | score, warning = self.scoring_function(compound, products, rule, original_substrates_list, original_products_list_list)
77 |         if warning is not None:
78 | self.logger.debug(warning)
79 | return(score)
80 |
81 | def pseudo_random(compound, products, rule, original_substrates_list = None, original_products_list_list = None):
82 | """
83 | Was used during development.
84 | """
85 | warning = None
86 | if compound.InChIKey == "NBBJYMSMWIIQGU-UHFFFAOYSA-N":
87 | if rule == "MNXR94682_MNXM821":
88 | score = 0.99
89 | elif rule == "MNXR117465_MNXM821":
90 | score = 0.88
91 | else:
92 | score = random.uniform(0,0.75)
93 | elif compound.InChIKey == "DNIAPMSPPWPWGF-UHFFFAOYSA-N":
94 | if rule == "MNXR95713_MNXM90191":
95 | score = 0.80
96 | elif rule == "MNXR103108_MNXM90191":
97 | score = 0.76
98 | else:
99 | score = random.uniform(0,0.75)
100 | else:
101 | score = random.uniform(0,0.75)
102 | return(score, warning)
103 |
104 | def substrate_calculation(compound, products = None, rule = None, original_substrates_list = None, original_products_list_list = None):
105 | """
106 |     If the original_substrates_list is None, it means chemical scoring is not implemented and scoring should be neutral: 1 in multiplication.
107 | """
108 | if original_substrates_list is None:
109 | warning = "Score is set to 1 for cmp {} and rule {}".format(compound, rule)
110 | tanimoto = 1
111 | else:
112 | tanimoto = 0
113 | for native_substrate in original_substrates_list:
114 | query_substrate = compound._get_ECFP()
115 | tanimoto_this = DataStructs.cDataStructs.TanimotoSimilarity(query_substrate, native_substrate)
116 | warning = None
117 | tanimoto = max(tanimoto, tanimoto_this)
118 | return(tanimoto, warning)
119 |
120 | def substrate_and_product_calculation(compound, products, rule, original_substrates_list = None, original_products_list_list = None):
121 | """
122 |     If the original_substrates_list is None, it means chemical scoring is not implemented and scoring should be neutral: 1 in multiplication.
123 | """
124 | warning = None
125 | if original_substrates_list is None:
126 | warning = "Score is set to 1 for cmp {} and rule {}".format(compound, rule)
127 | tanimoto = 1
128 | return(tanimoto, warning)
129 | else:
130 | tanimoto = 0
131 | for i in range(len(original_substrates_list)):
132 | native_substrate = original_substrates_list[i]
133 | query_substrate = compound._get_ECFP()
134 | tanimoto_substrate = DataStructs.cDataStructs.TanimotoSimilarity(query_substrate, native_substrate)
135 | warning = None
136 | query_products_ecfp = []
137 | for prod in products:
138 | query_products_ecfp.append(prod._get_ECFP())
139 | prod_result = tanimoto_product_calc(original_products_list_list[i], query_products_ecfp, verbose = False)
140 | if prod_result == -1:
141 | warning = "Number of product issue with rule {} and products {}".format(rule, products)
142 | tanimoto = max(tanimoto, tanimoto_substrate * prod_result)
143 | return(tanimoto, warning)
144 |
145 | def constant_scorer(compound, products, rule, original_substrates_list = None, original_products_list_list = None):
146 | warning = None
147 | return(1, warning)
148 |
149 | RandomChemicalScorer = ChemicalScoring(scoring_function = pseudo_random, name = "RandomChemicalScorer")
150 | SubstrateChemicalScorer = ChemicalScoring(scoring_function = substrate_calculation, name = "SubstrateChemicalScorer")
151 | SubandprodChemicalScorer = ChemicalScoring(scoring_function = substrate_and_product_calculation, name = "SubandprodChemicalScorer")
152 | ConstantChemicalScorer = ChemicalScoring(scoring_function = constant_scorer, name = "ConstantChemicalScorer")
153 | # Chemical scoring utilities. Taken from similarity.
154 |
--------------------------------------------------------------------------------
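
A minimal sketch of the product-side scoring helper above (SMILES chosen for illustration), using radius-2, 1024-bit Morgan fingerprints as elsewhere in the project.

from rdkit import Chem
from rdkit.Chem import AllChem

from chemical_scoring import tanimoto_product_calc


def ecfp(smiles):
    # Radius-2, 1024-bit Morgan fingerprint, as used elsewhere in the project.
    return AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(smiles), 2, nBits=1024)


native_products = [ecfp("CC(=O)O"), ecfp("O")]  # acetate + water
query_products = [ecfp("CCC(=O)O"), ecfp("O")]  # propionate + water

# Best geometric-mean Tanimoto over all pairings of native vs. query products.
print(tanimoto_product_calc(native_products, query_products))
# Different number of products on each side: the rule is rejected with a score of -1.
print(tanimoto_product_calc(native_products, [ecfp("O")]))
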
/chemistry_choices.md:
--------------------------------------------------------------------------------
1 | The aim of this file is to document the chemoinformatics choices made while correcting bugs, and the reasoning behind them.
2 |
3 | # Chemical rule application.
4 |
5 | When a rule applies to a substrate and after standardisation produces this substrate again (S -> S + I), the rule is deleted as this is not biological.
6 | This is corrected at the compound stage.
7 |
8 | # Compound equality: either main layer or full inchikey
9 | Choices: usually less stringent for the chassis.
10 |
11 | # Moves generating duplicate compounds:
12 | - Only unique compounds are conserved.
13 | - Logs will say it is merged (and conserve the number of compounds in the stoichiometry dictionary)
14 |
15 |
16 | # Moves generating the same compounds:
17 |
18 | Keep the one with the higher score. In practice, also keeping the synonyms (transformation IDs) of the other moves generating the same compounds.
19 |
20 | # History of the state.
21 |
22 | Keeping a history of visited compounds (excluding the organism's compounds).
23 | - Refuse moves that generate compounds present in the history to avoid loops.
24 |
25 | # Refusing rules that produce a different number of compounds than the original.
26 | 
27 | This can happen when a rule learned on 2 molecules matches subgroups of a much bigger molecule during the retrosynthetic search.
28 | It is unrealistic to expect an enzyme to work this way.
29 |
--------------------------------------------------------------------------------
/compound_scoring.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines the compound scoring function.
3 | Currently implements toxicity in E. coli, based on data from EcoliTox.
4 | """
5 |
6 | # General use packages
7 | import random
8 | import numpy as np
9 | import sys
10 | import csv
11 | import math
12 | import logging
13 | from rdkit.Chem import AllChem, DataStructs  # AllChem is needed for the Morgan fingerprints below
14 | from rdkit import Chem
15 |
16 | from config import *
17 |
18 |
19 | class CompoundScoring(object):
20 | """
21 | Defines Compound Scorer object.
22 | """
23 | logger = logging.getLogger(__name__)
24 | def __init__(self, scoring_function = None):
25 | if scoring_function is None:
26 | pass
27 | else:
28 | self.scoring_function = scoring_function
29 |
30 | def __repr__(self):
31 | """
32 | Name the used scorer.
33 |         Raises an error if the class is not properly instantiated.
34 | """
35 | return(self.name)
36 |
37 | def calculate(self, compound):
38 | score = self.scoring_function(compound)
39 | return(score)
40 |
41 | def pseudo_random(compound):
42 | score = random.uniform(0, 10)
43 | return(score)
44 |
45 |
46 | class ToxicityScoring(CompoundScoring):
47 | """
48 | Returns the log toxicity value of a compound.
49 | The data is stored in a csv file, tab delimited, with columns "name", 'InChI' and "toxicity"
50 | This can easily be changed to another data with a similar formatting.
51 | """
52 | def __init__(self, toxicity_data = "{}/name_structure_toxicity.csv".format(DATA_PATH)):
53 | CompoundScoring.__init__(self)
54 | self.scoring_function = self.scoring_function()
55 | self.name = "ToxicityScoring"
56 | self.fit_model(toxicity_data)
57 |
58 | def calculate_ECFP(self,inchi):
59 | rdmol = Chem.inchi.MolFromInchi(inchi, sanitize=False)
60 | # rd_mol = standardize_chemical(rdmol, add_hs=False, heavy = True, rm_stereo=True)
61 | ECFP= Chem.AllChem.GetMorganFingerprintAsBitVect(rdmol, radius = 2, nBits=1024, useFeatures = False, useChirality = False)
62 | return(ECFP)
63 |
64 | def select_current_best_model(self, X, y,
65 | models_number = 10,
66 | verbose = False):
67 |
68 | trained_model_list = []
69 | # Training all models
70 | for i in range(models_number):
71 | X_train, y_train = X, y
72 | other_MLP = MLPRegressor(hidden_layer_sizes = (10, 100,100, 20), solver ="adam", max_iter=20000,
73 | early_stopping = True, learning_rate = "adaptive")
74 | other_MLP.fit(X_train, y_train.flatten())
75 | trained_model_list.append(other_MLP)
76 |
77 | big_MLP = MLPRegressor(hidden_layer_sizes = (100,100, 20),solver ="adam", max_iter=20000,
78 | early_stopping = True, learning_rate = "adaptive")
79 | big_MLP.fit(X_train, y_train.flatten())
80 | trained_model_list.append(big_MLP)
81 |
82 |
83 | medium_MLP = MLPRegressor(hidden_layer_sizes = (40, 10), solver ="adam", max_iter=20000,
84 | early_stopping = True, learning_rate = "adaptive")
85 | medium_MLP.fit(X_train, y_train.flatten())
86 | trained_model_list.append(medium_MLP)
87 |
88 |         small_MLP = MLPRegressor(hidden_layer_sizes = (10,), solver ="adam", max_iter=20000,
89 | early_stopping = True, learning_rate = "adaptive")
90 | small_MLP.fit(X_train, y_train.flatten())
91 | trained_model_list.append(small_MLP)
92 |
93 | # Evaluating all
94 | all_scores = []
95 | for i in range(len(trained_model_list)):
96 |             selected_model = trained_model_list[i]
97 |             y_pred = selected_model.predict(X)
98 | score = sklearn.metrics.r2_score(y, y_pred)
99 | all_scores.append(score)
100 |
101 | try:
102 | best_index = all_scores.index(max(all_scores))
103 | best_score = all_scores[best_index]
104 | except ValueError:
105 | best_index = 0
106 | best_model = trained_model_list[best_index]
107 | return(best_model, best_score)
108 |
109 | def fit_model(self,toxicity_data):
110 | y = []
111 | X = None
112 | # Loading data
113 | with open(toxicity_data, "r") as file_hdl:
114 | reader = csv.DictReader(file_hdl, delimiter = '\t')
115 | for row in reader:
116 | y.append(math.log(float(row["toxicity"])))
117 | arr = np.zeros((1,))
118 | fp = self.calculate_ECFP(row["InChI"])
119 | DataStructs.ConvertToNumpyArray(fp, arr)
120 | arr = np.reshape(arr, (1, 1024))
121 | if X is None:
122 | X = arr
123 | else:
124 | X = np.concatenate((X, arr), axis = 0)
125 | self.log_loading = "Loaded {} compounds from {}".format(len(y), toxicity_data)
126 | y = np.array(y)
127 |         # Fitting model:
128 | best_model, score = self.select_current_best_model(X, y, models_number = 10)
129 | y_pred = best_model.predict(X)
130 | score = sklearn.metrics.r2_score(y, y_pred)
131 | self.log_score = "The toxicity model has a R2 score of {} on itself".format(round(score, 2))
132 | self.model = best_model
133 |
134 | def scoring_function(self):
135 |         # Returns a closure that predicts log-toxicity from a compound's ECFP
136 | def compound_scoring(compound):
137 | ECFP = compound._get_ECFP()
138 | arr = np.zeros((1,))
139 | DataStructs.ConvertToNumpyArray(ECFP, arr)
140 | arr = np.reshape(arr, (1, 1024))
141 | y_pred = self.model.predict(arr)
142 | return(y_pred)
143 | return(compound_scoring)
144 |
145 |
146 | RandomCompoundScorer = CompoundScoring(scoring_function = pseudo_random)
147 | if use_toxicity:
148 | toxicity_scorer = ToxicityScoring()
149 |
--------------------------------------------------------------------------------
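A minimal usage sketch for the toxicity scorer above, assuming scikit-learn and RDKit are installed and the snippet is run from the repository root so that data/name_structure_toxicity.csv is found; the ethanol InChI is only an illustrative input:

# Sketch only: exercise the ToxicityScoring class defined above.
import numpy as np
from rdkit import DataStructs

from compound_scoring import ToxicityScoring

scorer = ToxicityScoring()   # fits the MLP on data/name_structure_toxicity.csv at construction
print(scorer.log_loading)    # e.g. "Loaded N compounds from ..."
print(scorer.log_score)      # self-fit R2 of the selected model

# Score an arbitrary structure by reusing the class' own fingerprint helper.
fp = scorer.calculate_ECFP("InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3")  # ethanol, illustrative only
arr = np.zeros((1,))
DataStructs.ConvertToNumpyArray(fp, arr)
print(scorer.model.predict(arr.reshape(1, 1024)))  # predicted log(IC50)

Note that the MLP is re-fitted every time ToxicityScoring() is instantiated, which is also what happens at import time when use_toxicity is enabled in config.py.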
/config.py:
--------------------------------------------------------------------------------
1 | """
2 | the aim of this file is to store configuration parameters, notably for the DB.
3 | It replaces what I previously wanted to define as 'global'
4 | """
5 | try:
6 | from rp3_dcache.Manager import Manager # In house module
7 | from rp3_dcache.Utils import make_document_id, as_document, rdmols_from_document
8 | dcache_installed = True
9 | except ModuleNotFoundError:
10 | dcache_installed = False
11 | import logging
12 | import os
13 |
14 | # Files and addresses configurations - should not be modified:
15 | global DATA_PATH
16 | DATA_PATH = "{}/data".format(os.path.dirname(os.path.abspath(__file__)))
17 |
18 | global add_Hs
19 | add_Hs = True
20 | hydrogen_config = "Using explicit hydrogens : {}".format(add_Hs)
21 |
22 | # Database for storing results configuration
23 | global DB_CACHE
24 | global DB_REPLACE
25 | DB_CACHE = False and dcache_installed
26 | DB_REPLACE = False and dcache_installed
27 | DB_time = 0
28 | if DB_CACHE:
29 | global CACHE_MGR
30 | if add_Hs:
31 | CACHE_MGR = Manager(replace=DB_REPLACE, collection = "results_with_H")
32 | else:
33 | CACHE_MGR = Manager(replace=DB_REPLACE, collection = "results_without_H")
34 | CACHE_MGR.connect()
35 | DB_config = "Setting the DB from config file: Installed package: {}. Using cache DB: {}; Replacing results: {}".format(dcache_installed, DB_CACHE, DB_REPLACE)
36 | elif dcache_installed:
37 | DB_config = "Setting the DB from config file: Installed package: {}. Using cache DB: {}; Replacing results: {}".format(dcache_installed, DB_CACHE, DB_REPLACE)
38 | else:
39 | DB_config = "Setting the DB from config file: Installed package: {}".format(dcache_installed)
40 |
41 | # Mode for using RP3: retrosynthesis or biosensor. QSAR might be implemented one day.
42 | global retrosynthesis
43 | global biosensor
44 | retrosynthesis = True
45 | biosensor = False
46 | tree_mode_config = "Using retrosynthesis: {} - using biosensor {}".format(retrosynthesis, biosensor)
47 |
48 | # Configuring local cache. Could be replaced by a proper caching system one day.
49 | global home_made_cache
50 | home_made_cache = {}
51 |
52 | global use_cache
53 | use_cache = False
54 |
55 | cache_config = "Initialising an empty cache: {}; Using it: {}".format(home_made_cache, use_cache)
56 |
57 | # MCTS parameters for configuration
58 |
59 | global transposition_table
60 | global use_transpositions
61 | global use_transpositions_depth
62 |
63 | transposition_table = {}
64 | use_transpositions = False
65 | use_transpositions_depth = False
66 |
67 | transposition_table_config = "Using transposition tables: {}. With depth: {}".format(use_transpositions, use_transpositions_depth)
68 |
69 | # For toxicity, using log(IC50) as penalty when below 0.
70 | global use_toxicity
71 | try:
72 | import sklearn
73 | from sklearn.neural_network import MLPRegressor
74 | sklearn_here = True
75 | except ModuleNotFoundError:
76 | toxicity_config = "Toxicity will not be enabled because sklearn is not installed"
77 | sklearn_here = False
78 | use_toxicity = False
79 | use_toxicity = use_toxicity and sklearn_here
80 |
--------------------------------------------------------------------------------
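The *_config strings defined above are summary messages describing the active configuration; a minimal sketch of echoing them at start-up (the basicConfig call here is only an assumption for the example):

# Sketch only: echo the configuration summary strings defined in config.py.
import logging

import config

logging.basicConfig(level=logging.INFO)  # logging set-up is for this example only
for line in (config.hydrogen_config, config.DB_config, config.tree_mode_config,
             config.cache_config, config.transposition_table_config):
    logging.info(line)
# toxicity_config is only defined when sklearn is missing, hence the getattr guard.
logging.info(getattr(config, "toxicity_config", "Toxicity enabled: {}".format(config.use_toxicity)))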
/convert_to_SBML.py:
--------------------------------------------------------------------------------
1 | """
2 | Converts pathways from JSON format to SBML format.
3 | """
4 |
5 | # General utilities
6 | import sys
7 | import logging
8 | import csv
9 | import copy
10 | import json
11 | import pickle
12 | import libsbml
13 | from hashlib import md5
14 | import os
15 | import argparse
16 |
17 | # RP3 specific objects
18 | from compound import Compound
19 | from move import Move
20 |
21 | def _nameToSbmlId(name):
22 | IdStream = []
23 | count = 0
24 | end = len(name)
25 | if '0' <= name[count] and name[count] <= '9':
26 | IdStream.append('_')
27 | for count in range(0, end):
28 | if (('0' <= name[count] and name[count] <= '9') or
29 | ('a' <= name[count] and name[count] <= 'z') or
30 | ('A' <= name[count] and name[count] <= 'Z')):
31 | IdStream.append(name[count])
32 | else:
33 | IdStream.append('_')
34 | Id = ''.join(IdStream)
35 | if Id[len(Id) - 1] != '_':
36 | return Id
37 | return Id[:-1]
38 |
39 | def add_specy(sbml_model,
40 | chemId = 'Id_cmpound',
41 | smiles = "smilescomppoun",
42 | inchi = "inchicompounds",
43 | inchiKey = "inchiKeycomppoun",
44 | name = "compounds_name",
45 | in_sink = False):
46 |
47 | spe = sbml_model.createSpecies()
48 | spe.setCompartment("cytoplasm")
49 | spe.setHasOnlySubstanceUnits(False)
50 | spe.setBoundaryCondition(False)
51 | spe.setConstant(False)
52 | spe.setInitialConcentration(1.0)
53 | clean_id = str(chemId)+'__64__'+str("cytoplasm")
54 | clean_id = clean_id.replace('-', '_') # No - in name
55 | metaid = _nameToSbmlId(md5(str(name).encode('utf-8')).hexdigest())
56 | spe.setMetaId(metaid)
57 | spe.setName(name)
58 | if in_sink:
59 | annotation = '''
60 | '''
62 | annotation += '''
63 |
64 |
65 | '''+str(smiles or '')+'''
66 | '''+str(inchi or '')+'''
67 | '''+str(inchiKey or '')+'''
68 | '''+ str(True)+'''
69 |
70 | '''
71 | annotation += '''
72 |
73 | '''
74 | else:
75 | annotation = '''
76 | '''
78 | annotation += '''
79 |
80 |
81 | '''+str(smiles or '')+'''
82 | '''+str(inchi or '')+'''
83 | '''+str(inchiKey or '')+'''
84 |
85 | '''
86 | annotation += '''
87 |
88 | '''
89 | spe.setAnnotation(annotation)
90 | return(sbml_model)
91 |
92 | def add_reaction(sbml_model,
93 | reacId = 'Id_reac',
94 | ec = "Test_ec",
95 | rule_id = "rule_id",
96 | biological_score = "biological_score",
97 | chemical_score = "chemical_score",
98 | reactant_stoechio = {},
99 | product = "product_name",
100 | reaction_smiles = "reaction_smiles",
101 | diameter = "diameter"):
102 | reac = sbml_model.createReaction()
103 |
104 | reac_fbc = reac.getPlugin('fbc')
105 | reac_fbc.setUpperFluxBound('B_999999')
106 | reac_fbc.setLowerFluxBound('B_0')
107 | #reactions
108 | reac.setId(reacId)
109 | reac.setSBOTerm(185)
110 | reac.setReversible(True)
111 | reac.setFast(False)
112 | metaid = _nameToSbmlId(md5(str(reacId).encode('utf-8')).hexdigest())
113 | reac.setMetaId(metaid)
114 | #reactants_dict
115 | for reactant in reactant_stoechio.keys():
116 | chemId = reactant
117 | spe = reac.createReactant()
118 | clean_id = str(chemId)+'__64__'+str("cytoplasm")
119 | clean_id = clean_id.replace('-', '_') # No - in name
120 | spe.setSpecies(clean_id)
121 | spe.setConstant(True)
122 | try:
123 | stoechio = reactant_stoechio[reactant]
124 | except KeyError:
125 | stoechio = 1
126 | spe.setStoichiometry(stoechio)
127 | #products_dict
128 | if not product is None:
129 | pro = reac.createProduct()
130 | clean_id = str(product)+'__64__'+str("cytoplasm")
131 | clean_id = clean_id.replace('-', '_') # No - in name
132 | pro.setSpecies(clean_id)
133 | pro.setConstant(True)
134 | pro.setStoichiometry(1)
135 | #annotation
136 | annotation = '''
137 | '''
139 |
140 | annotation += '''
141 |
142 |
143 | '''+str(reaction_smiles or '')+'''
144 | '''+str(rule_id or '')+'''
145 | '''+str(ec)+'''
146 |
147 |
148 |
149 |
150 |
151 |
152 | '''
153 | reac.setAnnotation(annotation)
154 | return(sbml_model)
155 |
156 |
157 | def convert_json_to_SBML(json_file, modelID = "test", folder_to_save = 'temp'):
158 | # Set up the empty model
159 | smbl_namespace = libsbml.SBMLNamespaces(3,1)
160 | smbl_namespace.addPkgNamespace('fbc',2)
161 | smbl_namespace.addPkgNamespace('groups',2)
162 | document = libsbml.SBMLDocument(smbl_namespace)
163 | sbml_model = document.createModel()
164 | sbml_model.getPlugin('fbc')
165 | sbml_model.getPlugin('groups')
166 | sbml_model.setId(modelID)
167 | sbml_model.setName(modelID)
168 | sbml_model.setTimeUnits('second')
169 | sbml_model.setExtentUnits('mole')
170 | sbml_model.setSubstanceUnits('mole')
171 | # Could implement units, currently removed from the model
172 |     # Should have it in a separate function
173 | compartment = sbml_model.createCompartment()
174 | compartment.setId("cytoplasm")
175 | target_node = None
176 | for node in json_file["elements"]["nodes"]:
177 | if node["data"]["type"] == "compound":
178 | sbml_model = add_specy(sbml_model,
179 | chemId = node["data"]["id"],
180 | smiles = node["data"]["SMILES"],
181 | inchi = node["data"]["InChI"],
182 | inchiKey = node["data"]["id"],
183 | name = ",".join(node["data"]["Names"]),
184 | in_sink = node["data"]["inSink"] == 1)
185 | if node["data"]["isSource"] == 1:
186 | logging.info("Target node is {}".format(node["data"]["id"]))
187 | target_node = node
188 | for element in sbml_model.getListOfSpecies():
189 | logging.debug(element)
190 | for node in json_file["elements"]["nodes"]:
191 | if node["data"]["type"] == "reaction":
192 | try:
193 | reactant_stoechio = node["data"]["Stoechiometry"]
194 | except KeyError:
195 | reactant_stoechio = {}
196 | sbml_model = add_reaction(sbml_model,
197 | reacId = node["data"]["id"],
198 | ec = ','.join(node["data"]["EC number"]),
199 | rule_id = ','.join(node["data"]["Rule ID"]),
200 | biological_score = node["data"]["Score"],
201 | chemical_score = node["data"]["ChemicalScore"],
202 | reactant_stoechio = reactant_stoechio,
203 | product = node["data"]["id"].split("-RR")[0],
204 | reaction_smiles = node["data"]["Reaction SMILES"],
205 | diameter = node["data"]["Diameter"])
206 | sbml_model = add_reaction(sbml_model,
207 | reacId = "production",
208 | ec = 'NA',
209 | rule_id = 'NA',
210 | biological_score = 'NA',
211 | chemical_score = 'NA',
212 | reactant_stoechio = {target_node["data"]["id"]: 1},
213 | product = None,
214 | reaction_smiles = 'NA',
215 | diameter = 'NA')
216 |
217 | document.setModel(sbml_model)
218 | libsbml.writeSBMLToFile(document,'{}/{}.xml'.format(folder_to_save, modelID))
219 | pass
220 |
221 |
222 | def __cli():
223 | def define_folder_to_save(folder):
224 | if folder is None:
225 | folder_to_save = os.path.join('debugging_results', args.c_name)
226 | else:
227 | folder_to_save = folder
228 | if not os.path.exists(folder_to_save):
229 | os.makedirs(folder_to_save, exist_ok=True)
230 | return folder_to_save
231 | d = "Command line interface to convert json files to SBML files"
232 | parser = argparse.ArgumentParser(description=d)
233 | # Logs and saving information
234 | """Command line interface to convert json files to SBML files"""
235 | parser.add_argument("--verbose", help="Default logger is INFO, switch to DEBUG is specified",
236 | dest='verbose', action='store_true', default=False)
237 | parser.add_argument("--log_file", help="Default logger is stderr, switch to log_file if specified",
238 | default=None)
239 | parser.add_argument("--folder_to_save",
240 | help="Folder to store results. Default: temp",
241 | default="temp")
242 | parser.add_argument("--json_convert",
243 | help="File to convert",
244 | default="deoxi_07_no_H/deoxiviolacein_iteration_85.json")
245 | parser.add_argument("--file_name", help = 'File name if name changes.', default = None)
246 | args = parser.parse_args()
247 | # Setting up the logs
248 | if args.verbose:
249 | logging_level = logging.DEBUG
250 | else:
251 | logging_level = logging.INFO
252 | if args.log_file is None:
253 | logging.basicConfig(stream=sys.stderr,
254 | level=logging_level,
255 | datefmt='%d/%m/%Y %H:%M:%S',
256 | format='%(asctime)s -- %(levelname)s -- %(message)s')
257 | else:
258 | if not "log" in args.log_file:
259 | log_file = "log_" + args.log_file
260 | else:
261 | log_file = args.log_file
262 |         log_writer = open("{}/{}".format(define_folder_to_save(args.folder_to_save), log_file), "w")
263 | logging.basicConfig(stream=log_writer,
264 | level=logging_level,
265 | datefmt='%d/%m/%Y %H:%M:%S',
266 | format='%(asctime)s -- %(levelname)s -- %(message)s')
267 |
268 | folder_to_save = define_folder_to_save(args.folder_to_save)
269 | # Choosing file
270 | if args.file_name is None:
271 | model_ID = args.json_convert.split("/")[-1].split(".json")[0]
272 | else:
273 | model_ID = args.file_name
274 | pathway_to_test = json.load(open(args.json_convert, "r"))
275 | convert_json_to_SBML(pathway_to_test, model_ID, folder_to_save = folder_to_save)
276 |
277 | if __name__ == "__main__":
278 | __cli()
279 |
--------------------------------------------------------------------------------
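A minimal sketch of driving the conversion above without the CLI, assuming libsbml is installed and the snippet is run from the repository root; the output folder name is arbitrary:

# Sketch only: convert one of the shipped expected results to SBML.
import json
import os

from convert_to_SBML import convert_json_to_SBML

os.makedirs("temp", exist_ok=True)  # convert_json_to_SBML writes <folder_to_save>/<modelID>.xml
with open("expected_results/deoxiviolacein_1.json") as handle:
    pathway = json.load(handle)
convert_json_to_SBML(pathway, modelID="deoxiviolacein_1", folder_to_save="temp")
# Equivalent CLI call:
# python convert_to_SBML.py --json_convert expected_results/deoxiviolacein_1.json --folder_to_save temp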
/data/base_config.py:
--------------------------------------------------------------------------------
1 | """
2 | the aim of this file is to store configuration parameters, notably for the DB.
3 | It replaces what I previously wanted to define as 'global'
4 | """
5 | try:
6 | from rp3_dcache.Manager import Manager # In house module
7 | from rp3_dcache.Utils import make_document_id, as_document, rdmols_from_document
8 | dcache_installed = True
9 | except ModuleNotFoundError:
10 | dcache_installed = False
11 | import logging
12 | import os
13 |
14 | # Files and addresses configurations - should not be modified:
15 | global DATA_PATH
16 | DATA_PATH = "{}/data".format(os.path.dirname(os.path.abspath(__file__)))
17 |
18 | global add_Hs
19 | add_Hs = True
20 | hydrogen_config = "Using explicit hydrogens : {}".format(add_Hs)
21 |
22 | # Database for storing results configuration
23 | global DB_CACHE
24 | global DB_REPLACE
25 | DB_CACHE = False and dcache_installed
26 | DB_REPLACE = False and dcache_installed
27 | DB_time = 0
28 | if DB_CACHE:
29 | global CACHE_MGR
30 | if add_Hs:
31 | CACHE_MGR = Manager(replace=DB_REPLACE, collection = "results_with_H")
32 | else:
33 | CACHE_MGR = Manager(replace=DB_REPLACE, collection = "results_without_H")
34 | CACHE_MGR.connect()
35 | DB_config = "Setting the DB from config file: Installed package: {}. Using cache DB: {}; Replacing results: {}".format(dcache_installed, DB_CACHE, DB_REPLACE)
36 | elif dcache_installed:
37 | DB_config = "Setting the DB from config file: Installed package: {}. Using cache DB: {}; Replacing results: {}".format(dcache_installed, DB_CACHE, DB_REPLACE)
38 | else:
39 | DB_config = "Setting the DB from config file: Installed package: {}".format(dcache_installed)
40 |
41 | # Mode for using RP3: retrosynthesis or biosensor. QSAR might be implemented one day.
42 | global retrosynthesis
43 | global biosensor
44 | retrosynthesis = True
45 | biosensor = False
46 | tree_mode_config = "Using retrosynthesis: {} - using biosensor {}".format(retrosynthesis, biosensor)
47 |
48 | # Configuring local cache. Could be replaced by a proper caching system one day.
49 | global home_made_cache
50 | home_made_cache = {}
51 |
52 | global use_cache
53 | use_cache = True
54 |
55 | cache_config = "Initialising an empty cache: {}; Using it: {}".format(home_made_cache, use_cache)
56 |
57 | # MCTS parameters for configuration
58 |
59 | global transposition_table
60 | global use_transpositions
61 | global use_transpositions_depth
62 |
63 | transposition_table = {}
64 | use_transpositions = False
65 | use_transpositions_depth = False
66 |
67 | transposition_table_config = "Using transposition tables: {}. With depth: {}".format(use_transpositions, use_transpositions_depth)
68 |
69 | # For toxicity, using log(IC50) as penalty when below 0.
70 | global use_toxicity
71 | try:
72 | import sklearn
73 | from sklearn.neural_network import MLPRegressor
74 | sklearn_here = True
75 | except ModuleNotFoundError:
76 | toxicity_config = "Toxicity will not be enabled because sklearn is not installed"
77 | sklearn_here = False
78 | use_toxicity = False
79 | use_toxicity = use_toxicity and sklearn_here
80 |
--------------------------------------------------------------------------------
/data/compounds_to_add/TPA_to_add.csv:
--------------------------------------------------------------------------------
1 | name,inchi
2 | MNXM162174,"InChI=1S/C8H10/c1-7-3-5-8(2)6-4-7/h3-6H,1-2H3"
3 |
--------------------------------------------------------------------------------
/data/golden_dataset.csv:
--------------------------------------------------------------------------------
1 | name inchi file_to_add
2 | 1,4-Butanediol InChI=1S/C4H10O2/c5-3-1-2-4-6/h5-6H,1-4H2
3 | 2,3-amino-1,3-propanediol InChI=1S/C3H9NO2/c4-3(1-5)2-6/h3,5-6H,1-2,4H2
4 | 2,5-DHBA InChI=1S/C7H6O4/c8-4-1-2-6(9)5(3-4)7(10)11/h1-3,8-9H,(H,10,11)
5 | 3-methylbutanol InChI=1S/C5H12O/c1-5(2)3-4-6/h5-6H,3-4H2,1-2H3
6 | N-methylpyrrolinium InChI=1S/C5H10N/c1-6-4-2-3-5-6/h4H,2-3,5H2,1H3/q+1
7 | benzyl_alcohol InChI=1S/C7H8O/c8-6-7-4-2-1-3-5-7/h1-5,8H,6H2
8 | caroten InChI=1S/C40H56/c1-31(19-13-21-33(3)25-27-37-35(5)23-15-29-39(37,7)8)17-11-12-18-32(2)20-14-22-34(4)26-28-38-36(6)24-16-30-40(38,9)10/h11-14,17-22,25-28H,15-16,23-24,29-30H2,1-10H3/b12-11+,19-13+,20-14+,27-25+,28-26+,31-17+,32-18+,33-21+,34-22+
9 | cis,cis-muconate InChI=1S/C6H6O4/c7-5(8)3-1-2-4-6(9)10/h1-4H,(H,7,8)(H,9,10)/p-2/b3-1-,4-2-
10 | violacein InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+
11 | glutaric_acid InChI=1S/C5H8O4/c6-4(7)2-1-3-5(8)9/h1-3H2,(H,6,7)(H,8,9)
12 | mesaconic_acid InChI=1S/C5H6O4/c1-3(5(8)9)2-4(6)7/h2H,1H3,(H,6,7)(H,8,9)/b3-2+
13 | naringenin InChI=1S/C15H12O5/c16-9-3-1-8(2-4-9)13-7-12(19)15-11(18)5-10(17)6-14(15)20-13/h1-6,13,16-18H,7H2
14 | p-hydroxystyrene InChI=1S/C8H8O/c1-2-7-3-5-8(9)6-4-7/h2-6,9H,1H2
15 | piceatannol InChI=1S/C14H12O4/c15-11-5-10(6-12(16)8-11)2-1-9-3-4-13(17)14(18)7-9/h1-8,15-18H/b2-1+
16 | protopanaxadiol InChI=1S/C30H52O3/c1-19(2)10-9-14-30(8,33)20-11-16-29(7)25(20)21(31)18-23-27(5)15-13-24(32)26(3,4)22(27)12-17-28(23,29)6/h10,20-25,31-33H,9,11-18H2,1-8H3/t20-,21+,22-,23+,24-,25-,27-,28+,29+,30+/m0/s1
17 | TPA InChI=1S/C8H6O4/c9-7(10)5-1-2-6(4-3-5)8(11)12/h1-4H,(H,9,10)(H,11,12) clean_data/compounds_to_add/TPA_to_add.csv
18 | vanillin InChI=1S/C8H8O3/c1-11-8-4-6(5-9)2-3-7(8)10/h2-5,10H,1H3
19 | lycopene InChI=1S/C40H56/c1-33(2)19-13-23-37(7)27-17-31-39(9)29-15-25-35(5)21-11-12-22-36(6)26-16-30-40(10)32-18-28-38(8)24-14-20-34(3)4/h11-12,15-22,25-32H,13-14,23-24H2,1-10H3/b12-11+,25-15+,26-16+,31-17+,32-18+,35-21+,36-22+,37-27+,38-28+,39-29+,40-30+
20 | pinocembrin InChI=1S/C15H12O4/c16-10-6-11(17)15-12(18)8-13(19-14(15)7-10)9-4-2-1-3-5-9/h1-7,13,16-17H,8H2/t13-/m0/s1
21 | styrene InChI=1S/C8H8/c1-2-8-6-4-3-5-7-8/h2-7H,1H2
22 |
--------------------------------------------------------------------------------
/data/sinks/ecoli_core_sink_reduced_rp_ready.csv:
--------------------------------------------------------------------------------
1 | "name","inchi"
2 | "2-Oxoglutarate","InChI=1S/C5H6O5/c6-3(5(9)10)1-2-4(7)8/h1-2H2,(H,7,8)(H,9,10)"
3 | "3-Phospho-D-glycerate","InChI=1S/C3H7O7P/c4-2(3(5)6)1-10-11(7,8)9/h2,4H,1H2,(H,5,6)(H2,7,8,9)"
4 | "3-Phospho-D-glyceroyl phosphate","InChI=1S/C3H8O10P2/c4-2(1-12-14(6,7)8)3(5)13-15(9,10)11/h2,4H,1H2,(H2,6,7,8)(H2,9,10,11)"
5 | "6-Phospho-D-gluconate","InChI=1S/C6H13O10P/c7-2(1-16-17(13,14)15)3(8)4(9)5(10)6(11)12/h2-5,7-10H,1H2,(H,11,12)(H2,13,14,15)"
6 | "6-phospho-D-glucono-1,5-lactone","InChI=1S/C6H11O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-5,7-9H,1H2,(H2,11,12,13)"
7 | "ADP C10H12N5O10P2","InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20)"
8 | "AMP C10H12N5O7P","InChI=1S/C10H14N5O7P/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(22-10)1-21-23(18,19)20/h2-4,6-7,10,16-17H,1H2,(H2,11,12,13)(H2,18,19,20)"
9 | "ATP C10H12N5O13P3","InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)"
10 | "Acetaldehyde","InChI=1S/C2H4O/c1-2-3/h2H,1H3"
11 | "Acetate","InChI=1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)"
12 | "Acetyl phosphate","InChI=1S/C2H5O5P/c1-2(3)7-8(4,5)6/h1H3,(H2,4,5,6)"
13 | "Acetyl-CoA","InChI=1S/C23H38N7O17P3S/c1-12(31)51-7-6-25-14(32)4-5-26-21(35)18(34)23(2,3)9-44-50(41,42)47-49(39,40)43-8-13-17(46-48(36,37)38)16(33)22(45-13)30-11-29-15-19(24)27-10-28-20(15)30/h10-11,13,16-18,22,33-34H,4-9H2,1-3H3,(H,25,32)(H,26,35)(H,39,40)(H,41,42)(H2,24,27,28)(H2,36,37,38)"
14 | "Alpha-D-Ribose 5-phosphate","InChI=1S/C5H11O8P/c6-3-2(1-12-14(9,10)11)13-5(8)4(3)7/h2-8H,1H2,(H2,9,10,11)"
15 | "Ammonium","InChI=1S/H3N/h1H3"
16 | "CO2 CO2","InChI=1S/CO2/c2-1-3"
17 | "Cis-Aconitate","InChI=1S/C6H6O6/c7-4(8)1-3(6(11)12)2-5(9)10/h1H,2H2,(H,7,8)(H,9,10)(H,11,12)"
18 | "Citrate","InChI=1S/C6H8O7/c7-3(8)1-6(13,5(11)12)2-4(9)10/h13H,1-2H2,(H,7,8)(H,9,10)(H,11,12)"
19 | "Coenzyme A","InChI=1S/C21H36N7O16P3S/c1-21(2,16(31)19(32)24-4-3-12(29)23-5-6-48)8-41-47(38,39)44-46(36,37)40-7-11-15(43-45(33,34)35)14(30)20(42-11)28-10-27-13-17(22)25-9-26-18(13)28/h9-11,14-16,20,30-31,48H,3-8H2,1-2H3,(H,23,29)(H,24,32)(H,36,37)(H,38,39)(H2,22,25,26)(H2,33,34,35)"
20 | "D-Erythrose 4-phosphate","InChI=1S/C4H9O7P/c5-1-3(6)4(7)2-11-12(8,9)10/h1,3-4,6-7H,2H2,(H2,8,9,10)"
21 | "D-Fructose 1,6-bisphosphate","InChI=1S/C6H14O12P2/c7-4-3(1-16-19(10,11)12)18-6(9,5(4)8)2-17-20(13,14)15/h3-5,7-9H,1-2H2,(H2,10,11,12)(H2,13,14,15)"
22 | "D-Fructose 6-phosphate","InChI=1S/C6H13O9P/c7-2-6(10)5(9)4(8)3(15-6)1-14-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13)"
23 | "D-Glucose 6-phosphate","InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)"
24 | "D-Glycerate 2-phosphate","InChI=1S/C3H7O7P/c4-1-2(3(5)6)10-11(7,8)9/h2,4H,1H2,(H,5,6)(H2,7,8,9)"
25 | "D-Lactate","InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)"
26 | "D-Ribulose 5-phosphate","InChI=1S/C5H11O8P/c6-1-3(7)5(9)4(8)2-13-14(10,11)12/h4-6,8-9H,1-2H2,(H2,10,11,12)"
27 | "D-Xylulose 5-phosphate","InChI=1S/C5H11O8P/c6-1-3(7)5(9)4(8)2-13-14(10,11)12/h4-6,8-9H,1-2H2,(H2,10,11,12)"
28 | "Dihydroxyacetone phosphate","InChI=1S/C3H7O6P/c4-1-3(5)2-9-10(6,7)8/h4H,1-2H2,(H2,6,7,8)"
29 | "Ethanol","InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"
30 | "Formate","InChI=1S/CH2O2/c2-1-3/h1H,(H,2,3)"
31 | "Fumarate","InChI=1S/C4H4O4/c5-3(6)1-2-4(7)8/h1-2H,(H,5,6)(H,7,8)"
32 | "Glyceraldehyde 3-phosphate","InChI=1S/C3H7O6P/c4-1-3(5)2-9-10(6,7)8/h1,3,5H,2H2,(H2,6,7,8)"
33 | "Glyoxylate","InChI=1S/C2H2O3/c3-1-2(4)5/h1H,(H,4,5)"
34 | "H+","InChI=1S/p+1"
35 | "H2O H2O","InChI=1S/H2O/h1H2"
36 | "Isocitrate","InChI=1S/C6H8O7/c7-3(8)1-2(5(10)11)4(9)6(12)13/h2,4,9H,1H2,(H,7,8)(H,10,11)(H,12,13)"
37 | "L-Glutamate","InChI=1S/C5H9NO4/c6-3(5(9)10)1-2-4(7)8/h3H,1-2,6H2,(H,7,8)(H,9,10)"
38 | "L-Glutamine","InChI=1S/C5H10N2O3/c6-3(5(9)10)1-2-4(7)8/h3H,1-2,6H2,(H2,7,8)(H,9,10)"
39 | "L-Malate","InChI=1S/C4H6O5/c5-2(4(8)9)1-3(6)7/h2,5H,1H2,(H,6,7)(H,8,9)"
40 | "Nicotinamide adenine dinucleotide","InChI=1S/C21H27N7O14P2/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(32)14(30)11(41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(31)20(40-10)27-3-1-2-9(4-27)18(23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2,(H5-,22,23,24,25,33,34,35,36,37)/p+1"
41 | "Nicotinamide adenine dinucleotide - reduced","InChI=1S/C21H29N7O14P2/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(32)14(30)11(41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(31)20(40-10)27-3-1-2-9(4-27)18(23)33/h1,3-4,7-8,10-11,13-16,20-21,29-32H,2,5-6H2,(H2,23,33)(H,34,35)(H,36,37)(H2,22,24,25)"
42 | "Nicotinamide adenine dinucleotide phosphate","InChI=1S/C21H28N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1-4,7-8,10-11,13-16,20-21,29-31H,5-6H2,(H7-,22,23,24,25,32,33,34,35,36,37,38,39)/p+1"
43 | "Nicotinamide adenine dinucleotide phosphate - reduced","InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)"
44 | "O2 O2","InChI=1S/O2/c1-2"
45 | "Oxaloacetate","InChI=1S/C4H4O5/c5-2(4(8)9)1-3(6)7/h1H2,(H,6,7)(H,8,9)"
46 | "Phosphate","InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)"
47 | "Phosphoenolpyruvate","InChI=1S/C3H5O6P/c1-2(3(4)5)9-10(6,7)8/h1H2,(H,4,5)(H2,6,7,8)"
48 | "Pyruvate","InChI=1S/C3H4O3/c1-2(4)3(5)6/h1H3,(H,5,6)"
49 | "Sedoheptulose 7-phosphate","InChI=1S/C7H15O10P/c8-1-3(9)5(11)7(13)6(12)4(10)2-17-18(14,15)16/h4-8,10-13H,1-2H2,(H2,14,15,16)"
50 | "Succinate","InChI=1S/C4H6O4/c5-3(6)1-2-4(7)8/h1-2H2,(H,5,6)(H,7,8)"
51 | "Succinyl-CoA","InChI=1S/C25H40N7O19P3S/c1-25(2,20(38)23(39)28-6-5-14(33)27-7-8-55-16(36)4-3-15(34)35)10-48-54(45,46)51-53(43,44)47-9-13-19(50-52(40,41)42)18(37)24(49-13)32-12-31-17-21(26)29-11-30-22(17)32/h11-13,18-20,24,37-38H,3-10H2,1-2H3,(H,27,33)(H,28,39)(H,34,35)(H,43,44)(H,45,46)(H2,26,29,30)(H2,40,41,42)"
52 | "Ubiquinol-8","InChI=1S/C49H76O4/c1-36(2)20-13-21-37(3)22-14-23-38(4)24-15-25-39(5)26-16-27-40(6)28-17-29-41(7)30-18-31-42(8)32-19-33-43(9)34-35-45-44(10)46(50)48(52-11)49(53-12)47(45)51/h20,22,24,26,28,30,32,34,50-51H,13-19,21,23,25,27,29,31,33,35H2,1-12H3"
53 | "Ubiquinone-8","InChI=1S/C49H74O4/c1-36(2)20-13-21-37(3)22-14-23-38(4)24-15-25-39(5)26-16-27-40(6)28-17-29-41(7)30-18-31-42(8)32-19-33-43(9)34-35-45-44(10)46(50)48(52-11)49(53-12)47(45)51/h20,22,24,26,28,30,32,34H,13-19,21,23,25,27,29,31,33,35H2,1-12H3"
54 |
--------------------------------------------------------------------------------
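Sink files such as the one above use the quoted "name","inchi" layout expected by import_organism_from_csv in organisms.py; a minimal sketch that only reads the raw rows, without building Compound objects or standardising structures:

# Sketch only: load a sink file into a {name: InChI} dict, without Compound standardisation.
import csv

sink = {}
with open("data/sinks/ecoli_core_sink_reduced_rp_ready.csv") as handle:
    for row in csv.DictReader(handle, delimiter=","):
        if row["inchi"] not in (None, "", "None"):  # mirrors the check in import_organism_from_csv
            sink[row["name"]] = row["inchi"]
print("{} sink compounds loaded".format(len(sink)))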
/data/supplement_finder/data/metanetx_extracted_inchikeys.json.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/data/supplement_finder/data/metanetx_extracted_inchikeys.json.tar.gz
--------------------------------------------------------------------------------
/data/supplement_finder/tree_for_testing/TPA/pickles/tree_end_search.pkl.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/data/supplement_finder/tree_for_testing/TPA/pickles/tree_end_search.pkl.tar.gz
--------------------------------------------------------------------------------
/data/supplement_finder/tree_for_testing/morphine/pickles/tree_end_search.pkl.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/data/supplement_finder/tree_for_testing/morphine/pickles/tree_end_search.pkl.tar.gz
--------------------------------------------------------------------------------
/document_all_options.md:
--------------------------------------------------------------------------------
1 | # Documentation
2 |
3 | The aim of this file is to document all options available to run the MCTS and where to find them.
4 | More details are in the attached paper at https://doi.org/10.1101/800474, especially in the Appendix.
5 |
6 | ### Global configuration options
7 |
8 | - where: in the config.py file
9 | - how: either by modifying the config.py file by hand or by running change_config.py with its argparser (recommended)
10 |
11 | - DB_CACHE: uses the MongoDB cache when activated
12 | - DB_REPLACE: replaces data in the Mongo DB cache when activated
13 | - DB_time: time cut-off for storing in the DB: results are stored only if above the cut-off, otherwise the rule is applied by Python
14 | - use_cache: dictionary for caching results within the script. Highly recommended.
15 | - retrosynthesis: performs a retrosynthetic search; biosensor: performs a biosensor search
16 | Both cannot be activated at the same time.
17 | The main difference is how a state is considered successful: all compounds have to be found for retrosynthesis, and only one for biosensors
18 | - add_Hs: explicit hydrogens. Recommended to leave at False for faster calculations.
19 | - use_transpositions and transposition depth: not stable. Allow for sharing of information between nodes with the same chemical state but at different places in the tree, as done in doi:10.1007/BF03192151.
20 |
21 | ### Tree search configuration:
22 |
23 | - stop_at_first_result: stops once a single pathway is found.
24 | - c_name, c_smiles, c_inchi: information on the chemical compound of interest
25 | - fire_timeout, standardisation_timeout: time allowed for firing a rule/standardising a compound
26 | - organism_name: which model to use for production of compounds
27 | - complementary_sink: csv file containing compounds to add to the sink. If organism_name is None, this file is the full sink.
28 | - representation: how to print results in logs
29 | - itermax: maximum number of iterations allowed for running the Tree search
30 | - parallel: not possible to use at the moment due to workaround for RDKit rule application. Aimed at parallelising rollouts.
31 | - expansion_width: maximum number of children per node
32 | - time_budget: time allowed for running the tree search. The search stops at the end of the first iteration that exceeds this allotted time
33 | - max_depth: maximum depth of the Tree (also the maximum number of pathway steps)
34 | - minimal_visit_counts: minimal number of times a node has to be rolled out before its siblings can be expanded
35 | - UCT_policy: defines the UCT policy to use, i.e. the way to rank the children of a node. Allows various biases and scoring considerations.
36 | - UCTK: the constant defining the exploration/exploitation parameter in the UCT formula
37 | - bias_k: if progressive bias is used, defines the weight of the progressive bias in the UCT formula
38 | - k_rave: if RAVE is used, how to weight the RAVE. Roughly, for visit counts below this value RAVE values lead the UCT, and above it rollout values lead.
39 | - use_RAVE: moves are scored each time they are used throughout the Tree, adapting the RAVE (Rapid Action Value Estimation) principle to the whole tree and not just rollouts.
40 | - penalty: penalty when no compound of the state belongs to the organism
41 | - full_state_reward: reward when all compounds of the state belong to the organism
42 | - pathway_scoring: how to score a pathway when it is found.
43 | - Rollout_policy: how to select moves for the rollout: randomly, or weighted by various scores. Many options are available.
44 | - max_rollout: maximum length of the rollout (it also stops when max_depth is reached)
45 | - chemical_scoring: choose the way to chemically score reactions (considering only substrates or both substrates and products). Possibility to use ConstantChemicalScorer, which always returns 1.
46 | - biological_score_cut_off: cuts off with biological score at the specified level
47 | - substrate_only_score_cut_off: cuts off with substrate similarity only score BEFORE applying the rule at the specified level
48 | - chemical_score_cut_off: cuts off with specified chemical score AFTER applying the rule at the specified level
49 | - virtual_visits: start nodes with virtual_visits prior visits, to reduce stochasticity and variability in the initial Monte Carlo simulations.
50 | - progressive_bias_strategy: policy for the progressive bias (untested)
51 | - progressive_widening: adds a child to nodes visited more than len(nodes)^2 times (untested)
52 | - diameter: specify the diameters (as a list) to use
53 | - EC_filter: allow only certain EC subclasses
54 | - small: development archive
55 | - seed: for reproducibility
56 | - tree_to_complete: if restarting the search from another tree.
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/expected_results/deoxiviolacein_1.json:
--------------------------------------------------------------------------------
1 | {
2 | "elements": {
3 | "nodes": [
4 | {
5 | "data": {
6 | "SMILES": "NC(Cc1c[nH]c2ccccc12)C(=O)O",
7 | "inSink": 1,
8 | "isSource": 0,
9 | "InChI": "InChI=1S/C11H12N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,9,13H,5,12H2,(H,14,15)",
10 | "Names": [
11 | "QIVBCDIJIAJPQS-UHFFFAOYSA-N",
12 | "L-Tryptophan"
13 | ],
14 | "id": "QIVBCDIJIAJPQS-UHFFFAOYSA-N",
15 | "type": "compound",
16 | "Rule ID": null,
17 | "EC number": null,
18 | "Reaction SMILES": null,
19 | "Diameter": null,
20 | "Score": null,
21 | "Iteration": null
22 | }
23 | },
24 | {
25 | "data": {
26 | "SMILES": "N=C(Cc1c[nH]c2ccccc12)C(=O)O",
27 | "inSink": 0,
28 | "isSource": 0,
29 | "InChI": "InChI=1S/C11H10N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,12-13H,5H2,(H,14,15)",
30 | "Names": [
31 | "LKYWXXAVLLVJAS-UHFFFAOYSA-N"
32 | ],
33 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N",
34 | "type": "compound",
35 | "Rule ID": null,
36 | "EC number": null,
37 | "Reaction SMILES": null,
38 | "Diameter": null,
39 | "Score": null,
40 | "Iteration": null
41 | }
42 | },
43 | {
44 | "data": {
45 | "SMILES": "N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12",
46 | "inSink": 0,
47 | "isSource": 0,
48 | "InChI": "InChI=1S/C22H18N4O4/c23-19(21(27)28)17(13-9-25-15-7-3-1-5-11(13)15)18(20(24)22(29)30)14-10-26-16-8-4-2-6-12(14)16/h1-10,17-18,23-26H,(H,27,28)(H,29,30)",
49 | "Names": [
50 | "CKBGWXPNAUCVQQ-UHFFFAOYSA-N"
51 | ],
52 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N",
53 | "type": "compound",
54 | "Rule ID": null,
55 | "EC number": null,
56 | "Reaction SMILES": null,
57 | "Diameter": null,
58 | "Score": null,
59 | "Iteration": null
60 | }
61 | },
62 | {
63 | "data": {
64 | "SMILES": "O=O",
65 | "inSink": 1,
66 | "isSource": 0,
67 | "InChI": "InChI=1S/O2/c1-2",
68 | "Names": [
69 | "MYMOFIZGZYHOMD-UHFFFAOYSA-N",
70 | "O2 O2"
71 | ],
72 | "id": "MYMOFIZGZYHOMD-UHFFFAOYSA-N",
73 | "type": "compound",
74 | "Rule ID": null,
75 | "EC number": null,
76 | "Reaction SMILES": null,
77 | "Diameter": null,
78 | "Score": null,
79 | "Iteration": null
80 | }
81 | },
82 | {
83 | "data": {
84 | "SMILES": "O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12",
85 | "inSink": 0,
86 | "isSource": 0,
87 | "InChI": "InChI=1S/C21H15N3O2/c25-21(26)20-14(15-10-22-17-7-3-1-5-12(15)17)9-19(24-20)16-11-23-18-8-4-2-6-13(16)18/h1-11,22-24H,(H,25,26)",
88 | "Names": [
89 | "SFLGFRJGKHRRID-UHFFFAOYSA-N"
90 | ],
91 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N",
92 | "type": "compound",
93 | "Rule ID": null,
94 | "EC number": null,
95 | "Reaction SMILES": null,
96 | "Diameter": null,
97 | "Score": null,
98 | "Iteration": null
99 | }
100 | },
101 | {
102 | "data": {
103 | "SMILES": "O=C1NC(c2c[nH]c3ccccc23)=CC1=C1C(=O)Nc2ccccc21",
104 | "inSink": 0,
105 | "isSource": 1,
106 | "InChI": "InChI=1S/C20H13N3O2/c24-19-13(18-12-6-2-4-8-16(12)22-20(18)25)9-17(23-19)14-10-21-15-7-3-1-5-11(14)15/h1-10,21H,(H,22,25)(H,23,24)",
107 | "Names": [
108 | "deoxiviolacein",
109 | "OJUJNNKCVPCATE-UHFFFAOYSA-N"
110 | ],
111 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N",
112 | "type": "compound",
113 | "Rule ID": null,
114 | "EC number": null,
115 | "Reaction SMILES": null,
116 | "Diameter": null,
117 | "Score": null,
118 | "Iteration": null
119 | }
120 | },
121 | {
122 | "data": {
123 | "SMILES": null,
124 | "inSink": null,
125 | "isSource": null,
126 | "InChI": null,
127 | "Names": null,
128 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1",
129 | "type": "reaction",
130 | "Rule ID": [
131 | "RR-02-8907c369787578b3-16-F",
132 | "RR-02-8907c369787578b3-14-F",
133 | "RR-02-8907c369787578b3-12-F",
134 | "RR-02-8907c369787578b3-10-F"
135 | ],
136 | "EC number": [
137 | "1.14.13.224"
138 | ],
139 | "Reaction SMILES": "O=C1NC(c2c[nH]c3ccccc23)=CC1=C1C(=O)Nc2ccccc21>>O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12.O=O",
140 | "Diameter": 16,
141 | "Score": 1.0,
142 | "ChemicalScore": 1.0,
143 | "Iteration": 1,
144 | "Stoechiometry": {
145 | "SFLGFRJGKHRRID-UHFFFAOYSA-N": 1,
146 | "MYMOFIZGZYHOMD-UHFFFAOYSA-N": 1
147 | }
148 | }
149 | },
150 | {
151 | "data": {
152 | "SMILES": null,
153 | "inSink": null,
154 | "isSource": null,
155 | "InChI": null,
156 | "Names": null,
157 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2",
158 | "type": "reaction",
159 | "Rule ID": [
160 | "RR-02-74068b9f6b2efdc1-16-F",
161 | "RR-02-74068b9f6b2efdc1-14-F",
162 | "RR-02-74068b9f6b2efdc1-12-F",
163 | "RR-02-74068b9f6b2efdc1-10-F"
164 | ],
165 | "EC number": [
166 | ""
167 | ],
168 | "Reaction SMILES": "O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12>>N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12",
169 | "Diameter": 16,
170 | "Score": 1.0,
171 | "ChemicalScore": 1.0,
172 | "Iteration": 2,
173 | "Stoechiometry": {
174 | "CKBGWXPNAUCVQQ-UHFFFAOYSA-N": 1
175 | }
176 | }
177 | },
178 | {
179 | "data": {
180 | "SMILES": null,
181 | "inSink": null,
182 | "isSource": null,
183 | "InChI": null,
184 | "Names": null,
185 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3",
186 | "type": "reaction",
187 | "Rule ID": [
188 | "RR-02-47e9577f4cb98f97-16-F",
189 | "RR-02-47e9577f4cb98f97-14-F",
190 | "RR-02-47e9577f4cb98f97-12-F",
191 | "RR-02-47e9577f4cb98f97-10-F"
192 | ],
193 | "EC number": [
194 | "1.21.98"
195 | ],
196 | "Reaction SMILES": "N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12>>N=C(Cc1c[nH]c2ccccc12)C(=O)O.N=C(Cc1c[nH]c2ccccc12)C(=O)O",
197 | "Diameter": 16,
198 | "Score": 1.0,
199 | "ChemicalScore": 1.0,
200 | "Iteration": 3,
201 | "Stoechiometry": {
202 | "LKYWXXAVLLVJAS-UHFFFAOYSA-N": 2
203 | }
204 | }
205 | },
206 | {
207 | "data": {
208 | "SMILES": null,
209 | "inSink": null,
210 | "isSource": null,
211 | "InChI": null,
212 | "Names": null,
213 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4",
214 | "type": "reaction",
215 | "Rule ID": [
216 | "RR-02-0c9c5a5559e132c7-16-F",
217 | "RR-02-0c9c5a5559e132c7-14-F",
218 | "RR-02-0c9c5a5559e132c7-12-F",
219 | "RR-02-bbedd3c9b9124d30-10-F"
220 | ],
221 | "EC number": [
222 | "1.3.3.10",
223 | "1.4.3",
224 | "1.4.3.-"
225 | ],
226 | "Reaction SMILES": "N=C(Cc1c[nH]c2ccccc12)C(=O)O>>NC(Cc1c[nH]c2ccccc12)C(=O)O",
227 | "Diameter": 16,
228 | "Score": 0.453552175675181,
229 | "ChemicalScore": 1.0,
230 | "Iteration": 4,
231 | "Stoechiometry": {
232 | "QIVBCDIJIAJPQS-UHFFFAOYSA-N": 1
233 | }
234 | }
235 | }
236 | ],
237 | "edges": [
238 | {
239 | "data": {
240 | "target": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1",
241 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N",
242 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N"
243 | }
244 | },
245 | {
246 | "data": {
247 | "target": "SFLGFRJGKHRRID-UHFFFAOYSA-N",
248 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1",
249 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1"
250 | }
251 | },
252 | {
253 | "data": {
254 | "target": "MYMOFIZGZYHOMD-UHFFFAOYSA-N",
255 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1",
256 | "id": "MYMOFIZGZYHOMD-UHFFFAOYSA-N_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1"
257 | }
258 | },
259 | {
260 | "data": {
261 | "target": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2",
262 | "source": "SFLGFRJGKHRRID-UHFFFAOYSA-N",
263 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2_=>_SFLGFRJGKHRRID-UHFFFAOYSA-N"
264 | }
265 | },
266 | {
267 | "data": {
268 | "target": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N",
269 | "source": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2",
270 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N_=>_SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2"
271 | }
272 | },
273 | {
274 | "data": {
275 | "target": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3",
276 | "source": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N",
277 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3_=>_CKBGWXPNAUCVQQ-UHFFFAOYSA-N"
278 | }
279 | },
280 | {
281 | "data": {
282 | "target": "LKYWXXAVLLVJAS-UHFFFAOYSA-N",
283 | "source": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3",
284 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N_=>_CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3"
285 | }
286 | },
287 | {
288 | "data": {
289 | "target": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4",
290 | "source": "LKYWXXAVLLVJAS-UHFFFAOYSA-N",
291 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4_=>_LKYWXXAVLLVJAS-UHFFFAOYSA-N"
292 | }
293 | },
294 | {
295 | "data": {
296 | "target": "QIVBCDIJIAJPQS-UHFFFAOYSA-N",
297 | "source": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4",
298 | "id": "QIVBCDIJIAJPQS-UHFFFAOYSA-N_=>_LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4"
299 | }
300 | }
301 | ]
302 | }
303 | }
--------------------------------------------------------------------------------
/expected_results/deoxiviolacein_iteration_15.json:
--------------------------------------------------------------------------------
1 | {
2 | "elements": {
3 | "nodes": [
4 | {
5 | "data": {
6 | "SMILES": "NC(Cc1c[nH]c2ccccc12)C(=O)O",
7 | "inSink": 1,
8 | "isSource": 0,
9 | "InChI": "InChI=1S/C11H12N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,9,13H,5,12H2,(H,14,15)",
10 | "Names": [
11 | "QIVBCDIJIAJPQS-UHFFFAOYSA-N",
12 | "L-Tryptophan"
13 | ],
14 | "id": "QIVBCDIJIAJPQS-UHFFFAOYSA-N",
15 | "type": "compound",
16 | "Rule ID": null,
17 | "EC number": null,
18 | "Reaction SMILES": null,
19 | "Diameter": null,
20 | "Score": null,
21 | "Iteration": null
22 | }
23 | },
24 | {
25 | "data": {
26 | "SMILES": "N=C(Cc1c[nH]c2ccccc12)C(=O)O",
27 | "inSink": 0,
28 | "isSource": 0,
29 | "InChI": "InChI=1S/C11H10N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,12-13H,5H2,(H,14,15)",
30 | "Names": [
31 | "LKYWXXAVLLVJAS-UHFFFAOYSA-N"
32 | ],
33 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N",
34 | "type": "compound",
35 | "Rule ID": null,
36 | "EC number": null,
37 | "Reaction SMILES": null,
38 | "Diameter": null,
39 | "Score": null,
40 | "Iteration": null
41 | }
42 | },
43 | {
44 | "data": {
45 | "SMILES": "N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12",
46 | "inSink": 0,
47 | "isSource": 0,
48 | "InChI": "InChI=1S/C22H18N4O4/c23-19(21(27)28)17(13-9-25-15-7-3-1-5-11(13)15)18(20(24)22(29)30)14-10-26-16-8-4-2-6-12(14)16/h1-10,17-18,23-26H,(H,27,28)(H,29,30)",
49 | "Names": [
50 | "CKBGWXPNAUCVQQ-UHFFFAOYSA-N"
51 | ],
52 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N",
53 | "type": "compound",
54 | "Rule ID": null,
55 | "EC number": null,
56 | "Reaction SMILES": null,
57 | "Diameter": null,
58 | "Score": null,
59 | "Iteration": null
60 | }
61 | },
62 | {
63 | "data": {
64 | "SMILES": "O=O",
65 | "inSink": 1,
66 | "isSource": 0,
67 | "InChI": "InChI=1S/O2/c1-2",
68 | "Names": [
69 | "MYMOFIZGZYHOMD-UHFFFAOYSA-N",
70 | "O2 O2"
71 | ],
72 | "id": "MYMOFIZGZYHOMD-UHFFFAOYSA-N",
73 | "type": "compound",
74 | "Rule ID": null,
75 | "EC number": null,
76 | "Reaction SMILES": null,
77 | "Diameter": null,
78 | "Score": null,
79 | "Iteration": null
80 | }
81 | },
82 | {
83 | "data": {
84 | "SMILES": "O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12",
85 | "inSink": 0,
86 | "isSource": 0,
87 | "InChI": "InChI=1S/C21H15N3O2/c25-21(26)20-14(15-10-22-17-7-3-1-5-12(15)17)9-19(24-20)16-11-23-18-8-4-2-6-13(16)18/h1-11,22-24H,(H,25,26)",
88 | "Names": [
89 | "SFLGFRJGKHRRID-UHFFFAOYSA-N"
90 | ],
91 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N",
92 | "type": "compound",
93 | "Rule ID": null,
94 | "EC number": null,
95 | "Reaction SMILES": null,
96 | "Diameter": null,
97 | "Score": null,
98 | "Iteration": null
99 | }
100 | },
101 | {
102 | "data": {
103 | "SMILES": "O=C1NC(c2c[nH]c3ccccc23)=CC1=C1C(=O)Nc2ccccc21",
104 | "inSink": 0,
105 | "isSource": 1,
106 | "InChI": "InChI=1S/C20H13N3O2/c24-19-13(18-12-6-2-4-8-16(12)22-20(18)25)9-17(23-19)14-10-21-15-7-3-1-5-11(14)15/h1-10,21H,(H,22,25)(H,23,24)",
107 | "Names": [
108 | "deoxiviolacein",
109 | "OJUJNNKCVPCATE-UHFFFAOYSA-N"
110 | ],
111 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N",
112 | "type": "compound",
113 | "Rule ID": null,
114 | "EC number": null,
115 | "Reaction SMILES": null,
116 | "Diameter": null,
117 | "Score": null,
118 | "Iteration": null
119 | }
120 | },
121 | {
122 | "data": {
123 | "SMILES": null,
124 | "inSink": null,
125 | "isSource": null,
126 | "InChI": null,
127 | "Names": null,
128 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1",
129 | "type": "reaction",
130 | "Rule ID": [
131 | "RR-02-8907c369787578b3-16-F",
132 | "RR-02-8907c369787578b3-14-F",
133 | "RR-02-8907c369787578b3-12-F",
134 | "RR-02-8907c369787578b3-10-F"
135 | ],
136 | "EC number": [
137 | "1.14.13.224"
138 | ],
139 | "Reaction SMILES": "O=C1NC(c2c[nH]c3ccccc23)=CC1=C1C(=O)Nc2ccccc21>>O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12.O=O",
140 | "Diameter": 16,
141 | "Score": 1.0,
142 | "ChemicalScore": 1.0,
143 | "Iteration": 1,
144 | "Stoechiometry": {
145 | "SFLGFRJGKHRRID-UHFFFAOYSA-N": 1,
146 | "MYMOFIZGZYHOMD-UHFFFAOYSA-N": 1
147 | }
148 | }
149 | },
150 | {
151 | "data": {
152 | "SMILES": null,
153 | "inSink": null,
154 | "isSource": null,
155 | "InChI": null,
156 | "Names": null,
157 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2",
158 | "type": "reaction",
159 | "Rule ID": [
160 | "RR-02-74068b9f6b2efdc1-16-F",
161 | "RR-02-74068b9f6b2efdc1-14-F",
162 | "RR-02-74068b9f6b2efdc1-12-F",
163 | "RR-02-74068b9f6b2efdc1-10-F"
164 | ],
165 | "EC number": [
166 | ""
167 | ],
168 | "Reaction SMILES": "O=C(O)c1[nH]c(-c2c[nH]c3ccccc23)cc1-c1c[nH]c2ccccc12>>N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12",
169 | "Diameter": 16,
170 | "Score": 1.0,
171 | "ChemicalScore": 1.0,
172 | "Iteration": 2,
173 | "Stoechiometry": {
174 | "CKBGWXPNAUCVQQ-UHFFFAOYSA-N": 1
175 | }
176 | }
177 | },
178 | {
179 | "data": {
180 | "SMILES": null,
181 | "inSink": null,
182 | "isSource": null,
183 | "InChI": null,
184 | "Names": null,
185 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3",
186 | "type": "reaction",
187 | "Rule ID": [
188 | "RR-02-47e9577f4cb98f97-16-F",
189 | "RR-02-47e9577f4cb98f97-14-F",
190 | "RR-02-47e9577f4cb98f97-12-F",
191 | "RR-02-47e9577f4cb98f97-10-F"
192 | ],
193 | "EC number": [
194 | "1.21.98"
195 | ],
196 | "Reaction SMILES": "N=C(C(=O)O)C(c1c[nH]c2ccccc12)C(C(=N)C(=O)O)c1c[nH]c2ccccc12>>N=C(Cc1c[nH]c2ccccc12)C(=O)O.N=C(Cc1c[nH]c2ccccc12)C(=O)O",
197 | "Diameter": 16,
198 | "Score": 1.0,
199 | "ChemicalScore": 1.0,
200 | "Iteration": 3,
201 | "Stoechiometry": {
202 | "LKYWXXAVLLVJAS-UHFFFAOYSA-N": 2
203 | }
204 | }
205 | },
206 | {
207 | "data": {
208 | "SMILES": null,
209 | "inSink": null,
210 | "isSource": null,
211 | "InChI": null,
212 | "Names": null,
213 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4",
214 | "type": "reaction",
215 | "Rule ID": [
216 | "RR-02-0c9c5a5559e132c7-16-F",
217 | "RR-02-0c9c5a5559e132c7-14-F",
218 | "RR-02-0c9c5a5559e132c7-12-F",
219 | "RR-02-bbedd3c9b9124d30-10-F"
220 | ],
221 | "EC number": [
222 | "1.3.3.10",
223 | "1.4.3",
224 | "1.4.3.-"
225 | ],
226 | "Reaction SMILES": "N=C(Cc1c[nH]c2ccccc12)C(=O)O>>NC(Cc1c[nH]c2ccccc12)C(=O)O",
227 | "Diameter": 16,
228 | "Score": 0.453552175675181,
229 | "ChemicalScore": 1.0,
230 | "Iteration": 4,
231 | "Stoechiometry": {
232 | "QIVBCDIJIAJPQS-UHFFFAOYSA-N": 1
233 | }
234 | }
235 | }
236 | ],
237 | "edges": [
238 | {
239 | "data": {
240 | "target": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1",
241 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N",
242 | "id": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N"
243 | }
244 | },
245 | {
246 | "data": {
247 | "target": "SFLGFRJGKHRRID-UHFFFAOYSA-N",
248 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1",
249 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1"
250 | }
251 | },
252 | {
253 | "data": {
254 | "target": "MYMOFIZGZYHOMD-UHFFFAOYSA-N",
255 | "source": "OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1",
256 | "id": "MYMOFIZGZYHOMD-UHFFFAOYSA-N_=>_OJUJNNKCVPCATE-UHFFFAOYSA-N-RR-02-8907c369787578b3-16-F-0-1"
257 | }
258 | },
259 | {
260 | "data": {
261 | "target": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2",
262 | "source": "SFLGFRJGKHRRID-UHFFFAOYSA-N",
263 | "id": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2_=>_SFLGFRJGKHRRID-UHFFFAOYSA-N"
264 | }
265 | },
266 | {
267 | "data": {
268 | "target": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N",
269 | "source": "SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2",
270 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N_=>_SFLGFRJGKHRRID-UHFFFAOYSA-N-RR-02-74068b9f6b2efdc1-16-F-0-2"
271 | }
272 | },
273 | {
274 | "data": {
275 | "target": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3",
276 | "source": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N",
277 | "id": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3_=>_CKBGWXPNAUCVQQ-UHFFFAOYSA-N"
278 | }
279 | },
280 | {
281 | "data": {
282 | "target": "LKYWXXAVLLVJAS-UHFFFAOYSA-N",
283 | "source": "CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3",
284 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N_=>_CKBGWXPNAUCVQQ-UHFFFAOYSA-N-RR-02-47e9577f4cb98f97-16-F-0-3"
285 | }
286 | },
287 | {
288 | "data": {
289 | "target": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4",
290 | "source": "LKYWXXAVLLVJAS-UHFFFAOYSA-N",
291 | "id": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4_=>_LKYWXXAVLLVJAS-UHFFFAOYSA-N"
292 | }
293 | },
294 | {
295 | "data": {
296 | "target": "QIVBCDIJIAJPQS-UHFFFAOYSA-N",
297 | "source": "LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4",
298 | "id": "QIVBCDIJIAJPQS-UHFFFAOYSA-N_=>_LKYWXXAVLLVJAS-UHFFFAOYSA-N-RR-02-0c9c5a5559e132c7-16-F-0-4"
299 | }
300 | }
301 | ]
302 | }
303 | }
--------------------------------------------------------------------------------
/expected_results/pickles/tree_end_search.pkl.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/expected_results/pickles/tree_end_search.pkl.tar.gz
--------------------------------------------------------------------------------
/expected_results/results.csv:
--------------------------------------------------------------------------------
1 | parameter,value
2 | stop_at_first_result,False
3 | c_name,deoxiviolacein
4 | c_smiles,
5 | c_inchi,"InChI=1S/C20H13N3O2/c24-19-13(18-12-6-2-4-8-16(12)22-20(18)25)9-17(23-19)14-10-21-15-7-3-1-5-11(14)15/h1-10,21H,(H,22,25)(H,23,24)/b18-13+"
6 | fire_timeout,1
7 | organism_name,ecoli
8 | complementary_sink,
9 | itermax,1000
10 | expansion_width,10
11 | time_budget,7200
12 | max_depth,7
13 | minimal_visit_counts,1
14 | UCT_policy,Biochemical_UCT_1
15 | UCTK,20.0
16 | bias_k,0.0
17 | k_rave,0.0
18 | use_RAVE,False
19 | penalty,-1
20 | full_state_reward,2
21 | Rollout_policy,Rollout_policy_random_uniform_on_biochemical_multiplication_score
22 | max_rollout,3
23 | chemical_scoring,SubandprodChemicalScorer
24 | biological_score_cut_off,0.1
25 | substrate_only_score_cut_off,0.7
26 | chemical_score_cut_off,0.7
27 | virtual_visits,0
28 | progressive_bias_strategy,0
29 | progressive_widening,False
30 | diameter,"[10, 12, 14, 16]"
31 | EC_filter,
32 | tree_to_complete,
33 | found_pathways,4
34 | TIME_EXECUTION,3.58
35 | STOP_REASON,iteration
36 | NUMBER_ITERATION,999
37 |
--------------------------------------------------------------------------------
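The file above pairs each run option (documented in document_all_options.md) with its value, followed by summary statistics (found_pathways, TIME_EXECUTION, STOP_REASON, NUMBER_ITERATION); a minimal sketch for loading such a file back into a dictionary:

# Sketch only: read the parameter/value summary of a tree search run.
import csv

with open("expected_results/results.csv") as handle:
    results = {row["parameter"]: row["value"] for row in csv.DictReader(handle)}
print("{} pathways found in {} s".format(results["found_pathways"], results["TIME_EXECUTION"]))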
/move.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains the Move class, which holds:
3 | - compound it applies to
4 | - rsmart
5 | - rid
6 | - set (because a single rule can generate multiple product sets)
7 | - biological_score
8 | - chemical_score
9 | """
10 |
11 | # General utilities
12 | import logging
13 | import csv
14 |
15 | class Move(object):
16 | """
17 |     Basic move object. At the moment it mostly holds attributes, with little logic per se.
18 | """
19 |
20 | logger = logging.getLogger(__name__)
21 | def __init__(self,
22 | rsmart,
23 | rid,
24 | compound_id,
25 | rsmiles = None,
26 | set_number = 0,
27 | chemical_score = 0,
28 | chemical_substrate_score = 0,
29 | biological_score = 0,
30 | product_list = [],
31 | EC_number = ["EC: None"],
32 | compound_index = 0, stoechiometry = {}):
33 | self.rsmart = rsmart
34 | if rsmiles is None:
35 | self.rsmiles = self.rsmart
36 | else:
37 | self.rsmiles = rsmiles
38 | self.rid = rid
39 | self.compound_id = compound_id
40 | self.set_number = set_number
41 | self.chemical_score = chemical_score
42 | self.chemical_substrate_score = chemical_substrate_score
43 | self.biological_score = biological_score
44 | self.EC_numbers = EC_number
45 | self.product_list = product_list
46 | self.name = "{}-{}-{}".format(self.compound_id, self.rid, str(self.set_number))
47 | self.synonyms = [self.rid]
48 | self.RAVE_visits = 0
49 | self.RAVE_total_score = 0
50 | self.RAVE_average_score = 0
51 | self.stoechiometry = stoechiometry
52 |
53 | def set_set_number(self, set_number):
54 | self.set_number = set_number
55 | self.name = "{}-{}-{}".format(self.compound_id, self.rid, str(self.set_number))
56 |
57 | def set_rsmart(self, rsmart):
58 | self.rsmart = rsmart
59 |
60 | def set_rsmiles(self, rsmiles):
61 | self.rsmiles = rsmiles
62 |
63 | def calculate_rsmiles(self, substrate):
64 | """
65 | Smiles of the actual transformation that is happening between the substrate and the products
66 | """
67 | sub_smiles = "{}".format(substrate.csmiles)
68 | prod_smiles = ".".join([prod.csmiles for prod in self.full_product_list()])
69 | self.rsmiles = "{}>>{}".format(sub_smiles, prod_smiles)
70 |
71 | def set_chemical_score(self, chemical_score):
72 | self.chemical_score = chemical_score
73 |
74 | def set_chemical_substrate_score(self, chemical_substrate_score):
75 | self.chemical_substrate_score = chemical_substrate_score
76 |
77 | def delete_intermediate_chemical_score(self):
78 | del self.original_substrates_list
79 | del self.original_products_list_list
80 |
81 | def set_intermediate_chemical_score(self, original_substrates_list, original_products_list_list):
82 | self.original_substrates_list = original_substrates_list
83 | self.original_products_list_list = original_products_list_list
84 |
85 | def set_id(self, id):
86 | self.id = id
87 |
88 | def set_EC_numbers(self, EC_numbers):
89 | self.EC_numbers = EC_numbers
90 |
91 | def set_biological_score(self, biological_score):
92 | self.biological_score = biological_score
93 |
94 | def set_product_list(self, product_list):
95 | self.product_list = product_list
96 |
97 | def set_stoechiometry(self, stoechiometry):
98 | self.stoechiometry = stoechiometry
99 |
100 | def __repr__(self):
101 | return self.name
102 |
103 | def print_all_attributes(self):
104 | text = "For move {}, attributes are: rid: {}, cid: {} \n".format(self.name, self.rid, self.compound_id)
105 | text_next = "set: {}, chem_score: {}, bio score: {} \n".format(self.set_number, self.chemical_score, self.biological_score)
106 | text_last = "product_list: {}, stoechiometry: {} \n".format(self.product_list, self.stoechiometry)
107 | text_appendix = "EC numbers are {}".format(self.EC_numbers)
108 | return (text + text_next + text_last + text_appendix)
109 |
110 | def full_product_list(self):
111 | full_list = []
112 | ordered_product_list = sorted(self.product_list, key = lambda item: self.stoechiometry[item.InChIKey])
113 | for product in ordered_product_list:
114 | for i in range(self.stoechiometry[product.InChIKey]):
115 | full_list.append(product)
116 | return full_list
117 |
118 |     def _calculate_simles_from_move(self):
119 |         pass  # incomplete stub; rsmiles is built by calculate_rsmiles instead
120 |
121 | def clone(self):
122 | cloned_move = Move(
123 | rsmart=self.rsmart,
124 | rid=self.rid,
125 | compound_id=self.compound_id,
126 | set_number=self.set_number,
127 | chemical_score=self.chemical_score,
128 | biological_score=self.biological_score,
129 | product_list=self.product_list,
130 | EC_number=self.EC_numbers,
131 | stoechiometry=self.stoechiometry,
132 | )
133 | try:
134 | cloned_move.set_intermediate_chemical_score(
135 | self.original_substrates_list,
136 | self.original_products_list_list,
137 | )
138 | except AttributeError:
139 | pass
140 | return cloned_move
141 |
142 | def add_synonym(self, move):
143 | """
144 | Adds a synonym to this move.
145 | (When another move was deemed equal to current move (self))
146 | """
147 | if move.rid not in self.synonyms:
148 | self.synonyms.append(move.rid)
149 | for EC in move.EC_numbers:
150 | if EC not in self.EC_numbers:
151 | self.EC_numbers.append(EC)
152 | if self.biological_score * self.chemical_score < move.biological_score * move.chemical_score:
153 | self.biological_score = move.biological_score
154 | self.chemical_score = move.chemical_score
155 | self.stoechiometry = move.stoechiometry
156 |
157 | def eq_full_inchi_key(self, other):
158 | """
159 |         Two moves are identical if they
160 | - apply to the same compound
161 | - generate the same products
162 | """
163 | compound_eq = (self.compound_id == other.compound_id)
164 | products_eq = len(self.product_list) == len(other.product_list)
165 | for product in self.product_list:
166 | products_eq = products_eq and (product.in_list(other.product_list, main_layer = False))
167 | return(compound_eq and products_eq)
168 |
169 | def eq_main_layer(self, other):
170 | """
171 |         Two moves are identical if they
172 |         - apply to the same compound
173 |         - generate the same products (compared on the InChIKey main layer only)
174 | """
175 | compound_eq = (self.compound_id == other.compound_id)
176 | products_eq = len(self.product_list) == len(other.product_list)
177 | for product in self.product_list:
178 | products_eq = products_eq and (product.in_list(other.product_list, main_layer = True))
179 | return(compound_eq and products_eq)
180 |
181 | def in_list(self, list_moves, main_layer = False):
182 | in_list = False
183 | for move_in_list in list_moves:
184 | if main_layer:
185 | equality = self.eq_main_layer(move_in_list)
186 | if equality:
187 | in_list = True
188 | move_in_list.add_synonym(self)
189 | break
190 | else:
191 | equality = self.eq_full_inchi_key(move_in_list)
192 | if equality:
193 | in_list = True
194 | move_in_list.add_synonym(self)
195 | break
196 | return(in_list)
197 |
198 | def update(self, result, visit_number = 1):
199 | """
200 |         Update the RAVE statistics (visits, total and average score); these values are only used by the RAVE implementation.
201 | """
202 | self.RAVE_visits = self.RAVE_visits + visit_number
203 | self.RAVE_total_score = self.RAVE_total_score + result * visit_number
204 | self.RAVE_average_score = self.RAVE_total_score/self.RAVE_visits
205 |
--------------------------------------------------------------------------------
/organisms.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines organisms as ChemicalCompoundState objects.
3 | They are unpickled here after being generated by the calculate_organisms set-up script.
4 | """
5 |
6 | # General utilities
7 | import logging
8 | import pickle
9 | import os
10 | import csv
11 | import sys
12 |
13 | from config import *
14 |
15 | # RP3 specific objects
16 | from compound import Compound, unpickle
17 | from chemical_compounds_state import ChemicalCompoundState
18 | from rdkit.Chem import AllChem
19 | from utilities.reactor.Utils import standardize_chemical, standardize_results, handle_results, ChemConversionError
20 | from utilities.reactor.cli import worker_match, worker_fire, RuleConversionError
21 |
22 |
23 | class NotReady(Exception):
24 |     """Raised when organisms or rules have not been calculated in advance"""
25 |
26 | def __init__(self, msg = "Not Ready. Need to run set-up scripts"):
27 | self._msg = msg
28 |
29 | def __str__(self):
30 | return self._msg
31 |
32 |
33 | def import_organism_from_csv(csv_file, add_Hs=True):
34 | with open(csv_file) as csv_handle:
35 | dict_reader = csv.DictReader(csv_handle, delimiter=",")
36 | compound_list = []
37 | for row in dict_reader:
38 | name = row["name"]
39 | inchi = row["inchi"]
40 | if inchi is None or inchi == "None" or inchi == "":
41 | pass
42 | else:
43 | try:
44 | if name.startswith("InChI"):
45 | compound = Compound(
46 | InChI=inchi, heavy_standardisation=True, force_add_H=add_Hs
47 | )
48 | else:
49 | compound = Compound(
50 | InChI=inchi,
51 | name=name,
52 | heavy_standardisation=True,
53 | force_add_H=add_Hs,
54 | )
55 | if not compound.in_list(compound_list, main_layer = False):
56 | compound_list.append(compound)
57 | except ChemConversionError as e:
58 |                     logging.error("For compound {} with inchi {}: {}".format(name, inchi, e))
59 | organism = ChemicalCompoundState(compound_list, main_layer = False)
60 | # organism.set_main_layer(True)
61 | return(organism)
62 |
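# A minimal usage sketch (hypothetical file name and contents, not shipped with the repo):
# given a CSV with a "name,inchi" header and one row per sink compound, e.g.
#     name,inchi
#     water,InChI=1S/H2O/h1H2
# the call below would return a ChemicalCompoundState holding the standardised compounds:
#     my_sink = import_organism_from_csv("my_sink.csv", add_Hs=True)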
63 |
64 | organisms_data_path = "{}/organisms".format(DATA_PATH)
65 | if not os.path.exists(organisms_data_path):
66 | os.mkdir(organisms_data_path)
67 |
68 | if not os.path.exists(organisms_data_path + '/state_iML1515_chassis_H.pkl'):
69 | logging.error("Please run calculate_organisms script")
70 | raise NotReady
71 |
72 |
73 | Test_organism_H = unpickle(file_name = 'Test_organism_H', type = 'state', folder_address = organisms_data_path)
74 | ecoli_chassis_H = unpickle(file_name = 'iML1515_chassis_H', type = 'state', folder_address = organisms_data_path)
75 | detectable_cmpds_H = unpickle(file_name = 'detectable_cmpds_H', type = 'state', folder_address = organisms_data_path)
76 | core_ecoli_H = unpickle(file_name = 'core_ecoli_H', type = 'state', folder_address = organisms_data_path)
77 | bsubtilis_H = unpickle(file_name = 'bsubtilis_H', type = 'state', folder_address = organisms_data_path)
78 | iJO1366_chassis_H = unpickle(file_name = 'iJO1366_chassis_H', type = 'state', folder_address = organisms_data_path)
79 |
80 |
81 | Test_organism_noH = unpickle(file_name = 'Test_organism_noH', type = 'state', folder_address = organisms_data_path)
82 | ecoli_chassis_noH = unpickle(file_name = 'iML1515_chassis_noH', type = 'state', folder_address = organisms_data_path)
83 | detectable_cmpds_noH = unpickle(file_name = 'detectable_cmpds_noH', type = 'state', folder_address = organisms_data_path)
84 | core_ecoli_noH = unpickle(file_name = 'core_ecoli_noH', type = 'state', folder_address = organisms_data_path)
85 | bsubtilis_noH = unpickle(file_name = 'bsubtilis_noH', type = 'state', folder_address = organisms_data_path)
86 | iJO1366_chassis_noH = unpickle(file_name = 'iJO1366_chassis_noH', type = 'state', folder_address = organisms_data_path)
87 |
--------------------------------------------------------------------------------
/pathway.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains the pathway objects for visualisation and export
3 | """
4 |
5 | # General utilities
6 | import logging
7 | import csv
8 | import copy
9 | import json
10 | import pickle
11 | # RP3 specific objects
12 | from compound import Compound
13 | from move import Move
14 | from chemical_compounds_state import ChemicalCompoundState
15 | from organisms import Test_organism_H
16 |
17 |
18 | class Pathway(object):
19 | """
20 | Pathway object.
21 | Has methods for quick visualisation as well as export to json (for visualisation and treatment)
22 | Also has cloning and compound addition
23 | """
24 | logger = logging.getLogger(__name__)
25 |
26 |     def __init__(self, first_iteration = -1, target = None, compounds = None, moves = None,
27 |                  file_to_save = "temporary_pathway_json", main_layer = True,
28 |                  organism = Test_organism_H, edges = None, nodes_compounds = None, nodes_transformations = None):
29 | """
30 | Initialising a pathway object.
31 | A compound has an ID and a dict with chemical structures
32 |         A reaction links 2 compounds and has a SMARTS, scores, etc.
33 |         self.compounds is a dictionary of ID: chemical_struct_of_compound
34 |         Remarks:
35 |         - a pathway can only be defined for a fully solved Node (i.e. in the Tree, not in rollout)
36 |         - it needs to verify at each step which products are formed,
37 |           as those could have been deleted in the tree search (already in state)
38 | """
39 | self.first_iteration = first_iteration
40 | self.target = target
41 | self.organism = organism
42 | self.main_layer = main_layer
43 |         self.compounds = compounds if compounds is not None else []  # None defaults avoid shared mutable lists between instances
44 |         self.moves = moves if moves is not None else []
45 |         self.file_to_save = file_to_save
46 |         self.nodes_compounds = nodes_compounds if nodes_compounds is not None else []
47 |         self.nodes_transformations = nodes_transformations if nodes_transformations is not None else []
48 |         self.edges = edges if edges is not None else []
49 | self.pathway_as_dict = None
50 |
51 | def __eq__(self, other):
52 | """
53 | Two pathways are identical if their compounds and moves are identical
54 | """
55 | node_compounds_equal = len(self.nodes_compounds) == len(other.nodes_compounds)
56 |         node_transfo_equal = len(self.nodes_transformations) == len(other.nodes_transformations)
57 | node_edges_equal = len(self.edges) == len(other.edges)
58 | compounds_equal = len(self.compounds) == len(other.compounds)
59 | if compounds_equal:
60 | for compound in self.compounds:
61 | in_other = compound.in_list(other.compounds, main_layer = True)
62 | if not in_other:
63 | compounds_equal = False
64 | break
65 | moves_equal = len(self.moves) == len(other.moves)
66 | if moves_equal:
67 | for move in self.moves:
68 | in_other = move.in_list(other.moves, main_layer = True)
69 | if not in_other:
70 | moves_equal = False
71 | break
72 |         equality = compounds_equal and moves_equal and node_compounds_equal and node_transfo_equal and node_edges_equal
73 | return (equality)
74 |
75 | def __repr__(self):
76 | """
77 |         Print the list of compounds and the list of edges
78 | """
79 | rep = 'Compound \n'
80 | for compound in self.compounds:
81 | rep = rep + str(compound) + "\n"
82 | rep = rep + 'Edges \n'
83 | for edge in self.edges:
84 | rep = rep + edge["data"]["id"] + "\n"
85 | return(rep)
86 |
87 | def all_attributes_with_nodes(self):
88 | """
89 |         Print the list of compounds, the edges, and the compound and transformation nodes
90 | """
91 | rep = 'Compound \n'
92 | for compound in self.compounds:
93 | rep = rep + str(compound) + "\n"
94 | rep = rep + 'Edges \n'
95 | for edge in self.edges:
96 | rep = rep + edge["data"]["id"] + "\n"
97 | for node_cp in self.nodes_compounds:
98 | rep = rep + node_cp["data"]["id"] + "\n"
99 | for node_tf in self.nodes_transformations:
100 | rep = rep + node_tf["data"]["id"] + "\n"
101 | return(rep)
102 |
103 | def set_file_to_save(self, file_to_save):
104 | self.file_to_save = file_to_save
105 |
106 | def set_main_layer(self, main_layer):
107 | self.main_layer = main_layer
108 |
109 | def set_first_iteration(self, first_iteration):
110 | self.first_iteration = first_iteration
111 |
112 | def clone(self):
113 | """ Cloning """
114 | duplicated_pathway = Pathway(
115 | first_iteration=self.first_iteration,
116 | organism=self.organism,
117 | main_layer=self.main_layer,
118 | target=self.target,
119 | compounds=[cmp.clone() for cmp in self.compounds],
120 | moves=[mv.clone() for mv in self.moves],
121 | edges=copy.deepcopy(self.edges),
122 | nodes_compounds=copy.deepcopy(self.nodes_compounds),
123 | nodes_transformations=copy.deepcopy(self.nodes_transformations),
124 | )
125 | return duplicated_pathway
126 |
127 |     def save(self, file_name = None, folder_address = "pickled_data"):
128 |         if file_name is None:
129 |             file_name = self.file_to_save  # fall back to the default name for this pathway
130 |         with open('{}/pathway_{}.pkl'.format(folder_address, file_name), 'wb') as file_saving:
131 |             pickle.dump(self, file_saving)
132 |
133 | def add_compound(self, compound, in_sink = None, is_source = 0):
134 | """
135 | Adding a compound object to the pathway.
136 | """
137 | if is_source:
138 | self.target = compound
139 | if not compound.in_list(self.compounds, main_layer = self.main_layer):
140 | self.compounds.append(compound)
141 | if in_sink is None:
142 | if self.organism.compound_in_state(compound):
143 | in_sink = 1
144 | else:
145 | in_sink = 0
146 | data_dict = {
147 | 'SMILES': compound.csmiles,
148 | 'inSink':in_sink,
149 | 'isSource': is_source,
150 | 'InChI': compound.InChI,
151 | 'Names': compound.synonyms_names, # If I want synonyms, keep them
152 | 'id': compound.InChIKey,
153 | 'type': 'compound',
154 | 'Rule ID': None,
155 | 'EC number': None,
156 | 'Reaction SMILES': None,
157 | 'Diameter': None,
158 | 'Score': None,
159 | 'Iteration': None
160 | }
161 | self.nodes_compounds.append({"data": data_dict})
162 | else:
163 | self.logger.warning("Compound {} is already in compounds".format(compound))
164 |
165 |     def clean_up(self, move, depth):
166 |         move_id = "{}-{}-{}-{}".format(move.compound_id, move.rid, move.set_number, depth)
167 |         return(move_id)
168 |
169 | def add_reaction(self, move, depth = 1):
170 | """
171 | Adding a reaction to the pathway.
172 | """
173 | if not move.in_list(self.moves):
174 | self.moves.append(move)
175 |             move_compound_id_present, move_compound_ID = False, move.compound_id  # fallback id so the edge source below is always defined
176 | for cp in self.compounds:
177 | for sym in cp.synonyms_names:
178 | if sym == move.compound_id:
179 | move_compound_id_present = True
180 | move_compound_ID = cp.InChIKey
181 | break
182 | if not move_compound_id_present:
183 | self.logger.warning("Trying to add move {} when compound {} is not in the pathway".format(move, move.compound_id))
184 |
185 | for product in move.product_list:
186 | if not product.in_list(self.compounds):
187 | # Adding the products of the pathway
188 | self.add_compound(product, in_sink = None, is_source = 0)
189 |
190 | cleaned_up_moved = self.clean_up(move, depth)
191 | try:
192 | diameter = int(move.rid.split("-")[3])
193 |             except (AttributeError, IndexError, ValueError):
194 |                 diameter = 42  # fallback when the rule id does not encode a diameter
195 | data_dict = {
196 | "SMILES": None,
197 | "inSink": None,
198 | "isSource": None,
199 | "InChI": None,
200 | "Names": None,
201 | "id": cleaned_up_moved,
202 | "type": "reaction",
203 | "Rule ID": move.synonyms,
204 | "EC number": move.EC_numbers,
205 | "Reaction SMILES": move.rsmiles,
206 | "Diameter": diameter,
207 | "Score": move.biological_score,
208 | "ChemicalScore": move.chemical_score,
209 | "Iteration": depth,
210 | "Stoechiometry": move.stoechiometry
211 | }
212 | self.nodes_transformations.append({"data": data_dict})
213 | # Adding all the edges:
214 | # from compound to reaction (move as target, compound as source)
215 | # From reactions to compound (move as source, product as target)
216 | data_dict = {
217 | "target" : cleaned_up_moved,
218 | "source" : move_compound_ID,
219 | "id" : "{}_=>_{}".format(cleaned_up_moved, move.compound_id)
220 | }
221 | self.edges.append({"data": data_dict})
222 | for product in move.product_list:
223 | data_dict = {
224 | "target" : product.name,
225 | "source" : cleaned_up_moved,
226 | "id" : "{}_=>_{}".format(product.name, cleaned_up_moved)
227 | }
228 | self.edges.append({"data": data_dict})
229 | else:
230 | self.logger.debug("Move {} is already in moves".format(move))
231 |
232 | def jsonify_scope_viewer(self):
233 | """
234 |         Use the scope viewer to visualise pathways until the DBTL pipeline advances further.
235 |         The json file is a dict with a single key called "elements".
236 |         The "elements" value is a dict composed of "nodes" and "edges".
237 |         "nodes" is a list of compound and reaction entries, each carrying its data dict.
238 | """
239 | if self.pathway_as_dict is None:
240 | self.nodes_compounds.reverse()
241 | self.pathway_as_dict = {"elements": {"nodes": self.nodes_compounds + self.nodes_transformations,
242 | "edges": self.edges}}
243 | with open(self.file_to_save, "w") as json_handler:
244 | json.dump(self.pathway_as_dict, json_handler, indent = 2)
245 |
246 | def export_as_json_dict(self):
247 | """
248 | To export as a dict without needing to read and write the json.
249 | """
250 | if self.pathway_as_dict is None:
251 | self.nodes_compounds.reverse()
252 | self.pathway_as_dict = {"elements": {"nodes": self.nodes_compounds + self.nodes_transformations,
253 | "edges": self.edges}}
254 | return(self.pathway_as_dict)
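    # Shape sketch of the exported dict (illustrative, field values hypothetical):
    #     {"elements": {"nodes": [{"data": {"id": "...", "type": "compound", ...}},
    #                             {"data": {"id": "...", "type": "reaction", ...}}],
    #                   "edges": [{"data": {"source": "...", "target": "...", "id": "..."}}]}}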
255 |
256 |
257 | def __cli():
258 | """Command line interface. Was actually used to make quick
259 | tests before implementing them in the testing file"""
260 |     logging.basicConfig(
261 |         level=logging.INFO,  # defaults to stderr, so no sys import is needed in this module
262 |         datefmt='%d/%m/%Y %H:%M:%S',
263 | format='%(asctime)s -- %(levelname)s -- %(message)s'
264 | )
265 | logging.warning("CLI is not available for Pathway")
266 |
267 |
268 | if __name__ == "__main__":
269 | __cli()
270 |
--------------------------------------------------------------------------------
/pathway_scoring.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines the pathway scoring functions.
3 | Can take as inputs both Pathway objects and json dictionaries exported from Pathways.
4 | """
5 |
6 | import random
7 | import numpy as np
8 | import json
9 | import os
10 | # RP3 - specific objects
11 | from pathway import Pathway
12 |
13 |
14 | def geo_mean(iterable):
15 | a = np.array(iterable)
16 | return a.prod()**(1.0/len(a))
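# Quick numeric check (illustrative): geo_mean([0.5, 0.8]) == (0.5 * 0.8) ** 0.5, i.e. about 0.632.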
17 |
18 | # def geo_mean_overflow(iterable):
19 | # a = np.log(iterable)
20 | # return np.exp(a.sum()/len(a))
21 |
22 | class PathwayScoring(object):
23 | """
24 | Defines Pathway Scorer object.
25 | """
26 | def __init__(self, scoring_function = None, scoring_json_function = None):
27 | if scoring_function is None:
28 | pass
29 | else:
30 | self.scoring_function = scoring_function
31 | if scoring_json_function is None:
32 | pass
33 | else:
34 | self.scoring_json_function = scoring_json_function
35 |
36 | def __repr__(self):
37 | """
38 | Name the used scorer.
39 |         Raises an error if the class is not properly instantiated
40 | """
41 | return(self.name)
42 |
43 | def calculate(self, pathway):
44 | score = self.scoring_function(pathway)
45 | return(score)
46 |
47 | def calculate_json(self, pathway):
48 | score = self.scoring_json_function(pathway)
49 | return(score)
50 |
51 | def pseudo_random(pathway):
52 | score = random.uniform(0, 10)
53 | return(score)
54 |
55 | class ConstantPathwayScoring(PathwayScoring):
56 | """
57 | Returns a constant reward, whichever the pathway.
58 | """
59 | def __init__(self, reward = 10):
60 | PathwayScoring.__init__(self)
61 | self.reward = reward
62 | self.scoring_function = self.scoring_function()
63 | self.scoring_json_function = self.scoring_json_function()
64 | self.name = "ConstantPathwayScoring of {}".format(reward)
65 |
66 | def set_reward(self,reward):
67 | # For changing the reward of the object
68 | self.reward = reward
69 | self.scoring_function = self.scoring_function()
70 | self.scoring_json_function = self.scoring_json_function()
71 |
72 | def scoring_function(self):
73 | def pathway_scoring(pathway):
74 | return(self.reward)
75 | return(pathway_scoring)
76 |
77 | def scoring_json_function(self):
78 | def pathway_scoring(pathway):
79 | return(self.reward)
80 | return(pathway_scoring)
81 |
82 | class BiologicalPathwayScoring(PathwayScoring):
83 | """
84 | Returns the geometric mean of biological scores in the Pathway.
85 | """
86 | def __init__(self):
87 | PathwayScoring.__init__(self)
88 | self.scoring_function = self.scoring_function()
89 | self.scoring_json_function = self.scoring_json_function()
90 | self.name = "BiologicalPathwayScoring"
91 |
92 | def scoring_function(self):
93 | def pathway_scoring(pathway):
94 | scores = []
95 | for move in pathway.nodes_transformations:
96 | scores.append(move["data"]["Score"])
97 | return(geo_mean(scores))
98 | return(pathway_scoring)
99 |
100 | def scoring_json_function(self):
101 | def pathway_scoring(pathway):
102 | scores = []
103 | for move in pathway["elements"]["nodes"]:
104 | if move["data"]["type"] == "reaction":
105 | scores.append(move["data"]["Score"])
106 | return(geo_mean(scores))
107 | return(pathway_scoring)
108 |
109 | class ChemicalPathwayScoring(PathwayScoring):
110 | """
111 | Returns the geometric mean of chemical scores in the Pathway.
112 | """
113 | def __init__(self):
114 | PathwayScoring.__init__(self)
115 | self.scoring_function = self.scoring_function()
116 | self.scoring_json_function = self.scoring_json_function()
117 | self.name = "ChemicalPathwayScoring"
118 |
119 | def scoring_function(self):
120 | def pathway_scoring(pathway):
121 | scores = []
122 | for move in pathway.nodes_transformations:
123 | scores.append(move["data"]["ChemicalScore"])
124 | return(geo_mean(scores))
125 | return(pathway_scoring)
126 |
127 | def scoring_json_function(self):
128 | def pathway_scoring(pathway):
129 | scores = []
130 | for move in pathway["elements"]["nodes"]:
131 | if move["data"]["type"] == "reaction":
132 | scores.append(move["data"]["ChemicalScore"])
133 | return(geo_mean(scores))
134 | return(pathway_scoring)
135 |
136 | class BiochemicalPathwayScoring(PathwayScoring):
137 | """
138 | Returns the geometric mean of biochemical scores in the Pathway.
139 | """
140 | def __init__(self):
141 | PathwayScoring.__init__(self)
142 | self.scoring_function = self.scoring_function()
143 | self.scoring_json_function = self.scoring_json_function()
144 |         self.name = "BiochemicalPathwayScoring"
145 |
146 | def scoring_function(self):
147 | def pathway_scoring(pathway):
148 | scores = []
149 | for move in pathway.nodes_transformations:
150 | scores.append(move["data"]["ChemicalScore"] * move["data"]["Score"])
151 | return(geo_mean(scores))
152 | return(pathway_scoring)
153 |
154 | def scoring_json_function(self):
155 | def pathway_scoring(pathway):
156 | scores = []
157 | for move in pathway["elements"]["nodes"]:
158 | if move["data"]["type"] == "reaction":
159 | scores.append(move["data"]["Score"] * move["data"]["ChemicalScore"])
160 | return(geo_mean(scores))
161 | return(pathway_scoring)
162 |
163 | RandomPathwayScorer = PathwayScoring(scoring_function = pseudo_random)
164 | constant_pathway_scoring = ConstantPathwayScoring(reward = 10)
165 | null_pathway_scoring = ConstantPathwayScoring(reward = 0)
166 | biological_pathway_scoring = BiologicalPathwayScoring()
167 | chemical_pathway_scoring = ChemicalPathwayScoring()
168 | biochemical_pathway_scoring = BiochemicalPathwayScoring()
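# Usage sketch (the pathway_dict below is hypothetical, assumed to come from Pathway.export_as_json_dict()):
#     score = biochemical_pathway_scoring.calculate_json(pathway_dict)
# This returns the geometric mean of Score * ChemicalScore over all "reaction" nodes.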
169 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.commitizen]
2 | name = "cz_conventional_commits"
3 | version = "1.1.0"
4 | version_provider = "commitizen"
5 | tag_format = "$version"
6 | version_type = "semver2"
--------------------------------------------------------------------------------
/representation.py:
--------------------------------------------------------------------------------
1 | """
2 | The aim of this file is to define a representation class for tree printing.
3 | It is useful to switch between the two, e.g. coloured output for the terminal versus plain text for file output.
4 | """
5 |
6 | class Representation(object):
7 | """ Contains all things necessary for representing my nodes and trees"""
8 | def __init__(self, delimiter = "|", color = "red", printing_solved = "- solved"):
9 | self.delimiter = delimiter # Delimiter between nodes
10 | if color == "red":
11 | self.color_begin = '\033[91m'
12 | self.color_end = '\033[0m'
13 | elif color == "":
14 | self.color_begin = ''
15 | self.color_end = ''
16 | else:
17 | raise NotImplementedError
18 | self.printing_solved = printing_solved
19 |
20 | Test_representation = Representation(delimiter = "|", color = "red", printing_solved = "")
21 | Test_to_file = Representation(delimiter = "|", color = "", printing_solved = "- solved")
22 |
--------------------------------------------------------------------------------
/rewarding.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines the possible rewards for rollout.
3 | Can be augmented with more complex policies, using a scheme similar to the Rollout or UCT policies.
4 | Is defined through CLI in the Tree script.
5 | """
6 |
7 | class RolloutRewards(object):
8 | """
9 |     Defines the rollout penalty and the full-state reward (granted when the state is fully in the chassis).
10 | """
11 | def __init__(self, penalty, full_state_reward):
12 | self.penalty = penalty
13 | self.full_state_reward = full_state_reward
14 |
15 | def __repr__(self):
16 | """Reward representation is its values"""
17 | return("Penalty is {} and full state reward is {}".format(self.penalty, self.full_state_reward))
18 |
19 | Basic_Rollout_Reward = RolloutRewards(penalty = -1, full_state_reward = 2)
20 |
--------------------------------------------------------------------------------
/rule_sets_examples.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains the rules examples that will be used throughout the tests.
3 | The aim is to
4 | """
5 |
6 | import logging
7 | import csv
8 | import os
9 |
10 | rule_10_subset_address = "{}/tests/data/rules_r10_subset.tsv".format(os.path.dirname(__file__))
11 | applicable_rules_10_dict = {}
12 | with open(rule_10_subset_address, "r") as csv_file:
13 | fieldnames = ["Rule_ID", "Reaction_ID", "Diameter", "Direction", "Rule_order", "Rule_SMARTS", "Substrate_ID", "Substrate_SMILES", "Product_IDs", "Product_SMILES", "Rule_SMILES", "Rule_SMARTS_lite"]
14 | csv_reader = csv.DictReader(csv_file, delimiter = '\t', fieldnames = fieldnames)
15 | next(csv_reader) # skip first line
16 | for element in csv_reader:
17 | applicable_rules_10_dict[element["Rule_ID"]] = {"Rule_SMARTS": element["Rule_SMARTS"],
18 | "biological_score": 1,
19 | "EC_number": ["EC: None"],
20 | "Rule_SMILES": element["Rule_SMILES"]}
21 |
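# Resulting structure (illustrative, the rule id is hypothetical): each entry maps a rule id to
#     {"Rule_SMARTS": "...", "biological_score": 1, "EC_number": ["EC: None"], "Rule_SMILES": "..."}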
22 |
23 | rule_2_subset_address = "{}/tests/data/rules_r2_subset.tsv".format(os.path.dirname(__file__))
24 | applicable_rules_2_dict = {}
25 | with open(rule_2_subset_address, "r") as csv_file:
26 | fieldnames = ["Rule_ID", "Reaction_ID", "Diameter", "Direction", "Rule_order", "Rule_SMARTS", "Substrate_ID", "Substrate_SMILES", "Product_IDs", "Product_SMILES", "Rule_SMILES", "Rule_SMARTS_lite"]
27 | csv_reader = csv.DictReader(csv_file, delimiter = '\t', fieldnames = fieldnames)
28 | next(csv_reader) # skip first line
29 | for element in csv_reader:
30 | applicable_rules_2_dict[element["Rule_ID"]] = {"Rule_SMARTS": element["Rule_SMARTS"],
31 | "biological_score": 1,
32 | "EC_number": ["EC: None"],
33 | "Rule_SMILES": element["Rule_SMILES"]}
34 |
35 |
36 | rule_mixed_subset_address = "{}/tests/data/rules_mixed_subset.tsv".format(os.path.dirname(__file__))
37 | applicable_rules_mixed_dict = {}
38 | with open(rule_mixed_subset_address, "r") as csv_file:
39 | fieldnames = ["Rule_ID", "Reaction_ID", "Diameter", "Direction", "Rule_order", "Rule_SMARTS", "Substrate_ID", "Substrate_SMILES", "Product_IDs", "Product_SMILES", "Rule_SMILES", "Rule_SMARTS_lite"]
40 | csv_reader = csv.DictReader(csv_file, delimiter = '\t', fieldnames = fieldnames)
41 | next(csv_reader) # skip first line
42 | for element in csv_reader:
43 | applicable_rules_mixed_dict[element["Rule_ID"]] = {"Rule_SMARTS": element["Rule_SMARTS"],
44 | "biological_score": 1,
45 | "EC_number": ["EC: None"],
46 | "Rule_SMILES": element["Rule_SMILES"]}
47 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | with open("README.md", "r") as fh:
4 | long_description = fh.read()
5 |
6 | setup(
7 | name="rp3",
8 | version="0.0",
9 | author="Mathilde Koch",
10 | author_email="mathilde.koch@inra.fr",
11 | description="Perform retrosynthesis with Monte-Carlo Tree Search algorithm",
12 | long_description=long_description,
13 | long_description_content_type="text/markdown",
14 | url="https://github.com/brsynth/RetroPath3",
15 | packages=find_packages(),
16 | python_requires=">=3.6",
17 | include_package_data=True,
18 | )
19 |
--------------------------------------------------------------------------------
/supplement_finder.py:
--------------------------------------------------------------------------------
1 | """
2 | Find supplements to complete a Tree.
3 | Read argparser for details of arguments.
4 | The principle is to identify the compounds needed to complete chemical states.
5 | """
6 |
7 | # General utilities
8 | import os
9 | import sys
10 | import time
11 | import signal
12 | import datetime
13 | import logging
14 | import argparse
15 | import pickle
16 | import json
17 |
18 | import random
19 |
20 | from Tree import Tree
21 |
22 | def unpickle(file_name, type = "tree", folder_address = "pickled_data"):
23 | with open('{}/{}_{}.pkl'.format(folder_address, type, file_name), 'rb') as input:
24 | return(pickle.load(input))
25 |
26 |
27 | def run(tree, number_suggestions, rescued_states, folder_to_save, database = None):
28 | potential_supplements = {}
29 | # Extracting all potential supplements from the Tree.
30 | nodes_to_treat = [tree.root_node]
31 | while nodes_to_treat != []:
32 | node = nodes_to_treat[0]
33 | del nodes_to_treat[0]
34 | state = node.state
35 | supplement = state.GetSupplement_from_InChI_Keys()
36 |         if supplement is not None:
37 |             if supplement.InChIKey in potential_supplements:
38 |                 potential_supplements[supplement.InChIKey]["rescued_states"] += 1
39 | else:
40 | information_to_keep = {"structure": supplement.csmiles,
41 | "name_from_MCTS": supplement.name,
42 | "synonyms_names": supplement.synonyms_names,
43 | "rescued_states":1}
44 | potential_supplements[supplement.InChIKey] = information_to_keep
45 | if node.terminal:
46 | pass
47 | else:
48 | for child in node.children:
49 | nodes_to_treat.append(child)
50 | logging.info("Potential supplements without filtering: {}".format(len(potential_supplements.keys())))
51 | # Sorting according to number of rescued states
52 | sorted_supplements = [suppl for suppl, value in sorted(potential_supplements.items(), key=lambda item: item[1]["rescued_states"], reverse=True) if value["rescued_states"] >= rescued_states]
53 | logging.info("Potential supplements after filtering with {} rescued states: {}".format(rescued_states, len(sorted_supplements)))
54 |
55 | # Filtering according to presence in a database of interest
56 | if database is None:
57 | supplements_of_interest = sorted_supplements
58 | logging.warning("Not checking availability within a Database of interest")
59 | else:
60 | supplements_of_interest = []
61 | for element in sorted_supplements:
62 |             if element in database:
63 |                 logging.info("Element {} (rescuing {} states) is in the database ({})".format(element, potential_supplements[element]["rescued_states"], database[element]))
64 |                 supplements_of_interest.append(element)
65 |     # Filtering according to the maximal number of allowed suggestions
66 | if len(supplements_of_interest) > number_suggestions:
67 | supplements_of_interest = supplements_of_interest[0:number_suggestions]
68 | logging.info("Keeping {} potential supplements".format(number_suggestions))
69 | assert len(supplements_of_interest) == number_suggestions
70 | else:
71 | logging.info("Keeping all supplements as there are only {} ({} allowed)".format(len(supplements_of_interest), number_suggestions))
72 |
73 | # Extracting pathways
74 | for supplement_to_extract in supplements_of_interest:
75 | # setting up search
76 | found_pathways = 0
77 | folder_to_save_pathways = "{}/{}".format(folder_to_save, supplement_to_extract.split("-")[0])
78 | if not os.path.exists(folder_to_save_pathways):
79 | os.mkdir(folder_to_save_pathways)
80 | # searching
81 | tree.set_folder_to_save(folder_to_save_pathways)
82 | nodes_to_treat = [tree.root_node]
83 | while nodes_to_treat != []:
84 | node = nodes_to_treat[0]
85 | del nodes_to_treat[0]
86 | state = node.state
87 | supplement = state.GetSupplement_from_InChI_Keys()
88 |             if supplement is not None:
89 |                 if supplement.InChIKey == supplement_to_extract:
90 |                     found_pathways += 1
91 | found_pathway = tree.extract_pathway_from_bottom(node, iteration=found_pathways)
92 | if node.terminal:
93 | pass
94 | else:
95 | for child in node.children:
96 | nodes_to_treat.append(child)
97 |         logging.info("Extracted {} pathways for {}".format(found_pathways, supplement_to_extract))
98 |
99 | def __cli():
100 | """
101 | Command line interface.
102 | """
103 |
104 |     d = "Arguments for supplement finder. Find compounds that can complete a Tree and be supplemented to the media."
105 | parser = argparse.ArgumentParser(description=d)
106 | parser.add_argument("--tree_to_complete", help="Tree to find supplements to", default="end_search")
107 |     parser.add_argument("--folder_tree_to_complete", help="Folder containing the pickled tree (its 'pickles' subfolder) to find supplements for", default=None)
108 |
109 |     parser.add_argument("--number_suggestions", default = 20, type = int,
110 |                         help = "Maximum number of suggestions returned")
111 |     parser.add_argument("--rescued_states", default = 1, type = int,
112 |                         help = "Minimum number of times the compound must complete states")
113 | parser.add_argument("--folder_to_save", default="testing_supplement_finder")
114 | parser.add_argument("--terminal", help="Default logger is within the new folder_to_save, switch to terminal if specified",
115 | action='store_true', default=False)
116 | parser.add_argument("--database_address", default=None,
117 | help = "Address of a database to check availability. Json format required. Keys are inchikeys. Values are names, but could be cost or any metric of interest")
118 |
119 | args = parser.parse_args()
120 | folder_to_save = args.folder_to_save
121 | if not os.path.exists(folder_to_save):
122 | os.makedirs(folder_to_save, exist_ok=True)
123 |
124 | if args.terminal is True:
125 | logging.basicConfig(
126 | stream = sys.stderr,
127 | level=logging.INFO,
128 | datefmt='%d/%m/%Y %H:%M:%S',
129 | format='%(asctime)s -- %(levelname)s -- %(message)s'
130 | )
131 | else:
132 | logging.basicConfig(
133 | stream = open("{}/{}.log".format(folder_to_save, "supplement_finder"), "w"),
134 | level=logging.INFO,
135 | datefmt='%d/%m/%Y %H:%M:%S',
136 | format='%(asctime)s -- %(levelname)s -- %(message)s'
137 | )
138 | completed_tree = unpickle(file_name=args.tree_to_complete,
139 | type='tree',
140 | folder_address="{}/pickles".format(args.folder_tree_to_complete))
141 | if args.database_address is None:
142 | database = None
143 | else:
144 | with open(args.database_address, "r") as json_file:
145 | database = json.load(json_file)
146 |
147 | run(completed_tree, number_suggestions = args.number_suggestions,
148 | rescued_states =args.rescued_states, folder_to_save = args.folder_to_save,
149 | database = database)
150 |
151 |
152 | if __name__ == "__main__":
153 | __cli()
154 |
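# Illustrative invocation (paths are hypothetical; see the argparser above for defaults):
#     python supplement_finder.py \
#         --folder_tree_to_complete some_results_folder \
#         --tree_to_complete end_search \
#         --number_suggestions 10 --rescued_states 2 \
#         --folder_to_save some_results_folder/supplements
# The pickled tree is expected at <folder_tree_to_complete>/pickles/tree_end_search.pkl.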
--------------------------------------------------------------------------------
/tests/data/state_BOPG_BSAB_GPRL.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/tests/data/state_BOPG_BSAB_GPRL.pkl
--------------------------------------------------------------------------------
/tests/data/tree_pipecolate_test.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/tests/data/tree_pipecolate_test.pkl
--------------------------------------------------------------------------------
/tests/generated_jsons/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitkeep
3 | !.gitignore
--------------------------------------------------------------------------------
/tests/generated_jsons/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/tests/generated_jsons/.gitkeep
--------------------------------------------------------------------------------
/tests/test_Filters.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import pytest
3 |
4 | from utilities.chemtools.Filters import Filters
5 | from rdkit.Chem import MolFromSmiles, MolToSmiles
6 | from rdkit.Chem import MolFromInchi, MolToInchi
7 |
8 |
9 | def test_init():
10 | assert Filters()
11 |
12 | def test_copy_properties():
13 | # TODO: add some tests here
14 | pass
15 |
16 | def test_keep_biggest():
17 | mol = Filters.keep_biggest(MolFromSmiles('CCCC.CC'))
18 | assert MolToSmiles(mol) == 'CCCC'
19 | mol = Filters.keep_biggest(MolFromSmiles('CCCCC.CC.[H].CCC'))
20 | assert MolToSmiles(mol) == 'CCCCC'
21 | mol = Filters.keep_biggest(MolFromInchi('InChI=1S/C5H12N2O2.C4H7NO4/c6-3-1-2-4(7)5(8)9;5-2(4(8)9)1-3(6)7/h4H,1-3,6-7H2,(H,8,9);2H,1,5H2,(H,6,7)(H,8,9)/t4-;2-/m00/s1'))
22 | assert MolToInchi(mol) == 'InChI=1S/C4H7NO4/c5-2(4(8)9)1-3(6)7/h2H,1,5H2,(H,6,7)(H,8,9)/t2-/m0/s1'
23 | mol = Filters.keep_biggest(MolFromInchi('InChI=1S/Mo.4O/q;;;2*-1'))
24 | assert MolToInchi(mol) == 'InChI=1S/Mo'
25 |
26 | def test_commute_inchi():
27 | inchi = 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)/p-1'
28 | mol = Filters.commute_inchi(MolFromInchi(inchi))
29 | assert MolToInchi(mol) == inchi
30 |
31 | def test_remove_isotope():
32 | mol = Filters.remove_isotope(MolFromSmiles('c1cc[14cH]cc1'))
33 | assert MolToSmiles(mol) == ('c1ccccc1')
34 |
35 | def test_neutralise_charge():
36 | mol = Filters.neutralise_charge(MolFromSmiles('CC(C(=O)[O-])O'))
37 | assert MolToSmiles(mol) == ('CC(O)C(=O)O')
38 |
39 | def test_add_hydrogen():
40 | mol = Filters.add_hydrogen(MolFromSmiles('CC(O)C(=O)O'))
41 | assert MolToSmiles(mol) == '[H]OC(=O)C([H])(O[H])C([H])([H])[H]'
42 | mol = Filters.add_hydrogen(MolFromSmiles('CC(C(=O)[O-])O'))
43 | assert MolToSmiles(mol) == '[H]OC([H])(C(=O)[O-])C([H])([H])[H]'
44 |
45 | def test_kekulize():
46 | mol = Filters.kekulize(MolFromSmiles('c1ccccc1'))
47 | assert MolToSmiles(mol) == 'C1=CC=CC=C1'
48 |
49 | def test_remove_stereo():
50 | mol = Filters.remove_stereo(MolFromSmiles('C[C@@H](C(=O)[O-])O'))
51 | assert MolToSmiles(mol) == 'CC(O)C(=O)[O-]'
52 | mol = Filters.remove_stereo(MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+'))
53 | assert MolToSmiles(mol) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=CC1=C1C(O)=Nc2ccccc21'
54 | mol = Filters.commute_inchi(mol) # Expected to change tautomerism
55 | assert MolToSmiles(mol) == 'O=C1NC(C2=CNC3=C2C=C(O)C=C3)=CC1=C1C(=O)NC2=CC=CC=C21'
56 |
--------------------------------------------------------------------------------
/tests/test_Standardizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import pytest
3 |
4 | import inspect
5 | from utilities.chemtools.Standardizer import Standardizer
6 | from utilities.chemtools.Sequences import sequence_tunable
7 | from rdkit.Chem import MolFromSmiles, MolToSmiles
8 | from rdkit.Chem import MolFromInchi, MolToInchi
9 |
10 | def test_init():
11 | def sequence_dummy(mol):
12 | return mol
13 | assert Standardizer()
14 | assert Standardizer(sequence_fun=sequence_dummy)
15 | assert Standardizer(sequence_fun=sequence_dummy, params=dict())
16 |
17 | def test_sequence_minimal():
18 | # Violacein
19 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+')
20 | ans = Standardizer().compute(mol)
21 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+'
22 | assert MolToSmiles(ans) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=C/C1=C1\\C(O)=Nc2ccccc21'
23 | # L-Lactate
24 | mol = MolFromInchi('')
25 |
26 | def test_sequence_rr_legacy():
27 | # Violacein
28 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+')
29 | ans = Standardizer(sequence_fun='sequence_rr_legacy').compute(mol)
30 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+'
31 | assert MolToSmiles(ans) == '[H]OC1=NC(C2=C([H])N([H])C3=C2C([H])=C(O[H])C([H])=C3[H])=C([H])/C1=C1\\C(O[H])=NC2=C([H])C([H])=C([H])C([H])=C21'
32 |
33 | def test_sequence_tunable():
34 | # Check default arguments
35 | args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations = inspect.getfullargspec(sequence_tunable)
36 | default_params = dict(zip(args[-len(defaults):], defaults))
37 | assert default_params == {
38 | 'OP_REMOVE_ISOTOPE':True,
39 | 'OP_NEUTRALISE_CHARGE': True,
40 | 'OP_REMOVE_STEREO': False,
41 | 'OP_COMMUTE_INCHI': False,
42 | 'OP_KEEP_BIGGEST': True,
43 | 'OP_ADD_HYDROGEN': True,
44 | 'OP_KEKULIZE': True,
45 | 'OP_NEUTRALISE_CHARGE_LATE': True
46 | }
47 | # Violacein, default parameter
48 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+')
49 | ans = Standardizer(sequence_fun='sequence_tunable').compute(mol)
50 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+'
51 | assert MolToSmiles(ans) == '[H]OC1=NC(C2=C([H])N([H])C3=C2C([H])=C(O[H])C([H])=C3[H])=C([H])/C1=C1\\C(O[H])=NC2=C([H])C([H])=C([H])C([H])=C21'
52 | # Violacein, strip stereo
53 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+')
54 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_REMOVE_STEREO': True}).compute(mol)
55 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)'
56 | assert MolToSmiles(ans) == '[H]OC1=C([H])C2=C(C([H])=C1[H])N([H])C([H])=C2C1=C([H])C(=C2C(=O)N([H])C3=C([H])C([H])=C([H])C([H])=C23)C(=O)N1[H]'
57 |     # Violacein, implicit Hs
58 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+')
59 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_ADD_HYDROGEN': False}).compute(mol)
60 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+'
61 | assert MolToSmiles(ans) == 'OC1=CC2=C(C=C1)NC=C2C1=C/C(=C2/C3=CC=CC=C3N=C2O)C(O)=N1'
62 |     # Violacein, no kekulization
63 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+')
64 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_KEKULIZE': False}).compute(mol)
65 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+'
66 | assert MolToSmiles(ans) == '[H]OC1=NC(c2c([H])n([H])c3c([H])c([H])c(O[H])c([H])c23)=C([H])/C1=C1\\C(O[H])=Nc2c([H])c([H])c([H])c([H])c21'
67 |     # Violacein, strip stereo & implicit Hs & no kekulization
68 | mol = MolFromInchi('InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+')
69 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_REMOVE_STEREO': True, 'OP_ADD_HYDROGEN': False, 'OP_KEKULIZE': False}).compute(mol)
70 | assert MolToInchi(ans) == 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)'
71 | assert MolToSmiles(ans) == 'O=C1NC(c2c[nH]c3ccc(O)cc23)=CC1=C1C(=O)Nc2ccccc21'
72 | # Lactate, default parameter
73 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O')
74 | ans = Standardizer(sequence_fun='sequence_tunable').compute(mol)
75 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)/t2-/m0/s1'
76 | assert MolToSmiles(ans) == '[H]OC(=O)[C@@]([H])(O[H])C([H])([H])[H]'
77 | # L-lactate, implicit Hs
78 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O')
79 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_ADD_HYDROGEN': False}).compute(mol)
80 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)/t2-/m0/s1'
81 | assert MolToSmiles(ans) == 'C[C@H](O)C(=O)O'
82 | # L-lactate, no stereo
83 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O')
84 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_REMOVE_STEREO': True}).compute(mol)
85 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)'
86 | assert MolToSmiles(ans) == '[H]OC(=O)C([H])(O[H])C([H])([H])[H]'
87 | # L-lactate, no charge neutralisation
88 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O')
89 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_NEUTRALISE_CHARGE': False, 'OP_NEUTRALISE_CHARGE_LATE': False}).compute(mol)
90 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)/p-1/t2-/m0/s1'
91 | assert MolToSmiles(ans) == '[H]O[C@]([H])(C(=O)[O-])C([H])([H])[H]'
92 | # L-lactate, implicit Hs & no stereo
93 | mol = MolFromSmiles('C[C@@H](C(=O)[O-])O')
94 | ans = Standardizer(sequence_fun='sequence_tunable', params={'OP_ADD_HYDROGEN': False, 'OP_REMOVE_STEREO': True}).compute(mol)
95 | assert MolToInchi(ans) == 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)'
96 | assert MolToSmiles(ans) == 'CC(O)C(=O)O'
97 |
--------------------------------------------------------------------------------
/tests/test_Tree.py:
--------------------------------------------------------------------------------
1 | """
2 | Aim: test compound features
3 | """
4 |
5 | # General utility packages
6 | import random
7 | import pickle
8 | import logging
9 | # RP3 specific objects
10 | from compound import Compound, unpickle
11 | from chemical_compounds_state import ChemicalCompoundState
12 | from representation import Test_representation, Test_to_file
13 | from organisms import detectable_cmpds_H, Test_organism_H
14 | from organisms import detectable_cmpds_noH
15 | from rewarding import Basic_Rollout_Reward
16 | from MCTS_node import MCTS_node
17 | from UCT_policies import Biochemical_UCT_1, Nature_UCT, Classical_UCT_RAVE, Classical_UCT_with_bias, Classical_UCT
18 | from rule_sets_examples import applicable_rules_mixed_dict, applicable_rules_10_dict
19 | from Tree import Tree
20 | from rule_sets_similarity import get_rules_and_score, full_rules_forward_H, full_rules_retro_H, full_rules_forward_no_H, full_rules_retro_no_H
21 |
22 |
23 |
24 | random.seed(42)
25 |
26 |
27 | class TestTree(object):
28 | def test_equality_statement_not_expanded(self):
29 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]"
30 | compound = Compound(csmile, name = "821")
31 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
32 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
33 |
34 | test_Tree = Tree(root_state = state, itermax = 100)
35 | test_Tree_bis = Tree(root_state = state_bis, itermax = 100)
36 | assert test_Tree == test_Tree_bis
37 |
38 | def test_equality_statement_expanded(self):
39 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]"
40 | compound = Compound(csmile, name = "821")
41 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
42 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
43 |
44 | test_Tree = Tree(root_state = state, itermax = 100)
45 | test_Tree_bis = Tree(root_state = state_bis, itermax = 100)
46 | test_Tree.run_search()
47 | test_Tree_bis.run_search()
48 | assert test_Tree == test_Tree_bis
49 |
50 |     def test_equality_statement_expanded_different_iter(self):
51 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]"
52 | compound = Compound(csmile, name = "821")
53 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
54 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
55 |
56 | test_Tree = Tree(root_state = state, itermax = 100)
57 | test_Tree_bis = Tree(root_state = state_bis, itermax = 1000)
58 | test_Tree.run_search()
59 | test_Tree_bis.run_search()
60 | assert test_Tree != test_Tree_bis
61 |
62 | def test_equality_statement_expanded_false(self):
63 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]"
64 | compound = Compound(csmile, name = "821")
65 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
66 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
67 |
68 | test_Tree = Tree(root_state = state, itermax = 100)
69 | test_Tree_bis = Tree(root_state = state_bis, itermax = 100)
70 | test_Tree.run_search()
71 | assert test_Tree != test_Tree_bis
72 |
73 | def test_equality_statement_expanded_states(self):
74 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]"
75 | compound = Compound(csmile, name = "821")
76 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
77 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
78 |
79 | test_Tree = Tree(root_state = state, itermax = 100, available_rules = applicable_rules_mixed_dict)
80 | test_Tree_bis = Tree(root_state = state_bis, itermax = 500, available_rules = applicable_rules_mixed_dict)
81 | test_Tree.run_search()
82 | test_Tree_bis.run_search()
83 | different_trees = test_Tree != test_Tree_bis
84 | same_states = test_Tree.equality_visited_states(test_Tree_bis)
85 | assert different_trees and same_states
86 |
87 | def test_equality_statement_expanded_states_other_policies(self):
88 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]"
89 | compound = Compound(csmile, name = "821")
90 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
91 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
92 |
93 | test_Tree = Tree(root_state = state, itermax = 100)
94 | test_Tree_bis = Tree(root_state = state_bis, itermax = 1000, UCT_policy = "Nature_UCT")
95 | test_Tree.run_search()
96 | test_Tree_bis.run_search()
97 | different_trees = test_Tree != test_Tree_bis
98 | same_states = test_Tree.equality_visited_states(test_Tree_bis)
99 | assert different_trees and same_states
100 |
101 | def test_pickling_unpickling(self, tmpdir):
102 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]"
103 | compound = Compound(csmile, name = "821")
104 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
105 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
106 |
107 | test_Tree = Tree(root_state = state, itermax = 10000, parallel = False,
108 | Rollout_policy = "Rollout_policy_first",
109 | UCT_policy = "Biochemical_UCT_1")
110 | test_Tree.run_search()
111 | test_Tree.save("test", folder_address = tmpdir)
112 | loaded_tree = unpickle(file_name = 'test', type = 'tree', folder_address = tmpdir)
113 | assert test_Tree == loaded_tree
114 |
115 | def test_pickling_unpickling_differ(self, tmpdir):
116 | csmile = "[H][C](=[O])[C]([H])([H])[C]([H])([H])[H]"
117 | compound = Compound(csmile, name = "821")
118 | state = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
119 | state_bis = ChemicalCompoundState([compound], organism = Test_organism_H, representation = Test_representation) # state is not sanitised
120 |
121 | test_Tree = Tree(root_state = state, itermax = 10000, parallel = False,
122 | Rollout_policy = "Rollout_policy_first",
123 | UCT_policy = "Biochemical_UCT_1")
124 | test_Tree.run_search()
125 | test_Tree.save("test", folder_address = tmpdir)
126 | test_Tree.run_search()
127 | loaded_tree = unpickle(file_name = 'test', type = 'tree', folder_address = tmpdir)
128 | assert test_Tree != loaded_tree
129 |
130 | def test_biosensor(self):
131 | organism = detectable_cmpds_H
132 | inchi = "InChI=1S/C6H11NO2/c8-6(9)5-3-1-2-4-7-5/h5,7H,1-4H2,(H,8,9)"
133 | compound = Compound(InChI = inchi, name = "pipecolate")
134 | present_in_state_detectable = organism.compound_in_state(compound)
135 | if present_in_state_detectable:
136 | logging.warning("Removed compound from the detectable set to force enzymatic detection")
137 | organism.remove_cmpd_from_state(compound)
138 | rules, biological_scoring = get_rules_and_score(full_rules_forward_H = full_rules_forward_H,
139 | full_rules_retro_H = full_rules_retro_H,
140 | full_rules_forward_no_H = full_rules_forward_no_H,
141 | full_rules_retro_no_H = full_rules_retro_no_H,
142 | add_Hs = True,
143 | retro = False,
144 | diameters = [10, 12, 14, 16],
145 | small = False,
146 | c_name = None,
147 | filtering_EC = ["1.5.3.7", "1.5.3"])
148 | state = ChemicalCompoundState([compound]) # state is not sanitised
149 | test_Tree = Tree(root_state = state, itermax = 1000, parallel = False,
150 | Rollout_policy = "Rollout_policy_first",
151 | UCT_policy = "Biochemical_UCT_1", available_rules = rules, organism = organism,
152 | biological_scorer = biological_scoring,
153 | folder_to_save = "tests/generated_jsons")
154 | test_Tree.run_search()
155 | loaded_tree = unpickle(file_name = 'pipecolate_test', type = 'tree', folder_address = "tests/data")
156 | same_states = test_Tree.equality_visited_states(loaded_tree)
157 | assert same_states
158 |
--------------------------------------------------------------------------------
/tests/test_Utils.py:
--------------------------------------------------------------------------------
1 | import rdkit
2 | from rdkit import Chem
3 | from rdkit.Chem import AllChem
4 | import pytest
5 |
6 |
7 | from utilities.reactor.Utils import standardize_chemical, standardize_results, handle_results
8 |
9 |
10 | class TestBasic2(object):
11 |
12 | def test_standardize_chemical_1(self):
13 | rdmol = Chem.MolFromSmiles('[H][O][C](=[O])[C]([H])([O][H])[C]([H])([H])[H]')
14 | rdmol_std_1 = standardize_chemical(rdmol, add_hs=False)
15 | assert Chem.MolToSmiles(rdmol_std_1) == 'CC(O)C(=O)O'
16 | rdmol_std_2 = standardize_chemical(rdmol, add_hs=True)
17 | assert Chem.MolToSmiles(rdmol_std_2, allHsExplicit=True) == '[H][O][C](=[O])[C]([H])([O][H])[C]([H])([H])[H]'
18 |
19 | def test_standardize_chemical_2(self):
20 | # Data
21 | violacein_smiles = 'OC1=NC(=C\\C1=C1/C(O)=NC2=CC=CC=C12)C1=CNC2=C1C=C(O)C=C2'
22 | violacein_mol = Chem.MolFromSmiles(violacein_smiles, sanitize=False)
23 | # Test simplest case
24 | std_mol_1 = standardize_chemical(violacein_mol, add_hs=False, rm_stereo=False)
25 | assert Chem.MolToSmiles(std_mol_1) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=C/C1=C1\\C(O)=Nc2ccccc21'
26 | # Test adding Hs
27 | std_mol_2 = standardize_chemical(violacein_mol, add_hs=True, rm_stereo=False)
28 | assert Chem.MolToSmiles(std_mol_2) == '[H]OC1=NC(c2c([H])n([H])c3c([H])c([H])c(O[H])c([H])c23)=C([H])/C1=C1\\C(O[H])=Nc2c([H])c([H])c([H])c([H])c21'
29 | # Test removing stereo
30 | std_mol_3 = standardize_chemical(violacein_mol, add_hs=False, rm_stereo=True)
31 | assert Chem.MolToSmiles(std_mol_3) == 'O=C1NC(c2c[nH]c3ccc(O)cc23)=CC1=C1C(=O)Nc2ccccc21'
32 | # Test adding Hs + removing stereo
33 | std_mol_4 = standardize_chemical(violacein_mol, add_hs=True, rm_stereo=True)
34 | assert Chem.MolToSmiles(std_mol_4) == '[H]Oc1c([H])c([H])c2c(c1[H])c(C1=C([H])C(=C3C(=O)N([H])c4c([H])c([H])c([H])c([H])c43)C(=O)N1[H])c([H])n2[H]'
35 |
36 | def test_standardize_chemical_3(self):
37 | # Data
38 | wrong_smiles = '[H]OC(=O)C([H])([H])C([H])([H])C([H])(N=C(O[H])C([H])([H])C([H])([H])C([H])(N=C(O[H])C([H])(OP(=O)(O[H])OC([H])([H])C([H])(O[H])C([H])(O[H])C([H])(O[H])C([H])([H])n1c2nc(=O)nc(O[H])c-2c([H])c2c([H])c([H])c(OP(=O)(OC([H])([H])C(C([H])([H])[H])(C([H])([H])[H])C([H])(O[H])C(=NC([H])([H])C([H])([H])C(=NC([H])([H])C([H])([H])SC(=O)C([H])([H])C([H])([H])C([H])([H])C([H])(C(=C([H])[H])C([H])([H])[H])C([H])([H])C(=O)O[H])O[H])O[H])OP(=O)(O[H])OC([H])([H])C3([H])OC([H])(n4[c]([H])n([H])[c]5[c](N([H])[H])[n][c]([H])[n][c]54)C([H])(O[H])C3([H])OP(=O)(O[H])O[H])c([H])c21)C([H])([H])[H])C(=O)O[H])C(=O)O[H]'
39 | # Test
40 | wrong_mol = Chem.MolFromSmiles(wrong_smiles, sanitize=False)
41 | with pytest.raises(Exception):
42 | standardize_chemical(wrong_mol)
43 |
44 | def test_standardize_results_1(self):
45 | tuple_tuple_raw = ((
46 | Chem.MolFromSmiles('[H][O][C](=[O])[C]([H])([O][P](=[O])([O][H])[O][H])[C]([H])([H])[H]'),
47 | Chem.MolFromSmiles('[H][N]=[c]1[n][c]([O][H])[c]2[n][c]([H])[n]([C]3([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][H])[C]([H])([O][H])[C]3([H])[O][H])[c]2[n]1[H]')
48 | ),(
49 | Chem.MolFromInchi('InChI=1S/C5H6N5O/c6-5-9-3-2(4(11)10-5)7-1-8-3/h1H,9H2,(H,7,8)(H2,6,10,11)')
50 | ))
51 | tuple_tuple_rdmol, tuple_index_failed = standardize_results(tuple_tuple_raw, add_hs=True, rm_stereo=True)
52 | assert len(tuple_tuple_rdmol) == 1
53 | assert tuple_index_failed == [1]
54 |
55 | def test_handle_result(self):
56 | tuple_raw = (
57 | Chem.MolFromSmiles('[H][O][C](=[O])[C]([H])([O][P](=[O])([O][H])[O][H])[C]([H])([H])[H]'),
58 | Chem.MolFromSmiles('[H][N]=[c]1[n][c]([O][H])[c]2[n][c]([H])[n]([C]3([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][H])[C]([H])([O][H])[C]3([H])[O][H])[c]2[n]1[H]')
59 | )
60 | tuple_tuple_rdmol, tuple_tuple_failed = standardize_results(tuple_tuple_rdmol=(tuple_raw,), add_hs=True, rm_stereo=True)
61 | inchikeys, inchis, smiles = handle_results(list_list_rdmol=tuple_tuple_rdmol)
62 |         # Check number of products
63 |         assert len(inchikeys) == len(inchis) == len(smiles) == 1  # only one set of results
64 | assert len(inchikeys[0]) == len(inchis[0]) == len(smiles[0]) == 2 # 2 products
65 | # Check Inchikeys
66 | assert inchikeys[0][0] == 'CSZRNWHGZPKNKY-UHFFFAOYSA-N'
67 | assert inchikeys[0][1] == 'QGWNDRXFNXRZMB-UHFFFAOYSA-N'
68 | # Check Inchis
69 | assert inchis[0][0] == 'InChI=1S/C3H7O6P/c1-2(3(4)5)9-10(6,7)8/h2H,1H3,(H,4,5)(H2,6,7,8)'
70 | assert inchis[0][1] == 'InChI=1S/C10H15N5O11P2/c11-10-13-7-4(8(18)14-10)12-2-15(7)9-6(17)5(16)3(25-9)1-24-28(22,23)26-27(19,20)21/h2-3,5-6,9,16-17H,1H2,(H,22,23)(H2,19,20,21)(H3,11,13,14,18)'
71 | # Check SMILES #1
72 | assert smiles[0][0] == '[H]OC(=O)C([H])(OP(=O)(O[H])O[H])C([H])([H])[H]'
73 | rdmol = Chem.MolFromSmiles(smiles[0][0])
74 | rdmol = Chem.AddHs(rdmol)
75 | assert Chem.MolToSmiles(rdmol, allHsExplicit=True) == '[H][O][C](=[O])[C]([H])([O][P](=[O])([O][H])[O][H])[C]([H])([H])[H]'
76 | # Check SMILES #2
77 | assert smiles[0][1] == '[H]N=c1nc(O[H])c2nc([H])n(C3([H])OC([H])(C([H])([H])OP(=O)(O[H])OP(=O)(O[H])O[H])C([H])(O[H])C3([H])O[H])c2n1[H]'
78 | rdmol = Chem.MolFromSmiles(smiles[0][1])
79 | rdmol = Chem.AddHs(rdmol)
80 | assert Chem.MolToSmiles(rdmol, allHsExplicit=True) == '[H][N]=[c]1[n][c]([O][H])[c]2[n][c]([H])[n]([C]3([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][H])[C]([H])([O][H])[C]3([H])[O][H])[c]2[n]1[H]'
81 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | """
2 | Test RuleBurner class
3 | """
4 |
5 | import rdkit
6 | from rdkit import Chem
7 | import pytest
8 | import multiprocessing
9 |
10 |
11 | from utilities.reactor.cli import RuleBurner, RuleConversionError, ChemConversionError
12 |
13 |
14 | # Data for tests
15 | substate_inchi = 'InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)'
16 | reaction_smarts = '([#8&v2:1](-[#6&v4:2](-[#6&v4:3](-[#8&v2:4]-[#1&v1:5])=[#8&v2:6])(-[#6&v4:7](-[#1&v1:8])(-[#1&v1:9])-[#1&v1:10])-[#1&v1:11])-[#1&v1:12])>>([#15&v5](=[#8&v2])(-[#8&v2]-[#1&v1])(-[#8&v2]-[#1&v1])-[#8&v2:1]-[#6&v4:2](-[#6&v4:3](-[#8&v2:4]-[#1&v1:5])=[#8&v2:6])(-[#6&v4:7](-[#1&v1:8])(-[#1&v1:9])-[#1&v1:10])-[#1&v1:11].[#7&v3](=[#6&v4]1:[#7&v3]:[#6&v4](-[#8&v2]-[#1&v1]):[#6&v4]2:[#7&v3]:[#6&v4](-[#1&v1]):[#7&v3](-[#6&v4]3(-[#1&v1])-[#8&v2]-[#6&v4](-[#6&v4](-[#8&v2]-[#15&v5](=[#8&v2])(-[#8&v2]-[#1&v1])-[#8&v2]-[#15&v5](-[#8&v2]-[#1&v1:12])(=[#8&v2])-[#8&v2]-[#1&v1])(-[#1&v1])-[#1&v1])(-[#1&v1])-[#6&v4](-[#8&v2]-[#1&v1])(-[#1&v1])-[#6&v4]-3(-[#8&v2]-[#1&v1])-[#1&v1]):[#6&v4]:2:[#7&v3]:1-[#1&v1])-[#1&v1])'
17 | tuple_product_inchikeys = ('CSZRNWHGZPKNKY-UHFFFAOYSA-N', 'QGWNDRXFNXRZMB-UHFFFAOYSA-N')
18 | tuple_product_smiles = ('[H][O][C](=[O])[C]([H])([O][P](=[O])([O][H])[O][H])[C]([H])([H])[H]', '[H][N]=[c]1[n][c]([O][H])[c]2[n][c]([H])[n]([C]3([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][H])[C]([H])([O][H])[C]3([H])[O][H])[c]2[n]1[H]')
19 | tuple_product_inchis = ('InChI=1S/C3H7O6P/c1-2(3(4)5)9-10(6,7)8/h2H,1H3,(H,4,5)(H2,6,7,8)', 'InChI=1S/C10H15N5O11P2/c11-10-13-7-4(8(18)14-10)12-2-15(7)9-6(17)5(16)3(25-9)1-24-28(22,23)26-27(19,20)21/h2-3,5-6,9,16-17H,1H2,(H,22,23)(H2,19,20,21)(H3,11,13,14,18)')
20 |
21 |
22 | def dummy_worker(**kwargs):
23 | import time
24 | time.sleep(1)
25 |
26 |
27 | def test_init():
28 |     # An empty rule / chemical list is OK
29 |     rb = RuleBurner(rsmarts_list=[], inchi_list=[])
30 | rb.compute()
31 |
32 |
33 | def test_run_with_timeout():
34 | rb = RuleBurner(rsmarts_list=[], inchi_list=[])
35 | with pytest.raises(multiprocessing.context.TimeoutError):
36 | rb._run_with_timeout(dummy_worker, None, timeout=0)
37 | rb._run_with_timeout(dummy_worker, None, timeout=2)
38 |
39 |
40 | def test_jsonify():
41 | rb = RuleBurner(rsmarts_list=[], inchi_list=[])
42 | assert rb._jsonify(rsmarts='', inchi='', rid='RID', cid='CID').replace('\n', '') == """{ "rule_id": "RID", "substrate_id": "CID", "fire_timed_out": null, "fire_exec_time": null}"""
43 |
44 |
45 | def test_compute():
46 | # Wrong reaction depiction
47 | rb = RuleBurner(rsmarts_list=['DUMMY'], inchi_list=[])
48 | with pytest.raises(RuleConversionError):
49 | rb.compute()
50 | # Wrong chemical depiction
51 | rb = RuleBurner(rsmarts_list=[reaction_smarts], inchi_list=['DUMMY'])
52 | with pytest.raises(ChemConversionError):
53 | rb.compute()
54 | # Timeout should be logged
55 | rb = RuleBurner(rsmarts_list=[reaction_smarts], inchi_list=[substate_inchi], fire_timeout=0)
56 | rb.compute()
57 |     assert '"fire_timed_out": true' in ''.join(rb._json)
58 | # OK
59 | rb = RuleBurner(rsmarts_list=[reaction_smarts], inchi_list=[substate_inchi])
60 | rb.compute()
61 |     assert 'InChI=1S/C3H7O6P/c1-2(3(4)5)9-10(6,7)8/h2H,1H3,(H,4,5)(H2,6,7,8)' in ''.join(rb._json)
62 |     assert 'InChI=1S/C10H15N5O11P2/c11-10-13-7-4(8(18)14-10)12-2-15(7)9-6(17)5(16)3(25-9)1-24-28(22,23)26-27(19,20)21/h2-3,5-6,9,16-17H,1H2,(H,22,23)(H2,19,20,21)(H3,11,13,14,18)' in ''.join(rb._json)
63 |
--------------------------------------------------------------------------------
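A minimal sketch of how RuleBurner is driven programmatically, based only on what the tests above exercise (the constructor keyword lists, compute(), the RuleConversionError raised on a malformed rule, and the internal _json attribute); the SMARTS string below is deliberately invalid so the snippet stays short, and everything not shown in the tests should be treated as an assumption rather than documented API.

# Minimal sketch of RuleBurner usage, mirroring test_compute() above.
from utilities.reactor.cli import RuleBurner, RuleConversionError

rb = RuleBurner(rsmarts_list=['not-a-valid-smarts'], inchi_list=[])
try:
    rb.compute()                  # parsing the malformed rule raises
except RuleConversionError as err:
    print(err)
# With a real rule SMARTS and substrate InChI (see the module-level test data),
# rb._json is expected to collect one JSON record per (rule, substrate) pair.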
/tests/test_moves.py:
--------------------------------------------------------------------------------
1 | """
2 | Aim: test move features
3 | """
4 |
5 | # RP3 objects
6 | from compound import Compound
7 | from move import Move
8 |
9 | class TestMove(object):
10 | """
11 | Testing moves - should be fast
12 | """
13 | def test_cloning(self):
14 | move = Move(rsmart = "rsmart",
15 | rid = "rid",
16 | compound_id= "compound_id")
17 | cloned_move = move.clone()
18 | different_python_object = (id(move) != id(cloned_move))
19 | identical_move_object = move.eq_full_inchi_key(cloned_move)
20 | assert (different_python_object and identical_move_object)
21 |
22 | def test_equality_true(self):
23 | compound_1 = Compound("[H+]")
24 | compound_6 = Compound("[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]")
25 | compound_2345 = Compound("[H][C](=[O])[C]([H])=[C]([H])[H]")
26 | move = Move(rsmart = "rsmart",
27 | rid = "rid",
28 | compound_id= "compound_id",
29 | product_list = [compound_1, compound_6],
30 | set_number = 5)
31 | move_bis = Move(rsmart = "rsmart",
32 | rid = "rid",
33 | compound_id= "compound_id",
34 | product_list = [compound_6, compound_1])
35 |
36 | assert move.eq_full_inchi_key(move_bis)
37 |
38 | def test_equality_false(self):
39 | compound_1 = Compound("[H+]")
40 | compound_6 = Compound("[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]")
41 | compound_2345 = Compound("[H][C](=[O])[C]([H])=[C]([H])[H]")
42 | move = Move(rsmart = "rsmart",
43 | rid = "rid",
44 | compound_id= "compound_id",
45 | product_list = [compound_1, compound_6])
46 | move_bis = Move(rsmart = "rsmart",
47 | rid = "rid",
48 | compound_id= "compound_id",
49 | product_list = [compound_6, compound_1, compound_2345])
50 | move_ter = Move(rsmart = "rsmart",
51 | rid = "rid",
52 | compound_id= "compound_id_2",
53 | product_list = [compound_6, compound_1])
54 |
55 | assert move != move_bis and move != move_ter and move_bis != move_ter
56 |
57 | def test_rave_update(self):
58 | compound_1 = Compound("[H+]")
59 | compound_6 = Compound("[H][N]=[C]([O][H])[C]1=[C]([H])[N]([C]2([H])[O][C]([H])([C]([H])([H])[O][P](=[O])([O][H])[O][P](=[O])([O][H])[O][C]([H])([H])[C]3([H])[O][C]([H])([n]4[c]([H])[n][c]5[c]([N]([H])[H])[n][c]([H])[n][c]54)[C]([H])([O][P](=[O])([O][H])[O][H])[C]3([H])[O][H])[C]([H])([O][H])[C]2([H])[O][H])[C]([H])=[C]([H])[C]1([H])[H]")
60 | move = Move(rsmart = "rsmart",
61 | rid = "rid",
62 | compound_id= "compound_id",
63 | product_list = [compound_1, compound_6])
64 |
65 | move.update(5, visit_number = 10)
66 | move.update(0.2, 10)
67 | assert move.RAVE_total_score == 52
68 | assert move.RAVE_visits == 20
69 |
70 |     # TODO: more complex tests with compounds
71 |
--------------------------------------------------------------------------------
/tests/tree_test.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brsynth/RetroPathRL/7de91f0236cf3c3dfc2c0455bd7dbcee9f715d2f/tests/tree_test.pkl
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
--------------------------------------------------------------------------------
/tree_viewer.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains the tree objects for visualisation and export
3 | """
4 |
5 | # General utility packages
6 | import logging
7 | import csv
8 | import copy
9 | import json
10 | import sys
11 |
12 | # RP3 specific objects
13 | from compound import Compound
14 | from move import Move
15 | from chemical_compounds_state import ChemicalCompoundState
16 | from MCTS_node import MCTS_node
17 | # General configuration
18 | from config import *
19 |
20 | class Tree_viewer(object):
21 | """
22 | Tree_viewer object.
23 | Has methods for quick visualisation as well as export to json
24 | """
25 | logger = logging.getLogger(__name__)
26 |
27 | def __init__(self,
28 | file_to_save = "temporary_tree_viewer_json"):
29 | """
30 | Initialising a tree viewer object.
31 | A Node has:
32 | - level
33 | - scores (total and average)
34 | - visits
35 | - terminal
36 | - root
37 | - a chemical state
38 |         - an id, built from the chemical state plus a number to make it unique
39 | - whether it has a solved child
40 | A Move:
41 | - Biological score
42 | - Chemical score
43 | - EC numbers
44 |         - compound ID it applies to
45 | - smarts
46 | - name
47 | An edge links both
48 | """
49 | # Where to save the json
50 | self.file_to_save = file_to_save
51 | # For tree viewer json
52 | self.nodes_nodes = []
53 | self.nodes_transformations = []
54 | self.edges = []
55 |
56 | def set_file_to_save(self, file_to_save):
57 | self.file_to_save = file_to_save
58 |
59 | def add_node(self, node):
60 | """
61 | Adding a node object to the tree.
62 | """
63 | if node.terminal:
64 | terminal = 1
65 | else:
66 | terminal = 0
67 | if node.move is None:
68 | root = 1
69 | else:
70 | root = 0
71 | node_dict = {
72 | 'type': 'node',
73 | 'id': "node_{}".format(node.id),
74 | 'level': node.level,
75 | 'root': root,
76 | 'terminal': terminal,
77 | 'Names': str(node.state), # If I want synonyms, keep them
78 | 'average_score': node.average_score,
79 | 'total_score': node.total_score,
80 | 'visits': node.visits,
81 | 'solved_child': node.has_a_solved_child
82 | }
83 | self.nodes_nodes.append({"data": node_dict})
84 |
85 |         if node.move is not None:
86 | move_to_child = {
87 | "target" : "move_{}".format(node.move.id),
88 | "source" : "node_{}".format(node.id),
89 | "id" : "{}_=>_{}".format("move_{}".format(node.move.id), "node_{}".format(node.id))
90 | }
91 | self.edges.append({"data": move_to_child})
92 | if use_transpositions:
93 | parent_nodes = transposition_table[node.parent.hash]
94 | for parent in parent_nodes:
95 | parent_to_move = {
96 | "target" : "node_{}".format(parent.id),
97 | "source" : "move_{}".format(node.move.id),
98 | "id" : "{}_=>_{}".format("node_{}".format(parent.id), "move_{}".format(node.move.id))
99 | }
100 | self.edges.append({"data": parent_to_move})
101 | else:
102 | parent_to_move = {
103 | "target" : "node_{}".format(node.parent.id),
104 | "source" : "move_{}".format(node.move.id),
105 | "id" : "{}_=>_{}".format("node_{}".format(node.parent.id), "move_{}".format(node.move.id))
106 | }
107 | self.edges.append({"data": parent_to_move})
108 | biological_score = node.move.biological_score
109 | try:
110 | diameter = int(node.move.rid.split("-")[3])
111 |             except Exception:
112 | diameter = 42
113 | move_dict = {
114 | 'type': 'move',
115 | 'id': "move_{}".format(node.move.id),
116 | "Rule ID": node.move.synonyms,
117 | "EC number": node.move.EC_numbers,
118 | "Reaction SMILES": node.move.rsmiles,
119 | "Diameter": diameter,
120 | "Score": biological_score,
121 | "ChemicalScore": node.move.chemical_score,
122 | "Name": node.move.name
123 | }
124 | self.nodes_transformations.append({"data": move_dict})
125 |
126 | def jsonify_tree_viewer(self):
127 | """
128 |         Export the tree as a JSON file for the scope viewer, to visualise pathways before the DBTL cycle advances further.
129 |         The JSON file is a dict with a single key, "elements".
130 |         The "elements" value is a dict with two keys, "nodes" and "edges":
131 |         "nodes" lists compounds and reactions, "edges" links them.
132 | """
133 | pathway_as_dict = {"elements": {"nodes": self.nodes_nodes + self.nodes_transformations,
134 | "edges": self.edges}}
135 | with open(self.file_to_save, "w") as json_handler:
136 | json.dump(pathway_as_dict, json_handler, indent = 2)
137 |
138 |
139 | def __cli():
140 | """Command line interface. Was actually used to make quick
141 | tests before implementing them in the testing file"""
142 | print("CLI is not available for this module - tree viewing is automatically generated by Tree module")
143 |
144 |
145 | if __name__ == "__main__":
146 | __cli()
147 |
--------------------------------------------------------------------------------
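For reference, a sketch of the JSON layout written by Tree_viewer.jsonify_tree_viewer(), reconstructed from add_node() above; every concrete value below is an illustrative placeholder, not real output.

# Shape of the file written by jsonify_tree_viewer(); values are placeholders.
example_tree_viewer_json = {
    "elements": {
        "nodes": [
            # one entry per MCTS node ...
            {"data": {"type": "node", "id": "node_3", "level": 1, "root": 0, "terminal": 0,
                      "Names": "<str(node.state)>", "average_score": 0.7, "total_score": 7.0,
                      "visits": 10, "solved_child": False}},
            # ... and one entry per applied move (transformation)
            {"data": {"type": "move", "id": "move_3", "Rule ID": ["<synonyms>"],
                      "EC number": ["1.1.1.1"], "Reaction SMILES": "<rsmiles>",
                      "Diameter": 10, "Score": 0.9, "ChemicalScore": 0.5, "Name": "<rule name>"}},
        ],
        "edges": [
            # node -> move and move -> parent node links
            {"data": {"target": "move_3", "source": "node_3", "id": "move_3_=>_node_3"}},
        ],
    },
}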
/utilities/chemtools/Filters.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Set of filters to be used for chemical standardisation
4 |
5 | @author: Baudoin Delepine, 2016-2017
6 | @author: Thomas Duigou, 2018-2019
7 | """
8 |
9 | from copy import deepcopy
10 | from rdkit.Chem import AddHs, GetMolFrags, Kekulize, MolToInchi, MolFromInchi, MolFromSmarts, MolFromSmiles, RemoveStereochemistry, MolToSmiles, RemoveHs
11 | from rdkit.Chem.AllChem import Compute2DCoords, ReplaceSubstructs
12 | from rdkit.Chem.Descriptors import MolWt
13 |
14 |
15 | class Filters(object):
16 | """Set of filters to be used for chemical standardization.
17 | """
18 |
19 | @classmethod
20 | def _copy_properties(cls, mol_from, mol_to):
21 | """Copy properties from a RDKit compound to another one.
22 |
23 | :param mol_from: RDKit Mol source object
24 | :param mol_to: RDKit Mol target object
25 |
26 | Warning: aside from chemical's name, all private properties are lost.
27 | """
28 | # NB: name is stored in its default location which is "_Name" and
29 |         # is a private property.
30 | property_list = mol_from.GetPropNames(includePrivate=False)
31 | if mol_from.HasProp('_Name'): # TD: If _Name is set always save name
32 | property_list.append("_Name")
33 | for property_name in property_list:
34 | mol_to.SetProp(property_name, mol_from.GetProp(property_name))
35 |
36 | @classmethod
37 | def keep_biggest(cls, mol_in):
38 | """Strip small fragments from compound.
39 |
40 | Returns a new compound where only the "biggest" fragment is conserved
41 | according to (i) the number of non-Hs atoms and if there is tie then
42 | according to (ii) the molecular weight.
43 |
44 | :param mol_in: RDKit Mol
45 | :return mol_out: new RDKit Mol having only one connected component
46 | """
47 | def count_non_hs_atom(mol):
48 | ans = 0
49 | for atm in mol.GetAtoms():
50 | if atm.GetAtomicNum() != 1:
51 | ans += 1
52 | return ans
53 | # Remove "other" molecules
54 | molfrag = GetMolFrags(mol_in, asMols=True, sanitizeFrags=False)
55 | mol_out = mol_in
56 | if len(molfrag) > 1:
57 | accepted_nbr_atm = 0 # flag number of atoms in fragment
58 |             accepted_mass = 0  # molecular weight of the biggest fragment so far
59 | for f in molfrag:
60 | nbr_atm = count_non_hs_atom(f)
61 | if nbr_atm > accepted_nbr_atm or (nbr_atm == accepted_nbr_atm and MolWt(f) > accepted_mass):
62 | accepted_nbr_atm = nbr_atm
63 | accepted_mass = MolWt(f)
64 | mol_out = f # keep only the biggest fragment
65 | cls._copy_properties(mol_in, mol_out) # save the name and stuff
66 | return mol_out
67 |
68 | @classmethod
69 | def commute_inchi(cls, mol_in):
70 | """Convert RDKit compound back and forth to InChi.
71 |
72 | Returns a new compound after the initial one has been converted
73 | back and forth to InChi.
74 |
75 | :param mol_in: RDKit Mol
76 | :return mol_out: RDKit Mol
77 | """
78 | inchi = MolToInchi(mol_in, logLevel=None) # this is talkative...
79 | mol_out = MolFromInchi(inchi, sanitize=False, removeHs=False,
80 | logLevel=None, treatWarningAsError=False)
81 | if not mol_out:
82 | raise ValueError("Failed InChi validity filter.")
83 | # Copy the properties
84 | cls._copy_properties(mol_in, mol_out)
85 | return mol_out
86 |
87 | @classmethod
88 | def remove_isotope(cls, mol_in):
89 | """Strip all isotope information.
90 |
91 | Returns a new compound.
92 |
93 | :param mol_in: RDKit Mol
94 | :return mol_out: RDKit Mol
95 | """
96 | mol_out = deepcopy(mol_in) # copy it, just for consistency with other filters
97 | for atm in mol_out.GetAtoms():
98 | atm.SetIsotope(0)
99 | if not mol_out:
100 | raise ValueError("Failed isotope removing filter.")
101 | return mol_out
102 |
103 | @staticmethod
104 | def _rules_rdkit():
105 | patts = (
106 | ('[n+;H]', 'n'), # Imidazoles
107 | ('[N+;!H0]', 'N'), # Amines
108 | ('[$([O-]);!$([O-][#7])]', 'O'), # Carboxylic acids and alcohols
109 | ('[S-;X1]', 'S'), # Thiols
110 | ('[$([N-;X2]S(=O)=O)]', 'N'), # Sulfonamides
111 | ('[$([N-;X2][C,N]=C)]', 'N'), # Enamines
112 | ('[n-]', '[nH]'), # Tetrazoles
113 | ('[$([S-]=O)]', 'S'), # Sulfoxides
114 | ('[$([N-]C=O)]', 'N'), # Amides
115 | )
116 | return [(MolFromSmarts(x), MolFromSmiles(y, False)) for x, y in patts]
117 |
118 | @staticmethod
119 | def _rules_molvs():
120 | """Rules to neutralize compounds. Inspired by molvs."""
121 | ans = {}
122 | # Neutralizable positive charge (with hydrogens attached)
123 | # ans["pos_h"] = Chem.MolFromSmarts('[+!H0!$(*~[-])]')
124 | ans["pos_h"] = MolFromSmarts('[+!H0]')
125 | # Non-neutralizable positive charge (no hydrogens attached)
126 | # ans["pos_quat"] = Chem.MolFromSmarts('[+H0!$(*~[-])]')
127 | # Negative charge, not bonded to a positive charge with no hydrogens
128 | # ans["neg"] = Chem.MolFromSmarts('[-!$(*~[+H0])]')
129 | ans["neg"] = MolFromSmarts('[-]')
130 | # Negative oxygen bonded to [C,P,S]=O, negative aromatic nitrogen?
131 | # ans["neg_acid"] = Chem.MolFromSmarts('[$([O-][C,P,S]=O),$([n-]1nnnc1),$(n1[n-]nnc1)]')
132 | return ans
133 |
134 | @classmethod
135 | def _neutralise_charge_method1(cls, mol_in, rules=None):
136 | """Neutralise charges according to a set of predefined rules.
137 |
138 | From:
139 | http://www.rdkit.org/docs/Cookbook.html#neutralizing-charged-molecules
140 | """
141 |         # Fall back to the default rules if none are provided
142 |         if rules is None:
143 |             fun_rules = cls._rules_rdkit
144 |         else:
145 |             fun_rules = rules
146 |         if not hasattr(fun_rules, "rules"):  # cache the compiled rules once
147 |             fun_rules.rules = fun_rules()
148 |
149 | # Apply rules
150 | # Better to use ReplaceSubstructs than RunReactant: the latter would give
151 | # several products (or we would need to use HasSubstructMatch anyway).
152 | for reactant, product in fun_rules.rules:
153 | while mol_in.HasSubstructMatch(reactant):
154 | rms = ReplaceSubstructs(mol_in, reactant, product)
155 | mol_in = rms[0]
156 | mol_in.UpdatePropertyCache()
157 | return mol_in
158 |
159 | @classmethod
160 | def _neutralise_charge_method2(cls, mol_in):
161 | """Neutralise charges as much as possible playing on hydrogens.
162 |
163 | You should sanitize the compounds after this operation.
164 |
165 | From:
166 | http://molvs.readthedocs.io/en/latest/_modules/molvs/charge.html
167 | """
168 | mol_out = deepcopy(mol_in) # copy it, just for consistency with other operations
169 | mol_out.UpdatePropertyCache(strict=False) # recompute implicit valence
170 | # Check if rules are already initialised as an attribute
171 | if not hasattr(cls._rules_molvs, "rules"):
172 | cls._rules_molvs.rules = cls._rules_molvs()
173 | # Get atom ids for matches
174 | p = [x[0] for x in mol_out.GetSubstructMatches(cls._rules_molvs.rules['pos_h'])]
175 | # q = [x[0] for x in cc.GetSubstructMatches(cls._rules_molvs.rules['pos_quat'])]
176 | n = [x[0] for x in mol_out.GetSubstructMatches(cls._rules_molvs.rules['neg'])]
177 | # a = [x[0] for x in cc.GetSubstructMatches(cls._rules_molvs.rules['neg_acid'])]
178 | # Neutralize negative charges
179 | # if q:
180 | # # Surplus negative charges more than non-neutralizable positive charges
181 | # neg_surplus = len(n) - len(q)
182 | # if a and neg_surplus > 0:
183 | # # zwitterion with more negative charges than quaternary positive centres
184 | # while neg_surplus > 0 and a:
185 | # # Add hydrogen to first negative acid atom, increase formal charge
186 | # # Until quaternary positive == negative total or no more negative acid
187 | # atom = cc.GetAtomWithIdx(a.pop(0))
188 | # atom.SetNumExplicitHs(atom.GetNumExplicitHs() + 1)
189 | # atom.SetFormalCharge(atom.GetFormalCharge() + 1)
190 | # neg_surplus -= 1
191 | # Finish of neutralization of negative charges (we don't care for zwitterion)
192 | for atom in [mol_out.GetAtomWithIdx(x) for x in n]:
193 | while atom.GetFormalCharge() < 0:
194 | atom.SetNumExplicitHs(atom.GetNumExplicitHs() + 1)
195 | atom.SetFormalCharge(atom.GetFormalCharge() + 1)
196 | # Neutralize positive charges
197 | for atom in [mol_out.GetAtomWithIdx(x) for x in p]:
198 | # Remove hydrogen and reduce formal charge until neutral or no more hydrogens
199 | while atom.GetFormalCharge() > 0 and atom.GetTotalNumHs() > 0:
200 | atom.SetFormalCharge(atom.GetFormalCharge() - 1)
201 | if atom.GetNumExplicitHs() > 0:
202 | atom.SetNumExplicitHs(atom.GetNumExplicitHs() - 1)
203 | return mol_out
204 |
205 | @classmethod
206 | def neutralise_charge(cls, mol_in):
207 | """Neutralise charges.
208 |
209 | :param mol_in: RDKit Mol
210 | :return mol_out: RDKit Mol
211 | """
212 | return cls._neutralise_charge_method1(mol_in)
213 | # return cls._neutralise_charge_method2(mol_in)
214 |
215 | @classmethod
216 | def add_hydrogen(cls, mol_in, addCoords=True):
217 | """Explicit all hydrogens.
218 |
219 | :param mol_in: RDKit Mol
220 | :param addCoords: Add coordinate to added Hs, bool
221 | :return mol_out: RDKit Mol
222 | """
223 | return AddHs(mol_in, explicitOnly=False, addCoords=addCoords)
224 |
225 | @classmethod
226 | def remove_hydrogen(cls, mol_in, addCoords=True):
227 | """Implicit all hydrogens.
228 |
229 | :param mol_in: RDKit Mol
230 | :param addCoords: Add coordinate to added Hs, bool
231 | :return mol_out: RDKit Mol
232 | """
233 | return RemoveHs(mol_in, explicitOnly=False, addCoords=addCoords)
234 |
235 | @classmethod
236 | def kekulize(cls, mol_in):
237 | """Kekulize compound.
238 |
239 | :param mol_in: RDKit Mol
240 | :return mol_out: RDKit Mol
241 | """
242 | mol_out = deepcopy(mol_in)
243 | Kekulize(mol_out, clearAromaticFlags=True)
244 | return mol_out
245 |
246 | @classmethod
247 | def remove_stereo(cls, mol_in):
248 | """Wild stereo removal.
249 |
250 |         Warning: a back and forth InChI export/import is needed to normalise tautomers
251 |
252 | :param mol_in: RDKit mol
253 | :return mol_out: RDKit mol
254 | """
255 | mol_out = deepcopy(mol_in)
256 | RemoveStereochemistry(mol_out)
257 | return mol_out
258 |
--------------------------------------------------------------------------------
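A minimal sketch of using a couple of the filters above directly on an RDKit mol; the canonical orderings live in Sequences.py, and the salt SMILES and the expected output are illustrative assumptions.

# Minimal sketch: applying individual Filters steps by hand.
from rdkit.Chem import MolFromSmiles, MolToSmiles, SanitizeMol
from utilities.chemtools.Filters import Filters

mol = MolFromSmiles('CC(=O)[O-].[Na+]')   # an acetate salt, two fragments
mol = Filters.neutralise_charge(mol)      # protonates the carboxylate ([Na+] is left untouched)
SanitizeMol(mol)
mol = Filters.keep_biggest(mol)           # keeps the acid, drops the counter-ion
print(MolToSmiles(mol))                   # expected, roughly: 'CC(=O)O'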
/utilities/chemtools/Sequences.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Sequences of filters to be used for standardization."""
3 |
4 |
5 | from utilities.chemtools.Filters import Filters
6 | from rdkit.Chem import Cleanup, SanitizeMol, SanitizeFlags
7 | from rdkit.Chem.AllChem import AssignStereochemistry
8 |
9 |
10 | def sequence_rr_legacy(mol):
11 | """Sequence of filters applied for the first version of RetroRules
12 | """
13 | F = Filters()
14 | Cleanup(mol)
15 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False)
16 | AssignStereochemistry(mol, cleanIt=True, force=True, flagPossibleStereoCenters=True) # Fix bug TD201904.01
17 | mol = F.remove_isotope(mol)
18 | mol = F.neutralise_charge(mol)
19 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False)
20 | mol = F.keep_biggest(mol)
21 | mol = F.add_hydrogen(mol, addCoords=True)
22 | mol = F.kekulize(mol)
23 | return mol
24 |
25 |
26 | def sequence_tunable(
27 | mol,
28 | OP_REMOVE_ISOTOPE=True, OP_NEUTRALISE_CHARGE=True,
29 | OP_REMOVE_STEREO=False, OP_COMMUTE_INCHI=False,
30 | OP_KEEP_BIGGEST=True, OP_ADD_HYDROGEN=True,
31 | OP_KEKULIZE=True, OP_NEUTRALISE_CHARGE_LATE=True
32 | ):
33 | """Tunable sequence of filters for standardization.
34 |
35 |     Operations will be made in the following order:
36 | 1 RDKit Cleanup -- always
37 | 2 RDKIT SanitizeMol -- always
38 | 3 Remove isotope -- optional (default: True)
39 | 4 Neutralise charges -- optional (default: True)
40 |     5 RDKit SanitizeMol -- if 3 or 4
41 | 6 Remove stereo -- optional (default: False)
42 | 7 Commute Inchi -- if 6 or optional (default: False)
43 | 8 Keep biggest -- optional (default: True)
44 | 9 RDKit SanitizeMol -- if any (6, 7, 8)
45 | 10 Add hydrogens -- optional (default: True)
46 | 11 Kekulize -- optional (default: True)
47 | """
48 | F = Filters()
49 | # Always perform the basics..
50 | Cleanup(mol)
51 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False)
52 | AssignStereochemistry(mol, cleanIt=True, force=True, flagPossibleStereoCenters=True) # Fix bug TD201904.01
53 | #
54 | if OP_REMOVE_ISOTOPE:
55 | mol = F.remove_isotope(mol)
56 | if OP_NEUTRALISE_CHARGE:
57 | mol = F.neutralise_charge(mol)
58 |     if any([OP_REMOVE_ISOTOPE, OP_NEUTRALISE_CHARGE]):
59 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False)
60 | #
61 | if OP_REMOVE_STEREO:
62 | mol = F.remove_stereo(mol)
63 | OP_COMMUTE_INCHI = True
64 | if OP_COMMUTE_INCHI:
65 | mol = F.commute_inchi(mol)
66 | if OP_KEEP_BIGGEST:
67 | mol = F.keep_biggest(mol)
68 | if any([OP_REMOVE_STEREO, OP_COMMUTE_INCHI, OP_KEEP_BIGGEST]):
69 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False)
70 | #
71 | if OP_NEUTRALISE_CHARGE_LATE:
72 | mol = F.neutralise_charge(mol)
73 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False)
74 | #
75 | if OP_ADD_HYDROGEN:
76 | mol = F.add_hydrogen(mol, addCoords=True)
77 | if OP_KEKULIZE:
78 | mol = F.kekulize(mol)
79 | #
80 | return mol
81 |
--------------------------------------------------------------------------------
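A minimal sketch of calling sequence_tunable directly with two of the options above overridden; the input SMILES and the expected output are illustrative assumptions.

# Minimal sketch: calling sequence_tunable directly, overriding two options.
from rdkit.Chem import MolFromSmiles, MolToSmiles
from utilities.chemtools.Sequences import sequence_tunable

mol = MolFromSmiles('C[C@H](O)C(=O)[O-]')   # (S)-lactate anion
mol = sequence_tunable(mol, OP_REMOVE_STEREO=True, OP_ADD_HYDROGEN=False)
print(MolToSmiles(mol))   # expected, roughly: 'CC(O)C(=O)O' (stereo removed, charge neutralised)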
/utilities/chemtools/Standardizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Standardize chemicals
4 |
5 | This is basically a rework of the standardizer.py written by Baudoin Delepine
6 | at INRA.
7 |
8 | @author: Baudoin Delepine, 2016-2017
9 | @author: Thomas Duigou, 2018-2019
10 | """
11 |
12 | from utilities.chemtools import Sequences
13 | from utilities.chemtools.Filters import Filters
14 | from rdkit.Chem import SanitizeMol, SanitizeFlags
15 | from rdkit.Chem.AllChem import AssignStereochemistry
16 |
17 | class Standardizer(object):
18 | """Handle standardization of compound(s) through user-defined "filters".
19 | """
20 |
21 | def __call__(self, mol):
22 | """Calling the Standardizer class like a function is the same
23 | as calling its "compute" method.
24 |
25 |         From:
26 | https://github.com/mcs07/MolVS/blob/master/molvs/standardize.py
27 | """
28 | return self.compute(mol)
29 |
30 | def __init__(self, sequence_fun=None, params=None):
31 |         """Set up parameters for the standardization.
32 |         :param sequence_fun: a callable, or the name of a function from Sequences, used to standardize compounds
33 |         :param params: dict of keyword arguments forwarded to the sequence function
34 |         """
35 | # Function to be used for standardizing compounds
36 |         # Add your own function as a class method
37 | if sequence_fun is None:
38 | self.sequence_fun = self.sequence_minimal
39 |         elif callable(sequence_fun):  # Guess: sequence_fun is the function itself
40 | self.sequence_fun = sequence_fun
41 | elif type(sequence_fun) == str:
42 | self.sequence_fun = getattr(Sequences, sequence_fun) # Guess: sequence_fun is the name of the function
43 | # Arguments to be passed to any custom standardization function
44 | self._params = params if params else None
45 |
46 | def sequence_minimal(self, mol):
47 | """Minimal standardization."""
48 | SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False)
49 | AssignStereochemistry(mol, cleanIt=True, force=True, flagPossibleStereoCenters=True) # Fix bug TD201904.01
50 | return mol
51 |
52 | def compute(self, mol):
53 | """Do the job."""
54 | if self._params is None:
55 | return self.sequence_fun(mol)
56 | else:
57 | return self.sequence_fun(mol, **self._params)
58 |
--------------------------------------------------------------------------------
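A minimal sketch of the Standardizer wrapper, driven the same way utilities/reactor/Utils.standardize_chemical drives it: a sequence named by string plus a params dict forwarded to it; the input SMILES is an illustrative assumption.

# Minimal sketch: Standardizer with a named sequence and forwarded parameters.
from rdkit.Chem import MolFromSmiles, MolToSmiles
from utilities.chemtools.Standardizer import Standardizer

params = {'OP_REMOVE_STEREO': True, 'OP_ADD_HYDROGEN': False}
standardizer = Standardizer(sequence_fun='sequence_tunable', params=params)
mol = standardizer(MolFromSmiles('OC1=CC=CC=C1'))   # __call__ delegates to compute()
print(MolToSmiles(mol))                             # phenol, kekulised by the default options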
/utilities/chemtools/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Starting a new toolbox to handle chemical compounds
4 | """
5 |
6 | from rdkit.Chem import MolFromSmiles, MolFromInchi, MolToSmiles, MolToInchi, MolToInchiKey, AddHs
7 |
8 |
9 | def convert_depiction(idepic, itype='smiles', otype={'inchikey'}):
10 |     """Convert a chemical depiction to other types of depictions
11 |
12 | :param idepic: string depiction to be converted, str
13 | :param itype: type of depiction provided as input, str
14 | :param otype: types of depiction to be generated, {"", "", ..}
15 | :return odepic: generated depictions, {"otype1": "odepic1", ..}
16 |
17 | Usage example:
18 | - convert_depiction(idepic='CCO', otype={'inchi', 'smiles', 'inchikey'})
19 | - convert_depiction(idepic='InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3', itype='inchi', otype={'inchi', 'smiles', 'inchikey'})
20 | """
21 | # Import (if needed)
22 | if itype == 'smiles':
23 | rdmol = MolFromSmiles(idepic, sanitize=True)
24 | elif itype == 'inchi':
25 | rdmol = MolFromInchi(idepic, sanitize=True)
26 | else:
27 | raise NotImplementedError('"{}" is not a valid input type'.format(itype))
28 |     if rdmol is None:  # Check that the import succeeded
29 | raise Exception('Import error from depiction "{}" of type "{}"'.format(idepic, itype))
30 |
31 | # Export
32 | odepic = dict()
33 | for item in otype:
34 | if item == 'smiles':
35 |             odepic[item] = MolToSmiles(rdmol)  # MolToSmiles is tricky, one may want to check the possible options
36 | elif item == 'inchi':
37 | odepic[item] = MolToInchi(rdmol)
38 | elif item == 'inchikey':
39 | odepic[item] = MolToInchiKey(rdmol)
40 | else:
41 |             raise NotImplementedError('"{}" is not a valid output type'.format(item))
42 |
43 | return odepic
44 |
--------------------------------------------------------------------------------
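A minimal sketch of the dict returned by convert_depiction; the ethanol InChI is the one from the docstring, and the InChIKey shown in the comment is the value RDKit is expected to produce.

# Minimal sketch: convert_depiction returns a dict keyed by the requested types.
from utilities.chemtools.Utils import convert_depiction

out = convert_depiction(idepic='CCO', otype={'inchi', 'smiles', 'inchikey'})
# Expected, approximately:
# {'smiles': 'CCO',
#  'inchi': 'InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3',
#  'inchikey': 'LFQSCWFLJHTTHZ-UHFFFAOYSA-N'}
print(out)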
/utilities/reactor/Core.py:
--------------------------------------------------------------------------------
1 | """
2 | Core code for firing rules
3 | """
4 |
5 |
6 | class RuleMatchError(Exception):
7 | """Raised when something went wrong when matching a rule."""
8 |
9 | def __init__(self, msg):
10 | self._msg = msg
11 |
12 | def __str__(self):
13 | return "RULE-MATCH-ERROR: {}".format(self._msg)
14 |
15 |
16 | class RuleFireError(Exception):
17 | """Raised when something went wrong when firing a rule."""
18 |
19 | def __init__(self, msg):
20 | self._msg = msg
21 |
22 | def __str__(self):
23 | return "RULE-FIRE-ERROR: {}".format(self._msg)
24 |
25 |
26 | class RuleBurnerCore(object):
27 | """Apply one rule on one chemical."""
28 |
29 | def __init__(self, rd_rule, rd_mol):
30 | """Apply one rule on one chemical.
31 |
32 |         Notice: no standardization is performed on the input chemicals and rules.
33 |
34 |         :param rd_rule: RDKit reaction object, the reaction rule to apply
35 |         :param rd_mol: RDKit mol object, the chemical the rule is applied to
36 |
37 | """
38 | # Internal settings
39 |         USE_CHIRALITY_IN_MATCH = False  # default for substructure matching anyway (currently unused)
40 | # Input
41 | self._rd_rule = rd_rule
42 | self._rd_mol = rd_mol
43 |
44 | def match(self):
45 | """Check if left reaction side match the chemical.
46 |
47 | returns: bool, True if there is a match, else False
48 | """
49 | try:
50 | for reactant in self._rd_rule.GetReactants():
51 |                 if self._rd_mol.HasSubstructMatch(reactant):
52 | return True
53 | return False
54 | except Exception as e:
55 | raise RuleMatchError(e) from e
56 |
57 | def fire(self):
58 | """Fire the rule on the chemical.
59 |
60 | returns: tuple of tuple, list of results for each possible application.
61 | """
62 | try:
63 | return self._rd_rule.RunReactants((self._rd_mol,))
64 | except Exception as e:
65 | raise RuleFireError(e) from e
66 |
--------------------------------------------------------------------------------
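A minimal sketch of the RuleBurnerCore API on a toy rule (a secondary-alcohol oxidation written ad hoc for this sketch, not a RetroRules rule); it only illustrates match()/fire(), and no standardization is performed, as the class docstring notes.

# Minimal sketch of RuleBurnerCore on a toy, ad hoc rule.
from rdkit import Chem
from rdkit.Chem import AllChem
from utilities.reactor.Core import RuleBurnerCore

rd_rule = AllChem.ReactionFromSmarts('[CH1:1]([OH:2])>>[CH0:1]=[OH0:2]')  # toy alcohol -> ketone rule
rd_mol = Chem.MolFromSmiles('CC(O)C(=O)O')                                # lactic acid
core = RuleBurnerCore(rd_rule, rd_mol)
if core.match():                          # does the left-hand side match the substrate?
    for products in core.fire():          # one tuple of product Mols per application
        for product in products:
            Chem.SanitizeMol(product)     # RunReactants products are not sanitized
        print([Chem.MolToSmiles(p) for p in products])   # expected, roughly: ['CC(=O)C(=O)O']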
/utilities/reactor/Utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Set of methods to handle reaction I/Os
3 | """
4 |
5 |
6 | import copy
7 | import rdkit
8 | import logging
9 |
10 | from rdkit import Chem
11 | from rdkit.Chem import MolToInchiKey
12 | from rdkit import RDLogger
13 | from utilities.chemtools.Standardizer import Standardizer
14 |
15 |
16 | RD_LOGGER = RDLogger.logger()
17 | RD_LOGGER.setLevel(RDLogger.CRITICAL)  # Silence most RDKit complaints
18 |
19 |
20 | class ChemConversionError(Exception):
21 | """Raised when something went wrong during chemical conversion to RDKit mol object."""
22 |
23 | def __init__(self, msg):
24 | self._msg = msg
25 |
26 | def __str__(self):
27 | return "CHEM-CONVERSION-ERROR: {}".format(self._msg)
28 |
29 |
30 | def wild_stereo_removal(rdmol):
31 | """Wild stereo removal using back and forth Inchi depiction.
32 |
33 | :param rdmol: RDKit mol
34 | :returns rdmol_new: newly generated RDKit mol
35 | """
36 | tmp_rdmol = copy.deepcopy(rdmol)
37 | Chem.RemoveStereochemistry(tmp_rdmol)
38 | return Chem.MolFromInchi(Chem.MolToInchi(tmp_rdmol))
39 |
40 |
41 | def standardize_chemical_archive(rdmol, add_hs=True, rm_stereo=True):
42 | """Standardize a chemical using RDKit sanitize method.
43 |
44 | :param rdmol: RDKit mol object
45 | :param add_hs: append Hs, bool (default: True)
46 | :param rm_stereo: remove stereo, bool (default: True)
47 | :returns rdmol: RDKit mol object
48 | """
49 | try:
50 | Chem.SanitizeMol(rdmol)
51 | if rm_stereo: # Important: do this before adding Hs (else re-add Hs)
52 | rdmol = wild_stereo_removal(rdmol)
53 | if add_hs:
54 | rdmol = Chem.AddHs(rdmol)
55 | else:
56 | rdmol = Chem.RemoveHs(rdmol)
57 | return rdmol
58 | except Exception as e:
59 | logging.warning(e)
60 | raise e
61 |
62 |
63 | def standardize_chemical(rdmol, add_hs=True, rm_stereo=True, heavy=False):
64 | """Standardize a chemical using RDKit sanitize method.
65 |
66 | :param rdmol: RDKit mol object
67 | :param add_hs: append Hs, bool (default: True)
68 | :param rm_stereo: remove stereo, bool (default: True)
69 | :param heavy: perform custom in depth standardization (default: False)
70 | :returns rdmol: RDKit mol object
71 | """
72 | # if not rm_stereo:
73 | # logging.warning("Stereo not handled at the time being.")
74 | # raise ChemConversionError("Stereo not handled at the time being.")
75 | simple_standardisation = {
76 | 'OP_REMOVE_ISOTOPE': False,
77 | 'OP_NEUTRALISE_CHARGE': False,
78 | 'OP_REMOVE_STEREO': rm_stereo,
79 | 'OP_COMMUTE_INCHI': True,
80 | 'OP_KEEP_BIGGEST': False,
81 | 'OP_ADD_HYDROGEN': add_hs,
82 | 'OP_KEKULIZE': False,
83 | 'OP_NEUTRALISE_CHARGE_LATE': True
84 | }
85 | heavy_standardisation = {
86 | 'OP_REMOVE_ISOTOPE': True,
87 | 'OP_NEUTRALISE_CHARGE': True,
88 | 'OP_REMOVE_STEREO': rm_stereo,
89 | 'OP_COMMUTE_INCHI': True,
90 | 'OP_KEEP_BIGGEST': True,
91 | 'OP_ADD_HYDROGEN': add_hs,
92 | 'OP_KEKULIZE': False,
93 | 'OP_NEUTRALISE_CHARGE_LATE': True
94 | }
95 |
96 | try:
97 | if heavy:
98 | rdmol = Standardizer(sequence_fun='sequence_tunable', params=heavy_standardisation).compute(rdmol)
99 | logging.debug("Performing heavy standardisation for compound {}".format(MolToInchiKey(rdmol)))
100 | else:
101 | rdmol = Standardizer(sequence_fun='sequence_tunable', params=simple_standardisation).compute(rdmol)
102 | return rdmol
103 | except Exception as e:
104 | logging.warning(e)
105 | raise e
106 |
107 |
108 | def standardize_results(tuple_tuple_rdmol, add_hs=True, rm_stereo=True):
109 | """Perform sanitization and remove duplicates from reaction rule results.
110 |
111 | :param tuple_tuple_rdmol: tuple of tuple of RDKit Mol
112 | :param add_hs: append Hs, bool (default: True)
113 | :param rm_stereo: remove stereo, bool (default: True)
114 | :returns list_list_std: list of list of standardized RDKit Mol
115 |     :returns list_idx_tuple_failed: list of indices of the tuples that failed standardization
116 | """
117 | uniq_depics = set()
118 | list_list_std = list()
119 | list_idx_tuple_failed = list()
120 |
121 | for idx_tuple, tuple_rdmol in enumerate(tuple_tuple_rdmol):
122 | try:
123 | list_std = list()
124 | list_inchikeys = list()
125 | # Standardize
126 | for rdmol in tuple_rdmol:
127 | for rd_frag in Chem.GetMolFrags(rdmol, asMols=True, sanitizeFrags=False):
128 | list_std.append(standardize_chemical(rd_frag, add_hs=add_hs, rm_stereo=rm_stereo))
129 | # Get Inchikeys
130 | for rdmol in list_std:
131 | inchikey = Chem.MolToInchiKey(rdmol)
132 | if inchikey:
133 | list_inchikeys.append(inchikey)
134 | else:
135 | msg = 'Product conversion to InChIKey raised an empty string'
136 | logging.warning(ChemConversionError(msg))
137 | raise ChemConversionError(msg)
138 | # Get unique depiction
139 | depic = '.'.join(sorted(list_inchikeys))
140 |             # Store only if this unique depiction has not been seen before
141 | if depic not in uniq_depics:
142 | uniq_depics.add(depic)
143 | list_list_std.append(list_std)
144 | except ChemConversionError as e:
145 | logging.warning("{}".format(e))
146 | list_idx_tuple_failed.append(idx_tuple)
147 | raise e
148 | except Exception as e:
149 |             logging.warning("Cannot handle a tuple of results, skipped")
150 | logging.warning("{}".format(e))
151 | list_idx_tuple_failed.append(idx_tuple)
152 |
153 | return list_list_std, list_idx_tuple_failed
154 |
155 |
156 | def handle_results(list_list_rdmol):
157 | """Generate InchiKey, Inchi and SMILES from results.
158 |
159 | :param list_list_rdmol: list of list of RDKit Mol
160 | :returns list_list_inchikeys: list of list of InchiKeys
161 | :returns list_list_inchis: list of list of Inchis
162 | :returns list_list_smiles: list of list of SMILES
163 | """
164 | list_list_inchikeys = list()
165 | list_list_inchis = list()
166 | list_list_smiles = list()
167 |
168 | for list_rdmol in list_list_rdmol:
169 | try:
170 | list_inchikeys = list()
171 | list_inchis = list()
172 | list_smiles = list()
173 | list_std = list()
174 | for rdmol in list_rdmol:
175 | # Get & check depictions
176 | inchikey = Chem.MolToInchiKey(rdmol) # DEBUG: this part could be optimized
177 | inchi = Chem.MolToInchi(rdmol)
178 | smiles = Chem.MolToSmiles(rdmol)
179 | if not all([inchikey, inchi, smiles]):
180 | raise ChemConversionError("Chemical conversion error")
181 | # Store if we reach there
182 | list_inchikeys.append(inchikey)
183 | list_inchis.append(inchi)
184 | list_smiles.append(smiles)
185 | # Store if we reach the end
186 | list_list_inchikeys.append(list_inchikeys)
187 | list_list_inchis.append(list_inchis)
188 | list_list_smiles.append(list_smiles)
189 | except ChemConversionError as e:
190 | logging.warning("{}".format(e))
191 | raise e
192 | except Exception as e:
193 |             logging.warning("Cannot handle a tuple of results, skipped")
194 | logging.warning("{}".format(e))
195 | return list_list_inchikeys, list_list_inchis, list_list_smiles # Quick but dirty
196 |
--------------------------------------------------------------------------------
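Finally, a minimal sketch of how the helpers in this file are meant to chain together: standardize a substrate, fire a rule through RuleBurnerCore, then push the raw products through standardize_results and handle_results. The rule SMARTS is a toy example written for this sketch, not a RetroRules rule, and the substrate SMILES is an illustrative assumption.

# Minimal end-to-end sketch: standardize -> fire -> standardize_results -> handle_results.
from rdkit import Chem
from rdkit.Chem import AllChem
from utilities.reactor.Core import RuleBurnerCore
from utilities.reactor.Utils import standardize_chemical, standardize_results, handle_results

substrate = standardize_chemical(Chem.MolFromSmiles('CC(O)C(=O)O'), add_hs=False, rm_stereo=True)
rule = AllChem.ReactionFromSmarts('[CH1:1]([OH:2])>>[CH0:1]=[OH0:2]')   # toy alcohol -> ketone rule
raw_products = RuleBurnerCore(rule, substrate).fire()                   # tuple of tuples of raw Mols
std_products, failed = standardize_results(raw_products, add_hs=True, rm_stereo=True)
inchikeys, inchis, smiles = handle_results(std_products)
print(inchikeys, failed)    # one list of product InChIKeys per surviving result set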