├── .github └── workflows │ └── linux_tests.yml ├── .gitignore ├── Dockerfiles └── nersc_Dockerfile ├── HiPRGen ├── bucketing.py ├── constants.py ├── initial_state.py ├── logging.py ├── mc_analysis.py ├── mol_entry.py ├── network_loader.py ├── network_renderer.py ├── reaction_filter.py ├── reaction_filter_payloads.py ├── reaction_questions.py ├── report_generator.py ├── species_filter.py └── species_questions.py ├── LICENSE ├── README.md ├── data ├── flicho_test.json ├── ronald_LIBE.json └── sam_G2.json ├── default.nix ├── figures ├── HiPRGen_schematic.svg ├── reaction_decision_tree.svg └── species_decision_tree.svg ├── flake.lock ├── flake.nix ├── logo.png ├── logo.svg ├── logo_dark.png ├── repl.py ├── run_network_generation.py ├── setup.py ├── shell.nix ├── test.py └── xyz_files ├── EC.xyz ├── EMC.xyz ├── LEDC.xyz ├── Li.xyz ├── bh4.xyz ├── c2h4.xyz ├── c2h6.xyz ├── co.xyz ├── fec.xyz ├── h.xyz ├── h2.xyz ├── h2o.xyz ├── lemc.xyz ├── li2co3_0.xyz ├── lico3-.xyz ├── mg_tfsi.xyz ├── mgg2.xyz ├── mgthf.xyz ├── n2.xyz ├── no.xyz └── oh.xyz /.github/workflows/linux_tests.yml: -------------------------------------------------------------------------------- 1 | name: "linux tests" 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - main 7 | jobs: 8 | tests: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2.4.0 12 | - uses: cachix/install-nix-action@v16 13 | with: 14 | nix_path: nixpkgs=channel:nixos-21.05 15 | - run: nix flake check 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .ipynb_checkpoints/ 3 | scratch -------------------------------------------------------------------------------- /Dockerfiles/nersc_Dockerfile: -------------------------------------------------------------------------------- 1 | FROM sleak75/conda-mpi4py-haswell:latest 2 | SHELL ["/bin/bash", "-c"] 3 | WORKDIR /app 4 | 
def bucket(
        mol_entries,
        bucket_db,
        commit_freq=2000,
        group_size=1000):
    """
    Phase 2: bucket single species and pairs of species by composition.

    Since each reaction conserves atom numbers, a concerted reaction only
    occurs between elements of a single bucket. Pairs are generated with
    combinations_with_replacement, so (A, B) is stored but (B, A) is not.

    Args:
        mol_entries: iterable of species; each needs an `ind` attribute and
            a `species` list of element symbols. It is materialized once,
            so a generator is accepted.
        bucket_db: path of the sqlite database to create.
        commit_freq: commit after this many inserted rows.
        group_size: number of rows of one composition that share a
            group_id before the next group is started.

    Tables written:
        complexes (species_1, species_2, composition_id, group_id)
            species_2 is -1 for a single species.
        group_counts (composition_id, count)
        compositions (composition_id, composition)
    """
    # the entries are walked twice (singles, then pairs); materialize so a
    # one-shot iterable does not silently produce an empty second pass
    entries = list(mol_entries)

    group_counts = {}
    bucket_counts = {}
    composition_ids = {}
    commit_count = 0

    con = sqlite3.connect(bucket_db)
    try:
        cur = con.cursor()
        cur.execute(
            "CREATE TABLE complexes "
            "(species_1, species_2, composition_id, group_id)")

        # we create an index on (composition, group_id) so worker processes
        # during reaction filtering can read their work batch faster
        cur.execute(
            "CREATE INDEX composition_index "
            "ON complexes (composition_id, group_id)")

        def _count_row():
            # commit every commit_freq rows, counted across all tables
            nonlocal commit_count
            commit_count += 1
            if commit_count % commit_freq == 0:
                con.commit()

        def _insert_complex(species_1, species_2, symbols):
            # store one complex and advance the counters of its composition
            composition = '_'.join(sorted(symbols))

            if composition not in composition_ids:
                # ids are handed out in order of first appearance
                composition_ids[composition] = len(composition_ids)
                group_counts[composition] = 0
                bucket_counts[composition] = 0

            cur.execute(
                "INSERT INTO complexes VALUES (?, ?, ?, ?)",
                (species_1,
                 species_2,
                 composition_ids[composition],
                 group_counts[composition]))

            _count_row()

            bucket_counts[composition] += 1
            if bucket_counts[composition] % group_size == 0:
                group_counts[composition] += 1

        for m in entries:
            _insert_complex(m.ind, -1, m.species)

        for (m1, m2) in combinations_with_replacement(entries, 2):
            _insert_complex(m1.ind, m2.ind, m1.species + m2.species)

        cur.execute("CREATE TABLE group_counts (composition_id, count)")
        cur.execute(
            "CREATE TABLE compositions (composition_id, composition)")

        for composition, composition_id in composition_ids.items():
            cur.execute(
                "INSERT INTO group_counts VALUES (?, ?)",
                (composition_id, group_counts[composition] + 1))

            cur.execute(
                "INSERT INTO compositions VALUES (?,?)",
                (composition_id, composition))

            _count_row()

        con.commit()
    finally:
        # release the database handle even when an insert fails part way
        con.close()
def find_mol_entry_from_xyz_and_charge(mol_entries, xyz_file_path, charge):
    """
    given a file 'molecule.xyz', find the mol_entry corresponding to the
    molecule graph with given charge

    Args:
        mol_entries: iterable of entries exposing `charge`, `mol_graph`
            and `ind`.
        xyz_file_path: path of the xyz file describing the target molecule.
        charge: charge the matching entry must have.

    Returns:
        the `ind` of the first entry whose charge equals `charge` and whose
        molecule graph is isomorphic to the one built from the xyz file,
        or None when no entry matches.
    """
    target_mol_graph = MoleculeGraph.with_local_env_strategy(
        Molecule.from_file(xyz_file_path), OpenBabelNN()
    )

    # correction to the molecule graph
    target_mol_graph = metal_edge_extender(target_mol_graph)

    # iterate directly over the entries: the previous index-driven
    # `while not match` loop ran past the end of the list (IndexError)
    # whenever nothing matched, so its `return None` was unreachable
    for mol_entry in mol_entries:
        # graphs are only compared once the charge already agrees
        if (mol_entry.charge == charge
                and target_mol_graph.isomorphic_to(mol_entry.mol_graph)):
            return mol_entry.ind

    return None
def log_message(*args, **kwargs):
    """
    print a message prefixed with the local time formatted as [HH:MM:SS].

    All positional and keyword arguments are forwarded to print unchanged,
    so sep, end, file and flush behave exactly as they do for print.
    """
    timestamp = strftime('%H:%M:%S', localtime())
    prefix = '[{}]'.format(timestamp)
    print(prefix, *args, **kwargs)
class MoleculeEntry:
    """
    A molecule entry class to provide easy access to Molecule properties.

    Args:
        molecule: Molecule of interest. Only used to build the molecule
            graph when `mol_graph` is not supplied.
        energy: Electronic energy of the molecule in Hartree.
        enthalpy: Enthalpy of the molecule (kcal/mol). May be None.
        entropy: Entropy of the molecule (cal/mol.K). May be None.
        entry_id: An id to uniquely identify the entry.
        mol_graph: MoleculeGraph of the molecule. When falsy, a graph is
            built from `molecule` with OpenBabelNN and then passed through
            metal_edge_extender.
        partial_charges_resp: RESP partial charges.
        partial_charges_mulliken: Mulliken partial charges.
        partial_charges_nbo: NBO partial charges.
        electron_affinity: Electron affinity in eV, or None.
        ionization_energy: Ionization energy in eV, or None.
        spin_multiplicity: Spin multiplicity of the molecule.
        partial_spins_nbo: NBO partial spins.
    """

    # unit conversions used by get_free_energy, which reports eV
    _HARTREE_TO_EV = 27.21139
    _KCAL_PER_MOL_TO_EV = 0.0433641
    _CAL_PER_MOL_TO_EV = 0.0000433641

    def __init__(
        self,
        molecule,
        energy,
        enthalpy,
        entropy,
        entry_id,
        mol_graph,
        partial_charges_resp,
        partial_charges_mulliken,
        partial_charges_nbo,
        electron_affinity,
        ionization_energy,
        spin_multiplicity,
        partial_spins_nbo
    ):
        self.energy = energy
        self.enthalpy = enthalpy
        self.entropy = entropy
        self.electron_affinity = electron_affinity
        self.ionization_energy = ionization_energy
        self.spin_multiplicity = spin_multiplicity

        # ind is assigned later, once the entry has a position in a list
        self.ind = None
        self.entry_id = entry_id

        self.star_hashes = {}
        self.fragment_data = []

        if not mol_graph:
            mol_graph = MoleculeGraph.with_local_env_strategy(
                molecule, OpenBabelNN())
            self.mol_graph = metal_edge_extender(mol_graph)
        else:
            self.mol_graph = mol_graph

        self.partial_charges_resp = partial_charges_resp
        self.partial_charges_mulliken = partial_charges_mulliken
        self.partial_charges_nbo = partial_charges_nbo
        self.partial_spins_nbo = partial_spins_nbo

        # from here on the molecule stored on the graph is authoritative
        self.molecule = self.mol_graph.molecule
        self.graph = self.mol_graph.graph.to_undirected()
        self.species = [str(s) for s in self.molecule.species]

        self.m_inds = [
            i for i, x in enumerate(self.species) if x in metals
        ]

        # penalty gets used in the non local part of species filtering.
        # certain species filters will increase penalty rather than
        # explicitly filtering out a molecule. The non local filtering step
        # prioritizes mols with a lower penalty.
        self.penalty = 0

        # the covalent graph is the molecule graph without its metal atoms
        self.covalent_graph = copy.deepcopy(self.graph)
        self.covalent_graph.remove_nodes_from(self.m_inds)

        self.formula = self.molecule.composition.alphabetical_formula
        self.charge = self.molecule.charge
        self.num_atoms = len(self.molecule)

        self.atom_locations = [
            site.coords for site in self.molecule]

        self.free_energy = self.get_free_energy()

        self.non_metal_atoms = [
            i for i in range(self.num_atoms)
            if self.species[i] not in metals]

    @classmethod
    def from_dataset_entry(
        cls,
        doc: Dict,
        use_thermo: str = "raw",
    ):
        """
        Initialize a MoleculeEntry from a document in the LIBE (Lithium-Ion
        Battery Electrolyte) or MADEIRA (MAgnesium Dataset of Electrolyte and
        Interphase ReAgents) datasets.

        Args:
            doc: Dictionary representing an entry from LIBE or MADEIRA
            use_thermo: One of "raw" (meaning raw, uncorrected thermo data
                will be used), "rrho_shifted" (meaning that a slightly
                modified Rigid-Rotor Harmonic Oscillator approximation will
                be used - see Ribiero et al., J. Phys. Chem. B 2011, 115,
                14556-14562), or "qrrho" (meaning that Grimme's Quasi-Rigid
                Rotor Harmonic Oscillator - see Grimme, Chem. Eur. J. 2012,
                18, 9955-9964) will be used. The value is matched case
                insensitively. When the requested corrected data is None in
                `doc`, the raw data is used instead.

        Raises:
            ValueError: if use_thermo is not one of the allowed values.
            Exception: if a required key is missing from `doc`; the
                original KeyError is attached as the cause.
        """

        thermo = use_thermo.lower()

        if thermo not in ["raw", "rrho_shifted", "qrrho"]:
            raise ValueError(
                "Only allowed values for use_thermo are 'raw', 'rrho_shifted', "
                "and 'qrrho'!"
            )
        try:
            if isinstance(doc["molecule"], Molecule):
                molecule = doc["molecule"]
            else:
                molecule = Molecule.from_dict(doc["molecule"])  # type: ignore

            # corrected thermo is stored in eV; convert it to the units
            # used by the raw data (Hartree, kcal/mol, cal/mol.K)
            if (
                thermo == "rrho_shifted"
                and doc["thermo"]["shifted_rrho_eV"] is not None
            ):
                energy = (
                    doc["thermo"]["shifted_rrho_eV"]["electronic_energy"] * 0.0367493
                )
                enthalpy = doc["thermo"]["shifted_rrho_eV"]["total_enthalpy"] * 23.061
                entropy = doc["thermo"]["shifted_rrho_eV"]["total_entropy"] * 23061
            elif thermo == "qrrho" and doc["thermo"]["quasi_rrho_eV"] is not None:
                energy = doc["thermo"]["quasi_rrho_eV"]["electronic_energy"] * 0.0367493
                enthalpy = doc["thermo"]["quasi_rrho_eV"]["total_enthalpy"] * 23.061
                entropy = doc["thermo"]["quasi_rrho_eV"]["total_entropy"] * 23061
            else:
                energy = doc["thermo"]["raw"]["electronic_energy_Ha"]
                enthalpy = doc["thermo"]["raw"]["total_enthalpy_kcal/mol"]
                entropy = doc["thermo"]["raw"]["total_entropy_cal/molK"]

            entry_id = doc["molecule_id"]

            if isinstance(doc["molecule_graph"], MoleculeGraph):
                mol_graph = doc["molecule_graph"]
            else:
                mol_graph = MoleculeGraph.from_dict(doc["molecule_graph"])

            partial_charges_resp = doc['partial_charges']['resp']
            partial_charges_mulliken = doc['partial_charges']['mulliken']
            spin_multiplicity = doc['spin_multiplicity']

            # for single atoms the mulliken values stand in for nbo
            if doc['number_atoms'] == 1:
                partial_charges_nbo = doc['partial_charges']['mulliken']
                partial_spins_nbo = doc['partial_spins']['mulliken']
            else:
                partial_charges_nbo = doc['partial_charges']['nbo']
                partial_spins_nbo = doc['partial_spins']['nbo']

            # redox data is optional
            electron_affinity_eV = None
            ionization_energy_eV = None
            if 'redox' in doc:
                if 'electron_affinity_eV' in doc['redox']:
                    electron_affinity_eV = doc['redox']['electron_affinity_eV']

                if 'ionization_energy_eV' in doc['redox']:
                    ionization_energy_eV = doc['redox']['ionization_energy_eV']

        except KeyError as e:
            # chain the KeyError so the traceback shows where the lookup failed
            raise Exception(
                "Unable to construct molecule entry from molecule document; missing "
                f"attribute {e} in `doc`."
            ) from e

        return cls(
            molecule=molecule,
            energy=energy,
            enthalpy=enthalpy,
            entropy=entropy,
            entry_id=entry_id,
            mol_graph=mol_graph,
            partial_charges_resp=partial_charges_resp,
            partial_charges_mulliken=partial_charges_mulliken,
            partial_charges_nbo=partial_charges_nbo,
            electron_affinity=electron_affinity_eV,
            ionization_energy=ionization_energy_eV,
            spin_multiplicity=spin_multiplicity,
            partial_spins_nbo=partial_spins_nbo
        )

    def get_free_energy(self, temperature: float = ROOM_TEMP) -> Optional[float]:
        """
        Get the free energy (in eV) at the given temperature.

        Returns None when either the enthalpy or the entropy is missing.
        """
        if self.enthalpy is not None and self.entropy is not None:
            return (
                self.energy * self._HARTREE_TO_EV
                + self._KCAL_PER_MOL_TO_EV * self.enthalpy
                - temperature * self.entropy * self._CAL_PER_MOL_TO_EV
            )
        else:
            return None

    def __repr__(self):

        output = [
            f"MoleculeEntry {self.entry_id} - {self.formula}",
            f"Total charge = {self.charge}",
        ]

        energies = [
            ("Energy", "Hartree", self.energy),
            ("Enthalpy", "kcal/mol", self.enthalpy),
            ("Entropy", "cal/mol.K", self.entropy),
            ("Free Energy (298.15 K)", "eV", self.get_free_energy()),
        ]
        for name, unit, value in energies:
            if value is None:
                output.append(f"{name} = {value} {unit}")
            else:
                output.append(f"{name} = {value:.4f} {unit}")

        # compare against None: index 0 is a valid index and was previously
        # dropped by the truthiness test
        if self.ind is not None:
            output.append("index: {}".format(self.ind))

        return "\n".join(output)

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        # entries are equal when they are of the same class and render to
        # the same text.
        # NOTE(review): defining __eq__ without __hash__ leaves instances
        # unhashable; confirm no caller needs them in sets or as dict keys.
        if type(self) is type(other):
            return str(self) == str(other)
        else:
            return False
def get_all_coordination_reactions(self, metal_id):
    """
    get every reaction with two reactants and one product in which
    metal_id is one of the reactants, ordered by dG descending.

    The query binds metal_id as a parameter instead of formatting it into
    the SQL text.

    Returns a list of reaction dicts with the keys reaction_id,
    number_of_reactants, number_of_products, reactants, products, rate,
    dG and dG_barrier.
    """
    query = (
        "SELECT * FROM reactions WHERE (number_of_reactants=2 "
        "AND number_of_products=1 "
        "AND (reactant_1=? OR reactant_2=?)) ORDER BY dG DESC;"
    )

    cur = self.rn_con.cursor()
    return [
        {
            # reaction_id is included to agree with get_reactions_in_range
            'reaction_id': res[0],
            'number_of_reactants': res[1],
            'number_of_products': res[2],
            'reactants': res[3:5],
            'products': res[5:7],
            'rate': res[7],
            'dG': res[8],
            'dG_barrier': res[9],
        }
        for res in cur.execute(query, (metal_id, metal_id))
    ]
def index_to_reaction(self, reaction_index):
    """
    this method gets called a lot, so we cache the reactions to
    minimize database interaction

    Returns the reaction dict for reaction_index, reading it from the
    network database on the first request and from self.reactions
    afterwards.

    Raises:
        IndexError: if the database has no reaction with that id.
    """
    if reaction_index in self.reactions:
        return self.reactions[reaction_index]

    print("fetching data for reaction", reaction_index)
    cur = self.rn_con.cursor()

    # fetchone avoids materializing the whole cursor for a single row
    res = cur.execute(sql_get_reaction, (reaction_index,)).fetchone()
    if res is None:
        # same exception type the old `list(...)[0]` raised, but it now
        # says which id was missing
        raise IndexError(
            f"no reaction with reaction_id {reaction_index} "
            "in the network database")

    reaction = {
        # reaction_id is included to agree with get_reactions_in_range
        'reaction_id': res[0],
        'number_of_reactants': res[1],
        'number_of_products': res[2],
        'reactants': res[3:5],
        'products': res[5:7],
        'rate': res[7],
        'dG': res[8],
        'dG_barrier': res[9],
    }
    self.reactions[reaction_index] = reaction
    return reaction
class QuadTreeNode:
    """
    origin is at top left so to agree with
    the cairo canvas coordinates.

    Notice that this is a recursive initializer. It creates
    1 + 4 + ... + 4^(depth) = O(4^(depth + 1)) QuadTreeNodes,
    so don't go too deep!

    Each node covers the half open rectangle
        x_min <= x < x_max, y_min <= y < y_max
    A non terminal node has `quads` (its four children) and `data` set to
    None; a terminal node has `quads` set to None and `data` as a list.
    """
    def __init__(self, depth, x_min, x_max, y_min, y_max):

        self.x_min = x_min
        self.x_max = x_max
        self.y_min = y_min
        self.y_max = y_max

        # you either have quads or data
        # if you have quads, you are non terminal
        # if you have data you are terminal
        self.quads = None
        self.data = []
        self.branch(depth)

    def branch(self, depth):
        """
        break node into 4 nodes, recursively, until depth reaches 0.
        """
        if depth <= 0:
            return

        self.data = None
        self.x_mid = (self.x_min + self.x_max) / 2
        self.y_mid = (self.y_min + self.y_max) / 2

        child_depth = depth - 1
        self.quads = [
            # top left
            QuadTreeNode(
                child_depth, self.x_min, self.x_mid, self.y_min, self.y_mid),
            # top right
            QuadTreeNode(
                child_depth, self.x_mid, self.x_max, self.y_min, self.y_mid),
            # bottom left
            QuadTreeNode(
                child_depth, self.x_min, self.x_mid, self.y_mid, self.y_max),
            # bottom right
            QuadTreeNode(
                child_depth, self.x_mid, self.x_max, self.y_mid, self.y_max),
        ]

    def _require_node(self, x, y):
        # find the terminal node for (x, y) or fail with a readable error
        node = self.find_node(x, y)
        if node is None:
            raise ValueError(
                "point ({}, {}) is outside the quad tree bounds {}".format(
                    x, y, self))
        return node

    def insert(self, x, y, val):
        """
        append val to the terminal node containing (x, y) and return val.

        Raises ValueError if the point lies outside the tree. Previously
        this surfaced as an AttributeError on None.
        """
        node = self._require_node(x, y)
        node.data.append(val)
        return val

    def find_neighborhood(self, x, y):
        """
        find all nodes adjacent to our point.
        doesn't return the node actually containing our point.

        Raises ValueError if the point lies outside the tree.
        """
        node = self._require_node(x, y)
        x_diff = node.x_max - node.x_min
        y_diff = node.y_max - node.y_min

        # step one cell width in each of the 8 directions; steps that
        # leave the tree give None and are dropped
        offsets = [
            (x_diff, 0), (-x_diff, 0),
            (0, y_diff), (0, -y_diff),
            (x_diff, y_diff), (-x_diff, y_diff),
            (x_diff, -y_diff), (-x_diff, -y_diff),
        ]
        candidates = (
            self.find_node(x + dx, y + dy) for dx, dy in offsets)
        return [n for n in candidates if n is not None]

    def find_node(self, x, y):
        """
        find the terminal node so that
        x_min <= x < x_max
        y_min <= y < y_max
        return None if there is no node.
        Note: this gives the wrong answer if called from a terminal node,
        because a terminal node returns itself without checking bounds.
        """
        if self.quads is None:
            return self

        for quad in self.quads:
            if (quad.x_min <= x < quad.x_max and
                    quad.y_min <= y < quad.y_max):
                return quad.find_node(x, y)

        return None

    def __str__(self):
        return (
            "x : [" + str(self.x_min) + ", " + str(self.x_max) + ") " +
            "y : [" + str(self.y_min) + ", " + str(self.y_max) + ")"
        )

    def __repr__(self):
        return self.__str__()
def new_node(self, tag, point=None):
    """
    register a node position under tag.

    if point is None, a node position will be generated by the repulsive
    sampler. note: if you provide a point, it will go exactly where you
    say, which may be very close to other points. If tag already used,
    do nothing.
    """
    if tag in self.node_dict:
        return

    if point is None:
        position = self.repulsive_sampler.sample()
    else:
        quad_tree = self.repulsive_sampler.quad_tree
        position = quad_tree.insert(point[0], point[1], point)

    self.node_dict[tag] = position
260 | if color is None: 261 | color = self.local_sampler.choice(self.colors) 262 | 263 | 264 | 265 | point1 = self.node_dict[tag1] 266 | point2 = self.node_dict[tag2] 267 | self.context.set_source_rgb(*color) 268 | self.context.set_line_width(width) 269 | self.context.move_to(*point1) 270 | self.context.line_to(*point2) 271 | self.context.stroke() 272 | 273 | def render(self, path): 274 | self.surface.write_to_png(path) 275 | -------------------------------------------------------------------------------- /HiPRGen/reaction_filter.py: -------------------------------------------------------------------------------- 1 | from mpi4py import MPI 2 | from itertools import permutations, product 3 | from HiPRGen.report_generator import ReportGenerator 4 | from time import time 5 | from HiPRGen.logging import log_message 6 | import sqlite3 7 | from enum import Enum 8 | from math import floor 9 | 10 | from HiPRGen.reaction_questions import ( 11 | run_decision_tree 12 | ) 13 | 14 | """ 15 | Phases 3 & 4 run in paralell using MPI 16 | 17 | Phase 3: reaction gen and filtering 18 | input: a bucket labeled by atom count 19 | output: a list of reactions from that bucket 20 | description: Loop through all possible reactions in the bucket and apply the decision tree. This will run in parallel over each bucket. 21 | 22 | Phase 4: collating and indexing 23 | input: all the outputs of phase 3 as they are generated 24 | output: reaction network database 25 | description: the worker processes from phase 3 are sending their reactions to this phase and it is writing them to DB as it gets them. We can ensure that duplicates don't get generated in phase 3 which means we don't need extra index tables on the db. 26 | 27 | the code in this file is designed to run on a compute cluster using MPI. 
28 | """ 29 | 30 | 31 | create_metadata_table = """ 32 | CREATE TABLE metadata ( 33 | number_of_species INTEGER NOT NULL, 34 | number_of_reactions INTEGER NOT NULL 35 | ); 36 | """ 37 | 38 | insert_metadata = """ 39 | INSERT INTO metadata VALUES (?, ?) 40 | """ 41 | 42 | # it is important that reaction_id is the primary key 43 | # otherwise the network loader will be extremely slow. 44 | create_reactions_table = """ 45 | CREATE TABLE reactions ( 46 | reaction_id INTEGER NOT NULL PRIMARY KEY, 47 | number_of_reactants INTEGER NOT NULL, 48 | number_of_products INTEGER NOT NULL, 49 | reactant_1 INTEGER NOT NULL, 50 | reactant_2 INTEGER NOT NULL, 51 | product_1 INTEGER NOT NULL, 52 | product_2 INTEGER NOT NULL, 53 | rate REAL NOT NULL, 54 | dG REAL NOT NULL, 55 | dG_barrier REAL NOT NULL, 56 | is_redox INTEGER NOT NULL 57 | ); 58 | """ 59 | 60 | 61 | insert_reaction = """ 62 | INSERT INTO reactions VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 63 | """ 64 | 65 | get_complex_group_sql = """ 66 | SELECT * FROM complexes WHERE composition_id=? AND group_id=? 
def dispatcher(
        mol_entries,
        dispatcher_payload
):
    """
    Run the dispatcher side of the reaction filter.

    The dispatcher reads the list of work batches from the bucket
    database, creates the reaction network database, waits for every
    worker to report that it has initialized and then serves messages
    until no worker is still running:

    - SEND_ME_A_WORK_BATCH: reply with the next batch, or with None
      once the list is exhausted (the worker is then marked finished).
    - NEW_REACTION_DB: insert the reaction into the reactions table.
    - NEW_REACTION_LOGGING: write the reaction and its decision path
      to the generation report.

    mol_entries: the molecule entries; handed to the ReportGenerator
        and counted for the metadata row.
    dispatcher_payload: object providing bucket_db_file,
        reaction_network_db_file, report_file, commit_frequency and
        checkpoint_interval (seconds between progress log lines).
    """

    comm = MPI.COMM_WORLD
    bucket_con = sqlite3.connect(dispatcher_payload.bucket_db_file)
    bucket_cur = bucket_con.cursor()

    # one batch per ordered pair of groups inside each composition
    work_batch_list = [
        (composition_id, i, j)
        for (composition_id, count) in bucket_cur.execute(
            "SELECT * FROM group_counts")
        for (i, j) in product(range(count), repeat=2)
    ]

    composition_names = {
        composition_id: composition
        for (composition_id, composition) in bucket_cur.execute(
            "SELECT * FROM compositions")
    }

    log_message("creating reaction network db")
    rn_con = sqlite3.connect(dispatcher_payload.reaction_network_db_file)
    rn_cur = rn_con.cursor()
    rn_cur.execute(create_metadata_table)
    rn_cur.execute(create_reactions_table)
    rn_con.commit()

    log_message("initializing report generator")

    # since MPI processes spin lock, we don't want to have the dispatcher
    # spend a bunch of time generating molecule pictures
    report_generator = ReportGenerator(
        mol_entries,
        dispatcher_payload.report_file,
        rebuild_mol_pictures=False
    )

    worker_states = {
        rank: WorkerState.INITIALIZING
        for rank in range(comm.Get_size())
        if rank != DISPATCHER_RANK
    }

    for rank in worker_states:
        # block, waiting for workers to initialize
        comm.recv(source=rank, tag=INITIALIZATION_FINISHED)
        worker_states[rank] = WorkerState.RUNNING

    log_message("all workers running")

    reaction_index = 0

    log_message("handling requests")

    batches_left_at_last_checkpoint = len(work_batch_list)
    last_checkpoint_time = floor(time())
    while WorkerState.RUNNING in worker_states.values():

        current_time = floor(time())
        time_diff = current_time - last_checkpoint_time

        # Checkpoint once at least checkpoint_interval seconds have
        # elapsed.  Comparing elapsed time (rather than testing whether
        # the current second is a multiple of the interval) means a
        # checkpoint cannot be missed because no message happened to
        # arrive during one particular second; time_diff > 0 keeps the
        # rate computation below from dividing by zero.
        if (time_diff > 0 and
                time_diff >= dispatcher_payload.checkpoint_interval):
            batches_left_at_current_checkpoint = len(work_batch_list)
            batch_count_diff = (
                batches_left_at_last_checkpoint -
                batches_left_at_current_checkpoint)

            batch_consumption_rate = batch_count_diff / time_diff

            log_message("batches remaining:", batches_left_at_current_checkpoint)
            log_message("batch consumption rate:",
                        batch_consumption_rate,
                        "batches per second")

            batches_left_at_last_checkpoint = batches_left_at_current_checkpoint
            last_checkpoint_time = current_time

        status = MPI.Status()
        data = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        tag = status.Get_tag()
        rank = status.Get_source()

        if tag == SEND_ME_A_WORK_BATCH:
            if len(work_batch_list) == 0:
                # None tells the worker that there is nothing left to do
                comm.send(None, dest=rank, tag=HERE_IS_A_WORK_BATCH)
                worker_states[rank] = WorkerState.FINISHED
            else:
                # pop removes and returns the last item in the list
                work_batch = work_batch_list.pop()
                comm.send(work_batch, dest=rank, tag=HERE_IS_A_WORK_BATCH)
                composition_id, group_id_0, group_id_1 = work_batch
                log_message(
                    "dispatched",
                    composition_names[composition_id],
                    ": group ids:",
                    group_id_0, group_id_1
                )

        elif tag == NEW_REACTION_DB:
            reaction = data
            rn_cur.execute(
                insert_reaction,
                (reaction_index,
                 reaction['number_of_reactants'],
                 reaction['number_of_products'],
                 reaction['reactants'][0],
                 reaction['reactants'][1],
                 reaction['products'][0],
                 reaction['products'][1],
                 reaction['rate'],
                 reaction['dG'],
                 reaction['dG_barrier'],
                 reaction['is_redox']
                 ))

            reaction_index += 1
            # commit in batches rather than once per reaction
            if reaction_index % dispatcher_payload.commit_frequency == 0:
                rn_con.commit()

        elif tag == NEW_REACTION_LOGGING:

            reaction = data[0]
            decision_path = data[1]

            report_generator.emit_verbatim(decision_path)
            report_generator.emit_reaction(reaction)
            report_generator.emit_bond_breakage(reaction)
            report_generator.emit_newline()

    log_message("finalzing database and generation report")
    rn_cur.execute(
        insert_metadata,
        (len(mol_entries),
         reaction_index)
    )

    report_generator.finished()
    rn_con.commit()
    bucket_con.close()
    rn_con.close()
composition_id, group_id_0, group_id_1 = work_batch 271 | 272 | 273 | if group_id_0 == group_id_1: 274 | 275 | res = cur.execute( 276 | get_complex_group_sql, 277 | (composition_id, group_id_0)) 278 | 279 | bucket = [] 280 | for row in res: 281 | bucket.append((row[0],row[1])) 282 | 283 | iterator = permutations(bucket, r=2) 284 | 285 | else: 286 | 287 | res_0 = cur.execute( 288 | get_complex_group_sql, 289 | (composition_id, group_id_0)) 290 | 291 | bucket_0 = [] 292 | for row in res_0: 293 | bucket_0.append((row[0],row[1])) 294 | 295 | res_1 = cur.execute( 296 | get_complex_group_sql, 297 | (composition_id, group_id_1)) 298 | 299 | bucket_1 = [] 300 | for row in res_1: 301 | bucket_1.append((row[0],row[1])) 302 | 303 | iterator = product(bucket_0, bucket_1) 304 | 305 | 306 | 307 | for (reactants, products) in iterator: 308 | reaction = { 309 | 'reactants' : reactants, 310 | 'products' : products, 311 | 'number_of_reactants' : len([i for i in reactants if i != -1]), 312 | 'number_of_products' : len([i for i in products if i != -1])} 313 | 314 | 315 | decision_pathway = [] 316 | if run_decision_tree(reaction, 317 | mol_entries, 318 | worker_payload.params, 319 | worker_payload.reaction_decision_tree, 320 | decision_pathway 321 | ): 322 | 323 | comm.send( 324 | reaction, 325 | dest=DISPATCHER_RANK, 326 | tag=NEW_REACTION_DB) 327 | 328 | 329 | if run_decision_tree(reaction, 330 | mol_entries, 331 | worker_payload.params, 332 | worker_payload.logging_decision_tree): 333 | 334 | comm.send( 335 | (reaction, 336 | '\n'.join([str(f) for f in decision_pathway]) 337 | ), 338 | 339 | dest=DISPATCHER_RANK, 340 | tag=NEW_REACTION_LOGGING) 341 | -------------------------------------------------------------------------------- /HiPRGen/reaction_filter_payloads.py: -------------------------------------------------------------------------------- 1 | from monty.json import MSONable 2 | 3 | class DispatcherPayload(MSONable): 4 | """ 5 | class for storing all the arguments required by 
class WorkerPayload(MSONable):
    """
    class for storing all the arguments required by a reaction filter
    worker: the path of the bucket database it reads its batches from,
    the decision tree which decides whether a reaction is kept, the
    params passed to every question, and the decision tree which
    decides whether a reaction is written to the generation report.
    """
    def __init__(self, bucket_db_file, reaction_decision_tree,
                 params, logging_decision_tree):

        (self.bucket_db_file,
         self.reaction_decision_tree,
         self.params,
         self.logging_decision_tree) = (bucket_db_file,
                                        reaction_decision_tree,
                                        params,
                                        logging_decision_tree)
def run_decision_tree(
        reaction,
        mol_entries,
        params,
        decision_tree,
        decision_pathway=None):
    """
    Walk decision_tree for reaction and report whether it is kept.

    A node is either a Terminal or a list of (question, node) pairs.
    At a list node the questions are asked in order and the walk moves
    to the node paired with the first question which returns True.

    reaction, mol_entries, params: passed unchanged to every question.
    decision_tree: the root node.
    decision_pathway: optional list; when given, every question that
        answered True and the Terminal finally reached are appended to
        it, in order.

    returns True if the Terminal reached is Terminal.KEEP, False for
    any other Terminal.

    raises Exception if the walk ends on something that is neither a
    list nor a Terminal, which is what happens when no question of a
    list node returns True.
    """
    node = decision_tree

    while isinstance(node, list):
        next_node = None
        for (question, new_node) in node:
            if question(reaction, mol_entries, params):

                # if decision_pathway is a list,
                # append the question which
                # answered true i.e the edge we follow
                if decision_pathway is not None:
                    decision_pathway.append(question)

                next_node = new_node
                break

        node = next_node

    if isinstance(node, Terminal):
        if decision_pathway is not None:
            decision_pathway.append(node)

        return node == Terminal.KEEP

    # the offending node travels with the exception instead of being
    # printed to stdout, so it is not lost when output is redirected
    raise Exception(
        "unexpected node type reached: " + repr(node) + "\n"
        "this is usually caused because none of the questions "
        "in some node returned True.")
class metal_metal_reaction(MSONable):
    """
    Question which is True for a reaction with exactly one reactant
    and one product whose formulas are both listed in m_formulas.
    """
    def __init__(self):
        pass

    def __str__(self):
        # decision pathways are rendered with str(question); without
        # this the report would show the default object repr
        return "metal metal reaction"

    def __call__(self, reaction, mol_entries, params):
        return (
            reaction['number_of_reactants'] == 1 and
            reaction['number_of_products'] == 1 and
            (mol_entries[reaction['reactants'][0]].formula in m_formulas) and
            (mol_entries[reaction['products'][0]].formula in m_formulas))
def marcus_barrier(reaction, mols, params):
    """
    Store the Marcus theory barrier of a single electron transfer in
    reaction['marcus_barrier'].

    The barrier is

        dG* = (lambda / 4) * (1 + dG / lambda)^2

    where dG is the free energy change of the reaction and lambda is
    the reorganization energy, split into an inner term (first
    solvation shell) and an outer term (bulk solvent):

        lambda       = lambda_inner + lambda_outer
        lambda_outer = e / (8 pi eps_0) * (1/r - 1/(2R)) * (1/n^2 - 1/eps)

    e is the fundamental charge, eps_0 the vacuum permittivity, r the
    radius of the first solvation shell, R the distance to the
    electrode, n the index of refraction and eps the relative
    dielectric constant of the solvent.  lambda_inner is the mean of
    whichever of the relevant electron affinity / ionization energy
    values are available, clamped at zero.

    reaction: dict; the first entries of 'reactants' and 'products'
        index the two species involved.
    mols: sequence of molecule entries providing charge, free_energy,
        electron_affinity and ionization_energy.
    params: dict providing 'electron_free_energy'.

    Always returns False, so when used as a question it only annotates
    the reaction.  If the charge changes by anything other than one
    electron the model does not apply and the reaction is left
    untouched.  If neither inner-sphere estimate is available, only
    the outer term is used.
    NOTE(review): energies look like eV (the outer term is e/(4 pi
    eps_0 r) in volts) - confirm against the molecule entries.
    """

    reactant = mols[reaction['reactants'][0]]
    product = mols[reaction['products'][0]]
    dCharge = product.charge - reactant.charge

    if dCharge == -1:
        inner_estimates = [reactant.electron_affinity,
                           product.ionization_energy]
    elif dCharge == 1:
        inner_estimates = [reactant.ionization_energy,
                           product.electron_affinity]
    else:
        # not a single electron transfer
        return False

    n = 1.415  # index of refraction; variable
    eps = 18.5  # dielectric constant; variable

    r = 6.0  # in Angstrom
    R = 7.5  # in Angstrom

    eps_0 = 8.85419 * 10 ** -12  # vacuum permittivity
    e = 1.602 * 10 ** -19  # fundamental charge

    l_outer = e / (8 * math.pi * eps_0)
    # 10 ** 10 converts 1/Angstrom to 1/m; the factor of 2 is because of
    # different definitions of the distance to electrode
    l_outer *= (1 / r - 1 / (2 * R)) * 10 ** 10
    l_outer *= (1 / n ** 2 - 1 / eps)

    known_estimates = [v for v in inner_estimates if v is not None]
    if known_estimates:
        l_inner = sum(known_estimates) / len(known_estimates)
        if l_inner < 0:
            l_inner = 0
    else:
        l_inner = 0

    reorganization_energy = l_inner + l_outer

    dG = (product.free_energy - reactant.free_energy +
          dCharge * params['electron_free_energy'])
    dG_barrier = (reorganization_energy / 4 *
                  (1 + dG / reorganization_energy) ** 2)
    reaction['marcus_barrier'] = dG_barrier
    return False
class star_count_diff_above_threshold(MSONable):
    """
    Question comparing the star hashes of the reactants with those of
    the products: for every hash, the number of times it occurs on
    each side is counted and the absolute differences are summed.  The
    question is True when that sum exceeds the threshold.

    if you want to filter out break-one-form-one reactions, the
    correct value for the threshold is 6.
    """

    def __init__(self, threshold):
        self.threshold = threshold

    def __str__(self):
        return "star count diff above threshold=" + str(self.threshold)

    def __call__(self, reaction, mols, params):

        def tally(side, how_many):
            # star hash -> number of occurrences over the first
            # how_many species listed under reaction[side]
            occurrences = {}
            for position in range(how_many):
                species = mols[reaction[side][position]]
                for star_hash in species.star_hashes.values():
                    occurrences[star_hash] = occurrences.get(star_hash, 0) + 1
            return occurrences

        before = tally('reactants', reaction['number_of_reactants'])
        after = tally('products', reaction['number_of_products'])

        total_difference = sum(
            abs(before.get(star_hash, 0) - after.get(star_hash, 0))
            for star_hash in set(before) | set(after))

        return total_difference > self.threshold
class fragment_matching_found(MSONable):
    """
    Question which looks for a way of fragmenting the reactants and a
    way of fragmenting the products which give the same multiset of
    fragment hashes.

    When such a pair is found the question is True and the reaction is
    annotated with 'reactant_bonds_broken', 'product_bonds_broken',
    'hashes', 'reactant_fragment_count' and 'product_fragment_count'.
    Otherwise it is False and the reaction is left unchanged.
    """
    def __init__(self):
        pass

    def __str__(self):
        return "fragment matching found"

    @staticmethod
    def _candidate_fragment_indices(molecules):
        """
        List the admissible choices of one fragment complex per
        molecule, each choice being a list of indices into the
        molecules' fragment_data.  For two molecules only pairs which
        break at most one bond in total are admitted.
        """
        if len(molecules) == 1:
            return [[i] for i in range(len(molecules[0].fragment_data))]

        if len(molecules) == 2:
            first, second = molecules
            return [
                [i, j]
                for i, complex_0 in enumerate(first.fragment_data)
                for j, complex_1 in enumerate(second.fragment_data)
                if (complex_0.number_of_bonds_broken +
                    complex_1.number_of_bonds_broken <= 1)
            ]

        return []

    @staticmethod
    def _summarize(molecules, fragment_indices):
        """
        For one choice of fragment complexes return the tuple
        (bonds broken, number of fragments, {fragment hash: count}).
        Each atom of a broken bond is tagged with the position of its
        molecule in molecules.
        """
        bonds_broken = []
        fragment_count = 0
        hashes = dict()

        for mol_position, frag_complex_index in enumerate(fragment_indices):
            fragment_complex = molecules[mol_position].fragment_data[
                frag_complex_index]

            for bond in fragment_complex.bonds_broken:
                bonds_broken.append([(mol_position, x) for x in bond])

            for i in range(fragment_complex.number_of_fragments):
                fragment_count += 1
                tag = fragment_complex.fragment_hashes[i]
                hashes[tag] = hashes.get(tag, 0) + 1

        return bonds_broken, fragment_count, hashes

    def __call__(self, reaction, mols, params):

        reactants = [
            mols[reaction['reactants'][k]]
            for k in range(reaction['number_of_reactants'])]

        products = [
            mols[reaction['products'][k]]
            for k in range(reaction['number_of_products'])]

        two_to_two = (reaction['number_of_reactants'] == 2 and
                      reaction['number_of_products'] == 2)

        # a product summary does not depend on the reactant choice, so
        # each one is computed once instead of once per reactant choice
        product_summaries = [
            self._summarize(products, product_fragment_indices)
            for product_fragment_indices
            in self._candidate_fragment_indices(products)]

        for reactant_fragment_indices in self._candidate_fragment_indices(
                reactants):

            (reactant_bonds_broken,
             reactant_fragment_count,
             reactant_hashes) = self._summarize(
                 reactants, reactant_fragment_indices)

            for (product_bonds_broken,
                 product_fragment_count,
                 product_hashes) in product_summaries:

                # don't consider fragmentations with both a ring opening and closing
                if (two_to_two and
                        reactant_fragment_count == 2 and
                        product_fragment_count == 2):
                    continue

                if reactant_hashes == product_hashes:
                    reaction['reactant_bonds_broken'] = reactant_bonds_broken
                    reaction['product_bonds_broken'] = product_bonds_broken
                    reaction['hashes'] = reactant_hashes
                    reaction['reactant_fragment_count'] = reactant_fragment_count
                    reaction['product_fragment_count'] = product_fragment_count

                    return True

        return False
class concerted_metal_coordination(MSONable):
    """
    Question which is True for a reaction with two reactants and two
    products in which at least one of the four species has a formula
    listed in m_formulas.
    """
    def __init__(self):
        pass

    def __str__(self):
        return "concerted metal coordination"

    def __call__(self, reaction, mols, params):

        # guard: only two reactant, two product reactions qualify
        if (reaction['number_of_reactants'] != 2 or
                reaction['number_of_products'] != 2):
            return False

        # reactants first, then products, as in the membership tests below
        species = [
            mols[reaction[side][position]]
            for side in ('reactants', 'products')
            for position in (0, 1)]

        return any(entry.formula in m_formulas for entry in species)
reaction['number_of_products'] == 2): 682 | 683 | product_0 = mols[reaction['products'][0]] 684 | product_1 = mols[reaction['products'][1]] 685 | reactant = mols[reaction['reactants'][0]] 686 | 687 | product_covalent_hashes = set([ 688 | product_0.covalent_hash, 689 | product_1.covalent_hash]) 690 | 691 | if ((product_0.formula in m_formulas or 692 | product_1.formula in m_formulas) and 693 | reactant.covalent_hash not in product_covalent_hashes 694 | ): 695 | return True 696 | else: 697 | return False 698 | 699 | return False 700 | 701 | 702 | class single_reactant_with_ring_break_two(MSONable): 703 | def __init__(self): 704 | pass 705 | 706 | def __str__(self): 707 | return "single reactant with a ring, break two" 708 | 709 | def __call__(self, reaction, mols, params): 710 | if (reaction["number_of_reactants"] == 1 and 711 | reaction["number_of_products"] == 2 and 712 | mols[reaction["reactants"][0]].has_covalent_ring): 713 | 714 | reactant = mols[reaction["reactants"][0]] 715 | product_1 = mols[reaction["products"][0]] 716 | product_2 = mols[reaction["products"][1]] 717 | for fragment_complex in reactant.ring_fragment_data: 718 | if (set(fragment_complex.fragment_hashes) == 719 | set([product_1.covalent_hash, product_2.covalent_hash])): 720 | return True 721 | 722 | 723 | return False 724 | 725 | 726 | class single_product_with_ring_form_two(MSONable): 727 | def __init__(self): 728 | pass 729 | 730 | def __str__(self): 731 | return "single product with a ring, form two" 732 | 733 | def __call__(self, reaction, mols, params): 734 | if (reaction["number_of_reactants"] == 2 and 735 | reaction["number_of_products"] == 1 and 736 | mols[reaction["products"][0]].has_covalent_ring): 737 | 738 | product = mols[reaction["products"][0]] 739 | reactant_1 = mols[reaction["reactants"][0]] 740 | reactant_2 = mols[reaction["reactants"][1]] 741 | for fragment_complex in product.ring_fragment_data: 742 | if (set(fragment_complex.fragment_hashes) == 743 | 
set([reactant_1.covalent_hash, reactant_2.covalent_hash])): 744 | return True 745 | 746 | 747 | return False 748 | 749 | 750 | 751 | default_reaction_decision_tree = [ 752 | 753 | (metal_metal_reaction(), Terminal.DISCARD), 754 | # redox branch 755 | (is_redox_reaction(), [ 756 | 757 | (too_many_reactants_or_products(), Terminal.DISCARD), 758 | (dcharge_too_large(), Terminal.DISCARD), 759 | (reactant_and_product_not_isomorphic(), Terminal.DISCARD), 760 | (dG_above_threshold(0.0, "free_energy", 0.0), Terminal.DISCARD), 761 | (reaction_default_true(), Terminal.KEEP) 762 | ]), 763 | 764 | (dG_above_threshold(0.0, "solvation_free_energy", 0.0), Terminal.DISCARD), 765 | 766 | 767 | # (single_reactant_with_ring_break_two(), Terminal.KEEP), 768 | # (single_product_with_ring_form_two(), Terminal.KEEP), 769 | 770 | (star_count_diff_above_threshold(6), Terminal.DISCARD), 771 | 772 | (reaction_is_covalent_decomposable(), Terminal.DISCARD), 773 | 774 | (concerted_metal_coordination(), Terminal.DISCARD), 775 | 776 | (concerted_metal_coordination_one_product(), Terminal.DISCARD), 777 | 778 | (concerted_metal_coordination_one_reactant(), Terminal.DISCARD), 779 | 780 | (metal_coordination_passthrough(), Terminal.KEEP), 781 | 782 | (fragment_matching_found(), [ 783 | (single_reactant_single_product_not_atom_transfer(), Terminal.DISCARD), 784 | (single_reactant_double_product_ring_close(), Terminal.DISCARD), 785 | (reaction_default_true(), Terminal.KEEP)] 786 | ), 787 | 788 | (reaction_default_true(), Terminal.DISCARD) 789 | ] 790 | -------------------------------------------------------------------------------- /HiPRGen/report_generator.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | from copy import deepcopy 3 | from pathlib import Path 4 | from HiPRGen.logging import log_message 5 | 6 | atom_colors = { 7 | "H": "gray", 8 | "C": "black", 9 | "Li": "purple", 10 | "B": "orange", 11 | "N": "blue", 12 | "O": "red", 13 
atom_colors = {
    "H": "gray",
    "C": "black",
    "Li": "purple",
    "B": "orange",
    "N": "blue",
    "O": "red",
    "F": "green4",
    "Mg": "green",
    "P": "darkorange",
    "S": "yellow",
    "Cl": "chartreuse"
}

# color given to any element which has no entry in atom_colors, so that
# drawing a molecule never fails just because an element is unlisted
fallback_atom_color = "pink"


def visualize_molecule_entry(molecule_entry, path):
    """
    visualize a molecule using graphviz and
    output the resulting pdf to path

    molecule_entry: object with graph, species and charge attributes.
    path: pathlib path of the pdf to write.

    The graph of the molecule entry is deep copied before any drawing
    attributes are set, so the molecule entry itself is not modified.
    """
    graph = deepcopy(molecule_entry.graph)

    # attributes shared by every atom node
    shared_node_attributes = [
        ("style", "filled"),
        ("shape", "circle"),
        ("width", "0.2"),
        ("fontsize", "8.0"),
        ("fontcolor", "white"),
        ("fixedsize", "true"),
    ]

    for attribute_name, attribute_value in shared_node_attributes:
        nx.set_node_attributes(graph, attribute_value, attribute_name)

    # node i is colored according to the i-th species; elements
    # missing from atom_colors get the fallback color
    nx.set_node_attributes(
        graph,
        {index: atom_colors.get(specie, fallback_atom_color)
         for index, specie in enumerate(molecule_entry.species)},
        "color"
    )

    charge = molecule_entry.charge
    agraph = nx.nx_agraph.to_agraph(graph)
    if charge != 0:
        # charged molecules get an extra boxed node showing the charge
        agraph.add_node(
            "charge",
            label=str(charge),
            fontsize="25.0",
            shape="box",
            color="gray",
            style="dashed, rounded",
        )

    agraph.layout()
    log_message("writing " + path.as_posix())
    agraph.draw(path.as_posix(), format="pdf")


def visualize_molecules(mol_entries, folder):
    """
    draw every molecule in mol_entries into folder. The picture of the
    molecule at position i is written to <folder>/<i>.pdf.

    folder is created with a plain mkdir(), so FileExistsError is
    raised if it is already present.
    """
    folder.mkdir()
    for index, molecule_entry in enumerate(mol_entries):
        visualize_molecule_entry(
            molecule_entry,
            folder.joinpath(str(index) + ".pdf"))



class ReportGenerator:
    """
    incrementally writes a LaTeX report. The report file is opened and
    the document header is written on construction; the emit_* methods
    append content; finished() writes the closing tag and closes the
    file.

    A ReportGenerator can also be used as a context manager, in which
    case finished() is called on leaving the with block, even when an
    exception is raised inside it.
    """

    def __init__(
            self,
            mol_entries,
            report_file_path,
            mol_pictures_folder_name='mol_pictures',
            rebuild_mol_pictures=True
    ):
        """
        mol_entries: the molecules the report refers to by index.
        report_file_path: location of the .tex file to write.
        mol_pictures_folder_name: folder, next to the report file,
        holding one pdf per molecule.
        rebuild_mol_pictures: when True, the molecule pictures are
        drawn into that folder before the report is opened.
        """
        self.report_file_path = Path(report_file_path)
        self.mol_pictures_folder_name = mol_pictures_folder_name
        self.mol_pictures_folder = self.report_file_path.parent.joinpath(
            mol_pictures_folder_name)


        if rebuild_mol_pictures:
            visualize_molecules(mol_entries, self.mol_pictures_folder)

        self.mol_entries = mol_entries
        self.f = self.report_file_path.open(mode='w')


        # write in header
        self.f.write("\\documentclass{article}\n")
        self.f.write("\\usepackage{graphicx}\n")
        self.f.write("\\usepackage[margin=1cm]{geometry}\n")
        self.f.write("\\usepackage{amsmath}\n")
        self.f.write("\\pagenumbering{gobble}\n")
        self.f.write("\\begin{document}\n")
        self.f.write("\\setlength\\parindent{0pt}\n")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # always close the report; never swallow the exception
        self.finished()
        return False

    def finished(self):
        """
        write the end of the document and close the report file.
        Calling finished() again on a closed report does nothing.
        """
        if self.f.closed:
            return

        self.f.write("\\end{document}")
        self.f.close()

    def emit_molecule(self, species_index, include_index=True):
        """
        include the picture <species_index>.pdf from the molecule
        pictures folder, preceded by the index itself when
        include_index is True.
        """
        if include_index:
            self.f.write(str(species_index) + "\n")

        self.f.write(
            "\\raisebox{-.5\\height}{"
            + "\\includegraphics[scale=0.2]{"
            + self.mol_pictures_folder_name + '/'
            + str(species_index)
            + ".pdf}}\n"
        )

    def emit_newline(self):
        self.f.write(
            "\n\\vspace{1cm}\n")

    def emit_newpage(self):
        self.f.write("\\newpage\n\n\n")

    def emit_verbatim(self, s):
        """write s inside a verbatim environment"""
        self.f.write('\\begin{verbatim}\n')
        self.f.write(s)
        self.f.write('\n')
        self.f.write('\\end{verbatim}\n')

    def emit_text(self,s):
        """write s as its own paragraph"""
        self.f.write('\n\n' + s + '\n\n')

    def emit_initial_state(self, initial_state):
        """
        initial_state maps species index to a count. Every species
        with a positive count is listed together with its picture.
        """
        self.emit_text("initial state:")
        for species_id, num in initial_state.items():
            if num > 0:
                self.emit_text(str(num) + " molecules of")
                self.emit_molecule(species_id)
                self.emit_newline()

    def _emit_species_sum(self, species_indices):
        # the pictures of the given species, separated by plus signs
        for position, species_index in enumerate(species_indices):
            if position > 0:
                self.f.write("+\n")

            self.emit_molecule(species_index)

    def emit_reaction(self, reaction, label=None):
        """
        write a reaction as a displayed equation. Entries equal to -1
        in reaction['reactants'] and reaction['products'] are skipped.

        When reaction has a 'dG' key, the arrow is annotated with dG
        above and dG_barrier below, so 'dG_barrier' must be present
        too. Otherwise a plain arrow is written.
        """
        reactants_filtered = [i for i in reaction['reactants']
                              if i != -1]

        products_filtered = [i for i in reaction['products']
                             if i != -1]

        self.f.write("$$\n")
        if label is not None:
            self.f.write(label + ": \n")

        self._emit_species_sum(reactants_filtered)

        if 'dG' in reaction:
            self.f.write(
                "\\xrightarrow["
                + ("%.2f" % reaction["dG_barrier"]) +
                "]{" +
                ("%.2f" % reaction["dG"]) + "}\n")
        else:
            self.f.write(
                "\\xrightarrow{}\n")

        self._emit_species_sum(products_filtered)

        self.f.write("$$")
        self.f.write("\n\n\n")

    def emit_bond_breakage(self, reaction):
        """
        list the bonds stored under 'reactant_bonds_broken' and
        'product_bonds_broken', for whichever of the two keys the
        reaction has.
        """
        if 'reactant_bonds_broken' in reaction:
            self.f.write("reactant bonds broken:")
            for bond in reaction['reactant_bonds_broken']:
                self.emit_verbatim(str(bond))

        if 'product_bonds_broken' in reaction:
            self.f.write("product bonds broken:")
            for bond in reaction['product_bonds_broken']:
                self.emit_verbatim(str(bond))

        self.f.write("\n\n\n")
def sort_into_tags(mols):
    """
    bucket molecules by the tag (charge, formula, covalent_hash).
    Returns a dict mapping each tag to the list of molecules carrying
    it, in the order in which they were encountered.
    """
    isomorphism_buckets = {}
    for mol in mols:
        tag = (mol.charge, mol.formula, mol.covalent_hash)
        isomorphism_buckets.setdefault(tag, []).append(mol)

    return isomorphism_buckets


def really_covalent_isomorphic(mol1, mol2):
    """
    check for isomorphism directly instead of using hash.
    warning: this is really slow. It is used in species filtering
    to avoid hash collisions. Do not use it anywhere else.
    """
    # nodes only match when their 'specie' attributes agree
    same_specie = iso.categorical_node_match('specie', None)

    return nx.is_isomorphic(
        mol1.covalent_graph,
        mol2.covalent_graph,
        node_match=same_specie
    )



def groupby(equivalence_relation, xs):
    """
    partition xs into groups of equivalent elements. Each element is
    compared against the first member of every existing group, in
    order, and joins the first group that matches; otherwise it
    starts a new group.

    warning: this has slightly different semantics than
    itertools groupby which depends on ordering.
    """
    groups = []

    for x in xs:
        for group in groups:
            if equivalence_relation(x, group[0]):
                group.append(x)
                break
        else:
            # no existing group accepted x
            groups.append([x])

    return groups
def species_filter(
        dataset_entries,
        mol_entries_pickle_location,
        species_report,
        species_decision_tree,
        coordimer_weight,
        species_logging_decision_tree=Terminal.DISCARD,
        generate_unfiltered_mol_pictures=False
):

    """
    run each molecule through the species decision tree and then choose the lowest weight
    coordimer based on the coordimer_weight function.

    dataset_entries: raw entries, each converted into a MoleculeEntry.
    mol_entries_pickle_location: where the final list is pickled.
    species_report: path of the LaTeX species report.
    species_decision_tree: tree deciding which molecules are kept.
    coordimer_weight: key function; within each group of covalently
    isomorphic molecules the one with the smallest weight survives.
    species_logging_decision_tree: tree deciding which molecules are
    written to the report.
    generate_unfiltered_mol_pictures: whether pictures of the
    unfiltered molecules are drawn for the report.

    returns the filtered molecule entries, each with its position in
    the list stored in the ind attribute.
    """

    log_message("starting species filter")
    log_message("loading molecule entries from json")

    mol_entries_unfiltered = [
        MoleculeEntry.from_dataset_entry(entry)
        for entry in dataset_entries]

    log_message("generating unfiltered mol pictures")

    report_generator = ReportGenerator(
        mol_entries_unfiltered,
        species_report,
        mol_pictures_folder_name='mol_pictures_unfiltered',
        rebuild_mol_pictures=generate_unfiltered_mol_pictures
    )

    report_generator.emit_text("species report")

    log_message("applying local filters")
    mol_entries_filtered = []

    # note: it is important here that we are applying the local filters before
    # the non local ones. We remove some molecules which are lower energy
    # than other more realistic lithomers.

    for number, mol in enumerate(mol_entries_unfiltered):
        log_message("filtering " + mol.entry_id)
        decision_pathway = []
        kept = run_decision_tree(
            mol, species_decision_tree, decision_pathway)

        if kept:
            mol_entries_filtered.append(mol)

        # the logging tree is consulted for every molecule, kept or not
        if not run_decision_tree(mol, species_logging_decision_tree):
            continue

        report_generator.emit_verbatim(
            '\n'.join([str(step) for step in decision_pathway]))

        report_generator.emit_text(f"number: {number!s}")
        report_generator.emit_text("entry id: " + mol.entry_id)
        report_generator.emit_text(
            f"uncorrected free energy: {mol.free_energy!s}")
        report_generator.emit_text(
            f"number of coordination bonds: "
            f"{mol.number_of_coordination_bonds!s}")
        report_generator.emit_text(
            f"corrected free energy: {mol.solvation_free_energy!s}")
        report_generator.emit_text("formula: " + mol.formula)

        report_generator.emit_molecule(number, include_index=False)
        report_generator.emit_newline()

    report_generator.finished()

    # python doesn't have shared memory. That means that every worker during
    # reaction filtering must maintain its own copy of the molecules.
    # for this reason, it is good to remove attributes that are only used
    # during species filtering.
    log_message("clearing unneeded attributes")
    species_filtering_only_attributes = (
        "partial_charges_resp",
        "partial_charges_mulliken",
        "partial_charges_nbo",
        "partial_spins_nbo",
        "atom_locations",
    )

    for mol in mol_entries_filtered:
        for attribute in species_filtering_only_attributes:
            delattr(mol, attribute)

    # currently, take lowest energy mol in each iso class
    log_message("applying non local filters")

    # molecules are first bucketed by tag, then each bucket is split
    # into true isomorphism classes, and each class is collapsed to
    # its lowest weight member
    mol_entries = [
        min(iso_group, key=coordimer_weight)
        for tag_group in sort_into_tags(mol_entries_filtered).values()
        for iso_group in groupby(really_covalent_isomorphic, tag_group)
    ]

    log_message("assigning indices")

    for position, entry in enumerate(mol_entries):
        entry.ind = position

    log_message("creating molecule entry pickle")
    # ideally we would serialize mol_entries to a json
    # some of the auxiliary data we compute
    # has frozen set keys, so doesn't serialize well into json format.
    # pickles work better in this setting
    with open(mol_entries_pickle_location, 'wb') as pickle_file:
        pickle.dump(mol_entries, pickle_file)

    log_message("species filtering finished. " +
                str(len(mol_entries)) +
                " species")

    return mol_entries
def run_decision_tree(mol_entry,
                      decision_tree,
                      decision_pathway=None):
    """
    walk mol_entry through decision_tree and return True if the walk
    ends at Terminal.KEEP, False if it ends at any other Terminal.

    At a list node the questions are asked in order and the node
    paired with the first question answering True is visited next.
    If decision_pathway is a list, every question which answered True
    and the terminal which was reached are appended to it.

    An Exception is raised when the walk ends on something which is
    neither a list nor a Terminal, which includes the case where no
    question at a list node answered True.
    """
    recording = decision_pathway is not None
    node = decision_tree

    while type(node) is list:
        for question, child in node:
            if question(mol_entry):
                # record the edge we are about to follow
                if recording:
                    decision_pathway.append(question)

                node = child
                break
        else:
            # stuck: nothing answered True at this node
            node = None

    if type(node) is not Terminal:
        print(node)
        raise Exception("unexpected node type reached")

    if recording:
        decision_pathway.append(node)

    return node == Terminal.KEEP


class metal_ion_filter(MSONable):
    """
    only positively charged metal ions are allowed: answers True for
    a molecule whose formula is in m_formulas and whose charge is
    zero or negative.
    """
    def __init__(self):
        pass

    def __call__(self, mol_entry):
        return bool(
            mol_entry.formula in m_formulas and
            mol_entry.charge <= 0)


class mol_not_connected(MSONable):
    """answers True when the graph of the molecule is not connected"""
    def __init__(self):
        pass

    def __call__(self, mol):
        connected = nx.is_connected(mol.graph)
        return not connected


class spin_multiplicity_filter(MSONable):
    """
    always answers False. For a molecule with spin multiplicity 2, the
    penalty of the molecule is increased by one when two or more atoms
    have an nbo partial spin above the threshold.
    """
    def __init__(self, threshold):
        self.threshold = threshold

    def __call__(self, mol):
        if mol.spin_multiplicity == 2:
            atoms_above_threshold = sum(
                1 for atom in range(mol.num_atoms)
                if mol.partial_spins_nbo[atom] > self.threshold)

            if atoms_above_threshold >= 2:
                mol.penalty += 1

        return False


class positive_penalty(MSONable):
    """answers True when the penalty of the molecule is positive"""
    def __init__(self):
        pass

    def __call__(self, mol):
        return bool(mol.penalty > 0)
class add_star_hashes(MSONable):
    """
    always answers False. For every atom index which is not in
    mol.m_inds, stores in mol.star_hashes the graph hash of the radius
    one neighborhood of that atom in the covalent graph.
    """
    def __init__(self):
        pass

    def __call__(self, mol):
        for atom in range(mol.num_atoms):
            if atom in mol.m_inds:
                continue

            star = nx.generators.ego.ego_graph(
                mol.covalent_graph,
                atom,
                1,
                undirected=True)

            mol.star_hashes[atom] = weisfeiler_lehman_graph_hash(
                star,
                node_attr='specie')

        return False


class add_unbroken_fragment(MSONable):
    """
    always answers False. Unless the formula of the molecule is in
    m_formulas, appends to mol.fragment_data the fragment complex
    with no broken bonds, whose single fragment is the molecule.
    """
    def __init__(self):
        pass

    def __call__(self, mol):
        if mol.formula not in m_formulas:
            # one fragment, zero bonds broken
            whole_molecule = FragmentComplex(
                1,
                0,
                [],
                [mol.covalent_hash])

            mol.fragment_data.append(whole_molecule)

        return False


class add_single_bond_fragments(MSONable):
    """
    always answers False. Unless the formula of the molecule is in
    m_formulas, appends to mol.fragment_data one fragment complex per
    edge of the covalent graph, holding the hashes of the connected
    components left after removing that edge.
    """

    def __init__(self):
        pass

    def __call__(self, mol):

        if mol.formula in m_formulas:
            return False

        for edge in mol.covalent_graph.edges:
            # work on a copy so the covalent graph stays intact
            broken_graph = copy.deepcopy(mol.covalent_graph)
            broken_graph.remove_edge(*edge)

            fragment_hashes = [
                weisfeiler_lehman_graph_hash(
                    broken_graph.subgraph(component),
                    node_attr='specie')
                for component in
                nx.algorithms.components.connected_components(broken_graph)
            ]

            mol.fragment_data.append(
                FragmentComplex(
                    len(fragment_hashes),
                    1,
                    [edge[0:2]],
                    fragment_hashes))

        return False


class has_covalent_ring(MSONable):
    """
    answers True when the covalent graph of the molecule is not a
    tree. The answer is also stored on the molecule as
    mol.has_covalent_ring, and when it is True mol.ring_fragment_data
    is set to an empty list.
    """
    def __init__(self):
        pass

    def __call__(self, mol):
        # if mol is a metal, mol.covalent_graph is empty
        mol.has_covalent_ring = (
            mol.formula not in m_formulas and
            not nx.is_tree(mol.covalent_graph))

        if mol.has_covalent_ring:
            mol.ring_fragment_data = []

        return mol.has_covalent_ring
class covalent_ring_fragments(MSONable):
    """
    always answers False. Appends to mol.ring_fragment_data one
    fragment complex for each pair of ring edges whose joint removal
    disconnects the covalent graph, subject to the conditions checked
    below. mol.ring_fragment_data is expected to exist already; in
    this file it is created by has_covalent_ring.
    """
    def __init__(self):
        pass

    def __call__(self, mol):
        # maps edge to graph with that edge removed
        # only edges whose removal leaves the graph connected are kept,
        # i.e. edges which lie on a ring
        ring_edges = {}

        for edge in mol.covalent_graph.edges:
            h = copy.deepcopy(mol.covalent_graph)
            h.remove_edge(*edge)
            if nx.is_connected(h):
                ring_edges[edge] = {
                    'modified_graph' : h,
                    'node_set' : set([edge[0],edge[1]])
                }


        for ring_edge_1, ring_edge_2 in combinations(ring_edges,2):

            # only pairs of ring edges which share no atom are considered
            if ring_edges[ring_edge_1]['node_set'].isdisjoint(
                    ring_edges[ring_edge_2]['node_set']):


                # the four edges which would join an end of the first
                # ring edge to an end of the second one.
                # NOTE(review): these are 3-tuples ending in 0, so the
                # membership test below presumably relies on edges
                # being (u, v, key) triples of a multigraph, and it
                # only matches one orientation of each edge - confirm
                # that edges are always stored in this orientation.
                potential_edges = [ (ring_edge_1[0], ring_edge_2[0],0),
                                    (ring_edge_1[0], ring_edge_2[1],0),
                                    (ring_edge_1[1], ring_edge_2[0],0),
                                    (ring_edge_1[1], ring_edge_2[1],0) ]

                # the pair qualifies if some ring edge joins the two
                one_bond_away = False
                for ring_edge_3 in ring_edges:
                    if ring_edge_3 in potential_edges:
                        one_bond_away = True

                if one_bond_away:
                    # graph with both ring edges removed
                    h = copy.deepcopy(ring_edges[ring_edge_1]['modified_graph'])
                    h.remove_edge(*ring_edge_2)
                    if nx.is_connected(h):
                        # removing both edges did not split the molecule
                        continue
                    else:
                        fragments = []
                        connected_components = nx.algorithms.components.connected_components(h)
                        for c in connected_components:

                            subgraph = h.subgraph(c)

                            fragment_hash = weisfeiler_lehman_graph_hash(
                                subgraph,
                                node_attr='specie')


                            fragments.append(fragment_hash)

                        # two bonds broken, recorded without edge keys
                        fragment_complex = FragmentComplex(
                            len(fragments),
                            2,
                            [ring_edge_1[0:2], ring_edge_2[0:2]],
                            fragments)

                        mol.ring_fragment_data.append(fragment_complex)

        return False


class metal_complex(MSONable):
    """
    answers True when the covalent graph of the molecule is not
    connected. Molecules whose formula is in m_formulas always get
    False.
    """
    def __init__(self):
        pass

    def __call__(self, mol):
        # if mol is a metal, it isn't a metal complex
        if mol.formula in m_formulas:
            return False

        return not nx.is_connected(mol.covalent_graph)
class fix_hydrogen_bonding(MSONable):
    """
    always answers False. In a molecule with more than one atom, every
    hydrogen keeps only the bond to its closest bonded neighbor; all
    its other bonds are removed from mol.graph and, when the neighbor
    is a node of mol.covalent_graph, from mol.covalent_graph as well.

    A hydrogen which has no bonds at all is left untouched.
    """
    def __init__(self):
        pass

    def __call__(self, mol):
        if mol.num_atoms > 1:
            for i in range(mol.num_atoms):
                if mol.species[i] != 'H':
                    continue

                # (neighbor, squared distance) for every bond of atom i
                adjacent_atoms = []

                for bond in mol.graph.edges:
                    if i in bond[0:2]:

                        if i == bond[0]:
                            adjacent_atom = bond[1]
                        else:
                            adjacent_atom = bond[0]

                        displacement = (mol.atom_locations[adjacent_atom] -
                                        mol.atom_locations[i])

                        # squared length is enough for comparing
                        dist = np.inner(displacement, displacement)

                        adjacent_atoms.append((adjacent_atom, dist))

                # a hydrogen without bonds has nothing to fix, and
                # min() would raise ValueError on the empty list
                if not adjacent_atoms:
                    continue

                closest_atom, _ = min(adjacent_atoms, key=lambda pair: pair[1])

                # edges are only removed here, after the loop over
                # mol.graph.edges has finished
                for adjacent_atom, _ in adjacent_atoms:
                    if adjacent_atom != closest_atom:
                        mol.graph.remove_edge(i, adjacent_atom)
                        if adjacent_atom in mol.covalent_graph:
                            mol.covalent_graph.remove_edge(i, adjacent_atom)

        return False


class bad_metal_coordination(MSONable):
    """
    answers True for a molecule which contains an element from metals
    but has zero coordination bonds. Molecules whose formula is in
    m_formulas always get False.
    """
    def __init__(self):
        pass

    def __call__(self, mol):
        if mol.formula in m_formulas:
            return False

        contains_metal = len(metals.intersection(set(mol.species))) > 0

        return contains_metal and mol.number_of_coordination_bonds == 0
class set_solvation_free_energy(MSONable):
    """
    always answers False. Sets mol.number_of_coordination_bonds and
    mol.solvation_free_energy.

    metal atoms coordinate with the surrounding solvent, so the free
    energy is corrected to take this into account. For every metal
    atom the correction is
    solvation_correction * (
    max_number_of_coordination_bonds -
    number_of_coordination_bonds_of_that_atom),
    where the three quantities are looked up in solvation_env under
    the species name followed by "_1" or "_2".

    An atom counts as a coordination partner of a metal atom when it
    lies within the coordination radius and at least one of its resp,
    mulliken or nbo partial charges is negative. An edge between the
    two atoms is added to mol.graph if there is none yet.
    """

    def __init__(self, solvation_env):
        self.solvation_env = solvation_env

    def __call__(self, mol):
        env = self.solvation_env
        correction = 0.0
        mol.number_of_coordination_bonds = 0

        for metal in mol.m_inds:

            species = mol.species[metal]
            partial_charge = mol.partial_charges_nbo[metal]

            # the nbo partial charge decides which parameter set is used
            if partial_charge < 1.2:
                effective_charge = "_1"
            elif partial_charge >= 1.2:
                effective_charge = "_2"

            coordination_partners = []
            species_charge = species + effective_charge
            radius = env["coordination_radius"][species_charge]

            for other in range(mol.num_atoms):
                if other == metal:
                    continue

                displacement_vector = (
                    mol.atom_locations[other] -
                    mol.atom_locations[metal])

                # compare squared distances, no square root needed
                within_radius = np.inner(
                    displacement_vector,
                    displacement_vector) < radius ** 2

                if within_radius and (
                        mol.partial_charges_resp[other] < 0 or
                        mol.partial_charges_mulliken[other] < 0 or
                        mol.partial_charges_nbo[other] < 0):

                    if not mol.graph.has_edge(metal, other):
                        mol.graph.add_edge(metal, other)

                    coordination_partners.append(other)

            number_of_coordination_bonds = len(coordination_partners)
            mol.number_of_coordination_bonds += number_of_coordination_bonds
            correction += env["solvation_correction"][species_charge] * (
                env["max_number_of_coordination_bonds"][species_charge] -
                number_of_coordination_bonds)

        mol.solvation_free_energy = correction + mol.free_energy
        return False


class species_default_true(MSONable):
    """answers True for every molecule"""
    def __init__(self):
        pass

    def __call__(self, mol):
        return True


def compute_graph_hashes(mol):
    """
    always answers False. Stores the graph hash of mol.graph as
    mol.total_hash and the graph hash of mol.covalent_graph as
    mol.covalent_hash, both taking the 'specie' node attribute into
    account.
    """
    for attribute, graph in (
            ("total_hash", mol.graph),
            ("covalent_hash", mol.covalent_graph)):

        setattr(
            mol,
            attribute,
            weisfeiler_lehman_graph_hash(graph, node_attr='specie'))

    return False
class neutral_metal_filter(MSONable):
    """
    answers True when some atom listed in mol.m_inds is an element
    from metals and has an nbo partial charge below the cutoff.
    """
    def __init__(self, cutoff):
        self.cutoff = cutoff

    def __call__(self, mol):
        return any(
            mol.species[atom] in metals and
            mol.partial_charges_nbo[atom] < self.cutoff
            for atom in mol.m_inds)


class charge_too_big(MSONable):
    """answers True when the charge is above 1 or below -1"""
    def __init__(self):
        pass

    def __call__(self, mol):
        return bool(mol.charge > 1 or mol.charge < -1)


# any species filter which modifies bonding has to come before
# any filter checking for connectivity (which includes the metal-centric complex filter)

# questions such as fix_hydrogen_bonding, set_solvation_free_energy,
# compute_graph_hashes and the add_* questions always answer False, so
# they are run for their effect on the molecule and the terminal paired
# with them is never reached.

li_species_decision_tree = [
    (fix_hydrogen_bonding(), Terminal.KEEP),
    (set_solvation_free_energy(li_ec), Terminal.KEEP),
    (charge_too_big(), Terminal.DISCARD),
    (neutral_metal_filter(0.1), Terminal.DISCARD),
    (compute_graph_hashes, Terminal.KEEP),
    (metal_ion_filter(), Terminal.DISCARD),
    (bad_metal_coordination(), Terminal.DISCARD),
    (mol_not_connected(), Terminal.DISCARD),
    (metal_complex(), Terminal.DISCARD),
    (spin_multiplicity_filter(0.4), Terminal.DISCARD),
    (add_star_hashes(), Terminal.KEEP),
    (add_unbroken_fragment(), Terminal.KEEP),
    (add_single_bond_fragments(), Terminal.KEEP),
    # (has_covalent_ring(), [
    #     (covalent_ring_fragments(), Terminal.KEEP),
    #     (species_default_true(), Terminal.KEEP)
    # ]),
    (species_default_true(), Terminal.KEEP)
]

mg_species_decision_tree = [
    (fix_hydrogen_bonding(), Terminal.KEEP),
    (set_solvation_free_energy(mg_g2), Terminal.KEEP),
    (neutral_metal_filter(0.5), Terminal.DISCARD),
    (compute_graph_hashes, Terminal.KEEP),
    (metal_ion_filter(), Terminal.DISCARD),
    (bad_metal_coordination(), Terminal.DISCARD),
    (mol_not_connected(), Terminal.DISCARD),
    (metal_complex(), Terminal.DISCARD),
    (add_star_hashes(), Terminal.KEEP),
    (add_unbroken_fragment(), Terminal.KEEP),
    (add_single_bond_fragments(), Terminal.KEEP),
    (species_default_true(), Terminal.KEEP)
]
(add_star_hashes(), Terminal.KEEP), 474 | (add_unbroken_fragment(), Terminal.KEEP), 475 | (add_single_bond_fragments(), Terminal.KEEP), 476 | (species_default_true(), Terminal.KEEP) 477 | ] 478 | 479 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 2 | 3 | (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 4 | 5 | (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 6 | 7 | (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 10 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 11 | 12 | You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such Enhancements or derivative works thereof, in binary and source code form. 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![logo](./logo_dark.png#gh-dark-mode-only) 2 | ![logo](./logo.png#gh-light-mode-only) 3 | 4 | HiPRGen (**Hi**_gh_ **P**_erformance_ **R**_eaction_ **Gen**_eration_) is a python module for constructing reaction networks via exhaustive reaction enumeration and filtering decision trees with the capacity to be applied to systems with hundreds of billions of possible reactions. HiPRGen is built on top of [MPI4py](https://mpi4py.readthedocs.io/en/stable/) which facilitates multi-node parallelism. 
5 | 6 | ### Installation 7 | 8 | HiPRGen depends on `pymatgen`, `openbabel`, `pygraphviz`, `pycairo` and `mpi4py`. In our experience, the Conda version of MPI4py does not work consistently, so we use the [nix package manager](https://nixos.org/) to get HiPRGen running on a wide range of systems. Instructions for installing nix can be found [here](https://nixos.org/download.html). 9 | 10 | The whole process looks like this: 11 | ``` 12 | # The first step requires sudo to create the directory /nix as root. 13 | # Run the NixOS install script below and follow the prompts. 14 | # Note: On Linux, instructions for uninstalling nix can be found with a quick 15 | # web search. On MacOS, uninstalling can be accomplished with this script: 16 | # https://gist.github.com/expelledboy/c00aebb004b178cf78b2c9b344526ff6 17 | 18 | sh <(curl -L https://nixos.org/nix/install) --daemon 19 | 20 | # If you have an M1 Mac, you also need to force nix to use x86 binaries 21 | # since some of our dependencies don't have native arm binaries. 22 | # Uncomment and run the following two lines if you have an M1 Mac: 23 | 24 | # mkdir -p ~/.config/nix 25 | # echo "system = x86_64-darwin" > ~/.config/nix/nix.conf 26 | 27 | 28 | # Close your existing terminal and open a new one, then run: 29 | 30 | git clone https://github.com/BlauGroup/HiPRGen 31 | cd HiPRGen 32 | nix-shell 33 | ``` 34 | 35 | HiPRGen is supported for MacOS and Linux and has been tested on MacOS 11.6 and 12.0.1 as well as Ubuntu 21.10. Installation should take less than five minutes.
36 | 37 | 38 | ### Running on the LRC cluster 39 | 40 | On the LRC cluster, an environment where HiPRGen can be run is set up as follows: 41 | 42 | ``` 43 | module load anaconda3/2024.02-1-11.4 44 | conda init 45 | 46 | logout, log back in 47 | 48 | module load gcc/11.4.0 49 | module load openmpi/4.1.6 50 | 51 | pip3 install --user mpi4py 52 | conda create -n HiPRGen_RNMC python=3.8 53 | conda activate HiPRGen_RNMC 54 | conda install -c conda-forge openbabel pygraphviz pycairo 55 | pip install pymatgen==2023.3.10 56 | pip install pydantic==V1.10.12 57 | 58 | cd $PROJ 59 | git clone https://github.com/BlauGroup/RNMC.git 60 | cd RNMC 61 | module load gsl 62 | CXX=g++ make 63 | export PATH=$PATH:$PROJ/RNMC/GMC 64 | 65 | 66 | can pick up from reloading the environment: 67 | 68 | conda activate HiPRGen_RNMC 69 | module load gcc/11.4.0 70 | module load openmpi/4.1.6 71 | module load gsl 72 | export PATH=$PATH:$PROJ/RNMC/GMC 73 | ``` 74 | 75 | ### Tests 76 | 77 | Once you are in an environment where HiPRGen is installed, the tests can be run with `python test.py 4`. This will run the tests using 4 threads, though you could use as many threads as your machine allows to speed up the execution. Running the tests will populate working directories in `scratch`. Note that `test.py` is heavily commented to explain how to use HiPRGen. With at least 4 threads, the tests should take less than five minutes to run. Along with a variety of other information, the following lines will be printed to standard output to confirm that the tests have passed: 78 | 79 | ``` 80 | mg_test: correct number of species 81 | mg_test: correct number of reactions 82 | li_test: correct number of species 83 | li_test: correct number of reactions 84 | ``` 85 | 86 | Once the tests have finished, you can run `python -i repl.py` and inspect the `network_loader` object, which contains all of the data associated with the test Lithium / Ethylene Carbonate network after running 1000 trajectories. 
Additionally, HiPRGen has a report generation system for visualizing results. For example, in `scratch/li_test`, run `pdflatex LEDC_pathways.tex` to generate a PDF of the top pathways to Lithium Ethylene Dicarbonate (LEDC) in the test Lithium / Ethylene Carbonate network. Explanation of other types of reports and the commands to generate them are given in `test.py`. 87 | 88 | 89 | ### Design 90 | 91 | - Species filtering: This phase loads a JSON generated from our database, constructs molecule entries, filters them by isomorphism, and then runs each molecule through a handcrafted decision tree in `species_questions.py`. The resulting list is then pickled for loading in other phases. The reason we use pickle here instead of JSON is that some of the species questions append non-trivial data structures to the molecule entries which get mangled when serialized to JSON. 92 | 93 | - Bucketing: Now we loop through pairs (A,B) where A and B are molecules in the saved pickle and group them by atom counts. These groups are stored in a bucket database. 94 | 95 | - Reaction filtering + network generation: This is where MPI is used. The program launches a dispatcher process and many filter processes. The filter processes request buckets from the dispatcher, generate all possible reactions from each bucket, run those reactions through a decision tree from `reaction_questions.py`, and then sends the reactions which pass the decision tree back to the dispatcher as they are generated. The dispatcher writes the reactions sent back from the filter processes into the reaction network database. 96 | 97 | - Simulation: Once the reaction network database has been generated, it is provided as an input to [RNMC](https://github.com/BlauGroup/RNMC) which runs simulations and writes them into the reaction network database. This is much more well-suited to Lustre filesystems than an approach involving writing each trajectory to an independent file. 
98 | 99 | - Analysis: HiPRGen also has important primitives for useful analysis. The ReportGenerator class in `report_generator.py` facilitates the construction of a variety of useful PDFs via functions in `mc_analysis.py`, and the NetworkLoader class in `network_loader.py` allows for straightforward interrogation of the network and trajectories while abstracting away the fact that they are stored in a sqlite db. 100 | 101 | The network loader is a great place to start using the codebase and is run as follows: 102 | 103 | ``` 104 | # run from the root directory of HiPRGen after running the tests 105 | from HiPRGen.network_loader import * 106 | 107 | network_loader = NetworkLoader( 108 | './scratch/li_test/rn.sqlite', 109 | './scratch/li_test/mol_entries.pickle', 110 | './scratch/li_test/initial_state.sqlite', 111 | ) 112 | ``` 113 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | (import 2 | ( 3 | let lock = builtins.fromJSON (builtins.readFile ./flake.lock); in 4 | fetchTarball { 5 | url = "https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz"; 6 | sha256 = lock.nodes.flake-compat.locked.narHash; 7 | } 8 | ) 9 | { src = ./.; } 10 | ).defaultNix 11 | -------------------------------------------------------------------------------- /figures/HiPRGen_schematic.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 30 | 35 | 36 | 45 | 50 | 51 | 60 | 65 | 66 | 75 | 80 | 81 | 82 | 102 | 104 | 105 | 107 | image/svg+xml 108 | 110 | 111 | 112 | 113 | 114 | 118 | 126 | species filtering 142 | 150 | speciesbucketing 166 | 174 | 182 | 190 | reactionfiltering 206 | 210 | 214 | 218 | 222 | molecules froma database 238 | reaction networkdatabase 254 | each molecule is run through a decision tree and undesired moleculesare removed 275 | Generate a sequence of 
buckets in which we group the pairs (A) and (A,B) according to atomic composition. Reactants and productsfor a reaction must be taken from a single bucket 306 | For each bucket, generate each possible reactionand run those reactions through a decision tree.Undesirable reactions are removed. The reactionswhich survive are written into a reaction network database. 337 | 338 | 339 | -------------------------------------------------------------------------------- /figures/reaction_decision_tree.svg: -------------------------------------------------------------------------------- 1 | 2 | 18 | 20 | 28 | 33 | 34 | 43 | 48 | 49 | 58 | 63 | 64 | 73 | 78 | 79 | 88 | 93 | 94 | 103 | 108 | 109 | 118 | 123 | 124 | 132 | 137 | 138 | 147 | 152 | 153 | 159 | 164 | 168 | 173 | 176 | 179 | 182 | 183 | 184 | 185 | 206 | 208 | 209 | 211 | image/svg+xml 212 | 214 | 215 | 216 | 217 | 218 | 222 | 228 | 233 | 238 | 243 | 248 | 253 | 259 | 265 | 271 | 277 | 283 | 288 | 293 | 298 | 304 | 310 | 316 | 322 | 327 | 331 | 336 | 340 | 345 | 350 | 355 | 360 | 371 | dG abovethreshold 387 | is redox 398 | star count 409 | fragmentmatching 425 | default true 436 | dchargetoo large 452 | too many mols 468 | default true 479 | not isomorphic 490 | 495 | 499 | 500 | 501 | -------------------------------------------------------------------------------- /figures/species_decision_tree.svg: -------------------------------------------------------------------------------- 1 | 2 | 18 | 20 | 28 | 33 | 34 | 42 | 47 | 48 | 57 | 62 | 63 | 72 | 77 | 78 | 87 | 92 | 93 | 102 | 107 | 108 | 116 | 121 | 122 | 131 | 136 | 137 | 145 | 150 | 151 | 159 | 164 | 165 | 171 | 176 | 180 | 185 | 188 | 191 | 194 | 195 | 196 | 197 | 218 | 220 | 221 | 223 | image/svg+xml 224 | 226 | 227 | 228 | 229 | 230 | 234 | 240 | 246 | 257 | 263 | 269 | 275 | 281 | 287 | 292 | mol notconnected 308 | metalcomplex 324 | 329 | bad Licoord 345 | 350 | fix Hbonding 366 | 371 | 376 | add fraghashes 392 | 397 | 402 | defaulttrue 418 | add starhashes 
434 | 439 | 444 | 450 | 455 | 460 | 466 | 471 | 476 | 481 | 482 | 483 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "RNMC": { 4 | "inputs": { 5 | "flake-compat": "flake-compat", 6 | "nixpkgs": "nixpkgs" 7 | }, 8 | "locked": { 9 | "lastModified": 1662497946, 10 | "narHash": "sha256-z+6rs+ZKzEzuh5Hg//4GhbVa99Jurg49BMeAFgDvBTw=", 11 | "owner": "BlauGroup", 12 | "repo": "RNMC", 13 | "rev": "4130a62df7cadf6dde473dbaa0eb9ca893c1026c", 14 | "type": "github" 15 | }, 16 | "original": { 17 | "owner": "BlauGroup", 18 | "repo": "RNMC", 19 | "type": "github" 20 | } 21 | }, 22 | "flake-compat": { 23 | "flake": false, 24 | "locked": { 25 | "lastModified": 1650374568, 26 | "narHash": "sha256-Z+s0J8/r907g149rllvwhb4pKi8Wam5ij0st8PwAh+E=", 27 | "owner": "edolstra", 28 | "repo": "flake-compat", 29 | "rev": "b4a34015c698c7793d592d66adbab377907a2be8", 30 | "type": "github" 31 | }, 32 | "original": { 33 | "owner": "edolstra", 34 | "repo": "flake-compat", 35 | "type": "github" 36 | } 37 | }, 38 | "flake-compat_2": { 39 | "flake": false, 40 | "locked": { 41 | "lastModified": 1650374568, 42 | "narHash": "sha256-Z+s0J8/r907g149rllvwhb4pKi8Wam5ij0st8PwAh+E=", 43 | "owner": "edolstra", 44 | "repo": "flake-compat", 45 | "rev": "b4a34015c698c7793d592d66adbab377907a2be8", 46 | "type": "github" 47 | }, 48 | "original": { 49 | "owner": "edolstra", 50 | "repo": "flake-compat", 51 | "type": "github" 52 | } 53 | }, 54 | "nixpkgs": { 55 | "locked": { 56 | "lastModified": 1659446231, 57 | "narHash": "sha256-hekabNdTdgR/iLsgce5TGWmfIDZ86qjPhxDg/8TlzhE=", 58 | "owner": "NixOS", 59 | "repo": "nixpkgs", 60 | "rev": "eabc38219184cc3e04a974fe31857d8e0eac098d", 61 | "type": "github" 62 | }, 63 | "original": { 64 | "owner": "NixOS", 65 | "ref": "nixos-21.11", 66 | "repo": "nixpkgs", 67 | "type": "github" 68 | } 69 | }, 70 | "nixpkgs_2": { 71 | "locked": { 
72 | "lastModified": 1659914493, 73 | "narHash": "sha256-lkA5X3VNMKirvA+SUzvEhfA7XquWLci+CGi505YFAIs=", 74 | "owner": "NixOS", 75 | "repo": "nixpkgs", 76 | "rev": "022caabb5f2265ad4006c1fa5b1ebe69fb0c3faf", 77 | "type": "github" 78 | }, 79 | "original": { 80 | "owner": "NixOS", 81 | "ref": "nixos-21.05", 82 | "repo": "nixpkgs", 83 | "type": "github" 84 | } 85 | }, 86 | "root": { 87 | "inputs": { 88 | "RNMC": "RNMC", 89 | "flake-compat": "flake-compat_2", 90 | "nixpkgs": "nixpkgs_2" 91 | } 92 | } 93 | }, 94 | "root": "root", 95 | "version": 7 96 | } 97 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "Multi node reaction network generator"; 3 | 4 | inputs = { 5 | nixpkgs.url = github:NixOS/nixpkgs/nixos-21.05; 6 | RNMC.url = github:BlauGroup/RNMC; 7 | flake-compat = { 8 | url = github:edolstra/flake-compat; 9 | flake = false; 10 | }; 11 | }; 12 | 13 | outputs = { self, nixpkgs, RNMC, flake-compat }: 14 | 15 | let 16 | 17 | HiPRGen = systemString: 18 | with import nixpkgs { system = systemString; }; 19 | with python38Packages; 20 | buildPythonPackage { 21 | pname = "HiPRGen"; 22 | version = "0.2"; 23 | src = ./.; 24 | checkInputs = [ 25 | pymatgen 26 | monty 27 | openbabel-bindings 28 | pygraphviz 29 | mpi4py 30 | pycairo 31 | mpi 32 | (builtins.getAttr systemString RNMC.defaultPackage) 33 | sqlite 34 | openssh # needed for correct MPI functioning 35 | ]; 36 | 37 | checkPhase = "python test.py 2"; 38 | }; 39 | 40 | 41 | genericDevShell = systemString: installHiPRGen: 42 | with import nixpkgs { system = systemString; }; 43 | mkShell { 44 | buildInputs = with python38Packages; [ 45 | pymatgen 46 | monty 47 | openbabel-bindings 48 | pygraphviz 49 | pyright 50 | mpi4py 51 | pycairo 52 | (if installHiPRGen then (HiPRGen systemString) else null) 53 | texlive.combined.scheme-small 54 | mpi 55 | (sqlite.override { interactive 
= true; }) 56 | (builtins.getAttr systemString RNMC.defaultPackage) 57 | ]; 58 | }; 59 | 60 | in { 61 | devShell = { 62 | x86_64-linux = genericDevShell "x86_64-linux" false; 63 | x86_64-darwin = genericDevShell "x86_64-darwin" false; 64 | }; 65 | 66 | defaultPackage = { 67 | x86_64-linux = HiPRGen "x86_64-linux"; 68 | x86_64-darwin = HiPRGen "x86_64-darwin"; 69 | }; 70 | 71 | checks = { 72 | x86_64-linux.tests = HiPRGen "x86_64-linux"; 73 | x86_64-darwin.tests = HiPRGen "x86_64-darwin"; 74 | }; 75 | }; 76 | 77 | } 78 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlauGroup/HiPRGen/a0dddfedc21be0121745e5f33f27ad8aafe796ea/logo.png -------------------------------------------------------------------------------- /logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 30 | 35 | 36 | 44 | 49 | 50 | 59 | 64 | 65 | 73 | 78 | 79 | 87 | 92 | 93 | 101 | 106 | 107 | 115 | 120 | 121 | 129 | 134 | 135 | 143 | 148 | 149 | 157 | 162 | 163 | 164 | 187 | 191 | 192 | 194 | 195 | 197 | image/svg+xml 198 | 200 | 201 | 202 | 203 | 204 | 208 | HiPRGen 219 | 228 | 233 | 238 | 243 | 247 | 251 | 256 | 261 | 266 | 267 | 268 | -------------------------------------------------------------------------------- /logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlauGroup/HiPRGen/a0dddfedc21be0121745e5f33f27ad8aafe796ea/logo_dark.png -------------------------------------------------------------------------------- /repl.py: -------------------------------------------------------------------------------- 1 | from HiPRGen.network_loader import * 2 | from HiPRGen.species_questions import * 3 | 4 | network_loader = NetworkLoader( 5 | './scratch/li_test/rn.sqlite', 6 | './scratch/li_test/mol_entries.pickle', 7 | 
'./scratch/li_test/initial_state.sqlite', 8 | ) 9 | 10 | ec = network_loader.mol_entries[160] 11 | 12 | -------------------------------------------------------------------------------- /run_network_generation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pickle 3 | from mpi4py import MPI 4 | from monty.serialization import loadfn 5 | 6 | from HiPRGen.reaction_filter import ( 7 | dispatcher, 8 | worker, 9 | DISPATCHER_RANK 10 | ) 11 | 12 | 13 | # python run_network_generation.py mol_entries_pickle_file dispatcher_payload.json worker_payload.json 14 | 15 | 16 | comm = MPI.COMM_WORLD 17 | rank = comm.Get_rank() 18 | 19 | mol_entries_pickle_file = sys.argv[1] 20 | dispatcher_payload_json = sys.argv[2] 21 | worker_payload_json = sys.argv[3] 22 | 23 | with open(mol_entries_pickle_file, 'rb') as f: 24 | mol_entries = pickle.load(f) 25 | 26 | 27 | 28 | if rank == DISPATCHER_RANK: 29 | dispatcher_payload = loadfn(dispatcher_payload_json) 30 | dispatcher(mol_entries, 31 | dispatcher_payload 32 | ) 33 | 34 | else: 35 | worker_payload = loadfn(worker_payload_json) 36 | worker(mol_entries, 37 | worker_payload 38 | ) 39 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='HiPRGen', 4 | version='0.1', 5 | description='HiPRGen', 6 | url='https://github.com/BlauGroup/HiPRGen', 7 | author='Daniel Barter', 8 | author_email='danielbarter@gmail.com', 9 | license='LBNL', 10 | packages=['HiPRGen'] 11 | ) 12 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | (import 2 | ( 3 | let lock = builtins.fromJSON (builtins.readFile ./flake.lock); in 4 | fetchTarball { 5 | url = 
"https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz"; 6 | sha256 = lock.nodes.flake-compat.locked.narHash; 7 | } 8 | ) 9 | { src = ./.; } 10 | ).shellNix 11 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import subprocess 4 | import sqlite3 5 | import pickle 6 | 7 | 8 | from HiPRGen.network_loader import NetworkLoader 9 | from HiPRGen.initial_state import find_mol_entry_from_xyz_and_charge 10 | from monty.serialization import loadfn, dumpfn 11 | from HiPRGen.species_filter import species_filter 12 | from HiPRGen.bucketing import bucket 13 | from HiPRGen.report_generator import ReportGenerator 14 | from HiPRGen.initial_state import insert_initial_state 15 | from HiPRGen.constants import ROOM_TEMP, Terminal 16 | from HiPRGen.reaction_filter_payloads import ( 17 | DispatcherPayload, 18 | WorkerPayload 19 | ) 20 | 21 | from HiPRGen.species_questions import ( 22 | mg_species_decision_tree, 23 | li_species_decision_tree, 24 | positive_penalty, 25 | species_default_true 26 | ) 27 | 28 | from HiPRGen.reaction_questions import ( 29 | default_reaction_decision_tree, 30 | 31 | ) 32 | 33 | from HiPRGen.mc_analysis import ( 34 | reaction_tally_report, 35 | species_report, 36 | Pathfinding, 37 | SimulationReplayer, 38 | generate_pathway_report, 39 | sink_report, 40 | consumption_report, 41 | redox_report, 42 | coordination_report, 43 | decoordination_report 44 | ) 45 | 46 | # Since HiPRGen uses an end-to-end testing approach rather than testing 47 | # each individual function, we have decided to use the tests as 48 | # documentation, by explaining every single line through the first test. 49 | 50 | 51 | # The first thing you need to consider when using HiPRGen is how many 52 | # worker threads do you want to run. 
HiPRGen can be run with a single 53 | # thread or thousands distributed across several nodes. For reaction 54 | # networks with between ~5000 and ~10000 species, we have found that the 55 | # optimal number of worker threads is between 1000 and 2000. If you try 56 | # and use more than that, the worker threads are going to spend lots of 57 | # time waiting for the dispatcher to get through all of the reactions it 58 | # is being sent, which slows everything down. Fixing this would require 59 | # a more complex distributed system, but it hasn't been an issue even 60 | # for very large reaction networks. 61 | if len(sys.argv) != 2: 62 | print("usage: python test.py number_of_threads") 63 | quit() 64 | 65 | 66 | number_of_threads = sys.argv[1] 67 | 68 | class bcolors: 69 | PASS = '\u001b[32;1m' 70 | FAIL = '\u001b[31;1m' 71 | ENDC = '\u001b[0m' 72 | 73 | 74 | # HiPRGen is organized as a pipeline, where all the relevant data is 75 | # stored in a sqlite database between phases. For this reason, during 76 | # a single run of the full pipeline, it makes sense to store all the 77 | # relevant files in a single directory. We have two test sets, a lithium 78 | # set and a magnesium set. Since the lithium test set is older, we shall 79 | # document that instead of the mg test set. 80 | 81 | if os.path.isdir('./scratch'): 82 | subprocess.run(['rm', '-r', './scratch']) 83 | 84 | subprocess.run(['mkdir', './scratch']) 85 | 86 | 87 | 88 | def li_test(): 89 | 90 | 91 | # folder is where we store all our intermediate databases 92 | folder = './scratch/li_test' 93 | subprocess.run(['mkdir', folder ]) 94 | 95 | # The initial input to the pipeline is a list of LIBE or MADEIRA 96 | # dataset entries. We provide two examples in the data folder. 97 | mol_json = './data/ronald_LIBE.json' 98 | database_entries = loadfn(mol_json) 99 | # The first step of the HiPRGen pipeline is passing the input molecules 100 | # through the species decision tree to discard molecules.
101 | species_decision_tree = li_species_decision_tree 102 | 103 | 104 | # There is one non-local part of species filtering: we consider two 105 | # molecules to be equivalent if they have the same total charge, 106 | # composition, and covalent bonds, even if they have different metal 107 | # coordination, and we choose one such molecule in each "coordimer" 108 | # class using the coordimer weight function. Since most of our logging 109 | # later on is defined in terms of a fixed molecule set, logging for 110 | # the species filtering phase is messy, so ignore the species_report 111 | # argument for now. The second argument is where we store a pickle of 112 | # the filtered molecule entries for use in later phases. 113 | 114 | mol_entries = species_filter( 115 | database_entries, 116 | mol_entries_pickle_location=folder + '/mol_entries.pickle', 117 | species_report=folder + '/unfiltered_species_report.tex', 118 | species_decision_tree=species_decision_tree, 119 | coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy), 120 | ) 121 | 122 | 123 | # Once we have generated our molecule list, we generate the bucket database 124 | # which is how we break up the reaction filtering amongst all available workers. 125 | # It gets stored in the buckets.sqlite database. 126 | bucket(mol_entries, folder + '/buckets.sqlite') 127 | 128 | 129 | # Reaction filtering is parallelized using MPI, so we need to spawn 130 | # an MPI instance to run it. This is why we can't just start 131 | # reaction filtering by calling a python function. We pass the 132 | # reaction decision tree, the logging decision tree, and the electron 133 | # free energy as strings across this barrier. Every possible 134 | # reaction gets passed through both the reaction decision tree and 135 | # the logging decision tree. If a reaction passes the reaction 136 | # decision tree, it gets written to the network.
If a reaction 137 | # passes the logging decision tree, it gets logged to the reaction 138 | # report along with what happened to it in reaction_decision_tree. 139 | 140 | # The reaction decision trees are constructed in 141 | # HiPRGen.reaction_questions 142 | 143 | params = { 144 | 'temperature' : ROOM_TEMP, 145 | 'electron_free_energy' : -1.4 146 | } 147 | 148 | dispatcher_payload = DispatcherPayload( 149 | folder + '/buckets.sqlite', 150 | folder + '/rn.sqlite', 151 | folder + '/reaction_report.tex' 152 | ) 153 | 154 | worker_payload = WorkerPayload( 155 | folder + '/buckets.sqlite', 156 | default_reaction_decision_tree, 157 | params, 158 | Terminal.DISCARD 159 | ) 160 | 161 | 162 | # The dispatcher and worker payloads are passed through the MPI barrier 163 | # as JSON blobs dispatcher_payload and worker_payload 164 | dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json') 165 | dumpfn(worker_payload, folder + '/worker_payload.json') 166 | 167 | subprocess.run( 168 | [ 169 | 'mpirun', 170 | '--use-hwthread-cpus', 171 | '-n', 172 | number_of_threads, 173 | 'python', 174 | 'run_network_generation.py', 175 | folder + '/mol_entries.pickle', 176 | folder + '/dispatcher_payload.json', 177 | folder + '/worker_payload.json' 178 | ] 179 | ) 180 | 181 | # After we have generated the mol_entries, we refer to molecules by 182 | # their index. The function find_mol_entry_from_xyz_and_charge can 183 | # help find the indices of specific species to be used in the initial 184 | # condition for propagating trajectories and/or trajectory analysis. 185 | Li_plus_id = find_mol_entry_from_xyz_and_charge( 186 | mol_entries, 187 | './xyz_files/Li.xyz', 188 | 1) 189 | 190 | EC_id = find_mol_entry_from_xyz_and_charge( 191 | mol_entries, 192 | './xyz_files/EC.xyz', 193 | 0) 194 | 195 | LEDC_id = find_mol_entry_from_xyz_and_charge( 196 | mol_entries, 197 | './xyz_files/LEDC.xyz', 198 | 0) 199 | 200 | 201 | # After generating a reaction network, it is stored in rn.sqlite. 
We 202 | # use Monte Carlo simulation to interrogate the network, and for that 203 | # we need to define an initial condition. 204 | initial_state = { 205 | Li_plus_id : 30, 206 | EC_id : 30 207 | } 208 | 209 | # The initial state and the trajectories (after simulation) are stored in 210 | # a separate database from the network, here called initial_state.sqlite. 211 | # This facilitates running multiple independent simulations of the same 212 | # network with different initial conditions at the same time, if desired. 213 | insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite') 214 | 215 | 216 | # GMC is a high performance reaction network Monte Carlo simulator using the 217 | # Gillespie algorithm: https://github.com/BlauGroup/RNMC. Here we run 1000 218 | # trajectories each of 200 steps. 219 | subprocess.run([ 220 | 'GMC', 221 | '--reaction_database=' + folder + '/rn.sqlite', 222 | '--initial_state_database=' + folder + '/initial_state.sqlite', 223 | '--number_of_simulations=1000', 224 | '--base_seed=1000', 225 | '--thread_count=' + number_of_threads, 226 | '--step_cutoff=200' 227 | ]) 228 | 229 | # The network loader builds a python object around a reaction network 230 | # and the molecules to make it easier to use them. 231 | network_loader = NetworkLoader( 232 | folder + '/rn.sqlite', 233 | folder + '/mol_entries.pickle', 234 | folder + '/initial_state.sqlite' 235 | ) 236 | 237 | network_loader.load_trajectories() 238 | network_loader.load_initial_state() 239 | 240 | 241 | 242 | # HiPRGen has analysis tools to understand what happened in our simulation. 243 | # The output files are written into the same folder in which the reaction 244 | # network is stored. 245 | 246 | # This report is empty, but we use it to generate the molecule pictures. 247 | # This is an expensive operation, so we only want to do it once.
248 | report_generator = ReportGenerator( 249 | network_loader.mol_entries, 250 | folder + '/dummy.tex', 251 | rebuild_mol_pictures=True) 252 | 253 | 254 | # The tally report shows reactions sorted by the number of times fired. 255 | reaction_tally_report( 256 | network_loader, 257 | folder + '/reaction_tally.tex' 258 | ) 259 | # Run `pdflatex reaction_tally.tex` in `scratch/li_test` to generate 260 | # the tally report PDF. 261 | 262 | 263 | # The species report shows every species in the network and their IDs. 264 | species_report(network_loader, folder + '/species_report.tex') 265 | # Run `pdflatex species_report.tex` in `scratch/li_test` to generate 266 | # the species report PDF. 267 | 268 | 269 | # Pathfinding is a central goal of HiPRGen / GMC. See mc_analysis.py for 270 | # further documentation of the Pathfinding class. 271 | pathfinding = Pathfinding(network_loader) 272 | 273 | 274 | # The pathway report shows all the ways that a target species was 275 | # produced in the simulation trajectories, where each simulation only 276 | # contributes the shortest path responsible for the first formation 277 | # of the target species to the report. The report can be sorted by 278 | # pathway frequency, but instead here we sort by pathway cost. Note 279 | # that the test network has ~5000 reactions while production networks 280 | # have between 50-100 million reactions. 281 | generate_pathway_report( 282 | pathfinding, 283 | LEDC_id, 284 | folder + '/LEDC_pathways.tex', 285 | sort_by_frequency=False 286 | ) 287 | # Run `pdflatex LEDC_pathways.tex` in `scratch/li_test` to generate 288 | # the LEDC pathway report PDF. 289 | 290 | 291 | # The simulation replayer sweeps through all trajectories in order 292 | # to extract additional information that is used for consumption 293 | # reports and sink reports.
294 | simulation_replayer = SimulationReplayer(network_loader) 295 | 296 | 297 | # The consumption report shows reactions which consumed a target 298 | # species, sorted by the number of times the reaction fired. 299 | consumption_report(simulation_replayer, 300 | LEDC_id, 301 | folder + '/LEDC_consumption_report.tex') 302 | # Run `pdflatex LEDC_consumption_report.tex` in `scratch/li_test` 303 | # to generate the LEDC consumption report PDF. 304 | 305 | 306 | # The sink report shows species which have a production to 307 | # consumption ratio of greater than 3/2 and which have an expected 308 | # value above 0.1. These are two of the three heuristic criteria 309 | # that we use to identify network products. The third criterion is 310 | # that each network product must have a shortest path with cost 311 | # less than 10. This can be checked by generating pathway reports 312 | # to each species shown in the sink report. For the curious reader, 313 | # we note that generating pathway reports to the six species in the 314 | # sink report will show that only Li2CO3, C2H4, LiEDC-, and DLEMC 315 | # have sufficiently low-cost paths to pass the third criterion and 316 | # thus to be considered products of the test network used here. 317 | sink_report(simulation_replayer, folder + '/sink_report.tex') 318 | # Run `pdflatex sink_report.tex` in `scratch/li_test` to generate 319 | # the sink report PDF.
320 | 321 | 322 | 323 | tests_passed = True 324 | if network_loader.number_of_species == 190: 325 | print(bcolors.PASS + 326 | "li_test: correct number of species" + 327 | bcolors.ENDC) 328 | else: 329 | print(bcolors.FAIL + 330 | "li_test: correct number of species" + 331 | bcolors.ENDC) 332 | tests_passed = False 333 | 334 | 335 | 336 | if network_loader.number_of_reactions == 4921: 337 | print(bcolors.PASS + 338 | "li_test: correct number of reactions" + 339 | bcolors.ENDC) 340 | else: 341 | print(bcolors.FAIL + 342 | "li_test: correct number of reactions" + 343 | bcolors.ENDC) 344 | tests_passed = False 345 | 346 | return tests_passed 347 | 348 | 349 | def mg_test(): 350 | 351 | 352 | folder = './scratch/mg_test' 353 | subprocess.run(['mkdir', folder ]) 354 | 355 | mol_json = './data/sam_G2.json' 356 | species_decision_tree = mg_species_decision_tree 357 | 358 | database_entries = loadfn(mol_json) 359 | 360 | 361 | 362 | mol_entries = species_filter( 363 | database_entries, 364 | folder + '/mol_entries.pickle', 365 | folder + '/unfiltered_species_report.tex', 366 | species_decision_tree, 367 | coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy) 368 | ) 369 | 370 | 371 | 372 | bucket(mol_entries, folder + '/buckets.sqlite') 373 | 374 | 375 | dispatcher_payload = DispatcherPayload( 376 | folder + '/buckets.sqlite', 377 | folder + '/rn.sqlite', 378 | folder + '/reaction_report.tex' 379 | ) 380 | 381 | worker_payload = WorkerPayload( 382 | folder + '/buckets.sqlite', 383 | default_reaction_decision_tree, 384 | { 385 | 'temperature' : ROOM_TEMP, 386 | 'electron_free_energy' : -2.06 387 | }, 388 | Terminal.DISCARD 389 | ) 390 | 391 | 392 | dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json') 393 | dumpfn(worker_payload, folder + '/worker_payload.json') 394 | 395 | subprocess.run( 396 | [ 397 | 'mpiexec', 398 | '--use-hwthread-cpus', 399 | '-n', 400 | number_of_threads, 401 | 'python', 402 | 'run_network_generation.py', 403 | folder + 
'/mol_entries.pickle', 404 | folder + '/dispatcher_payload.json', 405 | folder + '/worker_payload.json' 406 | ] 407 | ) 408 | 409 | 410 | mg_g2_plus_plus_id = find_mol_entry_from_xyz_and_charge( 411 | mol_entries, 412 | './xyz_files/mgg2.xyz', 413 | 2) 414 | 415 | c2h4_id = find_mol_entry_from_xyz_and_charge( 416 | mol_entries, 417 | './xyz_files/c2h4.xyz', 418 | 0) 419 | 420 | c2h6_id = find_mol_entry_from_xyz_and_charge( 421 | mol_entries, 422 | './xyz_files/c2h6.xyz', 423 | 0) 424 | 425 | initial_state = { 426 | 33 : 30, 427 | 81 : 30 428 | } 429 | 430 | 431 | insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite') 432 | 433 | 434 | subprocess.run([ 435 | 'GMC', 436 | '--reaction_database=' + folder + '/rn.sqlite', 437 | '--initial_state_database=' + folder + '/initial_state.sqlite', 438 | '--number_of_simulations=1000', 439 | '--base_seed=1000', 440 | '--thread_count=' + number_of_threads, 441 | '--step_cutoff=200' 442 | ]) 443 | 444 | 445 | 446 | network_loader = NetworkLoader( 447 | folder + '/rn.sqlite', 448 | folder + '/mol_entries.pickle', 449 | folder + '/initial_state.sqlite' 450 | ) 451 | 452 | network_loader.load_trajectories() 453 | network_loader.load_initial_state() 454 | 455 | 456 | 457 | report_generator = ReportGenerator( 458 | network_loader.mol_entries, 459 | folder + '/dummy.tex', 460 | rebuild_mol_pictures=True) 461 | 462 | reaction_tally_report( 463 | network_loader, 464 | folder + '/reaction_tally.tex' 465 | ) 466 | 467 | pathfinding = Pathfinding(network_loader) 468 | 469 | generate_pathway_report( 470 | pathfinding, 471 | c2h6_id, 472 | folder + '/C2H6_pathways.tex', 473 | sort_by_frequency=False 474 | ) 475 | 476 | generate_pathway_report( 477 | pathfinding, 478 | c2h4_id, 479 | folder + '/C2H4_pathways.tex', 480 | sort_by_frequency=False 481 | ) 482 | 483 | 484 | 485 | species_report(network_loader, folder + '/species_report.tex') 486 | 487 | tests_passed = True 488 | if network_loader.number_of_species == 83: 
def flicho_test():
    """Run the flicho pipeline end to end inside ./scratch/flicho_test.

    Steps, in order:
      1. load ./data/flicho_test.json and filter it with the Li species
         decision tree,
      2. bucket the surviving species into buckets.sqlite,
      3. generate the reaction network by launching
         run_network_generation.py under mpirun,
      4. simulate the network with GMC from an initial state that maps the
         Li.xyz (charge 1) and EC.xyz (charge 0) entries to 30 each,
      5. load the trajectories and write the coordination and
         decoordination reports for 'Li1'.

    Returns:
        bool: True once every step has run. Unlike mg_test, no species or
        reaction counts are checked here, so this is a smoke test only.
    """
    folder = './scratch/flicho_test'
    # Exit status is not inspected, so an already existing folder is tolerated.
    subprocess.run(['mkdir', folder])

    mol_json = './data/flicho_test.json'
    database_entries = loadfn(mol_json)
    species_decision_tree = li_species_decision_tree

    mol_entries = species_filter(
        database_entries,
        mol_entries_pickle_location=folder + '/mol_entries.pickle',
        species_report=folder + '/unfiltered_species_report.tex',
        species_decision_tree=species_decision_tree,
        coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy),
    )

    bucket(mol_entries, folder + '/buckets.sqlite')

    params = {
        'temperature': ROOM_TEMP,
        'electron_free_energy': -1.4
    }

    dispatcher_payload = DispatcherPayload(
        folder + '/buckets.sqlite',
        folder + '/rn.sqlite',
        folder + '/reaction_report.tex'
    )

    worker_payload = WorkerPayload(
        folder + '/buckets.sqlite',
        default_reaction_decision_tree,
        params,
        Terminal.DISCARD
    )

    # The payloads are handed to the MPI processes through JSON files.
    dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json')
    dumpfn(worker_payload, folder + '/worker_payload.json')

    subprocess.run(
        [
            'mpirun',
            '--use-hwthread-cpus',
            '-n',
            number_of_threads,
            'python',
            'run_network_generation.py',
            folder + '/mol_entries.pickle',
            folder + '/dispatcher_payload.json',
            folder + '/worker_payload.json'
        ]
    )

    Li_plus_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/Li.xyz',
        1)

    EC_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/EC.xyz',
        0)

    initial_state = {
        Li_plus_id: 30,
        EC_id: 30
    }

    insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite')

    subprocess.run([
        'GMC',
        '--reaction_database=' + folder + '/rn.sqlite',
        '--initial_state_database=' + folder + '/initial_state.sqlite',
        '--number_of_simulations=1000',
        '--base_seed=1000',
        '--thread_count=' + number_of_threads,
        '--step_cutoff=200'
    ])

    network_loader = NetworkLoader(
        folder + '/rn.sqlite',
        folder + '/mol_entries.pickle',
        folder + '/initial_state.sqlite'
    )

    network_loader.load_trajectories()
    network_loader.load_initial_state()

    # NOTE(review): the instance is never used afterwards; presumably it is
    # built for the side effect of rebuild_mol_pictures=True -- confirm.
    report_generator = ReportGenerator(
        network_loader.mol_entries,
        folder + '/dummy.tex',
        rebuild_mol_pictures=True)

    coordination_report(
        network_loader,
        folder + '/coodination_report.tex',
        'Li1',
        1)

    decoordination_report(
        network_loader,
        folder + '/decoodination_report.tex',
        'Li1',
        1)

    # The runner at the bottom of this file does `if not test(): exit(1)`.
    # Falling off the end would return None and be treated as a failure,
    # so report success explicitly.
    return True
C 1.112995 0.987830 -0.090866 9 | H 2.245948 -0.740544 -0.861775 10 | H 1.886705 -0.803461 0.888700 11 | H 1.209475 1.427764 -1.089357 12 | H 1.696777 1.577418 0.621041 -------------------------------------------------------------------------------- /xyz_files/EMC.xyz: -------------------------------------------------------------------------------- 1 | 15 2 | H8 C4 O3 3 | O 0.663253 -0.458691 -0.000071 4 | O -1.470026 -0.780853 -0.000115 5 | O -0.701591 1.344432 -0.000727 6 | C 1.816688 0.416703 -0.000306 7 | C 3.048419 -0.455543 -0.000119 8 | C -0.517921 0.149803 -0.000318 9 | C -2.816792 -0.277655 -0.000423 10 | H 1.771371 1.051801 -0.888046 11 | H 1.771403 1.052229 0.887131 12 | H 3.936849 0.181457 -0.000297 13 | H 3.081074 -1.090123 0.889135 14 | H 3.081029 -1.090559 -0.889062 15 | H -3.000950 0.320857 0.893103 16 | H -3.457001 -1.158047 -0.000275 17 | H -3.000716 0.320389 -0.894309 -------------------------------------------------------------------------------- /xyz_files/LEDC.xyz: -------------------------------------------------------------------------------- 1 | 16 2 | 3 | O -2.255868 2.650457 0.012551 4 | C -1.809275 1.473031 0.015686 5 | O -2.557967 0.452047 0.026218 6 | O -0.474738 1.328948 0.007241 7 | C 0.021386 -0.012379 0.010851 8 | O 2.027099 -1.232759 0.003431 9 | C 3.361634 -1.376843 -0.005202 10 | C 1.530975 0.108567 -0.000226 11 | O 4.110324 -0.355861 -0.015950 12 | O 3.808227 -2.554269 -0.002085 13 | Li -3.876543 1.762975 0.028402 14 | Li 5.428900 -1.666789 -0.018264 15 | H -0.336046 -0.551480 -0.869288 16 | H -0.323794 -0.542177 0.901467 17 | H 1.876161 0.638342 -0.890852 18 | H 1.888400 0.647692 0.879903 -------------------------------------------------------------------------------- /xyz_files/Li.xyz: -------------------------------------------------------------------------------- 1 | 1 2 | 3 | Li -0.0 0.0 0.0 -------------------------------------------------------------------------------- /xyz_files/bh4.xyz: 
-------------------------------------------------------------------------------- 1 | 6 2 | Mg1 B1 H4 3 | B -1.631105 1.932915 -0.945766 4 | Mg -0.083010 0.900879 -0.063538 5 | H -0.726964 2.699089 -0.546295 6 | H -1.099810 1.124588 -1.738164 7 | H -2.524664 2.530521 -1.454051 8 | H -2.015696 1.273997 0.044884 9 | -------------------------------------------------------------------------------- /xyz_files/c2h4.xyz: -------------------------------------------------------------------------------- 1 | 6 2 | 3 | C -3.58659 1.25899 0.00000 4 | C -2.57512 2.12413 0.00000 5 | H -4.61135 1.61604 0.00000 6 | H -3.39269 0.19127 0.00000 7 | H -2.76902 3.19185 0.00000 8 | H -1.55036 1.76708 0.00000 9 | 10 | -------------------------------------------------------------------------------- /xyz_files/c2h6.xyz: -------------------------------------------------------------------------------- 1 | 8 2 | 3 | C -2.70568 3.13768 -0.10765 4 | C -1.27855 2.82617 0.30963 5 | H -3.31660 2.21084 -0.08117 6 | H -3.14664 3.88380 0.58648 7 | H -0.83759 2.08005 -0.38450 8 | H -0.66763 3.75302 0.28314 9 | H -1.26913 2.41380 1.34055 10 | H -2.71510 3.55006 -1.13858 11 | 12 | -------------------------------------------------------------------------------- /xyz_files/co.xyz: -------------------------------------------------------------------------------- 1 | 2 2 | C1 O1 3 | C 1.569086 -0.155515 0.000000 4 | O 1.931000 -1.221679 0.000000 5 | -------------------------------------------------------------------------------- /xyz_files/fec.xyz: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | O 0.29622 -1.26484 0.00459 4 | C -0.71392 -0.39195 0.05900 5 | C 1.47337 -0.56226 -0.09741 6 | O -1.88221 -0.73026 0.15091 7 | O -0.32657 0.88553 0.00354 8 | C 1.04401 0.91938 -0.09488 9 | H 1.98305 -0.82798 -1.04841 10 | H 2.12811 -0.79969 0.76859 11 | H 1.34411 1.42831 -1.03596 12 | F 1.57240 1.58508 0.99721 13 | 
-------------------------------------------------------------------------------- /xyz_files/h.xyz: -------------------------------------------------------------------------------- 1 | 1 2 | 3 | H 0.00000 0.00000 0.00000 4 | 5 | -------------------------------------------------------------------------------- /xyz_files/h2.xyz: -------------------------------------------------------------------------------- 1 | 2 2 | H2 3 | H -4.597931 4.247288 0.000000 4 | H -3.913329 4.539862 0.000000 5 | -------------------------------------------------------------------------------- /xyz_files/h2o.xyz: -------------------------------------------------------------------------------- 1 | 3 2 | 3 | O 4.05339 -0.01560 -3.14170 4 | H 3.59828 -0.71554 -3.67421 5 | H 3.42328 0.18415 -2.40436 6 | 7 | -------------------------------------------------------------------------------- /xyz_files/lemc.xyz: -------------------------------------------------------------------------------- 1 | 13 2 | Li1 H5 C3 O4 3 | C -0.040178 0.049609 0.056991 4 | C 1.464019 0.129795 -0.096416 5 | O 1.939947 -1.220713 -0.140556 6 | C 3.268548 -1.390984 -0.283742 7 | O 4.031683 -0.391403 -0.372522 8 | O 3.677600 -2.581216 -0.321202 9 | O -0.519156 1.390344 0.101596 10 | Li 5.369383 -1.742058 -0.509851 11 | H -0.293172 -0.487928 0.978851 12 | H -0.472876 -0.493287 -0.792022 13 | H 1.909895 0.657731 0.750360 14 | H 1.729926 0.652426 -1.018663 15 | H -1.475441 1.363691 0.199752 16 | 17 | -------------------------------------------------------------------------------- /xyz_files/li2co3_0.xyz: -------------------------------------------------------------------------------- 1 | 6 2 | 3 | O 0.60173 0.50763 -0.37574 4 | C -0.56879 0.05546 -0.58675 5 | O -1.25942 0.59711 -1.59229 6 | O -1.13674 -0.86917 0.07778 7 | Li -2.57436 -0.60812 -1.07444 8 | Li 0.26670 1.63265 -1.81952 9 | -------------------------------------------------------------------------------- /xyz_files/lico3-.xyz: 
-------------------------------------------------------------------------------- 1 | 5 2 | 3 | O 0.60173 0.50763 -0.37574 4 | C -0.56879 0.05546 -0.58675 5 | O -1.25942 0.59711 -1.59229 6 | O -1.13674 -0.86917 0.07778 7 | Li -2.57436 -0.60812 -1.07444 8 | -------------------------------------------------------------------------------- /xyz_files/mg_tfsi.xyz: -------------------------------------------------------------------------------- 1 | 16 2 | 3 | Mg -1.07367 -0.05831 -0.10101 4 | N 2.42522 0.48257 0.28180 5 | S 1.97819 -1.01170 0.30131 6 | O 2.71976 -1.78463 1.23345 7 | O 0.52148 -1.19624 0.31622 8 | C 2.44746 -1.59970 -1.38265 9 | F 3.74089 -1.41324 -1.58481 10 | F 1.74977 -0.91312 -2.28951 11 | F 2.15807 -2.89078 -1.49214 12 | S 1.56877 1.75875 0.00380 13 | O 0.18542 1.46730 -0.38547 14 | O 2.25872 2.72199 -0.77824 15 | C 1.36941 2.47079 1.69180 16 | F 0.71941 1.58692 2.45436 17 | F 2.55070 2.73020 2.22546 18 | F 0.65515 3.58612 1.61328 19 | 20 | -------------------------------------------------------------------------------- /xyz_files/mgg2.xyz: -------------------------------------------------------------------------------- 1 | 24 2 | 3 | C -0.58619 2.06146 2.11605 4 | O 0.39894 1.41095 1.27517 5 | C 1.76771 1.64962 1.70374 6 | C 2.67452 1.22553 0.57269 7 | O 2.22129 -0.08666 0.16701 8 | C 2.79099 -0.59837 -1.05985 9 | C 1.98609 -1.82657 -1.41402 10 | O 0.58062 -1.46778 -1.31578 11 | C -0.30120 -2.44470 -1.92299 12 | Mg 0.22994 -0.19511 0.14700 13 | H -1.56956 1.79800 1.72742 14 | H -0.47741 1.70973 3.14381 15 | H -0.43928 3.14078 2.06004 16 | H 1.94890 1.07287 2.61481 17 | H 1.89565 2.71432 1.90845 18 | H 3.71161 1.17496 0.91037 19 | H 2.59464 1.90428 -0.28226 20 | H 3.83994 -0.85874 -0.90473 21 | H 2.70939 0.17232 -1.83268 22 | H 2.17859 -2.65638 -0.72838 23 | H 2.20312 -2.13484 -2.43863 24 | H -1.32297 -2.09719 -1.77255 25 | H -0.07983 -2.50391 -2.98935 26 | H -0.15570 -3.41490 -1.44401 27 | 28 | 
-------------------------------------------------------------------------------- /xyz_files/mgthf.xyz: -------------------------------------------------------------------------------- 1 | 14 2 | 3 | O -0.26913 1.16414 -0.37632 4 | C 0.21081 0.01062 0.29888 5 | C 1.72560 0.25603 0.40696 6 | C 2.05071 1.25072 -0.72485 7 | C 0.71039 1.93708 -1.05693 8 | Mg -1.97979 1.48155 -0.55902 9 | H -0.00259 -0.87752 -0.30209 10 | H -0.25524 -0.08886 1.28066 11 | H 2.28855 -0.67533 0.29742 12 | H 1.96179 0.70511 1.37597 13 | H 2.41692 0.71117 -1.60407 14 | H 2.80766 1.97676 -0.41289 15 | H 0.51358 1.92340 -2.13239 16 | H 0.68510 2.96650 -0.69382 17 | 18 | -------------------------------------------------------------------------------- /xyz_files/n2.xyz: -------------------------------------------------------------------------------- 1 | 2 2 | 3 | N -4.24957 0.48001 1.92033 4 | N -3.20506 0.88382 1.93918 5 | 6 | -------------------------------------------------------------------------------- /xyz_files/no.xyz: -------------------------------------------------------------------------------- 1 | 2 2 | 3 | N -7.70920 -0.43579 2.25458 4 | O -6.68552 -0.15968 2.16050 5 | 6 | -------------------------------------------------------------------------------- /xyz_files/oh.xyz: -------------------------------------------------------------------------------- 1 | 2 2 | 3 | O -3.13522 -2.01140 -2.32296 4 | H -2.47296 -1.55935 -1.74187 5 | 6 | --------------------------------------------------------------------------------