├── .github
└── workflows
│ └── linux_tests.yml
├── .gitignore
├── Dockerfiles
└── nersc_Dockerfile
├── HiPRGen
├── bucketing.py
├── constants.py
├── initial_state.py
├── logging.py
├── mc_analysis.py
├── mol_entry.py
├── network_loader.py
├── network_renderer.py
├── reaction_filter.py
├── reaction_filter_payloads.py
├── reaction_questions.py
├── report_generator.py
├── species_filter.py
└── species_questions.py
├── LICENSE
├── README.md
├── data
├── flicho_test.json
├── ronald_LIBE.json
└── sam_G2.json
├── default.nix
├── figures
├── HiPRGen_schematic.svg
├── reaction_decision_tree.svg
└── species_decision_tree.svg
├── flake.lock
├── flake.nix
├── logo.png
├── logo.svg
├── logo_dark.png
├── repl.py
├── run_network_generation.py
├── setup.py
├── shell.nix
├── test.py
└── xyz_files
├── EC.xyz
├── EMC.xyz
├── LEDC.xyz
├── Li.xyz
├── bh4.xyz
├── c2h4.xyz
├── c2h6.xyz
├── co.xyz
├── fec.xyz
├── h.xyz
├── h2.xyz
├── h2o.xyz
├── lemc.xyz
├── li2co3_0.xyz
├── lico3-.xyz
├── mg_tfsi.xyz
├── mgg2.xyz
├── mgthf.xyz
├── n2.xyz
├── no.xyz
└── oh.xyz
/.github/workflows/linux_tests.yml:
--------------------------------------------------------------------------------
1 | name: "linux tests"
2 | on:
3 | pull_request:
4 | push:
5 | branches:
6 | - main
7 | jobs:
8 | tests:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v2.4.0
12 | - uses: cachix/install-nix-action@v16
13 | with:
14 | nix_path: nixpkgs=channel:nixos-21.05
15 | - run: nix flake check
16 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .ipynb_checkpoints/
3 | scratch
--------------------------------------------------------------------------------
/Dockerfiles/nersc_Dockerfile:
--------------------------------------------------------------------------------
1 | FROM sleak75/conda-mpi4py-haswell:latest
2 | SHELL ["/bin/bash", "-c"]
3 | WORKDIR /app
4 |
5 | RUN conda install -c conda-forge pymatgen=2022.0.10 openbabel pygraphviz
6 |
7 | # do this to reduce image size:
8 | RUN conda clean -a
9 |
10 | RUN /sbin/ldconfig
--------------------------------------------------------------------------------
/HiPRGen/bucketing.py:
--------------------------------------------------------------------------------
1 | from HiPRGen.mol_entry import MoleculeEntry
2 | from itertools import combinations_with_replacement
3 | import sqlite3
4 |
5 | """
6 | Phase 2: bucketing pairs of species. Input: a filtered list of species
7 | with fixed indices. Output: buckets labeled by atom count, containing
8 | individual species and pairs of species. Description: since each
9 | reaction conserves atom numbers, a concerted reaction only occurs
10 | between elements in a single bucket. There are tricks to reduce the
11 | number of pairs (like not including both (A,B) and (B,A)). If the
12 | number of species is 10,000, there are only about 100 million such
13 | pairs, which is within reach.
14 | """
15 |
16 |
17 |
def bucket(
        mol_entries,
        bucket_db,
        commit_freq=2000,
        group_size=1000):
    """
    Write every species and every unordered pair of species into a
    sqlite database, bucketed by elemental composition.

    Three tables are created in bucket_db:

    complexes (species_1, species_2, composition_id, group_id)
        one row per single species (species_2 is -1) and one row per
        unordered pair of species (a species paired with itself is
        included). Rows sharing a composition are split into
        consecutive groups of at most group_size rows.
    group_counts (composition_id, count)
        the number of groups that actually contain rows for each
        composition.
    compositions (composition_id, composition)
        the composition string (sorted element symbols joined by '_')
        behind each composition_id.

    Args:
        mol_entries: sequence of objects exposing .ind and .species
            (a list of element symbols). It is iterated twice.
        bucket_db: path of the sqlite database to create. The tables
            must not already exist.
        commit_freq: commit after this many inserted complexes.
        group_size: maximum number of rows per group.
    """

    con = sqlite3.connect(bucket_db)

    # try/finally so the connection is released even if an insert fails
    try:
        cur = con.cursor()
        cur.execute(
            "CREATE TABLE complexes (species_1, species_2, composition_id, group_id)")

        composition_ids = {}
        bucket_counts = {}
        rows_written = 0

        def insert_complex(species_1, species_2, composition):
            # shared by the single species pass and the pair pass
            nonlocal rows_written

            if composition not in composition_ids:
                composition_ids[composition] = len(composition_ids)
                bucket_counts[composition] = 0

            # rows 0 .. group_size - 1 of a bucket land in group 0, the
            # next group_size rows in group 1, and so on
            group_id = bucket_counts[composition] // group_size

            cur.execute(
                "INSERT INTO complexes VALUES (?, ?, ?, ?)",
                (species_1,
                 species_2,
                 composition_ids[composition],
                 group_id))

            bucket_counts[composition] += 1

            rows_written += 1
            if rows_written % commit_freq == 0:
                con.commit()

        for m in mol_entries:
            insert_complex(m.ind, -1, '_'.join(sorted(m.species)))

        for (m1, m2) in combinations_with_replacement(mol_entries, 2):
            insert_complex(
                m1.ind,
                m2.ind,
                '_'.join(sorted(m1.species + m2.species)))

        # we create an index on (composition, group_id) so worker processes
        # during reaction filtering can read their work batch faster. It is
        # built after the bulk insert so it is not updated row by row.
        cur.execute(
            "CREATE INDEX composition_index ON complexes (composition_id, group_id)")

        cur.execute("CREATE TABLE group_counts (composition_id, count)")
        cur.execute("CREATE TABLE compositions (composition_id, composition)")

        for composition, composition_id in composition_ids.items():
            # ceiling division: a bucket whose size is an exact multiple of
            # group_size must not be credited with an extra, empty group
            number_of_groups = -(-bucket_counts[composition] // group_size)

            cur.execute(
                "INSERT INTO group_counts VALUES (?, ?)",
                (composition_id, number_of_groups))

            cur.execute(
                "INSERT INTO compositions VALUES (?,?)",
                (composition_id, composition))

        con.commit()

    finally:
        con.close()
113 |
114 |
115 |
--------------------------------------------------------------------------------
/HiPRGen/constants.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Copyright (c) MR.Net development team
3 |
4 |
5 | from enum import Enum
6 | from monty.json import MSONable
7 |
8 | # Basic constants
9 |
10 | # Room temperature (25 C) in Kelvin
11 | ROOM_TEMP = 298.15
12 |
13 | # Boltzmann constant in eV / K
14 | KB = 8.617333262 * 10 ** -5
15 |
16 | # Planck constant in eV * s
17 | PLANCK = 4.135667696 * 10 ** -15
18 |
class Terminal(MSONable, Enum):
    # Two-valued enum that is also MSONable.
    # NOTE(review): presumably the terminal verdicts of a filter decision
    # tree (keep vs. discard) - confirm against the modules using it.
    KEEP = 1
    DISCARD = -1
22 |
# element symbols that are treated as metals
metals = frozenset(["Li", "Na", "K", "Mg", "Ca", "Zn", "Al"])
# each metal symbol with "1" appended, e.g. "Li1"
# NOTE(review): presumably the formula of a lone metal atom - confirm
m_formulas = frozenset([m + "1" for m in metals])
25 |
26 |
# solvation environments
# Each environment maps a property name to a dict of per-ion values.
# NOTE(review): keys look like "<element>_<charge>" and the units of the
# corrections and radii are not stated here - confirm against the code
# consuming these dicts.
li_ec = {
    "solvation_correction" : {
        "Li_1" : -0.68
    },

    "coordination_radius" : {
        "Li_1" : 2.4
    },

    "max_number_of_coordination_bonds" : {
        "Li_1" : 4
    }
}


mg_g2 = {
    "solvation_correction" : {
        "Mg_1": -0.56,
        "Mg_2": -1.49
    },

    "coordination_radius" : {
        "Mg_1": 2.4,
        "Mg_2": 2.4
    },

    "max_number_of_coordination_bonds" : {
        "Mg_1": 5,
        "Mg_2": 6
    }
}


mg_thf = {
    "solvation_correction" : {
        "Mg_1": -0.70,
        "Mg_2": -1.91
    },

    "coordination_radius" : {
        "Mg_1": 2.4,
        "Mg_2": 2.4
    },

    "max_number_of_coordination_bonds" : {
        "Mg_1": 5,
        "Mg_2": 6
    }
}
77 |
--------------------------------------------------------------------------------
/HiPRGen/initial_state.py:
--------------------------------------------------------------------------------
1 | from pymatgen.core.structure import Molecule
2 | from pymatgen.analysis.graphs import MoleculeGraph
3 | from pymatgen.analysis.local_env import OpenBabelNN
4 | from pymatgen.analysis.fragmenter import metal_edge_extender
5 | import sqlite3
6 |
7 |
def find_mol_entry_from_xyz_and_charge(mol_entries, xyz_file_path, charge):
    """
    given a file 'molecule.xyz', find the mol_entry corresponding to the
    molecule graph with given charge.

    The molecule graph is built from the xyz file with OpenBabelNN and then
    corrected with metal_edge_extender. The first entry in mol_entries with
    a matching charge and an isomorphic molecule graph wins.

    Returns the ind of the matching entry, or None if no entry matches.
    """
    target_mol_graph = MoleculeGraph.with_local_env_strategy(
        Molecule.from_file(xyz_file_path), OpenBabelNN()
    )

    # correction to the molecule graph
    target_mol_graph = metal_edge_extender(target_mol_graph)

    # A plain for loop terminates when the entries are exhausted; the
    # previous index-based while loop ran past the end of the list and
    # raised IndexError instead of returning None.
    for mol_entry in mol_entries:
        # compare charges first so the isomorphism test is only run on
        # entries that could match
        if (mol_entry.charge == charge and
                target_mol_graph.isomorphic_to(mol_entry.mol_graph)):
            return mol_entry.ind

    return None
34 |
def find_mol_entry_by_entry_id(mol_entries, entry_id):
    """
    given an entry_id, return the index (ind) of the first mol entry
    carrying that entry_id, or None when there is no such entry
    """

    matching_indices = (
        entry.ind for entry in mol_entries if entry.entry_id == entry_id)

    return next(matching_indices, None)
43 |
# schema of the initial_state table: one row per species id holding the
# count of that species
create_initial_state_table = """
CREATE TABLE initial_state (
species_id INTEGER NOT NULL PRIMARY KEY,
count INTEGER NOT NULL
);
"""

# schema of the trajectories table: one row per (seed, step) recording
# the reaction id and the time
create_trajectories_table = """
CREATE TABLE trajectories (
seed INTEGER NOT NULL,
step INTEGER NOT NULL,
reaction_id INTEGER NOT NULL,
time REAL NOT NULL
);
"""

# schema of the factors table: three real valued factors.
# NOTE(review): the meaning of the factors is defined by whatever reads
# this table, which is not visible here - confirm before documenting.
create_factors_table = """
CREATE TABLE factors (
factor_zero REAL NOT NULL,
factor_two REAL NOT NULL,
factor_duplicate REAL NOT NULL
);
"""
67 |
68 |
def insert_initial_state(
        initial_state,
        mol_entries,
        initial_state_db,
        factor_zero = 1.0,
        factor_two = 1.0,
        factor_duplicate = 0.5
):
    """
    create the initial state database and fill it in.

    initial state is a dict mapping species ids to counts. Species ids
    in range(len(mol_entries)) that are missing from the dict get a
    count of 0.

    The initial_state, trajectories and factors tables are created (they
    must not already exist), a single row holding the three factors is
    written to factors, and one row per species is written to
    initial_state. The trajectories table is left empty.
    """

    rn_con = sqlite3.connect(initial_state_db)

    # try/finally so the connection is closed whether or not the inserts
    # succeed; previously it was never closed
    try:
        rn_cur = rn_con.cursor()
        rn_cur.execute(create_initial_state_table)
        rn_cur.execute(create_trajectories_table)
        rn_cur.execute(create_factors_table)
        rn_con.commit()

        rn_cur.execute(
            "INSERT INTO factors VALUES (?,?,?)",
            (factor_zero, factor_two, factor_duplicate))

        num_species = len(mol_entries)

        rn_cur.executemany(
            "INSERT INTO initial_state VALUES (?,?)",
            ((i, initial_state.get(i, 0)) for i in range(num_species)))

        rn_con.commit()

    finally:
        rn_con.close()
101 |
102 |
103 |
104 |
--------------------------------------------------------------------------------
/HiPRGen/logging.py:
--------------------------------------------------------------------------------
1 | from time import localtime, strftime
2 |
def log_message(*args, **kwargs):
    """
    print args prefixed with the local time as [HH:MM:SS].
    All keyword arguments are forwarded to print.
    """
    timestamp = strftime('%H:%M:%S', localtime())
    print(f'[{timestamp}]', *args, **kwargs)
7 |
--------------------------------------------------------------------------------
/HiPRGen/mol_entry.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from typing import Any, Dict, List, Optional, Tuple
3 |
4 | import networkx as nx
5 | import numpy as np
6 | from pymatgen.analysis.graphs import MoleculeGraph, MolGraphSplitError
7 | from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender
8 | from pymatgen.core.structure import Molecule
9 | from networkx.algorithms.graph_hashing import weisfeiler_lehman_graph_hash
10 | from HiPRGen.constants import ROOM_TEMP, metals
11 | from itertools import permutations, product
12 |
13 |
class FragmentComplex:
    """
    Plain record describing one fragmentation: how many fragments there
    are, how many bonds were broken, which bonds were broken, and the
    hashes of the fragments. The arguments are stored unchanged.
    """

    def __init__(
            self,
            number_of_fragments,
            number_of_bonds_broken,
            bonds_broken,
            fragment_hashes):

        (self.number_of_fragments,
         self.number_of_bonds_broken,
         self.bonds_broken,
         self.fragment_hashes) = (
             number_of_fragments,
             number_of_bonds_broken,
             bonds_broken,
             fragment_hashes)
27 |
28 |
29 |
class MoleculeEntry:
    """
    A molecule entry class to provide easy access to Molecule properties.

    Args:
        molecule: Molecule of interest. Only used to build a molecule graph
            when mol_graph is not given.
        energy: Electronic energy of the molecule in Hartree.
        enthalpy: Enthalpy of the molecule (kcal/mol). May be None.
        entropy: Entropy of the molecule (cal/mol.K). May be None.
        entry_id: An id to uniquely identify the entry.
        mol_graph: MoleculeGraph of the molecule. When falsy, a graph is
            built from molecule with OpenBabelNN and metal_edge_extender.
        partial_charges_resp: stored unchanged on the entry.
        partial_charges_mulliken: stored unchanged on the entry.
        partial_charges_nbo: stored unchanged on the entry.
        electron_affinity: stored unchanged on the entry.
        ionization_energy: stored unchanged on the entry.
        spin_multiplicity: stored unchanged on the entry.
        partial_spins_nbo: stored unchanged on the entry.
    """

    def __init__(
            self,
            molecule,
            energy,
            enthalpy,
            entropy,
            entry_id,
            mol_graph,
            partial_charges_resp,
            partial_charges_mulliken,
            partial_charges_nbo,
            electron_affinity,
            ionization_energy,
            spin_multiplicity,
            partial_spins_nbo
    ):
        self.energy = energy
        self.enthalpy = enthalpy
        self.entropy = entropy
        self.electron_affinity = electron_affinity
        self.ionization_energy = ionization_energy
        self.spin_multiplicity = spin_multiplicity

        # ind is assigned later by whoever indexes the entries
        self.ind = None
        self.entry_id = entry_id

        self.star_hashes = {}
        self.fragment_data = []

        if not mol_graph:
            mol_graph = MoleculeGraph.with_local_env_strategy(molecule, OpenBabelNN())
            self.mol_graph = metal_edge_extender(mol_graph)
        else:
            # a graph that is passed in is used as is
            self.mol_graph = mol_graph

        self.partial_charges_resp = partial_charges_resp
        self.partial_charges_mulliken = partial_charges_mulliken
        self.partial_charges_nbo = partial_charges_nbo
        self.partial_spins_nbo = partial_spins_nbo

        # from here on everything is derived from the molecule graph
        self.molecule = self.mol_graph.molecule
        self.graph = self.mol_graph.graph.to_undirected()
        self.species = [str(s) for s in self.molecule.species]

        # indices of the metal atoms
        self.m_inds = [
            i for i, x in enumerate(self.species) if x in metals
        ]

        # penalty gets used in the non local part of species filtering.
        # certain species filters will increase penalty rather than explicitly filtering
        # out a molecule. The non local filtering step prioritizes mols with a lower
        # penalty.
        self.penalty = 0

        # the undirected graph with all metal atoms removed
        self.covalent_graph = copy.deepcopy(self.graph)
        self.covalent_graph.remove_nodes_from(self.m_inds)

        self.formula = self.molecule.composition.alphabetical_formula
        self.charge = self.molecule.charge
        self.num_atoms = len(self.molecule)

        self.atom_locations = [
            site.coords for site in self.molecule]

        self.free_energy = self.get_free_energy()

        self.non_metal_atoms = [
            i for i in range(self.num_atoms)
            if self.species[i] not in metals]

    @classmethod
    def from_dataset_entry(
            cls,
            doc: Dict,
            use_thermo: str = "raw",
    ):
        """
        Initialize a MoleculeEntry from a document in the LIBE (Lithium-Ion
        Battery Electrolyte) or MADEIRA (MAgnesium Dataset of Electrolyte and
        Interphase ReAgents) datasets.

        Args:
            doc: Dictionary representing an entry from LIBE or MADEIRA
            use_thermo: One of "raw" (meaning raw, uncorrected thermo data will
                be used), "rrho_shifted" (meaning that a slightly modified
                Rigid-Rotor Harmonic Oscillator approximation will be used -
                see Ribiero et al., J. Phys. Chem. B 2011, 115, 14556-14562), or
                "qrrho" (meaning that Grimme's Quasi-Rigid Rotor Harmonic
                Oscillator - see Grimme, Chem. Eur. J. 2012, 18, 9955-9964) will
                be used. The value is case insensitive. When the requested
                corrected data is None in doc, the raw data is used instead.

        Raises:
            ValueError: if use_thermo is not one of the allowed values.
            Exception: if a required key is missing from doc. The original
                KeyError is attached as the cause.
        """

        thermo = use_thermo.lower()

        if thermo not in ["raw", "rrho_shifted", "qrrho"]:
            raise ValueError(
                "Only allowed values for use_thermo are 'raw', 'rrho_shifted', "
                "and 'qrrho'!"
            )
        try:
            if isinstance(doc["molecule"], Molecule):
                molecule = doc["molecule"]
            else:
                molecule = Molecule.from_dict(doc["molecule"])  # type: ignore

            # the corrected thermo data is stored in eV, so it is converted
            # to the units used by this class: Hartree (x 0.0367493),
            # kcal/mol (x 23.061) and cal/mol.K (x 23061)
            if (
                thermo == "rrho_shifted"
                and doc["thermo"]["shifted_rrho_eV"] is not None
            ):
                energy = (
                    doc["thermo"]["shifted_rrho_eV"]["electronic_energy"] * 0.0367493
                )
                enthalpy = doc["thermo"]["shifted_rrho_eV"]["total_enthalpy"] * 23.061
                entropy = doc["thermo"]["shifted_rrho_eV"]["total_entropy"] * 23061
            elif thermo == "qrrho" and doc["thermo"]["quasi_rrho_eV"] is not None:
                energy = doc["thermo"]["quasi_rrho_eV"]["electronic_energy"] * 0.0367493
                enthalpy = doc["thermo"]["quasi_rrho_eV"]["total_enthalpy"] * 23.061
                entropy = doc["thermo"]["quasi_rrho_eV"]["total_entropy"] * 23061
            else:
                energy = doc["thermo"]["raw"]["electronic_energy_Ha"]
                enthalpy = doc["thermo"]["raw"]["total_enthalpy_kcal/mol"]
                entropy = doc["thermo"]["raw"]["total_entropy_cal/molK"]

            entry_id = doc["molecule_id"]

            if isinstance(doc["molecule_graph"], MoleculeGraph):
                mol_graph = doc["molecule_graph"]
            else:
                mol_graph = MoleculeGraph.from_dict(doc["molecule_graph"])

            partial_charges_resp = doc['partial_charges']['resp']
            partial_charges_mulliken = doc['partial_charges']['mulliken']
            spin_multiplicity = doc['spin_multiplicity']

            # for single atoms the mulliken values are used in place of
            # the nbo values
            if doc['number_atoms'] == 1:
                partial_charges_nbo = doc['partial_charges']['mulliken']
                partial_spins_nbo = doc['partial_spins']['mulliken']
            else:
                partial_charges_nbo = doc['partial_charges']['nbo']
                partial_spins_nbo = doc['partial_spins']['nbo']

            # redox data is optional
            electron_affinity_eV = None
            ionization_energy_eV = None
            if 'redox' in doc:
                if 'electron_affinity_eV' in doc['redox']:
                    electron_affinity_eV = doc['redox']['electron_affinity_eV']

                if 'ionization_energy_eV' in doc['redox']:
                    ionization_energy_eV = doc['redox']['ionization_energy_eV']

        except KeyError as e:
            # chain the KeyError so the traceback shows where the lookup failed
            raise Exception(
                "Unable to construct molecule entry from molecule document; missing "
                f"attribute {e} in `doc`."
            ) from e

        return cls(
            molecule=molecule,
            energy=energy,
            enthalpy=enthalpy,
            entropy=entropy,
            entry_id=entry_id,
            mol_graph=mol_graph,
            partial_charges_resp=partial_charges_resp,
            partial_charges_mulliken=partial_charges_mulliken,
            partial_charges_nbo=partial_charges_nbo,
            electron_affinity=electron_affinity_eV,
            ionization_energy=ionization_energy_eV,
            spin_multiplicity=spin_multiplicity,
            partial_spins_nbo=partial_spins_nbo
        )

    def get_free_energy(self, temperature: float = ROOM_TEMP) -> Optional[float]:
        """
        Get the free energy (in eV) at the given temperature, or None if
        either the enthalpy or the entropy is missing.
        """
        if self.enthalpy is not None and self.entropy is not None:
            # TODO: fix these hard coded vals
            # conversion factors to eV: 27.21139 per Hartree,
            # 0.0433641 per kcal/mol and 0.0000433641 per cal/mol
            return (
                self.energy * 27.21139
                + 0.0433641 * self.enthalpy
                - temperature * self.entropy * 0.0000433641
            )
        else:
            return None

    def __repr__(self):

        output = [
            f"MoleculeEntry {self.entry_id} - {self.formula}",
            f"Total charge = {self.charge}",
        ]

        energies = [
            ("Energy", "Hartree", self.energy),
            ("Enthalpy", "kcal/mol", self.enthalpy),
            ("Entropy", "cal/mol.K", self.entropy),
            ("Free Energy (298.15 K)", "eV", self.get_free_energy()),
        ]
        for name, unit, value in energies:
            if value is None:
                output.append(f"{name} = {value} {unit}")
            else:
                output.append(f"{name} = {value:.4f} {unit}")

        # compare against None: index 0 is a valid index and must be shown
        if self.ind is not None:
            output.append("index: {}".format(self.ind))

        return "\n".join(output)

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        # entries are equal when they have exactly the same type and
        # the same string representation
        if type(self) is type(other):
            return str(self) == str(other)
        else:
            return False
271 |
--------------------------------------------------------------------------------
/HiPRGen/network_loader.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | import pickle
3 | import numpy as np
4 |
5 | """
6 | class for dynamically loading a reaction network
7 | """
8 |
# fetch the single reaction with the given reaction_id
sql_get_reaction = """
SELECT * FROM reactions WHERE reaction_id = ?;
"""

# fetch the reactions whose reaction_id lies in the half open range
# [first parameter, second parameter)
sql_get_reaction_range = """
SELECT * FROM reactions WHERE ? <= reaction_id AND reaction_id < ?;
"""

# fetch every reaction flagged with is_redox = 1
sql_get_redox = """
SELECT * FROM reactions WHERE is_redox = 1;
"""
20 |
def sql_get_coord(metal_id):
    """
    build the query selecting every reaction with two reactants and one
    product in which metal_id is one of the reactants, ordered by
    decreasing dG.

    metal_id is interpolated directly into the SQL text, so it must be a
    trusted value.
    """
    return (
        "SELECT * FROM reactions WHERE "
        "(number_of_reactants=2 AND number_of_products=1 AND "
        f"(reactant_1={metal_id} OR reactant_2={metal_id})) "
        "ORDER BY dG DESC;"
    )
23 |
def sql_get_decoord(metal_id):
    """
    build the query selecting every reaction with one reactant and two
    products in which metal_id is one of the products, ordered by
    decreasing dG.

    metal_id is interpolated directly into the SQL text, so it must be a
    trusted value.
    """
    return (
        "SELECT * FROM reactions WHERE "
        "(number_of_reactants=1 AND number_of_products=2 AND "
        f"(product_1={metal_id} OR product_2={metal_id})) "
        "ORDER BY dG DESC;"
    )
26 |
27 |
# fetch every row of the trajectories table
sql_get_trajectory = """
SELECT * FROM trajectories;
"""

# fetch every row of the initial_state table
sql_get_initial_state = """
SELECT * FROM initial_state;
"""
35 |
36 |
37 |
class NetworkLoader:
    """
    class for dynamically loading a reaction network.

    Reactions are read from the reactions table of the network database
    and returned as dicts with the keys number_of_reactants,
    number_of_products, reactants, products, rate, dG and dG_barrier.
    Trajectories and the initial state are read from the optional
    initial state database.
    """

    def __init__(
            self,
            network_database,
            mol_entries_pickle,
            initial_state_database=None
    ):
        """
        Args:
            network_database: path to the sqlite reaction network database.
                Its metadata table supplies the number of species and the
                number of reactions.
            mol_entries_pickle: path to the pickled mol entries.
            initial_state_database: optional path to the sqlite database
                holding the initial state and the trajectories.
        """

        self.rn_con = sqlite3.connect(network_database)

        # NOTE(review): pickle.load can execute arbitrary code, so only
        # point this at pickle files from a trusted source.
        with open(mol_entries_pickle, 'rb') as f:
            self.mol_entries = pickle.load(f)

        cur = self.rn_con.cursor()
        metadata = list(cur.execute("SELECT * FROM metadata"))[0]
        self.number_of_species = metadata[0]
        self.number_of_reactions = metadata[1]

        # always define the attribute, so that a missing initial state
        # database gives a clear error when a load method is called
        self.initial_state_con = None
        if initial_state_database:
            self.initial_state_con = sqlite3.connect(initial_state_database)

        # cache used by index_to_reaction
        self.reactions = {}

    @staticmethod
    def _row_to_reaction(res, include_id=False):
        """
        convert a row of the reactions table into a reaction dict.
        reaction_id (column 0) is only included when include_id is set.
        """
        reaction = {}
        if include_id:
            reaction['reaction_id'] = res[0]

        reaction['number_of_reactants'] = res[1]
        reaction['number_of_products'] = res[2]
        reaction['reactants'] = res[3:5]
        reaction['products'] = res[5:7]
        reaction['rate'] = res[7]
        reaction['dG'] = res[8]
        reaction['dG_barrier'] = res[9]
        return reaction

    def _initial_state_cursor(self):
        """
        return a cursor on the initial state database, or raise if the
        loader was created without one.
        """
        if self.initial_state_con is None:
            raise RuntimeError(
                "NetworkLoader was created without an initial state database")

        return self.initial_state_con.cursor()

    def get_all_redox_reactions(self):
        """
        return a list of all reactions flagged as redox
        """
        cur = self.rn_con.cursor()
        return [
            self._row_to_reaction(res)
            for res in cur.execute(sql_get_redox)]

    def get_all_coordination_reactions(self, metal_id):
        """
        return a list of the reactions selected by sql_get_coord(metal_id)
        """
        cur = self.rn_con.cursor()
        return [
            self._row_to_reaction(res)
            for res in cur.execute(sql_get_coord(metal_id))]

    def get_all_decoordination_reactions(self, metal_id):
        """
        return a list of the reactions selected by sql_get_decoord(metal_id)
        """
        cur = self.rn_con.cursor()
        return [
            self._row_to_reaction(res)
            for res in cur.execute(sql_get_decoord(metal_id))]

    def get_reactions_in_range(self, lower_bound, upper_bound):
        """
        get range of reactions from database but don't cache them.
        This is a generator yielding the reactions with
        lower_bound <= reaction_id < upper_bound. Unlike the other
        methods, the yielded dicts also carry the reaction_id.
        """
        cur = self.rn_con.cursor()
        for res in cur.execute(sql_get_reaction_range,
                               (lower_bound, upper_bound)):
            yield self._row_to_reaction(res, include_id=True)

    def index_to_reaction(self, reaction_index):
        """
        this method gets called a lot, so we cache the reactions to
        minimize database interaction. Raises IndexError when no reaction
        has the given index.
        """

        if reaction_index in self.reactions:
            return self.reactions[reaction_index]

        print("fetching data for reaction", reaction_index)
        cur = self.rn_con.cursor()
        res = list(
            cur.execute(sql_get_reaction, (reaction_index,))
        )[0]
        reaction = self._row_to_reaction(res)
        self.reactions[reaction_index] = reaction
        return reaction

    def load_trajectories(self):
        """
        read the trajectories table into self.trajectories, where
        trajectories[seed][step] = (reaction_id, time).
        Raises RuntimeError if there is no initial state database.
        """

        cur = self._initial_state_cursor()

        # trajectories[seed][step] = (reaction_id, time)
        trajectories = {}
        for row in cur.execute(sql_get_trajectory):
            seed = row[0]
            step = row[1]
            reaction_id = row[2]
            time = row[3]

            if seed not in trajectories:
                trajectories[seed] = {}

            trajectories[seed][step] = (reaction_id, time)

        self.trajectories = trajectories

    def load_initial_state(self):
        """
        read the initial_state table into self.initial_state_dict (first
        column -> second column) and self.initial_state_array (an integer
        array of length number_of_species).
        Raises RuntimeError if there is no initial state database, and
        KeyError if a species index below number_of_species has no row.
        """

        cur = self._initial_state_cursor()
        initial_state_dict = {}

        for row in cur.execute(sql_get_initial_state):
            initial_state_dict[row[0]] = row[1]

        initial_state_array = np.zeros(
            self.number_of_species,
            dtype=int
        )

        for i in range(self.number_of_species):
            initial_state_array[i] = initial_state_dict[i]

        self.initial_state_dict = initial_state_dict
        self.initial_state_array = initial_state_array
199 |
--------------------------------------------------------------------------------
/HiPRGen/network_renderer.py:
--------------------------------------------------------------------------------
1 | from HiPRGen.network_loader import NetworkLoader
2 | import cairo
3 | import math
4 | import random
5 |
6 |
class QuadTreeNode:
    """
    origin is at top left so to agree with
    the cairo canvas coordinates.

    Notice that this is a recursive initializer. It creates
    1 + 4 + ... + 4^(depth) = O(4^(depth + 1)) QuadTreeNodes,
    so don't go too deep!
    """
    def __init__(self, depth, x_min, x_max, y_min, y_max):

        self.x_min, self.x_max = x_min, x_max
        self.y_min, self.y_max = y_min, y_max

        # a node holds either quads (non terminal) or data (terminal),
        # never both
        self.quads = None
        self.data = []
        self.branch(depth)

    def branch(self, depth):
        """
        break node into 4 nodes, each of which is branched depth - 1
        more times. Does nothing unless depth is positive.
        """
        if depth <= 0:
            return

        self.data = None

        self.x_mid = (self.x_min + self.x_max) / 2
        self.y_mid = (self.y_min + self.y_max) / 2

        x_spans = ((self.x_min, self.x_mid), (self.x_mid, self.x_max))
        y_spans = ((self.y_min, self.y_mid), (self.y_mid, self.y_max))

        # rows from top to bottom and, within a row, from left to right:
        # top left, top right, bottom left, bottom right
        self.quads = [
            QuadTreeNode(depth - 1, x_low, x_high, y_low, y_high)
            for (y_low, y_high) in y_spans
            for (x_low, x_high) in x_spans
        ]

    def insert(self, x, y, val):
        """
        append val to the data of the terminal node containing (x, y)
        and return val.
        """
        self.find_node(x, y).data.append(val)
        return val

    def find_neighborhood(self, x, y):
        """
        find all nodes adjacent to our point.
        doesn't return the node actually containing our point.
        """
        home = self.find_node(x, y)
        x_diff = home.x_max - home.x_min
        y_diff = home.y_max - home.y_min

        x_plus, x_minus = x + x_diff, x - x_diff
        y_plus, y_minus = y + y_diff, y - y_diff

        # one probe point, a node width or height away, in each of the
        # eight directions
        probes = (
            (x_plus, y),
            (x_minus, y),
            (x, y_plus),
            (x, y_minus),
            (x_plus, y_plus),
            (x_minus, y_plus),
            (x_plus, y_minus),
            (x_minus, y_minus),
        )

        # probes falling outside of the tree give None and are dropped
        found = (self.find_node(px, py) for (px, py) in probes)
        return [node for node in found if node is not None]

    def find_node(self, x, y):
        """
        find the terminal node so that
        x_min <= x < x_max
        y_min <= y < y_max
        return None if there is no node.
        Note: this gives the wrong answer if called from a terminal node.
        """
        node = self
        while node.quads is not None:
            for quad in node.quads:
                if (quad.x_min <= x < quad.x_max and
                        quad.y_min <= y < quad.y_max):
                    node = quad
                    break
            else:
                # no quad contains the point
                return None

        return node

    def __str__(self):
        return (
            f"x : [{self.x_min!s}, {self.x_max!s}) "
            f"y : [{self.y_min!s}, {self.y_max!s})"
        )

    def __repr__(self):
        return self.__str__()
134 |
135 |
class RepulsiveSampler:
    """
    Rejection sampler producing points which are accepted by a global
    mask and which are at least rejection_radius away from the points
    stored in the surrounding quad tree nodes.
    """
    def __init__(self,
                 rejection_radius,
                 x_min,
                 x_max,
                 y_min,
                 y_max,
                 global_mask, # reject a sample if global mask returns false
                 quad_tree_depth=7,
                 seed=42,
                 ):

        self.rejection_radius = rejection_radius
        self.global_mask = global_mask
        self.internal_sampler = random.Random(seed)
        self.quad_tree = QuadTreeNode(quad_tree_depth, x_min, x_max, y_min, y_max)

    def sample(self):
        """
        draw points uniformly from the bounds of the quad tree until one
        is accepted, store it in the quad tree, print it and return it
        as an (x, y) tuple.
        """
        tree = self.quad_tree

        while True:
            x = self.internal_sampler.uniform(tree.x_min, tree.x_max)
            y = self.internal_sampler.uniform(tree.y_min, tree.y_max)

            if not self.global_mask(x, y):
                continue

            # check the node containing the candidate together with the
            # nodes adjacent to it
            node = tree.find_node(x, y)
            cells = tree.find_neighborhood(x, y)
            cells.append(node)

            too_close = any(
                (point[0] - x)**2 + (point[1] - y)**2 < (self.rejection_radius **2)
                for cell in cells
                for point in cell.data)

            if too_close:
                continue

            result = (x, y)
            print(result)
            node.data.append(result)
            return result
186 |
187 |
188 |
class Renderer:
    """
    Draws tagged nodes and the edges between them onto a cairo image
    surface. The drawing context is scaled by (width, height), so all
    positions, radii and line widths are given in unit square coordinates.
    """

    def __init__(
            self,
            width=1024,
            height=1024,
            rejection_radius=0.005,
            global_mask_radius=0.47,
            colors=None
    ):
        """
        Args:
            width: width of the image surface.
            height: height of the image surface.
            rejection_radius: minimum distance enforced by the sampler
                between generated node positions.
            global_mask_radius: generated positions are confined to the
                disk of this radius centered at (0.5, 0.5).
            colors: list of rgb tuples from which edge colors are chosen.
                Defaults to six shades of grey.
        """

        # the default is built per instance, so instances never share
        # (and cannot accidentally mutate) a single default list
        if colors is None:
            colors = [(x,x,x) for x in [0.3,0.4,0.5,0.6,0.7,0.8]]

        self.repulsive_sampler = RepulsiveSampler(
            rejection_radius,
            0.0,
            1.0,
            0.0,
            1.0,
            lambda x, y: (x - 0.5)**2 + (y - 0.5)**2 < global_mask_radius**2
        )

        # separate generator, used for choosing edge colors
        self.local_sampler = random.Random(42)

        # tag -> position of the node
        self.node_dict = {}

        self.width = width
        self.global_mask_radius = global_mask_radius
        self.height = height
        self.colors = colors

        self.surface = cairo.ImageSurface(cairo.Format.ARGB32, width, height)
        self.context = cairo.Context(self.surface)
        self.context.scale(width, height)

    def new_node(self, tag, point=None):
        """
        register a node under tag. If tag is already used, do nothing.
        """
        # if point is None, a node position will be generated
        # note: if you provide a point, it will go exactly where you say, which
        # may be very close to other points.
        if tag not in self.node_dict:

            if point is not None:
                self.node_dict[tag] = (
                    self.repulsive_sampler.quad_tree.insert(
                        point[0],
                        point[1],
                        point))

            else:
                self.node_dict[tag] = self.repulsive_sampler.sample()

    def new_node_boundary(self, tag, angle):
        """
        register a node on the boundary of the global mask disk, at the
        given angle (in radians).
        """
        point = (0.5 + self.global_mask_radius * math.cos(angle),
                 0.5 + self.global_mask_radius * math.sin(angle))

        self.new_node(tag, point=point)

    def draw_node(self, tag, color=(0,0,0), radius=0.0008):
        """
        draw the node registered under tag as a filled disk.
        """
        point = self.node_dict[tag]
        self.context.set_source_rgb(*color)
        self.context.arc(point[0], point[1], radius, 0, 2 * math.pi)
        self.context.fill()

    def draw_node_square(self, tag, color=(0,0,0), side=0.005):
        """
        draw the node registered under tag as a filled square centered
        on the node position.
        """
        point = self.node_dict[tag]
        self.context.set_source_rgb(*color)
        self.context.rectangle(point[0] - side/2, point[1] - side/2, side, side)
        self.context.fill()

    def draw_edge(self, tag1, tag2, color=None, width=0.001):
        """
        draw a line between the nodes registered under tag1 and tag2.
        If color is None, one is picked from self.colors.
        """

        if color is None:
            color = self.local_sampler.choice(self.colors)

        point1 = self.node_dict[tag1]
        point2 = self.node_dict[tag2]
        self.context.set_source_rgb(*color)
        self.context.set_line_width(width)
        self.context.move_to(*point1)
        self.context.line_to(*point2)
        self.context.stroke()

    def render(self, path):
        """
        write the surface to a png file at path.
        """
        self.surface.write_to_png(path)
275 |
--------------------------------------------------------------------------------
/HiPRGen/reaction_filter.py:
--------------------------------------------------------------------------------
1 | from mpi4py import MPI
2 | from itertools import permutations, product
3 | from HiPRGen.report_generator import ReportGenerator
4 | from time import time
5 | from HiPRGen.logging import log_message
6 | import sqlite3
7 | from enum import Enum
8 | from math import floor
9 |
10 | from HiPRGen.reaction_questions import (
11 | run_decision_tree
12 | )
13 |
14 | """
15 | Phases 3 & 4 run in parallel using MPI
16 |
17 | Phase 3: reaction gen and filtering
18 | input: a bucket labeled by atom count
19 | output: a list of reactions from that bucket
20 | description: Loop through all possible reactions in the bucket and apply the decision tree. This will run in parallel over each bucket.
21 |
22 | Phase 4: collating and indexing
23 | input: all the outputs of phase 3 as they are generated
24 | output: reaction network database
25 | description: the worker processes from phase 3 are sending their reactions to this phase and it is writing them to DB as it gets them. We can ensure that duplicates don't get generated in phase 3 which means we don't need extra index tables on the db.
26 |
27 | the code in this file is designed to run on a compute cluster using MPI.
28 | """
29 |
30 |
# SQL for the reaction network database written by the dispatcher.

# metadata holds one row, inserted once all workers have finished.
create_metadata_table = """
CREATE TABLE metadata (
number_of_species INTEGER NOT NULL,
number_of_reactions INTEGER NOT NULL
);
"""

# parameters: (number_of_species, number_of_reactions)
insert_metadata = """
INSERT INTO metadata VALUES (?, ?)
"""

# it is important that reaction_id is the primary key
# otherwise the network loader will be extremely slow.
create_reactions_table = """
CREATE TABLE reactions (
reaction_id INTEGER NOT NULL PRIMARY KEY,
number_of_reactants INTEGER NOT NULL,
number_of_products INTEGER NOT NULL,
reactant_1 INTEGER NOT NULL,
reactant_2 INTEGER NOT NULL,
product_1 INTEGER NOT NULL,
product_2 INTEGER NOT NULL,
rate REAL NOT NULL,
dG REAL NOT NULL,
dG_barrier REAL NOT NULL,
is_redox INTEGER NOT NULL
);
"""


# eleven parameters, in the same order as the columns of the
# reactions table above.
insert_reaction = """
INSERT INTO reactions VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

# run by workers against the bucket database to load one group of one
# composition. parameters: (composition_id, group_id)
get_complex_group_sql = """
SELECT * FROM complexes WHERE composition_id=? AND group_id=?
"""
68 |
69 |
# TODO: structure these global variables better

# MPI rank of the dispatcher process. Every other rank is a worker.
DISPATCHER_RANK = 0

# message tags

# sent by workers to the dispatcher once they have finished initializing
# only sent once
INITIALIZATION_FINISHED = 0

# sent by workers to the dispatcher to request a new work batch
SEND_ME_A_WORK_BATCH = 1

# sent by dispatcher to workers when delivering a new work batch.
# The payload is a (composition_id, group_id_0, group_id_1) tuple, or
# None when there are no batches left.
HERE_IS_A_WORK_BATCH = 2

# sent by workers to the dispatcher when reaction passes db decision tree
NEW_REACTION_DB = 3

# sent by workers to the dispatcher when reaction passes logging decision tree
NEW_REACTION_LOGGING = 4
90 |
class WorkerState(Enum):
    """
    state of a worker process as tracked by the dispatcher.
    """
    # has not yet sent INITIALIZATION_FINISHED
    INITIALIZING = 0
    # initialized, and has not yet been told that there is no work left
    RUNNING = 1
    # asked for a work batch when none were left
    FINISHED = 2
95 |
def dispatcher(
        mol_entries,
        dispatcher_payload
):
    """
    entry point for the dispatcher rank.

    builds the list of work batches from the bucket database, creates
    the reaction network database, waits for every worker to finish
    initializing and then serves requests until no worker is running:

    SEND_ME_A_WORK_BATCH: reply with the next batch, or with None once
    the list is empty, at which point the worker is marked FINISHED.

    NEW_REACTION_DB: insert the reaction into the reactions table.

    NEW_REACTION_LOGGING: write the reaction and its decision path to
    the generation report.

    mol_entries: the list of molecule entries. Its length is recorded
    as number_of_species and it is passed to the report generator.

    dispatcher_payload: object providing bucket_db_file,
    reaction_network_db_file, report_file, commit_frequency and
    checkpoint_interval.
    """

    comm = MPI.COMM_WORLD
    work_batch_list = []
    bucket_con = sqlite3.connect(dispatcher_payload.bucket_db_file)
    bucket_cur = bucket_con.cursor()

    # one work batch for every ordered pair of groups in a composition
    res = bucket_cur.execute("SELECT * FROM group_counts")
    for (composition_id, count) in res:
        for (i, j) in product(range(count), repeat=2):
            work_batch_list.append(
                (composition_id, i, j))

    composition_names = {}
    res = bucket_cur.execute("SELECT * FROM compositions")
    for (composition_id, composition) in res:
        composition_names[composition_id] = composition

    log_message("creating reaction network db")
    rn_con = sqlite3.connect(dispatcher_payload.reaction_network_db_file)
    rn_cur = rn_con.cursor()
    rn_cur.execute(create_metadata_table)
    rn_cur.execute(create_reactions_table)
    rn_con.commit()

    log_message("initializing report generator")

    # since MPI processes spin lock, we don't want to have the dispatcher
    # spend a bunch of time generating molecule pictures
    report_generator = ReportGenerator(
        mol_entries,
        dispatcher_payload.report_file,
        rebuild_mol_pictures=False
    )

    worker_ranks = [i for i in range(comm.Get_size()) if i != DISPATCHER_RANK]
    worker_states = {
        rank: WorkerState.INITIALIZING for rank in worker_ranks}

    for i in worker_states:
        # block, waiting for workers to initialize
        comm.recv(source=i, tag=INITIALIZATION_FINISHED)
        worker_states[i] = WorkerState.RUNNING

    log_message("all workers running")

    # doubles as the id of the next reaction and the running count
    reaction_index = 0

    log_message("handling requests")

    batches_left_at_last_checkpoint = len(work_batch_list)
    last_checkpoint_time = floor(time())
    while WorkerState.RUNNING in worker_states.values():

        current_time = floor(time())
        time_diff = current_time - last_checkpoint_time

        # checkpoint once at least checkpoint_interval seconds have
        # elapsed. Testing for wall clock seconds divisible by the
        # interval would skip a checkpoint whenever no message happened
        # to arrive during that particular second. time_diff > 0 guards
        # the division below.
        if (time_diff > 0 and
                time_diff >= dispatcher_payload.checkpoint_interval):
            batches_left_at_current_checkpoint = len(work_batch_list)
            batch_count_diff = (
                batches_left_at_last_checkpoint -
                batches_left_at_current_checkpoint)

            batch_consumption_rate = batch_count_diff / time_diff

            log_message("batches remaining:", batches_left_at_current_checkpoint)
            log_message("batch consumption rate:",
                        batch_consumption_rate,
                        "batches per second")

            batches_left_at_last_checkpoint = batches_left_at_current_checkpoint
            last_checkpoint_time = current_time

        # block until any worker sends any message
        status = MPI.Status()
        data = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        tag = status.Get_tag()
        rank = status.Get_source()

        if tag == SEND_ME_A_WORK_BATCH:
            if len(work_batch_list) == 0:
                # None tells the worker to shut down
                comm.send(None, dest=rank, tag=HERE_IS_A_WORK_BATCH)
                worker_states[rank] = WorkerState.FINISHED
            else:
                # pop removes and returns the last item in the list
                work_batch = work_batch_list.pop()
                comm.send(work_batch, dest=rank, tag=HERE_IS_A_WORK_BATCH)
                composition_id, group_id_0, group_id_1 = work_batch
                log_message(
                    "dispatched",
                    composition_names[composition_id],
                    ": group ids:",
                    group_id_0, group_id_1
                )

        elif tag == NEW_REACTION_DB:
            reaction = data
            rn_cur.execute(
                insert_reaction,
                (reaction_index,
                 reaction['number_of_reactants'],
                 reaction['number_of_products'],
                 reaction['reactants'][0],
                 reaction['reactants'][1],
                 reaction['products'][0],
                 reaction['products'][1],
                 reaction['rate'],
                 reaction['dG'],
                 reaction['dG_barrier'],
                 reaction['is_redox']
                 ))

            reaction_index += 1
            if reaction_index % dispatcher_payload.commit_frequency == 0:
                rn_con.commit()

        elif tag == NEW_REACTION_LOGGING:

            reaction = data[0]
            decision_path = data[1]

            report_generator.emit_verbatim(decision_path)
            report_generator.emit_reaction(reaction)
            report_generator.emit_bond_breakage(reaction)
            report_generator.emit_newline()

    log_message("finalzing database and generation report")
    rn_cur.execute(
        insert_metadata,
        (len(mol_entries),
         reaction_index)
    )

    report_generator.finished()
    rn_con.commit()
    bucket_con.close()
    rn_con.close()
248 |
249 |
def _load_complex_group(cur, composition_id, group_id):
    """
    return the first two columns of every row of the complexes table
    belonging to the given composition and group, as a list of pairs.
    """
    res = cur.execute(
        get_complex_group_sql,
        (composition_id, group_id))

    return [(row[0], row[1]) for row in res]


def worker(
        mol_entries,
        worker_payload
):
    """
    entry point for the worker ranks.

    repeatedly asks the dispatcher for a work batch until it receives
    None. A work batch is a tuple (composition_id, group_id_0,
    group_id_1). Every ordered pair of distinct complexes drawn from the
    two groups is treated as a candidate reaction (reactants, products)
    and run through the decision trees:

    if the reaction decision tree keeps it, the reaction is sent to the
    dispatcher with tag NEW_REACTION_DB.

    if the logging decision tree keeps it, the reaction and the path it
    took through the reaction decision tree are sent to the dispatcher
    with tag NEW_REACTION_LOGGING.

    mol_entries: the list of molecule entries, handed to the questions.

    worker_payload: object providing bucket_db_file, params,
    reaction_decision_tree and logging_decision_tree.
    """

    comm = MPI.COMM_WORLD
    con = sqlite3.connect(worker_payload.bucket_db_file)

    # the finally clause ensures the bucket database connection is
    # released whether the loop ends normally or a question raises
    try:
        cur = con.cursor()

        comm.send(None, dest=DISPATCHER_RANK, tag=INITIALIZATION_FINISHED)

        while True:
            comm.send(None, dest=DISPATCHER_RANK, tag=SEND_ME_A_WORK_BATCH)
            work_batch = comm.recv(
                source=DISPATCHER_RANK, tag=HERE_IS_A_WORK_BATCH)

            if work_batch is None:
                break

            composition_id, group_id_0, group_id_1 = work_batch

            if group_id_0 == group_id_1:
                # within a single group, pair every complex with every
                # other complex, in both orders
                bucket = _load_complex_group(
                    cur, composition_id, group_id_0)
                iterator = permutations(bucket, r=2)

            else:
                # the dispatcher also hands out (group_id_1, group_id_0),
                # so the reverse direction is covered by another batch
                bucket_0 = _load_complex_group(
                    cur, composition_id, group_id_0)
                bucket_1 = _load_complex_group(
                    cur, composition_id, group_id_1)
                iterator = product(bucket_0, bucket_1)

            for (reactants, products) in iterator:
                # an index of -1 marks an empty slot
                reaction = {
                    'reactants': reactants,
                    'products': products,
                    'number_of_reactants': len(
                        [i for i in reactants if i != -1]),
                    'number_of_products': len(
                        [i for i in products if i != -1])}

                decision_pathway = []
                if run_decision_tree(reaction,
                                     mol_entries,
                                     worker_payload.params,
                                     worker_payload.reaction_decision_tree,
                                     decision_pathway
                                     ):

                    comm.send(
                        reaction,
                        dest=DISPATCHER_RANK,
                        tag=NEW_REACTION_DB)

                if run_decision_tree(reaction,
                                     mol_entries,
                                     worker_payload.params,
                                     worker_payload.logging_decision_tree):

                    comm.send(
                        (reaction,
                         '\n'.join([str(f) for f in decision_pathway])
                         ),

                        dest=DISPATCHER_RANK,
                        tag=NEW_REACTION_LOGGING)
    finally:
        con.close()
341 |
--------------------------------------------------------------------------------
/HiPRGen/reaction_filter_payloads.py:
--------------------------------------------------------------------------------
1 | from monty.json import MSONable
2 |
class DispatcherPayload(MSONable):
    """
    class for storing all the arguments required by the reaction
    filter dispatcher. We do this instead of passing arguments
    directly because it makes it easier to pass arguments through the
    MPI barrier.

    bucket_db_file: path of the bucket database to read from.

    reaction_network_db_file: path of the reaction network database
    to create.

    report_file: path handed to the report generator.

    commit_frequency: the dispatcher commits to the reaction network
    database each time the number of reactions written is a multiple
    of this value.

    checkpoint_interval: interval, in seconds, used by the dispatcher
    to decide when to log progress.
    """

    def __init__(
            self,
            bucket_db_file,
            reaction_network_db_file,
            report_file,
            commit_frequency = 1000,
            checkpoint_interval = 10):

        self.bucket_db_file = bucket_db_file
        self.reaction_network_db_file = reaction_network_db_file
        self.report_file = report_file
        self.commit_frequency = commit_frequency
        self.checkpoint_interval = checkpoint_interval
24 |
25 |
class WorkerPayload(MSONable):
    """
    class for storing all the arguments required by the reaction
    filter worker.

    bucket_db_file: path of the bucket database to read from.

    reaction_decision_tree: decision tree selecting the reactions
    which are written to the reaction network database.

    params: passed as the params argument to every question in the
    decision trees.

    logging_decision_tree: decision tree selecting the reactions
    which are written to the generation report.
    """
    def __init__(
            self,
            bucket_db_file,
            reaction_decision_tree,
            params,
            logging_decision_tree):

        self.bucket_db_file = bucket_db_file
        self.reaction_decision_tree = reaction_decision_tree
        self.params = params
        self.logging_decision_tree = logging_decision_tree
42 |
--------------------------------------------------------------------------------
/HiPRGen/reaction_questions.py:
--------------------------------------------------------------------------------
1 | import math
2 | from HiPRGen.mol_entry import MoleculeEntry
3 | from functools import partial
4 | import itertools
5 | import networkx as nx
6 | from networkx.algorithms.graph_hashing import weisfeiler_lehman_graph_hash
7 | from HiPRGen.constants import Terminal, ROOM_TEMP, KB, PLANCK, m_formulas
8 | from monty.json import MSONable
9 |
10 | """
11 | The reaction decision tree:
12 |
13 | A question is a function q(reaction, mol_entries, params) -> Bool
14 |
15 | reaction is a dict:
16 |
17 | reaction = { 'reactants' : reactant indices,
18 | 'products' : product indices,
19 | 'number_of_reactants',
20 | 'number_of_products'}
21 | params is a dict:
22 |
23 |
24 | params = { 'temperature',
25 | 'electron_free_energy' }
26 |
27 | The lists of reactant and product indices always have length two. We
28 | use -1 when there is only a single reactant or product.
29 |
30 | The questions can also set reaction['rate'] and reaction['dG']
31 |
32 | Questions will be writable by hand, or we could have machine learning
33 | filters.
34 |
35 | A node is either a Terminal or a non empty list [(question, node)]
36 |
37 | class Terminal(Enum): KEEP = 1 DISCARD = -1
38 |
39 | For the return value of a question, True means travel to this node and
40 | False means try next question in the list.
41 |
42 | for non terminal nodes, it is an error if every question returns
43 | False. i.e getting stuck at a non terminal node is an error.
44 |
45 | Once a Terminal node is reached, it tells us whether to keep or
46 | discard the reaction.
47 |
48 | logging decision tree: The dispatcher takes a second decision tree as
49 | an argument, the logging decision tree. Reactions which return
50 | Terminal.KEEP from the logging decision tree will be logged in the
51 | generation report, with location specified by the argument
52 | generation_report_path
53 |
54 | """
55 |
# Weisfeiler-Lehman hashes of the graphs consisting of a single
# hydrogen atom and a single fluorine atom. They are compared against
# the fragment hashes stored in reaction['hashes'] to recognise
# reactions in which one of the matched fragments is a lone H or F.
hydrogen_graph = nx.MultiGraph()
hydrogen_graph.add_node(0, specie='H')
hydrogen_hash = weisfeiler_lehman_graph_hash(
    hydrogen_graph,
    node_attr='specie')

fluorine_graph = nx.MultiGraph()
fluorine_graph.add_node(0, specie='F')
fluorine_hash = weisfeiler_lehman_graph_hash(
    fluorine_graph,
    node_attr='specie')
67 |
def run_decision_tree(
        reaction,
        mol_entries,
        params,
        decision_tree,
        decision_pathway=None):
    """
    walk reaction through decision_tree.

    at each non terminal node (a list of (question, node) pairs) the
    questions are asked in order and the edge of the first question
    which returns True is followed.

    returns True if the walk ends at Terminal.KEEP and False if it ends
    at any other Terminal.

    if decision_pathway is a list, every question which answered True
    and the Terminal which was reached are appended to it.

    raises Exception if the walk ends anywhere other than a Terminal,
    which happens when no question at some node returns True.
    """
    node = decision_tree

    while isinstance(node, list):
        next_node = None
        for (question, new_node) in node:
            if question(reaction, mol_entries, params):

                # if decision_pathway is a list,
                # append the question which
                # answered true i.e the edge we follow
                if decision_pathway is not None:
                    decision_pathway.append(question)

                next_node = new_node
                break

        # stays None when every question returned False, which ends
        # the loop and is reported below
        node = next_node

    if isinstance(node, Terminal):
        if decision_pathway is not None:
            decision_pathway.append(node)

        return node == Terminal.KEEP

    # the offending node is carried in the exception rather than printed,
    # so that it ends up wherever the traceback does
    raise Exception(
        "unexpected node type reached: " + repr(node) + "\n"
        "this is usually caused because none of the questions "
        "in some node returned True.")
108 |
109 |
110 |
def default_rate(dG_barrier, params):
    """
    rate for a reaction with barrier dG_barrier at the temperature
    params['temperature']:

    rate = (kT / h) * exp(- dG_barrier / kT)

    where k is KB and h is PLANCK.
    """
    thermal_energy = KB * params['temperature']
    boltzmann_factor = math.exp(-dG_barrier / thermal_energy)
    return (thermal_energy / PLANCK) * boltzmann_factor
116 |
class dG_above_threshold(MSONable):
    """
    question: is the free energy change of the reaction above a
    threshold?

    when the answer is no, reaction['dG'], reaction['dG_barrier'] and
    reaction['rate'] are set as a side effect.

    threshold: largest dG for which the question answers False.

    free_energy_type: 'free_energy' or 'solvation_free_energy', the
    name of the molecule attribute to read energies from.

    constant_barrier: barrier added on top of max(dG, 0).
    """

    def __init__(self, threshold, free_energy_type, constant_barrier):

        self.threshold = threshold
        self.free_energy_type = free_energy_type
        self.constant_barrier = constant_barrier

        if free_energy_type == 'free_energy':
            self.get_free_energy = lambda mol: mol.free_energy
        elif free_energy_type == 'solvation_free_energy':
            self.get_free_energy = lambda mol: mol.solvation_free_energy
        else:
            raise Exception("unrecognized free energy type")

    def __str__(self):
        return (
            self.free_energy_type +
            " dG is above threshold=" +
            str(self.threshold))

    def __call__(self, reaction, mol_entries, params):
        reactant_ids = reaction['reactants'][
            :reaction['number_of_reactants']]
        product_ids = reaction['products'][
            :reaction['number_of_products']]

        free_energy_change = 0.0

        # positive charge_change means electrons are lost
        charge_change = 0.0

        for species_id in reactant_ids:
            species = mol_entries[species_id]
            free_energy_change -= self.get_free_energy(species)
            charge_change -= species.charge

        for species_id in product_ids:
            species = mol_entries[species_id]
            free_energy_change += self.get_free_energy(species)
            charge_change += species.charge

        # account for the electrons gained or lost
        free_energy_change += charge_change * params['electron_free_energy']

        if free_energy_change > self.threshold:
            return True

        # downhill reactions only pay the constant barrier. Uphill
        # reactions pay dG on top of it.
        uphill_cost = 0 if free_energy_change < 0 else free_energy_change
        if free_energy_change < 0:
            barrier = self.constant_barrier
        else:
            barrier = uphill_cost + self.constant_barrier

        reaction['dG'] = free_energy_change
        reaction['dG_barrier'] = barrier
        reaction['rate'] = default_rate(barrier, params)
        return False
172 |
173 |
class is_redox_reaction(MSONable):
    """
    question: does the total charge differ between reactants and
    products?

    the answer is also stored in reaction['is_redox'].
    """

    def __init__(self):
        pass

    def __str__(self):
        return "is redox reaction"

    def __call__(self, reaction, mol_entries, params):
        reactant_ids = reaction['reactants'][
            :reaction['number_of_reactants']]
        product_ids = reaction['products'][
            :reaction['number_of_products']]

        # positive charge_change means electrons are lost
        charge_change = 0.0

        for species_id in reactant_ids:
            charge_change -= mol_entries[species_id].charge

        for species_id in product_ids:
            charge_change += mol_entries[species_id].charge

        # `not` always produces a plain python bool, which is what
        # ends up stored on the reaction
        charge_changed = not (charge_change == 0)
        reaction['is_redox'] = charge_changed
        return charge_changed
202 |
203 |
class too_many_reactants_or_products(MSONable):
    """
    question: is the reaction anything other than one reactant going
    to one product?
    """

    def __init__(self):
        pass

    def __str__(self):
        return "too many reactants or products"

    def __call__(self, reaction, mols, params):
        one_to_one = (
            reaction['number_of_reactants'] == 1 and
            reaction['number_of_products'] == 1)

        return not one_to_one
218 |
219 |
class metal_metal_reaction(MSONable):
    """
    question: is this a one reactant, one product reaction in which
    both the reactant and the product have a formula in m_formulas?
    """

    def __init__(self):
        pass

    def __str__(self):
        # decision pathways are rendered with str(question). Without
        # this the question would show up as a default object repr.
        return "metal metal reaction"

    def __call__(self, reaction, mol_entries, params):
        if (reaction['number_of_reactants'] == 1 and
            reaction['number_of_products'] == 1 and
            (mol_entries[reaction['reactants'][0]].formula in m_formulas) and
            (mol_entries[reaction['products'][0]].formula in m_formulas)):

            return True
        else:
            return False
233 |
234 |
class dcharge_too_large(MSONable):
    """
    question: does the total charge change by more than one between
    reactants and products?
    """

    def __init__(self):
        pass

    def __str__(self):
        return "change in charge is too large"

    def __call__(self, reaction, mol_entries, params):
        reactant_ids = reaction['reactants'][
            :reaction['number_of_reactants']]
        product_ids = reaction['products'][
            :reaction['number_of_products']]

        charge_change = 0.0

        for species_id in reactant_ids:
            charge_change -= mol_entries[species_id].charge

        for species_id in product_ids:
            charge_change += mol_entries[species_id].charge

        return abs(charge_change) > 1
259 |
260 |
261 |
def marcus_barrier(reaction, mols, params):
    """
    compute the Marcus theory barrier for a one electron transfer
    between the first reactant and the first product, and store it in
    reaction['marcus_barrier'].

    The barrier is

        dG* = l / 4 * (1 + dG / l)^2

    where dG is the free energy of the reaction and l is the
    reorganization energy (the energy penalty for reorganizing the
    solvent environment to accommodate the change in local charge).

    l is split into two terms, l = l_inner + l_outer.

    l_inner represents the contribution from the first solvation shell.
    Here it is the mean of whichever of the relevant electron affinity
    and ionization energy values are available, floored at zero.

    l_outer represents the contribution from the bulk solvent:

        l_outer = e / (8 pi eps_0) * (1 / r - 1 / (2 R)) * (1 / n^2 - 1 / eps)

    e is the fundamental charge, eps_0 the vacuum permittivity, r the
    first solvation shell radius, R the distance to the electrode, n
    the index of refraction and eps the relative dielectric. r, R, n
    and eps are fixed constants below (n = 1.415 for EC, eps = 18.5 for
    EC/EMC).

    always returns False, so as a question it only has a side effect.

    raises ValueError if the charge does not change by exactly one, or
    if neither of the energies needed for l_inner is available.
    """

    reactant = mols[reaction['reactants'][0]]
    product = mols[reaction['products'][0]]
    dCharge = product.charge - reactant.charge

    if dCharge == -1:
        vals = [reactant.electron_affinity, product.ionization_energy]
    elif dCharge == 1:
        vals = [reactant.ionization_energy, product.electron_affinity]
    else:
        # previously this fell through and failed later with an
        # unbound local variable
        raise ValueError(
            "marcus barrier is only defined when the charge changes "
            "by exactly one, got dCharge=" + str(dCharge))

    vals_filtered = [v for v in vals if v is not None]
    if not vals_filtered:
        raise ValueError(
            "marcus barrier requires an electron affinity or an "
            "ionization energy, but both are None")

    l_inner = sum(vals_filtered) / len(vals_filtered)
    if l_inner < 0:
        l_inner = 0

    n = 1.415  # index of refraction; variable
    eps = 18.5  # dielectric constant; variable

    r = 6.0  # in Angstrom
    R = 7.5  # in Angstrom

    eps_0 = 8.85419 * 10 ** -12  # vacuum permittivity
    e = 1.602 * 10 ** -19  # fundamental charge

    l_outer = e / (8 * math.pi * eps_0)

    # Converting to SI units; factor of 2 is because of different
    # definitions of the distance to electrode
    l_outer *= (1 / r - 1 / (2 * R)) * 10 ** 10
    l_outer *= (1 / n ** 2 - 1 / eps)

    l = l_inner + l_outer

    dG = (product.free_energy - reactant.free_energy +
          dCharge * params['electron_free_energy'])
    dG_barrier = l / 4 * (1 + dG / l) ** 2
    reaction['marcus_barrier'] = dG_barrier
    return False
319 |
class reactant_and_product_not_isomorphic(MSONable):
    """
    question: do the first reactant and the first product have
    different covalent hashes?
    """

    def __init__(self):
        pass

    def __str__(self):
        return "reactants and products are not covalent isomorphic"

    def __call__(self, reaction, mols, params):
        reactant_hash = mols[reaction['reactants'][0]].covalent_hash
        product_hash = mols[reaction['products'][0]].covalent_hash
        return reactant_hash != product_hash
335 |
336 |
class reaction_default_true(MSONable):
    """
    question which always answers True. Placed last in a node, it
    guarantees that the node always has an edge to follow.
    """

    def __init__(self):
        pass

    def __str__(self):
        return "default true"

    def __call__(self, reaction, mols, params):
        return True
347 |
class star_count_diff_above_threshold(MSONable):
    """
    question: do the multisets of star hashes on the reactant side and
    on the product side differ by more than threshold entries?

    if you want to filter out break-one-form-one reactions, the
    correct value for the threshold is 6.
    """

    def __init__(self, threshold):
        self.threshold = threshold

    def __str__(self):
        return "star count diff above threshold=" + str(self.threshold)

    def __call__(self, reaction, mols, params):

        def tally_star_hashes(species_ids):
            # number of times each star hash occurs across the species
            tally = {}
            for species_id in species_ids:
                for star_hash in mols[species_id].star_hashes.values():
                    tally[star_hash] = tally.get(star_hash, 0) + 1
            return tally

        reactant_stars = tally_star_hashes(
            reaction['reactants'][:reaction['number_of_reactants']])
        product_stars = tally_star_hashes(
            reaction['products'][:reaction['number_of_products']])

        all_star_hashes = reactant_stars.keys() | product_stars.keys()

        difference = sum(
            abs(reactant_stars.get(star_hash, 0) -
                product_stars.get(star_hash, 0))
            for star_hash in all_star_hashes)

        return difference > self.threshold
394 |
class reaction_is_covalent_decomposable(MSONable):
    """
    question: in a two reactant, two product reaction, does some
    reactant share its covalent hash with some product?
    """

    def __init__(self):
        pass

    def __str__(self):
        return "reaction is covalent decomposable"

    def __call__(self, reaction, mols, params):
        two_to_two = (
            reaction['number_of_reactants'] == 2 and
            reaction['number_of_products'] == 2)

        if not two_to_two:
            return False

        reactant_hashes = {
            mols[species_id].covalent_hash
            for species_id in reaction['reactants'][:2]}

        product_hashes = {
            mols[species_id].covalent_hash
            for species_id in reaction['products'][:2]}

        return not reactant_hashes.isdisjoint(product_hashes)
425 |
426 |
class metal_coordination_passthrough(MSONable):
    """
    question: does any reactant or any product have a formula in
    m_formulas?
    """

    def __init__(self):
        pass

    def __str__(self):
        return "metal coordination passthrough"

    def __call__(self, reaction, mols, params):
        reactant_ids = reaction['reactants'][
            :reaction['number_of_reactants']]
        product_ids = reaction['products'][
            :reaction['number_of_products']]

        # reactants are examined before products, stopping at the
        # first hit
        if any(mols[species_id].formula in m_formulas
               for species_id in reactant_ids):
            return True

        return any(mols[species_id].formula in m_formulas
                   for species_id in product_ids)
449 |
450 |
class fragment_matching_found(MSONable):
    """
    question: is there a way of fragmenting the reactants and a way of
    fragmenting the products which produce the same multiset of
    fragment hashes?

    when a matching is found, the following keys are set on reaction:

    'reactant_bonds_broken', 'product_bonds_broken': the bonds broken
    on each side, each bond being a list of (species position, x)
    pairs with x taken from the fragment complex's bonds_broken.

    'hashes': dict from fragment hash to number of occurrences.

    'reactant_fragment_count', 'product_fragment_count': total number
    of fragments on each side.
    """

    def __init__(self):
        pass

    def __str__(self):
        return "fragment matching found"

    @staticmethod
    def _candidate_fragment_indices(species_ids, number_of_species, mols):
        """
        list the combinations of fragment_data indices to consider for
        one side of the reaction. With two species, only combinations
        breaking at most one bond in total are listed.
        """
        if number_of_species == 1:
            mol = mols[species_ids[0]]
            return [[i] for i in range(len(mol.fragment_data))]

        if number_of_species == 2:
            mol_0 = mols[species_ids[0]]
            mol_1 = mols[species_ids[1]]
            return [
                [i, j]
                for i, complex_0 in enumerate(mol_0.fragment_data)
                for j, complex_1 in enumerate(mol_1.fragment_data)
                if (complex_0.number_of_bonds_broken +
                    complex_1.number_of_bonds_broken <= 1)]

        return []

    @staticmethod
    def _summarize_fragments(species_ids, fragment_indices, mols):
        """
        for one combination of fragment complexes, return
        (fragment count, bonds broken, fragment hash counts).
        """
        fragment_count = 0
        bonds_broken = []
        hashes = dict()

        for species_position, frag_complex_index in enumerate(
                fragment_indices):

            fragment_complex = mols[
                species_ids[species_position]].fragment_data[
                    frag_complex_index]

            for bond in fragment_complex.bonds_broken:
                bonds_broken.append(
                    [(species_position, x) for x in bond])

            for i in range(fragment_complex.number_of_fragments):
                fragment_count += 1
                tag = fragment_complex.fragment_hashes[i]
                hashes[tag] = hashes.get(tag, 0) + 1

        return fragment_count, bonds_broken, hashes

    def __call__(self, reaction, mols, params):

        reactant_fragment_indices_list = self._candidate_fragment_indices(
            reaction['reactants'],
            reaction['number_of_reactants'],
            mols)

        product_fragment_indices_list = self._candidate_fragment_indices(
            reaction['products'],
            reaction['number_of_products'],
            mols)

        two_to_two = (
            reaction['number_of_reactants'] == 2 and
            reaction['number_of_products'] == 2)

        # a product summary does not depend on the reactant combination,
        # so each one is computed at most once, the first time it is
        # needed
        product_summaries = [None] * len(product_fragment_indices_list)

        for reactant_fragment_indices in reactant_fragment_indices_list:

            # likewise, the reactant summary is computed once per
            # reactant combination rather than once per pair
            (reactant_fragment_count,
             reactant_bonds_broken,
             reactant_hashes) = self._summarize_fragments(
                 reaction['reactants'],
                 reactant_fragment_indices,
                 mols)

            for k, product_fragment_indices in enumerate(
                    product_fragment_indices_list):

                if product_summaries[k] is None:
                    product_summaries[k] = self._summarize_fragments(
                        reaction['products'],
                        product_fragment_indices,
                        mols)

                (product_fragment_count,
                 product_bonds_broken,
                 product_hashes) = product_summaries[k]

                # don't consider fragmentations with both a ring opening
                # and closing
                if (two_to_two and
                        reactant_fragment_count == 2 and
                        product_fragment_count == 2):
                    continue

                if reactant_hashes == product_hashes:
                    reaction['reactant_bonds_broken'] = reactant_bonds_broken
                    reaction['product_bonds_broken'] = product_bonds_broken
                    reaction['hashes'] = reactant_hashes
                    reaction['reactant_fragment_count'] = (
                        reactant_fragment_count)
                    reaction['product_fragment_count'] = (
                        product_fragment_count)

                    return True

        return False
564 |
565 |
class single_reactant_single_product_not_atom_transfer(MSONable):
    """
    question: is this a one reactant, one product reaction which
    breaks one bond on each side, where none of the matched fragments
    is a lone hydrogen or a lone fluorine?

    reads the keys set on reaction by fragment_matching_found.
    """

    def __init__(self):
        pass

    def __str__(self):
        return "not hydrogen transfer"

    def __call__(self, reaction, mols, params):
        if reaction['number_of_reactants'] != 1:
            return False

        if reaction['number_of_products'] != 1:
            return False

        one_bond_each_side = (
            len(reaction['reactant_bonds_broken']) == 1 and
            len(reaction['product_bonds_broken']) == 1)

        if not one_bond_each_side:
            return False

        fragment_hashes = reaction['hashes']
        atom_transferred = (
            hydrogen_hash in fragment_hashes or
            fluorine_hash in fragment_hashes)

        return not atom_transferred
584 |
585 |
class single_reactant_double_product_ring_close(MSONable):
    """
    question: is this a one reactant, two product reaction which
    breaks one bond on each side while the product side is matched with
    exactly two fragments?

    reads the keys set on reaction by fragment_matching_found.
    """

    def __init__(self):
        pass

    def __str__(self):
        return "ring close"

    def __call__(self, reaction, mols, params):
        if reaction['number_of_reactants'] != 1:
            return False

        if reaction['number_of_products'] != 2:
            return False

        if len(reaction['reactant_bonds_broken']) != 1:
            return False

        if len(reaction['product_bonds_broken']) != 1:
            return False

        return reaction['product_fragment_count'] == 2
605 |
606 |
607 |
class concerted_metal_coordination(MSONable):
    """
    question: in a two reactant, two product reaction, does any of the
    four species have a formula in m_formulas?
    """

    def __init__(self):
        pass

    def __str__(self):
        return "concerted metal coordination"

    def __call__(self, reaction, mols, params):
        two_to_two = (
            reaction['number_of_reactants'] == 2 and
            reaction['number_of_products'] == 2)

        if not two_to_two:
            return False

        # look up all four species before testing any of them
        species = [
            mols[reaction['reactants'][0]],
            mols[reaction['reactants'][1]],
            mols[reaction['products'][0]],
            mols[reaction['products'][1]]]

        return any(mol.formula in m_formulas for mol in species)
636 |
class concerted_metal_coordination_one_product(MSONable):
    """
    question: in a two reactant, one product reaction, does one of the
    reactants have a formula in m_formulas while the product's covalent
    hash differs from both reactants' covalent hashes?
    """

    def __init__(self):
        pass

    def __str__(self):
        return "concerted metal coordination one product"

    def __call__(self, reaction, mols, params):
        two_to_one = (
            reaction['number_of_reactants'] == 2 and
            reaction['number_of_products'] == 1)

        if not two_to_one:
            return False

        first_reactant = mols[reaction['reactants'][0]]
        second_reactant = mols[reaction['reactants'][1]]
        product = mols[reaction['products'][0]]

        reactant_covalent_hashes = {
            first_reactant.covalent_hash,
            second_reactant.covalent_hash}

        metal_among_reactants = (
            first_reactant.formula in m_formulas or
            second_reactant.formula in m_formulas)

        if not metal_among_reactants:
            return False

        return product.covalent_hash not in reactant_covalent_hashes
668 |
class concerted_metal_coordination_one_reactant(MSONable):
    """
    reaction question: one reactant, two products, at least one
    product formula in m_formulas, and a reactant whose covalent hash
    differs from both product covalent hashes.
    """

    def __init__(self):
        pass

    def __str__(self):
        return "concerted metal coordination one reactant"

    def __call__(self, reaction, mols, params):
        """
        return True when the conditions in the class docstring hold,
        False otherwise. params is not used.
        """
        if (reaction['number_of_reactants'] != 1 or
                reaction['number_of_products'] != 2):
            return False

        first = mols[reaction['products'][0]]
        second = mols[reaction['products'][1]]
        source = mols[reaction['reactants'][0]]

        known_hashes = {first.covalent_hash, second.covalent_hash}

        metal_involved = (first.formula in m_formulas or
                          second.formula in m_formulas)

        return metal_involved and source.covalent_hash not in known_hashes
700 |
701 |
class single_reactant_with_ring_break_two(MSONable):
    """
    reaction question: a single reactant flagged has_covalent_ring
    whose ring_fragment_data contains a fragment complex with exactly
    the covalent hashes of the two products.
    """

    def __init__(self):
        pass

    def __str__(self):
        return "single reactant with a ring, break two"

    def __call__(self, reaction, mols, params):
        """
        return True if some entry of the reactant's ring_fragment_data
        has a fragment hash set equal to the set of product covalent
        hashes, False otherwise.
        """
        if reaction["number_of_reactants"] != 1:
            return False

        if reaction["number_of_products"] != 2:
            return False

        reactant = mols[reaction["reactants"][0]]
        if not reactant.has_covalent_ring:
            return False

        product_1 = mols[reaction["products"][0]]
        product_2 = mols[reaction["products"][1]]

        return any(
            set(candidate.fragment_hashes) ==
            {product_1.covalent_hash, product_2.covalent_hash}
            for candidate in reactant.ring_fragment_data)
724 |
725 |
class single_product_with_ring_form_two(MSONable):
    """
    reaction question: a single product flagged has_covalent_ring
    whose ring_fragment_data contains a fragment complex with exactly
    the covalent hashes of the two reactants.
    """

    def __init__(self):
        pass

    def __str__(self):
        return "single product with a ring, form two"

    def __call__(self, reaction, mols, params):
        """
        return True if some entry of the product's ring_fragment_data
        has a fragment hash set equal to the set of reactant covalent
        hashes, False otherwise.
        """
        if reaction["number_of_reactants"] != 2:
            return False

        if reaction["number_of_products"] != 1:
            return False

        product = mols[reaction["products"][0]]
        if not product.has_covalent_ring:
            return False

        reactant_1 = mols[reaction["reactants"][0]]
        reactant_2 = mols[reaction["reactants"][1]]

        return any(
            set(candidate.fragment_hashes) ==
            {reactant_1.covalent_hash, reactant_2.covalent_hash}
            for candidate in product.ring_fragment_data)
748 |
749 |
750 |
# Default decision tree for reaction filtering. Every entry is a
# (question, node) pair, where node is either a Terminal or a nested
# list of further (question, node) pairs.
# NOTE(review): presumably entries are tried in order and the first
# question answering True selects its node, as in the species decision
# tree; confirm against the reaction filter's tree runner.
default_reaction_decision_tree = [

    (metal_metal_reaction(), Terminal.DISCARD),
    # redox branch
    (is_redox_reaction(), [

        (too_many_reactants_or_products(), Terminal.DISCARD),
        (dcharge_too_large(), Terminal.DISCARD),
        (reactant_and_product_not_isomorphic(), Terminal.DISCARD),
        (dG_above_threshold(0.0, "free_energy", 0.0), Terminal.DISCARD),
        (reaction_default_true(), Terminal.KEEP)
    ]),

    (dG_above_threshold(0.0, "solvation_free_energy", 0.0), Terminal.DISCARD),


    # the two ring questions below are currently disabled
    # (single_reactant_with_ring_break_two(), Terminal.KEEP),
    # (single_product_with_ring_form_two(), Terminal.KEEP),

    (star_count_diff_above_threshold(6), Terminal.DISCARD),

    (reaction_is_covalent_decomposable(), Terminal.DISCARD),

    (concerted_metal_coordination(), Terminal.DISCARD),

    (concerted_metal_coordination_one_product(), Terminal.DISCARD),

    (concerted_metal_coordination_one_reactant(), Terminal.DISCARD),

    (metal_coordination_passthrough(), Terminal.KEEP),

    # fragment matching branch
    (fragment_matching_found(), [
        (single_reactant_single_product_not_atom_transfer(), Terminal.DISCARD),
        (single_reactant_double_product_ring_close(), Terminal.DISCARD),
        (reaction_default_true(), Terminal.KEEP)]
    ),

    # final catch-all entry: paired with DISCARD
    (reaction_default_true(), Terminal.DISCARD)
]
790 |
--------------------------------------------------------------------------------
/HiPRGen/report_generator.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | from copy import deepcopy
3 | from pathlib import Path
4 | from HiPRGen.logging import log_message
5 |
# maps an element symbol to the color name assigned to that atom's
# node when a molecule graph is drawn (see visualize_molecule_entry).
# elements missing from this table raise a KeyError there.
atom_colors = {
    "H": "gray",
    "C": "black",
    "Li": "purple",
    "B": "orange",
    "N": "blue",
    "O": "red",
    "F": "green4",
    "Mg": "green",
    "P": "darkorange",
    "S": "yellow",
    "Cl": "chartreuse"
}
19 |
def visualize_molecule_entry(molecule_entry, path):
    """
    draw a copy of molecule_entry.graph with graphviz and write the
    resulting pdf to path. Atoms are colored through atom_colors and
    a non zero molecular charge is shown as an extra boxed node.
    """
    graph = deepcopy(molecule_entry.graph)

    # attributes shared by every atom node, as (attribute name, value)
    uniform_node_attributes = (
        ("style", "filled"),
        ("shape", "circle"),
        ("width", "0.2"),
        ("fontsize", "8.0"),
        ("fontcolor", "white"),
        ("fixedsize", "true"),
    )

    for attribute_name, attribute_value in uniform_node_attributes:
        nx.set_node_attributes(graph, attribute_value, attribute_name)

    # node i gets the color of the i-th entry of molecule_entry.species
    node_colors = {
        node_index: atom_colors[symbol]
        for node_index, symbol in enumerate(molecule_entry.species)}

    nx.set_node_attributes(graph, node_colors, "color")

    charge = molecule_entry.charge
    agraph = nx.nx_agraph.to_agraph(graph)

    if charge != 0:
        agraph.add_node(
            "charge",
            label=str(charge),
            fontsize="25.0",
            shape="box",
            color="gray",
            style="dashed, rounded",
        )

    agraph.layout()

    destination = path.as_posix()
    log_message("writing " + destination)
    agraph.draw(destination, format="pdf")
57 |
58 |
def visualize_molecules(mol_entries, folder):
    """
    write one pdf per molecule entry into folder, named by the entry's
    position in mol_entries (0.pdf, 1.pdf, ...).

    folder is a pathlib.Path. It is created together with any missing
    parents, and an already existing folder is reused instead of
    raising FileExistsError, so a report can be regenerated in place.
    """
    folder.mkdir(parents=True, exist_ok=True)

    for index, molecule_entry in enumerate(mol_entries):
        visualize_molecule_entry(
            molecule_entry,
            folder.joinpath(str(index) + ".pdf"))
66 |
67 |
68 |
class ReportGenerator:
    """
    incrementally writes a LaTeX report to report_file_path.

    The file is opened and the document preamble is written on
    construction; finished() writes the closing line and closes the
    file. Molecule pictures are referenced as
    <mol_pictures_folder_name>/<index>.pdf.
    """

    def __init__(
            self,
            mol_entries,
            report_file_path,
            mol_pictures_folder_name='mol_pictures',
            rebuild_mol_pictures=True
    ):
        self.report_file_path = Path(report_file_path)
        self.mol_pictures_folder_name = mol_pictures_folder_name

        # pictures live next to the report file
        self.mol_pictures_folder = (
            self.report_file_path.parent / mol_pictures_folder_name)

        if rebuild_mol_pictures:
            visualize_molecules(mol_entries, self.mol_pictures_folder)

        self.mol_entries = mol_entries
        self.f = self.report_file_path.open(mode='w')

        # write in header
        preamble = (
            "\\documentclass{article}\n",
            "\\usepackage{graphicx}\n",
            "\\usepackage[margin=1cm]{geometry}\n",
            "\\usepackage{amsmath}\n",
            "\\pagenumbering{gobble}\n",
            "\\begin{document}\n",
            "\\setlength\\parindent{0pt}\n",
        )
        self.f.write("".join(preamble))

    def finished(self):
        """close the LaTeX document and the underlying file."""
        self.f.write("\\end{document}")
        self.f.close()

    def emit_molecule(self, species_index, include_index=True):
        """
        include the picture of species_index, optionally preceded by
        the index itself on its own line.
        """
        if include_index:
            self.f.write(str(species_index) + "\n")

        self.f.write(
            "\\raisebox{-.5\\height}{\\includegraphics[scale=0.2]{%s/%s.pdf}}\n"
            % (self.mol_pictures_folder_name, species_index))

    def emit_newline(self):
        """write a 1cm vertical space."""
        self.f.write("\n\\vspace{1cm}\n")

    def emit_newpage(self):
        """write a page break."""
        self.f.write("\\newpage\n\n\n")

    def emit_verbatim(self, s):
        """write s inside a verbatim environment."""
        self.f.write("".join((
            '\\begin{verbatim}\n',
            s,
            '\n',
            '\\end{verbatim}\n')))

    def emit_text(self, s):
        """write s as its own paragraph."""
        self.f.write('\n\n' + s + '\n\n')

    def emit_initial_state(self, initial_state):
        """
        list every species of initial_state (species id -> count)
        whose count is positive, together with its picture.
        """
        self.emit_text("initial state:")

        for species_id, count in initial_state.items():
            if not count > 0:
                continue

            self.emit_text(str(count) + " molecules of")
            self.emit_molecule(species_id)
            self.emit_newline()

    def _emit_species_sum(self, species_indices):
        # pictures of the given species, separated by "+" lines
        for position, species_index in enumerate(species_indices):
            if position > 0:
                self.f.write("+\n")

            self.emit_molecule(species_index)

    def emit_reaction(self, reaction, label=None):
        """
        write reaction as a display equation. Entries equal to -1 in
        reaction['reactants'] / reaction['products'] are skipped. When
        the reaction has a 'dG' key, the arrow is annotated with dG
        above and dG_barrier below.
        """
        reactant_indices = [
            index for index in reaction['reactants'] if index != -1]

        product_indices = [
            index for index in reaction['products'] if index != -1]

        self.f.write("$$\n")
        if label is not None:
            self.f.write(label + ": \n")

        self._emit_species_sum(reactant_indices)

        if 'dG' in reaction:
            arrow = "\\xrightarrow[%.2f]{%.2f}\n" % (
                reaction["dG_barrier"], reaction["dG"])
        else:
            arrow = "\\xrightarrow{}\n"

        self.f.write(arrow)

        self._emit_species_sum(product_indices)

        self.f.write("$$")
        self.f.write("\n\n\n")

    def emit_bond_breakage(self, reaction):
        """
        write the broken bonds recorded on the reaction, reactant side
        first, each bond in its own verbatim environment. A side is
        skipped when its key is absent from reaction.
        """
        sections = (
            ('reactant_bonds_broken', "reactant bonds broken:"),
            ('product_bonds_broken', "product bonds broken:"),
        )

        for key, heading in sections:
            if key in reaction:
                self.f.write(heading)
                for bond in reaction[key]:
                    self.emit_verbatim(str(bond))

        self.f.write("\n\n\n")
197 |
--------------------------------------------------------------------------------
/HiPRGen/species_filter.py:
--------------------------------------------------------------------------------
1 | from HiPRGen.mol_entry import MoleculeEntry
2 | import pickle
3 | from HiPRGen.species_questions import run_decision_tree
4 | from HiPRGen.constants import Terminal
5 | from HiPRGen.logging import log_message
6 | import networkx as nx
7 | import networkx.algorithms.isomorphism as iso
8 | from HiPRGen.report_generator import ReportGenerator
9 |
10 | """
11 | Phase 1: species filtering
12 | input: a list of dataset entries
13 | output: a filtered list of mol_entries with fixed indices
14 | description: this is where we remove isomorphic species, and do other forms of filtering. Species decision tree is what we use for filtering.
15 |
16 | species isomorphism filtering:
17 |
18 | The input dataset entries will often contain isomorphic molecules. Identifying such isomorphisms doesn't fit into the species decision tree, so we have it as a preprocessing phase.
19 | """
20 |
def sort_into_tags(mols):
    """
    bucket mols by the tag (charge, formula, covalent_hash).
    returns a dict mapping each tag to the list of mols carrying it,
    in the order they were encountered.
    """
    buckets_by_tag = {}

    for entry in mols:
        key = (entry.charge, entry.formula, entry.covalent_hash)
        buckets_by_tag.setdefault(key, []).append(entry)

    return buckets_by_tag
33 |
34 |
def really_covalent_isomorphic(mol1, mol2):
    """
    compare the covalent graphs of mol1 and mol2 with a direct
    isomorphism test in which nodes must agree on their 'specie'
    attribute, rather than comparing hashes.
    warning: this is really slow. It is used in species filtering
    to avoid hash collisions. Do not use it anywhere else.
    """
    same_specie = iso.categorical_node_match('specie', None)

    return nx.is_isomorphic(
        mol1.covalent_graph,
        mol2.covalent_graph,
        node_match=same_specie)
46 |
47 |
48 |
def groupby(equivalence_relation, xs):
    """
    partition xs into groups using equivalence_relation. Each element
    is compared against the first member of every existing group, in
    creation order, and joins the first group that matches; otherwise
    it starts a new group.
    warning: this has slightly different semantics than
    itertools groupby which depends on ordering.
    """
    classes = []

    for item in xs:
        for members in classes:
            if equivalence_relation(item, members[0]):
                members.append(item)
                break
        else:
            # no existing group accepted the item
            classes.append([item])

    return classes
68 |
69 |
def species_filter(
    dataset_entries,
    mol_entries_pickle_location,
    species_report,
    species_decision_tree,
    coordimer_weight,
    species_logging_decision_tree=Terminal.DISCARD,
    generate_unfiltered_mol_pictures=False
):

    """
    run each molecule through the species decision tree and then choose the lowest weight
    coordimer based on the coordimer_weight function.

    dataset_entries: entries converted with MoleculeEntry.from_dataset_entry
    mol_entries_pickle_location: file the final list of mol entries is pickled to
    species_report: path of the species report written through ReportGenerator
    species_decision_tree: decision tree; molecules for which it returns True are kept
    coordimer_weight: key function; the minimum weight member of each isomorphism group is kept
    species_logging_decision_tree: decision tree selecting the molecules described in the
        report. The default Terminal.DISCARD selects none.
    generate_unfiltered_mol_pictures: forwarded to ReportGenerator as rebuild_mol_pictures

    returns the filtered list of mol entries, each with its index stored in ind.
    """

    log_message("starting species filter")
    log_message("loading molecule entries from json")

    mol_entries_unfiltered = [
        MoleculeEntry.from_dataset_entry(e) for e in dataset_entries ]


    # note: this message is logged even when
    # generate_unfiltered_mol_pictures is False
    log_message("generating unfiltered mol pictures")

    report_generator = ReportGenerator(
        mol_entries_unfiltered,
        species_report,
        mol_pictures_folder_name='mol_pictures_unfiltered',
        rebuild_mol_pictures=generate_unfiltered_mol_pictures
    )

    report_generator.emit_text("species report")

    log_message("applying local filters")
    mol_entries_filtered = []

    # note: it is important here that we are applying the local filters before
    # the non local ones. We remove some molecules which are lower energy
    # than other more realistic lithomers.

    for i, mol in enumerate(mol_entries_unfiltered):
        log_message("filtering " + mol.entry_id)
        # questions which answered True for this molecule, followed by
        # the Terminal that was reached
        decision_pathway = []
        if run_decision_tree(mol, species_decision_tree, decision_pathway):
            mol_entries_filtered.append(mol)

        # the logging tree is evaluated for every molecule, kept or not
        if run_decision_tree(mol, species_logging_decision_tree):

            report_generator.emit_verbatim(
                '\n'.join([str(f) for f in decision_pathway]))

            report_generator.emit_text("number: " + str(i))
            report_generator.emit_text("entry id: " + mol.entry_id)
            report_generator.emit_text("uncorrected free energy: " +
                                       str(mol.free_energy))

            report_generator.emit_text(
                "number of coordination bonds: " +
                str(mol.number_of_coordination_bonds))

            report_generator.emit_text(
                "corrected free energy: " +
                str(mol.solvation_free_energy))

            report_generator.emit_text(
                "formula: " + mol.formula)

            # i indexes the unfiltered list, matching the picture names
            # in mol_pictures_unfiltered
            report_generator.emit_molecule(i, include_index=False)
            report_generator.emit_newline()


    report_generator.finished()


    # python doesn't have shared memory. That means that every worker during
    # reaction filtering must maintain its own copy of the molecules.
    # for this reason, it is good to remove attributes that are only used
    # during species filtering.
    log_message("clearing unneeded attributes")
    for m in mol_entries_filtered:
        del m.partial_charges_resp
        del m.partial_charges_mulliken
        del m.partial_charges_nbo
        del m.partial_spins_nbo
        del m.atom_locations

    # currently, take lowest energy mol in each iso class
    log_message("applying non local filters")


    def collapse_isomorphism_group(g):
        # pick the member of the group minimizing coordimer_weight
        lowest_energy_coordimer = min(g,key=coordimer_weight)
        return lowest_energy_coordimer


    mol_entries = []

    # bucket by the (charge, formula, covalent_hash) tag first, then
    # split every bucket with the direct isomorphism check
    for tag_group in sort_into_tags(mol_entries_filtered).values():
        for iso_group in groupby(really_covalent_isomorphic, tag_group):
            mol_entries.append(
                collapse_isomorphism_group(iso_group))


    log_message("assigning indices")

    for i, e in enumerate(mol_entries):
        e.ind = i


    log_message("creating molecule entry pickle")
    # ideally we would serialize mol_entries to a json
    # some of the auxiliary data we compute
    # has frozen set keys, so doesn't serialize well into json format.
    # pickles work better in this setting
    with open(mol_entries_pickle_location, 'wb') as f:
        pickle.dump(mol_entries, f)

    log_message("species filtering finished. " +
                str(len(mol_entries)) +
                " species")

    return mol_entries
192 |
--------------------------------------------------------------------------------
/HiPRGen/species_questions.py:
--------------------------------------------------------------------------------
1 | from HiPRGen.mol_entry import MoleculeEntry, FragmentComplex
2 | import networkx as nx
3 | from networkx.algorithms.graph_hashing import weisfeiler_lehman_graph_hash
4 | import copy
5 | from functools import partial
6 | from HiPRGen.constants import li_ec, Terminal, mg_g2, mg_thf, m_formulas, metals
7 | import numpy as np
8 | from monty.json import MSONable
9 | from itertools import combinations
10 |
11 | """
12 | species decision tree:
13 |
14 | A question is a function q(mol_entry) -> Bool
15 |
16 | Unlike for reaction filtering, these questions should not modify the mol_entry in any way.
17 |
18 | A node is either a Terminal or a non empty list [(question, node)]
19 |
20 | class Terminal(Enum):
21 | KEEP = 1
22 | DISCARD = -1
23 |
24 | For the return value of a question, True means travel to this node and False means try next question in the list.
25 |
26 | for non terminal nodes, it is an error if every question returns False. i.e getting stuck at a non terminal node is an error.
27 |
28 | Once a Terminal node is reached, it tells us whether to keep or discard the species.
29 | """
30 |
def run_decision_tree(mol_entry,
                      decision_tree,
                      decision_pathway=None):
    """
    walk decision_tree for mol_entry and return True if a
    Terminal.KEEP node is reached, False for any other Terminal.

    decision_tree: a Terminal, or a non empty list of (question, node)
        pairs. At a list node the questions are asked in order and the
        node paired with the first question answering True is followed.
    decision_pathway: optional list. If given, every question which
        answered True and the Terminal finally reached are appended.

    raises Exception if no question of a non terminal node answers
    True, or if a node is neither a list nor a Terminal.
    """
    node = decision_tree

    while isinstance(node, list):
        for question, new_node in node:
            if question(mol_entry):

                # if decision_pathway is a list,
                # append the question which
                # answered true i.e the edge we follow
                if decision_pathway is not None:
                    decision_pathway.append(question)

                node = new_node
                break
        else:
            # getting stuck at a non terminal node is an error
            raise Exception(
                "decision tree is stuck: no question answered True "
                "at a non terminal node")

    if not isinstance(node, Terminal):
        raise Exception("unexpected node type reached: " + repr(node))

    if decision_pathway is not None:
        decision_pathway.append(node)

    return node == Terminal.KEEP
67 |
68 |
class metal_ion_filter(MSONable):
    """
    only allow positively charged metal ions: answers True for a
    species whose formula is in m_formulas and whose charge is zero
    or negative.
    """

    def __init__(self):
        pass

    def __call__(self, mol_entry):
        is_bare_metal = mol_entry.formula in m_formulas
        return bool(is_bare_metal and mol_entry.charge <= 0)
79 |
class mol_not_connected(MSONable):
    """answers True when mol.graph is not a connected graph."""

    def __init__(self):
        pass

    def __call__(self, mol):
        connected = nx.is_connected(mol.graph)
        return not connected
86 |
class spin_multiplicity_filter(MSONable):
    """
    for molecules with spin multiplicity 2, count the atoms whose nbo
    partial spin exceeds threshold and add 1 to mol.penalty when two
    or more atoms do. The question itself always answers False.
    """

    def __init__(self, threshold):
        self.threshold = threshold

    def __call__(self, mol):
        if mol.spin_multiplicity == 2:
            atoms_above_threshold = sum(
                1 for atom in range(mol.num_atoms)
                if mol.partial_spins_nbo[atom] > self.threshold)

            if atoms_above_threshold >= 2:
                mol.penalty += 1

        return False
102 |
class positive_penalty(MSONable):
    """answers True when mol.penalty is greater than zero."""

    def __init__(self):
        pass

    def __call__(self, mol):
        return bool(mol.penalty > 0)
112 |
class add_star_hashes(MSONable):
    """
    for every atom index not in mol.m_inds, store in mol.star_hashes
    the Weisfeiler-Lehman hash (on the 'specie' attribute) of the
    atom's radius 1 neighborhood in the covalent graph.
    The question itself always answers False.
    """

    def __init__(self):
        pass

    def __call__(self, mol):
        for atom in range(mol.num_atoms):
            if atom in mol.m_inds:
                continue

            star = nx.generators.ego.ego_graph(
                mol.covalent_graph,
                atom,
                radius=1,
                undirected=True)

            mol.star_hashes[atom] = weisfeiler_lehman_graph_hash(
                star,
                node_attr='specie')

        return False
131 |
class add_unbroken_fragment(MSONable):
    """
    append to mol.fragment_data a fragment complex whose only
    fragment hash is mol.covalent_hash. Species whose formula is in
    m_formulas are left untouched.
    The question itself always answers False.
    """

    def __init__(self):
        pass

    def __call__(self, mol):
        if mol.formula not in m_formulas:
            # presumably (fragment count, bonds broken count,
            # broken bonds, fragment hashes); confirm against
            # FragmentComplex
            mol.fragment_data.append(
                FragmentComplex(1, 0, [], [mol.covalent_hash]))

        return False
149 |
class add_single_bond_fragments(MSONable):
    """
    for every edge of the covalent graph, remove that edge from a
    copy of the graph, hash each connected component of the result
    and append the corresponding fragment complex to
    mol.fragment_data. Species whose formula is in m_formulas are
    left untouched. The question itself always answers False.
    """

    def __init__(self):
        pass

    def __call__(self, mol):
        if mol.formula in m_formulas:
            return False

        for bond in mol.covalent_graph.edges:
            broken = copy.deepcopy(mol.covalent_graph)
            broken.remove_edge(*bond)

            component_hashes = [
                weisfeiler_lehman_graph_hash(
                    broken.subgraph(component),
                    node_attr='specie')
                for component in
                nx.algorithms.components.connected_components(broken)]

            # bond[0:2] keeps only the two endpoints of the edge
            mol.fragment_data.append(
                FragmentComplex(
                    len(component_hashes),
                    1,
                    [bond[0:2]],
                    component_hashes))

        return False
187 |
class has_covalent_ring(MSONable):
    """
    set mol.has_covalent_ring and answer with its value. When it is
    True, mol.ring_fragment_data is initialized to an empty list.
    """

    def __init__(self):
        pass

    def __call__(self, mol):
        # if mol is a metal, mol.covalent_graph is empty, so the
        # tree test is only made for non metals
        ring_present = (mol.formula not in m_formulas and
                        not nx.is_tree(mol.covalent_graph))

        mol.has_covalent_ring = ring_present

        if mol.has_covalent_ring:
            mol.ring_fragment_data = []

        return mol.has_covalent_ring
203 |
204 |
class covalent_ring_fragments(MSONable):
    """
    record in mol.ring_fragment_data the fragment complexes obtained
    by removing two ring edges at once. A ring edge is an edge whose
    removal leaves the covalent graph connected. Only pairs of ring
    edges which share no atom and whose endpoints are joined by a
    third ring edge are considered, and only when removing both
    disconnects the graph. The question itself always answers False.
    """

    def __init__(self):
        pass

    def __call__(self, mol):
        # maps edge to graph with that edge removed
        ring_edges = {}

        for bond in mol.covalent_graph.edges:
            without_bond = copy.deepcopy(mol.covalent_graph)
            without_bond.remove_edge(*bond)
            if nx.is_connected(without_bond):
                ring_edges[bond] = {
                    'modified_graph': without_bond,
                    'node_set': {bond[0], bond[1]}
                }

        for first_edge, second_edge in combinations(ring_edges, 2):
            first_info = ring_edges[first_edge]
            second_info = ring_edges[second_edge]

            if not first_info['node_set'].isdisjoint(second_info['node_set']):
                continue

            # edges joining an endpoint of the first edge to an
            # endpoint of the second one. The trailing 0 is presumably
            # the multigraph edge key; confirm against the graph type
            bridging_edges = [
                (first_end, second_end, 0)
                for first_end in first_edge[0:2]
                for second_end in second_edge[0:2]]

            one_bond_away = any(
                candidate in bridging_edges for candidate in ring_edges)

            if not one_bond_away:
                continue

            doubly_broken = copy.deepcopy(first_info['modified_graph'])
            doubly_broken.remove_edge(*second_edge)

            if nx.is_connected(doubly_broken):
                continue

            component_hashes = [
                weisfeiler_lehman_graph_hash(
                    doubly_broken.subgraph(component),
                    node_attr='specie')
                for component in
                nx.algorithms.components.connected_components(doubly_broken)]

            mol.ring_fragment_data.append(
                FragmentComplex(
                    len(component_hashes),
                    2,
                    [first_edge[0:2], second_edge[0:2]],
                    component_hashes))

        return False
267 |
268 |
class metal_complex(MSONable):
    """
    answers True when mol is not a bare metal (formula not in
    m_formulas) and its covalent graph is not connected.
    """

    def __init__(self):
        pass

    def __call__(self, mol):
        # if mol is a metal, it isn't a metal complex
        return (mol.formula not in m_formulas and
                not nx.is_connected(mol.covalent_graph))
279 |
280 |
class fix_hydrogen_bonding(MSONable):
    """
    make sure every hydrogen atom is bonded to at most one atom. For
    a hydrogen with several bonds in mol.graph only the bond to the
    closest atom is kept; the other bonds are removed from mol.graph
    and, when the partner is a node of mol.covalent_graph, from
    mol.covalent_graph as well.

    Hydrogens without any bond are skipped, so disconnected input
    graphs reach the connectivity filters instead of failing here.
    The question itself always answers False.
    """

    def __init__(self):
        pass

    def __call__(self, mol):
        if mol.num_atoms > 1:
            for i in range(mol.num_atoms):
                if mol.species[i] != 'H':
                    continue

                # (neighbor index, squared distance) for every bond of
                # atom i
                adjacent_atoms = []

                for bond in mol.graph.edges:
                    if i in bond[0:2]:

                        adjacent_atom = bond[1] if i == bond[0] else bond[0]

                        # assumes atom_locations entries support
                        # elementwise subtraction (e.g. numpy arrays)
                        displacement = (mol.atom_locations[adjacent_atom] -
                                        mol.atom_locations[i])

                        dist = np.inner(displacement, displacement)

                        adjacent_atoms.append((adjacent_atom, dist))

                # an isolated hydrogen has nothing to fix; min() would
                # raise ValueError on the empty list
                if not adjacent_atoms:
                    continue

                closest_atom, _ = min(adjacent_atoms, key=lambda pair: pair[1])

                for adjacent_atom, _ in adjacent_atoms:
                    if adjacent_atom != closest_atom:
                        mol.graph.remove_edge(i, adjacent_atom)
                        if adjacent_atom in mol.covalent_graph:
                            mol.covalent_graph.remove_edge(i, adjacent_atom)

        return False
319 |
320 |
class bad_metal_coordination(MSONable):
    """
    answers True for a species which is not a bare metal (formula not
    in m_formulas), contains at least one element of metals and has
    number_of_coordination_bonds equal to zero.
    """

    def __init__(self):
        pass

    def __call__(self, mol):
        if mol.formula in m_formulas:
            return False

        contains_metal = len(metals.intersection(set(mol.species))) > 0

        return bool(contains_metal and
                    mol.number_of_coordination_bonds == 0)
335 |
336 |
class set_solvation_free_energy(MSONable):
    """
    metal atoms coordinate with the surrounding solvent. We need to correct
    free energy to take this into account. The correction is
    solvation_correction * (
    max_number_of_coordination_bonds -
    number_of_coordination_bonds_in_mol)
    summed over the metal atoms in mol.m_inds.
    Since coordination bonding can't reliably be detected from the molecule
    graph, we search for all atoms within the coordination radius of the
    metal atom and count those for which at least one of the resp,
    mulliken or nbo partial charges is negative. A missing edge between
    the metal and such an atom is added to mol.graph.

    Sets mol.number_of_coordination_bonds and mol.solvation_free_energy.
    The question itself always answers False.
    """

    def __init__(self, solvation_env):
        self.solvation_env = solvation_env

    def __call__(self, mol):
        env = self.solvation_env
        total_correction = 0.0
        mol.number_of_coordination_bonds = 0

        for metal_index in mol.m_inds:

            element = mol.species[metal_index]
            nbo_charge = mol.partial_charges_nbo[metal_index]

            # suffix selecting the solvation_env entry for this metal
            if nbo_charge < 1.2:
                effective_charge = "_1"
            elif nbo_charge >= 1.2:
                effective_charge = "_2"

            species_charge = element + effective_charge
            radius = env["coordination_radius"][species_charge]
            bond_count = 0

            for other in range(mol.num_atoms):
                if other == metal_index:
                    continue

                offset = (
                    mol.atom_locations[other] -
                    mol.atom_locations[metal_index])

                # compare squared distances
                if not np.inner(offset, offset) < radius ** 2:
                    continue

                if not (mol.partial_charges_resp[other] < 0 or
                        mol.partial_charges_mulliken[other] < 0 or
                        mol.partial_charges_nbo[other] < 0):
                    continue

                if not mol.graph.has_edge(metal_index, other):
                    mol.graph.add_edge(metal_index, other)

                bond_count += 1

            mol.number_of_coordination_bonds += bond_count
            total_correction += env["solvation_correction"][species_charge] * (
                env["max_number_of_coordination_bonds"][species_charge] -
                bond_count)

        mol.solvation_free_energy = total_correction + mol.free_energy
        return False
394 |
395 |
class species_default_true(MSONable):
    """question which answers True for every molecule."""

    def __init__(self):
        pass

    def __call__(self, mol):
        answer = True
        return answer
402 |
403 |
def compute_graph_hashes(mol):
    """
    store the Weisfeiler-Lehman hashes (on the 'specie' attribute) of
    mol.graph and mol.covalent_graph in mol.total_hash and
    mol.covalent_hash. Always answers False.
    """
    hash_by_specie = partial(
        weisfeiler_lehman_graph_hash,
        node_attr='specie')

    mol.total_hash = hash_by_specie(mol.graph)
    mol.covalent_hash = hash_by_specie(mol.covalent_graph)

    return False
414 |
415 |
class neutral_metal_filter(MSONable):
    """
    answers True when some atom index in mol.m_inds has a species in
    metals and an nbo partial charge below cutoff.
    """

    def __init__(self, cutoff):
        self.cutoff = cutoff

    def __call__(self, mol):
        return any(
            mol.species[atom] in metals and
            mol.partial_charges_nbo[atom] < self.cutoff
            for atom in mol.m_inds)
428 |
class charge_too_big(MSONable):
    """answers True when mol.charge is above 1 or below -1."""

    def __init__(self):
        pass

    def __call__(self, mol):
        return bool(mol.charge > 1 or mol.charge < -1)
439 |
440 | # any species filter which modifies bonding has to come before
441 | # any filter checking for connectivity (which includes the metal-centric complex filter)
442 |
# species decision tree using the li_ec solvation environment.
# Entries are tried in order by run_decision_tree. Several questions
# (fix_hydrogen_bonding, set_solvation_free_energy,
# compute_graph_hashes, spin_multiplicity_filter, add_star_hashes,
# add_unbroken_fragment, add_single_bond_fragments) always answer
# False: they are listed for their side effects on the molecule and
# the Terminal paired with them is never reached.
li_species_decision_tree = [
    (fix_hydrogen_bonding(), Terminal.KEEP),
    (set_solvation_free_energy(li_ec), Terminal.KEEP),
    (charge_too_big(), Terminal.DISCARD),
    (neutral_metal_filter(0.1), Terminal.DISCARD),
    (compute_graph_hashes, Terminal.KEEP),
    (metal_ion_filter(), Terminal.DISCARD),
    (bad_metal_coordination(), Terminal.DISCARD),
    (mol_not_connected(), Terminal.DISCARD),
    (metal_complex(), Terminal.DISCARD),
    (spin_multiplicity_filter(0.4), Terminal.DISCARD),
    (add_star_hashes(), Terminal.KEEP),
    (add_unbroken_fragment(), Terminal.KEEP),
    (add_single_bond_fragments(), Terminal.KEEP),
    # ring fragment questions, currently disabled
    # (has_covalent_ring(), [
    #     (covalent_ring_fragments(), Terminal.KEEP),
    #     (species_default_true(), Terminal.KEEP)
    # ]),
    (species_default_true(), Terminal.KEEP)
]
463 |
# species decision tree using the mg_g2 solvation environment.
# Compared to li_species_decision_tree it has no charge_too_big or
# spin_multiplicity_filter entry and uses a neutral metal cutoff of
# 0.5. The questions which always answer False (fix_hydrogen_bonding,
# set_solvation_free_energy, compute_graph_hashes, add_star_hashes,
# add_unbroken_fragment, add_single_bond_fragments) are listed for
# their side effects on the molecule.
mg_species_decision_tree = [
    (fix_hydrogen_bonding(), Terminal.KEEP),
    (set_solvation_free_energy(mg_g2), Terminal.KEEP),
    (neutral_metal_filter(0.5), Terminal.DISCARD),
    (compute_graph_hashes, Terminal.KEEP),
    (metal_ion_filter(), Terminal.DISCARD),
    (bad_metal_coordination(), Terminal.DISCARD),
    (mol_not_connected(), Terminal.DISCARD),
    (metal_complex(), Terminal.DISCARD),
    (add_star_hashes(), Terminal.KEEP),
    (add_unbroken_fragment(), Terminal.KEEP),
    (add_single_bond_fragments(), Terminal.KEEP),
    (species_default_true(), Terminal.KEEP)
]
478 |
479 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
2 |
3 | (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
4 |
5 | (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
6 |
7 | (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
8 |
9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
10 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11 |
12 | You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such Enhancements or derivative works thereof, in binary and source code form.
13 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |
4 | HiPRGen (**Hi**_gh_ **P**_erformance_ **R**_eaction_ **Gen**_eration_) is a python module for constructing reaction networks via exhaustive reaction enumeration and filtering decision trees with the capacity to be applied to systems with hundreds of billions of possible reactions. HiPRGen is built on top of [MPI4py](https://mpi4py.readthedocs.io/en/stable/) which facilitates multi-node parallelism.
5 |
6 | ### Installation
7 |
8 | HiPRGen depends on `pymatgen`, `openbabel`, `pygraphviz`, `pycairo` and `mpi4py`. In our experience, the Conda version of MPI4py does not work consistently, so we use the [nix package manager](https://nixos.org/) to get HiPRGen running on a wide range of systems. Instructions for installing nix can be found [here](https://nixos.org/download.html).
9 |
10 | The whole process looks like this:
11 | ```
12 | # The first step requires sudo to create the directory /nix as root.
13 | # Run the NixOS install script below and follow the prompts.
14 | # Note: On Linux, instructions for uninstalling nix can be found with a quick
15 | # web search. On MacOS, uninstalling can be accomplished with this script:
16 | # https://gist.github.com/expelledboy/c00aebb004b178cf78b2c9b344526ff6
17 |
18 | sh <(curl -L https://nixos.org/nix/install) --daemon
19 |
20 | # If you have an M1 Mac, you also need to force nix to use x86 binaries
21 | # since some of our dependencies don't have native arm binaries.
22 | # Uncomment and run the following two lines if you have an M1 Mac:
23 |
24 | # mkdir -p ~/.config/nix
25 | # echo "system = x86_64-darwin" > ~/.config/nix/nix.conf
26 |
27 |
28 | # Close your existing terminal and open a new one, then run:
29 |
30 | git clone https://github.com/BlauGroup/HiPRGen
31 | cd HiPRGen
32 | nix-shell
33 | ```
34 |
35 | HiPRGen is supported for MacOS and Linux and has been tested on MacOS 11.6 and 12.0.1 as well as Ubuntu 21.10. Installation should take less than five minutes.
36 |
37 |
38 | ### Running on the LRC cluster
39 |
40 | On the LRC cluster, an environment where HiPRGen can be run is set up as follows:
41 |
42 | ```
43 | module load anaconda3/2024.02-1-11.4
44 | conda init
45 |
46 | logout, log back in
47 |
48 | module load gcc/11.4.0
49 | module load openmpi/4.1.6
50 |
51 | pip3 install --user mpi4py
52 | conda create -n HiPRGen_RNMC python=3.8
53 | conda activate HiPRGen_RNMC
54 | conda install -c conda-forge openbabel pygraphviz pycairo
55 | pip install pymatgen==2023.3.10
56 | pip install pydantic==V1.10.12
57 |
58 | cd $PROJ
59 | git clone https://github.com/BlauGroup/RNMC.git
60 | cd RNMC
61 | module load gsl
62 | CXX=g++ make
63 | export PATH=$PATH:$PROJ/RNMC/GMC
64 |
65 |
66 | can pick up from reloading the environment:
67 |
68 | conda activate HiPRGen_RNMC
69 | module load gcc/11.4.0
70 | module load openmpi/4.1.6
71 | module load gsl
72 | export PATH=$PATH:$PROJ/RNMC/GMC
73 | ```
74 |
75 | ### Tests
76 |
77 | Once you are in an environment where HiPRGen is installed, the tests can be run with `python test.py 4`. This will run the tests using 4 threads, though you could use as many threads as your machine allows to speed up the execution. Running the tests will populate working directories in `scratch`. Note that `test.py` is heavily commented to explain how to use HiPRGen. With at least 4 threads, the tests should take less than five minutes to run. Along with a variety of other information, the following lines will be printed to standard output to confirm that the tests have passed:
78 |
79 | ```
80 | mg_test: correct number of species
81 | mg_test: correct number of reactions
82 | li_test: correct number of species
83 | li_test: correct number of reactions
84 | ```
85 |
86 | Once the tests have finished, you can run `python -i repl.py` and inspect the `network_loader` object, which contains all of the data associated with the test Lithium / Ethylene Carbonate network after running 1000 trajectories. Additionally, HiPRGen has a report generation system for visualizing results. For example, in `scratch/li_test`, run `pdflatex LEDC_pathways.tex` to generate a PDF of the top pathways to Lithium Ethylene Dicarbonate (LEDC) in the test Lithium / Ethylene Carbonate network. Explanation of other types of reports and the commands to generate them are given in `test.py`.
87 |
88 |
89 | ### Design
90 |
91 | - Species filtering: This phase loads a JSON generated from our database, constructs molecule entries, filters them by isomorphism, and then runs each molecule through a handcrafted decision tree in `species_questions.py`. The resulting list is then pickled for loading in other phases. The reason we use pickle here instead of JSON is that some of the species questions append non-trivial data structures to the molecule entries which get mangled when serialized to JSON.
92 |
93 | - Bucketing: Now we loop through pairs (A,B) where A and B are molecules in the saved pickle and group them by atom counts. These groups are stored in a bucket database.
94 |
95 | - Reaction filtering + network generation: This is where MPI is used. The program launches a dispatcher process and many filter processes. The filter processes request buckets from the dispatcher, generate all possible reactions from each bucket, run those reactions through a decision tree from `reaction_questions.py`, and then send the reactions which pass the decision tree back to the dispatcher as they are generated. The dispatcher writes the reactions sent back from the filter processes into the reaction network database.
96 |
97 | - Simulation: Once the reaction network database has been generated, it is provided as an input to [RNMC](https://github.com/BlauGroup/RNMC) which runs simulations and writes them into the reaction network database. This is much better suited to Lustre filesystems than an approach involving writing each trajectory to an independent file.
98 |
99 | - Analysis: HiPRGen also has important primitives for useful analysis. The ReportGenerator class in `report_generator.py` facilitates the construction of a variety of useful PDFs via functions in `mc_analysis.py`, and the NetworkLoader class in `network_loader.py` allows for straightforward interrogation of the network and trajectories while abstracting away the fact that they are stored in a sqlite db.
100 |
101 | The network loader is a great place to start using the codebase and is run as follows:
102 |
103 | ```
104 | # run from the root directory of HiPRGen after running the tests
105 | from HiPRGen.network_loader import *
106 |
107 | network_loader = NetworkLoader(
108 | './scratch/li_test/rn.sqlite',
109 | './scratch/li_test/mol_entries.pickle',
110 | './scratch/li_test/initial_state.sqlite',
111 | )
112 | ```
113 |
--------------------------------------------------------------------------------
/default.nix:
--------------------------------------------------------------------------------
# Compatibility shim around the flake: downloads flake-compat at the revision
# pinned in flake.lock and evaluates this directory with it, exposing the
# result's `defaultNix` attribute.
# NOTE(review): presumably this lets non-flake tooling (e.g. `nix-build`)
# use flake.nix -- confirm against the flake-compat documentation.
(import
  (
    # Read the pinned flake-compat revision and hash from the lock file so
    # the shim always matches what the flake itself locks.
    let lock = builtins.fromJSON (builtins.readFile ./flake.lock); in
    fetchTarball {
      url = "https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz";
      sha256 = lock.nodes.flake-compat.locked.narHash;
    }
  )
  { src = ./.; }
).defaultNix
11 |
--------------------------------------------------------------------------------
/figures/HiPRGen_schematic.svg:
--------------------------------------------------------------------------------
1 |
2 |
339 |
--------------------------------------------------------------------------------
/figures/reaction_decision_tree.svg:
--------------------------------------------------------------------------------
1 |
2 |
501 |
--------------------------------------------------------------------------------
/figures/species_decision_tree.svg:
--------------------------------------------------------------------------------
1 |
2 |
483 |
--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
1 | {
2 | "nodes": {
3 | "RNMC": {
4 | "inputs": {
5 | "flake-compat": "flake-compat",
6 | "nixpkgs": "nixpkgs"
7 | },
8 | "locked": {
9 | "lastModified": 1662497946,
10 | "narHash": "sha256-z+6rs+ZKzEzuh5Hg//4GhbVa99Jurg49BMeAFgDvBTw=",
11 | "owner": "BlauGroup",
12 | "repo": "RNMC",
13 | "rev": "4130a62df7cadf6dde473dbaa0eb9ca893c1026c",
14 | "type": "github"
15 | },
16 | "original": {
17 | "owner": "BlauGroup",
18 | "repo": "RNMC",
19 | "type": "github"
20 | }
21 | },
22 | "flake-compat": {
23 | "flake": false,
24 | "locked": {
25 | "lastModified": 1650374568,
26 | "narHash": "sha256-Z+s0J8/r907g149rllvwhb4pKi8Wam5ij0st8PwAh+E=",
27 | "owner": "edolstra",
28 | "repo": "flake-compat",
29 | "rev": "b4a34015c698c7793d592d66adbab377907a2be8",
30 | "type": "github"
31 | },
32 | "original": {
33 | "owner": "edolstra",
34 | "repo": "flake-compat",
35 | "type": "github"
36 | }
37 | },
38 | "flake-compat_2": {
39 | "flake": false,
40 | "locked": {
41 | "lastModified": 1650374568,
42 | "narHash": "sha256-Z+s0J8/r907g149rllvwhb4pKi8Wam5ij0st8PwAh+E=",
43 | "owner": "edolstra",
44 | "repo": "flake-compat",
45 | "rev": "b4a34015c698c7793d592d66adbab377907a2be8",
46 | "type": "github"
47 | },
48 | "original": {
49 | "owner": "edolstra",
50 | "repo": "flake-compat",
51 | "type": "github"
52 | }
53 | },
54 | "nixpkgs": {
55 | "locked": {
56 | "lastModified": 1659446231,
57 | "narHash": "sha256-hekabNdTdgR/iLsgce5TGWmfIDZ86qjPhxDg/8TlzhE=",
58 | "owner": "NixOS",
59 | "repo": "nixpkgs",
60 | "rev": "eabc38219184cc3e04a974fe31857d8e0eac098d",
61 | "type": "github"
62 | },
63 | "original": {
64 | "owner": "NixOS",
65 | "ref": "nixos-21.11",
66 | "repo": "nixpkgs",
67 | "type": "github"
68 | }
69 | },
70 | "nixpkgs_2": {
71 | "locked": {
72 | "lastModified": 1659914493,
73 | "narHash": "sha256-lkA5X3VNMKirvA+SUzvEhfA7XquWLci+CGi505YFAIs=",
74 | "owner": "NixOS",
75 | "repo": "nixpkgs",
76 | "rev": "022caabb5f2265ad4006c1fa5b1ebe69fb0c3faf",
77 | "type": "github"
78 | },
79 | "original": {
80 | "owner": "NixOS",
81 | "ref": "nixos-21.05",
82 | "repo": "nixpkgs",
83 | "type": "github"
84 | }
85 | },
86 | "root": {
87 | "inputs": {
88 | "RNMC": "RNMC",
89 | "flake-compat": "flake-compat_2",
90 | "nixpkgs": "nixpkgs_2"
91 | }
92 | }
93 | },
94 | "root": "root",
95 | "version": 7
96 | }
97 |
--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
# Flake describing how to build, test and develop HiPRGen with nix.
{
  description = "Multi node reaction network generator";

  inputs = {
    nixpkgs.url = github:NixOS/nixpkgs/nixos-21.05;
    RNMC.url = github:BlauGroup/RNMC;
    # flake-compat is fetched as plain source (flake = false); it is the
    # input referenced by default.nix and shell.nix through flake.lock.
    flake-compat = {
      url = github:edolstra/flake-compat;
      flake = false;
    };
  };

  outputs = { self, nixpkgs, RNMC, flake-compat }:

    let

      # Build the HiPRGen python package for the given system string
      # (e.g. "x86_64-linux"). The check phase runs the end-to-end tests
      # in test.py with 2 threads, so every tool the tests invoke is
      # listed in checkInputs.
      HiPRGen = systemString:
        with import nixpkgs { system = systemString; };
        with python38Packages;
        buildPythonPackage {
          pname = "HiPRGen";
          version = "0.2";
          src = ./.;
          checkInputs = [
            pymatgen
            monty
            openbabel-bindings
            pygraphviz
            mpi4py
            pycairo
            mpi
            # RNMC package built for the same system
            (builtins.getAttr systemString RNMC.defaultPackage)
            sqlite
            openssh # needed for correct MPI functioning
          ];

          checkPhase = "python test.py 2";
        };


      # Development shell for the given system string. When installHiPRGen
      # is true the HiPRGen package itself is added to the shell, otherwise
      # that list entry is null.
      genericDevShell = systemString: installHiPRGen:
        with import nixpkgs { system = systemString; };
        mkShell {
          buildInputs = with python38Packages; [
            pymatgen
            monty
            openbabel-bindings
            pygraphviz
            pyright
            mpi4py
            pycairo
            (if installHiPRGen then (HiPRGen systemString) else null)
            texlive.combined.scheme-small
            mpi
            (sqlite.override { interactive = true; })
            (builtins.getAttr systemString RNMC.defaultPackage)
          ];
        };

    in {
      # Dev shells do not install HiPRGen itself (installHiPRGen = false).
      devShell = {
        x86_64-linux = genericDevShell "x86_64-linux" false;
        x86_64-darwin = genericDevShell "x86_64-darwin" false;
      };

      defaultPackage = {
        x86_64-linux = HiPRGen "x86_64-linux";
        x86_64-darwin = HiPRGen "x86_64-darwin";
      };

      # The checks are the package builds themselves, whose checkPhase runs
      # test.py.
      checks = {
        x86_64-linux.tests = HiPRGen "x86_64-linux";
        x86_64-darwin.tests = HiPRGen "x86_64-darwin";
      };
    };

}
78 |
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BlauGroup/HiPRGen/a0dddfedc21be0121745e5f33f27ad8aafe796ea/logo.png
--------------------------------------------------------------------------------
/logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
268 |
--------------------------------------------------------------------------------
/logo_dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BlauGroup/HiPRGen/a0dddfedc21be0121745e5f33f27ad8aafe796ea/logo_dark.png
--------------------------------------------------------------------------------
/repl.py:
--------------------------------------------------------------------------------
# Bootstrap for an interactive session (run with `python -i repl.py`).
# The wildcard imports are deliberate here: they put the network loader and
# species question helpers directly into the interactive namespace.
from HiPRGen.network_loader import *
from HiPRGen.species_questions import *

# Load the network, molecule entries and initial state that test.py writes
# into ./scratch/li_test, so the tests must have been run first.
network_loader = NetworkLoader(
    './scratch/li_test/rn.sqlite',
    './scratch/li_test/mol_entries.pickle',
    './scratch/li_test/initial_state.sqlite',
    )

# NOTE(review): presumably the ethylene carbonate entry of the li_test
# molecule list; the index 160 is specific to that data set -- verify.
ec = network_loader.mol_entries[160]
11 |
12 |
--------------------------------------------------------------------------------
/run_network_generation.py:
--------------------------------------------------------------------------------
"""MPI entry point for reaction filtering and network generation.

Usage (normally launched through mpirun/mpiexec):

    python run_network_generation.py mol_entries_pickle_file dispatcher_payload.json worker_payload.json

The rank equal to DISPATCHER_RANK runs the dispatcher; every other rank runs
a worker.
"""
import sys
import pickle
from mpi4py import MPI
from monty.serialization import loadfn

from HiPRGen.reaction_filter import (
    dispatcher,
    worker,
    DISPATCHER_RANK
)


# Fail with a readable usage message instead of a bare IndexError when the
# script is launched with the wrong number of arguments.
if len(sys.argv) != 4:
    sys.exit(
        "usage: python run_network_generation.py "
        "mol_entries_pickle_file dispatcher_payload.json worker_payload.json"
    )


comm = MPI.COMM_WORLD
rank = comm.Get_rank()

mol_entries_pickle_file = sys.argv[1]
dispatcher_payload_json = sys.argv[2]
worker_payload_json = sys.argv[3]

# NOTE: pickle.load executes arbitrary code embedded in the file. Only pass
# pickles produced by your own species filtering run, never untrusted files.
with open(mol_entries_pickle_file, 'rb') as f:
    mol_entries = pickle.load(f)


# Each rank loads only the payload for the role it plays.
if rank == DISPATCHER_RANK:
    dispatcher_payload = loadfn(dispatcher_payload_json)
    dispatcher(mol_entries,
               dispatcher_payload
               )

else:
    worker_payload = loadfn(worker_payload_json)
    worker(mol_entries,
           worker_payload
           )
39 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging metadata for HiPRGen."""
from setuptools import setup

# NOTE(review): flake.nix builds this package with version "0.2" while the
# metadata below says '0.1' -- confirm which one is intended.
setup(
    name='HiPRGen',
    version='0.1',
    description='HiPRGen',
    url='https://github.com/BlauGroup/HiPRGen',
    author='Daniel Barter',
    author_email='danielbarter@gmail.com',
    license='LBNL',
    packages=['HiPRGen'],
)
12 |
--------------------------------------------------------------------------------
/shell.nix:
--------------------------------------------------------------------------------
# Compatibility shim around the flake: downloads flake-compat at the revision
# pinned in flake.lock and evaluates this directory with it, exposing the
# result's `shellNix` attribute.
# NOTE(review): presumably this is what makes the plain `nix-shell` command
# from the README work -- confirm against the flake-compat documentation.
(import
  (
    # Read the pinned flake-compat revision and hash from the lock file so
    # the shim always matches what the flake itself locks.
    let lock = builtins.fromJSON (builtins.readFile ./flake.lock); in
    fetchTarball {
      url = "https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz";
      sha256 = lock.nodes.flake-compat.locked.narHash;
    }
  )
  { src = ./.; }
).shellNix
11 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import subprocess
4 | import sqlite3
5 | import pickle
6 |
7 |
8 | from HiPRGen.network_loader import NetworkLoader
9 | from HiPRGen.initial_state import find_mol_entry_from_xyz_and_charge
10 | from monty.serialization import loadfn, dumpfn
11 | from HiPRGen.species_filter import species_filter
12 | from HiPRGen.bucketing import bucket
13 | from HiPRGen.report_generator import ReportGenerator
14 | from HiPRGen.initial_state import insert_initial_state
15 | from HiPRGen.constants import ROOM_TEMP, Terminal
16 | from HiPRGen.reaction_filter_payloads import (
17 | DispatcherPayload,
18 | WorkerPayload
19 | )
20 |
21 | from HiPRGen.species_questions import (
22 | mg_species_decision_tree,
23 | li_species_decision_tree,
24 | positive_penalty,
25 | species_default_true
26 | )
27 |
28 | from HiPRGen.reaction_questions import (
29 | default_reaction_decision_tree,
30 |
31 | )
32 |
33 | from HiPRGen.mc_analysis import (
34 | reaction_tally_report,
35 | species_report,
36 | Pathfinding,
37 | SimulationReplayer,
38 | generate_pathway_report,
39 | sink_report,
40 | consumption_report,
41 | redox_report,
42 | coordination_report,
43 | decoordination_report
44 | )
45 |
46 | # Since HiPRGen uses an end-to-end testing approach rather than testing
47 | # each individual function, we have decided to use the tests as
48 | # documentation, by explaining every single line through the first test.
49 |
50 |
51 | # The first thing you need to consider when using HiPRGen is how many
52 | # worker threads do you want to run. HiPRGen can be run with a single
53 | # thread or thousands distributed across several nodes. For reaction
54 | # networks with between ~5000 and ~10000 species, we have found that the
55 | # optimal number of worker threads is between 1000 and 2000. If you try
56 | # and use more than that, the worker threads are going to spend lots of
57 | # time waiting for the dispatcher to get through all of the reactions it
58 | # is being sent, which slows everything down. Fixing this would require
59 | # a more complex distributed system, but it hasn't been an issue even
60 | # for very large reaction networks.
# The only command line argument is the number of threads. sys.exit is used
# rather than quit() because quit() is a helper installed by the site module
# for interactive sessions, and passing the message to sys.exit makes a wrong
# invocation finish with a non-zero exit status.
if len(sys.argv) != 2:
    sys.exit("usage: python test.py number_of_threads")


# Kept as a string: it is only ever spliced into subprocess argument lists.
number_of_threads = sys.argv[1]
67 |
class bcolors:
    """ANSI escape sequences wrapped around the pass / fail lines printed by the tests."""

    # bold green, used for passing checks
    PASS = '\u001b[32;1m'
    # bold red, used for failing checks
    FAIL = '\u001b[31;1m'
    # resets all terminal attributes
    ENDC = '\u001b[0m'
72 |
73 |
74 | # HiPRGen is organized as a pipeline, where all the relevant data is
75 | # stored in a sqlite database between phases. For this reason, during
76 | # a single run of the full pipeline, it makes sense to store all the
77 | # relevant files in a single directory. We have two test sets, a lithium
78 | # set and a magnesium set. Since the lithium test set is older, we shall
79 | # document that instead of the mg test set.
80 |
# Start every run from an empty ./scratch directory. check=True makes a
# failed cleanup or mkdir abort the run immediately instead of letting the
# tests continue against a stale or missing working directory.
if os.path.isdir('./scratch'):
    subprocess.run(['rm', '-r', './scratch'], check=True)

subprocess.run(['mkdir', './scratch'], check=True)
85 |
86 |
87 |
def li_test():
    """Run the full pipeline on the lithium test set and check the network size.

    Reads ./data/ronald_LIBE.json, writes every intermediate database and
    report into ./scratch/li_test, and prints one coloured line per check.

    Returns:
        True if the resulting network has 190 species and 4921 reactions,
        False otherwise.
    """

    # folder is where we store all our intermediate databases
    folder = './scratch/li_test'
    subprocess.run(['mkdir', folder ])

    # The initial input to the pipeline is a list of LIBE or MADEIRA
    # dataset entries. We provide two examples in the data folder.
    mol_json = './data/ronald_LIBE.json'
    database_entries = loadfn(mol_json)
    # The first step of the HiPRGen pipeline is passing the input molecules
    # through the species decision tree to discard molecules.
    species_decision_tree = li_species_decision_tree


    # There is one non-local part of species filtering: we consider two
    # molecules to be equivalent if they have the same total charge,
    # composition, and covalent bonds, even if they have different metal
    # coordination, and we choose one such molecule in each "coordimer"
    # class using the coordimer weight function. Since most of our logging
    # later on is defined in terms of a fixed molecule set, logging for
    # the species filtering phase is messy, so ignore the species_report
    # argument for now. The second argument is where we store a pickle of
    # the filtered molecule entries for use in later phases.

    mol_entries = species_filter(
        database_entries,
        mol_entries_pickle_location=folder + '/mol_entries.pickle',
        species_report=folder + '/unfiltered_species_report.tex',
        species_decision_tree=species_decision_tree,
        coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy),
    )


    # Once we have generated our molecule list, we generate the bucket database
    # which is how we break up the reaction filtering amongst all available workers.
    # It gets stored in the buckets.sqlite database.
    bucket(mol_entries, folder + '/buckets.sqlite')


    # Reaction filtering is parallelized using MPI, so we need to spawn
    # an MPI instance to run it. This is why we can't just start
    # reaction filtering by calling a python function. We pass the
    # reaction decision tree, the logging decision tree, and the electron
    # free energy as strings across this barrier. Every possible
    # reaction gets passed through both the reaction decision tree and
    # the logging decision tree. If a reaction passes the reaction
    # decision tree, it gets written to the network. If a reaction
    # passes the logging decision tree, it gets logged to the reaction
    # report along with what happened to it in reaction_decision_tree.

    # The reaction decision trees are constructed in
    # HiPRGen.reaction_questions

    params = {
        'temperature' : ROOM_TEMP,
        'electron_free_energy' : -1.4
    }

    dispatcher_payload = DispatcherPayload(
        folder + '/buckets.sqlite',
        folder + '/rn.sqlite',
        folder + '/reaction_report.tex'
    )

    worker_payload = WorkerPayload(
        folder + '/buckets.sqlite',
        default_reaction_decision_tree,
        params,
        Terminal.DISCARD
    )


    # The dispatcher and worker payloads are passed through the MPI barrier
    # as JSON blobs dispatcher_payload and worker_payload
    dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json')
    dumpfn(worker_payload, folder + '/worker_payload.json')

    subprocess.run(
        [
            'mpirun',
            '--use-hwthread-cpus',
            '-n',
            number_of_threads,
            'python',
            'run_network_generation.py',
            folder + '/mol_entries.pickle',
            folder + '/dispatcher_payload.json',
            folder + '/worker_payload.json'
        ]
    )

    # After we have generated the mol_entries, we refer to molecules by
    # their index. The function find_mol_entry_from_xyz_and_charge can
    # help find the indices of specific species to be used in the initial
    # condition for propagating trajectories and/or trajectory analysis.
    Li_plus_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/Li.xyz',
        1)

    EC_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/EC.xyz',
        0)

    LEDC_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/LEDC.xyz',
        0)


    # After generating a reaction network, it is stored in rn.sqlite. We
    # use Monte Carlo simulation to interrogate the network, and for that
    # we need to define an initial condition.
    initial_state = {
        Li_plus_id : 30,
        EC_id : 30
    }

    # The initial state and the trajectories (after simulation) are stored in
    # a separate database from the network, here called initial_state.sqlite.
    # This facilitates running multiple independent simulations of the same
    # network with different initial conditions at the same time, if desired.
    insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite')


    # GMC is a high performance reaction network Monte Carlo simulator using the
    # Gillespie algorithm: https://github.com/BlauGroup/RNMC. Here we run 1000
    # trajectories each of 200 steps.
    subprocess.run([
        'GMC',
        '--reaction_database=' + folder + '/rn.sqlite',
        '--initial_state_database=' + folder + '/initial_state.sqlite',
        '--number_of_simulations=1000',
        '--base_seed=1000',
        '--thread_count=' + number_of_threads,
        '--step_cutoff=200'
    ])

    # The network loader builds a python object around a reaction network
    # and the molecules to make it easier to use them.
    network_loader = NetworkLoader(
        folder + '/rn.sqlite',
        folder + '/mol_entries.pickle',
        folder + '/initial_state.sqlite'
    )

    network_loader.load_trajectories()
    network_loader.load_initial_state()



    # HiPRGen has analysis tools to understand what happened in our simulation.
    # The output files are written into the same folder in which the reaction
    # network is stored.

    # This report is empty, but we use it to generate the molecule pictures.
    # This is an expensive operation, so we only want to do it once.
    report_generator = ReportGenerator(
        network_loader.mol_entries,
        folder + '/dummy.tex',
        rebuild_mol_pictures=True)


    # The tally report shows reactions sorted by the number of times fired.
    reaction_tally_report(
        network_loader,
        folder + '/reaction_tally.tex'
    )
    # Run `pdflatex reaction_tally.tex` in `scratch/li_test` to generate
    # the tally report PDF.


    # The species report shows every species in the network and their IDs.
    species_report(network_loader, folder + '/species_report.tex')
    # Run `pdflatex species_report.tex` in `scratch/li_test` to generate
    # the species report PDF.


    # Pathfinding is a central goal of HiPRGen / GMC. See mc_analysis.py for
    # further documentation of the Pathfinding class.
    pathfinding = Pathfinding(network_loader)


    # The pathway report shows all the ways that a target species was
    # produced in the simulation trajectories, where each simulation only
    # contributes the shortest path responsible for the first formation
    # of the target species to the report. The report can be sorted by
    # pathway frequency, but instead here we sort by pathway cost. Note
    # that the test network has ~5000 reactions while production networks
    # have between 50-100 million reactions.
    generate_pathway_report(
        pathfinding,
        LEDC_id,
        folder + '/LEDC_pathways.tex',
        sort_by_frequency=False
    )
    # Run `pdflatex LEDC_pathways.tex` in `scratch/li_test` to generate
    # the LEDC pathway report PDF.


    # The simulation replayer sweeps through all trajectories in order
    # to extract additional information that is used for consumption
    # reports and sink reports.
    simulation_replayer = SimulationReplayer(network_loader)


    # The consumption report shows reactions which consumed a target
    # species, sorted by the number of times the reaction fired.
    consumption_report(simulation_replayer,
                       LEDC_id,
                       folder + '/LEDC_consumption_report.tex')
    # Run `pdflatex LEDC_consumption_report.tex` in `scratch/li_test`
    # to generate the LEDC consumption report PDF.


    # The sink report shows species which have a production to
    # consumption ratio of greater than 3/2 and which have an expected
    # value above 0.1. These are two of the three heuristic criteria
    # that we use to identify network products. The third criterion is
    # that each network product must have a shortest path with cost
    # less than 10. This can be checked by generating pathway reports
    # to each species shown in the sink report. For the curious reader,
    # we note that generating pathway reports to the six species in the
    # sink report will show that only Li2CO3, C2H4, LiEDC-, and DLEMC
    # have sufficiently low-cost paths to pass the third criterion and
    # thus to be considered products of the test network used here.
    sink_report(simulation_replayer, folder + '/sink_report.tex')
    # Run `pdflatex sink_report.tex` in `scratch/li_test` to generate
    # the sink report PDF.



    # Compare the size of the generated network against the known-good
    # values. The failure lines say "incorrect" and include both counts so
    # that a red line cannot be mistaken for a pass.
    expected_species = 190
    expected_reactions = 4921

    tests_passed = True
    if network_loader.number_of_species == expected_species:
        print(bcolors.PASS +
              "li_test: correct number of species" +
              bcolors.ENDC)
    else:
        print(bcolors.FAIL +
              "li_test: incorrect number of species (expected " +
              str(expected_species) + ", got " +
              str(network_loader.number_of_species) + ")" +
              bcolors.ENDC)
        tests_passed = False



    if network_loader.number_of_reactions == expected_reactions:
        print(bcolors.PASS +
              "li_test: correct number of reactions" +
              bcolors.ENDC)
    else:
        print(bcolors.FAIL +
              "li_test: incorrect number of reactions (expected " +
              str(expected_reactions) + ", got " +
              str(network_loader.number_of_reactions) + ")" +
              bcolors.ENDC)
        tests_passed = False

    return tests_passed
347 |
348 |
def mg_test():
    """Run the full pipeline on the magnesium test set and check the network size.

    Reads ./data/sam_G2.json, writes every intermediate database and report
    into ./scratch/mg_test, and prints one coloured line per check. See
    li_test for a step by step explanation of the pipeline.

    Returns:
        True if the resulting network has 83 species and 788 reactions,
        False otherwise.
    """

    folder = './scratch/mg_test'
    subprocess.run(['mkdir', folder ])

    mol_json = './data/sam_G2.json'
    species_decision_tree = mg_species_decision_tree

    database_entries = loadfn(mol_json)



    # Species filtering: the filtered molecule entries are pickled into
    # the test folder for the later phases.
    mol_entries = species_filter(
        database_entries,
        folder + '/mol_entries.pickle',
        folder + '/unfiltered_species_report.tex',
        species_decision_tree,
        coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy)
    )



    # Bucketing
    bucket(mol_entries, folder + '/buckets.sqlite')


    # Reaction filtering + network generation, run under MPI
    dispatcher_payload = DispatcherPayload(
        folder + '/buckets.sqlite',
        folder + '/rn.sqlite',
        folder + '/reaction_report.tex'
    )

    worker_payload = WorkerPayload(
        folder + '/buckets.sqlite',
        default_reaction_decision_tree,
        {
            'temperature' : ROOM_TEMP,
            'electron_free_energy' : -2.06
        },
        Terminal.DISCARD
    )


    dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json')
    dumpfn(worker_payload, folder + '/worker_payload.json')

    subprocess.run(
        [
            'mpiexec',
            '--use-hwthread-cpus',
            '-n',
            number_of_threads,
            'python',
            'run_network_generation.py',
            folder + '/mol_entries.pickle',
            folder + '/dispatcher_payload.json',
            folder + '/worker_payload.json'
        ]
    )


    mg_g2_plus_plus_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/mgg2.xyz',
        2)

    c2h4_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/c2h4.xyz',
        0)

    c2h6_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/c2h6.xyz',
        0)

    # NOTE(review): the initial state uses the hard-coded molecule indices
    # 33 and 81 while mg_g2_plus_plus_id above is looked up but never used.
    # Confirm whether one of these indices is meant to be
    # mg_g2_plus_plus_id; left unchanged so the simulated trajectories stay
    # the same.
    initial_state = {
        33 : 30,
        81 : 30
    }


    insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite')


    # Simulation: 1000 trajectories with a step cutoff of 200
    subprocess.run([
        'GMC',
        '--reaction_database=' + folder + '/rn.sqlite',
        '--initial_state_database=' + folder + '/initial_state.sqlite',
        '--number_of_simulations=1000',
        '--base_seed=1000',
        '--thread_count=' + number_of_threads,
        '--step_cutoff=200'
    ])



    network_loader = NetworkLoader(
        folder + '/rn.sqlite',
        folder + '/mol_entries.pickle',
        folder + '/initial_state.sqlite'
    )

    network_loader.load_trajectories()
    network_loader.load_initial_state()



    # Analysis reports, written next to the network databases
    report_generator = ReportGenerator(
        network_loader.mol_entries,
        folder + '/dummy.tex',
        rebuild_mol_pictures=True)

    reaction_tally_report(
        network_loader,
        folder + '/reaction_tally.tex'
    )

    pathfinding = Pathfinding(network_loader)

    generate_pathway_report(
        pathfinding,
        c2h6_id,
        folder + '/C2H6_pathways.tex',
        sort_by_frequency=False
    )

    generate_pathway_report(
        pathfinding,
        c2h4_id,
        folder + '/C2H4_pathways.tex',
        sort_by_frequency=False
    )



    species_report(network_loader, folder + '/species_report.tex')

    # Compare the size of the generated network against the known-good
    # values. The failure lines say "incorrect" and include both counts so
    # that a red line cannot be mistaken for a pass.
    expected_species = 83
    expected_reactions = 788

    tests_passed = True
    if network_loader.number_of_species == expected_species:
        print(bcolors.PASS +
              "mg_test: correct number of species" +
              bcolors.ENDC)
    else:
        print(bcolors.FAIL +
              "mg_test: incorrect number of species (expected " +
              str(expected_species) + ", got " +
              str(network_loader.number_of_species) + ")" +
              bcolors.ENDC)
        tests_passed = False



    if network_loader.number_of_reactions == expected_reactions:
        print(bcolors.PASS +
              "mg_test: correct number of reactions" +
              bcolors.ENDC)
    else:
        print(bcolors.FAIL +
              "mg_test: incorrect number of reactions (expected " +
              str(expected_reactions) + ", got " +
              str(network_loader.number_of_reactions) + ")" +
              bcolors.ENDC)
        tests_passed = False

    return tests_passed
511 |
512 |
def flicho_test():
    """Run the pipeline on the flicho data set and write Li reports.

    Every artifact is written under ``./scratch/flicho_test``. The steps
    are: filter the species loaded from ``./data/flicho_test.json``,
    bucket them, launch ``run_network_generation.py`` through ``mpirun``,
    insert an initial state that maps the Li (charge 1) and EC (charge 0)
    entries to 30 each, run ``GMC``, and finally emit the coordination
    and decoordination reports for ``'Li1'``.

    Returns:
        True once every step has run. This test makes no numeric
        assertions; the explicit return value exists because the
        module-level runner exits on a falsy result and would otherwise
        treat a completed run (implicit ``None``) as a failure.
    """
    # Imported locally so the block does not depend on the file's
    # top-level imports providing os.
    import os

    folder = './scratch/flicho_test'
    # Creates missing parent directories and tolerates an existing folder,
    # which shelling out to `mkdir` did not.
    os.makedirs(folder, exist_ok=True)

    mol_json = './data/flicho_test.json'
    database_entries = loadfn(mol_json)

    mol_entries = species_filter(
        database_entries,
        mol_entries_pickle_location=folder + '/mol_entries.pickle',
        species_report=folder + '/unfiltered_species_report.tex',
        species_decision_tree=li_species_decision_tree,
        coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy),
    )

    bucket(mol_entries, folder + '/buckets.sqlite')

    params = {
        'temperature': ROOM_TEMP,
        'electron_free_energy': -1.4
    }

    dispatcher_payload = DispatcherPayload(
        folder + '/buckets.sqlite',
        folder + '/rn.sqlite',
        folder + '/reaction_report.tex'
    )

    worker_payload = WorkerPayload(
        folder + '/buckets.sqlite',
        default_reaction_decision_tree,
        params,
        Terminal.DISCARD
    )

    # The payloads are serialized to disk because the MPI processes
    # started below receive them as file paths on the command line.
    dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json')
    dumpfn(worker_payload, folder + '/worker_payload.json')

    subprocess.run(
        [
            'mpirun',
            '--use-hwthread-cpus',
            '-n',
            number_of_threads,
            'python',
            'run_network_generation.py',
            folder + '/mol_entries.pickle',
            folder + '/dispatcher_payload.json',
            folder + '/worker_payload.json'
        ]
    )

    Li_plus_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/Li.xyz',
        1)

    EC_id = find_mol_entry_from_xyz_and_charge(
        mol_entries,
        './xyz_files/EC.xyz',
        0)

    initial_state = {
        Li_plus_id: 30,
        EC_id: 30
    }

    insert_initial_state(
        initial_state,
        mol_entries,
        folder + '/initial_state.sqlite')

    subprocess.run([
        'GMC',
        '--reaction_database=' + folder + '/rn.sqlite',
        '--initial_state_database=' + folder + '/initial_state.sqlite',
        '--number_of_simulations=1000',
        '--base_seed=1000',
        '--thread_count=' + number_of_threads,
        '--step_cutoff=200'
    ])

    network_loader = NetworkLoader(
        folder + '/rn.sqlite',
        folder + '/mol_entries.pickle',
        folder + '/initial_state.sqlite'
    )

    network_loader.load_trajectories()
    network_loader.load_initial_state()

    # The instance is never used afterwards; the call is kept for whatever
    # it does on construction.
    # NOTE(review): presumably this regenerates the molecule pictures the
    # reports include (rebuild_mol_pictures=True) -- confirm in
    # report_generator.py.
    ReportGenerator(
        network_loader.mol_entries,
        folder + '/dummy.tex',
        rebuild_mol_pictures=True)

    coordination_report(
        network_loader,
        folder + '/coodination_report.tex',
        'Li1',
        1)

    decoordination_report(
        network_loader,
        folder + '/decoodination_report.tex',
        'Li1',
        1)

    return True
624 |
625 |
# Tests executed, in order, when this script runs. flicho_test is
# currently disabled.
tests = [
    mg_test,
    li_test,
    # flicho_test
]

# Stop at the first test that reports failure and exit with status 1.
# SystemExit is raised directly because the exit() helper is installed by
# the site module and is unavailable when Python is started with -S.
for test in tests:
    if not test():
        raise SystemExit(1)
635 |
--------------------------------------------------------------------------------
/xyz_files/EC.xyz:
--------------------------------------------------------------------------------
1 | 10
2 |
3 | O 0.302660 -1.163080 -0.394382
4 | C -0.686505 -0.520106 0.343041
5 | C 1.512867 -0.493809 -0.093939
6 | O -1.903519 -0.645190 0.011939
7 | O -0.250883 0.958755 0.295313
8 | C 1.112995 0.987830 -0.090866
9 | H 2.245948 -0.740544 -0.861775
10 | H 1.886705 -0.803461 0.888700
11 | H 1.209475 1.427764 -1.089357
12 | H 1.696777 1.577418 0.621041
--------------------------------------------------------------------------------
/xyz_files/EMC.xyz:
--------------------------------------------------------------------------------
1 | 15
2 | H8 C4 O3
3 | O 0.663253 -0.458691 -0.000071
4 | O -1.470026 -0.780853 -0.000115
5 | O -0.701591 1.344432 -0.000727
6 | C 1.816688 0.416703 -0.000306
7 | C 3.048419 -0.455543 -0.000119
8 | C -0.517921 0.149803 -0.000318
9 | C -2.816792 -0.277655 -0.000423
10 | H 1.771371 1.051801 -0.888046
11 | H 1.771403 1.052229 0.887131
12 | H 3.936849 0.181457 -0.000297
13 | H 3.081074 -1.090123 0.889135
14 | H 3.081029 -1.090559 -0.889062
15 | H -3.000950 0.320857 0.893103
16 | H -3.457001 -1.158047 -0.000275
17 | H -3.000716 0.320389 -0.894309
--------------------------------------------------------------------------------
/xyz_files/LEDC.xyz:
--------------------------------------------------------------------------------
1 | 16
2 |
3 | O -2.255868 2.650457 0.012551
4 | C -1.809275 1.473031 0.015686
5 | O -2.557967 0.452047 0.026218
6 | O -0.474738 1.328948 0.007241
7 | C 0.021386 -0.012379 0.010851
8 | O 2.027099 -1.232759 0.003431
9 | C 3.361634 -1.376843 -0.005202
10 | C 1.530975 0.108567 -0.000226
11 | O 4.110324 -0.355861 -0.015950
12 | O 3.808227 -2.554269 -0.002085
13 | Li -3.876543 1.762975 0.028402
14 | Li 5.428900 -1.666789 -0.018264
15 | H -0.336046 -0.551480 -0.869288
16 | H -0.323794 -0.542177 0.901467
17 | H 1.876161 0.638342 -0.890852
18 | H 1.888400 0.647692 0.879903
--------------------------------------------------------------------------------
/xyz_files/Li.xyz:
--------------------------------------------------------------------------------
1 | 1
2 |
3 | Li -0.0 0.0 0.0
--------------------------------------------------------------------------------
/xyz_files/bh4.xyz:
--------------------------------------------------------------------------------
1 | 6
2 | Mg1 B1 H4
3 | B -1.631105 1.932915 -0.945766
4 | Mg -0.083010 0.900879 -0.063538
5 | H -0.726964 2.699089 -0.546295
6 | H -1.099810 1.124588 -1.738164
7 | H -2.524664 2.530521 -1.454051
8 | H -2.015696 1.273997 0.044884
9 |
--------------------------------------------------------------------------------
/xyz_files/c2h4.xyz:
--------------------------------------------------------------------------------
1 | 6
2 |
3 | C -3.58659 1.25899 0.00000
4 | C -2.57512 2.12413 0.00000
5 | H -4.61135 1.61604 0.00000
6 | H -3.39269 0.19127 0.00000
7 | H -2.76902 3.19185 0.00000
8 | H -1.55036 1.76708 0.00000
9 |
10 |
--------------------------------------------------------------------------------
/xyz_files/c2h6.xyz:
--------------------------------------------------------------------------------
1 | 8
2 |
3 | C -2.70568 3.13768 -0.10765
4 | C -1.27855 2.82617 0.30963
5 | H -3.31660 2.21084 -0.08117
6 | H -3.14664 3.88380 0.58648
7 | H -0.83759 2.08005 -0.38450
8 | H -0.66763 3.75302 0.28314
9 | H -1.26913 2.41380 1.34055
10 | H -2.71510 3.55006 -1.13858
11 |
12 |
--------------------------------------------------------------------------------
/xyz_files/co.xyz:
--------------------------------------------------------------------------------
1 | 2
2 | C1 O1
3 | C 1.569086 -0.155515 0.000000
4 | O 1.931000 -1.221679 0.000000
5 |
--------------------------------------------------------------------------------
/xyz_files/fec.xyz:
--------------------------------------------------------------------------------
1 | 10
2 |
3 | O 0.29622 -1.26484 0.00459
4 | C -0.71392 -0.39195 0.05900
5 | C 1.47337 -0.56226 -0.09741
6 | O -1.88221 -0.73026 0.15091
7 | O -0.32657 0.88553 0.00354
8 | C 1.04401 0.91938 -0.09488
9 | H 1.98305 -0.82798 -1.04841
10 | H 2.12811 -0.79969 0.76859
11 | H 1.34411 1.42831 -1.03596
12 | F 1.57240 1.58508 0.99721
13 |
--------------------------------------------------------------------------------
/xyz_files/h.xyz:
--------------------------------------------------------------------------------
1 | 1
2 |
3 | H 0.00000 0.00000 0.00000
4 |
5 |
--------------------------------------------------------------------------------
/xyz_files/h2.xyz:
--------------------------------------------------------------------------------
1 | 2
2 | H2
3 | H -4.597931 4.247288 0.000000
4 | H -3.913329 4.539862 0.000000
5 |
--------------------------------------------------------------------------------
/xyz_files/h2o.xyz:
--------------------------------------------------------------------------------
1 | 3
2 |
3 | O 4.05339 -0.01560 -3.14170
4 | H 3.59828 -0.71554 -3.67421
5 | H 3.42328 0.18415 -2.40436
6 |
7 |
--------------------------------------------------------------------------------
/xyz_files/lemc.xyz:
--------------------------------------------------------------------------------
1 | 13
2 | Li1 H5 C3 O4
3 | C -0.040178 0.049609 0.056991
4 | C 1.464019 0.129795 -0.096416
5 | O 1.939947 -1.220713 -0.140556
6 | C 3.268548 -1.390984 -0.283742
7 | O 4.031683 -0.391403 -0.372522
8 | O 3.677600 -2.581216 -0.321202
9 | O -0.519156 1.390344 0.101596
10 | Li 5.369383 -1.742058 -0.509851
11 | H -0.293172 -0.487928 0.978851
12 | H -0.472876 -0.493287 -0.792022
13 | H 1.909895 0.657731 0.750360
14 | H 1.729926 0.652426 -1.018663
15 | H -1.475441 1.363691 0.199752
16 |
17 |
--------------------------------------------------------------------------------
/xyz_files/li2co3_0.xyz:
--------------------------------------------------------------------------------
1 | 6
2 |
3 | O 0.60173 0.50763 -0.37574
4 | C -0.56879 0.05546 -0.58675
5 | O -1.25942 0.59711 -1.59229
6 | O -1.13674 -0.86917 0.07778
7 | Li -2.57436 -0.60812 -1.07444
8 | Li 0.26670 1.63265 -1.81952
9 |
--------------------------------------------------------------------------------
/xyz_files/lico3-.xyz:
--------------------------------------------------------------------------------
1 | 5
2 |
3 | O 0.60173 0.50763 -0.37574
4 | C -0.56879 0.05546 -0.58675
5 | O -1.25942 0.59711 -1.59229
6 | O -1.13674 -0.86917 0.07778
7 | Li -2.57436 -0.60812 -1.07444
8 |
--------------------------------------------------------------------------------
/xyz_files/mg_tfsi.xyz:
--------------------------------------------------------------------------------
1 | 16
2 |
3 | Mg -1.07367 -0.05831 -0.10101
4 | N 2.42522 0.48257 0.28180
5 | S 1.97819 -1.01170 0.30131
6 | O 2.71976 -1.78463 1.23345
7 | O 0.52148 -1.19624 0.31622
8 | C 2.44746 -1.59970 -1.38265
9 | F 3.74089 -1.41324 -1.58481
10 | F 1.74977 -0.91312 -2.28951
11 | F 2.15807 -2.89078 -1.49214
12 | S 1.56877 1.75875 0.00380
13 | O 0.18542 1.46730 -0.38547
14 | O 2.25872 2.72199 -0.77824
15 | C 1.36941 2.47079 1.69180
16 | F 0.71941 1.58692 2.45436
17 | F 2.55070 2.73020 2.22546
18 | F 0.65515 3.58612 1.61328
19 |
20 |
--------------------------------------------------------------------------------
/xyz_files/mgg2.xyz:
--------------------------------------------------------------------------------
1 | 24
2 |
3 | C -0.58619 2.06146 2.11605
4 | O 0.39894 1.41095 1.27517
5 | C 1.76771 1.64962 1.70374
6 | C 2.67452 1.22553 0.57269
7 | O 2.22129 -0.08666 0.16701
8 | C 2.79099 -0.59837 -1.05985
9 | C 1.98609 -1.82657 -1.41402
10 | O 0.58062 -1.46778 -1.31578
11 | C -0.30120 -2.44470 -1.92299
12 | Mg 0.22994 -0.19511 0.14700
13 | H -1.56956 1.79800 1.72742
14 | H -0.47741 1.70973 3.14381
15 | H -0.43928 3.14078 2.06004
16 | H 1.94890 1.07287 2.61481
17 | H 1.89565 2.71432 1.90845
18 | H 3.71161 1.17496 0.91037
19 | H 2.59464 1.90428 -0.28226
20 | H 3.83994 -0.85874 -0.90473
21 | H 2.70939 0.17232 -1.83268
22 | H 2.17859 -2.65638 -0.72838
23 | H 2.20312 -2.13484 -2.43863
24 | H -1.32297 -2.09719 -1.77255
25 | H -0.07983 -2.50391 -2.98935
26 | H -0.15570 -3.41490 -1.44401
27 |
28 |
--------------------------------------------------------------------------------
/xyz_files/mgthf.xyz:
--------------------------------------------------------------------------------
1 | 14
2 |
3 | O -0.26913 1.16414 -0.37632
4 | C 0.21081 0.01062 0.29888
5 | C 1.72560 0.25603 0.40696
6 | C 2.05071 1.25072 -0.72485
7 | C 0.71039 1.93708 -1.05693
8 | Mg -1.97979 1.48155 -0.55902
9 | H -0.00259 -0.87752 -0.30209
10 | H -0.25524 -0.08886 1.28066
11 | H 2.28855 -0.67533 0.29742
12 | H 1.96179 0.70511 1.37597
13 | H 2.41692 0.71117 -1.60407
14 | H 2.80766 1.97676 -0.41289
15 | H 0.51358 1.92340 -2.13239
16 | H 0.68510 2.96650 -0.69382
17 |
18 |
--------------------------------------------------------------------------------
/xyz_files/n2.xyz:
--------------------------------------------------------------------------------
1 | 2
2 |
3 | N -4.24957 0.48001 1.92033
4 | N -3.20506 0.88382 1.93918
5 |
6 |
--------------------------------------------------------------------------------
/xyz_files/no.xyz:
--------------------------------------------------------------------------------
1 | 2
2 |
3 | N -7.70920 -0.43579 2.25458
4 | O -6.68552 -0.15968 2.16050
5 |
6 |
--------------------------------------------------------------------------------
/xyz_files/oh.xyz:
--------------------------------------------------------------------------------
1 | 2
2 |
3 | O -3.13522 -2.01140 -2.32296
4 | H -2.47296 -1.55935 -1.74187
5 |
6 |
--------------------------------------------------------------------------------