├── .gitignore ├── LICENSE ├── README.md ├── analysis ├── analyze_logs.py └── analyze_rho.py ├── benchmarks ├── jacobian_jax.py ├── jacobian_tf.py └── jacobian_torch.py ├── bin └── stochastic_reconfiguration.py ├── config ├── config.yaml ├── hamiltonian │ ├── Atomic.yaml │ ├── HarmonicOscillator.yaml │ └── Nuclear.yaml ├── optimizer │ ├── AdaptiveDelta.yaml │ ├── AdaptiveEpsilon.yaml │ └── Flat.yaml └── wavefunction │ └── DeepSets.yaml ├── examples ├── Deep_Sets.ipynb ├── example_del_squared.py ├── fit_harmonic_oscillator.py ├── fit_hydrogen_atom.py ├── mc_harmonic_oscillator.py ├── model_reload.py └── torch_selection.py ├── images ├── NucleonScaling.png └── Scaling_Performance.png ├── jacobian_benchmark.py └── mlqm ├── __init__.py ├── hamiltonians ├── AtomicPotential.py ├── Hamiltonian.py ├── HarmonicOscillator.py ├── NuclearPotential.py └── __init__.py ├── models ├── DeepSetsWavefunction.py ├── ExponentialBoundaryCondition.py ├── GaussianBoundaryCondition.py ├── HarmonicOscillatorWavefunction.py ├── NeuralWavefunction.py ├── PolynomialWavefunction.py ├── __init__.py ├── test_harmonic_oscillator.py ├── test_nn.py └── test_polynomial.py ├── optimization ├── GradientCalculator.py ├── StochasticReconfiguration.py └── __init__.py ├── samplers ├── Estimator.py ├── MetropolisSampler.py ├── __init__.py └── test_metropolis_sampler.py └── tests ├── __init__.py ├── hamiltonian ├── __init__.py └── test_harmonic_oscillator.py └── sampler ├── __init__.py └── test_metropolis_walk.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | output/ 7 | 8 | # Hydra files: 9 | .hydra/ 10 | config.snapshot.yaml 11 | 12 | # Cobalt files: 13 | *.cobaltlog 14 | *.error 15 | *.output 16 | *.qsub 17 | 18 | # Profiling files: 19 | *.lprof 20 | *.json 21 | 22 | *.model 23 | 24 | # C extensions 25 | *.so 26 | 27 | *.pdf 28 | *.log 29 | *.dat 30 | 31 | # Distribution / packaging 32 | .Python 33 | build/ 34 | develop-eggs/ 35 | dist/ 36 | downloads/ 37 | eggs/ 38 | .eggs/ 39 | lib/ 40 | lib64/ 41 | parts/ 42 | sdist/ 43 | var/ 44 | wheels/ 45 | pip-wheel-metadata/ 46 | share/python-wheels/ 47 | *.egg-info/ 48 | .installed.cfg 49 | *.egg 50 | MANIFEST 51 | 52 | # PyInstaller 53 | # Usually these files are written by a python script from a template 54 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 55 | *.manifest 56 | *.spec 57 | 58 | # Installer logs 59 | pip-log.txt 60 | pip-delete-this-directory.txt 61 | 62 | # Unit test / coverage reports 63 | htmlcov/ 64 | .tox/ 65 | .nox/ 66 | .coverage 67 | .coverage.* 68 | .cache 69 | nosetests.xml 70 | coverage.xml 71 | *.cover 72 | *.py,cover 73 | .hypothesis/ 74 | .pytest_cache/ 75 | 76 | # Translations 77 | *.mo 78 | *.pot 79 | 80 | # Django stuff: 81 | *.log 82 | local_settings.py 83 | db.sqlite3 84 | db.sqlite3-journal 85 | 86 | # Flask stuff: 87 | instance/ 88 | .webassets-cache 89 | 90 | # Scrapy stuff: 91 | .scrapy 92 | 93 | # Sphinx documentation 94 | docs/_build/ 95 | 96 | # PyBuilder 97 | target/ 98 | 99 | # Jupyter Notebook 100 | .ipynb_checkpoints 101 | 102 | # IPython 103 | profile_default/ 104 | ipython_config.py 105 | 106 | # pyenv 107 | .python-version 108 | 109 | # pipenv 110 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
111 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 112 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 113 | # install all needed dependencies. 114 | #Pipfile.lock 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![DOI](https://zenodo.org/badge/255445949.svg)](https://zenodo.org/badge/latestdoi/255445949) 2 | 3 | 4 | 5 | # MLQM 6 | 7 | MLQM stands for "Machine Learning Quantum Monte Carlo".
This repository contains tools to perform variational Monte Carlo for nuclear physics, though there are some additional Hamiltonians implemented as development tools and cross checks: the harmonic oscillator and the hydrogen atom. 8 | 9 | ## Requirements 10 | 11 | The requirements to run this code are: 12 | - python > 3.6 13 | - tensorflow > 2.X (not TF1 compatible) 14 | - hydra-core > 1.0 (for configuration) 15 | - horovod (for scaling and multi-node running) 16 | 17 | 18 | There is no installation step; it is expected that once you have the requirements installed you can begin running immediately. 19 | 20 | 21 | ## Configuration and Running 22 | 23 | The main executable is `bin/stochastic_reconfiguration.py`. You can execute it with: 24 | ```bash 25 | 26 | python bin/stochastic_reconfiguration.py run_id=MyTestRun 27 | 28 | ``` 29 | 30 | Most parameters have reasonable defaults, which you can change in configuration files (in `config/`) or override on the command line: 31 | 32 | ```bash 33 | 34 | python bin/stochastic_reconfiguration.py run_id=deuteron nparticles=2 iterations=500 optimizer=AdaptiveDelta [... other argument overrides] 35 | 36 | ``` 37 | 38 | ## Computational Performance 39 | 40 | This software is compatible with both CPUs and GPUs through TensorFlow. It has good weak and strong scaling performance: 41 | 42 | ![Scaling performance for 4He on A100 GPUs (ThetaGPU@ALCF)](https://github.com/coreyjadams/AI-for-QM/blob/master/images/Scaling_Performance.png) 43 | 44 | The software also has good scaling performance with an increasing number of nucleons: 45 | 46 | ![Nucleon scaling performance on A100 GPUs (ThetaGPU@ALCF)](https://github.com/coreyjadams/AI-for-QM/blob/master/images/NucleonScaling.png) 47 | 48 | 49 | 50 | ## Reference 51 | 52 | If you use this software, please reference our publication [on arXiv](https://arxiv.org/abs/2007.14282). 53 | -------------------------------------------------------------------------------- /analysis/analyze_logs.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | from matplotlib import pyplot as plt 3 | import numpy 4 | import pathlib 5 | 6 | import matplotlib 7 | matplotlib.rcParams['mathtext.fontset'] = 'cm' 8 | matplotlib.rcParams['font.family'] = 'STIXGeneral' 9 | 10 | import matplotlib as mpl 11 | mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["tab:blue", "tab:orange"]) 12 | 13 | from mpl_toolkits.axes_grid.inset_locator import (inset_axes, InsetPosition, 14 | mark_inset) 15 | 16 | def moving_average(a, n=3) : 17 | ret = numpy.cumsum(a, dtype=float) 18 | ret[n:] = ret[n:] - ret[:-n] 19 | return ret[n - 1:] / n 20 | 21 | log_4_file = pathlib.Path("logs/pionless_4_nucleus_2.log") 22 | log_6_file = pathlib.Path("logs/pionless_6_nucleus_2.log") 23 | 24 | def parse_log_file(file_path): 25 | 26 | with open(file_path, 'r') as f: 27 | lines = f.readlines() 28 | 29 | 30 | runs = [] 31 | for l in lines: 32 | if "INFO step =" in l: 33 | if 'energy_jf' in l: continue 34 | runs.append(l) 35 | 36 | step = [] 37 | energy = [] 38 | err = [] 39 | for l in runs: 40 | tokens = l.rstrip('\n').split(' ') 41 | # 2020-07-04 12:50:07,259 INFO step = 8, energy = 2.710, err = 0.043 42 | step.append(int(tokens[5].rstrip(','))) 43 | energy.append(float(tokens[8].rstrip(','))) 44 | err.append(float(tokens[11].rstrip(','))) 45 | 46 | step = list(range(len(energy))) 47 | energy = numpy.asarray(energy) 48 | err = numpy.asarray(err) 49 | 50 | return numpy.asarray(step), numpy.asarray(energy), numpy.asarray(err) 51 |
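Note on the parser above: `parse_log_file` assumes the training-log format written by `bin/stochastic_reconfiguration.py`, i.e. lines such as `2020-07-04 12:50:07,259 INFO step = 8, energy = 2.710, err = 0.043` (the `energy_jf` lines are skipped), and returns three equal-length numpy arrays. A minimal usage sketch; the log path here is a placeholder, not a file shipped with the repository:

```python
# Hypothetical usage of parse_log_file (illustration only; "logs/my_run.log" is a placeholder).
step, energy, err = parse_log_file(pathlib.Path("logs/my_run.log"))
assert len(step) == len(energy) == len(err)  # one entry per "INFO step = ..., energy = ..." line
print(f"final energy: {energy[-1]:.3f} +/- {err[-1]:.3f}")  # MeV for the nuclear Hamiltonian runs
```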
52 | 53 | step_6, energy_6, err_6 = parse_log_file(log_6_file) 54 | step_4, energy_4, err_4 = parse_log_file(log_4_file) 55 | 56 | # mask_fake = step_6 > 1300 57 | # print(numpy.sum(mask_fake)) 58 | # fake = numpy.arange(numpy.sum(mask_fake)) 59 | # print(fake) 60 | # energy_6[mask_fake] = energy_6[mask_fake] + 0.001 * fake 61 | 62 | # print(len(step_4)) 63 | 64 | # n=2 65 | # print(len(energy)) 66 | # moving_energy = moving_average(energy,2*n + 1) 67 | # print(len(moving_energy)) 68 | 69 | # print(energy_4) 70 | # print(energy_6) 71 | 72 | plt.figure(figsize=(16,9)) 73 | # plt.plot(step[n:-n], moving_energy, label="Pionless 4He") 74 | plt.plot(step_4, energy_4, label=r"$\Lambda = 4~$fm${}^{-1}$") 75 | plt.fill_between(step_4, energy_4-err_4, energy_4+err_4, alpha=0.5) 76 | 77 | plt.plot(step_6, energy_6, label=r"$\Lambda = 6~$fm${}^{-1}$") 78 | plt.fill_between(step_6, energy_6-err_6, energy_6+err_6, alpha=0.5) 79 | plt.grid(True) 80 | plt.xlabel(r"SR Iteration", fontsize=35) 81 | plt.ylabel(r"Energy [MeV]", fontsize=35) 82 | 83 | # Add a line for GFMC solution at 2.224 84 | plt.plot(step_4, [-2.224,]*len(step_4), color="black", ls="--") 85 | plt.legend(fontsize=35) 86 | 87 | # matplotlib.pyplot.annotate("-2.224",[100, -2.224 + 0.1], fontsize=25) 88 | 89 | for tick in plt.gca().xaxis.get_major_ticks(): 90 | tick.label.set_fontsize(30) 91 | for tick in plt.gca().yaxis.get_major_ticks(): 92 | tick.label.set_fontsize(30) 93 | # specify integer or one of preset strings, e.g. 94 | #tick.label.set_fontsize('x-small') 95 | # tick.label.set_rotation('vertical') 96 | 97 | plt.tight_layout() 98 | 99 | x_max = 1750 100 | 101 | plt.xlim([0,x_max]) 102 | plt.ylim([-2.5,3.8]) 103 | ax1 = plt.gca() 104 | 105 | x_min = 1200 106 | 107 | 108 | print(step_4) 109 | 110 | step_4_mask = numpy.logical_and(step_4 >= x_min, step_4 <= x_max) 111 | step_6_mask = numpy.logical_and(step_6 >= x_min, step_6 <= x_max) 112 | 113 | # bottom, left, width, height 114 | ax2 = plt.axes([-2,1800.,200,1.0]) 115 | # Manually set the position and relative size of the inset axes within ax1 116 | ip = InsetPosition(ax1, [0.6,0.25,0.3,0.4]) 117 | ax2.set_axes_locator(ip) 118 | # Mark the region corresponding to the inset axes on ax1 and draw lines 119 | # in grey linking the two axes. 
120 | mark_inset(ax1, ax2, loc1=3, loc2=1, fc="black", alpha=0.5, ec='black') 121 | 122 | step_4 = step_4[step_4_mask] 123 | energy_4 = energy_4[step_4_mask] 124 | err_4 = err_4[step_4_mask] 125 | 126 | step_6 = step_6[step_6_mask] 127 | energy_6 = energy_6[step_6_mask] 128 | err_6 = err_6[step_6_mask] 129 | 130 | 131 | plt.plot(step_4, energy_4, label=r"$\Lambda = 4~$fm${}^{-1}$") 132 | plt.fill_between(step_4, energy_4-err_4, energy_4+err_4, alpha=0.5) 133 | 134 | plt.plot(step_6, energy_6, label=r"$\Lambda = 6~$fm${}^{-1}$") 135 | plt.fill_between(step_6, energy_6-err_6, energy_6+err_6, alpha=0.5) 136 | 137 | 138 | plt.plot(step_4, [-2.224,]*len(step_4), color="black", ls="--", label="GFMC") 139 | 140 | # plt.plot(step_6, energy_6, label=r"$\Lambda = 6~$fm${}^{-1}$") 141 | # plt.fill_between(step_6, energy_6-err_6, energy_6+err_6, alpha=0.5) 142 | plt.grid(True) 143 | # plt.xlabel(r"SR Iteration", fontsize=35) 144 | # plt.ylabel(r"Energy [MeV]", fontsize=35) 145 | # plt.legend(fontsize=35) 146 | 147 | # ax2.fill_between(r_ann[ann_mask], 148 | # rho_ann[ann_mask] - drho_ann[ann_mask], 149 | # rho_ann[ann_mask] + drho_ann[ann_mask], 150 | # label="ANN", 151 | # # ls="none", 152 | # # marker='s', 153 | # color='orange', 154 | # alpha=0.5) 155 | plt.locator_params(axis='y', nbins=5) 156 | plt.locator_params(axis='x', nbins=6) 157 | 158 | for tick in plt.gca().xaxis.get_major_ticks(): 159 | tick.label.set_fontsize(20) 160 | for tick in plt.gca().yaxis.get_major_ticks(): 161 | tick.label.set_fontsize(20) 162 | # plt.yscale('log') 163 | plt.xlim([x_min, x_max]) 164 | plt.ylim([-2.25,-2.05]) 165 | # plt.legend(fontsize=25) 166 | # ax2.grid(True) 167 | 168 | 169 | plt.savefig("energy_convergence.pdf") 170 | plt.show() 171 | -------------------------------------------------------------------------------- /analysis/analyze_rho.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | from matplotlib import pyplot as plt 3 | import numpy 4 | import pathlib 5 | 6 | from matplotlib import rc 7 | # rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 8 | ## for Palatino and other serif fonts use: 9 | #rc('font',**{'family':'serif','serif':['Palatino']}) 10 | # rc('text', usetex=True) 11 | 12 | import matplotlib 13 | matplotlib.rcParams['mathtext.fontset'] = 'cm' 14 | matplotlib.rcParams['font.family'] = 'STIXGeneral' 15 | 16 | import matplotlib as mpl 17 | mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["tab:blue", "tab:orange"]) 18 | 19 | from mpl_toolkits.axes_grid.inset_locator import (inset_axes, InsetPosition, 20 | mark_inset) 21 | from scipy import interpolate 22 | 23 | 24 | def parse_file(file_path): 25 | 26 | with open(file_path, 'r') as f: 27 | lines = f.readlines() 28 | 29 | if "GFMC" in file_path: 30 | lines = lines[2:] 31 | 32 | r = [] 33 | rho = [] 34 | drho = [] 35 | 36 | for l in lines: 37 | tokens = l.rstrip('\n').split() 38 | r .append(float(tokens[0])) 39 | rho .append(float(tokens[1])) 40 | drho.append(float(tokens[2])) 41 | 42 | return numpy.asarray(r), numpy.asarray(rho), numpy.asarray(drho) 43 | 44 | 45 | for nucleus in ['he4', 'h3', 'h2']: 46 | r_ann, rho_ann, drho_ann = parse_file(f'logs/rho_{nucleus}_ANN.dat') 47 | r_gfmc, rho_gfmc, drho_gfmc = parse_file(f'logs/rho_{nucleus}_GFMC.dat') 48 | 49 | x_max = 4.0 50 | 51 | ann_mask = r_ann <= x_max 52 | gfmc_mask = r_gfmc <= x_max 53 | 54 | 55 | tck,u = interpolate.splprep( [r_gfmc[gfmc_mask], rho_gfmc[gfmc_mask]],s = 0 ) 56 | xnew,ynew = interpolate.splev( 
numpy.linspace( 0, 1, 500 ), tck,der = 0) 57 | 58 | 59 | plt.figure(figsize=(16,9)) 60 | 61 | # Create a spline for the upper and lower uncertainty window 62 | tck_upper, _ = interpolate.splprep( [r_gfmc[gfmc_mask], rho_gfmc[gfmc_mask] + drho_gfmc[gfmc_mask]],s = 0 ) 63 | tck_lower, _ = interpolate.splprep( [r_gfmc[gfmc_mask], rho_gfmc[gfmc_mask] - drho_gfmc[gfmc_mask]],s = 0 ) 64 | xnew,ynew_upper = interpolate.splev( numpy.linspace( 0, 1, 500 ), tck_upper,der = 0) 65 | xnew,ynew_lower = interpolate.splev( numpy.linspace( 0, 1, 500 ), tck_lower,der = 0) 66 | plt.fill_between(xnew, 67 | ynew_lower, 68 | ynew_upper, 69 | alpha=0.75) 70 | plt.plot(xnew, ynew, label="GFMC", lw=3) 71 | plt.errorbar(r_ann[ann_mask], rho_ann[ann_mask], 72 | yerr = drho_ann[ann_mask], label="ANN", 73 | ls="none", 74 | marker='o', 75 | ms = 7, 76 | # color='orange' 77 | ) 78 | # plt.fill_between(r_ann[ann_mask], 79 | # rho_ann[ann_mask] - drho_ann[ann_mask], 80 | # rho_ann[ann_mask] + drho_ann[ann_mask], 81 | # label="ANN", 82 | # # ls="none", 83 | # # marker='s', 84 | # color='orange', 85 | # alpha=0.5) 86 | plt.xlim([0.0, x_max]) 87 | 88 | 89 | # plt.yscale('log') 90 | plt.grid(True) 91 | plt.legend(fontsize=25) 92 | 93 | plt.xlabel("r (fm)", fontsize=35) 94 | plt.ylabel(r"$\rho_N~($fm${}^{-3})$", fontsize=35) 95 | 96 | for tick in plt.gca().xaxis.get_major_ticks(): 97 | tick.label.set_fontsize(30) 98 | for tick in plt.gca().yaxis.get_major_ticks(): 99 | tick.label.set_fontsize(30) 100 | plt.tight_layout() 101 | 102 | ax1 = plt.gca() 103 | 104 | 105 | x_max = 4.0 106 | x_min = 2.0 107 | 108 | ann_mask = numpy.logical_and(r_ann >= x_min, r_ann <= x_max) 109 | gfmc_mask = numpy.logical_and(r_gfmc >= x_min, r_gfmc <= x_max) 110 | 111 | # bottom, left, width, height 112 | ax2 = plt.axes([0,2.,2.,0.2]) 113 | # Manually set the position and relative size of the inset axes within ax1 114 | ip = InsetPosition(ax1, [0.35,0.25,0.5,0.5]) 115 | ax2.set_axes_locator(ip) 116 | # Mark the region corresponding to the inset axes on ax1 and draw lines 117 | # in grey linking the two axes. 118 | mark_inset(ax1, ax2, loc1=3, loc2=1, fc="black", alpha=0.5, ec='black') 119 | 120 | ax2.fill_between(r_gfmc[gfmc_mask], 121 | rho_gfmc[gfmc_mask] - drho_gfmc[gfmc_mask], 122 | rho_gfmc[gfmc_mask] + drho_gfmc[gfmc_mask], 123 | alpha=0.75) 124 | ax2.plot(r_gfmc[gfmc_mask], rho_gfmc[gfmc_mask],lw=3) 125 | ax2.errorbar(r_ann[ann_mask], rho_ann[ann_mask], 126 | yerr = drho_ann[ann_mask], 127 | # label="Error x10", 128 | ls="none", 129 | marker='o', 130 | ms=7, 131 | # color='orange' 132 | ) 133 | # ax2.fill_between(r_ann[ann_mask], 134 | # rho_ann[ann_mask] - drho_ann[ann_mask], 135 | # rho_ann[ann_mask] + drho_ann[ann_mask], 136 | # label="ANN", 137 | # # ls="none", 138 | # # marker='s', 139 | # color='orange', 140 | # alpha=0.5) 141 | for tick in plt.gca().xaxis.get_major_ticks(): 142 | tick.label.set_fontsize(25) 143 | for tick in plt.gca().yaxis.get_major_ticks(): 144 | tick.label.set_fontsize(25) 145 | plt.yscale('log') 146 | plt.xlim([x_min, x_max]) 147 | plt.ylim([1e-5, 2e-2]) 148 | # plt.legend(fontsize=25) 149 | ax2.grid(True) 150 | 151 | plt.savefig(f"rho_{nucleus}.pdf") 152 | 153 | 154 | 155 | plt.show() 156 | 157 | # break 158 | 159 | 160 | # # specify integer or one of preset strings, e.g. 
161 | # #tick.label.set_fontsize('x-small') 162 | # # tick.label.set_rotation('vertical') 163 | 164 | # plt.savefig("energy_convergence.pdf") 165 | # plt.show() 166 | -------------------------------------------------------------------------------- /benchmarks/jacobian_jax.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 3 | import time 4 | import numpy 5 | import tensorflow as tf 6 | 7 | import jax.numpy as np 8 | from jax import jit, jacfwd, jacrev 9 | 10 | import time 11 | 12 | import numpy 13 | numpy.random.seed(0) 14 | 15 | @jit 16 | def model(x, layers): 17 | 18 | for i in range(len(layers)): 19 | x = np.dot(x, layers[i]) 20 | return x 21 | 22 | @jit 23 | def compute_jacobian(input_vector, layer_weights): 24 | l1 = lambda weights : model(input_vector, weights) 25 | jacobian = jacrev(l1)(layer_weights) 26 | 27 | jacobian = flatten_jacobian(jacobian) 28 | 29 | return jacobian 30 | 31 | @jit 32 | def flatten_jacobian(jacobian): 33 | param_shapes = [ j.shape[1:] for j in jacobian ] 34 | flat_shape = [[-1, numpy.prod(js)] for js in param_shapes] 35 | 36 | flattened_jacobian = [np.reshape(j, f) for j, f in zip(jacobian, flat_shape)] 37 | jacobian = np.concatenate(flattened_jacobian, axis=-1) 38 | return jacobian 39 | 40 | def main(ninput, n_filters_list, n_jacobian_calculations): 41 | 42 | 43 | cross_check_parameters = {} 44 | 45 | # Create an input vector: 46 | input_vector = numpy.random.random([ninput, 1]) 47 | 48 | n_filters_list.insert(0,1) 49 | n_filters_list.append(1) 50 | 51 | cross_check_parameters['input_sum'] = numpy.sum(input_vector) 52 | cross_check_parameters['input_std'] = numpy.std(input_vector) 53 | 54 | layer_weights = [ numpy.random.random([n_filters_list[i],n_filters_list[i+1]]) for i in range(len(n_filters_list)-1)] 55 | 56 | 57 | 58 | # Create the model: 59 | M = jit(lambda x : model(x, layer_weights)) 60 | 61 | # Forward pass: 62 | output = M(input_vector) 63 | 64 | # Capture the number of parameters: 65 | nparameters = numpy.sum([ numpy.prod(p.shape) for p in layer_weights ]) 66 | cross_check_parameters['n_params'] = nparameters 67 | 68 | # Capture the network output: 69 | cross_check_parameters['output_sum'] = numpy.sum(output) 70 | cross_check_parameters['output_std'] = numpy.std(output) 71 | 72 | start = time.time() 73 | 74 | cross_check_parameters['jacobian_times'] = [] 75 | for i in range(n_jacobian_calculations): 76 | this_start = time.time() 77 | 78 | jacobian = compute_jacobian(input_vector, layer_weights) 79 | this_end = time.time() 80 | cross_check_parameters['jacobian_times'].append((this_end - this_start)) 81 | 82 | end = time.time() 83 | cross_check_parameters['n_filters_list'] = n_filters_list 84 | cross_check_parameters['jacobian_sum'] = numpy.sum(jacobian) 85 | cross_check_parameters['jacobian_std'] = numpy.std(jacobian) 86 | cross_check_parameters['jacobian_prod'] = numpy.prod(jacobian) 87 | cross_check_parameters['jacobian_time'] = (end - start) / n_jacobian_calculations 88 | cross_check_parameters['jacobian_n_calls'] = n_jacobian_calculations 89 | 90 | return cross_check_parameters 91 | 92 | if __name__ == '__main__': 93 | ninput=24 94 | network_list = [ 95 | [32, 32, 16], 96 | [128, 128], 97 | [512, 512, 512], 98 | [16, 16, 16, 16, 16, 16], 99 | [2048], 100 | ] 101 | for network in network_list: 102 | ccp = main(ninput,network, 5) 103 | print(ccp) 104 | 105 | 106 | 107 | --------------------------------------------------------------------------------
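The three benchmark scripts (`jacobian_jax.py` above, `jacobian_tf.py` and `jacobian_torch.py` below) all produce the same object: the Jacobian of the network outputs with respect to every weight, flattened per parameter and concatenated into a single `[n_outputs, n_parameters]` matrix. That is what `flatten_jacobian` above, and the `tf.concat` / `torch.cat` equivalents below, implement. A framework-free sketch of that flattening step, using plain numpy with illustrative shapes that are not taken from the benchmarks:

```python
import numpy

# Two per-parameter Jacobian blocks with shapes [n_outputs, *param_shape]
# (the shapes here are made up for illustration).
n_outputs = 24
blocks = [numpy.random.random((n_outputs, 1, 32)),
          numpy.random.random((n_outputs, 32, 1))]

# Reshape each block to [n_outputs, n_elements] and concatenate along the last axis,
# mirroring flatten_jacobian() above and the TF/Torch versions below.
flat = [b.reshape(n_outputs, -1) for b in blocks]
jacobian = numpy.concatenate(flat, axis=-1)
print(jacobian.shape)  # (24, 64): one row per output, one column per parameter
```

Since all three scripts call `numpy.random.seed(0)` and fill the same `cross_check_parameters` keys, their printed dictionaries can be compared entry by entry as a cross-framework consistency check.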
/benchmarks/jacobian_tf.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 3 | import time 4 | import numpy 5 | import tensorflow as tf 6 | 7 | numpy.random.seed(0) 8 | 9 | class model(tf.keras.models.Model): 10 | 11 | def __init__(self, layer_weights): 12 | tf.keras.models.Model.__init__(self) 13 | 14 | self._layers = [] 15 | for i in range(len(layer_weights)): 16 | init = tf.constant_initializer(layer_weights[i]) 17 | self._layers.append( 18 | tf.keras.layers.Dense(layer_weights[i].shape[-1], 19 | kernel_initializer=init, use_bias=False)) 20 | 21 | 22 | @tf.function 23 | def call(self, x): 24 | 25 | for i in range(len(self._layers)): 26 | x = self._layers[i](x) 27 | return x 28 | 29 | def count_parameters(self): 30 | return numpy.sum([ numpy.prod(p.shape) for p in self.trainable_variables ] ) 31 | 32 | @tf.function 33 | def compute_jacobians(input_vector, M): 34 | with tf.GradientTape() as tape: 35 | output = M(input_vector) 36 | 37 | jacobian = tape.jacobian(output, M.trainable_variables, parallel_iterations=None) 38 | 39 | param_shapes = [ j.shape[1:] for j in jacobian ] 40 | flat_shape = [[-1, tf.reduce_prod(js)] for js in param_shapes] 41 | 42 | flattened_jacobian = [tf.reshape(j, f) for j, f in zip(jacobian, flat_shape)] 43 | jacobian = tf.concat(flattened_jacobian, axis=-1) 44 | return jacobian 45 | 46 | def main(ninput, n_filters_list, n_jacobian_calculations): 47 | 48 | 49 | cross_check_parameters = {} 50 | 51 | # Create an input vector: 52 | input_vector = numpy.random.random([ninput,1]) 53 | cross_check_parameters['input_sum'] = numpy.sum(input_vector) 54 | cross_check_parameters['input_std'] = numpy.std(input_vector) 55 | 56 | # Make sure to start with 1 input and finish with 1 output 57 | n_filters_list.insert(0,1) 58 | n_filters_list.append(1) 59 | 60 | # Use these as the layer weights: 61 | layer_weights = [ numpy.random.random([n_filters_list[i],n_filters_list[i+1]]) for i in range(len(n_filters_list)-1)] 62 | 63 | 64 | # Create the model, and input Tensor: 65 | input_vector = tf.convert_to_tensor(input_vector, dtype=tf.float32) 66 | M = model(layer_weights) 67 | 68 | # Forward pass of the model 69 | output = M(input_vector) 70 | 71 | # Capture the number of parameters: 72 | cross_check_parameters['n_params'] = M.count_parameters() 73 | 74 | # Capture the network output: 75 | cross_check_parameters['output_sum'] = numpy.sum(output.numpy()) 76 | cross_check_parameters['output_std'] = numpy.std(output.numpy()) 77 | 78 | start = time.time() 79 | cross_check_parameters['jacobian_times'] = [] 80 | for i in range(n_jacobian_calculations): 81 | this_start = time.time() 82 | jacobian = compute_jacobians(input_vector, M) 83 | this_end = time.time() 84 | cross_check_parameters['jacobian_times'].append((this_end - this_start)) 85 | 86 | 87 | end = time.time() 88 | # Store some jacobian information: 89 | cross_check_parameters['n_filters_list'] = n_filters_list 90 | cross_check_parameters['jacobian_sum'] = numpy.sum(jacobian.numpy()) 91 | cross_check_parameters['jacobian_std'] = numpy.std(jacobian.numpy()) 92 | cross_check_parameters['jacobian_prod'] = numpy.prod(jacobian.numpy()) 93 | cross_check_parameters['jacobian_time'] = (end - start) 94 | cross_check_parameters['jacobian_n_calls'] = n_jacobian_calculations 95 | 96 | return cross_check_parameters 97 | 98 | if __name__ == '__main__': 99 | ninput = 24 100 | network_list = [ 101 | [32, 32, 16], 102 | [128, 128], 103 | [512, 512, 512], 104 | 
[16, 16, 16, 16, 16, 16], 105 | [2048], 106 | ] 107 | for network in network_list: 108 | ccp = main(ninput, network, 5) 109 | print(ccp) 110 | -------------------------------------------------------------------------------- /benchmarks/jacobian_torch.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import torch 3 | import time 4 | 5 | numpy.random.seed(0) 6 | 7 | 8 | class model(torch.nn.Module): 9 | 10 | def __init__(self,layer_weights): 11 | torch.nn.Module.__init__(self) 12 | 13 | self._layers = [] 14 | for i in range(len(layer_weights)): 15 | self._layers.append( 16 | torch.nn.Linear(layer_weights[i].shape[-1], layer_weights[i].shape[0], 17 | bias=False)) 18 | torch.nn.Module.add_module(self, f"layer{i}", self._layers[-1]) 19 | def forward(self, x): 20 | 21 | for i in range(len(self._layers)): 22 | x = self._layers[i](x) 23 | return x 24 | 25 | def count_parameters(self): 26 | return numpy.sum([ numpy.prod(p.shape) for p in self.parameters() ] ) 27 | 28 | def compute_jacobian(ninput, nparameters, M, input_vector ): 29 | 30 | output = M(input_vector) 31 | 32 | jacobian = torch.zeros(size=[ninput, nparameters]) 33 | 34 | param_shapes = [p.shape for p in M.parameters() ] 35 | 36 | for n in range(ninput): 37 | output_n = output[n] 38 | M.zero_grad() 39 | params = M.parameters() 40 | do_dn = torch.autograd.grad(output_n, params, retain_graph=True) 41 | do_dn = torch.cat([g.flatten() for g in do_dn]) 42 | jacobian[n,:] = torch.t(do_dn) 43 | 44 | return jacobian 45 | 46 | def main(ninput, n_filters_list, n_jacobian_calculations): 47 | 48 | cross_check_parameters = {} 49 | 50 | # Create an input vector: 51 | input_vector = numpy.random.random([ninput,1]) 52 | 53 | cross_check_parameters['input_sum'] = numpy.sum(input_vector) 54 | cross_check_parameters['input_std'] = numpy.std(input_vector) 55 | 56 | n_filters_list.insert(0,1) 57 | n_filters_list.append(1) 58 | 59 | # This transpose is to match the tensorflow output!! 60 | # layer_1_weights = numpy.random.random([1,n_hidden_params]).T 61 | # # This reshape is because it is a different layout from tensorflow! 
62 | # layer_2_weights = numpy.random.random([n_hidden_params,1]).reshape([1,n_hidden_params]) 63 | 64 | layer_weights = [ numpy.random.random([n_filters_list[i],n_filters_list[i+1]]) for i in range(len(n_filters_list)-1)] 65 | layer_weights = [l.T for l in layer_weights] 66 | 67 | # Cast the input to torch: 68 | input_vector = torch.tensor(input_vector).float() 69 | 70 | if torch.cuda.is_available(): 71 | input_vector = input_vector.cuda() 72 | 73 | # Create the model: 74 | M = model(layer_weights) 75 | 76 | # Switch out the layer weights for the controlled ones: 77 | new_dict = M.state_dict() 78 | for i, key in enumerate(new_dict.keys()): 79 | new_dict[key] = torch.tensor(layer_weights[i]) 80 | M.load_state_dict(new_dict) 81 | 82 | if torch.cuda.is_available(): 83 | M.cuda() 84 | 85 | # Forward pass: 86 | output = M(input_vector) 87 | 88 | # Capture the number of parameters: 89 | cross_check_parameters['n_params'] = M.count_parameters() 90 | nparameters = M.count_parameters() 91 | 92 | # Capture the network output: 93 | if torch.cuda.is_available(): 94 | cross_check_parameters['output_sum'] = numpy.sum(output.cpu().detach().numpy()) 95 | cross_check_parameters['output_std'] = numpy.std(output.cpu().detach().numpy()) 96 | else: 97 | cross_check_parameters['output_sum'] = numpy.sum(output.detach().numpy()) 98 | cross_check_parameters['output_std'] = numpy.std(output.detach().numpy()) 99 | 100 | start = time.time() 101 | cross_check_parameters['jacobian_times'] = [] 102 | for i in range(n_jacobian_calculations): 103 | this_start = time.time() 104 | jacobian = compute_jacobian(ninput, nparameters, M, input_vector) 105 | this_end = time.time() 106 | cross_check_parameters['jacobian_times'].append((this_end - this_start)) 107 | 108 | 109 | end = time.time() 110 | 111 | 112 | end = time.time() 113 | cross_check_parameters['n_filters_list'] = n_filters_list 114 | cross_check_parameters['jacobian_sum'] = numpy.sum(jacobian.numpy()) 115 | cross_check_parameters['jacobian_std'] = numpy.std(jacobian.numpy()) 116 | cross_check_parameters['jacobian_prod'] = numpy.prod(jacobian.numpy()) 117 | cross_check_parameters['jacobian_time'] = (end - start) / n_jacobian_calculations 118 | cross_check_parameters['jacobian_n_calls'] = n_jacobian_calculations 119 | 120 | return cross_check_parameters 121 | 122 | if __name__ == '__main__': 123 | ninput = 24 124 | network_list = [ 125 | [32, 32, 16], 126 | [128, 128], 127 | [512, 512, 512], 128 | [16, 16, 16, 16, 16, 16], 129 | [2048], 130 | ] 131 | for network in network_list: 132 | ccp = main(ninput, network, 5) 133 | print(ccp) 134 | -------------------------------------------------------------------------------- /bin/stochastic_reconfiguration.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | import pathlib 3 | import time 4 | 5 | import signal 6 | import pickle 7 | 8 | # For configuration: 9 | from omegaconf import DictConfig, OmegaConf 10 | import hydra 11 | from hydra.experimental import compose, initialize 12 | from hydra.core.hydra_config import HydraConfig 13 | from hydra.core.utils import configure_log 14 | 15 | hydra.output_subdir = None 16 | 17 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 18 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 19 | # os.environ['TF_XLA_FLAGS'] = "--tf_xla_auto_jit=fusible" 20 | 21 | import tensorflow as tf 22 | # tf.random.set_seed(2) 23 | 24 | try: 25 | import horovod.tensorflow as hvd 26 | hvd.init() 27 | 28 | # This is to force each rank onto it's own GPU: 29 | if 
(hvd.size() != 1 ): 30 | # Only set this if there is more than one GPU. Otherwise, its probably 31 | # Set elsewhere 32 | gpus = tf.config.list_physical_devices('GPU') 33 | for gpu in gpus: 34 | tf.config.experimental.set_memory_growth(gpu, True) 35 | if hvd and len(gpus) > 0: 36 | tf.config.set_visible_devices(gpus[hvd.local_rank() % len(gpus)],'GPU') 37 | MPI_AVAILABLE=True 38 | except: 39 | MPI_AVAILABLE=False 40 | 41 | 42 | # Use mixed precision for inference (metropolis walk) 43 | # from tensorflow.keras.mixed_precision import experimental as mixed_precision 44 | # policy = mixed_precision.Policy('mixed_float16') 45 | # mixed_precision.set_policy(policy) 46 | 47 | import logging 48 | from logging import handlers 49 | 50 | 51 | 52 | 53 | 54 | 55 | # Add the local folder to the import path: 56 | mlqm_dir = os.path.dirname(os.path.abspath(__file__)) 57 | mlqm_dir = os.path.dirname(mlqm_dir) 58 | sys.path.insert(0,mlqm_dir) 59 | from mlqm import hamiltonians 60 | from mlqm.samplers import Estimator 61 | from mlqm.optimization import GradientCalculator, StochasticReconfiguration 62 | from mlqm import DEFAULT_TENSOR_TYPE 63 | from mlqm.models import DeepSetsWavefunction 64 | 65 | 66 | 67 | 68 | class exec(object): 69 | 70 | def __init__(self, config): 71 | 72 | 73 | # 74 | if MPI_AVAILABLE: 75 | self.rank = hvd.rank() 76 | self.size = hvd.size() 77 | self.local_rank = hvd.local_rank() 78 | else: 79 | self.rank = 0 80 | self.size = 1 81 | self.local_rank = 1 82 | 83 | self.config = config 84 | 85 | self.configure_logger() 86 | logger = logging.getLogger() 87 | logger.info("") 88 | logger.info(OmegaConf.to_yaml(config)) 89 | 90 | 91 | 92 | # Use this flag to catch interrupts, stop the next step and write output. 93 | self.active = True 94 | 95 | self.global_step = 0 96 | 97 | self.save_path = self.config["save_path"] # Cast to pathlib later 98 | self.model_name = pathlib.Path(self.config["model_name"]) 99 | 100 | if "profile" in self.config: 101 | self.profile = bool(self.config["profile"]) 102 | else: 103 | self.profile = False 104 | 105 | 106 | self.set_compute_parameters() 107 | 108 | 109 | sampler = self.build_sampler() 110 | hamiltonian = self.build_hamiltonian() 111 | 112 | x = sampler.sample() 113 | 114 | wavefunction_config = self.config['wavefunction'] 115 | 116 | # Create a wavefunction: 117 | wavefunction = DeepSetsWavefunction(self.config.dimension, self.config.nparticles, wavefunction_config) 118 | adaptive_wavefunction = DeepSetsWavefunction(self.config.dimension, self.config.nparticles, wavefunction_config) 119 | 120 | # Run the wave function once to initialize all its weights 121 | tf.summary.trace_on(graph=True, profiler=False) 122 | _ = wavefunction(x) 123 | _ = adaptive_wavefunction(x) 124 | tf.summary.trace_export("graph") 125 | tf.summary.trace_off() 126 | 127 | for w in adaptive_wavefunction.trainable_variables: 128 | w.assign(0. 
* w) 129 | 130 | n_parameters = 0 131 | for p in wavefunction.trainable_variables: 132 | n_parameters += tf.reduce_prod(p.shape) 133 | 134 | logger.info(f"Number of parameters in this network: {n_parameters}") 135 | # 136 | # x_test = tf.convert_to_tensor([[ 1.25291274, 1.15427136, -1.57162947], 137 | # [ 1.76117854, 0.26708064, -0.90399369]], dtype = DEFAULT_TENSOR_TYPE) 138 | # x_test = tf.reshape(x_test, (1,2,3)) 139 | # print(x_test) 140 | # 141 | # print("Original: ", wavefunction.trainable_variables) 142 | # 143 | # # wavefunction.restore_jax("/home/cadams/ThetaGPU/AI-for-QM/full.model") 144 | # # print("Restored: ", wavefunction.trainable_variables) 145 | # 146 | # print(wavefunction(x_test)) 147 | # 148 | # exit() 149 | 150 | # Read a target energy if it's there: 151 | self.target_energy = None 152 | if 'target_energy' in self.config: 153 | self.target_energy = self.config['target_energy'] 154 | 155 | 156 | self.sr_worker = StochasticReconfiguration( 157 | sampler = sampler, 158 | wavefunction = wavefunction, 159 | adaptive_wfn = adaptive_wavefunction, 160 | hamiltonian = hamiltonian, 161 | optimizer_config = self.config.optimizer, 162 | sampler_config = self.config.sampler, 163 | ) 164 | 165 | if not MPI_AVAILABLE or hvd.rank() == 0: 166 | # self.writer = tf.summary.create_file_writer(self.save_path) 167 | self.writer = tf.summary.create_file_writer(self.save_path + "/log/") 168 | 169 | 170 | # Now, cast to pathlib: 171 | self.save_path = pathlib.Path(self.save_path) 172 | 173 | 174 | # We also snapshot the configuration into the log dir: 175 | if not MPI_AVAILABLE or hvd.rank() == 0: 176 | with open(pathlib.Path('config.snapshot.yaml'), 'w') as cfg: 177 | OmegaConf.save(config=self.config, f=cfg) 178 | 179 | def configure_logger(self): 180 | 181 | logger = logging.getLogger() 182 | 183 | # Create a handler for STDOUT, but only on the root rank: 184 | if not MPI_AVAILABLE or hvd.rank() == 0: 185 | stream_handler = logging.StreamHandler() 186 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 187 | stream_handler.setFormatter(formatter) 188 | handler = handlers.MemoryHandler(capacity = 1, target=stream_handler) 189 | logger.addHandler(handler) 190 | # Add a file handler: 191 | 192 | # Add a file handler too: 193 | log_file = self.config.save_path + "/process.log" 194 | file_handler = logging.FileHandler(log_file) 195 | file_handler.setFormatter(formatter) 196 | file_handler = handlers.MemoryHandler(capacity=1, target=file_handler) 197 | logger.addHandler(file_handler) 198 | 199 | 200 | logger.setLevel(logging.DEBUG) 201 | # fh = logging.FileHandler('run.log') 202 | # fh.setLevel(logging.DEBUG) 203 | # logger.addHandler(fh) 204 | else: 205 | # in this case, MPI is available but it's not rank 0 206 | # create a null handler 207 | handler = logging.NullHandler() 208 | logger.addHandler(handler) 209 | logger.setLevel(logging.DEBUG) 210 | 211 | 212 | def build_sampler(self): 213 | 214 | from mlqm.samplers import MetropolisSampler 215 | 216 | # As an optimization, we increase the number of walkers by n_concurrent_obs_per_rank 217 | n_walkers = self.config.sampler["n_walkers_per_observation"] * \ 218 | self.config.sampler["n_concurrent_obs_per_rank"] 219 | 220 | sampler = MetropolisSampler( 221 | n = self.config.dimension, 222 | nparticles = self.config.nparticles, 223 | nwalkers = n_walkers, 224 | initializer = tf.random.normal, 225 | init_params = {"mean": 0.0, "stddev" : 0.2}, 226 | dtype = DEFAULT_TENSOR_TYPE) 227 | 228 | return sampler 229 | 230 | def 
check_potential_parameters(self, potential, parameters, config): 231 | 232 | for key in parameters: 233 | if key not in config: 234 | raise Exception(f"Configuration for {potential} missing key {key}") 235 | 236 | def build_hamiltonian(self): 237 | 238 | # First, ask for the type of hamiltonian 239 | self.hamiltonian_form = self.config["hamiltonian"]["form"] 240 | 241 | # Is this hamiltonian in the options? 242 | if self.hamiltonian_form not in hamiltonians.__dict__: 243 | raise NotImplementedError(f"hamiltonian {self.hamiltonian_form} is not found.") 244 | 245 | parameters = self.config["hamiltonian"] 246 | parameters = { p : parameters[p] for p in parameters.keys() if p != "form"} 247 | 248 | hamiltonian = hamiltonians.__dict__[self.hamiltonian_form](**parameters) 249 | 250 | return hamiltonian 251 | 252 | def restore(self): 253 | logger = logging.getLogger() 254 | if not MPI_AVAILABLE or hvd.rank() == 0: 255 | logger.info("Trying to restore model") 256 | 257 | # Inject control flow here to restore from Jax models. 258 | 259 | # Does the model exist? 260 | # Note that tensorflow adds '.index' and '.data-...' to the name 261 | tf_p = pathlib.Path(str(self.model_name) + ".index") 262 | 263 | # Check for tensorflow first: 264 | 265 | model_restored = False 266 | tf_found_path = None 267 | for source_path in [self.save_path, pathlib.Path('./')]: 268 | if (source_path / tf_p).is_file(): 269 | # Note: we use the original path without the '.index' added 270 | tf_found_path = source_path / pathlib.Path(self.model_name) 271 | logger.info(f"Resolved weights path is {tf_found_path}") 272 | break 273 | 274 | if tf_found_path is None: 275 | raise OSError(f"{self.model_name} not found.") 276 | else: 277 | try: 278 | self.sr_worker.wavefunction.load_weights(tf_found_path) 279 | model_restored = True 280 | logger.info("Restored from tensorflow!") 281 | except Exception as e: 282 | logger.info("Failed to load weights via keras load_weights function.") 283 | 284 | # Now, check for JAX only if tf failed: 285 | if not model_restored: 286 | jax_p = pathlib.Path(self.model_name) 287 | jax_found_path = None 288 | for source_path in [self.save_path, pathlib.Path('./')]: 289 | if (source_path / jax_p).is_file(): 290 | # Note: we use the original path without the '.index' added 291 | jax_found_path = source_path / jax_p 292 | logger.info(f"Resolved weights path is {jax_found_path}") 293 | break 294 | 295 | if jax_found_path is None: 296 | raise OSError(f"{self.model_name} not found.") 297 | else: 298 | try: 299 | self.sr_worker.wavefunction.restore_jax(jax_found_path) 300 | logger.info("Restored from jax!") 301 | except Exception as e: 302 | logger.info("Failed to load weights via tensorflow or jax, returning") 303 | return 304 | 305 | 306 | # We get here only if one method restored. 307 | # Attempt to restore a global step and optimizer but it's not necessary 308 | try: 309 | with open(self.save_path / pathlib.Path("global_step.pkl"), 'rb') as _f: 310 | self.global_step = pickle.load(file=_f) 311 | except: 312 | logger.info("Could not restore a global_step or an optimizer state. 
Starting over with restored weights only.") 313 | 314 | def set_compute_parameters(self): 315 | tf.keras.backend.set_floatx(DEFAULT_TENSOR_TYPE) 316 | tf.debugging.set_log_device_placement(False) 317 | tf.config.run_functions_eagerly(False) 318 | 319 | physical_devices = tf.config.list_physical_devices('GPU') 320 | for device in physical_devices: 321 | tf.config.experimental.set_memory_growth(device, True) 322 | 323 | def run(self): 324 | 325 | 326 | logger = logging.getLogger() 327 | 328 | # 329 | # with self.writer.as_default(): 330 | # tf.summary.graph(self.wavefunction.get_concrete_function().graph) 331 | 332 | # We attempt to restore the weights: 333 | try: 334 | self.restore() 335 | logger.debug("Loaded weights, optimizer and global step!") 336 | except Exception as excep: 337 | logger.debug("Failed to load weights!") 338 | logger.debug(excep) 339 | pass 340 | 341 | 342 | if MPI_AVAILABLE and hvd.size() > 1: 343 | logger.info("Broadcasting initial model and optimizer state.") 344 | # We have to broadcast the wavefunction parameter here: 345 | hvd.broadcast_variables(self.sr_worker.wavefunction.variables, 0) 346 | 347 | # And the global step: 348 | self.global_step = hvd.broadcast_object( 349 | self.global_step, root_rank=0) 350 | logger.info("Done broadcasting initial model and optimizer state.") 351 | 352 | 353 | 354 | # First step - thermalize: 355 | logger.info("About to thermalize.") 356 | self.sr_worker.equilibrate(self.config.sampler.n_thermalize) 357 | logger.info("Finished thermalization.") 358 | 359 | # Now, call once to compile: 360 | logger.info("About to compile.") 361 | self.sr_worker.compile() 362 | logger.info("Finished compilation.") 363 | 364 | checkpoint_iteration = 2000 365 | 366 | # Before beginning the loop, manually flush the buffer: 367 | logger.handlers[0].flush() 368 | 369 | best_energy = 999 370 | 371 | while self.global_step < self.config["iterations"]: 372 | if not self.active: break 373 | 374 | if self.profile: 375 | if not MPI_AVAILABLE or hvd.rank() == 0: 376 | tf.profiler.experimental.start(str(self.save_path)) 377 | tf.summary.trace_on(graph=True) 378 | 379 | 380 | start = time.time() 381 | 382 | metrics = self.sr_worker.sr_step(n_thermalize = 1000) 383 | 384 | # Check if we've reached a better energy: 385 | if metrics['energy/energy'] < best_energy: 386 | best_energy = metrics['energy/energy'] 387 | 388 | # If below the target energy, snapshot the weights as the best-yet 389 | if self.target_energy is None: 390 | pass 391 | elif best_energy < self.target_energy: 392 | if not MPI_AVAILABLE or hvd.rank() == 0: 393 | self.save_weights(name="best_energy") 394 | pass 395 | 396 | metrics['time'] = time.time() - start 397 | 398 | self.summary(metrics, self.global_step) 399 | 400 | # Add the gradients and model weights to the summary every 25 iterations: 401 | if self.global_step % 25 == 0: 402 | if not MPI_AVAILABLE or hvd.rank() == 0: 403 | weights = self.sr_worker.wavefunction.trainable_variables 404 | gradients = self.sr_worker.latest_gradients 405 | self.model_summary(weights, gradients, self.global_step) 406 | self.wavefunction_summary(self.sr_worker.latest_psi, self.global_step) 407 | 408 | 409 | if self.global_step % 1 == 0: 410 | logger.info(f"step = {self.global_step}, energy = {metrics['energy/energy'].numpy():.3f}, err = {metrics['energy/error'].numpy():.3f}") 411 | logger.info(f"step = {self.global_step}, energy_jf = {metrics['energy/energy_jf'].numpy():.3f}, err = {metrics['energy/error_jf'].numpy():.3f}") 412 | logger.info(f"acc = 
{metrics['metropolis/acceptance'].numpy():.3f}") 413 | logger.info(f"time = {metrics['time']:.3f}") 414 | 415 | # Iterate: 416 | self.global_step += 1 417 | 418 | if self.global_step % checkpoint_iteration == 0: 419 | if not MPI_AVAILABLE or hvd.rank() == 0: 420 | self.save_weights() 421 | pass 422 | 423 | if self.profile: 424 | if not MPI_AVAILABLE or hvd.rank() == 0: 425 | tf.profiler.experimental.stop() 426 | tf.summary.trace_off() 427 | 428 | # Save the weights at the very end: 429 | if not MPI_AVAILABLE or hvd.rank() == 0: 430 | self.save_weights() 431 | 432 | 433 | def model_summary(self, weights, gradients, step): 434 | with self.writer.as_default(): 435 | for w, g in zip(weights, gradients): 436 | tf.summary.histogram("weights/" + w.name, w, step=step) 437 | tf.summary.histogram("gradients/" + w.name, g, step=step) 438 | 439 | def wavefunction_summary(self, latest_psi, step): 440 | with self.writer.as_default(): 441 | tf.summary.histogram("psi", latest_psi, step=step) 442 | 443 | 444 | # @tf.function 445 | def summary(self, metrics, step): 446 | if not MPI_AVAILABLE or hvd.rank() == 0: 447 | with self.writer.as_default(): 448 | for key in metrics: 449 | tf.summary.scalar(key, metrics[key], step=step) 450 | 451 | 452 | def save_weights(self, name = "checkpoint" ): 453 | 454 | # If the file for the model path already exists, we don't change it until after restoring: 455 | self.model_path = self.save_path / pathlib.Path(name) / self.model_name 456 | 457 | # Take the network and snapshot it to file: 458 | self.sr_worker.wavefunction.save_weights(self.model_path) 459 | # Save the global step: 460 | with open(self.save_path / pathlib.Path(name) / pathlib.Path("global_step.pkl"), 'wb') as _f: 461 | pickle.dump(self.global_step, file=_f) 462 | 463 | def finalize(self): 464 | if not MPI_AVAILABLE or hvd.rank() == 0: 465 | self.save_weights() 466 | 467 | def interrupt_handler(self, sig, frame): 468 | logging.getLogger().info("Snapshotting weights...") 469 | self.active = False 470 | 471 | 472 | @hydra.main(config_path="../config", config_name="config") 473 | def main(cfg : OmegaConf) -> None: 474 | 475 | # Prepare directories: 476 | work_dir = pathlib.Path(cfg.save_path) 477 | work_dir.mkdir(parents=True, exist_ok=True) 478 | log_dir = pathlib.Path(cfg.save_path + "/log/") 479 | log_dir.mkdir(parents=True, exist_ok=True) 480 | 481 | # cd in to the job directory since we disabled that with hydra: 482 | # os.chdir(cfg.hydra.run.dir) 483 | e = exec(cfg) 484 | signal.signal(signal.SIGINT, e.interrupt_handler) 485 | e.run() 486 | e.finalize() 487 | 488 | if __name__ == "__main__": 489 | import sys 490 | if "--help" not in sys.argv and "--hydra-help" not in sys.argv: 491 | sys.argv += ['hydra.run.dir=.', 'hydra/job_logging=disabled'] 492 | main() 493 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - hamiltonian: Nuclear 4 | - optimizer: Flat 5 | - wavefunction: DeepSets 6 | iterations: 5 7 | sampler: 8 | n_thermalize: 5000 9 | n_void_steps: 200 10 | n_observable_measurements: 20 11 | n_walkers_per_observation: 200 12 | n_concurrent_obs_per_rank: 20 13 | nparticles: 2 14 | dimension: 3 15 | run_id: ???
16 | save_path: output/${hamiltonian.form}/${nparticles}particles/${dimension}D/${optimizer.form}.${run_id}/ 17 | model_name: ${hamiltonian.form}_${nparticles}part_${dimension}D.model 18 | -------------------------------------------------------------------------------- /config/hamiltonian/Atomic.yaml: -------------------------------------------------------------------------------- 1 | mass : 1. 2 | z : 1. 3 | form : "AtomicPotential" -------------------------------------------------------------------------------- /config/hamiltonian/HarmonicOscillator.yaml: -------------------------------------------------------------------------------- 1 | mass: 1. 2 | omega: 1. 3 | form: "HarmonicOscillator" 4 | -------------------------------------------------------------------------------- /config/hamiltonian/Nuclear.yaml: -------------------------------------------------------------------------------- 1 | mass: 938.95 2 | form: "NuclearPotential" 3 | -------------------------------------------------------------------------------- /config/optimizer/AdaptiveDelta.yaml: -------------------------------------------------------------------------------- 1 | epsilon: 0.0001 2 | delta_max: 0.1 3 | delta_min: 0.00001 4 | form: AdaptiveDelta 5 | -------------------------------------------------------------------------------- /config/optimizer/AdaptiveEpsilon.yaml: -------------------------------------------------------------------------------- 1 | delta: 0.01 2 | epsilon_max: 0.1 3 | epsilon_min: 0.00000001 4 | form: AdaptiveEpsilon 5 | -------------------------------------------------------------------------------- /config/optimizer/Flat.yaml: -------------------------------------------------------------------------------- 1 | delta: 0.001 2 | epsilon: 0.0001 3 | form: Flat 4 | -------------------------------------------------------------------------------- /config/wavefunction/DeepSets.yaml: -------------------------------------------------------------------------------- 1 | form: DeepSets 2 | n_filters_per_layer: 16 3 | n_layers: 2 4 | bias: True 5 | activation: tanh 6 | residual: False 7 | mean_subtract: True 8 | confinement: 0.1 9 | -------------------------------------------------------------------------------- /examples/Deep_Sets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "import numpy" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "def generate_points(dimension, n_points):\n", 20 | " ''' Generate a set of input points. 
Continuously sampled during training.'''\n", 21 | " return torch.rand(n_points, dimension)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "class func_3layer(torch.nn.Module):\n", 31 | " '''\n", 32 | " Three layer network for phi, as per the paper (in appendix)\n", 33 | " '''\n", 34 | " def __init__(self, input_dim, output_dim, bias=True):\n", 35 | " torch.nn.Module.__init__(self)\n", 36 | " \n", 37 | " self.layer1 = torch.nn.Linear(input_dim, 100, bias=bias)\n", 38 | " self.layer2 = torch.nn.Linear(100, 100, bias=bias)\n", 39 | " self.layer3 = torch.nn.Linear(100, output_dim, bias=bias)\n", 40 | " \n", 41 | " self.activation = torch.relu\n", 42 | " \n", 43 | " def forward(self, inputs):\n", 44 | " \n", 45 | " x = self.layer1(inputs)\n", 46 | " x = self.activation(x)\n", 47 | " x = self.layer2(x)\n", 48 | " x = self.activation(x)\n", 49 | " x = self.layer3(x)\n", 50 | " \n", 51 | " return x\n", 52 | "\n", 53 | "class func_2layer(torch.nn.Module):\n", 54 | " '''\n", 55 | " Two layer network for rho, as per the paper (in appendix)\n", 56 | " '''\n", 57 | " def __init__(self, input_dim, output_dim, bias=True):\n", 58 | " torch.nn.Module.__init__(self)\n", 59 | " \n", 60 | " self.layer1 = torch.nn.Linear(input_dim, 100, bias=bias)\n", 61 | " self.layer2 = torch.nn.Linear(100, output_dim, bias=bias)\n", 62 | " \n", 63 | " self.activation = torch.relu\n", 64 | " \n", 65 | " def forward(self, inputs):\n", 66 | " \n", 67 | " x = self.layer1(inputs)\n", 68 | " x = self.activation(x)\n", 69 | " x = self.layer2(x)\n", 70 | " \n", 71 | " return x" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "class func_symm(torch.nn.Module):\n", 81 | " '''\n", 82 | " Explicitly symmetric function (essentially, no rho)\n", 83 | " '''\n", 84 | " def __init__(self, input_dim, output_dim, bias=True):\n", 85 | " torch.nn.Module.__init__(self)\n", 86 | " \n", 87 | " self.layer1 = torch.nn.Linear(input_dim, 256, bias=bias)\n", 88 | " self.layer2 = torch.nn.Linear(256, 256, bias=True)\n", 89 | " self.layer3 = torch.nn.Linear(256, output_dim, bias=bias)\n", 90 | " \n", 91 | " self.activation = torch.relu\n", 92 | "# self.input_dim = input_dim\n", 93 | " \n", 94 | " def forward(self, inputs):\n", 95 | " if (2 == 2):\n", 96 | " inputs_12 = torch.zeros_like(inputs)\n", 97 | " inputs_12[:,0] = inputs[:,1]\n", 98 | " inputs_12[:,1] = inputs[:,0]\n", 99 | " \n", 100 | " x = self.layer1(inputs)\n", 101 | " x = self.activation(x)\n", 102 | " x = self.layer2(x)\n", 103 | " x = self.activation(x)\n", 104 | " x = self.layer3(x) \n", 105 | " \n", 106 | " x_12 = self.layer1(inputs_12)\n", 107 | " x_12 = self.activation(x_12)\n", 108 | " x_12 = self.layer2(x_12)\n", 109 | " x_12 = self.activation(x_12)\n", 110 | " x_12 = self.layer3(x_12)\n", 111 | " \n", 112 | " output = ( x + x_12 ) / 2\n", 113 | " else: \n", 114 | " output = 0\n", 115 | " print(\"PROBLEM, ONLY 2-D INPUTS FOR NOW\")\n", 116 | " return output" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 5, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# Define the explicitly symmetric function\n", 126 | "symm = func_symm(input_dim=2, output_dim=1)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 6, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "x= 
tensor([0.9686, 0.8799, 0.2161, 0.3617, 0.8503, 0.2479, 0.0261, 0.4880, 0.5819,\n", 139 | " 0.7291])\n", 140 | "y= tensor([0.1999, 0.6622, 0.1192, 0.0167, 0.9620, 0.2946, 0.0891, 0.0860, 0.0066,\n", 141 | " 0.4305])\n", 142 | "xy= tensor([[0.9686, 0.1999],\n", 143 | " [0.8799, 0.6622],\n", 144 | " [0.2161, 0.1192],\n", 145 | " [0.3617, 0.0167],\n", 146 | " [0.8503, 0.9620],\n", 147 | " [0.2479, 0.2946],\n", 148 | " [0.0261, 0.0891],\n", 149 | " [0.4880, 0.0860],\n", 150 | " [0.5819, 0.0066],\n", 151 | " [0.7291, 0.4305]])\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "# This should generate points between 0 and 1 at random:\n", 157 | "torch.manual_seed(19)\n", 158 | "xy = generate_points(2,10)\n", 159 | "x = xy[:,0]\n", 160 | "y = xy[:,1]\n", 161 | "print(\"x=\",x)\n", 162 | "print(\"y=\",y)\n", 163 | "print(\"xy=\", xy)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 7, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "# Now, let's compute an objective function:\n", 173 | "def real_function(xy):\n", 174 | " # This is symmetric by construction\n", 175 | " return (xy[:,0] - xy[:,1] ) **2\n", 176 | "# return abs(x - y)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 8, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "tensor([0.5910, 0.0474, 0.0094, 0.1190, 0.0125, 0.0022, 0.0040, 0.1617, 0.3310,\n", 188 | " 0.0892])" 189 | ] 190 | }, 191 | "execution_count": 8, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "real_function(xy)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 9, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "tensor([ 0.0274, 0.0462, -0.0192, -0.0155, 0.0519, -0.0088, -0.0269, -0.0025,\n", 210 | " 0.0019, 0.0324], grad_fn=)\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "#symm = func_symm(input_dim=2, output_dim=1)\n", 216 | "target = torch.t(symm(xy))[0]\n", 217 | "print(target)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 10, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# For training the net, create optimizers:\n", 227 | "params = list(symm.parameters())\n", 228 | "optimizer = torch.optim.Adam(params, lr=0.001)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 11, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": [ 240 | "loss= tensor(0.0596, grad_fn=)\n", 241 | "loss= tensor(7.6629e-07, grad_fn=)\n", 242 | "loss= tensor(8.2745e-07, grad_fn=)\n" 243 | ] 244 | }, 245 | { 246 | "ename": "KeyboardInterrupt", 247 | "evalue": "", 248 | "output_type": "error", 249 | "traceback": [ 250 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 251 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 252 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;31m# print(\"approximation\", approximation)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"loss=\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m 
\u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 253 | "\u001b[0;32m/opt/local/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 194\u001b[0m \"\"\"\n\u001b[0;32m--> 195\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 196\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 254 | "\u001b[0;32m/opt/local/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m 97\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 98\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 99\u001b[0;31m allow_unreachable=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 255 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "BATCH_SIZE = 4096\n", 261 | "for i in range(10000):\n", 262 | " xy = generate_points(2,BATCH_SIZE)\n", 263 | "\n", 264 | "\n", 265 | " optimizer.zero_grad() \n", 266 | " correct_answer = real_function(xy)\n", 267 | "# approximation = symm(xy)\n", 268 | " approximation = torch.t(symm(xy))[0] \n", 269 | "# print(correct_answer)\n", 270 | "# print(approximation)\n", 271 | " \n", 272 | " loss = torch.nn.MSELoss()(target=correct_answer, input=approximation)\n", 273 | " if i % 1000 == 0:\n", 274 | "# print(\"correct_answer\", correct_answer)\n", 275 | "# print(\"approximation\", approximation)\n", 276 | " print(\"loss=\",loss)\n", 277 | " loss.backward()\n", 278 | " optimizer.step()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "# This should generate points between 0 and 1 at random to check the 
correctness of the answer:\n", 288 | "torch.manual_seed(19)\n", 289 | "xy = generate_points(2,20)\n", 290 | "x = xy[:,0]\n", 291 | "y = xy[:,1]\n", 292 | "correct_answer = real_function(xy)\n", 293 | "approximation = torch.t(symm(xy))[0]\n", 294 | "print(\"correct_answer\", correct_answer)\n", 295 | "print(\"approximation\", approximation)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "# Check for symmetry\n", 305 | "yx = torch.index_select(xy, 1, torch.LongTensor([1,0]))\n", 306 | "correct_answer = real_function(yx)\n", 307 | "approximation = torch.t(symm(yx))[0]\n", 308 | "print(\"correct_answer\", correct_answer)\n", 309 | "print(\"approximation\", approximation)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "# Set the dimension of input data and the latent size:\n", 319 | "DIMENSION = 1\n", 320 | "LATENT_SIZE = 2\n", 321 | "\n", 322 | "# Create the 3 NN to train\n", 323 | "phi = func_3layer(input_dim=DIMENSION, output_dim=LATENT_SIZE)\n", 324 | "rho = func_2layer(input_dim=LATENT_SIZE, output_dim=1)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "# For training the net, create optimizers:\n", 334 | "params = list(phi.parameters()) + list(rho.parameters())\n", 335 | "optimizer = torch.optim.Adam(params, lr=0.0001)\n", 336 | "BATCH_SIZE = 16\n", 337 | "xy = generate_points(2,BATCH_SIZE)\n", 338 | "x = xy[:,0]\n", 339 | "y = xy[:,1]\n", 340 | "print(\"x=\", x)\n", 341 | "print(\"phi_x=\", phi(x))" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "# For training the net, create optimizers:\n", 351 | "params = list(phi.parameters()) + list(rho.parameters())\n", 352 | "optimizer = torch.optim.Adam(params, lr=0.0001)\n", 353 | "BATCH_SIZE = 1024\n", 354 | "xy = generate_points(2, BATCH_SIZE)\n", 355 | "x = xy[:,0].view(-1,1)\n", 356 | "y = xy[:,1].view(-1,1)\n", 357 | "#print(\"x=\", x)\n", 358 | "#print(\"phi_x=\", phi(x))\n", 359 | "\n", 360 | "for i in range(10000):\n", 361 | " optimizer.zero_grad() \n", 362 | " correct_answer = real_function(xy)\n", 363 | " phi_x = phi(x)\n", 364 | "# print(\"phi_x\", phi_x)\n", 365 | " phi_y = phi(y)\n", 366 | " approximation = torch.t(rho(phi_x + phi_y))[0] \n", 367 | "# print(\"correct_answer\", correct_answer)\n", 368 | "# print(\"approximation\", approximation)\n", 369 | " \n", 370 | " loss = torch.nn.MSELoss()(target=correct_answer, input=approximation)\n", 371 | " if i % 500 == 0:\n", 372 | "# print(\"correct_answer\", correct_answer)\n", 373 | "# print(\"approximation\", approximation)\n", 374 | " print(\"loss=\",loss)\n", 375 | " loss.backward()\n", 376 | " optimizer.step()" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "# This should generate points between 0 and 1 at random to check the correctness of the answer:\n", 386 | "torch.manual_seed(19)\n", 387 | "xy = generate_points(2,2)\n", 388 | "x = xy[:,0].view(-1,1)\n", 389 | "y = xy[:,1].view(-1,1)\n", 390 | "correct_answer = real_function(xy)\n", 391 | "phi_x = phi(x)\n", 392 | "phi_y = phi(y)\n", 393 | "approximation = torch.t(rho(phi_x + phi_y))[0]\n", 394 | 
"print(\"correct_answer\", correct_answer)\n", 395 | "print(\"approximation\", approximation)" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [] 404 | } 405 | ], 406 | "metadata": { 407 | "kernelspec": { 408 | "display_name": "Python 3", 409 | "language": "python", 410 | "name": "python3" 411 | }, 412 | "language_info": { 413 | "codemirror_mode": { 414 | "name": "ipython", 415 | "version": 3 416 | }, 417 | "file_extension": ".py", 418 | "mimetype": "text/x-python", 419 | "name": "python", 420 | "nbconvert_exporter": "python", 421 | "pygments_lexer": "ipython3", 422 | "version": "3.6.10" 423 | } 424 | }, 425 | "nbformat": 4, 426 | "nbformat_minor": 2 427 | } 428 | -------------------------------------------------------------------------------- /examples/example_del_squared.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import numpy 4 | 5 | import sys 6 | sys.path.insert(0, "/Users/corey.adams/ML_QM/") 7 | 8 | from mlqm.samplers import CartesianSampler 9 | 10 | def forward_model(inputs): 11 | # Polynomial function: 12 | 13 | return 5 + 7*torch.sum(inputs, dim=-1) - inputs[:,1]**2 + 9 *torch.sum(inputs**3, dim=-1) 14 | 15 | def grad_forward(inputs): 16 | return 7 + 3*9 * inputs**2 17 | 18 | def del_forward(inputs): 19 | return 3*9*2 * torch.sum(inputs, dim=-1) - 2 20 | 21 | sampler = CartesianSampler(n=2, delta=0.5, mins=-1, maxes=1) 22 | inputs = sampler.sample() 23 | 24 | print(inputs) 25 | 26 | grad_accum = torch.ones(len(inputs)) 27 | 28 | w_of_x = forward_model(inputs) 29 | 30 | dw_dx = torch.autograd.grad( 31 | outputs=w_of_x, 32 | inputs=inputs, 33 | grad_outputs = grad_accum, 34 | retain_graph=True, 35 | create_graph=True)[0] 36 | 37 | print(dw_dx) 38 | print(grad_forward(inputs)) 39 | 40 | grad_accum = torch.ones(inputs.shape) 41 | 42 | # Compute the second derivative: 43 | d2w_dx2 = torch.autograd.grad( 44 | outputs=dw_dx, 45 | inputs=inputs, 46 | grad_outputs = grad_accum, 47 | retain_graph=True, 48 | create_graph=True)[0] 49 | 50 | print(torch.sum(d2w_dx2, dim=-1)) 51 | print(del_forward(inputs)) -------------------------------------------------------------------------------- /examples/fit_harmonic_oscillator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy 3 | 4 | torch.manual_seed(0) 5 | 6 | import sys 7 | sys.path.insert(0, "/Users/corey.adams/ML_QM/") 8 | 9 | from mlqm.samplers import MetropolisSampler 10 | from mlqm.hamiltonians import HarmonicOscillator 11 | from mlqm.models import NeuralWavefunction 12 | 13 | 14 | def train(): 15 | 16 | dimension = 1 # Dimensionality of the physical space 17 | nvoid = 50 # Number of times to kick the walkers before computing other parameters again 18 | n_prop = 10 # Number of times to compute the observables, per model update 19 | n_model_updates = 100 # Number of times the model gets updated 20 | kick = 0.5 # Gaussian sigma for the kick 21 | acceptance = 0.0 # Initial value of acceptance 22 | 23 | 24 | # First, create an instance of the sampler and the hamiltonian: 25 | sampler = MetropolisSampler( 26 | ndim = dimension, 27 | nwalkers = 10000, 28 | initializer = torch.normal, 29 | init_params = [0.0, 0.2]) 30 | 31 | hamiltonian = HarmonicOscillator(n=dimension, M=1.0, omega = 1.0) 32 | 33 | # Create an instance of the wave function: 34 | wavefunction = NeuralWavefunction(n=dimension) 35 | 36 | inputs = 
sampler.sample() 37 | 38 | 39 | wavefunction.update_normalization(inputs) 40 | 41 | energy, energy_by_parts = hamiltonian.energy(wavefunction, inputs ) 42 | # print("Initial Energy is ", energy) 43 | 44 | # Create an optimizer for the initial wave function: 45 | optimizer = torch.optim.Adam(wavefunction.parameters(), lr=0.001) 46 | 47 | 48 | # Iterate over the model 49 | for i_update in range(n_model_updates): 50 | 51 | # Clear the accumulated gradients 52 | optimizer.zero_grad() 53 | 54 | # energy = 0.0 55 | # energy_by_parts = 0.0 56 | grad = None 57 | # Loop n_prop times to compute energy 58 | for i_prop in range(n_prop): 59 | 60 | # First, walk the walkers without computing any observables 61 | # Kick the sampler: 62 | for i_void in range(nvoid): 63 | acceptance = sampler.kick( 64 | wavefunction = wavefunction, 65 | kicker=torch.normal, 66 | kicker_params=[0.0,kick]) 67 | 68 | # Now, compute the observables: 69 | inputs = sampler.sample() 70 | 71 | # Reset the gradient on the inputs: 72 | inputs.grad = None 73 | 74 | # Compute the energy: 75 | energy, energy_by_parts = hamiltonian.energy(wavefunction, inputs) 76 | 77 | # We back-prop'd through the wave function once already in the energy computation. Clear the gradients: 78 | wavefunction.zero_grad() 79 | 80 | summed_energy = torch.mean(energy) 81 | summed_energy_by_parts = torch.mean(energy_by_parts) 82 | summed_energy.backward() 83 | 84 | 85 | if grad is None: 86 | grad = [p.grad for p in wavefunction.parameters()] 87 | else: 88 | grad = [g + p.grad for g, p in zip(grad, wavefunction.parameters())] 89 | # # i = 0 90 | 91 | for p in wavefunction.parameters(): 92 | # i = i + 1 93 | # print("p=",i,p.data) 94 | # print("p grad=",i,p.grad) 95 | p.data = p.data - p.grad * 0.005 96 | 97 | 98 | # optimizer.step() 99 | if i_update % 10 == 0: 100 | print(f"step = {i_update}, energy = {summed_energy.data:.2f}") 101 | print(f" step = {i_update}, energy_by_parts = {summed_energy_by_parts.data:.2f}") 102 | print(f" step = {i_update}, acceptance = {acceptance:.2f}") 103 | 104 | # Update the normaliztion 105 | wavefunction.update_normalization(inputs) 106 | 107 | 108 | exit() 109 | 110 | print(f"First wavefunction energy is {energy.data}") 111 | 112 | wavefunction_list = [ wavefunction ] 113 | energy_list = [ energy.data ] 114 | wavefunction_values = [ wavefunction(inputs).detach()] 115 | 116 | # Now, go ahead and compute more wavefunctions: 117 | 118 | for n in range(dimension): 119 | 120 | wavefunction = NeuralWavefunction(n=dimension) 121 | optimizer = torch.optim.Adam(wavefunction.parameters(), lr=0.001) 122 | 123 | for i in range(3000): 124 | 125 | # Reset the gradient on the inputs: 126 | inputs.grad = None 127 | 128 | # Compute the energy: 129 | energy = hamiltonian.energy(wavefunction, inputs, delta**dimension) 130 | 131 | # We back-prop'd through the wave function once already in the energy computation. 
Clear the gradients: 132 | wavefunction.zero_grad() 133 | 134 | # Compute the orthogonality: 135 | orthogonality = None 136 | this_values = wavefunction(inputs) 137 | for w in wavefunction_values: 138 | ortho = torch.sum(this_values * w * delta**dimension)**2 139 | if orthogonality is None: 140 | orthogonality = ortho 141 | else: 142 | orthogonality += ortho 143 | # print([p.grad for p in wavefunction.parameters()]) 144 | 145 | (energy + 5*orthogonality).backward() 146 | 147 | 148 | optimizer.step() 149 | if i % 100 == 0: 150 | print(f"step = {i}, energy = {energy.data:.2f}, orthogonality = {orthogonality.data:.2f}") 151 | 152 | # Lastly, update the normaliztion 153 | wavefunction.update_normalization(inputs, delta**dimension) 154 | 155 | energy_list.append(energy.data) 156 | wavefunction_list.append(wavefunction) 157 | wavefunction_values.append(wavefunction(inputs).detach()) 158 | 159 | 160 | 161 | 162 | if __name__ == "__main__": 163 | train() 164 | -------------------------------------------------------------------------------- /examples/fit_hydrogen_atom.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy 3 | 4 | import sys 5 | sys.path.insert(0, "/Users/corey.adams/ML_QM/") 6 | 7 | from mlqm.samplers import CartesianSampler 8 | from mlqm.hamiltonians import Hydrogen 9 | from mlqm.models import NeuralWavefunction 10 | 11 | 12 | def train(): 13 | 14 | dimension = 3 15 | 16 | # First, create an instance of the sampler and the hamiltonian: 17 | delta = 0.2 18 | mins = -5. 19 | maxes = 5. 20 | sampler = CartesianSampler(dimension, delta, mins, maxes) 21 | 22 | hamiltonian = Hydrogen(mu=1.0, e=1.0) 23 | 24 | # Create an instance of the wave function: 25 | wavefunction = NeuralWavefunction(n=dimension) 26 | 27 | inputs = sampler.sample() 28 | 29 | wavefunction.update_normalization(inputs, delta**dimension) 30 | 31 | energy = hamiltonian.energy(wavefunction, inputs, delta**dimension) 32 | print("Initial Energy is ", energy) 33 | 34 | # Create an optimizer for the initial wave function: 35 | optimizer = torch.optim.Adam(wavefunction.parameters(), lr=0.001) 36 | 37 | # Now, iterate until the energy stops decreasing: 38 | 39 | 40 | for i in range(1000): 41 | 42 | # Reset the gradient on the inputs: 43 | inputs.grad = None 44 | 45 | # Compute the energy: 46 | energy = hamiltonian.energy(wavefunction, inputs, delta**dimension) 47 | 48 | # We back-prop'd through the wave function once already in the energy computation. 
Clear the gradients: 49 | wavefunction.zero_grad() 50 | 51 | (energy + 100).backward() 52 | 53 | # print([p.grad for p in wavefunction.parameters()]) 54 | 55 | optimizer.step() 56 | # if i % 100 == 0: 57 | print(f"step = {i}, energy = {energy.data:.2f}") 58 | 59 | # Lastly, update the normaliztion 60 | wavefunction.update_normalization(inputs, delta**dimension) 61 | 62 | print(f"First wavefunction energy is {energy.data}") 63 | 64 | 65 | if __name__ == "__main__": 66 | train() 67 | -------------------------------------------------------------------------------- /examples/mc_harmonic_oscillator.py: -------------------------------------------------------------------------------- 1 | # Python built ins: 2 | import sys, os 3 | import time 4 | import logging 5 | 6 | # Frameworks: 7 | import numpy 8 | import torch 9 | 10 | # Add the local folder to the import path: 11 | top_folder = os.path.dirname(os.path.abspath(__file__)) 12 | top_folder = os.path.dirname(top_folder) 13 | sys.path.insert(0,top_folder) 14 | 15 | #from mlqm.samplers import CartesianSampler 16 | from mlqm.hamiltonians import HarmonicOscillator_mc 17 | from mlqm.models import NeuralWavefunction 18 | from mlqm.hamiltonians import NuclearPotential 19 | from mlqm.samplers import Estimator 20 | from mlqm.optimization import Optimizer 21 | 22 | 23 | sig = 0.2 24 | dx = 0.2 25 | neq = 10 26 | nav = 10 27 | nprop = 10 28 | nvoid = 50 29 | nwalk = 1200 30 | ndim = 3 31 | npart = 4 32 | seed = 17 33 | mass = 1. 34 | omega = 1. 35 | delta = 0.002 36 | eps = 0.0002 37 | model_save_path = f"./helium{npart}.model" 38 | 39 | 40 | # Set up logging: 41 | logger = logging.getLogger() 42 | # Create a file handler: 43 | hdlr = logging.FileHandler(f'helium{npart}.log') 44 | # Add formatting to the log: 45 | formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') 46 | hdlr.setFormatter(formatter) 47 | logger.addHandler(hdlr) 48 | # Set the default level. 
Levels here: https://docs.python.org/2/library/logging.html 49 | logger.setLevel(logging.INFO) 50 | 51 | 52 | logger.info(f"sig={sig}") 53 | logger.info(f"dx={dx}") 54 | logger.info(f"neq={neq}") 55 | logger.info(f"nav={nav}") 56 | logger.info(f"nprop={nprop}") 57 | logger.info(f"nvoid={nvoid}") 58 | logger.info(f"nwalk={nwalk}") 59 | logger.info(f"ndim={ndim}") 60 | logger.info(f"npart={npart}") 61 | logger.info(f"seed={seed}") 62 | logger.info(f"mass={mass}") 63 | logger.info(f"omega={omega}") 64 | logger.info(f"delta={delta}") 65 | logger.info(f"eps={eps}") 66 | 67 | 68 | # Initialize Seed 69 | torch.manual_seed(seed) 70 | 71 | # Initialize neural wave function and compute the number of parameters 72 | wavefunction = NeuralWavefunction(ndim, npart) 73 | wavefunction.count_parameters() 74 | 75 | # Initialize Potential 76 | potential = NuclearPotential(nwalk) 77 | 78 | # Initialize Hamiltonian 79 | hamiltonian = HarmonicOscillator_mc(mass, omega, nwalk, ndim, npart) 80 | 81 | #Initialize Optimizer 82 | opt=Optimizer(delta,eps,wavefunction.npt) 83 | 84 | # Propagation 85 | def energy_metropolis(neq, nav, nprop, nvoid, hamiltonian, wavefunction): 86 | nblock = neq + nav 87 | nstep = nprop * nvoid 88 | block_estimator = Estimator(info=None) 89 | block_estimator.reset() 90 | total_estimator = Estimator(info=None) 91 | total_estimator.reset() 92 | # Sample initial configurations uniformy between -sig and sig 93 | x_o = torch.normal(0., sig, size=[nwalk, npart, ndim]) 94 | for i in range (nblock): 95 | block_estimator.reset() 96 | if (i == neq) : 97 | total_estimator.reset() 98 | for j in range (nstep): 99 | with torch.no_grad(): 100 | log_wpsi_o = wavefunction(x_o) 101 | # Gaussian transition probability 102 | x_n = x_o + torch.normal(0., dx, size=[nwalk, npart, ndim]) 103 | log_wpsi_n = wavefunction(x_n) 104 | # Accepance probability |psi_n|**2 / |psi_o|**2 105 | prob = 2 * ( log_wpsi_n - log_wpsi_o ) 106 | accept = torch.ge(prob, torch.log(torch.rand(size=[nwalk])) ) 107 | x_o = torch.where(accept.view([nwalk,1,1]), x_n, x_o) 108 | acceptance = torch.mean(accept.float()) 109 | # Compute energy and accumulate estimators within a given block 110 | if ( (j+1) % nvoid == 0 and i >= neq ): 111 | energy, energy_jf = hamiltonian.energy(wavefunction, potential, x_o) 112 | energy = energy / nwalk 113 | energy_jf = energy_jf / nwalk 114 | energy.detach_() 115 | energy_jf.detach_() 116 | 117 | # Compute < O^i >, < H O^i >, and < O^i O^j > 118 | log_wpsi = wavefunction(x_o) 119 | jac = torch.zeros(size=[nwalk,wavefunction.npt]) 120 | for n in range(nwalk): 121 | log_wpsi_n = log_wpsi[n] 122 | wavefunction.zero_grad() 123 | params = wavefunction.parameters() 124 | dpsi_dp = torch.autograd.grad(log_wpsi_n, params, retain_graph=True) 125 | dpsi_i_n, indeces_flat = wavefunction.flatten_params(dpsi_dp) 126 | jac[n,:] = torch.t(dpsi_i_n) 127 | # log_wpsi_n.detach_() 128 | dpsi_i = torch.sum(jac, dim=0) / nwalk 129 | dpsi_i = dpsi_i.view(-1,1) 130 | dpsi_i_EL = torch.matmul(energy, jac).view(-1,1) 131 | dpsi_ij = torch.mm(torch.t(jac), jac) / nwalk 132 | 133 | # print("dpsi_i", dpsi_i) 134 | # print("dpsi_ij", dpsi_ij) 135 | # exit() 136 | block_estimator.accumulate(torch.sum(energy),torch.sum(energy_jf),acceptance,1.,dpsi_i,dpsi_i_EL,dpsi_ij,1.) 
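# ---------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the original source file):
# the estimators accumulated above are exactly the ingredients of a stochastic
# reconfiguration (SR) update, with dpsi_i ~ <O_i>, dpsi_i_EL ~ <E_L O_i> and
# dpsi_ij ~ <O_i O_j>. A minimal, regularized form of such an update is
# sketched below; the name `sr_update_sketch` and the plain matrix-inverse
# solve are assumptions for illustration only -- the update actually used by
# this script is whatever mlqm.optimization.Optimizer.sr implements.
import torch

def sr_update_sketch(energy, dpsi_i, dpsi_i_EL, dpsi_ij, delta, eps):
    # Overlap (Fisher) matrix: S_ij = <O_i O_j> - <O_i><O_j>
    S = dpsi_ij - torch.mm(dpsi_i, dpsi_i.t())
    # Energy gradient: g_i = <E_L O_i> - <E_L><O_i>
    g = dpsi_i_EL - energy * dpsi_i
    # Regularize the overlap matrix and take a downhill step: dp = -delta * S^-1 g
    S = S + eps * torch.eye(S.shape[0], dtype=S.dtype)
    return -delta * torch.mm(torch.inverse(S), g)
# The returned dp would then be added to the flattened parameters, matching the
# "p.data = p.data + dp" update performed later in this script.
# ---------------------------------------------------------------------------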
137 | # Accumulate block averages 138 | if ( i >= neq ): 139 | total_estimator.accumulate(block_estimator.energy,block_estimator.energy_jf,block_estimator.acceptance,0,block_estimator.dpsi_i, 140 | block_estimator.dpsi_i_EL,block_estimator.dpsi_ij,block_estimator.weight) 141 | 142 | error, error_jf = total_estimator.finalize(nav) 143 | energy = total_estimator.energy 144 | energy_jf = total_estimator.energy_jf 145 | acceptance = total_estimator.acceptance 146 | dpsi_i = total_estimator.dpsi_i 147 | dpsi_i_EL = total_estimator.dpsi_i_EL 148 | dpsi_ij = total_estimator.dpsi_ij 149 | 150 | logger.info(f"psi norm{torch.mean(log_wpsi)}") 151 | 152 | with torch.no_grad(): 153 | dp_i = opt.sr(energy,dpsi_i,dpsi_i_EL,dpsi_ij) 154 | gradient = wavefunction.recover_flattened(dp_i, indeces_flat, wavefunction) 155 | delta_p = [ g for g in gradient] 156 | 157 | return energy, error, energy_jf, error_jf, acceptance, delta_p 158 | 159 | t0 = time.time() 160 | energy, error, energy_jf, error_jf, acceptance, delta_p = energy_metropolis(neq, nav, nprop, nvoid, hamiltonian, wavefunction) 161 | t1 = time.time() 162 | logger.info(f"initial_energy {energy, error}") 163 | logger.info(f"initial_jf_energy {energy_jf, error_jf}") 164 | logger.info(f"initial_acceptance {acceptance}") 165 | logger.info(f"elapsed time {t1 - t0}") 166 | 167 | for i in range(2): 168 | 169 | # Compute the energy: 170 | energy, error, energy_jf, error_jf, acceptance, delta_p = energy_metropolis(neq, nav, nprop, nvoid, hamiltonian, wavefunction) 171 | 172 | for (p, dp) in zip (wavefunction.parameters(),delta_p): 173 | p.data = p.data + dp 174 | 175 | if i % 1 == 0: 176 | logger.info(f"step = {i}, energy = {energy.data:.3f}, err = {error.data:.3f}") 177 | logger.info(f"step = {i}, energy_jf = {energy_jf.data:.3f}, err = {error_jf.data:.3f}") 178 | logger.info(f"acc = {acceptance.data:.3f}") 179 | 180 | # This saves the model: 181 | 182 | torch.save(wavefunction.state_dict(), model_save_path) 183 | 184 | -------------------------------------------------------------------------------- /examples/model_reload.py: -------------------------------------------------------------------------------- 1 | # Python built ins: 2 | import sys, os 3 | import time 4 | import logging 5 | 6 | # Frameworks: 7 | import numpy 8 | import torch 9 | 10 | # Add the local folder to the import path: 11 | top_folder = os.path.dirname(os.path.abspath(__file__)) 12 | top_folder = os.path.dirname(top_folder) 13 | sys.path.insert(0,top_folder) 14 | 15 | #from mlqm.samplers import CartesianSampler 16 | from mlqm.hamiltonians import HarmonicOscillator_mc 17 | from mlqm.models import NeuralWavefunction 18 | from mlqm.hamiltonians import NuclearPotential 19 | from mlqm.samplers import Estimator 20 | from mlqm.optimization import Optimizer 21 | 22 | 23 | 24 | sig = 0.2 25 | dx = 0.2 26 | neq = 10 27 | nav = 10 28 | nprop = 10 29 | nvoid = 50 30 | nwalk = 1 31 | ndim = 3 32 | npart = 4 33 | seed = 17 34 | mass = 1. 35 | omega = 1. 
36 | delta = 0.002 37 | eps = 0.0002 38 | model_save_path = f"./helium{npart}.model" 39 | 40 | 41 | # Initialize Seed 42 | torch.manual_seed(seed) 43 | 44 | # Initialize neural wave function and compute the number of parameters 45 | wavefunction = NeuralWavefunction(ndim, npart) 46 | wavefunction.count_parameters() 47 | 48 | # Initialize Potential 49 | potential = NuclearPotential(nwalk) 50 | 51 | # Initialize Hamiltonian 52 | hamiltonian = HarmonicOscillator_mc(mass, omega, nwalk, ndim, npart) 53 | 54 | #Initialize Optimizer 55 | opt=Optimizer(delta,eps,wavefunction.npt) 56 | 57 | #This step loads the model: 58 | wavefunction.load_state_dict(torch.load(model_save_path)) 59 | 60 | # Now you can use the wavefunction as before, with the parameters reloaded. 61 | -------------------------------------------------------------------------------- /examples/torch_selection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | n_points = 10 5 | dimension = 3 6 | 7 | 8 | # This pulls points from a uniform distribution, I think: 9 | x_original = torch.rand(size=[n_points,dimension]) 10 | 11 | # This draws points from a Gaussian. Each individual point is a 12 | # sample from the Gaussian, which may not be right for the Metropolis algorithm: 13 | kick = torch.randn(size=x_original.shape) 14 | 15 | x_updated = x_original + kick 16 | 17 | # This is just comparing the magnitude of the coordinates 18 | 19 | # Compute the x**2 + y**2 + z**2 scalars: 20 | x_original_magnitude = torch.sum(x_original**2, axis=-1) 21 | x_updated_magnitude = torch.sum(x_updated**2, axis=-1) 22 | 23 | # These two should be scalar vectors now: 24 | assert len(x_original_magnitude.shape) == 1 25 | assert x_original_magnitude.shape[0] == n_points 26 | 27 | # Select the smaller points, just for fun: 28 | condition = x_original_magnitude < x_updated_magnitude 29 | 30 | # condition is now a boolean vector, it's true when x_original is less than x_updated 31 | 32 | # See the "where" function here, which can select from two arrays: 33 | # https://pytorch.org/docs/stable/torch.html 34 | 35 | 36 | # In order to make the function work, the condition needs to be 37 | # "broadcastable" to the original points. 
38 | # This means it needs to be of shape [n_points, 1] rather than [n_points] 39 | smallest_points = torch.where(condition.view([n_points,1]), x_original, x_updated) 40 | 41 | # Verify we have the smallest points in terms of norm: 42 | smallest_magnitude = torch.sum(smallest_points**2, axis=-1) 43 | 44 | assert (smallest_magnitude <= x_original_magnitude).all() 45 | assert (smallest_magnitude <= x_updated_magnitude).all() 46 | 47 | print("done") -------------------------------------------------------------------------------- /images/NucleonScaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nuclear-Physics-with-Machine-Learning/MLQM/bee92f3c65b9fa161fb2d55f182419904398a9d3/images/NucleonScaling.png -------------------------------------------------------------------------------- /images/Scaling_Performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nuclear-Physics-with-Machine-Learning/MLQM/bee92f3c65b9fa161fb2d55f182419904398a9d3/images/Scaling_Performance.png -------------------------------------------------------------------------------- /jacobian_benchmark.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import time, timeit 3 | 4 | N_WALKERS = 1000 5 | DIM = 3 6 | N_PARTICLES = 4 7 | 8 | x_input = tf.random.uniform(shape=(N_WALKERS, N_PARTICLES, DIM)) 9 | 10 | class DeepSetsWavefunction(tf.keras.models.Model): 11 | """Create a neural network eave function in N dimensions 12 | 13 | Boundary condition, if not supplied, is gaussian in every dimension 14 | 15 | Extends: 16 | tf.keras.models.Model 17 | """ 18 | def __init__(self, ndim : int, nparticles: int, mean_subtract : bool, boundary_condition :tf.keras.layers.Layer = None): 19 | '''Deep Sets wavefunction for symmetric particle wavefunctions 20 | 21 | Implements a deep set network for multiple particles in the same system 22 | 23 | Arguments: 24 | ndim {int} -- Number of dimensions 25 | nparticles {int} -- Number of particls 26 | 27 | Keyword Arguments: 28 | boundary_condition {tf.keras.layers.Layer} -- [description] (default: {None}) 29 | 30 | Raises: 31 | Exception -- [description] 32 | ''' 33 | tf.keras.models.Model.__init__(self) 34 | 35 | self.ndim = ndim 36 | if self.ndim < 1 or self.ndim > 3: 37 | raise Exception("Dimension must be 1, 2, or 3 for DeepSetsWavefunction") 38 | 39 | self.nparticles = nparticles 40 | 41 | self.mean_subtract = mean_subtract 42 | 43 | 44 | n_filters_per_layer = 32 45 | n_layers = 4 46 | bias = True 47 | activation = tf.keras.activations.tanh 48 | 49 | 50 | self.individual_net = tf.keras.models.Sequential() 51 | 52 | self.individual_net.add( 53 | tf.keras.layers.Dense(n_filters_per_layer, 54 | use_bias = bias) 55 | ) 56 | 57 | for l in range(n_layers): 58 | self.individual_net.add( 59 | tf.keras.layers.Dense(n_filters_per_layer, 60 | use_bias = bias, 61 | activation = activation) 62 | ) 63 | 64 | 65 | self.aggregate_net = tf.keras.models.Sequential() 66 | 67 | for l in range(n_layers): 68 | self.individual_net.add( 69 | tf.keras.layers.Dense(n_filters_per_layer, 70 | use_bias = bias, 71 | activation = activation) 72 | ) 73 | self.aggregate_net.add(tf.keras.layers.Dense(1, 74 | use_bias = False)) 75 | 76 | 77 | @tf.function(experimental_compile=False) 78 | def call(self, inputs, trainable=None): 79 | # Mean subtract for all particles: 80 | if self.nparticles > 1 and self.mean_subtract: 
81 | mean = tf.reduce_mean(inputs, axis=1) 82 | xinputs = inputs - mean[:,None,:] 83 | else: 84 | xinputs = inputs 85 | 86 | x = [] 87 | for p in range(self.nparticles): 88 | x.append(self.individual_net(xinputs[:,p,:])) 89 | 90 | x = tf.add_n(x) 91 | x = self.aggregate_net(x) 92 | 93 | # Compute the initial boundary condition, which the network will slowly overcome 94 | # boundary_condition = tf.math.abs(self.normalization_weight * tf.reduce_sum(xinputs**self.normalization_exponent, axis=(1,2)) 95 | boundary_condition = -1. * tf.reduce_sum(xinputs**2, axis=(1,2)) 96 | boundary_condition = tf.reshape(boundary_condition, [-1,1]) 97 | 98 | 99 | return x + boundary_condition 100 | 101 | def n_parameters(self): 102 | return tf.reduce_sum( [ tf.reduce_prod(p.shape) for p in self.trainable_variables ]) 103 | 104 | wavefunction = DeepSetsWavefunction(ndim=DIM, nparticles=N_PARTICLES, mean_subtract=True) 105 | output = wavefunction(x_input) 106 | 107 | 108 | @tf.function 109 | def jacobian_comp(inputs, _wavefunction): 110 | 111 | with tf.GradientTape() as tape: 112 | log_psiw = _wavefunction(inputs) 113 | 114 | # By default, this essentially SUMS over the dimension of log_psiw 115 | jac = tape.jacobian(log_psiw, _wavefunction.trainable_variables) 116 | 117 | return jac 118 | 119 | 120 | start = time.time() 121 | jc = jacobian_comp(x_input, wavefunction) 122 | print("Jacobian Compilation time: ", time.time() - start) 123 | 124 | 125 | start = time.time() 126 | jacobian_comp(x_input, wavefunction) 127 | print("Jacobian Execution time: ", time.time() - start) 128 | 129 | 130 | @tf.function 131 | def jacobian_grad(inputs, _wavefunction): 132 | 133 | n_walkers = inputs.shape[0] 134 | 135 | with tf.GradientTape(persistent=True) as tape: 136 | log_psiw = _wavefunction(inputs) 137 | 138 | split = tf.split(log_psiw, n_walkers) 139 | 140 | # print(split) 141 | # By default, this essentially SUMS over the dimension of log_psiw 142 | grad = [tape.gradient(s, _wavefunction.trainable_variables) for s in split] 143 | 144 | jac = [] 145 | for i, l in enumerate(_wavefunction.trainable_variables): 146 | temp = tf.stack([g[i] for g in grad]) 147 | temp = tf.reshape(temp, log_psiw.shape + l.shape) 148 | jac.append(temp) 149 | 150 | return jac 151 | 152 | 153 | start = time.time() 154 | jg = jacobian_grad(x_input, wavefunction) 155 | print("Stacked Gradient Compilation time: ", time.time() - start) 156 | 157 | start = time.time() 158 | jacobian_comp(x_input, wavefunction) 159 | print("Stacked Gradient Execution time: ", time.time() - start) 160 | 161 | -------------------------------------------------------------------------------- /mlqm/__init__.py: -------------------------------------------------------------------------------- 1 | # Natural Units: 2 | # H_BAR = 1.0 3 | # ELECTRON_CHARGE = 1.0 4 | 5 | # Nuclear Units: 6 | # H_BAR = 197.327 7 | ELECTRON_CHARGE = 1.0 8 | 9 | 10 | DEFAULT_TENSOR_TYPE="float64" 11 | MAX_PARALLEL_ITERATIONS=4000 12 | 13 | try: 14 | import horovod.tensorflow as hvd 15 | hvd.init() 16 | MPI_AVAILABLE=True 17 | except: 18 | MPI_AVAILABLE=False 19 | 20 | if MPI_AVAILABLE and hvd.size() == 1: 21 | # Turn off mpi if only 1 rank 22 | MPI_AVAILABLE = False 23 | -------------------------------------------------------------------------------- /mlqm/hamiltonians/AtomicPotential.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy 3 | 4 | from mlqm import ELECTRON_CHARGE 5 | from mlqm.hamiltonians import Hamiltonian 6 | 7 | 
class AtomicPotential(Hamiltonian): 8 | """Atomic Hamiltonian 9 | 10 | Implementation of the atomic hamiltonian 11 | 12 | """ 13 | 14 | def __init__(self, **kwargs): 15 | ''' 16 | 17 | Arguments: 18 | mu {float} -- (Reduced) Mass of nucleus 19 | Z {int} -- Nuclear charge, aka number of electrons 20 | 21 | ''' 22 | Hamiltonian.__init__(self, **kwargs) 23 | 24 | # Check the parameters have everything needed: 25 | for parameter in ["mass", "z"]: 26 | if parameter not in self.parameters: 27 | raise KeyError(f"Parameter {parameter} not suppliled as keyword arg to Atomic Potential") 28 | 29 | 30 | def potential_energy(self, *, inputs, Z): 31 | """Return potential energy 32 | 33 | If the potential energy is already computed, and no arguments are supplied, 34 | return the cached value 35 | 36 | If all arguments are supplied, calculate and return the PE. 37 | 38 | Otherwise, exception 39 | 40 | Arguments: 41 | inputs {tf.tensor} -- Tensor of shape [N, nparticles, dimension] 42 | Z {tf.tensor} -- Atomic number 43 | 44 | Returns: 45 | torch.Tensor - potential energy of shape [1] 46 | """ 47 | 48 | # Potential energy is, for n particles, two pieces: 49 | # Compute r_i, where r_i = sqrt(sum(x_i^2, y_i^2, z_i^2)) (in 3D) 50 | # PE_1 = -(Z e^2)/(4 pi eps_0) * sum_i (1/r_i) 51 | # Second, compute r_ij, for all i != j, and then 52 | # PE_2 = -(e^2) / (4 pi eps_0) * sum_{i!=j} (1 / r_ij) 53 | # where r_ij = sqrt( [xi - xj]^2 + [yi - yj] ^2 + [zi - zj]^2) 54 | 55 | # Compute r 56 | # Square the coordinates and sum for each walker 57 | r = tf.math.sqrt(tf.reduce_sum(inputs**2, axis=2)) 58 | # This is the sum of 1/r for all particles with the nucleus: 59 | pe_1 = - (Z * ELECTRON_CHARGE**2 ) * tf.reduce_sum( 1. / (r + 1e-8), axis=1 ) 60 | 61 | # This is the sum of 1/r for all particles with other particles. 62 | # n_particles = inputs.shape[1] 63 | # for i_particle in range(n_particles): 64 | # centroid = inputs[:,i_particle,:] 65 | # 66 | # r = tf.math.sqrt(tf.reduce_sum((inputs -centroid)**2, axis=2)) 67 | # pe_2 = -0.5* (ELECTRON_CHARGE**2 ) * tf.reduce_sum( 1. / (r + 1e-8), axis=1 ) 68 | # # Because this force is symmetric, I'm multiplying by 0.5 to prevent overflow 69 | pe_2 = 0. 
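# Editor's note: the electron-electron repulsion term is currently disabled
# (pe_2 = 0.); the commented-out block above sketches the pairwise 1/r_ij sum
# that would supply it, so the potential returned below reduces to the
# electron-nucleus attraction alone.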
70 | return pe_1 + pe_2 71 | 72 | 73 | @tf.function 74 | def compute_energies(self, inputs, logw_of_x, dlogw_dx, d2logw_dx2): 75 | '''Compute PE, KE_JF, and KE_direct 76 | 77 | Harmonic Oscillator Energy Calculations 78 | 79 | Arguments: 80 | inputs {[type]} -- walker coordinates (shape is [nwalkers, nparticles, dimension]) 81 | logw_of_x {[type]} -- computed wave function at each walker 82 | dlogw_dx {[type]} -- first derivative of wavefunction at each walker 83 | d2logw_dx2 {[type]} -- second derivative of wavefunction at each walker 84 | 85 | Raises: 86 | NotImplementedError -- [description] 87 | 88 | Returns: 89 | pe -- potential energy 90 | ke_jf -- JF Kinetic energy 91 | ke_direct -- 2nd deriv computation of potential energy 92 | ''' 93 | 94 | # Potential energy depends only on the wavefunction 95 | pe = self.potential_energy(inputs=inputs, Z=self.parameters['z']) 96 | 97 | # KE by parts needs only one derivative 98 | ke_jf = self.kinetic_energy_jf(dlogw_dx=dlogw_dx, M=self.parameters["mass"]) 99 | 100 | # True, directly, uses the second derivative 101 | ke_direct = self.kinetic_energy(KE_JF = ke_jf, d2logw_dx2 = d2logw_dx2, M=self.parameters["mass"]) 102 | 103 | 104 | return pe, ke_jf, ke_direct 105 | -------------------------------------------------------------------------------- /mlqm/hamiltonians/Hamiltonian.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy 3 | 4 | import logging 5 | logger = logging.getLogger() 6 | 7 | from mlqm import DEFAULT_TENSOR_TYPE 8 | 9 | class Hamiltonian(object): 10 | """Harmonic Oscillator Potential 11 | 12 | Implementation of the quantum harmonic oscillator hamiltonian 13 | """ 14 | 15 | def __init__(self, **kwargs): 16 | ''' Initialize the Hamiltonian 17 | 18 | The derived class will check parameters, but this converts all of them to floats 19 | and scores as TF Constants. 20 | 21 | ''' 22 | object.__init__(self) 23 | self.parameters = {} 24 | # Cast them all to tf constants: 25 | for key in kwargs: 26 | self.parameters[key] = tf.constant(float(kwargs[key]),dtype=DEFAULT_TENSOR_TYPE) 27 | 28 | self.HBAR = tf.constant(1.0, dtype = DEFAULT_TENSOR_TYPE) 29 | 30 | @tf.function 31 | def potential_energy(self, *, inputs): 32 | """Return potential energy 33 | 34 | Calculate and return the PE. 35 | 36 | Arguments: 37 | inputs {tf.Tensor} -- Tensor of shape [N, dimension], must have graph enabled 38 | Returns: 39 | tf.Tensor - potential energy of shape [1] 40 | """ 41 | 42 | raise NotImplementedError("Hamiltonian classes should implement this function") 43 | 44 | @tf.function 45 | def kinetic_energy_jf(self, *, dlogw_dx, M): 46 | """Return Kinetic energy 47 | 48 | Calculate and return the KE directly 49 | 50 | Otherwise, exception 51 | 52 | Arguments: 53 | dlogw_of_x/dx {tf.Tensor} -- Computed derivative of the wavefunction 54 | 55 | Returns: 56 | tf.Tensor - kinetic energy (JF) of shape [1] 57 | """ 58 | # < x | KE | psi > / < x | psi > = 1 / 2m [ < x | p | psi > / < x | psi > = 1/2 w * x**2 59 | 60 | # Contract d2_w_dx over spatial dimensions and particles: 61 | ke_jf = (self.HBAR**2 / (2 * M)) * tf.reduce_sum(dlogw_dx**2, axis=(1,2)) 62 | 63 | return ke_jf 64 | 65 | @tf.function 66 | def kinetic_energy(self, *, KE_JF : tf.Tensor, d2logw_dx2 : tf.Tensor, M): 67 | """Return Kinetic energy 68 | 69 | 70 | If all arguments are supplied, calculate and return the KE. 
71 | 72 | Arguments: 73 | d2logw_dx2 {tf.Tensor} -- Computed second derivative of the wavefunction 74 | KE_JF {tf.Tensor} -- JF computation of the kinetic energy 75 | 76 | Returns: 77 | tf.Tensor - potential energy of shape [1] 78 | """ 79 | 80 | ke = -(self.HBAR**2 / (2 * M)) * tf.reduce_sum(d2logw_dx2, axis=(1,2)) 81 | ke = ke - KE_JF 82 | 83 | return ke 84 | 85 | @tf.function 86 | def compute_derivatives(self, wavefunction : tf.keras.models.Model, inputs : tf.Tensor): 87 | 88 | 89 | # Turning off all tape watching except for the inputs: 90 | # Using the outer-most tape to watch the computation of the first derivative: 91 | with tf.GradientTape() as tape: 92 | # Use the inner tape to watch the computation of the wavefunction: 93 | tape.watch(inputs) 94 | with tf.GradientTape() as second_tape: 95 | second_tape.watch(inputs) 96 | logw_of_x = wavefunction(inputs, training=True) 97 | # Get the derivative of logw_of_x with respect to inputs 98 | dlogw_dx = second_tape.gradient(logw_of_x, inputs) 99 | 100 | # Get the derivative of dlogw_dx with respect to inputs (aka second derivative) 101 | 102 | # We have to extract the diagonal of the jacobian, which comes out with shape 103 | # [nwalkers, nparticles, dimension, nwalkers, nparticles, dimension] 104 | 105 | # This is the full hessian computation: 106 | d2logw_dx2 = tape.batch_jacobian(dlogw_dx, inputs) 107 | 108 | # And this contracts: 109 | d2logw_dx2 = tf.einsum("wpdpd->wpd",d2logw_dx2) 110 | 111 | return logw_of_x, dlogw_dx, d2logw_dx2 112 | 113 | ''' 114 | This whole section is correct but a bad implementation 115 | 116 | @tf.function 117 | def derivative_single_walker(self, wavefunction, walker): 118 | 119 | # Using the outer-most tape to watch the computation of the first derivative: 120 | with tf.GradientTape(persistent=True) as tape: 121 | tape.watch(walker) 122 | # Use the inner tape to watch the computation of the wavefunction: 123 | with tf.GradientTape(persistent=True) as second_tape: 124 | second_tape.watch(walker) 125 | logw_of_x = wavefunction(walker, training=True) 126 | # Get the derivative of logw_of_x with respect to inputs 127 | dlogw_dx = second_tape.gradient(logw_of_x, walker) 128 | 129 | d2logw_dx2 = tape.jacobian(dlogw_dx, walker) 130 | d2logw_dx2 = tf.einsum("wpdwpd->wpd",d2logw_dx2) 131 | 132 | return logw_of_x, dlogw_dx, d2logw_dx2 133 | 134 | @tf.function 135 | def compute_derivatives(self, wavefunction : tf.keras.models.Model, inputs : tf.Tensor): 136 | 137 | output_shape = inputs.shape 138 | inputs = tf.split(inputs, output_shape[0], axis=0) 139 | 140 | logw_of_x, dlogw_dx, d2logw_dx2 = zip(*(self.derivative_single_walker(wavefunction, i) for i in inputs)) 141 | # Get the derivative of dlogw_dx with respect to inputs 142 | # (aka second derivative) 143 | 144 | # We have to extract the diagonal of the jacobian, 145 | # which comes out with shape 146 | # [nwalkers, nparticles, dimension, nwalkers, nparticles, dimension] 147 | 148 | # For a fixed number of particles and dimension, 149 | # this memory usage grows as nwalkers**2 150 | # BUT, the jacobian is block diagonal: if you 151 | # access the block [i,:,:,j,:,:] it is all zero unless i == j. 152 | 153 | # In this implementation, we're computing the jacobian PER walker and 154 | # only with respect to it's own inputs. So, the jacobians are all 155 | # shaped like [1, npart, ndim, 1, npart, ndim] which grows linearly 156 | # with the number of walkers instead of quadratically. 
157 | 158 | #restack everything: 159 | logw_of_x = tf.reshape(tf.concat(logw_of_x, axis=0), (output_shape[0], 1)) 160 | dlogw_dx = tf.reshape(tf.concat(dlogw_dx, axis=0), output_shape) 161 | d2logw_dx2 = tf.reshape(tf.concat(d2logw_dx2, axis=0), output_shape) 162 | 163 | 164 | return logw_of_x, dlogw_dx, d2logw_dx2 165 | ''' 166 | 167 | @tf.function 168 | def compute_energies(self, inputs, logw_of_x, dlogw_dx, d2logw_dx2): 169 | '''Compute PE, KE_JF, and KE_direct 170 | 171 | Placeholder for a user to implement their calculation of the energies. 172 | 173 | Arguments: 174 | inputs {[type]} -- walker coordinates (shape is [nwalkers, nparticles, dimension]) 175 | logw_of_x {[type]} -- computed wave function at each walker 176 | dlogw_dx {[type]} -- first derivative of wavefunction at each walker 177 | d2logw_dx2 {[type]} -- second derivative of wavefunction at each walker 178 | 179 | Raises: 180 | NotImplementedError -- [description] 181 | 182 | Returns: 183 | pe -- potential energy 184 | ke_jf -- JF Kinetic energy 185 | ke_direct -- 2nd deriv computation of potential energy 186 | ''' 187 | 188 | raise NotImplementedError("Please implement this function in the derived class.") 189 | 190 | # Needs to return like this 191 | return pe, ke_jf, ke_direct 192 | # return None 193 | 194 | @tf.function 195 | def energy(self, wavefunction : tf.keras.models.Model, inputs : tf.Tensor): 196 | """Compute the expectation value of energy of the supplied wavefunction. 197 | 198 | Computes the integral of the wavefunction in this potential 199 | 200 | Arguments: 201 | wavefunction {Wavefunction model} -- Callable wavefunction object 202 | inputs {tf.Tensor} -- Tensor of shape [nwalkers, nparticles, dimension] 203 | 204 | Returns: 205 | tf.tensor - energy of shape [n_walkers] 206 | tf.tensor - energy_jf of shape [n_walkers] 207 | tf.tensor - ke_jf of shape [n_walkers] 208 | tf.tensor - ke_direct of shape [n_walkers] 209 | tf.tensor - pe of shape [n_walkers] 210 | """ 211 | 212 | 213 | # This function takes the inputs 214 | # And computes the expectation value of the energy at each input point 215 | 216 | logw_of_x, dlogw_dx, d2logw_dx2 = self.compute_derivatives(wavefunction, inputs) 217 | 218 | pe, ke_jf, ke_direct = self.compute_energies(inputs, logw_of_x, dlogw_dx, d2logw_dx2) 219 | 220 | # Total energy computations: 221 | energy = tf.squeeze(pe+ke_direct) 222 | energy_jf = tf.squeeze(pe+ke_jf) 223 | 224 | return energy, energy_jf, ke_jf, ke_direct, pe, logw_of_x 225 | -------------------------------------------------------------------------------- /mlqm/hamiltonians/HarmonicOscillator.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy 3 | 4 | import logging 5 | logger = logging.getLogger() 6 | 7 | from mlqm.hamiltonians import Hamiltonian 8 | 9 | class HarmonicOscillator(Hamiltonian): 10 | """Harmonic Oscillator Potential 11 | 12 | Implementation of the quantum harmonic oscillator hamiltonian 13 | """ 14 | 15 | def __init__(self, **kwargs): 16 | 17 | Hamiltonian.__init__(self, **kwargs) 18 | 19 | # Check the parameters have everything needed: 20 | for parameter in ["mass", "omega"]: 21 | if parameter not in self.parameters: 22 | raise KeyError(f"Parameter {parameter} not suppliled as keyword arg to HarmonicOscillator") 23 | 24 | 25 | @tf.function 26 | def potential_energy(self, *, inputs, M, omega): 27 | """Return potential energy 28 | 29 | Calculate and return the PE. 
30 | 31 | Arguments: 32 | inputs {tf.Tensor} -- Tensor of shape [N, dimension], must have graph enabled 33 | Returns: 34 | tf.Tensor - potential energy of shape [1] 35 | """ 36 | 37 | # Potential calculation 38 | # < x | H | psi > / < x | psi > = < x | 1/2 w * x**2 | psi > / < x | psi > = 1/2 w * x**2 39 | # print("Enter pe call") 40 | 41 | # x Squared needs to contract over spatial dimensions: 42 | x_squared = tf.reduce_sum(inputs**2, axis=(1, 2)) 43 | pe = (0.5 * M * omega**2 ) * x_squared 44 | 45 | return pe 46 | 47 | @tf.function 48 | def compute_energies(self, inputs, logw_of_x, dlogw_dx, d2logw_dx2): 49 | '''Compute PE, KE_JF, and KE_direct 50 | 51 | Harmonic Oscillator Energy Calculations 52 | 53 | Arguments: 54 | inputs {[type]} -- walker coordinates (shape is [nwalkers, nparticles, dimension]) 55 | logw_of_x {[type]} -- computed wave function at each walker 56 | dlogw_dx {[type]} -- first derivative of wavefunction at each walker 57 | d2logw_dx2 {[type]} -- second derivative of wavefunction at each walker 58 | 59 | Raises: 60 | NotImplementedError -- [description] 61 | 62 | Returns: 63 | pe -- potential energy 64 | ke_jf -- JF Kinetic energy 65 | ke_direct -- 2nd deriv computation of potential energy 66 | ''' 67 | 68 | # Potential energy depends only on the wavefunction 69 | pe = self.potential_energy(inputs=inputs, M = self.parameters["mass"], omega=self.parameters["omega"]) 70 | 71 | # KE by parts needs only one derivative 72 | ke_jf = self.kinetic_energy_jf(dlogw_dx=dlogw_dx, M=self.parameters["mass"]) 73 | 74 | # True, directly, uses the second derivative 75 | ke_direct = self.kinetic_energy(KE_JF = ke_jf, d2logw_dx2 = d2logw_dx2, M=self.parameters["mass"]) 76 | 77 | return pe, ke_jf, ke_direct 78 | -------------------------------------------------------------------------------- /mlqm/hamiltonians/NuclearPotential.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import numpy 4 | 5 | import logging 6 | logger = logging.getLogger() 7 | 8 | from mlqm import DEFAULT_TENSOR_TYPE 9 | from mlqm.hamiltonians import Hamiltonian 10 | 11 | 12 | class NuclearPotential(Hamiltonian): 13 | """Nuclear Physics Potential 14 | """ 15 | 16 | def __init__(self, **kwargs): 17 | ''' 18 | Arguments: 19 | mass {float} -- Nuclear mass, aka number of electrons 20 | 21 | ''' 22 | Hamiltonian.__init__(self, **kwargs) 23 | 24 | # Check the parameters have everything needed: 25 | for parameter in ["mass"]: 26 | if parameter not in self.parameters: 27 | raise KeyError(f"Parameter {parameter} not suppliled as keyword arg to HarmonicOscillator") 28 | 29 | if 'vkr' in self.parameters: 30 | if self.parameters['vkr'] not in [2, 4, 6]: 31 | raise KeyError(f"Parameter vkr set to {self.parameters['vkr']} but must be 2, 4 or 6") 32 | self.vkr = tf.constant(self.parameters['vkr'], dtype = DEFAULT_TENSOR_TYPE) 33 | else: 34 | logger.info("Setting vkr to 4 in the nuclear potential by default.") 35 | self.vkr = tf.constant(4, dtype = DEFAULT_TENSOR_TYPE) 36 | 37 | if self.vkr == 2.0: 38 | self.v0r = tf.constant(-133.3431, dtype=DEFAULT_TENSOR_TYPE) 39 | self.v0s = tf.constant(-9.0212, dtype = DEFAULT_TENSOR_TYPE) 40 | self.ar3b = tf.constant(8.2757658256, dtype = DEFAULT_TENSOR_TYPE) 41 | logger.info(f"Using vkr = {self.vkr}") 42 | elif self.vkr == 4.0: 43 | self.v0r = tf.constant(-487.6128, dtype=DEFAULT_TENSOR_TYPE) 44 | self.v0s = tf.constant(-17.5515, dtype = DEFAULT_TENSOR_TYPE) 45 | self.ar3b = tf.constant(26.0345712467, dtype = 
DEFAULT_TENSOR_TYPE) 46 | logger.info(f"Using vkr = {self.vkr}") 47 | elif self.vkr == 6.0: 48 | self.v0r = tf.constant(-1064.5010, dtype=DEFAULT_TENSOR_TYPE) 49 | self.v0s = tf.constant(-26.0830, dtype = DEFAULT_TENSOR_TYPE) 50 | self.ar3b = tf.constant(51.5038930567, dtype = DEFAULT_TENSOR_TYPE) 51 | logger.info(f"Using vkr = {self.vkr}") 52 | 53 | 54 | self.HBAR = tf.constant(197.327, dtype = DEFAULT_TENSOR_TYPE) 55 | 56 | @tf.function(experimental_compile=False) 57 | def pionless_2b(self, *, r_ij): 58 | x = self.vkr * r_ij 59 | vr = tf.exp(-x**2/4.0) 60 | 61 | return self.v0r*vr, self.v0s*vr 62 | 63 | @tf.function(experimental_compile=False) 64 | def pionless_3b(self, *, r_ij, nwalkers): 65 | # pot_3b = tf.zeros(shape=(nwalkers), dtype=DEFAULT_TENSOR_TYPE) 66 | x = self.vkr * r_ij 67 | vr = tf.exp(-x**2/4.0) 68 | pot_3b = vr * self.ar3b 69 | return pot_3b 70 | 71 | @tf.function(experimental_compile=False) 72 | def potential_energy(self, *, inputs): 73 | """Return potential energy 74 | 75 | Calculate and return the PE. 76 | 77 | Arguments: 78 | inputs {tf.Tensor} -- Tensor of shape [N, dimension], must have graph enabled 79 | Returns: 80 | tf.Tensor - potential energy of shape [1] 81 | """ 82 | 83 | # Potential calculation 84 | 85 | # Prepare buffers for the output: 86 | # (Walker shape is (self.nwalkers, self.nparticles, self.n) ) 87 | nwalkers = inputs.shape[0] 88 | nparticles = inputs.shape[1] 89 | 90 | if nparticles == 2: 91 | alpha = 1.0 92 | elif nparticles > 2: 93 | alpha = -1.0 94 | 95 | # print("Alpha: ", alpha) 96 | 97 | # gr3b = tf.Variable(tf.zeros(shape=[nwalkers,nparticles], dtype=DEFAULT_TENSOR_TYPE)) 98 | gr3b = [tf.zeros(shape=[nwalkers], dtype=DEFAULT_TENSOR_TYPE) for p in range(nparticles)] 99 | V_ijk = tf.zeros(shape=[nwalkers], dtype=DEFAULT_TENSOR_TYPE) # three body potential terms 100 | v_ij = tf.zeros(shape=[nwalkers], dtype=DEFAULT_TENSOR_TYPE) # 2 body potential terms: 101 | for i in range (nparticles-1): 102 | for j in range (i+1,nparticles): 103 | # 104 | x_ij = inputs[:,i,:]-inputs[:,j,:] 105 | r_ij = tf.sqrt(tf.reduce_sum(x_ij**2,axis=1)) 106 | vrr, vrs = self.pionless_2b(r_ij=r_ij) 107 | # v_ij += self.pionless_2b(r_ij=r_ij, nwalkers=nwalkers) 108 | v_ij += vrr + alpha * vrs 109 | if (nparticles > 2 ): 110 | t_ij = self.pionless_3b(r_ij=r_ij, nwalkers=nwalkers) 111 | gr3b[i] += t_ij 112 | gr3b[j] += t_ij 113 | # gr3b[i] = gr3b[:,i].assign(gr3b[:,i] + t_ij) 114 | # gr3b = gr3b[:,j].assign(gr3b[:,j] + t_ij) 115 | V_ijk -= t_ij**2 116 | # stack up gr3b: 117 | gr3b = tf.stack(gr3b, axis=1) 118 | V_ijk += 0.5 * tf.reduce_sum(gr3b**2, axis = 1) 119 | pe = v_ij + V_ijk 120 | 121 | 122 | return pe 123 | 124 | # @tf.function() 125 | @tf.function(experimental_compile=False) 126 | def compute_energies(self, inputs, logw_of_x, dlogw_dx, d2logw_dx2): 127 | '''Compute PE, KE_JF, and KE_direct 128 | 129 | Harmonic Oscillator Energy Calculations 130 | 131 | Arguments: 132 | inputs {[type]} -- walker coordinates (shape is [nwalkers, nparticles, dimension]) 133 | logw_of_x {[type]} -- computed wave function at each walker 134 | dlogw_dx {[type]} -- first derivative of wavefunction at each walker 135 | d2logw_dx2 {[type]} -- second derivative of wavefunction at each walker 136 | 137 | Raises: 138 | NotImplementedError -- [description] 139 | 140 | Returns: 141 | pe -- potential energy 142 | ke_jf -- JF Kinetic energy 143 | ke_direct -- 2nd deriv computation of potential energy 144 | ''' 145 | 146 | # Potential energy depends only on the wavefunction 147 | pe = 
self.potential_energy(inputs=inputs) 148 | 149 | # KE by parts needs only one derivative 150 | ke_jf = self.kinetic_energy_jf(dlogw_dx=dlogw_dx, M=self.parameters["mass"]) 151 | 152 | # True, directly, uses the second derivative 153 | ke_direct = self.kinetic_energy(KE_JF = ke_jf, d2logw_dx2 = d2logw_dx2, M=self.parameters["mass"]) 154 | 155 | 156 | return pe, ke_jf, ke_direct 157 | -------------------------------------------------------------------------------- /mlqm/hamiltonians/__init__.py: -------------------------------------------------------------------------------- 1 | from .Hamiltonian import Hamiltonian 2 | from .HarmonicOscillator import HarmonicOscillator 3 | from .AtomicPotential import AtomicPotential 4 | from .NuclearPotential import NuclearPotential 5 | -------------------------------------------------------------------------------- /mlqm/models/DeepSetsWavefunction.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import tensorflow as tf 3 | from mlqm import DEFAULT_TENSOR_TYPE 4 | 5 | 6 | import copy 7 | #from .ExponentialBoundaryCondition import ExponentialBoundaryCondition 8 | 9 | 10 | 11 | class DenseBlock(tf.keras.models.Model): 12 | """A dense layer with a bypass lane 13 | 14 | Computes the residual of the inputs. Will error if n_output != n_input 15 | 16 | Extends: 17 | tf.keras.models.Model 18 | """ 19 | def __init__(self, n_output, use_bias, activation): 20 | tf.keras.models.Model.__init__(self, dtype=DEFAULT_TENSOR_TYPE) 21 | 22 | self.layer = tf.keras.layers.Dense(n_output, 23 | activation = activation, use_bias = use_bias, 24 | kernel_initializer = tf.keras.initializers.GlorotNormal, 25 | bias_initializer = tf.keras.initializers.RandomNormal, 26 | ) 27 | 28 | 29 | def __call__(self, inputs): 30 | 31 | x = self.layer(inputs) 32 | return x 33 | 34 | 35 | class ResidualBlock(DenseBlock): 36 | """A dense layer with a bypass lane 37 | 38 | Computes the residual of the inputs. 
Will error if n_output != n_input 39 | 40 | Extends: 41 | tf.keras.models.Model 42 | """ 43 | def __init__(self, n_output, use_bias, activation): 44 | DenseBlock.__init__(self, n_output, use_bias, activation) 45 | 46 | 47 | def __call__(self, inputs): 48 | 49 | x = self.layer(inputs) 50 | 51 | return inputs + x 52 | 53 | class DeepSetsWavefunction(tf.keras.models.Model): 54 | """Create a neural network eave function in N dimensions 55 | 56 | Boundary condition, if not supplied, is gaussian in every dimension 57 | 58 | Extends: 59 | tf.keras.models.Model 60 | """ 61 | def __init__(self, ndim : int, nparticles: int, configuration: dict, boundary_condition :tf.keras.layers.Layer = None): 62 | '''Deep Sets wavefunction for symmetric particle wavefunctions 63 | 64 | Implements a deep set network for multiple particles in the same system 65 | 66 | Arguments: 67 | ndim {int} -- Number of dimensions 68 | nparticles {int} -- Number of particls 69 | 70 | Keyword Arguments: 71 | boundary_condition {tf.keras.layers.Layer} -- [description] (default: {None}) 72 | 73 | Raises: 74 | Exception -- [description] 75 | ''' 76 | tf.keras.models.Model.__init__(self) 77 | 78 | self.ndim = ndim 79 | if self.ndim < 1 or self.ndim > 3: 80 | raise Exception("Dimension must be 1, 2, or 3 for DeepSetsWavefunction") 81 | 82 | self.nparticles = nparticles 83 | 84 | self.config = configuration 85 | 86 | self.mean_subtract = self.config.mean_subtract 87 | 88 | 89 | n_filters_per_layer = self.config.n_filters_per_layer 90 | n_layers = self.config.n_layers 91 | bias = self.config.bias 92 | residual = self.config.residual 93 | 94 | try: 95 | activation = tf.keras.activations.__getattribute__(self.config['activation']) 96 | except e: 97 | print(e) 98 | print(f"Could not use the activation {self.config['activation']} - not in tf.keras.activations.") 99 | 100 | 101 | 102 | self.individual_net = tf.keras.models.Sequential() 103 | 104 | self.individual_net.add( 105 | DenseBlock(n_filters_per_layer, 106 | use_bias = bias, 107 | activation = activation) 108 | ) 109 | 110 | # The above layer counts as a layer! 
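        # The DenseBlock added above is the first layer of the per-particle
        # (individual) network, so the loop below only adds the remaining
        # n_layers - 1 blocks.  The last of those (l == n_layers - 2) is built
        # with no activation, leaving the per-particle features linear before
        # they are summed over particles and passed to the aggregate network.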
111 | for l in range(n_layers-1): 112 | if l == n_layers - 2: 113 | _activation = None 114 | else: 115 | _activation = activation 116 | if residual: 117 | self.individual_net.add( 118 | ResidualBlock(n_filters_per_layer, 119 | use_bias = bias, 120 | activation = _activation) 121 | ) 122 | else: 123 | self.individual_net.add( 124 | DenseBlock(n_filters_per_layer, 125 | use_bias = bias, 126 | activation = _activation) 127 | ) 128 | 129 | 130 | self.aggregate_net = tf.keras.models.Sequential() 131 | 132 | for l in range(n_layers): 133 | if residual: 134 | self.aggregate_net.add( 135 | ResidualBlock(n_filters_per_layer, 136 | use_bias = bias, 137 | activation = activation) 138 | ) 139 | else: 140 | self.aggregate_net.add( 141 | DenseBlock(n_filters_per_layer, 142 | use_bias = bias, 143 | activation = activation) 144 | ) 145 | self.aggregate_net.add(tf.keras.layers.Dense(1, 146 | use_bias = False)) 147 | 148 | # Represent the confinement as a function of r only, which is represented as a neural netowrk 149 | # self.confinement = DenseBlock(n_filters_per_layer) 150 | 151 | self.confinement = tf.constant(self.config.confinement, dtype = DEFAULT_TENSOR_TYPE) 152 | 153 | # self.normalization_exponent = tf.Variable(2.0, dtype=DEFAULT_TENSOR_TYPE) 154 | # self.normalization_weight = tf.Variable(-0.1, dtype=DEFAULT_TENSOR_TYPE) 155 | 156 | def clone(self): 157 | 158 | new_ob = copy.deepcopy(self) 159 | 160 | new_ob.aggregate_net = tf.keras.models.clone_model(self.aggregate_net) 161 | new_ob.individual_net = tf.keras.models.clone_model(self.individual_net) 162 | 163 | return new_ob 164 | 165 | @tf.function(experimental_compile=True) 166 | # @tf.function 167 | def __call__(self, inputs, training=None): 168 | 169 | # Mean subtract for all particles: 170 | if self.nparticles > 1 and self.mean_subtract: 171 | mean = tf.reduce_mean(inputs, axis=1) 172 | xinputs = inputs - mean[:,None,:] 173 | else: 174 | xinputs = inputs 175 | 176 | x = [] 177 | for p in range(self.nparticles): 178 | x.append(self.individual_net(xinputs[:,p,:])) 179 | 180 | 181 | x = tf.add_n(x) 182 | x = self.aggregate_net(x) 183 | 184 | # Compute the initial boundary condition, which the network will slowly overcome 185 | # boundary_condition = tf.math.abs(self.normalization_weight * tf.reduce_sum(xinputs**self.normalization_exponent, axis=(1,2)) 186 | boundary_condition = -self.confinement * tf.reduce_sum(xinputs**2, axis=(1,2)) 187 | boundary_condition = tf.reshape(boundary_condition, [-1,1]) 188 | return x + boundary_condition 189 | 190 | def n_parameters(self): 191 | return tf.reduce_sum( [ tf.reduce_prod(p.shape) for p in self.trainable_variables ]) 192 | 193 | def restore_jax(self, model_path): 194 | import pickle 195 | 196 | 197 | with open(model_path, 'rb') as _f: 198 | weights = pickle.load(_f) 199 | 200 | i_this_model = 0 201 | i_jax = 0 202 | 203 | 204 | 205 | for w in weights: 206 | if len(w) == 0: 207 | # This is an activation layer 208 | continue 209 | elif len(w) == 2: 210 | # It's a weight and bias: 211 | target = self.trainable_variables[i_this_model] 212 | target.assign(w[0]) 213 | i_this_model += 1; i_jax += 1 214 | 215 | target = self.trainable_variables[i_this_model] 216 | target.assign(w[1]) 217 | i_this_model += 1; i_jax += 1 218 | else: 219 | # This is probably the FINAL layer: 220 | t = tf.convert_to_tensor(w) 221 | if t.shape == self.trainable_variables[i_this_model].shape: 222 | self.trainable_variables[i_this_model].assign(t) 223 | 224 | return 225 | 
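Because every particle passes through the same individual network and the per-particle features are summed before the aggregate network, the log-wavefunction above is symmetric under particle exchange by construction. The sketch below is not part of the repository: the configuration values are hypothetical, chosen only to populate the fields read in __init__, and it assumes mlqm.DEFAULT_TENSOR_TYPE is tf.float64 and an XLA-capable TensorFlow build (required by the experimental_compile=True wrapper on __call__).

import tensorflow as tf
from omegaconf import OmegaConf

from mlqm.models import DeepSetsWavefunction

# Hypothetical configuration; field names mirror the attributes read in __init__.
config = OmegaConf.create({
    "n_filters_per_layer": 32,
    "n_layers": 4,
    "bias": True,
    "residual": True,
    "activation": "tanh",
    "mean_subtract": True,
    "confinement": 0.1,
})

wavefunction = DeepSetsWavefunction(ndim=3, nparticles=4, configuration=config)

# A batch of 10 walkers, each with 4 particles in 3 dimensions:
x = tf.random.normal(shape=(10, 4, 3), dtype=tf.float64)

# Exchanging two particles should leave log(psi) unchanged:
x_swapped = tf.gather(x, indices=[1, 0, 2, 3], axis=1)
difference = tf.reduce_max(tf.abs(wavefunction(x) - wavefunction(x_swapped)))
print(difference)  # expected to be ~0, up to floating point noise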
-------------------------------------------------------------------------------- /mlqm/models/ExponentialBoundaryCondition.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy 3 | 4 | class ExponentialBoundaryCondition(tf.keras.layers.Layer): 5 | """A simple module for applying an exponential boundary condition in N dimensions 6 | 7 | Note that the exponent is *inside* of the power of 2 in the exponent. 8 | This is to prevent divergence when it is trainable and goes negative. 9 | 10 | Extends: 11 | tf.keras.layers.Layer 12 | """ 13 | 14 | def __init__(self, n : int, nparticles : int, exp : float=0.1, trainable : bool=True): 15 | """Initializer 16 | 17 | Create a new exponentional boundary condition 18 | 19 | Arguments: 20 | n {int} -- Number of dimensions 21 | 22 | Keyword Arguments: 23 | exp {float} -- Starting value of exponents. Must be broadcastable to the number of dimensions (default: {1.0}) 24 | trainable {bool} -- Whether to allow the boundary condition to be trainable (default: {True}) 25 | """ 26 | tf.keras.layers.Layer.__init__(self) 27 | 28 | 29 | 30 | if n < 1: 31 | raise Exception("Dimension must be at least 1 for ExponentialBoundaryCondition") 32 | 33 | 34 | # This is the parameter controlling the shape of the exponent: 35 | self.exponent = tf.Variable(exp, trainable=trainable) 36 | 37 | 38 | 39 | def forward(self, inputs): 40 | 41 | # Reduce over the spatial dimension: 42 | r = tf.sqrt(tf.reduce_sum(inputs**2, dim=[2]) + 1e-8) 43 | # print(r.shape) 44 | exponent_term = tf.abs(self.exponent) * r / 2. 45 | # exponent_term = tf.abs(self.exponent) * r / 2. 46 | # print(exponent_term) 47 | # print(tf.sqrt(exponent_term)) 48 | result = tf.exp(- exponent_term) 49 | # print(result) 50 | return result 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /mlqm/models/GaussianBoundaryCondition.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy 3 | 4 | class GaussianBoundaryCondition(tf.keras.layers.Layer): 5 | """A simple module for applying an exponential boundary condition in N dimensions 6 | 7 | Note that the exponent is *inside* of the power of 2 in the exponent. 8 | This is to prevent divergence when it is trainable and goes negative. 9 | 10 | Extends: 11 | tf.keras.layers.Layer 12 | """ 13 | 14 | def __init__(self, n : int, exp : float=0.1, trainable : bool=True, dtype = tf.float64): 15 | """Initializer 16 | 17 | Create a new exponentional boundary condition 18 | 19 | Arguments: 20 | n {int} -- Number of dimensions 21 | 22 | Keyword Arguments: 23 | exp {float} -- Starting value of exponents. 
Must be broadcastable to the number of dimensions (default: {1.0}) 24 | trainable {bool} -- Whether to allow the boundary condition to be trainable (default: {True}) 25 | """ 26 | tf.keras.layers.Layer.__init__(self, dtype=dtype) 27 | 28 | self.mean_subtract = True 29 | 30 | if n < 1: 31 | raise Exception("Dimension must be at least 1 for GaussianBoundaryCondition") 32 | 33 | 34 | # This is the parameter controlling the shape of the exponent: 35 | self.exponent = tf.Variable(exp, trainable=True, dtype=dtype) 36 | self.exponent2 = tf.Variable(0.02, trainable=True, dtype=dtype) 37 | 38 | 39 | @tf.function 40 | def call(self, inputs): 41 | # Mean subtract for all particles: 42 | if self.mean_subtract: 43 | mean = tf.reduce_mean(inputs, axis=1) 44 | xinputs = inputs - mean[:,None,:] 45 | else: 46 | xinputs = inputs 47 | 48 | exponent_term1 = tf.reduce_sum((xinputs)**2, axis=(1,2)) 49 | exponent_term2 = tf.reduce_sum((xinputs)**4, axis=(1,2)) 50 | result = - self.exponent * exponent_term1 - self.exponent2*exponent_term2 51 | 52 | return tf.reshape(result, [-1,1]) 53 | -------------------------------------------------------------------------------- /mlqm/models/HarmonicOscillatorWavefunction.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy 3 | 4 | from .GaussianBoundaryCondition import GaussianBoundaryCondition 5 | 6 | 7 | class HarmonicOscillatorWavefunction(tf.keras.layers.Layer): 8 | """Implememtation of the harmonic oscillator wave funtions 9 | 10 | Create a polynomial, up to `degree` in every dimension `n`, that is the 11 | exact solution to the harmonic oscillator wave function. 12 | 13 | Extends: 14 | tf.keras.layers.Layer 15 | """ 16 | 17 | def __init__(self, n : int, nparticles : int, degree : int, alpha : float, dtype=tf.float64): 18 | """Initializer 19 | 20 | Create a harmonic oscillator wave function 21 | 22 | Arguments: 23 | n {int} -- Dimension of the oscillator (1 <= n <= 3) 24 | nparticles {int} -- Number of particles 25 | degree {int} -- Degree of the solution (broadcastable to n) 26 | alpha {float} -- Alpha parameter (m * omega / hbar) 27 | 28 | Raises: 29 | Exception -- [description] 30 | """ 31 | tf.keras.layers.Layer.__init__(self, dtype=dtype) 32 | 33 | self.n = n 34 | if self.n < 1 or self.n > 3: 35 | raise Exception("Dimension must be 1, 2, or 3 for HarmonicOscillatorWavefunction") 36 | 37 | if nparticles > 1: 38 | raise Exception("HarmonicOscillatorWavefunction is only for 1 particle for testing.") 39 | 40 | # Use numpy to broadcast to the right dimension: 41 | degree = numpy.asarray(degree, dtype=numpy.int32) 42 | degree = numpy.broadcast_to(degree, (self.n,)) 43 | 44 | self.type = dtype 45 | 46 | # Degree of the polynomial: 47 | self.degree = degree 48 | 49 | if numpy.min(self.degree) < 0 or numpy.max(self.degree) > 4: 50 | raise Exception("Only the first 5 hermite polynomials are supported") 51 | 52 | alpha = numpy.asarray(alpha, dtype=numpy.int32) 53 | alpha = numpy.broadcast_to(alpha, (self.n,)) 54 | self.alpha = alpha 55 | 56 | # Normalization: 57 | self.norm = numpy.power(self.alpha / numpy.pi, 0.25) 58 | self.norm = numpy.prod(self.norm) 59 | 60 | 61 | # Craft the polynomial coefficients: 62 | 63 | # Add one to the degree since they start at "0" 64 | # Polynomial is of shape [degree, largest_dimension] 65 | self.polynomial = numpy.zeros(shape=(max(self.degree) + 1, self.n), dtype=numpy.float64) 66 | # Loop over the coefficents and set them: 67 | 68 | # Loop over dimension: 69 | 
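        # The coefficients hard-coded below are the physicists' Hermite polynomials,
        # which (together with the Gaussian boundary condition) give the exact
        # harmonic oscillator eigenstates:
        #   H_0(x) = 1
        #   H_1(x) = 2x
        #   H_2(x) = 4x^2 - 2
        #   H_3(x) = 8x^3 - 12x
        #   H_4(x) = 16x^4 - 48x^2 + 12
        # Each dimension also carries the normalization 1 / sqrt(2^d * d!), set below.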
self.polynomial_norm = numpy.zeros(shape=(self.n,), dtype=numpy.float64) 70 | for _n in range(self.n): 71 | # Loop over degree: 72 | _d = self.degree[_n] 73 | if _d == 0: 74 | self.polynomial[0][_n] = 1.0 75 | elif _d == 1: 76 | self.polynomial[1][_n] = 2.0 77 | elif _d == 2: 78 | self.polynomial[0][_n] = -2.0 79 | self.polynomial[2][_n] = 4.0 80 | elif _d == 3: 81 | self.polynomial[1][_n] = -12.0 82 | self.polynomial[3][_n] = 8.0 83 | elif _d == 4: 84 | self.polynomial[0][_n] = 12.0 85 | self.polynomial[2][_n] = -48.0 86 | self.polynomial[4][_n] = 16.0 87 | 88 | # Set the polynomial normalization as a function of the degree 89 | # For each dimension: 90 | self.polynomial_norm[_n] = 1.0 / numpy.sqrt(2**_d * numpy.math.factorial(_d)) 91 | 92 | self.polynomial_norm = tf.convert_to_tensor(self.polynomial_norm, dtype=self.type) 93 | self.polynomial = tf.convert_to_tensor(self.polynomial, dtype=self.type) 94 | 95 | 96 | self.exp = GaussianBoundaryCondition( 97 | n=self.n, exp=numpy.sqrt(self.alpha), trainable=False, dtype=self.type) 98 | 99 | 100 | @tf.function 101 | def call(self, inputs): 102 | 103 | # Slice the inputs to restrict to just one particle: 104 | y = inputs[:,0,:] 105 | 106 | # Create the output tensor with the right shape, plus the constant term: 107 | polynomial_result = tf.zeros(y.shape,dtype=self.type) 108 | 109 | # This is a somewhat basic implementation: 110 | # Loop over degree: 111 | for d in range(max(self.degree) + 1): 112 | # Loop over dimension: 113 | 114 | # This is raising every element in the input to the d power (current degree) 115 | # This gets reduced by summing over all degrees for a fixed dimenions 116 | # Then they are reduced by multiplying over dimensions 117 | poly_term = y**d 118 | 119 | 120 | # Multiply every element (which is the dth power) by the appropriate 121 | # coefficient in it's dimension 122 | res_vec = poly_term * self.polynomial[d] 123 | 124 | # Add this to the result: 125 | polynomial_result += res_vec 126 | 127 | 128 | # Multiply the results across dimensions at every point: 129 | polynomial_result = tf.reduce_prod(polynomial_result, axis=1) 130 | 131 | # Again restrict the BC to just one particle: 132 | boundary_condition = self.exp(inputs)[:,0] 133 | 134 | total_normalization = self.norm * tf.reduce_prod(self.polynomial_norm) 135 | epsilon = 1e-16 136 | # Add epsilon here to prevent underflow 137 | wavefunction = tf.math.log(boundary_condition * polynomial_result * total_normalization + epsilon) 138 | 139 | return tf.reshape(wavefunction, [-1, 1]) 140 | -------------------------------------------------------------------------------- /mlqm/models/NeuralWavefunction.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import tensorflow as tf 3 | 4 | #from .ExponentialBoundaryCondition import ExponentialBoundaryCondition 5 | 6 | class NeuralWavefunction(tf.keras.models.Model): 7 | """Create a neural network eave function in N dimensions 8 | 9 | Boundary condition, if not supplied, is gaussian in every dimension 10 | 11 | Extends: 12 | tf.keras.models.Model 13 | """ 14 | def __init__(self, ndim : int, nparticles: int, boundary_condition :tf.keras.layers.Layer = None): 15 | tf.keras.models.Model.__init__(self) 16 | 17 | self.ndim = ndim 18 | if self.ndim < 2 or self.ndim > 2: 19 | raise Exception("Dimension must be 2 for NeuralWavefunction") 20 | 21 | self.nparticles = nparticles 22 | 23 | # Create a boundary condition if needed: 24 | # if boundary_condition is None: 25 | # self.bc = 
ExponentialBoundaryCondition(self.ndim) 26 | # else: 27 | # self.bc = boundary_condition 28 | 29 | self.alpha = tf.Variable(0.1, dtype=tf.float64) 30 | self.beta = tf.Variable(0.2, dtype=tf.float64) 31 | self.gamma = tf.Variable(30.0, dtype=tf.float64) 32 | 33 | 34 | def call(self, inputs): 35 | # This is expected to be exactly 2 dimensions. 36 | # shape is [walkers, particles, dim] 37 | 38 | a = inputs[:,:,0] 39 | b = inputs[:,:,1] 40 | c = a * b 41 | 42 | return -(self.alpha * a**2 + self.beta * b**2 + self.gamma * c) 43 | 44 | 45 | 46 | 47 | def n_parameters(self): 48 | return tf.reduce_sum( [ tf.reduce_prod(p.shape) for p in self.trainable_variables ]) 49 | -------------------------------------------------------------------------------- /mlqm/models/PolynomialWavefunction.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy 3 | 4 | from .ExponentialBoundaryCondition import ExponentialBoundaryCondition 5 | 6 | class PolynomialWavefunction(tf.keras.models.Model): 7 | """Implememtation of a Polynomial wave funtion in N dimensions 8 | 9 | Create a polynomial, up to `degree` in every dimension `n`, fittable 10 | for optimization. 11 | 12 | Boundary condition, if not supplied, is gaussian like in every dimension. 13 | 14 | Extends: 15 | tf.keras.models.Model 16 | """ 17 | 18 | def __init__(self, n : int, nparticles : int, degree : int, boundary_condition :tf.keras.layers.Layer = None): 19 | """Initializer 20 | 21 | Create a polynomial wave function with exponential boundary condition 22 | 23 | Arguments: 24 | n {int} -- Dimension of the oscillator (1 <= n <= 3) 25 | nparticles {int} -- number of particles 26 | degree {int} -- Degree of the solution 27 | alpha {float} -- Alpha parameter (m * omega / hbar) 28 | 29 | Raises: 30 | Exception -- [description] 31 | """ 32 | 33 | tf.keras.models.Model.__init__(self) 34 | 35 | self.n = n 36 | if self.n < 1 or self.n > 3: 37 | raise Exception("Dimension must be 1, 2, or 3 for PolynomialWavefunction") 38 | 39 | if nparticles > 1: 40 | raise Exception("Polynomial wavefunction is only supported for one particle") 41 | 42 | # Use numpy to broadcast to the right dimension: 43 | degree = numpy.asarray(degree, dtype=numpy.int32) 44 | degree = numpy.broadcast_to(degree, (self.n,)) 45 | 46 | # Degree of the polynomial: 47 | self.degree = degree 48 | 49 | if numpy.min(self.degree) < 0 or numpy.max(self.degree) > 4: 50 | raise Exception("Degree must be at least 0 in all dimensions") 51 | 52 | # Normalization: 53 | self.norm = 1.0 54 | 55 | 56 | # Craft the polynomial coefficients: 57 | 58 | # Add one to the degree since they start at "0" 59 | # Polynomial is of shape [degree, largest_dimension] 60 | self.polynomial = tf.Variable( 61 | initial_value = tf.random.normal(shape=(max(self.degree) +1 , self.n), dtype=tf.float32), 62 | trainable=True ) 63 | 64 | # if boundary_condition is None: 65 | # self.bc = ExponentialBoundaryCondition(self.n) 66 | # else: 67 | # self.bc = boundary_condition 68 | 69 | 70 | 71 | def call(self, inputs, training=None): 72 | # Restrict to just one particle 73 | y = inputs[:,0,:] 74 | 75 | # Create the output tensor with the right shape, plus the constant term: 76 | polynomial_result = tf.zeros(y.shape) 77 | 78 | # This is a somewhat basic implementation: 79 | # Loop over degree: 80 | for d in range(max(self.degree) + 1): 81 | # Loop over dimension: 82 | 83 | # This is raising every element in the input to the d power (current degree) 84 | # This gets reduced by 
summing over all degrees for a fixed dimenions 85 | # Then they are reduced by multiplying over dimensions 86 | poly_term = y**d 87 | 88 | # Multiply every element (which is the dth power) by the appropriate 89 | # coefficient in it's dimension 90 | res_vec = poly_term * self.polynomial[d] 91 | 92 | # Add this to the result: 93 | polynomial_result += res_vec 94 | 95 | # Multiply the results across dimensions at every point: 96 | polynomial_result = tf.reduce_prod(polynomial_result, axis=1) 97 | 98 | # boundary_condition = self.bc(y) 99 | 100 | # print(polynomial_result.shape) 101 | # print(boundary_condition.shape) 102 | 103 | return polynomial_result * self.norm 104 | # return boundary_condition * polynomial_result * self.norm 105 | 106 | 107 | def update_normalization(self, inputs, delta): 108 | # Inputs is expected to be a range of parameters along an x axis. 109 | value = self.call(inputs) 110 | 111 | print(value.shape) 112 | N = value ** 2 113 | 114 | 115 | N = tf.reduce_sum(N * delta) 116 | self.norm *= 1/tf.sqrt(N) 117 | 118 | return 119 | -------------------------------------------------------------------------------- /mlqm/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .HarmonicOscillatorWavefunction import HarmonicOscillatorWavefunction 2 | from .PolynomialWavefunction import PolynomialWavefunction 3 | from .NeuralWavefunction import NeuralWavefunction 4 | from .DeepSetsWavefunction import DeepSetsWavefunction 5 | from .GaussianBoundaryCondition import GaussianBoundaryCondition 6 | from .ExponentialBoundaryCondition import ExponentialBoundaryCondition 7 | 8 | -------------------------------------------------------------------------------- /mlqm/models/test_harmonic_oscillator.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import tensorflow as tf 4 | import numpy 5 | 6 | from . import HarmonicOscillatorWavefunction 7 | from ..samplers import CartesianSampler 8 | 9 | @pytest.mark.parametrize('dimension', [1,2,3]) 10 | @pytest.mark.parametrize('nparticles', [1,2]) 11 | def test_create_harmonic_oscillator(dimension, nparticles): 12 | 13 | # For each dimension, randomly pick a degree 14 | degree = [ numpy.random.randint(0,4) for d in range(dimension)] 15 | 16 | ho_w = HarmonicOscillatorWavefunction(dimension, nparticles, degree, alpha=1.0) 17 | 18 | assert True 19 | 20 | @pytest.mark.parametrize('dimension', [1, 2, 3]) 21 | @pytest.mark.parametrize('nparticles', [1,2]) 22 | def test_run_harmonic_oscillator(dimension, nparticles): 23 | 24 | # For each dimension, randomly pick a degree 25 | degree = [ numpy.random.randint(0,4) for d in range(dimension)] 26 | ho_w = HarmonicOscillatorWavefunction(dimension, nparticles, degree, alpha=1.0) 27 | 28 | 29 | delta = 0.5 30 | 31 | sampler = CartesianSampler(dimension, delta=delta, mins=-10, maxes=10) 32 | 33 | x = sampler.sample() 34 | 35 | wavefunction = ho_w(x) 36 | 37 | 38 | assert tf.abs(tf.reduce_sum(wavefunction**2) * delta**dimension - 1.0) < 0.01 39 | -------------------------------------------------------------------------------- /mlqm/models/test_nn.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import tensorflow as tf 4 | import numpy 5 | 6 | from . 
import NeuralWavefunction 7 | from ..samplers import CartesianSampler 8 | 9 | 10 | @pytest.mark.parametrize('dimension', [1,2,3]) 11 | def test_create_polynomial(dimension): 12 | 13 | # For each dimension, randomly pick a degree 14 | 15 | nn_w = NeuralWavefunction(dimension) 16 | 17 | assert True 18 | 19 | 20 | @pytest.mark.parametrize('dimension', [1, 2, 3]) 21 | def test_run_polynomial(dimension): 22 | 23 | # For each dimension, randomly pick a degree 24 | 25 | nn_w = NeuralWavefunction(dimension) 26 | 27 | 28 | delta = 2.0 29 | 30 | sampler = CartesianSampler(dimension, delta=delta, mins=-10, maxes=10) 31 | 32 | x = sampler.sample() 33 | 34 | nn_w.update_normalization(x, delta=delta**dimension) 35 | 36 | wavefunction = nn_w(x) 37 | 38 | 39 | assert tf.abs(tf.reduce_sum(wavefunction**2) * delta**dimension - 1.0) < 0.01 40 | 41 | 42 | 43 | if __name__ == "__main__": 44 | 45 | 46 | test_create_polynomial(dimension = 1, degree=3) -------------------------------------------------------------------------------- /mlqm/models/test_polynomial.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import tensorflow as tf 4 | import numpy 5 | 6 | from . import PolynomialWavefunction 7 | from ..samplers.CartesianSampler import CartesianSampler 8 | 9 | 10 | @pytest.mark.parametrize('dimension', [1,2,3]) 11 | def test_create_polynomial(dimension): 12 | 13 | # For each dimension, randomly pick a degree 14 | degree = [ numpy.random.randint(1,4) for d in range(dimension)] 15 | 16 | poly_w = PolynomialWavefunction(dimension, degree) 17 | 18 | assert True 19 | 20 | 21 | @pytest.mark.parametrize('dimension', [1, 2, 3]) 22 | def test_run_polynomial(dimension): 23 | 24 | # For each dimension, randomly pick a degree 25 | degree = [ numpy.random.randint(0,4) for d in range(dimension)] 26 | poly_w = PolynomialWavefunction(dimension, degree) 27 | 28 | 29 | delta = 0.5 30 | 31 | sampler = CartesianSampler(dimension, delta=delta, mins=-10, maxes=10) 32 | 33 | x = sampler.sample() 34 | 35 | poly_w.update_normalization(x, delta=delta**dimension) 36 | 37 | wavefunction = poly_w(x) 38 | 39 | 40 | assert tf.abs(tf.reduce_sum(wavefunction**2) * delta**dimension - 1.0) < 0.01 41 | -------------------------------------------------------------------------------- /mlqm/optimization/GradientCalculator.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class GradientCalculator(object): 4 | 5 | def __init__(self, dtype=tf.float64): 6 | self.dtype = dtype 7 | 8 | @tf.function 9 | def f_i(self, dpsi_i, energy, dpsi_i_EL): 10 | return dpsi_i * energy - dpsi_i_EL 11 | 12 | @tf.function 13 | def S_ij(self, dpsi_ij, dpsi_i): 14 | return dpsi_ij - dpsi_i * tf.transpose(dpsi_i) 15 | 16 | 17 | # @tf.function 18 | # def par_dist(self, dp_i, S_ij): 19 | # D_ij = S_ij * (dp_i * tf.transpose(dp_i)) 20 | # dist = tf.reduce_sum(D_ij) 21 | # return dist 22 | 23 | @tf.function 24 | def par_dist(self, dp_i, S_ij): 25 | dist = tf.reduce_sum(dp_i*tf.linalg.matmul(S_ij, dp_i)) 26 | return dist 27 | 28 | 29 | @tf.function 30 | def regularize_S_ij(self, S_ij, eps): 31 | dtype = S_ij.dtype 32 | npt = S_ij.shape[0] 33 | S_ij_d = S_ij + eps * tf.eye(npt, dtype=dtype) 34 | return S_ij_d 35 | 36 | 37 | 38 | def cast(self, *args): 39 | 40 | return (tf.cast(a, self.dtype) if a.dtype != self.dtype else a for a in args) 41 | 42 | 43 | # @tf.function 44 | def pd_solve(self, S_ij, eps, f_i): 45 | 46 | 47 | # Regularize along the 
diagonal: 48 | S_ij_d = self.regularize_S_ij(S_ij, eps) 49 | 50 | # Next, we need S_ij to be positive definite. 51 | U_ij = tf.linalg.cholesky(S_ij_d) 52 | 53 | dp_i = tf.linalg.cholesky_solve(U_ij, f_i) 54 | 55 | return dp_i 56 | -------------------------------------------------------------------------------- /mlqm/optimization/StochasticReconfiguration.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | # Set up logging: 4 | logger = logging.getLogger() 5 | 6 | from omegaconf import DictConfig 7 | 8 | import tensorflow as tf 9 | import numpy 10 | 11 | 12 | from mlqm.hamiltonians import Hamiltonian 13 | from mlqm.optimization import GradientCalculator 14 | from mlqm.samplers import Estimator, MetropolisSampler 15 | 16 | from mlqm import MPI_AVAILABLE, MAX_PARALLEL_ITERATIONS, DEFAULT_TENSOR_TYPE 17 | 18 | 19 | 20 | if MPI_AVAILABLE: 21 | import horovod.tensorflow as hvd 22 | 23 | class StochasticReconfiguration(object): 24 | 25 | def __init__(self, 26 | sampler : MetropolisSampler, 27 | wavefunction : callable, 28 | adaptive_wfn : callable, 29 | hamiltonian : Hamiltonian, 30 | optimizer_config : DictConfig, 31 | sampler_config : DictConfig, 32 | ): 33 | 34 | # Store the objects: 35 | self.sampler = sampler 36 | self.wavefunction = wavefunction 37 | self.hamiltonian = hamiltonian 38 | # Initialize a Gradiant Calculator: 39 | self.gradient_calc = GradientCalculator() 40 | 41 | self.current_delta = None 42 | self.current_eps = None 43 | 44 | self.optimizer_config = optimizer_config 45 | 46 | # Store the measurement configurations: 47 | self.n_observable_measurements = sampler_config.n_observable_measurements 48 | self.n_void_steps = sampler_config.n_void_steps 49 | self.n_walkers_per_observation = sampler_config.n_walkers_per_observation 50 | self.n_concurrent_obs_per_rank = sampler_config.n_concurrent_obs_per_rank 51 | 52 | # MPI Enabled? 
53 | if MPI_AVAILABLE: 54 | self.size = hvd.size() 55 | self.rank = hvd.rank() 56 | else: 57 | self.size = 1 58 | self.rank = 1 59 | 60 | self.estimator = Estimator() 61 | 62 | # Use a "disposable" wavefunction too: 63 | self.adaptive_wavefunction = adaptive_wfn 64 | 65 | self.correct_shape = [ p.shape for p in self.wavefunction.trainable_variables ] 66 | 67 | self.predicted_energy = None 68 | 69 | @tf.function 70 | def batched_jacobian(self, nobs, x_current_arr, wavefunction, jac_fnc): 71 | ret_jac = [] 72 | for i in range(nobs): 73 | flattened_jacobian, flat_shape = jac_fnc(x_current_arr[i], wavefunction) 74 | ret_jac.append(flattened_jacobian) 75 | 76 | return ret_jac, flat_shape 77 | 78 | 79 | @tf.function 80 | def jacobian(self, x_current, wavefunction): 81 | tape = tf.GradientTape() 82 | # n_walkers = x_current.shape[0] 83 | 84 | # print("Doing forward pass") 85 | with tape: 86 | log_wpsi = wavefunction(x_current) 87 | 88 | 89 | jac = tape.jacobian(log_wpsi, wavefunction.trainable_variables) 90 | # jac = tape.jacobian(log_wpsi, wavefunction.trainable_variables, parallel_iterations = MAX_PARALLEL_ITERATIONS) 91 | 92 | 93 | # Grab the original shapes ([1:] means everything except first dim): 94 | jac_shape = [j.shape[1:] for j in jac] 95 | # get the flattened shapes: 96 | flat_shape = [[-1, tf.reduce_prod(js)] for js in jac_shape] 97 | # Reshape the 98 | 99 | # We have the flat shapes and now we need to make the jacobian into a single matrix 100 | 101 | flattened_jacobian = [tf.reshape(j, f) for j, f in zip(jac, flat_shape)] 102 | 103 | flattened_jacobian = tf.concat(flattened_jacobian, axis=-1) 104 | 105 | return flattened_jacobian, flat_shape 106 | 107 | 108 | 109 | @tf.function 110 | def compute_O_observables(self, flattened_jacobian, energy): 111 | 112 | # dspi_i is the reduction of the jacobian over all walkers. 113 | # In other words, it's the mean gradient of the parameters with respect to inputs. 114 | # This is effectively the measurement of O^i in the paper. 
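        # In stochastic reconfiguration notation (averages taken over walkers,
        # with O_i = d(log psi)/d(theta_i) and E_L the local energy):
        #   dpsi_i    ~ < O_i >
        #   dpsi_ij   ~ < O_i O_j >
        #   dpsi_i_EL ~ < E_L O_i >
        # GradientCalculator later combines these into the force
        # f_i = <O_i><E> - <E_L O_i> and the overlap matrix
        # S_ij = <O_i O_j> - <O_i><O_j>, which define the parameter update.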
115 | dpsi_i = tf.reduce_mean(flattened_jacobian, axis=0) 116 | dpsi_i = tf.reshape(dpsi_i, [-1,1]) 117 | 118 | # To compute 119 | dpsi_ij = tf.linalg.matmul(flattened_jacobian, flattened_jacobian, transpose_a = True) / self.n_walkers_per_observation 120 | 121 | # Computing : 122 | dpsi_i_EL = tf.linalg.matmul(tf.reshape(energy, [1,self.n_walkers_per_observation]), flattened_jacobian) 123 | # This makes this the same shape as the other tensors 124 | dpsi_i_EL = tf.reshape(dpsi_i_EL, [-1, 1]) 125 | 126 | return dpsi_i, dpsi_ij, dpsi_i_EL 127 | 128 | 129 | 130 | 131 | @tf.function 132 | def apply_gradients(self, gradients, variables): 133 | 134 | # Update the parameters: 135 | for grad, var in zip(gradients, variables): 136 | var.assign_add(grad) 137 | 138 | return 139 | 140 | 141 | def equilibrate(self, n_equilibrations): 142 | 143 | kicker = tf.random.normal 144 | kicker_params = {"mean": 0.0, "stddev" : 0.4} 145 | 146 | acceptance = self.sampler.kick(self.wavefunction, kicker, kicker_params, nkicks=n_equilibrations) 147 | 148 | return acceptance 149 | 150 | # @tf.function 151 | def compile(self): 152 | # This step gets fresh walkers and computes their energy, which calls compile steps 153 | 154 | kicker = tf.random.normal 155 | kicker_params = {"mean": 0.0, "stddev" : 0.2} 156 | acceptance = self.sampler.kick(self.wavefunction, kicker, kicker_params, nkicks=1) 157 | x_current = self.sampler.sample() 158 | energy, energy_jf, ke_jf, ke_direct, pe, logw_of_x = self.hamiltonian.energy(self.wavefunction, x_current) 159 | 160 | 161 | # 162 | # @tf.function 163 | def recompute_energy(self, test_wavefunction, current_psi, ): 164 | 165 | estimator = Estimator() 166 | estimator.clear() 167 | 168 | for next_x, this_current_psi in zip(self.sampler.get_all_walkers(), current_psi): 169 | 170 | # Compute the observables: 171 | energy, energy_jf, ke_jf, ke_direct, pe, logw_of_x = \ 172 | self.hamiltonian.energy(test_wavefunction, next_x) 173 | 174 | # Here, we split the energy and other objects into sizes of nwalkers_per_observation 175 | # if self.n_concurrent_obs_per_rank != 1: 176 | next_x = tf.split(next_x, self.n_concurrent_obs_per_rank, axis=0) 177 | energy = tf.split(energy, self.n_concurrent_obs_per_rank, axis=0) 178 | logw_of_x = tf.split(logw_of_x, self.n_concurrent_obs_per_rank, axis=0) 179 | 180 | # print("New energy: ", energy) 181 | # print("New psi: ", logw_of_x) 182 | 183 | # overlap of wavefunctions: 184 | wavefunction_ratio = [ tf.math.exp((next_psi - curr_psi)) for next_psi, curr_psi in zip(logw_of_x, this_current_psi) ] 185 | probability_ratio = [ tf.reshape(wf_ratio**2, energy[i].shape) for i, wf_ratio in enumerate(wavefunction_ratio) ] 186 | 187 | # print("wavefunction_ratio: ", wavefunction_ratio) 188 | # print("probability_ratio: ", probability_ratio) 189 | 190 | for i_obs in range(self.n_concurrent_obs_per_rank): 191 | obs_energy = probability_ratio[i_obs] * energy[i_obs] 192 | 193 | # print(tf.reduce_sum(obs_energy)) 194 | # print(tf.reduce_sum(obs_energy) / tf.reduce_sum(probability_ratio[i_obs])) 195 | 196 | estimator.accumulate("energy", tf.reduce_sum(obs_energy)) 197 | estimator.accumulate("weight", tf.reduce_sum(probability_ratio[i_obs])) 198 | estimator.accumulate("wavefunction_ratio", tf.reduce_sum(wavefunction_ratio[i_obs])) 199 | estimator.accumulate("N", tf.convert_to_tensor(self.n_walkers_per_observation, dtype=DEFAULT_TENSOR_TYPE)) 200 | 201 | 202 | if MPI_AVAILABLE: 203 | estimator.allreduce() 204 | 205 | 206 | # What's the total weight? 
Use that for the finalization: 207 | total_weight = estimator['weight'] 208 | 209 | # Get the overlap 210 | wavefunction_ratio = estimator['wavefunction_ratio'] 211 | probability_ratio = estimator['weight'] 212 | 213 | N = estimator['N'] 214 | 215 | overlap2 = (wavefunction_ratio / N)**2 / (probability_ratio / N) 216 | 217 | 218 | estimator.finalize(total_weight) 219 | overlap = tf.sqrt(overlap2) 220 | acos = tf.math.acos(overlap)**2 221 | 222 | energy = estimator['energy'].numpy() 223 | 224 | return energy, overlap, acos 225 | 226 | def flat_optimizer(self, current_psi, eps, delta): 227 | 228 | dpsi_i = self.estimator['dpsi_i'] 229 | energy = self.estimator["energy"] 230 | dpsi_i_EL = self.estimator["dpsi_i_EL"] 231 | dpsi_ij = self.estimator["dpsi_ij"] 232 | 233 | dp_i, S_ij = self.compute_gradients(dpsi_i, energy, dpsi_i_EL, dpsi_ij, eps) 234 | 235 | dp_i = delta * dp_i 236 | 237 | # Unpack the gradients 238 | gradients = self.unflatten_weights_or_gradients(self.flat_shape, self.correct_shape, dp_i) 239 | 240 | original_weights = self.wavefunction.trainable_variables 241 | 242 | # Even though it's a flat optimization, we recompute the energy to get the overlap too: 243 | loop_items = zip(self.adaptive_wavefunction.trainable_variables, original_weights, gradients) 244 | for weight, original_weight, gradient in loop_items: 245 | weight.assign(original_weight + gradient) 246 | 247 | # Compute the new energy: 248 | next_energy, overlap, acos = self.recompute_energy(self.adaptive_wavefunction, current_psi) 249 | 250 | # Compute the parameter distance: 251 | par_dist = self.gradient_calc.par_dist(dp_i, S_ij) 252 | ratio = tf.abs(par_dist - acos) / tf.abs(par_dist + acos+ 1e-8) 253 | 254 | 255 | delta_metrics = { 256 | "optimizer/delta" : delta, 257 | "optimizer/eps" : eps, 258 | "optimizer/overlap" : overlap, 259 | "optimizer/par_dist": par_dist, 260 | "optimizer/acos" : acos, 261 | "optimizer/ratio" : ratio 262 | } 263 | 264 | return gradients, delta_metrics, next_energy 265 | 266 | # @tf.function 267 | def optimize_eps(self, current_psi, delta): 268 | 269 | f_i = self.gradient_calc.f_i( 270 | self.estimator['dpsi_i'], 271 | self.estimator["energy"], 272 | self.estimator["dpsi_i_EL"] 273 | ) 274 | 275 | S_ij = self.gradient_calc.S_ij( 276 | self.estimator['dpsi_ij'], 277 | self.estimator['dpsi_i'] 278 | ) 279 | 280 | 281 | 282 | # We do a search over eps ranges to compute the optimal value: 283 | 284 | eps_max = tf.constant(self.optimizer_config.epsilon_max, dtype=S_ij.dtype) 285 | eps_min = tf.constant(self.optimizer_config.epsilon_min, dtype=S_ij.dtype) 286 | 287 | 288 | def evaluate(_s_ij, _f_i, current_psi, delta, _eps): 289 | 290 | # First, set metrics to null values: 291 | _metrics = { 292 | "optimizer/delta" : delta, 293 | "optimizer/eps" : _eps, 294 | "optimizer/overlap" : 2.0, 295 | "optimizer/par_dist": 2.0, 296 | "optimizer/acos" : 10, 297 | "optimizer/ratio" : 10, 298 | } 299 | 300 | try: 301 | dp_i = self.gradient_calc.pd_solve(_s_ij, _eps, _f_i) 302 | except tf.errors.InvalidArgumentError: 303 | print("Cholesky solve failed, continuing with higher regularization") 304 | return None, _metrics, 99999 305 | 306 | # print(dp_i) 307 | # Scale by the learning rate: 308 | dp_i = delta * dp_i 309 | 310 | # Unpack the gradients 311 | gradients = self.unflatten_weights_or_gradients(self.flat_shape, self.correct_shape, dp_i) 312 | 313 | 314 | original_weights = self.wavefunction.trainable_variables 315 | 316 | loop_items = zip(self.adaptive_wavefunction.trainable_variables, 
original_weights, gradients) 317 | for weight, original_weight, gradient in loop_items: 318 | weight.assign(original_weight + gradient) 319 | 320 | 321 | # Compute the new energy: 322 | next_energy, overlap, acos = self.recompute_energy(self.adaptive_wavefunction, current_psi) 323 | 324 | 325 | # Compute the parameter distance: 326 | par_dist = self.gradient_calc.par_dist(dp_i, _s_ij) 327 | ratio = tf.abs(par_dist - acos) / tf.abs(par_dist + acos+ 1e-8) 328 | _metrics = { 329 | "optimizer/delta" : delta, 330 | "optimizer/eps" : _eps, 331 | "optimizer/overlap" : overlap, 332 | "optimizer/par_dist": par_dist, 333 | "optimizer/acos" : acos, 334 | "optimizer/ratio" : ratio 335 | } 336 | return gradients, _metrics, next_energy 337 | 338 | 339 | def metric_check(metrics): 340 | if metrics['optimizer/ratio'] > 0.4: return False 341 | if metrics['optimizer/overlap'] < 0.9: return False 342 | if metrics['optimizer/par_dist'] > 0.1: return False 343 | if metrics['optimizer/acos'] > 0.1: return False 344 | return True 345 | 346 | 347 | # First, evaluate at eps min and eps max: 348 | 349 | # print(eps_min) 350 | # print(eps_max) 351 | grad_low, metrics_low, energy_low = evaluate(S_ij, f_i, current_psi, delta, eps_min) 352 | grad_high, metrics_high, energy_high = evaluate(S_ij, f_i, current_psi, delta, eps_max) 353 | 354 | # print(grad_high) 355 | # print(grad_low) 356 | 357 | # Take the current minimum as the high energy: 358 | if metric_check(metrics_high): 359 | current_minimum_energy = energy_high 360 | current_best_grad = grad_high 361 | current_best_metrics = metrics_high 362 | else: 363 | # If the highest eps metrics failed, we're not gonna succeed here. 364 | grad = [0*w for w in grad_high] 365 | return grad, metrics_high, None 366 | 367 | # We use a bisection section technique, in log space, to narrow down the right epsilon. 368 | converged = False 369 | for i in range(5): 370 | 371 | # And, compute the mid point: 372 | eps_mid = tf.sqrt(eps_max * eps_min) 373 | grad_mid, metrics_mid, energy_mid = evaluate(S_ij, f_i, current_psi, delta, eps_mid) 374 | 375 | 376 | # We have 3 values, high, mid, and low eps. With the same delta, the smallest eps 377 | # is the most aggressive update. The biggest eps is the least aggressive. 378 | # (eps is applied before matrix inversion) 379 | # If all 3 points pass, we narrow in. 380 | 381 | 382 | # If we're here, the most aggressive update passed linear expansion checks. 
383 | # Check the mid point anywyas: 384 | if not metric_check(metrics_mid): 385 | logger.debug("Skipping this energy.", metrics_mid) 386 | eps_min = eps_mid 387 | metrics_min = metrics_mid 388 | grad_min = grad_mid 389 | continue 390 | 391 | if energy_mid < current_minimum_energy: 392 | eps_max = eps_mid 393 | grad_max = grad_mid 394 | metrics_max = metrics_mid 395 | energy_max = energy_mid 396 | current_minimum_energy = energy_mid 397 | current_minimum_grad = grad_mid 398 | current_minimum_metrics = metrics_mid 399 | converged = True 400 | else: 401 | eps_min = eps_mid 402 | grad_min = grad_mid 403 | metrics_min = metrics_mid 404 | energy_min = energy_mid 405 | 406 | if not converged: 407 | grad = [0*w for w in grad_high] 408 | logger.debug("No update selected for this step.") 409 | return grad, metrics_high, None 410 | 411 | 412 | return current_minimum_grad, current_minimum_metrics, current_minimum_energy 413 | 414 | 415 | def optimize_delta(self, current_psi, eps): 416 | 417 | # Get the natural gradients and S_ij 418 | f_i = self.gradient_calc.f_i( 419 | self.estimator['dpsi_i'], 420 | self.estimator["energy"], 421 | self.estimator["dpsi_i_EL"] 422 | ) 423 | 424 | S_ij = self.gradient_calc.S_ij( 425 | self.estimator['dpsi_ij'], 426 | self.estimator['dpsi_i'] 427 | ) 428 | 429 | dp_i = None 430 | # Regularize S_ij with as small and eps as possible: 431 | for n in range(5): 432 | try: 433 | dp_i = self.gradient_calc.pd_solve(S_ij, eps, f_i) 434 | except tf.errors.InvalidArgumentError: 435 | print("Cholesky solve failed, continuing with higher regularization") 436 | eps *= 2. 437 | 438 | # No exception? Succeeded, break the loop. 439 | break 440 | 441 | if dp_i is None: 442 | raise Exception("Could not invert S_ij for any epsilon tried.") 443 | 444 | # Get the unscaled gradients: 445 | delta_p = self.unflatten_weights_or_gradients(self.flat_shape, self.correct_shape, dp_i) 446 | # Now iterate over delta values to optimize the step size: 447 | delta_max = tf.constant(self.optimizer_config.delta_max, dtype=S_ij.dtype) 448 | delta_min = tf.constant(self.optimizer_config.delta_min, dtype=S_ij.dtype) 449 | 450 | # take the current energy as the starting miniumum: 451 | energy_min = self.estimator['energy'] + 1 452 | 453 | # Snapshot the current weights: 454 | original_weights = self.wavefunction.trainable_variables 455 | 456 | 457 | # Select the delta options: 458 | 459 | n_delta_iterations = 10 460 | 461 | delta_options = tf.linspace(tf.math.log(delta_max), tf.math.log(delta_min), n_delta_iterations) 462 | delta_options = tf.math.exp(delta_options) 463 | 464 | energies = [] 465 | overlaps = [] 466 | acoses = [] 467 | 468 | for i,this_delta in enumerate(delta_options): 469 | 470 | 471 | # We have the original energy, and gradient updates. 
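            # For each candidate delta, the trial parameters are
            #   w_trial = w_original + delta * delta_p
            # and the energy and overlap are re-estimated (by reweighting) with
            # the adaptive wavefunction, without touching the real wavefunction.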
472 | # Apply the updates: 473 | loop_items = zip(self.adaptive_wavefunction.trainable_variables, original_weights, delta_p) 474 | for weight, original_weight, gradient in loop_items: 475 | weight.assign(original_weight + this_delta * gradient) 476 | 477 | # Compute the new energy: 478 | energy, overlap, acos = self.recompute_energy(self.adaptive_wavefunction, current_psi) 479 | energies.append(energy) 480 | overlaps.append(overlap) 481 | acoses.append(acos) 482 | 483 | delta_options = [ d.numpy() for d in delta_options ] 484 | 485 | # print("Energies: ", energies) 486 | # print("Deltas: ", delta_options) 487 | # print("acoses: ", acoses) 488 | # print("overlaps: ", overlaps) 489 | 490 | # We find the best delta, with the constraint that overlap > 0.8 and par_dis < 0.4 491 | found = False 492 | 493 | energy_rms = tf.math.reduce_std(energies) 494 | 495 | 496 | while len(energies) > 0: 497 | # What's the smallest energy? 498 | i_e_min = numpy.argmin(energies) 499 | 500 | par_dist = self.gradient_calc.par_dist(delta_options[i_e_min]*dp_i, S_ij) 501 | 502 | ratio = tf.abs(par_dist - acoses[i_e_min]) / tf.abs(par_dist + acoses[i_e_min]) 503 | 504 | # 505 | # print("i_e_min: ", i_e_min, ", Delta: ", delta_options[i_e_min], ", ratio: ", ratio, ", overlap: ", overlap[i_e_min], ", par_dist: ", par_dist, ", acos: ", acos) 506 | # print(hvd.rank(), " Delta: ", delta_options[i_e_min], ", par_dist: ", par_dist) 507 | # print(hvd.rank(), " Delta: ", delta_options[i_e_min], ", acos: ", acos) 508 | 509 | if par_dist < 0.1 and acoses[i_e_min] < 0.1 and overlaps[i_e_min] > 0.9 and ratio < 0.4: 510 | found = True 511 | final_overlap = overlaps[i_e_min] 512 | next_energy = energies[i_e_min] 513 | break 514 | else: 515 | logger.debug(f"Skipping this energy (acos: {acoses[i_e_min]}, overlap: {overlaps[i_e_min]}, par_dist: {par_dist}, ratio: {ratio})") 516 | 517 | # Remove these options 518 | energies.pop(i_e_min) 519 | overlaps.pop(i_e_min) 520 | acoses.pop(i_e_min) 521 | delta_options.pop(i_e_min) 522 | 523 | final_overlap = 2.0 524 | next_energy = None 525 | 526 | 527 | # print("i_e_min: ", i_e_min) 528 | if found: 529 | best_delta = delta_options[i_e_min] 530 | else: 531 | # Apply no update. Rewalk and recompute. 532 | best_delta = 0.0 533 | ratio = 10.0 534 | acos = 10. 535 | overlap = 2.0 536 | 537 | 538 | gradients = [ best_delta * g for g in delta_p ] 539 | delta_metrics = { 540 | "optimizer/delta" : best_delta, 541 | "optimizer/eps" : eps, 542 | "optimizer/overlap" : final_overlap, 543 | "optimizer/par_dist": par_dist, 544 | "optimizer/acos" : acos, 545 | "optimizer/energy_rms": energy_rms, 546 | "optimizer/ratio" : ratio 547 | } 548 | return gradients, delta_metrics, next_energy 549 | 550 | # @tf.function 551 | def compute_gradients(self, dpsi_i, energy, dpsi_i_EL, dpsi_ij, eps): 552 | 553 | # Get the natural gradients and S_ij 554 | f_i = self.gradient_calc.f_i(dpsi_i, energy, dpsi_i_EL) 555 | 556 | S_ij = self.gradient_calc.S_ij(dpsi_ij, dpsi_i) 557 | 558 | # Regularize S_ij with as small and eps as possible: 559 | 560 | for n in range(5): 561 | try: 562 | dp_i = self.gradient_calc.pd_solve(S_ij, eps, f_i) 563 | break 564 | except tf.errors.InvalidArgumentError: 565 | print("Cholesky solve failed, continuing with higher regularization") 566 | eps *= 2. 
567 | continue 568 | 569 | return dp_i, S_ij 570 | 571 | 572 | def walk_and_accumulate_observables(self, 573 | estimator, 574 | _wavefunction, 575 | _sampler, 576 | _n_loops_total, 577 | _kicker, 578 | _kicker_params, 579 | ): 580 | '''Internal function to take a wavefunction and set of walkers and compute observables 581 | 582 | [description] 583 | 584 | Arguments: 585 | _n_loops_total {[type]} -- [description] 586 | _n_concurrent_obs_per_rank {[type]} -- [description] 587 | _wavefunction {[type]} -- [description] 588 | _sampler {Sampler} -- [description] 589 | # Sampler object _kicker {[type]} -- [description] 590 | _kicker_params {[type]} -- [description] 591 | _n_void_steps {[type]} -- [description] 592 | _hamiltonian {[type]} -- [description] 593 | {[type]} -- [description] 594 | ''' 595 | 596 | estimator.clear() 597 | 598 | current_psi = [] 599 | 600 | for i_loop in range(_n_loops_total): 601 | # logger.debug(f" -- evaluating loop {i_loop} of {n_loops_total}") 602 | 603 | # First do a void walk to thermalize after a new configuration. 604 | # By default, this will use the previous walkers as a starting configurations. 605 | # # This one does all the kicks in a compiled function. 606 | 607 | # UNCOMMENT STARTING HERE 608 | acceptance = _sampler.kick(_wavefunction, _kicker, _kicker_params, nkicks=self.n_void_steps) 609 | 610 | 611 | # Get the current walker locations: 612 | x_current = _sampler.sample() 613 | # Compute the observables: 614 | energy, energy_jf, ke_jf, ke_direct, pe, logw_of_x = self.hamiltonian.energy(_wavefunction, x_current) 615 | 616 | 617 | # R is computed but it needs to be WRT the center of mass of all particles 618 | # So, mean subtract if needed: 619 | if _wavefunction.mean_subtract: 620 | mean = tf.reduce_mean(x_current, axis=1) 621 | r = x_current - mean[:,None,:] 622 | else: 623 | r = x_current 624 | 625 | r = tf.reduce_sum(r**2, axis=(1,2)) 626 | r = tf.reduce_mean(tf.math.sqrt(r)) 627 | 628 | # Here, we split the energy and other objects into sizes of nwalkers_per_observation 629 | # if self.n_concurrent_obs_per_rank != 1: 630 | x_current = tf.split(x_current, self.n_concurrent_obs_per_rank, axis=0) 631 | energy = tf.split(energy, self.n_concurrent_obs_per_rank, axis=0) 632 | energy_jf = tf.split(energy_jf, self.n_concurrent_obs_per_rank, axis=0) 633 | ke_jf = tf.split(ke_jf, self.n_concurrent_obs_per_rank, axis=0) 634 | ke_direct = tf.split(ke_direct, self.n_concurrent_obs_per_rank, axis=0) 635 | pe = tf.split(pe, self.n_concurrent_obs_per_rank, axis=0) 636 | logw_of_x = tf.split(logw_of_x, self.n_concurrent_obs_per_rank, axis=0) 637 | 638 | # print("Original logw_of_x: ", logw_of_x) 639 | # print("Original energy: ", energy) 640 | 641 | current_psi.append(logw_of_x) 642 | 643 | 644 | # For each observation, we compute the jacobian. 
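            # Each entry of flattened_jacobian has shape
            # [n_walkers_per_observation, n_parameters]: one row of
            # d(log psi)/d(theta) per walker for that observation.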
645 | # flattened_jacobian is a list, flat_shape is just one instance 646 | flattened_jacobian, flat_shape = self.batched_jacobian( 647 | self.n_concurrent_obs_per_rank, x_current, _wavefunction, self.jacobian) 648 | 649 | # Here, if MPI is available, we can do a reduction (sum) over walker variables 650 | 651 | # Now, compute observables, store them in an estimator: 652 | 653 | for i_obs in range(self.n_concurrent_obs_per_rank): 654 | obs_energy = energy[i_obs] / self.n_walkers_per_observation 655 | obs_energy_jf = energy_jf[i_obs] / self.n_walkers_per_observation 656 | obs_ke_jf = ke_jf[i_obs] / self.n_walkers_per_observation 657 | obs_ke_direct = ke_direct[i_obs] / self.n_walkers_per_observation 658 | obs_pe = pe[i_obs] / self.n_walkers_per_observation 659 | 660 | # print("obs_energy: ", obs_energy) 661 | # print("obs_energy_jf: ", obs_energy_jf) 662 | # print("obs_ke_jf: ", obs_ke_jf) 663 | # print("obs_ke_direct: ", obs_ke_direct) 664 | # print("obs_pe: ", obs_pe) 665 | 666 | 667 | dpsi_i, dpsi_ij, dpsi_i_EL = self.compute_O_observables(flattened_jacobian[i_obs], obs_energy) 668 | 669 | # print("dpsi_i: ", dpsi_i) 670 | # print("dpsi_i_EL: ", dpsi_i_EL) 671 | # print("dpsi_ij: ", dpsi_ij) 672 | 673 | # print("flattened_jacobian: ", flattened_jacobian) 674 | 675 | # Accumulate variables: 676 | 677 | self.estimator.accumulate('energy', tf.reduce_sum(obs_energy)) 678 | self.estimator.accumulate('energy2', tf.reduce_sum(obs_energy)**2) 679 | self.estimator.accumulate('energy_jf', tf.reduce_sum(obs_energy_jf)) 680 | self.estimator.accumulate('energy2_jf', tf.reduce_sum(obs_energy_jf)**2) 681 | self.estimator.accumulate('ke_jf', tf.reduce_sum(obs_ke_jf)) 682 | self.estimator.accumulate('ke_direct', tf.reduce_sum(obs_ke_direct)) 683 | self.estimator.accumulate('pe', tf.reduce_sum(obs_pe)) 684 | self.estimator.accumulate('acceptance', acceptance) 685 | self.estimator.accumulate('r', r) 686 | self.estimator.accumulate('dpsi_i', dpsi_i) 687 | self.estimator.accumulate('dpsi_i_EL', dpsi_i_EL) 688 | self.estimator.accumulate('dpsi_ij', dpsi_ij) 689 | self.estimator.accumulate('weight', tf.convert_to_tensor(1., dtype=DEFAULT_TENSOR_TYPE )) 690 | 691 | 692 | # self.estimator.accumulate( 693 | # energy = tf.reduce_sum(obs_energy), 694 | # energy_jf = tf.reduce_sum(obs_energy_jf), 695 | # ke_jf = tf.reduce_sum(obs_ke_jf), 696 | # ke_direct = tf.reduce_sum(obs_ke_direct), 697 | # pe = tf.reduce_sum(obs_pe), 698 | # acceptance = acceptance, 699 | # r = r, 700 | # dpsi_i = dpsi_i, 701 | # dpsi_i_EL = dpsi_i_EL, 702 | # dpsi_ij = dpsi_ij, 703 | # ) 704 | 705 | 706 | 707 | # INTERCEPT HERE with MPI to allreduce the estimator objects. 708 | if MPI_AVAILABLE: 709 | self.estimator.allreduce() 710 | 711 | return flat_shape, current_psi 712 | 713 | 714 | # @tf.function 715 | 716 | def sr_step(self, n_thermalize): 717 | 718 | metrics = {} 719 | self.latest_gradients = None 720 | self.latest_psi = None 721 | 722 | 723 | kicker = tf.random.normal 724 | kicker_params = {"mean": 0.0, "stddev" : 0.2} 725 | 726 | 727 | # We need to know how many times to loop over the walkers and metropolis step. 728 | # The total number of observations is set: self.n_observable_measurements 729 | # There is an optimization to walk in parallel with n_concurrent_obs_per_rank 730 | # Without MPI, the number of loops is then n_observable_measurements / n_concurrent_obs_per_rank 731 | # WITH MPI, we have to reduce the number of loops by the total number of ranks. 
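        # Hypothetical example: with n_observable_measurements = 32,
        # n_concurrent_obs_per_rank = 2, and 4 MPI ranks, each rank runs
        # 32 / (2 * 4) = 4 loops of the walk-and-measure block below.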
732 | 733 | n_loops_total = int(self.n_observable_measurements / self.n_concurrent_obs_per_rank) 734 | 735 | if MPI_AVAILABLE: 736 | n_loops_total = int(n_loops_total / self.size) 737 | # logger.debug(" -- Coordinating loop length") 738 | 739 | # We do a check that n_loops_total * n_concurrent_obs_per_rank matches expectations: 740 | if n_loops_total * self.n_concurrent_obs_per_rank*self.size != self.n_observable_measurements: 741 | exception_str = "Total number of observations to compute is unexpected!\n" 742 | exception_str += f" Expected to have {self.n_observable_measurements}, have:\n" 743 | exception_str += f" -- A loop of {self.n_concurrent_obs_per_rank} observations" 744 | exception_str += f" for {n_loops_total} loops over {self.size} ranks" 745 | exception_str += f" -- ({self.n_concurrent_obs_per_rank})*({n_loops_total}" 746 | exception_str += f")*({self.size}) != {self.n_observable_measurements}\n" 747 | raise Exception(exception_str) 748 | 749 | # We do a thermalization step again: 750 | self.equilibrate(n_thermalize) 751 | 752 | # Clear the walker history: 753 | self.sampler.reset_history() 754 | 755 | # Now, actually apply the loop and compute the observables: 756 | self.flat_shape, current_psi = self.walk_and_accumulate_observables( 757 | self.estimator, 758 | self.wavefunction, 759 | self.sampler, 760 | n_loops_total, 761 | _kicker = kicker, 762 | _kicker_params = kicker_params, 763 | ) 764 | 765 | # At this point, we need to average the observables that feed into the optimizer: 766 | self.estimator.finalize(self.n_observable_measurements) 767 | 768 | 769 | error = tf.sqrt((self.estimator["energy2"] - self.estimator["energy"]**2) \ 770 | / (self.n_observable_measurements-1)) 771 | error_jf = tf.sqrt((self.estimator["energy2_jf"] - self.estimator["energy_jf"]**2) \ 772 | / (self.n_observable_measurements-1)) 773 | 774 | # for key in self.estimator: 775 | # print(f"{key}: {self.estimator[key]}") 776 | 777 | 778 | metrics['energy/energy'] = self.estimator["energy"] 779 | metrics['energy/error'] = error 780 | metrics['energy/energy_jf'] = self.estimator["energy_jf"] 781 | metrics['energy/error_jf'] = error_jf 782 | metrics['metropolis/acceptance'] = self.estimator["acceptance"] 783 | metrics['metropolis/r'] = self.estimator['r'] 784 | metrics['energy/ke_jf'] = self.estimator["ke_jf"] 785 | metrics['energy/ke_direct'] = self.estimator["ke_direct"] 786 | metrics['energy/pe'] = self.estimator["pe"] 787 | 788 | # Here, we call the function to optimize eps and compute the gradients: 789 | 790 | if self.optimizer_config.form == "AdaptiveDelta": 791 | eps = self.optimizer_config.epsilon 792 | delta_p, opt_metrics, next_energy = self.optimize_delta(current_psi, eps) 793 | elif self.optimizer_config.form == "AdaptiveEpsilon": 794 | delta = self.optimizer_config.delta 795 | delta_p, opt_metrics, next_energy = self.optimize_eps(current_psi, delta) 796 | else: 797 | eps = self.optimizer_config.epsilon 798 | delta = self.optimizer_config.delta 799 | delta_p, opt_metrics, next_energy = self.flat_optimizer(current_psi, eps, delta) 800 | 801 | metrics.update(opt_metrics) 802 | 803 | # Compute the ratio of the previous energy and the current energy, if possible. 
804 |         if self.predicted_energy is not None:
805 |             energy_diff = self.predicted_energy - self.estimator['energy']
806 |         else:
807 |             energy_diff = 0
808 | 
809 |         metrics['energy/energy_diff'] = energy_diff
810 |         # dp_i, opt_metrics = self.gradient_calc.sr(
811 |         #     self.estimator["energy"],
812 |         #     self.estimator["dpsi_i"],
813 |         #     self.estimator["dpsi_i_EL"],
814 |         #     self.estimator["dpsi_ij"])
815 | 
816 | 
817 | 
818 | 
819 |         # And apply them to the wave function:
820 |         self.apply_gradients(delta_p, self.wavefunction.trainable_variables)
821 |         self.latest_gradients = delta_p
822 |         self.latest_psi = current_psi
823 | 
824 |         # Before moving on, set the predicted_energy:
825 |         self.predicted_energy = next_energy
826 | 
827 | 
828 |         return metrics
829 | 
830 |     # @tf.function
831 |     def unflatten_weights_or_gradients(self, flat_shape, correct_shape, weights_or_gradients):
832 | 
833 |         running_index = 0
834 |         gradient = []
835 |         for length in flat_shape:
836 |             l = length[-1]
837 |             end_index = running_index + l
838 |             gradient.append(weights_or_gradients[running_index:end_index])
839 |             running_index += l
840 |         delta_p = [ tf.reshape(g, s) for g, s in zip(gradient, correct_shape)]
841 | 
842 |         return delta_p
843 | 
--------------------------------------------------------------------------------
/mlqm/optimization/__init__.py:
--------------------------------------------------------------------------------
1 | from .GradientCalculator import GradientCalculator
2 | from .StochasticReconfiguration import StochasticReconfiguration
3 | 
4 | 
--------------------------------------------------------------------------------
/mlqm/samplers/Estimator.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy
3 | 
4 | from mlqm import DEFAULT_TENSOR_TYPE, MPI_AVAILABLE
5 | 
6 | if MPI_AVAILABLE:
7 |     import horovod.tensorflow as hvd
8 | 
9 | 
10 | class Estimator(dict):
11 |     """ Accumulate block and total averages and errors
12 |     """
13 |     def __init__(self):
14 |         dict.__init__(self)
15 |         # This class holds accumulated measurements of various tensors and their squares.
16 |         # It also enables MPI reduction.
17 | 
18 | 
19 |     def __setitem__(self, key, value):
20 |         # If the value is a tf tensor, set the item as normal:
21 |         if tf.is_tensor(value):
22 |             dict.__setitem__(self, key, value)
23 |         else:
24 |             raise KeyError(f"Estimator only accepts tf tensors!
Received {type(value)}") 25 | 26 | # @tf.function 27 | def accumulate(self, key, value): 28 | if key in self.keys(): 29 | self.__setitem__(key, value + self[key]) 30 | else: 31 | self.__setitem__(key, value) 32 | 33 | # @tf.function 34 | def allreduce(self): 35 | 36 | for key in self.keys(): 37 | self[key] = hvd.allreduce(self[key], op=hvd.Sum, device_dense="GPU") 38 | return 39 | 40 | # def accumulate(self, weight=1, ** kwargs): 41 | # # energy, energy_jf, ke_jf, ke_direct, pe, acceptance,weight,r,dpsi_i,dpsi_i_EL,dpsi_ij,estim_wgt) : 42 | # for key in kwargs: 43 | # self.tensor_dict[key] += kwargs[key] * weight 44 | # if key == "energy" or key == "energy_jf": 45 | # self.tensor_dict[key+"2"] += (kwargs[key]* weight)**2 46 | 47 | # self.tensor_dict['weight'] += weight 48 | 49 | 50 | def finalize(self, weight): 51 | 52 | for key in self.keys(): 53 | if key == 'weight': continue 54 | self[key] /= weight 55 | 56 | return 57 | 58 | # error= tf.sqrt((self.tensor_dict["energy2"] - self.tensor_dict["energy"]**2) / (nav-1)) 59 | # error_jf = tf.sqrt((self.tensor_dict["energy_jf2"] - self.tensor_dict["energy_jf"]**2) / (nav-1)) 60 | # return error, error_jf 61 | -------------------------------------------------------------------------------- /mlqm/samplers/MetropolisSampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy 3 | 4 | 5 | 6 | class MetropolisSampler(object): 7 | """Metropolis Sampler in N dimension 8 | 9 | Sample from N-D coordinates, using some initial probability distribution 10 | 11 | Relies on functional calls to sample on the fly with flexible distributions 12 | """ 13 | def __init__(self, 14 | n : int, 15 | nwalkers : int, 16 | nparticles : int, 17 | initializer : callable, 18 | init_params : iter , 19 | dtype = tf.float64): 20 | '''Initialize a metropolis sampler 21 | 22 | Create a metropolis walker with `n` walkers. Can use normal, uniform 23 | 24 | Arguments: 25 | n {int} -- Dimension 26 | nwalkers {int} -- Number of unique walkers 27 | initializer {callable} -- Function to call to initialize each walker 28 | init_params {iter} -- Parameters to pass to the initializer, unrolled automatically 29 | ''' 30 | 31 | # Set the dimension: 32 | self.n = n 33 | 34 | # Set the number of walkers: 35 | self.nwalkers = nwalkers 36 | 37 | # Set the number of particles: 38 | self.nparticles = nparticles 39 | 40 | self.size = (self.nwalkers, self.nparticles, self.n) 41 | 42 | self.dtype = dtype 43 | 44 | # Run the initalize to get the first locations: 45 | self.walkers = initializer(shape=self.size, **init_params, dtype=dtype) 46 | self.walker_history = [] 47 | 48 | def sample(self): 49 | '''Just return the current locations 50 | 51 | ''' 52 | # Make sure to wrap in tf.Variable for back prop calculations 53 | # Before returning, append the current walkers to the walker history: 54 | 55 | self.walker_history.append(self.walkers) 56 | return self.walkers 57 | 58 | def get_all_walkers(self): 59 | return self.walker_history 60 | 61 | def reset_history(self): 62 | self.walker_history = [] 63 | 64 | def kick(self, 65 | wavefunction : tf.keras.models.Model, 66 | kicker : callable, 67 | kicker_params : iter, 68 | nkicks : int ): 69 | '''Wrapper for a compiled kick function via tensorflow. 70 | 71 | This fills in the compiled function with the size and the walkers. 
72 | 73 | Arguments: 74 | wavefunction {tf.keras.models.Model} -- The wavefunction used for the metropolis walk 75 | kicker {callable} -- A callable function for generating kicks 76 | kicker_params {iter} -- Arguments to the kicker function. 77 | ''' 78 | 79 | 80 | # for i in range(nkicks): 81 | walkers, acceptance = self.internal_kicker( 82 | self.size, self.walkers, wavefunction, kicker, kicker_params, tf.constant(nkicks), dtype=self.dtype) 83 | 84 | # Update the walkers: 85 | self.walkers = walkers 86 | 87 | # Send back the acceptance: 88 | return acceptance 89 | 90 | @tf.function(experimental_compile=False) 91 | # @profile 92 | def internal_kicker(self, 93 | shape, 94 | walkers, 95 | wavefunction : tf.keras.models.Model, 96 | kicker : callable, 97 | kicker_params : iter, 98 | nkicks : tf.constant, 99 | dtype): 100 | """Sample points in N-d Space 101 | 102 | By default, samples points uniformly across all dimensions. 103 | Returns a torch tensor on the chosen device with gradients enabled. 104 | 105 | Keyword Arguments: 106 | kicker {callable} -- Function to call to create a kick for each walker 107 | kicker_params {iter} -- Parameters to pass to the kicker, unrolled automatically 108 | """ 109 | 110 | # Drop the model to reduced precision for this: 111 | # params = wavefunction.parameters() 112 | # print(params) 113 | 114 | # reduced_wf = tf.cast(wavefunction, dtype=self.dtype) 115 | # wavefunction.cast(self.dtype) 116 | 117 | 118 | # We need to compute the wave function twice: 119 | # Once for the original coordiate, and again for the kicked coordinates 120 | acceptance = tf.convert_to_tensor(0.0, dtype=dtype) 121 | # Calculate the current wavefunction value: 122 | current_wavefunction = wavefunction(walkers) 123 | 124 | # Generate a long set of random number from which we will pull: 125 | random_numbers = tf.math.log(tf.random.uniform(shape = [nkicks,shape[0],1], dtype=dtype)) 126 | 127 | # Generate a long list of kicks: 128 | # print(shape) 129 | kicks = kicker(shape=[nkicks, *shape], **kicker_params, dtype=dtype) 130 | # print(kicks.shape) 131 | 132 | # Adding spin: 133 | # A meaningful metropolis move is to pick a pair and exchange the spin 134 | # ONly one pair gets swapped at a time 135 | # Change the isospin of a pair as well. 136 | # The spin coordinate is 2 dimensions per particle: spin and isospin (each up/down) 137 | # 138 | 139 | # Computing modulus square of wavefunction in new vs old coordinates 140 | # - this kicks randomly with a guassian, and has an acceptance probaility 141 | # However, what we can do instead is to add a drift term 142 | # Instead of kicking with a random gaussian, we compute the derivative 143 | # with respect to X. 144 | # Multiply it by sigma^2 145 | # Then, clip the drift so it is not too large. 146 | # New coordinates are the old + gaussian + drift 147 | # Acceptance is ratio of modulus squared wavefunction IF the move is symmetric 148 | # So need to weight the modulus with a drift reweighting term. 149 | 150 | 151 | # Spin typically thermalizes first. 
152 |         # Fewer spin configurations allowed due to total spin conservation
153 |         #
154 | 
155 |         for i_kick in tf.range(nkicks):
156 |             # Create a kick:
157 |             kick = kicks[i_kick]
158 |             # kick = kicker(shape=shape, **kicker_params, dtype=dtype)
159 |             kicked = walkers + kick
160 | 
161 |             # Compute the values of the wave function, which should be of shape
162 |             # [nwalkers, 1]
163 |             kicked_wavefunction = wavefunction(kicked)
164 | 
165 | 
166 |             # Probability is the ratio of kicked **2 to original
167 |             probability = 2 * (kicked_wavefunction - current_wavefunction)
168 |             # Acceptance is whether the probability for that walker is greater than
169 |             # a random number between [0, 1).
170 |             # Pull the random numbers and create a boolean array
171 |             # accept = probability > tf.random.uniform(shape=[shape[0],1])
172 |             accept = probability > random_numbers[i_kick]
173 |             # accept = probability > tf.math.log(tf.random.uniform(shape=[shape[0],1]))
174 | 
175 |             # Grab the kicked wavefunction in the places it is new, to speed up metropolis:
176 |             current_wavefunction = tf.where(accept, kicked_wavefunction, current_wavefunction)
177 | 
178 |             # We need to broadcast accept to match the right shape
179 |             # Needs to come out to the shape [nwalkers, nparticles, ndim]
180 |             accept = tf.tile(accept, [1,tf.reduce_prod(shape[1:])])
181 |             accept = tf.reshape(accept, shape)
182 |             walkers = tf.where(accept, kicked, walkers)
183 | 
184 |             acceptance = tf.reduce_mean(tf.cast(accept, dtype))
185 | 
186 |         return walkers, acceptance
187 | 
--------------------------------------------------------------------------------
/mlqm/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .Estimator import Estimator
2 | from .MetropolisSampler import MetropolisSampler
3 | 
--------------------------------------------------------------------------------
/mlqm/samplers/test_metropolis_sampler.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy
3 | import tensorflow as tf
4 | 
5 | from .MetropolisSampler import MetropolisSampler
6 | 
7 | from ..models import HarmonicOscillatorWavefunction
8 | 
9 | 
10 | @pytest.mark.parametrize("dimension", [1,2,3])
11 | @pytest.mark.parametrize("nparticles", [1,2,3])
12 | def test_create_metropolis_sampler(dimension, nparticles, nwalkers = 10):
13 | 
14 |     # # Limit delta to 1/10 resolution in tests
15 |     # ndim : int,
16 |     # nwalkers : int,
17 |     # nparticles : int,
18 |     # initializer : callable,
19 |     # init_params : iter
20 | 
21 |     sampler = MetropolisSampler(dimension,
22 |         nwalkers = nwalkers,
23 |         nparticles = nparticles,
24 |         initializer = tf.random.normal,
25 |         init_params = {"mean": 0.0, "stddev" : 0.2})
26 | 
27 |     a = sampler.sample()
28 | 
29 |     assert True
30 | 
31 | 
32 | @pytest.mark.parametrize("dimension", [1,])
33 | @pytest.mark.parametrize("nparticles", [1,])
34 | def test_kick_metropolis_sampler(dimension, nparticles, nwalkers = 10):
35 | 
36 |     # Build a simple trial wavefunction to kick against (degree and alpha chosen arbitrarily):
37 | 
38 |     wavefunction = HarmonicOscillatorWavefunction(n = dimension, nparticles = nparticles, degree = 0, alpha = 1.0, dtype = tf.float64)
39 | 
40 |     sampler = MetropolisSampler(dimension,
41 |         nwalkers = nwalkers,
42 |         nparticles = nparticles,
43 |         initializer = tf.random.normal,
44 |         init_params = {"mean": 0.0, "stddev" : 0.2})
45 | 
46 |     kicker = tf.random.normal
47 |     kicker_params = {"mean" : 0.0, "stddev" : 0.5}
48 | 
49 |     # Initialize the wavefunction on the current walkers, then kick a few times:
50 |     wavefunction.call(sampler.sample())
51 |     acceptance = sampler.kick(wavefunction, kicker, kicker_params, nkicks = 10)
52 | 
53 |     a = sampler.sample()
54 | 
55 |     assert a.shape == (nwalkers, nparticles, dimension)
56 |     assert 0.0 <= float(acceptance) <= 1.0
--------------------------------------------------------------------------------
/mlqm/tests/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nuclear-Physics-with-Machine-Learning/MLQM/bee92f3c65b9fa161fb2d55f182419904398a9d3/mlqm/tests/__init__.py -------------------------------------------------------------------------------- /mlqm/tests/hamiltonian/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nuclear-Physics-with-Machine-Learning/MLQM/bee92f3c65b9fa161fb2d55f182419904398a9d3/mlqm/tests/hamiltonian/__init__.py -------------------------------------------------------------------------------- /mlqm/tests/hamiltonian/test_harmonic_oscillator.py: -------------------------------------------------------------------------------- 1 | from mlqm import models, samplers, hamiltonians 2 | 3 | import tensorflow as tf 4 | import numpy 5 | import pytest 6 | 7 | @pytest.mark.parametrize('dimension', [1,2,3]) 8 | @pytest.mark.parametrize('nwalkers', [25,50]) 9 | @pytest.mark.parametrize('degree', [0,1,2]) 10 | @pytest.mark.parametrize('n_thermalize', [100]) 11 | def test_metropolis_walk(dimension, nwalkers, degree, n_thermalize, dtype=tf.float64): 12 | 13 | mass = 1.0 14 | omega = 1.0 15 | 16 | kicker = tf.random.normal 17 | kicker_params = {"mean": 0.0, "stddev" : 0.4} 18 | 19 | # Create a sampler: 20 | sampler = samplers.MetropolisSampler( 21 | n = dimension, 22 | nwalkers = nwalkers, 23 | nparticles = 1, 24 | initializer = kicker, 25 | init_params = kicker_params, 26 | dtype = dtype) 27 | 28 | x = sampler.sample() 29 | 30 | 31 | # Use a Gaussian Wavefunction: 32 | wavefunction = models.HarmonicOscillatorWavefunction( 33 | n = dimension, 34 | nparticles=1, 35 | degree = degree, 36 | alpha = omega, 37 | dtype=dtype) 38 | 39 | # Initialize the wavefunction: 40 | wavefunction.call(x) 41 | 42 | 43 | 44 | # Run void steps to thermalize the walkers to this wavefunction: 45 | acceptance = sampler.kick(wavefunction, kicker, kicker_params, nkicks=n_thermalize) 46 | 47 | # Get the new parameters 48 | x = sampler.sample() 49 | 50 | 51 | 52 | hamiltonian = hamiltonians.HarmonicOscillator(mass=tf.constant(mass), omega=tf.constant(omega)) 53 | 54 | 55 | energy, energy_jf, ke_jf, ke_direct, pe = hamiltonian.energy(wavefunction, x) 56 | 57 | mean_energy = tf.reduce_mean(energy) 58 | theory_energy = ( omega) * (degree + 0.5) * (dimension) 59 | assert(numpy.abs(mean_energy - theory_energy ) < 0.01) 60 | 61 | # # Here, the goal is to see if we've sampled properly. 
62 | # for i_particle in range(nparticles): 63 | # positions = x[:,i_particle,:] 64 | # print(positions.shape) 65 | # for i_dim in range(dimension): 66 | # print(tf.reduce_mean(positions[:,i_dim])) 67 | 68 | def main(): 69 | dimension = 2 70 | nwalkers = 25 71 | degree = 0 72 | n_thermalize = 100 73 | test_metropolis_walk(dimension, nwalkers, degree, n_thermalize, dtype=tf.float64) 74 | 75 | 76 | if __name__ == '__main__': 77 | main() 78 | -------------------------------------------------------------------------------- /mlqm/tests/sampler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nuclear-Physics-with-Machine-Learning/MLQM/bee92f3c65b9fa161fb2d55f182419904398a9d3/mlqm/tests/sampler/__init__.py -------------------------------------------------------------------------------- /mlqm/tests/sampler/test_metropolis_walk.py: -------------------------------------------------------------------------------- 1 | from mlqm import models, samplers 2 | import numpy 3 | import math 4 | 5 | import tensorflow as tf 6 | import pytest 7 | 8 | @pytest.mark.parametrize('dimension', [1]) 9 | @pytest.mark.parametrize('nwalkers', [500,]) 10 | @pytest.mark.parametrize('nparticles', [1]) 11 | @pytest.mark.parametrize('n_thermalize', [500]) 12 | def test_metropolis_walk(dimension, nwalkers, nparticles, n_thermalize): 13 | 14 | 15 | alpha = 0.9 16 | 17 | kicker = tf.random.normal 18 | kicker_params = {"mean": 0.0, "stddev" : 0.4} 19 | 20 | # Create a sampler: 21 | sampler = samplers.MetropolisSampler( 22 | n = dimension, 23 | nwalkers = nwalkers, 24 | nparticles = nparticles, 25 | initializer = kicker, 26 | init_params = kicker_params, 27 | dtype = tf.float64) 28 | 29 | 30 | # Use a Gaussian Wavefunction: 31 | wavefunction = models.GaussianBoundaryCondition(n = dimension, exp=math.sqrt(alpha), trainable=False, dtype=tf.float64) 32 | 33 | x = sampler.sample() 34 | 35 | # Run void steps to thermalize the walkers to this wavefunction: 36 | acceptance = sampler.kick(wavefunction, kicker, kicker_params, nkicks=n_thermalize) 37 | x = sampler.sample() 38 | 39 | 40 | y = wavefunction(x) 41 | integral = tf.reduce_mean(y) 42 | 43 | # # For a gaussian wave function, it is expected the integral (average of y values) 44 | # # Should converge to approximately log(sqrt(pi/alpha)), where alpha is the exponential parameter. 45 | 46 | # # The convergence error should be approximately sqrt(n_walkers), statistical uncertainty. 47 | 48 | # uncert = 1./ math.sqrt(nwalkers) 49 | # print(uncert) 50 | 51 | # analytic_integral = math.log(math.pow(math.pi / (alpha), dimension * 0.5)) 52 | 53 | # print("analytic_integral: ", analytic_integral) 54 | # print("integral: ", integral) 55 | 56 | # We ought to be able to see that the sampled positions are, in all dimensions, roughly following 57 | # the gaussian. 58 | 59 | x = x.numpy() 60 | sigma = math.pow(1./ (4*alpha), 0.5) 61 | 62 | # Here, the goal is to see if we've sampled properly. 63 | for i_particle in range(nparticles): 64 | positions = x[:,i_particle,:] 65 | print(positions.shape) 66 | for i_dim in range(dimension): 67 | where_up = positions[:,i_dim] < sigma 68 | where_down = positions[:,i_dim] > -sigma 69 | 70 | all_where = numpy.logical_and(where_up, where_down) 71 | 72 | frac = numpy.sum(all_where.astype(numpy.float32)) / nwalkers 73 | print(frac) 74 | 75 | 76 | assert(False) --------------------------------------------------------------------------------
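# The walk test above ends with assert(False), which forces a failure.  For a
# Gaussian sampled density with the sigma computed in that test, the expected
# fraction of walkers inside +/- one sigma is erf(1/sqrt(2)) ~ 0.683; a hedged
# sketch of a concrete check (the 0.05 tolerance is a loose statistical guess for
# 500 walkers, not a value taken from the repository) could be:
import math

expected_fraction = math.erf(1.0 / math.sqrt(2.0))    # ~ 0.6827
tolerance = 0.05
# assert abs(frac - expected_fraction) < tolerance    # with `frac` computed per dimension in the test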