├── .gitignore ├── 00_Example ├── example.ipynb └── presentation.pdf ├── 01_Introduction ├── Introduction.ipynb ├── einsum.ipynb └── einsum_solutions.ipynb ├── 02_SCF ├── advanced │ ├── diis_psi4.ipynb │ ├── diis_psi4_solution.ipynb │ ├── diis_pyscf.ipynb │ ├── diis_pyscf_solution.ipynb │ ├── uhf_diis_psi4.ipynb │ ├── uhf_diis_pyscf.ipynb │ ├── uhf_psi4.ipynb │ └── uhf_pyscf.ipynb ├── basics │ ├── scf_psi4.ipynb │ ├── scf_psi4_solution.ipynb │ ├── scf_pyscf.ipynb │ └── scf_pyscf_solution.ipynb └── scf_slides.pdf ├── 03_MP2 ├── basics │ ├── mp2_psi4.ipynb │ ├── mp2_psi4_solution.ipynb │ ├── mp2_pyscf.ipynb │ └── mp2_pyscf_solution.ipynb └── mp2_slides.pdf ├── 04_Machine_Learning ├── advanced │ └── bayesopt_boston.ipynb ├── basics │ ├── coulomb_matrix.ipynb │ ├── coulomb_matrix_solutions.ipynb │ ├── methane.xyz │ ├── ml_boston.ipynb │ └── ml_boston_solutions.ipynb └── ml_slides.pdf ├── 05_MolecularDynamics ├── README.txt ├── enhanced_sampling │ ├── EnhancedSampling.pdf │ └── metadynamics.pdf ├── pdb │ ├── 2mwy.pdb │ ├── 3rgf.pdb │ ├── cdk8.pdb │ └── mdm2.pdb ├── template │ ├── images │ │ ├── divx2pass.log.mbtree │ │ ├── encode.sh │ │ ├── gfp.mov │ │ └── render.sh │ ├── parameter.file.01 │ ├── run.sh │ ├── setup │ │ ├── NOTES.txt │ │ ├── protein.inp │ │ └── run_prep.sh │ ├── sim.inp │ └── sim.inp.notes ├── tools │ ├── calc_rmsd.py │ └── rmsd.py └── trajectories │ ├── .p53.xtc_offsets.npz │ ├── cdk8.xtc │ ├── cdk8_folded.pdb │ ├── mdm2.xtc │ └── mdm2_folded.pdb ├── 06_Basis_Sets └── presentation │ ├── beyond_lcao.pdf │ ├── images │ └── fourier_tranform.gif │ └── lcao_basis_sets.pdf ├── A1_Git └── git_intro.pdf ├── AUTHORS.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md └── environment.yml /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # OS generated files 107 | .DS_Store 108 | 109 | #psi4numpy timer for jobs 110 | timer.dat 111 | -------------------------------------------------------------------------------- /00_Example/example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Example Notebook" 12 | ] 13 | } 14 | ], 15 | "metadata": { 16 | "kernelspec": { 17 | "display_name": "Python 3", 18 | "language": "python", 19 | "name": "python3" 20 | }, 21 | "language_info": { 22 | "codemirror_mode": { 23 | "name": "ipython", 24 | "version": 3 25 | }, 26 | "file_extension": ".py", 27 | "mimetype": "text/x-python", 28 | "name": "python", 29 | "nbconvert_exporter": "python", 30 | "pygments_lexer": "ipython3", 31 | "version": "3.6.5" 32 | } 33 | }, 34 | "nbformat": 4, 35 | "nbformat_minor": 2 36 | } 37 | -------------------------------------------------------------------------------- /00_Example/presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/00_Example/presentation.pdf -------------------------------------------------------------------------------- /01_Introduction/Introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Introduction Notebook\n", 12 | "\n", 13 | "Here we will cover common python libraries.\n", 14 | "\n", 15 | "1. [Numpy](#numpy) \n", 16 | "\n", 17 | "2. [Scipy](#scipy) \n", 18 | "\n", 19 | "3. [Matplotlib](#matplotlib) \n", 20 | "\n", 21 | "4. [PySCF](#pyscf)\n", 22 | "\n", 23 | "5. [Psi4](#psi4)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Extra Practice\n", 31 | "For a more hands-on introduction notebook, check out the notebook at [this link](https://github.com/amandadumi/numerical_methods_release). Click the 'launch binder' badge. This will take you to a web-hosted Jupyter notebook set on Binder. 
Navigate to `IPython_notebooks/01_Introduction` and click on the 01_Introduction.ipynb to launch it in the browser. You are also welcome to clone the repository and run the notebook locally." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "slideshow": { 38 | "slide_type": "slide" 39 | } 40 | }, 41 | "source": [ 42 | "\n", 43 | "## Numpy\n", 44 | "Fundamental package for scientific computing with Python" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "slideshow": { 52 | "slide_type": "subslide" 53 | } 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "import numpy as np\n", 58 | "\n", 59 | "a = np.array((4, 5, 6, 6, 7, 8))\n", 60 | "b = np.array((8, 9, 2, 4, 6, 7))\n", 61 | "\n", 62 | "c = np.dot(a, b)\n", 63 | "print(c)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": { 69 | "slideshow": { 70 | "slide_type": "slide" 71 | } 72 | }, 73 | "source": [ 74 | "\n", 75 | "## Scipy\n", 76 | "\n", 77 | "Provides many user-friendly and efficient numerical routines such as routines for numerical integration and optimization" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "scrolled": true, 85 | "slideshow": { 86 | "slide_type": "subslide" 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "import scipy as sp\n", 92 | "import scipy.linalg as la\n", 93 | "\n", 94 | "mat = np.random.rand(5, 5)\n", 95 | "eig_val, eig_vec = la.eig(mat)\n", 96 | "\n", 97 | "print('eigenvalues:\\n {}\\n'.format(eig_val))\n", 98 | "print('eigenvectors:\\n {}'.format(eig_vec))" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": { 104 | "collapsed": true, 105 | "slideshow": { 106 | "slide_type": "slide" 107 | } 108 | }, 109 | "source": [ 110 | "## Matplotlib\n", 111 | "\n", 112 | "Python library for 2- and 3-D visualization.\n", 113 | "\n", 114 | "Pyplot provides convenient functions to generate plots." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "slideshow": { 122 | "slide_type": "slide" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "import matplotlib.pyplot as plt\n", 128 | "\n", 129 | "x = np.linspace(0, 5, 100)\n", 130 | "y = np.sin(x)\n", 131 | "plt.plot(x, y)\n", 132 | "plt.show()" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": { 145 | "slideshow": { 146 | "slide_type": "slide" 147 | } 148 | }, 149 | "source": [ 150 | "## Psi4Numpy\n", 151 | "\n", 152 | "Psi4 is an open source quantum chemistry package.\n", 153 | "\n", 154 | "Recently introduced [Psi4Numpy](https://github.com/psi4/psi4numpy), a collections of notebooks for teaching quantum chemistry. 
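If Psi4 prints a lot of log output into the notebook or needs more memory, it can help to configure it once before running any calculation. A minimal sketch (the 500 MB figure and the `output.dat` filename are arbitrary choices, not part of this notebook):

```python
import psi4

# cap Psi4's memory usage and send its verbose log to a file instead of the notebook
psi4.set_memory('500 MB')
psi4.core.set_output_file('output.dat', False)
```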
\n", 155 | "\n" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": { 161 | "slideshow": { 162 | "slide_type": "subslide" 163 | } 164 | }, 165 | "source": [ 166 | "The cell below runs an SCF cyle for water with the cc-pvdz basis using Psi4Numpy\n" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "slideshow": { 174 | "slide_type": "subslide" 175 | } 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "import psi4\n", 180 | "\n", 181 | "# read in geometry for water\n", 182 | "h2o = psi4.geometry(\"\"\"\n", 183 | "O 0.0000000 0.0000000 0.0000000\n", 184 | "H 0.7569685 0.0000000 -0.5858752\n", 185 | "H -0.7569685 0.0000000 -0.5858752\n", 186 | "\"\"\")\n", 187 | "\n", 188 | "# set basis set\n", 189 | "psi4.set_options({'basis': 'cc-pvdz'})\n", 190 | "\n", 191 | "# run an scf calculation\n", 192 | "scf_e, scf_wfn = psi4.energy('scf', return_wfn=True)\n", 193 | "print('converged SCF energy: {}'.format(scf_e))" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": { 199 | "slideshow": { 200 | "slide_type": "slide" 201 | } 202 | }, 203 | "source": [ 204 | "## PySCF\n", 205 | "\n", 206 | "Python-based quantum simulations" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "slideshow": { 213 | "slide_type": "slide" 214 | } 215 | }, 216 | "source": [ 217 | "The cell below runs an SCF cycle for water with the cc-pvdz basis using PySCF" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "slideshow": { 225 | "slide_type": "subslide" 226 | } 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "from pyscf import gto, scf\n", 231 | "\n", 232 | "# read in geometry\n", 233 | "mol = gto.M(atom='O 0.0000000 0.0000000 0.0000000; H 0.7569685 0.0000000 -0.5858752; H -0.7569685 0.0000000 -0.5858752')\n", 234 | "mol.basis = 'ccpvdz'\n", 235 | "# run an scf calculation\n", 236 | "mol_scf = scf.RHF(mol)\n", 237 | "mol_scf.kernel()" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [] 246 | } 247 | ], 248 | "metadata": { 249 | "celltoolbar": "Slideshow", 250 | "kernelspec": { 251 | "display_name": "Python 3", 252 | "language": "python", 253 | "name": "python3" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.6.5" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 2 270 | } 271 | -------------------------------------------------------------------------------- /01_Introduction/einsum.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Einstein Notation and np.einsum\n", 8 | "\n", 9 | "## Useful Resources\n", 10 | "- [NumPy einsum](https://docs.scipy.org/doc/numpy/reference/generated/numpy.einsum.html)\n", 11 | "- [A basic introduction to NumPy's einsum](http://ajcr.net/Basic-guide-to-einsum/)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## 
What is Einstein notation?\n", 28 | "Einstein notation is a notational convention that simplifies expressions containing vectors, matrices, or tensors.\n", 29 | "\n", 30 | "\"I have made a great discovery in mathematics; I have suppressed the summation sign every time that the summation must be made over an index which occurs twice...\" ~ Einstein (Kollros 1956; Pais 1982, p. 216).\n", 31 | "\n", 32 | "### Vector Example\n", 33 | "Let's have two three dimensional vectors $\\textbf{A}$ and $\\textbf{B}$:\n", 34 | "$$\\textbf{A} = A_x \\hat{x} + A_y \\hat{y} + A_z \\hat{z}$$\n", 35 | "$$\\textbf{B} = B_x \\hat{x} + B_y \\hat{y} + B_z \\hat{z}$$\n", 36 | "\n", 37 | "If we wanted to do the dot product of $\\textbf{A}$ and $\\textbf{B}$ we would have:\n", 38 | "$$\\textbf{A}\\cdot \\textbf{B} = A_x B_x + A_y B_y + A_z B_z$$\n", 39 | "\n", 40 | "This gives us a scalar that is the sum of the products:\n", 41 | "$$\\textbf{A}\\cdot \\textbf{B} = \\sum_{i=1}^{N} A_i B_i \\quad \\textrm{where} \\quad N = 3$$" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "A = np.array([1, 2, 3])\n", 51 | "B = np.array([4, 5, 6])" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "np.sum(np.multiply(A, B))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "np.einsum('i,i->', A, B)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "### Let's look at a 3x3 example" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "C = np.random.rand(3, 3)\n", 86 | "D = np.random.rand(3, 3)\n", 87 | "\n", 88 | "print(C)\n", 89 | "print('\\n')\n", 90 | "print(D)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "np.sum(np.multiply(C, D)) " 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "%%timeit\n", 109 | "np.sum(np.multiply(C, D)) " 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "np.einsum('ij,ij->', C, D)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "scrolled": true 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "%%timeit\n", 130 | "np.einsum('ij,ij->', C, D)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "## What can Einstein notation do?\n", 138 | "\n", 139 | "| String | np equiv. 
| Description|\n", 140 | "|-|-|-|\n", 141 | "| 'ij', C | C | returns C |\n", 142 | "| 'ji', C | C.T | transpose of C |\n", 143 | "| 'ii->i', C | np.diag(C) | returns diagonal |\n", 144 | "| 'ii', C | np.trace(C) | returns trace |\n", 145 | "| 'ij->', C | np.sum(C) | sum of C |\n", 146 | "| 'ij->j', C | np.sum(C, axis=0) | sum down columns of C |\n", 147 | "| 'ij,ij->ij', C, D | C * D | element-wise multiplication of C and D |\n", 148 | "| 'ij,jk', C, D | C.dot(D) | matrix multiplication of C and D |\n", 149 | "\n", 150 | "[For more](http://ajcr.net/Basic-guide-to-einsum/)\n", 151 | "\n", 152 | "## Try your hand at Einstein notation\n", 153 | "- sum along rows of C\n", 154 | "- C * D.T\n", 155 | "- inner product of C and D" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "#### Sum along rows of C" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "#### C * D.T" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "#### Inner product of C and D" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "### Dot Product\n", 205 | "Time 4 different ways a dot product can be performed" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "%%timeit\n" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "%%timeit\n" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "%%timeit\n" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "%%timeit\n" 242 | ] 243 | } 244 | ], 245 | "metadata": { 246 | "kernelspec": { 247 | "display_name": "Python 3", 248 | "language": "python", 249 | "name": "python3" 250 | }, 251 | "language_info": { 252 | "codemirror_mode": { 253 | "name": "ipython", 254 | "version": 3 255 | }, 256 | "file_extension": ".py", 257 | "mimetype": "text/x-python", 258 | "name": "python", 259 | "nbconvert_exporter": "python", 260 | "pygments_lexer": "ipython3", 261 | "version": "3.6.6" 262 | } 263 | }, 264 | "nbformat": 4, 265 | "nbformat_minor": 2 266 | } 267 | -------------------------------------------------------------------------------- /01_Introduction/einsum_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Einstein Notation and np.einsum\n", 8 | "\n", 9 | "## Useful Resources\n", 10 | "- [NumPy einsum](https://docs.scipy.org/doc/numpy/reference/generated/numpy.einsum.html)\n", 11 | "- [A basic introduction to NumPy's einsum](http://ajcr.net/Basic-guide-to-einsum/)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": 
{}, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## What is Einstein notation?\n", 28 | "Einstein notation is a notational convention that simplifies expressions containing vectors, matrices, or tensors.\n", 29 | "\n", 30 | "\"I have made a great discovery in mathematics; I have suppressed the summation sign every time that the summation must be made over an index which occurs twice...\" ~ Einstein (Kollros 1956; Pais 1982, p. 216).\n", 31 | "\n", 32 | "### Vector Example\n", 33 | "Let's have two three dimensional vectors $\\textbf{A}$ and $\\textbf{B}$:\n", 34 | "$$\\textbf{A} = A_x \\hat{x} + A_y \\hat{y} + A_z \\hat{z}$$\n", 35 | "$$\\textbf{B} = B_x \\hat{x} + B_y \\hat{y} + B_z \\hat{z}$$\n", 36 | "\n", 37 | "If we wanted to do the dot product of $\\textbf{A}$ and $\\textbf{B}$ we would have:\n", 38 | "$$\\textbf{A}\\cdot \\textbf{B} = A_x B_x + A_y B_y + A_z B_z$$\n", 39 | "\n", 40 | "This gives us a scalar that is the sum of the products:\n", 41 | "$$\\textbf{A}\\cdot \\textbf{B} = \\sum_{i=1}^{N} A_i B_i \\quad \\textrm{where} \\quad N = 3$$" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "A = np.array([1, 2, 3])\n", 51 | "B = np.array([4, 5, 6])" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "32" 63 | ] 64 | }, 65 | "execution_count": 3, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "np.sum(np.multiply(A, B))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "32" 83 | ] 84 | }, 85 | "execution_count": 4, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "np.einsum('i,i->', A, B)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Let's look at a 3x3 example" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 5, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "[[0.44892352 0.23946046 0.3720617 ]\n", 111 | " [0.29549156 0.0521187 0.5270508 ]\n", 112 | " [0.27981144 0.73042791 0.9594615 ]]\n", 113 | "\n", 114 | "\n", 115 | "[[0.16955709 0.30609099 0.08329563]\n", 116 | " [0.0690296 0.76714205 0.89986847]\n", 117 | " [0.15000005 0.35051006 0.47822482]]\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "C = np.random.rand(3, 3)\n", 123 | "D = np.random.rand(3, 3)\n", 124 | "\n", 125 | "print(C)\n", 126 | "print('\\n')\n", 127 | "print(D)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "1.4718948471833324" 139 | ] 140 | }, 141 | "execution_count": 6, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "np.sum(np.multiply(C, D)) " 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 7, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "3.77 µs ± 90.6 ns per loop (mean ± std. dev. 
of 7 runs, 100000 loops each)\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "%%timeit\n", 165 | "np.sum(np.multiply(C, D)) " 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 8, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/plain": [ 176 | "1.4718948471833326" 177 | ] 178 | }, 179 | "execution_count": 8, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "np.einsum('ij,ij->', C, D)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 9, 191 | "metadata": { 192 | "scrolled": true 193 | }, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "1.93 µs ± 15.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "%%timeit\n", 205 | "np.einsum('ij,ij->', C, D)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "## What can Einstein notation do?\n", 213 | "\n", 214 | "| String | np equiv. | Description|\n", 215 | "|-|-|-|\n", 216 | "| 'ij', C | C | returns C |\n", 217 | "| 'ji', C | C.T | transpose of C |\n", 218 | "| 'ii->i', C | np.diag(C) | returns diagonal |\n", 219 | "| 'ii', C | np.trace(C) | returns trace |\n", 220 | "| 'ij->', C | np.sum(C) | sum of C |\n", 221 | "| 'ij->j', C | np.sum(C, axis=0) | sum down columns of C |\n", 222 | "| 'ij,ij->ij', C, D | C * D | element-wise multiplication of C and D |\n", 223 | "| 'ij,jk', C, D | C.dot(D) | matrix multiplication of C and D |\n", 224 | "\n", 225 | "[For more](http://ajcr.net/Basic-guide-to-einsum/)\n", 226 | "\n", 227 | "## Try your hand at Einstein notation\n", 228 | "- sum along rows of C\n", 229 | "- C * D.T\n", 230 | "- inner product of C and D" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "#### Sum along rows of C" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 10, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "text/plain": [ 248 | "array([1.06044568, 0.87466107, 1.96970085])" 249 | ] 250 | }, 251 | "execution_count": 10, 252 | "metadata": {}, 253 | "output_type": "execute_result" 254 | } 255 | ], 256 | "source": [ 257 | "np.einsum('ij->i', C)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "#### C * D.T" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 11, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/plain": [ 275 | "array([[0.07611817, 0.01652986, 0.05580928],\n", 276 | " [0.09044731, 0.03998245, 0.18473661],\n", 277 | " [0.02330707, 0.65728904, 0.4588383 ]])" 278 | ] 279 | }, 280 | "execution_count": 11, 281 | "metadata": {}, 282 | "output_type": "execute_result" 283 | } 284 | ], 285 | "source": [ 286 | "np.einsum('ij,ji->ij', C, D)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "#### Inner product of C and D" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 12, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/plain": [ 304 | "array([[0.18040597, 0.5494958 , 0.32920099],\n", 305 | " [0.10995678, 0.53465651, 0.31464066],\n", 306 | " [0.35094037, 1.44304639, 0.75683236]])" 307 | ] 308 | }, 309 | "execution_count": 12, 310 | "metadata": {}, 311 | "output_type": "execute_result" 312 | } 313 | ], 314 | 
"source": [ 315 | "np.einsum('ij,kj->ik', C, D)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "### Dot Product\n", 323 | "Time 4 different ways a dot product can be performed" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 13, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "865 ns ± 2.93 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 336 | ] 337 | } 338 | ], 339 | "source": [ 340 | "%%timeit\n", 341 | "C @ D" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 14, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "name": "stdout", 351 | "output_type": "stream", 352 | "text": [ 353 | "917 ns ± 4.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 354 | ] 355 | } 356 | ], 357 | "source": [ 358 | "%%timeit\n", 359 | "np.dot(C, D)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 15, 365 | "metadata": {}, 366 | "outputs": [ 367 | { 368 | "name": "stdout", 369 | "output_type": "stream", 370 | "text": [ 371 | "897 ns ± 5.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 372 | ] 373 | } 374 | ], 375 | "source": [ 376 | "%%timeit\n", 377 | "C.dot(D)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 16, 383 | "metadata": {}, 384 | "outputs": [ 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "1.95 µs ± 8.14 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 390 | ] 391 | } 392 | ], 393 | "source": [ 394 | "%%timeit\n", 395 | "np.einsum('ij,jk', C, D)" 396 | ] 397 | } 398 | ], 399 | "metadata": { 400 | "kernelspec": { 401 | "display_name": "Python 3", 402 | "language": "python", 403 | "name": "python3" 404 | }, 405 | "language_info": { 406 | "codemirror_mode": { 407 | "name": "ipython", 408 | "version": 3 409 | }, 410 | "file_extension": ".py", 411 | "mimetype": "text/x-python", 412 | "name": "python", 413 | "nbconvert_exporter": "python", 414 | "pygments_lexer": "ipython3", 415 | "version": "3.6.6" 416 | } 417 | }, 418 | "nbformat": 4, 419 | "nbformat_minor": 2 420 | } 421 | -------------------------------------------------------------------------------- /02_SCF/advanced/diis_psi4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Direct Inversion of Iterative Subspace" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import scipy.linalg as spla\n", 18 | "import psi4\n", 19 | "import time" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Useful Resources\n", 27 | "- [P. Pulay. Chem. Phys. Lett. 73, 393-398 (1980)](https://www.sciencedirect.com/science/article/pii/0009261480803964)\n", 28 | "- [DIIS by C. 
David Sherrill](http://vergil.chemistry.gatech.edu/notes/diis/diis.pdf)\n", 29 | "- [DePrince Research Group DIIS Tutorial](https://www.chem.fsu.edu/~deprince/programming_projects/diis/)\n", 30 | "- [Psi4Numpy DIIS Tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/03_Hartree-Fock/3b_rhf-diis.ipynb)\n", 31 | "- [DIIS by MolSSI-Education](https://github.com/MolSSI-Education/QM_2017_SSS_Team8/blob/master/Tutorial_PDFs/02_SCF_DIIS.pdf)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Introduction\n", 39 | "Iterative methods are commonly used to solve systems of linear equations. These methods can suffer from issues such as slow convergence and high computational cost. Today we are going to work with DIIS to accelerate our convergence. DIIS stands for Direct Inversion of Iterative Subspace and is commonly used to aid the convergence of SCF wavefunctions. We will build on our previous example of a simple RHF.\n", 40 | "\n", 41 | "## General Theory\n", 42 | "During the iterative solution we generate a set of trial vectors $p^{i}$ that are converging to the true solution $p^{f}$. This lets us form a set of residual vectors\n", 43 | "$$\n", 44 | "\\Delta \\mathbf{p} = \\mathbf{p}^{i+1} - \\mathbf{p}^{i}\n", 45 | "$$\n", 46 | "\n", 47 | "DIIS assumes that the true solution can be approximated as a linear combination of the previous trial vector guesses, \n", 48 | "$$\\mathbf{p} = \\sum_{i} c_{i} \\mathbf{p}^{i}$$\n", 49 | "\n", 50 | "\n", 51 | "The coefficients $c_{i}$ can be obtained by requiring the residual vector to be a least-squares approximation to the zero vector \n", 52 | "\n", 53 | "$$\\Delta \\mathbf{p} = \\sum_{i} c_{i} \\Delta \\mathbf{p}^{i}$$\n", 54 | "\n", 55 | "\n", 56 | "constrained by\n", 57 | "\n", 58 | "$$\\sum_{i} c_{i} =1$$\n", 59 | "\n", 60 | "\n", 61 | "This lets us represent each trial function $p^{i}$ as the true solution plus an error vector. \n", 62 | "$$\\mathbf{p} = \\sum_{i} c_{i} (\\mathbf{p}^{f} + \\mathbf{e}^{i}) = \\mathbf{p}^{f} \\sum_{i} c_{i} + \\sum_{i} c_{i} \\mathbf{e}^{i}$$\n", 63 | "\n", 64 | "Convergence will result in minimizing the error, which in turn causes the second term above to vanish. 
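As a preview of the `diis` helper that is stubbed out later in this notebook, a minimal NumPy sketch of the finished extrapolation step might look like the following (the name `diis_extrapolate` and its arguments are illustrative assumptions, not the notebook's required interface):

```python
import numpy as np

def diis_extrapolate(F_list, resid_list):
    """Return the DIIS-extrapolated Fock matrix from stored trial/residual vectors."""
    n = len(F_list)
    # B matrix of residual overlaps, bordered by -1 entries for the constraint row/column
    B = np.zeros((n + 1, n + 1))
    B[-1, :] = -1.0
    B[:, -1] = -1.0
    B[-1, -1] = 0.0
    for i in range(n):
        for j in range(n):
            B[i, j] = np.einsum('ij,ij->', resid_list[i], resid_list[j])
    # right-hand side of the Pulay equation: zeros with -1 in the constraint slot
    rhs = np.zeros(n + 1)
    rhs[-1] = -1.0
    # solve for the coefficients c_i (the final entry is the Lagrange multiplier)
    coeff = np.linalg.solve(B, rhs)
    # assemble the extrapolated Fock matrix F = sum_i c_i F_i
    F_diis = np.zeros_like(F_list[0])
    for c_i, F_i in zip(coeff[:-1], F_list):
        F_diis += c_i * F_i
    return F_diis
```

The derivation of the coefficients $c_i$ and of the bordered $\mathbf{B}$ matrix used above continues below.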
For our DIIS solution $\\mathbf{p}$ to be equal to the true solution $\\mathbf{p}^{f}$, we must have $\\sum_{i} c_{i} =1$.\n", 65 | "\n", 66 | "Need to minimize the norm of the residual vector subject to the constraint\n", 67 | "$$ \\left \\langle \\Delta \\mathbf{p} | \\Delta \\mathbf{p} \\right \\rangle = \\sum_{ij} c_{i}^{\\ast} c_{j} \\left \\langle \\Delta \\mathbf{p}^{i} | \\Delta \\mathbf{p}^{j} \\right \\rangle $$\n", 68 | "\n", 69 | "We can minimize using a Lagrange multiplier\n", 70 | "$$ \\cal L = c^{\\dagger} \\mathbf{B} c - \\lambda (1 - \\sum_{i} c_{i})$$\n", 71 | "\n", 72 | "where B is the residual vector overlap.\n", 73 | "$$ B_{ij}=\\left \\langle \\Delta \\mathbf{p}^{i} | \\Delta \\mathbf{p}^{j} \\right \\rangle $$\n", 74 | "\n", 75 | "This allows for us to minimize $\\cal L$ with respect to a coeff $c_{k}$\n", 76 | "$$\\frac{\\partial \\cal L }{\\partial c_{k}}=0 = \\sum_{j} c_{j} B_{kj} + \\sum_{i} c_{i} B_{ik} - \\lambda = 2 \\sum_{i} c_{i} B_{ik} - \\lambda$$\n", 77 | "\n", 78 | "We can represent this with the matrix below\n", 79 | "\n", 80 | "$$\n", 81 | "\\begin{bmatrix}\n", 82 | "B_{11} & B_{12} & \\cdots & B_{1m} & -1 & \\\\ \n", 83 | "B_{21} & B_{22} & \\cdots & B_{2m} & -1 & \\\\ \n", 84 | "\\vdots & \\vdots & \\ddots & \\vdots & \\vdots & \\\\ \n", 85 | "B_{m1} & B_{m2} & \\cdots & B_{mm} & -1 & \\\\ \n", 86 | "-1 & -1 & \\cdots & -1 & 0 & \n", 87 | "\\end{bmatrix} \n", 88 | "\\begin{bmatrix}\n", 89 | "c_{1} & \\\\ \n", 90 | "c_{2} & \\\\ \n", 91 | "\\vdots & \\\\ \n", 92 | "c_{m} & \\\\ \n", 93 | "\\lambda & \n", 94 | "\\end{bmatrix} \n", 95 | "=\n", 96 | "\\begin{bmatrix}\n", 97 | "0 & \\\\ \n", 98 | "0 & \\\\ \n", 99 | "\\vdots & \\\\ \n", 100 | "0 & \\\\ \n", 101 | "-1 & \n", 102 | "\\end{bmatrix} \n", 103 | "$$" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "## Imports" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Load Molecule" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# Define molecule\n", 127 | "mol = psi4.geometry(\"\"\"\n", 128 | "O 0.0000000 0.0000000 0.0000000\n", 129 | "H 0.7569685 0.0000000 -0.5858752\n", 130 | "H -0.7569685 0.0000000 -0.5858752\n", 131 | "symmetry c1\n", 132 | "\"\"\")\n", 133 | "psi4.set_options({'basis': 'sto-3g'})\n", 134 | "wfn = psi4.core.Wavefunction.build(mol, psi4.core.get_global_option('BASIS'))\n", 135 | "mints = psi4.core.MintsHelper(wfn.basisset())\n", 136 | "\n", 137 | "# Get number of electrons\n", 138 | "num_elec_alpha = wfn.nalpha()\n", 139 | "num_elec_beta = wfn.nbeta()\n", 140 | "num_elec = num_elec_alpha + num_elec_beta\n", 141 | "\n", 142 | "# Get nuclear repulsion energy\n", 143 | "E_nuc = mol.nuclear_repulsion_energy()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "## Calculate Molecular Integrals" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# Calculate overlap integrals\n", 160 | "S = np.asarray(mints.ao_overlap())\n", 161 | "\n", 162 | "# Calculate kinetic energy integrals\n", 163 | "T = np.asarray(mints.ao_kinetic())\n", 164 | "\n", 165 | "# Calculate nuclear attraction integrals\n", 166 | "V = np.asarray(mints.ao_potential())\n", 167 | "\n", 168 | "# Form core Hamiltonian\n", 169 | "H = T + V\n", 170 | "\n", 171 | "# 
Calculate two electron integrals\n", 172 | "eri = np.asarray(mints.ao_eri())\n", 173 | "\n", 174 | "# Get number of atomic orbitals\n", 175 | "num_ao = np.shape(S)[0]\n", 176 | "\n", 177 | "print(np.shape(eri))" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "## Core Guess" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "# AO orthogonalization matrix\n", 194 | "A = spla.fractional_matrix_power(S, -0.5)\n", 195 | "\n", 196 | "# Solve the generalized eigenvalue problem\n", 197 | "E_orbitals, C = spla.eigh(H, S)\n", 198 | "\n", 199 | "# Compute initial density matrix\n", 200 | "D = np.zeros((num_ao, num_ao))\n", 201 | "for i in range(num_ao):\n", 202 | " for j in range(num_ao):\n", 203 | " for k in range(num_elec_alpha):\n", 204 | " D[i, j] += C[i, k] * C[j, k]" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "## DIIS Function\n", 212 | "\n", 213 | "### Steps in DIIS Function\n", 214 | "1. Build B matrix\n", 215 | "2. Solve the Pulay equation\n", 216 | "3. Build the DIIS Fock matrix" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "def diis(F_list, diis_res):\n", 226 | " # Build B matrix\n", 227 | "\n", 228 | " # Right hand side of Pulay eqn\n", 229 | "\n", 230 | " # Solve Pulay for coeffs\n", 231 | "\n", 232 | " # Build DIIS Fock\n", 233 | "\n", 234 | " return F_diis" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "## Variables, Criteria, and Organization" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "# 2 helper functions for printing during SCF\n", 251 | "def print_start_iterations():\n", 252 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 253 | " \"Iter\", \"Time(s)\", \"DIIS RMS\", \"delta E\", \"E_elec\")))\n", 254 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 255 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n", 256 | "\n", 257 | "\n", 258 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, diis_rms, iteration_E_diff, E_elec):\n", 259 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(\n", 260 | " iteration_num, iteration_end_time - iteration_start_time, diis_rms, iteration_E_diff, E_elec)))\n", 261 | "\n", 262 | "\n", 263 | "# Set stopping criteria\n", 264 | "iteration_max = 100\n", 265 | "convergence_E = 1e-9\n", 266 | "convergence_DIIS = 1e-5\n", 267 | "\n", 268 | "# Loop variables\n", 269 | "iteration_num = 0\n", 270 | "E_total = 0\n", 271 | "E_elec = 0.0\n", 272 | "iteration_E_diff = 0.0\n", 273 | "iteration_rmsc_dm = 0.0\n", 274 | "converged = False\n", 275 | "exceeded_iterations = False" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "## DIIS SCF Iteration\n", 283 | "Our trial vector will be the Fock matrix with the error vector being the orthonormalized orbital gradient.\n", 284 | "\n", 285 | "$$ r_{\\mu \\upsilon} = (\\mathbf{A^{T}}(\\mathbf{FDS} - \\mathbf{SDF}) \\mathbf{A})_{\\mu \\upsilon} $$\n", 286 | "\n", 287 | "### Call DIIS in SCF Iteration\n", 288 | "1. 
Build DIIS Residual (error vector) that will be used to make the B matrix\n", 289 | "2. Store trial and residual vectors\n", 290 | "3. Call DIIS to start after the first iteration\n", 291 | "4. Compute the next guess with the DIIS Fock matrix" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "scrolled": true 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "# Trial & Residual vector lists\n", 303 | "F_list = []\n", 304 | "DIIS_resid = []\n", 305 | "\n", 306 | "print(\"{:^79}\".format('=====> Starting SCF Iterations <=====\\n'))\n", 307 | "print_start_iterations()\n", 308 | "while (not converged and not exceeded_iterations):\n", 309 | " # Store last iteration and increment counters\n", 310 | " iteration_start_time = time.time()\n", 311 | " iteration_num += 1\n", 312 | " E_elec_last = E_elec\n", 313 | " D_last = np.copy(D)\n", 314 | "\n", 315 | " # Form G matrix\n", 316 | " G = np.zeros((num_ao, num_ao))\n", 317 | " for i in range(num_ao):\n", 318 | " for j in range(num_ao):\n", 319 | " for k in range(num_ao):\n", 320 | " for l in range(num_ao):\n", 321 | " G[i, j] += D[k, l] * \\\n", 322 | " ((2.0*(eri[i, j, k, l])) - (eri[i, k, j, l]))\n", 323 | "\n", 324 | " # Build fock matrix\n", 325 | " F = H + G\n", 326 | "\n", 327 | " # Calculate electronic energy\n", 328 | " E_elec = np.sum(np.multiply(D, (H + F)))\n", 329 | "\n", 330 | " # Calculate energy change of iteration\n", 331 | " iteration_E_diff = np.abs(E_elec - E_elec_last)\n", 332 | "\n", 333 | " # =======> Start of DIIS stuff <=========\n", 334 | " # Build the DIIS AO gradient\n", 335 | "\n", 336 | " # DIIS RMS\n", 337 | "\n", 338 | " # Append lists\n", 339 | " F_list.append(F)\n", 340 | " DIIS_resid.append(diis_r)\n", 341 | "\n", 342 | " if iteration_num >= 2:\n", 343 | " # preform DIIS to get Fock Matrix\n", 344 | "\n", 345 | " # Compute new guess with F DIIS\n", 346 | "\n", 347 | " D = np.zeros((num_ao, num_ao))\n", 348 | " for i in range(num_ao):\n", 349 | " for j in range(num_ao):\n", 350 | " for k in range(num_elec_alpha):\n", 351 | " D[i, j] += C[i, k] * C[j, k]\n", 352 | "\n", 353 | " # =======> End of DIIS stuff <=========\n", 354 | "\n", 355 | " iteration_end_time = time.time()\n", 356 | " print_iteration(iteration_num, iteration_start_time, iteration_end_time,\n", 357 | " diis_rms, iteration_E_diff, E_elec)\n", 358 | "\n", 359 | " if(np.abs(iteration_E_diff) < convergence_E and diis_rms < convergence_DIIS):\n", 360 | " converged = True\n", 361 | " print('\\n', \"{:^79}\".format('=====> SCF Converged <=====\\n'))\n", 362 | " # calculate total energy\n", 363 | " E_total = E_elec + E_nuc\n", 364 | " print(\"{:^79}\".format(\"Total Energy : {:>11f}\".format(E_total)))\n", 365 | "\n", 366 | " if(iteration_num == iteration_max):\n", 367 | " exceeded_iterations = True\n", 368 | " print(\"{:^79}\".format('=====> SCF Exceded Max Iterations <=====\\n'))" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "## References\n", 376 | "1. P. Pulay. Chem. Phys. Lett. 73, 393-398 (1980)\n", 377 | "2. C. David Sherrill. \"Some comments on accellerating convergence of iterative sequences using direct inversion of the iterative subspace (DIIS)\". http://vergil.chemistry.gatech.edu/notes/diis/diis.pdf. 
(1998)" 378 | ] 379 | } 380 | ], 381 | "metadata": { 382 | "kernelspec": { 383 | "display_name": "Python 3", 384 | "language": "python", 385 | "name": "python3" 386 | }, 387 | "language_info": { 388 | "codemirror_mode": { 389 | "name": "ipython", 390 | "version": 3 391 | }, 392 | "file_extension": ".py", 393 | "mimetype": "text/x-python", 394 | "name": "python", 395 | "nbconvert_exporter": "python", 396 | "pygments_lexer": "ipython3", 397 | "version": "3.6.6" 398 | } 399 | }, 400 | "nbformat": 4, 401 | "nbformat_minor": 2 402 | } 403 | -------------------------------------------------------------------------------- /02_SCF/advanced/diis_pyscf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Direct Inversion of Iterative Subspace" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import scipy.linalg as spla\n", 18 | "import pyscf\n", 19 | "from pyscf import gto, scf\n", 20 | "import time" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Useful Resources\n", 28 | "- [P. Pulay. Chem. Phys. Lett. 73, 393-398 (1980)](https://www.sciencedirect.com/science/article/pii/0009261480803964)\n", 29 | "- [DIIS by C. David Sherril](http://vergil.chemistry.gatech.edu/notes/diis/diis.pdf)\n", 30 | "- [DePrince Research Group DIIS Tutorial](https://www.chem.fsu.edu/~deprince/programming_projects/diis/)\n", 31 | "- [Psi4Numpy DIIS Tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/03_Hartree-Fock/3b_rhf-diis.ipynb)\n", 32 | "- [DIIS by MolSSI-Education](https://github.com/MolSSI-Education/QM_2017_SSS_Team8/blob/master/Tutorial_PDFs/02_SCF_DIIS.pdf)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Introduction\n", 40 | "Iterative methods are usually used in order to solve systems of linear equations. These methods can suffer from numerous convergence issues such as slow convergence and high computational cost. Today we are going to work with DIIS to accelerate our convergence. DIIS stands for Direct Inversion of Iterative Subspace and is commonly used to aid in the convergence of SCF wavefunctions. Today we will build off of our previous example of a simple RHF.\n", 41 | "\n", 42 | "## General Theory\n", 43 | "During the iterative solution we generate a set of trial vectors $p^{i}$ that are converging to the true solution $p^{f}$. This allows for us to form a set of residual vectors\n", 44 | "$$\n", 45 | "\\Delta \\mathbf{p} = \\mathbf{p}^{i+1} - \\mathbf{p}^{i}\n", 46 | "$$\n", 47 | "\n", 48 | "DIIS assumes that the true solution can be approximated as a linear combination of the previous trial vector guesses, \n", 49 | "$$\\mathbf{p} = \\sum_{i} c_{i} \\mathbf{p}^{i}$$\n", 50 | "\n", 51 | "\n", 52 | "The coefficients $c_{i}$ can be obtained by requiring the residual vector to be a least-squares approximate to the zero vector \n", 53 | "\n", 54 | "$$\\Delta \\mathbf{p} = \\sum_{i} c_{i} \\Delta \\mathbf{p}^{i}$$\n", 55 | "\n", 56 | "\n", 57 | "constrained by,\n", 58 | "\n", 59 | "$$\\sum_{i} c_{i} =1$$\n", 60 | "\n", 61 | "\n", 62 | "This allows for us to to represent each trial function $p^{i}$ as the true solution plus an error vector. 
\n", 63 | "$$\\mathbf{p} = \\sum_{i} c_{i} (\\mathbf{p}^{f} + \\mathbf{e}^{i}) = \\mathbf{p}^{f} \\sum_{i} c_{i} + \\sum_{i} c_{i} \\mathbf{e}^{i}$$\n", 64 | "\n", 65 | "Convergence will result in minimizing the error which in turn causes the second term above to vanish. For our DIIS solution $\\mathbf{p}$ to be equal to the true solution $\\mathbf{p}^{f}$, we must have $\\sum_{i} c_{i} =1$.\n", 66 | "\n", 67 | "Need to minimize the norm of the residual vector subject to the constraint\n", 68 | "$$ \\left \\langle \\Delta \\mathbf{p} | \\Delta \\mathbf{p} \\right \\rangle = \\sum_{ij} c_{i}^{\\ast} c_{j} \\left \\langle \\Delta \\mathbf{p}^{i} | \\Delta \\mathbf{p}^{j} \\right \\rangle $$\n", 69 | "\n", 70 | "We can minimize using a Lagrange multiplier\n", 71 | "$$ \\cal L = c^{\\dagger} \\mathbf{B} c - \\lambda (1 - \\sum_{i} c_{i})$$\n", 72 | "\n", 73 | "where B is the residual vector overlap.\n", 74 | "$$ B_{ij}=\\left \\langle \\Delta \\mathbf{p}^{i} | \\Delta \\mathbf{p}^{j} \\right \\rangle $$\n", 75 | "\n", 76 | "This allows for us to minimize $\\cal L$ with respect to a coeff $c_{k}$\n", 77 | "$$\\frac{\\partial \\cal L }{\\partial c_{k}}=0 = \\sum_{j} c_{j} B_{kj} + \\sum_{i} c_{i} B_{ik} - \\lambda = 2 \\sum_{i} c_{i} B_{ik} - \\lambda$$\n", 78 | "\n", 79 | "We can represent this with the matrix below\n", 80 | "\n", 81 | "$$\n", 82 | "\\begin{bmatrix}\n", 83 | "B_{11} & B_{12} & \\cdots & B_{1m} & -1 & \\\\ \n", 84 | "B_{21} & B_{22} & \\cdots & B_{2m} & -1 & \\\\ \n", 85 | "\\vdots & \\vdots & \\ddots & \\vdots & \\vdots & \\\\ \n", 86 | "B_{m1} & B_{m2} & \\cdots & B_{mm} & -1 & \\\\ \n", 87 | "-1 & -1 & \\cdots & -1 & 0 & \n", 88 | "\\end{bmatrix} \n", 89 | "\\begin{bmatrix}\n", 90 | "c_{1} & \\\\ \n", 91 | "c_{2} & \\\\ \n", 92 | "\\vdots & \\\\ \n", 93 | "c_{m} & \\\\ \n", 94 | "\\lambda & \n", 95 | "\\end{bmatrix} \n", 96 | "=\n", 97 | "\\begin{bmatrix}\n", 98 | "0 & \\\\ \n", 99 | "0 & \\\\ \n", 100 | "\\vdots & \\\\ \n", 101 | "0 & \\\\ \n", 102 | "-1 & \n", 103 | "\\end{bmatrix} \n", 104 | "$$" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Imports" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## Load Molecule" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "# Define molecule\n", 128 | "mol = pyscf.gto.M(\n", 129 | " atom=\"O 0.0000000 0.0000000 0.0000000; H 0.7569685 0.0000000 -0.5858752; H -0.7569685 0.0000000 -0.5858752\",\n", 130 | " basis='sto-3g',\n", 131 | " unit=\"Ang\",\n", 132 | " verbose=0,\n", 133 | " symmetry=False,\n", 134 | " spin=0,\n", 135 | " charge=0\n", 136 | ")\n", 137 | "\n", 138 | "# Get number of atomic orbitals\n", 139 | "num_ao = mol.nao_nr()\n", 140 | "\n", 141 | "# Get number of electrons\n", 142 | "num_elec_alpha, num_elec_beta = mol.nelec\n", 143 | "num_elec = num_elec_alpha + num_elec_beta\n", 144 | "\n", 145 | "# Get nuclear repulsion energy\n", 146 | "E_nuc = mol.energy_nuc()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "## Calculate Molecular Integrals" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# Calculate overlap integrals\n", 163 | "S = mol.intor('cint1e_ovlp_sph')\n", 164 | "\n", 165 | "# Calculate kinetic energy integrals\n", 166 | "T = 
mol.intor('cint1e_kin_sph')\n", 167 | "\n", 168 | "# Calculate nuclear attraction integrals\n", 169 | "V = mol.intor('cint1e_nuc_sph')\n", 170 | "\n", 171 | "# Form core Hamiltonian\n", 172 | "H = T + V\n", 173 | "\n", 174 | "# Calculate two electron integrals\n", 175 | "eri = mol.intor('cint2e_sph', aosym='s8')\n", 176 | "\n", 177 | "# Since we are using the 8 fold symmetry of the 2 electron integrals\n", 178 | "# the functions below will help us when accessing elements\n", 179 | "__idx2_cache = {}\n", 180 | "\n", 181 | "\n", 182 | "def idx2(i, j):\n", 183 | " if (i, j) in __idx2_cache:\n", 184 | " return __idx2_cache[i, j]\n", 185 | " elif i >= j:\n", 186 | " __idx2_cache[i, j] = int(i*(i+1)/2+j)\n", 187 | " else:\n", 188 | " __idx2_cache[i, j] = int(j*(j+1)/2+i)\n", 189 | " return __idx2_cache[i, j]\n", 190 | "\n", 191 | "\n", 192 | "def idx4(i, j, k, l):\n", 193 | " return idx2(idx2(i, j), idx2(k, l))\n", 194 | "\n", 195 | "\n", 196 | "print(np.shape(eri))" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "## Core Guess" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "# AO orthogonalization matrix\n", 213 | "A = spla.fractional_matrix_power(S, -0.5)\n", 214 | "\n", 215 | "# Solve the generalized eigenvalue problem\n", 216 | "E_orbitals, C = spla.eigh(H, S)\n", 217 | "\n", 218 | "# Compute initial density matrix\n", 219 | "D = np.zeros((num_ao, num_ao))\n", 220 | "for i in range(num_ao):\n", 221 | " for j in range(num_ao):\n", 222 | " for k in range(num_elec_alpha):\n", 223 | " D[i, j] += C[i, k] * C[j, k]" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "## DIIS Function\n", 231 | "\n", 232 | "### Steps in DIIS Function\n", 233 | "1. Build B matrix\n", 234 | "2. Solve the Pulay equation\n", 235 | "3. 
Build the DIIS Fock matrix" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "def diis(F_list, diis_res):\n", 245 | " # Build B matrix\n", 246 | "\n", 247 | " # Right hand side of Pulay eqn\n", 248 | "\n", 249 | " # Solve Pulay for coeffs\n", 250 | "\n", 251 | " # Build DIIS Fock\n", 252 | "\n", 253 | " return F_diis" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "## Variables, Criteria, and Organization" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "# 2 helper functions for printing during SCF\n", 270 | "def print_start_iterations():\n", 271 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 272 | " \"Iter\", \"Time(s)\", \"DIIS RMS\", \"delta E\", \"E_elec\")))\n", 273 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 274 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n", 275 | "\n", 276 | "\n", 277 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, diis_rms, iteration_E_diff, E_elec):\n", 278 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(\n", 279 | " iteration_num, iteration_end_time - iteration_start_time, diis_rms, iteration_E_diff, E_elec)))\n", 280 | "\n", 281 | "\n", 282 | "# Set stopping criteria\n", 283 | "iteration_max = 100\n", 284 | "convergence_E = 1e-9\n", 285 | "convergence_DIIS = 1e-5\n", 286 | "\n", 287 | "# Loop variables\n", 288 | "iteration_num = 0\n", 289 | "E_total = 0\n", 290 | "E_elec = 0.0\n", 291 | "iteration_E_diff = 0.0\n", 292 | "iteration_rmsc_dm = 0.0\n", 293 | "converged = False\n", 294 | "exceeded_iterations = False" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "## DIIS SCF Iteration\n", 302 | "Our trial vector will be the Fock matrix with the error vector being the orthonormalized orbital gradient.\n", 303 | "\n", 304 | "$$ r_{\\mu \\upsilon} = (\\mathbf{A^{T}}(\\mathbf{FDS} - \\mathbf{SDF}) \\mathbf{A})_{\\mu \\upsilon} $$\n", 305 | "\n", 306 | "### Call DIIS in SCF Iteration\n", 307 | "1. Build DIIS Residual (error vector) that will be used to make the B matrix\n", 308 | "2. Store trial and residual vectors\n", 309 | "3. Call DIIS to start after the first iteration\n", 310 | "4. 
Compute the next guess with the DIIS Fock matrix" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": { 317 | "scrolled": true 318 | }, 319 | "outputs": [], 320 | "source": [ 321 | "# Trial & Residual vector lists\n", 322 | "F_list = []\n", 323 | "DIIS_resid = []\n", 324 | "\n", 325 | "print(\"{:^79}\".format('=====> Starting SCF Iterations <=====\\n'))\n", 326 | "print_start_iterations()\n", 327 | "while (not converged and not exceeded_iterations):\n", 328 | " # Store last iteration and increment counters\n", 329 | " iteration_start_time = time.time()\n", 330 | " iteration_num += 1\n", 331 | " E_elec_last = E_elec\n", 332 | " D_last = np.copy(D)\n", 333 | "\n", 334 | " # Form G matrix\n", 335 | " G = np.zeros((num_ao, num_ao))\n", 336 | " for i in range(num_ao):\n", 337 | " for j in range(num_ao):\n", 338 | " for k in range(num_ao):\n", 339 | " for l in range(num_ao):\n", 340 | " G[i, j] += D[k, l] * \\\n", 341 | " ((2.0*(eri[idx4(i, j, k, l)])) -\n", 342 | " (eri[idx4(i, k, j, l)]))\n", 343 | "\n", 344 | " # Build fock matrix\n", 345 | " F = H + G\n", 346 | "\n", 347 | " # Calculate electronic energy\n", 348 | " E_elec = np.sum(np.multiply(D, (H + F)))\n", 349 | "\n", 350 | " # Calculate energy change of iteration\n", 351 | " iteration_E_diff = np.abs(E_elec - E_elec_last)\n", 352 | "\n", 353 | " # =======> Start of DIIS stuff <=========\n", 354 | " # Build the DIIS AO gradient\n", 355 | "\n", 356 | " # DIIS RMS\n", 357 | "\n", 358 | " # Append lists\n", 359 | " F_list.append(F)\n", 360 | " DIIS_resid.append(diis_r)\n", 361 | "\n", 362 | " if iteration_num >= 2:\n", 363 | " # preform DIIS to get Fock Matrix\n", 364 | "\n", 365 | " # Compute new guess with F DIIS\n", 366 | "\n", 367 | " D = np.zeros((num_ao, num_ao))\n", 368 | " for i in range(num_ao):\n", 369 | " for j in range(num_ao):\n", 370 | " for k in range(num_elec_alpha):\n", 371 | " D[i, j] += C[i, k] * C[j, k]\n", 372 | "\n", 373 | " # =======> End of DIIS stuff <=========\n", 374 | "\n", 375 | " iteration_end_time = time.time()\n", 376 | " print_iteration(iteration_num, iteration_start_time, iteration_end_time,\n", 377 | " diis_rms, iteration_E_diff, E_elec)\n", 378 | "\n", 379 | " if(np.abs(iteration_E_diff) < convergence_E and diis_rms < convergence_DIIS):\n", 380 | " converged = True\n", 381 | " print('\\n', \"{:^79}\".format('=====> SCF Converged <=====\\n'))\n", 382 | " # calculate total energy\n", 383 | " E_total = E_elec + E_nuc\n", 384 | " print(\"{:^79}\".format(\"Total Energy : {:>11f}\".format(E_total)))\n", 385 | "\n", 386 | " if(iteration_num == iteration_max):\n", 387 | " exceeded_iterations = True\n", 388 | " print(\"{:^79}\".format('=====> SCF Exceded Max Iterations <=====\\n'))" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "## References\n", 396 | "1. P. Pulay. Chem. Phys. Lett. 73, 393-398 (1980)\n", 397 | "2. C. David Sherrill. \"Some comments on accellerating convergence of iterative sequences using direct inversion of the iterative subspace (DIIS)\". http://vergil.chemistry.gatech.edu/notes/diis/diis.pdf. 
(1998)" 398 | ] 399 | } 400 | ], 401 | "metadata": { 402 | "kernelspec": { 403 | "display_name": "Python 3", 404 | "language": "python", 405 | "name": "python3" 406 | }, 407 | "language_info": { 408 | "codemirror_mode": { 409 | "name": "ipython", 410 | "version": 3 411 | }, 412 | "file_extension": ".py", 413 | "mimetype": "text/x-python", 414 | "name": "python", 415 | "nbconvert_exporter": "python", 416 | "pygments_lexer": "ipython3", 417 | "version": "3.6.6" 418 | } 419 | }, 420 | "nbformat": 4, 421 | "nbformat_minor": 2 422 | } 423 | -------------------------------------------------------------------------------- /02_SCF/basics/scf_psi4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SCF" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Imports" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import scipy.linalg as spla\n", 25 | "import psi4\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import time\n", 28 | "%matplotlib notebook" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Some useful resources:\n", 36 | " - Szabo and Ostlund Chapter 3 (for algorithm see page 146)\n", 37 | " - [Notes by David Sherrill](http://vergil.chemistry.gatech.edu/notes/hf-intro/hf-intro.html)\n", 38 | " - [Notes by Joshua Goings](http://joshuagoings.com/2013/04/24/hartree-fock-self-consistent-field-procedure/)\n", 39 | " - [Programming notes by Francesco Evangelista](http://www.evangelistalab.org/wp-content/uploads/2013/12/Hartree-Fock-Theory.pdf)\n", 40 | " - [Psi4Numpy SCF page](https://github.com/psi4/psi4numpy/tree/master/Tutorials/03_Hartree-Fock)\n", 41 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project3)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## The SCF algorithm from Szabo and Ostlund:\n", 49 | " 1. Specify a molecule (coordinates $\\{R_A\\}$, atomic numbers $\\{Z_A\\}$, number electrons $N$) and atomic orbital basis $\\{\\phi_\\mu\\}$.\n", 50 | " 2. Calculate molecular integrals over AOs ( overlap $S_{\\mu\\nu}$, core Hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$, and 2 electron integrals $(\\mu \\nu | \\lambda \\sigma)$ ).\n", 51 | " 3. Diagonalize the overlap matrix $S$ to obtain the transformation matrix $X$.\n", 52 | " 4. Make a guess at the original density matrix $P$.\n", 53 | " 5. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n", 54 | " 6. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$.\n", 55 | " 7. Transform the Fock matrix $F' = X^\\dagger F X$.\n", 56 | " 8. Diagonalize the Fock matrix to get orbital energies $\\epsilon$ and molecular orbitals (in the transformed basis) $C'$.\n", 57 | " 9. Transform the molecular orbitals back to the AO basis $C = X C'$.\n", 58 | " 10. Form a new guess at the density matrix $P$ using $C$.\n", 59 | " 11. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 5.\n", 60 | " 12. 
If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc." 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Quick note\n", 68 | "The reason we need to calculate the transformation matrix $X$ is because the atomic orbital basis is not orthonormal by default. This means without transformation we would need to solve a generalized eigenvalue problem $FC = ESC$. If we use scipy to solve this generalized eigenvalue problem we can simply the SCF algorithm.\n", 69 | "## Simplified SCF\n", 70 | " 1. Specify a molecule (coordinates $\\{R_A\\}$, atomic numbers $\\{Z_A\\}$, number electrons $N$) and atomic orbital basis $\\{\\phi_\\mu\\}$.\n", 71 | " 2. Calculate molecular integrals over AOs ( overlap $S_{\\mu\\nu}$, core Hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$, and 2 electron integrals $(\\mu \\nu | \\lambda \\sigma)$ ).\n", 72 | " 3. Make a guess at the original density matrix $P$.\n", 73 | " 4. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n", 74 | " 5. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$. \n", 75 | " 6. Solve the generalized eigenvalue problem using the Fock matrix $F$ and the overlap matrix $S$ to get orbital energies $\\epsilon$ and molecular orbitals.\n", 76 | " 7. Form a new guess at the density matrix $P$ using $C$.\n", 77 | " 8. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 4.\n", 78 | " 9. If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc.\n" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "# STEP 1 : Specify the molecule" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "# start timer\n", 95 | "start_time = time.time()\n", 96 | "# define molecule\n", 97 | "mol = psi4.geometry(\"\"\"\n", 98 | "O 0.0000000 0.0000000 0.0000000\n", 99 | "H 0.7569685 0.0000000 -0.5858752\n", 100 | "H -0.7569685 0.0000000 -0.5858752\n", 101 | "symmetry c1\n", 102 | "\"\"\")\n", 103 | "psi4.set_options({'basis': 'sto-3g'})\n", 104 | "wfn = psi4.core.Wavefunction.build(mol, psi4.core.get_global_option('BASIS'))\n", 105 | "mints = psi4.core.MintsHelper(wfn.basisset())\n", 106 | "# get number of electrons\n", 107 | "num_elec_alpha = wfn.nalpha()\n", 108 | "num_elec_beta = wfn.nbeta()\n", 109 | "num_elec = num_elec_alpha + num_elec_beta\n", 110 | "# get nuclear repulsion energy\n", 111 | "E_nuc = mol.nuclear_repulsion_energy()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "# STEP 2 : Calculate molecular integrals \n", 119 | "\n", 120 | "Overlap \n", 121 | "\n", 122 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n", 123 | "\n", 124 | "Kinetic\n", 125 | "\n", 126 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n", 127 | "\n", 128 | "Nuclear Attraction\n", 129 | "\n", 130 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n", 131 | "\n", 132 | "Form Core Hamiltonian\n", 133 | "\n", 134 | "$$ H = 
T + V $$\n", 135 | "\n", 136 | "Two electron integrals\n", 137 | "\n", 138 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "# calculate overlap integrals\n", 148 | "S = np.asarray(mints.ao_overlap())\n", 149 | "# calculate kinetic energy integrals\n", 150 | "T = np.asarray(mints.ao_kinetic())\n", 151 | "# calculate nuclear attraction integrals\n", 152 | "V = np.asarray(mints.ao_potential())\n", 153 | "# form core Hamiltonian\n", 154 | "H = T + V\n", 155 | "# calculate two electron integrals\n", 156 | "eri = np.asarray(mints.ao_eri())\n", 157 | "# get number of atomic orbitals\n", 158 | "num_ao = np.shape(S)[0]\n", 159 | "\n", 160 | "print(np.shape(eri))" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "# STEP 3 : Form guess density matrix" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# set inital density matrix to zero\n", 177 | "D = np.zeros((num_ao, num_ao))" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "# STEPS 4 - 8 : SCF loop\n", 185 | "\n", 186 | " 4. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n", 187 | " \n", 188 | " $$G_{\\mu\\nu} = \\sum_{\\lambda\\sigma}^{\\mathrm{num\\_ao}} P_{\\lambda \\sigma}[2(\\mu\\nu|\\lambda\\sigma)-(\\mu\\lambda|\\nu\\sigma)]$$ \n", 189 | " \n", 190 | " 5. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$. \n", 191 | " \n", 192 | " $$ F = H + G $$\n", 193 | " \n", 194 | " 6. Solve the generalized eigenvalue problem using the Fock matrix $F$ and the overlap matrix $S$ to get orbital energies $\\epsilon$ and molecular orbitals.\n", 195 | " \n", 196 | " $$F C = E S C $$\n", 197 | " \n", 198 | " 7. Form a new guess at the density matrix $P$ using $C$.\n", 199 | " \n", 200 | " $$ P_{\\mu\\nu} = \\sum_{i}^{\\mathrm{num\\_elec}/2} C_{\\mu i} C_{\\nu i} $$\n", 201 | " \n", 202 | " 8. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 4.\n", 203 | " \n", 204 | " $$ E_{\\mathrm{elec}} = \\sum^{\\mathrm{num\\_ao}}_{\\mu\\nu} P_{\\mu\\nu} (H_{\\mu\\nu} + F_{\\mu\\nu}) $$\n", 205 | " $$ \\Delta E = E_{\\mathrm{new}} - E_{\\mathrm{old}} $$\n", 206 | " $$ |\\Delta P| = \\left[ \\sum^{\\mathrm{num\\_ao}}_{\\mu\\nu} [P^{\\mathrm{new}}_{\\mu\\nu} - P_{\\mu\\nu}^{\\mathrm{old}}]^2 \\right]^{1/2}$$\n", 207 | " \n", 208 | " 9. 
If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc.\n", 209 | " \n", 210 | " $$ E_{\\mathrm{total}} = V_{\\mathrm{NN}} + E_{\\mathrm{elec}} $$\n" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "# 2 helper functions for printing during SCF\n", 220 | "def print_start_iterations():\n", 221 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 222 | " \"Iter\", \"Time(s)\", \"RMSC DM\", \"delta E\", \"E_elec\")))\n", 223 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 224 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n", 225 | "\n", 226 | "\n", 227 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec):\n", 228 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num,\n", 229 | " iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_elec)))\n", 230 | "\n", 231 | "\n", 232 | "# set stopping criteria\n", 233 | "iteration_max = 100\n", 234 | "convergence_E = 1e-9\n", 235 | "convergence_DM = 1e-5\n", 236 | "# loop variables\n", 237 | "iteration_num = 0\n", 238 | "E_total = 0\n", 239 | "E_elec = 0.0\n", 240 | "iteration_E_diff = 0.0\n", 241 | "iteration_rmsc_dm = 0.0\n", 242 | "converged = False\n", 243 | "exceeded_iterations = False" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "print_start_iterations()\n", 253 | "while (not converged and not exceeded_iterations):\n", 254 | " # store last iteration and increment counters\n", 255 | " iteration_start_time = time.time()\n", 256 | " iteration_num += 1\n", 257 | " E_elec_last = E_elec\n", 258 | " D_last = np.copy(D)\n", 259 | " # form G matrix\n", 260 | " G = np.zeros((num_ao, num_ao))\n", 261 | "\n", 262 | " #########################################################\n", 263 | " # FILL IN HOW TO MAKE THE G MATRIX HERE\n", 264 | " #########################################################\n", 265 | "\n", 266 | " # build fock matrix\n", 267 | "\n", 268 | " #########################################################\n", 269 | " # FILL IN HOW TO MAKE THE FOCK MATRIX HERE\n", 270 | " #########################################################\n", 271 | "\n", 272 | " # solve the generalized eigenvalue problem\n", 273 | " E_orbitals, C = spla.eigh(F, S)\n", 274 | " # compute new density matrix\n", 275 | " D = np.zeros((num_ao, num_ao))\n", 276 | "\n", 277 | " #########################################################\n", 278 | " # FILL IN HOW TO MAKE THE DENSITY MATRIX HERE\n", 279 | " #########################################################\n", 280 | "\n", 281 | " # calculate electronic energy\n", 282 | "\n", 283 | " #########################################################\n", 284 | " # FILL IN HOW TO CALCULATE THE ELECTRONIC ENERGY HERE\n", 285 | " #########################################################\n", 286 | "\n", 287 | " # calculate energy change of iteration\n", 288 | " iteration_E_diff = np.abs(E_elec - E_elec_last)\n", 289 | " # rms change of density matrix\n", 290 | " iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n", 291 | " iteration_end_time = time.time()\n", 292 | " print_iteration(iteration_num, iteration_start_time,\n", 293 | " 
iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec)\n", 294 | " if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM):\n", 295 | " converged = True\n", 296 | " if(iteration_num == iteration_max):\n", 297 | " exceeded_iterations = True" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "# STEP 9 : Calculate Observables" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "# calculate total energy\n", 314 | "\n", 315 | "####################################################\n", 316 | "# FILL IN HOW TO CALCULATE THE TOTAL ENERGY HERE\n", 317 | "####################################################" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "print(\"{:^79}\".format(\"Total Energy : {:>11f}\".format(E_total)))" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [] 335 | } 336 | ], 337 | "metadata": { 338 | "kernelspec": { 339 | "display_name": "Python 3", 340 | "language": "python", 341 | "name": "python3" 342 | }, 343 | "language_info": { 344 | "codemirror_mode": { 345 | "name": "ipython", 346 | "version": 3 347 | }, 348 | "file_extension": ".py", 349 | "mimetype": "text/x-python", 350 | "name": "python", 351 | "nbconvert_exporter": "python", 352 | "pygments_lexer": "ipython3", 353 | "version": "3.6.6" 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 2 358 | } 359 | -------------------------------------------------------------------------------- /02_SCF/basics/scf_pyscf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SCF" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Imports" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import scipy.linalg as spla\n", 25 | "import pyscf\n", 26 | "from pyscf import gto, scf\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "import time\n", 29 | "%matplotlib notebook" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Some useful resources:\n", 37 | " - Szabo and Ostlund Chapter 3 (for algorithm see page 146)\n", 38 | " - [Notes by David Sherrill](http://vergil.chemistry.gatech.edu/notes/hf-intro/hf-intro.html)\n", 39 | " - [Notes by Joshua Goings](http://joshuagoings.com/2013/04/24/hartree-fock-self-consistent-field-procedure/)\n", 40 | " - [Programming notes by Francesco Evangelista](http://www.evangelistalab.org/wp-content/uploads/2013/12/Hartree-Fock-Theory.pdf)\n", 41 | " - [Psi4Numpy SCF page](https://github.com/psi4/psi4numpy/tree/master/Tutorials/03_Hartree-Fock)\n", 42 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project3)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## The SCF algorithm from Szabo and Ostlund:\n", 50 | " 1. Specify a molecule (coordinates $\\{R_A\\}$, atomic numbers $\\{Z_A\\}$, number electrons $N$) and atomic orbital basis $\\{\\phi_\\mu\\}$.\n", 51 | " 2. 
Calculate molecular integrals over AOs ( overlap $S_{\\mu\\nu}$, core Hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$, and 2 electron integrals $(\\mu \\nu | \\lambda \\sigma)$ ).\n", 52 | " 3. Diagonalize the overlap matrix $S$ to obtain the transformation matrix $X$.\n", 53 | " 4. Make a guess at the original density matrix $P$.\n", 54 | " 5. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n", 55 | " 6. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$.\n", 56 | " 7. Transform the Fock matrix $F' = X^\\dagger F X$.\n", 57 | " 8. Diagonalize the Fock matrix to get orbital energies $\\epsilon$ and molecular orbitals (in the transformed basis) $C'$.\n", 58 | " 9. Transform the molecular orbitals back to the AO basis $C = X C'$.\n", 59 | " 10. Form a new guess at the density matrix $P$ using $C$.\n", 60 | " 11. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 5.\n", 61 | " 12. If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc." 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Quick note\n", 69 | "The reason we need to calculate the transformation matrix $X$ is because the atomic orbital basis is not orthonormal by default. This means without transformation we would need to solve a generalized eigenvalue problem $FC = ESC$. If we use scipy to solve this generalized eigenvalue problem we can simply the SCF algorithm.\n", 70 | "## Simplified SCF\n", 71 | " 1. Specify a molecule (coordinates $\\{R_A\\}$, atomic numbers $\\{Z_A\\}$, number electrons $N$) and atomic orbital basis $\\{\\phi_\\mu\\}$.\n", 72 | " 2. Calculate molecular integrals over AOs ( overlap $S_{\\mu\\nu}$, core Hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$, and 2 electron integrals $(\\mu \\nu | \\lambda \\sigma)$ ).\n", 73 | " 3. Make a guess at the original density matrix $P$.\n", 74 | " 4. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n", 75 | " 5. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$. \n", 76 | " 6. Solve the generalized eigenvalue problem using the Fock matrix $F$ and the overlap matrix $S$ to get orbital energies $\\epsilon$ and molecular orbitals.\n", 77 | " 7. Form a new guess at the density matrix $P$ using $C$.\n", 78 | " 8. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 4.\n", 79 | " 9. 
If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc.\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# STEP 1 : Specify the molecule" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "# start timer\n", 96 | "start_time = time.time()\n", 97 | "# define molecule\n", 98 | "mol = pyscf.gto.M(\n", 99 | " atom=\"O 0.0000000 0.0000000 0.0000000; H 0.7569685 0.0000000 -0.5858752; H -0.7569685 0.0000000 -0.5858752\",\n", 100 | " basis='sto-3g',\n", 101 | " unit=\"Ang\",\n", 102 | " verbose=0,\n", 103 | " symmetry=False,\n", 104 | " spin=0,\n", 105 | " charge=0\n", 106 | ")\n", 107 | "# get number of atomic orbitals\n", 108 | "num_ao = mol.nao_nr()\n", 109 | "# get number of electrons\n", 110 | "num_elec_alpha, num_elec_beta = mol.nelec\n", 111 | "num_elec = num_elec_alpha + num_elec_beta\n", 112 | "# get nuclear repulsion energy\n", 113 | "E_nuc = mol.energy_nuc()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "# STEP 2 : Calculate molecular integrals \n", 121 | "\n", 122 | "Overlap \n", 123 | "\n", 124 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n", 125 | "\n", 126 | "Kinetic\n", 127 | "\n", 128 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n", 129 | "\n", 130 | "Nuclear Attraction\n", 131 | "\n", 132 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n", 133 | "\n", 134 | "Form Core Hamiltonian\n", 135 | "\n", 136 | "$$ H = T + V $$\n", 137 | "\n", 138 | "Two electron integrals\n", 139 | "\n", 140 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "# calculate overlap integrals\n", 150 | "S = mol.intor('cint1e_ovlp_sph')\n", 151 | "# calculate kinetic energy integrals\n", 152 | "T = mol.intor('cint1e_kin_sph')\n", 153 | "# calculate nuclear attraction integrals\n", 154 | "V = mol.intor('cint1e_nuc_sph')\n", 155 | "# form core Hamiltonian\n", 156 | "H = T + V\n", 157 | "# calculate two electron integrals\n", 158 | "eri = mol.intor('cint2e_sph', aosym='s8')\n", 159 | "# since we are using the 8 fold symmetry of the 2 electron integrals\n", 160 | "# the functions below will help us when accessing elements\n", 161 | "__idx2_cache = {}\n", 162 | "\n", 163 | "\n", 164 | "def idx2(i, j):\n", 165 | " if (i, j) in __idx2_cache:\n", 166 | " return __idx2_cache[i, j]\n", 167 | " elif i >= j:\n", 168 | " __idx2_cache[i, j] = int(i*(i+1)/2+j)\n", 169 | " else:\n", 170 | " __idx2_cache[i, j] = int(j*(j+1)/2+i)\n", 171 | " return __idx2_cache[i, j]\n", 172 | "\n", 173 | "\n", 174 | "def idx4(i, j, k, l):\n", 175 | " return idx2(idx2(i, j), idx2(k, l))\n", 176 | "\n", 177 | "\n", 178 | "print(np.shape(eri))" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "# STEP 3 : Form guess density matrix" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "# set inital 
density matrix to zero\n", 195 | "D = np.zeros((num_ao, num_ao))" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "# STEPS 4 - 8 : SCF loop\n", 203 | "\n", 204 | " 4. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n", 205 | " \n", 206 | " $$G_{\\mu\\nu} = \\sum_{\\lambda\\sigma}^{\\mathrm{num\\_ao}} P_{\\lambda \\sigma}[2(\\mu\\nu|\\lambda\\sigma)-(\\mu\\lambda|\\nu\\sigma)]$$ \n", 207 | " \n", 208 | " 5. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$. \n", 209 | " \n", 210 | " $$ F = H + G $$\n", 211 | " \n", 212 | " 6. Solve the generalized eigenvalue problem using the Fock matrix $F$ and the overlap matrix $S$ to get orbital energies $\\epsilon$ and molecular orbitals.\n", 213 | " \n", 214 | " $$F C = E S C $$\n", 215 | " \n", 216 | " 7. Form a new guess at the density matrix $P$ using $C$.\n", 217 | " \n", 218 | " $$ P_{\\mu\\nu} = \\sum_{i}^{\\mathrm{num\\_elec}/2} C_{\\mu i} C_{\\nu i} $$\n", 219 | " \n", 220 | " 8. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 4.\n", 221 | " \n", 222 | " $$ E_{\\mathrm{elec}} = \\sum^{\\mathrm{num\\_ao}}_{\\mu\\nu} P_{\\mu\\nu} (H_{\\mu\\nu} + F_{\\mu\\nu}) $$\n", 223 | " $$ \\Delta E = E_{\\mathrm{new}} - E_{\\mathrm{old}} $$\n", 224 | " $$ |\\Delta P| = \\left[ \\sum^{\\mathrm{num\\_ao}}_{\\mu\\nu} [P^{\\mathrm{new}}_{\\mu\\nu} - P_{\\mu\\nu}^{\\mathrm{old}}]^2 \\right]^{1/2}$$\n", 225 | " \n", 226 | " 9. If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc.\n", 227 | " \n", 228 | " $$ E_{\\mathrm{total}} = V_{\\mathrm{NN}} + E_{\\mathrm{elec}} $$" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# 2 helper functions for printing during SCF\n", 238 | "def print_start_iterations():\n", 239 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 240 | " \"Iter\", \"Time(s)\", \"RMSC DM\", \"delta E\", \"E_elec\")))\n", 241 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 242 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n", 243 | "\n", 244 | "\n", 245 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec):\n", 246 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num,\n", 247 | " iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_elec)))\n", 248 | "\n", 249 | "\n", 250 | "# set stopping criteria\n", 251 | "iteration_max = 100\n", 252 | "convergence_E = 1e-9\n", 253 | "convergence_DM = 1e-5\n", 254 | "# loop variables\n", 255 | "iteration_num = 0\n", 256 | "E_total = 0\n", 257 | "E_elec = 0.0\n", 258 | "iteration_E_diff = 0.0\n", 259 | "iteration_rmsc_dm = 0.0\n", 260 | "converged = False\n", 261 | "exceeded_iterations = False" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "print_start_iterations()\n", 271 | "while (not converged and not exceeded_iterations):\n", 272 | " # store last iteration and increment counters\n", 273 | " iteration_start_time = time.time()\n", 274 | " 
iteration_num += 1\n", 275 | " E_elec_last = E_elec\n", 276 | " D_last = np.copy(D)\n", 277 | " # form G matrix\n", 278 | " G = np.zeros((num_ao, num_ao))\n", 279 | "\n", 280 | " #########################################################\n", 281 | " # FILL IN HOW TO MAKE THE G MATRIX HERE\n", 282 | " #########################################################\n", 283 | "\n", 284 | " # build fock matrix\n", 285 | "\n", 286 | " #########################################################\n", 287 | " # FILL IN HOW TO MAKE THE FOCK MATRIX HERE\n", 288 | " #########################################################\n", 289 | "\n", 290 | " # solve the generalized eigenvalue problem\n", 291 | " E_orbitals, C = spla.eigh(F, S)\n", 292 | " # compute new density matrix\n", 293 | " D = np.zeros((num_ao, num_ao))\n", 294 | "\n", 295 | " #########################################################\n", 296 | " # FILL IN HOW TO MAKE THE DENSITY MATRIX HERE\n", 297 | " #########################################################\n", 298 | "\n", 299 | " # calculate electronic energy\n", 300 | "\n", 301 | " #########################################################\n", 302 | " # FILL IN HOW TO CALCULATE THE ELECTRONIC ENERGY HERE\n", 303 | " #########################################################\n", 304 | "\n", 305 | " # calculate energy change of iteration\n", 306 | " iteration_E_diff = np.abs(E_elec - E_elec_last)\n", 307 | " # rms change of density matrix\n", 308 | " iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n", 309 | " iteration_end_time = time.time()\n", 310 | " print_iteration(iteration_num, iteration_start_time,\n", 311 | " iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec)\n", 312 | " if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM):\n", 313 | " converged = True\n", 314 | " if(iteration_num == iteration_max):\n", 315 | " exceeded_iterations = True" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "# STEP 9 : Calculate Observables" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "# calculate total energy\n", 332 | "\n", 333 | "####################################################\n", 334 | "# FILL IN HOW TO CALCULATE THE TOTAL ENERGY HERE\n", 335 | "####################################################" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "print(\"{:^79}\".format(\"Total Energy : {:>11f}\".format(E_total)))" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [] 353 | } 354 | ], 355 | "metadata": { 356 | "kernelspec": { 357 | "display_name": "Python 3", 358 | "language": "python", 359 | "name": "python3" 360 | }, 361 | "language_info": { 362 | "codemirror_mode": { 363 | "name": "ipython", 364 | "version": 3 365 | }, 366 | "file_extension": ".py", 367 | "mimetype": "text/x-python", 368 | "name": "python", 369 | "nbconvert_exporter": "python", 370 | "pygments_lexer": "ipython3", 371 | "version": "3.6.6" 372 | } 373 | }, 374 | "nbformat": 4, 375 | "nbformat_minor": 2 376 | } 377 | -------------------------------------------------------------------------------- /02_SCF/scf_slides.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/02_SCF/scf_slides.pdf -------------------------------------------------------------------------------- /03_MP2/basics/mp2_psi4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MP2" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Some useful resources:\n", 15 | " - [original paper](https://journals.aps.org/pr/abstract/10.1103/PhysRev.46.618)\n", 16 | " - Levine Chapter 16\n", 17 | " - [psi4numpy tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/05_Moller-Plesset/5a_conventional-mp2.ipynb)\n", 18 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project4)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "# MP2 algorithm\n", 26 | "1. The starting point will be the Hartree-Fock wavefunction" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## Imports" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import numpy as np\n", 43 | "import scipy.linalg as spla\n", 44 | "import psi4\n", 45 | "import matplotlib.pyplot as plt\n", 46 | "import time\n", 47 | "%matplotlib notebook" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Specify the molecule" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# start timer\n", 64 | "start_time = time.time()\n", 65 | "# define molecule\n", 66 | "mol = psi4.geometry(\"\"\"\n", 67 | "O 0.0000000 0.0000000 0.0000000\n", 68 | "H 0.7569685 0.0000000 -0.5858752\n", 69 | "H -0.7569685 0.0000000 -0.5858752\n", 70 | "symmetry c1\n", 71 | "\"\"\")\n", 72 | "psi4.set_options({'basis': 'sto-3g'})\n", 73 | "wfn = psi4.core.Wavefunction.build(mol, psi4.core.get_global_option('BASIS'))\n", 74 | "mints = psi4.core.MintsHelper(wfn.basisset())\n", 75 | "# get number of electrons\n", 76 | "num_elec_alpha = wfn.nalpha()\n", 77 | "num_elec_beta = wfn.nbeta()\n", 78 | "num_elec = num_elec_alpha + num_elec_beta\n", 79 | "# get nuclear repulsion energy\n", 80 | "E_nuc = mol.nuclear_repulsion_energy()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# STEP 2 : Calculate molecular integrals \n", 88 | "\n", 89 | "Overlap \n", 90 | "\n", 91 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n", 92 | "\n", 93 | "Kinetic\n", 94 | "\n", 95 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n", 96 | "\n", 97 | "Nuclear Attraction\n", 98 | "\n", 99 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n", 100 | "\n", 101 | "Form Core Hamiltonian\n", 102 | "\n", 103 | "$$ H = T + V $$\n", 104 | "\n", 105 | "Two electron integrals\n", 106 | "\n", 107 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": 
[], 115 | "source": [ 116 | "# calculate overlap integrals\n", 117 | "S = np.asarray(mints.ao_overlap())\n", 118 | "# calculate kinetic energy integrals\n", 119 | "T = np.asarray(mints.ao_kinetic())\n", 120 | "# calculate nuclear attraction integrals\n", 121 | "V = np.asarray(mints.ao_potential())\n", 122 | "# form core Hamiltonian\n", 123 | "H = T + V\n", 124 | "# calculate two electron integrals\n", 125 | "eri = np.asarray(mints.ao_eri())\n", 126 | "# get number of atomic orbitals\n", 127 | "num_ao = np.shape(S)[0]\n", 128 | "\n", 129 | "\n", 130 | "\n", 131 | "print(np.shape(eri))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "# set inital density matrix to zero\n", 141 | "D = np.zeros((num_ao,num_ao))\n", 142 | "\n", 143 | "# 2 helper functions for printing during SCF\n", 144 | "def print_start_iterations():\n", 145 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\"Iter\", \"Time(s)\", \"RMSC DM\", \"delta E\", \"E_scf_elec\")))\n", 146 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n", 147 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec):\n", 148 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num, iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)))\n", 149 | "\n", 150 | "# set stopping criteria\n", 151 | "iteration_max = 100\n", 152 | "convergence_E = 1e-9\n", 153 | "convergence_DM = 1e-5\n", 154 | "# loop variables\n", 155 | "iteration_num = 0\n", 156 | "E_scf_total = 0\n", 157 | "E_scf_elec = 0.0\n", 158 | "iteration_E_diff = 0.0\n", 159 | "iteration_rmsc_dm = 0.0\n", 160 | "converged = False\n", 161 | "exceeded_iterations = False\n", 162 | "print_start_iterations()\n", 163 | "while (not converged and not exceeded_iterations):\n", 164 | " # store last iteration and increment counters\n", 165 | " iteration_start_time = time.time()\n", 166 | " iteration_num += 1\n", 167 | " E_elec_last = E_scf_elec\n", 168 | " D_last = np.copy(D)\n", 169 | " # form G matrix\n", 170 | " G = np.zeros((num_ao,num_ao))\n", 171 | " for i in range(num_ao):\n", 172 | " for j in range(num_ao):\n", 173 | " for k in range(num_ao):\n", 174 | " for l in range(num_ao):\n", 175 | " G[i,j] += D[k,l] * ((2.0*(eri[i,j,k,l])) - (eri[i,k,j,l]))\n", 176 | " # build fock matrix\n", 177 | " F = H + G\n", 178 | " # solve the generalized eigenvalue problem\n", 179 | " E_orbitals, C = spla.eigh(F,S)\n", 180 | " # compute new density matrix\n", 181 | " D = np.zeros((num_ao,num_ao))\n", 182 | " for i in range(num_ao):\n", 183 | " for j in range(num_ao):\n", 184 | " for k in range(num_elec_alpha):\n", 185 | " D[i,j] += C[i,k] * C[j,k]\n", 186 | " # calculate electronic energy\n", 187 | " E_scf_elec = np.sum(np.multiply(D , (H + F)))\n", 188 | " # calculate energy change of iteration\n", 189 | " iteration_E_diff = np.abs(E_scf_elec - E_elec_last)\n", 190 | " # rms change of density matrix\n", 191 | " iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n", 192 | " iteration_end_time = time.time()\n", 193 | " print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)\n", 194 | " if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM): \n", 195 | " converged = 
True\n", 196 | " if(iteration_num == iteration_max):\n", 197 | " exceeded_iterations = True\n", 198 | "\n", 199 | "# calculate total energy\n", 200 | "E_scf_total = E_scf_elec + E_nuc\n", 201 | "print(\"{:^79}\".format(\"Total HF energy : {:>11f}\".format(E_scf_total)))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "# Perform MP2 calculation" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "## Convert the two-electron integrals from AO basis to the MO basis\n", 216 | "\n", 217 | "$$(pq|rs) = \\sum_\\mu \\sum_\\nu \\sum_\\lambda \\sum_\\sigma C_\\mu^p C_\\nu^q\n", 218 | "(\\mu \\nu|\\lambda \\sigma) C_\\lambda^r C_\\sigma^s.$$\n", 219 | "\n", 220 | "\n", 221 | "Attempt to code this conversion below, remember that the electron repulsion integrals above are stored as vector `eri` that is of the shape (num_ao,num_ao,num_ao,num_ao). Here the num_ao's for sto-3g water is 7. The resulting tensor will have the same shape as `eri`.\n" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "## place code for two-electron integral conversion here." 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### Compute the MP2 Energy\n", 238 | "Now we can calculate the MP2 estimation of the correlation energy. \n", 239 | "$$E_{\\mathrm{corr(MP2)}}\\ =\\ \\frac{( ia \\mid jb ) [ 2 (ia \\mid jb ) - ( ib \\mid ja )]}{\\epsilon_i + \\epsilon_j + \\epsilon_a - \\epsilon_b}$$\n", 240 | "\n", 241 | "Here $i$ and $j$ represent all occupied orbitals, where as $a$ and $b$ will be unoccupied orbitals. \n", 242 | "\n", 243 | "Remember during this coding step that we are basing our MP2 correction on an RHF calculation and thus there are the same amount of $\\alpha$ and $\\beta$ electrons." 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "#initialize the variable forthe mp2 correlation energy\n", 253 | "E_corr_mp2 = 0\n", 254 | "# code the equation above and adjust the value of E_corr_mp2\n", 255 | "\n", 256 | "\n", 257 | "#this will print your E_corr mp2\n", 258 | "print(\"{:^79}\".format(\"Total MP2 correlation energy : {:>11f}\".format(E_corr_mp2)))" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "The correlation energy should be very small compared to the total energy (-0.035493 Ha), which is generally the case. However, this correlation energy can be very important to describing properties such as dispersion. 
" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "## A comparison with Psi4" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "\n", 282 | "# Get the SCF wavefunction & energies# Get t \n", 283 | "scf_e, scf_wfn = psi4.energy('scf', return_wfn=True)\n", 284 | "mp2_e = psi4.energy('mp2')\n", 285 | "print(mp2_e)\n", 286 | "\n", 287 | "E_diff = (mp2_e - (E_total + E_corr_mp2)) \n", 288 | "print(E_diff)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [] 297 | } 298 | ], 299 | "metadata": { 300 | "kernelspec": { 301 | "display_name": "Python 3", 302 | "language": "python", 303 | "name": "python3" 304 | }, 305 | "language_info": { 306 | "codemirror_mode": { 307 | "name": "ipython", 308 | "version": 3 309 | }, 310 | "file_extension": ".py", 311 | "mimetype": "text/x-python", 312 | "name": "python", 313 | "nbconvert_exporter": "python", 314 | "pygments_lexer": "ipython3", 315 | "version": "3.6.6" 316 | } 317 | }, 318 | "nbformat": 4, 319 | "nbformat_minor": 2 320 | } 321 | -------------------------------------------------------------------------------- /03_MP2/basics/mp2_psi4_solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MP2" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Some useful resources:\n", 15 | " - [original paper](https://journals.aps.org/pr/abstract/10.1103/PhysRev.46.618)\n", 16 | " - Levine Chapter 16\n", 17 | " - [psi4numpy tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/05_Moller-Plesset/5a_conventional-mp2.ipynb)\n", 18 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project4)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "# MP2 algorithm\n", 26 | "1. 
The starting point will be the Hartree-Fock wavefunction" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## Imports" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import numpy as np\n", 43 | "import scipy.linalg as spla\n", 44 | "import psi4\n", 45 | "import matplotlib.pyplot as plt\n", 46 | "import time\n", 47 | "%matplotlib notebook" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Specify the molecule" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# start timer\n", 64 | "start_time = time.time()\n", 65 | "# define molecule\n", 66 | "mol = psi4.geometry(\"\"\"\n", 67 | "O 0.0000000 0.0000000 0.0000000\n", 68 | "H 0.7569685 0.0000000 -0.5858752\n", 69 | "H -0.7569685 0.0000000 -0.5858752\n", 70 | "symmetry c1\n", 71 | "\"\"\")\n", 72 | "psi4.set_options({'basis': 'sto-3g'})\n", 73 | "wfn = psi4.core.Wavefunction.build(mol, psi4.core.get_global_option('BASIS'))\n", 74 | "mints = psi4.core.MintsHelper(wfn.basisset())\n", 75 | "# get number of electrons\n", 76 | "num_elec_alpha = wfn.nalpha()\n", 77 | "num_elec_beta = wfn.nbeta()\n", 78 | "num_elec = num_elec_alpha + num_elec_beta\n", 79 | "# get nuclear repulsion energy\n", 80 | "E_nuc = mol.nuclear_repulsion_energy()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# STEP 2 : Calculate molecular integrals \n", 88 | "\n", 89 | "Overlap \n", 90 | "\n", 91 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n", 92 | "\n", 93 | "Kinetic\n", 94 | "\n", 95 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n", 96 | "\n", 97 | "Nuclear Attraction\n", 98 | "\n", 99 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n", 100 | "\n", 101 | "Form Core Hamiltonian\n", 102 | "\n", 103 | "$$ H = T + V $$\n", 104 | "\n", 105 | "Two electron integrals\n", 106 | "\n", 107 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# calculate overlap integrals\n", 117 | "S = np.asarray(mints.ao_overlap())\n", 118 | "# calculate kinetic energy integrals\n", 119 | "T = np.asarray(mints.ao_kinetic())\n", 120 | "# calculate nuclear attraction integrals\n", 121 | "V = np.asarray(mints.ao_potential())\n", 122 | "# form core Hamiltonian\n", 123 | "H = T + V\n", 124 | "# calculate two electron integrals\n", 125 | "eri = np.asarray(mints.ao_eri())\n", 126 | "# get number of atomic orbitals\n", 127 | "num_ao = np.shape(S)[0]\n", 128 | "\n", 129 | "\n", 130 | "\n", 131 | "print(np.shape(eri))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "# set inital density matrix to zero\n", 141 | "D = np.zeros((num_ao,num_ao))\n", 142 | "\n", 143 | "# 2 helper functions for printing during SCF\n", 144 | "def print_start_iterations():\n", 145 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\"Iter\", \"Time(s)\", 
\"RMSC DM\", \"delta E\", \"E_scf_elec\")))\n", 146 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n", 147 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec):\n", 148 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num, iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)))\n", 149 | "\n", 150 | "# set stopping criteria\n", 151 | "iteration_max = 100\n", 152 | "convergence_E = 1e-9\n", 153 | "convergence_DM = 1e-5\n", 154 | "# loop variables\n", 155 | "iteration_num = 0\n", 156 | "E_scf_total = 0\n", 157 | "E_scf_elec = 0.0\n", 158 | "iteration_E_diff = 0.0\n", 159 | "iteration_rmsc_dm = 0.0\n", 160 | "converged = False\n", 161 | "exceeded_iterations = False\n", 162 | "print_start_iterations()\n", 163 | "while (not converged and not exceeded_iterations):\n", 164 | " # store last iteration and increment counters\n", 165 | " iteration_start_time = time.time()\n", 166 | " iteration_num += 1\n", 167 | " E_elec_last = E_scf_elec\n", 168 | " D_last = np.copy(D)\n", 169 | " # form G matrix\n", 170 | " G = np.zeros((num_ao,num_ao))\n", 171 | " for i in range(num_ao):\n", 172 | " for j in range(num_ao):\n", 173 | " for k in range(num_ao):\n", 174 | " for l in range(num_ao):\n", 175 | " G[i,j] += D[k,l] * ((2.0*(eri[i,j,k,l])) - (eri[i,k,j,l]))\n", 176 | " # build fock matrix\n", 177 | " F = H + G\n", 178 | " # solve the generalized eigenvalue problem\n", 179 | " E_orbitals, C = spla.eigh(F,S)\n", 180 | " # compute new density matrix\n", 181 | " D = np.zeros((num_ao,num_ao))\n", 182 | " for i in range(num_ao):\n", 183 | " for j in range(num_ao):\n", 184 | " for k in range(num_elec_alpha):\n", 185 | " D[i,j] += C[i,k] * C[j,k]\n", 186 | " # calculate electronic energy\n", 187 | " E_scf_elec = np.sum(np.multiply(D , (H + F)))\n", 188 | " # calculate energy change of iteration\n", 189 | " iteration_E_diff = np.abs(E_scf_elec - E_elec_last)\n", 190 | " # rms change of density matrix\n", 191 | " iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n", 192 | " iteration_end_time = time.time()\n", 193 | " print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)\n", 194 | " if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM): \n", 195 | " converged = True\n", 196 | " if(iteration_num == iteration_max):\n", 197 | " exceeded_iterations = True\n", 198 | "\n", 199 | "# calculate total energy\n", 200 | "E_scf_total = E_scf_elec + E_nuc\n", 201 | "print(\"{:^79}\".format(\"Total HF energy : {:>11f}\".format(E_scf_total)))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "# Perform MP2 calculation" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "## Convert the two-electron integrals from AO basis to the MO basis\n", 216 | "\n", 217 | "$$(pq|rs) = \\sum_\\mu \\sum_\\nu \\sum_\\lambda \\sum_\\sigma C_\\mu^p C_\\nu^q\n", 218 | "(\\mu \\nu|\\lambda \\sigma) C_\\lambda^r C_\\sigma^s.$$\n", 219 | "\n", 220 | "This is implemented in the cell block below. There are a few ways to implement this, below is by far the worst. The algorithm coded below is the naive approach known as the Noddy algorithm. 
This algorithm scales as $N^8$, although MP2 is formally known to scale as $N^5$; however. The Noddy algorithm is a great starting point." 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "noddy_start = time.time()\n", 230 | "eri_mo = np.zeros((num_ao, num_ao, num_ao, num_ao))\n", 231 | "for p in range(num_ao):\n", 232 | " for q in range(num_ao):\n", 233 | " for r in range(num_ao):\n", 234 | " for s in range(num_ao):\n", 235 | " for mu in range(num_ao):\n", 236 | " for nu in range(num_ao):\n", 237 | " for lmda in range(num_ao):\n", 238 | " for sigma in range(num_ao):\n", 239 | " eri_mo[p, q, r, s] += C[mu, p]*C[nu, q]*C[lmda,r]*C[sigma, s]*eri[mu, nu, lmda, sigma]\n", 240 | " \n", 241 | "noddy_end = time.time()\n", 242 | "noddy_time = noddy_end - noddy_start\n", 243 | "print(noddy_time)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "### Compute the MP2 Energy\n", 251 | "Now we can calculate the MP2 estimation of the correlation energy. \n", 252 | "$$E_{\\mathrm{corr(MP2)}}\\ =\\ \\frac{( ia \\mid jb ) [ 2 (ia \\mid jb ) - ( ib \\mid ja )]}{\\epsilon_i + \\epsilon_j + \\epsilon_a - \\epsilon_b}$$" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "E_corr_mp2 = 0\n", 262 | "for i in range(num_elec_alpha):\n", 263 | " for j in range(num_elec_alpha):\n", 264 | " for a in range(num_elec_alpha, num_ao):\n", 265 | " for b in range(num_elec_alpha, num_ao):\n", 266 | " temp = eri_mo[i, a, j, b] * \\\n", 267 | " (2*eri_mo[i, a, j, b] - eri_mo[i, b, j, a])\n", 268 | " temp /= (E_orbitals[i] + E_orbitals[j] - E_orbitals[a] - E_orbitals[b])\n", 269 | " E_corr_mp2 += temp\n", 270 | " \n", 271 | "print(\"{:^79}\".format(\"Total MP2 correlation energy : {:>11f}\".format(E_corr_mp2)))" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "The correlation energy is very small compared to the total energy, which is generally the case. However, this correlation energy can be very important to describing properties such as dispersion. 
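The loop-based transformation and energy sum above can also be written entirely with NumPy array operations. The cell sketched below is an optional cross-check, not part of the original notebook (the names `eri_mo_es`, `ovov`, `denom` and `E_corr_mp2_vec` are our own): it repeats the AO-to-MO transformation in a single `np.einsum` call and evaluates the same closed-shell MP2 expression with array slices, so it should reproduce the Noddy-loop result above to numerical precision. Everything else it touches (`C`, `eri`, `eri_mo`, `E_orbitals`, `num_ao`, `num_elec_alpha`) is defined in the cells above.

```python
# Optional cross-check (sketch): vectorized AO->MO transform and MP2 energy.
import numpy as np

# One-shot transformation; optimize=True lets einsum choose an O(N^5)-style contraction path.
eri_mo_es = np.einsum('mp,nq,mnxy,xr,ys->pqrs', C, C, eri, C, C, optimize=True)
print("max |einsum - Noddy| :", np.max(np.abs(eri_mo_es - eri_mo)))

nocc = num_elec_alpha                       # doubly occupied MOs (RHF reference)
e_occ = E_orbitals[:nocc]
e_vir = E_orbitals[nocc:]

# (ia|jb) block of the MO integrals, laid out as [i, a, j, b]
ovov = eri_mo_es[:nocc, nocc:, :nocc, nocc:]
# denominator e_i + e_j - e_a - e_b with the same [i, a, j, b] layout
denom = (e_occ[:, None, None, None] - e_vir[None, :, None, None]
         + e_occ[None, None, :, None] - e_vir[None, None, None, :])

# ovov.swapaxes(1, 3)[i, a, j, b] is (ib|ja)
E_corr_mp2_vec = np.sum(ovov * (2.0 * ovov - ovov.swapaxes(1, 3)) / denom)
print("vectorized E_corr(MP2) :", E_corr_mp2_vec)
```

For the STO-3G water example the arrays are tiny, so the point here is readability rather than speed; the explicit quarter-transformation loops under "An implementation of the smart algorithm" further down show where the formal savings over the Noddy sum come from.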
" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "## A comparison with Psi4" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "# Get the SCF wavefunction & energies# Get t \n", 295 | "scf_e, scf_wfn = psi4.energy('scf', return_wfn=True)\n", 296 | "mp2_e = psi4.energy('mp2')\n", 297 | "print(mp2_e)\n", 298 | "\n", 299 | "E_diff = (mp2_e - (E_scf_total + E_corr_mp2)) \n", 300 | "print(E_diff)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "### An implementation of the smart algorithm" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "# nbasis, norb = C.shape\n", 317 | "import time\n", 318 | "\n", 319 | "smart_start = time.time()\n", 320 | "sump = np.zeros((num_ao,num_ao,num_ao,num_ao))\n", 321 | "for mu in range(num_ao):\n", 322 | " sumq = np.zeros((num_ao,num_ao,num_ao))\n", 323 | " for nu in range(num_ao):\n", 324 | " sumr = np.zeros((num_ao,num_ao))\n", 325 | " for lmda in range(num_ao):\n", 326 | " sums = np.zeros((num_ao))\n", 327 | " for sigma in range(num_ao):\n", 328 | " for s in range(num_ao):\n", 329 | " sums[s] += C[sigma,s]*eri[mu,nu,lmda,sigma]\n", 330 | " for r in range(num_ao):\n", 331 | " for s in range(num_ao):\n", 332 | " sumr[r,s] += C[lmda,r]*sums[s]\n", 333 | " for q in range(num_ao):\n", 334 | " for r in range(num_ao):\n", 335 | " for s in range(num_ao):\n", 336 | " sumq[q,r,s] += C[nu,q]*sumr[r,s]\n", 337 | " for p in range(num_ao):\n", 338 | " for q in range(num_ao):\n", 339 | " for r in range(num_ao):\n", 340 | " for s in range(num_ao):\n", 341 | " sump[p,q,r,s] += C[mu,p]*sumq[q,r,s]\n", 342 | "eri_mo = sump\n", 343 | "\n", 344 | "smart_end = time.time()\n", 345 | "smart_time = smart_end -smart_start\n", 346 | "print(smart_time)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "E_corr_mp2 = 0\n", 356 | "for i in range(num_elec_alpha):\n", 357 | " for j in range(num_elec_alpha):\n", 358 | " for a in range(num_elec_alpha, num_ao):\n", 359 | " for b in range(num_elec_alpha, num_ao):\n", 360 | " temp = eri_mo[i, a, j, b] * \\\n", 361 | " (2*eri_mo[i, a, j, b] - eri_mo[i, b, j, a])\n", 362 | " temp /= (E_orbitals[i] + E_orbitals[j] - E_orbitals[a] - E_orbitals[b])\n", 363 | " E_corr_mp2 += temp\n", 364 | " \n", 365 | "print(\"{:^79}\".format(\"Total MP2 correlation energy : {:>11f}\".format(E_corr_mp2)))" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "# algorithm time comparison" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "ns_time = noddy_time/smart_time\n", 382 | "print(ns_time)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [] 391 | } 392 | ], 393 | "metadata": { 394 | "kernelspec": { 395 | "display_name": "Python 3", 396 | "language": "python", 397 | "name": "python3" 398 | }, 399 | "language_info": { 400 | "codemirror_mode": { 401 | "name": "ipython", 402 | "version": 3 403 | }, 404 | "file_extension": ".py", 405 | "mimetype": "text/x-python", 406 | "name": "python", 407 | "nbconvert_exporter": "python", 
408 | "pygments_lexer": "ipython3", 409 | "version": "3.6.6" 410 | } 411 | }, 412 | "nbformat": 4, 413 | "nbformat_minor": 2 414 | } 415 | -------------------------------------------------------------------------------- /03_MP2/basics/mp2_pyscf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MP2" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Some useful resources:\n", 15 | " - [original paper](https://journals.aps.org/pr/abstract/10.1103/PhysRev.46.618)\n", 16 | " - Levine Chapter 16\n", 17 | " - [psi4numpy tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/05_Moller-Plesset/5a_conventional-mp2.ipynb)\n", 18 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project4)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "# MP2 algorithm\n", 26 | "1. The starting point will be the Hartree-Fock wavefunction. " 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## Imports" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import numpy as np\n", 43 | "import scipy.linalg as spla\n", 44 | "import pyscf\n", 45 | "import matplotlib.pyplot as plt\n", 46 | "import time\n", 47 | "%matplotlib notebook" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Specify the molecule" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# start timer\n", 64 | "start_time = time.time()\n", 65 | "# define molecule\n", 66 | "mol = pyscf.gto.M(\n", 67 | " atom=\"O 0.0000000 0.0000000 0.0000000; H 0.7569685 0.0000000 -0.5858752; H -0.7569685 0.0000000 -0.5858752\",\n", 68 | " basis='sto-3g',\n", 69 | " unit=\"Ang\",\n", 70 | " verbose=0,\n", 71 | " symmetry=False,\n", 72 | " spin=0,\n", 73 | " charge=0\n", 74 | ")\n", 75 | "# get number of atomic orbitals\n", 76 | "num_ao = mol.nao_nr()\n", 77 | "# get number of electrons\n", 78 | "num_elec_alpha, num_elec_beta = mol.nelec\n", 79 | "num_elec = num_elec_alpha + num_elec_beta\n", 80 | "# get nuclear repulsion energy\n", 81 | "E_nuc = mol.energy_nuc()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Calculate molecular integrals \n", 89 | "\n", 90 | "\n", 91 | "Overlap \n", 92 | "\n", 93 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n", 94 | "\n", 95 | "Kinetic\n", 96 | "\n", 97 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n", 98 | "\n", 99 | "Nuclear Attraction\n", 100 | "\n", 101 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n", 102 | "\n", 103 | "Form Core Hamiltonian\n", 104 | "\n", 105 | "$$ H = T + V $$\n", 106 | "\n", 107 | "Two electron integrals\n", 108 | "\n", 109 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi^*_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# 
calculate overlap integrals\n", 119 | "S = mol.intor('cint1e_ovlp_sph')\n", 120 | "# calculate kinetic energy integrals\n", 121 | "T = mol.intor('cint1e_kin_sph')\n", 122 | "# calculate nuclear attraction integrals\n", 123 | "V = mol.intor('cint1e_nuc_sph')\n", 124 | "# form core Hamiltonian\n", 125 | "H = T + V\n", 126 | "# calculate two electron integrals\n", 127 | "eri = mol.intor('cint2e_sph', aosym='s8')\n", 128 | "# since we are using the 8 fold symmetry of the 2 electron integrals\n", 129 | "# the functions below will help us when accessing elements\n", 130 | "__idx2_cache = {}\n", 131 | "\n", 132 | "\n", 133 | "def idx2(i, j):\n", 134 | " if (i, j) in __idx2_cache:\n", 135 | " return __idx2_cache[i, j]\n", 136 | " elif i >= j:\n", 137 | " __idx2_cache[i, j] = int(i*(i+1)/2+j)\n", 138 | " else:\n", 139 | " __idx2_cache[i, j] = int(j*(j+1)/2+i)\n", 140 | " return __idx2_cache[i, j]\n", 141 | "\n", 142 | "\n", 143 | "def idx4(i, j, k, l):\n", 144 | " return idx2(idx2(i, j), idx2(k, l))\n", 145 | "\n", 146 | "\n", 147 | "print(np.shape(eri))" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "## Perform Hartree-Fock SCF" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "# set inital density matrix to zero\n", 164 | "D = np.zeros((num_ao, num_ao))\n", 165 | "\n", 166 | "# 2 helper functions for printing during SCF\n", 167 | "\n", 168 | "\n", 169 | "def print_start_iterations():\n", 170 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 171 | " \"Iter\", \"Time(s)\", \"RMSC DM\", \"delta E\", \"E_elec\")))\n", 172 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n", 173 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n", 174 | "\n", 175 | "\n", 176 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec):\n", 177 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num,\n", 178 | " iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_elec)))\n", 179 | "\n", 180 | "\n", 181 | "# set stopping criteria\n", 182 | "iteration_max = 100\n", 183 | "convergence_E = 1e-9\n", 184 | "convergence_DM = 1e-5\n", 185 | "# loop variables\n", 186 | "iteration_num = 0\n", 187 | "E_scf_total = 0\n", 188 | "E_scf_elec = 0.0\n", 189 | "iteration_E_diff = 0.0\n", 190 | "iteration_rmsc_dm = 0.0\n", 191 | "converged = False\n", 192 | "exceeded_iterations = False\n", 193 | "\n", 194 | "print_start_iterations()\n", 195 | "while (not converged and not exceeded_iterations):\n", 196 | " # store last iteration and increment counters\n", 197 | " iteration_start_time = time.time()\n", 198 | " iteration_num += 1\n", 199 | " E_elec_last = E_scf_elec\n", 200 | " D_last = np.copy(D)\n", 201 | " # form G matrix\n", 202 | " G = np.zeros((num_ao, num_ao))\n", 203 | " for i in range(num_ao):\n", 204 | " for j in range(num_ao):\n", 205 | " for k in range(num_ao):\n", 206 | " for l in range(num_ao):\n", 207 | " G[i, j] += D[k, l] * \\\n", 208 | " ((2.0*(eri[idx4(i, j, k, l)])) -\n", 209 | " (eri[idx4(i, k, j, l)]))\n", 210 | " # build fock matrix\n", 211 | " F = H + G\n", 212 | " # solve the generalized eigenvalue problem\n", 213 | " E_orbitals, C = spla.eigh(F, S)\n", 214 | " # compute new density matrix\n", 215 | " D = np.zeros((num_ao, num_ao))\n", 216 | " for i in 
range(num_ao):\n",
 217 | "        for j in range(num_ao):\n",
 218 | "            for k in range(num_elec_alpha):\n",
 219 | "                D[i, j] += C[i, k] * C[j, k]\n",
 220 | "    # calculate electronic energy\n",
 221 | "    E_scf_elec = np.sum(np.multiply(D, (H + F)))\n",
 222 | "    # calculate energy change of iteration\n",
 223 | "    iteration_E_diff = np.abs(E_scf_elec - E_elec_last)\n",
 224 | "    # rms change of density matrix\n",
 225 | "    iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n",
 226 | "    iteration_end_time = time.time()\n",
 227 | "    print_iteration(iteration_num, iteration_start_time,\n",
 228 | "                    iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)\n",
 229 | "    if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM):\n",
 230 | "        converged = True\n",
 231 | "    if(iteration_num == iteration_max):\n",
 232 | "        exceeded_iterations = True\n",
 233 | "\n",
 234 | "# calculate total energy\n",
 235 | "E_scf_total = E_scf_elec + E_nuc\n",
 236 | "print(\"{:^79}\".format(\"Total HF Energy : {:>11f}\".format(E_scf_total)))"
 237 | ]
 238 | },
 239 | {
 240 | "cell_type": "code",
 241 | "execution_count": null,
 242 | "metadata": {},
 243 | "outputs": [],
 244 | "source": []
 245 | },
 246 | {
 247 | "cell_type": "markdown",
 248 | "metadata": {},
 249 | "source": [
 250 | "# Perform MP2 calculation"
 251 | ]
 252 | },
 253 | {
 254 | "cell_type": "markdown",
 255 | "metadata": {},
 256 | "source": [
 257 | "## Convert the two-electron integrals from AO basis to the MO basis\n",
 258 | "\n",
 259 | "$$(pq|rs) = \sum_\mu \sum_\nu \sum_\lambda \sum_\sigma C_\mu^p C_\nu^q\n",
 260 | "(\mu \nu|\lambda \sigma) C_\lambda^r C_\sigma^s.$$\n",
 261 | "\n",
 262 | "\n",
 263 | "Attempt to code this conversion below. Although this was introduced previously, we want to repeat a note about the electron repulsion integrals. The electron repulsion integrals are stored as the vector `eri`. This vector represents what would otherwise be a 4-D tensor. The reason we can store it as a vector is that there is an eight-fold symmetry present in the two-electron integrals, and pyscf takes advantage of it. Thus we access our integrals using the helper function we have written for you, `idx4` above, which turns the four orbital indices into the correct index for the pyscf data structure. For example:\n",
 264 | "\n",
 265 | "To access the specific integral for $(\phi_1\phi_4|\phi_7\phi_9)$, we would use the command\n",
 266 | "`eri[idx4(1,4,7,9)]`. This will be essential for writing the transformation in the code cell below and in calculating the MP2 energy afterwards.\n",
 267 | "\n"
 268 | ]
 269 | },
 270 | {
 271 | "cell_type": "code",
 272 | "execution_count": null,
 273 | "metadata": {},
 274 | "outputs": [],
 275 | "source": [
 276 | "## place code for two-electron integral conversion here."
 277 | ]
 278 | },
 279 | {
 280 | "cell_type": "markdown",
 281 | "metadata": {},
 282 | "source": [
 283 | "### Compute the MP2 Energy\n",
 284 | "Now we can calculate the MP2 estimate of the correlation energy. \n",
 285 | "$$E_{\mathrm{corr(MP2)}}\ =\ \sum_{ij}\sum_{ab}\frac{( ia \mid jb ) [ 2 (ia \mid jb ) - ( ib \mid ja )]}{\epsilon_i + \epsilon_j - \epsilon_a - \epsilon_b}$$\n",
 286 | "\n",
 287 | "Here $i$ and $j$ run over all occupied orbitals, whereas $a$ and $b$ run over unoccupied (virtual) orbitals. \n",
 288 | "\n",
 289 | "Remember during this coding step that we are basing our MP2 correction on an RHF calculation and thus there are the same number of $\alpha$ and $\beta$ electrons.\n",
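 "\n",
 "Below is a minimal sketch of one possible way to finish both steps (the AO-to-MO transformation and the energy sum). It is only an illustration, not necessarily how the accompanying solutions notebook does it, and the names `eri_ao`, `tmp`, `eri_mo`, and `E_corr_mp2_check` are introduced here purely for the example. If you would like to attempt the exercise yourself first, skip past this block.\n",
 "\n",
 "```python\n",
 "# expand the symmetry-packed `eri` vector into a full 4-index AO array (simple, not efficient)\n",
 "eri_ao = np.zeros((num_ao, num_ao, num_ao, num_ao))\n",
 "for p in range(num_ao):\n",
 "    for q in range(num_ao):\n",
 "        for r in range(num_ao):\n",
 "            for s in range(num_ao):\n",
 "                eri_ao[p, q, r, s] = eri[idx4(p, q, r, s)]\n",
 "\n",
 "# AO -> MO transformation, one index at a time (O(N^5)) instead of all four at once (O(N^8))\n",
 "tmp = np.einsum('ap,abcd->pbcd', C, eri_ao)\n",
 "tmp = np.einsum('bq,pbcd->pqcd', C, tmp)\n",
 "tmp = np.einsum('cr,pqcd->pqrd', C, tmp)\n",
 "eri_mo = np.einsum('ds,pqrd->pqrs', C, tmp)\n",
 "\n",
 "# MP2 correlation energy for the RHF reference: i, j run over occupied MOs,\n",
 "# a, b over virtual MOs; E_orbitals comes from the converged SCF above\n",
 "E_corr_mp2_check = 0.0\n",
 "for i in range(num_elec_alpha):\n",
 "    for j in range(num_elec_alpha):\n",
 "        for a in range(num_elec_alpha, num_ao):\n",
 "            for b in range(num_elec_alpha, num_ao):\n",
 "                iajb = eri_mo[i, a, j, b]\n",
 "                ibja = eri_mo[i, b, j, a]\n",
 "                E_corr_mp2_check += iajb * (2.0 * iajb - ibja) / (\n",
 "                    E_orbitals[i] + E_orbitals[j] - E_orbitals[a] - E_orbitals[b])\n",
 "```"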
290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "#initialize the variable forthe mp2 correlation energy\n", 299 | "E_corr_mp2 = 0\n", 300 | "# code the equation above and adjust the value of E_corr_mp2\n", 301 | "\n", 302 | "\n", 303 | "#this will print your E_corr mp2\n", 304 | "print(\"{:^79}\".format(\"Total MP2 correlation energy : {:>11f}\".format(E_corr_mp2)))" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "The correlation energy is very small compared to the total energy, which is generally the case. However, this correlation energy can be very important to describing properties such as dispersion. " 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "## A comparison with PySCF" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "import pyscf\n", 328 | "m = pyscf.scf.RHF(mol)\n", 329 | "print('E(HF) = %g' % m.kernel())\n", 330 | "mp2 = pyscf.mp.MP2(m)\n", 331 | "E_corr_mp2_pyscf = mp2.kernel()[0]\n", 332 | "print('E(MP2) = {:.9g}'.format(E_corr_mp2_pyscf))" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "# comparison from pyscf\n", 342 | "E_diff = E_corr_mp2_pyscf - E_corr_mp2\n", 343 | "print(E_diff)" 344 | ] 345 | } 346 | ], 347 | "metadata": { 348 | "kernelspec": { 349 | "display_name": "Python 3", 350 | "language": "python", 351 | "name": "python3" 352 | }, 353 | "language_info": { 354 | "codemirror_mode": { 355 | "name": "ipython", 356 | "version": 3 357 | }, 358 | "file_extension": ".py", 359 | "mimetype": "text/x-python", 360 | "name": "python", 361 | "nbconvert_exporter": "python", 362 | "pygments_lexer": "ipython3", 363 | "version": "3.6.5" 364 | }, 365 | "toc": { 366 | "base_numbering": 1, 367 | "nav_menu": {}, 368 | "number_sections": true, 369 | "sideBar": true, 370 | "skip_h1_title": false, 371 | "title_cell": "Table of Contents", 372 | "title_sidebar": "Contents", 373 | "toc_cell": false, 374 | "toc_position": {}, 375 | "toc_section_display": true, 376 | "toc_window_display": false 377 | } 378 | }, 379 | "nbformat": 4, 380 | "nbformat_minor": 2 381 | } 382 | -------------------------------------------------------------------------------- /03_MP2/mp2_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/03_MP2/mp2_slides.pdf -------------------------------------------------------------------------------- /04_Machine_Learning/advanced/bayesopt_boston.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Bayesian Optimization Example: Boston Housing Dataset" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Useful Resources\n", 15 | " - [Scikit-Learn](http://scikit-learn.org/)\n", 16 | " - [Scikit-Optimize](https://github.com/scikit-optimize/scikit-optimize) \n", 17 | " - [GPyOpt](https://gpyopt.readthedocs.io/en/latest/)\n", 18 | " - [GPyOpt GitHub](https://github.com/SheffieldML/GPyOpt)\n", 19 | " - 
[fmfn/BayesianOptimization](https://github.com/fmfn/BayesianOptimization)\n",
 20 | " - [Taking the Human Out of the Loop: A Review of Bayesian Optimization](https://ieeexplore.ieee.org/document/7352306/)\n",
 21 | " - [Practical Bayesian Optimization of Machine Learning Algorithms](https://arxiv.org/abs/1206.2944)\n",
 22 | " - [Evaluating Hyperparameter Optimization Strategies](https://blog.sigopt.com/posts/evaluating-hyperparameter-optimization-strategies)\n",
 23 | " - [A Tutorial on Bayesian Optimization of Expensive Cost Functions, with Application to Active User Modeling and Hierarchical Reinforcement Learning](https://arxiv.org/abs/1012.2599)\n",
 24 | " \n",
 25 | "## Introduction\n",
 26 | "Bayesian Optimization is a strategy for the global optimization of black-box functions: the goal is to find a minimum or maximum of a function f(x) over a bounded domain X. Bayesian optimization constructs a probabilistic model of f(x) and exploits that model to decide where in X to evaluate the function next, using the information gained from all previous evaluations of f(x).\n",
 27 | "\n",
 28 | "## General Theory\n",
 29 | "#### Objective:\n",
 30 | "Find the global maximizer (or minimizer) of a function $f(\textbf{x})$\n",
 31 | "$$\textbf{x}^{*} = \text{arg} \max_{\textbf{x} \in \chi } f(\textbf{x})$$\n",
 32 | "$\chi$ is the space of interest and can be categorical, conditional, or both\n",
 33 | "\n",
 34 | "#### Strategy \n",
 35 | "- Unknown objective function \n",
 36 | "- Treat it as a random function \n",
 37 | "- Place a prior over it\n",
 38 | "- The prior captures our belief about the function\n",
 39 | "- Gather information and update the prior to a posterior \n",
 40 | "- Determine the next query point from the updated (posterior) belief\n",
 41 | "\n",
 42 | "![](https://ieeexplore.ieee.org/mediastore_new/IEEE/content/media/5/7360840/7352306/shahr1-2494218-large.gif)[A Tutorial on Bayesian Optimization of Expensive Cost Functions, with Application to Active User Modeling and Hierarchical Reinforcement Learning](https://arxiv.org/abs/1012.2599)\n",
 43 | "\n",
 44 | "\n",
 45 | "## Summarize\n",
 46 | "- Finds min/max with relatively few evaluations\n",
 47 | "- Cost of more computation to determine next point to try\n",
 48 | "- Good for expensive functions such as ML\n",
 49 | "\n",
 50 | "## What can $f$ be?\n",
 51 | "Bayesian Optimization is best used for costly functions, because choosing the next query point from all of the previous evaluations is itself relatively expensive. While Bayesian Optimization is more computationally expensive per iteration than other search methods, it often requires fewer iterations to find the maximum/minimum, which reduces the number of times an expensive step such as training a neural network has to be performed and therefore the overall computational cost.
\n", 52 | "\n", 53 | "### Random Search\n", 54 | "![Random Search](https://daks2k3a4ib2z.cloudfront.net/59235ff882b78a59a72fa9bd/593477f37fa7db0d44d42510_tumblr_inline_o7181jRDUR1toi3ym_540.gif)\n", 55 | "\n", 56 | "### Grid Search\n", 57 | "![Grid Search](https://daks2k3a4ib2z.cloudfront.net/59235ff882b78a59a72fa9bd/593477f0c5b12e2f0b26ec3a_tumblr_inline_o7181iRIMT1toi3ym_540.gif)\n", 58 | "\n", 59 | "### Bayesian Optimization\n", 60 | "![Bayesian Optimization](https://daks2k3a4ib2z.cloudfront.net/59235ff882b78a59a72fa9bd/593477fa4beb0a0d64a26806_tumblr_inline_o7181mi1eT1toi3ym_540.gif)\n", 61 | "\n", 62 | "## Summarize\n", 63 | "- Finds min/max with relatively few evaluations\n", 64 | "- Cost of more computation to determine next point to try\n", 65 | "- Good for expensive functions such as ML\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "%matplotlib inline\n", 75 | "import numpy as np\n", 76 | "import pandas as pd\n", 77 | "import matplotlib.pyplot as plt\n", 78 | "from math import sqrt" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "## Load dataset" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "from sklearn.datasets import load_boston\n", 95 | "boston = load_boston()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "## Import scikit models" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "from sklearn.linear_model import LinearRegression\n", 112 | "from sklearn.linear_model import BayesianRidge\n", 113 | "from sklearn.linear_model import Ridge\n", 114 | "from sklearn.kernel_ridge import KernelRidge\n", 115 | "from sklearn.ensemble import RandomForestRegressor\n", 116 | "from sklearn.metrics import mean_absolute_error\n", 117 | "from sklearn.metrics import mean_squared_error\n", 118 | "from sklearn.model_selection import train_test_split" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "X_train, X_test, y_train, y_test = train_test_split(\n", 128 | " boston.data, boston.target, train_size=0.9, test_size=0.1)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Model to Optimize" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "regr = RandomForestRegressor(n_jobs=-1)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Parameters to Optimize" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "from skopt.space import Integer\n", 161 | "from skopt.space import Categorical\n", 162 | "\n", 163 | "space = [Integer(1, 200, name='n_estimators'),\n", 164 | " Categorical(('auto', 'sqrt', 'log2'), name='max_features'),\n", 165 | " Integer(2, 100, name='min_samples_split'),\n", 166 | " Integer(1, 100, name='min_samples_leaf')]" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "## Objective" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | 
"execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "from skopt.utils import use_named_args\n", 183 | "\n", 184 | "\n", 185 | "@use_named_args(space)\n", 186 | "def objective(**params):\n", 187 | " regr.set_params(**params)\n", 188 | " regr.fit(X_train, y_train)\n", 189 | "\n", 190 | " return mean_absolute_error(y_test, regr.predict(X_test))" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## Optimization" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "from skopt import gp_minimize\n", 207 | "\n", 208 | "res_gp = gp_minimize(objective, space, n_calls=20, random_state=0)\n", 209 | "\n", 210 | "res_gp.fun" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "res_gp.x" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "n_estimators = res_gp.x[0]\n", 229 | "max_features = res_gp.x[1]\n", 230 | "min_samples_split = res_gp.x[2]\n", 231 | "min_samples_leaf = res_gp.x[3]\n", 232 | "\n", 233 | "regr = RandomForestRegressor(n_jobs=-1, n_estimators=n_estimators, max_features=max_features,\n", 234 | " min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)\n", 235 | "regr.fit(X_train, y_train)\n", 236 | "\n", 237 | "predicted = regr.predict(X_test)\n", 238 | "\n", 239 | "mae = mean_absolute_error(y_test, predicted)\n", 240 | "mse = mean_squared_error(y_test, predicted)\n", 241 | "rmse = sqrt(mse)\n", 242 | "print('MAE:', mae, '\\tMSE:', mse, '\\tRMSE:', rmse)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "## Plot Results" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "training_size = ('%0.1f' % (100 - (len(y_test)/len(boston.target) * 100)))\n", 259 | "class_type = str(regr).split('(')[0]\n", 260 | "label1 = ('MAE = {}'.format('%8.3f' % mae))\n", 261 | "label2 = (class_type + '\\nTraining size = ' + training_size + '%')\n", 262 | "\n", 263 | "plt.figure(dpi=250)\n", 264 | "plt.plot([min(boston.target), max(boston.target)], [\n", 265 | " min(boston.target), max(boston.target)], ls=\"--\", c=\"g\")\n", 266 | "plt.plot(y_test, predicted, 'o', markersize=1.5)\n", 267 | "plt.xlabel(\"Actual Price\")\n", 268 | "plt.ylabel(\"Predicted Price\")\n", 269 | "legend1 = plt.legend([label1], loc='lower right',\n", 270 | " markerscale=0, fontsize=6, handlelength=0)\n", 271 | "plt.legend([label2], loc='upper left',\n", 272 | " markerscale=0, fontsize=6, handlelength=0)\n", 273 | "plt.gca().add_artist(legend1)\n", 274 | "plt.show()" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "## What to do now? 
\n", 282 | "- Try a different classifier/regressor\n", 283 | "- Try with a different dataset provided by scikit-learn\n", 284 | "- Try out a different library" 285 | ] 286 | } 287 | ], 288 | "metadata": { 289 | "kernelspec": { 290 | "display_name": "Python 3", 291 | "language": "python", 292 | "name": "python3" 293 | }, 294 | "language_info": { 295 | "codemirror_mode": { 296 | "name": "ipython", 297 | "version": 3 298 | }, 299 | "file_extension": ".py", 300 | "mimetype": "text/x-python", 301 | "name": "python", 302 | "nbconvert_exporter": "python", 303 | "pygments_lexer": "ipython3", 304 | "version": "3.6.6" 305 | }, 306 | "varInspector": { 307 | "cols": { 308 | "lenName": 16, 309 | "lenType": 16, 310 | "lenVar": 40 311 | }, 312 | "kernels_config": { 313 | "python": { 314 | "delete_cmd_postfix": "", 315 | "delete_cmd_prefix": "del ", 316 | "library": "var_list.py", 317 | "varRefreshCmd": "print(var_dic_list())" 318 | }, 319 | "r": { 320 | "delete_cmd_postfix": ") ", 321 | "delete_cmd_prefix": "rm(", 322 | "library": "var_list.r", 323 | "varRefreshCmd": "cat(var_dic_list()) " 324 | } 325 | }, 326 | "types_to_exclude": [ 327 | "module", 328 | "function", 329 | "builtin_function_or_method", 330 | "instance", 331 | "_Feature" 332 | ], 333 | "window_display": false 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 2 338 | } 339 | -------------------------------------------------------------------------------- /04_Machine_Learning/basics/coulomb_matrix.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Coulomb Matrix Representation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from math import sqrt" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Useful Resources\n", 25 | " - [Fast and Accurate Modeling of Molecular Atomization Energies with Machine Learning](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.108.058301)\n", 26 | " - [Prediction Errors of Molecular Machine learning Models lower than Hybrid DFT Error](https://pubs.acs.org/doi/abs/10.1021/acs.jctc.7b00577)\n", 27 | " - [Understanding molecular representations in machine learning: The role of uniqueness and target similarity](https://aip.scitation.org/doi/10.1063/1.4964627)\n", 28 | "\n", 29 | "## Introduction\n", 30 | "For machine learning there needs to be some way to represent the data to the model in a way in which the model can infer knowledge about the data and use it for future predictions. In chemistry, the data we are trying to represent are molecules with the information we are trying to teach the model are property values for those molecules. The goal is to represent the molecule in a way that provides a detailed enough description about the underlying physics of the molecule in order to accurately predict the properties of the molecule. This has led to a lot of work to determine how to best represent the molecule for the model to learn from. One of the most simplistic ways to describe the molecule is what we are going to work on today, the Coulomb matrix.\n", 31 | "\n", 32 | "## General Theory\n", 33 | "The Coulomb matrix is one of the more simplistic representations used to describe the molecule. 
The Coulomb matrix is a square matrix with diagonal elements being the electronic potential energy of the atom and off diagonal elements being the Coulomb nuclear repulsion between atom I and J. \n", 34 | "\n", 35 | "$$M_{IJ} =\\begin{cases}0.5Z_{I}^{2.4} &\\text{for } I = J, \\\\ \\frac{Z_I Z_J}{\\left | R_I - R_J \\right |} &\\text{for } I \\neq J.\\end{cases} $$\n", 36 | " \n", 37 | "## Setup\n", 38 | "1. Parse file for atoms and coordinates\n", 39 | "2. Build Coulomb Matrix" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "file = open('methane.xyz', 'r')\n", 49 | "\n", 50 | "doc = []\n", 51 | "for line in file:\n", 52 | " doc.append(line)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# read number of atoms\n", 62 | "natoms = int(doc[0].split()[0])\n", 63 | "\n", 64 | "# parse coordinates\n", 65 | "coords = []\n", 66 | "for i in range(natoms):\n", 67 | " a_coords = doc[i + 2].split()[0:4]\n", 68 | " coords.append(a_coords)\n", 69 | "\n", 70 | "coords" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "## What do we need for CM?\n", 78 | "1. Nuclear charges\n", 79 | "2. Calculate when $I = J$\n", 80 | "3. Calculate when $I \\neq J$\n", 81 | "4. Output lower triangle of matrix" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# nuclear charges\n", 91 | "\n", 92 | "# build CM matrix\n", 93 | "\n", 94 | "# return the lower trinagle of the CM as a vector\n", 95 | "mat = mat[np.tril_indices(natoms)]" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "mat" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## If this interests you, feel free to help out with [chemreps](https://github.com/dlf57/chemreps)!" 
112 | ] 113 | } 114 | ], 115 | "metadata": { 116 | "kernelspec": { 117 | "display_name": "Python 3", 118 | "language": "python", 119 | "name": "python3" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.6.6" 132 | }, 133 | "varInspector": { 134 | "cols": { 135 | "lenName": 16, 136 | "lenType": 16, 137 | "lenVar": 40 138 | }, 139 | "kernels_config": { 140 | "python": { 141 | "delete_cmd_postfix": "", 142 | "delete_cmd_prefix": "del ", 143 | "library": "var_list.py", 144 | "varRefreshCmd": "print(var_dic_list())" 145 | }, 146 | "r": { 147 | "delete_cmd_postfix": ") ", 148 | "delete_cmd_prefix": "rm(", 149 | "library": "var_list.r", 150 | "varRefreshCmd": "cat(var_dic_list()) " 151 | } 152 | }, 153 | "types_to_exclude": [ 154 | "module", 155 | "function", 156 | "builtin_function_or_method", 157 | "instance", 158 | "_Feature" 159 | ], 160 | "window_display": false 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 2 165 | } 166 | -------------------------------------------------------------------------------- /04_Machine_Learning/basics/coulomb_matrix_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Coulomb Matrix Representation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from math import sqrt" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Useful Resources\n", 25 | " - [Fast and Accurate Modeling of Molecular Atomization Energies with Machine Learning](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.108.058301)\n", 26 | " - [Prediction Errors of Molecular Machine learning Models lower than Hybrid DFT Error](https://pubs.acs.org/doi/abs/10.1021/acs.jctc.7b00577)\n", 27 | " - [Understanding molecular representations in machine learning: The role of uniqueness and target similarity](https://aip.scitation.org/doi/10.1063/1.4964627)\n", 28 | "\n", 29 | "## Introduction\n", 30 | "For machine learning there needs to be some way to represent the data to the model in a way in which the model can infer knowledge about the data and use it for future predictions. In chemistry, the data we are trying to represent are molecules with the information we are trying to teach the model are property values for those molecules. The goal is to represent the molecule in a way that provides a detailed enough description about the underlying physics of the molecule in order to accurately predict the properties of the molecule. This has led to a lot of work to determine how to best represent the molecule for the model to learn from. One of the most simplistic ways to describe the molecule is what we are going to work on today, the Coulomb matrix.\n", 31 | "\n", 32 | "## General Theory\n", 33 | "The Coulomb matrix is one of the more simplistic representations used to describe the molecule. The Coulomb matrix is a square matrix with diagonal elements being the electronic potential energy of the atom and off diagonal elements being the Coulomb nuclear repulsion between atom I and J. 
\n", 34 | "\n", 35 | "$$M_{IJ} =\\begin{cases}0.5Z_{I}^{2.4} &\\text{for } I = J, \\\\ \\frac{Z_I Z_J}{\\left | R_I - R_J \\right |} &\\text{for } I \\neq J.\\end{cases} $$\n", 36 | " \n", 37 | "## Setup\n", 38 | "1. Parse file for atoms and coordinates\n", 39 | "2. Build Coulomb Matrix" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "file = open('methane.xyz', 'r')\n", 49 | "\n", 50 | "doc = []\n", 51 | "for line in file:\n", 52 | " doc.append(line)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "[['C', '1.041682', '-0.056200', '-0.071481'],\n", 64 | " ['H', '2.130894', '-0.056202', '-0.071496'],\n", 65 | " ['H', '0.678598', '0.174941', '-1.072044'],\n", 66 | " ['H', '0.678613', '0.694746', '0.628980'],\n", 67 | " ['H', '0.678614', '-1.038285', '0.228641']]" 68 | ] 69 | }, 70 | "execution_count": 3, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "# read number of atoms\n", 77 | "natoms = int(doc[0].split()[0])\n", 78 | "\n", 79 | "# parse coordinates\n", 80 | "coords = []\n", 81 | "for i in range(natoms):\n", 82 | " a_coords = doc[i + 2].split()[0:4]\n", 83 | " coords.append(a_coords)\n", 84 | "\n", 85 | "coords" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "## What do we need for CM?\n", 93 | "1. Nuclear charges\n", 94 | "2. Calculate when $I = J$\n", 95 | "3. Calculate when $I \\neq J$\n", 96 | "4. Output lower triangle of matrix" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 4, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "[[36.8581052 0. 0. 0. 0. ]\n", 109 | " [ 5.5085695 0.5 0. 0. 0. ]\n", 110 | " [ 5.50857022 0.56221605 0.5 0. 0. ]\n", 111 | " [ 5.50856526 0.56221405 0.56221669 0.5 0. 
]\n", 112 | " [ 5.50857007 0.56221611 0.56221501 0.56221777 0.5 ]]\n" 113 | ] 114 | } 115 | ], 116 | "source": [ 117 | "# nuclear charges\n", 118 | "nuc = {'C': 6, 'H': 1}\n", 119 | "\n", 120 | "# build CM matrix\n", 121 | "mat = np.zeros((5, 5))\n", 122 | "for i in range(5):\n", 123 | " for j in range(i, 5):\n", 124 | " zi = nuc[coords[i][0]] # nuc['C'] = 6\n", 125 | " zj = nuc[coords[j][0]]\n", 126 | " if i == j:\n", 127 | " mii = 0.5 * zi ** 2.4\n", 128 | " mat[i, i] = mii\n", 129 | "\n", 130 | " else:\n", 131 | " # mij = zizj/rij\n", 132 | " # rij = sqrt((xi - xj)^2 + (yi - yj)^2 + (zi - zj)^2)\n", 133 | " x = float(coords[i][1]) - float(coords[j][1])\n", 134 | " y = float(coords[i][2]) - float(coords[j][2])\n", 135 | " z = float(coords[i][3]) - float(coords[j][3])\n", 136 | " rij = sqrt((x ** 2) + (y ** 2) + (z ** 2))\n", 137 | " mij = (zi * zj) / rij\n", 138 | "\n", 139 | " mat[j, i] = mij\n", 140 | "\n", 141 | "print(mat)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 5, 147 | "metadata": { 148 | "scrolled": true 149 | }, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "array([36.8581052 , 5.5085695 , 0.5 , 5.50857022, 0.56221605,\n", 155 | " 0.5 , 5.50856526, 0.56221405, 0.56221669, 0.5 ,\n", 156 | " 5.50857007, 0.56221611, 0.56221501, 0.56221777, 0.5 ])" 157 | ] 158 | }, 159 | "execution_count": 5, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "# return the lower trinagle of the CM as a vector\n", 166 | "mat = mat[np.tril_indices(natoms)]\n", 167 | "mat" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## What if we wanted this as a function so we could do multiple molecules?" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 6, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "def cm(natoms, coords, size):\n", 184 | " '''\n", 185 | " Paramters\n", 186 | " ---------\n", 187 | " natoms: int\n", 188 | " number of atoms in the molecule\n", 189 | " coords: \n", 190 | " molecule coordinates\n", 191 | " size: int\n", 192 | " size of CM matrix\n", 193 | " Returns\n", 194 | " -------\n", 195 | " mat: triangle matrix\n", 196 | " triangle CM matrix\n", 197 | " '''\n", 198 | " # build CM matrix\n", 199 | " mat = np.zeros((size, size))\n", 200 | " for i in range(natoms):\n", 201 | " for j in range(i, natoms):\n", 202 | " zi = nuc[coords[i][0]] # nuc['C'] = 6\n", 203 | " zj = nuc[coords[j][0]]\n", 204 | " if i == j:\n", 205 | " mii = 0.5 * zi ** 2.4\n", 206 | " mat[i, i] = mii\n", 207 | "\n", 208 | " else:\n", 209 | " # mij = zizj/rij\n", 210 | " # rij = sqrt((xi - xj)^2 + (yi - yj)^2 + (zi - zj)^2)\n", 211 | " x = float(coords[i][1]) - float(coords[j][1])\n", 212 | " y = float(coords[i][2]) - float(coords[j][2])\n", 213 | " z = float(coords[i][3]) - float(coords[j][3])\n", 214 | " rij = sqrt((x ** 2) + (y ** 2) + (z ** 2))\n", 215 | " mij = (zi * zj) / rij\n", 216 | "\n", 217 | " mat[j, i] = mij\n", 218 | "\n", 219 | " # return the lower trinagle of the CM as a vector\n", 220 | " mat = mat[np.tril_indices(natoms)]\n", 221 | "\n", 222 | " return mat" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 7, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "array([36.8581052 , 5.5085695 , 0.5 , 5.50857022, 0.56221605,\n", 234 | " 0.5 , 5.50856526, 0.56221405, 0.56221669, 0.5 ,\n", 235 | " 5.50857007, 0.56221611, 0.56221501, 
0.56221777, 0.5 ])" 236 | ] 237 | }, 238 | "execution_count": 7, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "cm(natoms, coords, size=29)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "## If this interests you, feel free to help out with [chemreps](https://github.com/dlf57/chemreps)!" 252 | ] 253 | } 254 | ], 255 | "metadata": { 256 | "kernelspec": { 257 | "display_name": "Python 3", 258 | "language": "python", 259 | "name": "python3" 260 | }, 261 | "language_info": { 262 | "codemirror_mode": { 263 | "name": "ipython", 264 | "version": 3 265 | }, 266 | "file_extension": ".py", 267 | "mimetype": "text/x-python", 268 | "name": "python", 269 | "nbconvert_exporter": "python", 270 | "pygments_lexer": "ipython3", 271 | "version": "3.6.6" 272 | }, 273 | "varInspector": { 274 | "cols": { 275 | "lenName": 16, 276 | "lenType": 16, 277 | "lenVar": 40 278 | }, 279 | "kernels_config": { 280 | "python": { 281 | "delete_cmd_postfix": "", 282 | "delete_cmd_prefix": "del ", 283 | "library": "var_list.py", 284 | "varRefreshCmd": "print(var_dic_list())" 285 | }, 286 | "r": { 287 | "delete_cmd_postfix": ") ", 288 | "delete_cmd_prefix": "rm(", 289 | "library": "var_list.r", 290 | "varRefreshCmd": "cat(var_dic_list()) " 291 | } 292 | }, 293 | "types_to_exclude": [ 294 | "module", 295 | "function", 296 | "builtin_function_or_method", 297 | "instance", 298 | "_Feature" 299 | ], 300 | "window_display": false 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 2 305 | } 306 | -------------------------------------------------------------------------------- /04_Machine_Learning/basics/methane.xyz: -------------------------------------------------------------------------------- 1 | 5 2 | 3 | C 1.041682 -0.056200 -0.071481 4 | H 2.130894 -0.056202 -0.071496 5 | H 0.678598 0.174941 -1.072044 6 | H 0.678613 0.694746 0.628980 7 | H 0.678614 -1.038285 0.228641 8 | 9 | -------------------------------------------------------------------------------- /04_Machine_Learning/basics/ml_boston.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Machine Learning Example: Boston Housing Dataset" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "%matplotlib inline\n", 17 | "import numpy as np\n", 18 | "import pandas as pd\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "from math import sqrt" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Useful Resources\n", 28 | " - [Scikit-Learn](http://scikit-learn.org/)\n", 29 | " - [Learning Data Science Boston Housing Example](https://medium.com/@haydar_ai/learning-data-science-day-9-linear-regression-on-boston-housing-dataset-cd62a80775ef) \n", 30 | " - [Python Data Analysis Library: pandas](https://pandas.pydata.org/)\n", 31 | " - [10 Minutes to pandas](https://pandas.pydata.org/pandas-docs/stable/10min.html)\n", 32 | " - [Our Slides on ML](https://github.com/shivupa/QMMM_study_group/blob/master/04_Machine_Learning/ml_slides.pdf) \n", 33 | " \n", 34 | "## Setup\n", 35 | " 1. Organize data\n", 36 | " 2. Setup Classifier/Regressor\n", 37 | " 3. Train, Test, Visualize!\n", 38 | " \n", 39 | "## Organize Data\n", 40 | "Data organization is one of the most important steps in machine learning. 
Unorganized data can lead to wasted compute time on improper data as well as make it more difficult for others to understand and replicate your method. For data organization we are going to use pandas (unfortunately not this one 🐼)." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from sklearn.datasets import load_boston\n", 50 | "boston = load_boston()\n", 51 | "boston.keys()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "print(boston.DESCR)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "boston.data[0]" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "df_boston = pd.DataFrame(boston.data, columns=boston.feature_names)\n", 79 | "df_boston['PRICE'] = boston.target\n", 80 | "df_boston.head()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "scrolled": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "df_boston.describe()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Machine Learning\n", 99 | "1. Split data into training and testing sets\n", 100 | "2. Train classifier/regressor\n", 101 | "3. Test trained classifier/regressor on test set\n", 102 | "4. Visualize\n", 103 | "\n", 104 | "## Import Scikit-Learn" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "from sklearn.linear_model import LinearRegression\n", 114 | "from sklearn.linear_model import BayesianRidge\n", 115 | "from sklearn.linear_model import Ridge\n", 116 | "from sklearn.kernel_ridge import KernelRidge\n", 117 | "from sklearn.ensemble import RandomForestRegressor\n", 118 | "from sklearn.metrics import mean_absolute_error\n", 119 | "from sklearn.metrics import mean_squared_error\n", 120 | "from sklearn.model_selection import train_test_split" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Split data into training and testing sets" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "X_train, X_test, y_train, y_test = train_test_split(\n", 137 | " boston.data, boston.target, train_size=0.9, test_size=0.1)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## Train Regressor" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "####################### Choose ML Model #######################\n", 154 | "regr = \n", 155 | "\n", 156 | "####################### Train ML Model ########################\n", 157 | "regr.fit()\n", 158 | "\n", 159 | "###################### Predict Test Set #######################\n", 160 | "predicted = regr.predict()\n", 161 | "\n", 162 | "##################### Evaluate Prediciton #####################\n", 163 | "mae = mean_absolute_error()\n", 164 | "mse = mean_squared_error()\n", 165 | "rmse = sqrt(mse)\n", 166 | "print('MAE:', mae, '\\tMSE:', mse, '\\tRMSE:', rmse)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 
171 | "metadata": {}, 172 | "source": [ 173 | "## Plot Results" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "training_size = ('%0.1f' % (100 - (len(y_test)/len(boston.target) * 100)))\n", 183 | "class_type = str(regr).split('(')[0]\n", 184 | "label1 = ('MAE = {}'.format('%8.3f' % mae))\n", 185 | "label2 = (class_type + '\\nTraining size = ' + training_size + '%')\n", 186 | "\n", 187 | "plt.figure(dpi=250)\n", 188 | "plt.plot([df_boston['PRICE'].min(), df_boston['PRICE'].max()], [df_boston['PRICE'].min(), df_boston['PRICE'].max()], ls=\"--\", c=\"g\")\n", 189 | "plt.plot(y_test, predicted, 'o', markersize=1.5)\n", 190 | "plt.xlabel(\"Actual Price\")\n", 191 | "plt.ylabel(\"Predicted Price\")\n", 192 | "legend1 = plt.legend([label1], loc='lower right', markerscale=0, fontsize=6, handlelength=0)\n", 193 | "plt.legend([label2], loc='upper left', markerscale=0, fontsize=6, handlelength=0)\n", 194 | "plt.gca().add_artist(legend1)\n", 195 | "plt.show()" 196 | ] 197 | } 198 | ], 199 | "metadata": { 200 | "kernelspec": { 201 | "display_name": "Python 3", 202 | "language": "python", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "codemirror_mode": { 207 | "name": "ipython", 208 | "version": 3 209 | }, 210 | "file_extension": ".py", 211 | "mimetype": "text/x-python", 212 | "name": "python", 213 | "nbconvert_exporter": "python", 214 | "pygments_lexer": "ipython3", 215 | "version": "3.6.6" 216 | }, 217 | "varInspector": { 218 | "cols": { 219 | "lenName": 16, 220 | "lenType": 16, 221 | "lenVar": 40 222 | }, 223 | "kernels_config": { 224 | "python": { 225 | "delete_cmd_postfix": "", 226 | "delete_cmd_prefix": "del ", 227 | "library": "var_list.py", 228 | "varRefreshCmd": "print(var_dic_list())" 229 | }, 230 | "r": { 231 | "delete_cmd_postfix": ") ", 232 | "delete_cmd_prefix": "rm(", 233 | "library": "var_list.r", 234 | "varRefreshCmd": "cat(var_dic_list()) " 235 | } 236 | }, 237 | "types_to_exclude": [ 238 | "module", 239 | "function", 240 | "builtin_function_or_method", 241 | "instance", 242 | "_Feature" 243 | ], 244 | "window_display": false 245 | } 246 | }, 247 | "nbformat": 4, 248 | "nbformat_minor": 2 249 | } 250 | -------------------------------------------------------------------------------- /04_Machine_Learning/ml_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/04_Machine_Learning/ml_slides.pdf -------------------------------------------------------------------------------- /05_MolecularDynamics/README.txt: -------------------------------------------------------------------------------- 1 | Introduction to Molecular dynamics 2 | 3 | Please see the trajectories folder for precomputed trajectories that can be visualized with VMD. 
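If you would also like to analyze the trajectories programmatically rather than only visualize them,
one option (a suggestion only, and not necessarily what the scripts in the tools folder do) is the
MDTraj Python library, for example:

    import mdtraj as md

    # load a trajectory together with a matching reference/topology structure
    traj = md.load('trajectories/cdk8.xtc', top='trajectories/cdk8_folded.pdb')
    ref = md.load('trajectories/cdk8_folded.pdb')

    # RMSD of every frame to the folded reference (MDTraj reports nanometers)
    print(md.rmsd(traj, ref, frame=0)[:10])

MDTraj may not be included in the provided environment.yml, so you might need to install it separately.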
4 | -------------------------------------------------------------------------------- /05_MolecularDynamics/enhanced_sampling/EnhancedSampling.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/enhanced_sampling/EnhancedSampling.pdf -------------------------------------------------------------------------------- /05_MolecularDynamics/enhanced_sampling/metadynamics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/enhanced_sampling/metadynamics.pdf -------------------------------------------------------------------------------- /05_MolecularDynamics/template/images/divx2pass.log.mbtree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/template/images/divx2pass.log.mbtree -------------------------------------------------------------------------------- /05_MolecularDynamics/template/images/encode.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # encode.sh 4 | # 5 | # Script for encoding images to a movie. The images should be numbered, with 6 | # zero-padding. The movie is encoded with the h.264 codec (the same codec used 7 | # in high-definition movies). 8 | # 9 | # This script must be run from the directory containing the saved frames. 10 | 11 | # Set the mplayer environment variables (change for your configuration) 12 | LD_LIBRARY_PATH=/local/usr/lib; export LD_LIBRARY_PATH 13 | PATH=${PATH}:/local/usr/bin; export PATH 14 | 15 | # convert targa to png 16 | for i in *.tga; do 17 | PREFIX=$(basename ${i} .tga) 18 | convert ${i} ${PREFIX}.sgi 19 | done 20 | 21 | # This script is meant to be used with 1280x720 images 22 | WIDTH=$(identify -format "%w" ${PREFIX}.sgi) 23 | HEIGHT=$(identify -format "%h" ${PREFIX}.sgi) 24 | 25 | # high motion = 5928 kbps 26 | # moderate motion = 4512 kbps 27 | mencoder \ 28 | -ovc x264 \ 29 | -x264encopts pass=1:turbo:bitrate=5928:bframes=1:subq=6:frameref=6:me=hex:partitions=all:threads=auto:keyint=300 \ 30 | -mf type=sgi:w=${WIDTH}:h=${HEIGHT}:fps=60 \ 31 | -nosound \ 32 | -o /dev/null mf://\*.sgi 33 | 34 | mencoder \ 35 | -ovc x264 \ 36 | -x264encopts pass=2:turbo:bitrate=5928:bframes=1:subq=6:frameref=6:me=hex:partitions=all:threads=auto:keyint=300 \ 37 | -mf type=sgi:w=${WIDTH}:h=${HEIGHT}:fps=60 \ 38 | -nosound \ 39 | -o gfp.mov mf://\*.sgi 40 | -------------------------------------------------------------------------------- /05_MolecularDynamics/template/images/gfp.mov: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/template/images/gfp.mov -------------------------------------------------------------------------------- /05_MolecularDynamics/template/images/render.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in *.tachyon; do 4 | PREFIX=$(basename ${i} .tachyon) 5 | cat ${i} | sed -e 's/Resolution [[:digit:]]\+\s[[:digit:]]\+/Resolution 1280 720/' > temp.tachyon 6 | tachyon temp.tachyon -o ${PREFIX}.tga 7 | done 8 | 9 | 
-------------------------------------------------------------------------------- /05_MolecularDynamics/template/parameter.file.01: --------------------------------------------------------------------------------
 1 | C 4.00 0.10 0
 2 | CA 4.00 0.10 0
 3 | CB 4.00 0.10 0
 4 | CG 4.00 0.10 0
 5 | CD 4.00 0.10 0
 6 | CE 4.00 0.10 0
 7 | CZ 4.00 0.10 0
 8 | C5 4.00 0.10 0
 9 | C6 4.00 0.10 0
 10 | C3 4.00 0.10 0
 11 | C1 4.00 0.10 0
 12 | DA 4.00 0.10 0
 13 | DB 4.00 0.10 0
 14 | DG 4.00 0.10 0
 15 | SB 4.00 0.10 0
 16 | SG 4.00 0.10 0
 17 | SD 4.00 0.10 0
 18 | N 4.00 0.10 0
 19 | NG 4.00 0.10 0
 20 | NZ 4.00 0.10 0
 21 | N3 4.00 0.10 0
 22 | N7 4.00 0.10 0
 23 | NE 4.00 0.10 0
 24 | ND 4.00 0.10 0
 25 | NH 4.00 0.10 0
 26 | O 4.00 0.10 0
 27 | OG 4.00 0.10 0
 28 | OD 4.00 0.10 0
 29 | OE 4.00 0.10 0
 30 | OH 4.00 0.10 0
 31 | OX 4.00 0.10 0
 32 | PA 4.00 0.10 0
 33 | EB 4.00 0.10 0
 34 | EG 4.00 0.10 0
 35 | ED 4.00 0.10 0
 36 | EE 4.00 0.10 0
 37 | XX 5.00 0.10 0
 38 | Y 0.0 0.00 0
 39 | Z 0.0 0.00 0
 -------------------------------------------------------------------------------- /05_MolecularDynamics/template/run.sh: --------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Run the UIOWA_BD simulation
 4 | #
 5 | 
 6 | # Path to the UIOWA BD binary.
 7 | # Currently, the binary specified represents a static compilation using Intel's
 8 | # ifort compiler, along with the -O2 option (second level of optimization). I
 9 | # find this gives good speed.
 10 | UIOWA_BD=../../uiowa_bd/binaries/uiowa_bd_45-07-12-o2
 11 | 
 12 | # Make sure the necessary directories are set up.
 13 | if [ ! -d MOVIE ]; then
 14 |     mkdir MOVIE
 15 | fi
 16 | 
 17 | if [ ! -d OUTPUT ]; then
 18 |     mkdir OUTPUT
 19 | fi
 20 | 
 21 | if [ ! -d RESTARTS ]; then
 22 |     mkdir RESTARTS
 23 | fi
 24 | 
 25 | # Make sure there is a restart.file
 26 | if [ ! -e restart.file ]; then
 27 |     echo "Using restart.file.initial as the initial configuration of the system."
 28 |     cp restart.file.initial restart.file
 29 | fi
 30 | 
 31 | ### Run the simulation ###
 32 | # For the UIOWA BD main binary, you need to specify the input parameters via
 33 | # STDIN. I use redirects to do this.
 34 | # sim.out contains log information.
 35 | # Information on the arguments:
 36 | # - The first argument is the random seed. This should be less than (2^31-1)
 37 | # - The second and third integers are "multiplicative factors" for determining
 38 | #   memory usage. Just leave them as 1 and 1.
 39 | ${UIOWA_BD} 100 1 1 < sim.inp > sim.out
 40 | 
 41 | # Note that coordinates are found in the testout.xtc file.
 -------------------------------------------------------------------------------- /05_MolecularDynamics/template/setup/NOTES.txt: --------------------------------------------------------------------------------
 1 | This file contains information on setting up the simulation directory.
 2 | 
 3 | --------------------------------------------------------------------------------
 4 | All that needs to be done is to run the script "run_prep.sh" from this
 5 | directory. Before running this script, the following was already done:
 6 | 
 7 | Prepare an input file for the preparation program (uiowa_goprep.16-06-12.exe)
 8 | 
 9 | The input file is:
 10 | 2b3p.inp
 11 | Most of the parameters are straightforward, but here are some important points:
 12 | - rhydro1: For the C-alpha models, this should be set to 5.3 angstroms. For
 13 | the side-chain model, this should be set to 3.5 angstroms. See Adrian
 14 | Elcock's paper: doi:10.1021/ct800499p. 
This reproduces translational and 15 | rotational diffusion coefficients for a variety of proteins, as compared 16 | to the program Hydropro. 17 | 18 | num_first_type: specifies the molecule number. 19 | 20 | pH: I believe this is only used in determining charges. 21 | 22 | Set the go distance cutoff to 5.5 angstroms. This means that any pair of 23 | residues with atoms that are closer than 5.5 angstroms counts as a go 24 | pair (native contact). 25 | 26 | epsilon, edihed1_ca, and edihed2_ca: These are the well depth for the 27 | favorable Go-potential that applies to native contacts (the 12-10 potential), 28 | and the "V1" and "V3" pseudo-dihedral barrier heights. These terms can vary, 29 | but should stay in the ratio of 12:10:5 for c-alpha models, and 25:41:21 for 30 | side-chain models. These sets of parameters were used in Dr. Elcock's paper, 31 | "Striking effects of diffusion...". 32 | 33 | coarse/fine: Here we use the coarse model, which includes one "bead" at the 34 | c-alpha position of each residue. 35 | 36 | edihed1_sc and edihed2_sc are for the side chain model, which we are not using. 37 | 38 | kbond and kangle: force constants for bond and angle terms. These are usually 39 | set to 20 kcal/angstrom/mol and 10 kcal/radian/mol, respectively. 40 | 41 | -------------------------------------------------------------------------------- 42 | Here is some information on what the script "run_prep.sh" does: 43 | 44 | 1. Download the PDB file from www.rcsb.org. We use PDB code 2b3p. 45 | 46 | 2. Run the prep program, and rename the files. 47 | -------------------------------------------------------------------------------- /05_MolecularDynamics/template/setup/protein.inp: -------------------------------------------------------------------------------- 1 | --- pdbname 2 | protein.pdb 3 | --- protein/nucleic flex/rigd coarse/fine rhydro1 rhydro2(used for C-alpha with hybrid model) 4 | protein flex coarse 5.300 5.300 5 | --- num_first_type skip_first skip_last 6 | 1 no no 7 | --- charge/nocharge pH disulfides 8 | nocharge 7.1 no 9 | --- i_use_go/no i_skip_go_on_loops epsilon go-dist-cutoff i_use_12_10 10 | yes no 0.600 5.500 1 11 | --- edihed1_ca - edihed3_ca - edihed1_sc - edihed3_sc - kbond - kangle - i_do_impropers 12 | 0.500 0.250 0.410 0.210 20.0 10.0 no 13 | -------------------------------------------------------------------------------- /05_MolecularDynamics/template/setup/run_prep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # run_prep.sh 4 | 5 | BINARIES=../../uiowa_bd/binaries 6 | SCRIPTS=../../scripts 7 | 8 | GOPREP=${BINARIES}/uiowa_goprep.16-06-12.exe 9 | 10 | echo "Using protein.pdb from this directory." 
11 | 12 | # Generate the parameters for Go contacts (all of which are intramolecular) 13 | INPUT=protein.inp 14 | PREFIX=protein 15 | ${GOPREP} ${INPUT} 16 | 17 | mv annotated.pdb ${PREFIX}.annotated.pdb 18 | mv internal.parameters ${PREFIX}.internal.parameters 19 | mv centered.charge.parameters ${PREFIX}.charge.parameters 20 | mv uncentered.charge.parameters ${PREFIX}.uncentered.charge.parameters 21 | mv go.parameters ${PREFIX}.go.parameters 22 | mv folded.restart ${PREFIX}.restart.file 23 | 24 | 25 | 26 | # Move all the necessary files to the main simulation directory (../) 27 | cp ${PREFIX}.charge.parameters ../ 28 | cp ${PREFIX}.internal.parameters ../ 29 | 30 | cp ${PREFIX}.go.parameters ../ 31 | cp ${PREFIX}.restart.file ../restart.file.initial 32 | cp ${PREFIX}.restart.file ../restart.file 33 | -------------------------------------------------------------------------------- /05_MolecularDynamics/template/sim.inp: -------------------------------------------------------------------------------- 1 | 0 ------------------------------------------------------------------------ 2 | dseed teprint ttprint tmprint num_lst_stp num_fmd_stp num_hyd_stp num_bal_stp num_threads bond_dev_quit i_continue_after_problem? 3 | 111.0 100.000 100.0 1000.0 400 80 400 -1 1 2.5 no 4 | ----------------------- *** --------------------------------- *** --------------- 5 | f_typs f_mols i_debug q_desired mol_to_watch 6 | 1 1 no 1.050 1 1 7 | --------------------------------------------------------------------------------- 8 | rot 1 2 3 4 5 6 1 1 max zmin zmax i_pbc i_look_for_crashes periodic_bonds_okay? 9 | -750.000 750.000 -750.000 750.000 -750.000 750.000 1 no no 10 | --------------------------------------------------------------------------------- 11 | replica_exchange i_append_movie i_limit_verbosity i_use_12_10 uniform_moves steepest_descent 12 | no 0 yes 1 no no 13 | --------------------------------------------------------------------------------- 14 | r_temperature r_ionic_strength r_ion r_dielectric r_pH r_viscosity r_fconst 15 | 293.15 0.0 0.0 80.2 7.1 1.002 5.0 16 | --------------------------------------------------------------------------------- 17 | parameter file name no_elec wrap_molecules i_use_hydro full/diag integer/real scale_nb BD/LD rtemp kcut 18 | parameter.file.01 yes 2 yes full real 4.0 brownian 0.001 1 19 | - *** -------------------------------------------------------------------- 20 | linker file, 21 | none 22 | --------------------------------------------------------------------------------- 23 | growrigd_file, i_grow_rigds 24 | grow_rigd_file no 25 | --------------------------------------------------------------------------------- 26 | go potentials file name, i_use_go_pairs i_use_exclusive_go num_exc_stp go_nonexclusive_file i_compare_go_with_others q_mode_threshold 27 | protein.go.parameters yes no 1000000000 none no 0.01 28 | --------------------------------------------------------------------------------- 29 | reaction criteria file, i_do_reactions nrequired 30 | reaction.criteria no 2 31 | --------------------------------------------------------------------------------- 32 | no-force file, i_omit_some_forces 33 | noforce.file no 34 | --------------------------------------------------------------------------------- 35 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 36 | protein.charge.parameters ! charges/hydrodynamic radii in this file 37 | protein.internal.parameters ! bonds/angles/dihedrals 38 | 1 ! 
# of copies of this molecule type 39 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 40 | time_step_s time_step_l totsimtime vdw_s vdw_m go_s go_m ele_s ele_m junk junk ff_cell 41 | 0.050 0.050 1000000.000 15.00 30.00 15.00 30.00 15.00 30.00 30.0 10.0 60.000 42 | -- *** -------------------------------------------------------------------------- 43 | harmonicfile i_do_harmonic num_harm 44 | none no 3 45 | --------------------------------------------------------------------------------- 46 | pos_restraint_file i_do_pos_restraints mission_creep? 47 | System.restraints.file no no 48 | --------------------------------------------------------------------------------- 49 | wobble_file i_wobble_rigds num_wobble_rigd_typs 50 | none no 1 51 | --------------------------------------------------------------------------------- 52 | r_size l_size r_size_fac n_size f_size 53 | -1.0 -1.0 1.0 -1 0.1 54 | -------------------------------------------------------------------------------- 55 | ewald_elec? kmax kappa ewald_elec_grid? ewald_elec_grid_file 56 | no 7 0.1795 no ewald_elec_grid.file 57 | --------------------------------------------------------------------------------- 58 | ewald_hydro? kmax kappa ewald_hydro_grid? ewald_hydro_grid_file 59 | no 17 0.050 no ewald_hydro_grid.file 60 | --------------------------------------------------------------------------------- 61 | fixman? fixman_tol fixman_order fixman_override? lmin lmax 62 | no 0.100 100 no 0.0010 10.0 63 | -------------------------------------------------------------------------------- 64 | treecode? theta order shrink maxatm 65 | no 0.025 4 1 10 66 | -------------------------------------------------------------------------------- 67 | schedule files: 68 | diff_schedule_file 69 | chol_schedule_file 70 | move_schedule_file 71 | -------------------------------------------------------------------------------- 72 | i_do_growth? grow_schedule_file 73 | no grow_schedule_file 74 | -------------------------------------------------------------------------------- 75 | i_do_slide? slide_schedule_file 76 | no slide_schedule_file 77 | -------------------------------------------------------------------------------- 78 | walls? num_walls wall_file 79 | no 0 wall_file 80 | -------------------------------------------------------------------------------- 81 | go_spline? force_function_file go_epsilon 82 | no energy_function_single_minimum.txt 1.00 83 | -------------------------------------------------------------------------------- 84 | afm? x,y,z beg; x,y,z end; tip radius; force; #steps 85 | no -130.0 0.0 0.0 -90.0 0.0 0.0 40.0 10.0 80000 86 | -------------------------------------------------------------------------------- 87 | dpd_sigma dpd_cut dpd_sol_mol_id 88 | 0.0 0.0 1 89 | -------------------------------------------------------------------------------- 90 | umbrella? umb_mol1 umb_mol2 umb_dst umb_frc num_umb_stp 91 | no 1 1 30.0 100.0 10 92 | -------------------------------------------------------------------------------- 93 | B22? B22_sample1 B22_sample2, B22_sample3, B22_rng B22_stp B22_restart_1 B22_restart_2 B22_density_pdb B22_grid_cut 94 | no 1 5000 20 150.0 2.0 Flat.restart four_structures.restart junk.pdb 5.0 95 | -------------------------------------------------------------------------------- 96 | i_do_protease? 
protease_schedule_file 97 | no protease_schedule_file 200.0 98 | -------------------------------------------------------------------------------- 99 | i_read_bond_functions i_read_nonbond_functions i_write_bond_histogram i_write_nonbond_histogram i_write_user_histogram arbitrary_intra i_read_ref_hist nref_oversample 100 | no no no no no no no 200 101 | junk 0.000 0.000 102 | angl_NMRFF_ener_uiowa_ff001.txt.final 0.000 0.000 103 | dihe_NMRFF_ener_uiowa_ff001.txt.final 0.000 0.000 104 | nbnd_ener_uiowa_ff001.txt.final 0.000 0.000 18.0 20.0 1.0 ! RDFs set to 1 between dist_ref_lo dist_ref_hi / nbnd_func_scale 105 | -------------------------------------------------------------------------------- 106 | i_have_rigid_domains? rigid_domain_file domn_schedule_file 107 | no junk junk 108 | -------------------------------------------------------------------------------- 109 | i_do_MC_moves? monte_carlo_file mont_schedule_file MC_clstr_E MC_factr_E MC_timestep 110 | no monte_carlo_file mont_schedule_file -2.0 1.0 12500.0 111 | -------------------------------------------------------------------------------- 112 | i_do_NAM? NAM_runtype NAM_bsurf NAM_qsurf NAM_num_runs NAM_mol_file 113 | no 1 300.0 405.0 100 NAM_mol_file 114 | -------------------------------------------------------------------------------- 115 | i_user_energy num_user_energy user_energy_matchup_file 116 | no 1 matchup.txt 117 | test_free_energy_final.txt.smooth 0.80 100.0 ! user_energy_file(1),user_energy_scal(1),user_energy_ceil(1) 118 | -------------------------------------------------------------------------------- /05_MolecularDynamics/template/sim.inp.notes: -------------------------------------------------------------------------------- 1 | This file describes the parameters that one may specify in the simulation 2 | input ("sim.inp") file. 3 | -------------------------------------------------------------------------------- 4 | 5 | dseed: At one point, this appears to have been used as the random seed for a 6 | simulation. As of versions 32-07-12 and 45-07-12, this no longer does 7 | anything. 8 | 9 | teprint: Time between writing energy information, in ps. 10 | 11 | ttprint: time between writing center-of-mass information, in ps. 12 | 13 | tmprint: time between writing out movie files (PDB format), in ps. 14 | 15 | num_lst_step: number of steps between updating the nonbonded list. 16 | 17 | num_fmd_step: number of steps between updating medium range forces (see vdw_med). 18 | 19 | num_hyd_step: number of steps between updating hydrodynamic forces 20 | 21 | num_bal_step: According to comments in uiowa_bd_openmp.32-07-12.f and 22 | uiowa_bd_openmp.45-07-12.f, this is the "number of steps required before 23 | update of hydrodynamics", just like num_hyd_step. Inspection of the code 24 | suggests that it is actually the number of steps between balancing the 25 | computational load between multiple OMP processes. 26 | 27 | num_threads: The number of OMP threads to use. This appears to override OMP 28 | environment variables. 29 | 30 | bond_dev_quit: If a bound length deviates from its equilibrium length by more 31 | than this amount (presumably, it is in angstroms), then the program will quit. 32 | Set to a negative value to turn this off. 33 | 34 | i_continue_after_problem?: if 'yes', the program will continue to run even after 35 | running into problems. 
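Because sim.inp is a plain, whitespace-separated text file whose value lines sit directly under their header lines (see the template above), it can be convenient to pull settings out of it programmatically instead of by eye. A minimal sketch, assuming that header/value column alignment holds for the parameter you ask for (it does not for every block, e.g. mol_to_watch takes two values) — the helper name is just for illustration:

```python
# Minimal sketch: read a named setting out of the whitespace-delimited sim.inp
# template shown above. Assumes the value line directly follows the header line
# that names the parameter and that the columns line up one-to-one; both hold
# for simple blocks like r_temperature but should be checked for others.
def read_sim_setting(path, name):
    with open(path) as handle:
        rows = [line.split() for line in handle]
    for i, tokens in enumerate(rows[:-1]):
        if name in tokens:
            return rows[i + 1][tokens.index(name)]
    raise KeyError("%s not found in %s" % (name, path))


if __name__ == "__main__":
    # e.g. prints 293.15 and 1 for the template sim.inp in this directory
    for setting in ("r_temperature", "num_threads"):
        print(setting, read_sim_setting("sim.inp", setting))
```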
36 | 37 | f_typs: number of molecule types 38 | 39 | f_mols: number of molecules 40 | 41 | i_debug: if 'yes', write out extra information for debugging 42 | 43 | q_desired: terminate the simulation if the fraction of native contacts (Q) is 44 | at least this great. 45 | 46 | mol_to_watch: You should specify two integers (such as "1 1") here, but it is 47 | not clear to me what this actually does. 48 | 49 | rot: if "rot" begins this line, then the next 8 numbers are read and used as: 50 | ivec1a 51 | ivec1b 52 | ivec2a 53 | ivec2b 54 | ivec3a 55 | ivec3b 56 | numcen1 57 | numcen2 58 | It is not clear what these numbers do. Just leave these numbers as you found them. 59 | 60 | xmin, xmax, ymin, ymax, zmin, zmax: 61 | While the xmin, xmax, ymin, ymax, and zmin labels do not appear, the 6 numbers 62 | on the line after "rot" are indeed these values. This specifies the 63 | dimensions of the system. This appears to apply for periodic boundary conditions. 64 | 65 | i_pbc: Specifies whether periodic boundary conditions should be applied. If 66 | periodic boundary conditions should be applied, this should be set to "1". 67 | 68 | i_look_for_crashes: It is not clear what this does. Just keep it set to "no". 69 | 70 | periodic_bounds_okay: Again, it was not clear from the source code what this 71 | does. Keep it set to "no" to be safe. 72 | 73 | replica_exchange: "yes" if you want to do replica exchange, and "no" otherwise. 74 | It appears that replica exchange needs to be performed with a "master bash 75 | script" that is mentioned in comments in the uiowa_bd source code, but I do 76 | not think we have such a script. 77 | 78 | i_append_movie: Comment from source code: 79 | """ 80 | i_append_movie is used to determine whether we want to add to movies 81 | rather than start again at zero - it looks for the movie with the 82 | higher number and either starts there or adds one depending on the 83 | value of i_append_movie 84 | i_append_movie = 1 --> overwrite the last movie 85 | = 2 --> add to the list 86 | """ 87 | 88 | i_limit_verbosity: Comment from source code: 89 | """ 90 | i_limit_verbosity limits how much is written out when we use the 91 | arbitrary functions of our force field if 'yes' then don't write out 92 | much 93 | """ 94 | 95 | i_use_12_10: 96 | 1: 12-10 potential 97 | 2: 12-06 potential 98 | 3: 08-04 potential 99 | Note that this corresponds to the variable "i_use_v_typ" in the source code. 100 | 101 | uniform_moves: Comment from the uiowa_bd source code: 102 | 02-08-07 implement a uniform random step move 103 | determine max distance that any one protein can move in one timestep 104 | note that this only works for r ms at the moment - and doesn't 105 | deal with their rotations either yet 106 | 107 | steepest_descent: Doesn't do anything. Ostensibly setting this to "yes" would 108 | make the program do steepest descent minimization, but there isn't actually 109 | code to do so. 110 | 111 | r_temperature: The temperature, in Kelvin. 112 | 113 | r_ionic_strength: The ionic strength. Presumably units of "molar". 114 | 115 | r_ion: Set this to 0.0, or the program will not run. 116 | 117 | r_dielectric: The dielectric constant (relative permittivity). 118 | 119 | r_pH: This is only used if "i_do_protease" is set to true, in which case it is 120 | used in calculating the charge of the carboxy terminus of the new peptide. 121 | 122 | r_viscosity: The viscosity in units of mPa*s, also known as cP 123 | 124 | r_fconst: A force constant for some sort of short range harmonic potential. 
It 125 | is not clear to me what this applies to. 126 | 127 | parameter file name: specify the name of the parameter file with information on 128 | the "epsilon" for repulsive terms. 129 | 130 | no_elec: "no" if you want electrostatics, "yes" if you do not want electrostatics. 131 | 132 | wrap_molecules: Specify how molecules are wrapped in movie files. 133 | 0: do not wrap in movie files 134 | 1: Wrap atoms in movie files 135 | 2: wrap molecules in movie files 136 | 137 | i_use_hydro: "yes" to use hydrodynamic interactions, otherwise "no". 138 | 139 | full/diag: Specify the type of hydrodynamics to use. 140 | Options include: 141 | no: none 142 | full: full 143 | tree: tree 144 | diag: diagonal 145 | geye: geyer 146 | mult: multi 147 | intr: intra 148 | 149 | integer/real: Valid options appear to be "integer" and "cutoff". This appears 150 | to relate to the hydrodynamic calculations. 151 | 152 | scale_nb: Not clear what this does, but it appears to relate to the hydrodynamic 153 | calculations. 154 | 155 | BD/LD: What type of dynamics to do. The following are choices: 156 | langevin 157 | brownian 158 | dpd: dissipative particle dynamics 159 | 160 | r_temp: Comment from uiowa_bd code: 161 | """ 162 | mass of individual subunits or radius of 163 | molecules if using multi_hydrodynamics 164 | """ 165 | 166 | kcut: Does nothing. The code for associated calculations is present but 167 | commented out. 168 | 169 | linker file: Not sure what this does. Keep it set to "1". 170 | 171 | growrigd file: Name of file with information on growing rigid molecules. 172 | 173 | i_grow_rigds: Not sure what growing rigid molecules does, so keep this set to "no". 174 | 175 | go potentials file name: Name of file with Go parameters (usually something like 176 | "protein.go.parameters"). 177 | 178 | i_use_go_pairs: Set to "yes" to use Go potentials, otherwise no. 179 | 180 | i_use_exclusive_go: According to comments in the uiowa_bd code, this means: 181 | "go-pairs are exclusive for mol pairs and domain types" 182 | """ 183 | note that i_use_exclusive_go is used for cases where a molecule has 184 | multiple modes of interaction with other molecules 185 | """ 186 | 187 | num_exc_stp: Number of steps between updating the list of exclusive go pairs. 188 | 189 | go_nonexclusive_file: Not sure how to use this. Presumably this is the name of 190 | a file where one can specify excepts to the "i_use_exclusive_go" rule. 191 | 192 | i_compare_go_with_others: Not sure what this does. 193 | 194 | q_mode_threshold: Not sure what this does. It appears to relate to i_use_exclusive_go. 195 | 196 | reaction criteria file, i_do_reactions, nrequired: ?? 197 | 198 | no-force file, i_omit_some_forces: ?? 199 | 200 | time_step_s, time_step_l: short and long time steps, respectively. In my 201 | experience we keep these the same. Some interactions apparently can be updated 202 | at longer time steps. 203 | 204 | totsimtime: The total simulation time, in ps. 
205 | 206 | vdw_s: van der Waals interactions at this length (in Angstroms) or shorter count 207 | as "short range" interactions 208 | 209 | vdw_m: van der Waals interactions less than this length (in Angstroms) and 210 | greater than vdw_s count as "medium range" interactions 211 | 212 | go_s, go_m: Same as vdw_s and vdw_m, except for Go interactions 213 | 214 | ele_s, ele_m: Same as vdw_s and vdw_m, except for electrostatic interactions 215 | 216 | ele_l ("junk"): distance cutoff for long range electrostatics 217 | 218 | cut_h (the second "junk"): distance cutoff for hydrodynamic interactions 219 | 220 | ff_cell: ?? 221 | 222 | harmonicfile, i_do_harmonic, num_harm: defunct. 223 | 224 | position restraint information: Not sure how to use this. 225 | 226 | wobble_file, etc: defunct. 227 | 228 | r_size, l_size, r_size_fac, n_size, f_size: Parameters for confinement potential. 229 | Spherical potential: Set "r_size" to the radius of the sphere in angstroms. 230 | Set "l_size" to a negative number. 231 | Cylindrical potential: Set "r_size" to the radius of the cylinder in angstroms, 232 | and set "l_size" to the height of the cylinder in angstroms. 233 | 234 | r_size_fac: the factor by which r_size is first scaled. 235 | n_size: appears to be used with shrinking boxes. 236 | f_size: a force constant for the confinement potential 237 | 238 | 239 | We do not use any of the parameters found lower in the file. 240 | -------------------------------------------------------------------------------- /05_MolecularDynamics/tools/calc_rmsd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import numpy 4 | import os 5 | import sys 6 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) 7 | import rmsd 8 | import MDAnalysis 9 | 10 | ''' 11 | --------- 12 | Script 13 | --------- 14 | This script takes an xtc trajectory file generated from a UIOWA-BD simulation and a pdb file for the protein of interest 15 | and calculates the RMS deviation between each frame in the trajectory and the reference pdb structure 16 | 17 | Resulting RMS deviation values for each frame are written to a text file named 'rmsd.txt' in the directory 18 | from which the script is run 19 | 20 | --------- 21 | Notes 22 | --------- 23 | This script should be run from the main simulation directory with the command 'python (path to script) --pdb (reference pdb) --xtc (trajectory xtc)' 24 | 25 | The rmsd.py script must be located in the same directory as this script (calc_rmsd.py) when it is run 26 | 27 | The pdb files must be 'go' models (not full atom models) and can be taken from the first frame located in the output 28 | MOVIE directory 29 | ''' 30 | 31 | def xtc2numpyarray(xtcfilepath, pdbfilepath): 32 | ''' 33 | Loads coordinates from an xtc trajectory file and returns a numpy array, 34 | using a pdb file as the topology.
35 | 36 | --------- 37 | Arguments 38 | --------- 39 | xtcfilepath: (str) path to the xtc file 40 | pdbfilepath: (str) path to the pdb file 41 | 42 | --------- 43 | Returns 44 | --------- 45 | coords_arr: (numpy.ndarray) numpy array of shape (nframes, natoms, 3) 46 | representing coordinates of each frame in trajectory 47 | ''' 48 | 49 | f = MDAnalysis.Universe(pdbfilepath, xtcfilepath) 50 | coords = [] 51 | for frame in f.trajectory: 52 | coords.append(f.atoms.positions) 53 | coords_arr = numpy.asarray(coords) 54 | return coords_arr 55 | 56 | def pdb2numpyarray(pdbfilepath): 57 | ''' 58 | Loads a pdb file and returns a numpy array of the coordinates 59 | 60 | --------- 61 | Arguments 62 | --------- 63 | filepath: (str) path to the pdb file 64 | 65 | --------- 66 | Returns 67 | --------- 68 | coords_arr: (numpy.ndarray) numpy array of shape (nframes, natoms, 3) 69 | representing coordinates of reference structure 70 | ''' 71 | 72 | f = MDAnalysis.Universe(pdbfilepath) 73 | coords = f.atoms.positions 74 | coords_arr = numpy.asarray(coords) 75 | return coords_arr 76 | 77 | def parse_arguments(): 78 | ''' 79 | Parses command lines arguments. 80 | 81 | -------- 82 | Returns 83 | -------- 84 | pdbpath: (str) The file path to the reference pdb, which is also used as a 85 | topology file for the xtc file 86 | 87 | xtcpath: (str) The file path the xtc trajectory file. 88 | ''' 89 | parser = argparse.ArgumentParser() 90 | parser.add_argument('--pdb', dest='pdbpath', required=True, 91 | help="The file path to the reference PDB, which is used" 92 | " both as the reference structure and topology " 93 | "file for the XTC trajectory file." 94 | ) 95 | parser.add_argument('--xtc', dest='xtcpath', required=True, 96 | help="The file path to the XTC trajectory file." 97 | ) 98 | args = parser.parse_args() 99 | return args.pdbpath, args.xtcpath 100 | 101 | 102 | def main(): 103 | ''' 104 | Run the main function. 105 | ''' 106 | # First, parse command line arguments 107 | pdbpath, xtcpath = parse_arguments() 108 | 109 | # Get numpy array of coordinates for trajectory 110 | # Again, note the pdb files must be 'go'models and not full-atom models 111 | coordinates = xtc2numpyarray(xtcpath, pdbpath) 112 | 113 | # Get numpy array of coordinates for reference structure 114 | ref_coordinates = pdb2numpyarray(pdbpath) 115 | 116 | # Iterate through the frames of the trajectory and calculate the RMS 117 | # deviation relative to the ref_coordinates 118 | rmsd_list = [] 119 | for frame in coordinates: 120 | 121 | # This module is from the rmsd.py script that must be in the same directory 122 | # as this script 123 | r = rmsd.rmsd(frame, ref_coordinates) 124 | rmsd_list.append(r) 125 | numpy.savetxt("rmsd.txt", rmsd_list) 126 | 127 | if __name__ == "__main__": 128 | main() 129 | -------------------------------------------------------------------------------- /05_MolecularDynamics/tools/rmsd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy 3 | import scipy.spatial.distance 4 | 5 | def distance_squared(p0, p1): 6 | ''' 7 | Find the square of the distance between ``p0`` and ``p1``. 8 | 9 | --------- 10 | Arguments 11 | --------- 12 | p0: (numpy.ndarray) A shape (3,) array representing x,y,z coordinates 13 | p1: (numpy.ndarray) A shape (3,) array representing x,y,z coordinates 14 | 15 | ------- 16 | Returns 17 | ------- 18 | d2: square of the euclidean distance between ``p0`` and ``p1``. 
19 | ''' 20 | return scipy.spatial.distance.euclidean(p0,p1)**2 21 | 22 | def centroid(coordinates): 23 | ''' 24 | Find the centroid of ``coordinates``. 25 | 26 | --------- 27 | Arguments 28 | --------- 29 | coordinates: (numpy.ndarray) A shape (natoms, 3) array of coordinates 30 | 31 | ------- 32 | Returns 33 | ------- 34 | c: (numpy.ndarray) A shape (1,3) array of coordinates indicating the center 35 | of geometry of ``coordinates``. 36 | ''' 37 | return coordinates.mean(axis=0)[numpy.newaxis,:] 38 | 39 | def rmsd(mobile, reference): 40 | ''' 41 | Calculates the RMS deviation between two structures following least- 42 | squares alignment. Uses the Kabsch algorithm. 43 | 44 | --------- 45 | Arguments 46 | --------- 47 | mobile: (numpy.ndarray) shape (natoms, 3) numpy array, where natoms is the 48 | number of atoms, representing the coordinates of the protein for which 49 | to calculate the RMS deviation 50 | 51 | reference: (numpy.ndarray) shape (natoms, 3) numpy array representing the 52 | reference structure. 53 | 54 | ------- 55 | Returns 56 | ------- 57 | rmsd_: (float) The RMS deviation of ``mobile`` relative to ``reference``, 58 | calculated via the following equation: 59 | 60 | RMS deviation = sqrt( (1/natoms) * sum( (x_0,i - x_1,i)^2 + (y_0,i - y_1,i)^2 + (z_0,i - z_1,i)^2 ) ) 61 | 62 | where i runs over the atom index (from 0 to natoms-1), and the 63 | calculation is performed following least-squares alignment. 64 | ''' 65 | 66 | # Center both mobile and reference on centroid. 67 | c = centroid(reference) 68 | reference -= c 69 | c = centroid(mobile) 70 | mobile -= c 71 | 72 | # Use Kabsch algorithm to calculate optimal rotation matrix. 73 | # Calculate covariance matrix. 74 | covariance_matrix = numpy.dot(numpy.transpose(reference), 75 | mobile) 76 | 77 | # Singular Value Decomposition. 78 | V, S, Wt = numpy.linalg.svd(covariance_matrix) 79 | d = numpy.sign(numpy.linalg.det(numpy.dot(numpy.transpose(Wt), 80 | numpy.transpose(V) 81 | ) 82 | ) 83 | ) 84 | 85 | U = numpy.dot(numpy.transpose(Wt), 86 | numpy.dot(numpy.array(((1,0,0), 87 | (0,1,0), 88 | (0,0,d))), 89 | numpy.transpose(V) 90 | ) 91 | ) 92 | 93 | # Multiplying mobile (n*3 matrix) by 3*3 optimal rotation matrix 94 | # ``U`` gives the least-squares alignment. 95 | l_aligned = mobile.dot(U) 96 | 97 | # Sum distances squared over each particle, and take the square root to 98 | # return RMSD.
99 | square_sum = 0 100 | for i in range(len(l_aligned)): 101 | square_sum += distance_squared(l_aligned[i],reference[i]) 102 | av = square_sum/len(l_aligned) 103 | rmsd_ = numpy.sqrt(av) 104 | return rmsd_ 105 | -------------------------------------------------------------------------------- /05_MolecularDynamics/trajectories/.p53.xtc_offsets.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/trajectories/.p53.xtc_offsets.npz -------------------------------------------------------------------------------- /05_MolecularDynamics/trajectories/cdk8.xtc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/trajectories/cdk8.xtc -------------------------------------------------------------------------------- /05_MolecularDynamics/trajectories/mdm2.xtc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/trajectories/mdm2.xtc -------------------------------------------------------------------------------- /05_MolecularDynamics/trajectories/mdm2_folded.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 N GLN 1 2.092 0.001 -1.242 0.56 2 | ATOM 2 CA ILE 2 5.388 0.769 0.492 1.12 3 | ATOM 3 CA ASN 3 7.769 -1.743 -1.086 1.69 4 | ATOM 4 CA GLN 4 11.468 -2.556 -0.742 2.25 5 | ATOM 5 CA VAL 5 14.099 -0.388 -2.430 2.81 6 | ATOM 6 CA ARG 6 17.818 -0.369 -3.219 3.37 7 | ATOM 7 CA PRO 7 19.905 2.832 -2.987 3.93 8 | ATOM 8 CA LYS 8 22.282 2.960 -5.954 4.49 9 | ATOM 9 CA LEU 9 25.648 4.732 -5.867 5.06 10 | ATOM 10 CA PRO 10 24.486 8.378 -5.793 5.62 11 | ATOM 11 CA LEU 11 21.520 7.188 -3.729 6.18 12 | ATOM 12 CA LEU 12 23.440 5.292 -1.048 6.74 13 | ATOM 13 CA LYS 13 25.655 8.211 -0.026 7.30 14 | ATOM 14 CA ILE 14 22.443 10.183 0.477 7.87 15 | ATOM 15 CA LEU 15 21.070 7.909 3.199 8.43 16 | ATOM 16 CA HIS 16 24.563 7.173 4.516 8.99 17 | ATOM 17 CA ALA 17 25.241 10.845 5.243 9.55 18 | ATOM 18 CA ALA 18 21.862 10.857 6.988 10.11 19 | ATOM 19 CA GLY 19 22.824 8.011 9.301 10.67 20 | ATOM 20 CA ALA 20 22.019 5.157 6.931 11.24 21 | ATOM 21 CA GLN 21 24.109 2.154 5.894 11.80 22 | ATOM 22 CA GLY 22 23.733 -0.895 3.671 12.36 23 | ATOM 23 CA GLU 23 22.188 -1.620 0.280 12.92 24 | ATOM 24 CA MET 24 18.574 -2.392 1.179 13.48 25 | ATOM 25 CA PHE 25 15.787 -0.130 2.442 14.04 26 | ATOM 26 CA THR 26 12.006 0.229 2.242 14.61 27 | ATOM 27 CA VAL 27 9.821 3.241 1.455 15.17 28 | ATOM 28 CA LYS 28 9.542 3.952 5.180 15.73 29 | ATOM 29 CA GLU 29 13.307 3.804 5.697 16.29 30 | ATOM 30 CA VAL 30 13.979 6.045 2.698 16.85 31 | ATOM 31 CA MET 31 11.302 8.595 3.587 17.42 32 | ATOM 32 CA HIS 32 12.770 8.796 7.091 17.98 33 | ATOM 33 CA TYR 33 16.249 9.388 5.672 18.54 34 | ATOM 34 CA LEU 34 15.088 12.068 3.235 19.10 35 | ATOM 35 CA GLY 35 13.849 14.396 5.959 19.66 36 | ATOM 36 CA GLN 36 16.924 13.797 8.102 20.22 37 | ATOM 37 CA TYR 37 19.148 14.398 5.076 20.79 38 | ATOM 38 CA ILE 38 17.580 17.773 4.296 21.35 39 | ATOM 39 CA MET 39 17.776 18.859 7.936 21.91 40 | ATOM 40 CA VAL 40 21.379 17.649 8.101 22.47 41 | ATOM 41 CA LYS 41 22.403 19.661 5.040 23.03 42 | ATOM 42 CA GLN 42 20.104 22.527 6.022 23.60 43 | ATOM 43 CA LEU 43 18.044 22.904 2.846 24.16 44 | 
ATOM 44 CA TYR 44 14.675 24.269 3.965 24.72 45 | ATOM 45 CA ASP 45 12.883 27.341 5.313 25.28 46 | ATOM 46 CA GLN 46 13.596 27.319 9.050 25.84 47 | ATOM 47 CA GLN 47 10.411 29.379 9.325 26.40 48 | ATOM 48 CA GLU 48 8.396 26.697 7.532 26.97 49 | ATOM 49 CA GLN 49 9.711 23.143 7.859 27.53 50 | ATOM 50 CA HIS 50 6.915 22.083 5.508 28.09 51 | ATOM 51 CA MET 51 8.779 24.114 2.886 28.65 52 | ATOM 52 CA VAL 52 11.986 22.469 1.669 29.21 53 | ATOM 53 CA TYR 53 14.643 24.349 -0.295 29.78 54 | ATOM 54 CA CYS 54 16.316 21.909 -2.686 30.34 55 | ATOM 55 CA GLY 55 18.135 24.903 -4.134 30.90 56 | ATOM 56 CA GLY 56 20.755 23.015 -6.107 31.46 57 | ATOM 57 CA ASP 57 21.882 20.132 -3.911 32.02 58 | ATOM 58 CA LEU 58 21.711 16.410 -4.683 32.58 59 | ATOM 59 CA LEU 59 17.944 16.363 -4.154 33.15 60 | ATOM 60 CA GLY 60 17.627 19.590 -6.122 33.71 61 | ATOM 61 CA GLU 61 19.225 18.039 -9.197 34.27 62 | ATOM 62 CA LEU 62 17.412 14.709 -8.896 34.83 63 | ATOM 63 CA LEU 63 14.074 16.533 -8.824 35.39 64 | ATOM 64 CA GLY 64 15.040 19.446 -11.051 35.96 65 | ATOM 65 CA ARG 65 13.201 22.142 -9.112 36.52 66 | ATOM 66 CA GLN 66 13.882 24.911 -6.594 37.08 67 | ATOM 67 CA SER 67 11.818 23.827 -3.588 37.64 68 | ATOM 68 CA PHE 68 9.135 21.336 -2.556 38.20 69 | ATOM 69 CA SER 69 6.622 21.013 0.282 38.76 70 | ATOM 70 CA VAL 70 7.157 18.185 2.768 39.33 71 | ATOM 71 CA LYS 71 3.532 18.036 3.908 39.89 72 | ATOM 72 CA ASP 72 2.661 17.638 0.227 40.45 73 | ATOM 73 CA PRO 73 4.063 14.374 -1.198 41.01 74 | ATOM 74 CA SER 74 3.678 15.582 -4.785 41.57 75 | ATOM 75 CA PRO 75 7.146 16.101 -6.316 42.13 76 | ATOM 76 CA LEU 76 8.687 13.684 -3.815 42.70 77 | ATOM 77 CA TYR 77 6.909 10.675 -5.314 43.26 78 | ATOM 78 CA ASP 78 7.251 11.956 -8.878 43.82 79 | ATOM 79 CA MET 79 11.032 11.965 -8.461 44.38 80 | ATOM 80 CA LEU 80 11.257 8.738 -6.459 44.94 81 | ATOM 81 CA ARG 81 9.377 7.085 -9.322 45.51 82 | ATOM 82 CA LYS 82 12.665 6.649 -11.187 46.07 83 | ATOM 83 CA ASN 83 14.845 7.661 -8.239 46.63 84 | ATOM 84 CA LEU 84 13.838 5.178 -5.538 47.19 85 | ATOM 85 CA VAL 85 14.896 1.846 -7.037 47.75 86 | ATOM 86 CA THR 86 12.022 -0.390 -5.939 48.31 87 | ATOM 87 CA LEU 87 11.556 -4.126 -6.474 48.88 88 | ATOM 88 CA ALA 88 8.630 -6.464 -7.136 49.44 89 | ATOM 89 O THR 89 7.596 -4.702 -10.344 50.00 90 | CONECT 1 2 91 | CONECT 2 3 92 | CONECT 3 4 93 | CONECT 4 5 94 | CONECT 5 6 95 | CONECT 6 7 96 | CONECT 7 8 97 | CONECT 8 9 98 | CONECT 9 10 99 | CONECT 10 11 100 | CONECT 11 12 101 | CONECT 12 13 102 | CONECT 13 14 103 | CONECT 14 15 104 | CONECT 15 16 105 | CONECT 16 17 106 | CONECT 17 18 107 | CONECT 18 19 108 | CONECT 19 20 109 | CONECT 20 21 110 | CONECT 21 22 111 | CONECT 22 23 112 | CONECT 23 24 113 | CONECT 24 25 114 | CONECT 25 26 115 | CONECT 26 27 116 | CONECT 27 28 117 | CONECT 28 29 118 | CONECT 29 30 119 | CONECT 30 31 120 | CONECT 31 32 121 | CONECT 32 33 122 | CONECT 33 34 123 | CONECT 34 35 124 | CONECT 35 36 125 | CONECT 36 37 126 | CONECT 37 38 127 | CONECT 38 39 128 | CONECT 39 40 129 | CONECT 40 41 130 | CONECT 41 42 131 | CONECT 42 43 132 | CONECT 43 44 133 | CONECT 44 45 134 | CONECT 45 46 135 | CONECT 46 47 136 | CONECT 47 48 137 | CONECT 48 49 138 | CONECT 49 50 139 | CONECT 50 51 140 | CONECT 51 52 141 | CONECT 52 53 142 | CONECT 53 54 143 | CONECT 54 55 144 | CONECT 55 56 145 | CONECT 56 57 146 | CONECT 57 58 147 | CONECT 58 59 148 | CONECT 59 60 149 | CONECT 60 61 150 | CONECT 61 62 151 | CONECT 62 63 152 | CONECT 63 64 153 | CONECT 64 65 154 | CONECT 65 66 155 | CONECT 66 67 156 | CONECT 67 68 157 | CONECT 68 69 158 | CONECT 69 70 159 | 
CONECT 70 71 160 | CONECT 71 72 161 | CONECT 72 73 162 | CONECT 73 74 163 | CONECT 74 75 164 | CONECT 75 76 165 | CONECT 76 77 166 | CONECT 77 78 167 | CONECT 78 79 168 | CONECT 79 80 169 | CONECT 80 81 170 | CONECT 81 82 171 | CONECT 82 83 172 | CONECT 83 84 173 | CONECT 84 85 174 | CONECT 85 86 175 | CONECT 86 87 176 | CONECT 87 88 177 | CONECT 88 89 178 | -------------------------------------------------------------------------------- /06_Basis_Sets/presentation/beyond_lcao.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/06_Basis_Sets/presentation/beyond_lcao.pdf -------------------------------------------------------------------------------- /06_Basis_Sets/presentation/images/fourier_tranform.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/06_Basis_Sets/presentation/images/fourier_tranform.gif -------------------------------------------------------------------------------- /06_Basis_Sets/presentation/lcao_basis_sets.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/06_Basis_Sets/presentation/lcao_basis_sets.pdf -------------------------------------------------------------------------------- /A1_Git/git_intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/A1_Git/git_intro.pdf -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # People 2 | The following people are contributors to the QM/MM Study Group: 3 | - Shiv Upadhyay: shu8@pitt.edu 4 | - Amanda Dumi: aed63@pitt.edu 5 | - Dakota Folmsbee: dlf57@pitt.edu 6 | - Bryan Henderson: bvh5@pitt.edu -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This project welcomes contributions in the form of example notebooks, example code, theory documentation, and bug fixes. Below there are directions on how to get started as well as guidelines on contributions. 4 | 5 | 6 | # Getting started 7 | ## 1. Fork this repository 8 | This creates a copy for you on your Github page (Click the button that says fork this repository). The changes you make to this version will not be a part of everyone's code yet. 9 | 10 | ## 2. Clone your fork locally 11 | This creates a local copy on your computer(s) that you can edit. 12 | 13 | ## 3. Add remote 14 | Once your forked copy of the repository is cloned locally, you will want to add the main repository as an upstream source in order to get any changes that are made to the main repository. To do this, add the main repository as an upstream source by `git remote add upstream https://github.com/shivupa/QMMM_study_group`. Check that the upstream was added by running `git remote -v`. If added correctly, you should be able to `git pull upstream master` in order to obtain any new changes made to the main repository. 15 | 16 | ## 4. 
Create development environment 17 | To ensure that all of us are working with the same packages and libraries, we have provided an `environment.yml` file in the root directory of this repository. This helps reduce the number of "well, it works on my system" problems and lets every user set up the environment and immediately try out the notebooks. Instructions for how to set up the provided development environment can be found in the [wiki](https://github.com/shivupa/QMMM_study_group/wiki/Environment-Setup). 18 | 19 | ## 5. Add notebooks, code, presentations, or fix bugs 20 | 21 | ## 6. Add the changes to your forked version 22 | To see what files have changed, use the command `git status`. 23 | Add the files that you have changed using `git add filename`. 24 | Explain what changes you have made using an informative commit message with `git commit -m "My message.."`. 25 | Push the changes to your forked version using `git push`. 26 | 27 | [Git](https://git-scm.com/docs/gittutorial) provides great tutorials on how to use git for version control for your projects. 28 | 29 | ## 7. Add your changes to the main repository 30 | Go to the [main repository](https://github.com/shivupa/QMMM_study_group) and open a pull request from *your* master branch to the project master branch. For more information on pull requests see [GitHub Help: About pull requests](https://help.github.com/articles/about-pull-requests/). Also, **DO NOT** merge your own pull requests. 31 | 32 | # Guidelines 33 | Not following these guidelines will ensure your pull request is not merged. 34 | 35 | ## Directory structure 36 | For best readability and consistency, each topic will have its own topic directory. In this topic directory there needs to be the pdf presentation of the topic (if applicable), a basics directory, and an advanced directory. The basics directory will contain framework notebooks/code that supplement the presentation of that topic. After a few days a solution notebook needs to be added to the basics directory. The advanced directory will contain notebooks/code that further build on the topic, such as faster algorithms or accounting for more complex systems. 37 | 38 | 39 | ## Naming 40 | #### Topic directory 41 | Topic directories are named such that the first letter in the name of the directory and all letters in the technique's name are capitalized. 42 | Example: 05_Modern_DFT 43 | 44 | #### Subdirectories and files 45 | All subdirectories and files need to be lowercase. Do not use spaces in names; use underscores. 46 | Example: uhf_psi4.ipynb 47 | 48 | ## Presentation 49 | The topic presentation can be made in your preferred presentation editor but must be exported as a pdf. 50 | 51 | ## pep8 52 | This project aims to follow the [pep8 style guide](https://www.python.org/dev/peps/pep-0008/) for python programming. The development environment that we provide includes the jupyter notebook extensions package, which lets you run autopep8 to help ensure the notebooks follow the pep8 style guide. This extension is off by default but can be turned on in the Nbextensions tab after launching jupyter notebook by clicking the box next to autopep8. To autopep8 a jupyter notebook, hold down shift and select the hammer icon.
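If you prefer to run the formatter outside the notebook interface, autopep8 (part of the provided environment) can also be called directly from Python. A minimal sketch — the messy sample string below is only an illustration, not code from this repository:

```python
# Minimal sketch: apply autopep8 programmatically to a string of Python code.
# autopep8 ships with the provided environment (see environment.yml).
import autopep8

messy = "import numpy as np\nx=np.linspace( 0,1,5 );y=x**2\n"
print(autopep8.fix_code(messy, options={"aggressive": 1}))
```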
53 | 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, QM/MM Study Group (See AUTHORS.md) 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # QM/MM study group 2 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/shivupa/QMMM_study_group/master) [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 3 | 4 | **Meetings are Fridays at 10 am.** Meetings will consist of sections spanning two weeks for each topic. The fist week will cover theory and present framework code. In the week following you will attempt to code the method yourself. During the second meeting within a section, we will present a working version of the code, address any issues or questions, and discuss more advanced topics relating to the method. 5 | 6 | Set up instructions and a list of useful references can be found in the [Wiki](https://github.com/shivupa/QMMM_study_group/wiki)! 7 | 8 | Try our notebooks without setting anything up by clicking on the Binder badge! 
9 | 10 | ## Contact information 11 | - Amanda Dumi: aed63@pitt.edu 12 | - Dakota Folmsbee: dlf57@pitt.edu 13 | - Shiv Upadhyay: shu8@pitt.edu 14 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: qmmm 2 | channels: 3 | - psi4 4 | - anaconda 5 | dependencies: 6 | - python=3.6 7 | - numpy 8 | - scipy 9 | - matplotlib 10 | - pandas 11 | - sympy 12 | - IPython 13 | - jupyter 14 | - cython 15 | - scikit-learn 16 | - pip: 17 | - pyscf 18 | - autopep8 19 | - jupyter_contrib_nbextensions 20 | - scikit-optimize 21 | - psi4 22 | - cmake 23 | --------------------------------------------------------------------------------
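The packages pinned above (numpy and scipy in particular) are enough to exercise the pure-Python analysis helpers in 05_MolecularDynamics/tools; note that MDAnalysis, which calc_rmsd.py imports to read xtc trajectories, is not listed in environment.yml and would need to be installed separately. As a quick check of the Kabsch-based rmsd() helper on its own, a rigidly rotated and translated copy of a structure should come back with an RMS deviation of essentially zero. A minimal sketch, assuming it is run from the repository root (the synthetic coordinates are only an illustration):

```python
# Sketch: exercise 05_MolecularDynamics/tools/rmsd.py on synthetic coordinates.
# A rigidly rotated + translated copy of a structure should give RMSD ~ 0
# after the least-squares (Kabsch) alignment performed inside rmsd().
import sys
import numpy

sys.path.insert(0, "05_MolecularDynamics/tools")
import rmsd

rng = numpy.random.RandomState(0)
reference = rng.uniform(-10.0, 10.0, size=(89, 3))  # 89 beads, like mdm2_folded.pdb

# Build a proper rotation matrix from a QR decomposition.
rotation, _ = numpy.linalg.qr(rng.normal(size=(3, 3)))
if numpy.linalg.det(rotation) < 0:
    rotation[:, 0] *= -1.0  # flip one axis so det = +1

mobile = reference.dot(rotation) + numpy.array([5.0, -3.0, 12.0])

# rmsd() centers and aligns its inputs in place, so pass copies.
print(rmsd.rmsd(mobile.copy(), reference.copy()))  # ~1e-14
```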