├── .gitignore
├── 00_Example
│   ├── example.ipynb
│   └── presentation.pdf
├── 01_Introduction
│   ├── Introduction.ipynb
│   ├── einsum.ipynb
│   └── einsum_solutions.ipynb
├── 02_SCF
│   ├── advanced
│   │   ├── diis_psi4.ipynb
│   │   ├── diis_psi4_solution.ipynb
│   │   ├── diis_pyscf.ipynb
│   │   ├── diis_pyscf_solution.ipynb
│   │   ├── uhf_diis_psi4.ipynb
│   │   ├── uhf_diis_pyscf.ipynb
│   │   ├── uhf_psi4.ipynb
│   │   └── uhf_pyscf.ipynb
│   ├── basics
│   │   ├── scf_psi4.ipynb
│   │   ├── scf_psi4_solution.ipynb
│   │   ├── scf_pyscf.ipynb
│   │   └── scf_pyscf_solution.ipynb
│   └── scf_slides.pdf
├── 03_MP2
│   ├── basics
│   │   ├── mp2_psi4.ipynb
│   │   ├── mp2_psi4_solution.ipynb
│   │   ├── mp2_pyscf.ipynb
│   │   └── mp2_pyscf_solution.ipynb
│   └── mp2_slides.pdf
├── 04_Machine_Learning
│   ├── advanced
│   │   └── bayesopt_boston.ipynb
│   ├── basics
│   │   ├── coulomb_matrix.ipynb
│   │   ├── coulomb_matrix_solutions.ipynb
│   │   ├── methane.xyz
│   │   ├── ml_boston.ipynb
│   │   └── ml_boston_solutions.ipynb
│   └── ml_slides.pdf
├── 05_MolecularDynamics
│   ├── README.txt
│   ├── enhanced_sampling
│   │   ├── EnhancedSampling.pdf
│   │   └── metadynamics.pdf
│   ├── pdb
│   │   ├── 2mwy.pdb
│   │   ├── 3rgf.pdb
│   │   ├── cdk8.pdb
│   │   └── mdm2.pdb
│   ├── template
│   │   ├── images
│   │   │   ├── divx2pass.log.mbtree
│   │   │   ├── encode.sh
│   │   │   ├── gfp.mov
│   │   │   └── render.sh
│   │   ├── parameter.file.01
│   │   ├── run.sh
│   │   ├── setup
│   │   │   ├── NOTES.txt
│   │   │   ├── protein.inp
│   │   │   └── run_prep.sh
│   │   ├── sim.inp
│   │   └── sim.inp.notes
│   ├── tools
│   │   ├── calc_rmsd.py
│   │   └── rmsd.py
│   └── trajectories
│       ├── .p53.xtc_offsets.npz
│       ├── cdk8.xtc
│       ├── cdk8_folded.pdb
│       ├── mdm2.xtc
│       └── mdm2_folded.pdb
├── 06_Basis_Sets
│   └── presentation
│       ├── beyond_lcao.pdf
│       ├── images
│       │   └── fourier_tranform.gif
│       └── lcao_basis_sets.pdf
├── A1_Git
│   └── git_intro.pdf
├── AUTHORS.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
└── environment.yml
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | # OS generated files
107 | .DS_Store
108 |
109 | #psi4numpy timer for jobs
110 | timer.dat
111 |
--------------------------------------------------------------------------------
/00_Example/example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Example Notebook"
12 | ]
13 | }
14 | ],
15 | "metadata": {
16 | "kernelspec": {
17 | "display_name": "Python 3",
18 | "language": "python",
19 | "name": "python3"
20 | },
21 | "language_info": {
22 | "codemirror_mode": {
23 | "name": "ipython",
24 | "version": 3
25 | },
26 | "file_extension": ".py",
27 | "mimetype": "text/x-python",
28 | "name": "python",
29 | "nbconvert_exporter": "python",
30 | "pygments_lexer": "ipython3",
31 | "version": "3.6.5"
32 | }
33 | },
34 | "nbformat": 4,
35 | "nbformat_minor": 2
36 | }
37 |
--------------------------------------------------------------------------------
/00_Example/presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/00_Example/presentation.pdf
--------------------------------------------------------------------------------
/01_Introduction/Introduction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Introduction Notebook\n",
12 | "\n",
13 | "Here we will cover common python libraries.\n",
14 | "\n",
15 | "1. [Numpy](#numpy) \n",
16 | "\n",
17 | "2. [Scipy](#scipy) \n",
18 | "\n",
19 | "3. [Matplotlib](#matplotlib) \n",
20 | "\n",
21 | "4. [PySCF](#pyscf)\n",
22 | "\n",
23 | "5. [Psi4](#psi4)"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "### Extra Practice\n",
31 | "For a more hands-on introduction, check out the notebooks at [this link](https://github.com/amandadumi/numerical_methods_release). Click the 'launch binder' badge; this takes you to a set of Jupyter notebooks hosted on Binder. Navigate to `IPython_notebooks/01_Introduction` and click 01_Introduction.ipynb to launch it in the browser. You are also welcome to clone the repository and run the notebook locally."
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "slideshow": {
38 | "slide_type": "slide"
39 | }
40 | },
41 | "source": [
42 | "\n",
43 | "## Numpy\n",
44 | "Fundamental package for scientific computing with Python"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {
51 | "slideshow": {
52 | "slide_type": "subslide"
53 | }
54 | },
55 | "outputs": [],
56 | "source": [
57 | "import numpy as np\n",
58 | "\n",
59 | "a = np.array((4, 5, 6, 6, 7, 8))\n",
60 | "b = np.array((8, 9, 2, 4, 6, 7))\n",
61 | "\n",
62 | "c = np.dot(a, b)\n",
63 | "print(c)"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {
69 | "slideshow": {
70 | "slide_type": "slide"
71 | }
72 | },
73 | "source": [
74 | "\n",
75 | "## Scipy\n",
76 | "\n",
77 | "Provides many user-friendly and efficient numerical routines such as routines for numerical integration and optimization"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {
84 | "scrolled": true,
85 | "slideshow": {
86 | "slide_type": "subslide"
87 | }
88 | },
89 | "outputs": [],
90 | "source": [
91 | "import scipy as sp\n",
92 | "import scipy.linalg as la\n",
93 | "\n",
94 | "mat = np.random.rand(5, 5)\n",
95 | "eig_val, eig_vec = la.eig(mat)\n",
96 | "\n",
97 | "print('eigenvalues:\\n {}\\n'.format(eig_val))\n",
98 | "print('eigenvectors:\\n {}'.format(eig_vec))"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {
104 | "collapsed": true,
105 | "slideshow": {
106 | "slide_type": "slide"
107 | }
108 | },
109 | "source": [
110 | "## Matplotlib\n",
111 | "\n",
112 | "Python library for 2- and 3-D visualization.\n",
113 | "\n",
114 | "Pyplot provides convenient functions to generate plots."
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {
121 | "slideshow": {
122 | "slide_type": "slide"
123 | }
124 | },
125 | "outputs": [],
126 | "source": [
127 | "import matplotlib.pyplot as plt\n",
128 | "\n",
129 | "x = np.linspace(0, 5, 100)\n",
130 | "y = np.sin(x)\n",
131 | "plt.plot(x, y)\n",
132 | "plt.show()"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": null,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": []
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {
145 | "slideshow": {
146 | "slide_type": "slide"
147 | }
148 | },
149 | "source": [
150 | "## Psi4Numpy\n",
151 | "\n",
152 | "Psi4 is an open source quantum chemistry package.\n",
153 | "\n",
154 | "The recently introduced [Psi4Numpy](https://github.com/psi4/psi4numpy) is a collection of notebooks for teaching quantum chemistry.\n",
155 | "\n"
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "metadata": {
161 | "slideshow": {
162 | "slide_type": "subslide"
163 | }
164 | },
165 | "source": [
166 | "The cell below runs an SCF cycle for water with the cc-pvdz basis using Psi4Numpy\n"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": null,
172 | "metadata": {
173 | "slideshow": {
174 | "slide_type": "subslide"
175 | }
176 | },
177 | "outputs": [],
178 | "source": [
179 | "import psi4\n",
180 | "\n",
181 | "# read in geometry for water\n",
182 | "h2o = psi4.geometry(\"\"\"\n",
183 | "O 0.0000000 0.0000000 0.0000000\n",
184 | "H 0.7569685 0.0000000 -0.5858752\n",
185 | "H -0.7569685 0.0000000 -0.5858752\n",
186 | "\"\"\")\n",
187 | "\n",
188 | "# set basis set\n",
189 | "psi4.set_options({'basis': 'cc-pvdz'})\n",
190 | "\n",
191 | "# run an scf calculation\n",
192 | "scf_e, scf_wfn = psi4.energy('scf', return_wfn=True)\n",
193 | "print('converged SCF energy: {}'.format(scf_e))"
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {
199 | "slideshow": {
200 | "slide_type": "slide"
201 | }
202 | },
203 | "source": [
204 | "## PySCF\n",
205 | "\n",
206 | "Python-based quantum simulations"
207 | ]
208 | },
209 | {
210 | "cell_type": "markdown",
211 | "metadata": {
212 | "slideshow": {
213 | "slide_type": "slide"
214 | }
215 | },
216 | "source": [
217 | "The cell below runs an SCF cycle for water with the cc-pvdz basis using PySCF"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": null,
223 | "metadata": {
224 | "slideshow": {
225 | "slide_type": "subslide"
226 | }
227 | },
228 | "outputs": [],
229 | "source": [
230 | "from pyscf import gto, scf\n",
231 | "\n",
232 | "# read in geometry\n",
233 | "mol = gto.M(atom='O 0.0000000 0.0000000 0.0000000; H 0.7569685 0.0000000 -0.5858752; H -0.7569685 0.0000000 -0.5858752')\n",
234 | "mol.basis = 'ccpvdz'\n",
235 | "# run an scf calculation\n",
236 | "mol_scf = scf.RHF(mol)\n",
237 | "mol_scf.kernel()"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": null,
243 | "metadata": {},
244 | "outputs": [],
245 | "source": []
246 | }
247 | ],
248 | "metadata": {
249 | "celltoolbar": "Slideshow",
250 | "kernelspec": {
251 | "display_name": "Python 3",
252 | "language": "python",
253 | "name": "python3"
254 | },
255 | "language_info": {
256 | "codemirror_mode": {
257 | "name": "ipython",
258 | "version": 3
259 | },
260 | "file_extension": ".py",
261 | "mimetype": "text/x-python",
262 | "name": "python",
263 | "nbconvert_exporter": "python",
264 | "pygments_lexer": "ipython3",
265 | "version": "3.6.5"
266 | }
267 | },
268 | "nbformat": 4,
269 | "nbformat_minor": 2
270 | }
271 |
--------------------------------------------------------------------------------
/01_Introduction/einsum.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Einstein Notation and np.einsum\n",
8 | "\n",
9 | "## Useful Resources\n",
10 | "- [NumPy einsum](https://docs.scipy.org/doc/numpy/reference/generated/numpy.einsum.html)\n",
11 | "- [A basic introduction to NumPy's einsum](http://ajcr.net/Basic-guide-to-einsum/)"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "import numpy as np"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "## What is Einstein notation?\n",
28 | "Einstein notation is a notational convention that simplifies expressions containing vectors, matrices, or tensors.\n",
29 | "\n",
30 | "\"I have made a great discovery in mathematics; I have suppressed the summation sign every time that the summation must be made over an index which occurs twice...\" ~ Einstein (Kollros 1956; Pais 1982, p. 216).\n",
31 | "\n",
32 | "### Vector Example\n",
33 | "Let's have two three dimensional vectors $\\textbf{A}$ and $\\textbf{B}$:\n",
34 | "$$\\textbf{A} = A_x \\hat{x} + A_y \\hat{y} + A_z \\hat{z}$$\n",
35 | "$$\\textbf{B} = B_x \\hat{x} + B_y \\hat{y} + B_z \\hat{z}$$\n",
36 | "\n",
37 | "If we wanted to do the dot product of $\\textbf{A}$ and $\\textbf{B}$ we would have:\n",
38 | "$$\\textbf{A}\\cdot \\textbf{B} = A_x B_x + A_y B_y + A_z B_z$$\n",
39 | "\n",
40 | "This gives us a scalar that is the sum of the products:\n",
41 | "$$\\textbf{A}\\cdot \\textbf{B} = \\sum_{i=1}^{N} A_i B_i \\quad \\textrm{where} \\quad N = 3$$"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "A = np.array([1, 2, 3])\n",
51 | "B = np.array([4, 5, 6])"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "np.sum(np.multiply(A, B))"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "np.einsum('i,i->', A, B)"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "### Let's look at a 3x3 example"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "C = np.random.rand(3, 3)\n",
86 | "D = np.random.rand(3, 3)\n",
87 | "\n",
88 | "print(C)\n",
89 | "print('\\n')\n",
90 | "print(D)"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": null,
96 | "metadata": {},
97 | "outputs": [],
98 | "source": [
99 | "np.sum(np.multiply(C, D)) "
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "%%timeit\n",
109 | "np.sum(np.multiply(C, D)) "
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "np.einsum('ij,ij->', C, D)"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {
125 | "scrolled": true
126 | },
127 | "outputs": [],
128 | "source": [
129 | "%%timeit\n",
130 | "np.einsum('ij,ij->', C, D)"
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {},
136 | "source": [
137 | "## What can Einstein notation do?\n",
138 | "\n",
139 | "| String | np equiv. | Description|\n",
140 | "|-|-|-|\n",
141 | "| 'ij', C | C | returns C |\n",
142 | "| 'ji', C | C.T | transpose of C |\n",
143 | "| 'ii->i', C | np.diag(C) | returns diagonal |\n",
144 | "| 'ii', C | np.trace(C) | returns trace |\n",
145 | "| 'ij->', C | np.sum(C) | sum of C |\n",
146 | "| 'ij->j', C | np.sum(C, axis=0) | sum down columns of C |\n",
147 | "| 'ij,ij->ij', C, D | C * D | element-wise multiplication of C and D |\n",
148 | "| 'ij,jk', C, D | C.dot(D) | matrix multiplication of C and D |\n",
149 | "\n",
150 | "[For more](http://ajcr.net/Basic-guide-to-einsum/)\n",
151 | "\n",
152 | "## Try your hand at Einstein notation\n",
153 | "- sum along rows of C\n",
154 | "- C * D.T\n",
155 | "- inner product of C and D"
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "metadata": {},
161 | "source": [
162 | "#### Sum along rows of C"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": null,
168 | "metadata": {},
169 | "outputs": [],
170 | "source": []
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "#### C * D.T"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": null,
182 | "metadata": {},
183 | "outputs": [],
184 | "source": []
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "metadata": {},
189 | "source": [
190 | "#### Inner product of C and D"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": null,
196 | "metadata": {},
197 | "outputs": [],
198 | "source": []
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {},
203 | "source": [
204 | "### Dot Product\n",
205 | "Time 4 different ways a dot product can be performed"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": null,
211 | "metadata": {},
212 | "outputs": [],
213 | "source": [
214 | "%%timeit\n"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": null,
220 | "metadata": {},
221 | "outputs": [],
222 | "source": [
223 | "%%timeit\n"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {},
230 | "outputs": [],
231 | "source": [
232 | "%%timeit\n"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": null,
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "%%timeit\n"
242 | ]
243 | }
244 | ],
245 | "metadata": {
246 | "kernelspec": {
247 | "display_name": "Python 3",
248 | "language": "python",
249 | "name": "python3"
250 | },
251 | "language_info": {
252 | "codemirror_mode": {
253 | "name": "ipython",
254 | "version": 3
255 | },
256 | "file_extension": ".py",
257 | "mimetype": "text/x-python",
258 | "name": "python",
259 | "nbconvert_exporter": "python",
260 | "pygments_lexer": "ipython3",
261 | "version": "3.6.6"
262 | }
263 | },
264 | "nbformat": 4,
265 | "nbformat_minor": 2
266 | }
267 |
--------------------------------------------------------------------------------
/01_Introduction/einsum_solutions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Einstein Notation and np.einsum\n",
8 | "\n",
9 | "## Useful Resources\n",
10 | "- [NumPy einsum](https://docs.scipy.org/doc/numpy/reference/generated/numpy.einsum.html)\n",
11 | "- [A basic introduction to NumPy's einsum](http://ajcr.net/Basic-guide-to-einsum/)"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "import numpy as np"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "## What is Einstein notation?\n",
28 | "Einstein notation is a notational convention that simplifies expressions containing vectors, matrices, or tensors.\n",
29 | "\n",
30 | "\"I have made a great discovery in mathematics; I have suppressed the summation sign every time that the summation must be made over an index which occurs twice...\" ~ Einstein (Kollros 1956; Pais 1982, p. 216).\n",
31 | "\n",
32 | "### Vector Example\n",
33 | "Let's have two three dimensional vectors $\\textbf{A}$ and $\\textbf{B}$:\n",
34 | "$$\\textbf{A} = A_x \\hat{x} + A_y \\hat{y} + A_z \\hat{z}$$\n",
35 | "$$\\textbf{B} = B_x \\hat{x} + B_y \\hat{y} + B_z \\hat{z}$$\n",
36 | "\n",
37 | "If we wanted to do the dot product of $\\textbf{A}$ and $\\textbf{B}$ we would have:\n",
38 | "$$\\textbf{A}\\cdot \\textbf{B} = A_x B_x + A_y B_y + A_z B_z$$\n",
39 | "\n",
40 | "This gives us a scalar that is the sum of the products:\n",
41 | "$$\\textbf{A}\\cdot \\textbf{B} = \\sum_{i=1}^{N} A_i B_i \\quad \\textrm{where} \\quad N = 3$$"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "A = np.array([1, 2, 3])\n",
51 | "B = np.array([4, 5, 6])"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 3,
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "data": {
61 | "text/plain": [
62 | "32"
63 | ]
64 | },
65 | "execution_count": 3,
66 | "metadata": {},
67 | "output_type": "execute_result"
68 | }
69 | ],
70 | "source": [
71 | "np.sum(np.multiply(A, B))"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 4,
77 | "metadata": {},
78 | "outputs": [
79 | {
80 | "data": {
81 | "text/plain": [
82 | "32"
83 | ]
84 | },
85 | "execution_count": 4,
86 | "metadata": {},
87 | "output_type": "execute_result"
88 | }
89 | ],
90 | "source": [
91 | "np.einsum('i,i->', A, B)"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "### Let's look at a 3x3 example"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 5,
104 | "metadata": {},
105 | "outputs": [
106 | {
107 | "name": "stdout",
108 | "output_type": "stream",
109 | "text": [
110 | "[[0.44892352 0.23946046 0.3720617 ]\n",
111 | " [0.29549156 0.0521187 0.5270508 ]\n",
112 | " [0.27981144 0.73042791 0.9594615 ]]\n",
113 | "\n",
114 | "\n",
115 | "[[0.16955709 0.30609099 0.08329563]\n",
116 | " [0.0690296 0.76714205 0.89986847]\n",
117 | " [0.15000005 0.35051006 0.47822482]]\n"
118 | ]
119 | }
120 | ],
121 | "source": [
122 | "C = np.random.rand(3, 3)\n",
123 | "D = np.random.rand(3, 3)\n",
124 | "\n",
125 | "print(C)\n",
126 | "print('\\n')\n",
127 | "print(D)"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 6,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "data": {
137 | "text/plain": [
138 | "1.4718948471833324"
139 | ]
140 | },
141 | "execution_count": 6,
142 | "metadata": {},
143 | "output_type": "execute_result"
144 | }
145 | ],
146 | "source": [
147 | "np.sum(np.multiply(C, D)) "
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 7,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "name": "stdout",
157 | "output_type": "stream",
158 | "text": [
159 | "3.77 µs ± 90.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "%%timeit\n",
165 | "np.sum(np.multiply(C, D)) "
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": 8,
171 | "metadata": {},
172 | "outputs": [
173 | {
174 | "data": {
175 | "text/plain": [
176 | "1.4718948471833326"
177 | ]
178 | },
179 | "execution_count": 8,
180 | "metadata": {},
181 | "output_type": "execute_result"
182 | }
183 | ],
184 | "source": [
185 | "np.einsum('ij,ij->', C, D)"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 9,
191 | "metadata": {
192 | "scrolled": true
193 | },
194 | "outputs": [
195 | {
196 | "name": "stdout",
197 | "output_type": "stream",
198 | "text": [
199 | "1.93 µs ± 15.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
200 | ]
201 | }
202 | ],
203 | "source": [
204 | "%%timeit\n",
205 | "np.einsum('ij,ij->', C, D)"
206 | ]
207 | },
208 | {
209 | "cell_type": "markdown",
210 | "metadata": {},
211 | "source": [
212 | "## What can Einstein notation do?\n",
213 | "\n",
214 | "| String | np equiv. | Description|\n",
215 | "|-|-|-|\n",
216 | "| 'ij', C | C | returns C |\n",
217 | "| 'ji', C | C.T | transpose of C |\n",
218 | "| 'ii->i', C | np.diag(C) | returns diagonal |\n",
219 | "| 'ii', C | np.trace(C) | returns trace |\n",
220 | "| 'ij->', C | np.sum(C) | sum of C |\n",
221 | "| 'ij->j', C | np.sum(C, axis=0) | sum down columns of C |\n",
222 | "| 'ij,ij->ij', C, D | C * D | element-wise multiplication of C and D |\n",
223 | "| 'ij,jk', C, D | C.dot(D) | matrix multiplication of C and D |\n",
224 | "\n",
225 | "[For more](http://ajcr.net/Basic-guide-to-einsum/)\n",
226 | "\n",
227 | "## Try your hand at Einstein notation\n",
228 | "- sum along rows of C\n",
229 | "- C * D.T\n",
230 | "- inner product of C and D"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "#### Sum along rows of C"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 10,
243 | "metadata": {},
244 | "outputs": [
245 | {
246 | "data": {
247 | "text/plain": [
248 | "array([1.06044568, 0.87466107, 1.96970085])"
249 | ]
250 | },
251 | "execution_count": 10,
252 | "metadata": {},
253 | "output_type": "execute_result"
254 | }
255 | ],
256 | "source": [
257 | "np.einsum('ij->i', C)"
258 | ]
259 | },
260 | {
261 | "cell_type": "markdown",
262 | "metadata": {},
263 | "source": [
264 | "#### C * D.T"
265 | ]
266 | },
267 | {
268 | "cell_type": "code",
269 | "execution_count": 11,
270 | "metadata": {},
271 | "outputs": [
272 | {
273 | "data": {
274 | "text/plain": [
275 | "array([[0.07611817, 0.01652986, 0.05580928],\n",
276 | " [0.09044731, 0.03998245, 0.18473661],\n",
277 | " [0.02330707, 0.65728904, 0.4588383 ]])"
278 | ]
279 | },
280 | "execution_count": 11,
281 | "metadata": {},
282 | "output_type": "execute_result"
283 | }
284 | ],
285 | "source": [
286 | "np.einsum('ij,ji->ij', C, D)"
287 | ]
288 | },
289 | {
290 | "cell_type": "markdown",
291 | "metadata": {},
292 | "source": [
293 | "#### Inner product of C and D"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 12,
299 | "metadata": {},
300 | "outputs": [
301 | {
302 | "data": {
303 | "text/plain": [
304 | "array([[0.18040597, 0.5494958 , 0.32920099],\n",
305 | " [0.10995678, 0.53465651, 0.31464066],\n",
306 | " [0.35094037, 1.44304639, 0.75683236]])"
307 | ]
308 | },
309 | "execution_count": 12,
310 | "metadata": {},
311 | "output_type": "execute_result"
312 | }
313 | ],
314 | "source": [
315 | "np.einsum('ij,kj->ik', C, D)"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {},
321 | "source": [
322 | "### Dot Product\n",
323 | "Time 4 different ways a dot product can be performed"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 13,
329 | "metadata": {},
330 | "outputs": [
331 | {
332 | "name": "stdout",
333 | "output_type": "stream",
334 | "text": [
335 | "865 ns ± 2.93 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
336 | ]
337 | }
338 | ],
339 | "source": [
340 | "%%timeit\n",
341 | "C @ D"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 14,
347 | "metadata": {},
348 | "outputs": [
349 | {
350 | "name": "stdout",
351 | "output_type": "stream",
352 | "text": [
353 | "917 ns ± 4.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
354 | ]
355 | }
356 | ],
357 | "source": [
358 | "%%timeit\n",
359 | "np.dot(C, D)"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": 15,
365 | "metadata": {},
366 | "outputs": [
367 | {
368 | "name": "stdout",
369 | "output_type": "stream",
370 | "text": [
371 | "897 ns ± 5.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
372 | ]
373 | }
374 | ],
375 | "source": [
376 | "%%timeit\n",
377 | "C.dot(D)"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 16,
383 | "metadata": {},
384 | "outputs": [
385 | {
386 | "name": "stdout",
387 | "output_type": "stream",
388 | "text": [
389 | "1.95 µs ± 8.14 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
390 | ]
391 | }
392 | ],
393 | "source": [
394 | "%%timeit\n",
395 | "np.einsum('ij,jk', C, D)"
396 | ]
397 | }
398 | ],
399 | "metadata": {
400 | "kernelspec": {
401 | "display_name": "Python 3",
402 | "language": "python",
403 | "name": "python3"
404 | },
405 | "language_info": {
406 | "codemirror_mode": {
407 | "name": "ipython",
408 | "version": 3
409 | },
410 | "file_extension": ".py",
411 | "mimetype": "text/x-python",
412 | "name": "python",
413 | "nbconvert_exporter": "python",
414 | "pygments_lexer": "ipython3",
415 | "version": "3.6.6"
416 | }
417 | },
418 | "nbformat": 4,
419 | "nbformat_minor": 2
420 | }
421 |
--------------------------------------------------------------------------------
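As a quick sanity check on the einsum equivalence table shown in the two notebooks above, the short snippet below (not part of the original notebooks; added here purely as an illustration) verifies several rows of the table with `np.allclose`:

```python
import numpy as np

C = np.random.rand(3, 3)
D = np.random.rand(3, 3)

# Each assertion mirrors one row of the "What can Einstein notation do?" table.
assert np.allclose(np.einsum('ji', C), C.T)                   # transpose of C
assert np.allclose(np.einsum('ii->i', C), np.diag(C))         # diagonal of C
assert np.allclose(np.einsum('ii', C), np.trace(C))           # trace of C
assert np.allclose(np.einsum('ij->j', C), np.sum(C, axis=0))  # sum down columns
assert np.allclose(np.einsum('ij,ij->ij', C, D), C * D)       # element-wise product
assert np.allclose(np.einsum('ij,jk', C, D), C.dot(D))        # matrix multiplication
print("All einsum equivalences hold.")
```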
/02_SCF/advanced/diis_psi4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Direct Inversion of Iterative Subspace"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "import scipy.linalg as spla\n",
18 | "import psi4\n",
19 | "import time"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "## Useful Resources\n",
27 | "- [P. Pulay. Chem. Phys. Lett. 73, 393-398 (1980)](https://www.sciencedirect.com/science/article/pii/0009261480803964)\n",
28 | "- [DIIS by C. David Sherrill](http://vergil.chemistry.gatech.edu/notes/diis/diis.pdf)\n",
29 | "- [DePrince Research Group DIIS Tutorial](https://www.chem.fsu.edu/~deprince/programming_projects/diis/)\n",
30 | "- [Psi4Numpy DIIS Tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/03_Hartree-Fock/3b_rhf-diis.ipynb)\n",
31 | "- [DIIS by MolSSI-Education](https://github.com/MolSSI-Education/QM_2017_SSS_Team8/blob/master/Tutorial_PDFs/02_SCF_DIIS.pdf)"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "## Introduction\n",
39 | "Iterative methods are commonly used to solve systems of linear equations, but they can suffer from convergence problems such as slow convergence and high computational cost. Today we are going to use DIIS (Direct Inversion of the Iterative Subspace) to accelerate convergence; it is commonly used to aid the convergence of SCF wavefunctions. We will build off of our previous example of a simple RHF.\n",
40 | "\n",
41 | "## General Theory\n",
42 | "During the iterative solution we generate a set of trial vectors $p^{i}$ that converge to the true solution $p^{f}$. This allows us to form a set of residual vectors\n",
43 | "$$\n",
44 | "\\Delta \\mathbf{p} = \\mathbf{p}^{i+1} - \\mathbf{p}^{i}\n",
45 | "$$\n",
46 | "\n",
47 | "DIIS assumes that the true solution can be approximated as a linear combination of the previous trial vector guesses, \n",
48 | "$$\\mathbf{p} = \\sum_{i} c_{i} \\mathbf{p}^{i}$$\n",
49 | "\n",
50 | "\n",
51 | "The coefficients $c_{i}$ can be obtained by requiring the residual vector to be a least-squares approximation to the zero vector,\n",
52 | "\n",
53 | "$$\\Delta \\mathbf{p} = \\sum_{i} c_{i} \\Delta \\mathbf{p}^{i}$$\n",
54 | "\n",
55 | "\n",
56 | "constrained by,\n",
57 | "\n",
58 | "$$\\sum_{i} c_{i} =1$$\n",
59 | "\n",
60 | "\n",
61 | "This allows us to represent each trial function $p^{i}$ as the true solution plus an error vector:\n",
62 | "$$\\mathbf{p} = \\sum_{i} c_{i} (\\mathbf{p}^{f} + \\mathbf{e}^{i}) = \\mathbf{p}^{f} \\sum_{i} c_{i} + \\sum_{i} c_{i} \\mathbf{e}^{i}$$\n",
63 | "\n",
64 | "Convergence minimizes the error, which in turn causes the second term above to vanish. For our DIIS solution $\\mathbf{p}$ to equal the true solution $\\mathbf{p}^{f}$, we must therefore have $\\sum_{i} c_{i} =1$.\n",
65 | "\n",
66 | "We need to minimize the norm of the residual vector subject to this constraint:\n",
67 | "$$ \\left \\langle \\Delta \\mathbf{p} | \\Delta \\mathbf{p} \\right \\rangle = \\sum_{ij} c_{i}^{\\ast} c_{j} \\left \\langle \\Delta \\mathbf{p}^{i} | \\Delta \\mathbf{p}^{j} \\right \\rangle $$\n",
68 | "\n",
69 | "We can minimize using a Lagrange multiplier\n",
70 | "$$ \\cal L = c^{\\dagger} \\mathbf{B} c - \\lambda (1 - \\sum_{i} c_{i})$$\n",
71 | "\n",
72 | "where B is the residual vector overlap.\n",
73 | "$$ B_{ij}=\\left \\langle \\Delta \\mathbf{p}^{i} | \\Delta \\mathbf{p}^{j} \\right \\rangle $$\n",
74 | "\n",
75 | "This allows us to minimize $\\cal L$ with respect to a coefficient $c_{k}$:\n",
76 | "$$\\frac{\\partial \\cal L }{\\partial c_{k}}=0 = \\sum_{j} c_{j} B_{kj} + \\sum_{i} c_{i} B_{ik} - \\lambda = 2 \\sum_{i} c_{i} B_{ik} - \\lambda$$\n",
77 | "\n",
78 | "We can represent this with the matrix below\n",
79 | "\n",
80 | "$$\n",
81 | "\\begin{bmatrix}\n",
82 | "B_{11} & B_{12} & \\cdots & B_{1m} & -1 & \\\\ \n",
83 | "B_{21} & B_{22} & \\cdots & B_{2m} & -1 & \\\\ \n",
84 | "\\vdots & \\vdots & \\ddots & \\vdots & \\vdots & \\\\ \n",
85 | "B_{m1} & B_{m2} & \\cdots & B_{mm} & -1 & \\\\ \n",
86 | "-1 & -1 & \\cdots & -1 & 0 & \n",
87 | "\\end{bmatrix} \n",
88 | "\\begin{bmatrix}\n",
89 | "c_{1} & \\\\ \n",
90 | "c_{2} & \\\\ \n",
91 | "\\vdots & \\\\ \n",
92 | "c_{m} & \\\\ \n",
93 | "\\lambda & \n",
94 | "\\end{bmatrix} \n",
95 | "=\n",
96 | "\\begin{bmatrix}\n",
97 | "0 & \\\\ \n",
98 | "0 & \\\\ \n",
99 | "\\vdots & \\\\ \n",
100 | "0 & \\\\ \n",
101 | "-1 & \n",
102 | "\\end{bmatrix} \n",
103 | "$$"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "## Imports"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "## Load Molecule"
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": null,
123 | "metadata": {},
124 | "outputs": [],
125 | "source": [
126 | "# Define molecule\n",
127 | "mol = psi4.geometry(\"\"\"\n",
128 | "O 0.0000000 0.0000000 0.0000000\n",
129 | "H 0.7569685 0.0000000 -0.5858752\n",
130 | "H -0.7569685 0.0000000 -0.5858752\n",
131 | "symmetry c1\n",
132 | "\"\"\")\n",
133 | "psi4.set_options({'basis': 'sto-3g'})\n",
134 | "wfn = psi4.core.Wavefunction.build(mol, psi4.core.get_global_option('BASIS'))\n",
135 | "mints = psi4.core.MintsHelper(wfn.basisset())\n",
136 | "\n",
137 | "# Get number of electrons\n",
138 | "num_elec_alpha = wfn.nalpha()\n",
139 | "num_elec_beta = wfn.nbeta()\n",
140 | "num_elec = num_elec_alpha + num_elec_beta\n",
141 | "\n",
142 | "# Get nuclear repulsion energy\n",
143 | "E_nuc = mol.nuclear_repulsion_energy()"
144 | ]
145 | },
146 | {
147 | "cell_type": "markdown",
148 | "metadata": {},
149 | "source": [
150 | "## Calculate Molecular Integrals"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": null,
156 | "metadata": {},
157 | "outputs": [],
158 | "source": [
159 | "# Calculate overlap integrals\n",
160 | "S = np.asarray(mints.ao_overlap())\n",
161 | "\n",
162 | "# Calculate kinetic energy integrals\n",
163 | "T = np.asarray(mints.ao_kinetic())\n",
164 | "\n",
165 | "# Calculate nuclear attraction integrals\n",
166 | "V = np.asarray(mints.ao_potential())\n",
167 | "\n",
168 | "# Form core Hamiltonian\n",
169 | "H = T + V\n",
170 | "\n",
171 | "# Calculate two electron integrals\n",
172 | "eri = np.asarray(mints.ao_eri())\n",
173 | "\n",
174 | "# Get number of atomic orbitals\n",
175 | "num_ao = np.shape(S)[0]\n",
176 | "\n",
177 | "print(np.shape(eri))"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {},
183 | "source": [
184 | "## Core Guess"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": null,
190 | "metadata": {},
191 | "outputs": [],
192 | "source": [
193 | "# AO orthogonalization matrix\n",
194 | "A = spla.fractional_matrix_power(S, -0.5)\n",
195 | "\n",
196 | "# Solve the generalized eigenvalue problem\n",
197 | "E_orbitals, C = spla.eigh(H, S)\n",
198 | "\n",
199 | "# Compute initial density matrix\n",
200 | "D = np.zeros((num_ao, num_ao))\n",
201 | "for i in range(num_ao):\n",
202 | " for j in range(num_ao):\n",
203 | " for k in range(num_elec_alpha):\n",
204 | " D[i, j] += C[i, k] * C[j, k]"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "## DIIS Function\n",
212 | "\n",
213 | "### Steps in DIIS Function\n",
214 | "1. Build B matrix\n",
215 | "2. Solve the Pulay equation\n",
216 | "3. Build the DIIS Fock matrix"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "def diis(F_list, diis_res):\n",
226 | " # Build B matrix\n",
227 | "\n",
228 | " # Right hand side of Pulay eqn\n",
229 | "\n",
230 | " # Solve Pulay for coeffs\n",
231 | "\n",
232 | " # Build DIIS Fock\n",
233 | "\n",
234 | " return F_diis"
235 | ]
236 | },
237 | {
238 | "cell_type": "markdown",
239 | "metadata": {},
240 | "source": [
241 | "## Variables, Criteria, and Organization"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": null,
247 | "metadata": {},
248 | "outputs": [],
249 | "source": [
250 | "# 2 helper functions for printing during SCF\n",
251 | "def print_start_iterations():\n",
252 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
253 | " \"Iter\", \"Time(s)\", \"DIIS RMS\", \"delta E\", \"E_elec\")))\n",
254 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
255 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n",
256 | "\n",
257 | "\n",
258 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, diis_rms, iteration_E_diff, E_elec):\n",
259 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(\n",
260 | " iteration_num, iteration_end_time - iteration_start_time, diis_rms, iteration_E_diff, E_elec)))\n",
261 | "\n",
262 | "\n",
263 | "# Set stopping criteria\n",
264 | "iteration_max = 100\n",
265 | "convergence_E = 1e-9\n",
266 | "convergence_DIIS = 1e-5\n",
267 | "\n",
268 | "# Loop variables\n",
269 | "iteration_num = 0\n",
270 | "E_total = 0\n",
271 | "E_elec = 0.0\n",
272 | "iteration_E_diff = 0.0\n",
273 | "iteration_rmsc_dm = 0.0\n",
274 | "converged = False\n",
275 | "exceeded_iterations = False"
276 | ]
277 | },
278 | {
279 | "cell_type": "markdown",
280 | "metadata": {},
281 | "source": [
282 | "## DIIS SCF Iteration\n",
283 | "Our trial vector will be the Fock matrix with the error vector being the orthonormalized orbital gradient.\n",
284 | "\n",
285 | "$$ r_{\\mu \\nu} = (\\mathbf{A^{T}}(\\mathbf{FDS} - \\mathbf{SDF}) \\mathbf{A})_{\\mu \\nu} $$\n",
286 | "\n",
287 | "### Call DIIS in SCF Iteration\n",
288 | "1. Build DIIS Residual (error vector) that will be used to make the B matrix\n",
289 | "2. Store trial and residual vectors\n",
290 | "3. Call DIIS to start after the first iteration\n",
291 | "4. Compute the next guess with the DIIS Fock matrix"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": null,
297 | "metadata": {
298 | "scrolled": true
299 | },
300 | "outputs": [],
301 | "source": [
302 | "# Trial & Residual vector lists\n",
303 | "F_list = []\n",
304 | "DIIS_resid = []\n",
305 | "\n",
306 | "print(\"{:^79}\".format('=====> Starting SCF Iterations <=====\\n'))\n",
307 | "print_start_iterations()\n",
308 | "while (not converged and not exceeded_iterations):\n",
309 | " # Store last iteration and increment counters\n",
310 | " iteration_start_time = time.time()\n",
311 | " iteration_num += 1\n",
312 | " E_elec_last = E_elec\n",
313 | " D_last = np.copy(D)\n",
314 | "\n",
315 | " # Form G matrix\n",
316 | " G = np.zeros((num_ao, num_ao))\n",
317 | " for i in range(num_ao):\n",
318 | " for j in range(num_ao):\n",
319 | " for k in range(num_ao):\n",
320 | " for l in range(num_ao):\n",
321 | " G[i, j] += D[k, l] * \\\n",
322 | " ((2.0*(eri[i, j, k, l])) - (eri[i, k, j, l]))\n",
323 | "\n",
324 | " # Build fock matrix\n",
325 | " F = H + G\n",
326 | "\n",
327 | " # Calculate electronic energy\n",
328 | " E_elec = np.sum(np.multiply(D, (H + F)))\n",
329 | "\n",
330 | " # Calculate energy change of iteration\n",
331 | " iteration_E_diff = np.abs(E_elec - E_elec_last)\n",
332 | "\n",
333 | " # =======> Start of DIIS stuff <=========\n",
334 | " # Build the DIIS AO gradient\n",
335 | "\n",
336 | " # DIIS RMS\n",
337 | "\n",
338 | " # Append lists\n",
339 | " F_list.append(F)\n",
340 | " DIIS_resid.append(diis_r)\n",
341 | "\n",
342 | " if iteration_num >= 2:\n",
343 | "        # perform DIIS to get the Fock matrix\n",
344 | "\n",
345 | " # Compute new guess with F DIIS\n",
346 | "\n",
347 | " D = np.zeros((num_ao, num_ao))\n",
348 | " for i in range(num_ao):\n",
349 | " for j in range(num_ao):\n",
350 | " for k in range(num_elec_alpha):\n",
351 | " D[i, j] += C[i, k] * C[j, k]\n",
352 | "\n",
353 | " # =======> End of DIIS stuff <=========\n",
354 | "\n",
355 | " iteration_end_time = time.time()\n",
356 | " print_iteration(iteration_num, iteration_start_time, iteration_end_time,\n",
357 | " diis_rms, iteration_E_diff, E_elec)\n",
358 | "\n",
359 | " if(np.abs(iteration_E_diff) < convergence_E and diis_rms < convergence_DIIS):\n",
360 | " converged = True\n",
361 | " print('\\n', \"{:^79}\".format('=====> SCF Converged <=====\\n'))\n",
362 | " # calculate total energy\n",
363 | " E_total = E_elec + E_nuc\n",
364 | " print(\"{:^79}\".format(\"Total Energy : {:>11f}\".format(E_total)))\n",
365 | "\n",
366 | " if(iteration_num == iteration_max):\n",
367 | " exceeded_iterations = True\n",
368 | "        print(\"{:^79}\".format('=====> SCF Exceeded Max Iterations <=====\\n'))"
369 | ]
370 | },
371 | {
372 | "cell_type": "markdown",
373 | "metadata": {},
374 | "source": [
375 | "## References\n",
376 | "1. P. Pulay. Chem. Phys. Lett. 73, 393-398 (1980)\n",
377 | "2. C. David Sherrill. \"Some comments on accellerating convergence of iterative sequences using direct inversion of the iterative subspace (DIIS)\". http://vergil.chemistry.gatech.edu/notes/diis/diis.pdf. (1998)"
378 | ]
379 | }
380 | ],
381 | "metadata": {
382 | "kernelspec": {
383 | "display_name": "Python 3",
384 | "language": "python",
385 | "name": "python3"
386 | },
387 | "language_info": {
388 | "codemirror_mode": {
389 | "name": "ipython",
390 | "version": 3
391 | },
392 | "file_extension": ".py",
393 | "mimetype": "text/x-python",
394 | "name": "python",
395 | "nbconvert_exporter": "python",
396 | "pygments_lexer": "ipython3",
397 | "version": "3.6.6"
398 | }
399 | },
400 | "nbformat": 4,
401 | "nbformat_minor": 2
402 | }
403 |
--------------------------------------------------------------------------------
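The `diis()` cell in the notebook above is deliberately left as a skeleton (a completed version is presumably what `diis_psi4_solution.ipynb` in this directory contains; that file is not shown here). As a rough sketch of the three steps the markdown describes — build the B matrix, solve the Pulay equation, and form the extrapolated Fock matrix — one possible implementation could look like the following; the signature matches the skeleton, but everything else is an illustrative assumption rather than the repository's own solution:

```python
import numpy as np

def diis(F_list, diis_res):
    """Sketch of DIIS extrapolation: F_list holds previous Fock matrices (trial
    vectors) and diis_res the matching residuals r = A^T (FDS - SDF) A."""
    dim = len(F_list) + 1

    # Build the B matrix: B_ij = <r_i | r_j>, bordered with -1's for the constraint
    B = np.zeros((dim, dim))
    B[-1, :] = -1
    B[:, -1] = -1
    B[-1, -1] = 0
    for i, r_i in enumerate(diis_res):
        for j, r_j in enumerate(diis_res):
            B[i, j] = np.einsum('pq,pq->', r_i, r_j)

    # Right-hand side of the Pulay equation: zeros, with -1 in the constraint slot
    rhs = np.zeros(dim)
    rhs[-1] = -1

    # Solve the Pulay equation; the last entry of the solution is the Lagrange multiplier
    coeff = np.linalg.solve(B, rhs)

    # Build the DIIS Fock matrix as a linear combination of the stored trial vectors
    F_diis = np.zeros_like(F_list[0])
    for c_i, F_i in zip(coeff[:-1], F_list):
        F_diis += c_i * F_i
    return F_diis
```

In the SCF loop this would be called as `F = diis(F_list, DIIS_resid)` once at least two trial vectors have been stored, exactly where the skeleton says "perform DIIS to get the Fock matrix".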
/02_SCF/advanced/diis_pyscf.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Direct Inversion of Iterative Subspace"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "import scipy.linalg as spla\n",
18 | "import pyscf\n",
19 | "from pyscf import gto, scf\n",
20 | "import time"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "## Useful Resources\n",
28 | "- [P. Pulay. Chem. Phys. Lett. 73, 393-398 (1980)](https://www.sciencedirect.com/science/article/pii/0009261480803964)\n",
29 | "- [DIIS by C. David Sherrill](http://vergil.chemistry.gatech.edu/notes/diis/diis.pdf)\n",
30 | "- [DePrince Research Group DIIS Tutorial](https://www.chem.fsu.edu/~deprince/programming_projects/diis/)\n",
31 | "- [Psi4Numpy DIIS Tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/03_Hartree-Fock/3b_rhf-diis.ipynb)\n",
32 | "- [DIIS by MolSSI-Education](https://github.com/MolSSI-Education/QM_2017_SSS_Team8/blob/master/Tutorial_PDFs/02_SCF_DIIS.pdf)"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "## Introduction\n",
40 | "Iterative methods are commonly used to solve systems of linear equations, but they can suffer from convergence problems such as slow convergence and high computational cost. Today we are going to use DIIS (Direct Inversion of the Iterative Subspace) to accelerate convergence; it is commonly used to aid the convergence of SCF wavefunctions. We will build off of our previous example of a simple RHF.\n",
41 | "\n",
42 | "## General Theory\n",
43 | "During the iterative solution we generate a set of trial vectors $p^{i}$ that converge to the true solution $p^{f}$. This allows us to form a set of residual vectors\n",
44 | "$$\n",
45 | "\\Delta \\mathbf{p} = \\mathbf{p}^{i+1} - \\mathbf{p}^{i}\n",
46 | "$$\n",
47 | "\n",
48 | "DIIS assumes that the true solution can be approximated as a linear combination of the previous trial vector guesses, \n",
49 | "$$\\mathbf{p} = \\sum_{i} c_{i} \\mathbf{p}^{i}$$\n",
50 | "\n",
51 | "\n",
52 | "The coefficients $c_{i}$ can be obtained by requiring the residual vector to be a least-squares approximation to the zero vector,\n",
53 | "\n",
54 | "$$\\Delta \\mathbf{p} = \\sum_{i} c_{i} \\Delta \\mathbf{p}^{i}$$\n",
55 | "\n",
56 | "\n",
57 | "constrained by,\n",
58 | "\n",
59 | "$$\\sum_{i} c_{i} =1$$\n",
60 | "\n",
61 | "\n",
62 | "This allows us to represent each trial function $p^{i}$ as the true solution plus an error vector:\n",
63 | "$$\\mathbf{p} = \\sum_{i} c_{i} (\\mathbf{p}^{f} + \\mathbf{e}^{i}) = \\mathbf{p}^{f} \\sum_{i} c_{i} + \\sum_{i} c_{i} \\mathbf{e}^{i}$$\n",
64 | "\n",
65 | "Convergence minimizes the error, which in turn causes the second term above to vanish. For our DIIS solution $\\mathbf{p}$ to equal the true solution $\\mathbf{p}^{f}$, we must therefore have $\\sum_{i} c_{i} =1$.\n",
66 | "\n",
67 | "We need to minimize the norm of the residual vector subject to this constraint:\n",
68 | "$$ \\left \\langle \\Delta \\mathbf{p} | \\Delta \\mathbf{p} \\right \\rangle = \\sum_{ij} c_{i}^{\\ast} c_{j} \\left \\langle \\Delta \\mathbf{p}^{i} | \\Delta \\mathbf{p}^{j} \\right \\rangle $$\n",
69 | "\n",
70 | "We can minimize using a Lagrange multiplier\n",
71 | "$$ \\cal L = c^{\\dagger} \\mathbf{B} c - \\lambda (1 - \\sum_{i} c_{i})$$\n",
72 | "\n",
73 | "where B is the residual vector overlap.\n",
74 | "$$ B_{ij}=\\left \\langle \\Delta \\mathbf{p}^{i} | \\Delta \\mathbf{p}^{j} \\right \\rangle $$\n",
75 | "\n",
76 | "This allows us to minimize $\\cal L$ with respect to a coefficient $c_{k}$:\n",
77 | "$$\\frac{\\partial \\cal L }{\\partial c_{k}}=0 = \\sum_{j} c_{j} B_{kj} + \\sum_{i} c_{i} B_{ik} - \\lambda = 2 \\sum_{i} c_{i} B_{ik} - \\lambda$$\n",
78 | "\n",
79 | "We can represent this with the matrix below\n",
80 | "\n",
81 | "$$\n",
82 | "\\begin{bmatrix}\n",
83 | "B_{11} & B_{12} & \\cdots & B_{1m} & -1 & \\\\ \n",
84 | "B_{21} & B_{22} & \\cdots & B_{2m} & -1 & \\\\ \n",
85 | "\\vdots & \\vdots & \\ddots & \\vdots & \\vdots & \\\\ \n",
86 | "B_{m1} & B_{m2} & \\cdots & B_{mm} & -1 & \\\\ \n",
87 | "-1 & -1 & \\cdots & -1 & 0 & \n",
88 | "\\end{bmatrix} \n",
89 | "\\begin{bmatrix}\n",
90 | "c_{1} & \\\\ \n",
91 | "c_{2} & \\\\ \n",
92 | "\\vdots & \\\\ \n",
93 | "c_{m} & \\\\ \n",
94 | "\\lambda & \n",
95 | "\\end{bmatrix} \n",
96 | "=\n",
97 | "\\begin{bmatrix}\n",
98 | "0 & \\\\ \n",
99 | "0 & \\\\ \n",
100 | "\\vdots & \\\\ \n",
101 | "0 & \\\\ \n",
102 | "-1 & \n",
103 | "\\end{bmatrix} \n",
104 | "$$"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "## Imports"
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "## Load Molecule"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "# Define molecule\n",
128 | "mol = pyscf.gto.M(\n",
129 | " atom=\"O 0.0000000 0.0000000 0.0000000; H 0.7569685 0.0000000 -0.5858752; H -0.7569685 0.0000000 -0.5858752\",\n",
130 | " basis='sto-3g',\n",
131 | " unit=\"Ang\",\n",
132 | " verbose=0,\n",
133 | " symmetry=False,\n",
134 | " spin=0,\n",
135 | " charge=0\n",
136 | ")\n",
137 | "\n",
138 | "# Get number of atomic orbitals\n",
139 | "num_ao = mol.nao_nr()\n",
140 | "\n",
141 | "# Get number of electrons\n",
142 | "num_elec_alpha, num_elec_beta = mol.nelec\n",
143 | "num_elec = num_elec_alpha + num_elec_beta\n",
144 | "\n",
145 | "# Get nuclear repulsion energy\n",
146 | "E_nuc = mol.energy_nuc()"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "## Calculate Molecular Integrals"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": null,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "# Calculate overlap integrals\n",
163 | "S = mol.intor('cint1e_ovlp_sph')\n",
164 | "\n",
165 | "# Calculate kinetic energy integrals\n",
166 | "T = mol.intor('cint1e_kin_sph')\n",
167 | "\n",
168 | "# Calculate nuclear attraction integrals\n",
169 | "V = mol.intor('cint1e_nuc_sph')\n",
170 | "\n",
171 | "# Form core Hamiltonian\n",
172 | "H = T + V\n",
173 | "\n",
174 | "# Calculate two electron integrals\n",
175 | "eri = mol.intor('cint2e_sph', aosym='s8')\n",
176 | "\n",
177 | "# Since we are using the 8 fold symmetry of the 2 electron integrals\n",
178 | "# the functions below will help us when accessing elements\n",
179 | "__idx2_cache = {}\n",
180 | "\n",
181 | "\n",
182 | "def idx2(i, j):\n",
183 | " if (i, j) in __idx2_cache:\n",
184 | " return __idx2_cache[i, j]\n",
185 | " elif i >= j:\n",
186 | " __idx2_cache[i, j] = int(i*(i+1)/2+j)\n",
187 | " else:\n",
188 | " __idx2_cache[i, j] = int(j*(j+1)/2+i)\n",
189 | " return __idx2_cache[i, j]\n",
190 | "\n",
191 | "\n",
192 | "def idx4(i, j, k, l):\n",
193 | " return idx2(idx2(i, j), idx2(k, l))\n",
194 | "\n",
195 | "\n",
196 | "print(np.shape(eri))"
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {},
202 | "source": [
203 | "## Core Guess"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": null,
209 | "metadata": {},
210 | "outputs": [],
211 | "source": [
212 | "# AO orthogonalization matrix\n",
213 | "A = spla.fractional_matrix_power(S, -0.5)\n",
214 | "\n",
215 | "# Solve the generalized eigenvalue problem\n",
216 | "E_orbitals, C = spla.eigh(H, S)\n",
217 | "\n",
218 | "# Compute initial density matrix\n",
219 | "D = np.zeros((num_ao, num_ao))\n",
220 | "for i in range(num_ao):\n",
221 | " for j in range(num_ao):\n",
222 | " for k in range(num_elec_alpha):\n",
223 | " D[i, j] += C[i, k] * C[j, k]"
224 | ]
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {},
229 | "source": [
230 | "## DIIS Function\n",
231 | "\n",
232 | "### Steps in DIIS Function\n",
233 | "1. Build B matrix\n",
234 | "2. Solve the Pulay equation\n",
235 | "3. Build the DIIS Fock matrix"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "def diis(F_list, diis_res):\n",
245 | " # Build B matrix\n",
246 | "\n",
247 | " # Right hand side of Pulay eqn\n",
248 | "\n",
249 | " # Solve Pulay for coeffs\n",
250 | "\n",
251 | " # Build DIIS Fock\n",
252 | "\n",
253 | " return F_diis"
254 | ]
255 | },
256 | {
257 | "cell_type": "markdown",
258 | "metadata": {},
259 | "source": [
260 | "## Variables, Criteria, and Organization"
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "metadata": {},
267 | "outputs": [],
268 | "source": [
269 | "# 2 helper functions for printing during SCF\n",
270 | "def print_start_iterations():\n",
271 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
272 | " \"Iter\", \"Time(s)\", \"DIIS RMS\", \"delta E\", \"E_elec\")))\n",
273 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
274 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n",
275 | "\n",
276 | "\n",
277 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, diis_rms, iteration_E_diff, E_elec):\n",
278 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(\n",
279 | " iteration_num, iteration_end_time - iteration_start_time, diis_rms, iteration_E_diff, E_elec)))\n",
280 | "\n",
281 | "\n",
282 | "# Set stopping criteria\n",
283 | "iteration_max = 100\n",
284 | "convergence_E = 1e-9\n",
285 | "convergence_DIIS = 1e-5\n",
286 | "\n",
287 | "# Loop variables\n",
288 | "iteration_num = 0\n",
289 | "E_total = 0\n",
290 | "E_elec = 0.0\n",
291 | "iteration_E_diff = 0.0\n",
292 | "iteration_rmsc_dm = 0.0\n",
293 | "converged = False\n",
294 | "exceeded_iterations = False"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {},
300 | "source": [
301 | "## DIIS SCF Iteration\n",
302 | "Our trial vector will be the Fock matrix with the error vector being the orthonormalized orbital gradient.\n",
303 | "\n",
304 | "$$ r_{\\mu \\upsilon} = (\\mathbf{A^{T}}(\\mathbf{FDS} - \\mathbf{SDF}) \\mathbf{A})_{\\mu \\upsilon} $$\n",
305 | "\n",
306 | "### Call DIIS in SCF Iteration\n",
307 | "1. Build DIIS Residual (error vector) that will be used to make the B matrix\n",
308 | "2. Store trial and residual vectors\n",
309 | "3. Call DIIS to start after the first iteration\n",
310 | "4. Compute the next guess with the DIIS Fock matrix"
311 | ]
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": null,
316 | "metadata": {
317 | "scrolled": true
318 | },
319 | "outputs": [],
320 | "source": [
321 | "# Trial & Residual vector lists\n",
322 | "F_list = []\n",
323 | "DIIS_resid = []\n",
324 | "\n",
325 | "print(\"{:^79}\".format('=====> Starting SCF Iterations <=====\\n'))\n",
326 | "print_start_iterations()\n",
327 | "while (not converged and not exceeded_iterations):\n",
328 | " # Store last iteration and increment counters\n",
329 | " iteration_start_time = time.time()\n",
330 | " iteration_num += 1\n",
331 | " E_elec_last = E_elec\n",
332 | " D_last = np.copy(D)\n",
333 | "\n",
334 | " # Form G matrix\n",
335 | " G = np.zeros((num_ao, num_ao))\n",
336 | " for i in range(num_ao):\n",
337 | " for j in range(num_ao):\n",
338 | " for k in range(num_ao):\n",
339 | " for l in range(num_ao):\n",
340 | " G[i, j] += D[k, l] * \\\n",
341 | " ((2.0*(eri[idx4(i, j, k, l)])) -\n",
342 | " (eri[idx4(i, k, j, l)]))\n",
343 | "\n",
344 | " # Build fock matrix\n",
345 | " F = H + G\n",
346 | "\n",
347 | " # Calculate electronic energy\n",
348 | " E_elec = np.sum(np.multiply(D, (H + F)))\n",
349 | "\n",
350 | " # Calculate energy change of iteration\n",
351 | " iteration_E_diff = np.abs(E_elec - E_elec_last)\n",
352 | "\n",
353 | " # =======> Start of DIIS stuff <=========\n",
354 | " # Build the DIIS AO gradient\n",
355 | "\n",
356 | " # DIIS RMS\n",
357 | "\n",
358 | " # Append lists\n",
359 | " F_list.append(F)\n",
360 | " DIIS_resid.append(diis_r)\n",
361 | "\n",
362 | " if iteration_num >= 2:\n",
363 | "        # perform DIIS to get the Fock matrix\n",
364 | "\n",
365 | " # Compute new guess with F DIIS\n",
366 | "\n",
367 | " D = np.zeros((num_ao, num_ao))\n",
368 | " for i in range(num_ao):\n",
369 | " for j in range(num_ao):\n",
370 | " for k in range(num_elec_alpha):\n",
371 | " D[i, j] += C[i, k] * C[j, k]\n",
372 | "\n",
373 | " # =======> End of DIIS stuff <=========\n",
374 | "\n",
375 | " iteration_end_time = time.time()\n",
376 | " print_iteration(iteration_num, iteration_start_time, iteration_end_time,\n",
377 | " diis_rms, iteration_E_diff, E_elec)\n",
378 | "\n",
379 | " if(np.abs(iteration_E_diff) < convergence_E and diis_rms < convergence_DIIS):\n",
380 | " converged = True\n",
381 | " print('\\n', \"{:^79}\".format('=====> SCF Converged <=====\\n'))\n",
382 | " # calculate total energy\n",
383 | " E_total = E_elec + E_nuc\n",
384 | " print(\"{:^79}\".format(\"Total Energy : {:>11f}\".format(E_total)))\n",
385 | "\n",
386 | " if(iteration_num == iteration_max):\n",
387 | " exceeded_iterations = True\n",
388 | "        print(\"{:^79}\".format('=====> SCF Exceeded Max Iterations <=====\\n'))"
389 | ]
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "metadata": {},
394 | "source": [
395 | "## References\n",
396 | "1. P. Pulay. Chem. Phys. Lett. 73, 393-398 (1980)\n",
397 | "2. C. David Sherrill. \"Some comments on accellerating convergence of iterative sequences using direct inversion of the iterative subspace (DIIS)\". http://vergil.chemistry.gatech.edu/notes/diis/diis.pdf. (1998)"
398 | ]
399 | }
400 | ],
401 | "metadata": {
402 | "kernelspec": {
403 | "display_name": "Python 3",
404 | "language": "python",
405 | "name": "python3"
406 | },
407 | "language_info": {
408 | "codemirror_mode": {
409 | "name": "ipython",
410 | "version": 3
411 | },
412 | "file_extension": ".py",
413 | "mimetype": "text/x-python",
414 | "name": "python",
415 | "nbconvert_exporter": "python",
416 | "pygments_lexer": "ipython3",
417 | "version": "3.6.6"
418 | }
419 | },
420 | "nbformat": 4,
421 | "nbformat_minor": 2
422 | }
423 |
--------------------------------------------------------------------------------
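The same blanks appear in the SCF loop of both DIIS skeletons: building the DIIS residual and forming the new guess from the extrapolated Fock matrix. The sketch below is again an assumption-based illustration (not the repository's `*_solution.ipynb` answer) that follows the formulas given in the markdown: the error vector r = A^T (FDS - SDF) A, its RMS used for the convergence check, and the rebuild of C and D from the DIIS Fock matrix.

```python
import numpy as np


def diis_residual(F, D, S, A):
    """Sketch: DIIS error vector r = A^T (F D S - S D F) A and its RMS norm."""
    r = A.T @ (F @ D @ S - S @ D @ F) @ A
    rms = np.sqrt(np.mean(r ** 2))
    return r, rms


def new_guess_from_fock(F_diis, A, num_occ):
    """Sketch: diagonalize the extrapolated Fock matrix in the orthonormal basis
    and rebuild the MO coefficients and the density matrix."""
    F_prime = A.T @ F_diis @ A              # transform to the orthonormal AO basis
    eps, C_prime = np.linalg.eigh(F_prime)  # now a standard eigenvalue problem
    C = A @ C_prime                         # back-transform the MOs to the AO basis
    occ = C[:, :num_occ]                    # keep only the occupied orbitals
    D = occ @ occ.T                         # density matrix (no factor of 2, as in the notebooks)
    return C, D
```

Inside the loop, `diis_r, diis_rms = diis_residual(F, D, S, A)` would be evaluated every iteration, and once `F_diis = diis(F_list, DIIS_resid)` is available the density would be updated with `C, D = new_guess_from_fock(F_diis, A, num_elec_alpha)`.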
/02_SCF/basics/scf_psi4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# SCF"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Imports"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import numpy as np\n",
24 | "import scipy.linalg as spla\n",
25 | "import psi4\n",
26 | "import matplotlib.pyplot as plt\n",
27 | "import time\n",
28 | "%matplotlib notebook"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "## Some useful resources:\n",
36 | " - Szabo and Ostlund Chapter 3 (for algorithm see page 146)\n",
37 | " - [Notes by David Sherrill](http://vergil.chemistry.gatech.edu/notes/hf-intro/hf-intro.html)\n",
38 | " - [Notes by Joshua Goings](http://joshuagoings.com/2013/04/24/hartree-fock-self-consistent-field-procedure/)\n",
39 | " - [Programming notes by Francesco Evangelista](http://www.evangelistalab.org/wp-content/uploads/2013/12/Hartree-Fock-Theory.pdf)\n",
40 | " - [Psi4Numpy SCF page](https://github.com/psi4/psi4numpy/tree/master/Tutorials/03_Hartree-Fock)\n",
41 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project3)"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "## The SCF algorithm from Szabo and Ostlund:\n",
49 |     " 1. Specify a molecule (coordinates $\\{R_A\\}$, atomic numbers $\\{Z_A\\}$, number of electrons $N$) and atomic orbital basis $\\{\\phi_\\mu\\}$.\n",
50 | " 2. Calculate molecular integrals over AOs ( overlap $S_{\\mu\\nu}$, core Hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$, and 2 electron integrals $(\\mu \\nu | \\lambda \\sigma)$ ).\n",
51 | " 3. Diagonalize the overlap matrix $S$ to obtain the transformation matrix $X$.\n",
52 | " 4. Make a guess at the original density matrix $P$.\n",
53 | " 5. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n",
54 | " 6. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$.\n",
55 | " 7. Transform the Fock matrix $F' = X^\\dagger F X$.\n",
56 | " 8. Diagonalize the Fock matrix to get orbital energies $\\epsilon$ and molecular orbitals (in the transformed basis) $C'$.\n",
57 | " 9. Transform the molecular orbitals back to the AO basis $C = X C'$.\n",
58 | " 10. Form a new guess at the density matrix $P$ using $C$.\n",
59 | " 11. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 5.\n",
60 | " 12. If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc."
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {},
66 | "source": [
67 | "## Quick note\n",
68 |     "We need to calculate the transformation matrix $X$ because the atomic orbital basis is not orthonormal by default. This means that without the transformation we would have to solve a generalized eigenvalue problem $FC = ESC$. If we use scipy to solve this generalized eigenvalue problem directly, we can simplify the SCF algorithm.\n",
69 | "## Simplified SCF\n",
70 |     " 1. Specify a molecule (coordinates $\\{R_A\\}$, atomic numbers $\\{Z_A\\}$, number of electrons $N$) and atomic orbital basis $\\{\\phi_\\mu\\}$.\n",
71 | " 2. Calculate molecular integrals over AOs ( overlap $S_{\\mu\\nu}$, core Hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$, and 2 electron integrals $(\\mu \\nu | \\lambda \\sigma)$ ).\n",
72 | " 3. Make a guess at the original density matrix $P$.\n",
73 | " 4. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n",
74 | " 5. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$. \n",
75 | " 6. Solve the generalized eigenvalue problem using the Fock matrix $F$ and the overlap matrix $S$ to get orbital energies $\\epsilon$ and molecular orbitals.\n",
76 | " 7. Form a new guess at the density matrix $P$ using $C$.\n",
77 | " 8. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 4.\n",
78 | " 9. If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc.\n"
79 | ]
80 | },
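  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The sketch below is not part of the exercise; it uses a made-up 2x2 'Fock' matrix and overlap matrix to show that `scipy.linalg.eigh(F, S)` solves $FC = ESC$ directly, and that this is equivalent to building $X = S^{-1/2}$ and diagonalizing $XFX$.\n",
    "```python\n",
    "import numpy as np\n",
    "import scipy.linalg as spla\n",
    "\n",
    "# made-up symmetric 'Fock' and overlap matrices, purely for illustration\n",
    "F_test = np.array([[-1.0, -0.2], [-0.2, -0.5]])\n",
    "S_test = np.array([[1.0, 0.3], [0.3, 1.0]])\n",
    "\n",
    "# route 1: solve the generalized problem F C = E S C directly\n",
    "eps_gen, C_gen = spla.eigh(F_test, S_test)\n",
    "\n",
    "# route 2: symmetric orthogonalization X = S^(-1/2), then an ordinary eigenproblem\n",
    "s_vals, U = np.linalg.eigh(S_test)\n",
    "X = U @ np.diag(s_vals**-0.5) @ U.T\n",
    "eps_ortho = np.linalg.eigvalsh(X @ F_test @ X)\n",
    "\n",
    "print(np.allclose(eps_gen, eps_ortho))  # both routes give the same orbital energies\n",
    "```"
   ]
  },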
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "# STEP 1 : Specify the molecule"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "# start timer\n",
95 | "start_time = time.time()\n",
96 | "# define molecule\n",
97 | "mol = psi4.geometry(\"\"\"\n",
98 | "O 0.0000000 0.0000000 0.0000000\n",
99 | "H 0.7569685 0.0000000 -0.5858752\n",
100 | "H -0.7569685 0.0000000 -0.5858752\n",
101 | "symmetry c1\n",
102 | "\"\"\")\n",
103 | "psi4.set_options({'basis': 'sto-3g'})\n",
104 | "wfn = psi4.core.Wavefunction.build(mol, psi4.core.get_global_option('BASIS'))\n",
105 | "mints = psi4.core.MintsHelper(wfn.basisset())\n",
106 | "# get number of electrons\n",
107 | "num_elec_alpha = wfn.nalpha()\n",
108 | "num_elec_beta = wfn.nbeta()\n",
109 | "num_elec = num_elec_alpha + num_elec_beta\n",
110 | "# get nuclear repulsion energy\n",
111 | "E_nuc = mol.nuclear_repulsion_energy()"
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "# STEP 2 : Calculate molecular integrals \n",
119 | "\n",
120 | "Overlap \n",
121 | "\n",
122 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n",
123 | "\n",
124 | "Kinetic\n",
125 | "\n",
126 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n",
127 | "\n",
128 | "Nuclear Attraction\n",
129 | "\n",
130 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n",
131 | "\n",
132 | "Form Core Hamiltonian\n",
133 | "\n",
134 | "$$ H = T + V $$\n",
135 | "\n",
136 | "Two electron integrals\n",
137 | "\n",
138 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "# calculate overlap integrals\n",
148 | "S = np.asarray(mints.ao_overlap())\n",
149 | "# calculate kinetic energy integrals\n",
150 | "T = np.asarray(mints.ao_kinetic())\n",
151 | "# calculate nuclear attraction integrals\n",
152 | "V = np.asarray(mints.ao_potential())\n",
153 | "# form core Hamiltonian\n",
154 | "H = T + V\n",
155 | "# calculate two electron integrals\n",
156 | "eri = np.asarray(mints.ao_eri())\n",
157 | "# get number of atomic orbitals\n",
158 | "num_ao = np.shape(S)[0]\n",
159 | "\n",
160 | "print(np.shape(eri))"
161 | ]
162 | },
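  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A couple of optional sanity checks on the integrals (illustrative only, assuming the cell above has been run): the overlap matrix should be symmetric, and because the AO basis functions are normalized its diagonal should be all ones.\n",
    "```python\n",
    "print(np.allclose(S, S.T))           # S should be symmetric\n",
    "print(np.allclose(np.diag(S), 1.0))  # normalized AOs overlap perfectly with themselves\n",
    "print(H.shape, eri.shape)            # one- and two-electron integral dimensions\n",
    "```"
   ]
  },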
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "# STEP 3 : Form guess density matrix"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 |     "# set initial density matrix to zero\n",
177 | "D = np.zeros((num_ao, num_ao))"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {},
183 | "source": [
184 | "# STEPS 4 - 8 : SCF loop\n",
185 | "\n",
186 | " 4. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n",
187 | " \n",
188 | " $$G_{\\mu\\nu} = \\sum_{\\lambda\\sigma}^{\\mathrm{num\\_ao}} P_{\\lambda \\sigma}[2(\\mu\\nu|\\lambda\\sigma)-(\\mu\\lambda|\\nu\\sigma)]$$ \n",
189 | " \n",
190 | " 5. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$. \n",
191 | " \n",
192 | " $$ F = H + G $$\n",
193 | " \n",
194 | " 6. Solve the generalized eigenvalue problem using the Fock matrix $F$ and the overlap matrix $S$ to get orbital energies $\\epsilon$ and molecular orbitals.\n",
195 | " \n",
196 | " $$F C = E S C $$\n",
197 | " \n",
198 | " 7. Form a new guess at the density matrix $P$ using $C$.\n",
199 | " \n",
200 | " $$ P_{\\mu\\nu} = \\sum_{i}^{\\mathrm{num\\_elec}/2} C_{\\mu i} C_{\\nu i} $$\n",
201 | " \n",
202 | " 8. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 4.\n",
203 | " \n",
204 | " $$ E_{\\mathrm{elec}} = \\sum^{\\mathrm{num\\_ao}}_{\\mu\\nu} P_{\\mu\\nu} (H_{\\mu\\nu} + F_{\\mu\\nu}) $$\n",
205 | " $$ \\Delta E = E_{\\mathrm{new}} - E_{\\mathrm{old}} $$\n",
206 | " $$ |\\Delta P| = \\left[ \\sum^{\\mathrm{num\\_ao}}_{\\mu\\nu} [P^{\\mathrm{new}}_{\\mu\\nu} - P_{\\mu\\nu}^{\\mathrm{old}}]^2 \\right]^{1/2}$$\n",
207 | " \n",
208 | " 9. If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc.\n",
209 | " \n",
210 | " $$ E_{\\mathrm{total}} = V_{\\mathrm{NN}} + E_{\\mathrm{elec}} $$\n"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": null,
216 | "metadata": {},
217 | "outputs": [],
218 | "source": [
219 | "# 2 helper functions for printing during SCF\n",
220 | "def print_start_iterations():\n",
221 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
222 | " \"Iter\", \"Time(s)\", \"RMSC DM\", \"delta E\", \"E_elec\")))\n",
223 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
224 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n",
225 | "\n",
226 | "\n",
227 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec):\n",
228 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num,\n",
229 | " iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_elec)))\n",
230 | "\n",
231 | "\n",
232 | "# set stopping criteria\n",
233 | "iteration_max = 100\n",
234 | "convergence_E = 1e-9\n",
235 | "convergence_DM = 1e-5\n",
236 | "# loop variables\n",
237 | "iteration_num = 0\n",
238 | "E_total = 0\n",
239 | "E_elec = 0.0\n",
240 | "iteration_E_diff = 0.0\n",
241 | "iteration_rmsc_dm = 0.0\n",
242 | "converged = False\n",
243 | "exceeded_iterations = False"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 | "print_start_iterations()\n",
253 | "while (not converged and not exceeded_iterations):\n",
254 | " # store last iteration and increment counters\n",
255 | " iteration_start_time = time.time()\n",
256 | " iteration_num += 1\n",
257 | " E_elec_last = E_elec\n",
258 | " D_last = np.copy(D)\n",
259 | " # form G matrix\n",
260 | " G = np.zeros((num_ao, num_ao))\n",
261 | "\n",
262 | " #########################################################\n",
263 | " # FILL IN HOW TO MAKE THE G MATRIX HERE\n",
264 | " #########################################################\n",
265 | "\n",
266 | " # build fock matrix\n",
267 | "\n",
268 | " #########################################################\n",
269 | " # FILL IN HOW TO MAKE THE FOCK MATRIX HERE\n",
270 | " #########################################################\n",
271 | "\n",
272 | " # solve the generalized eigenvalue problem\n",
273 | " E_orbitals, C = spla.eigh(F, S)\n",
274 | " # compute new density matrix\n",
275 | " D = np.zeros((num_ao, num_ao))\n",
276 | "\n",
277 | " #########################################################\n",
278 | " # FILL IN HOW TO MAKE THE DENSITY MATRIX HERE\n",
279 | " #########################################################\n",
280 | "\n",
281 | " # calculate electronic energy\n",
282 | "\n",
283 | " #########################################################\n",
284 | " # FILL IN HOW TO CALCULATE THE ELECTRONIC ENERGY HERE\n",
285 | " #########################################################\n",
286 | "\n",
287 | " # calculate energy change of iteration\n",
288 | " iteration_E_diff = np.abs(E_elec - E_elec_last)\n",
289 | " # rms change of density matrix\n",
290 | " iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n",
291 | " iteration_end_time = time.time()\n",
292 | " print_iteration(iteration_num, iteration_start_time,\n",
293 | " iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec)\n",
294 | " if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM):\n",
295 | " converged = True\n",
296 | " if(iteration_num == iteration_max):\n",
297 | " exceeded_iterations = True"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {},
303 | "source": [
304 | "# STEP 9 : Calculate Observables"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": null,
310 | "metadata": {},
311 | "outputs": [],
312 | "source": [
313 | "# calculate total energy\n",
314 | "\n",
315 | "####################################################\n",
316 | "# FILL IN HOW TO CALCULATE THE TOTAL ENERGY HERE\n",
317 | "####################################################"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": null,
323 | "metadata": {},
324 | "outputs": [],
325 | "source": [
326 | "print(\"{:^79}\".format(\"Total Energy : {:>11f}\".format(E_total)))"
327 | ]
328 | },
329 | {
330 | "cell_type": "code",
331 | "execution_count": null,
332 | "metadata": {},
333 | "outputs": [],
334 | "source": []
335 | }
336 | ],
337 | "metadata": {
338 | "kernelspec": {
339 | "display_name": "Python 3",
340 | "language": "python",
341 | "name": "python3"
342 | },
343 | "language_info": {
344 | "codemirror_mode": {
345 | "name": "ipython",
346 | "version": 3
347 | },
348 | "file_extension": ".py",
349 | "mimetype": "text/x-python",
350 | "name": "python",
351 | "nbconvert_exporter": "python",
352 | "pygments_lexer": "ipython3",
353 | "version": "3.6.6"
354 | }
355 | },
356 | "nbformat": 4,
357 | "nbformat_minor": 2
358 | }
359 |
--------------------------------------------------------------------------------
/02_SCF/basics/scf_pyscf.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# SCF"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Imports"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import numpy as np\n",
24 | "import scipy.linalg as spla\n",
25 | "import pyscf\n",
26 | "from pyscf import gto, scf\n",
27 | "import matplotlib.pyplot as plt\n",
28 | "import time\n",
29 | "%matplotlib notebook"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "## Some useful resources:\n",
37 | " - Szabo and Ostlund Chapter 3 (for algorithm see page 146)\n",
38 | " - [Notes by David Sherrill](http://vergil.chemistry.gatech.edu/notes/hf-intro/hf-intro.html)\n",
39 | " - [Notes by Joshua Goings](http://joshuagoings.com/2013/04/24/hartree-fock-self-consistent-field-procedure/)\n",
40 | " - [Programming notes by Francesco Evangelista](http://www.evangelistalab.org/wp-content/uploads/2013/12/Hartree-Fock-Theory.pdf)\n",
41 | " - [Psi4Numpy SCF page](https://github.com/psi4/psi4numpy/tree/master/Tutorials/03_Hartree-Fock)\n",
42 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project3)"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "## The SCF algorithm from Szabo and Ostlund:\n",
50 |     " 1. Specify a molecule (coordinates $\\{R_A\\}$, atomic numbers $\\{Z_A\\}$, number of electrons $N$) and atomic orbital basis $\\{\\phi_\\mu\\}$.\n",
51 | " 2. Calculate molecular integrals over AOs ( overlap $S_{\\mu\\nu}$, core Hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$, and 2 electron integrals $(\\mu \\nu | \\lambda \\sigma)$ ).\n",
52 | " 3. Diagonalize the overlap matrix $S$ to obtain the transformation matrix $X$.\n",
53 | " 4. Make a guess at the original density matrix $P$.\n",
54 | " 5. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n",
55 | " 6. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$.\n",
56 | " 7. Transform the Fock matrix $F' = X^\\dagger F X$.\n",
57 | " 8. Diagonalize the Fock matrix to get orbital energies $\\epsilon$ and molecular orbitals (in the transformed basis) $C'$.\n",
58 | " 9. Transform the molecular orbitals back to the AO basis $C = X C'$.\n",
59 | " 10. Form a new guess at the density matrix $P$ using $C$.\n",
60 | " 11. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 5.\n",
61 | " 12. If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc."
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "## Quick note\n",
69 |     "We need to calculate the transformation matrix $X$ because the atomic orbital basis is not orthonormal by default. This means that without the transformation we would have to solve a generalized eigenvalue problem $FC = ESC$. If we use scipy to solve this generalized eigenvalue problem directly, we can simplify the SCF algorithm.\n",
70 | "## Simplified SCF\n",
71 |     " 1. Specify a molecule (coordinates $\\{R_A\\}$, atomic numbers $\\{Z_A\\}$, number of electrons $N$) and atomic orbital basis $\\{\\phi_\\mu\\}$.\n",
72 | " 2. Calculate molecular integrals over AOs ( overlap $S_{\\mu\\nu}$, core Hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$, and 2 electron integrals $(\\mu \\nu | \\lambda \\sigma)$ ).\n",
73 | " 3. Make a guess at the original density matrix $P$.\n",
74 | " 4. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n",
75 | " 5. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$. \n",
76 | " 6. Solve the generalized eigenvalue problem using the Fock matrix $F$ and the overlap matrix $S$ to get orbital energies $\\epsilon$ and molecular orbitals.\n",
77 | " 7. Form a new guess at the density matrix $P$ using $C$.\n",
78 | " 8. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 4.\n",
79 | " 9. If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc.\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "# STEP 1 : Specify the molecule"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "# start timer\n",
96 | "start_time = time.time()\n",
97 | "# define molecule\n",
98 | "mol = pyscf.gto.M(\n",
99 | " atom=\"O 0.0000000 0.0000000 0.0000000; H 0.7569685 0.0000000 -0.5858752; H -0.7569685 0.0000000 -0.5858752\",\n",
100 | " basis='sto-3g',\n",
101 | " unit=\"Ang\",\n",
102 | " verbose=0,\n",
103 | " symmetry=False,\n",
104 | " spin=0,\n",
105 | " charge=0\n",
106 | ")\n",
107 | "# get number of atomic orbitals\n",
108 | "num_ao = mol.nao_nr()\n",
109 | "# get number of electrons\n",
110 | "num_elec_alpha, num_elec_beta = mol.nelec\n",
111 | "num_elec = num_elec_alpha + num_elec_beta\n",
112 | "# get nuclear repulsion energy\n",
113 | "E_nuc = mol.energy_nuc()"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 | "# STEP 2 : Calculate molecular integrals \n",
121 | "\n",
122 | "Overlap \n",
123 | "\n",
124 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n",
125 | "\n",
126 | "Kinetic\n",
127 | "\n",
128 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n",
129 | "\n",
130 | "Nuclear Attraction\n",
131 | "\n",
132 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n",
133 | "\n",
134 | "Form Core Hamiltonian\n",
135 | "\n",
136 | "$$ H = T + V $$\n",
137 | "\n",
138 | "Two electron integrals\n",
139 | "\n",
140 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {},
147 | "outputs": [],
148 | "source": [
149 | "# calculate overlap integrals\n",
150 | "S = mol.intor('cint1e_ovlp_sph')\n",
151 | "# calculate kinetic energy integrals\n",
152 | "T = mol.intor('cint1e_kin_sph')\n",
153 | "# calculate nuclear attraction integrals\n",
154 | "V = mol.intor('cint1e_nuc_sph')\n",
155 | "# form core Hamiltonian\n",
156 | "H = T + V\n",
157 | "# calculate two electron integrals\n",
158 | "eri = mol.intor('cint2e_sph', aosym='s8')\n",
159 | "# since we are using the 8 fold symmetry of the 2 electron integrals\n",
160 | "# the functions below will help us when accessing elements\n",
161 | "__idx2_cache = {}\n",
162 | "\n",
163 | "\n",
164 | "def idx2(i, j):\n",
165 | " if (i, j) in __idx2_cache:\n",
166 | " return __idx2_cache[i, j]\n",
167 | " elif i >= j:\n",
168 | " __idx2_cache[i, j] = int(i*(i+1)/2+j)\n",
169 | " else:\n",
170 | " __idx2_cache[i, j] = int(j*(j+1)/2+i)\n",
171 | " return __idx2_cache[i, j]\n",
172 | "\n",
173 | "\n",
174 | "def idx4(i, j, k, l):\n",
175 | " return idx2(idx2(i, j), idx2(k, l))\n",
176 | "\n",
177 | "\n",
178 | "print(np.shape(eri))"
179 | ]
180 | },
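  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A short note on the cell above (illustrative, not required): because the integrals were requested with `aosym='s8'`, `eri` is a one-dimensional packed array storing only the symmetry-unique values, and an element $(\\mu\\nu|\\lambda\\sigma)$ is retrieved as `eri[idx4(i, j, k, l)]`. The compound index is the same for all eight index permutations, for example:\n",
    "```python\n",
    "# illustrative: the packed storage makes the 8-fold permutational symmetry explicit\n",
    "i, j, k, l = 0, 1, 2, 3  # arbitrary AO indices (STO-3G water has 7 AOs)\n",
    "print(eri[idx4(i, j, k, l)] == eri[idx4(k, l, i, j)])  # (ij|kl) = (kl|ij)\n",
    "print(eri[idx4(i, j, k, l)] == eri[idx4(j, i, l, k)])  # (ij|kl) = (ji|lk)\n",
    "```"
   ]
  },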
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {},
184 | "source": [
185 | "# STEP 3 : Form guess density matrix"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {},
192 | "outputs": [],
193 | "source": [
194 |     "# set initial density matrix to zero\n",
195 | "D = np.zeros((num_ao, num_ao))"
196 | ]
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "metadata": {},
201 | "source": [
202 | "# STEPS 4 - 8 : SCF loop\n",
203 | "\n",
204 | " 4. Calculate the intermediate matrix $G$ using the density matrix $P$ and the two electron integrals $(\\mu \\nu | \\lambda \\sigma)$.\n",
205 | " \n",
206 | " $$G_{\\mu\\nu} = \\sum_{\\lambda\\sigma}^{\\mathrm{num\\_ao}} P_{\\lambda \\sigma}[2(\\mu\\nu|\\lambda\\sigma)-(\\mu\\lambda|\\nu\\sigma)]$$ \n",
207 | " \n",
208 | " 5. Construct the Fock matrix $F$ from the core hamiltonian $H^{\\mathrm{core}}_{\\mu\\nu}$ and the intermediate matrix $G$. \n",
209 | " \n",
210 | " $$ F = H + G $$\n",
211 | " \n",
212 | " 6. Solve the generalized eigenvalue problem using the Fock matrix $F$ and the overlap matrix $S$ to get orbital energies $\\epsilon$ and molecular orbitals.\n",
213 | " \n",
214 | " $$F C = E S C $$\n",
215 | " \n",
216 | " 7. Form a new guess at the density matrix $P$ using $C$.\n",
217 | " \n",
218 | " $$ P_{\\mu\\nu} = \\sum_{i}^{\\mathrm{num\\_elec}/2} C_{\\mu i} C_{\\nu i} $$\n",
219 | " \n",
220 | " 8. Check for convergence. (Are the changes in energy and/or density smaller than some threshold?) If not, return to step 4.\n",
221 | " \n",
222 | " $$ E_{\\mathrm{elec}} = \\sum^{\\mathrm{num\\_ao}}_{\\mu\\nu} P_{\\mu\\nu} (H_{\\mu\\nu} + F_{\\mu\\nu}) $$\n",
223 | " $$ \\Delta E = E_{\\mathrm{new}} - E_{\\mathrm{old}} $$\n",
224 | " $$ |\\Delta P| = \\left[ \\sum^{\\mathrm{num\\_ao}}_{\\mu\\nu} [P^{\\mathrm{new}}_{\\mu\\nu} - P_{\\mu\\nu}^{\\mathrm{old}}]^2 \\right]^{1/2}$$\n",
225 | " \n",
226 | " 9. If converged, use the molecular orbitals $C$, density matrix $P$, and Fock matrix $F$ to calculate observables like the total Energy, etc.\n",
227 | " \n",
228 | " $$ E_{\\mathrm{total}} = V_{\\mathrm{NN}} + E_{\\mathrm{elec}} $$"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {},
235 | "outputs": [],
236 | "source": [
237 | "# 2 helper functions for printing during SCF\n",
238 | "def print_start_iterations():\n",
239 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
240 | " \"Iter\", \"Time(s)\", \"RMSC DM\", \"delta E\", \"E_elec\")))\n",
241 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
242 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n",
243 | "\n",
244 | "\n",
245 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec):\n",
246 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num,\n",
247 | " iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_elec)))\n",
248 | "\n",
249 | "\n",
250 | "# set stopping criteria\n",
251 | "iteration_max = 100\n",
252 | "convergence_E = 1e-9\n",
253 | "convergence_DM = 1e-5\n",
254 | "# loop variables\n",
255 | "iteration_num = 0\n",
256 | "E_total = 0\n",
257 | "E_elec = 0.0\n",
258 | "iteration_E_diff = 0.0\n",
259 | "iteration_rmsc_dm = 0.0\n",
260 | "converged = False\n",
261 | "exceeded_iterations = False"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "print_start_iterations()\n",
271 | "while (not converged and not exceeded_iterations):\n",
272 | " # store last iteration and increment counters\n",
273 | " iteration_start_time = time.time()\n",
274 | " iteration_num += 1\n",
275 | " E_elec_last = E_elec\n",
276 | " D_last = np.copy(D)\n",
277 | " # form G matrix\n",
278 | " G = np.zeros((num_ao, num_ao))\n",
279 | "\n",
280 | " #########################################################\n",
281 | " # FILL IN HOW TO MAKE THE G MATRIX HERE\n",
282 | " #########################################################\n",
283 | "\n",
284 | " # build fock matrix\n",
285 | "\n",
286 | " #########################################################\n",
287 | " # FILL IN HOW TO MAKE THE FOCK MATRIX HERE\n",
288 | " #########################################################\n",
289 | "\n",
290 | " # solve the generalized eigenvalue problem\n",
291 | " E_orbitals, C = spla.eigh(F, S)\n",
292 | " # compute new density matrix\n",
293 | " D = np.zeros((num_ao, num_ao))\n",
294 | "\n",
295 | " #########################################################\n",
296 | " # FILL IN HOW TO MAKE THE DENSITY MATRIX HERE\n",
297 | " #########################################################\n",
298 | "\n",
299 | " # calculate electronic energy\n",
300 | "\n",
301 | " #########################################################\n",
302 | " # FILL IN HOW TO CALCULATE THE ELECTRONIC ENERGY HERE\n",
303 | " #########################################################\n",
304 | "\n",
305 | " # calculate energy change of iteration\n",
306 | " iteration_E_diff = np.abs(E_elec - E_elec_last)\n",
307 | " # rms change of density matrix\n",
308 | " iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n",
309 | " iteration_end_time = time.time()\n",
310 | " print_iteration(iteration_num, iteration_start_time,\n",
311 | " iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec)\n",
312 | " if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM):\n",
313 | " converged = True\n",
314 | " if(iteration_num == iteration_max):\n",
315 | " exceeded_iterations = True"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {},
321 | "source": [
322 | "# STEP 9 : Calculate Observables"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": null,
328 | "metadata": {},
329 | "outputs": [],
330 | "source": [
331 | "# calculate total energy\n",
332 | "\n",
333 | "####################################################\n",
334 | "# FILL IN HOW TO CALCULATE THE TOTAL ENERGY HERE\n",
335 | "####################################################"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": null,
341 | "metadata": {},
342 | "outputs": [],
343 | "source": [
344 | "print(\"{:^79}\".format(\"Total Energy : {:>11f}\".format(E_total)))"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": null,
350 | "metadata": {},
351 | "outputs": [],
352 | "source": []
353 | }
354 | ],
355 | "metadata": {
356 | "kernelspec": {
357 | "display_name": "Python 3",
358 | "language": "python",
359 | "name": "python3"
360 | },
361 | "language_info": {
362 | "codemirror_mode": {
363 | "name": "ipython",
364 | "version": 3
365 | },
366 | "file_extension": ".py",
367 | "mimetype": "text/x-python",
368 | "name": "python",
369 | "nbconvert_exporter": "python",
370 | "pygments_lexer": "ipython3",
371 | "version": "3.6.6"
372 | }
373 | },
374 | "nbformat": 4,
375 | "nbformat_minor": 2
376 | }
377 |
--------------------------------------------------------------------------------
/02_SCF/scf_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/02_SCF/scf_slides.pdf
--------------------------------------------------------------------------------
/03_MP2/basics/mp2_psi4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# MP2"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Some useful resources:\n",
15 | " - [original paper](https://journals.aps.org/pr/abstract/10.1103/PhysRev.46.618)\n",
16 | " - Levine Chapter 16\n",
17 | " - [psi4numpy tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/05_Moller-Plesset/5a_conventional-mp2.ipynb)\n",
18 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project4)"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "# MP2 algorithm\n",
26 | "1. The starting point will be the Hartree-Fock wavefunction"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "## Imports"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "import numpy as np\n",
43 | "import scipy.linalg as spla\n",
44 | "import psi4\n",
45 | "import matplotlib.pyplot as plt\n",
46 | "import time\n",
47 | "%matplotlib notebook"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "## Specify the molecule"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "# start timer\n",
64 | "start_time = time.time()\n",
65 | "# define molecule\n",
66 | "mol = psi4.geometry(\"\"\"\n",
67 | "O 0.0000000 0.0000000 0.0000000\n",
68 | "H 0.7569685 0.0000000 -0.5858752\n",
69 | "H -0.7569685 0.0000000 -0.5858752\n",
70 | "symmetry c1\n",
71 | "\"\"\")\n",
72 | "psi4.set_options({'basis': 'sto-3g'})\n",
73 | "wfn = psi4.core.Wavefunction.build(mol, psi4.core.get_global_option('BASIS'))\n",
74 | "mints = psi4.core.MintsHelper(wfn.basisset())\n",
75 | "# get number of electrons\n",
76 | "num_elec_alpha = wfn.nalpha()\n",
77 | "num_elec_beta = wfn.nbeta()\n",
78 | "num_elec = num_elec_alpha + num_elec_beta\n",
79 | "# get nuclear repulsion energy\n",
80 | "E_nuc = mol.nuclear_repulsion_energy()"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "# STEP 2 : Calculate molecular integrals \n",
88 | "\n",
89 | "Overlap \n",
90 | "\n",
91 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n",
92 | "\n",
93 | "Kinetic\n",
94 | "\n",
95 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n",
96 | "\n",
97 | "Nuclear Attraction\n",
98 | "\n",
99 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n",
100 | "\n",
101 | "Form Core Hamiltonian\n",
102 | "\n",
103 | "$$ H = T + V $$\n",
104 | "\n",
105 | "Two electron integrals\n",
106 | "\n",
107 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "# calculate overlap integrals\n",
117 | "S = np.asarray(mints.ao_overlap())\n",
118 | "# calculate kinetic energy integrals\n",
119 | "T = np.asarray(mints.ao_kinetic())\n",
120 | "# calculate nuclear attraction integrals\n",
121 | "V = np.asarray(mints.ao_potential())\n",
122 | "# form core Hamiltonian\n",
123 | "H = T + V\n",
124 | "# calculate two electron integrals\n",
125 | "eri = np.asarray(mints.ao_eri())\n",
126 | "# get number of atomic orbitals\n",
127 | "num_ao = np.shape(S)[0]\n",
128 | "\n",
129 | "\n",
130 | "\n",
131 | "print(np.shape(eri))"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 |     "# set initial density matrix to zero\n",
141 | "D = np.zeros((num_ao,num_ao))\n",
142 | "\n",
143 | "# 2 helper functions for printing during SCF\n",
144 | "def print_start_iterations():\n",
145 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\"Iter\", \"Time(s)\", \"RMSC DM\", \"delta E\", \"E_scf_elec\")))\n",
146 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n",
147 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec):\n",
148 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num, iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)))\n",
149 | "\n",
150 | "# set stopping criteria\n",
151 | "iteration_max = 100\n",
152 | "convergence_E = 1e-9\n",
153 | "convergence_DM = 1e-5\n",
154 | "# loop variables\n",
155 | "iteration_num = 0\n",
156 | "E_scf_total = 0\n",
157 | "E_scf_elec = 0.0\n",
158 | "iteration_E_diff = 0.0\n",
159 | "iteration_rmsc_dm = 0.0\n",
160 | "converged = False\n",
161 | "exceeded_iterations = False\n",
162 | "print_start_iterations()\n",
163 | "while (not converged and not exceeded_iterations):\n",
164 | " # store last iteration and increment counters\n",
165 | " iteration_start_time = time.time()\n",
166 | " iteration_num += 1\n",
167 | " E_elec_last = E_scf_elec\n",
168 | " D_last = np.copy(D)\n",
169 | " # form G matrix\n",
170 | " G = np.zeros((num_ao,num_ao))\n",
171 | " for i in range(num_ao):\n",
172 | " for j in range(num_ao):\n",
173 | " for k in range(num_ao):\n",
174 | " for l in range(num_ao):\n",
175 | " G[i,j] += D[k,l] * ((2.0*(eri[i,j,k,l])) - (eri[i,k,j,l]))\n",
176 | " # build fock matrix\n",
177 | " F = H + G\n",
178 | " # solve the generalized eigenvalue problem\n",
179 | " E_orbitals, C = spla.eigh(F,S)\n",
180 | " # compute new density matrix\n",
181 | " D = np.zeros((num_ao,num_ao))\n",
182 | " for i in range(num_ao):\n",
183 | " for j in range(num_ao):\n",
184 | " for k in range(num_elec_alpha):\n",
185 | " D[i,j] += C[i,k] * C[j,k]\n",
186 | " # calculate electronic energy\n",
187 | " E_scf_elec = np.sum(np.multiply(D , (H + F)))\n",
188 | " # calculate energy change of iteration\n",
189 | " iteration_E_diff = np.abs(E_scf_elec - E_elec_last)\n",
190 | " # rms change of density matrix\n",
191 | " iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n",
192 | " iteration_end_time = time.time()\n",
193 | " print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)\n",
194 | " if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM): \n",
195 | " converged = True\n",
196 | " if(iteration_num == iteration_max):\n",
197 | " exceeded_iterations = True\n",
198 | "\n",
199 | "# calculate total energy\n",
200 | "E_scf_total = E_scf_elec + E_nuc\n",
201 | "print(\"{:^79}\".format(\"Total HF energy : {:>11f}\".format(E_scf_total)))"
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {},
207 | "source": [
208 | "# Perform MP2 calculation"
209 | ]
210 | },
211 | {
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "## Convert the two-electron integrals from AO basis to the MO basis\n",
216 | "\n",
217 | "$$(pq|rs) = \\sum_\\mu \\sum_\\nu \\sum_\\lambda \\sum_\\sigma C_\\mu^p C_\\nu^q\n",
218 | "(\\mu \\nu|\\lambda \\sigma) C_\\lambda^r C_\\sigma^s.$$\n",
219 | "\n",
220 | "\n",
221 |     "Attempt to code this conversion below. Remember that the electron repulsion integrals above are stored in the tensor `eri`, which has shape (num_ao, num_ao, num_ao, num_ao); for STO-3G water, num_ao is 7. The resulting MO-basis tensor will have the same shape as `eri`.\n"
222 | ]
223 | },
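  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick orientation (not a solution), the snippet below just pokes at `eri` so you can see how the full four-index array is addressed; for real orbitals, the transformed MO integrals keep the same permutational symmetry.\n",
    "```python\n",
    "# orientation only: eri here is the full 4-index array, addressed directly by AO indices\n",
    "print(eri.shape)                                    # (num_ao, num_ao, num_ao, num_ao)\n",
    "print(np.allclose(eri, eri.transpose(2, 3, 0, 1)))  # (mu nu|lambda sigma) = (lambda sigma|mu nu)\n",
    "print(np.allclose(eri, eri.transpose(1, 0, 2, 3)))  # (mu nu| ... ) = (nu mu| ... ) for real AOs\n",
    "```"
   ]
  },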
224 | {
225 | "cell_type": "code",
226 | "execution_count": null,
227 | "metadata": {},
228 | "outputs": [],
229 | "source": [
230 | "## place code for two-electron integral conversion here."
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "### Compute the MP2 Energy\n",
238 | "Now we can calculate the MP2 estimation of the correlation energy. \n",
239 |     "$$E_{\\mathrm{corr(MP2)}}\\ =\\ \\sum_{ij}^{\\mathrm{occ}}\\sum_{ab}^{\\mathrm{virt}}\\frac{( ia \\mid jb ) [ 2 (ia \\mid jb ) - ( ib \\mid ja )]}{\\epsilon_i + \\epsilon_j - \\epsilon_a - \\epsilon_b}$$\n",
240 | "\n",
241 |     "Here $i$ and $j$ run over all occupied orbitals, whereas $a$ and $b$ run over the unoccupied (virtual) orbitals.\n",
242 | "\n",
243 |     "Remember during this coding step that we are basing our MP2 correction on an RHF calculation, and thus there are the same number of $\\alpha$ and $\\beta$ electrons."
244 | ]
245 | },
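  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "One hint for setting up the loops (this assumes the RHF reference above, and is not a full solution): with `num_elec_alpha` doubly occupied spatial orbitals, the occupied indices run from 0 to `num_elec_alpha - 1` and the virtual indices from `num_elec_alpha` to `num_ao - 1`.\n",
    "```python\n",
    "# with an RHF reference the spatial orbitals split cleanly into occupied and virtual\n",
    "occupied = list(range(num_elec_alpha))         # candidate indices for i, j\n",
    "virtual = list(range(num_elec_alpha, num_ao))  # candidate indices for a, b\n",
    "print(occupied, virtual)                       # for STO-3G water: [0, 1, 2, 3, 4] and [5, 6]\n",
    "```"
   ]
  },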
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 |     "# initialize the variable for the MP2 correlation energy\n",
253 | "E_corr_mp2 = 0\n",
254 | "# code the equation above and adjust the value of E_corr_mp2\n",
255 | "\n",
256 | "\n",
257 |     "# this will print your MP2 correlation energy, E_corr_mp2\n",
258 | "print(\"{:^79}\".format(\"Total MP2 correlation energy : {:>11f}\".format(E_corr_mp2)))"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 |     "The correlation energy you obtain (-0.035493 Ha) should be very small compared to the total energy, which is generally the case. However, the correlation energy can be very important for describing properties such as dispersion."
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 | "## A comparison with Psi4"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": null,
278 | "metadata": {},
279 | "outputs": [],
280 | "source": [
281 | "\n",
282 |     "# Get the reference SCF and MP2 energies from Psi4\n",
283 | "scf_e, scf_wfn = psi4.energy('scf', return_wfn=True)\n",
284 | "mp2_e = psi4.energy('mp2')\n",
285 | "print(mp2_e)\n",
286 | "\n",
287 |     "E_diff = mp2_e - (E_scf_total + E_corr_mp2)  # should be essentially zero\n",
288 | "print(E_diff)"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": null,
294 | "metadata": {},
295 | "outputs": [],
296 | "source": []
297 | }
298 | ],
299 | "metadata": {
300 | "kernelspec": {
301 | "display_name": "Python 3",
302 | "language": "python",
303 | "name": "python3"
304 | },
305 | "language_info": {
306 | "codemirror_mode": {
307 | "name": "ipython",
308 | "version": 3
309 | },
310 | "file_extension": ".py",
311 | "mimetype": "text/x-python",
312 | "name": "python",
313 | "nbconvert_exporter": "python",
314 | "pygments_lexer": "ipython3",
315 | "version": "3.6.6"
316 | }
317 | },
318 | "nbformat": 4,
319 | "nbformat_minor": 2
320 | }
321 |
--------------------------------------------------------------------------------
/03_MP2/basics/mp2_psi4_solution.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# MP2"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Some useful resources:\n",
15 | " - [original paper](https://journals.aps.org/pr/abstract/10.1103/PhysRev.46.618)\n",
16 | " - Levine Chapter 16\n",
17 | " - [psi4numpy tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/05_Moller-Plesset/5a_conventional-mp2.ipynb)\n",
18 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project4)"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "# MP2 algorithm\n",
26 | "1. The starting point will be the Hartree-Fock wavefunction"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "## Imports"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "import numpy as np\n",
43 | "import scipy.linalg as spla\n",
44 | "import psi4\n",
45 | "import matplotlib.pyplot as plt\n",
46 | "import time\n",
47 | "%matplotlib notebook"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "## Specify the molecule"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "# start timer\n",
64 | "start_time = time.time()\n",
65 | "# define molecule\n",
66 | "mol = psi4.geometry(\"\"\"\n",
67 | "O 0.0000000 0.0000000 0.0000000\n",
68 | "H 0.7569685 0.0000000 -0.5858752\n",
69 | "H -0.7569685 0.0000000 -0.5858752\n",
70 | "symmetry c1\n",
71 | "\"\"\")\n",
72 | "psi4.set_options({'basis': 'sto-3g'})\n",
73 | "wfn = psi4.core.Wavefunction.build(mol, psi4.core.get_global_option('BASIS'))\n",
74 | "mints = psi4.core.MintsHelper(wfn.basisset())\n",
75 | "# get number of electrons\n",
76 | "num_elec_alpha = wfn.nalpha()\n",
77 | "num_elec_beta = wfn.nbeta()\n",
78 | "num_elec = num_elec_alpha + num_elec_beta\n",
79 | "# get nuclear repulsion energy\n",
80 | "E_nuc = mol.nuclear_repulsion_energy()"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "# STEP 2 : Calculate molecular integrals \n",
88 | "\n",
89 | "Overlap \n",
90 | "\n",
91 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n",
92 | "\n",
93 | "Kinetic\n",
94 | "\n",
95 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n",
96 | "\n",
97 | "Nuclear Attraction\n",
98 | "\n",
99 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n",
100 | "\n",
101 | "Form Core Hamiltonian\n",
102 | "\n",
103 | "$$ H = T + V $$\n",
104 | "\n",
105 | "Two electron integrals\n",
106 | "\n",
107 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "# calculate overlap integrals\n",
117 | "S = np.asarray(mints.ao_overlap())\n",
118 | "# calculate kinetic energy integrals\n",
119 | "T = np.asarray(mints.ao_kinetic())\n",
120 | "# calculate nuclear attraction integrals\n",
121 | "V = np.asarray(mints.ao_potential())\n",
122 | "# form core Hamiltonian\n",
123 | "H = T + V\n",
124 | "# calculate two electron integrals\n",
125 | "eri = np.asarray(mints.ao_eri())\n",
126 | "# get number of atomic orbitals\n",
127 | "num_ao = np.shape(S)[0]\n",
128 | "\n",
129 | "\n",
130 | "\n",
131 | "print(np.shape(eri))"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 |     "# set initial density matrix to zero\n",
141 | "D = np.zeros((num_ao,num_ao))\n",
142 | "\n",
143 | "# 2 helper functions for printing during SCF\n",
144 | "def print_start_iterations():\n",
145 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\"Iter\", \"Time(s)\", \"RMSC DM\", \"delta E\", \"E_scf_elec\")))\n",
146 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n",
147 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec):\n",
148 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num, iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)))\n",
149 | "\n",
150 | "# set stopping criteria\n",
151 | "iteration_max = 100\n",
152 | "convergence_E = 1e-9\n",
153 | "convergence_DM = 1e-5\n",
154 | "# loop variables\n",
155 | "iteration_num = 0\n",
156 | "E_scf_total = 0\n",
157 | "E_scf_elec = 0.0\n",
158 | "iteration_E_diff = 0.0\n",
159 | "iteration_rmsc_dm = 0.0\n",
160 | "converged = False\n",
161 | "exceeded_iterations = False\n",
162 | "print_start_iterations()\n",
163 | "while (not converged and not exceeded_iterations):\n",
164 | " # store last iteration and increment counters\n",
165 | " iteration_start_time = time.time()\n",
166 | " iteration_num += 1\n",
167 | " E_elec_last = E_scf_elec\n",
168 | " D_last = np.copy(D)\n",
169 | " # form G matrix\n",
170 | " G = np.zeros((num_ao,num_ao))\n",
171 | " for i in range(num_ao):\n",
172 | " for j in range(num_ao):\n",
173 | " for k in range(num_ao):\n",
174 | " for l in range(num_ao):\n",
175 | " G[i,j] += D[k,l] * ((2.0*(eri[i,j,k,l])) - (eri[i,k,j,l]))\n",
176 | " # build fock matrix\n",
177 | " F = H + G\n",
178 | " # solve the generalized eigenvalue problem\n",
179 | " E_orbitals, C = spla.eigh(F,S)\n",
180 | " # compute new density matrix\n",
181 | " D = np.zeros((num_ao,num_ao))\n",
182 | " for i in range(num_ao):\n",
183 | " for j in range(num_ao):\n",
184 | " for k in range(num_elec_alpha):\n",
185 | " D[i,j] += C[i,k] * C[j,k]\n",
186 | " # calculate electronic energy\n",
187 | " E_scf_elec = np.sum(np.multiply(D , (H + F)))\n",
188 | " # calculate energy change of iteration\n",
189 | " iteration_E_diff = np.abs(E_scf_elec - E_elec_last)\n",
190 | " # rms change of density matrix\n",
191 | " iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n",
192 | " iteration_end_time = time.time()\n",
193 | " print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)\n",
194 | " if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM): \n",
195 | " converged = True\n",
196 | " if(iteration_num == iteration_max):\n",
197 | " exceeded_iterations = True\n",
198 | "\n",
199 | "# calculate total energy\n",
200 | "E_scf_total = E_scf_elec + E_nuc\n",
201 | "print(\"{:^79}\".format(\"Total HF energy : {:>11f}\".format(E_scf_total)))"
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {},
207 | "source": [
208 | "# Perform MP2 calculation"
209 | ]
210 | },
211 | {
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "## Convert the two-electron integrals from AO basis to the MO basis\n",
216 | "\n",
217 | "$$(pq|rs) = \\sum_\\mu \\sum_\\nu \\sum_\\lambda \\sum_\\sigma C_\\mu^p C_\\nu^q\n",
218 | "(\\mu \\nu|\\lambda \\sigma) C_\\lambda^r C_\\sigma^s.$$\n",
219 | "\n",
220 |     "This is implemented in the cell below. There are a few ways to implement this transformation, and the one below is by far the slowest: the naive approach known as the Noddy algorithm, which scales as $N^8$ even though MP2 is formally an $N^5$ method (the transformation can be done one index at a time). The Noddy algorithm is nonetheless a great starting point."
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": null,
226 | "metadata": {},
227 | "outputs": [],
228 | "source": [
229 | "noddy_start = time.time()\n",
230 | "eri_mo = np.zeros((num_ao, num_ao, num_ao, num_ao))\n",
231 | "for p in range(num_ao):\n",
232 | " for q in range(num_ao):\n",
233 | " for r in range(num_ao):\n",
234 | " for s in range(num_ao):\n",
235 | " for mu in range(num_ao):\n",
236 | " for nu in range(num_ao):\n",
237 | " for lmda in range(num_ao):\n",
238 | " for sigma in range(num_ao):\n",
239 | " eri_mo[p, q, r, s] += C[mu, p]*C[nu, q]*C[lmda,r]*C[sigma, s]*eri[mu, nu, lmda, sigma]\n",
240 | " \n",
241 | "noddy_end = time.time()\n",
242 | "noddy_time = noddy_end - noddy_start\n",
243 | "print(noddy_time)"
244 | ]
245 | },
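  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The same transformation can also be written as a single `np.einsum` contraction (a sketch, using the `C` and `eri` arrays defined above, not a separate algorithm). With `optimize=True`, numpy picks an efficient contraction order, which in practice transforms one index at a time much like the smart $N^5$ algorithm shown further below.\n",
    "```python\n",
    "# sketch: the full AO->MO transformation as one einsum contraction\n",
    "eri_mo_einsum = np.einsum('mp,nq,mnlz,lr,zs->pqrs', C, C, eri, C, C, optimize=True)\n",
    "print(np.allclose(eri_mo, eri_mo_einsum))  # should match the Noddy result\n",
    "```"
   ]
  },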
246 | {
247 | "cell_type": "markdown",
248 | "metadata": {},
249 | "source": [
250 | "### Compute the MP2 Energy\n",
251 | "Now we can calculate the MP2 estimation of the correlation energy. \n",
252 |     "$$E_{\\mathrm{corr(MP2)}}\\ =\\ \\sum_{ij}^{\\mathrm{occ}}\\sum_{ab}^{\\mathrm{virt}}\\frac{( ia \\mid jb ) [ 2 (ia \\mid jb ) - ( ib \\mid ja )]}{\\epsilon_i + \\epsilon_j - \\epsilon_a - \\epsilon_b}$$"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": null,
258 | "metadata": {},
259 | "outputs": [],
260 | "source": [
261 | "E_corr_mp2 = 0\n",
262 | "for i in range(num_elec_alpha):\n",
263 | " for j in range(num_elec_alpha):\n",
264 | " for a in range(num_elec_alpha, num_ao):\n",
265 | " for b in range(num_elec_alpha, num_ao):\n",
266 | " temp = eri_mo[i, a, j, b] * \\\n",
267 | " (2*eri_mo[i, a, j, b] - eri_mo[i, b, j, a])\n",
268 | " temp /= (E_orbitals[i] + E_orbitals[j] - E_orbitals[a] - E_orbitals[b])\n",
269 | " E_corr_mp2 += temp\n",
270 | " \n",
271 | "print(\"{:^79}\".format(\"Total MP2 correlation energy : {:>11f}\".format(E_corr_mp2)))"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {},
277 | "source": [
278 |     "The correlation energy is very small compared to the total energy, which is generally the case. However, the correlation energy can be very important for describing properties such as dispersion."
279 | ]
280 | },
281 | {
282 | "cell_type": "markdown",
283 | "metadata": {},
284 | "source": [
285 | "## A comparison with Psi4"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": [
294 |     "# Get the reference SCF and MP2 energies from Psi4\n",
295 | "scf_e, scf_wfn = psi4.energy('scf', return_wfn=True)\n",
296 | "mp2_e = psi4.energy('mp2')\n",
297 | "print(mp2_e)\n",
298 | "\n",
299 | "E_diff = (mp2_e - (E_scf_total + E_corr_mp2)) \n",
300 | "print(E_diff)"
301 | ]
302 | },
303 | {
304 | "cell_type": "markdown",
305 | "metadata": {},
306 | "source": [
307 | "### An implementation of the smart algorithm"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": null,
313 | "metadata": {},
314 | "outputs": [],
315 | "source": [
316 | "# nbasis, norb = C.shape\n",
317 | "import time\n",
318 | "\n",
319 | "smart_start = time.time()\n",
320 | "sump = np.zeros((num_ao,num_ao,num_ao,num_ao))\n",
321 | "for mu in range(num_ao):\n",
322 | " sumq = np.zeros((num_ao,num_ao,num_ao))\n",
323 | " for nu in range(num_ao):\n",
324 | " sumr = np.zeros((num_ao,num_ao))\n",
325 | " for lmda in range(num_ao):\n",
326 | " sums = np.zeros((num_ao))\n",
327 | " for sigma in range(num_ao):\n",
328 | " for s in range(num_ao):\n",
329 | " sums[s] += C[sigma,s]*eri[mu,nu,lmda,sigma]\n",
330 | " for r in range(num_ao):\n",
331 | " for s in range(num_ao):\n",
332 | " sumr[r,s] += C[lmda,r]*sums[s]\n",
333 | " for q in range(num_ao):\n",
334 | " for r in range(num_ao):\n",
335 | " for s in range(num_ao):\n",
336 | " sumq[q,r,s] += C[nu,q]*sumr[r,s]\n",
337 | " for p in range(num_ao):\n",
338 | " for q in range(num_ao):\n",
339 | " for r in range(num_ao):\n",
340 | " for s in range(num_ao):\n",
341 | " sump[p,q,r,s] += C[mu,p]*sumq[q,r,s]\n",
342 | "eri_mo = sump\n",
343 | "\n",
344 | "smart_end = time.time()\n",
345 |     "smart_time = smart_end - smart_start\n",
346 | "print(smart_time)"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": null,
352 | "metadata": {},
353 | "outputs": [],
354 | "source": [
355 | "E_corr_mp2 = 0\n",
356 | "for i in range(num_elec_alpha):\n",
357 | " for j in range(num_elec_alpha):\n",
358 | " for a in range(num_elec_alpha, num_ao):\n",
359 | " for b in range(num_elec_alpha, num_ao):\n",
360 | " temp = eri_mo[i, a, j, b] * \\\n",
361 | " (2*eri_mo[i, a, j, b] - eri_mo[i, b, j, a])\n",
362 | " temp /= (E_orbitals[i] + E_orbitals[j] - E_orbitals[a] - E_orbitals[b])\n",
363 | " E_corr_mp2 += temp\n",
364 | " \n",
365 | "print(\"{:^79}\".format(\"Total MP2 correlation energy : {:>11f}\".format(E_corr_mp2)))"
366 | ]
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "metadata": {},
371 | "source": [
372 | "# algorithm time comparison"
373 | ]
374 | },
375 | {
376 | "cell_type": "code",
377 | "execution_count": null,
378 | "metadata": {},
379 | "outputs": [],
380 | "source": [
381 | "ns_time = noddy_time/smart_time\n",
382 | "print(ns_time)"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": null,
388 | "metadata": {},
389 | "outputs": [],
390 | "source": []
391 | }
392 | ],
393 | "metadata": {
394 | "kernelspec": {
395 | "display_name": "Python 3",
396 | "language": "python",
397 | "name": "python3"
398 | },
399 | "language_info": {
400 | "codemirror_mode": {
401 | "name": "ipython",
402 | "version": 3
403 | },
404 | "file_extension": ".py",
405 | "mimetype": "text/x-python",
406 | "name": "python",
407 | "nbconvert_exporter": "python",
408 | "pygments_lexer": "ipython3",
409 | "version": "3.6.6"
410 | }
411 | },
412 | "nbformat": 4,
413 | "nbformat_minor": 2
414 | }
415 |
--------------------------------------------------------------------------------
/03_MP2/basics/mp2_pyscf.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# MP2"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Some useful resources:\n",
15 | " - [original paper](https://journals.aps.org/pr/abstract/10.1103/PhysRev.46.618)\n",
16 | " - Levine Chapter 16\n",
17 | " - [psi4numpy tutorial](https://github.com/psi4/psi4numpy/blob/master/Tutorials/05_Moller-Plesset/5a_conventional-mp2.ipynb)\n",
18 | " - [Crawdad programming notes](http://sirius.chem.vt.edu/wiki/doku.php?id=crawdad:programming:project4)"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "# MP2 algorithm\n",
26 | "1. The starting point will be the Hartree-Fock wavefunction. "
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "## Imports"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "import numpy as np\n",
43 | "import scipy.linalg as spla\n",
44 | "import pyscf\n",
45 | "import matplotlib.pyplot as plt\n",
46 | "import time\n",
47 | "%matplotlib notebook"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "## Specify the molecule"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "# start timer\n",
64 | "start_time = time.time()\n",
65 | "# define molecule\n",
66 | "mol = pyscf.gto.M(\n",
67 | " atom=\"O 0.0000000 0.0000000 0.0000000; H 0.7569685 0.0000000 -0.5858752; H -0.7569685 0.0000000 -0.5858752\",\n",
68 | " basis='sto-3g',\n",
69 | " unit=\"Ang\",\n",
70 | " verbose=0,\n",
71 | " symmetry=False,\n",
72 | " spin=0,\n",
73 | " charge=0\n",
74 | ")\n",
75 | "# get number of atomic orbitals\n",
76 | "num_ao = mol.nao_nr()\n",
77 | "# get number of electrons\n",
78 | "num_elec_alpha, num_elec_beta = mol.nelec\n",
79 | "num_elec = num_elec_alpha + num_elec_beta\n",
80 | "# get nuclear repulsion energy\n",
81 | "E_nuc = mol.energy_nuc()"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "## Calculate molecular integrals \n",
89 | "\n",
90 | "\n",
91 | "Overlap \n",
92 | "\n",
93 | "$$ S_{\\mu\\nu} = (\\mu|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\phi_{\\nu}(r) $$\n",
94 | "\n",
95 | "Kinetic\n",
96 | "\n",
97 | "$$ T_{\\mu\\nu} = (\\mu\\left|-\\frac{\\nabla}{2}\\right|\\nu) = \\int dr \\phi^*_{\\mu}(r) \\left(-\\frac{\\nabla}{2}\\right) \\phi_{\\nu}(r) $$\n",
98 | "\n",
99 | "Nuclear Attraction\n",
100 | "\n",
101 | "$$ V_{\\mu\\nu} = (\\mu|r^{-1}|\\nu) = \\int dr \\phi^*_{\\mu}(r) r^{-1} \\phi_{\\nu}(r) $$\n",
102 | "\n",
103 | "Form Core Hamiltonian\n",
104 | "\n",
105 | "$$ H = T + V $$\n",
106 | "\n",
107 | "Two electron integrals\n",
108 | "\n",
109 | "$$ (\\mu\\nu|\\lambda\\sigma) = \\int dr_1 dr_2 \\phi^*_{\\mu}(r_1) \\phi_{\\nu}(r_1) r_{12}^{-1} \\phi^*_{\\lambda}(r_2) \\phi_{\\sigma}(r_2) $$\n"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "# calculate overlap integrals\n",
119 | "S = mol.intor('cint1e_ovlp_sph')\n",
120 | "# calculate kinetic energy integrals\n",
121 | "T = mol.intor('cint1e_kin_sph')\n",
122 | "# calculate nuclear attraction integrals\n",
123 | "V = mol.intor('cint1e_nuc_sph')\n",
124 | "# form core Hamiltonian\n",
125 | "H = T + V\n",
126 | "# calculate two electron integrals\n",
127 | "eri = mol.intor('cint2e_sph', aosym='s8')\n",
128 | "# since we are using the 8 fold symmetry of the 2 electron integrals\n",
129 | "# the functions below will help us when accessing elements\n",
130 | "__idx2_cache = {}\n",
131 | "\n",
132 | "\n",
133 | "def idx2(i, j):\n",
134 | " if (i, j) in __idx2_cache:\n",
135 | " return __idx2_cache[i, j]\n",
136 | " elif i >= j:\n",
137 | " __idx2_cache[i, j] = int(i*(i+1)/2+j)\n",
138 | " else:\n",
139 | " __idx2_cache[i, j] = int(j*(j+1)/2+i)\n",
140 | " return __idx2_cache[i, j]\n",
141 | "\n",
142 | "\n",
143 | "def idx4(i, j, k, l):\n",
144 | " return idx2(idx2(i, j), idx2(k, l))\n",
145 | "\n",
146 | "\n",
147 | "print(np.shape(eri))"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {},
153 | "source": [
154 | "## Perform Hartree-Fock SCF"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {},
161 | "outputs": [],
162 | "source": [
163 | "# set inital density matrix to zero\n",
164 | "D = np.zeros((num_ao, num_ao))\n",
165 | "\n",
166 | "# 2 helper functions for printing during SCF\n",
167 | "\n",
168 | "\n",
169 | "def print_start_iterations():\n",
170 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
171 | " \"Iter\", \"Time(s)\", \"RMSC DM\", \"delta E\", \"E_elec\")))\n",
172 | " print(\"{:^79}\".format(\"{:>4} {:>11} {:>11} {:>11} {:>11}\".format(\n",
173 | " \"****\", \"*******\", \"*******\", \"*******\", \"******\")))\n",
174 | "\n",
175 | "\n",
176 | "def print_iteration(iteration_num, iteration_start_time, iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_elec):\n",
177 | " print(\"{:^79}\".format(\"{:>4d} {:>11f} {:>.5E} {:>.5E} {:>11f}\".format(iteration_num,\n",
178 | " iteration_end_time - iteration_start_time, iteration_rmsc_dm, iteration_E_diff, E_elec)))\n",
179 | "\n",
180 | "\n",
181 | "# set stopping criteria\n",
182 | "iteration_max = 100\n",
183 | "convergence_E = 1e-9\n",
184 | "convergence_DM = 1e-5\n",
185 | "# loop variables\n",
186 | "iteration_num = 0\n",
187 | "E_scf_total = 0\n",
188 | "E_scf_elec = 0.0\n",
189 | "iteration_E_diff = 0.0\n",
190 | "iteration_rmsc_dm = 0.0\n",
191 | "converged = False\n",
192 | "exceeded_iterations = False\n",
193 | "\n",
194 | "print_start_iterations()\n",
195 | "while (not converged and not exceeded_iterations):\n",
196 | " # store last iteration and increment counters\n",
197 | " iteration_start_time = time.time()\n",
198 | " iteration_num += 1\n",
199 | " E_elec_last = E_scf_elec\n",
200 | " D_last = np.copy(D)\n",
201 | " # form G matrix\n",
202 | " G = np.zeros((num_ao, num_ao))\n",
203 | " for i in range(num_ao):\n",
204 | " for j in range(num_ao):\n",
205 | " for k in range(num_ao):\n",
206 | " for l in range(num_ao):\n",
207 | " G[i, j] += D[k, l] * \\\n",
208 | " ((2.0*(eri[idx4(i, j, k, l)])) -\n",
209 | " (eri[idx4(i, k, j, l)]))\n",
210 | " # build fock matrix\n",
211 | " F = H + G\n",
212 | " # solve the generalized eigenvalue problem\n",
213 | " E_orbitals, C = spla.eigh(F, S)\n",
214 | " # compute new density matrix\n",
215 | " D = np.zeros((num_ao, num_ao))\n",
216 | " for i in range(num_ao):\n",
217 | " for j in range(num_ao):\n",
218 | " for k in range(num_elec_alpha):\n",
219 | " D[i, j] += C[i, k] * C[j, k]\n",
220 | " # calculate electronic energy\n",
221 | " E_scf_elec = np.sum(np.multiply(D, (H + F)))\n",
222 | " # calculate energy change of iteration\n",
223 | " iteration_E_diff = np.abs(E_scf_elec - E_elec_last)\n",
224 | " # rms change of density matrix\n",
225 | " iteration_rmsc_dm = np.sqrt(np.sum((D - D_last)**2))\n",
226 | " iteration_end_time = time.time()\n",
227 | " print_iteration(iteration_num, iteration_start_time,\n",
228 | " iteration_end_time, iteration_rmsc_dm, iteration_E_diff, E_scf_elec)\n",
229 | " if(np.abs(iteration_E_diff) < convergence_E and iteration_rmsc_dm < convergence_DM):\n",
230 | " converged = True\n",
231 | " if(iteration_num == iteration_max):\n",
232 | " exceeded_iterations = True\n",
233 | "\n",
234 | "# calculate total energy\n",
235 | "E_scf_total = E_scf_elec + E_nuc\n",
236 | "print(\"{:^79}\".format(\"Total HF Energy : {:>11f}\".format(E_scf_total)))"
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": null,
242 | "metadata": {},
243 | "outputs": [],
244 | "source": []
245 | },
246 | {
247 | "cell_type": "markdown",
248 | "metadata": {},
249 | "source": [
250 | "# Perform MP2 calculation"
251 | ]
252 | },
253 | {
254 | "cell_type": "markdown",
255 | "metadata": {},
256 | "source": [
257 | "## Convert the two-electron integrals from AO basis to the MO basis\n",
258 | "\n",
259 | "$$(pq|rs) = \\sum_\\mu \\sum_\\nu \\sum_\\lambda \\sum_\\sigma C_\\mu^p C_\\nu^q\n",
260 | "(\\mu \\nu|\\lambda \\sigma) C_\\lambda^r C_\\sigma^s.$$\n",
261 | "\n",
262 | "\n",
263 | "Attempt to code this conversion below. Although this was introduced previously, we want to remention a note about the electron repulsion integrals. The electron repulsion integrals are stored as vector `eri`. This vector represent what would otherwise be a 4-D tensor. The reason we can store it as a vector is that there is symmetry present for the two-electron integrals and pyscf incorporates that. Thus we access our integrals using a helper function we have written for you, `idx_4` above which will turn the 4 index into the correct index for the pyscf data structure. For example:\n",
264 | "\n",
265 | "To access the specific integral for $(\\phi_1\\phi_4|\\phi_7\\phi_9)$, we would use the command\n",
266 | "`eri[idx4(1,4,7,9)]`. This will be essential for writing the transofmration inthe code cell below and in calculating the MP2 energy afterwards.\n",
267 | "\n"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": null,
273 | "metadata": {},
274 | "outputs": [],
275 | "source": [
276 | "## place code for two-electron integral conversion here."
277 | ]
278 | },
279 | {
280 | "cell_type": "markdown",
281 | "metadata": {},
282 | "source": [
283 | "### Compute the MP2 Energy\n",
284 | "Now we can calculate the MP2 estimation of the correlation energy. \n",
285 | "$$E_{\\mathrm{corr(MP2)}}\\ =\\ \\frac{( ia \\mid jb ) [ 2 (ia \\mid jb ) - ( ib \\mid ja )]}{\\epsilon_i + \\epsilon_j + \\epsilon_a - \\epsilon_b}$$\n",
286 | "\n",
287 | "Here $i$ and $j$ represent all occupied orbitals, where as $a$ and $b$ will be unoccupied orbitals. \n",
288 | "\n",
289 | "Remember during this coding step that we are basing our MP2 correction on an RHF calculation and thus there are the same amount of $\\alpha$ and $\\beta$ electrons."
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": null,
295 | "metadata": {},
296 | "outputs": [],
297 | "source": [
298 | "#initialize the variable forthe mp2 correlation energy\n",
299 | "E_corr_mp2 = 0\n",
300 | "# code the equation above and adjust the value of E_corr_mp2\n",
301 | "\n",
302 | "\n",
303 | "#this will print your E_corr mp2\n",
304 | "print(\"{:^79}\".format(\"Total MP2 correlation energy : {:>11f}\".format(E_corr_mp2)))"
305 | ]
306 | },
307 | {
308 | "cell_type": "markdown",
309 | "metadata": {},
310 | "source": [
311 | "The correlation energy is very small compared to the total energy, which is generally the case. However, this correlation energy can be very important to describing properties such as dispersion. "
312 | ]
313 | },
314 | {
315 | "cell_type": "markdown",
316 | "metadata": {},
317 | "source": [
318 | "## A comparison with PySCF"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": null,
324 | "metadata": {},
325 | "outputs": [],
326 | "source": [
327 | "import pyscf\n",
328 | "m = pyscf.scf.RHF(mol)\n",
329 | "print('E(HF) = %g' % m.kernel())\n",
330 | "mp2 = pyscf.mp.MP2(m)\n",
331 | "E_corr_mp2_pyscf = mp2.kernel()[0]\n",
332 | "print('E(MP2) = {:.9g}'.format(E_corr_mp2_pyscf))"
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": null,
338 | "metadata": {},
339 | "outputs": [],
340 | "source": [
341 | "# comparison from pyscf\n",
342 | "E_diff = E_corr_mp2_pyscf - E_corr_mp2\n",
343 | "print(E_diff)"
344 | ]
345 | }
346 | ],
347 | "metadata": {
348 | "kernelspec": {
349 | "display_name": "Python 3",
350 | "language": "python",
351 | "name": "python3"
352 | },
353 | "language_info": {
354 | "codemirror_mode": {
355 | "name": "ipython",
356 | "version": 3
357 | },
358 | "file_extension": ".py",
359 | "mimetype": "text/x-python",
360 | "name": "python",
361 | "nbconvert_exporter": "python",
362 | "pygments_lexer": "ipython3",
363 | "version": "3.6.5"
364 | },
365 | "toc": {
366 | "base_numbering": 1,
367 | "nav_menu": {},
368 | "number_sections": true,
369 | "sideBar": true,
370 | "skip_h1_title": false,
371 | "title_cell": "Table of Contents",
372 | "title_sidebar": "Contents",
373 | "toc_cell": false,
374 | "toc_position": {},
375 | "toc_section_display": true,
376 | "toc_window_display": false
377 | }
378 | },
379 | "nbformat": 4,
380 | "nbformat_minor": 2
381 | }
382 |
--------------------------------------------------------------------------------
/03_MP2/mp2_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/03_MP2/mp2_slides.pdf
--------------------------------------------------------------------------------
/04_Machine_Learning/advanced/bayesopt_boston.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Bayesian Optimization Example: Boston Housing Dataset"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Useful Resources\n",
15 | " - [Scikit-Learn](http://scikit-learn.org/)\n",
16 | " - [Scikit-Optimize](https://github.com/scikit-optimize/scikit-optimize) \n",
17 | " - [GPyOpt](https://gpyopt.readthedocs.io/en/latest/)\n",
18 | " - [GPyOpt GitHub](https://github.com/SheffieldML/GPyOpt)\n",
19 | " - [fmfn/BayesianOptimization](https://github.com/fmfn/BayesianOptimization)\n",
20 | " - [Taking the Human Out of the Loop: A Review of Bayesian Optimization](https://ieeexplore.ieee.org/document/7352306/)\n",
21 | " - [Practical Bayesian Optimization of Machine Learning Algorithms](https://arxiv.org/abs/1206.2944)\n",
22 | " - [Evaluating Hyperparameter Optimization Strategies](https://blog.sigopt.com/posts/evaluating-hyperparameter-optimization-strategies)\n",
23 | " - [A Tutorial on Bayesian Optimization of Expensive Cost Functions, with Application to Active User Modeling and Hierarchial Reinforcement Learning ](https://arxiv.org/abs/1012.2599)\n",
24 | " \n",
25 | "## Introduction\n",
26 | "Bayesian Optimization is a strategy for global optimization of black-box functions with the goal of finding a min/max of a function f(x) bounded by X. The Bayesian optimization will construct a probabilistic model for f(x) to exploit in order to determine where in X to evaluate the function next. It performs this determination using the information from previous evaluations of f(x).\n",
27 | "\n",
28 | "## General Theory\n",
29 | "#### Objective:\n",
30 | "Find global maximizer or minimizer of a function $f$(x)\n",
31 | "$$\\textbf{x}^{*} = \\text{arg} \\max_{\\textbf{x} \\in \\chi } f(\\textbf{x})$$\n",
32 | "$\\chi$ is the space of interest and can be categorical, conditional, or both\n",
33 | "\n",
34 | "#### Strategy \n",
35 | "- Unknown objective function \n",
36 | "- Treat as a random function \n",
37 | "- Place prior over it\n",
38 | "- Prior captures belief about function\n",
39 | "- Gather information and update the prior with posterior \n",
40 | "- Determine next query point based on priors\n",
41 | "\n",
42 | "[A Tutorial on Bayesian Optimization of Expensive Cost Functions, with Application to Active User Modeling and Hierarchial Reinforcement Learning ](https://arxiv.org/abs/1012.2599)\n",
43 | "\n",
44 | "\n",
45 | "## Summarize\n",
46 | "- Finds min/max with relatively few evaluations\n",
47 | "- Cost of more computation to determine next point to try\n",
48 | "- Good for expensive functions such as ML\n",
49 | "\n",
50 | "## What can $f$ be?\n",
51 | "Bayesian Optimization is best used for costly functions as Bayesian Optimization can become rather costly due to the strategy of determining the next point of query based on the prior guesses. While Bayesian Optimization is more computationaly expensive than other search methods, it often requires less iterations to find the maxima/minima thus reducing the amount of times something like training a neural net is performed, reducing the overall computational cost. \n",
52 | "\n",
53 | "### Random Search\n",
54 | "\n",
55 | "\n",
56 | "### Grid Search\n",
57 | "\n",
58 | "\n",
59 | "### Bayesian Optimization\n",
60 | "\n",
61 | "\n",
62 | "## Summarize\n",
63 | "- Finds min/max with relatively few evaluations\n",
64 | "- Cost of more computation to determine next point to try\n",
65 | "- Good for expensive functions such as ML\n"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {},
72 | "outputs": [],
73 | "source": [
74 | "%matplotlib inline\n",
75 | "import numpy as np\n",
76 | "import pandas as pd\n",
77 | "import matplotlib.pyplot as plt\n",
78 | "from math import sqrt"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "## Load dataset"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "from sklearn.datasets import load_boston\n",
95 | "boston = load_boston()"
96 | ]
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {},
101 | "source": [
102 | "## Import scikit models"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": null,
108 | "metadata": {},
109 | "outputs": [],
110 | "source": [
111 | "from sklearn.linear_model import LinearRegression\n",
112 | "from sklearn.linear_model import BayesianRidge\n",
113 | "from sklearn.linear_model import Ridge\n",
114 | "from sklearn.kernel_ridge import KernelRidge\n",
115 | "from sklearn.ensemble import RandomForestRegressor\n",
116 | "from sklearn.metrics import mean_absolute_error\n",
117 | "from sklearn.metrics import mean_squared_error\n",
118 | "from sklearn.model_selection import train_test_split"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "X_train, X_test, y_train, y_test = train_test_split(\n",
128 | " boston.data, boston.target, train_size=0.9, test_size=0.1)"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {},
134 | "source": [
135 | "## Model to Optimize"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "regr = RandomForestRegressor(n_jobs=-1)"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "## Parameters to Optimize"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "metadata": {},
158 | "outputs": [],
159 | "source": [
160 | "from skopt.space import Integer\n",
161 | "from skopt.space import Categorical\n",
162 | "\n",
163 | "space = [Integer(1, 200, name='n_estimators'),\n",
164 | " Categorical(('auto', 'sqrt', 'log2'), name='max_features'),\n",
165 | " Integer(2, 100, name='min_samples_split'),\n",
166 | " Integer(1, 100, name='min_samples_leaf')]"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "## Objective"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": null,
179 | "metadata": {},
180 | "outputs": [],
181 | "source": [
182 | "from skopt.utils import use_named_args\n",
183 | "\n",
184 | "\n",
185 | "@use_named_args(space)\n",
186 | "def objective(**params):\n",
187 | " regr.set_params(**params)\n",
188 | " regr.fit(X_train, y_train)\n",
189 | "\n",
190 | " return mean_absolute_error(y_test, regr.predict(X_test))"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {},
196 | "source": [
197 | "## Optimization"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {},
204 | "outputs": [],
205 | "source": [
206 | "from skopt import gp_minimize\n",
207 | "\n",
208 | "res_gp = gp_minimize(objective, space, n_calls=20, random_state=0)\n",
209 | "\n",
210 | "res_gp.fun"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": null,
216 | "metadata": {},
217 | "outputs": [],
218 | "source": [
219 | "res_gp.x"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {},
226 | "outputs": [],
227 | "source": [
228 | "n_estimators = res_gp.x[0]\n",
229 | "max_features = res_gp.x[1]\n",
230 | "min_samples_split = res_gp.x[2]\n",
231 | "min_samples_leaf = res_gp.x[3]\n",
232 | "\n",
233 | "regr = RandomForestRegressor(n_jobs=-1, n_estimators=n_estimators, max_features=max_features,\n",
234 | " min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)\n",
235 | "regr.fit(X_train, y_train)\n",
236 | "\n",
237 | "predicted = regr.predict(X_test)\n",
238 | "\n",
239 | "mae = mean_absolute_error(y_test, predicted)\n",
240 | "mse = mean_squared_error(y_test, predicted)\n",
241 | "rmse = sqrt(mse)\n",
242 | "print('MAE:', mae, '\\tMSE:', mse, '\\tRMSE:', rmse)"
243 | ]
244 | },
245 | {
246 | "cell_type": "markdown",
247 | "metadata": {},
248 | "source": [
249 | "## Plot Results"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {},
256 | "outputs": [],
257 | "source": [
258 | "training_size = ('%0.1f' % (100 - (len(y_test)/len(boston.target) * 100)))\n",
259 | "class_type = str(regr).split('(')[0]\n",
260 | "label1 = ('MAE = {}'.format('%8.3f' % mae))\n",
261 | "label2 = (class_type + '\\nTraining size = ' + training_size + '%')\n",
262 | "\n",
263 | "plt.figure(dpi=250)\n",
264 | "plt.plot([min(boston.target), max(boston.target)], [\n",
265 | " min(boston.target), max(boston.target)], ls=\"--\", c=\"g\")\n",
266 | "plt.plot(y_test, predicted, 'o', markersize=1.5)\n",
267 | "plt.xlabel(\"Actual Price\")\n",
268 | "plt.ylabel(\"Predicted Price\")\n",
269 | "legend1 = plt.legend([label1], loc='lower right',\n",
270 | " markerscale=0, fontsize=6, handlelength=0)\n",
271 | "plt.legend([label2], loc='upper left',\n",
272 | " markerscale=0, fontsize=6, handlelength=0)\n",
273 | "plt.gca().add_artist(legend1)\n",
274 | "plt.show()"
275 | ]
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {},
280 | "source": [
281 | "## What to do now? \n",
282 | "- Try a different classifier/regressor\n",
283 | "- Try with a different dataset provided by scikit-learn\n",
284 | "- Try out a different library"
285 | ]
286 | }
287 | ],
288 | "metadata": {
289 | "kernelspec": {
290 | "display_name": "Python 3",
291 | "language": "python",
292 | "name": "python3"
293 | },
294 | "language_info": {
295 | "codemirror_mode": {
296 | "name": "ipython",
297 | "version": 3
298 | },
299 | "file_extension": ".py",
300 | "mimetype": "text/x-python",
301 | "name": "python",
302 | "nbconvert_exporter": "python",
303 | "pygments_lexer": "ipython3",
304 | "version": "3.6.6"
305 | },
306 | "varInspector": {
307 | "cols": {
308 | "lenName": 16,
309 | "lenType": 16,
310 | "lenVar": 40
311 | },
312 | "kernels_config": {
313 | "python": {
314 | "delete_cmd_postfix": "",
315 | "delete_cmd_prefix": "del ",
316 | "library": "var_list.py",
317 | "varRefreshCmd": "print(var_dic_list())"
318 | },
319 | "r": {
320 | "delete_cmd_postfix": ") ",
321 | "delete_cmd_prefix": "rm(",
322 | "library": "var_list.r",
323 | "varRefreshCmd": "cat(var_dic_list()) "
324 | }
325 | },
326 | "types_to_exclude": [
327 | "module",
328 | "function",
329 | "builtin_function_or_method",
330 | "instance",
331 | "_Feature"
332 | ],
333 | "window_display": false
334 | }
335 | },
336 | "nbformat": 4,
337 | "nbformat_minor": 2
338 | }
339 |
--------------------------------------------------------------------------------
/04_Machine_Learning/basics/coulomb_matrix.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Coulomb Matrix Representation"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "from math import sqrt"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "## Useful Resources\n",
25 | " - [Fast and Accurate Modeling of Molecular Atomization Energies with Machine Learning](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.108.058301)\n",
26 | " - [Prediction Errors of Molecular Machine learning Models lower than Hybrid DFT Error](https://pubs.acs.org/doi/abs/10.1021/acs.jctc.7b00577)\n",
27 | " - [Understanding molecular representations in machine learning: The role of uniqueness and target similarity](https://aip.scitation.org/doi/10.1063/1.4964627)\n",
28 | "\n",
29 | "## Introduction\n",
30 | "For machine learning there needs to be some way to represent the data to the model in a way in which the model can infer knowledge about the data and use it for future predictions. In chemistry, the data we are trying to represent are molecules with the information we are trying to teach the model are property values for those molecules. The goal is to represent the molecule in a way that provides a detailed enough description about the underlying physics of the molecule in order to accurately predict the properties of the molecule. This has led to a lot of work to determine how to best represent the molecule for the model to learn from. One of the most simplistic ways to describe the molecule is what we are going to work on today, the Coulomb matrix.\n",
31 | "\n",
32 | "## General Theory\n",
33 | "The Coulomb matrix is one of the more simplistic representations used to describe the molecule. The Coulomb matrix is a square matrix with diagonal elements being the electronic potential energy of the atom and off diagonal elements being the Coulomb nuclear repulsion between atom I and J. \n",
34 | "\n",
35 | "$$M_{IJ} =\\begin{cases}0.5Z_{I}^{2.4} &\\text{for } I = J, \\\\ \\frac{Z_I Z_J}{\\left | R_I - R_J \\right |} &\\text{for } I \\neq J.\\end{cases} $$\n",
36 | " \n",
37 | "## Setup\n",
38 | "1. Parse file for atoms and coordinates\n",
39 | "2. Build Coulomb Matrix"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "file = open('methane.xyz', 'r')\n",
49 | "\n",
50 | "doc = []\n",
51 | "for line in file:\n",
52 | " doc.append(line)"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "# read number of atoms\n",
62 | "natoms = int(doc[0].split()[0])\n",
63 | "\n",
64 | "# parse coordinates\n",
65 | "coords = []\n",
66 | "for i in range(natoms):\n",
67 | " a_coords = doc[i + 2].split()[0:4]\n",
68 | " coords.append(a_coords)\n",
69 | "\n",
70 | "coords"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "## What do we need for CM?\n",
78 | "1. Nuclear charges\n",
79 | "2. Calculate when $I = J$\n",
80 | "3. Calculate when $I \\neq J$\n",
81 | "4. Output lower triangle of matrix"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "# nuclear charges\n",
91 | "\n",
92 | "# build CM matrix\n",
93 | "\n",
94 | "# return the lower trinagle of the CM as a vector\n",
95 | "mat = mat[np.tril_indices(natoms)]"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "mat"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "## If this interests you, feel free to help out with [chemreps](https://github.com/dlf57/chemreps)!"
112 | ]
113 | }
114 | ],
115 | "metadata": {
116 | "kernelspec": {
117 | "display_name": "Python 3",
118 | "language": "python",
119 | "name": "python3"
120 | },
121 | "language_info": {
122 | "codemirror_mode": {
123 | "name": "ipython",
124 | "version": 3
125 | },
126 | "file_extension": ".py",
127 | "mimetype": "text/x-python",
128 | "name": "python",
129 | "nbconvert_exporter": "python",
130 | "pygments_lexer": "ipython3",
131 | "version": "3.6.6"
132 | },
133 | "varInspector": {
134 | "cols": {
135 | "lenName": 16,
136 | "lenType": 16,
137 | "lenVar": 40
138 | },
139 | "kernels_config": {
140 | "python": {
141 | "delete_cmd_postfix": "",
142 | "delete_cmd_prefix": "del ",
143 | "library": "var_list.py",
144 | "varRefreshCmd": "print(var_dic_list())"
145 | },
146 | "r": {
147 | "delete_cmd_postfix": ") ",
148 | "delete_cmd_prefix": "rm(",
149 | "library": "var_list.r",
150 | "varRefreshCmd": "cat(var_dic_list()) "
151 | }
152 | },
153 | "types_to_exclude": [
154 | "module",
155 | "function",
156 | "builtin_function_or_method",
157 | "instance",
158 | "_Feature"
159 | ],
160 | "window_display": false
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 2
165 | }
166 |
--------------------------------------------------------------------------------
/04_Machine_Learning/basics/coulomb_matrix_solutions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Coulomb Matrix Representation"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "from math import sqrt"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "## Useful Resources\n",
25 | " - [Fast and Accurate Modeling of Molecular Atomization Energies with Machine Learning](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.108.058301)\n",
26 | " - [Prediction Errors of Molecular Machine learning Models lower than Hybrid DFT Error](https://pubs.acs.org/doi/abs/10.1021/acs.jctc.7b00577)\n",
27 | " - [Understanding molecular representations in machine learning: The role of uniqueness and target similarity](https://aip.scitation.org/doi/10.1063/1.4964627)\n",
28 | "\n",
29 | "## Introduction\n",
30 | "For machine learning there needs to be some way to represent the data to the model in a way in which the model can infer knowledge about the data and use it for future predictions. In chemistry, the data we are trying to represent are molecules with the information we are trying to teach the model are property values for those molecules. The goal is to represent the molecule in a way that provides a detailed enough description about the underlying physics of the molecule in order to accurately predict the properties of the molecule. This has led to a lot of work to determine how to best represent the molecule for the model to learn from. One of the most simplistic ways to describe the molecule is what we are going to work on today, the Coulomb matrix.\n",
31 | "\n",
32 | "## General Theory\n",
33 | "The Coulomb matrix is one of the more simplistic representations used to describe the molecule. The Coulomb matrix is a square matrix with diagonal elements being the electronic potential energy of the atom and off diagonal elements being the Coulomb nuclear repulsion between atom I and J. \n",
34 | "\n",
35 | "$$M_{IJ} =\\begin{cases}0.5Z_{I}^{2.4} &\\text{for } I = J, \\\\ \\frac{Z_I Z_J}{\\left | R_I - R_J \\right |} &\\text{for } I \\neq J.\\end{cases} $$\n",
36 | " \n",
37 | "## Setup\n",
38 | "1. Parse file for atoms and coordinates\n",
39 | "2. Build Coulomb Matrix"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "file = open('methane.xyz', 'r')\n",
49 | "\n",
50 | "doc = []\n",
51 | "for line in file:\n",
52 | " doc.append(line)"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 3,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "data": {
62 | "text/plain": [
63 | "[['C', '1.041682', '-0.056200', '-0.071481'],\n",
64 | " ['H', '2.130894', '-0.056202', '-0.071496'],\n",
65 | " ['H', '0.678598', '0.174941', '-1.072044'],\n",
66 | " ['H', '0.678613', '0.694746', '0.628980'],\n",
67 | " ['H', '0.678614', '-1.038285', '0.228641']]"
68 | ]
69 | },
70 | "execution_count": 3,
71 | "metadata": {},
72 | "output_type": "execute_result"
73 | }
74 | ],
75 | "source": [
76 | "# read number of atoms\n",
77 | "natoms = int(doc[0].split()[0])\n",
78 | "\n",
79 | "# parse coordinates\n",
80 | "coords = []\n",
81 | "for i in range(natoms):\n",
82 | " a_coords = doc[i + 2].split()[0:4]\n",
83 | " coords.append(a_coords)\n",
84 | "\n",
85 | "coords"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "## What do we need for CM?\n",
93 | "1. Nuclear charges\n",
94 | "2. Calculate when $I = J$\n",
95 | "3. Calculate when $I \\neq J$\n",
96 | "4. Output lower triangle of matrix"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 4,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "name": "stdout",
106 | "output_type": "stream",
107 | "text": [
108 | "[[36.8581052 0. 0. 0. 0. ]\n",
109 | " [ 5.5085695 0.5 0. 0. 0. ]\n",
110 | " [ 5.50857022 0.56221605 0.5 0. 0. ]\n",
111 | " [ 5.50856526 0.56221405 0.56221669 0.5 0. ]\n",
112 | " [ 5.50857007 0.56221611 0.56221501 0.56221777 0.5 ]]\n"
113 | ]
114 | }
115 | ],
116 | "source": [
117 | "# nuclear charges\n",
118 | "nuc = {'C': 6, 'H': 1}\n",
119 | "\n",
120 | "# build CM matrix\n",
121 | "mat = np.zeros((5, 5))\n",
122 | "for i in range(5):\n",
123 | " for j in range(i, 5):\n",
124 | " zi = nuc[coords[i][0]] # nuc['C'] = 6\n",
125 | " zj = nuc[coords[j][0]]\n",
126 | " if i == j:\n",
127 | " mii = 0.5 * zi ** 2.4\n",
128 | " mat[i, i] = mii\n",
129 | "\n",
130 | " else:\n",
131 | " # mij = zizj/rij\n",
132 | " # rij = sqrt((xi - xj)^2 + (yi - yj)^2 + (zi - zj)^2)\n",
133 | " x = float(coords[i][1]) - float(coords[j][1])\n",
134 | " y = float(coords[i][2]) - float(coords[j][2])\n",
135 | " z = float(coords[i][3]) - float(coords[j][3])\n",
136 | " rij = sqrt((x ** 2) + (y ** 2) + (z ** 2))\n",
137 | " mij = (zi * zj) / rij\n",
138 | "\n",
139 | " mat[j, i] = mij\n",
140 | "\n",
141 | "print(mat)"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 5,
147 | "metadata": {
148 | "scrolled": true
149 | },
150 | "outputs": [
151 | {
152 | "data": {
153 | "text/plain": [
154 | "array([36.8581052 , 5.5085695 , 0.5 , 5.50857022, 0.56221605,\n",
155 | " 0.5 , 5.50856526, 0.56221405, 0.56221669, 0.5 ,\n",
156 | " 5.50857007, 0.56221611, 0.56221501, 0.56221777, 0.5 ])"
157 | ]
158 | },
159 | "execution_count": 5,
160 | "metadata": {},
161 | "output_type": "execute_result"
162 | }
163 | ],
164 | "source": [
165 | "# return the lower trinagle of the CM as a vector\n",
166 | "mat = mat[np.tril_indices(natoms)]\n",
167 | "mat"
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "## What if we wanted this as a function so we could do multiple molecules?"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 6,
180 | "metadata": {},
181 | "outputs": [],
182 | "source": [
183 | "def cm(natoms, coords, size):\n",
184 | " '''\n",
185 | " Paramters\n",
186 | " ---------\n",
187 | " natoms: int\n",
188 | " number of atoms in the molecule\n",
189 | " coords: \n",
190 | " molecule coordinates\n",
191 | " size: int\n",
192 | " size of CM matrix\n",
193 | " Returns\n",
194 | " -------\n",
195 | " mat: triangle matrix\n",
196 | " triangle CM matrix\n",
197 | " '''\n",
198 | " # build CM matrix\n",
199 | " mat = np.zeros((size, size))\n",
200 | " for i in range(natoms):\n",
201 | " for j in range(i, natoms):\n",
202 | " zi = nuc[coords[i][0]] # nuc['C'] = 6\n",
203 | " zj = nuc[coords[j][0]]\n",
204 | " if i == j:\n",
205 | " mii = 0.5 * zi ** 2.4\n",
206 | " mat[i, i] = mii\n",
207 | "\n",
208 | " else:\n",
209 | " # mij = zizj/rij\n",
210 | " # rij = sqrt((xi - xj)^2 + (yi - yj)^2 + (zi - zj)^2)\n",
211 | " x = float(coords[i][1]) - float(coords[j][1])\n",
212 | " y = float(coords[i][2]) - float(coords[j][2])\n",
213 | " z = float(coords[i][3]) - float(coords[j][3])\n",
214 | " rij = sqrt((x ** 2) + (y ** 2) + (z ** 2))\n",
215 | " mij = (zi * zj) / rij\n",
216 | "\n",
217 | " mat[j, i] = mij\n",
218 | "\n",
219 | " # return the lower trinagle of the CM as a vector\n",
220 | " mat = mat[np.tril_indices(natoms)]\n",
221 | "\n",
222 | " return mat"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 7,
228 | "metadata": {},
229 | "outputs": [
230 | {
231 | "data": {
232 | "text/plain": [
233 | "array([36.8581052 , 5.5085695 , 0.5 , 5.50857022, 0.56221605,\n",
234 | " 0.5 , 5.50856526, 0.56221405, 0.56221669, 0.5 ,\n",
235 | " 5.50857007, 0.56221611, 0.56221501, 0.56221777, 0.5 ])"
236 | ]
237 | },
238 | "execution_count": 7,
239 | "metadata": {},
240 | "output_type": "execute_result"
241 | }
242 | ],
243 | "source": [
244 | "cm(natoms, coords, size=29)"
245 | ]
246 | },
247 | {
248 | "cell_type": "markdown",
249 | "metadata": {},
250 | "source": [
251 | "## If this interests you, feel free to help out with [chemreps](https://github.com/dlf57/chemreps)!"
252 | ]
253 | }
254 | ],
255 | "metadata": {
256 | "kernelspec": {
257 | "display_name": "Python 3",
258 | "language": "python",
259 | "name": "python3"
260 | },
261 | "language_info": {
262 | "codemirror_mode": {
263 | "name": "ipython",
264 | "version": 3
265 | },
266 | "file_extension": ".py",
267 | "mimetype": "text/x-python",
268 | "name": "python",
269 | "nbconvert_exporter": "python",
270 | "pygments_lexer": "ipython3",
271 | "version": "3.6.6"
272 | },
273 | "varInspector": {
274 | "cols": {
275 | "lenName": 16,
276 | "lenType": 16,
277 | "lenVar": 40
278 | },
279 | "kernels_config": {
280 | "python": {
281 | "delete_cmd_postfix": "",
282 | "delete_cmd_prefix": "del ",
283 | "library": "var_list.py",
284 | "varRefreshCmd": "print(var_dic_list())"
285 | },
286 | "r": {
287 | "delete_cmd_postfix": ") ",
288 | "delete_cmd_prefix": "rm(",
289 | "library": "var_list.r",
290 | "varRefreshCmd": "cat(var_dic_list()) "
291 | }
292 | },
293 | "types_to_exclude": [
294 | "module",
295 | "function",
296 | "builtin_function_or_method",
297 | "instance",
298 | "_Feature"
299 | ],
300 | "window_display": false
301 | }
302 | },
303 | "nbformat": 4,
304 | "nbformat_minor": 2
305 | }
306 |
--------------------------------------------------------------------------------
/04_Machine_Learning/basics/methane.xyz:
--------------------------------------------------------------------------------
1 | 5
2 |
3 | C 1.041682 -0.056200 -0.071481
4 | H 2.130894 -0.056202 -0.071496
5 | H 0.678598 0.174941 -1.072044
6 | H 0.678613 0.694746 0.628980
7 | H 0.678614 -1.038285 0.228641
8 |
9 |
--------------------------------------------------------------------------------
/04_Machine_Learning/basics/ml_boston.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Machine Learning Example: Boston Housing Dataset"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "%matplotlib inline\n",
17 | "import numpy as np\n",
18 | "import pandas as pd\n",
19 | "import matplotlib.pyplot as plt\n",
20 | "from math import sqrt"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "## Useful Resources\n",
28 | " - [Scikit-Learn](http://scikit-learn.org/)\n",
29 | " - [Learning Data Science Boston Housing Example](https://medium.com/@haydar_ai/learning-data-science-day-9-linear-regression-on-boston-housing-dataset-cd62a80775ef) \n",
30 | " - [Python Data Analysis Library: pandas](https://pandas.pydata.org/)\n",
31 | " - [10 Minutes to pandas](https://pandas.pydata.org/pandas-docs/stable/10min.html)\n",
32 | " - [Our Slides on ML](https://github.com/shivupa/QMMM_study_group/blob/master/04_Machine_Learning/ml_slides.pdf) \n",
33 | " \n",
34 | "## Setup\n",
35 | " 1. Organize data\n",
36 | " 2. Setup Classifier/Regressor\n",
37 | " 3. Train, Test, Visualize!\n",
38 | " \n",
39 | "## Organize Data\n",
40 | "Data organization is one of the most important steps in machine learning. Unorganized data can lead to wasted compute time on improper data as well as make it more difficult for others to understand and replicate your method. For data organization we are going to use pandas (unfortunately not this one 🐼)."
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "from sklearn.datasets import load_boston\n",
50 | "boston = load_boston()\n",
51 | "boston.keys()"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "print(boston.DESCR)"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "boston.data[0]"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": null,
75 | "metadata": {},
76 | "outputs": [],
77 | "source": [
78 | "df_boston = pd.DataFrame(boston.data, columns=boston.feature_names)\n",
79 | "df_boston['PRICE'] = boston.target\n",
80 | "df_boston.head()"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {
87 | "scrolled": true
88 | },
89 | "outputs": [],
90 | "source": [
91 | "df_boston.describe()"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "## Machine Learning\n",
99 | "1. Split data into training and testing sets\n",
100 | "2. Train classifier/regressor\n",
101 | "3. Test trained classifier/regressor on test set\n",
102 | "4. Visualize\n",
103 | "\n",
104 | "## Import Scikit-Learn"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "from sklearn.linear_model import LinearRegression\n",
114 | "from sklearn.linear_model import BayesianRidge\n",
115 | "from sklearn.linear_model import Ridge\n",
116 | "from sklearn.kernel_ridge import KernelRidge\n",
117 | "from sklearn.ensemble import RandomForestRegressor\n",
118 | "from sklearn.metrics import mean_absolute_error\n",
119 | "from sklearn.metrics import mean_squared_error\n",
120 | "from sklearn.model_selection import train_test_split"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | "## Split data into training and testing sets"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "X_train, X_test, y_train, y_test = train_test_split(\n",
137 | " boston.data, boston.target, train_size=0.9, test_size=0.1)"
138 | ]
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {},
143 | "source": [
144 | "## Train Regressor"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": null,
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
153 | "####################### Choose ML Model #######################\n",
154 | "regr = \n",
155 | "\n",
156 | "####################### Train ML Model ########################\n",
157 | "regr.fit()\n",
158 | "\n",
159 | "###################### Predict Test Set #######################\n",
160 | "predicted = regr.predict()\n",
161 | "\n",
162 | "##################### Evaluate Prediciton #####################\n",
163 | "mae = mean_absolute_error()\n",
164 | "mse = mean_squared_error()\n",
165 | "rmse = sqrt(mse)\n",
166 | "print('MAE:', mae, '\\tMSE:', mse, '\\tRMSE:', rmse)"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "## Plot Results"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": null,
179 | "metadata": {},
180 | "outputs": [],
181 | "source": [
182 | "training_size = ('%0.1f' % (100 - (len(y_test)/len(boston.target) * 100)))\n",
183 | "class_type = str(regr).split('(')[0]\n",
184 | "label1 = ('MAE = {}'.format('%8.3f' % mae))\n",
185 | "label2 = (class_type + '\\nTraining size = ' + training_size + '%')\n",
186 | "\n",
187 | "plt.figure(dpi=250)\n",
188 | "plt.plot([df_boston['PRICE'].min(), df_boston['PRICE'].max()], [df_boston['PRICE'].min(), df_boston['PRICE'].max()], ls=\"--\", c=\"g\")\n",
189 | "plt.plot(y_test, predicted, 'o', markersize=1.5)\n",
190 | "plt.xlabel(\"Actual Price\")\n",
191 | "plt.ylabel(\"Predicted Price\")\n",
192 | "legend1 = plt.legend([label1], loc='lower right', markerscale=0, fontsize=6, handlelength=0)\n",
193 | "plt.legend([label2], loc='upper left', markerscale=0, fontsize=6, handlelength=0)\n",
194 | "plt.gca().add_artist(legend1)\n",
195 | "plt.show()"
196 | ]
197 | }
198 | ],
199 | "metadata": {
200 | "kernelspec": {
201 | "display_name": "Python 3",
202 | "language": "python",
203 | "name": "python3"
204 | },
205 | "language_info": {
206 | "codemirror_mode": {
207 | "name": "ipython",
208 | "version": 3
209 | },
210 | "file_extension": ".py",
211 | "mimetype": "text/x-python",
212 | "name": "python",
213 | "nbconvert_exporter": "python",
214 | "pygments_lexer": "ipython3",
215 | "version": "3.6.6"
216 | },
217 | "varInspector": {
218 | "cols": {
219 | "lenName": 16,
220 | "lenType": 16,
221 | "lenVar": 40
222 | },
223 | "kernels_config": {
224 | "python": {
225 | "delete_cmd_postfix": "",
226 | "delete_cmd_prefix": "del ",
227 | "library": "var_list.py",
228 | "varRefreshCmd": "print(var_dic_list())"
229 | },
230 | "r": {
231 | "delete_cmd_postfix": ") ",
232 | "delete_cmd_prefix": "rm(",
233 | "library": "var_list.r",
234 | "varRefreshCmd": "cat(var_dic_list()) "
235 | }
236 | },
237 | "types_to_exclude": [
238 | "module",
239 | "function",
240 | "builtin_function_or_method",
241 | "instance",
242 | "_Feature"
243 | ],
244 | "window_display": false
245 | }
246 | },
247 | "nbformat": 4,
248 | "nbformat_minor": 2
249 | }
250 |
--------------------------------------------------------------------------------
/04_Machine_Learning/ml_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/04_Machine_Learning/ml_slides.pdf
--------------------------------------------------------------------------------
/05_MolecularDynamics/README.txt:
--------------------------------------------------------------------------------
1 | Introduction to Molecular Dynamics
2 |
3 | Please see the trajectories folder for precomputed trajectories that can be visualized with VMD.
4 |
--------------------------------------------------------------------------------
/05_MolecularDynamics/enhanced_sampling/EnhancedSampling.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/enhanced_sampling/EnhancedSampling.pdf
--------------------------------------------------------------------------------
/05_MolecularDynamics/enhanced_sampling/metadynamics.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/enhanced_sampling/metadynamics.pdf
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/images/divx2pass.log.mbtree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/template/images/divx2pass.log.mbtree
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/images/encode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # encode.sh
4 | #
5 | # Script for encoding images to a movie. The images should be numbered, with
6 | # zero-padding. The movie is encoded with the h.264 codec (the same codec used
7 | # in high-definition movies).
8 | #
9 | # This script must be run from the directory containing the saved frames.
10 |
11 | # Set the mplayer environment variables (change for your configuration)
12 | LD_LIBRARY_PATH=/local/usr/lib; export LD_LIBRARY_PATH
13 | PATH=${PATH}:/local/usr/bin; export PATH
14 |
15 | # convert targa (.tga) frames to SGI (.sgi) format
16 | for i in *.tga; do
17 | PREFIX=$(basename ${i} .tga)
18 | convert ${i} ${PREFIX}.sgi
19 | done
20 |
21 | # This script is meant to be used with 1280x720 images
22 | WIDTH=$(identify -format "%w" ${PREFIX}.sgi)
23 | HEIGHT=$(identify -format "%h" ${PREFIX}.sgi)
24 |
25 | # high motion = 5928 kbps
26 | # moderate motion = 4512 kbps
27 | mencoder \
28 | -ovc x264 \
29 | -x264encopts pass=1:turbo:bitrate=5928:bframes=1:subq=6:frameref=6:me=hex:partitions=all:threads=auto:keyint=300 \
30 | -mf type=sgi:w=${WIDTH}:h=${HEIGHT}:fps=60 \
31 | -nosound \
32 | -o /dev/null mf://\*.sgi
33 |
34 | mencoder \
35 | -ovc x264 \
36 | -x264encopts pass=2:turbo:bitrate=5928:bframes=1:subq=6:frameref=6:me=hex:partitions=all:threads=auto:keyint=300 \
37 | -mf type=sgi:w=${WIDTH}:h=${HEIGHT}:fps=60 \
38 | -nosound \
39 | -o gfp.mov mf://\*.sgi
40 |
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/images/gfp.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/template/images/gfp.mov
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/images/render.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | for i in *.tachyon; do
4 | PREFIX=$(basename ${i} .tachyon)
5 | cat ${i} | sed -e 's/Resolution [[:digit:]]\+\s[[:digit:]]\+/Resolution 1280 720/' > temp.tachyon
6 | tachyon temp.tachyon -o ${PREFIX}.tga
7 | done
8 |
9 |
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/parameter.file.01:
--------------------------------------------------------------------------------
1 | C 4.00 0.10 0
2 | CA 4.00 0.10 0
3 | CB 4.00 0.10 0
4 | CG 4.00 0.10 0
5 | CD 4.00 0.10 0
6 | CE 4.00 0.10 0
7 | CZ 4.00 0.10 0
8 | C5 4.00 0.10 0
9 | C6 4.00 0.10 0
10 | C3 4.00 0.10 0
11 | C1 4.00 0.10 0
12 | DA 4.00 0.10 0
13 | DB 4.00 0.10 0
14 | DG 4.00 0.10 0
15 | SB 4.00 0.10 0
16 | SG 4.00 0.10 0
17 | SD 4.00 0.10 0
18 | N 4.00 0.10 0
19 | NG 4.00 0.10 0
20 | NZ 4.00 0.10 0
21 | N3 4.00 0.10 0
22 | N7 4.00 0.10 0
23 | NE 4.00 0.10 0
24 | ND 4.00 0.10 0
25 | NH 4.00 0.10 0
26 | O 4.00 0.10 0
27 | OG 4.00 0.10 0
28 | OD 4.00 0.10 0
29 | OE 4.00 0.10 0
30 | OH 4.00 0.10 0
31 | OX 4.00 0.10 0
32 | PA 4.00 0.10 0
33 | EB 4.00 0.10 0
34 | EG 4.00 0.10 0
35 | ED 4.00 0.10 0
36 | EE 4.00 0.10 0
37 | XX 5.00 0.10 0
38 | Y 0.0 0.00 0
39 | Z 0.0 0.00 0
40 |
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Run the UIOWA_BD simulation
4 | #
5 |
6 | # Path to the UIOWA BD binary.
7 | # Currently, the binary specified represents a static compilation using Intel's
8 | # ifort compiler, along with the -O2 option (second level of optimization). I
9 | # find this gives good speed.
10 | UIOWA_BD=../../uiowa_bd/binaries/uiowa_bd_45-07-12-o2
11 |
12 | # Make sure the necessary directories are set up.
13 | if [ ! -d MOVIE ]; then
14 | mkdir MOVIE
15 | fi
16 |
17 | if [ ! -d OUTPUT ]; then
18 | mkdir OUTPUT
19 | fi
20 |
21 | if [ ! -d RESTARTS ]; then
22 | mkdir RESTARTS
23 | fi
24 |
25 | # Make sure there is a restart.file
26 | if [ ! -e restart.file ]; then
27 | echo "Using restart.file.initial as the initial configuration of the system."
28 | cp restart.file.initial restart.file
29 | fi
30 |
31 | ### Run the simulation ###
32 | # For the UIOWA BD main binary, you need to specify the input parameters via
33 | # STDIN. I use redirects to do this.
34 | # sim.out contains log information.
35 | # Information on the arguments:
36 | # - The first argument is the random seed. This should be less than (2^31-1)
37 | # - The second and third integers are "multiplicative factors" for determining
38 | # memory usage. Just leave them as 1 and 1.
39 | ${UIOWA_BD} 100 1 1 < sim.inp > sim.out
40 |
41 | # Note that coordinates are found in the testout.xtc file.
42 |
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/setup/NOTES.txt:
--------------------------------------------------------------------------------
1 | This file contains information on setting up the simulation directory.
2 |
3 | --------------------------------------------------------------------------------
4 | All that needs to be done is to run the script "run_prep.sh" from this
5 | directory. Before running this script, the following was already done:
6 |
7 | Prepare an input file for the preparation program (uiowa_goprep.16-06-12.exe)
8 |
9 | The input file is:
10 | 2b3p.inp
11 | Most of the parameters are straightforward, but here are some important points:
12 | - rhydro1: For the C-alpha models, this should be set to 5.3 angstroms. For
13 | the side-chain model, this should be set to 3.5 angstroms. See Adrian
14 | Elcock's paper: doi:10.1021/ct800499p. This reproduces translational and
15 | rotational diffusion coefficients for a variety of proteins, as compared
16 | to the program Hydropro.
17 |
18 | num_first_type: specifies the molecule number.
19 |
20 | pH: I believe this is only used in determining charges.
21 |
22 | Set the go distance cutoff to 5.5 angstroms. This means that any pair of
23 | residues with atoms that are closer than 5.5 angstroms counts as a go
24 | pair (native contact).
25 |
26 | epsilon, edihed1_ca, and edihed2_ca: These are the well depth for the
27 | favorable Go-potential that applies to native contacts (the 12-10 potential),
28 | and the "V1" and "V3" pseudo-dihedral barrier heights. These terms can vary,
29 | but should stay in the ratio of 12:10:5 for c-alpha models, and 25:41:21 for
30 | side-chain models. These sets of parameters were used in Dr. Elcock's paper,
31 | "Striking effects of diffusion...".
32 |
33 | coarse/fine: Here we use the coarse model, which includes one "bead" at the
34 | c-alpha position of each residue.
35 |
36 | edihed1_sc and edihed2_sc are for the side chain model, which we are not using.
37 |
38 | kbond and kangle: force constants for bond and angle terms. These are usually
39 | set to 20 kcal/angstrom/mol and 10 kcal/radian/mol, respectively.
40 |
41 | --------------------------------------------------------------------------------
42 | Here is some information on what the script "run_prep.sh" does:
43 |
44 | 1. Download the PDB file from www.rcsb.org. We use PDB code 2b3p.
45 |
46 | 2. Run the prep program, and rename the files.
47 |
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/setup/protein.inp:
--------------------------------------------------------------------------------
1 | --- pdbname
2 | protein.pdb
3 | --- protein/nucleic flex/rigd coarse/fine rhydro1 rhydro2(used for C-alpha with hybrid model)
4 | protein flex coarse 5.300 5.300
5 | --- num_first_type skip_first skip_last
6 | 1 no no
7 | --- charge/nocharge pH disulfides
8 | nocharge 7.1 no
9 | --- i_use_go/no i_skip_go_on_loops epsilon go-dist-cutoff i_use_12_10
10 | yes no 0.600 5.500 1
11 | --- edihed1_ca - edihed3_ca - edihed1_sc - edihed3_sc - kbond - kangle - i_do_impropers
12 | 0.500 0.250 0.410 0.210 20.0 10.0 no
13 |
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/setup/run_prep.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # run_prep.sh
4 |
5 | BINARIES=../../uiowa_bd/binaries
6 | SCRIPTS=../../scripts
7 |
8 | GOPREP=${BINARIES}/uiowa_goprep.16-06-12.exe
9 |
10 | echo "Using protein.pdb from this directory."
11 |
12 | # Generate the parameters for Go contacts (all of which are intramolecular)
13 | INPUT=protein.inp
14 | PREFIX=protein
15 | ${GOPREP} ${INPUT}
16 |
17 | mv annotated.pdb ${PREFIX}.annotated.pdb
18 | mv internal.parameters ${PREFIX}.internal.parameters
19 | mv centered.charge.parameters ${PREFIX}.charge.parameters
20 | mv uncentered.charge.parameters ${PREFIX}.uncentered.charge.parameters
21 | mv go.parameters ${PREFIX}.go.parameters
22 | mv folded.restart ${PREFIX}.restart.file
23 |
24 |
25 |
26 | # Move all the necessary files to the main simulation directory (../)
27 | cp ${PREFIX}.charge.parameters ../
28 | cp ${PREFIX}.internal.parameters ../
29 |
30 | cp ${PREFIX}.go.parameters ../
31 | cp ${PREFIX}.restart.file ../restart.file.initial
32 | cp ${PREFIX}.restart.file ../restart.file
33 |
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/sim.inp:
--------------------------------------------------------------------------------
1 | 0 ------------------------------------------------------------------------
2 | dseed teprint ttprint tmprint num_lst_stp num_fmd_stp num_hyd_stp num_bal_stp num_threads bond_dev_quit i_continue_after_problem?
3 | 111.0 100.000 100.0 1000.0 400 80 400 -1 1 2.5 no
4 | ----------------------- *** --------------------------------- *** ---------------
5 | f_typs f_mols i_debug q_desired mol_to_watch
6 | 1 1 no 1.050 1 1
7 | ---------------------------------------------------------------------------------
8 | rot 1 2 3 4 5 6 1 1 max zmin zmax i_pbc i_look_for_crashes periodic_bonds_okay?
9 | -750.000 750.000 -750.000 750.000 -750.000 750.000 1 no no
10 | ---------------------------------------------------------------------------------
11 | replica_exchange i_append_movie i_limit_verbosity i_use_12_10 uniform_moves steepest_descent
12 | no 0 yes 1 no no
13 | ---------------------------------------------------------------------------------
14 | r_temperature r_ionic_strength r_ion r_dielectric r_pH r_viscosity r_fconst
15 | 293.15 0.0 0.0 80.2 7.1 1.002 5.0
16 | ---------------------------------------------------------------------------------
17 | parameter file name no_elec wrap_molecules i_use_hydro full/diag integer/real scale_nb BD/LD rtemp kcut
18 | parameter.file.01 yes 2 yes full real 4.0 brownian 0.001 1
19 | - *** --------------------------------------------------------------------
20 | linker file,
21 | none
22 | ---------------------------------------------------------------------------------
23 | growrigd_file, i_grow_rigds
24 | grow_rigd_file no
25 | ---------------------------------------------------------------------------------
26 | go potentials file name, i_use_go_pairs i_use_exclusive_go num_exc_stp go_nonexclusive_file i_compare_go_with_others q_mode_threshold
27 | protein.go.parameters yes no 1000000000 none no 0.01
28 | ---------------------------------------------------------------------------------
29 | reaction criteria file, i_do_reactions nrequired
30 | reaction.criteria no 2
31 | ---------------------------------------------------------------------------------
32 | no-force file, i_omit_some_forces
33 | noforce.file no
34 | ---------------------------------------------------------------------------------
35 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
36 | protein.charge.parameters ! charges/hydrodynamic radii in this file
37 | protein.internal.parameters ! bonds/angles/dihedrals
38 | 1 ! # of copies of this molecule type
39 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
40 | time_step_s time_step_l totsimtime vdw_s vdw_m go_s go_m ele_s ele_m junk junk ff_cell
41 | 0.050 0.050 1000000.000 15.00 30.00 15.00 30.00 15.00 30.00 30.0 10.0 60.000
42 | -- *** --------------------------------------------------------------------------
43 | harmonicfile i_do_harmonic num_harm
44 | none no 3
45 | ---------------------------------------------------------------------------------
46 | pos_restraint_file i_do_pos_restraints mission_creep?
47 | System.restraints.file no no
48 | ---------------------------------------------------------------------------------
49 | wobble_file i_wobble_rigds num_wobble_rigd_typs
50 | none no 1
51 | ---------------------------------------------------------------------------------
52 | r_size l_size r_size_fac n_size f_size
53 | -1.0 -1.0 1.0 -1 0.1
54 | --------------------------------------------------------------------------------
55 | ewald_elec? kmax kappa ewald_elec_grid? ewald_elec_grid_file
56 | no 7 0.1795 no ewald_elec_grid.file
57 | ---------------------------------------------------------------------------------
58 | ewald_hydro? kmax kappa ewald_hydro_grid? ewald_hydro_grid_file
59 | no 17 0.050 no ewald_hydro_grid.file
60 | ---------------------------------------------------------------------------------
61 | fixman? fixman_tol fixman_order fixman_override? lmin lmax
62 | no 0.100 100 no 0.0010 10.0
63 | --------------------------------------------------------------------------------
64 | treecode? theta order shrink maxatm
65 | no 0.025 4 1 10
66 | --------------------------------------------------------------------------------
67 | schedule files:
68 | diff_schedule_file
69 | chol_schedule_file
70 | move_schedule_file
71 | --------------------------------------------------------------------------------
72 | i_do_growth? grow_schedule_file
73 | no grow_schedule_file
74 | --------------------------------------------------------------------------------
75 | i_do_slide? slide_schedule_file
76 | no slide_schedule_file
77 | --------------------------------------------------------------------------------
78 | walls? num_walls wall_file
79 | no 0 wall_file
80 | --------------------------------------------------------------------------------
81 | go_spline? force_function_file go_epsilon
82 | no energy_function_single_minimum.txt 1.00
83 | --------------------------------------------------------------------------------
84 | afm? x,y,z beg; x,y,z end; tip radius; force; #steps
85 | no -130.0 0.0 0.0 -90.0 0.0 0.0 40.0 10.0 80000
86 | --------------------------------------------------------------------------------
87 | dpd_sigma dpd_cut dpd_sol_mol_id
88 | 0.0 0.0 1
89 | --------------------------------------------------------------------------------
90 | umbrella? umb_mol1 umb_mol2 umb_dst umb_frc num_umb_stp
91 | no 1 1 30.0 100.0 10
92 | --------------------------------------------------------------------------------
93 | B22? B22_sample1 B22_sample2, B22_sample3, B22_rng B22_stp B22_restart_1 B22_restart_2 B22_density_pdb B22_grid_cut
94 | no 1 5000 20 150.0 2.0 Flat.restart four_structures.restart junk.pdb 5.0
95 | --------------------------------------------------------------------------------
96 | i_do_protease? protease_schedule_file
97 | no protease_schedule_file 200.0
98 | --------------------------------------------------------------------------------
99 | i_read_bond_functions i_read_nonbond_functions i_write_bond_histogram i_write_nonbond_histogram i_write_user_histogram arbitrary_intra i_read_ref_hist nref_oversample
100 | no no no no no no no 200
101 | junk 0.000 0.000
102 | angl_NMRFF_ener_uiowa_ff001.txt.final 0.000 0.000
103 | dihe_NMRFF_ener_uiowa_ff001.txt.final 0.000 0.000
104 | nbnd_ener_uiowa_ff001.txt.final 0.000 0.000 18.0 20.0 1.0 ! RDFs set to 1 between dist_ref_lo dist_ref_hi / nbnd_func_scale
105 | --------------------------------------------------------------------------------
106 | i_have_rigid_domains? rigid_domain_file domn_schedule_file
107 | no junk junk
108 | --------------------------------------------------------------------------------
109 | i_do_MC_moves? monte_carlo_file mont_schedule_file MC_clstr_E MC_factr_E MC_timestep
110 | no monte_carlo_file mont_schedule_file -2.0 1.0 12500.0
111 | --------------------------------------------------------------------------------
112 | i_do_NAM? NAM_runtype NAM_bsurf NAM_qsurf NAM_num_runs NAM_mol_file
113 | no 1 300.0 405.0 100 NAM_mol_file
114 | --------------------------------------------------------------------------------
115 | i_user_energy num_user_energy user_energy_matchup_file
116 | no 1 matchup.txt
117 | test_free_energy_final.txt.smooth 0.80 100.0 ! user_energy_file(1),user_energy_scal(1),user_energy_ceil(1)
118 |
--------------------------------------------------------------------------------
/05_MolecularDynamics/template/sim.inp.notes:
--------------------------------------------------------------------------------
1 | This file describes the parameters that one may specify in the simulation
2 | input ("sim.inp") file.
3 | --------------------------------------------------------------------------------
4 |
5 | dseed: At one point, this appears to have been used as the random seed for a
6 | simulation. As of versions 32-07-12 and 45-07-12, this no longer does
7 | anything.
8 |
9 | teprint: Time between writing energy information, in ps.
10 |
11 | ttprint: time between writing center-of-mass information, in ps.
12 |
13 | tmprint: time between writing out movie files (PDB format), in ps.
14 |
15 | num_lst_step: number of steps between updating the nonbonded list.
16 |
17 | num_fmd_step: number of steps between updating medium range forces (see vdw_m).
18 |
19 | num_hyd_step: number of steps between updating hydrodynamic forces.
20 |
21 | num_bal_step: According to comments in uiowa_bd_openmp.32-07-12.f and
22 | uiowa_bd_openmp.45-07-12.f, this is the "number of steps required before
23 | update of hydrodynamics", just like num_hyd_step. Inspection of the code
24 | suggests that it is actually the number of steps between balancing the
25 | computational load between multiple OMP processes.
26 |
27 | num_threads: The number of OMP threads to use. This appears to override OMP
28 | environment variables.
29 |
30 | bond_dev_quit: If a bond length deviates from its equilibrium length by more
31 | than this amount (presumably, it is in angstroms), then the program will quit.
32 | Set to a negative value to turn this off.
33 |
34 | i_continue_after_problem?: if 'yes', the program will continue to run even after
35 | running into problems.
36 |
37 | f_typs: number of molecule types
38 |
39 | f_mols: number of molecules
40 |
41 | i_debug: if 'yes', write out extra information for debugging
42 |
43 | q_desired: terminate the simulation if the fraction of native contacts (Q) is
44 | at least this great.
45 |
46 | mol_to_watch: You should specify two integers (such as "1 1") here, but it is
47 | not clear to me what this actually does.
48 |
49 | rot: if "rot" begins this line, then the next 8 numbers are read and used as:
50 | ivec1a
51 | ivec1b
52 | ivec2a
53 | ivec2b
54 | ivec3a
55 | ivec3b
56 | numcen1
57 | numcen2
58 | It is not clear what these numbers do. Just leave these numbers as you found them.
59 |
60 | xmin, xmax, ymin, ymax, zmin, zmax:
61 | While the xmin, xmax, ymin, ymax, and zmin labels do not appear, the 6 numbers
62 | on the line after "rot" are indeed these values. This specifies the
63 | dimensions of the system. This appears to apply for periodic boundary conditions.
64 |
65 | i_pbc: Specifies whether periodic boundary conditions should be applied. If
66 | periodic boundary conditions should be applied, this should be set to "1".
67 |
68 | i_look_for_crashes: It is not clear what this does. Just keep it set to "no".
69 |
70 | periodic_bonds_okay: Again, it was not clear from the source code what this
71 | does. Keep it set to "no" to be safe.
72 |
73 | replica_exchange: "yes" if you want to do replica exchange, and "no" otherwise.
74 | It appears that replica exchange needs to be performed with a "master bash
75 | script" that is mentioned in comments in the uiowa_bd source code, but I do
76 | not think we have such a script.
77 |
78 | i_append_movie: Comment from source code:
79 | """
80 | i_append_movie is used to determine whether we want to add to movies
81 | rather than start again at zero - it looks for the movie with the
82 | higher number and either starts there or adds one depending on the
83 | value of i_append_movie
84 | i_append_movie = 1 --> overwrite the last movie
85 | = 2 --> add to the list
86 | """
87 |
88 | i_limit_verbosity: Comment from source code:
89 | """
90 | i_limit_verbosity limits how much is written out when we use the
91 | arbitrary functions of our force field if 'yes' then don't write out
92 | much
93 | """
94 |
95 | i_use_12_10:
96 | 1: 12-10 potential
97 | 2: 12-06 potential
98 | 3: 08-04 potential
99 | Note that this corresponds to the variable "i_use_v_typ" in the source code.
100 |
101 | uniform_moves: Comment from the uiowa_bd source code:
102 | 02-08-07 implement a uniform random step move
103 | determine max distance that any one protein can move in one timestep
104 | note that this only works for r ms at the moment - and doesn't
105 | deal with their rotations either yet
106 |
107 | steepest_descent: Doesn't do anything. Ostensibly setting this to "yes" would
108 | make the program do steepest descent minimization, but there isn't actually
109 | code to do so.
110 |
111 | r_temperature: The temperature, in Kelvin.
112 |
113 | r_ionic_strength: The ionic strength. Presumably units of "molar".
114 |
115 | r_ion: Set this to 0.0, or the program will not run.
116 |
117 | r_dielectric: The dielectric constant (relative permittivity).
118 |
119 | r_pH: This is only used if "i_do_protease" is set to true, in which case it is
120 | used in calculating the charge of the carboxy terminus of the new peptide.
121 |
122 | r_viscosity: The viscosity in units of mPa*s, also known as cP
123 |
124 | r_fconst: A force constant for some sort of short range harmonic potential. It
125 | is not clear to me what this applies to.
126 |
127 | parameter file name: specify the name of the parameter file with information on
128 | the "epsilon" for repulsive terms.
129 |
130 | no_elec: "no" if you want electrostatics, "yes" if you do not want electrostatics.
131 |
132 | wrap_molecules: Specify how molecules are wrapped in movie files.
133 | 0: do not wrap in movie files
134 | 1: Wrap atoms in movie files
135 | 2: wrap molecules in movie files
136 |
137 | i_use_hydro: "yes" to use hydrodynamic interactions, otherwise "no".
138 |
139 | full/diag: Specify the type of hydrodynamics to use.
140 | Options include:
141 | no: none
142 | full: full
143 | tree: tree
144 | diag: diagonal
145 | geye: geyer
146 | mult: multi
147 | intr: intra
148 |
149 | integer/real: Valid options appear to be "integer" and "cutoff". This appears
150 | to relate to the hydrodynamic calculations.
151 |
152 | scale_nb: Not clear what this does, but it appears to relate to the hydrodynamic
153 | calculations.
154 |
155 | BD/LD: What type of dynamics to do. The following are choices:
156 | langevin
157 | brownian
158 | dpd: dissipative particle dynamics
159 |
160 | r_temp: Comment from uiowa_bd code:
161 | """
162 | mass of individual subunits or radius of
163 | molecules if using multi_hydrodynamics
164 | """
165 |
166 | kcut: Does nothing. The code for associated calculations is present but
167 | commented out.
168 |
169 | linker file: Not sure what this does. Keep it set to "1".
170 |
171 | growrigd file: Name of file with information on growing rigid molecules.
172 |
173 | i_grow_rigds: Not sure what growing rigid molecules does, so keep this set to "no".
174 |
175 | go potentials file name: Name of file with Go parameters (usually something like
176 | "protein.go.parameters").
177 |
178 | i_use_go_pairs: Set to "yes" to use Go potentials, otherwise no.
179 |
180 | i_use_exclusive_go: According to comments in the uiowa_bd code, this means:
181 | "go-pairs are exclusive for mol pairs and domain types"
182 | """
183 | note that i_use_exclusive_go is used for cases where a molecule has
184 | multiple modes of interaction with other molecules
185 | """
186 |
187 | num_exc_stp: Number of steps between updating the list of exclusive go pairs.
188 |
189 | go_nonexclusive_file: Not sure how to use this. Presumably this is the name of
190 | a file where one can specify exceptions to the "i_use_exclusive_go" rule.
191 |
192 | i_compare_go_with_others: Not sure what this does.
193 |
194 | q_mode_threshold: Not sure what this does. It appears to relate to i_use_exclusive_go.
195 |
196 | reaction criteria file, i_do_reactions, nrequired: ??
197 |
198 | no-force file, i_omit_some_forces: ??
199 |
200 | time_step_s, time_step_l: short and long time steps, respectively. In my
201 | experience we keep these the same. Some interactions apparently can be updated
202 | at longer time steps.
203 |
204 | totsimtime: The total simulation time, in ps.
205 |
206 | vdw_s: van der Waals interactions at this length (in Angstroms) or shorter count
207 | as "short range" interactions
208 |
209 | vdw_m: van der Waals interactions less than this length (in Angstroms) and
210 | greater than vdw_s count as "medium range" interactions
211 |
212 | go_s, go_m: Same as vdw_s and vdw_m, except for Go interactions
213 |
214 | ele_s, ele_m: Same as vdw_s and vdw_m, except for electrostatic interactions
215 |
216 | ele_l ("junk"): distance cutoff for long range electrostatics
217 |
218 | cut_h (the second "junk"): distance cutoff for hydrodynamic interactions
219 |
220 | ff_cell: ??
221 |
222 | harmonicfile, i_do_harmonic, num_harm: defunct.
223 |
224 | position restraint information: Not sure how to use this.
225 |
226 | wobble_file, etc: defunct.
227 |
228 | r_size, l_size, r_size_fac, n_size, f_size: Parameters for confinement potential.
229 | Spherical potential: Set "r_size" to the radius of the sphere in angstroms.
230 | Set "l_size" to a negative number.
231 | Cylindrical potential: Set "r_size" to the radius of the cylinder in angstroms,
232 | and set "l_size" to the height of the cylinder in angstroms.
233 |
234 | r_size_fac: the factor by which r_size is first scaled.
235 | n_size: appears to be used with shrinking boxes.
236 | f_size: a force constant for the confinement potential
237 |
238 |
239 | We do not use any of the parameters found lower in the file.
240 |
--------------------------------------------------------------------------------
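As a rough sanity check of the timing parameters described above, one can relate the time step, total simulation time, and output interval taken from the template sim.inp. The numbers below come from that file; the arithmetic is only a sketch:

    # Sketch: relate the timing parameters in the template sim.inp.
    time_step = 0.050          # ps (time_step_s = time_step_l in the template)
    totsimtime = 1_000_000.0   # ps, total simulation time
    tmprint = 1000.0           # ps, interval between movie (PDB) frames

    n_steps = int(totsimtime / time_step)   # 20,000,000 integration steps
    n_frames = int(totsimtime / tmprint)    # 1,000 movie frames written
    print(n_steps, n_frames)

--------------------------------------------------------------------------------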
/05_MolecularDynamics/tools/calc_rmsd.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import numpy
4 | import os
5 | import sys
6 | sys.path.append(os.path.dirname(os.path.realpath(__file__)))
7 | import rmsd
8 | import MDAnalysis
9 |
10 | '''
11 | ---------
12 | Script
13 | ---------
14 | This script takes an xtc trajectory file generated from a UIOWA-BD simulation and a pdb file for the protein of interest
15 | and calculates the RMS deviation between each frame in the trajectory and the reference pdb structure
16 |
17 | Resulting RMS deviation values for each frame are written to a text file named 'rmsd.txt' in the
18 | current working directory
19 |
20 | ---------
21 | Notes
22 | ---------
23 | This script should be run from the main simulation directory with the command 'python (path to script) --pdb (reference pdb) --xtc (trajectory file)'
24 |
25 | The rmsd.py module must be located in the same directory as this script (calc_rmsd.py) when this script is run
26 |
27 | The pdb files must be 'go' models (not full atom models) and can be taken from the first frame located in the output
28 | MOVIE directory
29 | '''
30 |
31 | def xtc2numpyarray(xtcfilepath, pdbfilepath):
32 | '''
33 | Loads coordinates from an xtc trajectory file and returns a numpy array,
34 | using a pdb file as the topology.
35 |
36 | ---------
37 | Arguments
38 | ---------
39 | xtcfilepath: (str) path to the xtc file
40 | pdbfilepath: (str) path to the pdb file
41 |
42 | ---------
43 | Returns
44 | ---------
45 | coords_arr: (numpy.ndarray) numpy array of shape (nframes, natoms, 3)
46 | representing coordinates of each frame in trajectory
47 | '''
48 |
49 | f = MDAnalysis.Universe(pdbfilepath, xtcfilepath)
50 | coords = []
51 | for frame in f.trajectory:
52 | coords.append(f.atoms.positions)
53 | coords_arr = numpy.asarray(coords)
54 | return coords_arr
55 |
56 | def pdb2numpyarray(pdbfilepath):
57 | '''
58 | Loads a pdb file and returns a numpy array of the coordinates
59 |
60 | ---------
61 | Arguments
62 | ---------
63 | pdbfilepath: (str) path to the pdb file
64 |
65 | ---------
66 | Returns
67 | ---------
68 | coords_arr: (numpy.ndarray) numpy array of shape (natoms, 3)
69 | representing coordinates of the reference structure
70 | '''
71 |
72 | f = MDAnalysis.Universe(pdbfilepath)
73 | coords = f.atoms.positions
74 | coords_arr = numpy.asarray(coords)
75 | return coords_arr
76 |
77 | def parse_arguments():
78 | '''
79 | Parses command line arguments.
80 |
81 | --------
82 | Returns
83 | --------
84 | pdbpath: (str) The file path to the reference pdb, which is also used as a
85 | topology file for the xtc file
86 |
87 | xtcpath: (str) The file path to the xtc trajectory file.
88 | '''
89 | parser = argparse.ArgumentParser()
90 | parser.add_argument('--pdb', dest='pdbpath', required=True,
91 | help="The file path to the reference PDB, which is used"
92 | " both as the reference structure and topology "
93 | "file for the XTC trajectory file."
94 | )
95 | parser.add_argument('--xtc', dest='xtcpath', required=True,
96 | help="The file path to the XTC trajectory file."
97 | )
98 | args = parser.parse_args()
99 | return args.pdbpath, args.xtcpath
100 |
101 |
102 | def main():
103 | '''
104 | Run the main function.
105 | '''
106 | # First, parse command line arguments
107 | pdbpath, xtcpath = parse_arguments()
108 |
109 | # Get numpy array of coordinates for trajectory
110 | # Again, note the pdb files must be 'go' models and not full-atom models
111 | coordinates = xtc2numpyarray(xtcpath, pdbpath)
112 |
113 | # Get numpy array of coordinates for reference structure
114 | ref_coordinates = pdb2numpyarray(pdbpath)
115 |
116 | # Iterate through the frames of the trajectory and calculate the RMS
117 | # deviation relative to the ref_coordinates
118 | rmsd_list = []
119 | for frame in coordinates:
120 |
121 | # This function comes from the rmsd.py module, which must be in the same
122 | # directory as this script
123 | r = rmsd.rmsd(frame, ref_coordinates)
124 | rmsd_list.append(r)
125 | numpy.savetxt("rmsd.txt", rmsd_list)
126 |
127 | if __name__ == "__main__":
128 | main()
129 |
--------------------------------------------------------------------------------
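A minimal interactive sketch of the same workflow using the helper functions above is shown below. It assumes MDAnalysis is installed (it is not listed in environment.yml) and that it is run from the 05_MolecularDynamics directory, so that the tools/ modules are importable and the trajectories/ files are readable:

    # Sketch: compute an RMSD time series with the helpers from calc_rmsd.py.
    # Assumes MDAnalysis is installed and the repo's 05_MolecularDynamics layout.
    import sys
    sys.path.append("tools")

    import numpy
    import rmsd
    from calc_rmsd import xtc2numpyarray, pdb2numpyarray

    coords = xtc2numpyarray("trajectories/mdm2.xtc", "trajectories/mdm2_folded.pdb")
    ref = pdb2numpyarray("trajectories/mdm2_folded.pdb")
    values = [rmsd.rmsd(frame, ref) for frame in coords]
    numpy.savetxt("rmsd.txt", values)

--------------------------------------------------------------------------------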
/05_MolecularDynamics/tools/rmsd.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import numpy
3 | import scipy.spatial.distance
4 |
5 | def distance_squared(p0, p1):
6 | '''
7 | Find the square of the distance between ``p0`` and ``p1``.
8 |
9 | ---------
10 | Arguments
11 | ---------
12 | p0: (numpy.ndarray) A shape (3,) array representing x,y,z coordinates
13 | p1: (numpy.ndarray) A shape (3,) array representing x,y,z coordinates
14 |
15 | -------
16 | Returns
17 | -------
18 | d2: square of the euclidean distance between ``p0`` and ``p1``.
19 | '''
20 | return scipy.spatial.distance.euclidean(p0,p1)**2
21 |
22 | def centroid(coordinates):
23 | '''
24 | Find the centroid of ``coordinates``.
25 |
26 | ---------
27 | Arguments
28 | ---------
29 | coordinates: (numpy.ndarray) A shape (natoms, 3) array of coordinates
30 |
31 | -------
32 | Returns
33 | -------
34 | c: (numpy.ndarray) A shape (1,3) array of coordinates indicating the center
35 | of geometry of ``coordinates``.
36 | '''
37 | return coordinates.mean(axis=0)[numpy.newaxis,:]
38 |
39 | def rmsd(mobile, reference):
40 | '''
41 | Calculates the RMS deviation between two structures following least-
42 | squares alignment. Uses the Kabsch algorithm.
43 |
44 | ---------
45 | Arguments
46 | ---------
47 | mobile: (numpy.ndarray) shape (natoms, 3) numpy array, where natoms is the
48 | number of atoms, representing the coordinates of the protein for which
49 | to calculate the RMS deviation
50 |
51 | reference: (numpy.ndarray) shape (natoms, 3) numpy array representing the
52 | reference structure.
53 |
54 | -------
55 | Returns
56 | -------
57 | rmsd_: (float) The RMS deviation of ``mobile`` relative to ``reference``,
58 | calculated via the following equation:
59 |
60 | RMS deviation = sqrt( (1/natoms) * sum( (x_0,i - x_1,i)^2 + (y_0,i - y_1,i)^2 + (z_0,i - z_1,i)^2 ) )
61 |
62 | where i runs over the atom index (from 0 to natoms-1), and the
63 | calculation is performed following least-squares alignment.
64 | '''
65 |
66 | # Center both mobile and reference on their centroids (note: this modifies the input arrays in place).
67 | c = centroid(reference)
68 | reference -= c
69 | c = centroid(mobile)
70 | mobile -= c
71 |
72 | # Use Kabsch algorithm to calculate optimal rotation matrix.
73 | # Calculate covariance matrix.
74 | covariance_matrix = numpy.dot(numpy.transpose(reference),
75 | mobile)
76 |
77 | # Singular Value Decomposition.
78 | V, S, Wt = numpy.linalg.svd(covariance_matrix)
79 | d = numpy.sign(numpy.linalg.det(numpy.dot(numpy.transpose(Wt),
80 | numpy.transpose(V)
81 | )
82 | )
83 | )
84 |
85 | U = numpy.dot(numpy.transpose(Wt),
86 | numpy.dot(numpy.array(((1,0,0),
87 | (0,1,0),
88 | (0,0,d))),
89 | numpy.transpose(V)
90 | )
91 | )
92 |
93 | # Multiplying mobile (n*3 matrix) by 3*3 optimal rotation matrix
94 | # ``U`` gives the least-squares alignment.
95 | l_aligned = mobile.dot(U)
96 |
97 | # Sum distances squared over each particle, and take the square root to
98 | # return RMSD.
99 | square_sum = 0
100 | for i in range(len(l_aligned)):
101 | square_sum += distance_squared(l_aligned[i],reference[i])
102 | av = square_sum/len(l_aligned)
103 | rmsd_ = numpy.sqrt(av)
104 | return rmsd_
105 |
--------------------------------------------------------------------------------
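As a quick self-check of the alignment above, a rigidly rotated and translated copy of a structure should give an RMSD of essentially zero. This is only an illustrative sketch, assuming it is run from the tools/ directory (or with tools/ on the Python path):

    # Sketch: rmsd() should return ~0 for a rigidly rotated + translated copy.
    import numpy
    import rmsd  # the module above (tools/rmsd.py)

    coords = numpy.random.rand(10, 3)
    theta = 0.3
    rotation = numpy.array([[numpy.cos(theta), -numpy.sin(theta), 0.0],
                            [numpy.sin(theta),  numpy.cos(theta), 0.0],
                            [0.0,               0.0,              1.0]])
    moved = coords.dot(rotation) + numpy.array([1.0, -2.0, 3.0])
    print(rmsd.rmsd(moved, coords))  # ~0 up to numerical noise

--------------------------------------------------------------------------------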
/05_MolecularDynamics/trajectories/.p53.xtc_offsets.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/trajectories/.p53.xtc_offsets.npz
--------------------------------------------------------------------------------
/05_MolecularDynamics/trajectories/cdk8.xtc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/trajectories/cdk8.xtc
--------------------------------------------------------------------------------
/05_MolecularDynamics/trajectories/mdm2.xtc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/05_MolecularDynamics/trajectories/mdm2.xtc
--------------------------------------------------------------------------------
/05_MolecularDynamics/trajectories/mdm2_folded.pdb:
--------------------------------------------------------------------------------
1 | ATOM 1 N GLN 1 2.092 0.001 -1.242 0.56
2 | ATOM 2 CA ILE 2 5.388 0.769 0.492 1.12
3 | ATOM 3 CA ASN 3 7.769 -1.743 -1.086 1.69
4 | ATOM 4 CA GLN 4 11.468 -2.556 -0.742 2.25
5 | ATOM 5 CA VAL 5 14.099 -0.388 -2.430 2.81
6 | ATOM 6 CA ARG 6 17.818 -0.369 -3.219 3.37
7 | ATOM 7 CA PRO 7 19.905 2.832 -2.987 3.93
8 | ATOM 8 CA LYS 8 22.282 2.960 -5.954 4.49
9 | ATOM 9 CA LEU 9 25.648 4.732 -5.867 5.06
10 | ATOM 10 CA PRO 10 24.486 8.378 -5.793 5.62
11 | ATOM 11 CA LEU 11 21.520 7.188 -3.729 6.18
12 | ATOM 12 CA LEU 12 23.440 5.292 -1.048 6.74
13 | ATOM 13 CA LYS 13 25.655 8.211 -0.026 7.30
14 | ATOM 14 CA ILE 14 22.443 10.183 0.477 7.87
15 | ATOM 15 CA LEU 15 21.070 7.909 3.199 8.43
16 | ATOM 16 CA HIS 16 24.563 7.173 4.516 8.99
17 | ATOM 17 CA ALA 17 25.241 10.845 5.243 9.55
18 | ATOM 18 CA ALA 18 21.862 10.857 6.988 10.11
19 | ATOM 19 CA GLY 19 22.824 8.011 9.301 10.67
20 | ATOM 20 CA ALA 20 22.019 5.157 6.931 11.24
21 | ATOM 21 CA GLN 21 24.109 2.154 5.894 11.80
22 | ATOM 22 CA GLY 22 23.733 -0.895 3.671 12.36
23 | ATOM 23 CA GLU 23 22.188 -1.620 0.280 12.92
24 | ATOM 24 CA MET 24 18.574 -2.392 1.179 13.48
25 | ATOM 25 CA PHE 25 15.787 -0.130 2.442 14.04
26 | ATOM 26 CA THR 26 12.006 0.229 2.242 14.61
27 | ATOM 27 CA VAL 27 9.821 3.241 1.455 15.17
28 | ATOM 28 CA LYS 28 9.542 3.952 5.180 15.73
29 | ATOM 29 CA GLU 29 13.307 3.804 5.697 16.29
30 | ATOM 30 CA VAL 30 13.979 6.045 2.698 16.85
31 | ATOM 31 CA MET 31 11.302 8.595 3.587 17.42
32 | ATOM 32 CA HIS 32 12.770 8.796 7.091 17.98
33 | ATOM 33 CA TYR 33 16.249 9.388 5.672 18.54
34 | ATOM 34 CA LEU 34 15.088 12.068 3.235 19.10
35 | ATOM 35 CA GLY 35 13.849 14.396 5.959 19.66
36 | ATOM 36 CA GLN 36 16.924 13.797 8.102 20.22
37 | ATOM 37 CA TYR 37 19.148 14.398 5.076 20.79
38 | ATOM 38 CA ILE 38 17.580 17.773 4.296 21.35
39 | ATOM 39 CA MET 39 17.776 18.859 7.936 21.91
40 | ATOM 40 CA VAL 40 21.379 17.649 8.101 22.47
41 | ATOM 41 CA LYS 41 22.403 19.661 5.040 23.03
42 | ATOM 42 CA GLN 42 20.104 22.527 6.022 23.60
43 | ATOM 43 CA LEU 43 18.044 22.904 2.846 24.16
44 | ATOM 44 CA TYR 44 14.675 24.269 3.965 24.72
45 | ATOM 45 CA ASP 45 12.883 27.341 5.313 25.28
46 | ATOM 46 CA GLN 46 13.596 27.319 9.050 25.84
47 | ATOM 47 CA GLN 47 10.411 29.379 9.325 26.40
48 | ATOM 48 CA GLU 48 8.396 26.697 7.532 26.97
49 | ATOM 49 CA GLN 49 9.711 23.143 7.859 27.53
50 | ATOM 50 CA HIS 50 6.915 22.083 5.508 28.09
51 | ATOM 51 CA MET 51 8.779 24.114 2.886 28.65
52 | ATOM 52 CA VAL 52 11.986 22.469 1.669 29.21
53 | ATOM 53 CA TYR 53 14.643 24.349 -0.295 29.78
54 | ATOM 54 CA CYS 54 16.316 21.909 -2.686 30.34
55 | ATOM 55 CA GLY 55 18.135 24.903 -4.134 30.90
56 | ATOM 56 CA GLY 56 20.755 23.015 -6.107 31.46
57 | ATOM 57 CA ASP 57 21.882 20.132 -3.911 32.02
58 | ATOM 58 CA LEU 58 21.711 16.410 -4.683 32.58
59 | ATOM 59 CA LEU 59 17.944 16.363 -4.154 33.15
60 | ATOM 60 CA GLY 60 17.627 19.590 -6.122 33.71
61 | ATOM 61 CA GLU 61 19.225 18.039 -9.197 34.27
62 | ATOM 62 CA LEU 62 17.412 14.709 -8.896 34.83
63 | ATOM 63 CA LEU 63 14.074 16.533 -8.824 35.39
64 | ATOM 64 CA GLY 64 15.040 19.446 -11.051 35.96
65 | ATOM 65 CA ARG 65 13.201 22.142 -9.112 36.52
66 | ATOM 66 CA GLN 66 13.882 24.911 -6.594 37.08
67 | ATOM 67 CA SER 67 11.818 23.827 -3.588 37.64
68 | ATOM 68 CA PHE 68 9.135 21.336 -2.556 38.20
69 | ATOM 69 CA SER 69 6.622 21.013 0.282 38.76
70 | ATOM 70 CA VAL 70 7.157 18.185 2.768 39.33
71 | ATOM 71 CA LYS 71 3.532 18.036 3.908 39.89
72 | ATOM 72 CA ASP 72 2.661 17.638 0.227 40.45
73 | ATOM 73 CA PRO 73 4.063 14.374 -1.198 41.01
74 | ATOM 74 CA SER 74 3.678 15.582 -4.785 41.57
75 | ATOM 75 CA PRO 75 7.146 16.101 -6.316 42.13
76 | ATOM 76 CA LEU 76 8.687 13.684 -3.815 42.70
77 | ATOM 77 CA TYR 77 6.909 10.675 -5.314 43.26
78 | ATOM 78 CA ASP 78 7.251 11.956 -8.878 43.82
79 | ATOM 79 CA MET 79 11.032 11.965 -8.461 44.38
80 | ATOM 80 CA LEU 80 11.257 8.738 -6.459 44.94
81 | ATOM 81 CA ARG 81 9.377 7.085 -9.322 45.51
82 | ATOM 82 CA LYS 82 12.665 6.649 -11.187 46.07
83 | ATOM 83 CA ASN 83 14.845 7.661 -8.239 46.63
84 | ATOM 84 CA LEU 84 13.838 5.178 -5.538 47.19
85 | ATOM 85 CA VAL 85 14.896 1.846 -7.037 47.75
86 | ATOM 86 CA THR 86 12.022 -0.390 -5.939 48.31
87 | ATOM 87 CA LEU 87 11.556 -4.126 -6.474 48.88
88 | ATOM 88 CA ALA 88 8.630 -6.464 -7.136 49.44
89 | ATOM 89 O THR 89 7.596 -4.702 -10.344 50.00
90 | CONECT 1 2
91 | CONECT 2 3
92 | CONECT 3 4
93 | CONECT 4 5
94 | CONECT 5 6
95 | CONECT 6 7
96 | CONECT 7 8
97 | CONECT 8 9
98 | CONECT 9 10
99 | CONECT 10 11
100 | CONECT 11 12
101 | CONECT 12 13
102 | CONECT 13 14
103 | CONECT 14 15
104 | CONECT 15 16
105 | CONECT 16 17
106 | CONECT 17 18
107 | CONECT 18 19
108 | CONECT 19 20
109 | CONECT 20 21
110 | CONECT 21 22
111 | CONECT 22 23
112 | CONECT 23 24
113 | CONECT 24 25
114 | CONECT 25 26
115 | CONECT 26 27
116 | CONECT 27 28
117 | CONECT 28 29
118 | CONECT 29 30
119 | CONECT 30 31
120 | CONECT 31 32
121 | CONECT 32 33
122 | CONECT 33 34
123 | CONECT 34 35
124 | CONECT 35 36
125 | CONECT 36 37
126 | CONECT 37 38
127 | CONECT 38 39
128 | CONECT 39 40
129 | CONECT 40 41
130 | CONECT 41 42
131 | CONECT 42 43
132 | CONECT 43 44
133 | CONECT 44 45
134 | CONECT 45 46
135 | CONECT 46 47
136 | CONECT 47 48
137 | CONECT 48 49
138 | CONECT 49 50
139 | CONECT 50 51
140 | CONECT 51 52
141 | CONECT 52 53
142 | CONECT 53 54
143 | CONECT 54 55
144 | CONECT 55 56
145 | CONECT 56 57
146 | CONECT 57 58
147 | CONECT 58 59
148 | CONECT 59 60
149 | CONECT 60 61
150 | CONECT 61 62
151 | CONECT 62 63
152 | CONECT 63 64
153 | CONECT 64 65
154 | CONECT 65 66
155 | CONECT 66 67
156 | CONECT 67 68
157 | CONECT 68 69
158 | CONECT 69 70
159 | CONECT 70 71
160 | CONECT 71 72
161 | CONECT 72 73
162 | CONECT 73 74
163 | CONECT 74 75
164 | CONECT 75 76
165 | CONECT 76 77
166 | CONECT 77 78
167 | CONECT 78 79
168 | CONECT 79 80
169 | CONECT 80 81
170 | CONECT 81 82
171 | CONECT 82 83
172 | CONECT 83 84
173 | CONECT 84 85
174 | CONECT 85 86
175 | CONECT 86 87
176 | CONECT 87 88
177 | CONECT 88 89
178 |
--------------------------------------------------------------------------------
/06_Basis_Sets/presentation/beyond_lcao.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/06_Basis_Sets/presentation/beyond_lcao.pdf
--------------------------------------------------------------------------------
/06_Basis_Sets/presentation/images/fourier_tranform.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/06_Basis_Sets/presentation/images/fourier_tranform.gif
--------------------------------------------------------------------------------
/06_Basis_Sets/presentation/lcao_basis_sets.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/06_Basis_Sets/presentation/lcao_basis_sets.pdf
--------------------------------------------------------------------------------
/A1_Git/git_intro.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shivupa/QMMM_study_group/ce663d01876e878d84fadc6a411f636269d2a3f3/A1_Git/git_intro.pdf
--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
1 | # People
2 | The following people are contributors to the QM/MM Study Group:
3 | - Shiv Upadhyay: shu8@pitt.edu
4 | - Amanda Dumi: aed63@pitt.edu
5 | - Dakota Folmsbee: dlf57@pitt.edu
6 | - Bryan Henderson: bvh5@pitt.edu
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | This project welcomes contributions in the form of example notebooks, example code, theory documentation, and bug fixes. Below there are directions on how to get started as well as guidelines on contributions.
4 |
5 |
6 | # Getting started
7 | ## 1. Fork this repository
8 | This creates a copy for you on your GitHub account (click the "Fork" button on this repository's page). The changes you make to this version will not be a part of everyone's code yet.
9 |
10 | ## 2. Clone your fork locally
11 | This creates a local copy on your computer(s) that you can edit.
12 |
13 | ## 3. Add remote
14 | Once your forked copy of the repository is cloned locally, you will want to add the main repository as an upstream source so that you can pull in any changes made to it. To do this, run `git remote add upstream https://github.com/shivupa/QMMM_study_group`. Check that the upstream was added by running `git remote -v`. If added correctly, you should be able to run `git pull upstream master` to obtain any new changes made to the main repository.
15 |
16 | ## 4. Create development environment
17 | To ensure that all of us are working with the same packages and libraries, we have provided an `environment.yml` file in the root directory of this repository. This helps reduce the number of "well, it works on my system" problems and allows all users to set up the environment and immediately try out the notebooks. Instructions for how to set up the provided development environment can be found in the [wiki](https://github.com/shivupa/QMMM_study_group/wiki/Environment-Setup).
18 |
19 | ## 5. Add notebooks, code, presentations, or fix bugs
20 |
21 | ## 6. Add the changes to your forked version
22 | To see what files have changed use the command `git status`.
23 | Add the files that you have changed using `git add filename`.
24 | Explain what changes you have made using an informative commit message with `git commit -m "My message.."`.
25 | Push the changes to your forked version using `git push`.
26 |
27 | [Git](https://git-scm.com/docs/gittutorial) provides great tutorials on how to use git for version control for your projects.
28 |
29 | ## 7. Add your changes to the main repository
30 | Go to the [main repository](https://github.com/shivupa/QMMM_study_group) and open a pull request from *your* master branch to the project master branch. For more information on pull requests see [GitHub Help: About pull requests](https://help.github.com/articles/about-pull-requests/). Also, **DO NOT** merge your own pull requests.
31 |
32 | # Guidelines
33 | Pull requests that do not follow these guidelines will not be merged.
34 |
35 | ## Directory structure
36 | For best readability and consistency, each topic will have its own topic directory. This topic directory needs to contain the pdf presentation of the topic (if applicable), a basics directory, and an advanced directory. The basics directory will contain framework notebooks/code that supplement the presentation of that topic. After a few days a solution notebook needs to be added to the basics directory. The advanced directory will contain notebooks/code that build further on the topic, such as faster algorithms or handling more complex systems.
37 |
38 |
39 | ## Naming
40 | #### Topic directory
41 | Topic directories are named such that the first letter in the name of the directory and all letters in the technique's name are capitalized.
42 | Example: 05_Modern_DFT
43 |
44 | #### Subdirectories and files
45 | All subdirectories and files need to be lowercase. Do not use spaces in names; use underscores instead.
46 | Example: uhf_psi4.ipynb
47 |
48 | ## Presentation
49 | The topic presentation can be made in your preferred presentation editor but must be exported as a pdf.
50 |
51 | ## pep8
52 | This project aims to follow the [pep8 style guide](https://www.python.org/dev/peps/pep-0008/) for python programming. The development environment that we provide includes the jupyter notebook extensions package, which allows users to run autopep8 to help ensure the notebooks follow the pep8 style guide. This extension is off by default but can be turned on in the Nbextensions tab after launching jupyter notebook by clicking the box next to autopep8. To autopep8 a jupyter notebook, hold down shift and select the hammer icon.
53 |
54 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2018, QM/MM Study Group (See AUTHORS.md)
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # QM/MM study group
2 | [](https://mybinder.org/v2/gh/shivupa/QMMM_study_group/master) [](https://opensource.org/licenses/BSD-3-Clause)
3 |
4 | **Meetings are Fridays at 10 am.** Meetings will consist of sections spanning two weeks for each topic. The first week will cover theory and present framework code. In the following week, you will attempt to code the method yourself. During the second meeting within a section, we will present a working version of the code, address any issues or questions, and discuss more advanced topics relating to the method.
5 |
6 | Set up instructions and a list of useful references can be found in the [Wiki](https://github.com/shivupa/QMMM_study_group/wiki)!
7 |
8 | Try our notebooks without setting anything up by clicking on the Binder badge!
9 |
10 | ## Contact information
11 | - Amanda Dumi: aed63@pitt.edu
12 | - Dakota Folmsbee: dlf57@pitt.edu
13 | - Shiv Upadhyay: shu8@pitt.edu
14 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: qmmm
2 | channels:
3 | - psi4
4 | - anaconda
5 | dependencies:
6 | - python=3.6
7 | - numpy
8 | - scipy
9 | - matplotlib
10 | - pandas
11 | - sympy
12 | - IPython
13 | - jupyter
14 | - cython
15 | - scikit-learn
16 | - pip:
17 | - pyscf
18 | - autopep8
19 | - jupyter_contrib_nbextensions
20 | - scikit-optimize
21 | - psi4
22 | - cmake
23 |
--------------------------------------------------------------------------------