├── .github
└── workflows
│ └── pythonpackage.yml
├── .gitignore
├── LICENSE
├── README.md
├── assets
├── 404error.png
├── colab.png
├── conda_env_name.png
├── kaggle_upload.png
├── kernel_example.png
├── kernel_mapping.png
├── mof_building_principle.png
├── racs.png
├── result.gif
├── save_copy_colab.png
└── spheres.png
├── data
├── .gitkeep
├── data.csv
├── features.csv
└── submission.csv
├── environment.yml
├── molsim_ml.ipynb
└── test
└── test.py
/.github/workflows/pythonpackage.yml:
--------------------------------------------------------------------------------
1 | name: Python package
2 |
3 | on:
4 | push:
5 | branches:
6 | - "*"
7 | pull_request:
8 | branches:
9 | - "*"
10 |
11 | jobs:
12 | test_conda:
13 | name: Ex1 (${{ matrix.python-version }}, ${{ matrix.os }})
14 | runs-on: ${{ matrix.os }}
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | os: ["ubuntu-latest", "macos-latest", "windows-latest"]
19 | python-version: ["3.7", "3.8", "3.9"]
20 | steps:
21 | - uses: actions/checkout@v1
22 | - uses: conda-incubator/setup-miniconda@v2
23 | with:
24 | auto-update-conda: true
25 | python-version: ${{ matrix.python-version }}
26 | - name: Conda bash
27 | shell: bash -l {0}
28 | run: export CONDA_ALWAYS_YES="true" && conda env create --file environment.yml --name molsim_ml && conda activate molsim_ml && python test/test.py
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Created by https://www.gitignore.io/api/vim,macos,python,jupyternotebooks,visualstudiocode
3 | # Edit at https://www.gitignore.io/?templates=vim,macos,python,jupyternotebooks,visualstudiocode
4 |
5 | ### JupyterNotebooks ###
6 | # gitignore template for Jupyter Notebooks
7 | # website: http://jupyter.org/
8 |
9 | .ipynb_checkpoints
10 | */.ipynb_checkpoints/*
11 |
12 | # IPython
13 | profile_default/
14 | ipython_config.py
15 |
16 | # Remove previous ipynb_checkpoints
17 | # git rm -r .ipynb_checkpoints/
18 |
19 | ### macOS ###
20 | # General
21 | .DS_Store
22 | .AppleDouble
23 | .LSOverride
24 |
25 | # Icon must end with two \r
26 | Icon
27 |
28 | # Thumbnails
29 | ._*
30 |
31 | # Files that might appear in the root of a volume
32 | .DocumentRevisions-V100
33 | .fseventsd
34 | .Spotlight-V100
35 | .TemporaryItems
36 | .Trashes
37 | .VolumeIcon.icns
38 | .com.apple.timemachine.donotpresent
39 |
40 | # Directories potentially created on remote AFP share
41 | .AppleDB
42 | .AppleDesktop
43 | Network Trash Folder
44 | Temporary Items
45 | .apdisk
46 |
47 | ### Python ###
48 | # Byte-compiled / optimized / DLL files
49 | __pycache__/
50 | *.py[cod]
51 | *$py.class
52 |
53 | # C extensions
54 | *.so
55 |
56 | # Distribution / packaging
57 | .Python
58 | build/
59 | develop-eggs/
60 | dist/
61 | downloads/
62 | eggs/
63 | .eggs/
64 | lib/
65 | lib64/
66 | parts/
67 | sdist/
68 | var/
69 | wheels/
70 | pip-wheel-metadata/
71 | share/python-wheels/
72 | *.egg-info/
73 | .installed.cfg
74 | *.egg
75 | MANIFEST
76 |
77 | # PyInstaller
78 | # Usually these files are written by a python script from a template
79 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
80 | *.manifest
81 | *.spec
82 |
83 | # Installer logs
84 | pip-log.txt
85 | pip-delete-this-directory.txt
86 |
87 | # Unit test / coverage reports
88 | htmlcov/
89 | .tox/
90 | .nox/
91 | .coverage
92 | .coverage.*
93 | .cache
94 | nosetests.xml
95 | coverage.xml
96 | *.cover
97 | .hypothesis/
98 | .pytest_cache/
99 |
100 | # Translations
101 | *.mo
102 | *.pot
103 |
104 | # Scrapy stuff:
105 | .scrapy
106 |
107 | # Sphinx documentation
108 | docs/_build/
109 |
110 | # PyBuilder
111 | target/
112 |
113 | # pyenv
114 | .python-version
115 |
116 | # pipenv
117 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
118 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
119 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
120 | # install all needed dependencies.
121 | #Pipfile.lock
122 |
123 | # celery beat schedule file
124 | celerybeat-schedule
125 |
126 | # SageMath parsed files
127 | *.sage.py
128 |
129 | # Spyder project settings
130 | .spyderproject
131 | .spyproject
132 |
133 | # Rope project settings
134 | .ropeproject
135 |
136 | # Mr Developer
137 | .mr.developer.cfg
138 | .project
139 | .pydevproject
140 |
141 | # mkdocs documentation
142 | /site
143 |
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 |
149 | # Pyre type checker
150 | .pyre/
151 |
152 | ### Vim ###
153 | # Swap
154 | [._]*.s[a-v][a-z]
155 | [._]*.sw[a-p]
156 | [._]s[a-rt-v][a-z]
157 | [._]ss[a-gi-z]
158 | [._]sw[a-p]
159 |
160 | # Session
161 | Session.vim
162 | Sessionx.vim
163 |
164 | # Temporary
165 | .netrwhist
166 | *~
167 |
168 | # Auto-generated tag files
169 | tags
170 |
171 | # Persistent undo
172 | [._]*.un~
173 |
174 | # Coc configuration directory
175 | .vim
176 |
177 | ### VisualStudioCode ###
178 | .vscode/*
179 | !.vscode/settings.json
180 | !.vscode/tasks.json
181 | !.vscode/launch.json
182 | !.vscode/extensions.json
183 |
184 | ### VisualStudioCode Patch ###
185 | # Ignore all local history of files
186 | .history
187 |
188 | # End of https://www.gitignore.io/api/vim,macos,python,jupyternotebooks,visualstudiocode
189 | molsim_solution.ipynb
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Kevin Jablonka
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ML workshop for MolSim 2024
2 |
3 | [](https://colab.research.google.com/github/kjappelbaum/ml_molsim/blob/2024/molsim_ml.ipynb)
4 | [](https://mybinder.org/v2/gh/kjappelbaum/ml_molsim/2024?filepath=molsim_ml.ipynb)
5 | [](https://www.python.org/download/releases/3.7.0/)
6 | [](https://opensource.org/licenses/MIT)
7 | [](https://github.com/kjappelbaum/ml_molsim/actions)
8 | [](https://doi.org/10.5281/zenodo.3605363)
9 |
10 | In this exercise we will build a simple model that can predict the carbon dioxide uptake in MOFs. The goal is to get familiar with the tools that are used for machine learning and to develop an understanding of the workflow, tricks, and pitfalls (e.g., why baselines are important). Some more of the theory can be found [in our review](https://pubs.acs.org/doi/abs/10.1021/acs.chemrev.0c00004).
11 |
12 | 
13 |
14 | If you find some errors, typos or issues feel free to [open an issue](https://help.github.com/en/github/managing-your-work-on-github/about-issues) or directly make a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests).
15 |
16 | # How to run it
17 |
18 | If you have a modern laptop, we recommend that you run the exercises
19 | on the laptop. If you do not want to use your machine or the cluster, you
20 | can also run the exercises on Google Colab.
21 |
22 | ## Some tips
23 |
24 | - If you are not familiar with the Python data science stack, we can recommend [some cheatsheets](https://www.utc.fr/~jlaforet/Suppl/python-cheatsheets.pdf).
25 | - If you are not familiar with a function, you can get help in a Jupyter notebook by placing the cursor inside the parentheses of the function and hitting SHIFT + TAB. Alternatively, you can prepend a variable/function/library with `?`, e.g., `?str.replace`.
26 | - The errors you'll run into are most likely some that someone else already encountered. If you copy/paste the error message into a search engine like Google you will often find the solution to your problem on a site like StackOverflow
27 | - [Here](https://www.dataquest.io/blog/jupyter-notebook-tips-tricks-shortcuts/) are some nice tips/tricks for using Jupyter notebooks
28 | - For plotting, we use the [holoviews library](https://holoviews.org/) as it is one of the simplest ways to create interactive figures in Python (it is a high-level interface to the [bokeh library](https://bokeh.org/)). You might find the [Getting Started section of the documentation](https://holoviews.org/getting_started/) useful if you want to understand it better. I also found [this guide from Caltech](http://bebi103.caltech.edu.s3-website-us-east-1.amazonaws.com/2019a/content/lessons/lesson_03/intro_to_holoviews.html) useful.
29 |
30 | ## Run it locally (recommended)
31 |
32 | The following steps assume that you use MacOS or some Linux flavor. If you use Windows, we recommend that you first install the [Windows Subsystem for Linux (WSL)](https://docs.microsoft.com/en-us/windows/wsl/install-win10).
33 |
34 | Create a new folder and clone this repository (you need `git` for this, if you get a `missing command` error for `git` you can install it with `sudo apt-get install git`)
35 |
36 | ```bash
37 | git clone --depth 1 --branch 2024 \
38 | https://github.com/kjappelbaum/ml_molsim.git
39 | cd ml_molsim
40 | ```
41 |
42 | We recommend that you create a virtual conda environment on your computer in which you install the dependencies for this exercise. To do so head over to [Miniconda](https://docs.conda.io/en/latest/miniconda.html) and follow the installation instructions there.
43 |
44 | Then, use
45 |
46 | ```bash
47 | conda env create -f environment.yml -n ml_molsim
48 | ```
49 |
50 | You can activate this environment using
51 |
52 | ```bash
53 | conda activate ml_molsim
54 | ```
55 |
56 | After this you can start Jupyter Lab and select the `molsim_ml.ipynb` file from the file browser.
57 |
58 | ```bash
59 | jupyter lab
60 | ```
61 |
62 | Make sure that the notebook kernel runs in the correct environment:
63 |
64 | 
65 |
66 | If the environment name that is shown is different from "ml_molsim" you can click on it and select the correct one.
67 |
68 | ## Use it on Google Colab
69 |
70 | 
71 |
72 | Here, you can use relatively powerful computing resources (like [GPUs](https://en.wikipedia.org/wiki/Graphics_processing_unit) and [TPUs](https://en.wikipedia.org/wiki/Tensor_Processing_Unit)) from Google for free.
73 | Click the "Open in Colab" button on the top, then make a copy of the notebook into your Google Drive, and run the first three cells to
74 | install the dependencies.
75 | Then you should be able to use the notebook in Colab.
76 |
77 | 
78 |
79 | **Make sure to make a copy into your Google Drive and work on this copy,
80 | not on the shared notebook!**
81 |
82 | _Note:_ If you have a Google Account from your organization, e.g. university, you might
83 | need to log out and use your personal account as many organizations block
84 | third-party applications.
85 |
86 | _Note:_ Google Colab also requires that you reload the JavaScript of holoviews in each plotting cell.
87 | So, every cell that contains a holoviews plot has to start with `hv.extension('bokeh')`.
88 |
89 | ## Acknowledgements
90 |
91 | We want to thank [Leopold Talirz](https://github.com/ltalirz) for incredibly valuable feedback and input during the initial phases of development.
92 | We also want to thank Peter Alexander Knudsen for spotting typos, as well as [Prof. Tristan Bereau](https://github.com/tbereau) and all MolSim participants and TAs for feedback.
93 |
--------------------------------------------------------------------------------
/assets/404error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/404error.png
--------------------------------------------------------------------------------
/assets/colab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/colab.png
--------------------------------------------------------------------------------
/assets/conda_env_name.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/conda_env_name.png
--------------------------------------------------------------------------------
/assets/kaggle_upload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/kaggle_upload.png
--------------------------------------------------------------------------------
/assets/kernel_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/kernel_example.png
--------------------------------------------------------------------------------
/assets/kernel_mapping.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/kernel_mapping.png
--------------------------------------------------------------------------------
/assets/mof_building_principle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/mof_building_principle.png
--------------------------------------------------------------------------------
/assets/racs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/racs.png
--------------------------------------------------------------------------------
/assets/result.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/result.gif
--------------------------------------------------------------------------------
/assets/save_copy_colab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/save_copy_colab.png
--------------------------------------------------------------------------------
/assets/spheres.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/assets/spheres.png
--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kjappelbaum/ml_molsim/0ebed176b972c9ba9ddf7b6e3a4d04eaf3f32409/data/.gitkeep
--------------------------------------------------------------------------------
/data/submission.csv:
--------------------------------------------------------------------------------
1 | id,prediction
2 | 0,4568
3 | 1,
4 | 2,
5 | 3,
6 | 4,
7 | 5,
8 | 6,
9 | 7,
10 | 8,
11 | 9,
12 | 10,
13 | 11,
14 | 12,
15 | 13,
16 | 14,
17 | 15,
18 | 16,
19 | 17,
20 | 18,
21 | 19,
22 | 20,
23 | 21,
24 | 22,
25 | 23,
26 | 24,
27 | 25,
28 | 26,
29 | 27,
30 | 28,
31 | 29,
32 | 30,
33 | 31,
34 | 32,
35 | 33,
36 | 34,
37 | 35,
38 | 36,
39 | 37,
40 | 38,
41 | 39,
42 | 40,
43 | 41,
44 | 42,
45 | 43,
46 | 44,
47 | 45,
48 | 46,
49 | 47,
50 | 48,
51 | 49,
52 | 50,
53 | 51,
54 | 52,
55 | 53,
56 | 54,
57 | 55,
58 | 56,
59 | 57,
60 | 58,
61 | 59,
62 | 60,
63 | 61,
64 | 62,
65 | 63,
66 | 64,
67 | 65,
68 | 66,
69 | 67,
70 | 68,
71 | 69,
72 | 70,
73 | 71,
74 | 72,
75 | 73,
76 | 74,
77 | 75,
78 | 76,
79 | 77,
80 | 78,
81 | 79,
82 | 80,
83 | 81,
84 | 82,
85 | 83,
86 | 84,
87 | 85,
88 | 86,
89 | 87,
90 | 88,
91 | 89,
92 | 90,
93 | 91,
94 | 92,
95 | 93,
96 | 94,
97 | 95,
98 | 96,
99 | 97,
100 | 98,
101 | 99,
102 | 100,
103 | 101,
104 | 102,
105 | 103,
106 | 104,
107 | 105,
108 | 106,
109 | 107,
110 | 108,
111 | 109,
112 | 110,
113 | 111,
114 | 112,
115 | 113,
116 | 114,
117 | 115,
118 | 116,
119 | 117,
120 | 118,
121 | 119,
122 | 120,
123 | 121,
124 | 122,
125 | 123,
126 | 124,
127 | 125,
128 | 126,
129 | 127,
130 | 128,
131 | 129,
132 | 130,
133 | 131,
134 | 132,
135 | 133,
136 | 134,
137 | 135,
138 | 136,
139 | 137,
140 | 138,
141 | 139,
142 | 140,
143 | 141,
144 | 142,
145 | 143,
146 | 144,
147 | 145,
148 | 146,
149 | 147,
150 | 148,
151 | 149,
152 | 150,
153 | 151,
154 | 152,
155 | 153,
156 | 154,
157 | 155,
158 | 156,
159 | 157,
160 | 158,
161 | 159,
162 | 160,
163 | 161,
164 | 162,
165 | 163,
166 | 164,
167 | 165,
168 | 166,
169 | 167,
170 | 168,
171 | 169,
172 | 170,
173 | 171,
174 | 172,
175 | 173,
176 | 174,
177 | 175,
178 | 176,
179 | 177,
180 | 178,
181 | 179,
182 | 180,
183 | 181,
184 | 182,
185 | 183,
186 | 184,
187 | 185,
188 | 186,
189 | 187,
190 | 188,
191 | 189,
192 | 190,
193 | 191,
194 | 192,
195 | 193,
196 | 194,
197 | 195,
198 | 196,
199 | 197,
200 | 198,
201 | 199,
202 | 200,
203 | 201,
204 | 202,
205 | 203,
206 | 204,
207 | 205,
208 | 206,
209 | 207,
210 | 208,
211 | 209,
212 | 210,
213 | 211,
214 | 212,
215 | 213,
216 | 214,
217 | 215,
218 | 216,
219 | 217,
220 | 218,
221 | 219,
222 | 220,
223 | 221,
224 | 222,
225 | 223,
226 | 224,
227 | 225,
228 | 226,
229 | 227,
230 | 228,
231 | 229,
232 | 230,
233 | 231,
234 | 232,
235 | 233,
236 | 234,
237 | 235,
238 | 236,
239 | 237,
240 | 238,
241 | 239,
242 | 240,
243 | 241,
244 | 242,
245 | 243,
246 | 244,
247 | 245,
248 | 246,
249 | 247,
250 | 248,
251 | 249,
252 | 250,
253 | 251,
254 | 252,
255 | 253,
256 | 254,
257 | 255,
258 | 256,
259 | 257,
260 | 258,
261 | 259,
262 | 260,
263 | 261,
264 | 262,
265 | 263,
266 | 264,
267 | 265,
268 | 266,
269 | 267,
270 | 268,
271 | 269,
272 | 270,
273 | 271,
274 | 272,
275 | 273,
276 | 274,
277 | 275,
278 | 276,
279 | 277,
280 | 278,
281 | 279,
282 | 280,
283 | 281,
284 | 282,
285 | 283,
286 | 284,
287 | 285,
288 | 286,
289 | 287,
290 | 288,
291 | 289,
292 | 290,
293 | 291,
294 | 292,
295 | 293,
296 | 294,
297 | 295,
298 | 296,
299 | 297,
300 | 298,
301 | 299,
302 | 300,
303 | 301,
304 | 302,
305 | 303,
306 | 304,
307 | 305,
308 | 306,
309 | 307,
310 | 308,
311 | 309,
312 | 310,
313 | 311,
314 | 312,
315 | 313,
316 | 314,
317 | 315,
318 | 316,
319 | 317,
320 | 318,
321 | 319,
322 | 320,
323 | 321,
324 | 322,
325 | 323,
326 | 324,
327 | 325,
328 | 326,
329 | 327,
330 | 328,
331 | 329,
332 | 330,
333 | 331,
334 | 332,
335 | 333,
336 | 334,
337 | 335,
338 | 336,
339 | 337,
340 | 338,
341 | 339,
342 | 340,
343 | 341,
344 | 342,
345 | 343,
346 | 344,
347 | 345,
348 | 346,
349 | 347,
350 | 348,
351 | 349,
352 | 350,
353 | 351,
354 | 352,
355 | 353,
356 | 354,
357 | 355,
358 | 356,
359 | 357,
360 | 358,
361 | 359,
362 | 360,
363 | 361,
364 | 362,
365 | 363,
366 | 364,
367 | 365,
368 | 366,
369 | 367,
370 | 368,
371 | 369,
372 | 370,
373 | 371,
374 | 372,
375 | 373,
376 | 374,
377 | 375,
378 | 376,
379 | 377,
380 | 378,
381 | 379,
382 | 380,
383 | 381,
384 | 382,
385 | 383,
386 | 384,
387 | 385,
388 | 386,
389 | 387,
390 | 388,
391 | 389,
392 | 390,
393 | 391,
394 | 392,
395 | 393,
396 | 394,
397 | 395,
398 | 396,
399 | 397,
400 | 398,
401 | 399,
402 | 400,
403 | 401,
404 | 402,
405 | 403,
406 | 404,
407 | 405,
408 | 406,
409 | 407,
410 | 408,
411 | 409,
412 | 410,
413 | 411,
414 | 412,
415 | 413,
416 | 414,
417 | 415,
418 | 416,
419 | 417,
420 | 418,
421 | 419,
422 | 420,
423 | 421,
424 | 422,
425 | 423,
426 | 424,
427 | 425,
428 | 426,
429 | 427,
430 | 428,
431 | 429,
432 | 430,
433 | 431,
434 | 432,
435 | 433,
436 | 434,
437 | 435,
438 | 436,
439 | 437,
440 | 438,
441 | 439,
442 | 440,
443 | 441,
444 | 442,
445 | 443,
446 | 444,
447 | 445,
448 | 446,
449 | 447,
450 | 448,
451 | 449,
452 | 450,
453 | 451,
454 | 452,
455 | 453,
456 | 454,
457 | 455,
458 | 456,
459 | 457,
460 | 458,
461 | 459,
462 | 460,
463 | 461,
464 | 462,
465 | 463,
466 | 464,
467 | 465,
468 | 466,
469 | 467,
470 | 468,
471 | 469,
472 | 470,
473 | 471,
474 | 472,
475 | 473,
476 | 474,
477 | 475,
478 | 476,
479 | 477,
480 | 478,
481 | 479,
482 | 480,
483 | 481,
484 | 482,
485 | 483,
486 | 484,
487 | 485,
488 | 486,
489 | 487,
490 | 488,
491 | 489,
492 | 490,
493 | 491,
494 | 492,
495 | 493,
496 | 494,
497 | 495,
498 | 496,
499 | 497,
500 | 498,
501 | 499,
502 | 500,
503 | 501,
504 | 502,
505 | 503,
506 | 504,
507 | 505,
508 | 506,
509 | 507,
510 | 508,
511 | 509,
512 | 510,
513 | 511,
514 | 512,
515 | 513,
516 | 514,
517 | 515,
518 | 516,
519 | 517,
520 | 518,
521 | 519,
522 | 520,
523 | 521,
524 | 522,
525 | 523,
526 | 524,
527 | 525,
528 | 526,
529 | 527,
530 | 528,
531 | 529,
532 | 530,
533 | 531,
534 | 532,
535 | 533,
536 | 534,
537 | 535,
538 | 536,
539 | 537,
540 | 538,
541 | 539,
542 | 540,
543 | 541,
544 | 542,
545 | 543,
546 | 544,
547 | 545,
548 | 546,
549 | 547,
550 | 548,
551 | 549,
552 | 550,
553 | 551,
554 | 552,
555 | 553,
556 | 554,
557 | 555,
558 | 556,
559 | 557,
560 | 558,
561 | 559,
562 | 560,
563 | 561,
564 | 562,
565 | 563,
566 | 564,
567 | 565,
568 | 566,
569 | 567,
570 | 568,
571 | 569,
572 | 570,
573 | 571,
574 | 572,
575 | 573,
576 | 574,
577 | 575,
578 | 576,
579 | 577,
580 | 578,
581 | 579,
582 | 580,
583 | 581,
584 | 582,
585 | 583,
586 | 584,
587 | 585,
588 | 586,
589 | 587,
590 | 588,
591 | 589,
592 | 590,
593 | 591,
594 | 592,
595 | 593,
596 | 594,
597 | 595,
598 | 596,
599 | 597,
600 | 598,
601 | 599,
602 | 600,
603 | 601,
604 | 602,
605 | 603,
606 | 604,
607 | 605,
608 | 606,
609 | 607,
610 | 608,
611 | 609,
612 | 610,
613 | 611,
614 | 612,
615 | 613,
616 | 614,
617 | 615,
618 | 616,
619 | 617,
620 | 618,
621 | 619,
622 | 620,
623 | 621,
624 | 622,
625 | 623,
626 | 624,
627 | 625,
628 | 626,
629 | 627,
630 | 628,
631 | 629,
632 | 630,
633 | 631,
634 | 632,
635 | 633,
636 | 634,
637 | 635,
638 | 636,
639 | 637,
640 | 638,
641 | 639,
642 | 640,
643 | 641,
644 | 642,
645 | 643,
646 | 644,
647 | 645,
648 | 646,
649 | 647,
650 | 648,
651 | 649,
652 | 650,
653 | 651,
654 | 652,
655 | 653,
656 | 654,
657 | 655,
658 | 656,
659 | 657,
660 | 658,
661 | 659,
662 | 660,
663 | 661,
664 | 662,
665 | 663,
666 | 664,
667 | 665,
668 | 666,
669 | 667,
670 | 668,
671 | 669,
672 | 670,
673 | 671,
674 | 672,
675 | 673,
676 | 674,
677 | 675,
678 | 676,
679 | 677,
680 | 678,
681 | 679,
682 | 680,
683 | 681,
684 | 682,
685 | 683,
686 | 684,
687 | 685,
688 | 686,
689 | 687,
690 | 688,
691 | 689,
692 | 690,
693 | 691,
694 | 692,
695 | 693,
696 | 694,
697 | 695,
698 | 696,
699 | 697,
700 | 698,
701 | 699,
702 | 700,
703 | 701,
704 | 702,
705 | 703,
706 | 704,
707 | 705,
708 | 706,
709 | 707,
710 | 708,
711 | 709,
712 | 710,
713 | 711,
714 | 712,
715 | 713,
716 | 714,
717 | 715,
718 | 716,
719 | 717,
720 | 718,
721 | 719,
722 | 720,
723 | 721,
724 | 722,
725 | 723,
726 | 724,
727 | 725,
728 | 726,
729 | 727,
730 | 728,
731 | 729,
732 | 730,
733 | 731,
734 | 732,
735 | 733,
736 | 734,
737 | 735,
738 | 736,
739 | 737,
740 | 738,
741 | 739,
742 | 740,
743 | 741,
744 | 742,
745 | 743,
746 | 744,
747 | 745,
748 | 746,
749 | 747,
750 | 748,
751 | 749,
752 | 750,
753 | 751,
754 | 752,
755 | 753,
756 | 754,
757 | 755,
758 | 756,
759 | 757,
760 | 758,
761 | 759,
762 | 760,
763 | 761,
764 | 762,
765 | 763,
766 | 764,
767 | 765,
768 | 766,
769 | 767,
770 | 768,
771 | 769,
772 | 770,
773 | 771,
774 | 772,
775 | 773,
776 | 774,
777 | 775,
778 | 776,
779 | 777,
780 | 778,
781 | 779,
782 | 780,
783 | 781,
784 | 782,
785 | 783,
786 | 784,
787 | 785,
788 | 786,
789 | 787,
790 | 788,
791 | 789,
792 | 790,
793 | 791,
794 | 792,
795 | 793,
796 | 794,
797 | 795,
798 | 796,
799 | 797,
800 | 798,
801 | 799,
802 | 800,
803 | 801,
804 | 802,
805 | 803,
806 | 804,
807 | 805,
808 | 806,
809 | 807,
810 | 808,
811 | 809,
812 | 810,
813 | 811,
814 | 812,
815 | 813,
816 | 814,
817 | 815,
818 | 816,
819 | 817,
820 | 818,
821 | 819,
822 | 820,
823 | 821,
824 | 822,
825 | 823,
826 | 824,
827 | 825,
828 | 826,
829 | 827,
830 | 828,
831 | 829,
832 | 830,
833 | 831,
834 | 832,
835 | 833,
836 | 834,
837 | 835,
838 | 836,
839 | 837,
840 | 838,
841 | 839,
842 | 840,
843 | 841,
844 | 842,
845 | 843,
846 | 844,
847 | 845,
848 | 846,
849 | 847,
850 | 848,
851 | 849,
852 | 850,
853 | 851,
854 | 852,
855 | 853,
856 | 854,
857 | 855,
858 | 856,
859 | 857,
860 | 858,
861 | 859,
862 | 860,
863 | 861,
864 | 862,
865 | 863,
866 | 864,
867 | 865,
868 | 866,
869 | 867,
870 | 868,
871 | 869,
872 | 870,
873 | 871,
874 | 872,
875 | 873,
876 | 874,
877 | 875,
878 | 876,
879 | 877,
880 | 878,
881 | 879,
882 | 880,
883 | 881,
884 | 882,
885 | 883,
886 | 884,
887 | 885,
888 | 886,
889 | 887,
890 | 888,
891 | 889,
892 | 890,
893 | 891,
894 | 892,
895 | 893,
896 | 894,
897 | 895,
898 | 896,
899 | 897,
900 | 898,
901 | 899,
902 | 900,
903 | 901,
904 | 902,
905 | 903,
906 | 904,
907 | 905,
908 | 906,
909 | 907,
910 | 908,
911 | 909,
912 | 910,
913 | 911,
914 | 912,
915 | 913,
916 | 914,
917 | 915,
918 | 916,
919 | 917,
920 | 918,
921 | 919,
922 | 920,
923 | 921,
924 | 922,
925 | 923,
926 | 924,
927 | 925,
928 | 926,
929 | 927,
930 | 928,
931 | 929,
932 | 930,
933 | 931,
934 | 932,
935 | 933,
936 | 934,
937 | 935,
938 | 936,
939 | 937,
940 | 938,
941 | 939,
942 | 940,
943 | 941,
944 | 942,
945 | 943,
946 | 944,
947 | 945,
948 | 946,
949 | 947,
950 | 948,
951 | 949,
952 | 950,
953 | 951,
954 | 952,
955 | 953,
956 | 954,
957 | 955,
958 | 956,
959 | 957,
960 | 958,
961 | 959,
962 | 960,
963 | 961,
964 | 962,
965 | 963,
966 | 964,
967 | 965,
968 | 966,
969 | 967,
970 | 968,
971 | 969,
972 | 970,
973 | 971,
974 | 972,
975 | 973,
976 | 974,
977 | 975,
978 | 976,
979 | 977,
980 | 978,
981 | 979,
982 | 980,
983 | 981,
984 | 982,
985 | 983,
986 | 984,
987 | 985,
988 | 986,
989 | 987,
990 | 988,
991 | 989,
992 | 990,
993 | 991,
994 | 992,
995 | 993,
996 | 994,
997 | 995,
998 | 996,
999 | 997,
1000 | 998,
1001 | 999,
1002 | 1000,
1003 | 1001,
1004 | 1002,
1005 | 1003,
1006 | 1004,
1007 | 1005,
1008 | 1006,
1009 | 1007,
1010 | 1008,
1011 | 1009,
1012 | 1010,
1013 | 1011,
1014 | 1012,
1015 | 1013,
1016 | 1014,
1017 | 1015,
1018 | 1016,
1019 | 1017,
1020 | 1018,
1021 | 1019,
1022 | 1020,
1023 | 1021,
1024 | 1022,
1025 | 1023,
1026 | 1024,
1027 | 1025,
1028 | 1026,
1029 | 1027,
1030 | 1028,
1031 | 1029,
1032 | 1030,
1033 | 1031,
1034 | 1032,
1035 | 1033,
1036 | 1034,
1037 | 1035,
1038 | 1036,
1039 | 1037,
1040 | 1038,
1041 | 1039,
1042 | 1040,
1043 | 1041,
1044 | 1042,
1045 | 1043,
1046 | 1044,
1047 | 1045,
1048 | 1046,
1049 | 1047,
1050 | 1048,
1051 | 1049,
1052 | 1050,
1053 | 1051,
1054 | 1052,
1055 | 1053,
1056 | 1054,
1057 | 1055,
1058 | 1056,
1059 | 1057,
1060 | 1058,
1061 | 1059,
1062 | 1060,
1063 | 1061,
1064 | 1062,
1065 | 1063,
1066 | 1064,
1067 | 1065,
1068 | 1066,
1069 | 1067,
1070 | 1068,
1071 | 1069,
1072 | 1070,
1073 | 1071,
1074 | 1072,
1075 | 1073,
1076 | 1074,
1077 | 1075,
1078 | 1076,
1079 | 1077,
1080 | 1078,
1081 | 1079,
1082 | 1080,
1083 | 1081,
1084 | 1082,
1085 | 1083,
1086 | 1084,
1087 | 1085,
1088 | 1086,
1089 | 1087,
1090 | 1088,
1091 | 1089,
1092 | 1090,
1093 | 1091,
1094 | 1092,
1095 | 1093,
1096 | 1094,
1097 | 1095,
1098 | 1096,
1099 | 1097,
1100 | 1098,
1101 | 1099,
1102 | 1100,
1103 | 1101,
1104 | 1102,
1105 | 1103,
1106 | 1104,
1107 | 1105,
1108 | 1106,
1109 | 1107,
1110 | 1108,
1111 | 1109,
1112 | 1110,
1113 | 1111,
1114 | 1112,
1115 | 1113,
1116 | 1114,
1117 | 1115,
1118 | 1116,
1119 | 1117,
1120 | 1118,
1121 | 1119,
1122 | 1120,
1123 | 1121,
1124 | 1122,
1125 | 1123,
1126 | 1124,
1127 | 1125,
1128 | 1126,
1129 | 1127,
1130 | 1128,
1131 | 1129,
1132 | 1130,
1133 | 1131,
1134 | 1132,
1135 | 1133,
1136 | 1134,
1137 | 1135,
1138 | 1136,
1139 | 1137,
1140 | 1138,
1141 | 1139,
1142 | 1140,
1143 | 1141,
1144 | 1142,
1145 | 1143,
1146 | 1144,
1147 | 1145,
1148 | 1146,
1149 | 1147,
1150 | 1148,
1151 | 1149,
1152 | 1150,
1153 | 1151,
1154 | 1152,
1155 | 1153,
1156 | 1154,
1157 | 1155,
1158 | 1156,
1159 | 1157,
1160 | 1158,
1161 | 1159,
1162 | 1160,
1163 | 1161,
1164 | 1162,
1165 | 1163,
1166 | 1164,
1167 | 1165,
1168 | 1166,
1169 | 1167,
1170 | 1168,
1171 | 1169,
1172 | 1170,
1173 | 1171,
1174 | 1172,
1175 | 1173,
1176 | 1174,
1177 | 1175,
1178 | 1176,
1179 | 1177,
1180 | 1178,
1181 | 1179,
1182 | 1180,
1183 | 1181,
1184 | 1182,
1185 | 1183,
1186 | 1184,
1187 | 1185,
1188 | 1186,
1189 | 1187,
1190 | 1188,
1191 | 1189,
1192 | 1190,
1193 | 1191,
1194 | 1192,
1195 | 1193,
1196 | 1194,
1197 | 1195,
1198 | 1196,
1199 | 1197,
1200 | 1198,
1201 | 1199,
1202 | 1200,
1203 | 1201,
1204 | 1202,
1205 | 1203,
1206 | 1204,
1207 | 1205,
1208 | 1206,
1209 | 1207,
1210 | 1208,
1211 | 1209,
1212 | 1210,
1213 | 1211,
1214 | 1212,
1215 | 1213,
1216 | 1214,
1217 | 1215,
1218 | 1216,
1219 | 1217,
1220 | 1218,
1221 | 1219,
1222 | 1220,
1223 | 1221,
1224 | 1222,
1225 | 1223,
1226 | 1224,
1227 | 1225,
1228 | 1226,
1229 | 1227,
1230 | 1228,
1231 | 1229,
1232 | 1230,
1233 | 1231,
1234 | 1232,
1235 | 1233,
1236 | 1234,
1237 | 1235,
1238 | 1236,
1239 | 1237,
1240 | 1238,
1241 | 1239,
1242 | 1240,
1243 | 1241,
1244 | 1242,
1245 | 1243,
1246 | 1244,
1247 | 1245,
1248 | 1246,
1249 | 1247,
1250 | 1248,
1251 | 1249,
1252 | 1250,
1253 | 1251,
1254 | 1252,
1255 | 1253,
1256 | 1254,
1257 | 1255,
1258 | 1256,
1259 | 1257,
1260 | 1258,
1261 | 1259,
1262 | 1260,
1263 | 1261,
1264 | 1262,
1265 | 1263,
1266 | 1264,
1267 | 1265,
1268 | 1266,
1269 | 1267,
1270 | 1268,
1271 | 1269,
1272 | 1270,
1273 | 1271,
1274 | 1272,
1275 | 1273,
1276 | 1274,
1277 | 1275,
1278 | 1276,
1279 | 1277,
1280 | 1278,
1281 | 1279,
1282 | 1280,
1283 | 1281,
1284 | 1282,
1285 | 1283,
1286 | 1284,
1287 | 1285,
1288 | 1286,
1289 | 1287,
1290 | 1288,
1291 | 1289,
1292 | 1290,
1293 | 1291,
1294 | 1292,
1295 | 1293,
1296 | 1294,
1297 | 1295,
1298 | 1296,
1299 | 1297,
1300 | 1298,
1301 | 1299,
1302 | 1300,
1303 | 1301,
1304 | 1302,
1305 | 1303,
1306 | 1304,
1307 | 1305,
1308 | 1306,
1309 | 1307,
1310 | 1308,
1311 | 1309,
1312 | 1310,
1313 | 1311,
1314 | 1312,
1315 | 1313,
1316 | 1314,
1317 | 1315,
1318 | 1316,
1319 | 1317,
1320 | 1318,
1321 | 1319,
1322 | 1320,
1323 | 1321,
1324 | 1322,
1325 | 1323,
1326 | 1324,
1327 | 1325,
1328 | 1326,
1329 | 1327,
1330 | 1328,
1331 | 1329,
1332 | 1330,
1333 | 1331,
1334 | 1332,
1335 | 1333,
1336 | 1334,
1337 | 1335,
1338 | 1336,
1339 | 1337,
1340 | 1338,
1341 | 1339,
1342 | 1340,
1343 | 1341,
1344 | 1342,
1345 | 1343,
1346 | 1344,
1347 | 1345,
1348 | 1346,
1349 | 1347,
1350 | 1348,
1351 | 1349,
1352 | 1350,
1353 | 1351,
1354 | 1352,
1355 | 1353,
1356 | 1354,
1357 | 1355,
1358 | 1356,
1359 | 1357,
1360 | 1358,
1361 | 1359,
1362 | 1360,
1363 | 1361,
1364 | 1362,
1365 | 1363,
1366 | 1364,
1367 | 1365,
1368 | 1366,
1369 | 1367,
1370 | 1368,
1371 | 1369,
1372 | 1370,
1373 | 1371,
1374 | 1372,
1375 | 1373,
1376 | 1374,
1377 | 1375,
1378 | 1376,
1379 | 1377,
1380 | 1378,
1381 | 1379,
1382 | 1380,
1383 | 1381,
1384 | 1382,
1385 | 1383,
1386 | 1384,
1387 | 1385,
1388 | 1386,
1389 | 1387,
1390 | 1388,
1391 | 1389,
1392 | 1390,
1393 | 1391,
1394 | 1392,
1395 | 1393,
1396 | 1394,
1397 | 1395,
1398 | 1396,
1399 | 1397,
1400 | 1398,
1401 | 1399,
1402 | 1400,
1403 | 1401,
1404 | 1402,
1405 | 1403,
1406 | 1404,
1407 | 1405,
1408 | 1406,
1409 | 1407,
1410 | 1408,
1411 | 1409,
1412 | 1410,
1413 | 1411,
1414 | 1412,
1415 | 1413,
1416 | 1414,
1417 | 1415,
1418 | 1416,
1419 | 1417,
1420 | 1418,
1421 | 1419,
1422 | 1420,
1423 | 1421,
1424 | 1422,
1425 | 1423,
1426 | 1424,
1427 | 1425,
1428 | 1426,
1429 | 1427,
1430 | 1428,
1431 | 1429,
1432 | 1430,
1433 | 1431,
1434 | 1432,
1435 | 1433,
1436 | 1434,
1437 | 1435,
1438 | 1436,
1439 | 1437,
1440 | 1438,
1441 | 1439,
1442 | 1440,
1443 | 1441,
1444 | 1442,
1445 | 1443,
1446 | 1444,
1447 | 1445,
1448 | 1446,
1449 | 1447,
1450 | 1448,
1451 | 1449,
1452 | 1450,
1453 | 1451,
1454 | 1452,
1455 | 1453,
1456 | 1454,
1457 | 1455,
1458 | 1456,
1459 | 1457,
1460 | 1458,
1461 | 1459,
1462 | 1460,
1463 | 1461,
1464 | 1462,
1465 | 1463,
1466 | 1464,
1467 | 1465,
1468 | 1466,
1469 | 1467,
1470 | 1468,
1471 | 1469,
1472 | 1470,
1473 | 1471,
1474 | 1472,
1475 | 1473,
1476 | 1474,
1477 | 1475,
1478 | 1476,
1479 | 1477,
1480 | 1478,
1481 | 1479,
1482 | 1480,
1483 | 1481,
1484 | 1482,
1485 | 1483,
1486 | 1484,
1487 | 1485,
1488 | 1486,
1489 | 1487,
1490 | 1488,
1491 | 1489,
1492 | 1490,
1493 | 1491,
1494 | 1492,
1495 | 1493,
1496 | 1494,
1497 | 1495,
1498 | 1496,
1499 | 1497,
1500 | 1498,
1501 | 1499,
1502 | 1500,
1503 | 1501,
1504 | 1502,
1505 | 1503,
1506 | 1504,
1507 | 1505,
1508 | 1506,
1509 | 1507,
1510 | 1508,
1511 | 1509,
1512 | 1510,
1513 | 1511,
1514 | 1512,
1515 | 1513,
1516 | 1514,
1517 | 1515,
1518 | 1516,
1519 | 1517,
1520 | 1518,
1521 | 1519,
1522 | 1520,
1523 | 1521,
1524 | 1522,
1525 | 1523,
1526 | 1524,
1527 | 1525,
1528 | 1526,
1529 | 1527,
1530 | 1528,
1531 | 1529,
1532 | 1530,
1533 | 1531,
1534 | 1532,
1535 | 1533,
1536 | 1534,
1537 | 1535,
1538 | 1536,
1539 | 1537,
1540 | 1538,
1541 | 1539,
1542 | 1540,
1543 | 1541,
1544 | 1542,
1545 | 1543,
1546 | 1544,
1547 | 1545,
1548 | 1546,
1549 | 1547,
1550 | 1548,
1551 | 1549,
1552 | 1550,
1553 | 1551,
1554 | 1552,
1555 | 1553,
1556 | 1554,
1557 | 1555,
1558 | 1556,
1559 | 1557,
1560 | 1558,
1561 | 1559,
1562 | 1560,
1563 | 1561,
1564 | 1562,
1565 | 1563,
1566 | 1564,
1567 | 1565,
1568 | 1566,
1569 | 1567,
1570 | 1568,
1571 | 1569,
1572 | 1570,
1573 | 1571,
1574 | 1572,
1575 | 1573,
1576 | 1574,
1577 | 1575,
1578 | 1576,
1579 | 1577,
1580 | 1578,
1581 | 1579,
1582 | 1580,
1583 | 1581,
1584 | 1582,
1585 | 1583,
1586 | 1584,
1587 | 1585,
1588 | 1586,
1589 | 1587,
1590 | 1588,
1591 | 1589,
1592 | 1590,
1593 | 1591,
1594 | 1592,
1595 | 1593,
1596 | 1594,
1597 | 1595,
1598 | 1596,
1599 | 1597,
1600 | 1598,
1601 | 1599,
1602 | 1600,
1603 | 1601,
1604 | 1602,
1605 | 1603,
1606 | 1604,
1607 | 1605,
1608 | 1606,
1609 | 1607,
1610 | 1608,
1611 | 1609,
1612 | 1610,
1613 | 1611,
1614 | 1612,
1615 | 1613,
1616 | 1614,
1617 | 1615,
1618 | 1616,
1619 | 1617,
1620 | 1618,
1621 | 1619,
1622 | 1620,
1623 | 1621,
1624 | 1622,
1625 | 1623,
1626 | 1624,
1627 | 1625,
1628 | 1626,
1629 | 1627,
1630 | 1628,
1631 | 1629,
1632 | 1630,
1633 | 1631,
1634 | 1632,
1635 | 1633,
1636 | 1634,
1637 | 1635,
1638 | 1636,
1639 | 1637,
1640 | 1638,
1641 | 1639,
1642 | 1640,
1643 | 1641,
1644 | 1642,
1645 | 1643,
1646 | 1644,
1647 | 1645,
1648 | 1646,
1649 | 1647,
1650 | 1648,
1651 | 1649,
1652 | 1650,
1653 | 1651,
1654 | 1652,
1655 | 1653,
1656 | 1654,
1657 | 1655,
1658 | 1656,
1659 | 1657,
1660 | 1658,
1661 | 1659,
1662 | 1660,
1663 | 1661,
1664 | 1662,
1665 | 1663,
1666 | 1664,
1667 | 1665,
1668 | 1666,
1669 | 1667,
1670 | 1668,
1671 | 1669,
1672 | 1670,
1673 | 1671,
1674 | 1672,
1675 | 1673,
1676 | 1674,
1677 | 1675,
1678 | 1676,
1679 | 1677,
1680 | 1678,
1681 | 1679,
1682 | 1680,
1683 | 1681,
1684 | 1682,
1685 | 1683,
1686 | 1684,
1687 | 1685,
1688 | 1686,
1689 | 1687,
1690 | 1688,
1691 | 1689,
1692 | 1690,
1693 | 1691,
1694 | 1692,
1695 | 1693,
1696 | 1694,
1697 | 1695,
1698 | 1696,
1699 | 1697,
1700 | 1698,
1701 | 1699,
1702 | 1700,
1703 | 1701,
1704 | 1702,
1705 | 1703,
1706 | 1704,
1707 | 1705,
1708 | 1706,
1709 | 1707,
1710 | 1708,
1711 | 1709,
1712 | 1710,
1713 | 1711,
1714 | 1712,
1715 | 1713,
1716 | 1714,
1717 | 1715,
1718 | 1716,
1719 | 1717,
1720 | 1718,
1721 | 1719,
1722 | 1720,
1723 | 1721,
1724 | 1722,
1725 | 1723,
1726 | 1724,
1727 | 1725,
1728 | 1726,
1729 | 1727,
1730 | 1728,
1731 | 1729,
1732 | 1730,
1733 | 1731,
1734 | 1732,
1735 | 1733,
1736 | 1734,
1737 | 1735,
1738 | 1736,
1739 | 1737,
1740 | 1738,
1741 | 1739,
1742 | 1740,
1743 | 1741,
1744 | 1742,
1745 | 1743,
1746 | 1744,
1747 | 1745,
1748 | 1746,
1749 | 1747,
1750 | 1748,
1751 | 1749,
1752 | 1750,
1753 | 1751,
1754 | 1752,
1755 | 1753,
1756 | 1754,
1757 | 1755,
1758 | 1756,
1759 | 1757,
1760 | 1758,
1761 | 1759,
1762 | 1760,
1763 | 1761,
1764 | 1762,
1765 | 1763,
1766 | 1764,
1767 | 1765,
1768 | 1766,
1769 | 1767,
1770 | 1768,
1771 | 1769,
1772 | 1770,
1773 | 1771,
1774 | 1772,
1775 | 1773,
1776 | 1774,
1777 | 1775,
1778 | 1776,
1779 | 1777,
1780 | 1778,
1781 | 1779,
1782 | 1780,
1783 | 1781,
1784 | 1782,
1785 | 1783,
1786 | 1784,
1787 | 1785,
1788 | 1786,
1789 | 1787,
1790 | 1788,
1791 | 1789,
1792 | 1790,
1793 | 1791,
1794 | 1792,
1795 | 1793,
1796 | 1794,
1797 | 1795,
1798 | 1796,
1799 | 1797,
1800 | 1798,
1801 | 1799,
1802 | 1800,
1803 | 1801,
1804 | 1802,
1805 | 1803,
1806 | 1804,
1807 | 1805,
1808 | 1806,
1809 | 1807,
1810 | 1808,
1811 | 1809,
1812 | 1810,
1813 | 1811,
1814 | 1812,
1815 | 1813,
1816 | 1814,
1817 | 1815,
1818 | 1816,
1819 | 1817,
1820 | 1818,
1821 | 1819,
1822 | 1820,
1823 | 1821,
1824 | 1822,
1825 | 1823,
1826 | 1824,
1827 | 1825,
1828 | 1826,
1829 | 1827,
1830 | 1828,
1831 | 1829,
1832 | 1830,
1833 | 1831,
1834 | 1832,
1835 | 1833,
1836 | 1834,
1837 | 1835,
1838 | 1836,
1839 | 1837,
1840 | 1838,
1841 | 1839,
1842 | 1840,
1843 | 1841,
1844 | 1842,
1845 | 1843,
1846 | 1844,
1847 | 1845,
1848 | 1846,
1849 | 1847,
1850 | 1848,
1851 | 1849,
1852 | 1850,
1853 | 1851,
1854 | 1852,
1855 | 1853,
1856 | 1854,
1857 | 1855,
1858 | 1856,
1859 | 1857,
1860 | 1858,
1861 | 1859,
1862 | 1860,
1863 | 1861,
1864 | 1862,
1865 | 1863,
1866 | 1864,
1867 | 1865,
1868 | 1866,
1869 | 1867,
1870 | 1868,
1871 | 1869,
1872 | 1870,
1873 | 1871,
1874 | 1872,
1875 | 1873,
1876 | 1874,
1877 | 1875,
1878 | 1876,
1879 | 1877,
1880 | 1878,
1881 | 1879,
1882 | 1880,
1883 | 1881,
1884 | 1882,
1885 | 1883,
1886 | 1884,
1887 | 1885,
1888 | 1886,
1889 | 1887,
1890 | 1888,
1891 | 1889,
1892 | 1890,
1893 | 1891,
1894 | 1892,
1895 | 1893,
1896 | 1894,
1897 | 1895,
1898 | 1896,
1899 | 1897,
1900 | 1898,
1901 | 1899,
1902 | 1900,
1903 | 1901,
1904 | 1902,
1905 | 1903,
1906 | 1904,
1907 | 1905,
1908 | 1906,
1909 | 1907,
1910 | 1908,
1911 | 1909,
1912 | 1910,
1913 | 1911,
1914 | 1912,
1915 | 1913,
1916 | 1914,
1917 | 1915,
1918 | 1916,
1919 | 1917,
1920 | 1918,
1921 | 1919,
1922 | 1920,
1923 | 1921,
1924 | 1922,
1925 | 1923,
1926 | 1924,
1927 | 1925,
1928 | 1926,
1929 | 1927,
1930 | 1928,
1931 | 1929,
1932 | 1930,
1933 | 1931,
1934 | 1932,
1935 | 1933,
1936 | 1934,
1937 | 1935,
1938 | 1936,
1939 | 1937,
1940 | 1938,
1941 | 1939,
1942 | 1940,
1943 | 1941,
1944 | 1942,
1945 | 1943,
1946 | 1944,
1947 | 1945,
1948 | 1946,
1949 | 1947,
1950 | 1948,
1951 | 1949,
1952 | 1950,
1953 | 1951,
1954 | 1952,
1955 | 1953,
1956 | 1954,
1957 | 1955,
1958 | 1956,
1959 | 1957,
1960 | 1958,
1961 | 1959,
1962 | 1960,
1963 | 1961,
1964 | 1962,
1965 | 1963,
1966 | 1964,
1967 | 1965,
1968 | 1966,
1969 | 1967,
1970 | 1968,
1971 | 1969,
1972 | 1970,
1973 | 1971,
1974 | 1972,
1975 | 1973,
1976 | 1974,
1977 | 1975,
1978 | 1976,
1979 | 1977,
1980 | 1978,
1981 | 1979,
1982 | 1980,
1983 | 1981,
1984 | 1982,
1985 | 1983,
1986 | 1984,
1987 | 1985,
1988 | 1986,
1989 | 1987,
1990 | 1988,
1991 | 1989,
1992 | 1990,
1993 | 1991,
1994 | 1992,
1995 | 1993,
1996 | 1994,
1997 | 1995,
1998 | 1996,
1999 | 1997,
2000 | 1998,
2001 | 1999,
2002 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | - defaults
4 | dependencies:
5 | - pip
6 | - ipywidgets
7 | - hyperopt
8 | - kaggle
9 | - plotly
10 | - scikit-learn
11 | - scipy
12 | - numpy
13 | - python
14 | - py-xgboost
15 | - jupyterlab
16 | - nb_conda
17 | - bokeh
18 | - pandas
19 | - holoviews
20 | - seaborn
21 | - notebook<7.0.0
22 | - pymatviz
--------------------------------------------------------------------------------
/molsim_ml.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# ML for Gas Adsorption"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## -1. Only if you run this notebook on Colab"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "If you use this notebook on Colab, please uncomment the lines below (remove the `#`) and execute the cell."
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "#import sys\n",
31 | "#!{sys.executable} -m pip install -U pandas-profiling[notebook]\n",
32 | "#!jupyter nbextension enable --py widgetsnbextension\n",
33 | "#!pip install --upgrade pandas scikit-learn holoviews bokeh plotly matplotlib\n",
34 | "#!wget https://raw.githubusercontent.com/kjappelbaum/ml_molsim/2022/descriptornames.py\n",
35 | "#!mkdir data\n",
36 | "#!cd data && wget https://github.com/kjappelbaum/ml_molsim/raw/2022/data/data.csv\n",
37 | "#!cd data && wget https://github.com/kjappelbaum/ml_molsim/raw/2022/data/features.csv\n",
38 | "# import os, holoviews as hv\n",
39 | "# os.environ['HV_DOC_HTML'] = 'true'"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "## Import packages we will need"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 7,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "# basics\n",
56 | "import os\n",
57 | "import numpy as np\n",
58 | "import pprint as pp\n",
59 | "\n",
60 | "# pandas is used to read/process data\n",
61 | "import pandas as pd\n",
62 | "\n",
63 | "# machine learning dependencies\n",
64 | "# scaling of data\n",
65 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler\n",
66 | "# train/test split\n",
67 | "from sklearn.model_selection import train_test_split\n",
68 | "# model selection\n",
69 | "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n",
70 | "# the KRR model\n",
71 | "from sklearn.kernel_ridge import KernelRidge\n",
72 | "# linear model\n",
73 | "from sklearn.linear_model import LinearRegression\n",
74 | "# pipeline to streamline modeling pipelines\n",
75 | "from sklearn.pipeline import Pipeline\n",
76 | "# principal component analysis\n",
77 | "from sklearn.decomposition import PCA\n",
78 | "# polynomial kernel\n",
79 | "from sklearn.metrics.pairwise import polynomial_kernel\n",
80 | "# Dummy model as baseline\n",
81 | "from sklearn.dummy import DummyClassifier, DummyRegressor\n",
82 | "# Variance Threshold for feature selection\n",
83 | "from sklearn.feature_selection import VarianceThreshold, SelectFromModel\n",
84 | "# metrics to measure model performance\n",
85 | "from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,\n",
86 | " mean_absolute_error, mean_squared_error, max_error)\n",
87 | "\n",
88 | "# save/load models\n",
89 | "import joblib\n",
90 | "\n",
91 | "# For the permutation importance implementation\n",
92 | "from joblib import Parallel\n",
93 | "from joblib import delayed\n",
94 | "from sklearn.metrics import check_scoring\n",
95 | "from sklearn.utils import Bunch\n",
96 | "from sklearn.utils import check_random_state\n",
97 | "from sklearn.utils import check_array\n",
98 | "\n",
99 | "# plotting\n",
100 | "import matplotlib.pyplot as plt\n",
101 | "%matplotlib inline\n",
102 | "from pymatviz.parity import hist_density\n",
103 | "\n",
104 | "RANDOM_SEED = 4242424242\n",
105 | "DATA_DIR = 'data'\n",
106 | "DATA_FILE = os.path.join(DATA_DIR, 'data.csv')\n",
107 | "\n",
108 | "np.random.seed(RANDOM_SEED)\n"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "\n",
118 | "other_descriptors = [\"CellV [A^3]\"]\n",
119 | "\n",
120 | "geometric_descriptors = [\n",
121 | " \"Di\",\n",
122 | " \"Df\",\n",
123 | " \"Dif\",\n",
124 | " \"density [g/cm^3]\",\n",
125 | " \"total_SA_volumetric\",\n",
126 | " \"total_SA_gravimetric\",\n",
127 | " \"total_POV_volumetric\",\n",
128 | " \"total_POV_gravimetric\",\n",
129 | "]\n",
130 | "\n",
131 | "linker_descriptors = [\n",
132 | " \"f-lig-chi-0\",\n",
133 | " \"f-lig-chi-1\",\n",
134 | " \"f-lig-chi-2\",\n",
135 | " \"f-lig-chi-3\",\n",
136 | " \"f-lig-Z-0\",\n",
137 | " \"f-lig-Z-1\",\n",
138 | " \"f-lig-Z-2\",\n",
139 | " \"f-lig-Z-3\",\n",
140 | " \"f-lig-I-0\",\n",
141 | " \"f-lig-I-1\",\n",
142 | " \"f-lig-I-2\",\n",
143 | " \"f-lig-I-3\",\n",
144 | " \"f-lig-T-0\",\n",
145 | " \"f-lig-T-1\",\n",
146 | " \"f-lig-T-2\",\n",
147 | " \"f-lig-T-3\",\n",
148 | " \"f-lig-S-0\",\n",
149 | " \"f-lig-S-1\",\n",
150 | " \"f-lig-S-2\",\n",
151 | " \"f-lig-S-3\",\n",
152 | " \"lc-chi-0-all\",\n",
153 | " \"lc-chi-1-all\",\n",
154 | " \"lc-chi-2-all\",\n",
155 | " \"lc-chi-3-all\",\n",
156 | " \"lc-Z-0-all\",\n",
157 | " \"lc-Z-1-all\",\n",
158 | " \"lc-Z-2-all\",\n",
159 | " \"lc-Z-3-all\",\n",
160 | " \"lc-I-0-all\",\n",
161 | " \"lc-I-1-all\",\n",
162 | " \"lc-I-2-all\",\n",
163 | " \"lc-I-3-all\",\n",
164 | " \"lc-T-0-all\",\n",
165 | " \"lc-T-1-all\",\n",
166 | " \"lc-T-2-all\",\n",
167 | " \"lc-T-3-all\",\n",
168 | " \"lc-S-0-all\",\n",
169 | " \"lc-S-1-all\",\n",
170 | " \"lc-S-2-all\",\n",
171 | " \"lc-S-3-all\",\n",
172 | " \"lc-alpha-0-all\",\n",
173 | " \"lc-alpha-1-all\",\n",
174 | " \"lc-alpha-2-all\",\n",
175 | " \"lc-alpha-3-all\",\n",
176 | " \"D_lc-chi-0-all\",\n",
177 | " \"D_lc-chi-1-all\",\n",
178 | " \"D_lc-chi-2-all\",\n",
179 | " \"D_lc-chi-3-all\",\n",
180 | " \"D_lc-Z-0-all\",\n",
181 | " \"D_lc-Z-1-all\",\n",
182 | " \"D_lc-Z-2-all\",\n",
183 | " \"D_lc-Z-3-all\",\n",
184 | " \"D_lc-I-0-all\",\n",
185 | " \"D_lc-I-1-all\",\n",
186 | " \"D_lc-I-2-all\",\n",
187 | " \"D_lc-I-3-all\",\n",
188 | " \"D_lc-T-0-all\",\n",
189 | " \"D_lc-T-1-all\",\n",
190 | " \"D_lc-T-2-all\",\n",
191 | " \"D_lc-T-3-all\",\n",
192 | " \"D_lc-S-0-all\",\n",
193 | " \"D_lc-S-1-all\",\n",
194 | " \"D_lc-S-2-all\",\n",
195 | " \"D_lc-S-3-all\",\n",
196 | " \"D_lc-alpha-0-all\",\n",
197 | " \"D_lc-alpha-1-all\",\n",
198 | " \"D_lc-alpha-2-all\",\n",
199 | " \"D_lc-alpha-3-all\",\n",
200 | "]\n",
201 | "\n",
202 | "metalcenter_descriptors = [\n",
203 | " \"mc_CRY-chi-0-all\",\n",
204 | " \"mc_CRY-chi-1-all\",\n",
205 | " \"mc_CRY-chi-2-all\",\n",
206 | " \"mc_CRY-chi-3-all\",\n",
207 | " \"mc_CRY-Z-0-all\",\n",
208 | " \"mc_CRY-Z-1-all\",\n",
209 | " \"mc_CRY-Z-2-all\",\n",
210 | " \"mc_CRY-Z-3-all\",\n",
211 | " \"mc_CRY-I-0-all\",\n",
212 | " \"mc_CRY-I-1-all\",\n",
213 | " \"mc_CRY-I-2-all\",\n",
214 | " \"mc_CRY-I-3-all\",\n",
215 | " \"mc_CRY-T-0-all\",\n",
216 | " \"mc_CRY-T-1-all\",\n",
217 | " \"mc_CRY-T-2-all\",\n",
218 | " \"mc_CRY-T-3-all\",\n",
219 | " \"mc_CRY-S-0-all\",\n",
220 | " \"mc_CRY-S-1-all\",\n",
221 | " \"mc_CRY-S-2-all\",\n",
222 | " \"mc_CRY-S-3-all\",\n",
223 | " \"D_mc_CRY-chi-0-all\",\n",
224 | " \"D_mc_CRY-chi-1-all\",\n",
225 | " \"D_mc_CRY-chi-2-all\",\n",
226 | " \"D_mc_CRY-chi-3-all\",\n",
227 | " \"D_mc_CRY-Z-0-all\",\n",
228 | " \"D_mc_CRY-Z-1-all\",\n",
229 | " \"D_mc_CRY-Z-2-all\",\n",
230 | " \"D_mc_CRY-Z-3-all\",\n",
231 | " \"D_mc_CRY-I-0-all\",\n",
232 | " \"D_mc_CRY-I-1-all\",\n",
233 | " \"D_mc_CRY-I-2-all\",\n",
234 | " \"D_mc_CRY-I-3-all\",\n",
235 | " \"D_mc_CRY-T-0-all\",\n",
236 | " \"D_mc_CRY-T-1-all\",\n",
237 | " \"D_mc_CRY-T-2-all\",\n",
238 | " \"D_mc_CRY-T-3-all\",\n",
239 | " \"D_mc_CRY-S-0-all\",\n",
240 | " \"D_mc_CRY-S-1-all\",\n",
241 | " \"D_mc_CRY-S-2-all\",\n",
242 | " \"D_mc_CRY-S-3-all\",\n",
243 | "]\n",
244 | "\n",
245 | "functionalgroup_descriptors = [\n",
246 | " \"func-chi-0-all\",\n",
247 | " \"func-chi-1-all\",\n",
248 | " \"func-chi-2-all\",\n",
249 | " \"func-chi-3-all\",\n",
250 | " \"func-Z-0-all\",\n",
251 | " \"func-Z-1-all\",\n",
252 | " \"func-Z-2-all\",\n",
253 | " \"func-Z-3-all\",\n",
254 | " \"func-I-0-all\",\n",
255 | " \"func-I-1-all\",\n",
256 | " \"func-I-2-all\",\n",
257 | " \"func-I-3-all\",\n",
258 | " \"func-T-0-all\",\n",
259 | " \"func-T-1-all\",\n",
260 | " \"func-T-2-all\",\n",
261 | " \"func-T-3-all\",\n",
262 | " \"func-S-0-all\",\n",
263 | " \"func-S-1-all\",\n",
264 | " \"func-S-2-all\",\n",
265 | " \"func-S-3-all\",\n",
266 | " \"func-alpha-0-all\",\n",
267 | " \"func-alpha-1-all\",\n",
268 | " \"func-alpha-2-all\",\n",
269 | " \"func-alpha-3-all\",\n",
270 | " \"D_func-chi-0-all\",\n",
271 | " \"D_func-chi-1-all\",\n",
272 | " \"D_func-chi-2-all\",\n",
273 | " \"D_func-chi-3-all\",\n",
274 | " \"D_func-Z-0-all\",\n",
275 | " \"D_func-Z-1-all\",\n",
276 | " \"D_func-Z-2-all\",\n",
277 | " \"D_func-Z-3-all\",\n",
278 | " \"D_func-I-0-all\",\n",
279 | " \"D_func-I-1-all\",\n",
280 | " \"D_func-I-2-all\",\n",
281 | " \"D_func-I-3-all\",\n",
282 | " \"D_func-T-0-all\",\n",
283 | " \"D_func-T-1-all\",\n",
284 | " \"D_func-T-2-all\",\n",
285 | " \"D_func-T-3-all\",\n",
286 | " \"D_func-S-0-all\",\n",
287 | " \"D_func-S-1-all\",\n",
288 | " \"D_func-S-2-all\",\n",
289 | " \"D_func-S-3-all\",\n",
290 | " \"D_func-alpha-0-all\",\n",
291 | " \"D_func-alpha-1-all\",\n",
292 | " \"D_func-alpha-2-all\",\n",
293 | " \"D_func-alpha-3-all\",\n",
294 | "]\n",
295 | "\n",
296 | "\n",
297 | "summed_linker_descriptors = [\n",
298 | " \"sum-f-lig-chi-0\",\n",
299 | " \"sum-f-lig-chi-1\",\n",
300 | " \"sum-f-lig-chi-2\",\n",
301 | " \"sum-f-lig-chi-3\",\n",
302 | " \"sum-f-lig-Z-0\",\n",
303 | " \"sum-f-lig-Z-1\",\n",
304 | " \"sum-f-lig-Z-2\",\n",
305 | " \"sum-f-lig-Z-3\",\n",
306 | " \"sum-f-lig-I-0\",\n",
307 | " \"sum-f-lig-I-1\",\n",
308 | " \"sum-f-lig-I-2\",\n",
309 | " \"sum-f-lig-I-3\",\n",
310 | " \"sum-f-lig-T-0\",\n",
311 | " \"sum-f-lig-T-1\",\n",
312 | " \"sum-f-lig-T-2\",\n",
313 | " \"sum-f-lig-T-3\",\n",
314 | " \"sum-f-lig-S-0\",\n",
315 | " \"sum-f-lig-S-1\",\n",
316 | " \"sum-f-lig-S-2\",\n",
317 | " \"sum-f-lig-S-3\",\n",
318 | " \"sum-lc-chi-0-all\",\n",
319 | " \"sum-lc-chi-1-all\",\n",
320 | " \"sum-lc-chi-2-all\",\n",
321 | " \"sum-lc-chi-3-all\",\n",
322 | " \"sum-lc-Z-0-all\",\n",
323 | " \"sum-lc-Z-1-all\",\n",
324 | " \"sum-lc-Z-2-all\",\n",
325 | " \"sum-lc-Z-3-all\",\n",
326 | " \"sum-lc-I-0-all\",\n",
327 | " \"sum-lc-I-1-all\",\n",
328 | " \"sum-lc-I-2-all\",\n",
329 | " \"sum-lc-I-3-all\",\n",
330 | " \"sum-lc-T-0-all\",\n",
331 | " \"sum-lc-T-1-all\",\n",
332 | " \"sum-lc-T-2-all\",\n",
333 | " \"sum-lc-T-3-all\",\n",
334 | " \"sum-lc-S-0-all\",\n",
335 | " \"sum-lc-S-1-all\",\n",
336 | " \"sum-lc-S-2-all\",\n",
337 | " \"sum-lc-S-3-all\",\n",
338 | " \"sum-lc-alpha-0-all\",\n",
339 | " \"sum-lc-alpha-1-all\",\n",
340 | " \"sum-lc-alpha-2-all\",\n",
341 | " \"sum-lc-alpha-3-all\",\n",
342 | " \"sum-D_lc-chi-0-all\",\n",
343 | " \"sum-D_lc-chi-1-all\",\n",
344 | " \"sum-D_lc-chi-2-all\",\n",
345 | " \"sum-D_lc-chi-3-all\",\n",
346 | " \"sum-D_lc-Z-0-all\",\n",
347 | " \"sum-D_lc-Z-1-all\",\n",
348 | " \"sum-D_lc-Z-2-all\",\n",
349 | " \"sum-D_lc-Z-3-all\",\n",
350 | " \"sum-D_lc-I-0-all\",\n",
351 | " \"sum-D_lc-I-1-all\",\n",
352 | " \"sum-D_lc-I-2-all\",\n",
353 | " \"sum-D_lc-I-3-all\",\n",
354 | " \"sum-D_lc-T-0-all\",\n",
355 | " \"sum-D_lc-T-1-all\",\n",
356 | " \"sum-D_lc-T-2-all\",\n",
357 | " \"sum-D_lc-T-3-all\",\n",
358 | " \"sum-D_lc-S-0-all\",\n",
359 | " \"sum-D_lc-S-1-all\",\n",
360 | " \"sum-D_lc-S-2-all\",\n",
361 | " \"sum-D_lc-S-3-all\",\n",
362 | " \"sum-D_lc-alpha-0-all\",\n",
363 | " \"sum-D_lc-alpha-1-all\",\n",
364 | " \"sum-D_lc-alpha-2-all\",\n",
365 | " \"sum-D_lc-alpha-3-all\",\n",
366 | "]\n",
367 | "\n",
368 | "summed_metalcenter_descriptors = [\n",
369 | " \"sum-mc_CRY-chi-0-all\",\n",
370 | " \"sum-mc_CRY-chi-1-all\",\n",
371 | " \"sum-mc_CRY-chi-2-all\",\n",
372 | " \"sum-mc_CRY-chi-3-all\",\n",
373 | " \"sum-mc_CRY-Z-0-all\",\n",
374 | " \"sum-mc_CRY-Z-1-all\",\n",
375 | " \"sum-mc_CRY-Z-2-all\",\n",
376 | " \"sum-mc_CRY-Z-3-all\",\n",
377 | " \"sum-mc_CRY-I-0-all\",\n",
378 | " \"sum-mc_CRY-I-1-all\",\n",
379 | " \"sum-mc_CRY-I-2-all\",\n",
380 | " \"sum-mc_CRY-I-3-all\",\n",
381 | " \"sum-mc_CRY-T-0-all\",\n",
382 | " \"sum-mc_CRY-T-1-all\",\n",
383 | " \"sum-mc_CRY-T-2-all\",\n",
384 | " \"sum-mc_CRY-T-3-all\",\n",
385 | " \"sum-mc_CRY-S-0-all\",\n",
386 | " \"sum-mc_CRY-S-1-all\",\n",
387 | " \"sum-mc_CRY-S-2-all\",\n",
388 | " \"sum-mc_CRY-S-3-all\",\n",
389 | " \"sum-D_mc_CRY-chi-0-all\",\n",
390 | " \"sum-D_mc_CRY-chi-1-all\",\n",
391 | " \"sum-D_mc_CRY-chi-2-all\",\n",
392 | " \"sum-D_mc_CRY-chi-3-all\",\n",
393 | " \"sum-D_mc_CRY-Z-0-all\",\n",
394 | " \"sum-D_mc_CRY-Z-1-all\",\n",
395 | " \"sum-D_mc_CRY-Z-2-all\",\n",
396 | " \"sum-D_mc_CRY-Z-3-all\",\n",
397 | " \"sum-D_mc_CRY-I-0-all\",\n",
398 | " \"sum-D_mc_CRY-I-1-all\",\n",
399 | " \"sum-D_mc_CRY-I-2-all\",\n",
400 | " \"sum-D_mc_CRY-I-3-all\",\n",
401 | " \"sum-D_mc_CRY-T-0-all\",\n",
402 | " \"sum-D_mc_CRY-T-1-all\",\n",
403 | " \"sum-D_mc_CRY-T-2-all\",\n",
404 | " \"sum-D_mc_CRY-T-3-all\",\n",
405 | " \"sum-D_mc_CRY-S-0-all\",\n",
406 | " \"sum-D_mc_CRY-S-1-all\",\n",
407 | " \"sum-D_mc_CRY-S-2-all\",\n",
408 | " \"sum-D_mc_CRY-S-3-all\",\n",
409 | "]\n",
410 | "\n",
411 | "summed_functionalgroup_descriptors = [\n",
412 | " \"sum-func-chi-0-all\",\n",
413 | " \"sum-func-chi-1-all\",\n",
414 | " \"sum-func-chi-2-all\",\n",
415 | " \"sum-func-chi-3-all\",\n",
416 | " \"sum-func-Z-0-all\",\n",
417 | " \"sum-func-Z-1-all\",\n",
418 | " \"sum-func-Z-2-all\",\n",
419 | " \"sum-func-Z-3-all\",\n",
420 | " \"sum-func-I-0-all\",\n",
421 | " \"sum-func-I-1-all\",\n",
422 | " \"sum-func-I-2-all\",\n",
423 | " \"sum-func-I-3-all\",\n",
424 | " \"sum-func-T-0-all\",\n",
425 | " \"sum-func-T-1-all\",\n",
426 | " \"sum-func-T-2-all\",\n",
427 | " \"sum-func-T-3-all\",\n",
428 | " \"sum-func-S-0-all\",\n",
429 | " \"sum-func-S-1-all\",\n",
430 | " \"sum-func-S-2-all\",\n",
431 | " \"sum-func-S-3-all\",\n",
432 | " \"sum-func-alpha-0-all\",\n",
433 | " \"sum-func-alpha-1-all\",\n",
434 | " \"sum-func-alpha-2-all\",\n",
435 | " \"sum-func-alpha-3-all\",\n",
436 | " \"sum-D_func-chi-0-all\",\n",
437 | " \"sum-D_func-chi-1-all\",\n",
438 | " \"sum-D_func-chi-2-all\",\n",
439 | " \"sum-D_func-chi-3-all\",\n",
440 | " \"sum-D_func-Z-0-all\",\n",
441 | " \"sum-D_func-Z-1-all\",\n",
442 | " \"sum-D_func-Z-2-all\",\n",
443 | " \"sum-D_func-Z-3-all\",\n",
444 | " \"sum-D_func-I-0-all\",\n",
445 | " \"sum-D_func-I-1-all\",\n",
446 | " \"sum-D_func-I-2-all\",\n",
447 | " \"sum-D_func-I-3-all\",\n",
448 | " \"sum-D_func-T-0-all\",\n",
449 | " \"sum-D_func-T-1-all\",\n",
450 | " \"sum-D_func-T-2-all\",\n",
451 | " \"sum-D_func-T-3-all\",\n",
452 | " \"sum-D_func-S-0-all\",\n",
453 | " \"sum-D_func-S-1-all\",\n",
454 | " \"sum-D_func-S-2-all\",\n",
455 | " \"sum-D_func-S-3-all\",\n",
456 | " \"sum-D_func-alpha-0-all\",\n",
457 | " \"sum-D_func-alpha-1-all\",\n",
458 | " \"sum-D_func-alpha-2-all\",\n",
459 | " \"sum-D_func-alpha-3-all\",\n",
460 | "]\n"
461 | ]
462 | },
463 | {
464 | "cell_type": "markdown",
465 | "metadata": {},
466 | "source": [
467 | " $\\color{DarkBlue}{\\textsf{Short question}}$\n",
468 | "- We declared a global variable to fix the random seed (`RANDOM_SEED`). Why did we do this? "
469 | ]
470 | },
471 | {
472 | "cell_type": "markdown",
473 | "metadata": {},
474 | "source": [
475 | "## Hands-on Project: Carbon-dioxide uptake in MOFs"
476 | ]
477 | },
478 | {
479 | "cell_type": "markdown",
480 | "metadata": {},
481 | "source": [
482 | "In this exercise we will build a model that can predict the CO$_2$ uptake of metal-organic frameworks (MOFs), which are crystalline materials consisting of inorganic metal nodes linked by organic linkers.\n",
483 | "\n",
484 | "\n",
485 | "\n",
486 | "There are two main **learning goals** for this exercise: \n",
487 | "\n",
488 | "1. Understand the typical workflow for machine learning in materials science. We will cover exploratory data analysis (EDA) and supervised learning (KRR).\n",
489 | "\n",
490 | "2. Get familiar with some Python packages that are useful for data analysis and visualization. \n",
491 | "\n",
492 | "At the end of the exercise, you will produce an interactive plot like the one below, comparing the predictions of your model against the CO$_2$ uptake computed with GCMC simulations.\n",
493 | "The histograms show the distributions of the errors on the training set (left) and on the test set (right).\n",
494 | "\n",
495 | "\n",
496 | "\n",
497 | "\n",
498 | "\n",
499 | "This exercise requires a basic knowledge of Python, e.g. that you can write list comprehensions, and are able to read documentation of functions provided by Python packages.\n",
500 | "You will be asked to provide some function arguments (indicated by `#fillme` comments).\n",
501 | "\n",
502 | "You can execute all the following code cells by pressing SHIFT and ENTER and get information about the functions by pressing TAB when you are between the parentheses (see the notes for more tips). \n",
503 | "\n",
504 | "Also the [sklearn documentation](https://scikit-learn.org/stable/user_guide.html) is a great source of reference with many explanations and examples.\n",
505 | "\n",
506 | "In a pandas DataFrame (`df`), you can select columns by name with `df[columnname]`. If at any point you think that the dataset is too large for your computer, you can select a subset using `df.sample()` or by making the test set larger in the train/test split (section 2). "
507 | ]
508 | },
509 | {
510 | "cell_type": "markdown",
511 | "metadata": {},
512 | "source": [
513 | "## 1. Import the data"
514 | ]
515 | },
516 | {
517 | "cell_type": "code",
518 | "execution_count": 3,
519 | "metadata": {},
520 | "outputs": [],
521 | "source": [
522 | "df = pd.read_csv(DATA_FILE)"
523 | ]
524 | },
525 | {
526 | "cell_type": "markdown",
527 | "metadata": {},
528 | "source": [
529 | "Let's take a look at the first few rows to see if everything seems reasonable ..."
530 | ]
531 | },
532 | {
533 | "cell_type": "code",
534 | "execution_count": 4,
535 | "metadata": {},
536 | "outputs": [
537 | {
538 | "data": {
539 | "text/html": [
540 | "
\n", 558 | " | ASA [m^2/cm^3] | \n", 559 | "CellV [A^3] | \n", 560 | "Df | \n", 561 | "Di | \n", 562 | "Dif | \n", 563 | "NASA [m^2/cm^3] | \n", 564 | "POAV [cm^3/g] | \n", 565 | "POAVF | \n", 566 | "PONAV [cm^3/g] | \n", 567 | "PONAVF | \n", 568 | "... | \n", 569 | "pure_methane_widomHOA | \n", 570 | "pure_uptake_CO2_298.00_15000 | \n", 571 | "pure_uptake_CO2_298.00_1600000 | \n", 572 | "pure_uptake_methane_298.00_580000 | \n", 573 | "pure_uptake_methane_298.00_6500000 | \n", 574 | "logKH_CO2 | \n", 575 | "logKH_CH4 | \n", 576 | "CH4DC | \n", 577 | "CH4HPSTP | \n", 578 | "CH4LPSTP | \n", 579 | "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 584 | "2329.01 | \n", 585 | "1251.28 | \n", 586 | "6.61256 | \n", 587 | "8.87694 | \n", 588 | "8.48668 | \n", 589 | "0.0 | \n", 590 | "0.818919 | \n", 591 | "0.68874 | \n", 592 | "0.0 | \n", 593 | "0.0 | \n", 594 | "... | \n", 595 | "-8.144317 | \n", 596 | "0.111981 | \n", 597 | "14.218595 | \n", 598 | "1.680640 | \n", 599 | "9.163066 | \n", 600 | "-5.125451 | \n", 601 | "-5.511444 | \n", 602 | "175.569974 | \n", 603 | "215.005044 | \n", 604 | "39.435070 | \n", 605 | "
1 | \n", 608 | "1983.81 | \n", 609 | "1254.01 | \n", 610 | "5.80566 | \n", 611 | "7.13426 | \n", 612 | "7.13154 | \n", 613 | "0.0 | \n", 614 | "0.495493 | \n", 615 | "0.58032 | \n", 616 | "0.0 | \n", 617 | "0.0 | \n", 618 | "... | \n", 619 | "-10.208005 | \n", 620 | "0.481625 | \n", 621 | "9.312424 | \n", 622 | "1.513152 | \n", 623 | "5.908356 | \n", 624 | "-4.502967 | \n", 625 | "-5.505947 | \n", 626 | "143.616349 | \n", 627 | "193.059644 | \n", 628 | "49.443295 | \n", 629 | "
2 | \n", 632 | "2259.13 | \n", 633 | "1250.58 | \n", 634 | "5.99131 | \n", 635 | "8.01682 | \n", 636 | "7.98933 | \n", 637 | "0.0 | \n", 638 | "0.728036 | \n", 639 | "0.65710 | \n", 640 | "0.0 | \n", 641 | "0.0 | \n", 642 | "... | \n", 643 | "-8.479801 | \n", 644 | "0.401683 | \n", 645 | "14.796071 | \n", 646 | "1.569714 | \n", 647 | "7.933198 | \n", 648 | "-4.433968 | \n", 649 | "-5.525707 | \n", 650 | "160.238808 | \n", 651 | "199.765744 | \n", 652 | "39.526937 | \n", 653 | "
3 | \n", 656 | "1424.54 | \n", 657 | "1249.27 | \n", 658 | "4.73477 | \n", 659 | "7.05822 | \n", 660 | "7.05822 | \n", 661 | "0.0 | \n", 662 | "0.453157 | \n", 663 | "0.47338 | \n", 664 | "0.0 | \n", 665 | "0.0 | \n", 666 | "... | \n", 667 | "-12.615382 | \n", 668 | "0.821747 | \n", 669 | "10.816880 | \n", 670 | "2.161833 | \n", 671 | "6.710778 | \n", 672 | "-4.135434 | \n", 673 | "-5.297082 | \n", 674 | "132.576623 | \n", 675 | "195.582107 | \n", 676 | "63.005483 | \n", 677 | "
4 | \n", 680 | "2228.31 | \n", 681 | "1250.61 | \n", 682 | "6.40783 | \n", 683 | "8.35944 | \n", 684 | "8.26946 | \n", 685 | "0.0 | \n", 686 | "0.700539 | \n", 687 | "0.65092 | \n", 688 | "0.0 | \n", 689 | "0.0 | \n", 690 | "... | \n", 691 | "-8.743404 | \n", 692 | "0.258905 | \n", 693 | "14.153999 | \n", 694 | "1.653013 | \n", 695 | "8.272621 | \n", 696 | "-4.774301 | \n", 697 | "-5.515219 | \n", 698 | "171.601539 | \n", 699 | "214.452966 | \n", 700 | "42.851427 | \n", 701 | "
5 rows × 343 columns
\n", 705 | "pd.options.display.max_columns=100
to adjust how many columns are shown.pd.options.display.max_columns=100
would show at maximum 100 columns. [1 if value > THRESHOLD else 0 for value in df[TARGET]]
df.corr(method='spearman')[TARGET]
.sort_values()
method on the output of `df.corr()` to sort by the value of the correlation coefficient scatter = hv.Scatter(df, 'Di', [TARGET, 'density [g/cm^3]']).opts(color='density [g/cm^3]', cmap='rainbow')
for plotting. Also consider the holoviews
documentation. In case holoviews
is too new for you, you can of course just use matplotlib
and something like plt.scatter(x,y)
DummyRegressor
you can for example use dummyregressor_mean = DummyRegressor(strategy='mean')
DummyRegressor
you can check out the source code on GitHub classifier.predict(X)
accuracy_score(true_values, predicted_values)
np.histogram
\n",
1389 | "hv.extension(\"bokeh\")\n",
1390 | "hex_train = hv.HexTiles(res_train, [\"y true\", \"y pred\"]).hist(\n",
1391 | " dimension=[\"y true\", \"y pred\"]\n",
1392 | ")\n",
1393 | "hex_test = hv.HexTiles(res_test, [\"y true\", \"y pred\"]).hist(\n",
1394 | " dimension=[\"y true\", \"y pred\"]\n",
1395 | ")\n",
1396 | "hex_train + hex_test\n",
1397 | "
\n",
1398 | "
fit
, predict
methods also work for pipelines np.logspace
function to generate a grid for values that you want to vary on a logarithmic scale alpha
and the Gaussian width gamma
\n",
1645 | "from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, mix, rand, anneal, space_eval\n",
1646 | "from functools import partial\n",
1647 | "
\n",
1648 | "\n",
1649 | "Define the grid\n",
1650 | "\n",
1651 | "param_hyperopt = {\n",
1652 | " \"krr__alpha\": hp.loguniform(\"krr__alpha\", np.log(0.001), np.log(10)),\n",
1653 | " \"krr__gamma\": hp.loguniform(\"krr__gamma\", np.log(0.001), np.log(10)),\n",
1654 | "}\n",
1655 | "
\n",
1656 | "\n",
1657 | "Define the objective function\n",
1658 | "\n",
1659 | "def objective_function(params):\n",
1660 | " pipe.set_params(\n",
1661 | " **{\n",
1662 | " \"krr__alpha\": params[\"krr__alpha\"],\n",
1663 | " \"krr__gamma\": params[\"krr__gamma\"],\n",
1664 | " }\n",
1665 | " )\n",
1666 | " score = cross_val_score(\n",
1667 | " pipe, X_train, y_train, cv=10, scoring=\"neg_mean_absolute_error\"\n",
1668 | " ).mean()\n",
1669 | " return {\"loss\": -score, \"status\": STATUS_OK} \n",
1670 | "
\n",
1671 | "\n",
1672 | "We will use a search in which we mix random search, annealing and tpe\n",
1673 | "\n",
1674 | "trials = Trials()\n",
1675 | "mix_search = partial(\n",
1676 | " mix.suggest,\n",
1677 | " p_suggest=[(0.15, rand.suggest), (0.15, anneal.suggest), (0.7, tpe.suggest)],\n",
1678 | ")\n",
1679 | "
\n",
1680 | "\n",
1681 | "Now, we can minimize the objective function.\n",
1682 | "\n",
1683 | "best_param = fmin(\n",
1684 | " objective_function,\n",
1685 | " param_hyperopt,\n",
1686 | " algo=mix_search,\n",
1687 | " max_evals=MAX_EVALES,\n",
1688 | " trials=trials,\n",
1689 | " rstate=np.random.RandomState(RANDOM_SEED),\n",
1690 | " )\n",
1691 | "
\n",
1692 | "\n",
1693 | "a
, you can use np.argsort(a)[-n:]
FEATURES
list np.array(FEATURES)[np.argsort(a)[-n:]]