├── .gitattributes ├── images ├── materials_project.png ├── scklearn-flowchart.png └── elasticity_calculations.png ├── requirements.txt ├── data ├── mpdata.csv └── mpdata.json ├── Pipfile ├── .gitlab-ci.yml ├── LICENSE ├── .gitignore ├── CONDUCT.md ├── README.md ├── notebooks ├── Overview.ipynb ├── 1-gather-data.ipynb ├── 3-predicting-data.ipynb └── 2-explore-sanitize-data.ipynb └── Pipfile.lock /.gitattributes: -------------------------------------------------------------------------------- 1 | *.csv filter=lfs diff=lfs merge=lfs -text 2 | *.json filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /images/materials_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/costrouc/mse-machinelearning-notebooks/HEAD/images/materials_project.png -------------------------------------------------------------------------------- /images/scklearn-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/costrouc/mse-machinelearning-notebooks/HEAD/images/scklearn-flowchart.png -------------------------------------------------------------------------------- /images/elasticity_calculations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/costrouc/mse-machinelearning-notebooks/HEAD/images/elasticity_calculations.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jupyterlab 2 | 3 | requests 4 | numpy 5 | scipy 6 | pandas 7 | matplotlib 8 | scikit-learn 9 | 10 | pymatgen 11 | -------------------------------------------------------------------------------- /data/mpdata.csv: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:c082d16993c62742af4231cecaab45ae0790e6e0867f59a5436b0716ce7e4698 3 | size 894528 4 | -------------------------------------------------------------------------------- /data/mpdata.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b606e188c70e5822ff5ec6d3708de91300c801be039b81abc16267e1f270f0da 3 | size 22463882 4 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | 3 | url = "https://pypi.python.org/simple" 4 | verify_ssl = true 5 | name = "pypi" 6 | 7 | 8 | [packages] 9 | 10 | jupyterlab = "*" 11 | requests = "*" 12 | numpy = "*" 13 | scipy = "*" 14 | pandas = "*" 15 | matplotlib = "*" 16 | scikit-learn = "*" 17 | pymatgen = "*" 18 | 19 | 20 | [dev-packages] 21 | 22 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | test_binder: 2 | image: ubuntu:latest 3 | services: 4 | - docker:dind 5 | variables: 6 | DOCKER_HOST: tcp://docker:2375 7 | DOCKER_DRIVER: overlay2 8 | script: 9 | # installing docker, python, and repo2docker 10 | - apt update && apt install docker.io python3-pip -y 11 | - pip3 install jupyter-repo2docker 12 | # build binder image 13 | - export CONTAINER_NAME="$CI_REGISTRY/$CI_PROJECT_PATH" 14 | - export CONTAINER_TAG=$(echo "${CI_COMMIT_SHA}" | cut -c1-10) 15 | - jupyter-repo2docker --debug --user-name username --user-id 1000 --no-run --image-name "${CONTAINER_NAME}:${CONTAINER_TAG}" "${CI_PROJECT_URL}" 16 | # push docker image to gitlab registry 17 | - docker login -u gitlab-ci-token -p $CI_BUILD_TOKEN $CI_REGISTRY 18 | - docker push "${CONTAINER_NAME}:${CONTAINER_TAG}" 19 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | =============== 3 | 4 | Copyright (c) 2018 Chris Ostrouchov 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | education, socio-economic status, nationality, personal appearance, race, 10 | religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at [INSERT EMAIL ADDRESS]. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Binder](https://mybinder.org/badge.svg)](http://mybinder.org/v2/gh/costrouc/mse-machinelearning-notebooks/master?urlpath=lab/tree/notebooks/Overview.ipynb) 2 | 3 | [![pipeline status](https://gitlab.com/costrouc/mse-machinelearning-notebooks/badges/master/pipeline.svg)](https://gitlab.com/costrouc/mse-machinelearning-notebooks/commits/master) 4 | 5 | # Machine Learning with a focus on Material Science 6 | 7 | A presentation given and written by Christopher Ostrouchov; all 8 | contributions are welcome. We will be using the materials project and 9 | its available data to "predict" material properties through machine 10 | learning. Many of the examples may be trivial but the focus is on 11 | introducing the workflow that is typical in machine learning. 12 | 13 | # Introduction to Python and Packages 14 | 15 | The goal of this set of notebooks is to introduce you to the most 16 | important concepts of machine learning. While there are many many 17 | algorithms for fitting your data, the methodology of gathering, 18 | sanitizing, investigating, and evaluating the goodness of fit is 19 | mostly the same. I hope to show you the process along with showing 20 | some methods from each branch of machine learning. Python has evolved 21 | into a great solution for easily performing these steps and, along 22 | with R, is a great choice. My favorite description of Python is that it 23 | is the 2nd best language for every problem. Also it is probably the 24 | best glue language out there. 
25 | 26 | Python is a language that, while it comes with "batteries included", 27 | provides most of its functionality through packages. I myself may 28 | consider myself an "expert" in the standard library (packages that are 29 | included by default with python) but there are always new packages to 30 | learn. The packages that we will be using: 31 | 32 | - [jupyter notebooks](https://jupyter.org/) for [literate 33 | programming](https://en.wikipedia.org/wiki/Literate_programming) 34 | 35 | - [requests](http://docs.python-requests.org/en/master/) for 36 | gathering the materials project data 37 | 38 | - [pandas](https://pandas.pydata.org/) for storing data, sanitizing, 39 | and investigating the data. A supercharged Excel spreadsheet. 40 | 41 | - [matplotlib](https://matplotlib.org/users/pyplot_tutorial.html) 42 | visualizing data 43 | 44 | - [numpy](http://www.numpy.org/) used underneath the covers for 45 | pandas and basis of linear algebra in python 46 | 47 | - [scikit-learn](http://scikit-learn.org/) most popular machine 48 | learning library. Does not perform neural network 49 | calculations. Overview of available algorithms (does not cover all) 50 | [link](http://scikit-learn.org/stable/tutorial/machine_learning_map/index.html) 51 | 52 | - [pymatgen](https://github.com/materialsproject/pymatgen/) a package 53 | by the Materials Project for working with material science 54 | structures and analysis of calculations. 55 | 56 | These packages have many many features but learning these core 57 | libraries will be more than enough for getting started. 58 | 59 | Resources that we will be using that are not python specific are: 60 | 61 | - [mybinder](https://mybinder.org/) which is a way to make a custom 62 | programming environment available for free hosted on google 63 | cloud. Note that resources are limited to about 1 CPU and 8 GiB RAM 64 | per instance. It is awesome; you should use it too. 
65 | 66 | - [materialsproject](https://materialsproject.org/) based at Lawrence 67 | Berkeley National Lab using the NERSC resources they provide data 68 | from their VASP simulations available using a [RESTful 69 | API](https://www.quora.com/What-is-a-REST-API). 70 | 71 | # Getting Started 72 | 73 | To get started we will launch the introduction notebook with binderhub. 74 | 75 | [![Binder](https://mybinder.org/badge.svg)](http://mybinder.org/v2/gh/costrouc/mse-machinelearning-notebooks/master?urlpath=lab/tree/notebooks/Overview.ipynb) 76 | 77 | # Contributing 78 | 79 | All contributions, bug reports, bug fixes, documentation improvements, 80 | enhancements and ideas are welcome! These should be submitted at the 81 | [GitLab repository](https://gitlab.com/costrouc/mse-machinelearning-notebooks). 82 | GitHub is only used for visibility. 83 | 84 | Contributors: 85 | - [Chris Ostrouchov](https://gitlab.com/costrouc) (maintainer) 86 | 87 | # License 88 | 89 | MIT 90 | -------------------------------------------------------------------------------- /notebooks/Overview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Welcome to the Material Science Machine Learning Jupyter Notebooks!\n", 8 | "\n", 9 | "Here you will get an introduction to Material Science and Machine Learning using Python.\n", 10 | "\n", 11 | "**These examples will most likely only work with python 3.6+**\n", 12 | "\n", 13 | "In Machine Learning, while there are many algorithms and techniques, the workflow is **always the same**.\n", 14 | "\n", 15 | "1. [gather the data](1-gather-data.ipynb) (web scraping, experiments, simulations)\n", 16 | "2. [explore the data](2-explore-sanitize-data.ipynb)\n", 17 | " - get a feel for what data you have\n", 18 | " - are there any interesting features to explore?\n", 19 | "3. 
[sanitize the data](2-explore-sanitize-data.ipynb)\n", 20 | " - how do you handle missing data?\n", 21 | " - how do you handle categorical data? (for example 'metal', 'non-metal', 'spacegroup')\n", 22 | "4. [apply machine learning algorithms](3-predicting-data.ipynb)\n", 23 | " - often the easiest part\n", 24 | "5. [validate predictions](3-predicting-data.ipynb)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# How is Machine Learning different from Statistics?\n", 32 | "\n", 33 | "While this is a huge generalization:\n", 34 | "\n", 35 | " - statistician: care about understanding how the data is generated, and understanding the model and its parameters\n", 36 | " - machine learning: mostly care about predictive ability\n", 37 | " \n", 38 | "I feel that scientists fall mostly in the `statisticians` camp.\n", 39 | "\n", 40 | "Occam's razor: one should select the simplest model that describes the data.\n", 41 | "\n", 42 | "As an example https://www.youtube.com/watch?v=1A1yaWS8gSg\n", 43 | "\n", 44 | "A sophisticated model that predicts planet positions with circles can be replaced by a far simpler one that uses ellipses. This comes from our **understanding** of the physics." 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Which Machine Learning Algorithms should I use?\n", 52 | "\n", 53 | "There are hundreds of algorithms to choose from. Always start with the simplest so that you can judge how more complex models perform.\n", 54 | "\n", 55 | "General Fields of Machine Learning. 
You will notice that some algorithms appear in multiple areas.\n", 56 | "\n", 57 | "## Classification\n", 58 | "\n", 59 | "SVM, nearest neighbors, random forests, gradient boosting, neural networks.\n", 60 | "\n", 61 | "Great [starting example dataset](http://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html)\n", 62 | "\n", 63 | "![Classification](http://scikit-learn.org/stable/_images/sphx_glr_plot_classification_thumb.png)\n", 64 | "\n", 65 | "## Regression\n", 66 | "\n", 67 | "SVR, ridge regression, Lasso, **Bayesian Methods**, neural networks\n", 68 | "\n", 69 | "![linear regression](http://scikit-learn.org/stable/_images/sphx_glr_plot_ols_0011.png)\n", 70 | "\n", 71 | "I would like to highlight how awesome Bayesian methods are. [pymc3](http://docs.pymc.io/) is the python package to use. If you can create a model that describes your data you can use Bayesian methods. If not, Gaussian processes are amazing (they are \"parameter free\" fitting methods).\n", 72 | "\n", 73 | "Gaussian process. Notice how you get the variance of your prediction with your data.\n", 74 | "\n", 75 | "![Gaussian Process](https://blog.dominodatalab.com/wp-content/uploads/2017/03/output_57_0-1.png)\n", 76 | "\n", 77 | "Bayesian Methods predicting the effect of regulation on coal miner deaths.\n", 78 | "\n", 79 | "![coal miner deaths](http://docs.pymc.io/_images/notebooks_getting_started_52_0.png)\n", 80 | "\n", 81 | "I am not very knowledgeable on neural networks but [pytorch](https://pytorch.org/) is the most user-friendly way to get started.\n", 82 | "\n", 83 | "Play with neural networks in your browser to get a feel for them. 
[link](http://playground.tensorflow.org/#activation=tanh&batchSize=10&dataset=circle&regDataset=reg-plane&learningRate=0.03&regularizationRate=0&noise=0&networkShape=4,2&seed=0.46804&showTestData=false&discretize=false&percTrainData=50&x=true&y=true&xTimesY=false&xSquared=false&ySquared=false&cosX=false&sinX=false&cosY=false&sinY=false&collectStats=false&problem=classification&initZero=false&hideText=false)\n", 84 | "\n", 85 | "\n", 86 | "## Clustering\n", 87 | "\n", 88 | "k-Means, spectral clustering, mean-shift\n", 89 | "\n", 90 | "![effect of cluster size etc](http://scikit-learn.org/stable/_images/sphx_glr_plot_kmeans_assumptions_001.png)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "# Bias Variance Tradeoff\n", 98 | "\n", 99 | " - bias error: error of model with training data\n", 100 | " - variance error: error of model with a different set of training data\n", 101 | " - irreducible error: error that cannot be reduced regardless of algorithm (sometimes noise)\n", 102 | "\n", 103 | "![bias variance](http://scott.fortmann-roe.com/docs/docs/BiasVariance/biasvariance.png)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "# How do we tell where we are on the bias variance curve?\n", 111 | "\n", 112 | "[Cross Validation](https://en.wikipedia.org/wiki/Cross-validation_(statistics)): split your data into a training and test set. Use the training set to fit your model. 
Use the test set to evaluate the performance of your model.\n", 113 | "\n", 114 | "Often times you split your data 90% training, 10% testing.\n", 115 | "\n", 116 | "sklearn provides many methods for automating this.\n" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "# Additional Resources\n", 124 | "\n", 125 | " - [great introduction](https://docs.google.com/presentation/d/1O6ozzZHHxGzU-McpvEG09hl7K6oQDd2Taw0FOlnxJc8/edit?usp=docslist_api)\n", 126 | " - [Kaggle](https://www.kaggle.com/) competitions that teach you how to use machine learning (best way to learn is to apply)\n", 127 | " - [fast.ai](https://www.fast.ai) the place to learn about neural networks\n", 128 | " - coursera, edx, udacity too many to name" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [] 137 | } 138 | ], 139 | "metadata": { 140 | "kernelspec": { 141 | "display_name": "Python 3", 142 | "language": "python", 143 | "name": "python3" 144 | }, 145 | "language_info": { 146 | "codemirror_mode": { 147 | "name": "ipython", 148 | "version": 3 149 | }, 150 | "file_extension": ".py", 151 | "mimetype": "text/x-python", 152 | "name": "python", 153 | "nbconvert_exporter": "python", 154 | "pygments_lexer": "ipython3", 155 | "version": "3.6.5" 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 2 160 | } 161 | -------------------------------------------------------------------------------- /notebooks/1-gather-data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Gathering and Investigating Materials Project Data\n", 8 | "\n", 9 | "This notebook will show how you can use `requests` and `pandas` to gather and explore your data. 
Often times you will need to supply your data by other methods.\n", 10 | "\n", 11 | "The `api` that we will be using is the material project. Link to the [api description](https://materialsproject.org/docs/api#materials_.28calculated_materials_data.29)\n", 12 | "\n", 13 | "![Materials Project](../images/materials_project.png)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import requests\n", 23 | "\n", 24 | "base_url = 'https://materialsproject.org/rest/v2/'" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Getting Materials Project Api Key\n", 32 | "\n", 33 | "This [link](https://www.materialsproject.org/open) details the steps necessary. \n", 34 | "\n", 35 | "1. Visit [dashboard](https://materialsproject.org/dashboard) (you may need to log in)\n", 36 | "2. Generate API key if it has not already been generated and set `API_KEY` to this value.\n", 37 | "\n", 38 | "The subprocess method is a way that I store my passwords on my computer and will not work for you.\n", 39 | "\n", 40 | "Afterwards in the next cell we will test that our API key works. \n", 41 | "\n", 42 | "This is done by performing a `GET` or `POST` request to `https://www.materialsproject.org/rest/v1/api_check`." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import subprocess\n", 52 | "API_KEY = subprocess.check_output('gopass www/materialsproject.com apikey'.split()).decode('utf-8')\n", 53 | "# API_KEY = \"\"\n", 54 | "\n", 55 | "session = requests.Session()\n", 56 | "session.headers.update({'X-API-KEY': API_KEY})" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "{'valid_response': True, 'api_key_valid': True}\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "# for some reason the v2 API does not include an API check method??\n", 74 | "response = session.get(f'https://www.materialsproject.org/rest/v1/api_check')\n", 75 | "data = response.json()\n", 76 | "print(data)\n", 77 | "\n", 78 | "if not data['api_key_valid']:\n", 79 | " raise ValueError('You are not authenticated!')" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# Materials Project API\n", 87 | "\n", 88 | "The materials project provides a RESTfull API for getting material properties which is detailed [here](https://www.materialsproject.org/docs/api#materials_.28calculated_materials_data.29).\n", 89 | "\n", 90 | "If you have followed the steps above you should be ready to parse materials project data.\n", 91 | "\n", 92 | "A RESTfull API is a nice way to expose data over the web. While they provide convenient methods for getting each individual material property they have a limit of 500 queries per day so we need to be efficient in our queries. To do this we will use the `npquery` to get properties in batch.\n", 93 | "\n", 94 | "Lets start by getting a list of materials that are compossed of the following elements `Fe`, `Ti`, `O`, `C`, `N`, `He`. 
This does not affect your API limit" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 20, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def get_materials(elements):\n", 104 | " elements_str = '-'.join(elements)\n", 105 | " response = session.get(f'{base_url}/materials/{elements_str}/mids')\n", 106 | " data = response.json()\n", 107 | " print(f'Found {len(data[\"response\"])} Materials in the Materials Project with the elements: {elements}')\n", 108 | " return data['response']\n", 109 | "\n", 110 | "def get_material_experimental_properties(mid):\n", 111 | " response = session.get(f'{base_url}/materials/{mid}/exp/')\n", 112 | " print(response.content)\n", 113 | " data = response.json()['response'][0]\n", 114 | " print(data)\n", 115 | " return data\n", 116 | "\n", 117 | "def get_material_vasp_properties(mid, piezoelectric=False, dielelectric=False):\n", 118 | " response = session.get(f'{base_url}/materials/{mid}/vasp/')\n", 119 | " material_data = response.json()['response'][0]\n", 120 | " \n", 121 | " if piezoelectric:\n", 122 | " response = session.get(f'{base_url}/materials/{mid}/vasp/piezo')\n", 123 | " data = response.json()\n", 124 | " if not data['valid_response']:\n", 125 | " material_data['piezoelectric'] = None\n", 126 | " else:\n", 127 | " material_data['piezoelectric'] = data['response']\n", 128 | " \n", 129 | " if dielelectric:\n", 130 | " response = session.get(f'{base_url}/materials/{mid}/vasp/diel')\n", 131 | " data = response.json()\n", 132 | " if not data['valid_response']:\n", 133 | " material_data['dielelectric'] = None\n", 134 | " else:\n", 135 | " material_data['dielelectric'] = data['response']\n", 136 | " \n", 137 | " return material_data" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 27, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Found 385 Materials in the Materials Project with the 
elements: ['Fe', 'O', 'Ni', 'He', 'Zn', 'Cu']\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "material_ids = get_materials(['Fe', 'O', 'Ni', 'He', 'Zn', 'Cu'])" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "# Basic VASP properties\n", 162 | "\n", 163 | "Includes:\n", 164 | "\n", 165 | " - `energy`, `energy_per_atom`, `volume`, `formation_energy_per_atom`, `nsites`, `unit_cell_formula`, `pretty_formula`, `e_above_hull`, `spacegroup`, `icsd_ids`, `cif`, \n", 166 | "\n", 167 | " - properties: `band_gap`, `density`, `energry`, `energy_per_atom`, `formation_energy_per_atom`, `elascticity`, `total_magnetization`\n", 168 | " \n", 169 | "But some properties are still not included:\n", 170 | " \n", 171 | " - `piezo`, `diel`" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 86, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# MgO\n", 181 | "\n", 182 | "material_id = 'mp-1265'\n", 183 | "\n", 184 | "# Na2O\n", 185 | "\n", 186 | "material_id = 'mp-776952'" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 26, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "dict_keys(['energy', 'energy_per_atom', 'volume', 'formation_energy_per_atom', 'nsites', 'unit_cell_formula', 'pretty_formula', 'is_hubbard', 'elements', 'nelements', 'e_above_hull', 'hubbards', 'is_compatible', 'spacegroup', 'task_ids', 'band_gap', 'density', 'icsd_id', 'icsd_ids', 'cif', 'total_magnetization', 'material_id', 'oxide_type', 'tags', 'elasticity', 'full_formula', 'piezoelectric', 'dielelectric'])" 198 | ] 199 | }, 200 | "execution_count": 26, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "data = get_material_vasp_properties(material_id, piezoelectric=True, dielelectric=True)\n", 207 | "data.keys()" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 
213 | "source": [ 214 | "# Basic Experimental properties\n", 215 | "\n", 216 | "Turns out to be thermochemical data and not worth looking at" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "get_material_experimental_properties(material_id)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "# Let's gather the material data\n", 233 | "\n", 234 | "The Material Project definently is not enforcing their `500` materials per day rate limit.\n", 235 | "\n", 236 | "Also if you have a query that get greater than 3,000 materials it fails. Thus why some are commented out." 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 29, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "materials_data = {}" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 56, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "name": "stdout", 255 | "output_type": "stream", 256 | "text": [ 257 | "Found 2661 Materials in the Materials Project with the elements: ['H', 'He', 'O', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn']\n", 258 | "Number of materials 2661\n" 259 | ] 260 | } 261 | ], 262 | "source": [ 263 | "# Lets just grab a bunch of materials\n", 264 | "material_ids = get_materials(['H', 'He', \n", 265 | " #'Li', 'Be', \n", 266 | " #'B', 'C', 'N', \n", 267 | " 'O', \n", 268 | " #'F', 'Ne', \n", 269 | " #'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar'\n", 270 | " 'K', 'Ca', \n", 271 | " 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',\n", 272 | " # 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',\n", 273 | " ])\n", 274 | "print('Number of materials', len(material_ids))" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 57, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "# store the results\n", 284 | "for mid 
in material_ids:\n", 285 | " if mid in materials_data:\n", 286 | " continue\n", 287 | " materials_data[mid] = get_material_vasp_properties(mid)" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 58, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "6928" 299 | ] 300 | }, 301 | "execution_count": 58, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "len(materials_data)" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "# Save all of the downloaded data to a json file" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 59, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "import json" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 60, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "json.dump(materials_data, open('mpdata.json', 'w'))" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 61, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "name": "stdout", 342 | "output_type": "stream", 343 | "text": [ 344 | "12K\t1-gather-data.ipynb\n", 345 | "4.0K\tOverview.ipynb\n", 346 | "22M\tmpdata.json\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "! 
du -sh *" 352 | ] 353 | } 354 | ], 355 | "metadata": { 356 | "kernelspec": { 357 | "display_name": "scratch", 358 | "language": "python", 359 | "name": "scratch" 360 | }, 361 | "language_info": { 362 | "codemirror_mode": { 363 | "name": "ipython", 364 | "version": 3 365 | }, 366 | "file_extension": ".py", 367 | "mimetype": "text/x-python", 368 | "name": "python", 369 | "nbconvert_exporter": "python", 370 | "pygments_lexer": "ipython3", 371 | "version": "3.6.5" 372 | } 373 | }, 374 | "nbformat": 4, 375 | "nbformat_minor": 2 376 | } 377 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "91d80bf8235bd8dd7f31e7425ffc63adc7a8a241388744ea23c82596d9092dd0" 5 | }, 6 | "host-environment-markers": { 7 | "implementation_name": "cpython", 8 | "implementation_version": "3.6.5", 9 | "os_name": "posix", 10 | "platform_machine": "x86_64", 11 | "platform_python_implementation": "CPython", 12 | "platform_release": "4.15.0-23-generic", 13 | "platform_system": "Linux", 14 | "platform_version": "#25-Ubuntu SMP Wed May 23 18:02:16 UTC 2018", 15 | "python_full_version": "3.6.5", 16 | "python_version": "3.6", 17 | "sys_platform": "linux" 18 | }, 19 | "pipfile-spec": 6, 20 | "requires": {}, 21 | "sources": [ 22 | { 23 | "name": "pypi", 24 | "url": "https://pypi.python.org/simple", 25 | "verify_ssl": true 26 | } 27 | ] 28 | }, 29 | "default": { 30 | "backcall": { 31 | "hashes": [ 32 | "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4", 33 | "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2" 34 | ], 35 | "version": "==0.1.0" 36 | }, 37 | "bleach": { 38 | "hashes": [ 39 | "sha256:b8fa79e91f96c2c2cd9fd1f9eda906efb1b88b483048978ba62fef680e962b34", 40 | "sha256:eb7386f632349d10d9ce9d4a838b134d4731571851149f9cc2c05a9a837a9a44" 41 | ], 42 | "version": "==2.1.3" 43 | }, 44 | 
"certifi": { 45 | "hashes": [ 46 | "sha256:9fa520c1bacfb634fa7af20a76bcbd3d5fb390481724c597da32c719a7dca4b0", 47 | "sha256:13e698f54293db9f89122b0581843a782ad0934a4fe0172d2a980ba77fc61bb7" 48 | ], 49 | "version": "==2018.4.16" 50 | }, 51 | "chardet": { 52 | "hashes": [ 53 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691", 54 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae" 55 | ], 56 | "version": "==3.0.4" 57 | }, 58 | "cycler": { 59 | "hashes": [ 60 | "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d", 61 | "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8" 62 | ], 63 | "version": "==0.10.0" 64 | }, 65 | "decorator": { 66 | "hashes": [ 67 | "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82", 68 | "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c" 69 | ], 70 | "version": "==4.3.0" 71 | }, 72 | "entrypoints": { 73 | "hashes": [ 74 | "sha256:10ad569bb245e7e2ba425285b9fa3e8178a0dc92fc53b1e1c553805e15a8825b", 75 | "sha256:d2d587dde06f99545fb13a383d2cd336a8ff1f359c5839ce3a64c917d10c029f" 76 | ], 77 | "version": "==0.2.3" 78 | }, 79 | "html5lib": { 80 | "hashes": [ 81 | "sha256:20b159aa3badc9d5ee8f5c647e5efd02ed2a66ab8d354930bd9ff139fc1dc0a3", 82 | "sha256:66cb0dcfdbbc4f9c3ba1a63fdb511ffdbd4f513b2b6d81b80cd26ce6b3fb3736" 83 | ], 84 | "version": "==1.0.1" 85 | }, 86 | "idna": { 87 | "hashes": [ 88 | "sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e", 89 | "sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16" 90 | ], 91 | "version": "==2.7" 92 | }, 93 | "ipykernel": { 94 | "hashes": [ 95 | "sha256:395f020610e33ffa0b0c9c0cd1a1d927d51ab9aa9f30a7ae36bb0c908a33e89c", 96 | "sha256:935941dba29d856eee34b8b5261d971bd5012547239ed73ddfff099143748c37", 97 | "sha256:c091449dd0fad7710ddd9c4a06e8b9e15277da306590bc07a3a1afa6b4453c8f" 98 | ], 99 | "version": "==4.8.2" 100 | }, 101 | "ipython": { 102 | 
"hashes": [ 103 | "sha256:a0c96853549b246991046f32d19db7140f5b1a644cc31f0dc1edc86713b7676f", 104 | "sha256:eca537aa61592aca2fef4adea12af8e42f5c335004dfa80c78caf80e8b525e5c" 105 | ], 106 | "version": "==6.4.0" 107 | }, 108 | "ipython-genutils": { 109 | "hashes": [ 110 | "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8", 111 | "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8" 112 | ], 113 | "version": "==0.2.0" 114 | }, 115 | "jedi": { 116 | "hashes": [ 117 | "sha256:5861f6dc0c16e024cbb0044999f9cf8013b292c05f287df06d3d991a87a4eb89", 118 | "sha256:1972f694c6bc66a2fac8718299e2ab73011d653a6d8059790c3476d2353b99ad" 119 | ], 120 | "version": "==0.12.0" 121 | }, 122 | "jinja2": { 123 | "hashes": [ 124 | "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd", 125 | "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4" 126 | ], 127 | "version": "==2.10" 128 | }, 129 | "jsonschema": { 130 | "hashes": [ 131 | "sha256:000e68abd33c972a5248544925a0cae7d1125f9bf6c58280d37546b946769a08", 132 | "sha256:6ff5f3180870836cae40f06fa10419f557208175f13ad7bc26caa77beb1f6e02" 133 | ], 134 | "version": "==2.6.0" 135 | }, 136 | "jupyter-client": { 137 | "hashes": [ 138 | "sha256:59e6d791e22a8002ad0e80b78c6fd6deecab4f9e1b1aa1a22f4213de271b29ea", 139 | "sha256:27befcf0446b01e29853014d6a902dd101ad7d7f94e2252b1adca17c3466b761" 140 | ], 141 | "version": "==5.2.3" 142 | }, 143 | "jupyter-core": { 144 | "hashes": [ 145 | "sha256:927d713ffa616ea11972534411544589976b2493fc7e09ad946e010aa7eb9970", 146 | "sha256:ba70754aa680300306c699790128f6fbd8c306ee5927976cbe48adacf240c0b7" 147 | ], 148 | "version": "==4.4.0" 149 | }, 150 | "jupyterlab": { 151 | "hashes": [ 152 | "sha256:6a1af69fe0c0890891b5bb4cb9914edca68ed59b00ed50cd2bc78e230c739b55", 153 | "sha256:88290656a2db2e38ef913a257ec283f3b5bd99144ed3d52899c9af7030077554" 154 | ], 155 | "version": "==0.32.1" 156 | }, 157 | "jupyterlab-launcher": { 158 | "hashes": [ 159 | 
"sha256:d6308617fbdb3949c76356e8149c9835b65b01653fdefd983085c61d75f3c810", 160 | "sha256:c78646afa354856a7ba3d9583122b89603d24587126f4c49a04dd42f50c831ec" 161 | ], 162 | "version": "==0.10.5" 163 | }, 164 | "kiwisolver": { 165 | "hashes": [ 166 | "sha256:8b6a7b596ce1d2a6d93c3562f1178ebd3b7bb445b3b0dd33b09f9255e312a965", 167 | "sha256:1a078f5dd7e99317098f0e0d490257fd0349d79363e8c923d5bb76428f318421", 168 | "sha256:e0f910f84b35c36a3513b96d816e6442ae138862257ae18a0019d2fc67b041dc", 169 | "sha256:aaec1cfd94f4f3e9a25e144d5b0ed1eb8a9596ec36d7318a504d813412563a85", 170 | "sha256:f923406e6b32c86309261b8195e24e18b6a8801df0cfc7814ac44017bfcb3939", 171 | "sha256:4329008a167fac233e398e8a600d1b91539dc33c5a3eadee84c0d4b04d4494fa", 172 | "sha256:3b791ddf2aefc56382aadc26ea5b352e86a2921e4e85c31c1f770f527eb06ce4", 173 | "sha256:379d97783ba8d2934d52221c833407f20ca287b36d949b4bba6c75274bcf6363", 174 | "sha256:1aa0b55a0eb1bd3fa82e704f44fb8f16e26702af1a073cc5030eea399e617b56", 175 | "sha256:ea36e19ac0a483eea239320aef0bd40702404ff8c7e42179a2d9d36c5afcb55c", 176 | "sha256:0f7f532f3c94e99545a29f4c3f05637f4d2713e7fd91b4dd8abfc18340b86cd5", 177 | "sha256:2874060b91e131ceeff00574b7c2140749c9355817a4ed498e82a4ffa308ecbc", 178 | "sha256:95a25d9f3449046ecbe9065be8f8380c03c56081bc5d41fe0fb964aaa30b2195", 179 | "sha256:79e5fe3ccd5144ae80777e12973027bd2f4f5e3ae8eb286cabe787bed9780138", 180 | "sha256:9576cb63897fbfa69df60f994082c3f4b8e6adb49cccb60efb2a80a208e6f996", 181 | "sha256:0ee4ed8b3ae8f5f712b0aa9ebd2858b5b232f1b9a96b0943dceb34df2a223bc3", 182 | "sha256:66f82819ff47fa67a11540da96966fb9245504b7f496034f534b81cacf333861", 183 | "sha256:b1c240d565e977d80c0083404c01e4d59c5772c977fae2c483f100567f50847b", 184 | "sha256:53a5b27e6b5717bdc0125338a822605084054c80f382051fb945d2c0e6899a20", 185 | "sha256:acb673eecbae089ea3be3dcf75bfe45fc8d4dcdc951e27d8691887963cf421c7", 186 | "sha256:b15bc8d2c2848a4a7c04f76c9b3dc3561e95d4dabc6b4f24bfabe5fd81a0b14f", 187 | 
"sha256:45813e0873bbb679334a161b28cb9606d9665e70561fd6caa8863e279b5e464b", 188 | "sha256:ce3be5d520b4d2c3e5eeb4cd2ef62b9b9ab8ac6b6fedbaa0e39cdb6f50644278" 189 | ], 190 | "version": "==1.0.1" 191 | }, 192 | "markupsafe": { 193 | "hashes": [ 194 | "sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665" 195 | ], 196 | "version": "==1.0" 197 | }, 198 | "matplotlib": { 199 | "hashes": [ 200 | "sha256:3fd90b407d1ab0dae686a4200030ce305526ff20b85a443dc490d194114b2dfa", 201 | "sha256:7b3d03c876684618e2a2be6abeb8d3a033c3a1bb38a786f751199753ef6227e6", 202 | "sha256:abfd3d9390eb4f2d82cbcaa3a5c2834c581329b64eccb7a071ed9d5df27424f7", 203 | "sha256:07055eb872fa109bd88f599bdb52065704b2e22d475b67675f345d75d32038a0", 204 | "sha256:0f2f253d6d51f5ed52a819921f8a0a8e054ce0daefcfbc2557e1c433f14dc77d", 205 | "sha256:1ef9fd285334bd6b0495b6de9d56a39dc95081577f27bafabcf28e0d318bed31", 206 | "sha256:70f0e407fbe9e97f16597223269c849597047421af5eb8b60dbaca0382037e78", 207 | "sha256:3fb2db66ef98246bafc04b4ef4e9b0e73c6369f38a29716844e939d197df816a", 208 | "sha256:dc0ba2080fd0cfdd07b3458ee4324d35806733feb2b080838d7094731d3f73d9", 209 | "sha256:4bb10087e09629ba3f9b25b6c734fd3f99542f93d71c5b9c023f28cb377b43a9", 210 | "sha256:bc4d7481f0e8ec94cb1afc4a59905d6274b3b4c389aba7a2539e071766671735", 211 | "sha256:4f6a516d5ef39128bb16af7457e73dde25c30625c4916d8fbd1cc7c14c55e691", 212 | "sha256:8ff08eaa25c66383fe3b6c7eb288da3c22dcedc4b110a0b592b35f68d0e093b2", 213 | "sha256:f26fba7fc68994ab2805d77e0695417f9377a00d36ba4248b5d0f1e5adb08d24", 214 | "sha256:8944d311ce37bee1ba0e41a9b58dcf330ffe0cf29d7654c3d07c572215da68ac", 215 | "sha256:45dac8589ef1721d7f2ab0f48f986694494dfcc5d13a3e43a5cb6c816276094e", 216 | "sha256:9d12378d6a236aa38326e27f3a29427b63edce4ce325745785aec1a7535b1f85", 217 | "sha256:4dc7ef528aad21f22be85e95725234c5178c0f938e2228ca76640e5e84d8cde8" 218 | ], 219 | "version": "==2.2.2" 220 | }, 221 | "mistune": { 222 | "hashes": [ 223 | 
"sha256:b4c512ce2fc99e5a62eb95a4aba4b73e5f90264115c40b70a21e1f7d4e0eac91", 224 | "sha256:bc10c33bfdcaa4e749b779f62f60d6e12f8215c46a292d05e486b869ae306619" 225 | ], 226 | "version": "==0.8.3" 227 | }, 228 | "monty": { 229 | "hashes": [ 230 | "sha256:b0d731e1a07b0953d5e534c17b9c5d998dc21ca5eb28a2cbde8787cdded5a59a", 231 | "sha256:8b2b9ae3b3c77a17a49f86408b7ca35e5dd0eeee9e5208c1b7888df1b8e09976", 232 | "sha256:8cc68ee4717f2d306065679eba9bd13e40b40ac4b94937729effdd4ba21d948d" 233 | ], 234 | "version": "==1.0.3" 235 | }, 236 | "mpmath": { 237 | "hashes": [ 238 | "sha256:04d14803b6875fe6d69e6dccea87d5ae5599802e4b1df7997bddd2024001050c" 239 | ], 240 | "version": "==1.0.0" 241 | }, 242 | "nbconvert": { 243 | "hashes": [ 244 | "sha256:260d390b989a647575b8ecae2cd06a9eaead10d396733d6e50185d5ebd08996e", 245 | "sha256:12b1a4671d4463ab73af6e4cbcc965b62254e05d182cd54995dda0d0ef9e2db9" 246 | ], 247 | "version": "==5.3.1" 248 | }, 249 | "nbformat": { 250 | "hashes": [ 251 | "sha256:b9a0dbdbd45bb034f4f8893cafd6f652ea08c8c1674ba83f2dc55d3955743b0b", 252 | "sha256:f7494ef0df60766b7cabe0a3651556345a963b74dbc16bc7c18479041170d402" 253 | ], 254 | "version": "==4.4.0" 255 | }, 256 | "notebook": { 257 | "hashes": [ 258 | "sha256:95dc9afa0444d05a0b76d6d1f5133c34a92a95f4e248efef3f59ccd2c2616c7d", 259 | "sha256:fa915c231e64a30d19cc2c70ccab6444cbaa93e44e92b5f8233dd9147ad0e664" 260 | ], 261 | "version": "==5.5.0" 262 | }, 263 | "numpy": { 264 | "hashes": [ 265 | "sha256:e1864a4e9f93ddb2dc6b62ccc2ec1f8250ff4ac0d3d7a15c8985dd4e1fbd6418", 266 | "sha256:085afac75bbc97a096744fcfc97a4b321c5a87220286811e85089ae04885acdd", 267 | "sha256:6c57f973218b776195d0356e556ec932698f3a563e2f640cfca7020086383f50", 268 | "sha256:589336ba5199c8061239cf446ee2f2f1fcc0c68e8531ee1382b6fc0c66b2d388", 269 | "sha256:5edf1acc827ed139086af95ce4449b7b664f57a8c29eb755411a634be280d9f2", 270 | "sha256:6b82b81c6b3b70ed40bc6d0b71222ebfcd6b6c04a6e7945a936e514b9113d5a3", 271 | 
"sha256:385f1ce46e08676505b692bfde918c1e0b350963a15ef52d77691c2cf0f5dbf6", 272 | "sha256:758d1091a501fd2d75034e55e7e98bfd1370dc089160845c242db1c760d944d9", 273 | "sha256:c725d11990a9243e6ceffe0ab25a07c46c1cc2c5dc55e305717b5afe856c9608", 274 | "sha256:07379fe0b450f6fd6e5934a9bc015025bb4ce1c8fbed3ca8bef29328b1bc9570", 275 | "sha256:9e1f53afae865cc32459ad211493cf9e2a3651a7295b7a38654ef3d123808996", 276 | "sha256:4d278c2261be6423c5e63d8f0ceb1b0c6db3ff83f2906f4b860db6ae99ca1bb5", 277 | "sha256:d696a8c87315a83983fc59dd27efe034292b9e8ad667aeae51a68b4be14690d9", 278 | "sha256:2df854df882d322d5c23087a4959e145b953dfff2abe1774fec4f639ac2f3160", 279 | "sha256:baadc5f770917ada556afb7651a68176559f4dca5f4b2d0947cd15b9fb84fb51", 280 | "sha256:2d6481c6bdab1c75affc0fc71eb1bd4b3ecef620d06f2f60c3f00521d54be04f", 281 | "sha256:51c5dcb51cf88b34b7d04c15f600b07c6ccbb73a089a38af2ab83c02862318da", 282 | "sha256:8b8dcfcd630f1981f0f1e3846fae883376762a0c1b472baa35b145b911683b7b", 283 | "sha256:9d69967673ab7b028c2df09cae05ba56bf4e39e3cb04ebe452b6035c3b49848e", 284 | "sha256:8622db292b766719810e0cb0f62ef6141e15fe32b04e4eb2959888319e59336b", 285 | "sha256:97fa8f1dceffab782069b291e38c4c2227f255cdac5f1e3346666931df87373e", 286 | "sha256:381ad13c30cd1d0b2f3da8a0c1a4aa697487e8bb0e9e0cbeb7439776bcb645f8", 287 | "sha256:a4a433b3a264dbc9aa9c7c241e87c0358a503ea6394f8737df1683c7c9a102ac" 288 | ], 289 | "version": "==1.14.5" 290 | }, 291 | "palettable": { 292 | "hashes": [ 293 | "sha256:7306d63c55216f66d5e5509aacda8bb51c5918debfd6bda4aac8d0940e1788a2", 294 | "sha256:0685b223a236bb7e2a900ef7a855ccf9a4027361c8acf400f3b350ea51870f80" 295 | ], 296 | "version": "==3.1.1" 297 | }, 298 | "pandas": { 299 | "hashes": [ 300 | "sha256:3790a3348ab0f416e58061d21693cb662fbb2f638001b94bf2b2199fedc1b1c2", 301 | "sha256:e1b86f7c55467ce1f6c12715f2fd1817f4a909b5c8c39bd4b5d2415ef2b04bd8", 302 | "sha256:28fd087514616549a0e3259cd68ac88d7eaed6bd3062017a7f312e27941266bd", 303 | 
"sha256:cbbecca0c7af6a2160b2d6ba30becc286824a98c61dcc6a41fada664f226424c", 304 | "sha256:b704fd73022342cce612996de495a16954311e0c0cf077c1b83d5cf0b9656a60", 305 | "sha256:b4fb71acbc2709b8f5993cb4b5445d8182864f11c39787e317aae39f21206270", 306 | "sha256:372435456c349a8d39ff001967b161f6bd29d4c3de145a4cf9b366648defbb1f", 307 | "sha256:d8154c5c68713a82461aba735832f0b4692be8a45a0a340a303bf90d6f80f36f", 308 | "sha256:211cfdb9f72f26d2ede21c751d27e08fed4434d47fb9bb82ebc8ff753888b8b6", 309 | "sha256:437a6e906a6717a9ed2627cf6e7895b63dfaa0172567cbd75a553f55cf78cc17", 310 | "sha256:2fb7c63138bd5ead296b18b2cb6abd3a394f7581e5ae052b02b27df8244b03ca", 311 | "sha256:d2a071de755cc8ee7784e1b4c7b9b643d951d35c8adea7d64fe7c57cff9c47a7", 312 | "sha256:720daad75b5d35dd1b446842210c4f3fd447464c9c0884972f3f12b213a9edd1", 313 | "sha256:fcc63e8134516e93e16eb4ceac9afaa51f4adc5bf58efddae7cbc562f5b77dd0", 314 | "sha256:50b52af2af2e15f4aeb2fe196da073a8c131fa02e433e105d95ce40016df5690" 315 | ], 316 | "version": "==0.23.1" 317 | }, 318 | "pandocfilters": { 319 | "hashes": [ 320 | "sha256:b3dd70e169bb5449e6bc6ff96aea89c5eea8c5f6ab5e207fc2f521a2cf4a0da9" 321 | ], 322 | "version": "==1.4.2" 323 | }, 324 | "parso": { 325 | "hashes": [ 326 | "sha256:cdef26e8adc10d589f3ec4eb444bd0a29f3f1eb6d72a4292ab8afcb9d68976a6", 327 | "sha256:f0604a40b96e062b0fd99cf134cc2d5cdf66939d0902f8267d938b0d5b26707f" 328 | ], 329 | "version": "==0.2.1" 330 | }, 331 | "pexpect": { 332 | "hashes": [ 333 | "sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b", 334 | "sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba" 335 | ], 336 | "markers": "sys_platform != 'win32'", 337 | "version": "==4.6.0" 338 | }, 339 | "pickleshare": { 340 | "hashes": [ 341 | "sha256:c9a2541f25aeabc070f12f452e1f2a8eae2abd51e1cd19e8430402bdf4c1d8b5", 342 | "sha256:84a9257227dfdd6fe1b4be1319096c20eb85ff1e82c7932f36efccfe1b09737b" 343 | ], 344 | "version": "==0.7.4" 345 | }, 346 | "prompt-toolkit": { 347 | "hashes": [ 
348 | "sha256:3f473ae040ddaa52b52f97f6b4a493cfa9f5920c255a12dc56a7d34397a398a4", 349 | "sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381", 350 | "sha256:858588f1983ca497f1cf4ffde01d978a3ea02b01c8a26a8bbc5cd2e66d816917" 351 | ], 352 | "version": "==1.0.15" 353 | }, 354 | "ptyprocess": { 355 | "hashes": [ 356 | "sha256:e8c43b5eee76b2083a9badde89fd1bbce6c8942d1045146e100b7b5e014f4f1a", 357 | "sha256:e64193f0047ad603b71f202332ab5527c5e52aa7c8b609704fc28c0dc20c4365" 358 | ], 359 | "markers": "os_name != 'nt'", 360 | "version": "==0.5.2" 361 | }, 362 | "pydispatcher": { 363 | "hashes": [ 364 | "sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf", 365 | "sha256:5be4a8be12805ef7d712dd9a93284fb8bc53f309867e573f653a72e5fd10e433" 366 | ], 367 | "version": "==2.0.5" 368 | }, 369 | "pygments": { 370 | "hashes": [ 371 | "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d", 372 | "sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc" 373 | ], 374 | "version": "==2.2.0" 375 | }, 376 | "pymatgen": { 377 | "hashes": [ 378 | "sha256:6b4aab943af78e66758cd5bf7b6c55543b1eebe8add2525bb3becc920fdfcc4a", 379 | "sha256:6b626956b58b37e8412852ca95e618be9ddddee4456e93f8c82ad953ba4f6a9a", 380 | "sha256:be1b2f101abf6477b5388b3112c5e4d0ff8f0f6cefb7f9d3d509a88882c515ed" 381 | ], 382 | "version": "==2018.6.11" 383 | }, 384 | "pyparsing": { 385 | "hashes": [ 386 | "sha256:fee43f17a9c4087e7ed1605bd6df994c6173c1e977d7ade7b651292fab2bd010", 387 | "sha256:0832bcf47acd283788593e7a0f542407bd9550a55a8a8435214a1960e04bcb04", 388 | "sha256:9e8143a3e15c13713506886badd96ca4b579a87fbdf49e550dbfc057d6cb218e", 389 | "sha256:281683241b25fe9b80ec9d66017485f6deff1af5cde372469134b56ca8447a07", 390 | "sha256:b8b3117ed9bdf45e14dcc89345ce638ec7e0e29b2b579fa1ecf32ce45ebac8a5", 391 | "sha256:8f1e18d3fd36c6795bb7e02a39fd05c611ffc2596c1e0d995d34d67630426c18", 392 | "sha256:e4d45427c6e20a59bf4f88c639dcc03ce30d193112047f94012102f235853a58" 393 
| ], 394 | "version": "==2.2.0" 395 | }, 396 | "python-dateutil": { 397 | "hashes": [ 398 | "sha256:1adb80e7a782c12e52ef9a8182bebeb73f1d7e24e374397af06fb4956c8dc5c0", 399 | "sha256:e27001de32f627c22380a688bcc43ce83504a7bc5da472209b4c70f02829f0b8" 400 | ], 401 | "version": "==2.7.3" 402 | }, 403 | "pytz": { 404 | "hashes": [ 405 | "sha256:65ae0c8101309c45772196b21b74c46b2e5d11b6275c45d251b150d5da334555", 406 | "sha256:c06425302f2cf668f1bba7a0a03f3c1d34d4ebeef2c72003da308b3947c7f749" 407 | ], 408 | "version": "==2018.4" 409 | }, 410 | "pyzmq": { 411 | "hashes": [ 412 | "sha256:2fb4d745ffe0a65ebf8fd29df093bb5c0ac96a506cb05b9a7b7c94b2524ae7f6", 413 | "sha256:b89268020a843d4c3cc04180577ec061fe96d35f267b0b672cb006e4d70560da", 414 | "sha256:d51eb3902d27d691483243707bfa67972167a70269bbbc172b74eeac4f780a1d", 415 | "sha256:e5578ae84bb94e97adadfcb00106a1cb161cb8017f89b01f6c3737f356257811", 416 | "sha256:4193cc666591495ab7fe8d24fa8374a35f9775f16dc7c46e03615559e1fc1855", 417 | "sha256:b328c538061757f627d32f7f8885c16f1d2f59f5374e057822f3c8e6cd94c41b", 418 | "sha256:18de8a02768b1c0b3495ac635b24bd902fafc08befb70a6e68c4d343ccbd6cbd", 419 | "sha256:fb983aec4bddee3680a0b7395f99e4595d70d81841370da736c5dc642bad4cd2", 420 | "sha256:ad5a8b19b6671b52d30ccfc3a0f4c600e49c4e2dcc88caf4106ed5958dec8d5e", 421 | "sha256:767e1d0b1f7fff1950127abc08c5a5af2754987bc6480c6d641bed6971278a7a", 422 | "sha256:c30d27c9b35285597b8ef3019f97b9b98457b053f65dcc87a90dfdd4db09ca78", 423 | "sha256:bdb12b485b3440b5193cd337d27cc126cdfc54ea9f38df237e1ead6216435cbe", 424 | "sha256:ba0b43aebf856e5e249250d74c1232d6600b6859328920d12e2ba72a565ab1b1", 425 | "sha256:630fb21f7474eb9e409a1ad476bf1ec489a69eb021172d422f2485cc3a44cd79", 426 | "sha256:6c3632d2c17cf03ce728ffaa328d45bb053623b3a0aa9747adcde81778d5a4d5", 427 | "sha256:538dfdd9542cf9ff37cd958da03b58d56b53b90800159ea07adc51a8ec7ffcb8", 428 | "sha256:613ac1fc4591b1c6a0a52ce3ed17dbffd6a17e985df504e8b4cdb987f97285b1", 429 | 
"sha256:a0ecf4c3eccd92f030a4e3e334b9da6fa3ee86be00249343c74e476d70567d0f", 430 | "sha256:863ec1bfa52da6eaa5c4aa59143eeaeb4ef7a076862407a548ec645f25e6d6df", 431 | "sha256:f35b4cdeffff79357a9d929daa2a8620fb362b2cbeebdc5dd2cf9fcd27c44821", 432 | "sha256:445fed4d71ac48da258ba38f2e29c88c5091124212a4004a0a6a42e6586a7de1", 433 | "sha256:b31f2b50ad2920f21b904f5edf66bee324e42bb978df1407ecf381b210d4678e", 434 | "sha256:0145ae59139b41f65e047a3a9ed11bbc36e37d5e96c64382fcdff911c4d8c3f0" 435 | ], 436 | "version": "==17.0.0" 437 | }, 438 | "requests": { 439 | "hashes": [ 440 | "sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1", 441 | "sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a" 442 | ], 443 | "version": "==2.19.1" 444 | }, 445 | "ruamel.yaml": { 446 | "hashes": [ 447 | "sha256:7aa829110c77f237d7041391db4501374d7b9672dce2fe49c805345f1712d7cf", 448 | "sha256:0bfd91e4c23a00871f83046db312dd5071fcc56be82228e5560440b85d73f197", 449 | "sha256:68413b9a849261cf0d904c11670f4153f93baf6ce1ec40c5b8c7a81b1c9130f3", 450 | "sha256:3d244bc3bb6adce091e20692ad82c465a6fb27a5df0c7e8c8005d5c51f0cbded", 451 | "sha256:ac65e8b668dde7a7804a039ee31f66e3b316dadeeaf506b0b6de1c51ff44ad81", 452 | "sha256:b3b17c95c542e3caf2c85db739fe8c69c4589c3d7fd8f59c1bc68891126eb351", 453 | "sha256:384cb3fbc0c7eb0166cc009e6410e295f25c663e22b0e93ec62b1dc0c7fddc4e", 454 | "sha256:345f8c276b1af138f243a560c4f21884d7c150559ab13e344070044c73a31ab5", 455 | "sha256:3908c5b389b1543118b8b85ad48299969cd5d1ed22b2a2d8c4d6d9a3f3cfa24f", 456 | "sha256:961e6e1f41054140466ab5479a4c5c20e569fc40ac5f7fbdde62f97993db8aec", 457 | "sha256:d1dc969b690c38928f1ea25a86b0288d2c0aa9cb6f549bedbcb69deef8da975c", 458 | "sha256:c239fd438cbfab8f0af02b931ee4205821a615659475c87983bc866590104141", 459 | "sha256:8ef2c81d03a18ac20bae4e6563cf6a547eac4f22147dd9ce91430f68b35561f1", 460 | "sha256:65e9496033a2592e342be5c5c7846a05504077f843fb52de7fff4bd85484cd99", 461 | 
"sha256:06bb1cd211704a90e4d8e9f66f9baa6345300106d8e156acbc4061d513d40201", 462 | "sha256:0fc8bafd7d3a54dfdb4bd8bd0ee6ed12100d8125149d818cbce9f0d694121bbe", 463 | "sha256:974e91b23273eb6c32aef979a32c20ed0f6c6e4d9c1523611ea10c5fda9b8928" 464 | ], 465 | "version": "==0.15.40" 466 | }, 467 | "scikit-learn": { 468 | "hashes": [ 469 | "sha256:3775cca4ce3f94508bb7c8a6b113044b78c16b0a30a5c169ddeb6b9fe57a8a72", 470 | "sha256:873245b03361710f47c5410a050dc56ee8ae97b9f8dcc6e3a81521ca2b64ad10", 471 | "sha256:370919e3148253fd6552496c33a1e3d78290a336fc8d1b9349d9e9770fae6ec0", 472 | "sha256:ce78bf4d10bd7e28807c36c6d2ab25a9934aaf80906ad987622a5e45627d91a2", 473 | "sha256:ba3fd442ae1a46830789b3578867daaf2c8409dcca6bf192e30e85beeabbfc2f", 474 | "sha256:a21cf8217e31a9e8e32c559246e05e6909981816152406945ae2e3e244dfcc1f", 475 | "sha256:e54a3dd1fe1f8124de90b93c48d120e6da2ea8df29b6895325df01ddc1bd8e26", 476 | "sha256:f9abae483f4d52acd6f660addb1b67e35dc5748655250af479de2ea6aefc6df0", 477 | "sha256:5c9ff456d67ef9094e5ea272fff2be05d399a47fc30c6c8ed653b94bdf787bd1", 478 | "sha256:871669cdb5b3481650fe3adff46eb97c455e30ecdc307eaf382ef90d4e2570ab", 479 | "sha256:d4da369614e55540c7e830ccdd17ab4fe5412ff8e803a4906d3ece393e2e3a63", 480 | "sha256:42f3c5bd893ed73bf47ccccf04dfb98fae743f397d688bb58c2238c0e6ec15d2", 481 | "sha256:95b155ef6bf829ddfba6026f100ba8e4218b7171ecab97b2163bc9e8d206848f", 482 | "sha256:72c194c5092e921d6107a8de8a5adae58c35bbc54e030ba624b6f02fd823bb21", 483 | "sha256:f528c4b2bba652cf116f5cccf36f4db95a7f9cbfcd1ee549c4e8d0f8628783b5", 484 | "sha256:d384e6f9a055b7a43492f9d27779adb717eb5dcf78b0603b01d0f070a608d241", 485 | "sha256:ee8c3b1898c728b6e5b5659c233f547700a1fea13ce876b6fe7d3434c70cc0e0", 486 | "sha256:56cfa19c31edf62e6414da0a337efee37a4af488b135640e67238786b9be6ab3", 487 | "sha256:5db9e68a384ce80a17fc449d4d5d9b45025fe17cf468429599bf404eccb51049", 488 | "sha256:8b17fc29554c5c98d88142f895516a5bec2b6b61daa815e1193a64c868ad53d2", 489 | 
"sha256:13136c6e4f6b808569f7f59299d439b2cd718f85d72ea14b5b6077d44ebc7d17", 490 | "sha256:ddc1eb10138ae93c136cc4b5945d3977f302b5d693592a4731b2805a7d7f2a74", 491 | "sha256:5ca0ad32ee04abe0d4ba02c8d89d501b4e5e0304bdf4d45c2e9875a735b323a0", 492 | "sha256:6e0899953611d0c47c0d49c5950082ab016b38811fced91cd2dcc889dd94f50a", 493 | "sha256:b2a10e2f9b73de10d8486f7a23549093436062b69139158802910a0f154aa53b", 494 | "sha256:a58746d4f389ea7df1d908dba8b52f709835f91c342f459a3ade5424330c69d1", 495 | "sha256:fdc39e89bd3466befb76dfc0c258d4ccad159df974954a87de3be5759172a067" 496 | ], 497 | "version": "==0.19.1" 498 | }, 499 | "scipy": { 500 | "hashes": [ 501 | "sha256:340ef70f5b0f4e2b4b43c8c8061165911bc6b2ad16f8de85d9774545e2c47463", 502 | "sha256:c22b27371b3866c92796e5d7907e914f0e58a36d3222c5d436ddd3f0e354227a", 503 | "sha256:d8491d4784aceb1f100ddb8e31239c54e4afab8d607928a9f7ef2469ec35ae01", 504 | "sha256:8190770146a4c8ed5d330d5b5ad1c76251c63349d25c96b3094875b930c44692", 505 | "sha256:08237eda23fd8e4e54838258b124f1cd141379a5f281b0a234ca99b38918c07a", 506 | "sha256:dfc5080c38dde3f43d8fbb9c0539a7839683475226cf83e4b24363b227dfe552", 507 | "sha256:e7a01e53163818d56eabddcafdc2090e9daba178aad05516b20c6591c4811020", 508 | "sha256:0e645dbfc03f279e1946cf07c9c754c2a1859cb4a41c5f70b25f6b3a586b6dbd", 509 | "sha256:f0521af1b722265d824d6ad055acfe9bd3341765735c44b5a4d0069e189a0f40", 510 | "sha256:3b243c77a822cd034dad53058d7c2abf80062aa6f4a32e9799c95d6391558631", 511 | "sha256:8f841bbc21d3dad2111a94c490fb0a591b8612ffea86b8e5571746ae76a3deac", 512 | "sha256:ee677635393414930541a096fc8e61634304bb0153e4e02b75685b11eba14cae", 513 | "sha256:423b3ff76957d29d1cce1bc0d62ebaf9a3fdfaf62344e3fdec14619bb7b5ad3a", 514 | "sha256:0611ee97296265af4a21164a5323f8c1b4e8e15c582d3dfa7610825900136bb7", 515 | "sha256:108c16640849e5827e7d51023efb3bd79244098c3f21e4897a1007720cb7ce37", 516 | "sha256:3ad73dfc6f82e494195144bd3a129c7241e761179b7cb5c07b9a0ede99c686f3", 517 | 
"sha256:d0cdd5658b49a722783b8b4f61a6f1f9c75042d0e29a30ccb6cacc9b25f6d9e2", 518 | "sha256:e24e22c8d98d3c704bb3410bce9b69e122a8de487ad3dbfe9985d154e5c03a40", 519 | "sha256:404a00314e85eca9d46b80929571b938e97a143b4f2ddc2b2b3c91a4c4ead9c5", 520 | "sha256:729f8f8363d32cebcb946de278324ab43d28096f36593be6281ca1ee86ce6559", 521 | "sha256:0e9bb7efe5f051ea7212555b290e784b82f21ffd0f655405ac4f87e288b730b3", 522 | "sha256:698c6409da58686f2df3d6f815491fd5b4c2de6817a45379517c92366eea208f", 523 | "sha256:878352408424dffaa695ffedf2f9f92844e116686923ed9aa8626fc30d32cfd1" 524 | ], 525 | "version": "==1.1.0" 526 | }, 527 | "send2trash": { 528 | "hashes": [ 529 | "sha256:f1691922577b6fa12821234aeb57599d887c4900b9ca537948d2dac34aea888b", 530 | "sha256:60001cc07d707fe247c94f74ca6ac0d3255aabcb930529690897ca2a39db28b2" 531 | ], 532 | "version": "==1.5.0" 533 | }, 534 | "simplegeneric": { 535 | "hashes": [ 536 | "sha256:dc972e06094b9af5b855b3df4a646395e43d1c9d0d39ed345b7393560d0b9173" 537 | ], 538 | "version": "==0.8.1" 539 | }, 540 | "six": { 541 | "hashes": [ 542 | "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb", 543 | "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9" 544 | ], 545 | "version": "==1.11.0" 546 | }, 547 | "spglib": { 548 | "hashes": [ 549 | "sha256:96e3a7e8c1b0ec9792ce2cd80eb7ac3549a8b14c235a21a4cff9ca990ffef4af", 550 | "sha256:c34a506ea50a4dbbcbcb335887a7b5441f4d4fdb06d807c49b46df2f22ecea60", 551 | "sha256:6d43a7a0f8f5a11b30a91ac3d0b6abeed3afee6d875a0d1f12aa68329c30ebec", 552 | "sha256:d9671483c536b9f1c326997436edce2ce2c361ada185e2ccddc648a1f8eea469", 553 | "sha256:29d80b9b4a4acbed69328617897a6724fe0bd9d2c195c28f73fcd5675f629418", 554 | "sha256:b98c35b1bf868eee4e5cc829e7692157dc19bbb1cff23bbef9de57cbf9eab306", 555 | "sha256:8060708aca7346488841482a9db51b1c99ea3a7b81e5852d7d9fa0f9032dd2eb", 556 | "sha256:8606242bdb8d2683f5be8a4e82bfd4446507e4266ab5f52f482f5c14ee14544c", 557 | 
"sha256:55b49227835396b2bcd6afe724e9f37202ad0f61e273bedebd5bf740bad2e8e3" 558 | ], 559 | "version": "==1.10.3.65" 560 | }, 561 | "sympy": { 562 | "hashes": [ 563 | "sha256:ac5b57691bc43919dcc21167660a57cc51797c28a4301a6144eff07b751216a4" 564 | ], 565 | "version": "==1.1.1" 566 | }, 567 | "tabulate": { 568 | "hashes": [ 569 | "sha256:e4ca13f26d0a6be2a2915428dc21e732f1e44dad7f76d7030b2ef1ec251cf7f2" 570 | ], 571 | "version": "==0.8.2" 572 | }, 573 | "terminado": { 574 | "hashes": [ 575 | "sha256:65011551baff97f5414c67018e908110693143cfbaeb16831b743fe7cad8b927", 576 | "sha256:55abf9ade563b8f9be1f34e4233c7b7bde726059947a593322e8a553cc4c067a" 577 | ], 578 | "version": "==0.8.1" 579 | }, 580 | "testpath": { 581 | "hashes": [ 582 | "sha256:039fa6a6c9fd3488f8336d23aebbfead5fa602c4a47d49d83845f55a595ec1b4", 583 | "sha256:0d5337839c788da5900df70f8e01015aec141aa3fe7936cb0d0a2953f7ac7609" 584 | ], 585 | "version": "==0.3.1" 586 | }, 587 | "tornado": { 588 | "hashes": [ 589 | "sha256:88ce0282cce70df9045e515f578c78f1ebc35dcabe1d70f800c3583ebda7f5f5", 590 | "sha256:ba9fbb249ac5390bff8a1d6aa4b844fd400701069bda7d2e380dfe2217895101", 591 | "sha256:408d129e9d13d3c55aa73f8084aa97d5f90ed84132e38d6932e63a67d5bec563", 592 | "sha256:c050089173c2e9272244bccfb6a8615fb9e53b79420a5551acfa76094ecc3111", 593 | "sha256:1b83d5c10550f2653380b4c77331d6f8850f287c4f67d7ce1e1c639d9222fbc7" 594 | ], 595 | "version": "==5.0.2" 596 | }, 597 | "traitlets": { 598 | "hashes": [ 599 | "sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9", 600 | "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835" 601 | ], 602 | "version": "==4.3.2" 603 | }, 604 | "urllib3": { 605 | "hashes": [ 606 | "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5", 607 | "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf" 608 | ], 609 | "version": "==1.23" 610 | }, 611 | "wcwidth": { 612 | "hashes": [ 613 | 
"sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c", 614 | "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e" 615 | ], 616 | "version": "==0.1.7" 617 | }, 618 | "webencodings": { 619 | "hashes": [ 620 | "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", 621 | "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" 622 | ], 623 | "version": "==0.5.1" 624 | } 625 | }, 626 | "develop": {} 627 | } 628 | -------------------------------------------------------------------------------- /notebooks/3-predicting-data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Machine Learning with Python\n", 8 | "\n", 9 | "We now have our data. We have sanitized it into a csv format. We have explored it.\n", 10 | "\n", 11 | "Now lets try to predict some properties." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "\n", 22 | "import matplotlib.pyplot as plt" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "## Load the data\n", 32 | "# df = pd.read_csv('../data/mpdata.csv')\n", 33 | "df = pd.read_csv('https://gitlab.com/costrouc/mse-machinelearning-notebooks/raw/master/data/mpdata.csv')" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/html": [ 44 | "
\n", 45 | "\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | "
material_idenergyvolumensitesenergy_per_atompretty_formulaspacegroupband_gapdensitytotal_magnetizationpoisson_ratiobulk_modulus_voigtbulk_modulus_reussbulk_modulus_vrhshear_modulus_voigtshear_modulus_vrh
2621mp-559777-372.248991732.10601656-6.647303Na5Ca2Al(PO4)494.64962.730746-8.000000e-07NaNNaNNaNNaNNaNNaN
4063mp-603327-355.351207654.01937464-5.552363H3SNO3615.18851.9721481.312010e-02NaNNaNNaNNaNNaNNaN
5443mp-559382-17.05370733.4821243-5.684569CoO21640.00004.5097549.997688e-01NaNNaNNaNNaNNaNNaN
4616mp-558564-281.494573681.18521136-7.819294SiO2125.51131.757625-1.510000e-05NaNNaNNaNNaNNaNNaN
2883mp-667374-1188.8764402214.927434168-7.076645NaAlSiO41694.54142.5559691.502800e-03NaNNaNNaNNaNNaNNaN
\n", 178 | "
" 179 | ], 180 | "text/plain": [ 181 | " material_id energy volume nsites energy_per_atom \\\n", 182 | "2621 mp-559777 -372.248991 732.106016 56 -6.647303 \n", 183 | "4063 mp-603327 -355.351207 654.019374 64 -5.552363 \n", 184 | "5443 mp-559382 -17.053707 33.482124 3 -5.684569 \n", 185 | "4616 mp-558564 -281.494573 681.185211 36 -7.819294 \n", 186 | "2883 mp-667374 -1188.876440 2214.927434 168 -7.076645 \n", 187 | "\n", 188 | " pretty_formula spacegroup band_gap density total_magnetization \\\n", 189 | "2621 Na5Ca2Al(PO4)4 9 4.6496 2.730746 -8.000000e-07 \n", 190 | "4063 H3SNO3 61 5.1885 1.972148 1.312010e-02 \n", 191 | "5443 CoO2 164 0.0000 4.509754 9.997688e-01 \n", 192 | "4616 SiO2 12 5.5113 1.757625 -1.510000e-05 \n", 193 | "2883 NaAlSiO4 169 4.5414 2.555969 1.502800e-03 \n", 194 | "\n", 195 | " poisson_ratio bulk_modulus_voigt bulk_modulus_reuss bulk_modulus_vrh \\\n", 196 | "2621 NaN NaN NaN NaN \n", 197 | "4063 NaN NaN NaN NaN \n", 198 | "5443 NaN NaN NaN NaN \n", 199 | "4616 NaN NaN NaN NaN \n", 200 | "2883 NaN NaN NaN NaN \n", 201 | "\n", 202 | " shear_modulus_voigt shear_modulus_vrh \n", 203 | "2621 NaN NaN \n", 204 | "4063 NaN NaN \n", 205 | "5443 NaN NaN \n", 206 | "4616 NaN NaN \n", 207 | "2883 NaN NaN " 208 | ] 209 | }, 210 | "execution_count": 3, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "df.sample(5)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 4, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/html": [ 227 | "
\n", 228 | "\n", 241 | "\n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " 
\n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | "
energyvolumensitesenergy_per_atomspacegroupband_gapdensitytotal_magnetizationpoisson_ratiobulk_modulus_voigtbulk_modulus_reussbulk_modulus_vrhshear_modulus_voigtshear_modulus_vrh
energy1.000000-0.852020-0.9651980.3451710.176241-0.3787380.185483-0.1219210.045966-0.080688-0.059895-0.071393-0.128289-0.056726
volume-0.8520201.0000000.862495-0.110264-0.1623440.302136-0.3093430.0341270.000059-0.208067-0.187330-0.201073-0.110852-0.052474
nsites-0.9651980.8624951.000000-0.194668-0.2172660.371992-0.2482810.095329-0.032862-0.032160-0.028786-0.0309920.0423270.017493
energy_per_atom0.345171-0.110264-0.1946681.000000-0.038482-0.221015-0.334075-0.1715080.051704-0.565429-0.465262-0.523804-0.434698-0.211633
spacegroup0.176241-0.162344-0.217266-0.0384821.000000-0.0936780.250417-0.0655600.0284440.1956540.2059960.2044830.1138080.058407
band_gap-0.3787380.3021360.371992-0.221015-0.0936781.000000-0.421409-0.220998-0.068310-0.266141-0.249368-0.262229-0.0366940.000743
density0.185483-0.309343-0.248281-0.3340750.250417-0.4214091.0000000.3221210.0655130.5018740.5380170.5294850.1654600.086553
total_magnetization-0.1219210.0341270.095329-0.171508-0.065560-0.2209980.3221211.0000000.0230070.0801650.0895850.086458-0.006023-0.031027
poisson_ratio0.0459660.000059-0.0328620.0517040.028444-0.0683100.0655130.0230071.000000-0.019499-0.022243-0.021264-0.1434810.082642
bulk_modulus_voigt-0.080688-0.208067-0.032160-0.5654290.195654-0.2661410.5018740.080165-0.0194991.0000000.9304990.9819580.6760280.325090
bulk_modulus_reuss-0.059895-0.187330-0.028786-0.4652620.205996-0.2493680.5380170.089585-0.0222430.9304991.0000000.9829770.5982730.312095
bulk_modulus_vrh-0.071393-0.201073-0.030992-0.5238040.204483-0.2622290.5294850.086458-0.0212640.9819580.9829771.0000000.6479450.324180
shear_modulus_voigt-0.128289-0.1108520.042327-0.4346980.113808-0.0366940.165460-0.006023-0.1434810.6760280.5982730.6479451.0000000.460951
shear_modulus_vrh-0.056726-0.0524740.017493-0.2116330.0584070.0007430.086553-0.0310270.0826420.3250900.3120950.3241800.4609511.000000
\n", 502 | "
" 503 | ], 504 | "text/plain": [ 505 | " energy volume nsites energy_per_atom \\\n", 506 | "energy 1.000000 -0.852020 -0.965198 0.345171 \n", 507 | "volume -0.852020 1.000000 0.862495 -0.110264 \n", 508 | "nsites -0.965198 0.862495 1.000000 -0.194668 \n", 509 | "energy_per_atom 0.345171 -0.110264 -0.194668 1.000000 \n", 510 | "spacegroup 0.176241 -0.162344 -0.217266 -0.038482 \n", 511 | "band_gap -0.378738 0.302136 0.371992 -0.221015 \n", 512 | "density 0.185483 -0.309343 -0.248281 -0.334075 \n", 513 | "total_magnetization -0.121921 0.034127 0.095329 -0.171508 \n", 514 | "poisson_ratio 0.045966 0.000059 -0.032862 0.051704 \n", 515 | "bulk_modulus_voigt -0.080688 -0.208067 -0.032160 -0.565429 \n", 516 | "bulk_modulus_reuss -0.059895 -0.187330 -0.028786 -0.465262 \n", 517 | "bulk_modulus_vrh -0.071393 -0.201073 -0.030992 -0.523804 \n", 518 | "shear_modulus_voigt -0.128289 -0.110852 0.042327 -0.434698 \n", 519 | "shear_modulus_vrh -0.056726 -0.052474 0.017493 -0.211633 \n", 520 | "\n", 521 | " spacegroup band_gap density total_magnetization \\\n", 522 | "energy 0.176241 -0.378738 0.185483 -0.121921 \n", 523 | "volume -0.162344 0.302136 -0.309343 0.034127 \n", 524 | "nsites -0.217266 0.371992 -0.248281 0.095329 \n", 525 | "energy_per_atom -0.038482 -0.221015 -0.334075 -0.171508 \n", 526 | "spacegroup 1.000000 -0.093678 0.250417 -0.065560 \n", 527 | "band_gap -0.093678 1.000000 -0.421409 -0.220998 \n", 528 | "density 0.250417 -0.421409 1.000000 0.322121 \n", 529 | "total_magnetization -0.065560 -0.220998 0.322121 1.000000 \n", 530 | "poisson_ratio 0.028444 -0.068310 0.065513 0.023007 \n", 531 | "bulk_modulus_voigt 0.195654 -0.266141 0.501874 0.080165 \n", 532 | "bulk_modulus_reuss 0.205996 -0.249368 0.538017 0.089585 \n", 533 | "bulk_modulus_vrh 0.204483 -0.262229 0.529485 0.086458 \n", 534 | "shear_modulus_voigt 0.113808 -0.036694 0.165460 -0.006023 \n", 535 | "shear_modulus_vrh 0.058407 0.000743 0.086553 -0.031027 \n", 536 | "\n", 537 | " poisson_ratio 
bulk_modulus_voigt bulk_modulus_reuss \\\n", 538 | "energy 0.045966 -0.080688 -0.059895 \n", 539 | "volume 0.000059 -0.208067 -0.187330 \n", 540 | "nsites -0.032862 -0.032160 -0.028786 \n", 541 | "energy_per_atom 0.051704 -0.565429 -0.465262 \n", 542 | "spacegroup 0.028444 0.195654 0.205996 \n", 543 | "band_gap -0.068310 -0.266141 -0.249368 \n", 544 | "density 0.065513 0.501874 0.538017 \n", 545 | "total_magnetization 0.023007 0.080165 0.089585 \n", 546 | "poisson_ratio 1.000000 -0.019499 -0.022243 \n", 547 | "bulk_modulus_voigt -0.019499 1.000000 0.930499 \n", 548 | "bulk_modulus_reuss -0.022243 0.930499 1.000000 \n", 549 | "bulk_modulus_vrh -0.021264 0.981958 0.982977 \n", 550 | "shear_modulus_voigt -0.143481 0.676028 0.598273 \n", 551 | "shear_modulus_vrh 0.082642 0.325090 0.312095 \n", 552 | "\n", 553 | " bulk_modulus_vrh shear_modulus_voigt shear_modulus_vrh \n", 554 | "energy -0.071393 -0.128289 -0.056726 \n", 555 | "volume -0.201073 -0.110852 -0.052474 \n", 556 | "nsites -0.030992 0.042327 0.017493 \n", 557 | "energy_per_atom -0.523804 -0.434698 -0.211633 \n", 558 | "spacegroup 0.204483 0.113808 0.058407 \n", 559 | "band_gap -0.262229 -0.036694 0.000743 \n", 560 | "density 0.529485 0.165460 0.086553 \n", 561 | "total_magnetization 0.086458 -0.006023 -0.031027 \n", 562 | "poisson_ratio -0.021264 -0.143481 0.082642 \n", 563 | "bulk_modulus_voigt 0.981958 0.676028 0.325090 \n", 564 | "bulk_modulus_reuss 0.982977 0.598273 0.312095 \n", 565 | "bulk_modulus_vrh 1.000000 0.647945 0.324180 \n", 566 | "shear_modulus_voigt 0.647945 1.000000 0.460951 \n", 567 | "shear_modulus_vrh 0.324180 0.460951 1.000000 " 568 | ] 569 | }, 570 | "execution_count": 4, 571 | "metadata": {}, 572 | "output_type": "execute_result" 573 | } 574 | ], 575 | "source": [ 576 | "df.corr()" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": 5, 582 | "metadata": {}, 583 | "outputs": [ 584 | { 585 | "data": { 586 | "text/plain": [ 587 | "" 588 | ] 589 | }, 590 | 
"execution_count": 5, 591 | "metadata": {}, 592 | "output_type": "execute_result" 593 | }, 594 | { 595 | "data": { 596 | "image/png": "\n", 597 | "text/plain": [ 598 | "
" 599 | ] 600 | }, 601 | "metadata": {}, 602 | "output_type": "display_data" 603 | } 604 | ], 605 | "source": [ 606 | "plt.matshow(df.corr())\n", 607 | "plt.colorbar()" 608 | ] 609 | }, 610 | { 611 | "cell_type": "markdown", 612 | "metadata": {}, 613 | "source": [ 614 | "# Let's choose a very simple example to show the methodology\n", 615 | "\n", 616 | "How about we try to predict the `energy_per_atom`. You can see from the correlation plot that there are two very highly correlated values in purple.\n", 617 | "\n", 618 | "We will simplify our model and only use the first four numeric columns. Obviously `volume` is not useful in the calculation (`energy_per_atom` is just `energy / nsites`), but we want to see if our algorithm can automatically determine this." 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": 6, 624 | "metadata": {}, 625 | "outputs": [], 626 | "source": [ 627 | "simplified_df = df[['energy', 'volume', 'nsites', 'energy_per_atom']]" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": 7, 633 | "metadata": {}, 634 | "outputs": [ 635 | { 636 | "data": { 637 | "text/html": [ 638 | "
\n", 639 | "\n", 652 | "\n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | "
energyvolumensitesenergy_per_atom
0-4.06460011.8527651-4.064600
1-16.38209647.2641584-4.095524
2-8.18695923.6173882-4.093479
3-4.06414211.8747031-4.064142
4-2.157191603.4752101-2.157191
\n", 700 | "
" 701 | ], 702 | "text/plain": [ 703 | " energy volume nsites energy_per_atom\n", 704 | "0 -4.064600 11.852765 1 -4.064600\n", 705 | "1 -16.382096 47.264158 4 -4.095524\n", 706 | "2 -8.186959 23.617388 2 -4.093479\n", 707 | "3 -4.064142 11.874703 1 -4.064142\n", 708 | "4 -2.157191 603.475210 1 -2.157191" 709 | ] 710 | }, 711 | "execution_count": 7, 712 | "metadata": {}, 713 | "output_type": "execute_result" 714 | } 715 | ], 716 | "source": [ 717 | "simplified_df.head(5)" 718 | ] 719 | }, 720 | { 721 | "cell_type": "markdown", 722 | "metadata": {}, 723 | "source": [ 724 | "Almost all (99%) machine learning algorithms need the data as arrays of floating point numbers. Scikit-learn is no different. This is how easy it is to convert a pandas dataframe to a numpy array.\n", 725 | "\n", 726 | "Not covered here, but something you will most likely need at some point: [preprocessing data](http://scikit-learn.org/stable/modules/preprocessing.html) and how to handle categorical data." 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": 8, 732 | "metadata": {}, 733 | "outputs": [ 734 | { 735 | "name": "stdout", 736 | "output_type": "stream", 737 | "text": [ 738 | "(6928, 3) (6928,)\n", 739 | "[[ -4.0645998 11.85276501 1. ]\n", 740 | " [-16.38209642 47.26415795 4. ]\n", 741 | " [ -8.18695876 23.61738783 2. ]] [-4.0645998 -4.0955241 -4.09347938]\n" 742 | ] 743 | } 744 | ], 745 | "source": [ 746 | "# convert from pandas dataframe to numpy array\n", 747 | "X = simplified_df[['energy', 'volume', 'nsites']].values\n", 748 | "y = simplified_df['energy_per_atom'].values\n", 749 | "\n", 750 | "print(X.shape, y.shape)\n", 751 | "print(X[:3], y[:3])" 752 | ] 753 | }, 754 | { 755 | "cell_type": "markdown", 756 | "metadata": {}, 757 | "source": [ 758 | "## Scikit Learn\n", 759 | "\n", 760 | "Very quick overview. [Scikit](http://scikit-learn.org/stable/) learn provides a unified framework for working with machine learning algorithms. 
It includes classification, regression, clustering, dimensionality reduction, model tuning, and pre- and post-processing of data.\n", 761 | "\n", 762 | "Is that a lot? **YES**, scikit-learn is huge and you cannot expect to use and learn everything.\n", 763 | "\n", 764 | "The flow chart gives some good advice for which algorithms to use for your problem. See their [flow chart](http://scikit-learn.org/stable/tutorial/machine_learning_map/index.html)\n", 765 | "\n", 766 | "![sklearn flowchart](../images/scklearn-flowchart.png)\n", 767 | "\n", 768 | "There are a ton of algorithms, over 100! This is where sklearn really shines. All algorithms have the exact same API (this is pseudocode).\n", 769 | "\n", 770 | "```python\n", 771 | "from sklearn import MyImportantModel\n", 772 | "\n", 773 | "model = MyImportantModel()\n", 774 | "model.fit(X, y)\n", 775 | "```\n", 776 | "\n", 777 | "Once you have `fit` your model you can use it to predict future data.\n", 778 | "\n", 779 | "```python\n", 780 | "y_predict = model.predict(X_predict)\n", 781 | "```\n", 782 | "\n", 783 | "We will be using a linear model to fit our data. Always start with the simplest model! That way you know what sort of improvement a complex one can get you.\n", 784 | "\n", 785 | "[sklearn.linear_model.LinearRegression](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html)" 786 | ] 787 | }, 788 | { 789 | "cell_type": "code", 790 | "execution_count": 9, 791 | "metadata": {}, 792 | "outputs": [], 793 | "source": [ 794 | "# Let's use a simple linear model\n", 795 | "from sklearn.linear_model import LinearRegression\n", 796 | "\n", 797 | "model = LinearRegression()" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": 10, 803 | "metadata": {}, 804 | "outputs": [ 805 | { 806 | "data": { 807 | "image/png": "\n", 808 | "text/plain": [ 809 | "
" 810 | ] 811 | }, 812 | "metadata": {}, 813 | "output_type": "display_data" 814 | } 815 | ], 816 | "source": [ 817 | "from sklearn.model_selection import cross_val_predict\n", 818 | "\n", 819 | "predicted = cross_val_predict(model, X, y, cv=10)\n", 820 | "\n", 821 | "fig, ax = plt.subplots()\n", 822 | "ax.scatter(y, predicted, edgecolors=(0, 0, 0))\n", 823 | "ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)\n", 824 | "ax.set_xlabel('Measured')\n", 825 | "ax.set_ylabel('Predicted')\n", 826 | "plt.show()" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": 13, 832 | "metadata": {}, 833 | "outputs": [], 834 | "source": [ 835 | "# let's do the cross validation by hand (a single train/test split)\n", 836 | "import sklearn" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": 14, 842 | "metadata": {}, 843 | "outputs": [], 844 | "source": [ 845 | "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.1)" 846 | ] 847 | }, 848 | { 849 | "cell_type": "code", 850 | "execution_count": 15, 851 | "metadata": {}, 852 | "outputs": [ 853 | { 854 | "data": { 855 | "text/plain": [ 856 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" 857 | ] 858 | }, 859 | "execution_count": 15, 860 | "metadata": {}, 861 | "output_type": "execute_result" 862 | } 863 | ], 864 | "source": [ 865 | "model = LinearRegression()\n", 866 | "model.fit(X_train, y_train)" 867 | ] 868 | }, 869 | { 870 | "cell_type": "code", 871 | "execution_count": 16, 872 | "metadata": {}, 873 | "outputs": [ 874 | { 875 | "data": { 876 | "text/plain": [ 877 | "1.5827284267819932" 878 | ] 879 | }, 880 | "execution_count": 16, 881 | "metadata": {}, 882 | "output_type": "execute_result" 883 | } 884 | ], 885 | "source": [ 886 | "y_predict = model.predict(X_test)\n", 887 | "\n", 888 | "# calculate mean squared error\n", 889 | "sklearn.metrics.mean_squared_error(y_test, y_predict)" 890 | ] 891 | }, 892 | { 893 | "cell_type": "code", 
894 | "execution_count": null, 895 | "metadata": {}, 896 | "outputs": [], 897 | "source": [] 898 | } 899 | ], 900 | "metadata": { 901 | "kernelspec": { 902 | "display_name": "Python 3", 903 | "language": "python", 904 | "name": "python3" 905 | }, 906 | "language_info": { 907 | "codemirror_mode": { 908 | "name": "ipython", 909 | "version": 3 910 | }, 911 | "file_extension": ".py", 912 | "mimetype": "text/x-python", 913 | "name": "python", 914 | "nbconvert_exporter": "python", 915 | "pygments_lexer": "ipython3", 916 | "version": "3.6.5" 917 | } 918 | }, 919 | "nbformat": 4, 920 | "nbformat_minor": 2 921 | } 922 | -------------------------------------------------------------------------------- /notebooks/2-explore-sanitize-data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Sanitizing Data\n", 8 | "\n", 9 | "Often data is messy. In our case we have two problems with the data.\n", 10 | "\n", 11 | " - values are missing\n", 12 | " - not in a format easy to do machine learning on.\n", 13 | " \n", 14 | "Machine Learning algorithms and in general all statistics algorithms like arrays." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 116, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import json\n", 24 | "from collections import defaultdict\n", 25 | "import re\n", 26 | "\n", 27 | "import pandas as pd\n", 28 | "import numpy as np\n", 29 | "import requests\n", 30 | "\n", 31 | "def convert_dict_to_pandas_frame(data, dict_frames):\n", 32 | " data_columns = defaultdict(list)\n", 33 | " for d in data.values():\n", 34 | " for key in dict_frames:\n", 35 | " subkey = d\n", 36 | " subkeys = dict_frames[key].split('.')\n", 37 | " for k in subkeys:\n", 38 | " if re.match(r'\\d+', k) and subkey is not None:\n", 39 | " index = int(k)\n", 40 | " if index >= len(subkey):\n", 41 | " subkey = None\n", 42 | " else:\n", 43 | " subkey = subkey[index]\n", 44 | " else:\n", 45 | " if subkey is not None:\n", 46 | " subkey = subkey.get(k)\n", 47 | " data_columns[key].append(subkey)\n", 48 | " df = pd.DataFrame.from_dict(data_columns)\n", 49 | " return df.set_index('material_id')" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 117, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "Number of materials 6928\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "## data will not be available locally so we need to download it\n", 67 | "# data = json.load(open('../data/mpdata.json'))\n", 68 | "data = requests.get('https://gitlab.com/costrouc/mse-machinelearning-notebooks/raw/master/data/mpdata.json').json()\n", 69 | "print('Number of materials', len(data))" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "If we look at one of the materials we can see that the data is not in the best format." 
77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 6, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/plain": [ 87 | "{'energy': -4.0645998,\n", 88 | " 'energy_per_atom': -4.0645998,\n", 89 | " 'volume': 11.852765009390795,\n", 90 | " 'formation_energy_per_atom': 0.03469247000000042,\n", 91 | " 'nsites': 1,\n", 92 | " 'unit_cell_formula': {'Cu': 1.0},\n", 93 | " 'pretty_formula': 'Cu',\n", 94 | " 'is_hubbard': False,\n", 95 | " 'elements': ['Cu'],\n", 96 | " 'nelements': 1,\n", 97 | " 'e_above_hull': 0.03469247000000042,\n", 98 | " 'hubbards': {},\n", 99 | " 'is_compatible': True,\n", 100 | " 'spacegroup': {'source': 'spglib',\n", 101 | " 'symbol': 'Im-3m',\n", 102 | " 'number': 229,\n", 103 | " 'point_group': 'm-3m',\n", 104 | " 'crystal_system': 'cubic',\n", 105 | " 'hall': '-I 4 2 3'},\n", 106 | " 'task_ids': ['mp-998890',\n", 107 | " 'mp-998895',\n", 108 | " 'mp-998898',\n", 109 | " 'mp-998906',\n", 110 | " 'mp-1056211',\n", 111 | " 'mp-1056219',\n", 112 | " 'mp-1056226',\n", 113 | " 'mp-1056233'],\n", 114 | " 'band_gap': 0.0,\n", 115 | " 'density': 8.902615866613178,\n", 116 | " 'icsd_id': None,\n", 117 | " 'icsd_ids': [183263],\n", 118 | " 'cif': \"# generated using pymatgen\\ndata_Cu\\n_symmetry_space_group_name_H-M 'P 1'\\n_cell_length_a 2.48779079\\n_cell_length_b 2.48779079\\n_cell_length_c 2.48779079\\n_cell_angle_alpha 109.47122063\\n_cell_angle_beta 109.47122063\\n_cell_angle_gamma 109.47122063\\n_symmetry_Int_Tables_number 1\\n_chemical_formula_structural Cu\\n_chemical_formula_sum Cu1\\n_cell_volume 11.85276501\\n_cell_formula_units_Z 1\\nloop_\\n _symmetry_equiv_pos_site_id\\n _symmetry_equiv_pos_as_xyz\\n 1 'x, y, z'\\nloop_\\n _atom_site_type_symbol\\n _atom_site_label\\n _atom_site_symmetry_multiplicity\\n _atom_site_fract_x\\n _atom_site_fract_y\\n _atom_site_fract_z\\n _atom_site_occupancy\\n Cu Cu1 1 0.000000 0.000000 0.000000 1\\n\",\n", 119 | " 'total_magnetization': -7e-07,\n", 120 | " 
'material_id': 'mp-998890',\n", 121 | " 'oxide_type': 'None',\n", 122 | " 'tags': ['Copper', 'High pressure experimental phase'],\n", 123 | " 'elasticity': {'G_Reuss': -17.960241741427517,\n", 124 | " 'G_VRH': 19.06895589595666,\n", 125 | " 'G_Voigt': 56.098153533340835,\n", 126 | " 'G_Voigt_Reuss_Hill': 19.06895589595666,\n", 127 | " 'K_Reuss': 145.87229552135727,\n", 128 | " 'K_VRH': 145.87229552135733,\n", 129 | " 'K_Voigt': 145.87229552135736,\n", 130 | " 'K_Voigt_Reuss_Hill': 145.87229552135733,\n", 131 | " 'elastic_anisotropy': -20.617315830427692,\n", 132 | " 'elastic_tensor': [[137.0, 150.0, 150.0, -0.0, 0.0, 0.0],\n", 133 | " [150.0, 137.0, 150.0, 0.0, -0.0, -0.0],\n", 134 | " [150.0, 150.0, 137.0, -0.0, 0.0, -0.0],\n", 135 | " [-0.0, 0.0, -0.0, 98.0, 0.0, 0.0],\n", 136 | " [0.0, -0.0, 0.0, 0.0, 98.0, 0.0],\n", 137 | " [0.0, -0.0, -0.0, 0.0, 0.0, 98.0]],\n", 138 | " 'homogeneous_poisson': 0.43736737340081105,\n", 139 | " 'poisson_ratio': 0.43736737340081105,\n", 140 | " 'universal_anisotropy': -20.617315830427692,\n", 141 | " 'elastic_tensor_original': [[137.23435775103798,\n", 142 | " 150.17225544048853,\n", 143 | " 150.20736652133866,\n", 144 | " 0.0,\n", 145 | " 0.00570621333333458,\n", 146 | " -0.0016961700000014818],\n", 147 | " [150.17956449887507,\n", 148 | " 137.23059429293244,\n", 149 | " 150.20736652133866,\n", 150 | " 0.0011605700000007649,\n", 151 | " 0.00553183666666768,\n", 152 | " -0.001712109999999591],\n", 153 | " [150.17956449887507,\n", 154 | " 150.17225544048853,\n", 155 | " 137.26733472684245,\n", 156 | " 0.0,\n", 157 | " 0.0056197499999995045,\n", 158 | " -0.0018734200000007824],\n", 159 | " [0.0, 0.0, 0.0, 97.81118299333336, 0.0, 0.0],\n", 160 | " [0.0, 0.0, 0.0, 0.0, 97.81105978333335, 0.0],\n", 161 | " [0.0, 0.0, 0.0, 0.0, 0.0, 97.81082478666669]],\n", 162 | " 'nsites': 1,\n", 163 | " 'compliance_tensor': [[-50.7, 26.5, 26.5, -0.0, -0.0, 0.0],\n", 164 | " [26.5, -50.7, 26.5, 0.0, -0.0, -0.0],\n", 165 | " [26.5, 26.5, -50.7, -0.0, 
0.0, -0.0],\n", 166 | " [-0.0, 0.0, -0.0, 10.2, 0.0, 0.0],\n", 167 | " [-0.0, -0.0, 0.0, 0.0, 10.2, 0.0],\n", 168 | " [0.0, -0.0, -0.0, 0.0, 0.0, 10.2]]},\n", 169 | " 'full_formula': 'Cu1'}" 170 | ] 171 | }, 172 | "execution_count": 6, 173 | "metadata": {}, 174 | "output_type": "execute_result" 175 | } 176 | ], 177 | "source": [ 178 | "first_key = next(iter(data))\n", 179 | "data[first_key]" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "Always aim to convert your data to a tabular form first." 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 118, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "key_map = {\n", 196 | " 'material_id': 'material_id',\n", 197 | " 'energy': 'energy',\n", 198 | " 'volume': 'volume',\n", 199 | " 'nsites': 'nsites',\n", 200 | " 'energy_per_atom': 'energy_per_atom',\n", 201 | " 'pretty_formula': 'pretty_formula',\n", 202 | " 'spacegroup': 'spacegroup.number',\n", 203 | " 'band_gap': 'band_gap',\n", 204 | " 'density': 'density',\n", 205 | " 'total_magnetization': 'total_magnetization',\n", 206 | " # Elasticity\n", 207 | " 'poisson_ratio': 'elasticity.poisson_ratio',\n", 208 | " 'bulk_modulus_voigt': 'elasticity.K_Voigt',\n", 209 | " 'bulk_modulus_reuss': 'elasticity.K_Reuss',\n", 210 | " 'bulk_modulus_vrh': 'elasticity.K_VRH',\n", 211 | " 'shear_modulus_voigt': 'elasticity.G_Voigt',\n", 212 | " 'shear_modulus_vrh': 'elasticity.G_VRH'\n", 213 | "}\n", 214 | "\n", 215 | "df = convert_dict_to_pandas_frame(data, key_map)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 119, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "name": "stdout", 225 | "output_type": "stream", 226 | "text": [ 227 | "\n", 228 | "Index: 6928 entries, mp-998890 to mp-30883\n", 229 | "Data columns (total 15 columns):\n", 230 | "energy 6928 non-null float64\n", 231 | "volume 6928 non-null float64\n", 232 | "nsites 6928 non-null int64\n", 
233 | "energy_per_atom 6928 non-null float64\n", 234 | "pretty_formula 6928 non-null object\n", 235 | "spacegroup 6928 non-null int64\n", 236 | "band_gap 6928 non-null float64\n", 237 | "density 6928 non-null float64\n", 238 | "total_magnetization 6928 non-null float64\n", 239 | "poisson_ratio 896 non-null float64\n", 240 | "bulk_modulus_voigt 896 non-null float64\n", 241 | "bulk_modulus_reuss 896 non-null float64\n", 242 | "bulk_modulus_vrh 896 non-null float64\n", 243 | "shear_modulus_voigt 896 non-null float64\n", 244 | "shear_modulus_vrh 896 non-null float64\n", 245 | "dtypes: float64(12), int64(2), object(1)\n", 246 | "memory usage: 866.0+ KB\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "df.info()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 120, 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/html": [ 262 | "
\n", 263 | "\n", 276 | "\n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | "
energyvolumensitesenergy_per_atompretty_formulaspacegroupband_gapdensitytotal_magnetizationpoisson_ratiobulk_modulus_voigtbulk_modulus_reussbulk_modulus_vrhshear_modulus_voigtshear_modulus_vrh
material_id
mvc-15868-102.229807182.56240020-5.111490ZnCoO3620.00006.2702504.401870NaNNaNNaNNaNNaNNaN
mp-714908-278.665566451.64541642-6.634894Fe10O1120.50455.4005732.000009NaNNaNNaNNaNNaNNaN
mp-2327-58.231276518.17163234-1.712685TiZn16630.00007.0143250.0000330.34179379.29482778.59976478.94729529.01469727.925264
mvc-12563-183.252030324.83637628-6.544715Mn2ZnO4590.29724.89278032.000000NaNNaNNaNNaNNaNNaN
mp-1038987-19.747382310.75306312-1.645615CaMg5380.00001.727083-0.000737NaNNaNNaNNaNNaNNaN
\n", 408 | "
" 409 | ], 410 | "text/plain": [ 411 | " energy volume nsites energy_per_atom pretty_formula \\\n", 412 | "material_id \n", 413 | "mvc-15868 -102.229807 182.562400 20 -5.111490 ZnCoO3 \n", 414 | "mp-714908 -278.665566 451.645416 42 -6.634894 Fe10O11 \n", 415 | "mp-2327 -58.231276 518.171632 34 -1.712685 TiZn16 \n", 416 | "mvc-12563 -183.252030 324.836376 28 -6.544715 Mn2ZnO4 \n", 417 | "mp-1038987 -19.747382 310.753063 12 -1.645615 CaMg5 \n", 418 | "\n", 419 | " spacegroup band_gap density total_magnetization \\\n", 420 | "material_id \n", 421 | "mvc-15868 62 0.0000 6.270250 4.401870 \n", 422 | "mp-714908 2 0.5045 5.400573 2.000009 \n", 423 | "mp-2327 63 0.0000 7.014325 0.000033 \n", 424 | "mvc-12563 59 0.2972 4.892780 32.000000 \n", 425 | "mp-1038987 38 0.0000 1.727083 -0.000737 \n", 426 | "\n", 427 | " poisson_ratio bulk_modulus_voigt bulk_modulus_reuss \\\n", 428 | "material_id \n", 429 | "mvc-15868 NaN NaN NaN \n", 430 | "mp-714908 NaN NaN NaN \n", 431 | "mp-2327 0.341793 79.294827 78.599764 \n", 432 | "mvc-12563 NaN NaN NaN \n", 433 | "mp-1038987 NaN NaN NaN \n", 434 | "\n", 435 | " bulk_modulus_vrh shear_modulus_voigt shear_modulus_vrh \n", 436 | "material_id \n", 437 | "mvc-15868 NaN NaN NaN \n", 438 | "mp-714908 NaN NaN NaN \n", 439 | "mp-2327 78.947295 29.014697 27.925264 \n", 440 | "mvc-12563 NaN NaN NaN \n", 441 | "mp-1038987 NaN NaN NaN " 442 | ] 443 | }, 444 | "execution_count": 120, 445 | "metadata": {}, 446 | "output_type": "execute_result" 447 | } 448 | ], 449 | "source": [ 450 | "df.sample(5)" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": 121, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "# write the data to a csv file\n", 460 | "df.to_csv('../data/mpdata.csv')" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "# Lets Explore the Data\n", 468 | "\n", 469 | "Exploring the data is important." 
470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 88, 475 | "metadata": {}, 476 | "outputs": [ 477 | { 478 | "data": { 479 | "text/plain": [ 480 | "6928" 481 | ] 482 | }, 483 | "execution_count": 88, 484 | "metadata": {}, 485 | "output_type": "execute_result" 486 | } 487 | ], 488 | "source": [ 489 | "# how many materials are we looking at?\n", 490 | "len(df)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 87, 496 | "metadata": {}, 497 | "outputs": [ 498 | { 499 | "data": { 500 | "text/plain": [ 501 | "1 680\n", 502 | "14 584\n", 503 | "2 452\n", 504 | "12 314\n", 505 | "62 291\n", 506 | "8 251\n", 507 | "15 208\n", 508 | "63 205\n", 509 | "166 160\n", 510 | "225 158\n", 511 | "221 155\n", 512 | "194 146\n", 513 | "123 117\n", 514 | "227 110\n", 515 | "61 102\n", 516 | "74 99\n", 517 | "5 96\n", 518 | "139 88\n", 519 | "19 86\n", 520 | "4 83\n", 521 | "160 81\n", 522 | "33 72\n", 523 | "6 72\n", 524 | "186 71\n", 525 | "11 69\n", 526 | "Name: spacegroup, dtype: int64" 527 | ] 528 | }, 529 | "execution_count": 87, 530 | "metadata": {}, 531 | "output_type": "execute_result" 532 | } 533 | ], 534 | "source": [ 535 | "# look at most common spacegroups\n", 536 | "df['spacegroup'].value_counts()[:25]" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": 115, 542 | "metadata": {}, 543 | "outputs": [ 544 | { 545 | "name": "stdout", 546 | "output_type": "stream", 547 | "text": [ 548 | "how many are metals? 
2270 / 6928\n", 549 | "material with highest band gap 17.8914 He\n" 550 | ] 551 | }, 552 | { 553 | "data": { 554 | "text/plain": [ 555 | "" 556 | ] 557 | }, 558 | "execution_count": 115, 559 | "metadata": {}, 560 | "output_type": "execute_result" 561 | }, 562 | { 563 | "data": { 564 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAD8CAYAAABthzNFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAE9lJREFUeJzt3X+w5XVdx/Hny8VS1ETjRgTcLjpEoeVCN7MMxyQLtST7gTCmSIyrk1aWM4nUqFPjDJVI9ItcBZUigvihlGSilU4zKe4CAyiSgIvsurIrGovKQMC7P8734uH23b3n7t7v+Z57z/Mxc+Z+z+d8v+f7vgf2vO7n8/18v99UFZIkLfaYvguQJE0mA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUqv9unrjJIcBFwAHAQVsrKpzkjwVuBiYA7YAJ1bV15MEOAd4MfAt4NVVde2e9nHggQfW3NxcV7+CJK1Jmzdv/mpVzSy1XmcBATwIvKmqrk3yJGBzkquBVwMfr6ozk5wOnA68GXgRcETz+HHg3Obnbs3NzbFp06YOfwVJWnuS3DHKep0NMVXV9oUeQFXdC9wMHAKcAHygWe0DwC82yycAF9TAp4ADkhzcVX2SpD0byzGIJHPA0cCngYOqanvz0lcYDEHBIDzuHNpsa9O2+L02JNmUZNPOnTs7q1mSpl3nAZHkicBlwBuratfwazW4lOyyLidbVRurar6q5mdmlhxCkyTtpU4DIsljGYTDhVV1edN818LQUfNzR9O+DThsaPNDmzZJUg86C4hmVtJ5wM1V9a6hl64ETmmWTwE+NNT+qgw8B7hnaChKkjRmXc5iei7wSuDGJNc3bWcAZwKXJDkNuAM4sXntKgZTXG9lMM311A5rkyQtobOAqKr/BLKbl49rWb+A13dVjyRpeTyTWpLUyoCQJLXq8hjExJs7/cOPLG858yU9ViJJk8cehCSplQEhSWplQEiSWhkQkqRWBoQkqZUBIUlqZUBIkloZEJKkVgaEJKmVASFJamVASJJaGRCSpFYGhCSplQEhSWrV5T2pz0+yI8lNQ20XJ7m+eWxZuBVpkrkk9w299jdd1SVJGk2X94N4P/CXwAULDVX18oXlJGcB9wytf1tVre+wHknSMnR5T+pPJplrey1JgBOBF3S1f0nSvunrGMSxwF1V9YWhtsOTXJfkE0mO7akuSVKjr1uOngxcNPR8OzBbVXcn+VHgg0meUVW7Fm+YZAOwAWB2dnYsxUrSNBp7DyLJfsAvARcvtFXV/VV1d7O8GbgN+IG27atqY1XNV9X8zMzMOEqWpKnUxxDTzwCfr6qtCw1JZpKsa5afBhwB3N5DbZKkRpfTXC8C/gs4MsnWJKc1L53Eo4eXAJ4H3NBMe70UeF1Vfa2r2iRJS+tyFtPJu2l/dUvbZcBlXdUiSVo+z6SWJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa26vCf1+Ul2JLlpqO3tSbYlub55vHjotbckuTXJLUl+rqu6JEmj6bIH8X7g+Jb2s6tqffO4CiDJUcBJwDOabf46yboOa5MkLaGzgKiqTwJfG3H1E4B/qKr7q+qL
wK3As7uqTZK0tD6OQbwhyQ3NENRTmrZDgDuH1tnatEmSejLugDgXeDqwHtgOnLXcN0iyIcmmJJt27ty50vVJkhpjDYiququqHqqqh4H38O1hpG3AYUOrHtq0tb3Hxqqar6r5mZmZbguWpCk21oBIcvDQ05cBCzOcrgROSvKdSQ4HjgCuGWdtkqRH26+rN05yEfB84MAkW4G3Ac9Psh4oYAvwWoCq+mySS4DPAQ8Cr6+qh7qqTZK0tM4CoqpObmk+bw/rvwN4R1f1SJKWxzOpJUmtDAhJUisDQpLUyoCQJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrQwISVKrzgIiyflJdiS5aajtT5N8PskNSa5IckDTPpfkviTXN4+/6aouSdJouuxBvB84flHb1cAzq+pHgP8G3jL02m1Vtb55vK7DuiRJI9ivqzeuqk8mmVvU9tGhp58CfqWr/UvTau70Dz+yvOXMl/RYiVa7Po9B/DrwL0PPD09yXZJPJDm2r6IkSQOd9SD2JMnvAw8CFzZN24HZqro7yY8CH0zyjKra1bLtBmADwOzs7LhKlqSpM1IPIskPr9QOk7wa+HngFVVVAFV1f1Xd3SxvBm4DfqBt+6raWFXzVTU/MzOzUmVJkhYZdYjpr5Nck+Q3kjx5b3eW5Hjg94CXVtW3htpnkqxrlp8GHAHcvrf7kSTtu5ECoqqOBV4BHAZsTvL3SV64p22SXAT8F3Bkkq1JTgP+EngScPWi6azPA25Icj1wKfC6qvra3v1KkqSVMPIxiKr6QpI/ADYBfw4cnSTAGVV1ecv6J7e8zXm7ee/LgMtGrUWS1L1Rj0H8SJKzgZuBFwC/UFU/1Cyf3WF9kqSejNqD+AvgvQx6C/ctNFbVl5tehSRpjRk1IF4C3FdVDwEkeQzwuKr6VlX9bWfVSZJ6M+ospo8Bjx96vn/TJklao0YNiMdV1TcWnjTL+3dTkiRpEow6xPTNJMdU1bUAzdnO9y2xjaQOec0ldW3UgHgj8I9JvgwE+F7g5Z1VJUnq3UgBUVWfSfKDwJFN0y1V9b/dlSVJ6ttyLtb3Y8Bcs80xSaiqCzqpStKyDA83SStlpIBI8rfA04HrgYea5gIMCElao0btQcwDRy1cfVWStPaNOs31JgYHpiVJU2LUHsSBwOeSXAPcv9BYVS/tpCpJUu9GDYi3d1mEJGnyjDrN9RNJvh84oqo+lmR/YF23pUmS+jTq5b5fw+BGPu9umg4BPthVUZKk/o16kPr1wHOBXTC4eRDwPV0VJUnq36gBcX9VPbDwJMl+DM6DkCStUaMGxCeSnAE8vrkX9T8C/7TURknOT7IjyU1DbU9NcnWSLzQ/n9K0J8mfJ7k1yQ1JjtmbX0iStDJGDYjTgZ3AjcBrgauAUe4k937g+Jb3+nhVHQF8vHkO8CLgiOaxATh3xNokSR0YdRbTw8B7msfIquqTSeYWNZ8APL9Z/gDwH8Cbm/YLmrO1P5XkgCQHV9X25exTkrQyRr0W0xdpOeZQVU/bi30eNPSl/xXgoGb5EODOofW2Nm0GhCT1YDnXYlrwOOBXgafu686rqpIs62B3kg0MhqCYnZ3d1xIkSbsx6hDT3Yua/izJZuCte7HPuxaGjpIcDOxo2rcBhw2td2jTtriWjcBGgPn5eWdSaVXzrnCaZKOeKHfM0GM+yetY3r0khl0JnNIsnwJ8aKj9Vc1spucA93j8QZL6M+qX/FlDyw8CW4ATl9ooyUUMDkgfmGQr8DbgTOCSJKcBdwy9z1XAi4FbgW8Bp45YmySpA6MOMf303rx5VZ28m5eOa1m3GJyxrUUchpDUh1FnMf3unl6vqnetTDmSpEmxnFlMP8bgOAHALwDXAF/ooihJUv9GDYhDgWOq6l6AJG8HPlxVv9ZVYZPOYR9Ja92ol9o4CHhg6PkDfPsEN0nSGjRqD+IC4JokVzTP
f5HBZTIkSWvUqLOY3pHkX4Bjm6ZTq+q67sqSJPVtOSe77Q/sqqr3JZlJcnhVfbGrwqRp5jEuTYJRp7m+jcFMpiOB9wGPBf6OwV3m1gT/QUrSo43ag3gZcDRwLUBVfTnJkzqrapUxXDQuw/+vSV0bdRbTA82ZzgWQ5AndlSRJmgSjBsQlSd4NHJDkNcDHWObNgyRJq8uos5je2dyLeheD4xBvraqrO61MktSrJQMiyTrgY80F+wwFSZoSSwZEVT2U5OEkT66qe8ZR1CTxALSkaTXqLKZvADcmuRr45kJjVf1WJ1WtYgaKpLVi1IC4vHloLxkcWuBUVa0WewyIJLNV9aWq8rpLa8ziLylDS9JiS/UgPggcA5Dksqr65e5Lml72MtYW/3tqtVsqIDK0/LSV2GGSI4GLF73vW4EDgNcAO5v2M6rqqpXYp7SaOSSlviwVELWb5b1WVbcA6+GRKbTbgCuAU4Gzq+qdK7EfSdK+WSognpVkF4OexOObZZrnVVXftY/7Pw64raruSLLkyno0hzAkdWmPAVFV6zre/0nARUPP35DkVcAm4E1V9fXFGyTZAGwAmJ2d7bi8bvjFLmk1WM79IFZUku8AXgq8pWk6F/gjBkNZfwScBfz64u2qaiOwEWB+fn5Fhr0mkePOkvrWW0AALwKuraq7ABZ+AiR5D/DPfRUmLccoYW7gazUa9WquXTiZoeGlJAcPvfYy4KaxVyRJekQvPYjmfhIvBF471PwnSdYzGGLasui1sfKvPUnqKSCq6pvAdy9qe2UftXSpr6EHD4JLWgl9HoOQNMSeqyaNAbHK7O5LZNxfLuPspdgjkvrR50FqSdIEswchrWH2vrQv7EFIklrZg1jjpuUvyGn5PaVxMiCkveCMI00Dh5gkSa0MCElSK4eYBOzbGP5aGv/3Xt3StxkQyzDt487T/vtL08aAmCJr4Qu+z97KWvj8pOXwGIQkqZUBIUlqZUBIkloZEJKkVgaEJKlVb7OYkmwB7gUeAh6sqvkkTwUuBuYY3Hb0xKr6el81am1ZS+drSOPQ9zTXn66qrw49Px34eFWdmeT05vmb+ylteq3UF6lfyNLq1ndALHYC8Pxm+QPAf2BA9Mq5/9L06jMgCvhokgLeXVUbgYOqanvz+leAg3qrTmuCASftvT4D4qeqaluS7wGuTvL54RerqprweJQkG4ANALOzs+OpVBPJL3+pW70FRFVta37uSHIF8GzgriQHV9X2JAcDO1q22whsBJifn/9/AaLJ57EJaXXoJSCSPAF4TFXd2yz/LPCHwJXAKcCZzc8P9VGfVp5/7UurT189iIOAK5Is1PD3VfWRJJ8BLklyGnAHcGJP9UnS1OslIKrqduBZLe13A8eNvyJJ0mKeSS1JamVASJJaTdqJcppizm6SJos9CElSK3sQmkr2VqSlGRBaUZ7vIK0dBoTWnOWGlKEmtTMgpD0wPDTNPEgtSWplQEiSWhkQkqRWHoNQrxzjlyaXPQhJUit7ENKU8ORALZc9CElSKwNCktTKISZNJA9eS/2zByFJajX2HkSSw4ALGNyXuoCNVXVOkrcDrwF2NqueUVVXjbs+aRp4wFqj6GOI6UHgTVV1bZInAZuTXN28dnZVvbOHmiRJi4w9IKpqO7C9Wb43yc3AIeOuQ5K0Z70eg0gyBxwNfLppekOSG5Kcn+QpvRUmSeovIJI8EbgMeGNV7QLOBZ4OrGfQwzhrN9ttSLIpyaadO3e2rSJJWgG9BESSxzIIhwur6nKAqrqrqh6qqoeB9wDPbtu2qjZW1XxVzc/MzIyvaEmaMn3MYgpwHnBzVb1rqP3g5vgEwMuAm8ZdmzTtnN2kYX3MYnou8ErgxiTXN21nACcnWc9g6usW4LU91CZJavQxi+k/gbS85DkPkjRBvNSGpFYON8mAkLRiDJW1xWsxSZJaGRCSpFYGhCSplQEhSWplQEiSWhkQkqRWTnOVtGxOZ50OBoQ05Sbx/t8G0GQwICSNlV/+
q4cBIWlJe9PLMAhWPwNC0j7ZlyGq5YbI7vZlAHXDgJDUuVFCZBKPhUw7p7lKklrZg5C06nm8oxsGhKQ1xbBYOQ4xSZJaTVwPIsnxwDnAOuC9VXVmzyVJ6tE4D17b+3i0iQqIJOuAvwJeCGwFPpPkyqr6XL+VSVqN9uULf3fbTlOITFRAAM8Gbq2q2wGS/ANwAmBASFoxy+2V7GsvZrWevzFpAXEIcOfQ863Aj/dUi6Q1ZDWdZzEpvZdJC4glJdkAbGiefiPJLfvwdgcCX933qlY1PwM/A/AzWLCszyF/vG87G2X73a2zj/v+/lFWmrSA2AYcNvT80KbtEVW1Edi4EjtLsqmq5lfivVYrPwM/A/AzWODn8GiTNs31M8ARSQ5P8h3AScCVPdckSVNponoQVfVgkjcA/8pgmuv5VfXZnsuSpKk0UQEBUFVXAVeNaXcrMlS1yvkZ+BmAn8ECP4chqaq+a5AkTaBJOwYhSZoQUxkQSY5PckuSW5Oc3nc945bksCT/nuRzST6b5Lf7rqkvSdYluS7JP/ddS1+SHJDk0iSfT3Jzkp/ou6ZxS/I7zb+Fm5JclORxfdc0CaYuIIYu5/Ei4Cjg5CRH9VvV2D0IvKmqjgKeA7x+Cj+DBb8N3Nx3ET07B/hIVf0g8Cym7PNIcgjwW8B8VT2TwQSZk/qtajJMXUAwdDmPqnoAWLicx9Soqu1VdW2zfC+DL4RD+q1q/JIcCrwEeG/ftfQlyZOB5wHnAVTVA1X1P/1W1Yv9gMcn2Q/YH/hyz/VMhGkMiLbLeUzdl+OCJHPA0cCn+62kF38G/B7wcN+F9OhwYCfwvmao7b1JntB3UeNUVduAdwJfArYD91TVR/utajJMY0CokeSJwGXAG6tqV9/1jFOSnwd2VNXmvmvp2X7AMcC5VXU08E1gqo7LJXkKg1GEw4HvA56Q5Nf6rWoyTGNALHk5j2mQ5LEMwuHCqrq873p68FzgpUm2MBhmfEGSv+u3pF5sBbZW1UIP8lIGgTFNfgb4YlXtrKr/BS4HfrLnmibCNAbE1F/OI0kYjDnfXFXv6ruePlTVW6rq0KqaY/D/wL9V1dT91VhVXwHuTHJk03Qc03d5/S8Bz0myf/Nv4zim7ED97kzcmdRd83IewOCv51cCNya5vmk7ozmLXdPnN4ELmz+YbgdO7bmesaqqTye5FLiWwQy/6/CMasAzqSVJuzGNQ0ySpBEYEJKkVgaEJKmVASFJamVASJJaGRCSpFYGhCSplQEhSWr1f60V57czCYkDAAAAAElFTkSuQmCC\n", 565 | "text/plain": [ 566 | "
" 567 | ] 568 | }, 569 | "metadata": {}, 570 | "output_type": "display_data" 571 | } 572 | ], 573 | "source": [ 574 | "# band gaps\n", 575 | "print('how many are metals?', len(df[df['band_gap'] == 0.0]), '/', len(df))\n", 576 | "print('material with highest band gap', df['band_gap'].max(), df.loc[df['band_gap'].idxmax()]['pretty_formula'])\n", 577 | "\n", 578 | "condition = (df['band_gap'] > 0.0) & (df['band_gap'] < 10)\n", 579 | "df[condition]['band_gap'].plot.hist(bins=100)" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 100, 585 | "metadata": {}, 586 | "outputs": [ 587 | { 588 | "data": { 589 | "text/plain": [ 590 | "" 591 | ] 592 | }, 593 | "execution_count": 100, 594 | "metadata": {}, 595 | "output_type": "execute_result" 596 | }, 597 | { 598 | "data": { 599 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAD8CAYAAABthzNFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFaZJREFUeJzt3XuwXeV53/Hvz2Audm3ERVYogggaxQ5pig0nDpnUbQKxyyW2SOJgPK5RCY3aBrfJuJ1YjtOYmTYdaOsQ6AWHGCfCsYMxjo1iaBLAl0w7A1gCyp0iCBTJXBTCxTYOFPP0j/0e2Mjr6OwjnXX2Pjrfz8ye/a53vWvvR+8+Ws9+37X2WqkqJEna0avGHYAkaTKZICRJnUwQkqROJghJUicThCSpkwlCktTJBCFJ6mSCkCR1MkFIkjrtPe4AdschhxxSq1atGncYkrSobN68+a+qavls7RZ1gli1ahWbNm0adxiStKgkeWiUdk4xSZI6mSAkSZ1MEJKkTiYISVInE4QkqZMJQpLUyQQhSepkgpAkdTJBSJI6LepfUkvSnmjV+qtfKj943qlji8MRhCSpkwlCktTJKSZJGoNRp5HGOd3kCEKS1MkEIUnqZIKQJHUyQUiSOpkgJEmdTBCSpE4mCElSp14TRJJlSa5Mck+Su5P8eJKDklyb5L72fGBrmyQXJdmS5LYkx/YZmyRp5/oeQVwI/GlVvQk4BrgbWA9cX1WrgevbMsDJwOr2WAdc3HNskqSd6C1BJDkA+AfApQBV9XxVPQWsATa0ZhuA01p5DXBZDdwALEtyaF/xSZJ2rs8RxJHAduD3k9yS5BNJXgusqKpHWptHgRWtfBjw8ND2W1udJGkM+kwQewPHAhdX1VuAb/PydBIAVVVAzeVFk6xLsinJpu3bt89bsJKkV+ozQWwFtlbVjW35SgYJ47HpqaP2/Hhbvw04fGj7la3uFarqkqqaqqqp5cuX9xa8JC11vSWIqnoUeDjJG1vVicBdwEZgbatbC1zVyhuBM9vZTMcDTw9NRUmSFljfl/v+l8Cnk+wDPACcxSApXZHkbOAh4PTW9hrgFGAL8GxrK0kak14TRFXdCkx1rDqxo20B5/QZjyRpdP6SWpLUyTvKSdICGb473GLgCEKS1MkEIUnqZIKQJHUy
QUiSOpkgJEmdTBCSpE4mCElSJxOEJKmTCUKS1MkEIUnqZIKQJHUyQUiSOpkgJEmdTBCSpE5e7luSFonhy4U/eN6pvb+fIwhJUicThCSpkwlCktTJBCFJ6mSCkCR1MkFIkjr1miCSPJjk9iS3JtnU6g5Kcm2S+9rzga0+SS5KsiXJbUmO7TM2SdLOLcQI4qeq6s1VNdWW1wPXV9Vq4Pq2DHAysLo91gEXL0BskqQZjGOKaQ2woZU3AKcN1V9WAzcAy5IcOob4JEn0nyAK+PMkm5Osa3UrquqRVn4UWNHKhwEPD227tdVJksag70tt/P2q2pbkDcC1Se4ZXllVlaTm8oIt0awDOOKII+YvUknSK/Q6gqiqbe35ceALwFuBx6anjtrz4635NuDwoc1XtrodX/OSqpqqqqnly5f3Gb4kLWm9JYgkr03yuuky8A7gDmAjsLY1Wwtc1cobgTPb2UzHA08PTUVJkhZYn1NMK4AvJJl+n89U1Z8m+TpwRZKzgYeA01v7a4BTgC3As8BZPcYmSZpFbwmiqh4AjumofwI4saO+gHP6ikeSNDf+klqS1MkEIUnqZIKQJHUyQUiSOpkgJEmdTBCSpE4mCElSJxOEJKmTCUKS1MkEIUnqZIKQJHUyQUiSOpkgJEmdTBCSpE4mCElSJxOEJKmTCUKS1MkEIUnqZIKQJHXq7Z7UkqTRrFp/9bhD6OQIQpLUyQQhSeo0UoJI8iN9ByJJmiyjjiD+e5KbkvxykgPm8gZJ9kpyS5IvteUjk9yYZEuSzybZp9Xv25a3tPWr5vQvkSTNq5ESRFW9DXgfcDiwOclnkrx9xPf4FeDuoeXzgQuq6geAJ4GzW/3ZwJOt/oLWTpI0JiMfg6iq+4DfAD4E/EPgoiT3JPm5mbZJshI4FfhEWw5wAnBla7IBOK2V17Rl2voTW3tJ0hiMegzi7yW5gMFI4ATgnVX1Q618wU42/R3g14AX2/LBwFNV9UJb3goc1sqHAQ8DtPVPt/aSpDEYdQTxX4CbgWOq6pyquhmgqr7BYFTxPZL8DPB4VW2el0hfft11STYl2bR9+/b5fGlJ0pBRfyh3KvCdqvouQJJXAftV1bNV9akZtvkJ4F1JTgH2A14PXAgsS7J3GyWsBLa19tsYHOPYmmRv4ADgiR1ftKouAS4BmJqaqhHjlyTN0agjiOuA/YeWX9PqZlRVH66qlVW1CjgD+HJVvQ/4CvDu1mwtcFUrb2zLtPVfrioTgCSNyagJYr+q+tb0Qiu/Zhff80PAB5NsYXCM4dJWfylwcKv/ILB+F19fkjQPRp1i+naSY6ePPSQ5DvjOqG9SVV8FvtrKDwBv7WjzN8AvjPqakqR+jZogfhX4XJJvAAG+D3hPb1FJksZupARRVV9P8ibgja3q3qr6f/2FJUkat7lc7vtHgVVtm2OTUFWX9RKVJGnsRkoQST4F/B3gVuC7rboAE4Qk7aFGHUFMAUd72qkkLR2jnuZ6B4MD05KkJWLUEcQhwF1JbgKem66sqnf1EpUkaexGTRDn9hmEJGnyjHqa69eSfD+wuqquS/IaYK9+Q5MkjdOol/v+JQb3aPjdVnUY8MW+gpIkjd+oB6nPYXB11mfgpZsHvaGvoCRJ4zdqgniuqp6fXmiX4/aUV0nag42aIL6W5NeB/du9qD8H/El/YUmSxm3UBLEe2A7cDvwz4BpmuJOcJGnPMOpZTC8Cv9cekqQlYNRrMf0lHcccquqoeY9IkjQR5nItpmn7Mbixz0HzH44kaVKMdAyiqp4Yemyrqt8BTu05NknSGI06xXTs0OKrGIwo5nIvCUnSIjPqTv5jQ+UXgAeB0+c9GknSxBj1LKaf6jsQSdJkGXWK6YM7W19Vvz0/4UiSJsVczmL6UWBjW34ncBNwXx9BSZLGb9QEsRI4tqq+CZDkXODqqvrHM22QZD/gL4B92/tcWVUfTXIkcDlwMLAZeH9VPZ9kXwb3uD4OeAJ4T1U9uEv/KknSbhv1Uhsr
gOeHlp9vdTvzHHBCVR0DvBk4KcnxwPnABVX1A8CTwNmt/dnAk63+gtZOkjQmoyaIy4CbkpzbRg83Aht2tkENfKstvro9CjiBwb0laK9xWiuvGXrNK4ETk2TE+CRJ82zUH8r9FnAWg2/8TwJnVdV/mG27JHsluRV4HLgWuB94qqpeaE22Mrj5EO354fZ+LwBPM5iGkiSNwagjCIDXAM9U1YXA1nYsYaeq6rtV9WYGxzDeCrxp18J8WZJ1STYl2bR9+/bdfTlJ0gxGveXoR4EPAR9uVa8G/nDUN6mqp4CvAD8OLGs3HIJB4tjWytuAw9v77Q0cwOBg9Y6vdUlVTVXV1PLly0cNQZI0R6OOIH4WeBfwbYCq+gbwup1tkGR5kmWtvD/wduBuBoni3a3ZWuCqVt7Ylmnrv1xV3rVOksZk1NNcn6+qSlIASV47wjaHAhuS7MUgEV1RVV9KchdweZJ/D9wCXNraXwp8KskW4K+BM+byD5Ekza9RE8QVSX6XwfTQLwG/yCw3D6qq24C3dNQ/wOB4xI71f8PgMuKSpAkw6rWY/nO7F/UzwBuB36yqa3uNTJI0VrMmiDZFdF27YJ9JQZKWiFkPUlfVd4EXkxywAPFIkibEqMcgvgXcnuRa2plMAFX1r3qJSpI0dqMmiD9uD0nSErHTBJHkiKr6v1W10+suSZJetmr91S+VHzzv1DFGsntmOwbxxelCks/3HIskaYLMliCGr6Z6VJ+BSJImy2wJomYoS5L2cLMdpD4myTMMRhL7tzJtuarq9b1GJ0kTZq7HF4bbLzY7TRBVtddCBSJJmixzuR+EJGkJMUFIkjqZICRJnUwQkqROo15qQ5K0E4v5bKWZOIKQJHUyQUiSOpkgJEmdTBCSpE4epJakXbQnHpge5ghCktTJBCFJ6tRbgkhyeJKvJLkryZ1JfqXVH5Tk2iT3tecDW32SXJRkS5LbkhzbV2ySpNn1OYJ4AfjXVXU0cDxwTpKjgfXA9VW1Gri+LQOcDKxuj3XAxT3GJkmaRW8JoqoeqaqbW/mbwN3AYcAaYPoe1xuA01p5DXBZDdwALEtyaF/xSZJ2bkGOQSRZBbwFuBFYUVWPtFWPAita+TDg4aHNtrY6SdIY9J4gkvwt4PPAr1bVM8PrqqqY461Mk6xLsinJpu3bt89jpJKkYb0miCSvZpAcPl1Vf9yqH5ueOmrPj7f6bcDhQ5uvbHWvUFWXVNVUVU0tX768v+AlaYnr8yymAJcCd1fVbw+t2gisbeW1wFVD9We2s5mOB54emoqSJC2wPn9J/RPA+4Hbk9za6n4dOA+4IsnZwEPA6W3dNcApwBbgWeCsHmOTJM2itwRRVf8TyAyrT+xoX8A5fcUjSZobf0ktSepkgpAkdTJBSJI6mSAkSZ28H4QkzWJPv+/DTBxBSJI6mSAkSZ1MEJKkTiYISVInD1JLWpSGDxw/eN6pY4xkz+UIQpLUyQQhSerkFJOkJW2mqaql+tuHYY4gJEmdHEFIWnIcHYzGEYQkqZMJQpLUyQQhSepkgpAkdfIgtaQ9lr+23j2OICRJnRxBSFLj6a+vZIKQtCS485+73qaYknwyyeNJ7hiqOyjJtUnua88HtvokuSjJliS3JTm2r7gkSaPp8xjEHwAn7VC3Hri+qlYD17dlgJOB1e2xDri4x7gkSSPoLUFU1V8Af71D9RpgQytvAE4bqr+sBm4AliU5tK/YJEmzW+izmFZU1SOt/CiwopUPAx4eare11UmSxmRsp7lWVQE11+2SrEuyKcmm7du39xCZJAkWPkE8Nj111J4fb/XbgMOH2q1sdd+jqi6pqqmqmlq+fHmvwUrSUrbQCWIjsLaV1wJXDdWf2c5mOh54emgqSpI0Br39DiLJHwE/CRySZCvwUeA84IokZwMPAae35tcApwBbgGeBs/qKS5I0mt4SRFW9d4ZVJ3a0LeCcvmKRJM2d12KSJHXyUhuSFj2v2toPE4SkibM7O3yvuTR/nGKSJHUy
QUiSOpkgJEmdPAYhaaJ5TGF8HEFIkjo5gpA0J55SunSYICRNBKeSJo8JQtLYmBQmm8cgJEmdTBCSpE5OMUlaME4pLS4mCEm7zDOa9mwmCEnzwmSx5zFBSIvUJO+QJzk2jc4EIWlWHjtYmkwQ0hI307f9+UoKJpfFywQhLUHutDUKfwchSepkgpAkdXKKSdoDjHLW0CjTSk49adhEJYgkJwEXAnsBn6iq88YckibYTDuzxXRa5Vx37KP829zJa75MTIJIshfw34C3A1uBryfZWFV3jTcy7cyOO6NJ3jn3cW5+X0lqptd156+FNDEJAngrsKWqHgBIcjmwBjBBjMAfJu26+Zqe2Vl7v/lrMUpVjTsGAJK8Gzipqv5pW34/8GNV9YGZtpmamqpNmzbt0vv1tUMd1456vnYufZwHL2n+7c7+Jcnmqpqard0kjSBGkmQdsK4tfivJvbv9mufPqfkhwF/18LrzbeQ4hy1wzLsU4xgshjgXQ4ywOOJcDDGS83crzu8fpdEkJYhtwOFDyytb3StU1SXAJQsV1I6SbBol847bYohzMcQIiyPOxRAjLI44F0OMsDBxTtLvIL4OrE5yZJJ9gDOAjWOOSZKWrIkZQVTVC0k+APwZg9NcP1lVd445LElasiYmQQBU1TXANeOOYxZjm96ao8UQ52KIERZHnIshRlgccS6GGGEB4pyYs5gkSZNlko5BSJImiAliB0l+IcmdSV5MMjVU/74ktw49Xkzy5rbuq0nuHVr3hla/b5LPJtmS5MYkq3qOcVWS7wzF8fGhdcclub3FclGStPqDklyb5L72fOB8xDhLnG9PsrnFsznJCUPrJqIv27oPt/e7N8k/Gqo/qdVtSbJ+qP7IFtuWFus+8xFjR8yfHeqfB5Pc2urn/Pn3Jcm5SbYNxXLK0Lo59WvPcf6nJPckuS3JF5Isa/UT05cdMS9cP1WVj6EH8EPAG4GvAlMztPkR4P6h5c62wC8DH2/lM4DP9hkjsAq4Y4ZtbgKOBwL8D+DkVv8fgfWtvB44v+++BN4C/O1W/rvAtgnsy6OB/w3sCxwJ3M/g5Im9WvkoYJ/W5ui2zRXAGa38ceBfLMDf68eA39zVz7/HuM4F/k1H/Zz7tec43wHs3crnT//9T1Jf7vDeC9pPjiB2UFV3V9VsP757L3D5CC+3BtjQylcCJ87Ht40RY3xJkkOB11fVDTX4K7sMOK0jxg1D9bttpjir6paq+kZbvBPYP8m+s7zcQvflGuDyqnquqv4S2MLgcjAvXRKmqp5n8HewpsVyQosN5rkvu7T3PB34o1na7ezzX2hz6te+g6mqP6+qF9riDQx+fzWjCejLBe0nE8SueQ/f+5/y99tQ9N8O7bgOAx6GwWm8wNPAwT3HdmSSW5J8LcnbhuLYOtRma6sDWFFVj7Tyo8CKnuPb0c8DN1fVc0N1k9CXL71fM91nM9UfDDw1tLMZ7uO+vA14rKruG6qb6+ffpw+0qZtPDk1dzrVfF9IvMhgRTJukvpy2oP00Uae5LpQk1wHf17HqI1V11Szb/hjwbFXdMVT9vqraluR1wOeB9zP4ZrHQMT4CHFFVTyQ5Dvhikh8e9T2rqpLM6bS23ezLH2YwrH/HUPWk9OVYjRjze3nlF5Xd+vznM0bgYuDfAdWeP8ZgB7zgRunLJB8BXgA+3dYtaF9OqiWZIKrqp3dj8zPYYfRQVdva8zeTfIbBMPAyXr58yNYkewMHAE/0FWP7Fv5cK29Ocj/wgy2O4aHz8GVMHktyaFU90obPj8/xPXepL5OsBL4AnFlV9w+93kT0JTu/9EtX/RPAsiR7t1FE56ViRjVbzK0Pfg44bmibXfn8d9mo/Zrk94AvtcW59utuG6Ev/wnwM8CJbdpowftyDka6JNF8cYppDpK8isGc7+VDdXsnOaSVX83gD216dLERWNvK7wa+PP0H2FN8yzO4rwZJjgJWAw+0KaRnkhzfpmzOBKa/hQ7HuHaovjftTJGrGRwc
/19D9RPTl+39zsjg7KkjGfTlTcxwSZgWy1dabNB/X/40cE9VvTTdsYuffy/al41pP8srP8eR+7XPGFucJwG/Bryrqp4dqp+YvtzBwvZTX0e/F+uDwR/zVgbfHh4D/mxo3U8CN+zQ/rXAZuA2BgdcLwT2auv2Az7H4EDcTcBRfcbIYD7/TuBW4GbgnUPbTDH4T3o/8F95+UeSBwPXA/cB1wEH9d2XwG8A325xTj/eMEl92dZ9pPXXvQydqQKcAvyftu4jQ/VHtdi2tFj37fHv9A+Af75D3Zw//x7j+xRwe/ssNwKH7mq/9hznFgZz+tN/h9Nnyk1MX3bEvGD95C+pJUmdnGKSJHUyQUiSOpkgJEmdTBCSpE4mCElSJxOEJKmTCUKS1MkEIUnq9P8BISSzSbJMi6EAAAAASUVORK5CYII=\n", 600 | "text/plain": [ 601 | "
" 602 | ] 603 | }, 604 | "metadata": {}, 605 | "output_type": "display_data" 606 | } 607 | ], 608 | "source": [ 609 | "# cohesive energies\n", 610 | "df['energy'].plot.hist(bins=100)" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 79, 616 | "metadata": {}, 617 | "outputs": [], 618 | "source": [ 619 | "conditions = (df['shear_modulus_vrh'] > 0) & (df['shear_modulus_vrh'] < 500) & \\\n", 620 | " (df['bulk_modulus_vrh'] > 0) & (df['bulk_modulus_vrh'] < 500) & \\\n", 621 | " (df['poisson_ratio'] > 0) & (df['poisson_ratio'] < 0.5)" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 142, 627 | "metadata": {}, 628 | "outputs": [ 629 | { 630 | "data": { 631 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEACAYAAACZLPCyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFCpJREFUeJzt3X+sX/V93/HnK3ZwSFSIZpyutWF2aicrSdoFXEaU5Y+GmTlrg9kCwykrqLVwq5RUTTRpkDUMoUQa0jY6EpqVFBKgyoB6Ib1VTD0i+kOtCNgkaAQy0guhw6ZSDEEQWICZvvfH93j5crk/zveee+69X9/nQ/rK53zO55z7/nK4fvlzfqaqkCRpvl631AVIksabQSJJ6sQgkSR1YpBIkjoxSCRJnRgkkqRODBJJUicGiSSpE4NEktSJQSJJ6mT1UhewGE466aTauHHjUpchSWPl/vvvf6qq1s3Vb0UEycaNGzlw4MBSlyFJYyXJ37Tp56EtSVInBokkqRODRJLUiUEiSerEIJEkdWKQSJI6MUgkSZ0YJJKkTlbEDYnL3TV3fWfkdT627W09VCJJo3NEIknqxCCRJHVikEiSOjFIJEmdGCSSpE4MEklSJwaJJKkTg0SS1IlBIknqxCCRJHVikEiSOjFIJEmdGCSSpE4MEklSJwaJJKkTg0SS1IlBIknqpNcgSbI9ySNJJpNcNs3yNUlua5bfm2Rj074tyf1JHmz+fP/QOn/WbPOB5vOWPr+DJGl2vb1qN8kq4DpgG3AQ2J9koqoeHuq2C3imqjYn2QlcDVwAPAV8sKqeTPJOYB+wfmi9C6vqQF+1S5La63NEcgYwWVWPVdXLwK3Ajil9dgA3NdN7gLOSpKq+WVVPNu0PAccnWdNjrZKkeeozSNYDTwzNH+TVo4pX9amqI8CzwNopfT4EfKOqXhpq+0JzWOuTSbKwZUuSRrGsT7YneQeDw12/NtR8YVW9C3hf8/nlGdbdneRAkgOHDx/uv1hJWqH6DJJDwMlD8xuatmn7JFkNnAg83cxvAO4ALqqqR4+uUFWHmj9/AHyJwSG016iq66tqa1VtXbdu3YJ8IUnSa/UZJPuBLUk2JTkO2AlMTOkzAVzcTJ8H3F1VleTNwFeBy6rqr452TrI6yUnN9OuBXwS+1eN3kCTNobcgac55XMrgiqtvA7dX1UNJrkpyTtPtBmBtkkng48DRS4QvBTYDV0y5zHcNsC/J/wQeYDCi+Xxf30GSNLfeLv
8FqKq9wN4pbVcMTb8InD/Nep8CPjXDZk9fyBolSd0s65PtkqTlzyCRJHVikEiSOjFIJEmdGCSSpE4MEklSJwaJJKkTg0SS1IlBIknqxCCRJHVikEiSOjFIJEmd9PrQxpXqmru+s9QlSNKicUQiSerEIJEkdWKQSJI6MUgkSZ0YJJKkTgwSSVInBokkqRODRJLUiUEiSerEIJEkdWKQSJI6MUgkSZ0YJJKkTgwSSVInBokkqZNegyTJ9iSPJJlMctk0y9ckua1Zfm+SjU37tiT3J3mw+fP9Q+uc3rRPJrk2Sfr8DpKk2fUWJElWAdcBHwBOBT6c5NQp3XYBz1TVZuAa4Oqm/Sngg1X1LuBi4JahdT4HXAJsaT7b+/oOkqS59TkiOQOYrKrHqupl4FZgx5Q+O4Cbmuk9wFlJUlXfrKonm/aHgOOb0ctPACdU1derqoCbgXN7/A6SpDn0GSTrgSeG5g82bdP2qaojwLPA2il9PgR8o6peavofnGObkqRFtKzf2Z7kHQwOd509j3V3A7sBTjnllAWuTJJ0VJ8jkkPAyUPzG5q2afskWQ2cCDzdzG8A7gAuqqpHh/pvmGObAFTV9VW1taq2rlu3ruNXkSTNpM8g2Q9sSbIpyXHATmBiSp8JBifTAc4D7q6qSvJm4KvAZVX1V0c7V9XfAs8lObO5Wusi4I96/A6SpDn0FiTNOY9LgX3At4Hbq+qhJFclOafpdgOwNskk8HHg6CXClwKbgSuSPNB83tIs+wjw+8Ak8ChwZ1/fQZI0t17PkVTVXmDvlLYrhqZfBM6fZr1PAZ+aYZsHgHcubKWSpPnyznZJUicGiSSpE4NEktSJQSJJ6sQgkSR1YpBIkjoxSCRJnRgkkqRODBJJUicGiSSpk1ZBkuRdfRciSRpPbUckv5vkviQfSXJirxVJksZKqyCpqvcBFzJ4d8j9Sb6UZFuvlUmSxkLrp/9W1V8n+W3gAHAt8O7mnSCfqKov91XgUrvmru8sdQmStKy1PUfyM0muYfBekfcDH6yqn26mr+mxPknSMtd2RPIZBi+T+kRV/fBoY1U92YxSJEkrVNsg+QXgh1X1CkCS1wFvqKr/U1W39FadJGnZa3vV1teA44fm39i0SZJWuLZB8oaqev7oTDP9xn5KkiSNk7ZB8kKS047OJDkd+OEs/SVJK0TbcyS/BfxhkieBAH8fuKC3qiRJY6NVkFTV/iT/EHh70/RIVf3f/sqSJI2L1jckAj8HbGzWOS0JVXVzL1VJksZGqyBJcgvwU8ADwCtNcwEGyRKZzx33H9v2th4qkbTStR2RbAVOrarqsxiNPwNOWnnaXrX1LQYn2CVJepW2I5KTgIeT3Ae8dLSxqs7ppSpJ0thoGyRX9lmEJGl8tX0fyZ8DjwOvb6b3A9+Ya70k25M8kmQyyWXTLF+T5LZm+b1JNjbta5P8aZLnk3x2yjp/1mzzgebzljbfQZLUj7aPkb8E2AP8XtO0HvjKHOusAq4DPgCcCnw4yalTuu0CnqmqzQweR3910/4i8Eng38yw+Qur6h81n++1+Q6SpH60Pdn+G8B7gedg8JIrYK6RwBnAZFU9VlUvA7cCO6b02QHc1EzvAc5Kkqp6oar+kkGgSJKWsbZB8lITBgAkWc3gPpLZrAeeGJo/2LRN26eqjgDPAmtb1POF5rDWJ5u3NEqSlkjbIPnzJJ8Ajm/e1f6HwB/3V9asLqyqdwHvaz6/PF2nJLuTHEhy4PDhw4taoCStJG2D5DLgMPAg8GvAXmCuNyMeAk4emt/QtE3bpxnlnAg8PdtGq+pQ8+cPgC8xOIQ2Xb/rq2prVW1dt27dHKVKkuar7UMb/w74fPNpaz+wJckmBoGxE/ilKX0mgIuBe4DzgLtnu3u+CZs3V9VTSV4P/CK+YGvF8e55aXlp+6yt7zLNOZGqeutM61TVkSSXAvuAVcCNVfVQkquAA1U1AdwA3JJkEvg+g7A5+jMfB04AjktyLnA28DfAviZEVj
EIkVHCbUUb9S9g//KV1MYoz9o66g3A+cDfm2ulqtrL4DDYcNsVQ9MvNtuabt2NM2z29Ll+riRp8bS9IfHpoc+hqvod4Bd6rk2SNAbaHto6bWj2dQxGKKO8y0SSdIxqGwb/aWj6CIPHpfyrBa9GkjR22l619fN9FyJJGk9tD219fLblVfWfF6YcSdK4GeWqrZ9jcN8HwAeB+4C/7qMoaaF574nUn7ZBsgE4rbmbnCRXAl+tqn/dV2GSpPHQNkh+HHh5aP7lpk3qbD6jBUnLR9sguRm4L8kdzfy5/Ojx75KkFaztVVufTnIng6ftAvxKVX2zv7IkSeNilJsK3wg8V1VfSLIuyaaq+m5fhUmawZVX9ttfGlHbV+3+e+DfApc3Ta8H/qCvoiRJ46Pt+0j+BXAO8AJAVT0J/FhfRUmSxkfbIHm5eU9IASR5U38lSZLGSdsguT3J7wFvTnIJvgdEktRoe9XWf2ze1f4c8Hbgiqq6q9fKJEljYc4gSbIK+Frz4EbDQ5L0KnMe2qqqV4C/S3LiItQjSRozbe8jeR54MMldNFduAVTVb/ZSlSRpbLQNki83H0mSXmXWIElySlX976ryuVqSpGnNNSL5CnAaQJL/XlUf6r8kSQtqPo9I8bEqGsFcJ9szNP3WPguRJI2nuUYkNcO0pDFxz6NPj7zOe3qoQ8euuYLkZ5M8x2BkcnwzTTNfVXVCr9WNqTNv/sxI/b9+0Ud7qkTL3bxeAdxDHVIXswZJVa1arEKklWjUf3QA3NNDHVIXbZ+1JUnStAwSSVInvQZJku1JHkkymeSyaZavSXJbs/zeJBub9rVJ/jTJ80k+O2Wd05M82KxzbZJM3a4kafGM8qrdkTQPe7wO2AYcBPYnmaiqh4e67QKeqarNSXYCVwMXAC8CnwTe2XyGfQ64BLgX2AtsB+7s63tIK5L3nmgEvQUJcAYwWVWPASS5FdgBDAfJDuDKZnoP8NkkqaoXgL9Msnl4g0l+Ajihqr7ezN8MnItB0srIJ3a3jX4iWNLK0+ehrfXAE0PzB5u2aftU1RHgWWDtHNs8OMc2JUmL6Jg92Z5kd5IDSQ4cPnx4qcuRpGNWn0FyCDh5aH5D0zZtnySrgROB2W7DPdRsZ7ZtAlBV11fV1qraum7duhFLlyS11WeQ7Ae2JNmU5DhgJzAxpc8EcHEzfR5wd1XN+CiWqvpb4LkkZzZXa10E/NHCly5Jaqu3k+1VdSTJpcA+YBVwY1U9lOQq4EBVTQA3ALckmQS+zyBsAEjyOHACcFySc4Gzmyu+PgJ8ETiewUl2T7T3ZD6P75C08vR51RZVtZfBJbrDbVcMTb8InD/DuhtnaD/Aay8JliQtkWP2ZLskaXEYJJKkTno9tCVpPPkOE43CEYkkqRODRJLUiYe2loH5vNzItypKWi4ckUiSOnFEohVhPqM+n34steOIRJLUiSMSaYHM55EyZ/ZQh7TYHJFIkjpxRKIZeTWZpDYMEklLZ9T3vPte+GXJIJEWyLyuDJOOAQaJNINRT5574lwrlUGiseO//KXlxau2JEmdOCLRknOEIY03RySSpE4MEklSJwaJJKkTg0SS1IlBIknqxCCRJHVikEiSOvE+EkkLYj7vY/lYD3Vo8TkikSR1YpBIkjrp9dBWku3AfwFWAb9fVf9hyvI1wM3A6cDTwAVV9Xiz7HJgF/AK8JtVta9pfxz4QdN+pKq29vkdJLUzr0fd/NTahS9Ei663IEmyCrgO2AYcBPYnmaiqh4e67QKeqarNSXYCVwMXJDkV2Am8A/hJ4GtJ3lZVrzTr/XxVPdVX7ZKk9vo8tHUGMFlVj1XVy8CtwI4pfXYANzXTe4CzkqRpv7WqXqqq7wKTzfYkSctMn0GyHnhiaP5g0zZtn6o6AjwLrJ1j3QL+R5L7k+zuoW5J0gjG8fLff1JVh5K8Bbgryf+qqr+Y2qkJmd0Ap5xyymLXKEkrRp8jkkPAyUPzG5q2afskWQ2cyOCk+4zrVtXRP7
8H3MEMh7yq6vqq2lpVW9etW9f5y0iSptdnkOwHtiTZlOQ4BifPJ6b0mQAubqbPA+6uqmradyZZk2QTsAW4L8mbkvwYQJI3AWcD3+rxO0iS5tDboa2qOpLkUmAfg8t/b6yqh5JcBRyoqgngBuCWJJPA9xmEDU2/24GHgSPAb1TVK0l+HLhjcD6e1cCXqupP+voOkqS59XqOpKr2AnuntF0xNP0icP4M634a+PSUtseAn134SiVJ8+Wd7ZKkTgwSSVInBokkqRODRJLUiUEiSerEIJEkdWKQSJI6MUgkSZ2M40MbpUUxrxc1aST3PPr0SP3f01Md6sYg0YLyL19p5fHQliSpE4NEktSJQSJJ6sQgkSR1YpBIkjoxSCRJnRgkkqROvI9E0vi48srFWUcjcUQiSerEIJEkdeKhLUljY9Rnc4HP51oMjkgkSZ0YJJKkTgwSSVInBokkqRNPtkvSFNfc9Z2R+n9s29t6qmQ8OCKRJHXiiETSsW0+d7a/95cWvIxjWa8jkiTbkzySZDLJZdMsX5Pktmb5vUk2Di27vGl/JMk/a7tNSdLi6m1EkmQVcB2wDTgI7E8yUVUPD3XbBTxTVZuT7ASuBi5IciqwE3gH8JPA15IcPQg51zYl6f+bz02MvHfh6ziW9TkiOQOYrKrHqupl4FZgx5Q+O4Cbmuk9wFlJ0rTfWlUvVdV3gclme222KUlaRH2eI1kPPDE0fxD4xzP1qaojSZ4F1jbtX5+y7vpmeq5tSlInZ978mZH6X8NHR/4Zx9KVXsfsyfYku4HdzezzSR4ZWnwi8Ow0q03XfhLw1MJXOLJX1/YHn13Y7fWzXpu+s/UZddlM/ZfnPly6bbVdt+v+m235+P8ODpvH7+PHx+N38B+0+ilV1cuHwbPS9g3NXw5cPqXPPuA9zfRqBv+zZGrfo/3abLNlbde3bQcO9PXfaCFqXuztjbJem76z9Rl12Sz79Zjbh1221Xbdrvtvjn3i7+AY/A62/fR5jmQ/sCXJpiTHMTh5PjGlzwRwcTN9HnB3Db7ZBLCzuaprE7AFuK/lNtv44xHbl4OFrm2+2xtlvTZ9Z+sz6rLlvP9gYevrsq2263bdf7Mt93dwPH4HW0mTSL1I8s+B3wFWATdW1aeTXMXgXxgTSd4A3AK8G/g+sLOqHmvW/XfArwJHgN+qqjtn2mZvX2Dw8w5U1dY+f4b65T4cb+6/5a/XIDkWJNldVdcvdR2aP/fheHP/LX8GiSSpE5+1JUnqxCCRJHVikEiSOjFIRpTkTUluSvL5JBcudT0aTZK3JrkhyZ6lrkXzk+Tc5vfvtiRnL3U9MkgASHJjku8l+daU9umeNPwvgT1VdQlwzqIXq9cYZf/V4Dltu5amUs1kxH34leb379eBC5aiXr2aQTLwRWD7cMPQ04s/AJwKfLh5KvEGfvS8r1cWsUbN7Iu0339anr7I6Pvwt5vlWmIGCVBVf8HghshhMz1p+CCDMAH/+y0LI+4/LUOj7MMMXA3cWVXfWOxa9Vr+RTiz6Z5evB74MvChJJ9jeT/OYaWbdv8lWZvkvwLvTnL50pSmlmb6Hfwo8E+B85L8+lIUplc7Zp/+25eqegH4laWuQ/NTVU8zOLauMVVV1wLXLnUd+hFHJDM7BJw8NL+hadN4cP+NP/fhmDBIZrZQTxrW0nD/jT/34ZgwSIAk/w24B3h7koNJdlXVEeBSBu9C+TZwe1U9tJR1anruv/HnPhxvPrRRktSJIxJJUicGiSSpE4NEktSJQSJJ6sQgkSR1YpBIkjoxSCRJnRgkkqRODBJJUif/D2vLPa1vW/SRAAAAAElFTkSuQmCC\n", 632 | "text/plain": [ 633 | "
" 634 | ] 635 | }, 636 | "metadata": {}, 637 | "output_type": "display_data" 638 | } 639 | ], 640 | "source": [ 641 | "# shear modulus vrh of metals vs nonmetals\n", 642 | "\n", 643 | "is_metal = df['band_gap'] == 0.0\n", 644 | "is_not_metal = df['band_gap'] > 0.0\n", 645 | "bins = 10**np.linspace(0, 2.7, 25) # 10^2.7 ~ 500\n", 646 | "ax = df[conditions & is_not_metal]['shear_modulus_vrh'].plot.hist(bins=bins, alpha=0.5, density=True)\n", 647 | "ax = df[conditions & is_metal]['shear_modulus_vrh'].plot.hist(ax=ax, bins=bins, color='red', alpha=0.5, density=True)\n", 648 | "ax.set_xscale('log')" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 82, 654 | "metadata": {}, 655 | "outputs": [ 656 | { 657 | "data": { 658 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEACAYAAACpoOGTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAD6VJREFUeJzt3XusZWV9xvHvI0i4tBWVU0oZpgcroSW2jXQ0NsRexDboKFBLUWNbSidOjdZLbVIHYor/mAyJFbFpqaNYRmsVilSwYK1SL2nSggOaoFDiBAcdBBkVxQsRwV//2GuY4/jOzNpnzt5rn3O+n+Rk1nr3Wmv/4M2ZZ951eVeqCkmS9va4oQuQJM0mA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktR06NAFHIxjjjmm5ufnhy5DkpaVW2655etVNXeg7ZZ1QMzPz7Nt27ahy5CkZSXJ3X228xSTJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU3L+kE5SeOb33T9WNvv2Lx+QpVo1jmCkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqmth030neDbwAuL+qnta1PQm4EpgHdgDnVtUDSQJcCjwf+D7wp1V166Rqk1aKcafulsYxyRHEFcAZe7VtAm6sqpOAG7t1gOcBJ3U/G4HLJliXJKmHiQVEVX0a+OZezWcBW7vlrcDZC9rfUyP/Cxyd5LhJ1SZJOrBpX4M4tqru7ZbvA47tlo8HvrJgu51dmyRpIINdpK6qAmrc/ZJsTLItybZdu3ZNoDJJEkw/IL62+9RR9+f9Xfs9wAkLtlvTtf2EqtpSVeuqat3c3NxEi5Wk1WzaAXEdcF63fB5w7YL2P8nIs4BvLzgVJUkawCRvc30/8NvAMUl2AhcBm4GrkmwA7gbO7Ta/gdEtrtsZ3eZ6/qTqkiT1M7GAqKqX7uOj0xvbFvCqSdUiSRqfT1JLkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkpom9D0LS6jW/6fqx99mxef0EKtHBcAQhSWpyBCFpvxYzGtDK4AhCktRkQEiSmgwISVKTASFJajIgJElNB
oQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElS0yABkeQvk3whyeeTvD/J4UlOTHJTku1Jrkxy2BC1SZJGph4QSY4HXgOsq6qnAYcALwEuBi6pqqcCDwAbpl2bJGmPoU4xHQockeRQ4EjgXuA5wNXd51uBsweqTZLEANN9V9U9Sd4CfBl4CPhP4BbgW1X1SLfZTuD41v5JNgIbAdauXTv5gqUpcVptzZohTjE9ETgLOBH4eeAo4Iy++1fVlqpaV1Xr5ubmJlSlJGmIU0zPBb5UVbuq6ofANcBpwNHdKSeANcA9A9QmSeoMERBfBp6V5MgkAU4Hbgc+AZzTbXMecO0AtUmSOlMPiKq6idHF6FuB27oatgBvAF6fZDvwZODyadcmSdpjkHdSV9VFwEV7Nd8FPHOAciRJDT5JLUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ19QqIJL8y6UIkSbOl7wjiH5LcnOSVSZ4w0YokSTOhV0BU1bOBlwEnALck+ZckvzvRyiRJg+p9DaKqvgi8kdGLfX4LeHuS/0vyokkVJ0kaTt9rEL+a5BLgDuA5wAur6pe75UsmWJ8kaSB93yj3d8C7gAur6qHdjVX11SRvnEhlkqRB9Q2I9cBDVfUoQJLHAYdX1fer6r0Tq06SNJi+1yA+DhyxYP3Irk2StEL1DYjDq+q7u1e65SMnU5IkaRb0DYjvJTl190qSXwce2s/2kqRlru81iNcB/5rkq0CAnwNePLGqJEmD6xUQVfWZJL8EnNw13VlVP5xcWZKkofUdQQA8A5jv9jk1CVX1nolUJUkaXK+ASPJe4BeBzwGPds0FGBCStEL1HUGsA06pqppkMZKk2dH3LqbPM7owLUlaJfqOII4Bbk9yM/CD3Y1VdeZivjTJ0Yym7ngao1NVfwbcCVzJ6DrHDuDcqnpgMceXJB28vgHxpiX+3kuB/6iqc5IcxuihuwuBG6tqc5JNwCZGM8dKkgbQ930Qn2L0r/rHd8ufAW5dzBd2Lxz6TeDy7tgPV9W3gLOArd1mW4GzF3N8SdLS6Dvd98uBq4F3dE3HAx9a5HeeCOwC/inJZ5O8K8lRwLFVdW+3zX3AsYs8viRpCfS9SP0q4DTgQXjs5UE/u8jvPBQ4Fbisqp4OfI/R6aTHdHdLNe+YSrIxybYk23bt2rXIEiRJB9I3IH5QVQ/vXklyKPv4C7yHncDOqrqpW7+aUWB8Lclx3fGPA+5v7VxVW6pqXVWtm5ubW2QJkqQD6XuR+lNJLgSO6N5F/Urgw4v5wqq6L8lXkpxcVXcCpwO3dz/nAZu7P69dzPGlWTC/6fqhS5AOWt+A2ARsAG4D/hy4gdFtqov1auB93R1MdwHnMxrNXJVkA3A3cO5BHF+SdJD6Ttb3I+Cd3c9Bq6rPMXo6e2+nL8XxJUkHr+9cTF+icc2hqp6y5BVJkmbCOHMx7XY48IfAk5a+HEnSrOj7oNw3FvzcU1VvA9ZPuDZJ0oD6nmI6dcHq4xiNKMZ5l4QkaZnp+5f83y5YfoRuMr0lr0aSNDP63sX0O5MuRJI0W/qeYnr9/j6vqrcuTTmSpFkxzl1MzwCu69ZfCNwMfHESRUmShtc3INYAp1bVdwCSvAm4vqr+aFKFSZKG1XeyvmOBhxesP4zTcUvSitZ3BPEe4OYk/9atn82el/tIklagvncxvTnJR4Bnd03nV9VnJ1eWJGlofU8xwei90Q9W1aXAziQnTqgmSdIM6PvK0YuANwAXdE2PB/55UkVJkobXdwTx+8CZjF4PSlV9FfjpSRUlSRpe34B4eOF7opMcNbmSJEmzoG9AXJXkHcDRSV4OfJwlenmQJGk29b2L6S3du6gfBE4G/qaqPjbRyiRJgzpgQCQ5BPh4N2GfoSBJq8QBTzFV1aPAj5I8YQr1SJJmRN8nqb8L3JbkY3R3MgFU1WsmUpUkaXB9A+Ka7keStErsNyCSrK2qL
1eV8y5J0ipzoGsQH9q9kOSDE65FkjRDDhQQWbD8lEkWIkmaLQe6BlH7WJakJTW/6fqxtt+xef2EKtFuBwqIX0vyIKORxBHdMt16VdXPTLQ6SdJg9hsQVXXItAqRJM2Wcd4HIUlaRQYLiCSHJPlskn/v1k9MclOS7UmuTHLYULVJkoYdQbwWuGPB+sXAJVX1VOABYMMgVUmSgIECIskaYD3wrm49wHOAq7tNtgJnD1GbJGlkqBHE24C/Bn7UrT8Z+FZVPdKt7wSOH6IwSdLI1AMiyQuA+6vqlkXuvzHJtiTbdu3atcTVSZJ2G2IEcRpwZpIdwAcYnVq6lNHb6nbfdrsGuKe1c1Vtqap1VbVubm5uGvVK0qo09YCoqguqak1VzQMvAf6rql4GfAI4p9vsPODaadcmSdpjlp6DeAPw+iTbGV2TuHzgeiRpVev7PoiJqKpPAp/slu8CnjlkPZKkPWZpBCFJmiEGhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJahr0QTlpuZjfdP3QJUhT5whCktRkQEiSmgwISVKT1yAkrRqLuZa0Y/P6CVSyPDiCkCQ1OYLQsua/CKXJcQQhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU1OtaGZ4ot5pNnhCEKS1GRASJKaph4QSU5I8okktyf5QpLXdu1PSvKxJF/s/nzitGuTJO0xxAjiEeCvquoU4FnAq5KcAmwCbqyqk4Abu3VJ0kCmHhBVdW9V3dotfwe4AzgeOAvY2m22FTh72rVJkvYY9BpEknng6cBNwLFVdW/30X3AsQOVJUliwNtck/wU8EHgdVX1YJLHPquqSlL72G8jsBFg7dq10yhVK4y30kr9DDKCSPJ4RuHwvqq6pmv+WpLjus+PA+5v7VtVW6pqXVWtm5ubm07BkrQKDXEXU4DLgTuq6q0LProOOK9bPg+4dtq1SZL2GOIU02nAHwO3Jflc13YhsBm4KskG4G7g3AFqkyR1ph4QVfXfQPbx8enTrEXS8uW1pMnzSWpJUpOT9Wli/BeetLw5gpAkNRkQkqQmA0KS1GRASJKaDAhJUpN3MakX70iSVh9HEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1+aDchI37gNmOzesn/h2L/R5Jq4sjCElSkyOIVcqpMyQdiCMISVKTASFJajIgJElNXoMYwzTO23ttQNKscAQhSWpyBCFJ+zGNZ5lmlSMISVKTASFJalq1p5i8GCxpElbS1DeOICRJTTMVEEnOSHJnku1JNg1djyStZjMTEEkOAf4eeB5wCvDSJKcMW5UkrV6zdA3imcD2qroLIMkHgLOA2wetSpImbFavW8zMCAI4HvjKgvWdXZskaQCzNILoJclGYGO3+t0kdy74+AnAtxu7tdqPAb6+9BWObV81T/t44+zXZ9sDbTNOX+2r3T5c/H4H24eL+cw+XML9cvGif8cAfqFXNVU1Ez/AbwAfXbB+AXDBmMfY0rcd2Db0f/P+ap728cbZr8+2B9pmnL6yD2evDxfzmX043T5civ+mWTrF9BngpCQnJjkMeAlw3ZjH+PCY7bNgqWtb7PHG2a/PtgfaZty+sg+Xdr+D7cPFfGYfLu1+i/0d6y1d0syEJM8H3gYcAry7qt48we/aVlXrJnV8TZ59uPzZh7Ntpq5BVNUNwA1T+rotU/oeTY59uPzZhzNspkYQkqTZMUvXICRJM8SAkCQ1GRCSpCYDopPkqCRbk7wzycuGrkfjS/KUJJcnuXroWjS+JGd3v39XJvm9oevRCg+IJO9Ocn+Sz+/V3po19kXA1VX1cuDMqRerpnH6sKruqqoNw1SqljH770Pd798rgBcPUa9+3IoOCOAK4IyFDfuZNXYNe+aCenSKNWr/rqB/H2r2XMH4/ffG7nMNbEUHRFV9GvjmXs2PzRpbVQ8Du2eN3ckoJGCF/39ZTsbsQ82Yc
fovIxcDH6mqW6ddq37SavyLcF+zxl4D/EGSy5jtKQG0jz5M8uQk/wg8PckFw5SmHvb1O/hq4LnAOUleMURh+nEz9ST1kKrqe8D5Q9ehxauqbzA6f61lqKreDrx96Dq0x2ocQdwDnLBgfU3XpuXDPlze7L9lYjUGxFLMGqth2YfLm/23TKzogEjyfuB/gJOT7EyyoaoeAf4C+ChwB3BVVX1hyDq1b/bh8mb/LW9O1idJalrRIwhJ0uIZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1/T8ejToprcTVoQAAAABJRU5ErkJggg==\n", 659 | "text/plain": [ 660 | "
" 661 | ] 662 | }, 663 | "metadata": {}, 664 | "output_type": "display_data" 665 | } 666 | ], 667 | "source": [ 668 | "bins = 10**np.linspace(0, 2.7, 25) # 10^2.7 ~ 500\n", 669 | "ax = df[conditions]['shear_modulus_vrh'].plot.hist(bins=bins)\n", 670 | "ax.set_xscale('log')" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 83, 676 | "metadata": {}, 677 | "outputs": [ 678 | { 679 | "data": { 680 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEACAYAAACpoOGTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAEaNJREFUeJzt3X+wbWVdx/H3R8wUK1E5It0LHiyGItORjmbjWCZaKCVUZpgVKuPNifyRztiFnOgfZ3AyEZqyrkKgGUpkQoEpkj+mmUQvZoIQeQdR7hXkFCb+mhD89sdeN47X596z92GvvfY55/2a2XPXetZae33hmXM+51k/U1VIkrSvBwxdgCRpPhkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkpgcOXcD9ceihh9bi4uLQZUjSunLttdf+V1UtrLbeug6IxcVFdu7cOXQZkrSuJPn8OOt5iEmS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkpnV9o5ykzWtx+xUTb3PL2Sf2UMnG5QhCktRkQEiSmnoLiCQXJLkjyfWNZa9JUkkO7eaT5Lwku5J8OslxfdUlSRpPnyOIC4ET9m1McgTwc8AXVjQ/Gzi6+2wD3tJjXZKkMfQWEFX1UeDOxqJzgNcCtaLtJODtNfIx4JAkh/dVmyRpdTM9B5HkJGBPVf37Pou2ALeumN/dtUmSBjKzy1yTHAycyejw0v35nm2MDkNx5JFHTqEySVLLLEcQPwQcBfx7kluArcAnkzwa2AMcsWLdrV3bd6mqHVW1VFVLCwurvhBJkrRGMwuIqrquqh5VVYtVtcjoMNJxVXU7cDnwW93VTE8BvlJVt82qNknSd+vzMteLgX8FjkmyO8lpB1j9SuBmYBfwVuB3+qpLkjSe3s5BVNULVlm+uGK6gNP7qkWSNDnvpJYkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJaprZK0claWiL26+YeJtbzj6xh0rWB0cQkqQmA0KS1GRASJKaDAhJUpMBIUlq6i0gklyQ5I4k169o++Mk/5Hk00n+PskhK5adkWRXkpuS/HxfdUmSxtPnCOJC4IR92q4CHldVjwf+EzgDIMmxwCnAj3Xb/HmSg3qsTZK0it4Coqo+Cty5T9sHquqebvZjwNZu+iTgXVX1v1X1OWAX8OS+apMkrW7IcxAvAd7XTW8Bbl2xbHfXJkkayCABkeQPgHuAd65h221JdibZuby8PP3iJEnAAAGR5EXALwAvrKrqmvcAR6xYbWvX9l2qakdVLVXV0sLCQq+1StJmNtOASHIC8FrguVX1jRWLLgdOSfK9SY4CjgY+PsvaJEnfqbeH9SW5GHg6cGiS3cBZjK5a+l7gqiQAH6uql1XVZ5JcAtzA6NDT6VV1b1+1SZJW11tAVNULGs3nH2D91wOv76seSdJkvJNaktRkQEiSmgwISVKTASFJajIgJElNBoQ
kqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUlNvAZHkgiR3JLl+RdsjklyV5LPdvw/v2pPkvCS7knw6yXF91SVJGk+fI4gLgRP2adsOXF1VRwNXd/MAzwaO7j7bgLf0WJckaQy9BURVfRS4c5/mk4CLuumLgJNXtL+9Rj4GHJLk8L5qkyStbtbnIA6rqtu66duBw7rpLcCtK9bb3bVJkgYy2EnqqiqgJt0uybYkO5PsXF5e7qEySRLMPiC+tPfQUffvHV37HuCIFett7dq+S1XtqKqlqlpaWFjotVhJ2sxmHRCXA6d206cCl61o/63uaqanAF9ZcShKkjSAB/b1xUkuBp4OHJpkN3AWcDZwSZLTgM8Dz+9WvxJ4DrAL+Abw4r7qkiSNp7eAqKoX7GfR8Y11Czi9r1okSZPzTmpJUpMBIUlq6u0QkyRNYnH7FUOXoH04gpAkNRkQkqSmsQIiyY/3XYgkab6MO4L48yQfT/I7SR7Wa0WSpLkwVkBU1dOAFzJ6HMa1Sf4mybN6rUySNKixz0FU1WeB1wG/D/wMcF6S/0jyy30VJ0kazrjnIB6f5BzgRuAZwC9W1Y920+f0WJ8kaSDj3gfxp8DbgDOr6pt7G6vqi0le10tlkqRBjRsQJwLfrKp7AZI8AHhwVX2jqt7RW3WSpMGMew7ig8BDVswf3LVJkjaocQPiwVX1tb0z3fTB/ZQkSZoH4wbE15Mct3cmyU8A3zzA+pKkdW7ccxCvAv42yReBAI8Gfq23qiRJgxsrIKrqE0l+BDima7qpqr7VX1mSpKFN8rjvJwGL3TbHJaGq3t5LVZLWNR/dvTGMFRBJ3gH8EPAp4N6uuQADQpI2qHFHEEvAsd27oyVJm8C4VzFdz+jE9FQk+b0kn0lyfZKLkzw4yVFJrkmyK8m7kzxoWvuTJE1u3IA4FLghyfuTXL73s5YdJtkCvAJYqqrHAQcBpwBvAM6pqh8GvgyctpbvlyRNx7iHmP6oh/0+JMm3GN1wdxujB//9erf8om6fb5nyfiVJYxr3MtePJHkMcHRVfTDJwYz+8p9YVe1J8kbgC4xutvsAcC3wP1V1T7fabmDLWr5fkjQd4z7u+6XApcBfdk1bgPeuZYdJHg6cBBwF/CDwUOCECbbflmRnkp3Ly8trKUGSNIZxz0GcDjwVuAv+/+VBj1rjPp8JfK6qlrub7d7TffchSfaOaLYCe1obV9WOqlqqqqWFhYU1liBJWs24AfG/VXX33pnuF/laL3n9AvCUJAcnCXA8cAPwIeB53TqnApet8fslSVMwbkB8JMmZjE4sPwv4W+Af1rLDqrqG0eGqTwLXdTXsYPQq01cn2QU8Ejh/Ld8vSZqOca9i2s7ostPrgN8GrmT0hrk1qaqzgLP2ab4ZePJav1OSNF3jXsX0beCt3UeStAmM+yymz9E451BVj516RZKkuTDJs5j2ejDwq8Ajpl+OJGlejHWSuqr+e8VnT1W9GTix59okSQMa9xDTcStmH8BoRDHJuyQkSevMuL/k/2TF9D3ALcDzp16NJGlujHsV08/2XYgkab6Me4jp1QdaXlVvmk45kqR5MclVTE8C9r4D4heBjwOf7aMoSdLwxg2IrcBxVfVVgCR/BFxRVb/RV2GSpGGN+yymw4C7V8zf3bVJkjaocUcQbwc+nuTvu/mTGb31TZK0QY17FdPrk7wPeFrX9OKq+rf+ypIkDW3cQ0wwenf0XVV1LrA7yVE91SRJmgPjvnL0LEbvazija/oe4K/7KkqSNLxxRxC/BDwX+DpAVX0R+P6+ipIkDW/cgLi7qorukd9JHtpfSZKkeTBuQFyS5C+BQ5K8FPggvjxIkja0ca9iemP3Luq7gGOAP6yqq3qtTJI0qFUDIslBwAe7B/ZNJRSSHMLondaPY3TY6iXATcC7gUW6p8VW1ZensT9J0uRWDYiqujfJt5M8rKq+MqX
9ngv8U1U9L8mDGF1CeyZwdVWdnWQ7sJ3RlVOSBrS4/YqhS9BAxr2T+mvAdUmuoruSCaCqXjHpDpM8DPhp4EXdd9wN3J3kJODp3WoXAR/GgJCkwYwbEO/pPtNwFLAM/FWSJwDXAq8EDquq27p1bsdnPUnSoA4YEEmOrKovVNU0n7v0QOA44OVVdU2ScxkdTvp/VVVJaj81bQO2ARx55JFTLEuStNJql7m+d+9Ekr+b0j53A7ur6ppu/lJGgfGlJId3+zocuKO1cVXtqKqlqlpaWFiYUkmSpH2tFhBZMf3Yaeywqm4Hbk1yTNd0PHADo5cRndq1nQpcNo39SZLWZrVzELWf6fvr5cA7uyuYbgZezCisLklyGvB54PlT3J8kaUKrBcQTktzFaCTxkG6abr6q6gfWstOq+hSj15ju6/i1fJ8kafoOGBBVddCsCpE0G97XoHFN8j4ISdImYkBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNY37NFdJc8h7GtQnRxCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaBguIJAcl+bck/9jNH5XkmiS7krw7yYOGqk2SNOwI4pXAjSvm3wCcU1U/DHwZOG2QqiRJwEABkWQrcCLwtm4+wDOAS7tVLgJOHqI2SdLIUCOINwOvBb7dzT8S+J+quqeb3w1sGaIwSdLIzAMiyS8Ad1TVtWvcfluSnUl2Li8vT7k6SdJeQ4wgngo8N8ktwLsYHVo6Fzgkyd4XGG0F9rQ2rqodVbVUVUsLCwuzqFeSNqWZB0RVnVFVW6tqETgF+OeqeiHwIeB53WqnApfNujZJ0n3m6T6I3wdenWQXo3MS5w9cjyRtaoO+k7qqPgx8uJu+GXjykPVIku4zTyMISdIcMSAkSU0GhCSpadBzENJGtrj9ionWv+XsE3uqRFobRxCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJ+yCkOTHpfRNS3xxBSJKaHEFI0gFs5jviHUFIkpoMCElSkwEhSWoyICRJTZ6k1qazlstJN9KJR2lcjiAkSU0zD4gkRyT5UJIbknwmySu79kckuSrJZ7t/Hz7r2iRJ9xliBHEP8JqqOhZ4CnB6kmOB7cDVVXU0cHU3L0kayMwDoqpuq6pPdtNfBW4EtgAnARd1q10EnDzr2iRJ9xn0HESSReCJwDXAYVV1W7foduCw/WyzLcnOJDuXl5dnUqckbUaDBUSS7wP+DnhVVd21cllVFVCt7apqR1UtVdXSwsLCDCqVpM1pkMtck3wPo3B4Z1W9p2v+UpLDq+q2JIcDdwxRm9Tik1a1GQ1xFVOA84Ebq+pNKxZdDpzaTZ8KXDbr2iRJ9xliBPFU4DeB65J8qms7EzgbuCTJacDngecPUJskqTPzgKiqfwGyn8XHz7IWSdL+eSe1JKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlq8o1ymiu+7U2aH44gJElNjiA0tkn/uvcve2l9cwQhSWoyICRJTR5imjOepJU0LwwI9caX7Ggz2kh/5BkQm5S/vCWtxnMQkqQmRxBa9xwNSf0wICawkY4tStJqPMQkSWqauxFEkhOAc4GDgLdV1dl97GdWhyU8/CFpvZqrgEhyEPBnwLOA3cAnklxeVTcMW5kk9WdeD1/P2yGmJwO7qurmqrobeBdw0sA1SdKmNFcjCGALcOuK+d3ATw5Uy7rhYSxJfZi3gFhVkm3Atm72a0luWrH4YcBXGpu12g8F/mv6FU5sfzXP+vsm2W6cdVdbZ5K+2l+7fbj27e5vH65lmX04xe3yhvvVh48Zq5qqmpsP8FPA+1fMnwGcMcH2O8ZtB3YO/d97oJpn/X2TbDfOuqutM0lf2Yfz14drWWYfzlcfjvOZt3MQnwCOTnJUkgcBpwCXT7D9P0zYPg+mXdtav2+S7cZZd7V1Ju0r+3C6293fPlzLMvtwuttN4+f
wgNKlzNxI8hzgzYwuc72gql7f0352VtVSH9+t2bAP1z/7cL7N3TmIqroSuHIGu9oxg32oX/bh+mcfzrG5G0FIkubDvJ2DkCTNCQNCktRkQEiSmgyITpKHJrkoyVuTvHDoejS5JI9Ncn6SS4euRWuT5OTuZ/DdSX5u6Ho2uw0dEEkuSHJHkuv3aT8hyU1JdiXZ3jX/MnBpVb0UeO7Mi1XTJH1Yo2d4nTZMpdqfCfvwvd3P4MuAXxuiXt1nQwcEcCFwwsqGFU+MfTZwLPCCJMcCW7nvOVD3zrBGHdiFjN+Hmk8XMnkfvq5brgFt6ICoqo8Cd+7TvL8nxu5mFBKwwf+/rCcT9qHm0CR9mJE3AO+rqk/OulZ9p834i7D1xNgtwHuAX0nyFub7kQDaTx8meWSSvwCemOSMYUrTmPb3c/hy4JnA85K8bIjCdJ+5u5N6KFX1deDFQ9ehtauq/2Z07FrrVFWdB5w3dB0a2YwjiD3AESvmt3ZtWj/sw/XPPlwHNmNA3N8nxmp49uH6Zx+uAxs6IJJcDPwrcEyS3UlOq6p7gN8F3g/cCFxSVZ8Zsk7tn324/tmH65cP65MkNW3oEYQkae0MCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKa/g/vTP5HkwBbGAAAAABJRU5ErkJggg==\n", 681 | "text/plain": [ 682 | "
" 683 | ] 684 | }, 685 | "metadata": {}, 686 | "output_type": "display_data" 687 | } 688 | ], 689 | "source": [ 690 | "bins = 10**np.linspace(0, 2.7, 25)\n", 691 | "ax = df[conditions]['bulk_modulus_vrh'].plot.hist(bins=bins)\n", 692 | "ax.set_xscale('log')" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": 84, 698 | "metadata": {}, 699 | "outputs": [ 700 | { 701 | "data": { 702 | "image/png": "\n", 703 | "text/plain": [ 704 | "
" 705 | ] 706 | }, 707 | "metadata": {}, 708 | "output_type": "display_data" 709 | } 710 | ], 711 | "source": [ 712 | "ax = df[conditions].plot(kind='scatter', x='shear_modulus_vrh', y='bulk_modulus_vrh', c='poisson_ratio')\n", 713 | "ax.set_xscale('log')\n", 714 | "ax.set_yscale('log')" 715 | ] 716 | }, 717 | { 718 | "cell_type": "markdown", 719 | "metadata": {}, 720 | "source": [ 721 | "Compare with publication [from materials project](https://www.nature.com/articles/sdata20159).\n", 722 | "\n", 723 | "![Elastic Constant Image](../images/elasticity_calculations.png)\n", 724 | "\n" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": null, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [] 733 | } 734 | ], 735 | "metadata": { 736 | "kernelspec": { 737 | "display_name": "Python 3", 738 | "language": "python", 739 | "name": "python3" 740 | }, 741 | "language_info": { 742 | "codemirror_mode": { 743 | "name": "ipython", 744 | "version": 3 745 | }, 746 | "file_extension": ".py", 747 | "mimetype": "text/x-python", 748 | "name": "python", 749 | "nbconvert_exporter": "python", 750 | "pygments_lexer": "ipython3", 751 | "version": "3.6.5" 752 | } 753 | }, 754 | "nbformat": 4, 755 | "nbformat_minor": 2 756 | } 757 | --------------------------------------------------------------------------------