├── .gitignore ├── Notebook ├── TrainBravais.ipynb ├── TrainLattice.ipynb ├── TrainSpaceGroup.ipynb └── ultrahardmat.ipynb ├── README.md ├── cryspnet ├── config.py ├── learner │ └── .gitkeep ├── losslandscape.py ├── models.py └── utils.py ├── demo ├── demo.csv ├── demo_metal.csv ├── demo_oxide.csv └── demo_train.csv ├── environment.yml ├── output └── .gitkeep ├── predict.py ├── random_crystal.py ├── requirement.txt ├── setup.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | src/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | Notebook/.ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # pycharm 109 | */.idea/** 110 | .idea/** 111 | 112 | # project specific 113 | 114 | # vscode 115 | .vscode 116 | 117 | # temporary 118 | README_example.md 119 | README_pytorch.md 120 | 121 | # output 122 | output/* 123 | !output/.gitkeep 124 | 125 | # demo 126 | demo/smallholdout 127 | demo/smallholdout/* 128 | demo/mp*.csv 129 | 130 | # learner 131 | cryspnet/learner/* 132 | !cryspnet/learner/.gitkeep 133 | 134 | # notebook 135 | Notebook/Notdemo 136 | 137 | # temporary files 138 | tmp/* 139 | 140 | # README example 141 | README_example.md 142 | README_pytorch.md 143 | 144 | # old version 145 | *.old 146 | 147 | # other file 148 | other/* 149 | -------------------------------------------------------------------------------- /Notebook/ultrahardmat.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# for loading the pre-trained models\n", 10 | "from cryspnet.models import *\n", 11 | "# for feature generation and plotting\n", 12 | "from cryspnet.utils import *\n", 13 | "# for accessing some pre-defined constant\n", 14 | "from cryspnet.config import *" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import pandas as pd\n", 24 | "import numpy as np" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Load in the Pre-Trained Models \n", 32 | "\n", 33 | "These three API functions retrieve the pre-trained models:\n", 34 | "```Python\n", 35 | " load_Bravais_models(n_ensembler, which)\n", 36 | " load_Lattice_models()\n", 37 | " load_SpaceGroup_models()\n", 38 | "```\n", 39 | "\n", 40 | "**Note**: If a file-not-found error is raised, make sure you have followed the instructions to download and extract the models to the correct place." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "BE = load_Bravais_models(n_ensembler=5, which=\"whole\")" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "LB = load_Lattice_models()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "SGB = load_SpaceGroup_models()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# Download the Superhard Dataset\n", 75 | "\n", 76 | "Matminer provides an easy-to-use API for fast dataset retrieval. 
Check out [this page](https://hackingmaterials.lbl.gov/matminer/dataset_summary.html) for a list of all available datasets." 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 6, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "from matminer.datasets import load_dataset" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 7, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "shard = load_dataset(\"brgoch_superhard_training\")" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/html": [ 105 | "
\n", 106 | "\n", 119 | "\n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | "
formulabulk_modulusshear_moduluscompositionmaterial_idstructurebrgoch_featssuspect_value
0AlPt3225.23046191.197748(Al, Pt)mp-188[[0. 0. 0.] Al, [0. 1.96140395 1.96140395] Pt, [1.96140395 1.96140395 0. ] Pt, [1.96140395 0. 1.96140395] Pt]{'atomic_number_feat_1': 123.5, 'atomic_number_feat_2': 221, 'atomic_number_feat_3': 13, 'atomic_number_feat_4': 78, 'atomic_weight_feat_1': 306.1107695, 'atomic_weight_feat_2': 558.258461, 'atomic_weight_feat_3': 26.981539, 'atomic_weight_feat_4': 195.08, 'period_number_feat_1': 10.5, 'period_number_feat_2': 15, 'period_number_feat_3': 3, 'period_number_feat_4': 6, 'group_number_feat_1': 21.5, 'group_number_feat_2': 17, 'group_number_feat_3': 10, 'group_number_feat_4': 13, 'family_number_feat_1': 8.5, 'family_number_feat_2': 7, 'family_number_feat_3': 4, 'family_number_feat_4': 5, 'Mendel...False
1Mn2Nb232.69634074.590157(Mn, Nb)mp-12659[[-2.23765223e-08 1.42974191e+00 5.92614104e+00] Mn, [3.57977169 0.71487093 1.97538035] Mn, [1.23819283 0.71487093 1.97538035] Mn, [1.17078941 3.45760869 5.92614104] Mn, [0. 0. 0.] Mn, [0. 0. 3.9507607] Mn, [2.40898226 2.74273771 1.97538035] Mn, [-1.17078945 3.45760869 5.92614104] Mn, [-2.07564892e-08 2.78165309e+00 3.46044846e+00] Nb, [2.40898226 1.39082653 7.41120915] Nb, [2.40898226 1.39082653 4.44107293] Nb, [-2.07564892e-08 2.78165309e+00 4.90312238e-01] Nb]{'atomic_number_feat_1': 45.5, 'atomic_number_feat_2': 9, 'atomic_number_feat_3': 25, 'atomic_number_feat_4': 41, 'atomic_weight_feat_1': 101.39124, 'atomic_weight_feat_2': 16.96972, 'atomic_weight_feat_3': 54.93805, 'atomic_weight_feat_4': 92.90638, 'period_number_feat_1': 6.5, 'period_number_feat_2': 3, 'period_number_feat_3': 4, 'period_number_feat_4': 5, 'group_number_feat_1': 9.5, 'group_number_feat_2': 9, 'group_number_feat_3': 5, 'group_number_feat_4': 7, 'family_number_feat_1': 6.0, 'family_number_feat_2': 4, 'family_number_feat_3': 4, 'family_number_feat_4': 4, 'Mendeleev_number_f...False
2HfO2204.57343398.564374(Hf, O)mp-352[[2.24450185 3.85793022 4.83390736] O, [2.77883096 1.26035638 2.32337921] O, [ 2.79593023 1.33721745 -0.3395849 ] O, [2.26160112 3.93479129 2.17094325] O, [0.33243749 1.71503186 1.79217508] O, [4.72509386 4.3126057 0.03918327] O, [4.70799459 3.48011581 2.70214738] O, [0.31533822 0.88254197 4.45513919] O, [1.37618055 2.37584161 3.54068723] Hf, [3.64715226 4.97341544 3.61659934] Hf, [3.66425153 2.81930606 0.95363523] Hf, [1.39327982 0.22173223 0.87772312] Hf]{'atomic_number_feat_1': 44.0, 'atomic_number_feat_2': 56, 'atomic_number_feat_3': 8, 'atomic_number_feat_4': 72, 'atomic_weight_feat_1': 105.2444, 'atomic_weight_feat_2': 146.4912, 'atomic_weight_feat_3': 15.9994, 'atomic_weight_feat_4': 178.49, 'period_number_feat_1': 5.0, 'period_number_feat_2': 2, 'period_number_feat_3': 2, 'period_number_feat_4': 6, 'group_number_feat_1': 18.0, 'group_number_feat_2': 28, 'group_number_feat_3': 4, 'group_number_feat_4': 16, 'family_number_feat_1': 9.0, 'family_number_feat_2': 10, 'family_number_feat_3': 4, 'family_number_feat_4': 7, 'Mendeleev_number_f...False
3Cu3Pt159.31264051.778816(Cu, Pt)mp-12086[[0. 1.86144248 1.86144248] Cu, [1.86144248 1.86144248 0. ] Cu, [1.86144248 0. 1.86144248] Cu, [0. 0. 0.] Pt]{'atomic_number_feat_1': 82.5, 'atomic_number_feat_2': 9, 'atomic_number_feat_3': 29, 'atomic_number_feat_4': 78, 'atomic_weight_feat_1': 192.859, 'atomic_weight_feat_2': 4.44200000000001, 'atomic_weight_feat_3': 63.546, 'atomic_weight_feat_4': 195.08, 'period_number_feat_1': 9.0, 'period_number_feat_2': 6, 'period_number_feat_3': 4, 'period_number_feat_4': 6, 'group_number_feat_1': 21.5, 'group_number_feat_2': 23, 'group_number_feat_3': 10, 'group_number_feat_4': 11, 'family_number_feat_1': 8.0, 'family_number_feat_2': 8, 'family_number_feat_3': 4, 'family_number_feat_4': 4, 'Mendeleev_nu...False
4Mg3Pt69.63756527.588765(Mg, Pt)mp-18707[[0. 0. 2.73626461] Mg, [0. 0. 6.95125305] Mg, [-3.97440991e-08 4.58925330e+00 1.79637294e+00] Mg, [3.97440996 2.29462661 6.01136137] Mg, [-3.97440991e-08 4.58925330e+00 6.01136137e+00] Mg, [3.97440996 2.29462661 1.79637294] Mg, [1.12369616 1.94629885 0.72058152] Mg, [-1.12369616 1.94629885 4.93556996] Mg, [2.24739233 0. 4.93556996] Mg, [5.70142751 0. 0.72058152] Mg, [-2.85071376 4.93758106 0.72058152] Mg, [2.85071376 4.93758106 4.93556996] Mg, [1.48939724 2.57971169 3.61780896] Mg, [-1.48939724 2.57971169 7.83279739] Mg, [2.9787944...{'atomic_number_feat_1': 57.0, 'atomic_number_feat_2': 42, 'atomic_number_feat_3': 12, 'atomic_number_feat_4': 78, 'atomic_weight_feat_1': 133.9975, 'atomic_weight_feat_2': 122.165, 'atomic_weight_feat_3': 24.305, 'atomic_weight_feat_4': 195.08, 'period_number_feat_1': 7.5, 'period_number_feat_2': 3, 'period_number_feat_3': 3, 'period_number_feat_4': 6, 'group_number_feat_1': 8.0, 'group_number_feat_2': 4, 'group_number_feat_3': 2, 'group_number_feat_4': 10, 'family_number_feat_1': 5.0, 'family_number_feat_2': 2, 'family_number_feat_3': 2, 'family_number_feat_4': 4, 'Mendeleev_number_feat_...False
\n", 191 | "
" 192 | ], 193 | "text/plain": [ 194 | " formula bulk_modulus shear_modulus composition material_id \\\n", 195 | "0 AlPt3 225.230461 91.197748 (Al, Pt) mp-188 \n", 196 | "1 Mn2Nb 232.696340 74.590157 (Mn, Nb) mp-12659 \n", 197 | "2 HfO2 204.573433 98.564374 (Hf, O) mp-352 \n", 198 | "3 Cu3Pt 159.312640 51.778816 (Cu, Pt) mp-12086 \n", 199 | "4 Mg3Pt 69.637565 27.588765 (Mg, Pt) mp-18707 \n", 200 | "\n", 201 | " structure \\\n", 202 | "0 [[0. 0. 0.] Al, [0. 1.96140395 1.96140395] Pt, [1.96140395 1.96140395 0. ] Pt, [1.96140395 0. 1.96140395] Pt] \n", 203 | "1 [[-2.23765223e-08 1.42974191e+00 5.92614104e+00] Mn, [3.57977169 0.71487093 1.97538035] Mn, [1.23819283 0.71487093 1.97538035] Mn, [1.17078941 3.45760869 5.92614104] Mn, [0. 0. 0.] Mn, [0. 0. 3.9507607] Mn, [2.40898226 2.74273771 1.97538035] Mn, [-1.17078945 3.45760869 5.92614104] Mn, [-2.07564892e-08 2.78165309e+00 3.46044846e+00] Nb, [2.40898226 1.39082653 7.41120915] Nb, [2.40898226 1.39082653 4.44107293] Nb, [-2.07564892e-08 2.78165309e+00 4.90312238e-01] Nb] \n", 204 | "2 [[2.24450185 3.85793022 4.83390736] O, [2.77883096 1.26035638 2.32337921] O, [ 2.79593023 1.33721745 -0.3395849 ] O, [2.26160112 3.93479129 2.17094325] O, [0.33243749 1.71503186 1.79217508] O, [4.72509386 4.3126057 0.03918327] O, [4.70799459 3.48011581 2.70214738] O, [0.31533822 0.88254197 4.45513919] O, [1.37618055 2.37584161 3.54068723] Hf, [3.64715226 4.97341544 3.61659934] Hf, [3.66425153 2.81930606 0.95363523] Hf, [1.39327982 0.22173223 0.87772312] Hf] \n", 205 | "3 [[0. 1.86144248 1.86144248] Cu, [1.86144248 1.86144248 0. ] Cu, [1.86144248 0. 1.86144248] Cu, [0. 0. 0.] Pt] \n", 206 | "4 [[0. 0. 2.73626461] Mg, [0. 0. 6.95125305] Mg, [-3.97440991e-08 4.58925330e+00 1.79637294e+00] Mg, [3.97440996 2.29462661 6.01136137] Mg, [-3.97440991e-08 4.58925330e+00 6.01136137e+00] Mg, [3.97440996 2.29462661 1.79637294] Mg, [1.12369616 1.94629885 0.72058152] Mg, [-1.12369616 1.94629885 4.93556996] Mg, [2.24739233 0. 
4.93556996] Mg, [5.70142751 0. 0.72058152] Mg, [-2.85071376 4.93758106 0.72058152] Mg, [2.85071376 4.93758106 4.93556996] Mg, [1.48939724 2.57971169 3.61780896] Mg, [-1.48939724 2.57971169 7.83279739] Mg, [2.9787944... \n", 207 | "\n", 208 | " brgoch_feats \\\n", 209 | "0 {'atomic_number_feat_1': 123.5, 'atomic_number_feat_2': 221, 'atomic_number_feat_3': 13, 'atomic_number_feat_4': 78, 'atomic_weight_feat_1': 306.1107695, 'atomic_weight_feat_2': 558.258461, 'atomic_weight_feat_3': 26.981539, 'atomic_weight_feat_4': 195.08, 'period_number_feat_1': 10.5, 'period_number_feat_2': 15, 'period_number_feat_3': 3, 'period_number_feat_4': 6, 'group_number_feat_1': 21.5, 'group_number_feat_2': 17, 'group_number_feat_3': 10, 'group_number_feat_4': 13, 'family_number_feat_1': 8.5, 'family_number_feat_2': 7, 'family_number_feat_3': 4, 'family_number_feat_4': 5, 'Mendel... \n", 210 | "1 {'atomic_number_feat_1': 45.5, 'atomic_number_feat_2': 9, 'atomic_number_feat_3': 25, 'atomic_number_feat_4': 41, 'atomic_weight_feat_1': 101.39124, 'atomic_weight_feat_2': 16.96972, 'atomic_weight_feat_3': 54.93805, 'atomic_weight_feat_4': 92.90638, 'period_number_feat_1': 6.5, 'period_number_feat_2': 3, 'period_number_feat_3': 4, 'period_number_feat_4': 5, 'group_number_feat_1': 9.5, 'group_number_feat_2': 9, 'group_number_feat_3': 5, 'group_number_feat_4': 7, 'family_number_feat_1': 6.0, 'family_number_feat_2': 4, 'family_number_feat_3': 4, 'family_number_feat_4': 4, 'Mendeleev_number_f... 
\n", 211 | "2 {'atomic_number_feat_1': 44.0, 'atomic_number_feat_2': 56, 'atomic_number_feat_3': 8, 'atomic_number_feat_4': 72, 'atomic_weight_feat_1': 105.2444, 'atomic_weight_feat_2': 146.4912, 'atomic_weight_feat_3': 15.9994, 'atomic_weight_feat_4': 178.49, 'period_number_feat_1': 5.0, 'period_number_feat_2': 2, 'period_number_feat_3': 2, 'period_number_feat_4': 6, 'group_number_feat_1': 18.0, 'group_number_feat_2': 28, 'group_number_feat_3': 4, 'group_number_feat_4': 16, 'family_number_feat_1': 9.0, 'family_number_feat_2': 10, 'family_number_feat_3': 4, 'family_number_feat_4': 7, 'Mendeleev_number_f... \n", 212 | "3 {'atomic_number_feat_1': 82.5, 'atomic_number_feat_2': 9, 'atomic_number_feat_3': 29, 'atomic_number_feat_4': 78, 'atomic_weight_feat_1': 192.859, 'atomic_weight_feat_2': 4.44200000000001, 'atomic_weight_feat_3': 63.546, 'atomic_weight_feat_4': 195.08, 'period_number_feat_1': 9.0, 'period_number_feat_2': 6, 'period_number_feat_3': 4, 'period_number_feat_4': 6, 'group_number_feat_1': 21.5, 'group_number_feat_2': 23, 'group_number_feat_3': 10, 'group_number_feat_4': 11, 'family_number_feat_1': 8.0, 'family_number_feat_2': 8, 'family_number_feat_3': 4, 'family_number_feat_4': 4, 'Mendeleev_nu... \n", 213 | "4 {'atomic_number_feat_1': 57.0, 'atomic_number_feat_2': 42, 'atomic_number_feat_3': 12, 'atomic_number_feat_4': 78, 'atomic_weight_feat_1': 133.9975, 'atomic_weight_feat_2': 122.165, 'atomic_weight_feat_3': 24.305, 'atomic_weight_feat_4': 195.08, 'period_number_feat_1': 7.5, 'period_number_feat_2': 3, 'period_number_feat_3': 3, 'period_number_feat_4': 6, 'group_number_feat_1': 8.0, 'group_number_feat_2': 4, 'group_number_feat_3': 2, 'group_number_feat_4': 10, 'family_number_feat_1': 5.0, 'family_number_feat_2': 2, 'family_number_feat_3': 2, 'family_number_feat_4': 4, 'Mendeleev_number_feat_... 
\n", 214 | "\n", 215 | " suspect_value \n", 216 | "0 False \n", 217 | "1 False \n", 218 | "2 False \n", 219 | "3 False \n", 220 | "4 False " 221 | ] 222 | }, 223 | "execution_count": 8, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "shard.head()" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "# Generate Predictors" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 9, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "FG = FeatureGenerator()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 10, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "application/vnd.jupyter.widget-view+json": { 256 | "model_id": "1a3dee015bf2427aa123e96545dd3f42", 257 | "version_major": 2, 258 | "version_minor": 0 259 | }, 260 | "text/plain": [ 261 | "StrToComposition: 0%| | 0/2574 [00:00" 318 | ] 319 | }, 320 | "metadata": {}, 321 | "output_type": "display_data" 322 | }, 323 | { 324 | "data": { 325 | "text/html": [], 326 | "text/plain": [ 327 | "" 328 | ] 329 | }, 330 | "metadata": {}, 331 | "output_type": "display_data" 332 | }, 333 | { 334 | "data": { 335 | "text/html": [], 336 | "text/plain": [ 337 | "" 338 | ] 339 | }, 340 | "metadata": {}, 341 | "output_type": "display_data" 342 | }, 343 | { 344 | "data": { 345 | "text/html": [], 346 | "text/plain": [ 347 | "" 348 | ] 349 | }, 350 | "metadata": {}, 351 | "output_type": "display_data" 352 | }, 353 | { 354 | "data": { 355 | "text/html": [], 356 | "text/plain": [ 357 | "" 358 | ] 359 | }, 360 | "metadata": {}, 361 | "output_type": "display_data" 362 | }, 363 | { 364 | "name": "stderr", 365 | "output_type": "stream", 366 | "text": [ 367 | "/home/aurora/cryspnet/cryspnet/models.py:255: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. 
Convert to a numpy array before indexing instead.\n", 368 | " return preds[auxidxs, idxs], self.classes[idxs]\n" 369 | ] 370 | } 371 | ], 372 | "source": [ 373 | "# first one is the probability of the top-N prediction\n", 374 | "# second one is the predicted top-N Bravais Lattice\n", 375 | "Bravais_probs, Bravais = BE.predicts(predictors, topn_bravais=2)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "### Example-Bravais Lattice Prediction" 383 | ] 384 | }, 385 | { 386 | "cell_type": "markdown", 387 | "metadata": {}, 388 | "source": [ 389 | "The probabilities are ordered from left to right, so the highest one is always on the left." 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 12, 395 | "metadata": {}, 396 | "outputs": [ 397 | { 398 | "data": { 399 | "text/plain": [ 400 | "array([[9.9996740e-01, 2.2746925e-05],\n", 401 | " [4.1430849e-01, 2.8673396e-01]], dtype=float32)" 402 | ] 403 | }, 404 | "execution_count": 12, 405 | "metadata": {}, 406 | "output_type": "execute_result" 407 | } 408 | ], 409 | "source": [ 410 | "Bravais_probs[:2]" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "Here are the corresponding Bravais lattices" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 13, 423 | "metadata": {}, 424 | "outputs": [ 425 | { 426 | "data": { 427 | "text/plain": [ 428 | "array([['orthorhombic (P)', 'monoclinic (C)'],\n", 429 | " ['rhombohedral (P)', 'hexagonal (P)']], dtype=object)" 430 | ] 431 | }, 432 | "execution_count": 13, 433 | "metadata": {}, 434 | "output_type": "execute_result" 435 | } 436 | ], 437 | "source": [ 438 | "Bravais[:2]" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "## Lattice Parameter and Space Group\n", 446 | "\n", 447 | "To use these two models, a column named **Bravais** must exist in the input predictors 
dataframe.\n", 448 | "
This information could be obtained from previous predictions or experimental data." 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 14, 454 | "metadata": { 455 | "jupyter": { 456 | "outputs_hidden": true 457 | } 458 | }, 459 | "outputs": [ 460 | { 461 | "data": { 462 | "text/html": [], 463 | "text/plain": [ 464 | "" 465 | ] 466 | }, 467 | "metadata": {}, 468 | "output_type": "display_data" 469 | }, 470 | { 471 | "data": { 472 | "text/html": [], 473 | "text/plain": [ 474 | "" 475 | ] 476 | }, 477 | "metadata": {}, 478 | "output_type": "display_data" 479 | }, 480 | { 481 | "data": { 482 | "text/html": [], 483 | "text/plain": [ 484 | "" 485 | ] 486 | }, 487 | "metadata": {}, 488 | "output_type": "display_data" 489 | }, 490 | { 491 | "data": { 492 | "text/html": [], 493 | "text/plain": [ 494 | "" 495 | ] 496 | }, 497 | "metadata": {}, 498 | "output_type": "display_data" 499 | }, 500 | { 501 | "data": { 502 | "text/html": [], 503 | "text/plain": [ 504 | "" 505 | ] 506 | }, 507 | "metadata": {}, 508 | "output_type": "display_data" 509 | }, 510 | { 511 | "data": { 512 | "text/html": [], 513 | "text/plain": [ 514 | "" 515 | ] 516 | }, 517 | "metadata": {}, 518 | "output_type": "display_data" 519 | }, 520 | { 521 | "data": { 522 | "text/html": [], 523 | "text/plain": [ 524 | "" 525 | ] 526 | }, 527 | "metadata": {}, 528 | "output_type": "display_data" 529 | }, 530 | { 531 | "data": { 532 | "text/html": [], 533 | "text/plain": [ 534 | "" 535 | ] 536 | }, 537 | "metadata": {}, 538 | "output_type": "display_data" 539 | }, 540 | { 541 | "data": { 542 | "text/html": [], 543 | "text/plain": [ 544 | "" 545 | ] 546 | }, 547 | "metadata": {}, 548 | "output_type": "display_data" 549 | }, 550 | { 551 | "data": { 552 | "text/html": [], 553 | "text/plain": [ 554 | "" 555 | ] 556 | }, 557 | "metadata": {}, 558 | "output_type": "display_data" 559 | }, 560 | { 561 | "data": { 562 | "text/html": [], 563 | "text/plain": [ 564 | "" 565 | ] 566 | }, 567 | 
"metadata": {}, 568 | "output_type": "display_data" 569 | }, 570 | { 571 | "data": { 572 | "text/html": [], 573 | "text/plain": [ 574 | "" 575 | ] 576 | }, 577 | "metadata": {}, 578 | "output_type": "display_data" 579 | }, 580 | { 581 | "data": { 582 | "text/html": [], 583 | "text/plain": [ 584 | "" 585 | ] 586 | }, 587 | "metadata": {}, 588 | "output_type": "display_data" 589 | }, 590 | { 591 | "data": { 592 | "text/html": [], 593 | "text/plain": [ 594 | "" 595 | ] 596 | }, 597 | "metadata": {}, 598 | "output_type": "display_data" 599 | }, 600 | { 601 | "name": "stderr", 602 | "output_type": "stream", 603 | "text": [ 604 | "/home/aurora/cryspnet/cryspnet/models.py:200: RuntimeWarning: overflow encountered in exp\n", 605 | " return np.exp(preds)\n", 606 | "/home/aurora/anaconda3/envs/cryspnetv2/lib/python3.8/site-packages/pandas/core/arraylike.py:274: RuntimeWarning: invalid value encountered in sin\n", 607 | " result = getattr(ufunc, method)(*inputs, **kwargs)\n", 608 | "/home/aurora/anaconda3/envs/cryspnetv2/lib/python3.8/site-packages/pandas/core/arraylike.py:274: RuntimeWarning: invalid value encountered in cos\n", 609 | " result = getattr(ufunc, method)(*inputs, **kwargs)\n" 610 | ] 611 | }, 612 | { 613 | "data": { 614 | "text/html": [], 615 | "text/plain": [ 616 | "" 617 | ] 618 | }, 619 | "metadata": {}, 620 | "output_type": "display_data" 621 | }, 622 | { 623 | "data": { 624 | "text/html": [], 625 | "text/plain": [ 626 | "" 627 | ] 628 | }, 629 | "metadata": {}, 630 | "output_type": "display_data" 631 | }, 632 | { 633 | "data": { 634 | "text/html": [], 635 | "text/plain": [ 636 | "" 637 | ] 638 | }, 639 | "metadata": {}, 640 | "output_type": "display_data" 641 | }, 642 | { 643 | "data": { 644 | "text/html": [], 645 | "text/plain": [ 646 | "" 647 | ] 648 | }, 649 | "metadata": {}, 650 | "output_type": "display_data" 651 | }, 652 | { 653 | "data": { 654 | "text/html": [], 655 | "text/plain": [ 656 | "" 657 | ] 658 | }, 659 | "metadata": {}, 660 | 
"output_type": "display_data" 661 | }, 662 | { 663 | "data": { 664 | "text/html": [], 665 | "text/plain": [ 666 | "" 667 | ] 668 | }, 669 | "metadata": {}, 670 | "output_type": "display_data" 671 | }, 672 | { 673 | "data": { 674 | "text/html": [], 675 | "text/plain": [ 676 | "" 677 | ] 678 | }, 679 | "metadata": {}, 680 | "output_type": "display_data" 681 | }, 682 | { 683 | "data": { 684 | "text/html": [], 685 | "text/plain": [ 686 | "" 687 | ] 688 | }, 689 | "metadata": {}, 690 | "output_type": "display_data" 691 | }, 692 | { 693 | "data": { 694 | "text/html": [], 695 | "text/plain": [ 696 | "" 697 | ] 698 | }, 699 | "metadata": {}, 700 | "output_type": "display_data" 701 | }, 702 | { 703 | "data": { 704 | "text/html": [], 705 | "text/plain": [ 706 | "" 707 | ] 708 | }, 709 | "metadata": {}, 710 | "output_type": "display_data" 711 | }, 712 | { 713 | "data": { 714 | "text/html": [], 715 | "text/plain": [ 716 | "" 717 | ] 718 | }, 719 | "metadata": {}, 720 | "output_type": "display_data" 721 | }, 722 | { 723 | "data": { 724 | "text/html": [], 725 | "text/plain": [ 726 | "" 727 | ] 728 | }, 729 | "metadata": {}, 730 | "output_type": "display_data" 731 | }, 732 | { 733 | "data": { 734 | "text/html": [], 735 | "text/plain": [ 736 | "" 737 | ] 738 | }, 739 | "metadata": {}, 740 | "output_type": "display_data" 741 | }, 742 | { 743 | "data": { 744 | "text/html": [], 745 | "text/plain": [ 746 | "" 747 | ] 748 | }, 749 | "metadata": {}, 750 | "output_type": "display_data" 751 | }, 752 | { 753 | "data": { 754 | "text/html": [], 755 | "text/plain": [ 756 | "" 757 | ] 758 | }, 759 | "metadata": {}, 760 | "output_type": "display_data" 761 | }, 762 | { 763 | "data": { 764 | "text/html": [], 765 | "text/plain": [ 766 | "" 767 | ] 768 | }, 769 | "metadata": {}, 770 | "output_type": "display_data" 771 | }, 772 | { 773 | "data": { 774 | "text/html": [], 775 | "text/plain": [ 776 | "" 777 | ] 778 | }, 779 | "metadata": {}, 780 | "output_type": "display_data" 781 | }, 782 | { 783 | 
"data": { 784 | "text/html": [], 785 | "text/plain": [ 786 | "" 787 | ] 788 | }, 789 | "metadata": {}, 790 | "output_type": "display_data" 791 | }, 792 | { 793 | "data": { 794 | "text/html": [], 795 | "text/plain": [ 796 | "" 797 | ] 798 | }, 799 | "metadata": {}, 800 | "output_type": "display_data" 801 | }, 802 | { 803 | "data": { 804 | "text/html": [], 805 | "text/plain": [ 806 | "" 807 | ] 808 | }, 809 | "metadata": {}, 810 | "output_type": "display_data" 811 | }, 812 | { 813 | "data": { 814 | "text/html": [], 815 | "text/plain": [ 816 | "" 817 | ] 818 | }, 819 | "metadata": {}, 820 | "output_type": "display_data" 821 | }, 822 | { 823 | "data": { 824 | "text/html": [], 825 | "text/plain": [ 826 | "" 827 | ] 828 | }, 829 | "metadata": {}, 830 | "output_type": "display_data" 831 | }, 832 | { 833 | "data": { 834 | "text/html": [], 835 | "text/plain": [ 836 | "" 837 | ] 838 | }, 839 | "metadata": {}, 840 | "output_type": "display_data" 841 | }, 842 | { 843 | "data": { 844 | "text/html": [], 845 | "text/plain": [ 846 | "" 847 | ] 848 | }, 849 | "metadata": {}, 850 | "output_type": "display_data" 851 | }, 852 | { 853 | "data": { 854 | "text/html": [], 855 | "text/plain": [ 856 | "" 857 | ] 858 | }, 859 | "metadata": {}, 860 | "output_type": "display_data" 861 | }, 862 | { 863 | "data": { 864 | "text/html": [], 865 | "text/plain": [ 866 | "" 867 | ] 868 | }, 869 | "metadata": {}, 870 | "output_type": "display_data" 871 | }, 872 | { 873 | "data": { 874 | "text/html": [], 875 | "text/plain": [ 876 | "" 877 | ] 878 | }, 879 | "metadata": {}, 880 | "output_type": "display_data" 881 | }, 882 | { 883 | "data": { 884 | "text/html": [], 885 | "text/plain": [ 886 | "" 887 | ] 888 | }, 889 | "metadata": {}, 890 | "output_type": "display_data" 891 | }, 892 | { 893 | "data": { 894 | "text/html": [], 895 | "text/plain": [ 896 | "" 897 | ] 898 | }, 899 | "metadata": {}, 900 | "output_type": "display_data" 901 | }, 902 | { 903 | "data": { 904 | "text/html": [], 905 | "text/plain": [ 
906 | "" 907 | ] 908 | }, 909 | "metadata": {}, 910 | "output_type": "display_data" 911 | }, 912 | { 913 | "data": { 914 | "text/html": [], 915 | "text/plain": [ 916 | "" 917 | ] 918 | }, 919 | "metadata": {}, 920 | "output_type": "display_data" 921 | }, 922 | { 923 | "data": { 924 | "text/html": [], 925 | "text/plain": [ 926 | "" 927 | ] 928 | }, 929 | "metadata": {}, 930 | "output_type": "display_data" 931 | }, 932 | { 933 | "data": { 934 | "text/html": [], 935 | "text/plain": [ 936 | "" 937 | ] 938 | }, 939 | "metadata": {}, 940 | "output_type": "display_data" 941 | }, 942 | { 943 | "data": { 944 | "text/html": [], 945 | "text/plain": [ 946 | "" 947 | ] 948 | }, 949 | "metadata": {}, 950 | "output_type": "display_data" 951 | }, 952 | { 953 | "data": { 954 | "text/html": [], 955 | "text/plain": [ 956 | "" 957 | ] 958 | }, 959 | "metadata": {}, 960 | "output_type": "display_data" 961 | }, 962 | { 963 | "data": { 964 | "text/html": [], 965 | "text/plain": [ 966 | "" 967 | ] 968 | }, 969 | "metadata": {}, 970 | "output_type": "display_data" 971 | }, 972 | { 973 | "data": { 974 | "text/html": [], 975 | "text/plain": [ 976 | "" 977 | ] 978 | }, 979 | "metadata": {}, 980 | "output_type": "display_data" 981 | }, 982 | { 983 | "data": { 984 | "text/html": [], 985 | "text/plain": [ 986 | "" 987 | ] 988 | }, 989 | "metadata": {}, 990 | "output_type": "display_data" 991 | }, 992 | { 993 | "data": { 994 | "text/html": [], 995 | "text/plain": [ 996 | "" 997 | ] 998 | }, 999 | "metadata": {}, 1000 | "output_type": "display_data" 1001 | }, 1002 | { 1003 | "data": { 1004 | "text/html": [], 1005 | "text/plain": [ 1006 | "" 1007 | ] 1008 | }, 1009 | "metadata": {}, 1010 | "output_type": "display_data" 1011 | }, 1012 | { 1013 | "data": { 1014 | "text/html": [], 1015 | "text/plain": [ 1016 | "" 1017 | ] 1018 | }, 1019 | "metadata": {}, 1020 | "output_type": "display_data" 1021 | }, 1022 | { 1023 | "data": { 1024 | "text/html": [], 1025 | "text/plain": [ 1026 | "" 1027 | ] 1028 | }, 
1029 | "metadata": {}, 1030 | "output_type": "display_data" 1031 | } 1032 | ], 1033 | "source": [ 1034 | "lattices = []\n", 1035 | "spacegroups = []\n", 1036 | "for i in range(0, 2):\n", 1037 | " predictors['Bravais'] = Bravais[:, i]\n", 1038 | " lattice = LB.predicts(predictors)\n", 1039 | " lattices.append(lattice)\n", 1040 | " \n", 1041 | " spacegroup = SGB.predicts(predictors)\n", 1042 | " spacegroups.append(spacegroup)" 1043 | ] 1044 | }, 1045 | { 1046 | "cell_type": "markdown", 1047 | "metadata": {}, 1048 | "source": [ 1049 | "### Example-Lattice Parameter Prediction \n", 1050 | "The ***Lattice Parameter Model*** outputs a ***dataframe*** containing the columns ***a***, ***b***, ***c***, ***α***, ***β***, and ***γ***, plus the unit-cell volume ***v***" 1051 | ] 1052 | }, 1053 | { 1054 | "cell_type": "code", 1055 | "execution_count": 15, 1056 | "metadata": {}, 1057 | "outputs": [ 1058 | { 1059 | "data": { 1060 | "text/html": [ 1061 | "
\n", 1062 | "\n", 1075 | "\n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | "
abcalphabetagammav
07.4180739.9415526.82174190.090.090.0503.084013
10.0000000.0000000.0000000.00.00.00.000000
27.8148457.8148457.81484590.090.090.0477.266677
38.2617398.2617398.26173990.090.090.0563.915990
47.4473629.9127626.66921490.090.090.0492.347582
\n", 1141 | "
" 1142 | ], 1143 | "text/plain": [ 1144 | " a b c alpha beta gamma v\n", 1145 | "0 7.418073 9.941552 6.821741 90.0 90.0 90.0 503.084013\n", 1146 | "1 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.000000\n", 1147 | "2 7.814845 7.814845 7.814845 90.0 90.0 90.0 477.266677\n", 1148 | "3 8.261739 8.261739 8.261739 90.0 90.0 90.0 563.915990\n", 1149 | "4 7.447362 9.912762 6.669214 90.0 90.0 90.0 492.347582" 1150 | ] 1151 | }, 1152 | "execution_count": 15, 1153 | "metadata": {}, 1154 | "output_type": "execute_result" 1155 | } 1156 | ], 1157 | "source": [ 1158 | "lattices[0].head()" 1159 | ] 1160 | }, 1161 | { 1162 | "cell_type": "markdown", 1163 | "metadata": {}, 1164 | "source": [ 1165 | "### Example-Space Group Prediction\n", 1166 | "***Space Group Model*** would output ***top-n probability*** and corresponding ***space group number***\n", 1167 | "in similar manner as Bravais Lattice Model" 1168 | ] 1169 | }, 1170 | { 1171 | "cell_type": "code", 1172 | "execution_count": 16, 1173 | "metadata": {}, 1174 | "outputs": [], 1175 | "source": [ 1176 | "# top-N probability, top-N space group number\n", 1177 | "spacegroup_probs, spacegroup = spacegroups[0]" 1178 | ] 1179 | }, 1180 | { 1181 | "cell_type": "code", 1182 | "execution_count": 17, 1183 | "metadata": {}, 1184 | "outputs": [ 1185 | { 1186 | "data": { 1187 | "text/plain": [ 1188 | "array([[9.55480874e-01, 4.43613380e-02, 1.57732997e-04],\n", 1189 | " [1.00000000e+00, 0.00000000e+00, 0.00000000e+00],\n", 1190 | " [5.59777439e-01, 3.83206278e-01, 5.70162982e-02],\n", 1191 | " [9.72254515e-01, 2.10020747e-02, 6.15153601e-03],\n", 1192 | " [1.00000000e+00, 0.00000000e+00, 0.00000000e+00]])" 1193 | ] 1194 | }, 1195 | "execution_count": 17, 1196 | "metadata": {}, 1197 | "output_type": "execute_result" 1198 | } 1199 | ], 1200 | "source": [ 1201 | "spacegroup_probs[:5]" 1202 | ] 1203 | }, 1204 | { 1205 | "cell_type": "code", 1206 | "execution_count": 18, 1207 | "metadata": {}, 1208 | "outputs": [ 1209 | { 1210 | "data": { 1211 | 
"text/plain": [ 1212 | "array([[ 74., 62., 61.],\n", 1213 | " [148., 167., 166.],\n", 1214 | " [225., 227., 216.],\n", 1215 | " [220., 204., 229.],\n", 1216 | " [ 62., 74., 61.]])" 1217 | ] 1218 | }, 1219 | "execution_count": 18, 1220 | "metadata": {}, 1221 | "output_type": "execute_result" 1222 | } 1223 | ], 1224 | "source": [ 1225 | "spacegroup[:5]" 1226 | ] 1227 | }, 1228 | { 1229 | "cell_type": "markdown", 1230 | "metadata": {}, 1231 | "source": [ 1232 | "## Low Level API" 1233 | ] 1234 | }, 1235 | { 1236 | "cell_type": "markdown", 1237 | "metadata": {}, 1238 | "source": [ 1239 | "### Load dataset to the model\n", 1240 | "\n", 1241 | "All models provide a **.load()** method to attach the dataset to the model. The model then performs pre-processing on the input dataset. " 1242 | ] 1243 | }, 1244 | { 1245 | "cell_type": "code", 1246 | "execution_count": 19, 1247 | "metadata": {}, 1248 | "outputs": [], 1249 | "source": [ 1250 | "BE.load(predictors)\n", 1251 | "LB.load(predictors) # note we have \"Bravais\" in this dataframe already\n", 1252 | "SGB.load(predictors)" 1253 | ] 1254 | }, 1255 | { 1256 | "cell_type": "markdown", 1257 | "metadata": {}, 1258 | "source": [ 1259 | "### Get the Raw Prediction\n", 1260 | "Sometimes it is easier to work with the raw predictions for further analysis. Make sure you have loaded the dataset into the models before running the lines below.\n", 1261 | "\n", 1262 | "The raw predictions are stored as **torch tensors**. The storage format is specific to each model. " 1263 | ] 1264 | }, 1265 | { 1266 | "cell_type": "markdown", 1267 | "metadata": {}, 1268 | "source": [ 1269 | "#### Bravais Lattice Model" 1270 | ] 1271 | }, 1272 | { 1273 | "cell_type": "markdown", 1274 | "metadata": {}, 1275 | "source": [ 1276 | "For Bravais Lattice Model, raw prediction contains output from **N** (defined by user) models. i.e. 
the shape of the tensor is (N, dataset_size, 14)" 1277 | ] 1278 | }, 1279 | { 1280 | "cell_type": "code", 1281 | "execution_count": 20, 1282 | "metadata": { 1283 | "jupyter": { 1284 | "outputs_hidden": true 1285 | } 1286 | }, 1287 | "outputs": [ 1288 | { 1289 | "data": { 1290 | "text/html": [], 1291 | "text/plain": [ 1292 | "" 1293 | ] 1294 | }, 1295 | "metadata": {}, 1296 | "output_type": "display_data" 1297 | }, 1298 | { 1299 | "data": { 1300 | "text/html": [], 1301 | "text/plain": [ 1302 | "" 1303 | ] 1304 | }, 1305 | "metadata": {}, 1306 | "output_type": "display_data" 1307 | }, 1308 | { 1309 | "data": { 1310 | "text/html": [], 1311 | "text/plain": [ 1312 | "" 1313 | ] 1314 | }, 1315 | "metadata": {}, 1316 | "output_type": "display_data" 1317 | }, 1318 | { 1319 | "data": { 1320 | "text/html": [], 1321 | "text/plain": [ 1322 | "" 1323 | ] 1324 | }, 1325 | "metadata": {}, 1326 | "output_type": "display_data" 1327 | }, 1328 | { 1329 | "data": { 1330 | "text/html": [], 1331 | "text/plain": [ 1332 | "" 1333 | ] 1334 | }, 1335 | "metadata": {}, 1336 | "output_type": "display_data" 1337 | } 1338 | ], 1339 | "source": [ 1340 | "preds = BE.get_preds()" 1341 | ] 1342 | }, 1343 | { 1344 | "cell_type": "code", 1345 | "execution_count": 21, 1346 | "metadata": {}, 1347 | "outputs": [ 1348 | { 1349 | "data": { 1350 | "text/plain": [ 1351 | "torch.Size([5, 2574, 14])" 1352 | ] 1353 | }, 1354 | "execution_count": 21, 1355 | "metadata": {}, 1356 | "output_type": "execute_result" 1357 | } 1358 | ], 1359 | "source": [ 1360 | "preds.shape" 1361 | ] 1362 | }, 1363 | { 1364 | "cell_type": "markdown", 1365 | "metadata": {}, 1366 | "source": [ 1367 | "The Bravais Lattice map to the predicted probability is stored in **.classes**" 1368 | ] 1369 | }, 1370 | { 1371 | "cell_type": "code", 1372 | "execution_count": 22, 1373 | "metadata": {}, 1374 | "outputs": [ 1375 | { 1376 | "data": { 1377 | "text/plain": [ 1378 | "Index(['cubic (I)', 'cubic (F)', 'cubic (P)', 'hexagonal (P)',\n", 1379 | 
" 'rhombohedral (P)', 'tetragonal (I)', 'tetragonal (P)',\n", 1380 | " 'orthorhombic (I)', 'orthorhombic (F)', 'orthorhombic (C)',\n", 1381 | " 'orthorhombic (P)', 'monoclinic (C)', 'monoclinic (P)',\n", 1382 | " 'triclinic (P)'],\n", 1383 | " dtype='object')" 1384 | ] 1385 | }, 1386 | "execution_count": 22, 1387 | "metadata": {}, 1388 | "output_type": "execute_result" 1389 | } 1390 | ], 1391 | "source": [ 1392 | "BE.classes" 1393 | ] 1394 | }, 1395 | { 1396 | "cell_type": "markdown", 1397 | "metadata": {}, 1398 | "source": [ 1399 | "#### Space Group Model and Lattice Parameter Model" 1400 | ] 1401 | }, 1402 | { 1403 | "cell_type": "markdown", 1404 | "metadata": {}, 1405 | "source": [ 1406 | "For Space Group Model and Lattice Parameter Model, the raw prediction is a dictionary containing prediction from different sub-models. The key of the dictionary shows which sub-models the prediction comes from." 1407 | ] 1408 | }, 1409 | { 1410 | "cell_type": "code", 1411 | "execution_count": 23, 1412 | "metadata": { 1413 | "jupyter": { 1414 | "outputs_hidden": true 1415 | } 1416 | }, 1417 | "outputs": [ 1418 | { 1419 | "data": { 1420 | "text/html": [], 1421 | "text/plain": [ 1422 | "" 1423 | ] 1424 | }, 1425 | "metadata": {}, 1426 | "output_type": "display_data" 1427 | }, 1428 | { 1429 | "data": { 1430 | "text/html": [], 1431 | "text/plain": [ 1432 | "" 1433 | ] 1434 | }, 1435 | "metadata": {}, 1436 | "output_type": "display_data" 1437 | }, 1438 | { 1439 | "data": { 1440 | "text/html": [], 1441 | "text/plain": [ 1442 | "" 1443 | ] 1444 | }, 1445 | "metadata": {}, 1446 | "output_type": "display_data" 1447 | }, 1448 | { 1449 | "data": { 1450 | "text/html": [], 1451 | "text/plain": [ 1452 | "" 1453 | ] 1454 | }, 1455 | "metadata": {}, 1456 | "output_type": "display_data" 1457 | }, 1458 | { 1459 | "data": { 1460 | "text/html": [], 1461 | "text/plain": [ 1462 | "" 1463 | ] 1464 | }, 1465 | "metadata": {}, 1466 | "output_type": "display_data" 1467 | }, 1468 | { 1469 | "data": { 
1470 | "text/html": [], 1471 | "text/plain": [ 1472 | "" 1473 | ] 1474 | }, 1475 | "metadata": {}, 1476 | "output_type": "display_data" 1477 | }, 1478 | { 1479 | "data": { 1480 | "text/html": [], 1481 | "text/plain": [ 1482 | "" 1483 | ] 1484 | }, 1485 | "metadata": {}, 1486 | "output_type": "display_data" 1487 | }, 1488 | { 1489 | "data": { 1490 | "text/html": [], 1491 | "text/plain": [ 1492 | "" 1493 | ] 1494 | }, 1495 | "metadata": {}, 1496 | "output_type": "display_data" 1497 | }, 1498 | { 1499 | "data": { 1500 | "text/html": [], 1501 | "text/plain": [ 1502 | "" 1503 | ] 1504 | }, 1505 | "metadata": {}, 1506 | "output_type": "display_data" 1507 | }, 1508 | { 1509 | "data": { 1510 | "text/html": [], 1511 | "text/plain": [ 1512 | "" 1513 | ] 1514 | }, 1515 | "metadata": {}, 1516 | "output_type": "display_data" 1517 | }, 1518 | { 1519 | "data": { 1520 | "text/html": [], 1521 | "text/plain": [ 1522 | "" 1523 | ] 1524 | }, 1525 | "metadata": {}, 1526 | "output_type": "display_data" 1527 | }, 1528 | { 1529 | "data": { 1530 | "text/html": [], 1531 | "text/plain": [ 1532 | "" 1533 | ] 1534 | }, 1535 | "metadata": {}, 1536 | "output_type": "display_data" 1537 | }, 1538 | { 1539 | "data": { 1540 | "text/html": [], 1541 | "text/plain": [ 1542 | "" 1543 | ] 1544 | }, 1545 | "metadata": {}, 1546 | "output_type": "display_data" 1547 | }, 1548 | { 1549 | "data": { 1550 | "text/html": [], 1551 | "text/plain": [ 1552 | "" 1553 | ] 1554 | }, 1555 | "metadata": {}, 1556 | "output_type": "display_data" 1557 | } 1558 | ], 1559 | "source": [ 1560 | "preds = SGB.get_preds()" 1561 | ] 1562 | }, 1563 | { 1564 | "cell_type": "code", 1565 | "execution_count": 24, 1566 | "metadata": {}, 1567 | "outputs": [ 1568 | { 1569 | "data": { 1570 | "text/plain": [ 1571 | "dict_keys(['cubic (I)', 'cubic (F)', 'cubic (P)', 'hexagonal (P)', 'rhombohedral (P)', 'tetragonal (I)', 'tetragonal (P)', 'orthorhombic (I)', 'orthorhombic (F)', 'orthorhombic (C)', 'orthorhombic (P)', 'monoclinic (C)', 
'monoclinic (P)', 'triclinic (P)'])" 1572 | ] 1573 | }, 1574 | "execution_count": 24, 1575 | "metadata": {}, 1576 | "output_type": "execute_result" 1577 | } 1578 | ], 1579 | "source": [ 1580 | "preds.keys()" 1581 | ] 1582 | }, 1583 | { 1584 | "cell_type": "code", 1585 | "execution_count": 25, 1586 | "metadata": {}, 1587 | "outputs": [ 1588 | { 1589 | "data": { 1590 | "text/plain": [ 1591 | "tensor([[0.0000e+00, 1.0000e+00, 0.0000e+00],\n", 1592 | " [0.0000e+00, 1.0000e+00, 0.0000e+00],\n", 1593 | " [5.0087e-05, 2.6173e-01, 7.3822e-01]])" 1594 | ] 1595 | }, 1596 | "execution_count": 25, 1597 | "metadata": {}, 1598 | "output_type": "execute_result" 1599 | } 1600 | ], 1601 | "source": [ 1602 | "preds['cubic (F)'][:3,]" 1603 | ] 1604 | }, 1605 | { 1606 | "cell_type": "markdown", 1607 | "metadata": {}, 1608 | "source": [ 1609 | "The space group number for each sub-model prediction is a little more complicated. All sub-models are stored in the **.Ms** field. For each sub-model, accessing the classes is similar to the Bravais Lattice one. 
Here is an example of getting the classes for cubic (F) sub-model" 1610 | ] 1611 | }, 1612 | { 1613 | "cell_type": "code", 1614 | "execution_count": 27, 1615 | "metadata": {}, 1616 | "outputs": [ 1617 | { 1618 | "data": { 1619 | "text/plain": [ 1620 | "array([216, 225, 227])" 1621 | ] 1622 | }, 1623 | "execution_count": 27, 1624 | "metadata": {}, 1625 | "output_type": "execute_result" 1626 | } 1627 | ], 1628 | "source": [ 1629 | "SGB.Ms['cubic (F)'].classes" 1630 | ] 1631 | }, 1632 | { 1633 | "cell_type": "code", 1634 | "execution_count": 28, 1635 | "metadata": { 1636 | "jupyter": { 1637 | "outputs_hidden": true 1638 | } 1639 | }, 1640 | "outputs": [ 1641 | { 1642 | "data": { 1643 | "text/html": [], 1644 | "text/plain": [ 1645 | "" 1646 | ] 1647 | }, 1648 | "metadata": {}, 1649 | "output_type": "display_data" 1650 | }, 1651 | { 1652 | "data": { 1653 | "text/html": [], 1654 | "text/plain": [ 1655 | "" 1656 | ] 1657 | }, 1658 | "metadata": {}, 1659 | "output_type": "display_data" 1660 | }, 1661 | { 1662 | "data": { 1663 | "text/html": [], 1664 | "text/plain": [ 1665 | "" 1666 | ] 1667 | }, 1668 | "metadata": {}, 1669 | "output_type": "display_data" 1670 | }, 1671 | { 1672 | "data": { 1673 | "text/html": [], 1674 | "text/plain": [ 1675 | "" 1676 | ] 1677 | }, 1678 | "metadata": {}, 1679 | "output_type": "display_data" 1680 | }, 1681 | { 1682 | "data": { 1683 | "text/html": [], 1684 | "text/plain": [ 1685 | "" 1686 | ] 1687 | }, 1688 | "metadata": {}, 1689 | "output_type": "display_data" 1690 | }, 1691 | { 1692 | "data": { 1693 | "text/html": [], 1694 | "text/plain": [ 1695 | "" 1696 | ] 1697 | }, 1698 | "metadata": {}, 1699 | "output_type": "display_data" 1700 | }, 1701 | { 1702 | "data": { 1703 | "text/html": [], 1704 | "text/plain": [ 1705 | "" 1706 | ] 1707 | }, 1708 | "metadata": {}, 1709 | "output_type": "display_data" 1710 | }, 1711 | { 1712 | "data": { 1713 | "text/html": [], 1714 | "text/plain": [ 1715 | "" 1716 | ] 1717 | }, 1718 | "metadata": {}, 1719 | 
"output_type": "display_data" 1720 | }, 1721 | { 1722 | "data": { 1723 | "text/html": [], 1724 | "text/plain": [ 1725 | "" 1726 | ] 1727 | }, 1728 | "metadata": {}, 1729 | "output_type": "display_data" 1730 | }, 1731 | { 1732 | "data": { 1733 | "text/html": [], 1734 | "text/plain": [ 1735 | "" 1736 | ] 1737 | }, 1738 | "metadata": {}, 1739 | "output_type": "display_data" 1740 | }, 1741 | { 1742 | "data": { 1743 | "text/html": [], 1744 | "text/plain": [ 1745 | "" 1746 | ] 1747 | }, 1748 | "metadata": {}, 1749 | "output_type": "display_data" 1750 | }, 1751 | { 1752 | "data": { 1753 | "text/html": [], 1754 | "text/plain": [ 1755 | "" 1756 | ] 1757 | }, 1758 | "metadata": {}, 1759 | "output_type": "display_data" 1760 | }, 1761 | { 1762 | "data": { 1763 | "text/html": [], 1764 | "text/plain": [ 1765 | "" 1766 | ] 1767 | }, 1768 | "metadata": {}, 1769 | "output_type": "display_data" 1770 | }, 1771 | { 1772 | "data": { 1773 | "text/html": [], 1774 | "text/plain": [ 1775 | "" 1776 | ] 1777 | }, 1778 | "metadata": {}, 1779 | "output_type": "display_data" 1780 | } 1781 | ], 1782 | "source": [ 1783 | "preds = LB.get_preds()" 1784 | ] 1785 | }, 1786 | { 1787 | "cell_type": "code", 1788 | "execution_count": 29, 1789 | "metadata": {}, 1790 | "outputs": [ 1791 | { 1792 | "data": { 1793 | "text/plain": [ 1794 | "dict_keys(['cubic (I)', 'cubic (F)', 'cubic (P)', 'hexagonal (P)', 'rhombohedral (P)', 'tetragonal (I)', 'tetragonal (P)', 'orthorhombic (I)', 'orthorhombic (F)', 'orthorhombic (C)', 'orthorhombic (P)', 'monoclinic (C)', 'monoclinic (P)', 'triclinic (P)'])" 1795 | ] 1796 | }, 1797 | "execution_count": 29, 1798 | "metadata": {}, 1799 | "output_type": "execute_result" 1800 | } 1801 | ], 1802 | "source": [ 1803 | "preds.keys()" 1804 | ] 1805 | }, 1806 | { 1807 | "cell_type": "code", 1808 | "execution_count": 30, 1809 | "metadata": {}, 1810 | "outputs": [ 1811 | { 1812 | "data": { 1813 | "text/plain": [ 1814 | "tensor([[-9.8452e+07, -8.6062e+07, -8.4030e+07, 4.0698e+06, 
1.3044e+07,\n", 1815 | " 2.9655e+07],\n", 1816 | " [-5.2221e-01, 1.4229e-01, -5.3990e-01, -5.9890e-02, -1.5190e-01,\n", 1817 | " 2.1077e-01],\n", 1818 | " [ 2.2412e+06, -2.8403e+07, -2.1658e+07, 1.8027e+07, 1.4471e+07,\n", 1819 | " 2.2346e+07]])" 1820 | ] 1821 | }, 1822 | "execution_count": 30, 1823 | "metadata": {}, 1824 | "output_type": "execute_result" 1825 | } 1826 | ], 1827 | "source": [ 1828 | "preds['triclinic (P)'][:3]" 1829 | ] 1830 | }, 1831 | { 1832 | "cell_type": "markdown", 1833 | "metadata": {}, 1834 | "source": [ 1835 | "The **PRED_COLS** constant defined in *config.py* tells you which lattice parameters each sub-model predicts. " 1836 | ] 1837 | }, 1838 | { 1839 | "cell_type": "code", 1840 | "execution_count": 31, 1841 | "metadata": {}, 1842 | "outputs": [ 1843 | { 1844 | "data": { 1845 | "text/plain": [ 1846 | "{'cubic (P)': ['a'],\n", 1847 | " 'cubic (F)': ['a'],\n", 1848 | " 'cubic (I)': ['a'],\n", 1849 | " 'monoclinic (P)': ['a', 'b', 'c', 'beta'],\n", 1850 | " 'monoclinic (C)': ['a', 'b', 'c', 'beta'],\n", 1851 | " 'hexagonal (P)': ['a', 'c'],\n", 1852 | " 'rhombohedral (P)': ['a', 'alpha'],\n", 1853 | " 'tetragonal (P)': ['a', 'c'],\n", 1854 | " 'tetragonal (I)': ['a', 'c'],\n", 1855 | " 'orthorhombic (P)': ['a', 'b', 'c'],\n", 1856 | " 'orthorhombic (C)': ['a', 'b', 'c'],\n", 1857 | " 'orthorhombic (F)': ['a', 'b', 'c'],\n", 1858 | " 'orthorhombic (I)': ['a', 'b', 'c'],\n", 1859 | " 'triclinic (P)': ['a', 'b', 'c', 'alpha', 'beta', 'gamma']}" 1860 | ] 1861 | }, 1862 | "execution_count": 31, 1863 | "metadata": {}, 1864 | "output_type": "execute_result" 1865 | } 1866 | ], 1867 | "source": [ 1868 | "PRED_COLS" 1869 | ] 1870 | } 1871 | ], 1872 | "metadata": { 1873 | "kernelspec": { 1874 | "display_name": "Python 3", 1875 | "language": "python", 1876 | "name": "python3" 1877 | }, 1878 | "language_info": { 1879 | "codemirror_mode": { 1880 | "name": "ipython", 1881 | "version": 3 1882 | }, 1883 | "file_extension": ".py", 1884 | "mimetype": 
"text/x-python", 1885 | "name": "python", 1886 | "nbconvert_exporter": "python", 1887 | "pygments_lexer": "ipython3", 1888 | "version": "3.8.5" 1889 | }, 1890 | "toc": { 1891 | "base_numbering": 1, 1892 | "nav_menu": {}, 1893 | "number_sections": true, 1894 | "sideBar": true, 1895 | "skip_h1_title": false, 1896 | "title_cell": "Table of Contents", 1897 | "title_sidebar": "Contents", 1898 | "toc_cell": false, 1899 | "toc_position": {}, 1900 | "toc_section_display": true, 1901 | "toc_window_display": true 1902 | }, 1903 | "varInspector": { 1904 | "cols": { 1905 | "lenName": 16, 1906 | "lenType": 16, 1907 | "lenVar": 40 1908 | }, 1909 | "kernels_config": { 1910 | "python": { 1911 | "delete_cmd_postfix": "", 1912 | "delete_cmd_prefix": "del ", 1913 | "library": "var_list.py", 1914 | "varRefreshCmd": "print(var_dic_list())" 1915 | }, 1916 | "r": { 1917 | "delete_cmd_postfix": ") ", 1918 | "delete_cmd_prefix": "rm(", 1919 | "library": "var_list.r", 1920 | "varRefreshCmd": "cat(var_dic_list()) " 1921 | } 1922 | }, 1923 | "types_to_exclude": [ 1924 | "module", 1925 | "function", 1926 | "builtin_function_or_method", 1927 | "instance", 1928 | "_Feature" 1929 | ], 1930 | "window_display": false 1931 | } 1932 | }, 1933 | "nbformat": 4, 1934 | "nbformat_minor": 4 1935 | } 1936 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CRYSPNet 2 | 3 | The Crystal Structure Prediction Network ([CRYSPNet](https://journals.aps.org/prmaterials/abstract/10.1103/PhysRevMaterials.4.123802)) project introduces an alternative way to perform fast prediction of Crystal Structure Information (Bravais Lattice, Space Group, and Lattice Parameter) with the power of neural networks. 4 | 5 | ## Installation 6 | 7 | **Note:** **Python 3.6** or later is required. 
Since the fastai library does not support Windows, the following installation only works in a Linux-based environment. We recommend using a [CONDA environment](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) to create a new environment for this installation. 8 | To install the project with pip and git, run the following commands: 9 | ```bash 10 | git clone https://github.com/auroralht/cryspnet.git 11 | cd cryspnet 12 | pip install -e . 13 | ``` 14 | 15 | Pre-trained models are stored in Google Drive. Download the file `learner.zip` from the [drive](https://drive.google.com/file/d/1rpbV2-mnNj3M16-4BKvhuo5pkeoIY96q/view?usp=sharing). After downloading the file, please copy it to `cryspnet/cryspnet` and extract it. Five folders: `BravaisEsmMetal`, `BravaisEsmOxide`, `BravaisEsmWhole`, `LatticeParam`, and `SpaceGroup` should be in the `cryspnet/cryspnet/learner` directory after the extraction is completed. 16 | 17 | ## 🔥Update🔥 18 | The library has moved from fastai v1 to fastai v2; thus the Bravais Lattice, Lattice Parameters and Space Group models have been retrained. Please **download** the latest models from [here](https://drive.google.com/file/d/1rpbV2-mnNj3M16-4BKvhuo5pkeoIY96q/view?usp=sharing). Here is the link to the [old](https://drive.google.com/file/d/1s9OkrBRTSWTvufSia-ee625zR73bgBDA/view?usp=sharing) version. 19 | 20 | To update the library itself: 21 | ```bash 22 | cd cryspnet 23 | git pull 24 | pip install -r requirement.txt 25 | ``` 26 | 27 | ## ⚠️About fastai⚠️ 28 | *If you are not interested in training your model, then you can skip this part* 29 | 30 | *This section will be removed once this issue is fixed in the next fastai release* 31 | 32 | The current fastai v2 tabular module has data leakage issues when trying to export the learner. The fix has only been made in the development repository. 
To install: 33 | ```bash 34 | pip uninstall fastai 35 | git clone https://github.com/fastai/fastai 36 | pip install -e "fastai[dev]" 37 | ``` 38 | 39 | ## Dependencies 40 | 41 | [fastai](https://github.com/fastai/fastai), [pytorch](https://github.com/pytorch/pytorch), and [Matminer](https://hackingmaterials.lbl.gov/matminer/installation.html) are the three major packages used heavily in this project. Please go to their GitHub/documentation sites for more information if these packages cannot be installed. 42 | 43 | (optional) We recommend using [JupyterLab](https://github.com/jupyterlab/jupyterlab/tree/acf208ed6f6843d03f34666ffc0cb2c37bdf2f3e) to execute our Notebook examples. Running them with Jupyter Notebook also works fine. To install: 44 | 45 | ### conda install 46 | ```bash 47 | conda install -c conda-forge jupyterlab 48 | ``` 49 | 50 | ### pip install 51 | ```bash 52 | pip install jupyterlab 53 | ``` 54 | 55 | (⚠️ISSUE⚠️) When running through the notebook, a tqdm error might be raised, saying that IProgress is not found. It can be solved by installing [Jupyter Widgets](https://ipywidgets.readthedocs.io/en/stable/user_install.html): 56 | 57 | ### conda install 58 | ```bash 59 | conda install -c conda-forge ipywidgets 60 | ``` 61 | 62 | ### pip install 63 | ```bash 64 | pip install ipywidgets 65 | ``` 66 | 67 | 68 | ## Usage 69 | 70 | Input requirement: the input should be stored in a CSV file with 71 | a column named `formula`. 
72 | 73 | Here is an example of predicting the Bravais lattice, space group, and lattice parameters of the formulas listed in [demo.csv](https://github.com/AuroraLHT/cryspnet/tree/master/demo) 74 | ```bash 75 | cd cryspnet 76 | python predict.py -i demo/demo.csv -o output/output.csv 77 | ``` 78 | 79 | You can also use the Bravais lattice model trained on Metal or Oxide compounds by: 80 | 81 | ```bash 82 | python predict.py -i demo/demo.csv -o output/output.csv --use_metal 83 | python predict.py -i demo/demo.csv -o output/output.csv --use_oxide 84 | ``` 85 | 86 | You can also change `topn_bravais` and `topn_spacegroup` to see more or fewer top-N predictions from the Bravais lattice and space group models. 87 | ```bash 88 | python predict.py \ 89 | -i demo/demo.csv \ 90 | -o output/output.csv \ 91 | --topn_bravais 2 \ 92 | --topn_spacegroup 3 93 | ``` 94 | 95 | The output is stored in .csv format with the first two rows as headers. The layout of the output is shown in this example: 96 | | formula | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | Top-1 Bravais | 97 | | ------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------ | ------- |------ | ------------- | ------------- | 98 | | - | Bravais | Bravais prob | a | b | c | alpha | beta | gamma | v | Top-1 SpaceGroup | Top-2 SpaceGroup | Top-1 SpaceGroup prob | Top-2 SpaceGroup prob | 99 | | Co15Ga2Pr2 | rhombohedral (P) | 0.847 | 6.50 | 6.50 | 6.50 | 86.4 | 86.4 | 86.4 | 274 | 166 | 160 | 0.98 | 0.01 | 100 | 101 | The first row is the major index that groups various predictions of one Bravais Lattice type together. The first column, "**formula**", as its name suggests, shows the chemical formula. "**Top-n Bravais**" shows this part of the prediction is from the n-th most likely Bravais Lattice. 
In the second row, "**Bravais**" and "**Bravais prob**" show the Bravais Lattice and its predicted probability. "**a**", "**b**", "**c**", "**alpha**", "**beta**", "**gamma**" show the predicted lattice parameters and "**v**" shows the unit cell volume. "**Top-k SpaceGroup**" and "**Top-k SpaceGroup prob**" show the k-th most likely space group number and its predicted probability. 102 | 103 | To open this .csv in python, consider using these lines: 104 | ```python 105 | import pandas as pd 106 | pd.read_csv("output/output.csv", header=[0,1]) 107 | ``` 108 | 109 | ## As Library 110 | 111 | The package is a wrapper around the fastai learner and is easy to use. The following example shows the basic workflow: 112 | 113 | ```python 114 | import pandas as pd 115 | from cryspnet.models import * 116 | 117 | formula = pd.read_csv("demo/demo.csv") 118 | BE = load_Bravais_models() 119 | LPB = load_Lattice_models() 120 | SGB = load_SpaceGroup_models() 121 | 122 | featurizer = FeatureGenerator() 123 | predictors = featurizer.generate(formula) 124 | 125 | bravais_probs, bravais = BE.predicts(predictors, topn_bravais=1) 126 | predictors['Bravais'] = bravais 127 | 128 | spacegroup_probs, spacegroup = SGB.predicts(predictors, topn_spacegroup=1) 129 | latticeparameter = LPB.predicts(predictors) 130 | ``` 131 | 132 | More **examples** can be found in [Notebook](https://github.com/AuroraLHT/cryspnet/tree/master/Notebook). 133 | 134 | ## 🌟Train Your Own CRYSPNET🌟 135 | We provide three notebooks: [TrainBravais](https://github.com/AuroraLHT/cryspnet/tree/master/Notebook/TrainBravais.ipynb), [TrainSpaceGroup](https://github.com/AuroraLHT/cryspnet/tree/master/Notebook/TrainSpaceGroup.ipynb), and [TrainLattice](https://github.com/AuroraLHT/cryspnet/tree/master/Notebook/TrainLattice.ipynb) to showcase the training process of each component. 
136 | 137 | 138 | ## Random Crystal Generation with PyXtal 139 | 140 | [PyXtal](https://github.com/qzhu2017/PyXtal) is an open-source library that can generate structures from chemical formulas and space group inputs. By combining CRYSPNet with this library, we demonstrate a workflow that creates candidate crystal structures from only a chemical formula input. To install PyXtal: 141 | 142 | ### pip install 143 | ```bash 144 | pip install pyxtal==0.0.8 145 | ``` 146 | 147 | Here is an example of using PyXtal in conjunction with the prediction from CRYSPNet: 148 | ```bash 149 | python predict.py -i demo/demo.csv -o output/output.csv 150 | 151 | python random_crystal.py \ 152 | -i output/output.csv \ 153 | -o output/random_crystal \ 154 | --topn_bravais 1 \ 155 | --topn_spacegroup 1 \ 156 | --n_workers 4 \ 157 | --n_trails 100 158 | ``` 159 | -------------------------------------------------------------------------------- /cryspnet/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | N_ESMBLER = 5 4 | 5 | TOPN_BRAVAIS = 2 6 | 7 | TOPN_SPACEGROUP = 3 8 | 9 | BATCHSIZE = 256 10 | 11 | BRAVAIS_SPLIT_NAME = "Bravais" 12 | 13 | LEARNER = f"{Path(__file__).parents[0]}/learner/" 14 | 15 | BRAVAIS_MODELS_FOLDER = { 16 | "metal" : "BravaisEsmMetal", 17 | "oxide" : "BravaisEsmOxide", 18 | "whole" : "BravaisEsmWhole", 19 | } 20 | 21 | BRAVAIS_ENSEMBLER_PREFIX = "ensembler" 22 | 23 | LATTICE_PARAM_NAMES = ['a', 'b', 'c', 'alpha', 'beta','gamma'] 24 | 25 | LATTICE_PARAM_MODELS_FOLDER = "LatticeParam" 26 | 27 | LATTICE_PARAM_MODELS = { 28 | 'cubic (F)': 'cubic (F).pkl', 29 | 'cubic (I)': 'cubic (I).pkl', 30 | 'cubic (P)': 'cubic (P).pkl', 31 | 'hexagonal (P)': 'hexagonal (P).pkl', 32 | 'monoclinic (C)': 'monoclinic (C).pkl', 33 | 'monoclinic (P)': 'monoclinic (P).pkl', 34 | 'orthorhombic (C)': 'orthorhombic (C).pkl', 35 | 'orthorhombic (F)': 'orthorhombic (F).pkl', 36 | 'orthorhombic (I)': 'orthorhombic (I).pkl', 37 | 
'orthorhombic (P)': 'orthorhombic (P).pkl', 38 | 'rhombohedral (P)': 'rhombohedral (P).pkl', 39 | 'tetragonal (I)': 'tetragonal (I).pkl', 40 | 'tetragonal (P)': 'tetragonal (P).pkl', 41 | 'triclinic (P)': 'triclinic (P).pkl' 42 | } 43 | 44 | LATTICE_NORM = "lattice_norm.pkl" 45 | 46 | LATTICE_PARAM_ERROR = "error.pkl" 47 | 48 | SPACE_GROUP_MODELS_FOLDER = "SpaceGroup" 49 | 50 | SPACE_GROUP_MODELS = { 51 | 'cubic (F)': 'cubic (F).pkl', 52 | 'cubic (I)': 'cubic (I).pkl', 53 | 'cubic (P)': 'cubic (P).pkl', 54 | 'hexagonal (P)': 'hexagonal (P).pkl', 55 | 'monoclinic (C)': 'monoclinic (C).pkl', 56 | 'monoclinic (P)': 'monoclinic (P).pkl', 57 | 'orthorhombic (C)': 'orthorhombic (C).pkl', 58 | 'orthorhombic (F)': 'orthorhombic (F).pkl', 59 | 'orthorhombic (I)': 'orthorhombic (I).pkl', 60 | 'orthorhombic (P)': 'orthorhombic (P).pkl', 61 | 'rhombohedral (P)': 'rhombohedral (P).pkl', 62 | 'tetragonal (I)': 'tetragonal (I).pkl', 63 | 'tetragonal (P)': 'tetragonal (P).pkl', 64 | 'triclinic (P)': 'triclinic (P).pkl' 65 | } 66 | 67 | 68 | BRAVAIS_LATTICE = [ 69 | 'cubic (I)', 70 | 'cubic (F)', 71 | 'cubic (P)', 72 | 'hexagonal (P)', 73 | 'rhombohedral (P)', 74 | 'tetragonal (I)', 75 | 'tetragonal (P)', 76 | 'orthorhombic (I)', 77 | 'orthorhombic (F)', 78 | 'orthorhombic (C)', 79 | 'orthorhombic (P)', 80 | 'monoclinic (C)', 81 | 'monoclinic (P)', 82 | 'triclinic (P)' 83 | ] 84 | 85 | PRED_COLS = { 86 | "cubic (P)" : ['a'], 87 | "cubic (F)" : ['a'], 88 | "cubic (I)" : ['a'], 89 | 90 | "monoclinic (P)": ["a", "b", "c", "beta"], 91 | "monoclinic (C)": ["a", "b", "c", "beta"], 92 | 93 | "hexagonal (P)": ["a", "c"], 94 | 95 | "rhombohedral (P)": ["a", "alpha"], 96 | 97 | "tetragonal (P)" : ["a", "c"], 98 | "tetragonal (I)" : ["a", "c"], 99 | 100 | "orthorhombic (P)" : ["a", "b", "c"], 101 | "orthorhombic (C)" : ["a", "b", "c"], 102 | "orthorhombic (F)" : ["a", "b", "c"], 103 | "orthorhombic (I)" : ["a", "b", "c"], 104 | 105 | "triclinic (P)" : ["a", "b", "c", "alpha", "beta", "gamma"] 
106 | } 107 | 108 | ICSDINFO = [ 109 | 'formula', 110 | 'cif_names', 111 | 'Bravais', 112 | 'sym_group', 113 | 'a', 114 | 'b', 115 | 'c', 116 | 'alpha', 117 | 'beta', 118 | 'gamma', 119 | 'v', 120 | 'Date', 121 | 'Bravais_xp', 122 | 'is_oxide', 123 | 'is_metal', 124 | 'is_pervoskite', 125 | 'composition' 126 | ] 127 | 128 | FEATURES = [ 129 | 'MagpieData minimum Number', 130 | 'MagpieData maximum Number', 131 | 'MagpieData range Number', 132 | 'MagpieData mean Number', 133 | 'MagpieData avg_dev Number', 134 | 'MagpieData mode Number', 135 | 'MagpieData minimum MendeleevNumber', 136 | 'MagpieData maximum MendeleevNumber', 137 | 'MagpieData range MendeleevNumber', 138 | 'MagpieData mean MendeleevNumber', 139 | 'MagpieData avg_dev MendeleevNumber', 140 | 'MagpieData mode MendeleevNumber', 141 | 'MagpieData minimum AtomicWeight', 142 | 'MagpieData maximum AtomicWeight', 143 | 'MagpieData range AtomicWeight', 144 | 'MagpieData mean AtomicWeight', 145 | 'MagpieData avg_dev AtomicWeight', 146 | 'MagpieData mode AtomicWeight', 147 | 'MagpieData minimum MeltingT', 148 | 'MagpieData maximum MeltingT', 149 | 'MagpieData range MeltingT', 150 | 'MagpieData mean MeltingT', 151 | 'MagpieData avg_dev MeltingT', 152 | 'MagpieData mode MeltingT', 153 | 'MagpieData minimum Column', 154 | 'MagpieData maximum Column', 155 | 'MagpieData range Column', 156 | 'MagpieData mean Column', 157 | 'MagpieData avg_dev Column', 158 | 'MagpieData mode Column', 159 | 'MagpieData minimum Row', 160 | 'MagpieData maximum Row', 161 | 'MagpieData range Row', 162 | 'MagpieData mean Row', 163 | 'MagpieData avg_dev Row', 164 | 'MagpieData mode Row', 165 | 'MagpieData minimum CovalentRadius', 166 | 'MagpieData maximum CovalentRadius', 167 | 'MagpieData range CovalentRadius', 168 | 'MagpieData mean CovalentRadius', 169 | 'MagpieData avg_dev CovalentRadius', 170 | 'MagpieData mode CovalentRadius', 171 | 'MagpieData minimum Electronegativity', 172 | 'MagpieData maximum Electronegativity', 173 | 'MagpieData 
range Electronegativity', 174 | 'MagpieData mean Electronegativity', 175 | 'MagpieData avg_dev Electronegativity', 176 | 'MagpieData mode Electronegativity', 177 | 'MagpieData minimum NsValence', 178 | 'MagpieData maximum NsValence', 179 | 'MagpieData range NsValence', 180 | 'MagpieData mean NsValence', 181 | 'MagpieData avg_dev NsValence', 182 | 'MagpieData mode NsValence', 183 | 'MagpieData minimum NpValence', 184 | 'MagpieData maximum NpValence', 185 | 'MagpieData range NpValence', 186 | 'MagpieData mean NpValence', 187 | 'MagpieData avg_dev NpValence', 188 | 'MagpieData mode NpValence', 189 | 'MagpieData minimum NdValence', 190 | 'MagpieData maximum NdValence', 191 | 'MagpieData range NdValence', 192 | 'MagpieData mean NdValence', 193 | 'MagpieData avg_dev NdValence', 194 | 'MagpieData mode NdValence', 195 | 'MagpieData minimum NfValence', 196 | 'MagpieData maximum NfValence', 197 | 'MagpieData range NfValence', 198 | 'MagpieData mean NfValence', 199 | 'MagpieData avg_dev NfValence', 200 | 'MagpieData mode NfValence', 201 | 'MagpieData minimum NValence', 202 | 'MagpieData maximum NValence', 203 | 'MagpieData range NValence', 204 | 'MagpieData mean NValence', 205 | 'MagpieData avg_dev NValence', 206 | 'MagpieData mode NValence', 207 | 'MagpieData minimum NsUnfilled', 208 | 'MagpieData maximum NsUnfilled', 209 | 'MagpieData range NsUnfilled', 210 | 'MagpieData mean NsUnfilled', 211 | 'MagpieData avg_dev NsUnfilled', 212 | 'MagpieData mode NsUnfilled', 213 | 'MagpieData minimum NpUnfilled', 214 | 'MagpieData maximum NpUnfilled', 215 | 'MagpieData range NpUnfilled', 216 | 'MagpieData mean NpUnfilled', 217 | 'MagpieData avg_dev NpUnfilled', 218 | 'MagpieData mode NpUnfilled', 219 | 'MagpieData minimum NdUnfilled', 220 | 'MagpieData maximum NdUnfilled', 221 | 'MagpieData range NdUnfilled', 222 | 'MagpieData mean NdUnfilled', 223 | 'MagpieData avg_dev NdUnfilled', 224 | 'MagpieData mode NdUnfilled', 225 | 'MagpieData minimum NfUnfilled', 226 | 'MagpieData maximum 
NfUnfilled', 227 | 'MagpieData range NfUnfilled', 228 | 'MagpieData mean NfUnfilled', 229 | 'MagpieData avg_dev NfUnfilled', 230 | 'MagpieData mode NfUnfilled', 231 | 'MagpieData minimum NUnfilled', 232 | 'MagpieData maximum NUnfilled', 233 | 'MagpieData range NUnfilled', 234 | 'MagpieData mean NUnfilled', 235 | 'MagpieData avg_dev NUnfilled', 236 | 'MagpieData mode NUnfilled', 237 | 'MagpieData minimum GSvolume_pa', 238 | 'MagpieData maximum GSvolume_pa', 239 | 'MagpieData range GSvolume_pa', 240 | 'MagpieData mean GSvolume_pa', 241 | 'MagpieData avg_dev GSvolume_pa', 242 | 'MagpieData mode GSvolume_pa', 243 | 'MagpieData minimum GSbandgap', 244 | 'MagpieData maximum GSbandgap', 245 | 'MagpieData range GSbandgap', 246 | 'MagpieData mean GSbandgap', 247 | 'MagpieData avg_dev GSbandgap', 248 | 'MagpieData mode GSbandgap', 249 | 'MagpieData minimum GSmagmom', 250 | 'MagpieData maximum GSmagmom', 251 | 'MagpieData range GSmagmom', 252 | 'MagpieData mean GSmagmom', 253 | 'MagpieData avg_dev GSmagmom', 254 | 'MagpieData mode GSmagmom', 255 | 'MagpieData minimum SpaceGroupNumber', 256 | 'MagpieData maximum SpaceGroupNumber', 257 | 'MagpieData range SpaceGroupNumber', 258 | 'MagpieData mean SpaceGroupNumber', 259 | 'MagpieData avg_dev SpaceGroupNumber', 260 | 'MagpieData mode SpaceGroupNumber', 261 | 'NComp', 262 | '0-norm', 263 | '2-norm', 264 | '3-norm', 265 | '5-norm', 266 | '7-norm', 267 | '10-norm', 268 | 'frac s valence electrons', 269 | 'frac p valence electrons', 270 | 'frac d valence electrons', 271 | 'frac f valence electrons', 272 | 'compound possible', 273 | 'max ionic char', 274 | 'avg ionic char', 275 | 'band center', 276 | 'H', 277 | 'He', 278 | 'Li', 279 | 'Be', 280 | 'B', 281 | 'C', 282 | 'N', 283 | 'O', 284 | 'F', 285 | 'Ne', 286 | 'Na', 287 | 'Mg', 288 | 'Al', 289 | 'Si', 290 | 'P', 291 | 'S', 292 | 'Cl', 293 | 'Ar', 294 | 'K', 295 | 'Ca', 296 | 'Sc', 297 | 'Ti', 298 | 'V', 299 | 'Cr', 300 | 'Mn', 301 | 'Fe', 302 | 'Co', 303 | 'Ni', 304 | 'Cu', 305 | 
'Zn', 306 | 'Ga', 307 | 'Ge', 308 | 'As', 309 | 'Se', 310 | 'Br', 311 | 'Kr', 312 | 'Rb', 313 | 'Sr', 314 | 'Y', 315 | 'Zr', 316 | 'Nb', 317 | 'Mo', 318 | 'Tc', 319 | 'Ru', 320 | 'Rh', 321 | 'Pd', 322 | 'Ag', 323 | 'Cd', 324 | 'In', 325 | 'Sn', 326 | 'Sb', 327 | 'Te', 328 | 'I', 329 | 'Xe', 330 | 'Cs', 331 | 'Ba', 332 | 'La', 333 | 'Ce', 334 | 'Pr', 335 | 'Nd', 336 | 'Pm', 337 | 'Sm', 338 | 'Eu', 339 | 'Gd', 340 | 'Tb', 341 | 'Dy', 342 | 'Ho', 343 | 'Er', 344 | 'Tm', 345 | 'Yb', 346 | 'Lu', 347 | 'Hf', 348 | 'Ta', 349 | 'W', 350 | 'Re', 351 | 'Os', 352 | 'Ir', 353 | 'Pt', 354 | 'Au', 355 | 'Hg', 356 | 'Tl', 357 | 'Pb', 358 | 'Bi', 359 | 'Po', 360 | 'At', 361 | 'Rn', 362 | 'Fr', 363 | 'Ra', 364 | 'Ac', 365 | 'Th', 366 | 'Pa', 367 | 'U', 368 | 'Np', 369 | 'Pu', 370 | 'Am', 371 | 'Cm', 372 | 'Bk', 373 | 'Cf', 374 | 'Es', 375 | 'Fm', 376 | 'Md', 377 | 'No', 378 | 'Lr' 379 | ] 380 | -------------------------------------------------------------------------------- /cryspnet/learner/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AuroraLHT/cryspnet/d0cf3c67ebb40ec6d8ca96f8dd6dbab73fc352d7/cryspnet/learner/.gitkeep -------------------------------------------------------------------------------- /cryspnet/losslandscape.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from fastai.torch_core import * 3 | import copy 4 | 5 | 6 | __all__ = ['LossLandscape', 'plot_landscape', 'plot_landscape_contour'] 7 | 8 | def randomize( linear ): 9 | weight = linear.weight 10 | bias = linear.bias 11 | 12 | wn = weight.norm(dim=1, keepdim=True) 13 | # bn = bias.norm(dim=1, keepdim=True) 14 | 15 | randn_w = torch.randn_like( weight ) 16 | # randn_b = torch.randn_like( bias ) 17 | 18 | randwn = randn_w.norm(dim=1, keepdim=True) 19 | # randbn = randn_b.norm(dim=1, keepdim=True) 20 | 21 | randn_w = randn_w / (randwn / wn) 22 | # randn_b = randn_b / bias.data) 23 | 
def linear_morph1d(m_fin, m_ran1, m_out, x1):
    """
    Move the linear layers of ``m_out`` to ``w_fin + x1 * w_ran1``.

    One-dimensional counterpart of ``linear_morph2d``.

    Arguments:
        m_fin  : the trained model, i.e. the origin of the line
        m_ran1 : a model of the same architecture holding the random direction
        m_out  : the model whose weights are overwritten in place
        x1     : position along the random direction

    Returns ``m_out``. Only the weights are written; biases are left as they
    are, which matches ``linear_morph2d``.
    """
    # Same layout assumption as linear_morph2d: the layers to morph are the
    # members of each model's fourth child module.
    layers_o = list(m_out.children())[3]
    layers_f = list(m_fin.children())[3]
    layers_r1 = list(m_ran1.children())[3]

    for child_o, child_f, child_r1 in zip(layers_o, layers_f, layers_r1):
        if isinstance(child_o, torch.nn.Linear):
            # copy_ writes into the existing parameter; plain assignment to a
            # local name would leave m_out untouched
            child_o.weight.data.copy_(child_r1.weight.data * x1 + child_f.weight.data)

    return m_out
enumerate(np.linspace(start, end, n)): 74 | mo = linear_morph2d(self.m, mr1, mr2, mo, x1, x2) 75 | self.learn.model = mo 76 | val_err[i, j] = self.learn.validate()[0] 77 | 78 | self.reset() 79 | return val_err 80 | 81 | def prob1D(self, start=-1, end=1, n=10): 82 | val_err = np.zeros(n) 83 | mo = copy.deepcopy(self.m) 84 | mr1 = randomize_model( copy.deepcopy(self.m) ) 85 | 86 | for i, x1 in enumerate(np.linspace(start, end, n)): 87 | mo = linear_morph1d(self.m, mr1, mo, x1, x2) 88 | self.learn.model = mo 89 | val_err[i] = self.learn.validate()[0] 90 | 91 | self.reset() 92 | return val_err 93 | 94 | def reset(self): 95 | self.learn.model = self.m 96 | 97 | 98 | def plot_landscape(landscape, start=-1, end=1, n=10, figsize=(9,9)): 99 | fig, ax = plt.subplots(figsize=figsize) 100 | 101 | plt.imshow(landscape, cmap='gray') 102 | # plt.yticks( np.arange(n), np.linspace(start, end, n) ) 103 | # plt.xticks( np.arange(n), np.linspace(start, end, n), rotation=45, ha = 'right' ) 104 | 105 | plt.colorbar() 106 | 107 | return fig, ax 108 | 109 | 110 | def plot_landscape_contour(landscape, start=-1, end=1, n=10, figsize=(9,9), lmin=None, lmax=None, density=10): 111 | fig, ax = plt.subplots(figsize=figsize) 112 | 113 | level = np.linspace(lmin, lmax, density) 114 | 115 | X, Y= np.meshgrid(np.linspace(start, end, n), np.linspace(start, end, n)) 116 | CS=plt.contour(X, Y, landscape, level) 117 | ax.clabel(CS, inline=1, fontsize=10) 118 | 119 | return fig, ax -------------------------------------------------------------------------------- /cryspnet/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | # from fastai.tabular import LabelLists, TabularList, is_pathlike, defaults 3 | # from fastai.basic_data import DatasetType 4 | # from fastai.basic_train import load_callback 5 | from fastai.tabular.all import load_learner, accuracy 6 | 7 | import pickle 8 | 9 | import numpy as np 10 | import pandas as pd 11 | 12 | from 
pathlib import Path 13 | from .config import * 14 | from .utils import to_np, topkacc 15 | 16 | from typing import Tuple, Dict, Union, List 17 | 18 | def load_Bravais_models(n_ensembler:int = N_ESMBLER, which:str="whole", batch_size:int = BATCHSIZE, cpu:bool=False)->torch.nn.Module: 19 | models_dir = Path(LEARNER) / Path(BRAVAIS_MODELS_FOLDER[which]) 20 | BE = BravaisEnsembleModel.from_folder(str(models_dir), n=n_ensembler, batch_size=batch_size, cpu=cpu) 21 | return BE 22 | 23 | def load_Lattice_models(batch_size:int = BATCHSIZE, cpu:bool=False)->torch.nn.Module: 24 | models_dir = Path(LEARNER) / Path(LATTICE_PARAM_MODELS_FOLDER) 25 | LPB = LatticeParamModelBundle.from_folder(str(models_dir), batch_size=batch_size, cpu=cpu) 26 | return LPB 27 | 28 | def load_SpaceGroup_models(batch_size:int = BATCHSIZE, cpu:bool=False)->torch.nn.Module: 29 | models_dir = Path(LEARNER) / Path(SPACE_GROUP_MODELS_FOLDER) 30 | SGB = SpaceGroupModelBundle.from_folder(str(models_dir), batch_size=batch_size, cpu=cpu) 31 | return SGB 32 | 33 | def top_n(preds:np.ndarray, n:int)->np.ndarray: 34 | """ return a (preds.shape[0], n) size array """ 35 | if n == 1: 36 | idxs = np.argmax(preds, axis=1)[:, None] 37 | else: 38 | idxs = np.argsort(preds, axis=1)[:, :-n-1:-1] 39 | 40 | return idxs 41 | 42 | def _pad_sg_out(prob:np.ndarray, sg:np.ndarray, n:int)->Tuple[np.ndarray, np.ndarray]: 43 | sgfill=-1 44 | filltype=int 45 | 46 | if (n==1 and len(prob.shape)==1 and len(sg.shape==1)): 47 | prob = prob[:, None] 48 | sg = sg[:, None] 49 | 50 | if prob.shape[1]!=n: 51 | residual = n - prob.shape[1] 52 | pad_prob = np.zeros((prob.shape[0], residual)) 53 | pad_sg = np.empty((prob.shape[0], residual), dtype=filltype) 54 | pad_sg.fill(sgfill) 55 | prob = np.concatenate((prob,pad_prob), axis=1) 56 | sg = np.concatenate((sg,pad_sg), axis=1) 57 | 58 | return prob, sg 59 | 60 | 61 | _vol_f = { 62 | "cubic (P)" : lambda a, b, c, alpha, beta, gamma : a**3, 63 | "cubic (F)" : lambda a, b, c, alpha, beta, gamma 
def complete_lattice_param(out:pd.DataFrame, bravais:str):
    """
    Fill in the lattice parameters fixed by the crystal system and add the cell volume.

    Arguments:
        out     : frame with the predicted parameters of one Bravais lattice;
                  it is modified in place and also returned
        bravais : Bravais lattice name; selects the constraints below and is
                  the key into ``_vol_f``

    The volume goes into column "v". Angles are stored in degrees and
    converted to radians only for the volume formula.
    """
    angle_cols = ["alpha", "beta", "gamma"]

    if "cubic" in bravais:
        out[angle_cols] = 90
        out["b"] = out["c"] = out["a"]

    elif "tetragonal" in bravais:
        out[angle_cols] = 90
        out["b"] = out["a"]

    elif "orthorhombic" in bravais:
        out[angle_cols] = 90

    elif "hexagonal" in bravais:
        out[["alpha", "beta"]] = 90
        out["gamma"] = 120
        out["b"] = out["a"]

    elif "monoclinic" in bravais:
        out[["alpha", "gamma"]] = 90

    elif "rhombohedral" in bravais:
        out["beta"] = out["gamma"] = out["alpha"]
        out["b"] = out["c"] = out["a"]

    # triclinic: all six parameters are predicted, nothing to fill in

    volume = _vol_f[bravais]
    edges = [out[col] for col in ("a", "b", "c")]
    angles = [np.deg2rad(out[col]) for col in angle_cols]
    out["v"] = volume(*edges, *angles)

    return out
class EnsembleModel:
    """Base class that runs several models on the same input and stacks their predictions"""

    def __init__(self, Ms:List[Model]):
        # ensemble members, queried in list order
        self.models = Ms

    def load(self, ext_magpie:pd.DataFrame, **db_kwargs):
        """Hand the input frame and any keyword arguments to every member's ``load``"""
        for member in self.models:
            member.load(ext_magpie, **db_kwargs)

    def get_preds(self):
        """Return the members' predictions stacked along a new leading dimension"""
        member_preds = [member.get_preds() for member in self.models]
        return torch.stack(member_preds, dim=0)

    def p2o(self, esm_preds:torch.Tensor, **args):
        """Convert the stacked predictions to a numpy array; keyword arguments are not used here"""
        return to_np(esm_preds)

    def predicts(self, ext_magpie:pd.DataFrame, **args):
        """Load ``ext_magpie`` into every member, predict, then post-process with ``p2o``"""
        self.load(ext_magpie)
        stacked = self.get_preds()
        return self.p2o(stacked, **args)
230 | 231 | @classmethod 232 | def from_folder(cls, folder:Union[Path, str], n:int=5, batch_size:int=BATCHSIZE, cpu:bool=False): 233 | Ms = [] 234 | folder = Path(folder) 235 | if not folder.exists(): raise FileNotFoundError(str(folder)) 236 | 237 | for i in range(n): 238 | filename = Path(folder) / f"{cls._esm_prefix}{i}.pkl" 239 | if not folder.exists(): raise FileNotFoundError(str(filename)) 240 | Ms.append(BravaisModel( filename, batch_size, cpu=cpu)) 241 | return cls(Ms) 242 | 243 | @property 244 | def classes(self): 245 | return self.models[0].classes 246 | 247 | def p2o(self, esm_preds:torch.Tensor, n:int=1, **args): 248 | # from tensor to numpy 249 | esm_preds = super().p2o(esm_preds, **args) 250 | 251 | #ensembling strategy: vertical voting 252 | preds = esm_preds.mean(axis=0) 253 | idxs = top_n(preds, n=n) 254 | auxidxs = ( np.tile( np.arange(len(idxs) ), reps=(idxs.shape[1], 1) ) ).T 255 | return preds[auxidxs, idxs], self.classes[idxs] 256 | 257 | def predicts(self, ext_magpie:pd.DataFrame, topn_bravais:int = TOPN_BRAVAIS): 258 | return super().predicts(ext_magpie, n=topn_bravais) 259 | 260 | class BLSpliterBundle: 261 | """ A base class that split the input data by Bravias Lattice and send each group to the corresponding model""" 262 | 263 | _spliter = BRAVAIS_SPLIT_NAME 264 | 265 | def __init__(self, Ms:Dict[str, Model]): 266 | self.Ms = Ms 267 | 268 | def load(self, ext_magpie_brav:pd.DataFrame): 269 | groups = ext_magpie_brav.groupby(self._spliter) 270 | self.idxs = {} 271 | self.widx = ext_magpie_brav.index 272 | self.data_size = ext_magpie_brav.shape[0] 273 | for n, g in groups: 274 | self.idxs[n] = g.index 275 | self.Ms[n].load(g.reset_index()) 276 | self.active = self.idxs.keys() 277 | 278 | def get_preds(self): 279 | return {n: P.get_preds() for n, P in self.Ms.items() if n in self.active } 280 | 281 | def p2o(self, preds:Dict[str, torch.Tensor], **args): 282 | return {n: self.Ms[n].p2o(pred, **args) for n, pred in preds.items() if n in 
class LatticeParamModelBundle(BLSpliterBundle):
    """ Holds one Lattice Parameters model per Bravais Lattice and predicts the Lattice Parameters through a single API"""

    _columns = LATTICE_PARAM_NAMES
    _LatticeParamModels = LATTICE_PARAM_MODELS
    _norms = LATTICE_NORM

    @classmethod
    def from_folder(cls, folder:Union[Path, str], batch_size:int = BATCHSIZE, cpu:bool=False):
        """
        Build the bundle from a folder holding the normalisation pickle and
        one learner file per Bravais Lattice.

        NOTE(review): a missing folder or model file is reported with
        FileExistsError. FileNotFoundError would describe the situation
        better; the type is kept because callers may already catch it.
        """
        root = Path(folder)
        if not root.exists():
            raise FileExistsError(root)

        # NOTE(review): pickle.load runs whatever the file contains -- only
        # point this at model folders you trust.
        with (root / cls._norms).open("rb") as handle:
            norms = pickle.load(handle)

        models = {}
        for lattice in BRAVAIS_LATTICE:
            model_file = root / cls._LatticeParamModels[lattice]
            if not model_file.exists():
                raise FileExistsError(model_file)
            models[lattice] = LatticeParamModel(model_file, norms[lattice], batch_size, cpu=cpu)
        return cls(models)

    def p2o(self, preds:Dict[str, torch.Tensor], **args):
        """
        Assemble the per-lattice predictions into one frame indexed like the input.

        Each group writes the columns listed for its lattice in PRED_COLS;
        ``complete_lattice_param`` then fills the remaining parameters and
        the volume column 'v'.
        """
        per_lattice = super().p2o(preds, **args)
        header = self._columns + ['v']
        out = pd.DataFrame(
            np.zeros((self.data_size, 7)),
            columns=header,
            index=self.widx,
        )

        for lattice, rows in self.idxs.items():
            out.loc[rows, PRED_COLS[lattice]] = per_lattice[lattice]
            out.loc[rows] = complete_lattice_param(out.loc[rows], lattice)

        return out
BRAVAIS_LATTICE: 338 | path = cls._SpaceGroupModels[name] 339 | filename = folder / path 340 | if not filename.exists(): raise FileExistsError(filename) 341 | SGMs[name] = SpaceGroupModel(filename, batch_size, cpu=cpu) 342 | return cls(SGMs) 343 | 344 | def p2o(self, preds:Dict[str, torch.Tensor], **args): 345 | preds = super().p2o(preds, **args) 346 | # output topN probability and classes 347 | # n = args["n"] if "n" in args.keys() else 1 348 | n = args["n"] 349 | outs = np.zeros((self.data_size, n)) 350 | outs_probs = np.zeros((self.data_size, n)) 351 | for name, idx in self.idxs.items(): 352 | prob, sg = preds[name] 353 | prob, sg = _pad_sg_out(prob, sg, n) 354 | outs_probs[idx] = prob 355 | outs[idx] = sg 356 | return outs_probs, outs 357 | 358 | def predicts(self, ext_magpie:pd.DataFrame, topn_spacegroup:int = TOPN_SPACEGROUP): 359 | return super().predicts(ext_magpie, n=topn_spacegroup) 360 | -------------------------------------------------------------------------------- /cryspnet/utils.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from collections.abc import Iterable 3 | from collections import defaultdict 4 | import re 5 | from pathlib import Path 6 | 7 | 8 | # from IPython.display import display 9 | import pandas as pd 10 | import numpy as np 11 | from tqdm import tqdm_notebook 12 | 13 | # import matplotlib as mpl 14 | # import matplotlib.pyplot as plt 15 | # import seaborn as sns 16 | # from mpl_toolkits.axes_grid1 import make_axes_locatable 17 | # from sklearn.metrics import confusion_matrix 18 | 19 | # import torch 20 | import plotly.graph_objs as go 21 | 22 | # Matminer: Author logan 23 | from matminer.featurizers.conversions import StrToComposition 24 | from matminer.featurizers.composition import ElementProperty 25 | 26 | from matminer.featurizers.base import MultipleFeaturizer 27 | from matminer.featurizers import composition as cf 28 | 29 | from .config import * 30 | 31 | 
def is_oxide(x:str)->bool:
    """
    Tell whether the formula string contains oxygen.

    An "O" counts as oxygen when it is not followed by a lowercase letter,
    so "Os" is not mistaken for it, while oxygen with an implicit count
    ("MgO") is recognised as well as "Fe2O3".
    """
    return re.search(r"O(?![a-z])", x) is not None
elif path.suffix == ".xlsx" or path.suffix == ".xls": 80 | xls = pd.ExcelFile(path) 81 | sheets = [xls.parse(sheet_name) for sheet_name in xls.sheet_names] 82 | data = pd.concat(sheets, axis=0) 83 | 84 | else: 85 | # we hope the best here 86 | data = pd.read_csv(path, delimiter= r'\s+', index_col=False, header=None) 87 | 88 | assert data.shape[1] == 1, "the input is not formula and has multiple dimension, plz check the input" 89 | data.columns = ['formula'] 90 | return data 91 | 92 | def dump_output(output:pd.DataFrame, path:str, **args)->None: 93 | """ 94 | Save the output to csv 95 | """ 96 | output.to_csv(path, **args) 97 | 98 | def group_outputs(bravais:np.array, bravais_probs:np.array, spacegroups:List[np.array], spacegroups_probs:List[np.array], lattices:List[pd.DataFrame], formula:pd.DataFrame)->pd.DataFrame: 99 | """ 100 | Group Bravais Lattice predictions, Space Group predictions, and Lattice Parameters predictions into a dataframe 101 | 102 | Arguements: 103 | bravais : bravais lattice prediction from BravaisEnsembleModel 104 | bravais_probs : the associated probabilities of B.L from BravaisEnsembleModel 105 | spacegroups : space group prediction from SpaceGroupModelBundle 106 | spacegroups_probs : the associated probabilities of S.G from SpaceGroupModelBundle 107 | lattices : lattice parameter prediction from LatticeParamModelBundle 108 | formula : a DataFrame contains a column "formula" for each predicted entry 109 | """ 110 | topn_bravais = bravais.shape[1] 111 | topn_spacegroup = spacegroups[0].shape[1] 112 | 113 | inner_columns = ["Bravais", "Bravais prob"] + \ 114 | LATTICE_PARAM_NAMES + ['v'] + \ 115 | [f"Top-{i+1} SpaceGroup" for i in range(topn_spacegroup) ] + \ 116 | [f"Top-{i+1} SpaceGroup prob" for i in range(topn_spacegroup) ] 117 | 118 | idxs = [('formula', "-")]+ [ (f"Top-{i+1} Bravais", c) for i in range(topn_bravais) for c in inner_columns ] 119 | 120 | idxs = pd.MultiIndex.from_tuples(idxs) 121 | out = pd.DataFrame(columns = idxs) 122 | 
# top-k accuracy, since fastaiv2's top-k acc metric is currently not working
def topkacc(inp, targ, k=5, axis=-1):
    """Computes the Top-k accuracy (`targ` is in the top `k` predictions of `inp`)

    inp  : scores, with the classes laid out along `axis`
    targ : target class indices, either with a singleton dimension at `axis`
           or with that dimension missing altogether
    k    : number of highest-scoring classes that count as a hit
    """
    inp = inp.topk(k=k, dim=axis)[1]
    # A target without the class dimension gets one, so that expand_as lines
    # each target up with the k candidates of its own row.
    if targ.ndim < inp.ndim:
        targ = targ.unsqueeze(dim=axis)
    if inp.ndim >= targ.ndim:
        targ = targ.expand_as(inp)
    # the k candidates lie along `axis`, so that is where hits are counted
    return (inp == targ).sum(dim=axis).float().mean()
class Compound:
    """
    A formula broken into elements, each with a composition expression.

    elements : the Element objects in the order given
    vars     : the variables found by ``get_vars`` in the composition strings
    eles     : the element names, in the same order as ``elements``
    """
    def __init__(self, *elements):
        self.elements = elements
        self.vars = get_vars([ele.comp for ele in self.elements])
        self.eles = [ele.name for ele in self.elements]

    @classmethod
    def from_str(cls, formula):
        """Parse ``formula`` into (name, composition) pairs and build a Compound from them"""
        # Raw string: the pattern itself is unchanged, but "\s", "\(", "\-" and
        # "\." are no longer invalid escape sequences of an ordinary literal.
        inps = re.findall(r"([a-zA-Z]+)\s?([x-z/0-9*\(\)\-\.]*)", formula)
        elements = [Element(*inp) for inp in inps]
        return cls(*elements)

    def feagen(self, limits=None, n=50):
        """
        Generate features over a grid of compositions.

        A three-element compound without variables is scanned as a ternary
        (limits default to (0, 1) for each component); anything else goes
        through the formula search, which needs ``limits``.
        """
        if len(self.elements) == 3 and len(self.vars) == 0:
            if limits is None: limits = [(0,1), (0,1), (0,1)]
            fg = SingleCompFeatureGenerator(n=n)
            return fg.ternary(compound=self, limits=limits)

        assert limits is not None, "plz set the search limits to each variable"
        fg = SingleCompFeatureGenerator(n=n)
        return fg.formula_search(compound=self, limits=limits)

    def __getitem__(self, key):
        return self.elements[key]

    def __str__(self):
        return " ".join(["{} {}".format(ele.name, ele.comp) for ele in self.elements])
self.str2composition = StrToComposition() 332 | 333 | def generate(self, df:pd.DataFrame, ignore_errors:bool=False, drop_mode=True): 334 | """ 335 | generate feature from a dataframe with a "formula" column that contains 336 | chemical formulas of the compositions. 337 | 338 | df : a dataframe with a column name formula 339 | ignore_errors : ignore errors when generating features 340 | drop_mode : drop property that generated from mode aggregation function 341 | 342 | """ 343 | df = self.str2composition.featurize_dataframe(df, "formula", ignore_errors=ignore_errors) 344 | df = df.dropna() 345 | df = self.feature_calculators.featurize_dataframe(df, col_id='composition', ignore_errors=ignore_errors); 346 | df["NComp"] = df["composition"].apply(len) 347 | if drop_mode : 348 | df = df.drop(columns= [ c for c in df.columns if "mode" in c and c.startswith("Magpie") ] ) 349 | return df 350 | 351 | class MultiCompFeatureGenerator(FeatureGenerator): 352 | def __init__(self, n=50): 353 | super().__init__() 354 | self.n = n 355 | 356 | def ternary_dope(self, compounds, limits): 357 | eles = compounds.eles 358 | concentrations = np.stack(list( tri_grid(self.n, *limits)), axis=0) 359 | x, y, z = concentrations[:,0,None], concentrations[:,1,None], concentrations[:,2,None] 360 | bases = [] 361 | for compound in compounds: 362 | base = np.zeros(len(eles)) 363 | for ele in compound: 364 | base[eles.index(ele.name)] = ele.comp 365 | bases.append(base) 366 | 367 | concentrations = x*bases[0] + y*bases[1] + z*bases[2] 368 | composition_format = "{:.2f}".join(eles) + "{:.2f}" 369 | formula = [composition_format.format(*row) for row in concentrations] 370 | fake_df = pd.DataFrame({"formula":formula, "x": x.ravel(), "y": y.ravel(), "z": z.ravel()}) 371 | return self.generate(fake_df) 372 | 373 | class SingleCompFeatureGenerator(FeatureGenerator): 374 | def __init__(self, n=50): 375 | super().__init__() 376 | self.n = n 377 | 378 | def ternary(self, compound, limits): 379 | """ generate a 
search list for the ternary compound """ 380 | 381 | vars = compound.vars 382 | eqs = [Equation.Expression(ele.comp, compound.vars) for ele in compound.elements ] 383 | 384 | eles = [ ele.name for ele in compound.elements ] 385 | composition_format = "{:.2f}".join(eles) + "{:.2f}" 386 | 387 | concentrations = np.stack(list( tri_grid(self.n, *limits)), axis=0) 388 | formula = [composition_format.format(*row) for row in concentrations] 389 | fake_df = pd.DataFrame({"formula":formula, "x": concentrations[:,0], "y": concentrations[:,1], "z": concentrations[:,2]}) 390 | return self.generate(fake_df) 391 | 392 | def formula_search(self, compound, limits): 393 | """ generate a search list for the compound with variable in it""" 394 | variable_search_lists = [np.linspace(start=limit[0], stop=limit[1], num=self.n ) for limit in limits] 395 | search_sheet = {var: variable_search_list for var, variable_search_list in zip(compound.vars, variable_search_lists)} 396 | 397 | # generate the composition given the variable using the equation from Compound 398 | compositions = [eq(*variable_search_lists) for eq in self.eqs] 399 | compositions = [np.repeat(comp, self.n) if not isinstance(comp, np.ndarray) else comp for comp in compositions] 400 | 401 | composition_format = "{:.2f}".join(eles) + "{:.2f}" 402 | search_sheet['formula'] = [composition_format.format(*row) for row in np.stack(compositions, axis=1)] 403 | return self.generate(pd.DataFrame(search_sheet)) 404 | 405 | def alias_lookup(name:str)->str: 406 | 407 | alias_book = defaultdict(lambda : None, 408 | cubic_F = "fcc", 409 | cubic_I = "bcc", 410 | tetragonal_I = "bct", 411 | hexagonal = "hcp" 412 | ) 413 | 414 | alias = alias_book[name] 415 | if alias is not None: return alias 416 | else: return name -------------------------------------------------------------------------------- /demo/demo.csv: -------------------------------------------------------------------------------- 1 | formula 2 | B1Gd0.63Si0.23 3 | Ge3Li2Sm2 4 | 
In0.209Mo15S19 5 | Ag1Zr1 6 | O6.5Pb2Ru2 7 | Nd1O3Ru0.883 8 | Sn7Tb3 9 | Ce5Cu1Pb3 10 | Al1Co0.7Fe0.3 11 | Gd0.97Ni2 12 | Al2Fe1O4 13 | Cu0.16Se2Ta1.08 14 | Cs2S14Sn1 15 | Cu1.15Se2Tb0.95 16 | As2Cu1Tb1 17 | C0.85W2 18 | Rh2Sm3 19 | Er1Th1 20 | Al0.79Ge2.22Lu2 21 | O0.532V1 22 | Cu5.372Y0.814 23 | O6P4Se2 24 | Au0.775Cd0.175In0.05 25 | Cr0.35Fe0.65Ga1 26 | Cu1Dy2Ga3 27 | B2Mo5Si1 28 | Fe2Hf1Si2 29 | As1S1Tb1 30 | Al1.5Cu3.5La1 31 | B2Ni13Tm3 32 | B4Mn1 33 | Cl6Ga1Sb1 34 | O4P1Tb1 35 | S9Sb4Sr3 36 | Mn0.01O1Zn0.99 37 | H2O5S1 38 | Er1Ge1Pd2 39 | Pb3Th5 40 | Fe3.07Sc2Si0.93 41 | Nd1S1 42 | Hf0.16Mn8.8Zr3.04 43 | Ag0.8Mo3.2S4 44 | Au3Ba1Ge1 45 | La1Pt5 46 | Ca0.4O1Zn0.6 47 | Al1Fe2Tb1 48 | Al0.75Ge0.25Nb3 49 | O5Tl0.5V2 50 | Si5V6 51 | B4Er1Re1 52 | Au4.4Ba8Ge40.3 53 | Fe2In1O4 54 | C1.6B13.4 55 | O3Sm1.4Y0.6 56 | F6Hg4O1 57 | Ba1Pt1Sb1 58 | Ba1Cl5Gd1 59 | Ba1In2.95Pt1.04 60 | Fe2.25Ni0.75P1 61 | Fe21Mo3Tb2 62 | As1S1Sm1 63 | Al2Br7K1 64 | Fe6.72Ga5.28Sc1 65 | Au4Mn1 66 | C1Cr1V1 67 | Bi1.6S3Sm0.4 68 | Co1Ge1O3 69 | Pr1Ru1Si2 70 | Al12Ca1O19 71 | Gd3.22Ge4Zr1.77 72 | H1.25Ti1 73 | Nd1Ni3 74 | Ba1O5Ti2 75 | B34.94Ni0.18Zn1.21 76 | Al2Fe3Si4 77 | B1O3V1 78 | C70Cs1.068Na2.04 79 | Ge1Mn1Yb1 80 | Sb2Zr1 81 | H1I3O8 82 | Ga1Li1Mg2 83 | H0.81Ni1.94O2.81 84 | Ge5La2Rh3 85 | Mn0.25S2Ta1 86 | Al8Ce1Fe4 87 | O1Sm2 88 | Fe1Ga1Ge1 89 | Al2.4Co14.4Zr7.2 90 | As4Hg9O16 91 | Gd5Ge2.43Si1.57 92 | Al5W1 93 | Ni3S1Se1 94 | Bi3La4 95 | O1.91653Y0.16506Zr0.83447 96 | Mn1.1Sb1 97 | Al18Co5Ni3 98 | Se8Yb7.24 99 | Co0.24Mo0.56Si0.2 100 | Fe1La3O6 101 | Ni0.85Sn0.15 102 | Al1Lu1Pd1 103 | Al6.5Cu6.5Eu1 104 | Bi1O4Pd2 105 | Pd2Sb2Sr1 106 | Fe2Gd0.97Ho0.03 107 | Mo3Pu2Si4 108 | As10Ni15Sm6 109 | Cu0.66Sc3Zn17.34 110 | Ba8Pt2.7Si43.3 111 | Na1Pt2 112 | Fe1.45Sb0.5Sn0.5 113 | Tc1W1 114 | Ba1Cu5.65S4.5 115 | C0.2B1.8Mg1 116 | Cl7Fe1Te1 117 | Mo0.5Ni3Ta0.5 118 | In1Li2 119 | Er3S6Sm1 120 | B7.91Eu3Pd23.2 121 | Nb1Rh1Sn1 122 | C1Ho3In1 123 | Co1F3Na1 124 | Co9Si2Y1 125 | Ho1Ni1Sn1 126 | 
Au6.09Ga2.91Sr2 127 | Cu1.18Ge3.63P3.19 128 | Hg5In2Te8 129 | K1Mn1P1 130 | Cd2O5U1 131 | O6.75Pb2Re2 132 | Ba0.99O3.03Pr1 133 | B6Ba0.75Na0.25 134 | Co1Li0.48O2 135 | Gd2.53Ge4Zr2.47 136 | Co15Ga2Pr2 137 | N4O3S5 138 | F4K1Yb1 139 | Cr1Se2Tl1 140 | Co1Dy1Sn2 141 | Li0.88O8U3 142 | O2Sr1 143 | O4.6Sr1Ti1.8 144 | Os1Re9 145 | Br6Cs2Sn1 146 | Fe14.94Nd2Si2.06 147 | As1Cr0.5Fe0.5 148 | La9.6Te16Y2.4 149 | C7Y4 150 | Cu20.4P18Yb9 151 | K0.475O3W1 152 | Al2.54Au4.46Sr1 153 | Cu1Rb1Se4 154 | Co2Sn1V1 155 | C2.5Er2Mn17 156 | Cs2Cu1.5F6 157 | Cu5Pr1 158 | Dy0.5O1.75Zr0.5 159 | Ga1I4Tl1 160 | Al4.8Dy7.2Fe12 161 | Cu1Hf1Sn1 162 | Cr1O4U1 163 | Pb1Se3Tl4 164 | Se8Ti6Tl0.76 165 | Ge9K4O20 166 | O6.16V14 167 | Ce2Si7 168 | Au0.9Ba1Ge1.1 169 | Ga12Nd2Ni1 170 | K1Nd1S2 171 | F6Gd1.5Na1.5 172 | Fe2Ta0.4Zr0.6 173 | Nd3Ru4Sn13 174 | Co1Er2Ga8 175 | Al1Os1U1 176 | Al1Cr1.667S4 177 | Nd2Ni1Sn4 178 | Ni6S5 179 | Al32.386Cr8.75 180 | Dy1Ga4Ni1 181 | O1V8 182 | Au2O8Sr5 183 | H2 184 | Co15Ga2Sm2 185 | Ca5Cr3O12 186 | K2Ni1O2 187 | B0.75Pr1Rh3 188 | Cl1K0.3Rb0.7 189 | Co6.5Re4.35Si2.15 190 | Fe0.664Mn0.336O1 191 | Co1P2 192 | In1Pt2Sc1 193 | Al2O1 194 | Ba5Nb2O11 195 | Au2O4Sr1 196 | H1Nb1O3 197 | Pt2Tb1 198 | Fe4O9P2 199 | Au78Na60Sn24 200 | Co11Ti1Y1 201 | B3Cl6N3 202 | Ga6Ni11Sm26 203 | Al5.5Ni3Si1.5 204 | Am1Pt2 205 | Ga1.08Pt2.92 206 | Al4Ca1Yb1 207 | Fe0.67Te1 208 | Al1Nd1O3 209 | Mn56Yb2 210 | C0.7Fe14Gd2 211 | B2Fe2O5 212 | Sn1Tl4 213 | Mo1O33Ta12 214 | Gd1Pd1Tl1 215 | Bi0.52F2.04Rb0.48 216 | B5Na3Pt9 217 | B2Mg3.58Ni6.42 218 | Ag1In1 219 | Ba1Ge2Rh2 220 | Ga3I1Te3 221 | Fe1S7Y4 222 | Ag4.604O6Pb2 223 | Ge4.4Ru0.8Sm2 224 | Al0.4Ge1.6Ni1 225 | Ho2O3 226 | Gd0.77Ge4Y4.23 227 | Ag1Se8Yb5.52 228 | Ag1.15Al1.85Sm1 229 | Ge5N2Sr6 230 | Ce1Fe9Si4 231 | Al2Gd0.5Pr0.5 232 | P6Pt4Sr1 233 | Rh1Sm3 234 | B4Pr1Rh4 235 | Cl4Os1 236 | P6Sb4Se18 237 | Al43W4Y6 238 | Gd2S5Zr1 239 | Mo0.2788U1.7212 240 | In1S1 241 | K2O7U2 242 | Co13Ga2.88Ge5.12 243 | Au3Ge1Sr1 244 | P0.12S0.88U1 245 | 
Au0.15Sb0.65Sn0.2 246 | O1.978Sr1 247 | Na5P3Si1 248 | Fe0.925O1 249 | As3Ce1Zn2.074 250 | Cl1La1Te1 251 | Fe1Ga1Nb4 252 | Co2Cs1S2 253 | In16S32Sn4 254 | Bi2Ce1Cu0.71 255 | Eu3In1P3 256 | Al11.2Co4.8Y8 257 | B2La1Os2 258 | -------------------------------------------------------------------------------- /demo/demo_metal.csv: -------------------------------------------------------------------------------- 1 | formula 2 | In20.966La4Pt10 3 | Ag3.63Gd1 4 | Er0.667In0.667Pt2.668 5 | Ag1.37La1Mg10.63 6 | Er1Ga1Mn1 7 | In1Pt2Pu2 8 | Ho0.667In0.667Pt2.668 9 | Cu2Th1 10 | In4Pt3Sr2 11 | Os1W1 12 | Ni1.68Sn1Ti1 13 | Al1Mn1Pt1 14 | Ag1Ce1 15 | Th1.33Zr2.66 16 | Be5.11Rh0.79 17 | Fe1.8V0.2Zr1 18 | Au1Yb1 19 | Co5.9Ga8.7Hf5 20 | Eu5Ga9 21 | Ga0.83Pd2.17 22 | Ce2Cu3Ga5 23 | Al2.8Cr0.4Ti0.8 24 | Au2Sr3 25 | Pd4Yb3 26 | Al54.1Ru28Ti37.9 27 | Al0.66Cr1.32 28 | Co11Gd1Ti1 29 | Au4Dy3Ga7 30 | Sn2Ta1 31 | Fe1Ni1Y1 32 | Cu5Y1 33 | Li3Sn4Zn2 34 | Fe12Ga5Sm2 35 | Co2.25Ta0.75 36 | Al1Pt1Tm1 37 | Al0.6Fe1.4Zr1 38 | Fe2.6Ni1.4 39 | Os3Pr2 40 | Fe6Ga5 41 | Cd19Eu3 42 | Al8Fe4Tb1 43 | Ca3Pd2 44 | Ho5Ir2 45 | In1Ni2 46 | Ga3Pu5 47 | Ga1Nd1Pd2 48 | Ca1Cu0.85Ga0.15 49 | Hf1Re2 50 | Os1Y3 51 | Ni3Sn2Tb1 52 | Au45.77Ca14Sn4.97 53 | Cd1Pb1Yb1 54 | Co17Fe17Y4 55 | Al2Ca1 56 | Ho14In3Rh2.85 57 | Cu3Pd2Pr1 58 | Cr7Nb6 59 | Be4Mo1Ta1 60 | Al8.26Fe8.74Y2 61 | Cu0.95Ni0.05Y1 62 | Fe2Nb0.4Zr0.6 63 | Cu0.8Ga0.2Ni3 64 | In1Pd2Pu2 65 | Au0.67Hg0.33Mg1 66 | Er3Rh1 67 | Ba1Hg0.92In1.08 68 | Al16Hf6Pt7 69 | Ce7Pt3 70 | In1Pt1Th1 71 | Co3Sn3.92Th2 72 | Au0.938Cr0.062Nb3 73 | Au0.99Mo0.01 74 | Al5Cu7Zr1 75 | Au0.248Ga3.752Yb1 76 | Fe17.51Tm1.74 77 | Bi8Sn2Yb11 78 | Co7In14Zr9 79 | Ce0.3Gd0.7 80 | In2Ir1La1 81 | Mo0.1Nb0.45U0.45 82 | Ni5Y1 83 | La1Pt1 84 | Co1In5Y1 85 | Ga12Ho4Ni1 86 | Dy3In0.64Rh1.31 87 | Ag1Cd1Eu1 88 | Co1Ni1Pt2 89 | Ho2Pt1 90 | Al0.96Cu1.04Hf1 91 | Ga6Lu1Mn6 92 | Al0.9Fe1.2Ti0.9 93 | Cs1Hg2 94 | Li2Sr3 95 | Co0.67Ga1.33Gd1 96 | Cd2.52Cu3.13 97 | Dy4Ga12Pd1 98 | Dy1Ni3.13Sn1.87 99 | Pb3Tb1 100 
| Ga2Pr1 101 | Al4Fe13Sm2 102 | Rh1Sn1Tm1 103 | Er6Ga1.43Ni2.57 104 | Au81Ga275Na128 105 | Pb1Th1 106 | Bi4Th3 107 | Co3Mo0.25V0.75 108 | Al64.5Cr12.5 109 | Bi4Rh1 110 | Al4.32Cu6.68U1 111 | Ce0.78Cu8.76In3.88 112 | Mn1Nd1Ni2 113 | Er4.86Pd2 114 | La1Li0.69Sn2 115 | Ni1Sn1Ti1 116 | Ho13.35In3.48Ni3.17 117 | Co1Mn1 118 | Bi1Ni0.97 119 | Ce1Ga1Pd2 120 | Pb4Y5 121 | Cu0.8Mg1Ni1.2 122 | Mg1.2Ni1Y3.8 123 | Al2.56Lu1Zn5.44 124 | Cs1Hg1 125 | Ce1Cu5In1 126 | Au5.15Cr4.73Zn91.93 127 | Mo0.12Ni0.83V1.06 128 | In1Pr1Zn1 129 | Cd45Er11 130 | Al8Cr4Tm1 131 | Al2W1 132 | Bi1Pb1Pt2 133 | Gd3.2Ni4Y0.8 134 | Al2Pd5Pu1 135 | Ce1In2Ir1 136 | Hg1Ti0.75Zn0.25 137 | Eu1Zn12.75 138 | Fe5Ga7Tb1 139 | Ce1Ga1Mg1 140 | Fe10Ho1V2 141 | Ti1Zn2 142 | Fe3Th7 143 | Au0.96Sc0.04 144 | Bi2Ce1Pd0.78 145 | Eu1Ga1.78Mg0.22 146 | Cu0.96Mn0.04 147 | Au1 148 | Pt0.8Re1.2 149 | Dy0.12Ni2Zr0.88 150 | Cu2Gd1 151 | Cu46Ga5Sm14 152 | Ba1Mg2Pb2 153 | Au2Sn1U1 154 | Ca1Ga1Pt1 155 | Al18Mg3Mo2 156 | Fe3Gd0.5Y0.5 157 | Tb6Zn23 158 | Ir0.072Nb0.928 159 | Li2Pd1 160 | Ga4Sc1V2 161 | Al2Mg0.9Pr0.1 162 | Al8La1Mn4 163 | Al1.65Ga0.35Nd1 164 | Al1Cu1Lu1 165 | Co2Nd0.5Tb0.5 166 | Mg2.17Tm0.83 167 | Ba1Pb1.5Tl1.5 168 | Co8Mn4Sm1 169 | Al6Mn6Yb1 170 | Cu2Nd1 171 | Er2Ni17 172 | Ag7.5Al9.5La1.5 173 | Ba8Ga16Sn30 174 | Cu3Ga2U1 175 | Ag4Nd3Sn4 176 | Al1Fe1Hf1 177 | Pt1W1 178 | Hg1Ho1 179 | Ho1Pd1Tl1 180 | Sn3Tb1 181 | Al3Er0.8Y0.2 182 | Mn1.72Sn0.93 183 | Al3.7Mg0.3 184 | Ni2Pr0.945 185 | Fe1.5Ni0.25Pt0.25 186 | La0.99Mn0.32Ni4.71 187 | La1Mg1Rh1 188 | Cu5.38Ti3.33Zr3.29 189 | Gd1.12Mg0.88Ni4 190 | Ga8Ru1U2 191 | Al8.165Mo1Ni1.835 192 | Ni5Y0.33Zr0.67 193 | Al1Ti2 194 | Co2Ga7Mg3 195 | Al1Au1Y1 196 | Al1Ho1 197 | In39.67K21.33 198 | Ce3Pt4 199 | Rh3Sc1 200 | Fe1.6Ni2.4 201 | Dy1Fe6Ga6 202 | Tc2Y1 203 | Ce1Rh1Zn1 204 | Ce1Mn1Ni4 205 | Fe6Ga6Pr1 206 | Cu3.2Ga1.8Lu1 207 | Ce1Co2 208 | In1Pt2Sc1 209 | Gd5In4Pt2 210 | Ta0.167Tc0.833 211 | Mn23Pr3Tm3 212 | Gd6Mn23 213 | Cu2Ga1 214 | Fe17Yb2 215 | Ag0.75Eu1Ga3.25 216 | Lu2Ni1Sn6 
217 | Al3.25Cu3.25Nb6.5 218 | Hg0.1Sn0.9 219 | Ni1.014Sn1Ti0.997 220 | Fe2Y1 221 | La3Pb13Rh4 222 | Al7Cu16Tm6 223 | In4Ir1Sr1 224 | Co2Ga1Ni1 225 | Al1Au1Ho1 226 | Mg1.92Pb0.08 227 | In1Lu1Pd1 228 | In0.74Mn1.26Tm1 229 | Co1Dy8In3 230 | Ca1Ga0.85In0.15 231 | Ce0.174Pd0.826 232 | Ce1Ir3 233 | Cu6Th1 234 | Al4Fe13Nd2 235 | Ga21Ni10Tb4 236 | Li1.99Nd5Sn7.01 237 | Pd4Tm3 238 | Lu1Pd1Sn1 239 | Ti7Zr3 240 | Mg1Pd0.63Zn1.37 241 | Am1Pt5 242 | Bi4Ce3Pd3 243 | Al9Dy1Ni3 244 | La1Ni1Sn2 245 | Ir3La5 246 | Gd1 247 | Fe7Mo6 248 | Co0.7Ga1Mn0.3 249 | Al0.29Ni0.27Ti0.44 250 | Co1Ta1Ti1 251 | Al1.39Nb1Ni0.62 252 | Ce1Mg1Ni4 253 | Mg1.78Ni1Sn0.22 254 | Lu1Sn2 255 | Ca1Cu1In2 256 | Rh1Ru1 257 | Hg41.29Pu9.71 258 | -------------------------------------------------------------------------------- /demo/demo_oxide.csv: -------------------------------------------------------------------------------- 1 | formula 2 | Al1Cu1O2 3 | Cl3Hg6O2 4 | Bi2O6.8Rh2 5 | B1Nd1O3 6 | Ir1K1O3 7 | Cd1.333O4Sn1.333 8 | Co1La1O2.883 9 | Cr3Li1O8 10 | Co1Dy1O3 11 | Ca4.74Ir3O12 12 | Fe1.08O4P1 13 | Co4K10O9 14 | Mg0.75O5Ti2.25 15 | Ba2Cu1O3.39 16 | K4O5W1 17 | H2Cu1O2 18 | Li1.142Ni0.858O2 19 | As2Ba3O8 20 | C1O3Zn1 21 | Co2.35O3.86Ru0.65 22 | Cu1O2.61Sc1 23 | Ag1O4Tc1 24 | O10Sb2Tl10 25 | Dy0.2O1.9Zr0.8 26 | K2O2Pb1 27 | Ba1.04Nb1O3 28 | B2Cu1O4 29 | Ba2O5W1 30 | Dy2O5Ti1 31 | Ag1O4Re1 32 | F1Mn1O3 33 | O3Tm1.8Y0.2 34 | O4Ru1Sr2 35 | Cu1O6Ta2 36 | Ag1O6Os2 37 | Nd2O7Pt2 38 | B2Cd2O5 39 | C1Mg1O3 40 | Li2O7Si3 41 | Ce0.875O1.938Y0.125 42 | H46O23Xe3.27 43 | K1.67O5.54Ta2 44 | O53Ta18Th4 45 | O0.333Zr1 46 | Ba3O11Te4 47 | O7Pr2Te2 48 | Br2O1Rb4 49 | Cr3Cu1O8 50 | Hg2O7Te2 51 | Al11O16.96Pr0.53 52 | Ba2Ir3O9 53 | Gd2O7Si2 54 | K2O16S5 55 | H4O4V2 56 | Li6O7Zr2 57 | Fe2.08Mn0.92O4 58 | Cu0.1O1Zn0.9 59 | Mg1Mo1O4 60 | Fe12Na4O20 61 | Mg1.85O3.76Si0.94 62 | I1Li1O3 63 | Cl16Mo4O2 64 | Cr2Fe2O5 65 | Ge1O4Sr2 66 | K1O2Pr1 67 | Eu3O4 68 | Ba1Co1O2.74 69 | Fe2O7P2 70 | Mg1Nb2O6 71 | O4Pb2Pt1 72 | H3F1O1 73 | 
Al14Ca12O33.32 74 | B4Eu1O7 75 | O3Rb2Ti1 76 | Al0.1O4.9Ta1.9 77 | Co1O6V2 78 | Ce0.65O2Zr0.35 79 | K1O5Sb3 80 | O3Pr1Sr0.963 81 | Bi12.24Co12.8O40 82 | Cr1K1O2 83 | Ba6O40Ti17 84 | Cs6In2O6 85 | Ho0.05O0.68Zn0.6 86 | Mo10O31Sb2 87 | H2Ni1O2 88 | Cu16O14.15 89 | Lu2O3 90 | In0.18O3W1 91 | Co2O5.914Sr3 92 | Ca1Ni4O8 93 | Cs1N1O2 94 | Ba1O7U2 95 | Cl1O3Rb1 96 | Au1Ga1O2 97 | Fe1O3Si1 98 | Fe1O4S1 99 | Na6O13Ti6 100 | O1Zr3 101 | K3Nb1O8 102 | Cu0.8O1Pt0.2 103 | Ce0.5O1.77Th0.5 104 | B4O7Pb1 105 | Al0.8O21.6Si10.2 106 | La2O9Se3 107 | O5Rb0.48V2 108 | Al2O5Si1 109 | Nd1O3.05Zr1 110 | I1Na1O4 111 | Ce0.58O2Zr0.42 112 | Fe2.8In0.2O4 113 | Mn1.5Mo8O11 114 | Fe2O6.74Sr3 115 | Ag1.2O8V3 116 | Li2Mo4O13 117 | Cl2O1Os1 118 | Cu5O30Ta11 119 | Ni3O8V2 120 | Li0.8Mn0.81O2 121 | Ce0.8O3.4Y1.2 122 | La1O3Ru1 123 | Nb5O14Pr1 124 | Br1N1O3 125 | Co1O3Sr2 126 | Ho2O5Si1 127 | H3.568Mn1O3.784 128 | Ho3.536O2Sb2 129 | Nd0.5O2Pa0.5 130 | Li8O6Pb1 131 | Ge4O9Sr1 132 | Na0.08O1.88Zr0.92 133 | Mo20O64U3 134 | Al1La1O3 135 | H4O7Sb2 136 | B4Mg1O7 137 | La1.6O8.81U2.4 138 | Fe2K4O5 139 | B9Cs1O14 140 | Cu2.2Mg0.8O3 141 | N0.9Nb1O0.1 142 | Fe1.97Mg1.03O4 143 | Co2.108O4Ru0.892 144 | Mn9Na4.32O18 145 | Cd1O6Sb2 146 | O3.78Sr1U1 147 | Nd1O4Sb1 148 | Mn1O3Y1 149 | O1.862Y0.28Zr0.72 150 | Fe1K5O4 151 | O2V0.1Zr0.9 152 | Al2.23Mg0.655O4 153 | Ag4O4Si1 154 | Cd2Ge1O4 155 | K6O7Ti2 156 | O9S2W1 157 | Co1Na0.32O2 158 | Gd2O1S2 159 | Cr0.02O2Ti0.98 160 | Gd1O2 161 | Ir1Na1O3 162 | H0.99Fe1.67O3 163 | H18O10Sr1 164 | Li0.69Ni1.01O2 165 | Cu1O4S1 166 | H8Ba1O5 167 | Li0.48Ni0.52O1 168 | Lu2.67O6.67Ti1.33 169 | Fe1O2.7756Sr1 170 | K6O7Si2 171 | Li0.89Ni1.01O2 172 | Al2.305Mg0.545O4 173 | Cd3O10Se3 174 | Mo10Na6O33 175 | Er0.667O1.667Ti0.333 176 | Hg2N2O4 177 | Co1.05Fe1.95O4 178 | K1.92O24Si12 179 | Cs1O6Os2 180 | Cd0.9O1Zn0.1 181 | O12Ru3.05Sr4 182 | O6Rb1Te2 183 | Ag1Lu1O2 184 | Fe2Li2O4 185 | Gd2Mo2O7 186 | O12P4Sr2 187 | H4O6U1 188 | O6Pb1V2 189 | O9P2Th2 190 | Fe0.037Mg0.963O1 191 | Nb1O4Rh1 192 | 
Eu0.01O1Zn0.99 193 | Ni1O6Ta2 194 | Al2.03Mg0.93O4 195 | Dy0.67O1.67Zr0.33 196 | La2Mo2O8.18 197 | Na4O4Pb1 198 | Ba1In2O4 199 | Bi1.6Ho0.4O3 200 | O5Rb4Sb2 201 | Er1.9Mn0.1O3 202 | Er2Ge2O7 203 | Cu1Nd2O3.95 204 | O9P2Zr2 205 | Li0.611Mn2O3.88 206 | Li1.82O3Zr1 207 | Ga0.5In1.5O3 208 | O3Te1 209 | C1Ca1O3 210 | Li0.795Nb1O2 211 | Cs1O6W2 212 | O0.532V1 213 | O18Pr10 214 | Nb2O6.43Tl2 215 | O2Pb0.986 216 | O2Rb9 217 | O7V2Y2 218 | Ca2.8O6Tl2.2 219 | Ce0.1O2Pr0.9 220 | Co1O3Se1 221 | Ag2O6Te2 222 | Al2.66O4 223 | O7Sr2V2 224 | K2O7U2 225 | Mo4O11Tb4 226 | Ir3O11Sr5 227 | Cu0.98La1O2.59 228 | Al2.43Mg0.35O4 229 | Al1Fe2O4 230 | Ba2.72Bi1.28O5.664 231 | O1.91653Y0.16506Zr0.83447 232 | Co0.94Li1.03O1.88 233 | Ca2Nb2O7 234 | O2Tb1 235 | Bi0.775O1.5Tb0.225 236 | O2Tb2Te1 237 | Bi2O2S1 238 | Nb1O4Yb1 239 | Ca0.241Mn0.759O1 240 | N2O6Pd1 241 | Be3O4Sr1 242 | Ce11O20 243 | Cu0.15Mg0.85O1 244 | K1O8Ru4 245 | Al2.96O22.32Si8.94 246 | La3O10Os2 247 | Ba1Nb8O14 248 | Mg0.6875O1Zn0.3125 249 | Li2.333O4Ti1.667 250 | O12Re1Sc6 251 | F1O32Si16 252 | O73W25 253 | Li7Nb1O6 254 | H1Cl1O4 255 | Li0.735Mn1.767O4 256 | C1N6O1 257 | Ca2Fe9O13 258 | -------------------------------------------------------------------------------- /demo/demo_train.csv: -------------------------------------------------------------------------------- 1 | formula,Space Group,Bravais,a,b,c,alpha,beta,gamma 2 | C4Hf1Ta2U1,225,cubic (F),4.615,4.615,4.615,90.0,90.0,90.0 3 | Co1Sb1Ta1,216,cubic (F),5.882999999999999,5.882999999999999,5.882999999999999,90.0,90.0,90.0 4 | Ge0.8Sb0.133Se0.133Te0.867,225,cubic (F),5.9510000000000005,5.9510000000000005,5.9510000000000005,90.0,90.0,90.0 5 | C1Ni1,225,cubic (F),4.077,4.077,4.077,90.0,90.0,90.0 6 | Ir7Mg44,216,cubic (F),20.097,20.097,20.097,90.0,90.0,90.0 7 | Ca1Zn12.88,226,cubic (F),12.1543,12.1543,12.1543,90.0,90.0,90.0 8 | Co2Fe0.76Ga1Sc0.24,225,cubic (F),5.7364,5.7364,5.7364,90.0,90.0,90.0 9 | Eu2Mn1O7Sb1,227,cubic 
(F),10.315999999999999,10.315999999999999,10.315999999999999,90.0,90.0,90.0 10 | Cu0.2Fe2Ni0.6O4Zn0.2,227,cubic (F),8.3741,8.3741,8.3741,90.0,90.0,90.0 11 | Ce0.81O1.9Yb0.19,225,cubic (F),5.41364,5.41364,5.41364,90.0,90.0,90.0 12 | Al2.4Fe14.4Gd7.2,227,cubic (F),7.5,7.5,7.5,90.0,90.0,90.0 13 | Fe2.67O4,227,cubic (F),8.3482,8.3482,8.3482,90.0,90.0,90.0 14 | Fe1.7Mg1O4Sc0.3,227,cubic (F),8.416,8.416,8.416,90.0,90.0,90.0 15 | Cd0.5Co0.5Cr2S4,227,cubic (F),10.08,10.08,10.08,90.0,90.0,90.0 16 | Co1Cr1Fe0.4Ga0.6O4,227,cubic (F),8.3499,8.3499,8.3499,90.0,90.0,90.0 17 | Np1O1,225,cubic (F),5.01,5.01,5.01,90.0,90.0,90.0 18 | Bi1Na1S2,225,cubic (F),5.75,5.75,5.75,90.0,90.0,90.0 19 | Cu0.95Ir0.05,225,cubic (F),3.6289999999999996,3.6289999999999996,3.6289999999999996,90.0,90.0,90.0 20 | Bi1Cm1,225,cubic (F),6.327999999999999,6.327999999999999,6.327999999999999,90.0,90.0,90.0 21 | Mg0.5Ni0.5O1,225,cubic (F),4.1889,4.1889,4.1889,90.0,90.0,90.0 22 | C18H54As6N9O51V15,229,cubic (I),16.6217,16.6217,16.6217,90.0,90.0,90.0 23 | Ba1Ce2Se4,220,cubic (I),9.186,9.186,9.186,90.0,90.0,90.0 24 | Ce0.47Co1.97Fe2.03Sb12.14,204,cubic (I),9.09,9.09,9.09,90.0,90.0,90.0 25 | Cu3Ga8Mo2,220,cubic (I),11.917,11.917,11.917,90.0,90.0,90.0 26 | Al14O96Rb14Si34,217,cubic (I),14.374,14.374,14.374,90.0,90.0,90.0 27 | Cs18O6Tl8,197,cubic (I),13.3724,13.3724,13.3724,90.0,90.0,90.0 28 | As3Ir1,204,cubic (I),8.4673,8.4673,8.4673,90.0,90.0,90.0 29 | Nb1.6Rh0.4,229,cubic (I),3.241,3.241,3.241,90.0,90.0,90.0 30 | Bi2Sm1Te4,220,cubic (I),10.57,10.57,10.57,90.0,90.0,90.0 31 | Pa3Sb4,220,cubic (I),9.1098,9.1098,9.1098,90.0,90.0,90.0 32 | Ir1Sb3,204,cubic (I),9.249,9.249,9.249,90.0,90.0,90.0 33 | La3Li6Nb1.5O12Y0.5,230,cubic (I),12.9136,12.9136,12.9136,90.0,90.0,90.0 34 | Au0.3Nb0.7,229,cubic (I),3.2578,3.2578,3.2578,90.0,90.0,90.0 35 | Nb0.67Pd0.33,229,cubic (I),3.23,3.23,3.23,90.0,90.0,90.0 36 | Mo1.45Re0.55,229,cubic (I),3.131,3.131,3.131,90.0,90.0,90.0 37 | Rb8Sn43.96,230,cubic 
(I),24.1162,24.1162,24.1162,90.0,90.0,90.0 38 | Br15Ta6,230,cubic (I),21.309,21.309,21.309,90.0,90.0,90.0 39 | Fe4.2O12Pr0.7Sc0.8Y2.3,230,cubic (I),12.639000000000001,12.639000000000001,12.639000000000001,90.0,90.0,90.0 40 | Bi4La3Pt3,220,cubic (I),10.13,10.13,10.13,90.0,90.0,90.0 41 | Ca1Nd2S4,220,cubic (I),8.53,8.53,8.53,90.0,90.0,90.0 42 | H1.92Al6Na8.08O28.88S0.98Si6,218,cubic (P),9.084,9.084,9.084,90.0,90.0,90.0 43 | Ge13Ir4Yb3,223,cubic (P),8.936,8.936,8.936,90.0,90.0,90.0 44 | Er1Ir1,221,cubic (P),3.367,3.367,3.367,90.0,90.0,90.0 45 | Ag6.979As0.886Cu0.021S6Sb0.114,198,cubic (P),10.476,10.476,10.476,90.0,90.0,90.0 46 | As0.3Rh1Se1.7,205,cubic (P),5.959,5.959,5.959,90.0,90.0,90.0 47 | Au2.2Ba8Ge41.4,223,cubic (P),10.7081,10.7081,10.7081,90.0,90.0,90.0 48 | Rh3V1,221,cubic (P),3.784,3.784,3.784,90.0,90.0,90.0 49 | Co1Ga1,221,cubic (P),2.88,2.88,2.88,90.0,90.0,90.0 50 | Ba1N1O2Ta1,221,cubic (P),4.1128,4.1128,4.1128,90.0,90.0,90.0 51 | Ga0.8Ni3.04Sb0.16,221,cubic (P),3.6069999999999998,3.6069999999999998,3.6069999999999998,90.0,90.0,90.0 52 | Eu1Pd1Si1,198,cubic (P),6.428,6.428,6.428,90.0,90.0,90.0 53 | Ba1In0.5O2.75Sn0.5,221,cubic (P),4.164,4.164,4.164,90.0,90.0,90.0 54 | Ga8Pd3Yb1,221,cubic (P),8.4346,8.4346,8.4346,90.0,90.0,90.0 55 | Ba1Bi0.001O3Zr0.999,221,cubic (P),4.1951800000000015,4.1951800000000015,4.1951800000000015,90.0,90.0,90.0 56 | Ni1S1.968,205,cubic (P),5.6765,5.6765,5.6765,90.0,90.0,90.0 57 | B12,195,cubic (P),7.3195,7.3195,7.3195,90.0,90.0,90.0 58 | F1Rb1,221,cubic (P),3.29,3.29,3.29,90.0,90.0,90.0 59 | C0.5Ti3Zn1,221,cubic (P),4.162,4.162,4.162,90.0,90.0,90.0 60 | La1Tl3,221,cubic (P),4.7989999999999995,4.7989999999999995,4.7989999999999995,90.0,90.0,90.0 61 | Ba8Ni3.7Si41.4,223,cubic (P),10.2906,10.2906,10.2906,90.0,90.0,90.0 62 | Cr6Ge6Sc1,191,hexagonal (P),5.102,5.102,8.229,90.0,90.0,120.0 63 | P7Rh12Y2,176,hexagonal (P),9.619,9.619,3.792,90.0,90.0,120.0 64 | Al1Co4Nd1,191,hexagonal (P),5.07,5.07,4.034,90.0,90.0,120.0 65 | 
Ba3Ni1O9Ru2,194,hexagonal (P),5.7275,5.7275,14.0729,90.0,90.0,120.0 66 | Br3Cs1Mn1,194,hexagonal (P),7.6179999999999986,7.6179999999999986,6.519,90.0,90.0,120.0 67 | N2O5,194,hexagonal (P),5.45,5.45,6.66,90.0,90.0,120.0 68 | P2.54Si6,162,hexagonal (P),6.16,6.16,13.17,90.0,90.0,120.0 69 | Al0.29Ce0.47La0.34Mn0.4Nd0.14Ni4.31Pr0.05,191,hexagonal (P),4.9802,4.9802,4.0522,90.0,90.0,120.0 70 | Ge1.4La1Ni0.6,191,hexagonal (P),4.194,4.194,4.312,90.0,90.0,120.0 71 | B1.934Nb1,191,hexagonal (P),3.1076200000000003,3.1076200000000003,3.28444,90.0,90.0,120.0 72 | Fe6Sn6.2U0.54,191,hexagonal (P),5.3346,5.3346,8.9063,90.0,90.0,120.0 73 | H62.88B9Ba4Na14.4O289.44Si81Sr21.6Zr26,165,hexagonal (P),26.509,26.509,9.975,90.0,90.0,120.0 74 | Cr1Nb4Se8,194,hexagonal (P),6.904,6.904,12.57,90.0,90.0,120.0 75 | Tl1,194,hexagonal (P),3.47,3.47,5.52,90.0,90.0,120.0 76 | Ba1Fe0.6Mn0.4O2.73,194,hexagonal (P),5.74644,5.74644,24.0433,90.0,90.0,120.0 77 | Al1F6Li1Yb1,163,hexagonal (P),5.042,5.042,9.868,90.0,90.0,120.0 78 | H13.332F9.333N3.333Nb2O2,164,hexagonal (P),20.3757,20.3757,8.0851,90.0,90.0,120.0 79 | Eu2Ge1Na4P4,186,hexagonal (P),9.317,9.317,7.2,90.0,90.0,120.0 80 | Ca1F6Li1Ni1,163,hexagonal (P),5.06,5.06,9.7447,90.0,90.0,120.0 81 | Cu1Sb1Yb1,186,hexagonal (P),4.452,4.452,7.995,90.0,90.0,120.0 82 | Ca1Ge1O5Ti0.807Zr0.192,15,monoclinic (C),7.1749,8.93321,6.67312,90.0,113.6,90.0 83 | Ag2P2S6Zn1,15,monoclinic (C),6.2952,10.866,26.528000000000002,90.0,96.234,90.0 84 | H2Ga1Na3O9P2,12,monoclinic (C),15.4319,7.1637,7.0565,90.0,96.637,90.0 85 | H2Ce2Cs1F9O1,15,monoclinic (C),15.4579,6.9826,8.5656,90.0,117.93799999999999,90.0 86 | Co1F1Fe1O4P1,9,monoclinic (C),12.968,6.444,9.818,90.0,118.27,90.0 87 | K8.91Nd1.09P4S16,15,monoclinic (C),20.1894,9.7679,17.493,90.0,115.661,90.0 88 | Na3O12P1.5Sc0.5Si1.5Zr1.5,15,monoclinic (C),15.616,9.0188,9.1841,90.0,123.92,90.0 89 | Ba1Cu10P4,12,monoclinic (C),23.288,3.907,9.533999999999999,90.0,92.26,90.0 90 | Li5O6Re1,12,monoclinic 
(C),5.0649,8.7314,5.0277,90.0,110.18700000000001,90.0 91 | Ge2.94Tm2,15,monoclinic (C),9.0577,6.6386,7.7596,90.0,115.678,90.0 92 | Nd3Pt7Sb4,12,monoclinic (C),16.44,4.293,10.306,90.0,128.58,90.0 93 | Co1Ge2Tb2,12,monoclinic (C),10.569,4.212,10.687000000000001,90.0,118.18,90.0 94 | Ba8Cu2O54Ti22,12,monoclinic (C),19.865,11.469000000000001,9.94,90.0,109.02,90.0 95 | C2H5Na3O8,15,monoclinic (C),20.366,3.4583,10.2946,90.0,106.48299999999999,90.0 96 | Cu2Na2S4Zr1,12,monoclinic (C),13.657,3.72,7.025,90.0,112.44,90.0 97 | K2Mo1O2S2,15,monoclinic (C),11.2298,7.0741,9.705,90.0,121.7337,90.0 98 | Ca1Nb2O8Th1,15,monoclinic (C),5.4460000000000015,11.207,5.1370000000000005,90.0,94.7,90.0 99 | Ca2.12O10Sb2.99Sm7.88,12,monoclinic (C),13.462,3.8429,17.458,90.0,93.62,90.0 100 | H12Br2Co1O6,12,monoclinic (C),11.0158,7.1694,6.9111,90.0,124.76100000000001,90.0 101 | H2Al1.98Ca1.84Fe1.45K0.1Mg3.06Mn0.02Na0.46O24Si6.32Ti0.33,12,monoclinic (C),9.8699,18.0459,5.3177,90.0,105.037,90.0 102 | Cs1O12P4Sm1,14,monoclinic (P),10.382,8.978,11.205,90.0,106.398,90.0 103 | H72I8O41Y6,14,monoclinic (P),12.9099,14.805,14.7933,90.0,90.821,90.0 104 | Ca2O6Sb1Sm1,14,monoclinic (P),5.6062,5.8553,8.0977,90.0,90.23,90.0 105 | H8Co0.36Mg0.3Mn0.17Na2Ni0.12O12S2,14,monoclinic (P),11.147,8.267999999999999,5.5396,90.0,100.51700000000001,90.0 106 | Bi0.58Ni0.29O3Pb0.42Ti0.71,6,monoclinic (P),3.9657,3.9519,3.9886,90.0,90.10799999999999,90.0 107 | H20N4Na2O20P6,14,monoclinic (P),13.363,11.58,6.809,90.0,101.87,90.0 108 | C16H48Br24N4Se12,14,monoclinic (P),12.645999999999999,16.499000000000002,16.844,90.0,101.7,90.0 109 | C1.2Cl16Cs3.036Zr6,14,monoclinic (P),11.032,11.850999999999999,12.472999999999999,90.0,108.01,90.0 110 | H0.8F1.08Mg8.792O16.88Si4Ti0.208,14,monoclinic (P),4.7404,10.238,13.651,100.90899999999999,90.0,90.0 111 | H21K4Nb3O36S6,4,monoclinic (P),9.961,18.088,10.036,90.0,118.84,90.0 112 | Ge4O17Rb14Si6,14,monoclinic (P),14.554,7.922999999999999,32.872,90.0,103.5,90.0 113 | 
H46K3Na4O58Pr1W10,14,monoclinic (P),30.049,16.055,11.462,90.0,96.36,90.0 114 | Bi1K1S4Si1,14,monoclinic (P),6.4769,6.7371,17.168,90.0,108.14,90.0 115 | O22P2Rb0.45W6,10,monoclinic (P),13.991,3.765,8.561,90.0,114.22,90.0 116 | H3K3O13P2V2,14,monoclinic (P),5.0992,29.16800000000001,8.1146,90.0,91.65,90.0 117 | Cs1O7P2Ti1,14,monoclinic (P),7.7247,10.2237,8.3429,90.0,104.788,90.0 118 | H8Cs1O12Pr1S2,14,monoclinic (P),6.671,19.054000000000002,8.839,90.0,94.55,90.0 119 | La6.7S24Sb9.3,14,monoclinic (P),8.6829,14.3334,15.2691,90.0,90.077,90.0 120 | Co1K2N4O12,14,monoclinic (P),6.8260000000000005,10.244000000000002,15.088,90.0,95.87,90.0 121 | F2O6S2Xe1,14,monoclinic (P),6.706,13.237,7.769,90.0,96.5,90.0 122 | Cd25.39Rb16Sb36,63,orthorhombic (C),16.499000000000002,12.390999999999998,12.4,90.0,90.0,90.0 123 | H2.54Al1.48Be2.46Ca4O28Si9.06,63,orthorhombic (C),23.2028,4.9805,19.429,90.0,90.0,90.0 124 | Al31Mn6Ni2,63,orthorhombic (C),7.55,23.8,12.5,90.0,90.0,90.0 125 | Ce3Pd6Sn10,63,orthorhombic (C),44.027,6.271,6.1266,90.0,90.0,90.0 126 | H32N4O8Re4S20Te4,63,orthorhombic (C),12.6463,16.23479,19.8038,90.0,90.0,90.0 127 | Nb1O9Rb1W2,35,orthorhombic (C),22.00985,12.66916,3.8989,90.0,90.0,90.0 128 | Sn5Yb3,63,orthorhombic (C),10.193,8.1675,10.375,90.0,90.0,90.0 129 | H8Cu1F4N2,64,orthorhombic (C),13.235,6.067,6.069,90.0,90.0,90.0 130 | Cu1La2O4.03,64,orthorhombic (C),5.3618,13.1688,5.4001,90.0,90.0,90.0 131 | Cl1N3,36,orthorhombic (C),5.6456,11.7515,4.3077,90.0,90.0,90.0 132 | Au10Gd5.61Mg1.39,64,orthorhombic (C),13.502,9.955,10.093,90.0,90.0,90.0 133 | Cu1La1.86O4.09Sr0.14,64,orthorhombic (C),5.3245,5.3473,13.1935,90.0,90.0,90.0 134 | Co3Ga9Lu2,63,orthorhombic (C),12.662,7.313,9.322000000000001,90.0,90.0,90.0 135 | Bi2Pb3S6,63,orthorhombic (C),13.353,20.367,4.0615,90.0,90.0,90.0 136 | O7Pr3Ta1,63,orthorhombic (C),10.972999999999999,7.523,7.6721,90.0,90.0,90.0 137 | Ge6Lu4Zn5,36,orthorhombic (C),4.178999999999999,18.368,15.05,90.0,90.0,90.0 138 | O9P3Pr1,20,orthorhombic 
(C),11.234000000000002,8.5289,7.3199,90.0,90.0,90.0 139 | Al4Ce1Ni1,63,orthorhombic (C),4.0969999999999995,15.47,6.643,90.0,90.0,90.0 140 | B1K2Li3O14P4,64,orthorhombic (C),24.044,13.902000000000001,6.874,90.0,90.0,90.0 141 | Ce1Pd2Sn4,63,orthorhombic (C),16.315,6.291,6.081,90.0,90.0,90.0 142 | As2Cd1F12O4S2,43,orthorhombic (F),15.3496,18.1289,8.9228,90.0,90.0,90.0 143 | Ba1Cu1La1O5.13Ru1,69,orthorhombic (F),7.951,8.0307,7.9356,90.0,90.0,90.0 144 | H32Al16.32Ca0.32Na15.68O96Si23.68,43,orthorhombic (F),18.354,18.586,6.608,90.0,90.0,90.0 145 | C2Bi2Cu3O16Sr5,69,orthorhombic (F),5.468999999999999,5.483,54.26,90.0,90.0,90.0 146 | F1In1O1,70,orthorhombic (F),8.356,10.186,7.039,90.0,90.0,90.0 147 | Ba2Cu1O6.016Tl1.928,69,orthorhombic (F),5.468,5.46943,23.25114,90.0,90.0,90.0 148 | Cl12Re1Se2,43,orthorhombic (F),11.538,12.677,21.4742,90.0,90.0,90.0 149 | K0.42P1S5Ta1,70,orthorhombic (F),9.6983,14.373,21.642,90.0,90.0,90.0 150 | As2.93Pb1S6Sb0.07Tl1,43,orthorhombic (F),15.4764,47.602,5.8489,90.0,90.0,90.0 151 | Sn2Ta1,70,orthorhombic (F),9.801,5.627999999999999,19.177,90.0,90.0,90.0 152 | B2La1Os2,22,orthorhombic (F),6.5159,9.2152,10.2105,90.0,90.0,90.0 153 | Ba2Cu1O6.04Tl1.848,69,orthorhombic (F),5.4605,5.4697,23.22893,90.0,90.0,90.0 154 | B6Os8Y3,69,orthorhombic (F),5.4792,9.5139,17.6972,90.0,90.0,90.0 155 | H4Ba3I2Mo2O18,43,orthorhombic (F),13.356,45.544,4.867,90.0,90.0,90.0 156 | As2Ba0.76Fe2K0.24,69,orthorhombic (F),5.5672,5.5449,13.0888,90.0,90.0,90.0 157 | Ga2.21S3.98Sr1,70,orthorhombic (F),20.8558,20.5115,12.2138,90.0,90.0,90.0 158 | H20Ca1Na2Ni2O24P4,43,orthorhombic (F),11.934,32.774,10.986,90.0,90.0,90.0 159 | La2Ni1O4.148,69,orthorhombic (F),5.4839,5.4978,12.5326,90.0,90.0,90.0 160 | C8Rb1,70,orthorhombic (F),4.926,8.532,22.471,90.0,90.0,90.0 161 | Cl6N5S5Sb1,43,orthorhombic (F),18.05,56.72,7.38,90.0,90.0,90.0 162 | In2Mg5,72,orthorhombic (I),14.23,7.36,6.19,90.0,90.0,90.0 163 | Ba2O3Pd1,71,orthorhombic (I),13.335,4.08,3.8362,90.0,90.0,90.0 164 | 
Al1P1S4,23,orthorhombic (I),5.6604,5.7589,9.1888,90.0,90.0,90.0 165 | Fe5.3Ga6.7Nd1,71,orthorhombic (I),8.698,8.686,5.099,90.0,90.0,90.0 166 | Cd1.24La0.97Sb9Yb9.78,72,orthorhombic (I),11.7852,12.3782,16.7986,90.0,90.0,90.0 167 | Fe1Ge1Nb1,46,orthorhombic (I),7.195,11.173,6.489,90.0,90.0,90.0 168 | B2Co1Mo2,71,orthorhombic (I),7.07,4.53,3.17,90.0,90.0,90.0 169 | Ag8Pr6Sn8,71,orthorhombic (I),15.489,7.371,4.681,90.0,90.0,90.0 170 | Ge5Ir3Y2,72,orthorhombic (I),10.133,11.72,5.955,90.0,90.0,90.0 171 | Br2Nb3Se10,73,orthorhombic (I),14.205,23.7219,38.243,90.0,90.0,90.0 172 | K2Pd1S2,71,orthorhombic (I),9.339,7.107,3.588,90.0,90.0,90.0 173 | Co5.04Ga6.96Y1,71,orthorhombic (I),8.4845,8.4793,5.0996,90.0,90.0,90.0 174 | Ir3Si5Tb2,72,orthorhombic (I),9.857999999999999,11.658,5.7120000000000015,90.0,90.0,90.0 175 | H1Ag2Co3O12P3,46,orthorhombic (I),12.9814,6.5948,10.7062,90.0,90.0,90.0 176 | As2Ga1K2Na1,72,orthorhombic (I),6.7331,14.8089,6.5736,90.0,90.0,90.0 177 | Na2O2Pt1,71,orthorhombic (I),4.585,3.1189999999999998,9.588,90.0,90.0,90.0 178 | Al0.663Ca2Fe1.337O5,46,orthorhombic (I),5.3714,14.5805,5.5847,90.0,90.0,90.0 179 | Co1Ga3Zr2,46,orthorhombic (I),7.808,11.43,6.602,90.0,90.0,90.0 180 | Cu1.08Ga2.73Y1,71,orthorhombic (I),12.311,12.366,10.206,90.0,90.0,90.0 181 | Eu3Ga8,71,orthorhombic (I),4.3919999999999995,4.386,25.866,90.0,90.0,90.0 182 | Fe0.9Ta0.77Te2,53,orthorhombic (P),7.89,7.2520000000000024,6.192,90.0,90.0,90.0 183 | Co1Cr1Si1,62,orthorhombic (P),5.8,3.7,6.75,90.0,90.0,90.0 184 | Cr2Hg6O9,19,orthorhombic (P),7.3573,8.0336,20.281,90.0,90.0,90.0 185 | O3Rh1Tb1,62,orthorhombic (P),5.2538,5.7454,7.6254,90.0,90.0,90.0 186 | Al1B14Yb1,74,orthorhombic (P),5.86,10.439,8.222000000000001,90.0,90.0,90.0 187 | Co1Ga1Sm1,62,orthorhombic (P),6.867999999999999,4.493,7.252999999999999,90.0,90.0,90.0 188 | Ce1O8Se2,61,orthorhombic (P),9.748,9.174,13.74,90.0,90.0,90.0 189 | Mn1O4U1,74,orthorhombic (P),6.647,6.984,6.75,90.0,90.0,90.0 190 | Ge1.6Nd1,74,orthorhombic 
(P),4.22,4.17,14.03,90.0,90.0,90.0 191 | Pt2Si1Y1,62,orthorhombic (P),7.282,6.912000000000001,5.461,90.0,90.0,90.0 192 | H12Cl3K1Mg1O6,52,orthorhombic (P),16.119,22.4719,9.551,90.0,90.0,90.0 193 | B1Rh2,62,orthorhombic (P),5.42,3.98,7.44,90.0,90.0,90.0 194 | Dy3O7Ru1,33,orthorhombic (P),10.515999999999998,14.56,7.3329999999999975,90.0,90.0,90.0 195 | H0.7B1Be2F0.3O3.7,61,orthorhombic (P),9.677999999999999,12.312999999999999,4.439,90.0,90.0,90.0 196 | H6.2Al4.6As0.8Ca0.8Fe0.2Mg1.4Mn3O28Si5.2,59,orthorhombic (P),8.741,5.8329999999999975,18.557,90.0,90.0,90.0 197 | Mg9.14P7Pt9,55,orthorhombic (P),18.121,23.316,3.8480000000000003,90.0,90.0,90.0 198 | Ga1Li3Na2O4,58,orthorhombic (P),8.266,7.9461,6.5153,90.0,90.0,90.0 199 | Ga2.493Ni2.747O8Si0.76,74,orthorhombic (P),5.7620000000000005,17.618,8.238999999999999,90.0,90.0,90.0 200 | Dy3Pt1,62,orthorhombic (P),7.0489999999999995,9.485,6.4170000000000025,90.0,90.0,90.0 201 | Nb3Si1Te6,62,orthorhombic (P),6.353,13.937999999999999,11.507,90.0,90.0,90.0 202 | Ca3Li1O6Os1,167,rhombohedral (P),6.448261224116499,6.448261224116499,6.448261224116499,91.95930761870459,91.95930761870459,91.95930761870459 203 | H3.39Al7.3B3Ca0.022F0.43K0.003Li0.57Mn1.16Na0.74O30.57,160,rhombohedral (P),9.499817291108524,9.499817291108524,9.499817291108524,113.95423117172571,113.95423117172571,113.95423117172571 204 | O24P6Sr1Zr4,148,rhombohedral (P),9.217784607726763,9.217784607726763,9.217784607726763,56.649521601515104,56.649521601515104,56.649521601515104 205 | Cr1Mo1O3,155,rhombohedral (P),3.6430000000000002,3.6430000000000002,3.6430000000000002,85.59,85.59,85.59 206 | O28Sr9Tm1V7,161,rhombohedral (P),14.761885951473515,14.761885951473515,14.761885951473515,44.58600093317604,44.58600093317604,44.58600093317604 207 | In2Li3,166,rhombohedral (P),5.626308361419394,5.626308361419394,5.626308361419394,49.91505340794696,49.91505340794696,49.91505340794696 208 | Co3Mo22.33Na4O72,167,rhombohedral 
(P),9.202168953265556,9.202168953265556,9.202168953265556,59.39577987720728,59.39577987720728,59.39577987720728 209 | C3Co1Fe16Sm2,166,rhombohedral (P),6.544651972751832,6.544651972751832,6.544651972751832,83.54779735666246,83.54779735666246,83.54779735666246 210 | Al9Sr5,166,rhombohedral (P),12.378128484459102,12.378128484459102,12.378128484459102,27.484689023091537,27.484689023091537,27.484689023091537 211 | Co1Y1,166,rhombohedral (P),7.228549331950668,7.228549331950668,7.228549331950668,31.26748408775499,31.26748408775499,31.26748408775499 212 | Fe0.04La0.7Mn0.96O3Sr0.3,167,rhombohedral (P),5.4731042247014114,5.4731042247014114,5.4731042247014114,60.405315197655796,60.405315197655796,60.405315197655796 213 | Co0.2Li0.26Mn0.3Ni0.5O2,166,rhombohedral (P),5.083952648721705,5.083952648721705,5.083952648721705,32.189336031490996,32.189336031490996,32.189336031490996 214 | C0.167H3Al0.333Mg0.667O3.001,166,rhombohedral (P),7.791721618344548,7.791721618344548,7.791721618344548,22.54364478647596,22.54364478647596,22.54364478647596 215 | H131Fe13Na30O300P8W60,148,rhombohedral (P),30.67690167802697,30.67690167802697,30.67690167802697,57.99740733944636,57.99740733944636,57.99740733944636 216 | Co0.333Li1Mn0.333Ni0.333O2,166,rhombohedral (P),5.016052337025379,5.016052337025379,5.016052337025379,33.057524535825,33.057524535825,33.057524535825 217 | Li8O6Pt1,148,rhombohedral (P),5.900874548836902,5.900874548836902,5.900874548836902,54.618937261711004,54.618937261711004,54.618937261711004 218 | H12B12Br1K3,166,rhombohedral (P),6.882558478744175,6.882558478744175,6.882558478744175,93.46705999594349,93.46705999594349,93.46705999594349 219 | H2Ba1Cu3O10V2,166,rhombohedral (P),7.729460314278092,7.729460314278092,7.729460314278092,45.10935797000883,45.10935797000883,45.10935797000883 220 | Al3Ca0.75K0.92O12Si3,148,rhombohedral (P),9.411,9.411,9.411,91.48,91.48,91.48 221 | H3.367Al7.272B3Ca0.614F0.633Fe0.156Li1.482Mg0.018Mn0.057,160,rhombohedral 
(P),9.442961358246327,9.442961358246327,9.442961358246327,113.9357743908453,113.9357743908453,113.9357743908453 222 | Ru2Si2Sm1,139,tetragonal (I),4.171,4.171,9.6601,90.0,90.0,90.0 223 | Cu0.5La0.5O4Ru0.5Sr1.5,139,tetragonal (I),3.893,3.893,12.694,90.0,90.0,90.0 224 | Er1Fe10.4V1.6,139,tetragonal (I),8.4624,8.4624,4.7624,90.0,90.0,90.0 225 | Au4.76In1.24Sr1,139,tetragonal (I),7.1877,7.1877,5.5279,90.0,90.0,90.0 226 | C4Al4Th1,87,tetragonal (I),8.231,8.231,3.3273,90.0,90.0,90.0 227 | Ce1Mg12,139,tetragonal (I),10.32,10.32,5.94,90.0,90.0,90.0 228 | Bi2Cd1O4,141,tetragonal (I),14.495,14.495,9.3173,90.0,90.0,90.0 229 | Al2.218Ba1.109O16Ti5.782,87,tetragonal (I),9.975,9.975,2.92538,90.0,90.0,90.0 230 | B1Ni2,140,tetragonal (I),4.993,4.993,4.247,90.0,90.0,90.0 231 | Ce0.2Cu1Nd1.8O4,139,tetragonal (I),3.9481,3.9481,12.0524,90.0,90.0,90.0 232 | Al4Pr1,139,tetragonal (I),4.36,4.36,10.01,90.0,90.0,90.0 233 | Ba2Ca1.9Cu3O10.94Tl1.82,139,tetragonal (I),3.85,3.85,35.6,90.0,90.0,90.0 234 | Fe1Mo1O6Sr2,87,tetragonal (I),5.57235,5.57235,7.86665,90.0,90.0,90.0 235 | Fe2.2Rb0.61Se2,139,tetragonal (I),3.8322,3.8322,14.629000000000001,90.0,90.0,90.0 236 | Co5Fe6Re1Y1,139,tetragonal (I),8.49,8.49,4.75,90.0,90.0,90.0 237 | C1Li2N2,139,tetragonal (I),3.687,3.687,8.668,90.0,90.0,90.0 238 | Mn0.2O4Ru0.8Sr2,139,tetragonal (I),3.8813,3.8813,12.6481,90.0,90.0,90.0 239 | Ca1.72Mn0.77O4Pb0.51,142,tetragonal (I),5.189,5.189,24.156,90.0,90.0,90.0 240 | Cu1K1Na2O2,107,tetragonal (I),4.327,4.327,10.890999999999998,90.0,90.0,90.0 241 | Ce0.5Cu2Gd1.5Hg0.75O9Sr2W0.25,139,tetragonal (I),3.8327,3.8327,29.15,90.0,90.0,90.0 242 | P1Se1U1,129,tetragonal (P),3.9610000000000003,3.9610000000000003,8.177999999999999,90.0,90.0,90.0 243 | F5Pd1Rb3,127,tetragonal (P),7.4620000000000015,7.4620000000000015,6.4570000000000025,90.0,90.0,90.0 244 | Co0.1La1Ni0.9O2.54,83,tetragonal (P),10.835,10.835,3.8381,90.0,90.0,90.0 245 | Ce0.12O2Y0.04Zr0.84,137,tetragonal (P),3.6316,3.6316,5.2117,90.0,90.0,90.0 246 | 
Ba2Co1O7Si2,113,tetragonal (P),8.1709,8.1709,5.3374,90.0,90.0,90.0 247 | Ba0.985Na0.015Nb0.015O3Ti0.985,99,tetragonal (P),4.0047,4.0047,4.02214,90.0,90.0,90.0 248 | Ba1Cu0.68La2O5Pt0.32,127,tetragonal (P),6.854,6.854,5.877999999999999,90.0,90.0,90.0 249 | Na0.1O3W1,129,tetragonal (P),5.2492,5.2492,3.8953,90.0,90.0,90.0 250 | Ni2Sn1Tm2,127,tetragonal (P),7.2520000000000024,7.2520000000000024,3.628,90.0,90.0,90.0 251 | Cu2.8Te2,129,tetragonal (P),3.978,3.978,6.122000000000001,90.0,90.0,90.0 252 | H16Ca1Cd1I4O8,125,tetragonal (P),12.6814,12.6814,5.3696,90.0,90.0,90.0 253 | As1Ba1F1Zn1,129,tetragonal (P),4.2383,4.2383,9.526,90.0,90.0,90.0 254 | Cl8I1Sb1,76,tetragonal (P),6.98,6.98,24.2,90.0,90.0,90.0 255 | H44Cs13Na1Nb18O76Si1,138,tetragonal (P),16.8305,16.8305,28.914,90.0,90.0,90.0 256 | Cu1.25Gd1P1.75,129,tetragonal (P),3.798,3.798,9.73,90.0,90.0,90.0 257 | Ho14In3Rh2.85,137,tetragonal (P),9.524,9.524,23.092,90.0,90.0,90.0 258 | Ba2Cu1Hg0.922O4.25,123,tetragonal (P),3.8680000000000003,3.8680000000000003,9.484,90.0,90.0,90.0 259 | Cu1.16Sb2Sm1,129,tetragonal (P),4.297,4.297,10.0,90.0,90.0,90.0 260 | O5Pr9Sb5,85,tetragonal (P),10.2203,10.2203,9.1508,90.0,90.0,90.0 261 | B10Co29Nd3Si4,129,tetragonal (P),11.2405,11.2405,7.8905,90.0,90.0,90.0 262 | H4As2Hg5O18U2,2,triclinic (P),6.8229,6.8795,9.5959,109.456,104.834,93.867 263 | H4I3O11Y1,2,triclinic (P),7.3529,10.5112,7.0282,105.177,109.814,95.179 264 | C5H22N18O17Zn1,2,triclinic (P),9.0769,11.6288,12.659,101.11399999999999,100.204,105.719 265 | H4Li1N2O6Rb1S2,2,triclinic (P),7.8439999999999985,9.804,10.825,108.77,93.29,98.64 266 | H146N14O167P4Tb2W34,2,triclinic (P),12.732999999999999,17.455,19.151,77.85,77.47,71.47 267 | H0.911Al1F0.089Li1O4.911P1,2,triclinic (P),6.712999999999999,7.711,7.0102,91.22,117.91,91.67 268 | H26Mo12O50Si1,2,triclinic (P),14.077,14.175999999999998,13.752,111.04,93.99,119.09 269 | H47Cu1K14O89.5Si2W18,2,triclinic (P),12.7063,18.87,20.0911,73.9,74.938,71.785 270 | La10O39Te12,2,triclinic 
(P),5.6856,12.620999999999999,14.402000000000001,95.53,100.88,93.13 271 | C4Au1F12Na1O12S4,2,triclinic (P),5.2543,9.0208,10.8028,104.367,93.602,90.706 272 | C16H58Br8Mo6Na2O16,2,triclinic (P),10.811,12.27,10.776,97.31,119.36,95.42 273 | Eu1Li1Mo2O8,2,triclinic (P),10.4094,5.1989,6.754,112.686,112.575,90.04299999999998 274 | K2N0.75O0.25,1,triclinic (P),6.492999999999999,6.492999999999999,6.492999999999999,90.0,90.0,90.0 275 | H47.5Na8O64.75Sn1W12,2,triclinic (P),11.5728,13.2261,22.3361,80.859,79.7,77.759 276 | Ge3La2O9,2,triclinic (P),7.7070000000000025,5.727,16.797,90.53,109.85,88.68 277 | Mo1Nd2O10Se2,2,triclinic (P),5.3582,6.8922,12.575999999999999,85.802,83.97,70.07 278 | C4K3N4O2Re1,2,triclinic (P),7.47,7.597,6.329,105.37,110.2,114.63 279 | H278As8Na40O420Sm6W80,2,triclinic (P),20.6818,22.2615,25.1353,99.32,96.59100000000001,109.036 280 | H2Cs1O12P3Zn3,2,triclinic (P),5.1636,8.0324,14.8009,95.796,90.315,108.07799999999999 281 | Al2Ca0.2O8Si2Sr0.8,2,triclinic (P),8.379,12.970999999999998,14.277999999999999,90.11,115.48,90.06 282 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: cryspnet 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - argon2-cffi=20.1.0=py38h25fe258_2 8 | - async_generator=1.10=py_0 9 | - attrs=20.3.0=pyhd3deb0d_0 10 | - backcall=0.2.0=pyh9f0ad1d_0 11 | - backports=1.0=py_2 12 | - backports.functools_lru_cache=1.6.1=py_0 13 | - bleach=3.2.1=pyh9f0ad1d_0 14 | - ca-certificates=2020.12.5=ha878542_0 15 | - certifi=2020.12.5=py38h578d9bd_0 16 | - cffi=1.14.4=py38h261ae71_0 17 | - decorator=4.4.2=py_0 18 | - defusedxml=0.6.0=py_0 19 | - entrypoints=0.3=pyhd8ed1ab_1003 20 | - icu=58.2=hf484d3e_1000 21 | - importlib-metadata=3.3.0=py38h578d9bd_2 22 | - importlib_metadata=3.3.0=hd8ed1ab_2 23 | - ipykernel=5.4.2=py38h81c977d_0 24 | - ipython=7.19.0=py38h81c977d_0 
25 | - ipython_genutils=0.2.0=py_1 26 | - jinja2=2.11.2=pyh9f0ad1d_0 27 | - jsonschema=3.2.0=py_2 28 | - jupyter_client=6.1.7=py_0 29 | - jupyter_contrib_core=0.3.3=py_2 30 | - jupyter_contrib_nbextensions=0.5.1=py38h32f6830_1 31 | - jupyter_core=4.7.0=py38h578d9bd_0 32 | - jupyter_highlight_selected_word=0.2.0=py38h32f6830_1002 33 | - jupyter_latex_envs=1.4.6=py38h32f6830_1001 34 | - jupyter_nbextensions_configurator=0.4.1=py38h32f6830_2 35 | - jupyterlab_pygments=0.1.2=pyh9f0ad1d_0 36 | - ld_impl_linux-64=2.33.1=h53a641e_7 37 | - libedit=3.1.20191231=h14c3975_1 38 | - libffi=3.3=he6710b0_2 39 | - libgcc-ng=9.1.0=hdf63c60_0 40 | - libsodium=1.0.18=h36c2ea0_1 41 | - libstdcxx-ng=9.1.0=hdf63c60_0 42 | - libxml2=2.9.10=hb55368b_3 43 | - libxslt=1.1.34=hc22bd24_0 44 | - lxml=4.6.2=py38h9120a33_0 45 | - markupsafe=1.1.1=py38h8df0ef7_2 46 | - mistune=0.8.4=py38h25fe258_1002 47 | - nbclient=0.5.1=py_0 48 | - nbformat=5.0.8=py_0 49 | - ncurses=6.2=he6710b0_1 50 | - nest-asyncio=1.4.3=pyhd8ed1ab_0 51 | - notebook=6.1.6=py38h578d9bd_0 52 | - openssl=1.1.1i=h27cfd23_0 53 | - packaging=20.8=pyhd3deb0d_0 54 | - pandoc=2.11.3.2=h7f98852_0 55 | - pexpect=4.8.0=pyh9f0ad1d_2 56 | - pickleshare=0.7.5=py_1003 57 | - pip=20.3.3=py38h06a4308_0 58 | - prometheus_client=0.9.0=pyhd3deb0d_0 59 | - prompt-toolkit=3.0.8=pyha770c72_0 60 | - ptyprocess=0.7.0=pyhd3deb0d_0 61 | - pycparser=2.20=pyh9f0ad1d_2 62 | - pygments=2.7.3=pyhd8ed1ab_0 63 | - pyparsing=2.4.7=pyh9f0ad1d_0 64 | - pyrsistent=0.17.3=py38h25fe258_1 65 | - python=3.8.5=h7579374_1 66 | - python-dateutil=2.8.1=py_0 67 | - python_abi=3.8=1_cp38 68 | - readline=8.0=h7b6447c_0 69 | - send2trash=1.5.0=py_0 70 | - setuptools=51.0.0=py38h06a4308_2 71 | - six=1.15.0=pyh9f0ad1d_0 72 | - sqlite=3.33.0=h62c20be_0 73 | - terminado=0.9.1=py38h32f6830_1 74 | - testpath=0.4.4=py_0 75 | - tk=8.6.10=hbc83047_0 76 | - tornado=6.1=py38h25fe258_0 77 | - traitlets=5.0.5=py_0 78 | - wcwidth=0.2.5=pyh9f0ad1d_2 79 | - wheel=0.36.2=pyhd3eb1b0_0 80 | - 
xz=5.2.5=h7b6447c_0 81 | - yaml=0.2.5=h516909a_0 82 | - zeromq=4.3.3=h58526e2_3 83 | - zipp=3.4.0=py_0 84 | - zlib=1.2.11=h7b6447c_3 85 | - pip: 86 | - absl-py==0.11.0 87 | - albumentations==0.5.2 88 | - blis==0.7.4 89 | - bravado==11.0.2 90 | - bravado-core==5.17.0 91 | - cachetools==4.2.0 92 | - captum==0.3.0 93 | - catalogue==1.0.0 94 | - catalyst==20.12 95 | - chardet==4.0.0 96 | - click==7.1.2 97 | - configparser==5.0.1 98 | - cycler==0.10.0 99 | - cymem==2.0.5 100 | - docker-pycreds==0.4.0 101 | - equation==1.2.1 102 | - fastcore==1.3.13 103 | - fastprogress==1.0.0 104 | - filelock==3.0.12 105 | - flask==1.1.2 106 | - fsspec==0.8.5 107 | - future==0.18.2 108 | - gitdb==4.0.5 109 | - gitpython==3.1.11 110 | - google-auth==1.24.0 111 | - google-auth-oauthlib==0.4.2 112 | - grpcio==1.34.0 113 | - idna==2.10 114 | - imageio==2.9.0 115 | - imgaug==0.4.0 116 | - ipython-genutils==0.2.0 117 | - ipywidgets==7.6.2 118 | - itsdangerous==1.1.0 119 | - jedi==0.17.2 120 | - joblib==1.0.0 121 | - jsonpointer==2.0 122 | - jsonref==0.2 123 | - jupyterlab-widgets==1.0.0 124 | - kiwisolver==1.3.1 125 | - kornia==0.4.1 126 | - markdown==3.3.3 127 | - matminer==0.6.4 128 | - matplotlib==3.3.3 129 | - monotonic==1.5 130 | - monty==4.0.2 131 | - mpmath==1.1.0 132 | - msgpack==1.0.2 133 | - murmurhash==1.0.5 134 | - nbconvert==5.6.1 135 | - nbdev==1.1.5 136 | - neptune-client==0.4.130 137 | - networkx==2.5 138 | - numpy==1.19.4 139 | - oauthlib==3.1.0 140 | - opencv-python==4.5.1.48 141 | - opencv-python-headless==4.5.1.48 142 | - palettable==3.3.0 143 | - pandas==1.2.0 144 | - pandocfilters==1.4.3 145 | - parso==0.7.1 146 | - pillow==8.0.1 147 | - pint==0.16.1 148 | - plac==1.1.3 149 | - plotly==4.14.1 150 | - preshed==3.0.5 151 | - promise==2.3 152 | - protobuf==3.14.0 153 | - psutil==5.8.0 154 | - py3nvml==0.2.6 155 | - pyarrow==2.0.0 156 | - pyasn1==0.4.8 157 | - pyasn1-modules==0.2.8 158 | - pydicom==2.1.2 159 | - pyjwt==1.7.1 160 | - pymatgen==2020.12.18 161 | - 
pymongo==3.11.2 162 | - pytorch-ignite==0.4.2 163 | - pytorch-lightning==1.1.2 164 | - pytz==2020.5 165 | - pywavelets==1.1.1 166 | - pyyaml==5.3.1 167 | - pyzmq==20.0.0 168 | - regex==2020.11.13 169 | - requests==2.25.1 170 | - requests-oauthlib==1.3.0 171 | - retrying==1.3.3 172 | - rfc3987==1.3.8 173 | - rsa==4.6 174 | - ruamel-yaml==0.16.12 175 | - ruamel-yaml-clib==0.2.2 176 | - sacremoses==0.0.43 177 | - scikit-image==0.18.1 178 | - scikit-learn==0.24.0 179 | - scipy==1.5.4 180 | - sentencepiece==0.1.94 181 | - sentry-sdk==0.19.5 182 | - shapely==1.7.1 183 | - shortuuid==1.0.1 184 | - simplejson==3.17.2 185 | - smmap==3.0.4 186 | - spacy==2.3.5 187 | - spglib==1.16.0 188 | - srsly==1.0.5 189 | - strict-rfc3339==0.7 190 | - subprocess32==3.5.4 191 | - swagger-spec-validator==2.7.3 192 | - sympy==1.7.1 193 | - tabulate==0.8.7 194 | - tensorboard==2.4.0 195 | - tensorboard-plugin-wit==1.7.0 196 | - tensorboardx==2.1 197 | - thinc==7.4.5 198 | - threadpoolctl==2.1.0 199 | - tifffile==2020.12.8 200 | - tokenizers==0.9.4 201 | - torch==1.7.1 202 | - torchvision==0.8.2 203 | - tqdm==4.55.0 204 | - transformers==4.1.1 205 | - typing-extensions==3.7.4.3 206 | - uncertainties==2.4.4 207 | - urllib3==1.26.2 208 | - wandb==0.10.12 209 | - wasabi==0.8.0 210 | - watchdog==1.0.2 211 | - webcolors==1.11.1 212 | - webencodings==0.5.1 213 | - websocket-client==0.57.0 214 | - werkzeug==1.0.1 215 | - widgetsnbextension==3.5.1 216 | - xmltodict==0.12.0 217 | prefix: ~/anaconda3/envs/cryspnet 218 | -------------------------------------------------------------------------------- /output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AuroraLHT/cryspnet/d0cf3c67ebb40ec6d8ca96f8dd6dbab73fc352d7/output/.gitkeep -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | from cryspnet.utils 
def main():
    """Command-line entry point for CRYSPNet prediction.

    Parses the command line, loads the Bravais-lattice ensemble together with
    the lattice-parameter and space-group models, featurizes the formulas read
    from ``--input`` and writes the grouped predictions to ``--output``.

    Raises:
        Exception: if both ``--use_metal`` and ``--use_oxide`` are given.
    """
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("-i", "--input", default=None, type=str, required=True,
                        help="The input data path. The program accept .csv, .xlsx file.")
    parser.add_argument("-o", "--output", default=None, type=str, required=True,
                        help="The output directory where predictions for "
                             "Bravais Lattice, Space Group, and Lattice will be written.")
    parser.add_argument("--use_metal", action='store_true',
                        help="Whether to run prediction on the Bravais Lattice model that trained on metal subset.")
    parser.add_argument("--use_oxide", action='store_true',
                        help="Whether to run prediction on the Bravais Lattice model that trained on oxide subset.")
    parser.add_argument("--n_ensembler", default=N_ESMBLER, type=int,
                        help="number of ensembler for Bravais Lattice Prediction.")
    # The help strings below used backslash continuations that split the word
    # "preserve" into "pre serve"; implicit concatenation keeps it whole.
    parser.add_argument("--topn_bravais", default=TOPN_BRAVAIS, type=int,
                        help="The top-n Bravais Lattice the user want to preserve. "
                             "The space group and lattice parameter would "
                             "be predicted for each top-n Bravais Lattice")
    parser.add_argument("--topn_spacegroup", default=TOPN_SPACEGROUP, type=int,
                        help="The top-n Space Group the user want to preserve.")
    parser.add_argument("--batch_size", default=BATCHSIZE, type=int,
                        help="Batch size per GPU/CPU for prediction.")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Avoid using CUDA when available")

    args = parser.parse_args()

    use_cpu = args.no_cuda

    # Select which Bravais-lattice model family to load.
    if args.use_metal and args.use_oxide:
        raise Exception("Could only select --use_metal or --use_oxide")
    elif args.use_metal:
        which = "metal"
    elif args.use_oxide:
        which = "oxide"
    else:
        which = "whole"

    BE = load_Bravais_models(
        n_ensembler=args.n_ensembler,
        which=which,
        batch_size=args.batch_size,
        cpu=use_cpu,
    )
    LPB = load_Lattice_models(batch_size=args.batch_size, cpu=use_cpu)
    SGB = load_SpaceGroup_models(batch_size=args.batch_size, cpu=use_cpu)

    formula = load_input(args.input)
    ext_magpie = featurizer.generate(formula)

    bravais_probs, bravais = BE.predicts(ext_magpie, topn_bravais=args.topn_bravais)

    lattices = []
    spacegroups = []
    spacegroups_probs = []

    # For every retained Bravais candidate, overwrite the "Bravais" column
    # with that candidate and run the two downstream models on it.
    for i in range(args.topn_bravais):
        ext_magpie["Bravais"] = bravais[:, i]
        lattices.append(LPB.predicts(ext_magpie))
        sg_prob, sg = SGB.predicts(ext_magpie, topn_spacegroup=args.topn_spacegroup)
        spacegroups.append(sg)
        spacegroups_probs.append(sg_prob)

    out = group_outputs(bravais, bravais_probs, spacegroups, spacegroups_probs, lattices, formula)
    dump_output(out, args.output, index=False)


if __name__ == "__main__":
    main()
def is_valid_crystal(rc:random_crystal):
    """Tell whether `rc` is a generated crystal whose `valid` attribute is truthy."""
    if rc is None:
        return False
    return rc.valid

def sample_lattice(one:pd.Series, bra:str, trails:int, err_dict:Dict=None):
    """
    Yield `trails` Lattice objects built from the predicted lattice parameters.

    When `err_dict` is None the predicted parameters are simply copied
    `trails` times. Otherwise the parameters relevant to the crystal system
    are divided by a random factor drawn from the entry `err_dict[bra]`
    (its 'mean' and 'std' fields), clipped to mean +/- 2 * 'std'
    (the diagonal of 'std' for non-cubic systems).

    Arguments:
        one : one row from the input dataframe
        bra : Bravais Lattice
        trails : number of lattice we want to produce for crystal generation
        err_dict : error from the lattice parameter model
    """

    lattice_type = bra.split(" ")[0]

    if err_dict is None:
        a, b, c, alpha, beta, gamma = (
            float(one[name]) for name in ('a', 'b', 'c', 'alpha', 'beta', 'gamma')
        )
        for _ in range(trails):
            yield Lattice.from_para(a, b, c, alpha, beta, gamma, ltype=lattice_type,)
        return

    stats = err_dict[bra]
    mean, spread = stats['mean'], stats['std']

    if "cubic" in bra:
        # a single scalar parameter: univariate normal
        noise = np.random.normal(mean, spread, trails)
        half_width = 2*spread
    else:
        # several parameters: 'std' is handed to multivariate_normal as-is
        noise = np.random.multivariate_normal(mean, spread, trails)
        half_width = 2*np.diagonal(spread)

    noise = np.clip(noise, mean - half_width, mean + half_width)

    # The crystal system does not change between draws, so resolve it once.
    systems = ("cubic", "tetragonal", "orthorhombic", "hexagonal",
               "monoclinic", "triclinic", "rhombohedral")
    system = next((name for name in systems if name in bra), None)

    for draw in range(trails):
        factor = noise[draw]

        if system == "cubic":
            a = one['a'] / factor
            yield Lattice.from_para(a, a, a, 90, 90, 90, ltype=lattice_type,)

        elif system == "tetragonal":
            a, c = one[['a', 'c']] / factor
            yield Lattice.from_para(a, a, c, 90, 90, 90, ltype=lattice_type,)

        elif system == "orthorhombic":
            a, b, c = one[['a', 'b', 'c']] / factor
            yield Lattice.from_para(a, b, c, 90, 90, 90, ltype=lattice_type,)

        elif system == "hexagonal":
            a, c = one[['a', 'c']] / factor
            yield Lattice.from_para(a, a, c, 90, 90, 120, ltype=lattice_type,)

        elif system == "monoclinic":
            a, b, c, beta = one[['a', 'b', 'c', 'beta']] / factor
            yield Lattice.from_para(a, b, c, 90, beta, 90, ltype=lattice_type,)

        elif system == "triclinic":
            a, b, c, alpha, beta, gamma = one[['a', 'b', 'c', 'alpha', 'beta', 'gamma']] / factor
            yield Lattice.from_para(a, b, c, alpha, beta, gamma, ltype=lattice_type,)

        elif system == "rhombohedral":
            a, alpha = one[['a', 'alpha']] / factor
            yield Lattice.from_para(a, a, a, alpha, alpha, alpha, ltype=lattice_type,)
def is_stoi(formula:str):
    """Return True when the formula has no fractional stoichiometry (no "." in it)."""
    return "." not in formula

def decomp(formula:str):
    """
    Parse a formula into its element symbols and integer stoichiometries.

    An element symbol is one upper-case letter optionally followed by one
    lower-case letter, so adjacent one-letter symbols ("CO2" -> C, O) are kept
    apart. A symbol without an explicit count is given a count of 1.

    Arguments:
        formula : chemical formula with integer counts, e.g. "Pt2Si1Y1"

    Returns:
        (elements, stois) : tuple of symbols and list of int counts, same order

    Raises:
        ValueError : if no element symbol can be found in `formula`
    """
    groups = re.findall(r"([A-Z][a-z]?)([0-9]*)", formula)
    if not groups:
        raise ValueError(f"cannot parse any element from formula {formula!r}")

    elements, counts = zip(*groups)
    # an empty count string means the count was implicit, i.e. 1
    stois = [int(count) if count else 1 for count in counts]
    return elements, stois
def try_random_crystal(formula:str, sg:int, elements:List[str], stois:Union[List[int], np.ndarray], lattice:Lattice=None, vf:float=1.0, max_multi:int=5, max_atoms:int=50, start:int=-1):
    """
    Try to generate a crystal from given space group, elements, stoichiometric, and lattice (optional) information.
    The input stoichiometry may need to be multiplied by an integer before a crystal can be built.
    `max_multi` bounds that integer and `max_atoms` bounds the total number of atoms tried.

    Arguments:
        formula : chemical formula (only used in the error log)
        sg : space group number
        elements : a list of elements symbol
        stois : a list or array of stoichiometric values
        lattice : a Lattice object that contains lattice parameters and crystal system (optional)
        vf : volume factor (see random_crystal from Pyxtal)
        max_multi : maximum value of multiplicity would be tried on
        max_atoms : maximum value of atoms in a lattice
        start : multiplicity that explicitly given to work on, if this one is given then the method would not try other multiplicity

    Returns:
        (crystal, multiplicity) on success, (None, -1) when no valid crystal was produced
    """

    # Normalise once so that both branches below hand identical types over.
    sg = int(sg)
    elements = list(elements)
    stois = np.array(stois)

    def _try(stoichiometry):
        # Generation failures are logged and reported as None. There is no
        # `return` inside a `finally`, so KeyboardInterrupt/SystemExit
        # propagate instead of being swallowed.
        try:
            crystal = random_crystal(sg, elements, stoichiometry, vf, lattice=lattice)
        except Exception as e:
            logging.error(f"During random crystal generation: \n {formula} {sg} {elements} {stoichiometry} \n Error Message: {e}")
            return None
        logging.debug(f"_try Is crystal valid {is_valid_crystal(crystal)}" )
        return crystal

    if start == -1:
        # try multiply the input stoichiometric from 1 to max_multi to see if the wyckoff position is compatible
        for multi in range(1, max_multi+1):
            if max_atoms >= np.sum(stois) * multi:
                crystal = _try(list(stois*multi))
                if is_valid_crystal(crystal):
                    return crystal, multi
        return None, -1

    # An explicit multiplicity was given: try that one only.
    crystal = _try(list(stois*start))
    if is_valid_crystal(crystal):
        return crystal, start
    return None, -1
def get_max_topn_bravais(df:pd.DataFrame):
    """Return the largest N found in the "Top-N Bravais" labels of column level 0."""

    ranks = []
    for label in df.columns.get_level_values(0):
        if "Bravais" not in label:
            continue
        head = label.split(" ")[0]          # e.g. "Top-2"
        ranks.append(int(head.split("-")[1]))
    return max(ranks)

def get_max_topn_spacegroup(df:pd.DataFrame):
    """Return the largest N found in the "Top-N SpaceGroup" labels of column level 1."""

    ranks = []
    for label in df.columns.get_level_values(1):
        if "SpaceGroup" not in label:
            continue
        head = label.split(" ")[0]          # e.g. "Top-1"
        ranks.append(int(head.split("-")[1]))
    return max(ranks)

def save_random_crystal(rc:random_crystal, path:str):
    """Write the crystal `rc` to `path` in cif format and log the destination."""

    rc.to_file(filename=path, fmt='cif')
    logging.debug(f"Save rc to --> {path}")
def process(one:pd.Series, output:Path, n_trails:int, topn_bravais:int, topn_spacegroup:int, max_atoms:int, err_dict:dict):
    """
    Generate crystals for one row of a CRYSPNet prediction table.

    For every (Bravais rank, space-group rank) pair up to the requested top-n,
    up to "n_trails" lattices are sampled and a crystal is attempted on each.
    Valid crystals are written to "output" as "formula_spacegroup_trail.cif";
    the first failed attempt ends the trails for that pair.

    Arguments:
        one : a row from CRYSPNet prediction
        output : the folder the generated crystals to be saved at
        n_trails : maximum number of crystals that the process would generated
        topn_bravais : select prediction from 1 to top-n bravais as input
        topn_spacegroup : select prediction from 1 to top-n space group as input
        max_atoms : maximum amount of atoms in a lattice
        err_dict : a dictionary of error term of the lattice parameters model (from CRYSPNet)

    Returns:
        (formulas, paths) : parallel lists, one entry per crystal written
    """

    t0 = time.time()

    formula = one['formula']['-']
    elements, stois = decomp(formula)

    formulas, paths = [], []

    for b_rank in range(1, topn_bravais+1):
        prediction = one[f'Top-{b_rank} Bravais']
        bra = prediction['Bravais']

        for sg_rank in range(1, topn_spacegroup+1):
            sg = int(prediction[f'Top-{sg_rank} SpaceGroup'])
            # sg = int(one[f'Top-{topn_b} Bravais'][f'Top-{topn_b} SpaceGroup']) # don't delete this, it is a reminder of a historic bug caused by copy and paste, please check !
            # importance of logging each step

            # -1 lets the first attempt search for a working multiplicity;
            # later attempts reuse the one that worked.
            mul = -1
            lattices = sample_lattice(prediction, bra, n_trails, err_dict=err_dict)

            for trail, lattice in enumerate(lattices):
                rc, mul = try_random_crystal(formula, sg, elements, stois, lattice=lattice, start=mul, max_atoms=max_atoms)

                logging.debug(f"Process is_valid_crystal {is_valid_crystal(rc)}")
                if not is_valid_crystal(rc):
                    logging.info(f"{formula} maximum trail exceed at trail {trail} break")
                    break

                path = output/f"{formula}_{sg}_{trail}.cif"
                save_random_crystal(rc, path)
                formulas.append(formula)
                paths.append(path)

    elapsed = time.time() - t0
    logging.info(f"Finished {formula} in {elapsed:.1f}s")
    return formulas, paths
def process_space_group_only(one:pd.Series, output:Path, n_trails:int, topn_bravais:int, topn_spacegroup:int, max_atoms:int):
    """
    Generate crystals from a given row of the input DataFrame (produced by CRYSPNet).
    Lattice parameter information is not used in this method.
    The number of generated crystals is controlled by "n_trails".
    Each valid crystal is saved in folder "output" as "formula_spacegroup_trail.cif";
    as in `process`, the first failed attempt ends the trails for that space group.

    Arguments:
        one : a row from CRYSPNet prediction
        output : the folder the generated crystals to be saved at
        n_trails : maximum number of crystals that the process would generated
        topn_bravais : select prediction from 1 to top-n bravais as input
        topn_spacegroup : select prediction from 1 to top-n space group as input
        max_atoms : maximum amount of atoms in a lattice

    Returns:
        (formulas, paths) : parallel lists, one entry per crystal written
    """

    formulas = []
    paths = []
    formula = one['formula']['-']
    elements, stois = decomp(formula)

    for topn_b in range(1, topn_bravais+1):
        for topn_sg in range(1, topn_spacegroup+1):
            mul = -1
            sg = int(one[f'Top-{topn_b} Bravais'][f'Top-{topn_sg} SpaceGroup'])

            for trail in range(n_trails):
                # `formula` is the first positional parameter of
                # try_random_crystal; leaving it out shifted every argument.
                rc, mul = try_random_crystal(formula, sg, elements, stois, lattice=None, start=mul, max_atoms=max_atoms)

                if not is_valid_crystal(rc):
                    logging.info(f"{formula} maximum trail exceed at trail {trail} break")
                    break

                # only valid crystals are written out
                path = output/f"{formula}_{sg}_{trail}.cif"
                save_random_crystal(rc, path)

                formulas.append(formula)
                paths.append(path)

    logging.info(f"finished {formula}")
    return formulas, paths
def process_formula_only(one:pd.Series, output:Path, n_trails:int, max_atoms:int):
    """
    Generate crystals from a given row of the input DataFrame (any DataFrame has the same format as CRYSPNet).
    No prediction is used: every space group from 1 to 230 is tried.
    The number of generated crystals per space group is controlled by "n_trails".
    Each valid crystal is saved in folder "output" as "formula_spacegroup_trail.cif";
    the first failed attempt ends the trails for that space group.

    Arguments:
        one : a row from CRYSPNet prediction
        output : the folder the generated crystals to be saved at
        n_trails : maximum number of crystals that the process would generated
        max_atoms : maximum amount of atoms in a lattice

    Returns:
        (formulas, paths) : parallel lists, one entry per crystal written
    """
    formulas = []
    paths = []

    formula = one['formula']['-']
    elements, stois = decomp(formula)

    for sg in range(1, 230+1):
        mul = -1

        for trail in range(n_trails):
            # `formula` must come first, and `max_atoms` is now forwarded
            # instead of being silently ignored.
            rc, mul = try_random_crystal(formula, sg, elements, stois, lattice=None, start=mul, max_atoms=max_atoms)
            if is_valid_crystal(rc):
                path = output/f"{formula}_{sg}_{trail}.cif"
                save_random_crystal(rc, path)

                formulas.append(formula)
                paths.append(path)
            else:
                logging.info(f"{formula} maximum trail exceed at trail {trail} break")
                # do what the message says, matching `process`
                break

    logging.info(f"finished {formula}")
    return formulas, paths
def main():
    """Command-line entry point for random crystal generation.

    Reads a CRYSPNet prediction table (two header rows), keeps the entries
    whose formula has integer stoichiometry, generates crystals for each of
    them (in-process or through a worker pool) and finally writes an
    "index.csv" listing every .cif file found in the output folder.

    Raises:
        Exception: if ``--n_workers`` is smaller than 1.
    """
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("-i", "--input", default=None, type=str, required=True,
        help="The input data path. The program accept .csv, .xlsx file."
    )
    parser.add_argument("-e", "--error", default=DEFAULT_ERROR, type=str, required=False,
        help="The error associated with the prediction"
    )
    parser.add_argument("-o", "--output", default=None, type=str, required=True,
        help="The output directory where the generated .cif files "
             "and index.csv will be written."
    )
    parser.add_argument("--topn_bravais", default=2, type=int,
        help="The top-n Bravais Lattice the user want to preserve. "
             "The space group and lattice parameter would "
             "be predicted for each top-n Bravais Lattice"
    )
    parser.add_argument("--topn_spacegroup", default=1, type=int,
        help="The top-n Space Group the user want to preserve."
    )
    parser.add_argument("--n_workers", default=4, type=int,
        help="Number of workers used to generate random crystal"
    )
    parser.add_argument("--n_trails", default=100, type=int,
        help="Number of trails for a given composition, space group, and lattice parameter"
    )
    parser.add_argument("--timeout", default=100, type=int,
        help="You ultimate patient level in the unit of second! some entries would just run forever so we have to do discard it"
    )
    parser.add_argument("--formula_only", action='store_true',
        help="Use Only formula as information to generate structure"
    )
    parser.add_argument("--space_group_only", action='store_true',
        help="Use Only the space group information but not lattice parameter to generate structure"
    )
    parser.add_argument("--max_atoms", default=50, type=int,
        help="the maximum number of atoms per unit cell, setted to avoid generating superlarge unit cell that slow down the calculation"
    )

    args = parser.parse_args()

    if args.error is not None and Path(args.error).exists():
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # --error at files you trust.
        with open(args.error, "rb") as f:
            err_dict = pickle.load(f)
        logging.info(f"use error from {args.error}")
    else:
        logging.info("do not use error")
        err_dict = None

    csv = pd.read_csv(args.input, header=[0,1])
    stoi_entries = csv.loc[ csv['formula']['-'].map(is_stoi) ]

    output = Path(args.output)
    # parents=True so a nested output path does not fail on a missing parent
    output.mkdir(parents=True, exist_ok=True)

    if args.n_workers <= 0:
        raise Exception("argument number of worker is less than 1")

    # Build the per-row worker once; both execution modes share it.
    if args.formula_only:
        worker = partial(process_formula_only, output=output, n_trails=args.n_trails, max_atoms=args.max_atoms)
    else:
        # The top-n limits are only needed when predictions are used, so
        # formula-only mode does not look for prediction columns.
        topn_bravais = min(args.topn_bravais, get_max_topn_bravais(stoi_entries))
        topn_spacegroup = min(args.topn_spacegroup, get_max_topn_spacegroup(stoi_entries))

        if args.space_group_only:
            worker = partial(process_space_group_only, output=output, n_trails=args.n_trails, topn_bravais=topn_bravais, topn_spacegroup=topn_spacegroup, max_atoms=args.max_atoms)
        else:
            worker = partial(process, output=output, n_trails=args.n_trails, topn_bravais=topn_bravais, topn_spacegroup=topn_spacegroup, max_atoms=args.max_atoms, err_dict=err_dict)

    if args.n_workers == 1:
        logging.info("use single process")
        for _, row in tqdm(stoi_entries.iterrows(), total=len(stoi_entries)):
            worker(row)
    else:
        logging.info(f"use multiprocess with {args.n_workers} workers")

        with Pool(processes=args.n_workers) as pool:
            # submit everything first, then collect with a per-entry timeout
            pending = [
                (row['formula']['-'], pool.apply_async(worker, (row,)))  # this (row, ) could be further changed by chunksize
                for _, row in stoi_entries.iterrows()
            ]

            for formula, res in pending:
                try:
                    res.get(timeout=args.timeout)
                except TimeoutError:
                    logging.info(f"{formula} timeout in {args.timeout}s!")
                except Exception as e:
                    logging.error(f"Other Error encounter {e}")

    # The index is rebuilt from the files on disk, so crystals written by
    # workers that later timed out are still listed.
    all_cifs = list(output.glob("*.cif"))
    formulas = [cif.name.split('_')[0] for cif in all_cifs]

    pd.DataFrame({
        "formula" : formulas,
        "path" : [cif.name for cif in all_cifs],
    }).to_csv(str(output/"index.csv"), index=False)

    logging.info("Index file is saved to --> {}".format(str(output/"index.csv")))
list(output.glob("*.cif")) 400 | formulas = list(map(lambda x: x.name.split('_')[0], all_cifs)) 401 | 402 | pd.DataFrame({ 403 | "formula" : formulas, 404 | "path" : list(map(lambda x: x.name, all_cifs)), 405 | }).to_csv(str(output/"index.csv"), index=False) 406 | 407 | logging.info("Index file is saved to --> {}".format(str(output/"index.csv"))) 408 | 409 | if __name__ == "__main__": 410 | main() 411 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.16.4 2 | pandas>=0.24.2 3 | torch>=1.6.0 4 | torchvision>=0.7.0 5 | fastai==2.5.2 6 | plotly>=4.2.0 7 | regex>=2018.01.10 8 | matminer==0.6.5 9 | pymatgen==2021.2.16 10 | tqdm>=4.32.2 11 | Equation>=1.2.1 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import setup 4 | from setuptools import find_packages 5 | 6 | 7 | with open('README.md') as f: 8 | readme = f.read() 9 | 10 | with open('requirement.txt') as f: 11 | requirements = f.readlines() 12 | 13 | setup( 14 | name='cryspnet', 15 | version='0.1', 16 | description='CRYSPNet: Crystal Structure Predictions via Neural Network. Liang et al. (2020).', 17 | long_description=readme, 18 | author='H Liang, V Stanev, A. 
# NOTE(review): in this flattened dump the physical line that opens utils.py
# also carries the tail of setup.py.  That text is preserved verbatim on the
# next comment line so that no content is lost by reformatting this module.
# G Kusne, and I Takeuchi', 19 | author_email='auroralht@gmail.com, vstanev@umd.edu, aaron.kusne@nist.gov, takeuchi@umd.edu', 20 | packages=find_packages(), 21 | install_requires=[ 22 | requirements, 23 | ], 24 | include_package_data=True, 25 | 26 | classifiers=['Programming Language :: Python :: 3.6', 27 | 'Development Status :: 4 - Beta', 28 | 'Intended Audience :: Science/Research', 29 | 'Intended Audience :: System Administrators', 30 | 'Intended Audience :: Information Technology', 31 | 'Operating System :: OS Independent', 32 | 'Topic :: Other/Nonlisted Topic', 33 | 'Topic :: Scientific/Engineering'], 34 | ) 35 | -------------------------------------------------------------------------------- /utils.py: --------------------------------------------------------------------------------
"""Helpers for downloading files, verifying MD5 checksums and unpacking archives."""
import errno
import gzip
import hashlib
import os
import os.path
import shutil
import tarfile
import urllib.error
import urllib.request
import zipfile


def gen_bar_updater():
    """Build a ``reporthook`` callback that drives a tqdm progress bar.

    Returns:
        callable: ``bar_update(count, block_size, total_size)``, the signature
        ``urllib.request.urlretrieve`` expects for its ``reporthook``.
    """
    # Imported lazily (like ``requests`` further down) so the checksum and
    # extraction helpers remain importable when tqdm is not installed.
    # The original ``import tqdm`` bound the *module*, so ``tqdm(...)`` raised
    # "TypeError: 'module' object is not callable".
    from tqdm import tqdm

    pbar = tqdm(total=None)

    def bar_update(count, block_size, total_size):
        # urlretrieve reports -1 when the server sends no Content-Length;
        # only adopt a real, positive size as the bar total.
        if pbar.total is None and total_size and total_size > 0:
            pbar.total = total_size
        progress_bytes = count * block_size
        pbar.update(progress_bytes - pbar.n)

    return bar_update


def calculate_md5(fpath, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at ``fpath``.

    Args:
        fpath (str): Path of the file to hash.
        chunk_size (int, optional): Number of bytes read per iteration.
    """
    md5 = hashlib.md5()
    with open(fpath, 'rb') as f:
        # Read in chunks so large files never have to fit in memory.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()


def check_md5(fpath, md5, **kwargs):
    """Return True when the MD5 digest of ``fpath`` equals ``md5``.

    Extra keyword arguments are forwarded to :func:`calculate_md5`.
    """
    return md5 == calculate_md5(fpath, **kwargs)


def check_integrity(fpath, md5=None):
    """Return True when ``fpath`` is an existing file that matches ``md5``.

    When ``md5`` is None only the existence of the file is checked.
    """
    if not os.path.isfile(fpath):
        return False
    if md5 is None:
        return True
    return check_md5(fpath, md5)


def makedir_exist_ok(dirpath):
    """Create ``dirpath`` (and parents), ignoring "already exists" errors.

    Raises:
        OSError: For any failure other than ``errno.EEXIST``.
    """
    try:
        os.makedirs(dirpath)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def download_url(url, root, filename=None, md5=None):
    """Download a file from a url and place it in root.

    Args:
        url (str): URL to download file from
        root (str): Directory to place downloaded file in
        filename (str, optional): Name to save the file under. If None, use the basename of the URL
        md5 (str, optional): MD5 checksum of the download. If None, do not check

    Raises:
        RuntimeError: If the file is missing or fails the MD5 check after
            the download.
    """
    root = os.path.expanduser(root)
    if not filename:
        filename = os.path.basename(url)
    fpath = os.path.join(root, filename)

    makedir_exist_ok(root)

    # Nothing to do when a verified copy is already present locally.
    if check_integrity(fpath, md5):
        print('Using downloaded and verified file: ' + fpath)
        return

    try:
        print('Downloading ' + url + ' to ' + fpath)
        urllib.request.urlretrieve(
            url, fpath,
            reporthook=gen_bar_updater()
        )
    except (urllib.error.URLError, IOError):
        if not url.startswith('https'):
            # Bare raise keeps the original traceback intact.
            raise
        # Only the scheme is rewritten, hence count=1.
        url = url.replace('https:', 'http:', 1)
        print('Failed download. Trying https -> http instead.'
              ' Downloading ' + url + ' to ' + fpath)
        urllib.request.urlretrieve(
            url, fpath,
            reporthook=gen_bar_updater()
        )

    # check integrity of downloaded file
    if not check_integrity(fpath, md5):
        raise RuntimeError("File not found or corrupted.")


def list_dir(root, prefix=False):
    """List all directories at a given root

    Args:
        root (str): Path to directory whose folders need to be listed
        prefix (bool, optional): If true, prepends the path to each result, otherwise
            only returns the name of the directories found
    """
    root = os.path.expanduser(root)
    directories = [
        name for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name))
    ]

    if prefix is True:
        directories = [os.path.join(root, d) for d in directories]

    return directories


def list_files(root, suffix, prefix=False):
    """List all files ending with a suffix at a given root

    Args:
        root (str): Path to directory whose folders need to be listed
        suffix (str or tuple): Suffix of the files to match, e.g. '.png' or ('.jpg', '.png').
            It uses the Python "str.endswith" method and is passed directly
        prefix (bool, optional): If true, prepends the path to each result, otherwise
            only returns the name of the files found
    """
    root = os.path.expanduser(root)
    files = [
        name for name in os.listdir(root)
        if os.path.isfile(os.path.join(root, name)) and name.endswith(suffix)
    ]

    if prefix is True:
        files = [os.path.join(root, d) for d in files]

    return files


def download_file_from_google_drive(file_id, root, filename=None, md5=None):
    """Download a Google Drive file from and place it in root.

    Args:
        file_id (str): id of file to be downloaded
        root (str): Directory to place downloaded file in
        filename (str, optional): Name to save the file under. If None, use the id of the file.
        md5 (str, optional): MD5 checksum of the download. If None, do not check

    Raises:
        RuntimeError: If ``md5`` is given and the downloaded file does not
            match it.
    """
    # Based on https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
    import requests
    url = "https://docs.google.com/uc?export=download"

    root = os.path.expanduser(root)
    if not filename:
        filename = file_id
    fpath = os.path.join(root, filename)

    makedir_exist_ok(root)

    # check_integrity already returns False for a missing file.
    if check_integrity(fpath, md5):
        print('Using downloaded and verified file: ' + fpath)
        return

    session = requests.Session()

    response = session.get(url, params={'id': file_id}, stream=True)
    token = _get_confirm_token(response)

    if token:
        # A confirmation token was issued; repeat the request with it.
        params = {'id': file_id, 'confirm': token}
        response = session.get(url, params=params, stream=True)

    _save_response_content(response, fpath)

    # Mirror download_url: a caller-supplied checksum must actually be honoured.
    if md5 is not None and not check_md5(fpath, md5):
        raise RuntimeError("File not found or corrupted.")


def _get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, or None."""
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None


def _save_response_content(response, destination, chunk_size=32768):
    """Stream ``response`` into the file ``destination`` with a progress bar.

    Args:
        response: Object exposing ``iter_content(chunk_size)`` (a streamed
            ``requests`` response in this module).
        destination (str): Path of the file to write.
        chunk_size (int, optional): Bytes requested per chunk.
    """
    # See gen_bar_updater for why tqdm is imported here.
    from tqdm import tqdm

    with open(destination, "wb") as f:
        pbar = tqdm(total=None)
        try:
            for chunk in response.iter_content(chunk_size):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
                    pbar.update(len(chunk))
        finally:
            # Close the bar even if the transfer is interrupted.
            pbar.close()


def _is_tarxz(filename):
    """Return True for names ending in ``.tar.xz``."""
    return filename.endswith(".tar.xz")


def _is_tar(filename):
    """Return True for names ending in ``.tar``."""
    return filename.endswith(".tar")


def _is_targz(filename):
    """Return True for names ending in ``.tar.gz``."""
    return filename.endswith(".tar.gz")


def _is_gzip(filename):
    """Return True for plain ``.gz`` files (not ``.tar.gz`` archives)."""
    return filename.endswith(".gz") and not filename.endswith(".tar.gz")


def _is_zip(filename):
    """Return True for names ending in ``.zip``."""
    return filename.endswith(".zip")


def extract_archive(from_path, to_path=None, remove_finished=False):
    """Unpack the archive ``from_path`` into the directory ``to_path``.

    Supported formats (chosen by file name): ``.tar``, ``.tar.gz``,
    ``.tar.xz``, ``.gz`` and ``.zip``.

    Args:
        from_path (str): Path of the archive.
        to_path (str, optional): Destination directory. Defaults to the
            directory containing ``from_path``.
        remove_finished (bool, optional): Delete the archive after extraction.

    Raises:
        ValueError: If the file name matches none of the supported formats.
    """
    if to_path is None:
        to_path = os.path.dirname(from_path)

    # NOTE(review): extractall() trusts member paths stored in the archive;
    # only use this on archives from sources you trust.
    if _is_tar(from_path):
        with tarfile.open(from_path, 'r') as tar:
            tar.extractall(path=to_path)
    elif _is_targz(from_path):
        with tarfile.open(from_path, 'r:gz') as tar:
            tar.extractall(path=to_path)
    elif _is_tarxz(from_path):
        # The former ``and PY3`` guard referenced an undefined name and raised
        # NameError for every .tar.xz file.
        with tarfile.open(from_path, 'r:xz') as tar:
            tar.extractall(path=to_path)
    elif _is_gzip(from_path):
        # "name.ext.gz" is written to "<to_path>/name.ext".
        target = os.path.join(to_path, os.path.splitext(os.path.basename(from_path))[0])
        with gzip.open(from_path, "rb") as zip_f, open(target, "wb") as out_f:
            # Stream the payload instead of loading it fully into memory.
            shutil.copyfileobj(zip_f, out_f)
    elif _is_zip(from_path):
        with zipfile.ZipFile(from_path, 'r') as z:
            z.extractall(to_path)
    else:
        raise ValueError("Extraction of {} not supported".format(from_path))

    if remove_finished:
        os.remove(from_path)


def download_and_extract_archive(url, download_root, extract_root=None, filename=None,
                                 md5=None, remove_finished=False):
    """Download an archive with :func:`download_url` and unpack it.

    Args:
        url (str): URL of the archive.
        download_root (str): Directory the archive is saved in.
        extract_root (str, optional): Directory to extract into. Defaults to
            ``download_root``.
        filename (str, optional): Name to save the archive under. Defaults to
            the basename of ``url``.
        md5 (str, optional): MD5 checksum of the archive. If None, do not check.
        remove_finished (bool, optional): Delete the archive after extraction.
    """
    download_root = os.path.expanduser(download_root)
    if extract_root is None:
        extract_root = download_root
    if not filename:
        filename = os.path.basename(url)

    download_url(url, download_root, filename, md5)

    archive = os.path.join(download_root, filename)
    print("Extracting {} to {}".format(archive, extract_root))
    extract_archive(archive, extract_root, remove_finished)


def iterable_to_str(iterable):
    """Render the items as a single-quoted, comma-separated string."""
    return "'" + "', '".join(str(item) for item in iterable) + "'"


def verify_str_arg(value, arg=None, valid_values=None, custom_msg=None):
    """Validate that ``value`` is a string and, optionally, an allowed one.

    Args:
        value: Value to validate.
        arg (str, optional): Argument name used in error messages.
        valid_values (iterable, optional): Allowed values. If None, only the
            type is checked.
        custom_msg (str, optional): Message used instead of the default one
            when ``value`` is not in ``valid_values``.

    Returns:
        The unchanged ``value``.

    Raises:
        ValueError: If ``value`` is not a ``str`` or not in ``valid_values``.
    """
    # ``torch._six.string_classes`` was used here without torch ever being
    # imported (NameError on every call); on Python 3 it is simply ``str``.
    if not isinstance(value, str):
        if arg is None:
            msg = "Expected type str, but got type {type}."
        else:
            msg = "Expected type str for argument {arg}, but got type {type}."
        msg = msg.format(type=type(value), arg=arg)
        raise ValueError(msg)

    if valid_values is None:
        return value

    if value not in valid_values:
        if custom_msg is not None:
            msg = custom_msg
        else:
            msg = ("Unknown value '{value}' for argument {arg}. "
                   "Valid values are {{{valid_values}}}.")
            msg = msg.format(value=value, arg=arg,
                             valid_values=iterable_to_str(valid_values))
        raise ValueError(msg)

    return value
# --------------------------------------------------------------------------------