├── .github ├── dependabot.yml └── workflows │ └── lint.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── LICENSE ├── Procfile ├── README.md ├── app.py ├── assets ├── Generating MatPES Compatible Training Data for Fine-tuning of Models.md ├── MatPES_workflow.png ├── Training a MatPES model.md ├── Using Pre-Trained Models with MatCalc.md ├── favicon.ico ├── logo.svg ├── matpes.ai.png ├── matpes.ai.svg └── matpes.css ├── dev └── Generate Stats Files.ipynb ├── docs ├── apidoc │ ├── conf.py │ ├── index.rst │ ├── matpes.rst │ └── modules.rst ├── changes.md ├── index.md ├── matpes.html └── modules.html ├── notebooks ├── Generating MatPES Compatible Training Data for Fine-tuning of Models.ipynb ├── Training a MatPES model.ipynb └── Using Pre-Trained Models with Matcalc.ipynb ├── pages ├── about.py ├── benchmarks.py ├── dataset.py ├── explorer.py ├── home.py ├── matcalc-benchmark-pbe.csv ├── matcalc-benchmark-r2scan.csv ├── pbe_stats.json ├── r2scan_stats.json ├── references.py ├── tasks.csv ├── tutorials.py └── utils.py ├── pyproject.toml ├── requirements.txt ├── src └── matpes │ ├── __init__.py │ ├── cli.py │ ├── data.py │ ├── db.py │ └── py.typed ├── tasks.py └── uv.lock /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "7:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 4 10 | matrix: 11 | python-version: ["3.10"] 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Install uv with python version. 16 | uses: astral-sh/setup-uv@v5 17 | with: 18 | enable-cache: true 19 | cache-dependency-glob: "uv.lock" 20 | - name: Install dependencies 21 | run: | 22 | uv sync --group lint 23 | 24 | - name: ruff 25 | run: | 26 | uv run ruff --version 27 | uv run ruff check src 28 | uv run ruff format src --check 29 | 30 | - name: mypy 31 | run: | 32 | uv run mypy --version 33 | uv run rm -rf .mypy_cache 34 | uv run mypy -p matpes 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | venv/ 13 | ENV/ 14 | env.bak/ 15 | venv.bak/ 16 | *.egg 17 | *.egg-info/ 18 | dist/ 19 | build/ 20 | *.wheel 21 | 22 | # PyInstaller 23 | # Usually these files are written by a python script from a template 24 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 25 | *.manifest 26 | *.spec 27 | 28 | # Installer logs 29 | pip-log.txt 30 | pip-delete-this-directory.txt 31 | 32 | # Unit test / coverage reports 33 | htmlcov/ 34 | .tox/ 35 | .nox/ 36 | .coverage 37 | *.cover 38 | *.py,cover 39 | .hypothesis/ 40 | pytest_cache/ 41 | cover/ 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | local_settings.py 50 | db.sqlite3 51 | db.sqlite3-journal 52 | 53 | # Flask stuff: 54 | instance/ 55 | .webassets-cache 56 | 57 | # Scrapy stuff: 58 | .scrapy 59 | 60 | # Sphinx documentation 61 | docs/_build/ 62 | 63 | # PyBuilder 64 | target/ 65 | 66 | # Jupyter Notebook 67 | .ipynb_checkpoints 68 | 69 | # IPython 70 | profile_default/ 71 | ipython_config.py 72 | 73 | 74 | # Celery 75 | celerybeat-schedule 76 | *.celerybeat-schedule 77 | 78 | # SageMath 79 | *.sage.py 80 | 81 | # Environments 82 | .env 83 | .env.* 84 | .spacy 85 | *.envrc 86 | 87 | # mypy 88 | .mypy_cache/ 89 | .dmypy.json 90 | dmypy.json 91 | 92 | # Pyre 93 | .pyre/ 94 | 95 | # VS Code 96 | .vscode/ 97 | 98 | # IDEA 99 | .idea/ 100 | 101 | # MacOS 102 | .DS_Store 103 | 104 | # Logs 105 | *.log 106 | 107 | # Temporary files 108 | *.tmp 109 | *.temp 110 | *.bak 111 | *.swp 112 | *.swo 113 | 114 | # Windows 115 | Thumbs.db 116 | ehthumbs.db 117 | # Environments 118 | .env 119 | .venv 120 | env/ 121 | venv/ 122 | ENV/ 123 | env.bak/ 124 | venv.bak/ 125 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: ^(docs|tests/files|tasks.py) 2 | 3 | ci: 4 | autoupdate_schedule: monthly 5 | skip: [mypy, pyright] 6 | autofix_commit_msg: pre-commit auto-fixes 7 | autoupdate_commit_msg: pre-commit autoupdate 8 | 9 | repos: 10 | - repo: https://github.com/astral-sh/ruff-pre-commit 11 | rev: v0.11.8 12 | hooks: 13 | - id: ruff 14 | args: [--fix, --unsafe-fixes] 15 | - id: ruff-format 16 | 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: v5.0.0 19 | hooks: 20 | - id: check-yaml 21 | - id: end-of-file-fixer 22 | - id: trailing-whitespace 23 | 24 | - repo: https://github.com/pre-commit/mirrors-mypy 25 | rev: v1.15.0 26 | hooks: 27 | - id: mypy 28 | 29 | - repo: https://github.com/codespell-project/codespell 30 | rev: v2.4.1 31 | hooks: 32 | - id: codespell 33 | stages: [pre-commit, commit-msg] 34 | exclude_types: [html] 35 | additional_dependencies: [tomli] # needed to read pyproject.toml below py3.11 36 | exclude: src/pymatgen/analysis/aflow_prototypes.json 37 | 38 | - repo: https://github.com/adamchainz/blacken-docs 39 | rev: 1.19.1 40 | hooks: 41 | - id: blacken-docs 42 | 43 | - repo: https://github.com/igorshubovych/markdownlint-cli 44 | rev: v0.44.0 45 | hooks: 46 | - id: markdownlint 47 | # MD013: line too long 48 | # MD024: Multiple headings with the same content 49 | # MD033: no inline HTML 50 | # MD041: first line in a file should be a top-level heading 51 | # MD025: single title 52 | args: [--disable, MD013, MD024, MD025, MD033, MD041, "--"] 53 | 54 | - repo: https://github.com/kynan/nbstripout 55 | rev: 0.8.1 56 | hooks: 57 | - id: nbstripout 58 | args: [--drop-empty-cells, --keep-output] 59 | 60 | - repo: https://github.com/RobertCraigie/pyright-python 61 | rev: v1.1.400 62 | hooks: 63 | - id: pyright 64 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10.16 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Materials Virtual Lab 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:server 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GitHub license](https://img.shields.io/github/license/materialsvirtuallab/matpes)](https://github.com/materialsvirtuallab/matpes/blob/main/LICENSE) 2 | [![Linting](https://github.com/materialsvirtuallab/matpes/workflows/Linting/badge.svg)](https://github.com/materialsvirtuallab/matpes/workflows/Linting/badge.svg) 3 | 4 | ### Aims 5 | 6 | Potential energy surface datasets with near-complete coverage of the periodic table are used to train foundation 7 | potentials (FPs), i.e., machine learning interatomic potentials (MLIPs) with near-complete coverage of the periodic 8 | table. MatPES is an initiative by the [Materials Virtual Lab] and the [Materials Project] to address 9 | [critical deficiencies](http://matpes.ai/about) in such PES datasets for materials. 10 | 11 | 1. **Accuracy.** MatPES is computed using static DFT calculations with stringent converegence criteria. 12 | Please refer to the `MatPESStaticSet` in [pymatgen] for details. 13 | 2. **Comprehensiveness.** MatPES structures are sampled using a 2-stage version of DImensionality-Reduced 14 | Encoded Clusters with sTratified ([DIRECT]) sampling from a greatly expanded configuration of MD structures. 15 | 3. **Quality.** MatPES includes computed data from the PBE functional, as well as the high fidelity r2SCAN meta-GGA 16 | functional with improved description across diverse bonding and chemistries. 17 | 18 | The initial v2025.1 release comprises ~400,000 structures from 300K MD simulations. This dataset is much smaller 19 | than other PES datasets in the literature and yet achieves comparable or, in some cases, 20 | [improved performance and reliability](http://matpes.ai/benchmarks) on trained FPs. 21 | 22 | MatPES is part of the MatML ecosystem, which includes the [MatGL] (Materials Graph Library) and [maml] (MAterials 23 | Machine Learning) packages, the [MatPES] (Materials Potential Energy Surface) dataset, and the [MatCalc] (Materials 24 | Calculator). 25 | 26 | ### Getting the DataSet 27 | 28 | #### Hugging Face 29 | 30 | The MatPES dataset is available on [Hugging Face](https://huggingface.co/datasets/mavrl/matpes). You can use the 31 | `datasets` package to download it. 32 | 33 | ```python 34 | from datasets import load_dataset 35 | 36 | load_dataset("mavrl/matpes", "pbe") 37 | 38 | load_dataset("mavrl/matpes", "r2scan") 39 | ``` 40 | 41 | #### MatPES Package 42 | 43 | The `matpes` python package, which provides tools for working with the MatPES datasets, can be installed via pip: 44 | 45 | ```shell 46 | pip install matpes 47 | ``` 48 | 49 | Some command line usage examples: 50 | 51 | ```shell 52 | # Download the PBE dataset to the current directory 53 | matpes download pbe 54 | 55 | # You should see a MatPES-PBE-20240214.json.gz file in your directory. 56 | 57 | # Extract all entries in the Fe-O chemical system 58 | matpes data -i MatPES-PBE-20240214.json.gz --chemsys Fe-O -o Fe-O.json.gz 59 | ``` 60 | 61 | The `matpes.db` module provides functionality to create your own MongoDB database with the MatPES downloaded data, 62 | which is extremely useful if you are going to be working with the data (e.g., querying, adding entries, etc.) a lot. 63 | 64 | ### MatPES-trained Models 65 | 66 | We have released a set of MatPES-trained foundation potentials (FPs) in the [M3GNet], [CHGNet], [TensorNet] 67 | architectures in the [MatGL] package. For example, you can load the TensorNet FP trained on MatPES PBE 2025.1 as 68 | follows: 69 | 70 | ```python 71 | import matgl 72 | 73 | potential = matgl.load_model("TensorNet-MatPES-PBE-v2025.1-PES") 74 | ``` 75 | 76 | The naming of the models follow the format `---PES`. 77 | 78 | These FPs can be used easily with the [MatCalc] package to rapidly compute properties. For example: 79 | 80 | ```python 81 | from matcalc.elasticity import ElasticityCalc 82 | from matgl.ext.ase import PESCalculator 83 | 84 | ase_calc = PESCalculator(potential) 85 | calculator = ElasticityCalc(ase_calc) 86 | calculator.calc(structure) 87 | ``` 88 | 89 | ### Tutorials 90 | 91 | We have provided [Jupyter notebooks](http://matpes.ai/tutorials) demonstrating how to load the MatPES dataset, train a model and 92 | perform fine-tuning. 93 | 94 | ### Citing 95 | 96 | If you use the MatPES dataset, please cite the following [work](https://doi.org/10.48550/arXiv.2503.04070): 97 | 98 | ```txt 99 | Kaplan, A. D.; Liu, R.; Qi, J.; Ko, T. W.; Deng, B.; Riebesell, J.; Ceder, G.; Persson, K. A.; Ong, S. P. A 100 | Foundational Potential Energy Surface Dataset for Materials. arXiv 2025. DOI: 10.48550/arXiv.2503.04070. 101 | ``` 102 | 103 | In addition, if you use any of the pre-trained FPs or architectures, please cite the 104 | [references provided](http://matgl.ai/references) on the architecture used as well as MatGL. 105 | 106 | [Materials Virtual Lab]: http://materialsvirtuallab.org 107 | [Materials Project]: https://materialsproject.org 108 | [M3GNet]: http://dx.doi.org/10.1038/s43588-022-00349-3 109 | [CHGNet]: http://doi.org/10.1038/s42256-023-00716-3 110 | [TensorNet]: https://arxiv.org/abs/2306.06482 111 | [DIRECT]: https//doi.org/10.1038/s41524-024-01227-4 112 | [maml]: https://materialsvirtuallab.github.io/maml/ 113 | [MatGL]: https://matgl.ai 114 | [MatPES]: https://matpes.ai 115 | [MatCalc]: https://matcalc.ai 116 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | """Main app.""" 2 | 3 | from __future__ import annotations 4 | 5 | import argparse 6 | 7 | import dash 8 | import dash_bootstrap_components as dbc 9 | from dash import Dash, Input, Output, State, dcc, html 10 | from dash_bootstrap_templates import load_figure_template 11 | 12 | load_figure_template("pulse") 13 | 14 | app = Dash( 15 | "MatPES", 16 | use_pages=True, 17 | external_stylesheets=[dbc.themes.PULSE], 18 | title="MatPES", 19 | ) 20 | 21 | 22 | navbar = dbc.Navbar( 23 | dbc.Container( 24 | [ 25 | dbc.Row( 26 | [ 27 | dbc.Col( 28 | html.Img(src=dash.get_asset_url("logo.svg"), alt="MatPES", id="header-logo"), 29 | ), 30 | # dbc.Col(html.A(dbc.NavbarBrand("MatPES.ai", class_name="ms-2"), href="/")), 31 | ], 32 | align="center", 33 | class_name="g-0", 34 | ), 35 | dbc.NavbarToggler(id="navbar-toggler", n_clicks=0), 36 | dbc.Collapse( 37 | [ 38 | dbc.Row( 39 | [ 40 | dbc.Col( 41 | dbc.NavLink( 42 | page["name"], 43 | href=page["path"], 44 | class_name="ms-4 nav-link-item", 45 | active="exact", 46 | ) 47 | ) 48 | for page in dash.page_registry.values() 49 | # for name in ("Explorer", "Dataset", "Benchmarks", "About", "References") 50 | ], 51 | align="center", 52 | class_name="g-0", 53 | ), 54 | ], 55 | id="navbar-collapse", 56 | is_open=False, 57 | navbar=True, 58 | ), 59 | ] 60 | ), 61 | color="primary", 62 | dark=True, 63 | ) 64 | 65 | 66 | content = html.Div(children=dash.page_container, id="page-content") 67 | 68 | footer_style = { 69 | "border-top": "1px solid #111", # Add a border at the top 70 | "text-align": "center", # Center-align the text 71 | "padding": "10px", # Add some padding for spacing 72 | "font-size": "0.8rem", 73 | } 74 | 75 | footer = html.Footer(["© ", html.A("Materials Virtual Lab", href="http://materialsvirtuallab.org")], style=footer_style) 76 | 77 | app.index_string = """ 78 | 79 | 80 | 81 | 82 | 89 | {%metas%} 90 | {%title%} 91 | {%favicon%} 92 | {%css%} 93 | 94 | 95 | {%app_entry%} 96 |
97 | {%config%} 98 | {%scripts%} 99 | {%renderer%} 100 |
101 | 102 | """ 103 | 104 | app.layout = html.Div([dcc.Location(id="url"), navbar, content, footer]) 105 | 106 | 107 | server = app.server 108 | 109 | 110 | @app.callback( 111 | Output("navbar-collapse", "is_open"), 112 | [Input("navbar-toggler", "n_clicks")], 113 | [State("navbar-collapse", "is_open")], 114 | ) 115 | def toggle_navbar_collapse(n, is_open): 116 | """Toggle navbar collapse on small screens.""" 117 | if n: 118 | return not is_open 119 | return is_open 120 | 121 | 122 | def main(): 123 | """Main entry point for MatPES Webapp.""" 124 | parser = argparse.ArgumentParser( 125 | description="""MatPES.ai is a Dash Interface for MatPES.""", 126 | epilog="Author: Shyue Ping Ong", 127 | ) 128 | 129 | parser.add_argument( 130 | "-d", 131 | "--debug", 132 | dest="debug", 133 | action="store_true", 134 | help="Whether to run in debug mode.", 135 | ) 136 | parser.add_argument( 137 | "-hh", 138 | "--host", 139 | dest="host", 140 | type=str, 141 | nargs="?", 142 | default="0.0.0.0", 143 | help="Host in which to run the server. Defaults to 0.0.0.0.", 144 | ) 145 | parser.add_argument( 146 | "-p", 147 | "--port", 148 | dest="port", 149 | type=int, 150 | nargs="?", 151 | default=8050, 152 | help="Port in which to run the server. Defaults to 8050.", 153 | ) 154 | 155 | args = parser.parse_args() 156 | 157 | app.run(debug=args.debug, host=args.host, port=args.port) 158 | 159 | 160 | if __name__ == "__main__": 161 | main() 162 | -------------------------------------------------------------------------------- /assets/Generating MatPES Compatible Training Data for Fine-tuning of Models.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This notebook shows how to use pymatgen to generate VASP inputs for MatPES-compatible calculations. This is very useful if you intend to generate additional training data to fine-tine MatPES models. 4 | 5 | 6 | ```python 7 | from __future__ import annotations 8 | 9 | from pymatgen.core import Lattice, Structure 10 | from pymatgen.io.vasp.sets import MatPESStaticSet 11 | ``` 12 | 13 | 14 | ```python 15 | example_structure = Structure.from_spacegroup( 16 | "Im-3m", Lattice.cubic(3), ["Li", "Li"], [[0, 0, 0], [0.5, 0.5, 0.5]] 17 | ) 18 | ``` 19 | 20 | 21 | ```python 22 | vis = MatPESStaticSet(example_structure, xc_functional="PBE") 23 | print(vis.incar) 24 | ``` 25 | 26 | ALGO = Normal 27 | EDIFF = 1e-05 28 | ENAUG = 1360 29 | ENCUT = 680.0 30 | GGA = Pe 31 | ISMEAR = 0 32 | ISPIN = 2 33 | KSPACING = 0.22 34 | LAECHG = True 35 | LASPH = True 36 | LCHARG = True 37 | LMAXMIX = 6 38 | LMIXTAU = True 39 | LORBIT = 11 40 | LREAL = False 41 | LWAVE = False 42 | MAGMOM = 4*0.6 43 | NELM = 200 44 | NSW = 0 45 | PREC = Accurate 46 | SIGMA = 0.05 47 | 48 | 49 | 50 | Note the strict ENCUT and EDIFF used. 51 | 52 | 53 | ```python 54 | # To write the input files to a directory, use the following line. 55 | # vis.write_input("Li") 56 | ``` 57 | 58 | Similarly, the r2SCAN data can be generated using the following code: 59 | 60 | 61 | ```python 62 | vis = MatPESStaticSet(example_structure, xc_functional="r2SCAN") 63 | print(vis.incar) 64 | ``` 65 | 66 | ALGO = All 67 | EDIFF = 1e-05 68 | ENAUG = 1360 69 | ENCUT = 680.0 70 | ISMEAR = 0 71 | ISPIN = 2 72 | KSPACING = 0.22 73 | LAECHG = True 74 | LASPH = True 75 | LCHARG = True 76 | LMAXMIX = 6 77 | LMIXTAU = True 78 | LORBIT = 11 79 | LREAL = False 80 | LWAVE = False 81 | MAGMOM = 4*0.6 82 | METAGGA = R2scan 83 | NELM = 200 84 | NSW = 0 85 | PREC = Accurate 86 | SIGMA = 0.05 87 | -------------------------------------------------------------------------------- /assets/MatPES_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsvirtuallab/matpes/141fb91330f866e668633fab6bf2a2b756028b30/assets/MatPES_workflow.png -------------------------------------------------------------------------------- /assets/Using Pre-Trained Models with MatCalc.md: -------------------------------------------------------------------------------- 1 | ```python 2 | from __future__ import annotations 3 | 4 | import matgl 5 | from matcalc.elasticity import ElasticityCalc 6 | from matgl.ext.ase import PESCalculator 7 | from pymatgen.ext.matproj import MPRester 8 | 9 | potential = matgl.load_model("TensorNet-MatPES-PBE-v2025.1-PES") 10 | ase_calc = PESCalculator(potential) 11 | calculator = ElasticityCalc(ase_calc) 12 | ``` 13 | 14 | /Users/shyue/repos/matgl/src/matgl/apps/pes.py:69: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). 15 | self.element_refs = AtomRef(property_offset=torch.tensor(element_refs, dtype=matgl.float_th)) 16 | /Users/shyue/repos/matgl/src/matgl/apps/pes.py:75: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). 17 | self.register_buffer("data_mean", torch.tensor(data_mean, dtype=matgl.float_th)) 18 | /Users/shyue/repos/matgl/src/matgl/apps/pes.py:76: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). 19 | self.register_buffer("data_std", torch.tensor(data_std, dtype=matgl.float_th)) 20 | 21 | 22 | Let us obtain the structure of Si from the Materials Project API 23 | 24 | 25 | ```python 26 | mpr = MPRester() 27 | ``` 28 | 29 | 30 | ```python 31 | si = mpr.get_structure_by_material_id("mp-149") 32 | print(si) 33 | ``` 34 | 35 | Full Formula (Si2) 36 | Reduced Formula: Si 37 | abc : 3.849278 3.849279 3.849278 38 | angles: 60.000012 60.000003 60.000011 39 | pbc : True True True 40 | Sites (2) 41 | # SP a b c magmom 42 | --- ---- ----- ----- ----- -------- 43 | 0 Si 0.875 0.875 0.875 -0 44 | 1 Si 0.125 0.125 0.125 -0 45 | 46 | 47 | 48 | ```python 49 | pred = calculator.calc(si) 50 | ``` 51 | 52 | For comparison, let's obtain the DFT computed values from Materials Project 53 | 54 | 55 | ```python 56 | mp_data = mpr.get_summary_by_material_id("mp-149") 57 | ``` 58 | 59 | 60 | ```python 61 | print( 62 | f"K_VRH: TensorNet-MatPES-PBE = {pred['bulk_modulus_vrh']}; DFT = {mp_data['bulk_modulus']['vrh']}" 63 | ) 64 | print( 65 | f"G_VRH: TensorNet-MatPES-PBE = {pred['shear_modulus_vrh']}; DFT = {mp_data['shear_modulus']['vrh']}" 66 | ) 67 | ``` 68 | 69 | K_VRH: TensorNet-MatPES-PBE = 101.15424648468968; DFT = 88.916 70 | G_VRH: TensorNet-MatPES-PBE = 62.546024424713266; DFT = 62.445 71 | -------------------------------------------------------------------------------- /assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsvirtuallab/matpes/141fb91330f866e668633fab6bf2a2b756028b30/assets/favicon.ico -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | Canvas 1 14 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /assets/matpes.ai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsvirtuallab/matpes/141fb91330f866e668633fab6bf2a2b756028b30/assets/matpes.ai.png -------------------------------------------------------------------------------- /assets/matpes.ai.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Canvas 1 20 | 21 | Text 22 | 23 | 24 | MatPES.ai 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /assets/matpes.css: -------------------------------------------------------------------------------- 1 | #page-content { 2 | margin-top: 2rem; 3 | } 4 | 5 | #matpes-title{ 6 | background: -webkit-linear-gradient(#613B9B, royalblue); 7 | -webkit-background-clip: text; 8 | -webkit-text-fill-color: transparent; 9 | } 10 | 11 | 12 | .section-title { 13 | padding-top: 1rem; 14 | text-align: center; 15 | } 16 | 17 | #header-logo { 18 | height: 40px; 19 | display: block; 20 | float: right; 21 | } 22 | 23 | .nav-link-item { 24 | font-size: 1.2rem; 25 | color: white; 26 | } 27 | 28 | .nav-link:hover { 29 | color: #ddd; 30 | } 31 | 32 | 33 | .nav-link.active { 34 | color: #ddd; 35 | } 36 | 37 | .download-button { 38 | border-radius: 0.5rem; 39 | } 40 | 41 | #matcalc-benchmark-legend table { 42 | width: 100%; 43 | 44 | } 45 | 46 | #matcalc-benchmark-legends { 47 | border-collapse: collapse; 48 | width: 100%; 49 | } 50 | 51 | #matcalc-benchmark-legends td, #matcalc-benchmark-legend th { 52 | border: 1px solid #ddd; 53 | padding: 8px; 54 | } 55 | 56 | #matcalc-benchmark-legend tr:nth-child(even){background-color: #f2f2f2;} 57 | 58 | #matcalc-benchmark-legend tr:hover {background-color: #ddd;} 59 | 60 | #matcalc-benchmark-legend th { 61 | padding-top: 12px; 62 | padding-bottom: 12px; 63 | text-align: left; 64 | background-color: #633D9C; 65 | color: white; 66 | } 67 | 68 | 69 | img[alt=matpes_workflow] { width: 100%; } 70 | -------------------------------------------------------------------------------- /dev/Generate Stats Files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "This notebook is used to generate a small stats datafile for display on the front end." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "1", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from __future__ import annotations\n", 19 | "\n", 20 | "import collections\n", 21 | "import itertools\n", 22 | "import json\n", 23 | "\n", 24 | "import numpy as np\n", 25 | "\n", 26 | "from matpes.db import MatPESDB" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "2", 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "{'nstructures': 434712, 'element_counts': {'Ti': 19582, 'Y': 11913, 'N': 46032, 'Zn': 15747, 'Sn': 16844, 'U': 9978, 'Ni': 29953, 'Zr': 12234, 'Ga': 14424, 'Gd': 4073, 'O': 117733, 'P': 20139, 'C': 25549, 'Th': 6839, 'Tc': 7282, 'Sb': 17902, 'Cd': 11301, 'La': 12460, 'Te': 13663, 'Cu': 24215, 'S': 21187, 'Ag': 11239, 'F': 18416, 'B': 17186, 'As': 11601, 'Ge': 15543, 'Li': 29705, 'Tl': 9729, 'Os': 10237, 'Si': 25229, 'Mo': 13265, 'V': 18753, 'Pd': 12550, 'Hg': 9439, 'Pu': 9842, 'W': 14791, 'K': 14456, 'Al': 20315, 'Nb': 13854, 'Rh': 12674, 'Ce': 9680, 'Ru': 12588, 'Sc': 9416, 'Co': 27048, 'Be': 7618, 'Au': 12663, 'Mg': 33692, 'Eu': 6954, 'Cl': 11168, 'Fe': 36116, 'Cr': 18516, 'Mn': 30568, 'Br': 7690, 'H': 15441, 'Re': 9901, 'Se': 14238, 'Sr': 16097, 'In': 12260, 'Pt': 12678, 'Cs': 8554, 'Ir': 9692, 'Hf': 10140, 'I': 7016, 'Bi': 13712, 'Pa': 4577, 'Np': 5453, 'Ac': 2708, 'Na': 15718, 'Lu': 3398, 'Kr': 26, 'Ta': 12552, 'Rb': 9213, 'Ca': 15258, 'Pb': 9609, 'He': 88, 'Tm': 2961, 'Yb': 7452, 'Sm': 5420, 'Ho': 3546, 'Pm': 1776, 'Ba': 15554, 'Pr': 5354, 'Tb': 3745, 'Er': 3189, 'Nd': 5922, 'Dy': 3409, 'Xe': 204, 'Ne': 1, 'Ar': 3}, 'cohesive_energy_per_atom': {'counts': [1, 0, 6, 22, 99, 380, 706, 1314, 2355, 3667, 5664, 7998, 10499, 13787, 16543, 19561, 22852, 25220, 28952, 28719, 29383, 27531, 26217, 24750, 24139, 21872, 19748, 16414, 13750, 11239, 9264, 7684, 5599, 3839, 2456, 1417, 639, 307, 42, 28, 26, 11, 7, 1, 3, 0, 0, 0, 0, 1], 'bins': [-9.441932215, -9.193073244699999, -8.9442142744, -8.6953553041, -8.446496333799999, -8.1976373635, -7.9487783932, -7.6999194229, -7.4510604526, -7.2022014823, -6.953342512, -6.7044835417, -6.4556245714, -6.2067656011, -5.9579066308, -5.7090476605, -5.4601886902, -5.2113297199, -4.9624707496, -4.7136117793, -4.464752809, -4.2158938387, -3.9670348684, -3.7181758981, -3.4693169277999996, -3.2204579575, -2.9715989872, -2.7227400169000004, -2.4738810466, -2.2250220763, -1.9761631060000004, -1.7273041356999999, -1.4784451654000001, -1.2295861950999996, -0.9807272248000007, -0.7318682545000001, -0.48300928419999956, -0.23415031390000074, 0.014708656399999853, 0.26356762670000045, 0.5124265969999993, 0.7612855672999999, 1.0101445376000004, 1.2590035078999993, 1.5078624781999999, 1.7567214485000004, 2.0055804187999993, 2.2544393891, 2.5032983594000005, 2.7521573296999993, 3.0010163000000003]}, 'nsites': {'counts': [252234, 75140, 31734, 29365, 14086, 14177, 8149, 7800, 872, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 49, 81, 433, 21, 131, 58, 55, 70, 25, 61, 46, 0, 18, 15, 44, 0, 0, 0, 0, 0, 0, 0, 6, 4, 13, 0, 6, 0, 2, 15], 'bins': [1.0, 5.84, 10.68, 15.52, 20.36, 25.2, 30.04, 34.879999999999995, 39.72, 44.56, 49.4, 54.239999999999995, 59.08, 63.92, 68.75999999999999, 73.6, 78.44, 83.28, 88.12, 92.96, 97.8, 102.64, 107.47999999999999, 112.32, 117.16, 122.0, 126.84, 131.68, 136.51999999999998, 141.35999999999999, 146.2, 151.04, 155.88, 160.72, 165.56, 170.4, 175.24, 180.07999999999998, 184.92, 189.76, 194.6, 199.44, 204.28, 209.12, 213.95999999999998, 218.79999999999998, 223.64, 228.48, 233.32, 238.16, 243.0]}, 'abs_forces': {'counts': [3874787, 3293, 1538, 668, 508, 250, 137, 110, 63, 28, 32, 24, 11, 8, 1, 5, 2, 1, 3, 0, 0, 0, 1, 0, 1, 0, 1, 10, 6, 0, 0, 0, 0, 9, 16, 14, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6], 'bins': [0.0, 15.777319937691583, 31.554639875383167, 47.33195981307475, 63.10927975076633, 78.88659968845792, 94.6639196261495, 110.44123956384108, 126.21855950153267, 141.99587943922424, 157.77319937691584, 173.5505193146074, 189.327839252299, 205.1051591899906, 220.88247912768216, 236.65979906537376, 252.43711900306533, 268.2144389407569, 283.9917588784485, 299.7690788161401, 315.5463987538317, 331.32371869152325, 347.1010386292148, 362.8783585669064, 378.655678504598, 394.4329984422896, 410.2103183799812, 425.98763831767275, 441.7649582553643, 457.5422781930559, 473.3195981307475, 489.0969180684391, 504.87423800613067, 520.6515579438222, 536.4288778815138, 552.2061978192054, 567.983517756897, 583.7608376945885, 599.5381576322802, 615.3154775699718, 631.0927975076634, 646.8701174453549, 662.6474373830465, 678.4247573207381, 694.2020772584297, 709.9793971961212, 725.7567171338128, 741.5340370715044, 757.311357009196, 773.0886769468876, 788.8659968845792]}, 'nelements': {'counts': [5274, 133557, 231743, 51303, 11424, 1371, 39, 1], 'bins': [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5]}}\n", 40 | "{'nstructures': 387897, 'element_counts': {'N': 43472, 'Zn': 14282, 'Ti': 17785, 'Y': 10925, 'Sn': 15426, 'U': 9141, 'Ni': 27300, 'Zr': 11476, 'Ga': 13285, 'Gd': 3770, 'O': 98013, 'P': 16695, 'C': 23490, 'Th': 6507, 'Tc': 7011, 'Sb': 16225, 'Cd': 10482, 'La': 11435, 'Te': 12500, 'Ag': 10345, 'F': 15572, 'Cu': 21841, 'S': 18755, 'B': 15527, 'As': 10442, 'Ge': 14071, 'Li': 24068, 'Tl': 8878, 'Os': 9601, 'Si': 22437, 'Mo': 12426, 'V': 16601, 'Pd': 11702, 'Hg': 8659, 'Pu': 7954, 'W': 13852, 'Al': 18405, 'Nb': 12676, 'Rh': 11803, 'Ru': 11516, 'Ce': 8434, 'Co': 23345, 'Au': 11672, 'Mg': 30197, 'Be': 7307, 'Eu': 6277, 'Cr': 16465, 'Mn': 26080, 'Cl': 9976, 'Fe': 31496, 'Br': 6881, 'H': 13649, 'Re': 9188, 'Cs': 7651, 'Lu': 3144, 'Se': 12954, 'Sr': 14560, 'In': 11098, 'Pt': 11913, 'Sc': 8951, 'Ir': 8970, 'Hf': 9560, 'I': 6155, 'K': 12923, 'Bi': 12282, 'Pa': 4125, 'Np': 4785, 'Ac': 2526, 'Na': 14021, 'Rb': 8311, 'Ta': 11710, 'Ca': 13825, 'Pb': 8611, 'He': 85, 'Kr': 26, 'Yb': 5148, 'Ba': 10183, 'Ho': 2141, 'Pm': 997, 'Dy': 2004, 'Sm': 3394, 'Pr': 3389, 'Er': 2104, 'Tm': 2139, 'Tb': 2326, 'Nd': 3766, 'Xe': 173, 'Ar': 3, 'Ne': 1}, 'cohesive_energy_per_atom': {'counts': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 31, 199, 737, 1744, 3737, 6681, 11137, 16321, 21740, 26673, 31886, 36275, 38308, 38434, 35456, 31603, 26364, 20390, 15090, 10996, 6531, 4178, 2262, 835, 185, 38, 28, 16, 10, 2, 0, 0, 0, 0, 1], 'bins': [-13.7408266, -13.3929893422, -13.0451520844, -12.6973148266, -12.349477568800001, -12.001640311000001, -11.6538030532, -11.3059657954, -10.9581285376, -10.6102912798, -10.262454022, -9.914616764200002, -9.5667795064, -9.218942248600001, -8.871104990800001, -8.523267733, -8.1754304752, -7.8275932174000005, -7.4797559596, -7.131918701800001, -6.784081444000001, -6.436244186200001, -6.088406928400001, -5.740569670600001, -5.392732412800001, -5.044895155000001, -4.6970578972000006, -4.3492206394, -4.001383381600002, -3.6535461238000018, -3.3057088660000016, -2.9578716082000014, -2.610034350400001, -2.262197092600001, -1.9143598348000008, -1.5665225770000006, -1.2186853192000004, -0.870848061400002, -0.5230108036000019, -0.17517354580000166, 0.17266371199999853, 0.5205009697999987, 0.8683382275999989, 1.216175485399999, 1.5640127431999975, 1.9118500009999977, 2.259687258799998, 2.6075245166, 2.9553617743999983, 3.3031990321999967, 3.6510362900000004]}, 'nsites': {'counts': [234358, 70479, 30708, 28481, 10376, 4072, 2654, 5102, 719, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 42, 72, 286, 79, 113, 26, 59, 18, 48, 68, 9, 37, 1, 20, 43, 0, 0, 0, 0, 0, 0, 0, 0, 4, 14, 0, 3, 0, 0, 4], 'bins': [1.0, 5.78, 10.56, 15.34, 20.12, 24.900000000000002, 29.68, 34.46, 39.24, 44.02, 48.800000000000004, 53.580000000000005, 58.36, 63.14, 67.92, 72.7, 77.48, 82.26, 87.04, 91.82000000000001, 96.60000000000001, 101.38000000000001, 106.16000000000001, 110.94000000000001, 115.72, 120.5, 125.28, 130.06, 134.84, 139.62, 144.4, 149.18, 153.96, 158.74, 163.52, 168.3, 173.08, 177.86, 182.64000000000001, 187.42000000000002, 192.20000000000002, 196.98000000000002, 201.76000000000002, 206.54000000000002, 211.32000000000002, 216.10000000000002, 220.88000000000002, 225.66000000000003, 230.44, 235.22, 240.0]}, 'abs_forces': {'counts': [3055137, 2341, 1023, 420, 331, 132, 96, 71, 21, 12, 23, 24, 7, 3, 2, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 6, 12, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 2], 'bins': [0.0, 16.279898247353877, 32.559796494707754, 48.83969474206163, 65.11959298941551, 81.39949123676939, 97.67938948412326, 113.95928773147713, 130.23918597883102, 146.5190842261849, 162.79898247353879, 179.07888072089264, 195.35877896824653, 211.6386772156004, 227.91857546295427, 244.19847371030815, 260.47837195766203, 276.7582702050159, 293.0381684523698, 309.31806669972366, 325.59796494707757, 341.8778631944314, 358.1577614417853, 374.4376596891392, 390.71755793649305, 406.9974561838469, 423.2773544312008, 439.5572526785547, 455.83715092590853, 472.11704917326244, 488.3969474206163, 504.6768456679702, 520.9567439153241, 537.236642162678, 553.5165404100318, 569.7964386573857, 586.0763369047396, 602.3562351520934, 618.6361333994473, 634.9160316468012, 651.1959298941551, 667.4758281415089, 683.7557263888629, 700.0356246362168, 716.3155228835706, 732.5954211309245, 748.8753193782784, 765.1552176256322, 781.4351158729861, 797.71501412034, 813.9949123676939]}, 'nelements': {'counts': [5097, 123446, 207104, 42284, 8989, 950, 26, 1], 'bins': [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5]}}\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "matpes = MatPESDB()\n", 46 | "\n", 47 | "for fun in [\"pbe\", \"r2scan\"]:\n", 48 | " df = matpes.get_df(fun)\n", 49 | " df[\"abs_forces\"] = df[\"forces\"].map(lambda a: np.linalg.norm(a, axis=1))\n", 50 | " df = df.drop([\"matpes_id\", \"bandgap\", \"energy\", \"forces\", \"formation_energy_per_atom\", \"formula_pretty\"], axis=1)\n", 51 | " stats = {\"nstructures\": len(df)}\n", 52 | " stats[\"element_counts\"] = dict(collections.Counter(itertools.chain.from_iterable(df[\"elements\"])))\n", 53 | " for c in [\"cohesive_energy_per_atom\", \"nsites\"]:\n", 54 | " counts, bins = np.histogram(df[c], bins=50)\n", 55 | " stats[c] = {\"counts\": counts.tolist(), \"bins\": bins.tolist()}\n", 56 | " counts, bins = np.histogram(list(itertools.chain(*df[\"abs_forces\"])), bins=50)\n", 57 | " stats[\"abs_forces\"] = {\"counts\": counts.tolist(), \"bins\": bins.tolist()}\n", 58 | " counts, bins = np.histogram(df[\"nelements\"], bins=np.arange(0.5, 9.5, 1))\n", 59 | " stats[\"nelements\"] = {\"counts\": counts.tolist(), \"bins\": bins.tolist()}\n", 60 | " with open(f\"../pages/{fun}_stats.json\", \"w\") as f:\n", 61 | " json.dump(stats, f)\n", 62 | " print(stats)\n", 63 | " # df.to_pickle(f\"../pages/{f}_stats.pkl\")\n", 64 | " # print(df.columns)" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 3 (ipykernel)", 71 | "language": "python", 72 | "name": "python3" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython3", 84 | "version": "3.11.10" 85 | } 86 | }, 87 | "nbformat": 4, 88 | "nbformat_minor": 5 89 | } 90 | -------------------------------------------------------------------------------- /docs/apidoc/conf.py: -------------------------------------------------------------------------------- 1 | """matpes documentation build configuration file, created by 2 | sphinx-quickstart on Tue Nov 15 00:13:52 2011. 3 | 4 | This file is execfile()d with the current directory set to its containing dir. 5 | 6 | Note that not all possible configuration values are present in this 7 | autogenerated file. 8 | 9 | All configuration values have a default; values that are commented out 10 | serve to show the default. 11 | """ 12 | 13 | from __future__ import annotations 14 | 15 | import inspect 16 | import os 17 | import sys 18 | 19 | from matpes import __file__, __version__ 20 | 21 | project = "matpes" 22 | copyright = "2025, Materials Virtual Lab" 23 | author = "Materials Virtual Lab & Materials Project" 24 | 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | # Napoleon is necessary to parse Google style docstrings. Markdown builder allows the generation of markdown output. 32 | # extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "myst_parser", "sphinx_markdown_builder"] 33 | # Add any paths that contain templates here, relative to this directory. 34 | 35 | # -- General configuration ----------------------------------------------------- 36 | 37 | # If your documentation needs a minimal Sphinx version, state it here. 38 | # needs_sphinx = '1.0' 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be extensions 41 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 42 | extensions = [ 43 | "sphinx.ext.autodoc", 44 | "sphinx.ext.napoleon", 45 | "sphinx.ext.linkcode", 46 | "sphinx.ext.mathjax", 47 | "sphinx_markdown_builder", 48 | ] 49 | exclude_patterns = ["../**/tests*"] 50 | exclude_dirnames = ["../**/tests*"] 51 | autoclass_content = "both" 52 | 53 | # Add any paths that contain templates here, relative to this directory. 54 | templates_path = ["_templates"] 55 | 56 | # The suffix of source filenames. 57 | source_suffix = ".rst" 58 | 59 | # The encoding of source files. 60 | # source_encoding = 'utf-8-sig' 61 | 62 | # The master toctree document. 63 | master_doc = "index" 64 | 65 | # General information about the project. 66 | project = "matpes" 67 | copyright = f"2011, {author}" 68 | 69 | # The version info for the project you're documenting, acts as replacement for 70 | # |version| and |release|, also used in various other places throughout the 71 | # built documents. 72 | # 73 | # The short X.Y version. 74 | version = __version__ 75 | # The full version, including alpha/beta/rc tags. 76 | release = __version__ 77 | 78 | # The language for content autogenerated by Sphinx. Refer to documentation 79 | # for a list of supported languages. 80 | # language = None 81 | 82 | # There are two options for replacing |today|: either, you set today to some 83 | # non-false value, then it is used: 84 | # today = '' 85 | # Else, today_fmt is used as the format for a strftime call. 86 | # today_fmt = '%B %d, %Y' 87 | 88 | # List of patterns, relative to source directory, that match files and 89 | # directories to ignore when looking for source files. 90 | exclude_patterns = ["_build"] 91 | 92 | # The reST default role (used for this markup: `text`) to use for all 93 | # documents. 94 | # default_role = None 95 | 96 | # If true, '()' will be appended to :func: etc. cross-reference text. 97 | # add_function_parentheses = True 98 | 99 | # If true, the current module name will be prepended to all description 100 | # unit titles (such as .. function::). 101 | add_module_names = False 102 | 103 | # If true, sectionauthor and moduleauthor directives will be shown in the 104 | # output. They are ignored by default. 105 | # show_authors = False 106 | 107 | # The name of the Pygments (syntax highlighting) style to use. 108 | pygments_style = "sphinx" 109 | 110 | # A list of ignored prefixes for module index sorting. 111 | # modindex_common_prefix = [] 112 | 113 | 114 | # -- Options for HTML output ------------------------------------------------- 115 | 116 | # The theme to use for HTML and HTML Help pages. See the documentation for 117 | # a list of builtin themes. 118 | html_theme = "sphinx_rtd_theme" 119 | 120 | # Theme options are theme-specific and customize the look and feel of a theme 121 | # further. For a list of options available for each theme, see the 122 | # documentation. 123 | # html_theme_options = {} 124 | 125 | # Add any paths that contain custom themes here, relative to this directory. 126 | html_theme_path = ["."] 127 | 128 | # The name for this set of Sphinx documents. If None, it defaults to 129 | # " v documentation". 130 | # html_title = None 131 | 132 | # A shorter title for the navigation bar. Default is the same as html_title. 133 | # html_short_title = None 134 | 135 | # The name of an image file (relative to this directory) to place at the top 136 | # of the sidebar. 137 | # html_logo = None 138 | 139 | # The name of an image file (within the static path) to use as favicon of the 140 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 141 | # pixels large. 142 | html_favicon = "favicon.ico" 143 | 144 | # Add any paths that contain custom static files (such as style sheets) here, 145 | # relative to this directory. They are copied after the builtin static files, 146 | # so a file named "default.css" will overwrite the builtin "default.css". 147 | html_static_path = ["assets"] 148 | 149 | html_css_files = [ 150 | "css/custom.css", 151 | ] 152 | 153 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 154 | # using the given strftime format. 155 | # html_last_updated_fmt = '%b %d, %Y' 156 | 157 | # If true, SmartyPants will be used to convert quotes and dashes to 158 | # typographically correct entities. 159 | # html_use_smartypants = True 160 | 161 | # Custom sidebar templates, maps document names to template names. 162 | # html_sidebars = {} 163 | 164 | # Additional templates that should be rendered to pages, maps page names to 165 | # template names. 166 | # html_additional_pages = {} 167 | 168 | # If false, no module index is generated. 169 | # html_domain_indices = True 170 | 171 | # If false, no index is generated. 172 | # html_use_index = True 173 | 174 | # If true, the index is split into individual pages for each letter. 175 | # html_split_index = False 176 | 177 | # If true, links to the reST sources are added to the pages. 178 | # html_show_sourcelink = True 179 | 180 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 181 | # html_show_sphinx = True 182 | 183 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 184 | # html_show_copyright = True 185 | 186 | # If true, an OpenSearch description file will be output, and all pages will 187 | # contain a tag referring to it. The value of this option must be the 188 | # base URL from which the finished HTML is served. 189 | # html_use_opensearch = '' 190 | 191 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 192 | # html_file_suffix = None 193 | 194 | # Output file base name for HTML help builder. 195 | htmlhelp_basename = "matpesdoc" 196 | 197 | html_theme_options = { 198 | "canonical_url": "https://matpes.org", 199 | "logo_only": True, 200 | "display_version": True, 201 | "prev_next_buttons_location": None, 202 | "style_external_links": True, 203 | "style_nav_header_background": "linear-gradient(0deg, rgba(23,63,162,1) 0%, rgba(0,70,192,1) 100%)", 204 | "collapse_navigation": True, 205 | "sticky_navigation": True, 206 | "navigation_depth": 4, 207 | "includehidden": True, 208 | "titles_only": False, 209 | } 210 | 211 | html_context = { 212 | "display_github": True, 213 | "github_user": "materialsproject", 214 | "github_repo": "matpes", 215 | "github_version": "master", 216 | "conf_py_path": "/docs_rst/", 217 | } 218 | 219 | # -- Options for LaTeX output ------------------------------------------------ 220 | 221 | latex_elements = { 222 | # The paper size ('letterpaper' or 'a4paper'). 223 | # 'papersize': 'letterpaper', 224 | # The font size ('10pt', '11pt' or '12pt'). 225 | # 'pointsize': '10pt', 226 | # Additional stuff for the LaTeX preamble. 227 | # 'preamble': '', 228 | } 229 | 230 | # Grouping the document tree into LaTeX files. List of tuples 231 | # (source start file, target name, title, author, documentclass [howto/manual]). 232 | latex_documents = [ 233 | ("index", "matpes.tex", "matpes Documentation", author, "manual"), 234 | ] 235 | 236 | # The name of an image file (relative to this directory) to place at the top of 237 | # the title page. 238 | # latex_logo = None 239 | 240 | # For "manual" documents, if this is true, then toplevel headings are parts, 241 | # not chapters. 242 | # latex_use_parts = False 243 | 244 | # If true, show page references after internal links. 245 | # latex_show_pagerefs = False 246 | 247 | # If true, show URL addresses after external links. 248 | # latex_show_urls = False 249 | 250 | # Documents to append as an appendix to all manuals. 251 | # latex_appendices = [] 252 | 253 | # If false, no module index is generated. 254 | # latex_domain_indices = True 255 | 256 | 257 | # -- Options for manual page output ------------------------------------------ 258 | 259 | # One entry per manual page. List of tuples 260 | # (source start file, name, description, authors, manual section). 261 | man_pages = [("index", "matpes", "matpes Documentation", [author], 1)] 262 | 263 | # If true, show URL addresses after external links. 264 | # man_show_urls = False 265 | 266 | 267 | # -- Options for Texinfo output ---------------------------------------------- 268 | 269 | # Grouping the document tree into Texinfo files. List of tuples 270 | # (source start file, target name, title, author, 271 | # dir menu entry, description, category) 272 | texinfo_documents = [ 273 | ( 274 | "index", 275 | "matpes", 276 | "matpes Documentation", 277 | author, 278 | "matpes", 279 | "One line description of project.", 280 | "Miscellaneous", 281 | ), 282 | ] 283 | 284 | # Documents to append as an appendix to all manuals. 285 | # texinfo_appendices = [] 286 | 287 | # If false, no module index is generated. 288 | # texinfo_domain_indices = True 289 | 290 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 291 | # texinfo_show_urls = 'footnote' 292 | 293 | 294 | # -- Options for Epub output --------------------------------------------------- 295 | 296 | # Bibliographic Dublin Core info. 297 | epub_title = "matpes" 298 | epub_author = author 299 | epub_publisher = "matpes Development Team" 300 | epub_copyright = copyright 301 | 302 | # The language of the text. It defaults to the language option 303 | # or en if the language is not set. 304 | # epub_language = '' 305 | 306 | # The scheme of the identifier. Typical schemes are ISBN or URL. 307 | # epub_scheme = '' 308 | 309 | # The unique identifier of the text. This can be a ISBN number 310 | # or the project homepage. 311 | # epub_identifier = '' 312 | 313 | # A unique identification for the text. 314 | # epub_uid = '' 315 | 316 | # A tuple containing the cover image and cover page html template filenames. 317 | # epub_cover = () 318 | 319 | # HTML files that should be inserted before the pages created by sphinx. 320 | # The format is a list of tuples containing the path and title. 321 | # epub_pre_files = [] 322 | 323 | # HTML files that should be inserted after the pages created by sphinx. 324 | # The format is a list of tuples containing the path and title. 325 | # epub_post_files = [] 326 | 327 | # A list of files that should not be packed into the epub file. 328 | # epub_exclude_files = [] 329 | 330 | # The depth of the table of contents in toc.ncx. 331 | # epub_tocdepth = 3 332 | 333 | # Allow duplicate toc entries. 334 | # epub_tocdup = True 335 | 336 | 337 | def linkcode_resolve(domain, info): 338 | """Resolve function for the linkcode extension. 339 | Thanks to https://github.com/Lasagne/Lasagne/blob/master/docs/conf.py. 340 | """ 341 | 342 | def find_source(): 343 | # try to find the file and line number, based on code from numpy: 344 | # https://github.com/numpy/numpy/blob/master/doc/source/conf.py#L286 345 | obj = sys.modules[info["module"]] 346 | for part in info["fullname"].split("."): 347 | obj = getattr(obj, part) 348 | 349 | fn = inspect.getsourcefile(obj) 350 | fn = os.path.relpath(fn, start=os.path.dirname(__file__)) 351 | source, lineno = inspect.getsourcelines(obj) 352 | return fn, lineno, lineno + len(source) - 1 353 | 354 | if domain != "py" or not info["module"]: 355 | return None 356 | 357 | try: 358 | rel_path, line_start, line_end = find_source() 359 | # __file__ is imported from matpes.core 360 | filename = f"matpes/core/{rel_path}#L{line_start}-L{line_end}" 361 | except Exception: 362 | # no need to be relative to core here as module includes full path. 363 | filename = info["module"].replace(".", "/") + ".py" 364 | 365 | return f"https://github.com/materialsproject/matpes/blob/v{__version__}/src/{filename}" 366 | -------------------------------------------------------------------------------- /docs/apidoc/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsvirtuallab/matpes/141fb91330f866e668633fab6bf2a2b756028b30/docs/apidoc/index.rst -------------------------------------------------------------------------------- /docs/apidoc/matpes.rst: -------------------------------------------------------------------------------- 1 | matpes package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | matpes.cli module 8 | ----------------- 9 | 10 | .. automodule:: matpes.cli 11 | :members: 12 | :show-inheritance: 13 | :undoc-members: 14 | 15 | matpes.data module 16 | ------------------ 17 | 18 | .. automodule:: matpes.data 19 | :members: 20 | :show-inheritance: 21 | :undoc-members: 22 | 23 | matpes.db module 24 | ---------------- 25 | 26 | .. automodule:: matpes.db 27 | :members: 28 | :show-inheritance: 29 | :undoc-members: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: matpes 35 | :members: 36 | :show-inheritance: 37 | :undoc-members: 38 | -------------------------------------------------------------------------------- /docs/apidoc/modules.rst: -------------------------------------------------------------------------------- 1 | matpes 2 | ====== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | matpes 8 | -------------------------------------------------------------------------------- /docs/changes.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## v0.0.3 4 | - Added option to download `atoms` file with atomic reference energies needed for PES fitting. 5 | 6 | ## v0.0.2 7 | - Update download links. -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | [![GitHub license](https://img.shields.io/github/license/materialsvirtuallab/matpes)](https://github.com/materialsvirtuallab/matpes/blob/main/LICENSE) 2 | [![Linting](https://github.com/materialsvirtuallab/matpes/workflows/Linting/badge.svg)](https://github.com/materialsvirtuallab/matpes/workflows/Linting/badge.svg) 3 | 4 | ### Aims 5 | 6 | MatPES is an initiative by the [Materials Virtual Lab] and the [Materials Project] to address 7 | [critical deficiencies](http://matpes.ai/about) in potential energy surface (PES) datasets for materials. 8 | 9 | 1. **Accuracy.** MatPES is computed using static DFT calculations with stringent converegence criteria. 10 | Please refer to the `MatPESStaticSet` in [pymatgen] for details. 11 | 2. **Comprehensiveness.** MatPES structures are sampled using a 2-stage version of DImensionality-Reduced 12 | Encoded Clusters with sTratified ([DIRECT]) sampling from a greatly expanded configuration of MD structures. 13 | 3. **Quality.** MatPES includes computed data from the PBE functional, as well as the high fidelity r2SCAN meta-GGA 14 | functional with improved description across diverse bonding and chemistries. 15 | 16 | The initial v2025.1 release comprises ~400,000 structures from 300K MD simulations. This dataset is much smaller 17 | than other PES datasets in the literature and yet achieves comparable or, in some cases, 18 | [improved performance and reliability](http://matpes.ai/benchmarks). 19 | 20 | MatPES is part of the MatML ecosystem, which includes the [MatGL] (Materials Graph Library) and [maml] (MAterials 21 | Machine Learning) packages, the [MatPES] (Materials Potential Energy Surface) dataset, and the [MatCalc] (Materials 22 | Calculator). 23 | 24 | ### Software 25 | 26 | The `matpes` python package, which provides tools for working with the MatPES datasets, can be installed via pip: 27 | 28 | ```shell 29 | pip install matpes 30 | ``` 31 | 32 | Some command line usage examples: 33 | 34 | ```shell 35 | # Download the PBE dataset to the current directory 36 | matpes download pbe 37 | 38 | # You should see a MatPES-PBE-20240214.json.gz file in your directory. 39 | 40 | # Extract all entries in the Fe-O chemical system 41 | matpes data -i MatPES-PBE-20240214.json.gz --chemsys Fe-O -o Fe-O.json.gz 42 | ``` 43 | 44 | The `matpes.db` module provides functionality to create your own MongoDB database with the MatPES downloaded data, 45 | which is extremely useful if you are going to be working with the data (e.g., querying, adding entries, etc.) a lot. 46 | 47 | ### Models 48 | 49 | We have released a set of MatPES-trained universal machine learning interatomic potentials (FPs) in the [M3GNet], 50 | [CHGNet], [TensorNet] architectures in the [MatGL] package. For example, you can load the TensorNet FP trained on 51 | MatPES PBE 2025.1 as follows: 52 | 53 | ```python 54 | import matgl 55 | 56 | potential = matgl.load_model("TensorNet-MatPES-PBE-v2025.1-PES") 57 | ``` 58 | 59 | These FPs can be used easily with the [MatCalc] package to rapidly compute properties. For example: 60 | 61 | ```python 62 | from matcalc.elasticity import ElasticityCalc 63 | from matgl.ext.ase import PESCalculator 64 | 65 | ase_calc = PESCalculator(potential) 66 | calculator = ElasticityCalc(ase_calc) 67 | calculator.calc(structure) 68 | ``` 69 | 70 | ### Tutorials 71 | 72 | We have provided [Jupyter notebooks](http://matpes.ai/tutorials) demonstrating how to load the MatPES dataset, train a model and 73 | perform fine-tuning. 74 | 75 | ### Citing 76 | 77 | If you use the MatPES dataset, please cite the following [work](https://doi.org/10.48550/arXiv.2503.04070): 78 | 79 | ```txt 80 | Kaplan, A. D.; Liu, R.; Qi, J.; Ko, T. W.; Deng, B.; Riebesell, J.; Ceder, G.; Persson, K. A.; Ong, S. P. A 81 | Foundational Potential Energy Surface Dataset for Materials. arXiv 2025. DOI: 10.48550/arXiv.2503.04070. 82 | ``` 83 | 84 | In addition, if you use any of the pre-trained FPs or architectures, please cite the 85 | [references provided](http://matgl.ai/references) on the architecture used as well as MatGL. 86 | 87 | [Materials Virtual Lab]: http://materialsvirtuallab.org 88 | [Materials Project]: https://materialsproject.org 89 | [M3GNet]: http://dx.doi.org/10.1038/s43588-022-00349-3 90 | [CHGNet]: http://doi.org/10.1038/s42256-023-00716-3 91 | [TensorNet]: https://arxiv.org/abs/2306.06482 92 | [DIRECT]: https//doi.org/10.1038/s41524-024-01227-4 93 | [maml]: https://materialsvirtuallab.github.io/maml/ 94 | [MatGL]: https://matgl.ai 95 | [MatPES]: https://matpes.ai 96 | [MatCalc]: https://matcalc.ai 97 | -------------------------------------------------------------------------------- /docs/matpes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | matpes package — matpes 0.0.3 documentation 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 77 | 78 |
82 | 83 |
84 | 328 |
329 |
330 |
331 | 336 | 337 | 338 | -------------------------------------------------------------------------------- /docs/modules.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | matpes — matpes 0.0.3 documentation 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 53 | 54 |
58 | 59 |
60 | 125 |
126 |
127 |
128 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /notebooks/Generating MatPES Compatible Training Data for Fine-tuning of Models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "# Introduction\n", 9 | "\n", 10 | "This notebook shows how to use pymatgen to generate VASP inputs for MatPES-compatible calculations. This is very useful if you intend to generate additional training data to fine-tine MatPES models." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "1", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from __future__ import annotations\n", 21 | "\n", 22 | "from pymatgen.core import Lattice, Structure\n", 23 | "from pymatgen.io.vasp.sets import MatPESStaticSet" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "2", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "example_structure = Structure.from_spacegroup(\"Im-3m\", Lattice.cubic(3), [\"Li\", \"Li\"], [[0, 0, 0], [0.5, 0.5, 0.5]])" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "id": "3", 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "ALGO = Normal\n", 47 | "EDIFF = 1e-05\n", 48 | "ENAUG = 1360\n", 49 | "ENCUT = 680.0\n", 50 | "GGA = Pe\n", 51 | "ISMEAR = 0\n", 52 | "ISPIN = 2\n", 53 | "KSPACING = 0.22\n", 54 | "LAECHG = True\n", 55 | "LASPH = True\n", 56 | "LCHARG = True\n", 57 | "LMAXMIX = 6\n", 58 | "LMIXTAU = True\n", 59 | "LORBIT = 11\n", 60 | "LREAL = False\n", 61 | "LWAVE = False\n", 62 | "MAGMOM = 4*0.6\n", 63 | "NELM = 200\n", 64 | "NSW = 0\n", 65 | "PREC = Accurate\n", 66 | "SIGMA = 0.05\n", 67 | "\n" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "vis = MatPESStaticSet(example_structure, xc_functional=\"PBE\")\n", 73 | "print(vis.incar)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "id": "4", 79 | "metadata": {}, 80 | "source": [ 81 | "Note the strict ENCUT and EDIFF used." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "id": "5", 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "# To write the input files to a directory, use the following line.\n", 92 | "# vis.write_input(\"Li\")" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "6", 98 | "metadata": {}, 99 | "source": [ 100 | "Similarly, the r2SCAN data can be generated using the following code:" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "7", 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "ALGO = All\n", 114 | "EDIFF = 1e-05\n", 115 | "ENAUG = 1360\n", 116 | "ENCUT = 680.0\n", 117 | "ISMEAR = 0\n", 118 | "ISPIN = 2\n", 119 | "KSPACING = 0.22\n", 120 | "LAECHG = True\n", 121 | "LASPH = True\n", 122 | "LCHARG = True\n", 123 | "LMAXMIX = 6\n", 124 | "LMIXTAU = True\n", 125 | "LORBIT = 11\n", 126 | "LREAL = False\n", 127 | "LWAVE = False\n", 128 | "MAGMOM = 4*0.6\n", 129 | "METAGGA = R2scan\n", 130 | "NELM = 200\n", 131 | "NSW = 0\n", 132 | "PREC = Accurate\n", 133 | "SIGMA = 0.05\n", 134 | "\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "vis = MatPESStaticSet(example_structure, xc_functional=\"r2SCAN\")\n", 140 | "print(vis.incar)" 141 | ] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3 (ipykernel)", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.11.10" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 5 165 | } 166 | -------------------------------------------------------------------------------- /notebooks/Using Pre-Trained Models with Matcalc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "0", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stderr", 11 | "output_type": "stream", 12 | "text": [ 13 | "/Users/shyue/repos/matgl/src/matgl/apps/pes.py:69: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", 14 | " self.element_refs = AtomRef(property_offset=torch.tensor(element_refs, dtype=matgl.float_th))\n", 15 | "/Users/shyue/repos/matgl/src/matgl/apps/pes.py:75: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", 16 | " self.register_buffer(\"data_mean\", torch.tensor(data_mean, dtype=matgl.float_th))\n", 17 | "/Users/shyue/repos/matgl/src/matgl/apps/pes.py:76: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", 18 | " self.register_buffer(\"data_std\", torch.tensor(data_std, dtype=matgl.float_th))\n" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "from __future__ import annotations\n", 24 | "\n", 25 | "import matgl\n", 26 | "from matcalc.elasticity import ElasticityCalc\n", 27 | "from matgl.ext.ase import PESCalculator\n", 28 | "from pymatgen.ext.matproj import MPRester\n", 29 | "\n", 30 | "potential = matgl.load_model(\"TensorNet-MatPES-PBE-v2025.1-PES\")\n", 31 | "ase_calc = PESCalculator(potential)\n", 32 | "calculator = ElasticityCalc(ase_calc)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "1", 38 | "metadata": {}, 39 | "source": [ 40 | "Let us obtain the structure of Si from the Materials Project API" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "2", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "mpr = MPRester()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "3", 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "Full Formula (Si2)\n", 64 | "Reduced Formula: Si\n", 65 | "abc : 3.849278 3.849279 3.849278\n", 66 | "angles: 60.000012 60.000003 60.000011\n", 67 | "pbc : True True True\n", 68 | "Sites (2)\n", 69 | " # SP a b c magmom\n", 70 | "--- ---- ----- ----- ----- --------\n", 71 | " 0 Si 0.875 0.875 0.875 -0\n", 72 | " 1 Si 0.125 0.125 0.125 -0\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "si = mpr.get_structure_by_material_id(\"mp-149\")\n", 78 | "print(si)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "4", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "pred = calculator.calc(si)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "id": "5", 94 | "metadata": {}, 95 | "source": [ 96 | "For comparison, let's obtain the DFT computed values from Materials Project" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "id": "6", 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "mp_data = mpr.get_summary_by_material_id(\"mp-149\")" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "id": "7", 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "K_VRH: TensorNet-MatPES-PBE = 101.15424648468968; DFT = 88.916\n", 120 | "G_VRH: TensorNet-MatPES-PBE = 62.546024424713266; DFT = 62.445\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "print(f\"K_VRH: TensorNet-MatPES-PBE = {pred['bulk_modulus_vrh']}; DFT = {mp_data['bulk_modulus']['vrh']}\")\n", 126 | "print(f\"G_VRH: TensorNet-MatPES-PBE = {pred['shear_modulus_vrh']}; DFT = {mp_data['shear_modulus']['vrh']}\")" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3 (ipykernel)", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.11.10" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 5 151 | } 152 | -------------------------------------------------------------------------------- /pages/about.py: -------------------------------------------------------------------------------- 1 | """Home page.""" 2 | 3 | from __future__ import annotations 4 | 5 | import dash 6 | import dash_bootstrap_components as dbc 7 | from dash import dcc, html 8 | 9 | dash.register_page(__name__, path="/about", order=6) 10 | 11 | MARKDOWN_CONTENT = """ 12 | ### Background 13 | 14 | Machine learning interatomic potentials (MLIPs) have revolutionized the field of computational materials science. 15 | MLIPs use ML to reproduce the PES (energies, forces, and stresses) of a collection of atoms, typically computed 16 | using an ab initio method such as density functional theory (DFT). 17 | This enables the simulation of materials at much larger length and longer time scales at near-ab initio accuracy. 18 | 19 | One of the most exciting developments in the past few years is the emergence of MLIPs with near-complete coverage of 20 | the periodic table of elements. Such universal MLIPs are also known as foundation potentials (FPs). Examples include 21 | [M3GNet], [CHGNet], [MACE], to name a few. FPs have broad applications, including materials discovery and the 22 | prediction of PES-derived properties such as elastic constants, phonon dispersion, etc. 23 | 24 | However, most current FPs were trained on DFT relaxation calculations, e.g., from the [Materials Project]. 25 | This dataset, referred to as `MPF` or `MPTraj` in the literature, suffer from several issues: 26 | 27 | 1. The energies, forces, and stresses are not converged to the accuracies necessary to train a high quality MLIP. 28 | 2. Most of the structures are near-equilibrium, with very little coverage of non-equilibrium local environments. 29 | 3. The calculations were performed using the common Perdew-Burke-Ernzerhof (PBE) generalized gradient approximation 30 | (GGA) functional, even though improved functionals with better performance across diverse chemistries and bonding 31 | such as the strongly constrained and appropriately normed (SCAN) meta-GGA functional already exists. 32 | 33 | ### Goals 34 | 35 | MatPES is an initiative by the [Materials Virtual Lab] and the [Materials Project] to address these limitations 36 | comprehensively. The aims of MatPES are three-fold: 37 | 38 | 1. **Accuracy.** The data in MatPES was computed using static DFT calculations with stringent converegence criteria. 39 | Please refer to the `MatPESStaticSet` in [pymatgen] for details. 40 | 2. **Comprehensiveness.** The structures in MatPES are using a 2-stage version of DImensionality-Reduced 41 | Encoded Clusters with sTratified ([DIRECT]) sampling from a greatly expanded configuration of structures from MD 42 | simulations with the pre-trained [M3GNet] FP. 43 | 3. **Quality.** MatPES contains not only data computed using the PBE functional, but also the revised regularized SCAN 44 | (r2SCAN) meta-GGA functional. The r2SCAN functional recovers all 17 exact constraints presently known for 45 | meta-GGA functionals and has shown good transferable accuracy across diverse bonding and chemistries. 46 | 47 | The workflow used to generate the MatPES dataset is shown below. The initial v2025.1 release comprises ~400,000 48 | structures from 300K MD simulations and Materials Project ground state calculations. This dataset is much smaller 49 | than other PES datasets in the literature and yet achieves essentially comparable or, in some cases, improved 50 | performance and reliability. The [MatPES.ai] website provides a comprehensive analysis of the 51 | [statistics](http://matpes.ai/explorer) and [performance benchmarks](http://matpes.ai/benchmarks) of the MatPES 52 | dataset. 53 | 54 | ![matpes_workflow](assets/MatPES_workflow.png) 55 | 56 | ### Citing MatPES 57 | 58 | If you use MatPES, please cite the following work: 59 | 60 | ```txt 61 | Aaron Kaplan, Runze Liu, Ji Qi, Tsz Wai Ko, Bowen Deng, Gerbrand Ceder, Kristin A. Persson, Shyue Ping Ong. 62 | A foundational potential energy surface dataset for materials. Submitted. 63 | ``` 64 | 65 | [Materials Virtual Lab]: http://materialsvirtuallab.org 66 | [pymatgen]: https://pymatgen.org 67 | [Materials Project]: https://materialsproject.org 68 | [MatGL]: https://matgl.ai 69 | [M3GNet]: http://dx.doi.org/10.1038/s43588-022-00349-3 70 | [CHGNet]: http://doi.org/10.1038/s42256-023-00716-3 71 | [MACE]: https://proceedings.neurips.cc/paper_files/paper/2022/hash/4a36c3c51af11ed9f34615b81edb5bbc-Abstract-Conference.html 72 | [DIRECT]: https//doi.org/10.1038/s41524-024-01227-4 73 | [MatPES.ai]: https://matpes.ai 74 | 75 | """ 76 | 77 | layout = dbc.Container([html.Div([dcc.Markdown(MARKDOWN_CONTENT)])]) 78 | -------------------------------------------------------------------------------- /pages/benchmarks.py: -------------------------------------------------------------------------------- 1 | """Benchmarks page.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | 7 | import dash 8 | import dash_bootstrap_components as dbc 9 | import numpy as np 10 | import pandas as pd 11 | import plotly.express as px 12 | from dash import Input, Output, callback, dash_table, dcc, html 13 | from dash.dash_table.Format import Format, Scheme 14 | 15 | dash.register_page(__name__, path="/benchmarks", order=3) 16 | 17 | DATADIR = Path(__file__).absolute().parent 18 | 19 | BENCHMARK_DATA = {func: pd.read_csv(DATADIR / f"matcalc-benchmark-{func.lower()}.csv") for func in ("PBE", "r2SCAN")} 20 | 21 | 22 | INTRO_CONTENT = """ 23 | ## MatCalc-Benchmark 24 | 25 | The MatCalc-Benchmark evaluates FP performance across equilibrium, near-equilibrium, and molecular dynamics 26 | properties. 27 | 28 | Important notes: 29 | - Beyond property metrics, training data size and model complexity are crucial. 30 | - Large datasets demand significant computational resources. TensorNet training on MatPES-PBE takes ~15 min/epoch on an 31 | RTX A6000 GPU, whereas OMat24 requires ~20 hours/epoch on 16 A100 GPUs. 32 | - Complex models with high parameter counts are computationally expensive, restricting MD simulation scale and time. 33 | For instance, eqV2-OMat24 has a t_step of ~213 ms/atom/step, nearly 100 times costlier than the models reported here. 34 | - Performance differences should be viewed in context of statistical significance. Given that the same datasets are 35 | used for all models, statistical significance is determined using a 36 | [paired t-test](https://en.wikipedia.org/wiki/Paired_difference_test) at alpha=0.05. 37 | 38 | We welcome the community's contribution of FPs to this MatCalc-Benchmark. To ensure a fair 39 | comparison, please provide **information about training dataset size, training cost, and the number of parameters**. 40 | The easiest way to run the benchmark is to implement an ASE compatible calculator, which can then be used with the 41 | [MatCalc](https://github.com/materialsvirtuallab/matcalc) package. We will release the equilibrium and 42 | near-equilibrium benchmark datasets soon in the MatCalc repository together with benchmarking tools. The MD 43 | benchmarks can only be run by the Materials Virtual Lab. 44 | """ 45 | 46 | TABLE_NOTE = """ 47 | For MAEs, all values not statistically different from the best value in each column are highlighted. Statistical 48 | significance is determined using a [paired t-test](https://en.wikipedia.org/wiki/Paired_difference_test) with 0.05 49 | alpha level. It should be noted that the Ef test set was derived from the WBM database, which was computed using a 50 | different set of pseudopotential settings. This is likely the reason why the reported Ef MAEs are higher than 51 | expected. We are in the process of updating the Ef benchmark data with consistent settings for the MatPES models. We 52 | expect the performance to be comparable to the OMat24 models. 53 | """ 54 | 55 | LEGEND = r""" 56 | ##### Metrics 57 | 58 | MatCalc-Benchmark metrics can be divided into three categories: equilibrium, near-equilibrium, and molecular dynamics 59 | properties. 60 | 61 | | Task | Symbol | Units | Functional | Test Source | Number | 62 | |-------------------------------|------------|--------------|--------------|--------------------------|--------| 63 | | **Equilibrium** | | | | | | 64 | | Structural similarity | d | - | PBE | [WBM] | 1,000 | 65 | | | | - | r2SCAN | [GNoME] | 1,000 | 66 | | Formation energy per atom | Ef | eV/atom | PBE | [WBM] | 1,000 | 67 | | | | eV/atom | r2SCAN | [GNoME] | 1,000 | 68 | | **Near-equilibrium** | | | | | | 69 | | Bulk modulus | K_VRH | GPa | PBE | [MP] | 3,959 | 70 | | Shear modulus | G_VRH | GPa | PBE | [MP] | 3,959 | 71 | | Constant volume heat capacity | C_V | J/mol/K | PBE | [Alexandria] | 1,170 | 72 | | Off-equilibrium force | F/F_DFT | -- | PBE | [WBM high energy states] | 979 | 73 | | **Molecular dynamics** | | | | | | 74 | | Median termination temp | T_1/2^term | K | PBE & r2SCAN | [MVL] | 172 | 75 | | Ionic conductivity | sigma | mS/cm | PBE | [MVL] | 698 | 76 | | Time per atom per step | t_step | ms/step/atom | PBE & r2SCAN | [MVL] | 1 | 77 | 78 | The time per atom per step (t_step) was computed using LAMMPS MD simulations conducted on a single Intel Xeon Gold core 79 | for a system of 64 Si atoms under ambient conditions (300 K and 1 bar) over 50 ps with a 1 fs time step. 80 | 81 | ##### Datasets 82 | 83 | The current MatCalc-Benchmark includes M3GNet, CHGNet and TensorNet FPs trained on the MatPES, MPF, 84 | MPtrj, and OMat24 datasets, summarized below. 85 | 86 | | Dataset | Number of Structures | 87 | |--------------|---------------------| 88 | | MPF | 185,877 | 89 | | MPtrj | 1,580,395 | 90 | | OMat24 | 100,824,585 | 91 | | MatPES PBE | 434,712 | 92 | | MatPES r²SCAN | 387,897 | 93 | 94 | [WBM]: https://doi.org/10.1038/s41524-020-00481-6 95 | [GNoME]: https://doi.org/10.1038/s41586-023-06735-9 96 | [Alexandria]: https://doi.org/10.48550/arXiv.2412.16551 97 | [WBM high energy states]: https://doi.org/10.48550/arXiv.2405.07105 98 | [MP]: http://materialsproject.org 99 | [MVL]: http://materialsvirtuallab.org 100 | """ 101 | 102 | 103 | def get_sorted(df, i): 104 | """ 105 | Determine the best value from a specified column in a DataFrame. 106 | 107 | This function selects either the maximum or minimum value of a specified column 108 | based on the input. For specific column names, the maximum value is chosen, 109 | while for all other columns, the minimum value is selected. 110 | 111 | Args: 112 | df (pandas.DataFrame): The DataFrame containing the column to evaluate. 113 | i (str): The name of the column to determine the best value from. 114 | 115 | Returns: 116 | Sorted list of values from the specified column. 117 | """ 118 | if i in ("f_FP/f_DFT", "T_1/2^term (K)"): 119 | return sorted(df[i].dropna(), reverse=True) 120 | return sorted(df[i].dropna()) 121 | 122 | 123 | @callback( 124 | [Output("pbe-graph", "figure"), Output("r2scan-graph", "figure")], 125 | [ 126 | Input("pbe-benchmarks-table", "selected_columns"), 127 | Input("r2scan-benchmarks-table", "selected_columns"), 128 | Input("pbe-benchmarks-table", "selected_rows"), 129 | Input("r2scan-benchmarks-table", "selected_rows"), 130 | ], 131 | ) 132 | def update_graphs(selected_columns_pbe, selected_columns_r2scan, selected_rows_pbe, selected_rows_r2scan): 133 | """ 134 | 135 | @callback( 136 | [Output("pbe-graph", "figure"), Output("r2scan-graph", "figure")], 137 | [ 138 | Input("pbe-benchmarks-table", "selected_columns"), 139 | Input("r2scan-benchmarks-table", "selected_columns"), 140 | Input("pbe-benchmarks-table", "selected_rows"), 141 | Input("r2scan-benchmarks-table", "selected_rows"), 142 | ], 143 | ). 144 | 145 | Update the graphs based on the selected columns and rows for PBE and R2SCAN benchmarks. 146 | 147 | Parameters: 148 | - selected_columns_pbe: List of selected columns for PBE benchmarks. 149 | - selected_columns_r2scan: List of selected columns for R2SCAN benchmarks. 150 | - selected_rows_pbe: List of selected rows for PBE benchmarks. 151 | - selected_rows_r2scan: List of selected rows for R2SCAN benchmarks. 152 | 153 | Returns: 154 | A list of figures updated based on the selected columns and rows for PBE and R2SCAN benchmarks. 155 | 156 | """ 157 | layout = dict(font=dict(size=18)) 158 | figs = [] 159 | for cols, rows, (_func, df) in zip( 160 | [selected_columns_pbe, selected_columns_r2scan], 161 | [selected_rows_pbe, selected_rows_r2scan], 162 | BENCHMARK_DATA.items(), 163 | strict=False, 164 | ): 165 | to_plot = df.iloc[rows] 166 | col = cols[0] 167 | fig = px.bar(to_plot, x="Dataset", y=col, color="Architecture", barmode="group") 168 | fig.update_layout(**layout) 169 | figs.append(fig) 170 | return figs 171 | 172 | 173 | def gen_data_table(df, name): 174 | """ 175 | Generates a Dash DataTable with specific configurations for displaying benchmarking 176 | data from a Pandas DataFrame. The table filters out certain columns, formats numeric 177 | data, and applies conditional styling to rows and columns based on specified criteria. 178 | 179 | Parameters: 180 | df (pd.DataFrame): The Pandas DataFrame containing data to display in the table. 181 | Columns in the DataFrame will be filtered and styled based on the function's logic. 182 | 183 | Returns: 184 | dash.dash_table.DataTable: A Dash DataTable object configured with the data, styling, 185 | and sorting properties derived from the input DataFrame. 186 | """ 187 | cols = [c for c in df.columns if c if not ("diff" in c or "STDAE" in c)] 188 | return dash_table.DataTable( 189 | id=f"{name}-benchmarks-table", 190 | columns=[ 191 | { 192 | "name": i, 193 | "id": i, 194 | "type": "numeric", 195 | "selectable": i not in ["Dataset", "Architecture"], 196 | "format": Format(precision=2, scheme=Scheme.decimal, nully="-"), 197 | } 198 | for i in cols 199 | ], 200 | data=df.to_dict("records"), 201 | column_selectable="single", 202 | row_selectable="multi", 203 | selected_columns=["d MAE"], 204 | selected_rows=list(range(len(df))), 205 | style_data_conditional=[ 206 | { 207 | "if": {"row_index": "odd"}, 208 | "backgroundColor": "rgb(220, 220, 220)", 209 | } 210 | ] 211 | + [ 212 | { 213 | "if": {"column_id": i, "row_index": np.where(~df[f"{i.split(' ')[0]} sig_diff_rel"])[0]}, 214 | "font-weight": "bold", 215 | "color": "white", 216 | "background-color": "#633D9Caa", 217 | } 218 | for i in cols 219 | if i.endswith("MAE") 220 | ] 221 | + [ 222 | { 223 | "if": { 224 | "filter_query": "{{T_1/2^term}} = {}".format(df["T_1/2^term"].max()), 225 | "column_id": "T_1/2^term", 226 | }, 227 | "font-weight": "bold", 228 | "color": "white", 229 | "background-color": "#633D9Caa", 230 | }, 231 | { 232 | "if": { 233 | "filter_query": "{{f/f_DFT}} = {}".format(df["f/f_DFT"].max() if "f/f_DFT" in df else 0), 234 | "column_id": "f/f_DFT", 235 | }, 236 | "font-weight": "bold", 237 | "color": "white", 238 | "background-color": "#633D9Caa", 239 | }, 240 | { 241 | "if": { 242 | "filter_query": "{{t_step}} = {}".format(df["t_step"].min()), 243 | "column_id": "t_step", 244 | }, 245 | "font-weight": "bold", 246 | "color": "white", 247 | "background-color": "#633D9Caa", 248 | }, 249 | ], 250 | style_header={"backgroundColor": "#633D9C", "color": "white", "fontWeight": "bold"}, 251 | sort_action="native", 252 | ) 253 | 254 | 255 | pbe_tab = dbc.Card( 256 | dbc.CardBody( 257 | [ 258 | dbc.Col(dcc.Graph(id="pbe-graph"), width=12), 259 | dbc.Col( 260 | html.Div( 261 | "Clicking on the radio buttons graphs the selected column.", 262 | ), 263 | width=12, 264 | ), 265 | dbc.Col( 266 | gen_data_table(BENCHMARK_DATA["PBE"], "pbe"), 267 | width=12, 268 | ), 269 | dbc.Col( 270 | html.Div(dcc.Markdown(TABLE_NOTE)), 271 | width=12, 272 | ), 273 | ] 274 | ), 275 | className="mt-3", 276 | ) 277 | 278 | r2scan_tab = dbc.Card( 279 | dbc.CardBody( 280 | [ 281 | dbc.Col( 282 | html.Div( 283 | "There are only a limited number of MatPES r2SCAN benchmarks for different models due to" 284 | " the limited amount of other r2SCAN training data sets and ground-truth r2SCAN DFT data.", 285 | ), 286 | width=12, 287 | ), 288 | dbc.Col(dcc.Graph(id="r2scan-graph"), width=12), 289 | dbc.Col( 290 | html.Div( 291 | "Clicking on the radio buttons graphs the selected column.", 292 | ), 293 | width=12, 294 | ), 295 | dbc.Col( 296 | gen_data_table(BENCHMARK_DATA["r2SCAN"], "r2scan"), 297 | width=12, 298 | ), 299 | dbc.Col( 300 | html.Div(dcc.Markdown(TABLE_NOTE)), 301 | width=12, 302 | ), 303 | ] 304 | ), 305 | className="mt-3", 306 | ) 307 | 308 | 309 | layout = dbc.Container( 310 | [ 311 | dbc.Col( 312 | html.Div([dcc.Markdown(INTRO_CONTENT)]), 313 | width=12, 314 | ), 315 | dbc.Tabs( 316 | [ 317 | dbc.Tab(pbe_tab, label="PBE"), 318 | dbc.Tab(r2scan_tab, label="r2SCAN"), 319 | ] 320 | ), 321 | dbc.Col(html.H4("Additional Information"), width=12, style={"padding-top": "20px"}), 322 | dbc.Col( 323 | html.Div([dcc.Markdown(LEGEND)], id="matcalc-benchmark-legend"), 324 | width=12, 325 | style={"padding-top": "10px"}, 326 | ), 327 | ] 328 | ) 329 | -------------------------------------------------------------------------------- /pages/dataset.py: -------------------------------------------------------------------------------- 1 | """Benchmarks page.""" 2 | 3 | from __future__ import annotations 4 | 5 | import dash 6 | import dash_bootstrap_components as dbc 7 | from dash import dcc, html 8 | 9 | from matpes import MATPES_SRC 10 | 11 | dash.register_page(__name__, path="/dataset", order=4) 12 | 13 | INTRO_CONTENT = f""" 14 | #### Introduction 15 | 16 | Each MatPES dataset is provided as a gzipped file in the Javascript object notation (JSON) format. For example, the 17 | `MatPES-PBE-2025.1.json.gz` file contains a list of structures with PES (energy, force, stresses) and associated 18 | metadata. The [PBE]({MATPES_SRC}/MatPES-PBE-atoms.json.gz) and [r2SCAN]({MATPES_SRC}/MatPES-R2SCAN-atoms.json.gz) 19 | atomic energies computed with the same settings are also available. """ 20 | 21 | EXAMPLE_CONTENT = ( 22 | """ 23 | #### Example document 24 | 25 | The following is a commented version of a single entry in the `MatPES-PBE-2025.1.json.gz` file. 26 | 27 | ```json 28 | { 29 | "builder_meta": { ... }, // Metadata used by MatPES developers only. 30 | 31 | "nsites": 2, // Number of sites in the structure. 32 | "elements": ["Ti", "Y"], // Elements present in the structure. 33 | "nelements": 2, // Number of unique elements in the structure. 34 | 35 | "composition": { "Y": 1.0, "Ti": 1.0 }, // Elemental composition as a dictionary. 36 | "composition_reduced": { "Y": 1.0, "Ti": 1.0 }, // Reduced/normalized composition. 37 | 38 | "formula_pretty": "YTi", // Readable chemical formula. 39 | "formula_anonymous": "AB", // Anonymous formula representation. 40 | "chemsys": "Ti-Y", // Chemical system association. 41 | 42 | "volume": 49.25681734779065, // Structure volume in ų. 43 | "density": 4.6108675489852535, // Density in g/cm³. 44 | "density_atomic": 24.628408673895326, // Atomic density. 45 | 46 | "symmetry": { // Crystallographic symmetry information. 47 | "crystal_system": "Monoclinic", 48 | "symbol": "Pm", 49 | "number": 6, 50 | "point_group": "m", 51 | "symprec": 0.1, 52 | "angle_tolerance": 5.0, 53 | "version": "2.5.0" 54 | }, 55 | 56 | "structure": { ... }, // Pymatgen serialized structure. 57 | 58 | "energy": -13.19442081, // DFT energy in eV. 59 | 60 | "forces": [ // DFT-calculated forces on each atom (eV/Å). 61 | [0.43578007, -0.32456562, -0.38019986], 62 | [-0.43578007, 0.32456562, 0.38019986] 63 | ], 64 | 65 | "stress": [ // DFT-calculated stress tensor components (kilobar). 66 | -5.71186022, -9.34590239, 13.64346365, 67 | 22.84178803, 23.6719352, 6.22290851 68 | ], 69 | 70 | "abs_forces": [ // Magnitude of DFT forces per atom (eV/Å). 71 | 0.6631734649691654, 72 | 0.6631734649691654 73 | ], 74 | 75 | "matpes_id": "matpes-20240214_999484_73", // Unique MatPES ID for this structure. 76 | 77 | "bandgap": 0.0, // DFT-calculated electronic band gap (eV). 78 | "functional": "PBE", // DFT exchange-correlation functional. 79 | 80 | "formation_energy_per_atom": 0.5199284258333332, // Formation energy per atom (eV). 81 | "cohesive_energy_per_atom": -4.266150985, // Cohesive energy per atom (eV). 82 | 83 | "provenance": { // Metadata describing dataset origin and simulation conditions. 84 | "original_mp_id": "mp-999484", // Source ID from the Materials Project. 85 | "materials_project_version": "v2022.10.28", 86 | "md_ensemble": "NpT", // Molecular dynamics ensemble type. 87 | "md_temperature": 300.0, // MD simulation temperature (K). 88 | "md_pressure": 1.0, // MD simulation pressure (atm). 89 | "md_step": 93, // MD simulation step number. 90 | "mlip_name": "M3GNet-MP-2021.2.8-DIRECT" // Machine learning potential used. 91 | } 92 | } 93 | ``` 94 | """, 95 | f""" 96 | #### Train-validation-test split 97 | 98 | If you wish to reproduce the exact train:validation:test split used in the MatPES paper, you can download the split 99 | indices for the [PBE]({MATPES_SRC}/MatPES-PBE-split.json.gz) and [r2SCAN]({MATPES_SRC}/MatPES-R2SCAN-split.json.gz). 100 | You can then use the following code to split the dataset into train, validation, and test sets: 101 | """, 102 | """ 103 | ```python 104 | from monty.serialization import loadfn 105 | import json 106 | 107 | pbe = loadfn("MatPES-PBE-2025.1.json.gz") 108 | splits = loadfn("MatPES-PBE-split.json.gz") 109 | 110 | train_set = [] 111 | valid_set = [] 112 | test_set = [] 113 | 114 | for i, d in enumerate(pbe): 115 | if i in splits["train"]: 116 | train_set.append(d) 117 | elif i in splits["valid"]: 118 | valid_set.append(d) 119 | else: 120 | test_set.append(d) 121 | 122 | print(f"{len(train_set)}-{len(valid_set)}-{len(test_set)}") 123 | # Output is 391240-21735-21737 124 | ``` 125 | 126 | """, 127 | ) 128 | 129 | layout = dbc.Container([html.Div([dcc.Markdown(INTRO_CONTENT), dcc.Markdown(EXAMPLE_CONTENT)])]) 130 | -------------------------------------------------------------------------------- /pages/explorer.py: -------------------------------------------------------------------------------- 1 | """Main MatPES Explorer App.""" 2 | 3 | from __future__ import annotations 4 | 5 | import collections 6 | import itertools 7 | import json 8 | from pathlib import Path 9 | from typing import TYPE_CHECKING 10 | 11 | import dash 12 | import dash_bootstrap_components as dbc 13 | import numpy as np 14 | import plotly.express as px 15 | import plotly.io as pio 16 | from dash import Input, Output, State, callback, dcc, html 17 | from dash.dash_table import DataTable 18 | from dash.dash_table.Format import Format, Scheme 19 | from pymatgen.core import Element 20 | 21 | from matpes.db import MatPESDB 22 | 23 | from .utils import pt_heatmap 24 | 25 | if TYPE_CHECKING: 26 | import pandas as pd 27 | 28 | dash.register_page(__name__, order=2) 29 | DATADIR = Path(__file__).absolute().parent 30 | # Define constants 31 | FUNCTIONALS = ("PBE", "r2SCAN") 32 | 33 | STATS = {} 34 | for func in FUNCTIONALS: 35 | with open(DATADIR / f"{func.lower()}_stats.json") as f: 36 | STATS[func] = json.load(f) 37 | 38 | 39 | DEFAULT_FIG_LAYOUT = dict(font=dict(size=18), yaxis=dict(nticks=8)) 40 | 41 | 42 | def get_data( 43 | functional: str, 44 | chemsys: str, 45 | ) -> pd.DataFrame: 46 | """ 47 | Filter data. 48 | 49 | Args: 50 | functional (str): The functional used to filter the dataset (e.g., "PBE", "r2SCAN"). 51 | chemsys (list of str): A list of chemical systems to filter by (e.g., ["Fe-O", "Ni-Mn"]). 52 | 53 | Returns: 54 | pd.DataFrame: Filtered data. 55 | """ 56 | matpes = MatPESDB() 57 | return matpes.get_df( 58 | functional, 59 | criteria={"chemsys": chemsys}, 60 | projection=[ 61 | "formula_pretty", 62 | "elements", 63 | "cohesive_energy_per_atom", 64 | "abs_forces", 65 | "nsites", 66 | "nelements", 67 | ], 68 | ) 69 | 70 | 71 | def validate_chemsys(chemsys): 72 | """ 73 | Validates and normalizes a chemical system string. 74 | 75 | This function checks whether the given chemical system string is valid and 76 | converts it into a normalized format where the chemical elements are sorted 77 | alphabetically. If the string is invalid, it returns None. 78 | 79 | Args: 80 | chemsys (str): A string representing a chemical system, where the elements 81 | are separated by a hyphen (e.g., "H-O-C"). 82 | 83 | Returns: 84 | str or None: A normalized and sorted version of the chemical system string 85 | if valid. Returns None if the input is invalid. 86 | """ 87 | try: 88 | toks = chemsys.split("-") 89 | for sym in toks: 90 | Element(sym) 91 | return "-".join(sorted(toks)) 92 | except: # noqa: E722 93 | pass 94 | return None 95 | 96 | 97 | @callback(Output("chemsys_filter", "value"), Input("ptheatmap", "clickData"), State("chemsys_filter", "value")) 98 | def update_chemsys_filter_on_click(clickdata, chemsys_filter): 99 | """ 100 | Update chemsys_filter when PT table is clicked. 101 | 102 | Args: 103 | clickdata (dict): Click data. 104 | chemsys_filter (dict): Element filter. 105 | """ 106 | new_chemsys_filter = chemsys_filter or "" 107 | chemsys = new_chemsys_filter.split("-") 108 | if clickdata: 109 | try: 110 | z = clickdata["points"][0]["text"].split("<")[0] 111 | chemsys.append(Element.from_Z(int(z)).symbol) 112 | except (ValueError, AttributeError): 113 | pass 114 | return "-".join(sorted(set(chemsys))).strip("-") 115 | 116 | 117 | @callback( 118 | [ 119 | Output("heatmap-title", "children"), 120 | Output("ptheatmap", "figure"), 121 | Output("coh_energy_hist", "figure"), 122 | Output("abs_forces_hist", "figure"), 123 | Output("nsites_hist", "figure"), 124 | Output("nelements_hist", "figure"), 125 | Output("data-div", "children"), 126 | ], 127 | [ 128 | Input("functional", "value"), 129 | Input("chemsys_filter", "value"), 130 | ], 131 | ) 132 | def display_data( 133 | functional, 134 | chemsys_filter, 135 | ): 136 | """ 137 | Update graphs and data tables based on user-provided filters and criteria. 138 | 139 | This function processes the input filters and generates various visualizations and data structures, 140 | including a heatmap, histograms, and a formatted data table. The data is derived from a dataset 141 | filtered by the specified parameters. 142 | 143 | Args: 144 | functional (str): The functional used to filter the dataset (e.g., "PBE", "r2SCAN"). 145 | chemsys_filter (list of str): A list of chemical systems to filter by (e.g., ["Fe-O", "Ni-Mn"]). 146 | 147 | Returns: 148 | tuple: 149 | - heatmap_figure (plotly.graph_objects.Figure): A heatmap of element counts, displayed in log scale. 150 | - histograms of formation energies, cohesive energies, nsites, nlements. 151 | - data table. 152 | """ 153 | chemsys = validate_chemsys(chemsys_filter) 154 | df = None 155 | if chemsys: 156 | df = get_data( 157 | functional, 158 | chemsys, 159 | ) 160 | data = {"nstructures": len(df)} 161 | data["element_counts"] = dict(collections.Counter(itertools.chain.from_iterable(df["elements"]))) 162 | for c in ["cohesive_energy_per_atom", "nsites"]: 163 | counts, bins = np.histogram(df[c], bins=50) 164 | data[c] = {"counts": counts.tolist(), "bins": bins.tolist()} 165 | counts, bins = np.histogram(list(itertools.chain(*df["abs_forces"])), bins=50) 166 | data["abs_forces"] = {"counts": counts.tolist(), "bins": bins.tolist()} 167 | counts, bins = np.histogram(df["nelements"], bins=np.arange(0.5, 9.5, 1)) 168 | 169 | data["nelements"] = {"counts": counts.tolist(), "bins": bins.tolist()} 170 | else: 171 | data = STATS[functional] 172 | nstructures = data["nstructures"] 173 | el_counts = collections.defaultdict(int) 174 | el_counts.update(data["element_counts"]) 175 | 176 | def get_bin_mid(bins): 177 | bins = np.array(bins) 178 | return (bins[:-1] + bins[1:]) / 2 179 | 180 | current_template = pio.templates[pio.templates.default] 181 | colorway = current_template.layout.colorway 182 | ecoh_fig = px.bar( 183 | x=get_bin_mid(data["cohesive_energy_per_atom"]["bins"]), 184 | y=data["cohesive_energy_per_atom"]["counts"], 185 | labels={"x": "Cohesive Energy per Atom (eV/atom)", "y": "Count"}, 186 | color_discrete_sequence=colorway, 187 | ) 188 | ecoh_fig.update_layout(**DEFAULT_FIG_LAYOUT) 189 | forces_fig = px.bar( 190 | x=get_bin_mid(data["abs_forces"]["bins"]), 191 | y=data["abs_forces"]["counts"], 192 | labels={"x": "Absolute Forces (eV/A)", "y": "Count"}, 193 | color_discrete_sequence=colorway[1:], 194 | ) 195 | forces_fig.update_yaxes(title_text="Count", type="log") 196 | forces_fig.update_layout(showlegend=False, **DEFAULT_FIG_LAYOUT) 197 | 198 | nsites_fig = px.bar( 199 | x=get_bin_mid(data["nsites"]["bins"]), 200 | y=data["nsites"]["counts"], 201 | labels={"x": "nsites", "y": "Count"}, 202 | color_discrete_sequence=colorway[2:], 203 | ) 204 | 205 | nsites_fig.update_yaxes(title_text="Count", type="log") 206 | nsites_fig.update_layout(showlegend=False, **DEFAULT_FIG_LAYOUT) 207 | 208 | nelements_fig = px.bar( 209 | x=get_bin_mid(data["nelements"]["bins"]), 210 | y=data["nelements"]["counts"], 211 | labels={"x": "nelements", "y": "Count"}, 212 | color_discrete_sequence=current_template.layout.colorway[3:], 213 | ) 214 | nelements_fig.update_layout(showlegend=False, **DEFAULT_FIG_LAYOUT) 215 | 216 | output = [ 217 | f"Elemental Heatmap ({nstructures:,} structures)", 218 | pt_heatmap(el_counts, label="Count", log=True, colorscale="YlOrRd_r"), 219 | ecoh_fig, 220 | forces_fig, 221 | nsites_fig, 222 | nelements_fig, 223 | ] 224 | 225 | if chemsys: 226 | table_df = df.drop("elements", axis=1) 227 | table_df = table_df.drop("abs_forces", axis=1) 228 | output.append( 229 | DataTable( 230 | page_size=25, 231 | id="data-table", 232 | columns=[ 233 | ( 234 | { 235 | "name": i, 236 | "id": i, 237 | "type": "numeric", 238 | "format": Format(precision=3, scheme=Scheme.fixed), 239 | } 240 | if i in ["energy", "cohesive_energy_per_atom"] 241 | else { 242 | "name": i, 243 | "id": i, 244 | } 245 | ) 246 | for i in table_df.columns 247 | ], 248 | data=table_df.to_dict("records"), 249 | ) 250 | ) 251 | else: 252 | output.append("") 253 | return output 254 | 255 | 256 | # Define app layout 257 | layout = dbc.Container( 258 | [ 259 | dbc.Row( 260 | [ 261 | dbc.Col( 262 | [ 263 | html.Label("Functional"), 264 | dcc.Dropdown( 265 | id="functional", 266 | options=[{"label": f, "value": f} for f in FUNCTIONALS], 267 | value="PBE", 268 | clearable=False, 269 | ), 270 | ], 271 | width=2, 272 | ), 273 | dbc.Col( 274 | [ 275 | html.Div("Chemical System"), 276 | dcc.Input( 277 | id="chemsys_filter", 278 | placeholder="e.g., Li-Fe-O", 279 | ), 280 | ], 281 | width=2, 282 | ), 283 | ], 284 | ), 285 | dbc.Col( 286 | [ 287 | html.Div( 288 | "By default, statistics of the entire dataset are shown. Filtering by chemical system will " 289 | "also display a table with basic information about the entries in that system. You can click on" 290 | " the cells in the periodic table to set the chemical system as well. Filtering is done on the fly." 291 | ), 292 | ], 293 | width=12, 294 | ), 295 | dbc.Row( 296 | [ 297 | html.H4("Elemental Heatmap", id="heatmap-title", className="section-title"), 298 | dbc.Col( 299 | html.Div( 300 | [dcc.Graph(id="ptheatmap")], 301 | style={"marginLeft": "auto", "marginRight": "auto", "text-align": "center"}, 302 | ), 303 | width=12, 304 | ), 305 | ] 306 | ), 307 | dbc.Row( 308 | [ 309 | html.H4("Property Distribution", className="section-title"), 310 | dbc.Col( 311 | dcc.Graph( 312 | id="coh_energy_hist", 313 | ), 314 | width=6, 315 | ), 316 | dbc.Col( 317 | dcc.Graph(id="abs_forces_hist"), 318 | width=6, 319 | ), 320 | dbc.Col( 321 | dcc.Graph(id="nsites_hist"), 322 | width=6, 323 | ), 324 | dbc.Col( 325 | dcc.Graph(id="nelements_hist"), 326 | width=6, 327 | ), 328 | ] 329 | ), 330 | html.Div(id="stats-div"), 331 | html.Div(id="data-div"), 332 | ] 333 | ) 334 | -------------------------------------------------------------------------------- /pages/home.py: -------------------------------------------------------------------------------- 1 | """Home page.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | 7 | import dash 8 | import dash_bootstrap_components as dbc 9 | from dash import dcc, html 10 | 11 | from matpes import MATPES_SRC 12 | 13 | dash.register_page(__name__, path="/", order=1) 14 | 15 | readme = Path(__file__).parent.absolute() / ".." / "README.md" 16 | 17 | with open(readme, encoding="utf-8") as f: 18 | MARKDOWN_CONTENT = f.read() 19 | 20 | MARKDOWN_CONTENT = "\n".join(MARKDOWN_CONTENT.split("\n")[2:]) 21 | 22 | jumbotron = html.Div( 23 | dbc.Container( 24 | [ 25 | html.H1("MatPES", className="display-3", id="matpes-title"), 26 | html.P( 27 | "A Foundational Potential Energy Surface Dataset for Materials.", 28 | className="lead", 29 | ), 30 | html.Hr(className="my-2"), 31 | dbc.Row( 32 | html.Div( 33 | [ 34 | dbc.Button( 35 | "PBE", 36 | href=f"{MATPES_SRC}/MatPES-PBE-2025.1.json.gz", 37 | class_name="me-1 download-button", 38 | color="info", 39 | external_link=True, 40 | size="lg", 41 | id="pbe-download-button", 42 | ), 43 | dbc.Tooltip( 44 | "Download PBE dataset (434,712 structures)", 45 | target="pbe-download-button", 46 | placement="bottom", 47 | ), 48 | dbc.Button( 49 | "r2SCAN", 50 | href=f"{MATPES_SRC}/MatPES-R2SCAN-2025.1.json.gz", 51 | class_name="me-1 download-button", 52 | color="success", 53 | external_link=True, 54 | size="lg", 55 | id="r2scan-download-button", 56 | ), 57 | dbc.Tooltip( 58 | "Download r2SCAN dataset (387,897 structures)", 59 | target="r2scan-download-button", 60 | placement="bottom", 61 | ), 62 | ] 63 | ), 64 | ), 65 | ], 66 | fluid=True, 67 | className="py-3", 68 | ), 69 | className="p-3 bg-body-secondary rounded-3", 70 | ) 71 | 72 | 73 | layout = dbc.Container( 74 | [ 75 | jumbotron, 76 | dbc.Row( 77 | html.Div([dcc.Markdown(MARKDOWN_CONTENT)]), 78 | className="mt-4", 79 | ), 80 | ] 81 | ) 82 | -------------------------------------------------------------------------------- /pages/matcalc-benchmark-pbe.csv: -------------------------------------------------------------------------------- 1 | Dataset,Architecture,d MAE,d STDAE,d p_diff_ind,d p_diff_rel,d sig_diff_ind,d sig_diff_rel,Ef MAE,Ef STDAE,Ef p_diff_ind,Ef p_diff_rel,Ef sig_diff_ind,Ef sig_diff_rel,K MAE,K STDAE,K p_diff_ind,K p_diff_rel,K sig_diff_ind,K sig_diff_rel,G MAE,G STDAE,G p_diff_ind,G p_diff_rel,G sig_diff_ind,G sig_diff_rel,CV MAE,CV STDAE,CV p_diff_ind,CV p_diff_rel,CV sig_diff_ind,CV sig_diff_rel,log(sigma) MAE,log(sigma) STDAE,log(sigma) p_diff_ind,log(sigma) p_diff_rel,log(sigma) sig_diff_ind,log(sigma) sig_diff_rel,f/f_DFT,T_1/2^term,t_step 2 | MatPES,M3GNet,0.418333686,0.421147572,6.15E-08,3.94E-08,TRUE,TRUE,0.108808711,0.124468688,7.65E-31,2.74E-56,TRUE,TRUE,26.10996026,30.64843729,7.06E-66,1.52E-92,TRUE,TRUE,25.29958992,233.8792658,0.005467365,0.003433715,TRUE,TRUE,27.24895906,43.22437762,4.86E-53,3.97E-61,TRUE,TRUE,,,,,TRUE,TRUE,0.97,1636,1.96 3 | MatPES,CHGNet,0.430374339,0.4537371,6.03E-09,2.94E-09,TRUE,TRUE,0.08215786,0.116415111,8.85E-11,4.32E-22,TRUE,TRUE,23.72668324,27.8841634,3.37E-45,2.39E-70,TRUE,TRUE,20.61025996,144.1852856,0.014705082,0.012631022,TRUE,TRUE,22.83090524,33.95409596,2.91E-52,3.47E-66,TRUE,TRUE,,,,,TRUE,TRUE,0.91,,4.92 4 | MatPES,TensorNet,0.371426277,0.446028675,0.003874242,0.003022485,TRUE,TRUE,0.080902882,0.119354248,9.63E-10,9.21E-19,TRUE,TRUE,18.02262869,21.68063541,1.37E-06,1.11E-15,TRUE,TRUE,14.81502756,38.37166234,1,,FALSE,FALSE,13.26988714,21.53620621,2.81E-23,2.90E-40,TRUE,TRUE,0.365039674,0.287698396,0.117700773,0.060063602,FALSE,FALSE,0.93,1852,3.01 5 | MPF,M3GNet,0.586588911,0.537624623,2.30E-34,7.62E-33,TRUE,TRUE,0.097652072,0.111683052,1.20E-23,4.73E-56,TRUE,TRUE,21.35687157,27.47417139,5.02E-24,1.10E-40,TRUE,TRUE,40.9678789,121.6616656,2.42E-37,7.20E-40,TRUE,TRUE,30.42199418,47.02615204,7.78E-59,6.62E-68,TRUE,TRUE,,,,,TRUE,TRUE,0.77,1136,2.09 6 | MPF,TensorNet,0.58279326,0.538658991,2.03E-33,8.69E-59,TRUE,TRUE,0.102910583,0.113633415,2.94E-28,2.36E-71,TRUE,TRUE,24.42472992,34.24627817,5.76E-41,3.37E-68,TRUE,TRUE,36.0081031,55.51663247,2.17E-84,7.32E-100,TRUE,TRUE,18.23074932,28.98754308,4.99E-38,5.77E-55,TRUE,TRUE,1.454135167,0.72984558,1.64E-186,1.46E-177,TRUE,TRUE,0.75,1348,1.8 7 | MPtrj,CHGNet,0.505472963,0.512791624,5.24E-19,3.07E-19,TRUE,TRUE,0.091746699,0.099760909,7.21E-21,1.59E-54,TRUE,TRUE,17.44791149,59.96994325,0.072130251,0.051332585,FALSE,FALSE,29.59115865,34.99410977,4.30E-70,7.46E-94,TRUE,TRUE,23.57312967,33.7078937,4.38E-57,9.79E-72,TRUE,TRUE,,,,,TRUE,TRUE,0.83,,4.4 8 | OMat24,TensorNet,0.314650256,0.431951838,1,,FALSE,FALSE,0.052162018,0.083592375,1,,FALSE,FALSE,15.61330592,22.60364463,1,,FALSE,FALSE,15.4963306,36.95529706,0.421677082,0.260346965,FALSE,FALSE,5.966876576,11.56306626,1,,FALSE,FALSE,0.340143433,0.30621217,1,,FALSE,FALSE,0.96,1430,3.12 9 | -------------------------------------------------------------------------------- /pages/matcalc-benchmark-r2scan.csv: -------------------------------------------------------------------------------- 1 | Dataset,Architecture,d MAE,d STDAE,d p_diff_ind,d p_diff_rel,d sig_diff_ind,d sig_diff_rel,Ef MAE,Ef STDAE,Ef p_diff_ind,Ef p_diff_rel,Ef sig_diff_ind,Ef sig_diff_rel,T_1/2^term,t_step 2 | MatPES,M3GNet,0.291577736,0.210904642,4.39E-18,2.31E-30,TRUE,TRUE,0.078279868,0.077356709,1.73E-27,4.81E-42,TRUE,TRUE,1704,1.94 3 | MatPES,CHGNet,0.255577169,0.17583993,1.14E-06,8.50E-07,TRUE,TRUE,0.058032754,0.060976862,7.93E-07,5.18E-15,TRUE,TRUE,,4.88 4 | MatPES,TensorNet,0.219970327,0.148966128,1,,FALSE,FALSE,0.045572464,0.047458108,1,,FALSE,FALSE,1904,3.05 5 | -------------------------------------------------------------------------------- /pages/pbe_stats.json: -------------------------------------------------------------------------------- 1 | {"nstructures": 434712, "element_counts": {"Ti": 19582, "Y": 11913, "N": 46032, "Zn": 15747, "Sn": 16844, "U": 9978, "Ni": 29953, "Zr": 12234, "Ga": 14424, "Gd": 4073, "O": 117733, "P": 20139, "C": 25549, "Th": 6839, "Tc": 7282, "Sb": 17902, "Cd": 11301, "La": 12460, "Te": 13663, "Cu": 24215, "S": 21187, "Ag": 11239, "F": 18416, "B": 17186, "As": 11601, "Ge": 15543, "Li": 29705, "Tl": 9729, "Os": 10237, "Si": 25229, "Mo": 13265, "V": 18753, "Pd": 12550, "Hg": 9439, "Pu": 9842, "W": 14791, "K": 14456, "Al": 20315, "Nb": 13854, "Rh": 12674, "Ce": 9680, "Ru": 12588, "Sc": 9416, "Co": 27048, "Be": 7618, "Au": 12663, "Mg": 33692, "Eu": 6954, "Cl": 11168, "Fe": 36116, "Cr": 18516, "Mn": 30568, "Br": 7690, "H": 15441, "Re": 9901, "Se": 14238, "Sr": 16097, "In": 12260, "Pt": 12678, "Cs": 8554, "Ir": 9692, "Hf": 10140, "I": 7016, "Bi": 13712, "Pa": 4577, "Np": 5453, "Ac": 2708, "Na": 15718, "Lu": 3398, "Kr": 26, "Ta": 12552, "Rb": 9213, "Ca": 15258, "Pb": 9609, "He": 88, "Tm": 2961, "Yb": 7452, "Sm": 5420, "Ho": 3546, "Pm": 1776, "Ba": 15554, "Pr": 5354, "Tb": 3745, "Er": 3189, "Nd": 5922, "Dy": 3409, "Xe": 204, "Ne": 1, "Ar": 3}, "cohesive_energy_per_atom": {"counts": [1, 0, 6, 22, 99, 380, 706, 1314, 2355, 3667, 5664, 7998, 10499, 13787, 16543, 19561, 22852, 25220, 28952, 28719, 29383, 27531, 26217, 24750, 24139, 21872, 19748, 16414, 13750, 11239, 9264, 7684, 5599, 3839, 2456, 1417, 639, 307, 42, 28, 26, 11, 7, 1, 3, 0, 0, 0, 0, 1], "bins": [-9.441932215, -9.193073244699999, -8.9442142744, -8.6953553041, -8.446496333799999, -8.1976373635, -7.9487783932, -7.6999194229, -7.4510604526, -7.2022014823, -6.953342512, -6.7044835417, -6.4556245714, -6.2067656011, -5.9579066308, -5.7090476605, -5.4601886902, -5.2113297199, -4.9624707496, -4.7136117793, -4.464752809, -4.2158938387, -3.9670348684, -3.7181758981, -3.4693169277999996, -3.2204579575, -2.9715989872, -2.7227400169000004, -2.4738810466, -2.2250220763, -1.9761631060000004, -1.7273041356999999, -1.4784451654000001, -1.2295861950999996, -0.9807272248000007, -0.7318682545000001, -0.48300928419999956, -0.23415031390000074, 0.014708656399999853, 0.26356762670000045, 0.5124265969999993, 0.7612855672999999, 1.0101445376000004, 1.2590035078999993, 1.5078624781999999, 1.7567214485000004, 2.0055804187999993, 2.2544393891, 2.5032983594000005, 2.7521573296999993, 3.0010163000000003]}, "nsites": {"counts": [252234, 75140, 31734, 29365, 14086, 14177, 8149, 7800, 872, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 49, 81, 433, 21, 131, 58, 55, 70, 25, 61, 46, 0, 18, 15, 44, 0, 0, 0, 0, 0, 0, 0, 6, 4, 13, 0, 6, 0, 2, 15], "bins": [1.0, 5.84, 10.68, 15.52, 20.36, 25.2, 30.04, 34.879999999999995, 39.72, 44.56, 49.4, 54.239999999999995, 59.08, 63.92, 68.75999999999999, 73.6, 78.44, 83.28, 88.12, 92.96, 97.8, 102.64, 107.47999999999999, 112.32, 117.16, 122.0, 126.84, 131.68, 136.51999999999998, 141.35999999999999, 146.2, 151.04, 155.88, 160.72, 165.56, 170.4, 175.24, 180.07999999999998, 184.92, 189.76, 194.6, 199.44, 204.28, 209.12, 213.95999999999998, 218.79999999999998, 223.64, 228.48, 233.32, 238.16, 243.0]}, "abs_forces": {"counts": [3874787, 3293, 1538, 668, 508, 250, 137, 110, 63, 28, 32, 24, 11, 8, 1, 5, 2, 1, 3, 0, 0, 0, 1, 0, 1, 0, 1, 10, 6, 0, 0, 0, 0, 9, 16, 14, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6], "bins": [0.0, 15.777319937691583, 31.554639875383167, 47.33195981307475, 63.10927975076633, 78.88659968845792, 94.6639196261495, 110.44123956384108, 126.21855950153267, 141.99587943922424, 157.77319937691584, 173.5505193146074, 189.327839252299, 205.1051591899906, 220.88247912768216, 236.65979906537376, 252.43711900306533, 268.2144389407569, 283.9917588784485, 299.7690788161401, 315.5463987538317, 331.32371869152325, 347.1010386292148, 362.8783585669064, 378.655678504598, 394.4329984422896, 410.2103183799812, 425.98763831767275, 441.7649582553643, 457.5422781930559, 473.3195981307475, 489.0969180684391, 504.87423800613067, 520.6515579438222, 536.4288778815138, 552.2061978192054, 567.983517756897, 583.7608376945885, 599.5381576322802, 615.3154775699718, 631.0927975076634, 646.8701174453549, 662.6474373830465, 678.4247573207381, 694.2020772584297, 709.9793971961212, 725.7567171338128, 741.5340370715044, 757.311357009196, 773.0886769468876, 788.8659968845792]}, "nelements": {"counts": [5274, 133557, 231743, 51303, 11424, 1371, 39, 1], "bins": [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5]}} 2 | -------------------------------------------------------------------------------- /pages/r2scan_stats.json: -------------------------------------------------------------------------------- 1 | {"nstructures": 387897, "element_counts": {"N": 43472, "Zn": 14282, "Ti": 17785, "Y": 10925, "Sn": 15426, "U": 9141, "Ni": 27300, "Zr": 11476, "Ga": 13285, "Gd": 3770, "O": 98013, "P": 16695, "C": 23490, "Th": 6507, "Tc": 7011, "Sb": 16225, "Cd": 10482, "La": 11435, "Te": 12500, "Ag": 10345, "F": 15572, "Cu": 21841, "S": 18755, "B": 15527, "As": 10442, "Ge": 14071, "Li": 24068, "Tl": 8878, "Os": 9601, "Si": 22437, "Mo": 12426, "V": 16601, "Pd": 11702, "Hg": 8659, "Pu": 7954, "W": 13852, "Al": 18405, "Nb": 12676, "Rh": 11803, "Ru": 11516, "Ce": 8434, "Co": 23345, "Au": 11672, "Mg": 30197, "Be": 7307, "Eu": 6277, "Cr": 16465, "Mn": 26080, "Cl": 9976, "Fe": 31496, "Br": 6881, "H": 13649, "Re": 9188, "Cs": 7651, "Lu": 3144, "Se": 12954, "Sr": 14560, "In": 11098, "Pt": 11913, "Sc": 8951, "Ir": 8970, "Hf": 9560, "I": 6155, "K": 12923, "Bi": 12282, "Pa": 4125, "Np": 4785, "Ac": 2526, "Na": 14021, "Rb": 8311, "Ta": 11710, "Ca": 13825, "Pb": 8611, "He": 85, "Kr": 26, "Yb": 5148, "Ba": 10183, "Ho": 2141, "Pm": 997, "Dy": 2004, "Sm": 3394, "Pr": 3389, "Er": 2104, "Tm": 2139, "Tb": 2326, "Nd": 3766, "Xe": 173, "Ar": 3, "Ne": 1}, "cohesive_energy_per_atom": {"counts": [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 31, 199, 737, 1744, 3737, 6681, 11137, 16321, 21740, 26673, 31886, 36275, 38308, 38434, 35456, 31603, 26364, 20390, 15090, 10996, 6531, 4178, 2262, 835, 185, 38, 28, 16, 10, 2, 0, 0, 0, 0, 1], "bins": [-13.7408266, -13.3929893422, -13.0451520844, -12.6973148266, -12.349477568800001, -12.001640311000001, -11.6538030532, -11.3059657954, -10.9581285376, -10.6102912798, -10.262454022, -9.914616764200002, -9.5667795064, -9.218942248600001, -8.871104990800001, -8.523267733, -8.1754304752, -7.8275932174000005, -7.4797559596, -7.131918701800001, -6.784081444000001, -6.436244186200001, -6.088406928400001, -5.740569670600001, -5.392732412800001, -5.044895155000001, -4.6970578972000006, -4.3492206394, -4.001383381600002, -3.6535461238000018, -3.3057088660000016, -2.9578716082000014, -2.610034350400001, -2.262197092600001, -1.9143598348000008, -1.5665225770000006, -1.2186853192000004, -0.870848061400002, -0.5230108036000019, -0.17517354580000166, 0.17266371199999853, 0.5205009697999987, 0.8683382275999989, 1.216175485399999, 1.5640127431999975, 1.9118500009999977, 2.259687258799998, 2.6075245166, 2.9553617743999983, 3.3031990321999967, 3.6510362900000004]}, "nsites": {"counts": [234358, 70479, 30708, 28481, 10376, 4072, 2654, 5102, 719, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 42, 72, 286, 79, 113, 26, 59, 18, 48, 68, 9, 37, 1, 20, 43, 0, 0, 0, 0, 0, 0, 0, 0, 4, 14, 0, 3, 0, 0, 4], "bins": [1.0, 5.78, 10.56, 15.34, 20.12, 24.900000000000002, 29.68, 34.46, 39.24, 44.02, 48.800000000000004, 53.580000000000005, 58.36, 63.14, 67.92, 72.7, 77.48, 82.26, 87.04, 91.82000000000001, 96.60000000000001, 101.38000000000001, 106.16000000000001, 110.94000000000001, 115.72, 120.5, 125.28, 130.06, 134.84, 139.62, 144.4, 149.18, 153.96, 158.74, 163.52, 168.3, 173.08, 177.86, 182.64000000000001, 187.42000000000002, 192.20000000000002, 196.98000000000002, 201.76000000000002, 206.54000000000002, 211.32000000000002, 216.10000000000002, 220.88000000000002, 225.66000000000003, 230.44, 235.22, 240.0]}, "abs_forces": {"counts": [3055137, 2341, 1023, 420, 331, 132, 96, 71, 21, 12, 23, 24, 7, 3, 2, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 6, 12, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 2], "bins": [0.0, 16.279898247353877, 32.559796494707754, 48.83969474206163, 65.11959298941551, 81.39949123676939, 97.67938948412326, 113.95928773147713, 130.23918597883102, 146.5190842261849, 162.79898247353879, 179.07888072089264, 195.35877896824653, 211.6386772156004, 227.91857546295427, 244.19847371030815, 260.47837195766203, 276.7582702050159, 293.0381684523698, 309.31806669972366, 325.59796494707757, 341.8778631944314, 358.1577614417853, 374.4376596891392, 390.71755793649305, 406.9974561838469, 423.2773544312008, 439.5572526785547, 455.83715092590853, 472.11704917326244, 488.3969474206163, 504.6768456679702, 520.9567439153241, 537.236642162678, 553.5165404100318, 569.7964386573857, 586.0763369047396, 602.3562351520934, 618.6361333994473, 634.9160316468012, 651.1959298941551, 667.4758281415089, 683.7557263888629, 700.0356246362168, 716.3155228835706, 732.5954211309245, 748.8753193782784, 765.1552176256322, 781.4351158729861, 797.71501412034, 813.9949123676939]}, "nelements": {"counts": [5097, 123446, 207104, 42284, 8989, 950, 26, 1], "bins": [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5]}} 2 | -------------------------------------------------------------------------------- /pages/references.py: -------------------------------------------------------------------------------- 1 | """Home page.""" 2 | 3 | from __future__ import annotations 4 | 5 | import dash 6 | import dash_bootstrap_components as dbc 7 | from dash import dcc, html 8 | 9 | dash.register_page(__name__, path="/references", order=7) 10 | 11 | MARKDOWN_CONTENT = """ 12 | #### References 13 | 14 | [MatPES](https://doi.org/10.48550/arXiv.2503.04070) 15 | 16 | ```txt 17 | Kaplan, A. D.; Liu, R.; Qi, J.; Ko, T. W.; Deng, B.; Riebesell, J.; Ceder, G.; Persson, K. A.; Ong, S. P. A 18 | Foundational Potential Energy Surface Dataset for Materials. arXiv 2025. DOI: 10.48550/arXiv.2503.04070 19 | ``` 20 | 21 | [M3GNet] 22 | 23 | ```txt 24 | Chen, C.; Ong, S. P. A Universal Graph Deep Learning Interatomic Potential for the Periodic Table. Nat Comput 25 | Sci 2022, 2 (11), 718-728. DOI: 10.1038/s43588-022-00349-3 26 | ``` 27 | 28 | [CHGNet] 29 | 30 | ```txt 31 | Deng, B.; Zhong, P.; Jun, K.; Riebesell, J.; Han, K.; Bartel, C. J.; Ceder, G. CHGNet as a Pretrained Universal 32 | Neural Network Potential for Charge-Informed Atomistic Modelling. Nat Mach Intell 2023, 5 (9), 1031-1041. 33 | DOI: 10.1038/s42256-023-00716-3. 34 | ``` 35 | 36 | [TensorNet] 37 | 38 | ```txt 39 | Simeon, G.; de Fabritiis, G. TensorNet: Cartesian Tensor Representations for Efficient Learning of Molecular 40 | Potentials. arXiv October 30, 2023. DOI: 10.48550/arXiv.2306.06482. 41 | ``` 42 | 43 | [MatGL] 44 | 45 | ```txt 46 | Ko, T. W.; Deng, B.; Nassar, M.; Barroso-Luque, L.; Liu, R.; Qi, J.; Liu, E.; Ceder, G.; Miret, S.; 47 | Ong, S. P. Materials Graph Library (MatGL), an open-source graph deep learning library for materials science and 48 | chemistry. Submitted. 49 | ``` 50 | 51 | [Materials Virtual Lab]: http://materialsvirtuallab.org 52 | [pymatgen]: https://pymatgen.org 53 | [Materials Project]: https://materialsproject.org 54 | [MatGL]: https://matgl.ai 55 | [M3GNet]: http://dx.doi.org/10.1038/s43588-022-00349-3 56 | [CHGNet]: http://doi.org/10.1038/s42256-023-00716-3 57 | [MACE]: https://proceedings.neurips.cc/paper_files/paper/2022/hash/4a36c3c51af11ed9f34615b81edb5bbc-Abstract-Conference.html 58 | [DIRECT]: https//doi.org/10.1038/s41524-024-01227-4 59 | [MatPES.ai]: https://matpes.ai 60 | 61 | """ 62 | 63 | layout = dbc.Container([html.Div([dcc.Markdown(MARKDOWN_CONTENT)])]) 64 | -------------------------------------------------------------------------------- /pages/tasks.csv: -------------------------------------------------------------------------------- 1 | Task,Symbol,Units,Functional,TestSource,Number 2 | Structuralsimilarity,|v|,-,PBE,[WBM],"1,000" 3 | ,,-,r2SCAN,[GNoME],"1,000" 4 | Formation energy per atom,Ef,eV/atom,PBE,[WBM],"1,000" 5 | ,,eV/atom,r2SCAN,[GNoME],"1,000" 6 | Bulk modulus,K_VRH,GPa,PBE,[MP],"3,959" 7 | Shear modulus,G_VRH,GPa,PBE,[MP],"3,959" 8 | Constant volume heat capacity,C_V,J/mol/K,PBE,[Alexandria],"1,170" 9 | Off-equilibrium force,F/F_DFT,--,PBE,[WBMhighenergystates],979 10 | Median termination temperature,T_1/2^term,K,PBE & r2SCAN,[MVL],172 11 | Ionic conductivity,sigma,mS/cm,PBE,[MVL],698 12 | Time per atom per time step,t_step,ms/step/atom,PBE & r2SCAN,[MVL],1 13 | -------------------------------------------------------------------------------- /pages/tutorials.py: -------------------------------------------------------------------------------- 1 | """Home page.""" 2 | 3 | from __future__ import annotations 4 | 5 | import glob 6 | import os 7 | from pathlib import Path 8 | 9 | import dash 10 | import dash_bootstrap_components as dbc 11 | from dash import Input, Output, callback, dcc 12 | 13 | dash.register_page(__name__, path="/tutorials", order=5) 14 | 15 | 16 | DATADIR = Path(__file__).absolute().parent / ".." / "assets" 17 | 18 | NOTEBOOKS = [os.path.basename(n) for n in glob.glob(str(DATADIR / "*.md"))] 19 | 20 | 21 | @callback( 22 | Output("notebook-markdown", "children"), 23 | Input("notebook-dropdown", "value"), 24 | ) 25 | def display_notebook(nb): 26 | """ 27 | Display the selected notebook by updating the iframe's source. 28 | 29 | This callback updates the src attribute of an iframe component based 30 | on the value selected from a dropdown menu. It dynamically generates 31 | the path to the desired notebook file located in the assets directory 32 | and sets it as the src for rendering in the iframe. 33 | 34 | Parameters: 35 | nb : str 36 | The value selected from the dropdown menu, representing the 37 | notebook filename. 38 | 39 | Returns: 40 | str 41 | The dynamically constructed path to the selected notebook file 42 | located under the 'assets' directory. 43 | """ 44 | with open(DATADIR / nb) as f: 45 | return f.read() 46 | 47 | 48 | HEADER = """ 49 | #### Tutorials 50 | 51 | We have created a series of Jupyter Notebook tutorials on how to use MatPES. This page provides an easy way to explore 52 | the tutorials. The Jupyter notebooks can be downloaded from the 53 | [MatPES Github repository](https://github.com/materialsvirtuallab/matpes/tree/main/notebooks). 54 | """ 55 | 56 | 57 | layout = dbc.Container( 58 | [ 59 | dcc.Markdown(HEADER), 60 | dcc.Dropdown( 61 | id="notebook-dropdown", 62 | placeholder="Select a notebook to view:", 63 | value=NOTEBOOKS[0], 64 | options=[{"label": f.rsplit(".")[0], "value": f} for f in NOTEBOOKS], 65 | ), 66 | dcc.Markdown(id="notebook-markdown", style={"marginTop": "10px"}), 67 | ] 68 | ) 69 | -------------------------------------------------------------------------------- /pages/utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions, e.g., pt heatmaps, etc.""" 2 | 3 | from __future__ import annotations 4 | 5 | import functools 6 | import warnings 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import plotly.graph_objects as go 11 | from pymatgen.core.periodic_table import Element 12 | 13 | 14 | @functools.lru_cache 15 | def get_pt_df(include_artificial=False) -> pd.DataFrame: 16 | """ 17 | Creates a DataFrame containing periodic table data. 18 | 19 | Returns: 20 | pd.DataFrame: DataFrame with element data including symbol, name, atomic number (Z), 21 | electronegativity (X), group, period, category, and hover label. 22 | """ 23 | with warnings.catch_warnings(): 24 | # Suppress pymatgen warnings 25 | warnings.simplefilter("ignore") 26 | elements = [ 27 | { 28 | "symbol": el.symbol, 29 | "name": el.long_name, 30 | "Z": el.Z, 31 | "X": el.X, 32 | "group": get_group(el), 33 | "period": get_period(el), 34 | "category": get_category(el), 35 | } 36 | for el in Element 37 | if (el.name not in ["D", "T"]) and (el.Z <= 92 or include_artificial) 38 | ] 39 | df = pd.DataFrame(elements) 40 | df["label"] = df.apply(lambda row: f"{row['Z']}
{row['symbol']}", axis=1) 41 | return df 42 | 43 | 44 | def get_period(el: Element) -> int: 45 | """ 46 | Determine the period for an element, with adjustments for rare earths. 47 | 48 | Args: 49 | el (Element): An element instance. 50 | 51 | Returns: 52 | int: The adjusted period number. 53 | """ 54 | if el.is_actinoid or el.is_lanthanoid: 55 | return el.row + 2 56 | return el.row 57 | 58 | 59 | def get_group(el: Element) -> int: 60 | """ 61 | Determine the group for an element, with adjustments for rare earths. 62 | 63 | Args: 64 | el (Element): An element instance. 65 | 66 | Returns: 67 | int: The adjusted group number. 68 | """ 69 | if el.is_actinoid: 70 | return el.group + el.Z - 89 71 | if el.is_lanthanoid: 72 | return el.group + el.Z - 57 73 | return el.group 74 | 75 | 76 | def get_category(el: Element) -> str: 77 | """ 78 | Categorize the element based on its type. 79 | 80 | Args: 81 | el (Element): An element instance. 82 | 83 | Returns: 84 | str: The category of the element (e.g., alkali, noble_gas, etc.). 85 | """ 86 | if el.Z > 92: 87 | return "transuranic" 88 | for category in [ 89 | "alkali", 90 | "alkaline", 91 | "actinoid", 92 | "lanthanoid", 93 | "halogen", 94 | "noble_gas", 95 | "metal", 96 | "chalcogen", 97 | ]: 98 | if getattr(el, f"is_{category}"): 99 | return category 100 | return "other" 101 | 102 | 103 | def pt_heatmap( 104 | values: dict[str, float], label: str = "value", log: bool = False, include_artificial=False, colorscale="YlOrRd" 105 | ) -> go.Figure: 106 | """ 107 | Generate a heatmap visualization of the periodic table. 108 | 109 | Args: 110 | values (dict[str, float]): Mapping of element symbols to values to visualize. 111 | label (str): Label for the values displayed. 112 | log (bool): Whether to use logarithmic scaling for the color axis. 113 | include_artificial (bool): Whether to include artificial elements. Defaults to False. 114 | colorscale (str): Colorscale to use for the heatmap. Defaults to "YlOrRd". 115 | 116 | Returns: 117 | plotly.graph_objects.Figure: A scatter plot representing the heatmap. 118 | """ 119 | df = get_pt_df(include_artificial=include_artificial) 120 | df[label] = df["symbol"].map(values) if values else df["X"] 121 | if log: 122 | with warnings.catch_warnings(): 123 | warnings.filterwarnings("ignore", category=RuntimeWarning) 124 | df[f"log10_{label}"] = np.log10(df[label]) 125 | 126 | # Initialize periodic table grid 127 | grid = np.full((9, 18), None, dtype=np.float64) 128 | label_texts = np.full((9, 18), "", dtype=object) 129 | hover_texts = np.full((9, 18), "", dtype=object) 130 | 131 | # Fill grid with element symbols, hover text, and category colors 132 | for _, row in df.iterrows(): 133 | group, period = row["group"], row["period"] 134 | grid[period - 1, group - 1] = row[label] if not log else row[f"log10_{label}"] 135 | label_texts[period - 1, group - 1] = f"{row['Z']}
{row['symbol']}
{row[label]}" 136 | hover_texts[period - 1, group - 1] = f"{row['Z']}
{row['name']}
{row[label]}" 137 | 138 | # Create the plot 139 | fig = go.Figure() 140 | 141 | for el in Element: 142 | if el.symbol not in values and (el.Z <= 92 or include_artificial): 143 | fig.add_trace( 144 | go.Heatmap( 145 | z=[-1], 146 | x=[get_group(el)], 147 | y=[get_period(el)], 148 | xgap=1, 149 | ygap=1, 150 | showscale=False, 151 | colorscale="Greys", 152 | ) 153 | ) 154 | 155 | fig.add_trace( 156 | go.Heatmap( 157 | z=grid, 158 | x=list(range(1, 19)), 159 | y=list(range(1, 9)), 160 | text=hover_texts, 161 | hoverinfo="text", 162 | showscale=True, 163 | colorscale="matter", 164 | xgap=1, 165 | ygap=1, 166 | coloraxis="coloraxis", 167 | ) 168 | ) 169 | 170 | # Add annotations for element symbols 171 | for _index, row in df.iterrows(): 172 | group, period = row["group"], row["period"] 173 | fig.add_annotation( 174 | x=group, 175 | y=period, 176 | text=label_texts[period - 1, group - 1], 177 | showarrow=False, 178 | font=dict( 179 | family="Arial", 180 | size=14, 181 | color="black", 182 | weight="bold", 183 | ), 184 | align="center", 185 | ) 186 | # Hide x-axis 187 | fig.update_xaxes(showticklabels=False, showgrid=False) 188 | 189 | # Hide y-axis 190 | fig.update_yaxes(showticklabels=False, showgrid=False) 191 | 192 | # Update layout 193 | fig.update_layout( 194 | title=None, 195 | xaxis=dict(title=None), # Maintain 1:1 aspect ratio 196 | yaxis=dict(title=None, scaleanchor="x", scaleratio=1.33, autorange="reversed"), 197 | width=1200, 198 | height=900, 199 | colorscale={"sequential": colorscale}, 200 | plot_bgcolor="white", 201 | ) 202 | 203 | if log: 204 | max_log = int(df[f"log10_{label}"].max()) 205 | fig.update_layout( 206 | coloraxis=dict( 207 | colorbar=dict( 208 | title=label, 209 | tickmode="array", 210 | tickvals=list(range(1, max_log + 1)), 211 | ticktext=[f"1e{i}" for i in range(1, max_log + 1)], 212 | tickfont=dict(size=14), 213 | ) 214 | ) 215 | ) 216 | 217 | return fig 218 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | # pin NumPy version used in the build 4 | "oldest-supported-numpy", 5 | "setuptools>=65.0.0", 6 | ] 7 | build-backend = "setuptools.build_meta" 8 | 9 | 10 | [project] 11 | name = "matpes" 12 | authors = [{ name = "MatPES Development Team", email = "ongsp@ucsd.edu" }] 13 | maintainers = [ 14 | { name = "Shyue Ping Ong", email = "ongsp@ucsd.edu" }, 15 | ] 16 | description = """Tools for working with MatPES.""" 17 | readme = "README.md" 18 | requires-python = ">=3.10,<3.13" 19 | keywords = [ 20 | "potential energy surface", 21 | "PES", 22 | "VASP", 23 | "analysis", 24 | "crystal", 25 | "materials", 26 | "science", 27 | "ai", 28 | "ml" 29 | ] 30 | license = { text = "BSD-3-Clause" } 31 | classifiers = [ 32 | "Development Status :: 4 - Beta", 33 | "Intended Audience :: Science/Research", 34 | "License :: OSI Approved :: MIT License", 35 | "Operating System :: OS Independent", 36 | "Programming Language :: Python :: 3", 37 | "Programming Language :: Python :: 3.10", 38 | "Programming Language :: Python :: 3.11", 39 | "Programming Language :: Python :: 3.12", 40 | "Topic :: Scientific/Engineering :: Chemistry", 41 | "Topic :: Scientific/Engineering :: Information Analysis", 42 | "Topic :: Scientific/Engineering :: Physics", 43 | ] 44 | dependencies = [ 45 | "pandas>=2", 46 | "pymongo>=4.11", 47 | "pymatgen", 48 | "tqdm", 49 | "datasets>=3.6.0", 50 | ] 51 | version = "0.0.3" 52 | 53 | [tool.setuptools.packages.find] 54 | where = ["src"] 55 | include = ["matpes", "matpes.*"] 56 | 57 | [project.scripts] 58 | matpes = "matpes.cli:main" 59 | 60 | [tool.versioningit.vcs] 61 | method = "git" 62 | default-tag = "0.0.1" 63 | 64 | [tool.black] 65 | line-length = 120 66 | 67 | [tool.ruff] 68 | target-version = "py310" 69 | line-length = 120 70 | lint.select = [ 71 | "B", # flake8-bugbear 72 | "C4", # flake8-comprehensions 73 | "D", # pydocstyle 74 | "E", # pycodestyle error 75 | "EXE", # flake8-executable 76 | "F", # pyflakes 77 | "FA", # flake8-future-annotations 78 | "FLY", # flynt 79 | "I", # isort 80 | "ICN", # flake8-import-conventions 81 | "ISC", # flake8-implicit-str-concat 82 | "PD", # pandas-vet 83 | "PERF", # perflint 84 | "PIE", # flake8-pie 85 | "PL", # pylint 86 | "PT", # flake8-pytest-style 87 | "PYI", # flakes8-pyi 88 | "Q", # flake8-quotes 89 | "RET", # flake8-return 90 | "RSE", # flake8-raise 91 | "RUF", # Ruff-specific rules 92 | "SIM", # flake8-simplify 93 | "SLOT", # flake8-slots 94 | "TCH", # flake8-type-checking 95 | "TID", # tidy imports 96 | "TID", # flake8-tidy-imports 97 | "UP", # pyupgrade 98 | "W", # pycodestyle warning 99 | "YTT", # flake8-2020 100 | ] 101 | lint.ignore = [ 102 | "B023", # Function definition does not bind loop variable 103 | "B028", # No explicit stacklevel keyword argument found 104 | "B904", # Within an except clause, raise exceptions with ... 105 | "C408", # unnecessary-collection-call 106 | "D105", # Missing docstring in magic method 107 | "D205", # 1 blank line required between summary line and description 108 | "D212", # Multi-line docstring summary should start at the first line 109 | "PD901", # pandas-df-variable-name 110 | "PERF203", # try-except-in-loop 111 | "PERF401", # manual-list-comprehension (TODO fix these or wait for autofix) 112 | "PLR", # pylint refactor 113 | "PLW2901", # Outer for loop variable overwritten by inner assignment target 114 | "PT013", # pytest-incorrect-pytest-import 115 | "RUF012", # Disable checks for mutable class args. This is a non-problem. 116 | "SIM105", # Use contextlib.suppress(OSError) instead of try-except-pass 117 | "ISC001" 118 | ] 119 | lint.pydocstyle.convention = "google" 120 | lint.isort.required-imports = ["from __future__ import annotations"] 121 | lint.isort.split-on-trailing-comma = false 122 | 123 | [tool.ruff.lint.per-file-ignores] 124 | "__init__.py" = ["F401"] 125 | "tests/*" = ["D"] 126 | "tasks.py" = ["D"] 127 | 128 | [tool.pytest.ini_options] 129 | addopts = "--durations=30 --quiet -r xXs --color=yes -p no:warnings --import-mode=importlib" 130 | 131 | [tool.mypy] 132 | ignore_missing_imports = true 133 | namespace_packages = true 134 | explicit_package_bases = true 135 | no_implicit_optional = false 136 | exclude = ["docs", "tasks.py"] 137 | 138 | [[tool.mypy.overrides]] 139 | module = ["requests.*", "tabulate.*"] 140 | ignore_missing_imports = true 141 | 142 | [tool.codespell] 143 | ignore-words-list = """ 144 | titel,alls,ans,nd,mater,nwo,te,hart,ontop,ist,ot,fo,nax,coo,coul,ser,leary,thre, 145 | fase,rute,reson,titels,ges,scalr,strat,struc,hda,nin,ons,pres,kno,loos,lamda,lew 146 | """ 147 | check-filenames = true 148 | 149 | [tool.coverage.run] 150 | relative_files = true 151 | 152 | [tool.coverage.report] 153 | exclude_lines = [ 154 | "@deprecated", 155 | "def __repr__", 156 | "if 0:", 157 | "if __name__ == .__main__.:", 158 | "if self.debug:", 159 | "if settings.DEBUG", 160 | "pragma: no cover", 161 | "raise AssertionError", 162 | "raise NotImplementedError", 163 | "input", 164 | "if TYPE_CHECKING:", 165 | "except PackageNotFoundError:" 166 | ] 167 | 168 | [tool.pyright] 169 | typeCheckingMode = "off" 170 | reportPossiblyUnboundVariable = true 171 | reportUnboundVariable = true 172 | reportMissingImports = false 173 | reportMissingModuleSource = false 174 | reportInvalidTypeForm = false 175 | exclude = ["**/tests"] 176 | 177 | [dependency-groups] 178 | dev = [ 179 | "pre-commit>=4.2.0", 180 | ] 181 | lint = [ 182 | "mypy>=1.15.0", 183 | "ruff>=0.11.2", 184 | ] 185 | maintain = [ 186 | "pre-commit>=4.2.0", 187 | ] 188 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.26.4 2 | pymongo==4.11 3 | gunicorn==23.0.0 4 | dash==2.18.2 5 | dash-bootstrap-components==1.6.0 6 | dash_bootstrap_templates==2.1.0 7 | plotly==6.0.0 8 | pymatgen==2025.1.24 9 | -e . 10 | -------------------------------------------------------------------------------- /src/matpes/__init__.py: -------------------------------------------------------------------------------- 1 | """Tools for working with MatPES.""" 2 | 3 | from __future__ import annotations 4 | 5 | from importlib.metadata import PackageNotFoundError, version 6 | 7 | try: 8 | __version__ = version("matpes") 9 | except PackageNotFoundError: 10 | pass # package not installed 11 | 12 | MATPES_SRC = "https://s3.us-east-1.amazonaws.com/materialsproject-contribs/MatPES_2025_1" 13 | -------------------------------------------------------------------------------- /src/matpes/cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements a CLI for MatPES, a material property exploration suite. 3 | 4 | The CLI provides the following features: 5 | - Download functionality to fetch data related to specific functionals using the `download` command. 6 | - Data processing capabilities using the `data` command, including filtering MatPES data by 7 | chemical systems or formulas. 8 | 9 | The commands are structured as subcommands, facilitating distinct functionalities for 10 | data retrieval and post-processing operations. 11 | """ 12 | 13 | from __future__ import annotations 14 | 15 | import argparse 16 | import json 17 | 18 | from monty.io import zopen 19 | from pymatgen.core import Composition 20 | 21 | from .data import get_data 22 | 23 | 24 | def download(args): 25 | """ 26 | Function to download data based on the given functional argument. 27 | 28 | This function utilizes the "get_data" method with an input argument 29 | to fetch and download data tied to the specified functionality. After 30 | successful execution, it outputs a confirmation message. 31 | 32 | Parameters 33 | ---------- 34 | args : argparse.Namespace 35 | Argument namespace that must contain a "functional" attribute. 36 | 37 | Raises: 38 | ------ 39 | None 40 | """ 41 | get_data(functional=args.functional, download_atoms=True, return_data=False) 42 | print(f">>> Successfully downloaded data for {args.functional}.") 43 | 44 | 45 | def get_data_subset(args): 46 | """ 47 | Filter MatPES data by chemical system or formula. 48 | 49 | This function processes a given JSON file containing MatPES data and filters 50 | the entries based on chemical systems or formulas specified by the user. 51 | The filtered results are then written to an output file. 52 | 53 | Parameters: 54 | args: Namespace 55 | A namespace object containing the following attributes: 56 | - filename: List[str] 57 | List containing the name of the input file to process (only 58 | the first entry is used). 59 | - outfile: str 60 | Name of the output file to write the filtered results to. 61 | Defaults to 'filtered.json.gz'. 62 | - chemsys: List[str] 63 | List of chemical systems (e.g., 'Li-Fe-O') to filter by. 64 | If empty, no filtering by chemical system is applied. 65 | - formula: List[str] 66 | List of formulas (e.g., 'Fe2O3') to filter by. If empty, 67 | no filtering by formula is applied. 68 | 69 | Returns: 70 | None 71 | 72 | Notes: 73 | - The chemical system string should follow the format 'Element1-Element2-...'. 74 | - Formulas are case-insensitive and automatically converted to their 75 | reduced forms for comparison. 76 | - The input file must be a JSON file and the output is written in 77 | compressed JSON format. 78 | """ 79 | infname = args.filename[0] 80 | outfname = args.outfile 81 | with zopen(infname, "rt", encoding="utf-8") as f: 82 | data = json.load(f) 83 | print(f"Total number of entries: {len(data)}.") 84 | if args.chemsys: 85 | for c in args.chemsys: 86 | chemsys = "-".join(sorted(c.split("-"))) 87 | data = [d for d in data if d["chemsys"] == chemsys] 88 | if args.formula: 89 | for f in args.formula: 90 | f = Composition(f).reduced_formula 91 | data = [d for d in data if d["formula_pretty"] == f] 92 | with zopen(outfname, "wt", encoding="utf-8") as f: 93 | json.dump(data, f) 94 | print(f"{len(data)} filtered entries written in {outfname}.") 95 | 96 | 97 | def main(): 98 | """Main entry point for matpes cli.""" 99 | parser = argparse.ArgumentParser( 100 | description="""matpes is a CLI for MatPES.""", 101 | epilog="Author: Shyue Ping Ong", 102 | ) 103 | 104 | subparsers = parser.add_subparsers() 105 | subparser_download = subparsers.add_parser(name="download") 106 | subparser_download.add_argument( 107 | "functional", 108 | metavar="functional", 109 | type=str.upper, 110 | nargs="?", 111 | default="PBE", 112 | help="Functional to download. Defaults to PBE.", 113 | ) 114 | 115 | subparser_download.set_defaults(func=download) 116 | 117 | subparser_data = subparsers.add_parser( 118 | name="data", help="Process downloaded MatPES data files, e.g., filtering by chemical system or formula." 119 | ) 120 | 121 | subparser_data.add_argument( 122 | "-f", 123 | "--formula", 124 | dest="formula", 125 | type=str, 126 | nargs="*", 127 | help="Formulas to filter by, e.g., Fe2O3.", 128 | ) 129 | 130 | subparser_data.add_argument( 131 | "-c", 132 | "--chemsys", 133 | dest="chemsys", 134 | type=str, 135 | nargs="*", 136 | help="Chemical systems to filter by, e.g., Li-Fe-O.", 137 | ) 138 | 139 | subparser_data.add_argument( 140 | "-o", 141 | "--outfile", 142 | dest="outfile", 143 | type=str, 144 | nargs="?", 145 | default="filtered.json.gz", 146 | help="File to write filtered entries to.", 147 | ) 148 | 149 | subparser_data.add_argument( 150 | "filename", 151 | metavar="filename", 152 | type=str, 153 | nargs=1, 154 | help="Filename to process.", 155 | ) 156 | 157 | subparser_data.set_defaults(func=get_data_subset) 158 | 159 | args = parser.parse_args() 160 | 161 | try: 162 | _ = args.func 163 | except AttributeError as exc: 164 | parser.print_help() 165 | raise SystemExit("Please specify a command.") from exc 166 | return args.func(args) 167 | -------------------------------------------------------------------------------- /src/matpes/data.py: -------------------------------------------------------------------------------- 1 | """Methods for working with MatPES data downloads.""" 2 | 3 | from __future__ import annotations 4 | 5 | import gzip 6 | import json 7 | from typing import Literal 8 | 9 | from huggingface_hub import hf_hub_download 10 | 11 | REPO_ID = "mavrl/matpes" 12 | 13 | 14 | def get_data( 15 | functional: Literal["PBE", "R2SCAN"] = "PBE", 16 | version: str = "2025.1", 17 | return_data: bool = True, 18 | download_atoms: bool = False, 19 | ) -> tuple[list[dict], list[dict]] | list[dict] | None: 20 | """ 21 | Retrieves dataset(s) related to materials properties based on specified options. 22 | 23 | This function loads a dataset corresponding to a given functional and optionally 24 | downloads additional atomic data. It allows specifying the functional type 25 | (e.g., "PBE" or "R2SCAN") and the dataset version. By default, the output includes 26 | entries unless otherwise configured. 27 | 28 | Parameters: 29 | functional (Literal["PBE", "R2SCAN"]): The functional type specifying the 30 | dataset to retrieve. Defaults to "PBE". 31 | version (str): The version of the dataset to retrieve. Defaults to "2025.1". 32 | download_atoms (bool): Whether to download and include atomic data in 33 | the output. Defaults to False. 34 | 35 | Return Values: 36 | Either the primary dataset or both the primary dataset and atomic data 37 | depending on the value of `download_atoms`. If `download_atoms` is False, it 38 | returns the primary dataset. Otherwise, it returns a tuple containing the 39 | primary dataset and atomic data. 40 | 41 | Exceptions: 42 | None 43 | """ 44 | data_path = hf_hub_download( 45 | repo_id=REPO_ID, filename=f"MatPES-{functional.upper()}-{version}.json.gz", repo_type="dataset" 46 | ) 47 | atoms_path = "" 48 | if download_atoms: 49 | atoms_path = hf_hub_download( 50 | repo_id=REPO_ID, filename=f"MatPES-{functional.upper()}-atoms.json.gz", repo_type="dataset" 51 | ) 52 | 53 | if not return_data: 54 | return None 55 | 56 | with gzip.open(data_path, "rt") as f: 57 | data = json.load(f) 58 | 59 | if download_atoms: 60 | with gzip.open(atoms_path, "rt") as f: 61 | atoms_data = json.load(f) 62 | 63 | return data, atoms_data 64 | 65 | return data 66 | -------------------------------------------------------------------------------- /src/matpes/db.py: -------------------------------------------------------------------------------- 1 | """Tools for directly working with a MatPES style DB.""" 2 | 3 | from __future__ import annotations 4 | 5 | import os 6 | 7 | import pandas as pd 8 | from pymongo import MongoClient 9 | 10 | from .data import get_data 11 | 12 | 13 | class MatPESDB: 14 | """A MatPES DB object. This requires access to a MatPES style DB. Typically meant for developers.""" 15 | 16 | FUNCTIONALS = ("PBE", "r2SCAN") 17 | 18 | def __init__(self, dbname="matpes"): 19 | """ 20 | Args: 21 | dbname (str): The name of the MatPES DB. 22 | """ 23 | client = MongoClient( 24 | host=os.environ.get("MATPES_HOST", "127.0.0.1"), 25 | username=os.environ.get("MATPES_USERNAME"), 26 | password=os.environ.get("MATPES_PASSWORD"), 27 | authSource="admin", 28 | ) 29 | self.db = client.get_database(dbname) 30 | 31 | def create_db(self): 32 | """ 33 | Create a MatPES database from the json files. 34 | Note that any existing collections will be deleted. 35 | """ 36 | for functional in self.FUNCTIONALS: 37 | data = get_data(functional=functional) 38 | coll = self.db.get_collection(functional.lower()) 39 | coll.delete_many({}) 40 | coll.insert_many(data) 41 | for field in [ 42 | "matpes_id", 43 | "formula_pretty", 44 | "elements", 45 | "chemsys", 46 | "cohesive_energy_per_atom", 47 | "nsites", 48 | "nelements", 49 | "bandgap", 50 | ]: 51 | coll.create_index(field) 52 | 53 | def get_json(self, functional: str, criteria: dict) -> list: 54 | """ 55 | Args: 56 | functional (str): The name of the functional to query. 57 | criteria (dict): The criteria to query. 58 | """ 59 | return list(self.db.get_collection(functional.lower()).find(criteria)) 60 | 61 | def get_df(self, functional: str, criteria=None, projection=None) -> pd.DataFrame: 62 | """ 63 | Retrieve a pandas DataFrame from a MongoDB collection based on the provided 64 | criteria and projection. 65 | 66 | This method queries a MongoDB collection corresponding to the specified 67 | functional argument. It uses given criteria and projection to filter and 68 | retrieve the desired data, returning the results in the form of a pandas 69 | DataFrame. If no criteria or projection is provided, it uses default values. 70 | 71 | Parameters: 72 | functional: str 73 | The name of the collection to query, corresponding to a specific 74 | functional. The string is converted to lowercase. 75 | criteria: dict, optional 76 | A dictionary to filter the query results. Defaults to an empty 77 | dictionary if not provided. 78 | projection: list[str], optional 79 | A list of strings specifying the fields to include in the query 80 | results. Defaults to a predefined list of fields if not provided. 81 | 82 | Returns: 83 | pd.DataFrame 84 | A pandas DataFrame containing the retrieved data with the specified 85 | projection fields. 86 | 87 | Raises: 88 | None 89 | """ 90 | collection = self.db.get_collection(functional.lower()) 91 | criteria = criteria or {} 92 | projection = projection or [ 93 | "matpes_id", 94 | "formula_pretty", 95 | "elements", 96 | "energy", 97 | "chemsys", 98 | "cohesive_energy_per_atom", 99 | "formation_energy_per_atom", 100 | "abs_forces", 101 | "nsites", 102 | "nelements", 103 | "bandgap", 104 | ] 105 | return pd.DataFrame( 106 | collection.find( 107 | criteria, 108 | projection=projection, 109 | ) 110 | )[projection] 111 | -------------------------------------------------------------------------------- /src/matpes/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/materialsvirtuallab/matpes/141fb91330f866e668633fab6bf2a2b756028b30/src/matpes/py.typed -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pyinvoke tasks.py file for automating releases and admin stuff. 3 | 4 | To cut a new release: 5 | 6 | invoke release 7 | """ 8 | 9 | from __future__ import annotations 10 | 11 | import json 12 | import os 13 | import re 14 | import subprocess 15 | from datetime import datetime, timezone 16 | from typing import TYPE_CHECKING 17 | 18 | import requests 19 | from invoke import task 20 | from monty.os import cd 21 | 22 | from matpes import __version__ 23 | 24 | if TYPE_CHECKING: 25 | from invoke import Context 26 | 27 | 28 | @task 29 | def make_docs(ctx: Context) -> None: 30 | """ 31 | Generate API documentation + run Sphinx. 32 | 33 | Args: 34 | ctx (Context): The context. 35 | """ 36 | with cd("docs"): 37 | ctx.run("touch apidoc/index.rst", warn=True) 38 | ctx.run("rm matpes.*.rst", warn=True) 39 | ctx.run("sphinx-apidoc -o apidoc -f ../src/matpes") 40 | 41 | # Note: we use HTML building for the API docs to preserve search functionality. 42 | ctx.run("sphinx-build -b html apidoc html") # HTML building. 43 | ctx.run("rm apidocs/*.rst", warn=True) 44 | ctx.run("mv html/matpes*.html .") 45 | ctx.run("mv html/modules.html .") 46 | 47 | ctx.run("cp ../README.md index.md", warn=True) 48 | 49 | ctx.run("rm -r markdown", warn=True) 50 | ctx.run("rm -r html", warn=True) 51 | ctx.run('sed -I "" "s/_static/assets/g" matpes*.html') 52 | ctx.run("rm -rf doctrees", warn=True) 53 | 54 | 55 | @task 56 | def publish(ctx: Context) -> None: 57 | """ 58 | Upload release to Pypi using twine. 59 | 60 | Args: 61 | ctx (Context): The context. 62 | """ 63 | ctx.run("rm dist/*.*", warn=True) 64 | ctx.run("python setup.py sdist bdist_wheel") 65 | ctx.run("twine upload dist/*") 66 | 67 | 68 | @task 69 | def set_ver(ctx: Context, version: str): 70 | """ 71 | Set version in pyproject.toml file. 72 | 73 | Args: 74 | ctx (Context): The context. 75 | version (str): An input version. 76 | """ 77 | with open("pyproject.toml", encoding="utf-8") as file: 78 | lines = [re.sub(r"^version = \"([^,]+)\"", f'version = "{version}"', line.rstrip()) for line in file] 79 | 80 | with open("pyproject.toml", "w", encoding="utf-8") as file: 81 | file.write("\n".join(lines) + "\n") 82 | 83 | ctx.run("ruff check --fix src") 84 | ctx.run("ruff format pyproject.toml") 85 | 86 | 87 | @task 88 | def release_github(ctx: Context, version: str) -> None: 89 | """ 90 | Release to Github using Github API. 91 | 92 | Args: 93 | ctx (Context): The context. 94 | version (str): The version. 95 | """ 96 | with open("docs/changes.md", encoding="utf-8") as file: 97 | contents = file.read() 98 | tokens = re.split(r"\n\#\#\s", contents) 99 | desc = tokens[1].strip() 100 | tokens = desc.split("\n") 101 | desc = "\n".join(tokens[1:]).strip() 102 | payload = { 103 | "tag_name": f"v{version}", 104 | "target_commitish": "main", 105 | "name": f"v{version}", 106 | "body": desc, 107 | "draft": False, 108 | "prerelease": False, 109 | } 110 | print(payload) 111 | response = requests.post( 112 | "https://api.github.com/repos/materialsvirtuallab/matpes/releases", 113 | data=json.dumps(payload), 114 | headers={"Authorization": f"token {os.environ['GITHUB_RELEASES_TOKEN']}"}, 115 | timeout=60, 116 | ) 117 | print(response.text) 118 | 119 | 120 | @task 121 | def update_changelog(ctx: Context, version: str | None = None, dry_run: bool = False) -> None: 122 | """Create a preliminary change log using the git logs. 123 | 124 | Args: 125 | ctx (invoke.Context): The context object. 126 | version (str, optional): The version to use for the change log. If not provided, it will 127 | use the current date in the format 'YYYY.M.D'. Defaults to None. 128 | dry_run (bool, optional): If True, the function will only print the changes without 129 | updating the actual change log file. Defaults to False. 130 | """ 131 | version = version or f"{datetime.now(tz=timezone.utc):%Y.%-m.%-d}" 132 | print(f"Getting all comments since {__version__}") 133 | output = subprocess.check_output(["git", "log", "--pretty=format:%s", f"v{__version__}..HEAD"]) 134 | lines = [] 135 | ignored_commits = [] 136 | for line in output.decode("utf-8").strip().split("\n"): 137 | re_match = re.match(r".*\(\#(\d+)\)", line) 138 | if re_match and "materialsproject/dependabot/pip" not in line: 139 | pr_number = re_match[1].strip() 140 | response = requests.get( 141 | f"https://api.github.com/repos/materialsproject/matpes/pulls/{pr_number}", 142 | timeout=60, 143 | ) 144 | resp = response.json() 145 | lines += [f"- PR #{pr_number} {resp['title'].strip()} by @{resp['user']['login']}"] 146 | if body := resp["body"]: 147 | for ll in map(str.strip, body.split("\n")): 148 | if ll in ("", "## Summary"): 149 | continue 150 | if ll.startswith(("## Checklist", "## TODO")): 151 | break 152 | lines += [f" {ll}"] 153 | else: 154 | ignored_commits += [line] 155 | 156 | body = "\n".join(lines) 157 | try: 158 | # Use OpenAI to improve changelog. Requires openai to be installed and an OPENAPI_KEY env variable. 159 | from openai import OpenAI 160 | 161 | client = OpenAI(api_key=os.environ["OPENAPI_KEY"]) 162 | 163 | messages = [{"role": "user", "content": f"summarize as a markdown numbered list, include authors: '{body}'"}] 164 | chat = client.chat.completions.create(model="gpt-4o", messages=messages) 165 | 166 | reply = chat.choices[0].message.content 167 | body = "\n".join(reply.split("\n")[1:-1]) 168 | body = body.strip().strip("`") 169 | print(f"ChatGPT Summary of Changes:\n{body}") 170 | 171 | except BaseException as ex: 172 | print(f"Unable to use openai due to {ex}") 173 | with open("docs/CHANGES.md", encoding="utf-8") as file: 174 | contents = file.read() 175 | delim = "##" 176 | tokens = contents.split(delim) 177 | tokens.insert(1, f"## v{version}\n\n{body}\n\n") 178 | if dry_run: 179 | print(tokens[0] + "##".join(tokens[1:])) 180 | else: 181 | with open("docs/CHANGES.md", mode="w", encoding="utf-8") as file: 182 | file.write(tokens[0] + "##".join(tokens[1:])) 183 | ctx.run("open docs/CHANGES.md") 184 | print("The following commit messages were not included...") 185 | print("\n".join(ignored_commits)) 186 | 187 | 188 | @task 189 | def release(ctx: Context, version, nodoc: bool = False) -> None: 190 | """ 191 | Run full sequence for releasing matpes. 192 | 193 | Args: 194 | ctx (invoke.Context): The context object. 195 | version (str, optional): The version to release. 196 | nodoc (bool, optional): Whether to skip documentation generation. 197 | """ 198 | ctx.run("rm -r dist build matpes.egg-info", warn=True) 199 | set_ver(ctx, version) 200 | if not nodoc: 201 | make_docs(ctx) 202 | ctx.run("git add .") 203 | ctx.run('git commit --no-verify -a -m "Update docs"') 204 | ctx.run("git push") 205 | release_github(ctx, version) 206 | 207 | ctx.run("rm -f dist/*.*", warn=True) 208 | ctx.run("pip install -e .", warn=True) 209 | ctx.run("python -m build", warn=True) 210 | ctx.run("twine upload --skip-existing dist/*.whl", warn=True) 211 | ctx.run("twine upload --skip-existing dist/*.tar.gz", warn=True) 212 | # post_discourse(ctx, warn=True) 213 | 214 | 215 | @task 216 | def make_tutorials(ctx: Context) -> None: 217 | """ 218 | Generate tutorial HTML. 219 | 220 | Args: 221 | ctx (invoke.Context): The context object. 222 | """ 223 | ctx.run("rm assets/*.md", warn=True) 224 | ctx.run("jupyter nbconvert --to markdown notebooks/*.ipynb") 225 | ctx.run("mv notebooks/*.md assets") 226 | 227 | 228 | @task 229 | def lint(ctx: Context) -> None: 230 | """ 231 | Run linting tools. 232 | 233 | Args: 234 | ctx (invoke.Context): The context object. 235 | """ 236 | for cmd in ("ruff", "mypy", "ruff format"): 237 | ctx.run(f"{cmd} matpes") 238 | --------------------------------------------------------------------------------