├── .gitignore ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.ja.md ├── README.md ├── condaenv.yaml ├── docs ├── Makefile ├── make.bat └── source │ ├── _templates │ └── autosummary │ │ └── class.rst │ ├── conf.py │ ├── description.rst │ ├── example.rst │ ├── extend.rst │ ├── index.rst │ ├── install.rst │ ├── modules │ ├── dataset.rst │ ├── format.rst │ ├── index.rst │ ├── model.rst │ ├── preprocess.rst │ ├── training.rst │ └── utils.rst │ └── usage.rst ├── examples ├── prediction_config.py └── training_config.py ├── hdnnpy ├── __init__.py ├── __main__.py ├── _version.py ├── cli │ ├── __init__.py │ ├── configurables.py │ ├── conversion_application.py │ ├── main.py │ ├── prediction_application.py │ └── training_application.py ├── dataset │ ├── __init__.py │ ├── atomic_structure.py │ ├── dataset_generator.py │ ├── descriptor │ │ ├── __init__.py │ │ ├── descriptor_dataset_base.py │ │ ├── symmetry_function_dataset.py │ │ └── weighted_symmetry_function_dataset.py │ ├── hdnnp_dataset.py │ └── property │ │ ├── __init__.py │ │ ├── interatomic_potential_dataset.py │ │ └── property_dataset_base.py ├── format │ ├── __init__.py │ └── xyz.py ├── model │ ├── __init__.py │ └── models.py ├── preprocess │ ├── __init__.py │ ├── pca.py │ ├── preprocess_base.py │ ├── scaling.py │ └── standardization.py ├── training │ ├── __init__.py │ ├── extensions.py │ ├── loss_function │ │ ├── __init__.py │ │ ├── first.py │ │ ├── loss_functions_base.py │ │ ├── potential.py │ │ └── zeroth.py │ ├── manager.py │ └── updater.py └── utils.py ├── readthedocs.yml ├── requirements.txt ├── scripts ├── merge_xyz ├── outcar2xyz └── poscars2xyz └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | # created by `pip install --editable .` 4 | /hdnnpy.egg-info/ 5 | /build/ 6 | /dist/ 7 | 8 | # html built by sphinx 9 | /docs/build/ 10 | # autosummary generated rst files 11 | /docs/source/modules/**/generated 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 masayoshi.ogura 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | ase = "*" 8 | chainer = "*" 9 | matplotlib = "*" 10 | "mpi4py" = "*" 11 | numpy = "*" 12 | pyyaml = "*" 13 | scikit-learn = "*" 14 | scipy = "*" 15 | tqdm = "*" 16 | traitlets = "*" 17 | 18 | [dev-packages] 19 | sphinx = "*" 20 | sphinx-rtd-theme = "*" 21 | sphinx-autobuild = "*" 22 | hdnnpy = {editable = true, path = "."} 23 | 24 | [requires] 25 | python_version = "3.6.7" 26 | -------------------------------------------------------------------------------- /README.ja.md: -------------------------------------------------------------------------------- 1 | # environment construction 2 | 3 | 基本的にpythonのバージョン管理はpyenvを使うこと。 4 | 必要なコマンド(pyenv,pipenv,conda)のインストールについては省略する。 5 | 6 | 注意1: 7 | 2018/11/16時点で、Anaconda Cloud上に 8 | 9 | - ChainerMN 10 | - Chainer v5 (v5からChainerMNがマージされた) 11 | 12 | が存在しないため、anacondaを使う場合でもpipを使用してChainerMNをインストールする必要がある。 13 | 環境が壊れる可能性を承知した上で使用すること。 14 | 参考:http://onoz000.hatenablog.com/entry/2018/02/11/142347 15 | 16 | 注意2: 17 | 2018/11/16時点で、pyenvを使ってanacondaをインストールしてある場合、pipenvによるインストールは失敗するという情報がある。 18 | このバグの修正版は将来的にリリースされるらしい。 19 | 参考:https://github.com/pypa/pipenv/issues/3044 20 | 21 | ## pipenv (recommended) 22 | 23 | 簡単かつ確実なインストール方法。 24 | 25 | 環境変数`PIPENV_VENV_IN_PROJECT`を1に設定すると、 26 | pipenvで作成されるpythonの仮想環境がこのプロジェクトの直下に作成される。(`/path/to/HDNNP/.venv/`) 27 | 以下のコマンドを実行するか、`~/.bashrc`に追記してプロセスを再起動することで変更が適用される。 28 | ``` 29 | export PIPENV_VENV_IN_PROJECT=1 30 | ``` 31 | 32 | ``` 33 | $ git clone https://github.com/ogura-edu/HDNNP.git 34 | $ cd HDNNP/ 35 | $ pyenv install 3.6.7 36 | $ pyenv local 3.6.7 37 | $ pipenv install 38 | 39 | # activate 40 | $ pipenv shell 41 | 42 | (HDNNP) $ hdnnpy training 43 | 44 | # deactivate 45 | (HDNNP) $ exit 46 | ``` 47 | 48 | ## anaconda 49 | 50 | 最適化されたバイナリを取得できるので、実行速度が速い。 51 | しかし、上記の理由からpipと混在した形になることや、 52 | マシンによってはインストールがうまくいかないことがあるため注意すること。 53 | 54 | `conda env create --file condaenv.yaml`の実行が終了すると、 55 | 各々の環境に合わせてactivationの仕方がいくつか提示されるので好きなものを選ぶ。 56 | 以下の例では`~/.bashrc`に1文追記する方法を選択している。 57 | 58 | ``` 59 | $ git clone https://github.com/ogura-edu/HDNNP.git 60 | $ cd HDNNP/ 61 | $ pyenv install anaconda-x.x.x 62 | $ pyenv local anaconda-x.x.x 63 | $ conda env create -n HDNNP --file condaenv.yaml 64 | $ echo ". ${HOME}/.pyenv/versions/anaconda-x.x.x/etc/profile.d/conda.sh" > ~/.bashrc 65 | 66 | # activate 67 | $ conda activate HDNNP 68 | 69 | # install this program using pip 70 | (HDNNP) $ pip install --editable . 71 | 72 | (HDNNP) $ hdnnpy training 73 | 74 | # deactivate 75 | (HDNNP) $ conda deactivate 76 | ``` 77 | 78 | ## pip install only 79 | 80 | `Pipfile`または`condaenv.yaml`に記述されている依存関係を元に、 81 | パッケージを個別に`pip install`することももちろん可能。 82 | この場合は`virtualenv`を使って自分で仮想環境を管理することを推奨する。 83 | ``` 84 | $ git clone https://github.com/ogura-edu/HDNNP.git 85 | $ cd HDNNP/ 86 | $ pip install PKG1 PKG2 ... 87 | $ pip install -e . 88 | ``` 89 | 90 | または、慣れた人であれば依存関係を`setup.py`に書き加えるだけで済む。 91 | ``` 92 | $ git clone https://github.com/ogura-edu/HDNNP.git 93 | $ cd HDNNP/ 94 | $ vim setup.py #=> setup()の引数にinstall_requiresを追加 95 | $ pip install -e . 
96 | ``` 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Documentation Status](https://readthedocs.org/projects/hdnnp/badge/?version=latest)](https://hdnnp.readthedocs.io/en/latest/?badge=latest) 2 | 3 | # High Dimensional Neural Network Potential (HDNNP) 4 | 5 | > This is an implementation of the High Dimensional Neural Network Potential (HDNNP), designed to reproduce Density Functional Theory (DFT) calculations *efficiently* with high *flexibility* and *reactivity*. 6 | 7 | There is an equivalent document in Japanese: [README.ja.md](https://github.com/ogura-edu/HDNNP/blob/master/README.ja.md). 8 | 9 | ## Install 10 | 11 | Install this project by `git`. 12 | 13 | ```shell 14 | $ git clone https://github.com/ogura-edu/HDNNP.git 15 | 16 | # or if using ssh 17 | 18 | $ git clone git@github.com:ogura-edu/HDNNP.git 19 | ``` 20 | 21 | This project uses [Pipenv](https://github.com/pypa/pipenv) for the development workflow. If you don't have it, run the following command to install it. 22 | 23 | 24 | **macOS** 25 | 26 | ```shell 27 | $ brew install pipenv 28 | ``` 29 | 30 | **other** 31 | 32 | ```shell 33 | # please run this after installing Python 34 | $ pip install pipenv 35 | ``` 36 | 37 | ## Setup 38 | ### By Pipenv (Preferred) 39 | 40 | The setup is almost the same as with Anaconda, but install plain Python with pyenv rather than Anaconda, because `pipenv` currently fails to start when Anaconda is installed via `pyenv`. 41 | 42 | This bug will be fixed in a future release (ref: [pythonfinder + pyenv + anaconda issue](https://github.com/pypa/pipenv/issues/3044)). 43 | 44 | Set the environment variable `PIPENV_VENV_IN_PROJECT` to `1` to create the virtual environment inside this project directory (`/path/to/HDNNP/.venv`). 45 | 46 | ```shell 47 | export PIPENV_VENV_IN_PROJECT=1 48 | ``` 49 | 50 | For macOS users, you need to install `mpich` (or `openmpi`) before installing the dependencies. 51 | 52 | ```shell 53 | # Only for macOS users. 54 | # 55 | # NOTE: Installing both mpich and openmpi will cause a conflict 56 | # 57 | $ brew install mpich 58 | 59 | # or 60 | 61 | $ brew install openmpi 62 | ``` 63 | 64 | Set up your environment. 65 | 66 | ```shell 67 | # Install dependencies 68 | $ pipenv install 69 | 70 | # activate the virtual environment 71 | $ pipenv shell 72 | 73 | # For example... 74 | (HDNNP) $ hdnnpy train 75 | 76 | # deactivate 77 | (HDNNP) $ exit 78 | ``` 79 | 80 | ### By Anaconda 81 | 82 | Anaconda provides optimized binaries, so execution is basically faster; however, note the caveats below. 83 | 84 | Install Anaconda via pyenv and activate the environment. 85 | 86 | ```shell 87 | $ ANACONDA_VERSION=[YOUR_ANACONDA_VERSION] 88 | $ pyenv install $ANACONDA_VERSION 89 | $ pyenv local $ANACONDA_VERSION 90 | $ conda env create -n HDNNP --file condaenv.yaml 91 | $ echo ". ${HOME}/.pyenv/versions/${ANACONDA_VERSION}/etc/profile.d/conda.sh" >> ~/.bashrc 92 | 93 | # activate 94 | $ conda activate HDNNP 95 | 96 | # install this program using pip 97 | (HDNNP) $ pip install --editable . 98 | 99 | # For example... 100 | (HDNNP) $ hdnnpy train 101 | 102 | # deactivate 103 | (HDNNP) $ conda deactivate 104 | ``` 105 | 106 | **NOTE** 107 | 108 | There is no 109 | 110 | - ChainerMN 111 | - Chainer v5 (ChainerMN has been merged into Chainer since v5) 112 | 113 | on the Anaconda Cloud, so you still have to install these packages with `pip`. 114 | 115 | Also, there is a bug such that if you install Anaconda via `pyenv`, `pipenv` will fail to start (ref: [pythonfinder + pyenv + anaconda issue](https://github.com/pypa/pipenv/issues/3044)). 116 | 117 | ## Reference 118 | 119 | - Jörg Behler.
First Principle Neural Network Potentials for Reactive Simulations of Large Molecular and Condensed System, 2007 120 | -------------------------------------------------------------------------------- /condaenv.yaml: -------------------------------------------------------------------------------- 1 | name: HDNNP 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python==3.6.7 7 | - ase 8 | - chainer 9 | - matplotlib 10 | - mpi4py 11 | - numpy 12 | - pyyaml 13 | - scikit-learn 14 | - scipy 15 | - tqdm 16 | - traitlets 17 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {{ objname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | {% if module.startswith(('hdnnpy.model', 'hdnnpy.training')) %} 7 | .. autoclass:: {{ objname }} 8 | :no-inherited-members: 9 | {% else %} 10 | .. autoclass:: {{ objname }} 11 | :inherited-members: 12 | {% endif %} 13 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. 
For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | from hdnnpy import __version__ 18 | sys.path.insert(0, os.path.abspath('../..')) 19 | 20 | 21 | # -- Project information ----------------------------------------------------- 22 | 23 | project = 'HDNNP' 24 | copyright = '2018, masayoshi.ogura' 25 | author = 'masayoshi.ogura' 26 | 27 | # The short X.Y version 28 | version = __version__ 29 | # The full version, including alpha/beta/rc tags 30 | release = __version__ 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # If your documentation needs a minimal Sphinx version, state it here. 36 | # 37 | # needs_sphinx = '1.0' 38 | 39 | # Add any Sphinx extension module names here, as strings. They can be 40 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 41 | # ones. 42 | extensions = [ 43 | 'sphinx.ext.autosummary', 44 | 'sphinx.ext.autodoc', 45 | 'sphinx.ext.doctest', 46 | 'sphinx.ext.todo', 47 | 'sphinx.ext.coverage', 48 | 'sphinx.ext.mathjax', 49 | 'sphinx.ext.ifconfig', 50 | 'sphinx.ext.viewcode', 51 | 'sphinx.ext.napoleon', 52 | 'sphinx.ext.intersphinx', 53 | ] 54 | 55 | # Add any paths that contain templates here, relative to this directory. 56 | templates_path = ['_templates'] 57 | 58 | # The suffix(es) of source filenames. 59 | # You can specify multiple suffix as a list of string: 60 | # 61 | # source_suffix = ['.rst', '.md'] 62 | source_suffix = '.rst' 63 | 64 | # The master toctree document. 65 | master_doc = 'index' 66 | 67 | # The language for content autogenerated by Sphinx. Refer to documentation 68 | # for a list of supported languages. 69 | # 70 | # This is also used if you do content translation via gettext catalogs. 71 | # Usually you set "language" from the command line for these cases. 72 | language = None 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This pattern also affects html_static_path and html_extra_path. 77 | exclude_patterns = [] 78 | 79 | # The name of the Pygments (syntax highlighting) style to use. 80 | pygments_style = 'sphinx' 81 | 82 | # The reST default role (used for this markup: `text`) to use for all 83 | # documents. 84 | default_role = 'any' 85 | 86 | 87 | # -- Options for HTML output ------------------------------------------------- 88 | 89 | # The theme to use for HTML and HTML Help pages. See the documentation for 90 | # a list of builtin themes. 91 | # 92 | html_theme = 'sphinx_rtd_theme' 93 | 94 | # Theme options are theme-specific and customize the look and feel of a theme 95 | # further. For a list of options available for each theme, see the 96 | # documentation. 97 | # 98 | # html_theme_options = {} 99 | 100 | # Add any paths that contain custom static files (such as style sheets) here, 101 | # relative to this directory. They are copied after the builtin static files, 102 | # so a file named "default.css" will overwrite the builtin "default.css". 
103 | html_static_path = ['_static'] 104 | 105 | # Custom sidebar templates, must be a dictionary that maps document names 106 | # to template names. 107 | # 108 | # The default sidebars (for documents that don't match any pattern) are 109 | # defined by theme itself. Builtin themes are using these templates by 110 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 111 | # 'searchbox.html']``. 112 | # 113 | # html_sidebars = {} 114 | 115 | 116 | # -- Options for HTMLHelp output --------------------------------------------- 117 | 118 | # Output file base name for HTML help builder. 119 | htmlhelp_basename = 'HDNNPdoc' 120 | 121 | 122 | # -- Options for LaTeX output ------------------------------------------------ 123 | 124 | latex_elements = { 125 | # The paper size ('letterpaper' or 'a4paper'). 126 | # 127 | # 'papersize': 'letterpaper', 128 | 129 | # The font size ('10pt', '11pt' or '12pt'). 130 | # 131 | # 'pointsize': '10pt', 132 | 133 | # Additional stuff for the LaTeX preamble. 134 | # 135 | # 'preamble': '', 136 | 137 | # Latex figure (float) alignment 138 | # 139 | # 'figure_align': 'htbp', 140 | } 141 | 142 | # Grouping the document tree into LaTeX files. List of tuples 143 | # (source start file, target name, title, 144 | # author, documentclass [howto, manual, or own class]). 145 | latex_documents = [ 146 | (master_doc, 'HDNNP.tex', 'HDNNP Documentation', 147 | 'masayoshi.ogura', 'manual'), 148 | ] 149 | 150 | 151 | # -- Options for manual page output ------------------------------------------ 152 | 153 | # One entry per manual page. List of tuples 154 | # (source start file, name, description, authors, manual section). 155 | man_pages = [ 156 | (master_doc, 'hdnnp', 'HDNNP Documentation', 157 | [author], 1) 158 | ] 159 | 160 | 161 | # -- Options for Texinfo output ---------------------------------------------- 162 | 163 | # Grouping the document tree into Texinfo files. List of tuples 164 | # (source start file, target name, title, author, 165 | # dir menu entry, description, category) 166 | texinfo_documents = [ 167 | (master_doc, 'HDNNP', 'HDNNP Documentation', 168 | author, 'HDNNP', 'One line description of project.', 169 | 'Miscellaneous'), 170 | ] 171 | 172 | 173 | # -- Options for Epub output ------------------------------------------------- 174 | 175 | # Bibliographic Dublin Core info. 176 | epub_title = project 177 | 178 | # The unique identifier of the text. This can be a ISBN number 179 | # or the project homepage. 180 | # 181 | # epub_identifier = '' 182 | 183 | # A unique identification for the text. 184 | # 185 | # epub_uid = '' 186 | 187 | # A list of files that should not be packed into the epub file. 188 | epub_exclude_files = ['search.html'] 189 | 190 | 191 | # -- Extension configuration ------------------------------------------------- 192 | 193 | # -- Options for todo extension ---------------------------------------------- 194 | 195 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
196 | todo_include_todos = True 197 | 198 | # -- Options for napoleon extension ------------------------------------------ 199 | napoleon_include_special_with_doc = True 200 | napoleon_use_admonition_for_examples = True 201 | napoleon_use_admonition_for_notes = True 202 | napoleon_use_admonition_for_references = True 203 | 204 | # -- Options for autodoc extension ------------------------------------------- 205 | autoclass_content = 'both' 206 | autodoc_member_order = 'groupwise' 207 | autodoc_default_options = { 208 | 'show-inheritance': None, 209 | 'members': None, 210 | } 211 | 212 | # -- Options for intersphinx extension --------------------------------------- 213 | intersphinx_mapping = { 214 | 'python': ('https://docs.python.org/3.6/', None), 215 | 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 216 | 'chainer': ('https://docs.chainer.org/en/stable/', None), 217 | 'ase': ('https://wiki.fysik.dtu.dk/ase/', None), 218 | 'sklearn': ('https://scikit-learn.org/stable/', None), 219 | } 220 | 221 | # -- Options for autosummary extension --------------------------------------- 222 | autosummary_generate = True 223 | -------------------------------------------------------------------------------- /docs/source/description.rst: -------------------------------------------------------------------------------- 1 | What is HDNNP? 2 | ============== 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | 10 | | This program is an implementation of HDNNP that was 11 | proposed by Behler *et al.* [Ref]_. 12 | | HDNNP stands for **High Dimensional Neural Network Potential**. 13 | | HDNNP is one of the machine learning potentials that are used to 14 | reduce the computational cost of DFT (Density Functional Theory) calculations. 15 | 16 | | Currently, energy and force prediction using symmetry functions has been implemented. 17 | 18 | 19 | 20 | .. [Ref] https://onlinelibrary.wiley.com/doi/full/10.1002/qua.24890 -------------------------------------------------------------------------------- /docs/source/example.rst: -------------------------------------------------------------------------------- 1 | Execution example 2 | ================= 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | 10 | GaN interatomic potential 11 | ------------------------- 12 | 13 | | In this section, we show an execution example of HDNNP training 14 | using the 1st-order derivative of the interatomic potential 15 | (i.e. interatomic forces) of GaN. 16 | 17 | Data file 18 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | Prepare a .xyz format file which contains some structures with energy and force data. 21 | 22 | ``GaN.xyz`` 23 | 24 | ..
code-block:: text 25 | 26 | 32 27 | Lattice="6.46474316 0.0 0.0 -3.23237159 5.5986318 0.0 0.0 0.0 10.53232454" Properties=species:S:1:pos:R:3:forces:R:3 energy=-194.5164333 tag=CrystalGa16N16 pbc="T T T" 28 | Ga 1.61619000 0.93311000 2.62845000 0.00000300 0.00001200 -0.00570900 29 | Ga 3.23237000 3.73242000 2.62845000 0.00003900 -0.00004700 -0.00571500 30 | Ga 4.84856000 0.93311000 2.62845000 0.00000400 -0.00001100 -0.00563600 31 | Ga -0.00000000 3.73242000 7.89461000 -0.00003800 0.00003200 -0.00564200 32 | Ga 1.61619000 0.93311000 7.89461000 0.00006100 -0.00001800 -0.00571100 33 | Ga 3.23237000 3.73242000 7.89461000 0.00002100 -0.00006400 -0.00572000 34 | Ga 4.84856000 0.93311000 7.89461000 -0.00003200 -0.00002300 -0.00565600 35 | Ga -0.00000000 3.73242000 2.62845000 0.00002100 -0.00002000 -0.00565100 36 | Ga -0.00000000 1.86621000 5.26153000 -0.00006900 0.00005900 -0.00572300 37 | Ga 1.61619000 4.66553000 5.26153000 -0.00002700 0.00008200 -0.00571900 38 | Ga 3.23237000 1.86621000 5.26153000 0.00001800 -0.00001400 -0.00566500 39 | Ga -1.61619000 4.66553000 10.52769000 -0.00002700 -0.00002600 -0.00566900 40 | Ga -0.00000000 1.86621000 10.52769000 -0.00002200 0.00008500 -0.00568700 41 | Ga 1.61619000 4.66553000 10.52769000 0.00000600 -0.00002400 -0.00574300 42 | Ga 3.23237000 1.86621000 10.52769000 0.00000100 0.00007600 -0.00564000 43 | Ga -1.61619000 4.66553000 5.26153000 0.00002200 -0.00000200 -0.00568800 44 | N 1.61619000 0.93311000 4.61253000 0.00005500 -0.00002000 -0.00041000 45 | N 3.23237000 3.73242000 4.61253000 0.00003600 -0.00000900 -0.00037900 46 | N 4.84856000 0.93311000 4.61253000 -0.00004100 0.00000700 -0.00041100 47 | N -0.00000000 3.73242000 9.87869000 -0.00001300 -0.00003500 -0.00042500 48 | N 1.61619000 0.93311000 9.87869000 0.00001200 0.00002900 -0.00040900 49 | N 3.23237000 3.73242000 9.87869000 0.00002700 -0.00006200 -0.00041700 50 | N 4.84856000 0.93311000 9.87869000 -0.00000400 0.00002500 -0.00041500 51 | N -0.00000000 3.73242000 4.61253000 -0.00004500 -0.00000400 -0.00041800 52 | N -0.00000000 1.86621000 1.97945000 0.00000000 -0.00000800 -0.00034400 53 | N 1.61619000 4.66553000 1.97945000 -0.00000200 0.00000500 -0.00033700 54 | N 3.23237000 1.86621000 1.97945000 0.00001700 0.00001600 -0.00036100 55 | N -1.61619000 4.66553000 7.24561000 0.00002800 -0.00002300 -0.00036000 56 | N -0.00000000 1.86621000 7.24561000 -0.00008200 0.00001500 -0.00043200 57 | N 1.61619000 4.66553000 7.24561000 -0.00002200 0.00004200 -0.00040100 58 | N 3.23237000 1.86621000 7.24561000 0.00001900 -0.00001200 -0.00039500 59 | N -1.61619000 4.66553000 1.97945000 0.00000400 -0.00001800 -0.00046000 60 | 32 61 | Lattice="6.46474316 0.0 0.0 -3.23237159 5.5986318 0.0 0.0 0.0 10.53232454" Properties=species:S:1:pos:R:3:forces:R:3 energy=-169.96635976 tag=CrystalGa16N16 pbc="T T T" 62 | Ga 1.44265000 1.46790000 2.04947000 -0.95595000 -3.56110800 2.54045000 63 | Ga 2.88538000 4.34404000 2.89380000 4.75932000 -2.04809500 -1.43108200 64 | Ga 4.38372000 0.68215000 2.61606000 0.15090500 6.97113700 2.40537400 65 | Ga 0.47836000 3.95213000 7.90284000 -3.31821700 -0.13409600 -0.21437100 66 | Ga 1.82415000 1.43420000 8.18380000 -0.78327100 -2.70531000 -3.50469000 67 | Ga 3.49351000 3.96284000 7.92622000 1.84595600 -0.42627100 -0.16593100 68 | Ga 5.17229000 0.83662000 7.71745000 -0.46937900 1.21688400 1.11923500 69 | Ga -0.04508000 3.95689000 2.71946000 -3.88117900 -1.84159800 0.64959300 70 | Ga -0.96518000 1.98086000 5.22137000 1.12890800 -1.31857500 -0.37168600 71 | Ga 1.18573000 3.20454000 5.22045000 1.58317800 
1.58466500 0.77557000 72 | Ga 2.91073000 1.45415000 5.60119000 -0.29420600 -1.79185700 -2.55652100 73 | Ga -0.99634000 4.45389000 0.07004000 -2.39983600 3.43545000 1.27018200 74 | Ga 0.17764000 1.60544000 10.36435000 6.30208700 4.30252400 2.73199900 75 | Ga 2.35420000 4.13573000 0.39168000 -1.28509600 -0.64262000 -3.92936300 76 | ... 77 | 4 78 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.3605335 tag=CrystalGa2N2 pbc="T T T" 79 | Ga 1.60815000 0.92846000 2.61537000 0.00057000 -0.00032400 -0.00131800 80 | Ga 0.00000000 1.85693000 5.23535000 -0.00055000 0.00030900 -0.00128000 81 | N 1.60815000 0.92846000 4.58958000 0.00038300 -0.00020300 0.00049500 82 | N 0.00000000 1.85693000 1.96960000 -0.00030900 0.00021200 0.00050600 83 | 4 84 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.04284841 tag=CrystalGa2N2 pbc="T T T" 85 | Ga 1.56998000 1.01961000 2.64712000 0.37879200 -0.65345000 -0.84588100 86 | Ga 0.00233000 1.78610000 5.21359000 1.53422400 0.01126800 0.83092200 87 | N 1.80998000 0.78162000 4.55671000 -1.91098000 0.49960800 -0.07141600 88 | N -0.02338000 1.90257000 1.95274000 0.00855700 0.14604000 0.09234500 89 | 4 90 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.07370026 tag=CrystalGa2N2 pbc="T T T" 91 | Ga 1.68022000 0.78468000 2.59601000 -0.77026300 1.15126700 0.71828100 92 | Ga -0.04831000 1.97869000 0.01593000 -1.05203000 0.42443800 -0.31339000 93 | N 1.47544000 1.12447000 4.57171000 1.50854300 -1.32922700 -0.04524600 94 | N 0.01431000 1.77059000 1.98155000 0.31937700 -0.24596800 -0.35639000 95 | 4 96 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.06789171 tag=CrystalGa2N2 pbc="T T T" 97 | Ga 1.55216000 1.03346000 2.59780000 1.76477100 -1.33788800 0.62275500 98 | Ga 0.04645000 1.78043000 0.02483000 -0.39888700 -0.84820500 -0.84426800 99 | N 1.59299000 0.75442000 4.54056000 0.36047300 1.45854900 0.51138400 100 | N 0.06265000 1.88907000 1.95951000 -1.73396900 0.72932900 -0.27762300 101 | 4 102 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.10933618 tag=CrystalGa2N2 pbc="T T T" 103 | Ga 1.62285000 0.92354000 2.56898000 -0.87387700 0.84344000 1.29437700 104 | Ga -0.00655000 1.82730000 0.04373000 0.63633100 1.10065300 -1.07564600 105 | N 1.65007000 1.03662000 4.56438000 -0.83168500 -1.16592600 0.26072300 106 | N -0.08253000 1.92082000 1.98507000 1.07124400 -0.78418500 -0.47994500 107 | 4 108 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.15961153 tag=CrystalGa2N2 pbc="T T T" 109 | Ga 1.61929000 0.86275000 2.60668000 0.91655600 0.12884500 0.02524600 110 | Ga -0.02746000 1.90759000 0.02534000 -0.00425900 0.48361500 -1.32527900 111 | N 1.57325000 1.05930000 4.54898000 0.29235100 -0.94998800 0.25695700 112 | N 0.11613000 1.80106000 1.90435000 -1.21017800 0.33509300 1.05032200 113 | 4 114 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-23.90497111 tag=CrystalGa2N2 pbc="T T T" 115 | Ga 1.57753000 1.01962000 2.53889000 -0.58498700 0.38561600 1.95812800 116 | Ga 0.05221000 1.77667000 0.06084000 -0.50913400 -1.39207300 -1.16507600 
117 | N 1.60109000 0.71987000 4.62834000 0.25821000 2.35785600 -0.69708500 118 | N -0.10050000 2.01120000 1.98576000 0.83273600 -1.35617800 -0.10520400 119 | 4 120 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.17936965 tag=CrystalGa2N2 pbc="T T T" 121 | Ga 1.65588000 0.84325000 2.61391000 -0.48280700 0.58352400 -0.06140200 122 | Ga -0.05236000 1.91994000 0.00989000 1.13163900 0.73695700 -0.46324400 123 | N 1.63413000 1.09260000 4.55873000 -1.08709100 -1.30806300 0.05205700 124 | N -0.00295000 1.80336000 1.93549000 0.44154800 -0.01662100 0.47920500 125 | 4 126 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-23.82707164 tag=CrystalGa2N2 pbc="T T T" 127 | ... 128 | 129 | 130 | Config file 131 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 132 | 133 | ``training_config.py`` 134 | (necessary parts picked up) 135 | 136 | .. code-block:: python 137 | 138 | c.TrainingApplication.verbose = True 139 | 140 | c.DatasetConfig.parameters = { 141 | 'type1': [ 142 | (5.0,), 143 | ], 144 | 'type2': [ 145 | (5.0, 0.01, 2.0), 146 | (5.0, 0.01, 3.2), 147 | (5.0, 0.01, 3.8), 148 | (5.0, 0.1, 2.0), 149 | (5.0, 0.1, 3.2), 150 | (5.0, 0.1, 3.8), 151 | (5.0, 1.0, 2.0), 152 | (5.0, 1.0, 3.2), 153 | (5.0, 1.0, 3.8), 154 | ], 155 | 'type4': [ 156 | (5.0, 0.01, -1, 1), 157 | (5.0, 0.01, -1, 2), 158 | (5.0, 0.01, -1, 4), 159 | (5.0, 0.01, 1, 1), 160 | (5.0, 0.01, 1, 2), 161 | (5.0, 0.01, 1, 4), 162 | (5.0, 0.1, -1, 1), 163 | (5.0, 0.1, -1, 2), 164 | (5.0, 0.1, -1, 4), 165 | (5.0, 0.1, 1, 1), 166 | (5.0, 0.1, 1, 2), 167 | (5.0, 0.1, 1, 4), 168 | (5.0, 1.0, -1, 1), 169 | (5.0, 1.0, -1, 2), 170 | (5.0, 1.0, -1, 4), 171 | (5.0, 1.0, 1, 1), 172 | (5.0, 1.0, 1, 2), 173 | (5.0, 1.0, 1, 4), 174 | ], 175 | } 176 | 177 | c.DatasetConfig.preprocesses = [ 178 | ('pca', (), {}), 179 | ] 180 | 181 | c.ModelConfig.layers = [ 182 | (90, 'tanh'), 183 | (90, 'tanh'), 184 | (1, 'identity'), 185 | ] 186 | 187 | c.TrainingConfig.batch_size = 100 188 | 189 | c.TrainingConfig.data_file = 'data/GaN.xyz' 190 | 191 | c.TrainingConfig.epoch = 1000 192 | 193 | c.TrainingConfig.interval = 10 194 | 195 | c.TrainingConfig.loss_function = ( 196 | 'first_only', 197 | {} 198 | ) 199 | 200 | c.TrainingConfig.lr_decay = 1.0e-6 201 | 202 | c.TrainingConfig.order = 1 203 | 204 | c.TrainingConfig.out_dir = 'output' 205 | 206 | c.TrainingConfig.patients = 5 207 | 208 | c.TrainingConfig.scatter_plot = True 209 | 210 | command line log 211 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 212 | 213 | Once edited configuration file ``training_config.py``, you just do one command ``hdnnpy trian``. 214 | 215 | .. 
code-block:: none 216 | 217 | $ hdnnpy train 218 | 219 | Construct sub dataset tagged as "CrystalGa16N16" 220 | Successfully loaded & made needed symmetry_function dataset from /data/CrystalGa16N16/symmetry_function.npz 221 | Successfully loaded & made needed interatomic_potential dataset from /data/CrystalGa16N16/interatomic_potential.npz 222 | 223 | Initialized PCA parameters for Ga 224 | Feature dimension: 74 => 74 225 | Cumulative contribution rate = 0.9999999403953552 226 | 227 | 228 | Initialized PCA parameters for N 229 | Feature dimension: 74 => 74 230 | Cumulative contribution rate = 1.0000001192092896 231 | 232 | Construct sub dataset tagged as "CrystalGa2N2" 233 | Successfully loaded & made needed symmetry_function dataset from /data/CrystalGa2N2/symmetry_function.npz 234 | Successfully loaded & made needed interatomic_potential dataset from /data/CrystalGa2N2/interatomic_potential.npz 235 | Saved PCA parameters to /output/preprocess/pca.npz. 236 | early stopping: operator is less 237 | epoch iteration main/RMSE/force main/RMSE/total val/main/RMSE/force val/main/RMSE/total 238 | 1 14 1.20575 1.20575 1.21576 1.21576 239 | 2 28 1.08758 1.08758 1.06121 1.06121 240 | 3 42 0.895798 0.895798 0.865482 0.865482 241 | 4 55 0.685623 0.685623 0.694789 0.694789 242 | 5 69 0.560702 0.560702 0.603832 0.603832 243 | 6 83 0.509542 0.509542 0.570984 0.570984 244 | 7 97 0.486743 0.486743 0.552533 0.552533 245 | 8 110 0.468966 0.468966 0.540375 0.540375 246 | 9 124 0.458917 0.458917 0.531327 0.531327 247 | 10 138 0.448132 0.448132 0.524466 0.524466 248 | ... 249 | 250 | Directory tree 251 | ^^^^^^^^^^^^^^^^^^^^^^^^ 252 | 253 | After training, directory tree becomes as follows: 254 | 255 | .. code-block:: text 256 | 257 | workdir 258 | ├── data/ 259 | │   ├── GaN.xyz 260 | │   ... 261 | ├── output/ 262 | │   ├── CrystalGa16N16/ 263 | │   │   ├── energy.png 264 | │   │   ├── force.png 265 | │   │   └── training.log 266 | │   ├── CrystalGa2N2/ 267 | │   │   ├── energy.png 268 | │   │   ├── force.png 269 | │   │   └── training.log 270 | │   ├── master_nnp.npz 271 | │   ├── preprocess/ 272 | │   │   └── pca.npz 273 | │   ├── training_config.py 274 | │   └── training_result.yaml 275 | └── training_config.py 276 | -------------------------------------------------------------------------------- /docs/source/extend.rst: -------------------------------------------------------------------------------- 1 | How to extend HDNNP 2 | =================== 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | Dataset 10 | ------------------- 11 | 12 | HDNNP dataset consists of **Descriptor dataset** and **Property dataset**. 13 | 14 | 15 | 16 | 17 | Descriptor dataset 18 | ^^^^^^^^^^^^^^^^^^^ 19 | 20 | | Currently, we have implemented only **symmetry function** dataset. 21 | | If you want to use other descriptor dataset, define a class that inherits 22 | | ``hdnnpy.dataset.descriptor.descriptor_dataset_base.DescriptorDatasetBase`` 23 | | It defines several instance variables, properties and instance methods for creating a HDNNP dataset. 24 | 25 | In addition, override the following abstract method. 26 | 27 | * generate_feature_keys 28 | 29 | | It returns a list of unique keys in feature dimension. 30 | | In addition to being able to use it internally, 31 | it is also used to expand feature dimension and zero-fill in ``hdnnpy.dataset.HDNNPDataset`` 32 | 33 | * calculate_descriptors 34 | 35 | | It is main function for calculating descriptors from a atomic structure, which is a wrapper of ase.Atoms object. 
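| The following is a minimal sketch of such a subclass.
| The class name, attribute names and exact method signatures below are illustrative assumptions rather than the real API;
  use ``hdnnpy.dataset.descriptor.symmetry_function_dataset.SymmetryFunctionDataset`` as the reference implementation for the actual interface.

.. code-block:: python

    from hdnnpy.dataset.descriptor.descriptor_dataset_base import (
        DescriptorDatasetBase)


    class MyDescriptorDataset(DescriptorDatasetBase):
        """Hypothetical custom descriptor dataset (illustrative sketch)."""

        def generate_feature_keys(self, elements):
            # Return a list of unique keys, one per feature dimension.
            # These keys are also used by `hdnnpy.dataset.HDNNPDataset`
            # to expand the feature dimension and zero-fill.
            return [f'my_descriptor:{element}:{i}'
                    for element in elements
                    for i in range(self.n_feature)]  # `n_feature` is an assumed attribute

        def calculate_descriptors(self, structure):
            # `structure` is a wrapper of an `ase.Atoms` object.
            # Compute and return the descriptor values (and their
            # derivatives, if required) for every atom in the structure.
            raise NotImplementedError('fill in your descriptor calculation')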
36 | 37 | 38 | 39 | 40 | 41 | Property dataset 42 | ^^^^^^^^^^^^^^^^^^^ 43 | 44 | | Currently, we have implemented only the **interatomic potential** dataset. 45 | | If you want to use another property dataset, define a class that inherits 46 | | ``hdnnpy.dataset.property.property_dataset_base.PropertyDatasetBase`` 47 | | It defines several instance variables, properties and instance methods for creating an HDNNP dataset. 48 | 49 | In addition, override the following abstract method. 50 | 51 | * calculate_properties 52 | 53 | | It is the main function for getting properties from an atomic structure, which is a wrapper of an ase.Atoms object. 54 | 55 | 56 | Preprocess 57 | ------------------- 58 | 59 | * PCA 60 | * Scaling 61 | * Standardization 62 | 63 | 64 | Loss function 65 | ------------------- 66 | 67 | Currently, we have implemented the following loss functions for HDNNP training. 68 | 69 | * Zeroth 70 | * First 71 | 72 | Each loss function uses the 0th/1st-order error of the property to optimize HDNNP. 73 | ``First`` uses both the 0th- and 1st-order errors of the property, weighted by the parameter ``mixing_beta``, to optimize HDNNP. 74 | 75 | * Potential 76 | 77 | It uses the 2nd-order derivative of the descriptor dataset to optimize HDNNP so as to satisfy the following condition: 78 | 79 | .. math:: 80 | 81 | \nabla \times \boldsymbol{F} = 0 82 | 83 | Then there exists a scalar potential :math:`\varphi` such that: 84 | 85 | .. math:: 86 | 87 | \boldsymbol{F} = \nabla \varphi 88 | 89 | | If you want to use another loss function, define a class that inherits 90 | | ``hdnnpy.training.loss_function.loss_function_base.LossFunctionBase``. 91 | | It defines several instance variables, properties and instance methods. 92 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. HDNNP documentation master file, created by 2 | sphinx-quickstart on Thu Dec 20 09:39:20 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to HDNNP's documentation! 7 | ================================= 8 | 9 | .. toctree:: 10 | :glob: 11 | :maxdepth: 2 12 | :caption: Contents: 13 | 14 | description 15 | install 16 | usage 17 | example 18 | modules/index 19 | extend 20 | 21 | 22 | 23 | Indices and tables 24 | ================== 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | How to install HDNNP 2 | ==================== 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | 10 | 11 | Python installation 12 | --------------------- 13 | 14 | | We recommend that you install Python using pyenv, 15 | because a non-sudo user can install any Python version on any computer. 16 | | We have confirmed that this program works only with Python 3.6.7. 17 | 18 | .. code-block:: shell 19 | 20 | (on Linux) 21 | $ git clone https://github.com/yyuu/pyenv.git ~/.pyenv 22 | (on MacOS) 23 | $ brew install pyenv 24 | 25 | $ echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bash_profile 26 | $ echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bash_profile 27 | $ echo 'eval "$(pyenv init -)"' >> ~/.bash_profile 28 | $ source ~/.bash_profile 29 | 30 | $ pyenv install 3.6.7 31 | 32 | 33 | 34 | Get source code 35 | --------------------- 36 | 37 | ..
note:: 38 | 39 | | This program is now under development, not uploaded to PyPI. 40 | | You have to get source code and install it manually. 41 | 42 | .. code-block:: shell 43 | 44 | $ git clone https://github.com/ogura-edu/HDNNP.git 45 | 46 | Install dependencies and this program 47 | ------------------------------------- 48 | 49 | Via pipenv 50 | ^^^^^^^^^^^^^^^^^^^^^ 51 | 52 | .. code-block:: shell 53 | 54 | $ cd HDNNP/ 55 | $ pyenv local 3.6.7 56 | $ pip install pipenv 57 | $ pipenv install --dev 58 | 59 | (activate) 60 | $ pipenv shell 61 | 62 | (for example:) 63 | (HDNNP) $ hdnnpy train 64 | 65 | (deactivate) 66 | (HDNNP) $ exit 67 | 68 | 69 | Via anaconda 70 | ^^^^^^^^^^^^^^^^^^^^^ 71 | 72 | Anaconda also can be installed by pyenv. 73 | 74 | .. code-block:: shell 75 | 76 | $ cd HDNNP/ 77 | $ pyenv install anaconda3-xxx 78 | $ pyenv local anaconda3-xxx 79 | $ conda env create -n HDNNP --file condaenv.yaml 80 | 81 | (activate) 82 | $ conda activate HDNNP 83 | 84 | (for example:) 85 | (HDNNP) $ hdnnpy train 86 | 87 | (deactivate) 88 | (HDNNP) $ conda deactivate 89 | 90 | 91 | 92 | Via raw pip 93 | ^^^^^^^^^^^^^^^^^^^^^ 94 | 95 | You can install all dependent packages manually. 96 | The dependent packages are written in ``Pipfile``, ``condaenv.yaml`` or ``requirements.txt``. 97 | 98 | .. code-block:: shell 99 | 100 | $ cd HDNNP/ 101 | $ pip install PKG1 PKG2 ... 102 | $ pip install --editable . 103 | -------------------------------------------------------------------------------- /docs/source/modules/dataset.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.dataset 2 | 3 | Dataset tools 4 | ============= 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | ~dataset_generator.DatasetGenerator 11 | ~hdnnp_dataset.HDNNPDataset 12 | 13 | 14 | Descriptor datasets 15 | ------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated/ 19 | :nosignatures: 20 | 21 | ~descriptor.symmetry_function_dataset.SymmetryFunctionDataset 22 | 23 | 24 | Property datasets 25 | ----------------- 26 | 27 | .. autosummary:: 28 | :toctree: generated/ 29 | :nosignatures: 30 | 31 | ~property.interatomic_potential_dataset.InteratomicPotentialDataset 32 | 33 | Dataset base classes 34 | -------------------- 35 | 36 | .. autosummary:: 37 | :toctree: generated/ 38 | :nosignatures: 39 | 40 | ~descriptor.descriptor_dataset_base.DescriptorDatasetBase 41 | ~property.property_dataset_base.PropertyDatasetBase 42 | 43 | Atomic structure 44 | ---------------- 45 | 46 | .. autosummary:: 47 | :toctree: generated/ 48 | :nosignatures: 49 | 50 | ~atomic_structure.AtomicStructure 51 | -------------------------------------------------------------------------------- /docs/source/modules/format.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.format 2 | 3 | File parsing tools 4 | ================== 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | ~xyz.parse_xyz 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/source/modules/index.rst: -------------------------------------------------------------------------------- 1 | ******* 2 | Modules 3 | ******* 4 | 5 | .. module:: hdnnpy 6 | 7 | .. 
toctree:: 8 | :maxdepth: 1 9 | 10 | dataset 11 | format 12 | model 13 | preprocess 14 | training 15 | utils 16 | -------------------------------------------------------------------------------- /docs/source/modules/model.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.model 2 | 3 | Neural network potential models 4 | =============================== 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | ~models.HighDimensionalNNP 11 | ~models.MasterNNP 12 | ~models.SubNNP 13 | -------------------------------------------------------------------------------- /docs/source/modules/preprocess.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.preprocess 2 | 3 | Pre-processing of dataset 4 | ========================= 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | ~pca.PCA 11 | ~scaling.Scaling 12 | ~standardization.Standardization 13 | 14 | Pre-processing base class 15 | ------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated/ 19 | :nosignatures: 20 | 21 | ~preprocess_base.PreprocessBase 22 | -------------------------------------------------------------------------------- /docs/source/modules/training.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.training 2 | 3 | Chainer-based training tools 4 | ============================ 5 | 6 | Custom training extensions 7 | -------------------------- 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | :nosignatures: 12 | 13 | ~extensions.ScatterPlot 14 | ~extensions.set_log_scale 15 | 16 | 17 | Loss functions 18 | -------------- 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | :nosignatures: 23 | 24 | ~loss_function.Zeroth 25 | ~loss_function.First 26 | ~loss_function.Potential 27 | 28 | Loss function base class 29 | ------------------------ 30 | 31 | .. autosummary:: 32 | :toctree: generated/ 33 | :nosignatures: 34 | 35 | ~loss_function.loss_function_base.LossFunctionBase 36 | 37 | 38 | Training manager 39 | ---------------- 40 | 41 | .. autosummary:: 42 | :toctree: generated/ 43 | :nosignatures: 44 | 45 | ~manager.Manager 46 | 47 | 48 | Updater 49 | ------- 50 | 51 | .. autosummary:: 52 | :toctree: generated/ 53 | :nosignatures: 54 | 55 | ~updater.Updater 56 | -------------------------------------------------------------------------------- /docs/source/modules/utils.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.utils 2 | 3 | Utilities 4 | ========= 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | MPI 11 | pprint 12 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | How to use HDNNP 2 | ================ 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | Data generation 10 | ----------------- 11 | 12 | | Usually, HDNNP is used to reduce computational cost by learning the results of 13 | DFT (Density Functional Theory) calculations, which are highly accurate but expensive. 14 | | Therefore, the first step is to generate a training dataset using DFT calculations such as ab-initio MD. 15 | 16 | 17 | 18 | Pre-processing 19 | ----------------- 20 | 21 | | The HDNNP training application supports only the .xyz file format.
22 | | We prepare a python script to convert the output file of VASP such as ``OUTCAR`` to .xyz format file, 23 | but in the same way you can convert the output of other DFT calculation program to .xyz format file. 24 | | Inside this program, file format conversion is performed using `ASE`_ package. 25 | 26 | .. _ASE: https://wiki.fysik.dtu.dk/ase/ase/io/io.html 27 | 28 | 29 | 30 | 31 | Training 32 | ----------------- 33 | 34 | Configuration 35 | ^^^^^^^^^^^^^^^^^ 36 | 37 | A default configuration file for training is located in ``examples/training_config.py``. 38 | 39 | ``training_config.py`` consists of some subclasses that inherits ``traitlets.config.Configurable``: 40 | 41 | * c.Application.xxx 42 | * c.TrainingApplication.xxx 43 | * c.DatasetConfig.xxx 44 | * c.ModelConfig.xxx 45 | * c.TrainingConfig.xxx 46 | 47 | 48 | Following configurations are required, and remaining configurations are optional. 49 | 50 | * c.DatasetConfig.parameters 51 | * c.ModelConfig.layers 52 | * c.TrainingConfig.data_file 53 | * c.TrainingConfig.batch_size 54 | * c.TrainingConfig.epoch 55 | * c.TrainingConfig.order 56 | * c.TrainingConfig.loss_function 57 | * c.TrainingConfig.interval 58 | * c.TrainingConfig.patients 59 | 60 | For details of each setting, see ``training_config.py`` 61 | 62 | 63 | Command line interface 64 | ^^^^^^^^^^^^^^^^^^^^^^ 65 | 66 | Execute the following command in the directory where ``training_config.py`` is located. 67 | 68 | :: 69 | 70 | $ hdnnpy train 71 | 72 | .. note:: 73 | 74 | | Currently, if output directory set by ``c.TrainingConfig.out_dir`` already exists, it overwrites the existing file in the directory. 75 | | If you want to avoid this, please change ``c.TrainingConfig.out_dir`` for each execution. 76 | 77 | 78 | 79 | 80 | 81 | Prediction 82 | ----------------- 83 | 84 | Configuration 85 | ^^^^^^^^^^^^^^^^^ 86 | 87 | A default configuration file for prediction is located in ``examples/prediction_config.py``. 88 | 89 | ``prediction_config.py`` consists of some subclasses that inherits ``traitlets.config.Configurable``: 90 | 91 | * c.Application.xxx 92 | * c.PredictionApplication.xxx 93 | * c.PredictionConfig.xxx 94 | 95 | 96 | Following configurations are required, and remaining configurations are optional. 97 | 98 | * c.PredictionConfig.data_file 99 | * c.PredictionConfig.order 100 | 101 | For details of each setting, see ``prediction_config.py`` 102 | 103 | 104 | Command line interface 105 | ^^^^^^^^^^^^^^^^^^^^^^ 106 | 107 | Execute the following command in the directory where ``prediction_config.py`` is located. 108 | 109 | :: 110 | 111 | $ hdnnpy predict 112 | 113 | 114 | Post-processing 115 | ----------------- 116 | 117 | | It is possible to calculate MD simulation with LAMMPS using trained HDNNP. 118 | | However, it is also under development. 119 | | We welcome your comments and suggestions. 120 | 121 | `HDNNP-LAMMPS interface program `_ 122 | 123 | 124 | Command line interface 125 | ^^^^^^^^^^^^^^^^^^^^^^ 126 | 127 | Execute the following command. 128 | 129 | :: 130 | 131 | $ hdnnpy convert 132 | 133 | | 2 command line options are available, and no config file is used in this command. 134 | | To see details of these options, use 135 | 136 | :: 137 | 138 | $ hdnnpy convert -h 139 | -------------------------------------------------------------------------------- /examples/prediction_config.py: -------------------------------------------------------------------------------- 1 | # Configuration file for hdnnpy predict. 
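#
# Example (illustrative only, not generated by hdnnpy): as described in the
# usage documentation, a minimal prediction run typically only sets the two
# required options below. The data file path is a hypothetical example.
#
# c.PredictionConfig.data_file = 'data/GaN.xyz'
# c.PredictionConfig.order = 1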
2 | 3 | #------------------------------------------------------------------------------ 4 | # Application(SingletonConfigurable) configuration 5 | #------------------------------------------------------------------------------ 6 | 7 | ## This is an application. 8 | 9 | ## The date format used by logging formatters for %(asctime)s 10 | #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' 11 | 12 | ## The Logging format template 13 | #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' 14 | 15 | ## Set the log level by value or name. 16 | #c.Application.log_level = 30 17 | 18 | #------------------------------------------------------------------------------ 19 | # PredictionApplication(Application) configuration 20 | #------------------------------------------------------------------------------ 21 | 22 | ## Set verbose mode 23 | #c.PredictionApplication.verbose = False 24 | 25 | #------------------------------------------------------------------------------ 26 | # PredictionConfig(Configurable) configuration 27 | #------------------------------------------------------------------------------ 28 | 29 | ## Path to a data file used for HDNNP prediction. Only .xyz file format is 30 | # supported. 31 | #c.PredictionConfig.data_file = '.' 32 | 33 | ## File format to output HDNNP predition result 34 | #c.PredictionConfig.dump_format = '.npz' 35 | 36 | ## Path to directory to load training output files 37 | #c.PredictionConfig.load_dir = 'output' 38 | 39 | ## Order of differentiation used for calculation of descriptor & property 40 | # datasets and HDNNP prediction. ex.) 0: energy, 1: force, for interatomic 41 | # potential 42 | #c.PredictionConfig.order = 0 43 | 44 | ## List of dataset tags. Use dataset for HDNNP training in this order. Pattern 45 | # matching is available. 46 | #c.PredictionConfig.tags = ['*'] 47 | -------------------------------------------------------------------------------- /examples/training_config.py: -------------------------------------------------------------------------------- 1 | # Configuration file for hdnnpy train. 2 | 3 | #------------------------------------------------------------------------------ 4 | # Application(SingletonConfigurable) configuration 5 | #------------------------------------------------------------------------------ 6 | 7 | ## This is an application. 8 | 9 | ## The date format used by logging formatters for %(asctime)s 10 | #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' 11 | 12 | ## The Logging format template 13 | #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' 14 | 15 | ## Set the log level by value or name. 16 | #c.Application.log_level = 30 17 | 18 | #------------------------------------------------------------------------------ 19 | # TrainingApplication(Application) configuration 20 | #------------------------------------------------------------------------------ 21 | 22 | ## Set verbose mode 23 | #c.TrainingApplication.verbose = False 24 | 25 | #------------------------------------------------------------------------------ 26 | # DatasetConfig(Configurable) configuration 27 | #------------------------------------------------------------------------------ 28 | 29 | ## Name of descriptor dataset used for input of HDNNP 30 | #c.DatasetConfig.descriptor = 'symmetry_function' 31 | 32 | ## Parameters used for the specified descriptor dataset. Set as Dict{key: 33 | # List[Tuple(parameters)]}. This will be passed to descriptor dataset as keyword 34 | # arguments. ex.) 
{"type2": [(5.0, 0.01, 2.0)]} 35 | #c.DatasetConfig.parameters = {} 36 | 37 | ## Preprocess to be applied for input of HDNNP (=descriptor). Set as 38 | # List[Tuple(Str(name), Tuple(args), Dict{kwargs})]. Each preprocess instance 39 | # will be initialized with (*args, **kwargs). ex.) [("pca", (20,), {})] 40 | #c.DatasetConfig.preprocesses = [] 41 | 42 | ## Name of property dataset to be optimized by HDNNP 43 | #c.DatasetConfig.property_ = 'interatomic_potential' 44 | 45 | ## If the given data file and the loaded dataset are not compatible, 46 | # automatically recalculate and overwrite it. 47 | #c.DatasetConfig.remake = False 48 | 49 | #------------------------------------------------------------------------------ 50 | # ModelConfig(Configurable) configuration 51 | #------------------------------------------------------------------------------ 52 | 53 | ## Hidden layers of a neural network constituting HDNNP. Set as List[Tuple(Int(# 54 | # of nodes), Str(activation function))]. 55 | #c.ModelConfig.hidden_layers = [] 56 | 57 | #------------------------------------------------------------------------------ 58 | # TrainingConfig(Configurable) configuration 59 | #------------------------------------------------------------------------------ 60 | 61 | ## Number of data within each batch 62 | #c.TrainingConfig.batch_size = 0 63 | 64 | ## Path to a data file used for HDNNP training. Only .xyz file format is 65 | # supported. 66 | #c.TrainingConfig.data_file = '.' 67 | 68 | ## Upper bound of the number of training loops 69 | #c.TrainingConfig.epoch = 0 70 | 71 | ## Lower limit of learning rate when it decays 72 | #c.TrainingConfig.final_lr = 1e-06 73 | 74 | ## Initial learning rate 75 | #c.TrainingConfig.init_lr = 0.001 76 | 77 | ## Length of interval of training epochs used for checking metrics value 78 | #c.TrainingConfig.interval = 0 79 | 80 | ## Coefficient for the weight decay in L1 regularization 81 | #c.TrainingConfig.l1_norm = 0.0 82 | 83 | ## Coefficient for the weight decay in L2 regularization 84 | #c.TrainingConfig.l2_norm = 0.0 85 | 86 | ## Set chainer training extension `LogReport` if this flag is set 87 | #c.TrainingConfig.log_report = True 88 | 89 | ## Name of loss function and parameters of it. Set as Tuple(Str(name), 90 | # Dict{parameters}). ex.) ("first", {"mixing_beta": 0.5}) 91 | #c.TrainingConfig.loss_function = () 92 | 93 | ## Rate of exponential decay of learning rate 94 | #c.TrainingConfig.lr_decay = 0.0 95 | 96 | ## Path to output directory. NOTE: Currently, all output files will be 97 | # overwritten. 98 | #c.TrainingConfig.out_dir = 'output' 99 | 100 | ## Counts to let `chainer.training.triggers.EarlyStoppingTrigger` be patient 101 | #c.TrainingConfig.patients = 0 102 | 103 | ## Set chainer training extension `PlotReport` if this flag is set 104 | #c.TrainingConfig.plot_report = False 105 | 106 | ## Set chainer training extension `PrintReport` if this flag is set 107 | #c.TrainingConfig.print_report = True 108 | 109 | ## Set chainer training extension `ScatterPlot` if this flag is set 110 | #c.TrainingConfig.scatter_plot = False 111 | 112 | ## List of dataset tags. Use dataset for HDNNP training in this order. Pattern 113 | # matching is available. 114 | #c.TrainingConfig.tags = ['*'] 115 | 116 | ## Ratio to use for training data. The rest are used for test data. 
117 | #c.TrainingConfig.train_test_ratio = 0.9 118 | -------------------------------------------------------------------------------- /hdnnpy/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """High Dimensional Neural Network Potential package.""" 4 | 5 | __all__ = [ 6 | '__version__', 7 | ] 8 | 9 | from ._version import __version__ 10 | -------------------------------------------------------------------------------- /hdnnpy/__main__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from hdnnpy.cli import main 4 | 5 | 6 | if __name__ == '__main__': 7 | main() 8 | -------------------------------------------------------------------------------- /hdnnpy/_version.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | __version__ = '0.5.1.dev' 4 | -------------------------------------------------------------------------------- /hdnnpy/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | __all__ = [ 4 | 'main', 5 | ] 6 | 7 | from hdnnpy.cli.main import main 8 | -------------------------------------------------------------------------------- /hdnnpy/cli/configurables.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import pathlib 4 | 5 | from traitlets import ( 6 | Bool, CaselessStrEnum, Dict, Float, 7 | Integer, List, TraitType, Tuple, Unicode, 8 | ) 9 | import traitlets.config 10 | 11 | 12 | class Path(TraitType): 13 | default_value = '.' 14 | info_text = 'a pathlib.Path instance' 15 | 16 | def validate(self, obj, value): 17 | if isinstance(value, pathlib.Path): 18 | return value.absolute() 19 | elif isinstance(value, str): 20 | return pathlib.Path(value).absolute() 21 | else: 22 | self.error(obj, value) 23 | 24 | 25 | class Configurable(traitlets.config.Configurable): 26 | def dump(self): 27 | dic = {key: value for key, value in self._trait_values.items() 28 | if key not in ['config', 'parent']} 29 | return dic 30 | 31 | 32 | class DatasetConfig(Configurable): 33 | # not configurable 34 | n_sample = Integer( 35 | help='Total number of data counted internally') 36 | 37 | # configurable 38 | descriptor = CaselessStrEnum( 39 | ['symmetry_function', 'weighted_symmetry_function'], 40 | default_value='symmetry_function', 41 | help='Name of descriptor dataset used for input of HDNNP' 42 | ).tag(config=True) 43 | parameters = Dict( 44 | trait=List, 45 | help='Parameters used for the specified descriptor dataset. ' 46 | 'Set as Dict{key: List[Tuple(parameters)]}. ' 47 | 'This will be passed to descriptor dataset as keyword arguments. ' 48 | 'ex.) {"type2": [(5.0, 0.01, 2.0)]}' 49 | ).tag(config=True) 50 | property_ = CaselessStrEnum( 51 | ['interatomic_potential'], 52 | default_value='interatomic_potential', 53 | help='Name of property dataset to be optimized by HDNNP' 54 | ).tag(config=True) 55 | preprocesses = List( 56 | trait=Tuple( 57 | CaselessStrEnum(['pca', 'scaling', 'standardization']), 58 | Tuple(), 59 | Dict() 60 | ), 61 | help='Preprocess to be applied for input of HDNNP (=descriptor). ' 62 | 'Set as List[Tuple(Str(name), Tuple(args), Dict{kwargs})]. ' 63 | 'Each preprocess instance will be initialized with ' 64 | '(*args, **kwargs). ' 65 | 'ex.) 
[("pca", (20,), {})]' 66 | ).tag(config=True) 67 | remake = Bool( 68 | default_value=False, 69 | help='If the given data file and the loaded dataset are not ' 70 | 'compatible, automatically recalculate and overwrite it.' 71 | ).tag(config=True) 72 | 73 | 74 | class ModelConfig(Configurable): 75 | # not configurable 76 | n_input = Integer( 77 | help='Number of nodes of input layer.') 78 | n_output = Integer( 79 | help='Number of nodes of output layer.') 80 | 81 | # configurable 82 | hidden_layers = List( 83 | trait=Tuple(Integer, Unicode), 84 | help='Hidden layers of a neural network constituting HDNNP. ' 85 | 'Set as List[Tuple(Int(# of nodes), Str(activation function))]. ' 86 | ).tag(config=True) 87 | 88 | 89 | class TrainingConfig(Configurable): 90 | # not configurable 91 | elements = List( 92 | trait=Unicode, 93 | help='All elements contained in the dataset listed internally') 94 | 95 | # configurable 96 | data_file = Path( 97 | help='Path to a data file used for HDNNP training. ' 98 | 'Only .xyz file format is supported.' 99 | ).tag(config=True) 100 | tags = List( 101 | trait=Unicode, 102 | default_value=['*'], 103 | help='List of dataset tags. ' 104 | 'Use dataset for HDNNP training in this order. ' 105 | 'Pattern matching is available.' 106 | ).tag(config=True) 107 | out_dir = Path( 108 | default_value='output', 109 | help='Path to output directory. ' 110 | 'NOTE: Currently, all output files will be overwritten.' 111 | ).tag(config=True) 112 | train_test_ratio = Float( 113 | default_value=0.9, 114 | help='Ratio to use for training data. ' 115 | 'The rest are used for test data.' 116 | ).tag(config=True) 117 | # chainer training 118 | loss_function = Tuple( 119 | CaselessStrEnum(['zeroth', 'first', 'potential']), 120 | Dict(), 121 | help='Name of loss function and parameters of it. ' 122 | 'Set as Tuple(Str(name), Dict{parameters}). ' 123 | 'ex.) 
("first", {"mixing_beta": 0.5})' 124 | ).tag(config=True) 125 | init_lr = Float( 126 | default_value=1.0e-3, 127 | help='Initial learning rate' 128 | ).tag(config=True) 129 | final_lr = Float( 130 | default_value=1.0e-6, 131 | help='Lower limit of learning rate when it decays' 132 | ).tag(config=True) 133 | lr_decay = Float( 134 | help='Rate of exponential decay of learning rate' 135 | ).tag(config=True) 136 | l1_norm = Float( 137 | help='Coefficient for the weight decay in L1 regularization' 138 | ).tag(config=True) 139 | l2_norm = Float( 140 | help='Coefficient for the weight decay in L2 regularization' 141 | ).tag(config=True) 142 | interval = Integer( 143 | help='Length of interval of training epochs used for checking metrics' 144 | ' value' 145 | ).tag(config=True) 146 | patients = Integer( 147 | help='Counts to let `chainer.training.triggers.EarlyStoppingTrigger`' 148 | ' be patient' 149 | ).tag(config=True) 150 | epoch = Integer( 151 | help='Upper bound of the number of training loops' 152 | ).tag(config=True) 153 | batch_size = Integer( 154 | help='Number of data within each batch' 155 | ).tag(config=True) 156 | # chainer extension flags 157 | scatter_plot = Bool( 158 | False, 159 | help='Set chainer training extension `ScatterPlot` if this flag is set' 160 | ).tag(config=True) 161 | log_report = Bool( 162 | True, 163 | help='Set chainer training extension `LogReport` if this flag is set' 164 | ).tag(config=True) 165 | print_report = Bool( 166 | True, 167 | help='Set chainer training extension `PrintReport` if this flag is set' 168 | ).tag(config=True) 169 | plot_report = Bool( 170 | False, 171 | help='Set chainer training extension `PlotReport` if this flag is set' 172 | ).tag(config=True) 173 | 174 | 175 | class PredictionConfig(Configurable): 176 | # not configurable 177 | elements = List( 178 | trait=Unicode, 179 | help='All elements contained in the dataset listed internally') 180 | 181 | # configurable 182 | data_file = Path( 183 | help='Path to a data file used for HDNNP prediction. ' 184 | 'Only .xyz file format is supported.' 185 | ).tag(config=True) 186 | tags = List( 187 | trait=Unicode, 188 | default_value=['*'], 189 | help='List of dataset tags. ' 190 | 'Use dataset for HDNNP training in this order. ' 191 | 'Pattern matching is available.' 192 | ).tag(config=True) 193 | load_dir = Path( 194 | default_value='output', 195 | help='Path to directory to load training output files' 196 | ).tag(config=True) 197 | order = Integer( 198 | help='Order of differentiation used for calculation ' 199 | 'of descriptor & property datasets and HDNNP prediction. ' 200 | 'ex.) 
0: energy, 1: force, for interatomic potential' 201 | ).tag(config=True) 202 | dump_format = CaselessStrEnum( 203 | ['.npz'], 204 | default_value='.npz', 205 | help='File format to output HDNNP predition result' 206 | ).tag(config=True) 207 | -------------------------------------------------------------------------------- /hdnnpy/cli/conversion_application.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import datetime 4 | import socket 5 | import textwrap 6 | 7 | import chainer 8 | from traitlets import (CaselessStrEnum, Dict, Unicode) 9 | from traitlets.config import Application 10 | import yaml 11 | 12 | from hdnnpy import __version__ 13 | from hdnnpy.cli.configurables import (DatasetConfig, ModelConfig, Path) 14 | from hdnnpy.model import MasterNNP 15 | from hdnnpy.preprocess import PREPROCESS 16 | from hdnnpy.utils import (pprint, pyyaml_path_constructor) 17 | 18 | 19 | class ConversionApplication(Application): 20 | name = Unicode(u'hdnnpy convert') 21 | description = 'Convert output files of training to required format.' 22 | 23 | format = CaselessStrEnum( 24 | ['lammps'], 25 | default_value='lammps', 26 | help='Name of the destination format.', 27 | ).tag(config=True) 28 | load_dir = Path( 29 | default_value='output', 30 | help='Path to directory to load training output files.', 31 | ).tag(config=True) 32 | 33 | aliases = Dict({ 34 | 'format': 'ConversionApplication.format', 35 | 'load_dir': 'ConversionApplication.load_dir', 36 | }) 37 | 38 | def __init__(self, **kwargs): 39 | super().__init__(**kwargs) 40 | self.training_result = None 41 | self.dataset_config = None 42 | self.model_config = None 43 | 44 | def initialize(self, argv=None): 45 | self.parse_command_line(argv) 46 | 47 | yaml.add_constructor('Path', pyyaml_path_constructor) 48 | self.training_result = yaml.load( 49 | (self.load_dir / 'training_result.yaml').open()) 50 | self.dataset_config = DatasetConfig(**self.training_result['dataset']) 51 | self.model_config = ModelConfig(**self.training_result['model']) 52 | 53 | def start(self): 54 | tr = self.training_result 55 | dc = self.dataset_config 56 | mc = self.model_config 57 | 58 | # load preprocesses 59 | preprocesses = [] 60 | for (name, args, kwargs) in dc.preprocesses: 61 | preprocess = PREPROCESS[name](*args, **kwargs) 62 | preprocess.load( 63 | self.load_dir / 'preprocess' / f'{name}.npz', verbose=False) 64 | preprocesses.append(preprocess) 65 | # load master nnp 66 | master_nnp = MasterNNP(tr['training']['elements'], 67 | tr['model']['n_input'], 68 | mc.hidden_layers, 69 | tr['model']['n_output']) 70 | chainer.serializers.load_npz( 71 | self.load_dir / 'master_nnp.npz', master_nnp) 72 | 73 | if self.format == 'lammps': 74 | self.dump_for_lammps(preprocesses, master_nnp) 75 | 76 | def dump_for_lammps(self, preprocesses, master_nnp): 77 | dc = self.dataset_config 78 | potential_file = self.load_dir / 'lammps.nnp' 79 | with potential_file.open('w') as f: 80 | # information 81 | now = datetime.datetime.now() 82 | machine = socket.gethostname() 83 | pprint(f''' 84 | # Created by hdnnpy {__version__} ({now}). 85 | # All parameters are read from [{machine}] {self.load_dir}. 
86 | # Ref: https://github.com/ogura-edu/HDNNP 87 | ''', stream=f) 88 | 89 | # descriptor 90 | pprint(f''' 91 | # {dc.descriptor} parameters 92 | {len(dc.parameters)} 93 | ''', stream=f) 94 | for name, params in dc.parameters.items(): 95 | params_str = ('\n'+' '*16).join([' '.join(map(str, row)) 96 | for row in params]) 97 | pprint(f''' 98 | {name} {len(params)} 99 | {params_str} 100 | ''', stream=f) 101 | 102 | # preprocess 103 | pprint(f''' 104 | # pre-processing parameters 105 | {len(preprocesses)} 106 | ''', stream=f) 107 | for preprocess in preprocesses: 108 | pprint(f''' 109 | {preprocess.name} 110 | 111 | {textwrap.indent( 112 | textwrap.dedent(preprocess.dump_params()), ' '*16)} 113 | ''', stream=f) 114 | 115 | # model 116 | pprint(f''' 117 | # neural network parameters 118 | {len(master_nnp[0])} 119 | 120 | {textwrap.indent( 121 | textwrap.dedent(master_nnp.dump_params()), ' '*12)} 122 | ''', stream=f) 123 | -------------------------------------------------------------------------------- /hdnnpy/cli/main.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import os 4 | from pathlib import Path 5 | import sys 6 | 7 | from traitlets import Unicode 8 | from traitlets.config import Application 9 | 10 | from hdnnpy.cli.conversion_application import ConversionApplication 11 | from hdnnpy.cli.prediction_application import PredictionApplication 12 | from hdnnpy.cli.training_application import TrainingApplication 13 | from hdnnpy.utils import MPI 14 | 15 | 16 | class HDNNPApplication(Application): 17 | name = Unicode(u'hdnnpy') 18 | 19 | classes = [ 20 | ConversionApplication, 21 | PredictionApplication, 22 | TrainingApplication, 23 | ] 24 | 25 | subcommands = { 26 | 'convert': (ConversionApplication, ConversionApplication.description), 27 | 'predict': (PredictionApplication, PredictionApplication.description), 28 | 'train': (TrainingApplication, TrainingApplication.description), 29 | } 30 | 31 | def initialize(self, argv=None): 32 | if MPI.rank != 0: 33 | sys.stdout = Path(os.devnull).open('w') 34 | assert sys.argv[1] in self.subcommands, \ 35 | 'Only `hdnnpy train` and `hdnnpy predict` `hdnnpy convert` are' \ 36 | ' available.' 
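
As a usage note for the conversion application defined above: once a training run has produced `training_result.yaml`, `master_nnp.npz` and the preprocess files in its output directory, the command `hdnnpy convert --format lammps --load_dir output` writes `lammps.nnp` into that directory. A rough programmatic equivalent (a simplified sketch, not an officially supported API) would be:

```
from hdnnpy.cli.conversion_application import ConversionApplication

app = ConversionApplication()
app.initialize(['--format', 'lammps', '--load_dir', 'output'])
app.start()   # writes <load_dir>/lammps.nnp from the stored training results
```
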
37 | super().initialize(argv) 38 | 39 | 40 | main = HDNNPApplication.launch_instance 41 | -------------------------------------------------------------------------------- /hdnnpy/cli/prediction_application.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import fnmatch 4 | import shutil 5 | 6 | import chainer 7 | import numpy as np 8 | from traitlets import (Bool, Dict, List, Unicode) 9 | from traitlets.config import Application 10 | import yaml 11 | 12 | from hdnnpy.cli.configurables import ( 13 | DatasetConfig, ModelConfig, Path, PredictionConfig, 14 | ) 15 | from hdnnpy.dataset import (AtomicStructure, DatasetGenerator, HDNNPDataset) 16 | from hdnnpy.dataset.descriptor import DESCRIPTOR_DATASET 17 | from hdnnpy.dataset.property import PROPERTY_DATASET 18 | from hdnnpy.format import parse_xyz 19 | from hdnnpy.model import (HighDimensionalNNP, MasterNNP) 20 | from hdnnpy.preprocess import PREPROCESS 21 | from hdnnpy.utils import (MPI, pprint, pyyaml_path_constructor) 22 | 23 | 24 | class PredictionApplication(Application): 25 | name = Unicode(u'hdnnpy predict') 26 | description = ('Predict properties for atomic structures using trained' 27 | ' HDNNP.') 28 | 29 | verbose = Bool( 30 | False, 31 | help='Set verbose mode' 32 | ).tag(config=True) 33 | 34 | classes = List([PredictionConfig]) 35 | 36 | config_file = Path( 37 | 'prediction_config.py', 38 | help='Load this config file') 39 | 40 | aliases = Dict({ 41 | 'log_level': 'Application.log_level', 42 | }) 43 | 44 | flags = Dict({ 45 | 'verbose': ({ 46 | 'PredictionApplication': { 47 | 'verbose': True, 48 | }, 49 | }, 'Set verbose mode'), 50 | 'v': ({ 51 | 'PredictionApplication': { 52 | 'verbose': True, 53 | }, 54 | }, 'Set verbose mode'), 55 | 'debug': ({ 56 | 'Application': { 57 | 'log_level': 10, 58 | }, 59 | }, 'Set log level to DEBUG'), 60 | }) 61 | 62 | def __init__(self, **kwargs): 63 | super().__init__(**kwargs) 64 | self.dataset_config = None 65 | self.model_config = None 66 | self.prediction_config = None 67 | 68 | def initialize(self, argv=None): 69 | self.parse_command_line(argv) 70 | self.load_config_file(self.config_file) 71 | self.prediction_config = PredictionConfig(config=self.config) 72 | 73 | yaml.add_constructor('Path', pyyaml_path_constructor) 74 | training_result = yaml.load( 75 | (self.prediction_config.load_dir / 'training_result.yaml').open()) 76 | self.dataset_config = DatasetConfig(**training_result['dataset']) 77 | self.model_config = ModelConfig(**training_result['model']) 78 | 79 | def start(self): 80 | pc = self.prediction_config 81 | shutil.copy(self.config_file, pc.load_dir / self.config_file.name) 82 | tag_xyz_map, pc.elements = parse_xyz( 83 | pc.data_file, save=False, verbose=self.verbose) 84 | datasets = self.construct_datasets(tag_xyz_map) 85 | datasets = DatasetGenerator(*datasets).all() 86 | if MPI.rank == 0: 87 | results = self.predict(datasets) 88 | self.dump_result(results) 89 | 90 | def construct_datasets(self, tag_xyz_map): 91 | dc = self.dataset_config 92 | mc = self.model_config 93 | pc = self.prediction_config 94 | 95 | preprocesses = [] 96 | for (name, args, kwargs) in dc.preprocesses: 97 | preprocess = PREPROCESS[name](*args, **kwargs) 98 | preprocess.load( 99 | pc.load_dir / 'preprocess' / f'{name}.npz', 100 | verbose=self.verbose) 101 | preprocesses.append(preprocess) 102 | 103 | datasets = [] 104 | for pattern in pc.tags: 105 | for tag in fnmatch.filter(tag_xyz_map, pattern): 106 | if self.verbose: 107 | pprint(f'Construct 
sub dataset tagged as "{tag}"') 108 | tagged_xyz = tag_xyz_map.pop(tag) 109 | structures = AtomicStructure.read_xyz(tagged_xyz) 110 | 111 | # prepare descriptor dataset 112 | descriptor = DESCRIPTOR_DATASET[dc.descriptor]( 113 | pc.order, structures, **dc.parameters) 114 | descriptor.make(verbose=self.verbose) 115 | 116 | # prepare empty property dataset 117 | property_ = PROPERTY_DATASET[dc.property_]( 118 | pc.order, structures) 119 | 120 | # construct test dataset from descriptor & property datasets 121 | dataset = HDNNPDataset(descriptor, property_) 122 | dataset.construct( 123 | all_elements=pc.elements, preprocesses=preprocesses, 124 | shuffle=False, verbose=self.verbose) 125 | datasets.append(dataset) 126 | dc.n_sample += dataset.total_size 127 | mc.n_input = dataset.n_input 128 | mc.n_output = dataset.n_label 129 | 130 | return datasets 131 | 132 | def predict(self, datasets): 133 | mc = self.model_config 134 | pc = self.prediction_config 135 | results = [] 136 | 137 | # master model 138 | master_nnp = MasterNNP( 139 | pc.elements, mc.n_input, mc.hidden_layers, mc.n_output) 140 | chainer.serializers.load_npz( 141 | pc.load_dir / 'master_nnp.npz', master_nnp) 142 | 143 | for dataset in datasets: 144 | # hdnnp model 145 | hdnnp = HighDimensionalNNP( 146 | dataset.elemental_composition, 147 | mc.n_input, mc.hidden_layers, mc.n_output) 148 | hdnnp.sync_param_with(master_nnp) 149 | 150 | batch = chainer.dataset.concat_examples(dataset) 151 | inputs = [batch[f'inputs/{i}'] for i in range(pc.order + 1)] 152 | with chainer.using_config('train', False), \ 153 | chainer.using_config('enable_backprop', False): 154 | predictions = hdnnp.predict(inputs, pc.order) 155 | 156 | result = { 157 | **{'tag': dataset.tag}, 158 | **{property_: coefficient * prediction.data 159 | for property_, coefficient, prediction 160 | in zip(dataset.property.properties, 161 | dataset.property.coefficients, 162 | predictions)}, 163 | } 164 | results.append(result) 165 | return results 166 | 167 | def dump_result(self, results): 168 | pc = self.prediction_config 169 | result_file = pc.load_dir / f'prediction_result{pc.dump_format}' 170 | if pc.dump_format == '.npz': 171 | kv_result = {} 172 | for result in results: 173 | tag = result.pop('tag') 174 | kv_result.update({tag + '/' + key: value 175 | for key, value in result.items()}) 176 | np.savez(result_file, **kv_result) 177 | 178 | 179 | def generate_config_file(): 180 | prediction_app = PredictionApplication() 181 | prediction_app.config_file.write_text( 182 | prediction_app.generate_config_file()) 183 | -------------------------------------------------------------------------------- /hdnnpy/cli/training_application.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import fnmatch 4 | import pathlib 5 | import shutil 6 | 7 | import chainer 8 | import chainer.training.extensions as ext 9 | from chainer.training.triggers import EarlyStoppingTrigger 10 | import chainermn 11 | from traitlets import (Bool, Dict, List, Unicode) 12 | from traitlets.config import Application 13 | import yaml 14 | 15 | from hdnnpy.cli.configurables import ( 16 | DatasetConfig, ModelConfig, Path, TrainingConfig, 17 | ) 18 | from hdnnpy.dataset import (AtomicStructure, DatasetGenerator, HDNNPDataset) 19 | from hdnnpy.dataset.descriptor import DESCRIPTOR_DATASET 20 | from hdnnpy.dataset.property import PROPERTY_DATASET 21 | from hdnnpy.format import parse_xyz 22 | from hdnnpy.model import (HighDimensionalNNP, MasterNNP) 23 | from 
hdnnpy.preprocess import PREPROCESS 24 | from hdnnpy.training import ( 25 | Manager, Updater, ScatterPlot, set_log_scale, 26 | ) 27 | from hdnnpy.training.loss_function import LOSS_FUNCTION 28 | from hdnnpy.utils import (MPI, pprint, pyyaml_path_representer) 29 | 30 | 31 | class TrainingApplication(Application): 32 | name = Unicode(u'hdnnpy train') 33 | description = 'Train a HDNNP to optimize given properties.' 34 | 35 | is_resume = Bool( 36 | False, 37 | help='Resume flag used internally.') 38 | resume_dir = Path( 39 | None, 40 | allow_none=True, 41 | help='This option can be set only by command line.') 42 | verbose = Bool( 43 | False, 44 | help='Set verbose mode' 45 | ).tag(config=True) 46 | 47 | classes = List([DatasetConfig, ModelConfig, TrainingConfig]) 48 | 49 | config_file = Path( 50 | 'training_config.py', 51 | help='Load this config file') 52 | 53 | aliases = Dict({ 54 | 'resume': 'TrainingApplication.resume_dir', 55 | 'log_level': 'Application.log_level', 56 | }) 57 | 58 | flags = Dict({ 59 | 'verbose': ({ 60 | 'TrainingApplication': { 61 | 'verbose': True, 62 | }, 63 | }, 'Set verbose mode'), 64 | 'v': ({ 65 | 'TrainingApplication': { 66 | 'verbose': True, 67 | }, 68 | }, 'Set verbose mode'), 69 | 'debug': ({ 70 | 'Application': { 71 | 'log_level': 10, 72 | }, 73 | }, 'Set log level to DEBUG'), 74 | }) 75 | 76 | def __init__(self, **kwargs): 77 | super().__init__(**kwargs) 78 | self.dataset_config = None 79 | self.model_config = None 80 | self.training_config = None 81 | self.loss_function = None 82 | 83 | def initialize(self, argv=None): 84 | # temporarily set `resume_dir` configurable 85 | self.__class__.resume_dir.tag(config=True) 86 | self.parse_command_line(argv) 87 | 88 | if self.resume_dir is not None: 89 | self.is_resume = True 90 | self.config_file = self.resume_dir.with_name(self.config_file.name) 91 | self.load_config_file(self.config_file) 92 | 93 | self.dataset_config = DatasetConfig(config=self.config) 94 | self.model_config = ModelConfig(config=self.config) 95 | self.training_config = TrainingConfig(config=self.config) 96 | if self.is_resume: 97 | self.training_config.out_dir = self.resume_dir.parent 98 | name, _ = self.training_config.loss_function 99 | self.loss_function = LOSS_FUNCTION[name] 100 | 101 | def start(self): 102 | tc = self.training_config 103 | tc.out_dir.mkdir(parents=True, exist_ok=True) 104 | if not self.is_resume: 105 | shutil.copy(self.config_file, 106 | tc.out_dir / self.config_file.name) 107 | tag_xyz_map, tc.elements = parse_xyz( 108 | tc.data_file, verbose=self.verbose) 109 | datasets = self.construct_datasets(tag_xyz_map) 110 | dataset = DatasetGenerator(*datasets).holdout(tc.train_test_ratio) 111 | result = self.train(dataset) 112 | if MPI.rank == 0: 113 | self.dump_result(result) 114 | 115 | def construct_datasets(self, tag_xyz_map): 116 | dc = self.dataset_config 117 | mc = self.model_config 118 | tc = self.training_config 119 | 120 | preprocess_dir = tc.out_dir / 'preprocess' 121 | preprocess_dir.mkdir(parents=True, exist_ok=True) 122 | preprocesses = [] 123 | for (name, args, kwargs) in dc.preprocesses: 124 | preprocess = PREPROCESS[name](*args, **kwargs) 125 | if self.is_resume: 126 | preprocess.load( 127 | preprocess_dir / f'{name}.npz', verbose=self.verbose) 128 | preprocesses.append(preprocess) 129 | 130 | datasets = [] 131 | for pattern in tc.tags: 132 | for tag in fnmatch.filter(tag_xyz_map, pattern): 133 | if self.verbose: 134 | pprint(f'Construct sub dataset tagged as "{tag}"') 135 | tagged_xyz = tag_xyz_map.pop(tag) 136 | 
structures = AtomicStructure.read_xyz(tagged_xyz) 137 | 138 | # prepare descriptor dataset 139 | descriptor = DESCRIPTOR_DATASET[dc.descriptor]( 140 | self.loss_function.order['descriptor'], 141 | structures, **dc.parameters) 142 | descriptor_npz = tagged_xyz.with_name(f'{dc.descriptor}.npz') 143 | if descriptor_npz.exists(): 144 | descriptor.load( 145 | descriptor_npz, verbose=self.verbose, remake=dc.remake) 146 | else: 147 | descriptor.make(verbose=self.verbose) 148 | descriptor.save(descriptor_npz, verbose=self.verbose) 149 | 150 | # prepare property dataset 151 | property_ = PROPERTY_DATASET[dc.property_]( 152 | self.loss_function.order['property'], structures) 153 | property_npz = tagged_xyz.with_name(f'{dc.property_}.npz') 154 | if property_npz.exists(): 155 | property_.load( 156 | property_npz, verbose=self.verbose, remake=dc.remake) 157 | else: 158 | property_.make(verbose=self.verbose) 159 | property_.save(property_npz, verbose=self.verbose) 160 | 161 | # construct HDNNP dataset from descriptor & property datasets 162 | dataset = HDNNPDataset(descriptor, property_) 163 | dataset.construct( 164 | all_elements=tc.elements, preprocesses=preprocesses, 165 | shuffle=True, verbose=self.verbose) 166 | dataset.scatter() 167 | datasets.append(dataset) 168 | dc.n_sample += dataset.total_size 169 | mc.n_input = dataset.n_input 170 | mc.n_output = dataset.n_label 171 | 172 | for preprocess in preprocesses: 173 | preprocess.save( 174 | preprocess_dir / f'{preprocess.name}.npz', 175 | verbose=self.verbose) 176 | 177 | return datasets 178 | 179 | def train(self, dataset, comm=None): 180 | mc = self.model_config 181 | tc = self.training_config 182 | if comm is None: 183 | comm = chainermn.create_communicator('naive', MPI.comm) 184 | result = {'training_time': 0.0, 'observation': []} 185 | 186 | # model and optimizer 187 | master_nnp = MasterNNP( 188 | tc.elements, mc.n_input, mc.hidden_layers, mc.n_output) 189 | master_opt = chainer.optimizers.Adam(tc.init_lr) 190 | master_opt = chainermn.create_multi_node_optimizer(master_opt, comm) 191 | master_opt.setup(master_nnp) 192 | master_opt.add_hook(chainer.optimizer_hooks.Lasso(tc.l1_norm)) 193 | master_opt.add_hook(chainer.optimizer_hooks.WeightDecay(tc.l2_norm)) 194 | 195 | for training, test in dataset: 196 | tag = training.tag 197 | properties = training.property.properties 198 | 199 | # iterators 200 | train_iter = chainer.iterators.SerialIterator( 201 | training, tc.batch_size // MPI.size, repeat=True, shuffle=True) 202 | test_iter = chainer.iterators.SerialIterator( 203 | test, tc.batch_size // MPI.size, repeat=False, shuffle=False) 204 | 205 | # model 206 | hdnnp = HighDimensionalNNP( 207 | training.elemental_composition, 208 | mc.n_input, mc.hidden_layers, mc.n_output) 209 | hdnnp.sync_param_with(master_nnp) 210 | main_opt = chainer.Optimizer() 211 | main_opt = chainermn.create_multi_node_optimizer(main_opt, comm) 212 | main_opt.setup(hdnnp) 213 | 214 | # loss function 215 | _, kwargs = tc.loss_function 216 | loss_function = self.loss_function(hdnnp, properties, **kwargs) 217 | observation_keys = loss_function.observation_keys 218 | 219 | # triggers 220 | interval = (tc.interval, 'epoch') 221 | stop_trigger = EarlyStoppingTrigger( 222 | check_trigger=interval, 223 | monitor=f'val/main/{observation_keys[-1]}', 224 | patients=tc.patients, mode='min', 225 | verbose=self.verbose, max_trigger=(tc.epoch, 'epoch')) 226 | 227 | # updater and trainer 228 | updater = Updater(train_iter, 229 | {'main': main_opt, 'master': master_opt}, 230 | 
loss_func=loss_function.eval) 231 | out_dir = tc.out_dir / tag 232 | trainer = chainer.training.Trainer(updater, stop_trigger, out_dir) 233 | 234 | # extensions 235 | trainer.extend(ext.ExponentialShift('alpha', 1 - tc.lr_decay, 236 | target=tc.final_lr, 237 | optimizer=master_opt)) 238 | evaluator = chainermn.create_multi_node_evaluator( 239 | ext.Evaluator(test_iter, hdnnp, eval_func=loss_function.eval), 240 | comm) 241 | trainer.extend(evaluator, name='val') 242 | if tc.scatter_plot: 243 | trainer.extend(ScatterPlot(test, hdnnp, comm), 244 | trigger=interval) 245 | if MPI.rank == 0: 246 | if tc.log_report: 247 | trainer.extend(ext.LogReport(log_name='training.log')) 248 | if tc.print_report: 249 | trainer.extend(ext.PrintReport( 250 | ['epoch', 'iteration'] 251 | + [f'main/{key}' for key in observation_keys] 252 | + [f'val/main/{key}' for key in observation_keys])) 253 | if tc.plot_report: 254 | trainer.extend(ext.PlotReport( 255 | [f'main/{key}' for key in observation_keys], 256 | x_key='epoch', postprocess=set_log_scale, 257 | file_name='training_set.png', marker=None)) 258 | trainer.extend(ext.PlotReport( 259 | [f'val/main/{key}' for key in observation_keys], 260 | x_key='epoch', postprocess=set_log_scale, 261 | file_name='validation_set.png', marker=None)) 262 | 263 | manager = Manager(tag, trainer, result, is_snapshot=True) 264 | if self.is_resume: 265 | manager.check_to_resume(self.resume_dir.name) 266 | if manager.allow_to_run: 267 | with manager: 268 | trainer.run() 269 | 270 | if MPI.rank == 0: 271 | chainer.serializers.save_npz( 272 | tc.out_dir / 'master_nnp.npz', master_nnp) 273 | 274 | return result 275 | 276 | def dump_result(self, result): 277 | yaml.add_representer(pathlib.PosixPath, pyyaml_path_representer) 278 | result_file = self.training_config.out_dir / 'training_result.yaml' 279 | with result_file.open('w') as f: 280 | yaml.dump({ 281 | 'dataset': self.dataset_config.dump(), 282 | 'model': self.model_config.dump(), 283 | 'training': self.training_config.dump(), 284 | }, f, default_flow_style=False) 285 | yaml.dump({ 286 | 'result': result, 287 | }, f, default_flow_style=False) 288 | 289 | 290 | def generate_config_file(): 291 | training_app = TrainingApplication() 292 | training_app.config_file.write_text(training_app.generate_config_file()) 293 | -------------------------------------------------------------------------------- /hdnnpy/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Dataset tools subpackage for HDNNP.""" 4 | 5 | __all__ = [ 6 | 'AtomicStructure', 7 | 'DatasetGenerator', 8 | 'HDNNPDataset', 9 | ] 10 | 11 | from hdnnpy.dataset.atomic_structure import AtomicStructure 12 | from hdnnpy.dataset.dataset_generator import DatasetGenerator 13 | from hdnnpy.dataset.hdnnp_dataset import HDNNPDataset 14 | -------------------------------------------------------------------------------- /hdnnpy/dataset/atomic_structure.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Wrapper class of ase.Atoms.""" 4 | 5 | from ase.calculators.singlepoint import SinglePointCalculator 6 | import ase.io 7 | import ase.neighborlist 8 | import chainer 9 | import chainer.functions as F 10 | import numpy as np 11 | 12 | 13 | class AtomicStructure(object): 14 | """Wrapper class of ase.Atoms.""" 15 | def __init__(self, atoms): 16 | """ 17 | | It wraps :obj:`ase.Atoms` object to define additional methods 18 | and attributes. 
19 | | Before wrapping, it sorts atoms by element alphabetically. 20 | | It stores calculated neighbor information such as distance, 21 | indices. 22 | 23 | Args: 24 | atoms (~ase.Atoms): an object to wrap. 25 | """ 26 | tags = atoms.get_chemical_symbols() 27 | deco = sorted([(tag, i) for i, tag in enumerate(tags)]) 28 | indices = [i for tag, i in deco] 29 | self._atoms = atoms[indices] 30 | 31 | results = {} 32 | calculator = atoms.get_calculator() 33 | if calculator: 34 | for key, value in calculator.results.items(): 35 | if key in atoms.arrays: 36 | results[key] = value[indices] 37 | else: 38 | results[key] = value 39 | self._atoms.set_calculator( 40 | SinglePointCalculator(self._atoms, **results)) 41 | 42 | self._cache = {} 43 | 44 | def __getattr__(self, item): 45 | return getattr(self._atoms, item) 46 | 47 | def __getstate__(self): 48 | return self._atoms 49 | 50 | def __len__(self): 51 | return len(self._atoms) 52 | 53 | def __setstate__(self, state): 54 | self._atoms = state 55 | self._cache = {} 56 | 57 | @property 58 | def elements(self): 59 | """list [str]: Elements included in a cell.""" 60 | return sorted(set(self._atoms.get_chemical_symbols())) 61 | 62 | def clear_cache(self, cutoff_distance=None): 63 | """Clear up cached neighbor information in this instance. 64 | 65 | Args: 66 | cutoff_distance (float, optional): 67 | It clears the corresponding cached data if specified, 68 | otherwise it clears all cached data. 69 | """ 70 | if cutoff_distance: 71 | self._cache[cutoff_distance].clear() 72 | else: 73 | self._cache.clear() 74 | 75 | def get_neighbor_info(self, cutoff_distance, geometry_keys): 76 | """Calculate or return cached data. 77 | 78 | | If there is no cached data, calculate it as necessary. 79 | | The calculated result is cached, and retained unless 80 | you use :meth:`clear_cache` method. 81 | 82 | Args: 83 | cutoff_distance (float): 84 | It calculates the geometry for the neighboring atoms 85 | within this value of each atom in a cell. 86 | geometry_keys (list [str]): 87 | A list of atomic geometries to calculate between an atom 88 | and its neighboring atoms. 89 | 90 | Returns: 91 | Iterator [tuple]: Neighbor information required by 92 | ``geometry_keys`` for each atom in a cell. 93 | """ 94 | ret = [] 95 | for key in geometry_keys: 96 | if (cutoff_distance not in self._cache 97 | or key not in self._cache[cutoff_distance]): 98 | self._calculate_neighbors(cutoff_distance) 99 | ret.append(self._cache[cutoff_distance][key]) 100 | for neighbor_info in zip(*ret): 101 | yield neighbor_info 102 | 103 | @classmethod 104 | def read_xyz(cls, file_path): 105 | """Read .xyz format file and make a list of instances. 106 | 107 | Parses .xyz format file using :func:`ase.io.iread` and wraps it 108 | by this class. 109 | 110 | Args: 111 | file_path (~pathlib.Path): 112 | File path to read atomic structures. 113 | 114 | Returns: 115 | list [AtomicStructure]: Initialized instances. 
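
A short usage sketch for the two entry points above, `read_xyz` and `get_neighbor_info`; the file name is a placeholder, and the available geometry keys are the ones cached by `_calculate_neighbors` shown below:

```
from pathlib import Path
from hdnnpy.dataset import AtomicStructure

structures = AtomicStructure.read_xyz(Path('GaN.xyz'))  # placeholder path
s0 = structures[0]
# One tuple per atom in the cell; R and fc are chainer Variables holding the
# distances and cutoff-function values of that atom's neighbors within 6.0.
for R, fc in s0.get_neighbor_info(6.0, ['distance', 'cutoff_function']):
    pass
s0.clear_cache()  # drop the cached neighbor lists when finished
```
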
116 | """ 117 | return [cls(atoms) for atoms 118 | in ase.io.iread(str(file_path), index=':', format='xyz')] 119 | 120 | def _calculate_neighbors(self, cutoff_distance): 121 | """Calculate distance to one neighboring atom and store indices 122 | of neighboring atoms.""" 123 | symbols = self._atoms.get_chemical_symbols() 124 | elements = sorted(set(symbols)) 125 | atomic_numbers = self._atoms.get_atomic_numbers() 126 | index_element_map = [elements.index(element) for element in symbols] 127 | 128 | i_list, j_list, D_list = ase.neighborlist.neighbor_list( 129 | 'ijD', self._atoms, cutoff_distance) 130 | 131 | sort_indices = np.lexsort((j_list, i_list)) 132 | i_list = i_list[sort_indices] 133 | j_list = j_list[sort_indices] 134 | D_list = D_list[sort_indices] 135 | elem_list = np.array([index_element_map[idx] for idx in j_list]) 136 | 137 | i_indices = np.unique(i_list, return_index=True)[1] 138 | j_list = np.split(j_list, i_indices[1:]) 139 | distance_vector = [chainer.Variable(r.astype(np.float32)) 140 | for r in np.split(D_list, i_indices[1:])] 141 | distance = [F.sqrt(F.sum(r**2, axis=1)) for r in distance_vector] 142 | cutoff_function = [F.tanh(1.0 - R/cutoff_distance)**3 143 | for R in distance] 144 | elem_list = np.split(elem_list, i_indices[1:]) 145 | 146 | self._cache[cutoff_distance] = { 147 | 'distance_vector': distance_vector, 148 | 'distance': distance, 149 | 'cutoff_function': cutoff_function, 150 | 'element_indices': [np.searchsorted(elem, range(len(elements))) 151 | for elem in elem_list], 152 | 'j_indices': [np.searchsorted(j, range(len(symbols))) 153 | for j in j_list], 154 | 'atomic_number': [ 155 | np.apply_along_axis(lambda x: atomic_numbers[x], 0, j) 156 | for j in j_list], 157 | } 158 | -------------------------------------------------------------------------------- /hdnnpy/dataset/dataset_generator.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Deal out datasets as needed.""" 4 | 5 | from sklearn.model_selection import KFold 6 | 7 | from hdnnpy.dataset.hdnnp_dataset import HDNNPDataset 8 | 9 | 10 | class DatasetGenerator(object): 11 | """Deal out datasets as needed.""" 12 | def __init__(self, *datasets): 13 | """ 14 | Args: 15 | *datasets (HDNNPDataset): What you want to unite. 16 | """ 17 | if not datasets: 18 | raise ValueError('No dataset are given') 19 | for dataset in datasets: 20 | assert isinstance(dataset, HDNNPDataset) 21 | self._datasets = list(datasets) 22 | 23 | def all(self): 24 | """Pass all datasets an instance have. 25 | 26 | Returns: 27 | list [HDNNPDataset]: All stored datasets. 28 | """ 29 | return self._datasets 30 | 31 | def foreach(self): 32 | """Pass all datasets an instance have one by one. 33 | 34 | Returns: 35 | Iterator [HDNNPDataset]: a stored dataset object. 36 | """ 37 | for dataset in self._datasets: 38 | yield dataset 39 | 40 | def holdout(self, ratio): 41 | """Split each dataset at a certain rate and pass it 42 | 43 | Args: 44 | ratio (float): 45 | Specify the rate you want to use as training data. 46 | Remains are test data. 47 | 48 | Returns: 49 | list [tuple [HDNNPDataset, HDNNPDataset]]: 50 | All stored dataset split by specified ratio into training 51 | and test data. 
52 | """ 53 | split = [] 54 | for dataset in self._datasets: 55 | s = int(dataset.partial_size * ratio) 56 | train = dataset.take(slice(None, s, None)) 57 | test = dataset.take(slice(s, None, None)) 58 | assert len(train) > 0 59 | assert len(test) > 0 60 | split.append((train, test)) 61 | return split 62 | 63 | def kfold(self, kfold): 64 | """Split each dataset almost equally and pass it for cross 65 | validation. 66 | 67 | Args: 68 | kfold (int): Number of folds to split dataset. 69 | 70 | Returns: 71 | Iterator [list [tuple [HDNNPDataset, HDNNPDataset]]]: 72 | All stored dataset split into training and test data. 73 | It iterates k times while changing parts used for test data. 74 | """ 75 | kf = KFold(n_splits=kfold) 76 | kfold_indices = [kf.split(range(dataset.partial_size)) 77 | for dataset in self._datasets] 78 | 79 | for indices in zip(*kfold_indices): 80 | split = [] 81 | for dataset, (train_idx, test_idx) in zip(self._datasets, indices): 82 | train = dataset.take(train_idx) 83 | test = dataset.take(test_idx) 84 | assert len(train) > 0 85 | assert len(test) > 0 86 | split.append((train, test)) 87 | yield split 88 | -------------------------------------------------------------------------------- /hdnnpy/dataset/descriptor/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Descriptor dataset subpackage.""" 4 | 5 | __all__ = [ 6 | 'DESCRIPTOR_DATASET', 7 | ] 8 | 9 | from hdnnpy.dataset.descriptor.symmetry_function_dataset import ( 10 | SymmetryFunctionDataset) 11 | from hdnnpy.dataset.descriptor.weighted_symmetry_function_dataset import ( 12 | WeightedSymmetryFunctionDataset) 13 | 14 | DESCRIPTOR_DATASET = { 15 | SymmetryFunctionDataset.name: SymmetryFunctionDataset, 16 | WeightedSymmetryFunctionDataset.name: WeightedSymmetryFunctionDataset, 17 | } 18 | -------------------------------------------------------------------------------- /hdnnpy/dataset/descriptor/descriptor_dataset_base.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Base class of atomic structure based descriptor dataset. 4 | 5 | If you want to add new descriptor to extend HDNNP, inherits this base 6 | class. 7 | """ 8 | 9 | from abc import (ABC, abstractmethod) 10 | 11 | import numpy as np 12 | from tqdm import tqdm 13 | 14 | from hdnnpy.utils import (MPI, pprint, recv_chunk, send_chunk) 15 | 16 | 17 | class DescriptorDatasetBase(ABC): 18 | """Base class of atomic structure based descriptor dataset.""" 19 | DESCRIPTORS = [] 20 | """list [str]: Names of descriptors for each derivative order.""" 21 | name = None 22 | """str: Name of this descriptor class.""" 23 | 24 | def __init__(self, order, structures): 25 | """ 26 | Common instance variables for descriptor datasets are 27 | initialized. 28 | 29 | Args: 30 | order (int): Derivative order of descriptor to calculate. 31 | structures (list [AtomicStructure]): 32 | Descriptors are calculated for these atomic structures. 
33 | """ 34 | self._order = order 35 | self._descriptors = self.DESCRIPTORS[: order+1] 36 | self._elemental_composition = structures[0].get_chemical_symbols() 37 | self._elements = sorted(set(self._elemental_composition)) 38 | self._length = len(structures) 39 | self._slices = [slice(i[0], i[-1]+1) 40 | for i in np.array_split(range(self._length), MPI.size)] 41 | self._structures = structures[self._slices[MPI.rank]] 42 | self._tag = structures[0].info['tag'] 43 | self._dataset = [] 44 | self._feature_keys = [] 45 | 46 | def __getitem__(self, item): 47 | """Return descriptor data this instance has. 48 | 49 | If ``item`` is string, it returns corresponding descriptor. 50 | Available keys can be obtained by ``descriptors`` attribute. 51 | Otherwise, it returns a list of descriptor sliced by ``item``. 52 | """ 53 | if isinstance(item, str): 54 | try: 55 | index = self._descriptors.index(item) 56 | except ValueError: 57 | raise KeyError(item) from None 58 | return self._dataset[index] 59 | else: 60 | return [data[item] for data in self._dataset] 61 | 62 | def __len__(self): 63 | """Number of atomic structures given at initialization.""" 64 | return self._length 65 | 66 | @property 67 | def descriptors(self): 68 | """list [str]: Names of descriptors this instance have.""" 69 | return self._descriptors 70 | 71 | @property 72 | def elemental_composition(self): 73 | """list [str]: Elemental composition of atomic structures given 74 | at initialization.""" 75 | return self._elemental_composition 76 | 77 | @property 78 | def elements(self): 79 | """list [str]: Elements of atomic structures given at 80 | initialization.""" 81 | return self._elements 82 | 83 | @property 84 | def feature_keys(self): 85 | """list [str]: Unique keys of feature dimension.""" 86 | return self._feature_keys 87 | 88 | @property 89 | def has_data(self): 90 | """bool: True if success to load or make dataset, 91 | False otherwise.""" 92 | return len(self._dataset) == self._order + 1 93 | 94 | @property 95 | def n_feature(self): 96 | """int: Length of feature dimension.""" 97 | return len(self._feature_keys) 98 | 99 | @property 100 | def order(self): 101 | """int: Derivative order of descriptor to calculate.""" 102 | return self._order 103 | 104 | @property 105 | def tag(self): 106 | """str: Unique tag of atomic structures given at 107 | initialization. 108 | 109 | Usually, it is a form like `` ``. 110 | (ex. ``CrystalGa2N2``) 111 | """ 112 | return self._tag 113 | 114 | def clear(self): 115 | """Clear up instance variables to initial state.""" 116 | self._dataset.clear() 117 | self._feature_keys.clear() 118 | 119 | def load(self, file_path, verbose=True, remake=False): 120 | """Load dataset from .npz format file. 121 | 122 | Only root MPI process load dataset. 123 | 124 | It validates following compatibility between loaded dataset and 125 | atomic structures given at initialization. 126 | 127 | * length of data 128 | * elemental composition 129 | * elements 130 | * tag 131 | 132 | It also validates that loaded dataset satisfies requirements. 133 | 134 | * feature keys 135 | * order 136 | 137 | Args: 138 | file_path (~pathlib.Path): File path to load dataset. 139 | verbose (bool, optional): Print log to stdout. 140 | remake (bool, optional): If loaded dataset is lacking in 141 | any feature key or any descriptor, recalculate dataset 142 | from scratch and overwrite it to ``file_path``. 143 | Otherwise, it raises ValueError. 
144 | 145 | Raises: 146 | AssertionError: If loaded dataset is incompatible with 147 | atomic structures given at initialization. 148 | ValueError: If loaded dataset is lacking in any feature key 149 | or any descriptor and ``remake=False``. 150 | """ 151 | # validate compatibility between my structures and loaded dataset 152 | ndarray = np.load(file_path) 153 | assert list(ndarray['elemental_composition']) \ 154 | == self._elemental_composition 155 | assert list(ndarray['elements']) == self._elements 156 | assert ndarray['tag'].item() == self._tag 157 | assert len(ndarray[self._descriptors[0]]) == len(self) 158 | 159 | # validate lacking feature keys 160 | loaded_keys = list(ndarray['feature_keys']) 161 | lacking_keys = set(self._feature_keys) - set(loaded_keys) 162 | lacking_descriptors = set(self._descriptors) - set(ndarray) 163 | if lacking_keys or lacking_descriptors: 164 | if verbose and lacking_keys: 165 | lacking = ('\n'+' '*20).join(sorted(lacking_keys)) 166 | pprint(f''' 167 | Following feature keys are lacked in {file_path}. 168 | {lacking} 169 | ''') 170 | if verbose and lacking_descriptors: 171 | lacking = ('\n'+' '*20).join(sorted(lacking_descriptors)) 172 | pprint(f''' 173 | Following descriptors are lacked in {file_path}. 174 | {lacking} 175 | ''') 176 | if remake: 177 | if verbose: 178 | pprint('Start to recalculate dataset from scratch.') 179 | self.make(verbose=verbose) 180 | self.save(file_path, verbose=verbose) 181 | return 182 | else: 183 | raise ValueError('Please recalculate dataset from scratch.') 184 | 185 | # load dataset as much as needed 186 | if MPI.rank == 0: 187 | for i in range(self._order + 1): 188 | indices = np.array([loaded_keys.index(key) 189 | for key in self._feature_keys]) 190 | data = np.take(ndarray[self._descriptors[i]], indices, axis=2) 191 | self._dataset.append(data) 192 | 193 | if verbose: 194 | pprint(f'Successfully loaded & made needed {self.name} dataset' 195 | f' from {file_path}') 196 | 197 | def make(self, verbose=True): 198 | """Calculate & retain descriptor dataset 199 | 200 | | It calculates descriptor dataset by data-parallel using MPI 201 | communication. 202 | | The calculated dataset is retained in only root MPI process. 203 | 204 | Args: 205 | verbose (bool, optional): Print log to stdout. 206 | """ 207 | dataset = [] 208 | for structure in tqdm(self._structures, 209 | ascii=True, desc=f'Process #{MPI.rank}', 210 | leave=False, position=MPI.rank): 211 | dataset.append(self.calculate_descriptors(structure)) 212 | 213 | for data_list in zip(*dataset): 214 | shape = data_list[0].shape 215 | send_data = np.stack(data_list) 216 | del data_list 217 | if MPI.rank == 0: 218 | recv_data = np.empty((self._length, *shape), dtype=np.float32) 219 | recv_data[self._slices[0]] = send_data 220 | del send_data 221 | for i in range(1, MPI.size): 222 | recv_data[self._slices[i]] = recv_chunk(source=i) 223 | self._dataset.append(recv_data) 224 | else: 225 | send_chunk(send_data, dest=0) 226 | del send_data 227 | 228 | if verbose: 229 | pprint(f'Calculated {self.name} dataset.') 230 | 231 | def save(self, file_path, verbose=True): 232 | """Save dataset to .npz format file. 233 | 234 | Only root MPI process save dataset. 235 | 236 | Args: 237 | file_path (~pathlib.Path): File path to save dataset. 238 | verbose (bool, optional): Print log to stdout. 239 | 240 | Raises: 241 | RuntimeError: If this instance do not have any data. 
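
Since this base class is the extension point mentioned in the module docstring, a new descriptor only needs the two class attributes plus the two abstract methods defined at the end of the class. The following is a bare-bones placeholder sketch (the zero-valued descriptor is obviously not useful, and only the zeroth derivative order is handled); a real implementation would also be registered in `DESCRIPTOR_DATASET` in `hdnnpy/dataset/descriptor/__init__.py`:

```
import numpy as np
from hdnnpy.dataset.descriptor.descriptor_dataset_base import (
    DescriptorDatasetBase)


class MyDescriptorDataset(DescriptorDatasetBase):
    DESCRIPTORS = ['my_descriptor']   # one name per derivative order (only 0 here)
    name = 'my_descriptor'            # value used for DatasetConfig.descriptor

    def __init__(self, order, structures, **params):
        assert order == 0             # this sketch only provides the 0th order
        super().__init__(order, structures)
        self._params = params
        self._feature_keys = self.generate_feature_keys(self._elements)

    def calculate_descriptors(self, structure):
        # Return one array per derivative order; order 0 is (n_atom, n_feature).
        return [np.zeros((len(structure), self.n_feature), dtype=np.float32)]

    def generate_feature_keys(self, elements):
        return [f'my_descriptor:{element}' for element in elements]
```
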
242 | """ 243 | if not MPI.comm.bcast(self.has_data, root=0): 244 | raise RuntimeError(''' 245 | Cannot save dataset, since this dataset does not have any data. 246 | ''') 247 | 248 | if MPI.rank == 0: 249 | data = {descriptor: data for descriptor, data 250 | in zip(self._descriptors, self._dataset)} 251 | info = { 252 | 'elemental_composition': self._elemental_composition, 253 | 'elements': self._elements, 254 | 'feature_keys': self._feature_keys, 255 | 'tag': self._tag, 256 | } 257 | np.savez(file_path, **data, **info) 258 | if verbose: 259 | pprint(f'Successfully saved {self.name} dataset to {file_path}.') 260 | 261 | @abstractmethod 262 | def calculate_descriptors(self, structure): 263 | """Calculate required descriptors for a structure data. 264 | 265 | This is abstract method. 266 | Subclass of this base class have to override. 267 | 268 | Args: 269 | structure (AtomicStructure): 270 | A structure data to calculate descriptors. 271 | 272 | Returns: 273 | list [~numpy.ndarray]: Calculated descriptors. 274 | The length is the same as ``order`` given at initialization. 275 | """ 276 | return 277 | 278 | @abstractmethod 279 | def generate_feature_keys(self, *args, **kwargs): 280 | """Generate feature keys of current state. 281 | 282 | This is abstract method. 283 | Subclass of this base class have to override. 284 | 285 | Returns: 286 | list [str]: Unique keys of feature dimension. 287 | """ 288 | return 289 | -------------------------------------------------------------------------------- /hdnnpy/dataset/descriptor/symmetry_function_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Symmetry function dataset for descriptor of HDNNP.""" 4 | 5 | from itertools import combinations_with_replacement 6 | 7 | import chainer 8 | import chainer.functions as F 9 | import numpy as np 10 | 11 | from hdnnpy.dataset.descriptor.descriptor_dataset_base import ( 12 | DescriptorDatasetBase) 13 | 14 | 15 | class SymmetryFunctionDataset(DescriptorDatasetBase): 16 | """Symmetry function dataset for descriptor of HDNNP.""" 17 | DESCRIPTORS = ['sym_func', 'derivative', 'second_derivative'] 18 | """list [str]: Names of descriptors for each derivative order.""" 19 | name = 'symmetry_function' 20 | """str: Name of this descriptor class.""" 21 | 22 | def __init__(self, order, structures, **func_param_map): 23 | """ 24 | It accepts 0 or 2 for ``order``. 25 | 26 | | Each symmetry function requires following parameters. 27 | | Pass parameters you want to use for the dataset as keyword 28 | arguments ``func_param_map``. 29 | 30 | * type1: :math:`R_c` 31 | * type2: :math:`R_c, \eta, R_s` 32 | * type4: :math:`R_c, \eta, \lambda, \zeta` 33 | 34 | Args: 35 | order (int): passed to super class. 36 | structures (list [AtomicStructure]): passed to super class. 37 | **func_param_map (list [tuple]): 38 | parameter sets for each type of symmetry function. 39 | 40 | References: 41 | Symmetry function was proposed by Behler *et al.* in 42 | `this paper`_ as a descriptor of HDNNP. Please see here for 43 | details of each symmetry function. 44 | 45 | .. 
_`this paper`: 46 | https://onlinelibrary.wiley.com/doi/full/10.1002/qua.24890 47 | """ 48 | assert 0 <= order <= 2 49 | assert func_param_map 50 | super().__init__(order, structures) 51 | self._func_param_map = func_param_map.copy() 52 | self._feature_keys = self.generate_feature_keys(self._elements) 53 | 54 | @property 55 | def function_names(self): 56 | """list [str]: Names of symmetry functions this instance 57 | calculates or has calculated.""" 58 | return list(self._func_param_map.keys()) 59 | 60 | @property 61 | def params(self): 62 | """dict [list [tuple]]]: Mapping from symmetry function name to 63 | its parameters.""" 64 | return self._func_param_map 65 | 66 | def calculate_descriptors(self, structure): 67 | """Calculate required descriptors for a structure data. 68 | 69 | Args: 70 | structure (AtomicStructure): 71 | A structure data to calculate descriptors. 72 | 73 | Returns: 74 | list [~numpy.ndarray]: Calculated descriptors. 75 | The length is the same as ``order`` given at initialization. 76 | """ 77 | generators = [] 78 | for name, params_set in self._func_param_map.items(): 79 | for params in params_set: 80 | generators.append(eval( 81 | f'self._symmetry_function_{name}')(structure, *params)) 82 | 83 | dataset = [np.concatenate([next(gen).data 84 | for gen in generators]).swapaxes(0, 1) 85 | for _ in range(self._order + 1)] 86 | 87 | structure.clear_cache() 88 | 89 | return dataset 90 | 91 | def generate_feature_keys(self, elements): 92 | """Generate feature keys from given elements and parameters. 93 | 94 | | parameters given at initialization are used. 95 | | This method is used to initialize instance and expand feature 96 | dimension in 97 | :class:`~hdnnpy.dataset.hdnnp_dataset.HDNNPDataset`. 98 | 99 | Args: 100 | elements (list [str]): Unique list of elements. It should be 101 | sorted alphabetically. 102 | 103 | Returns: 104 | list [str]: Generated feature keys in a format 105 | like ``::``. 
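
The keyword parameters correspond directly to the `DatasetConfig.parameters` mapping of the configuration file, and each tuple follows the signatures listed in the class docstring (type1: R_c; type2: R_c, eta, R_s; type4: R_c, eta, lambda, zeta). A small sketch with illustrative parameter values, where `structures` is a list of `AtomicStructure` objects carrying a `tag` info field:

```
dataset = SymmetryFunctionDataset(
    1, structures,
    type2=[(6.0, 0.01, 2.0), (6.0, 0.1, 0.0)],
    type4=[(6.0, 0.01, -1.0, 1.0)])
dataset.make(verbose=True)          # MPI data-parallel; result kept on rank 0
sym_func = dataset['sym_func']      # (n_structure, n_atom, n_feature) on rank 0
derivative = dataset['derivative']  # first derivatives, since order=1
```
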
106 | """ 107 | feature_keys = [] 108 | for function_name, params_set in self._func_param_map.items(): 109 | for params in params_set: 110 | param_key = '/'.join(map(str, params)) 111 | if function_name in ['type1', 'type2']: 112 | for element_key in elements: 113 | key = ':'.join([function_name, param_key, element_key]) 114 | feature_keys.append(key) 115 | 116 | elif function_name in ['type4']: 117 | for combo in combinations_with_replacement(elements, 2): 118 | element_key = '/'.join(combo) 119 | key = ':'.join([function_name, param_key, element_key]) 120 | feature_keys.append(key) 121 | return feature_keys 122 | 123 | def differentiate(func): 124 | """Decorator function to differentiate symmetry function.""" 125 | def wrapper(self, structure, Rc, *params): 126 | differentiate_more = self._order > 0 127 | with chainer.using_config('enable_backprop', differentiate_more): 128 | G = func(self, structure, Rc, *params) 129 | yield F.stack([F.stack(g) for g in G]) 130 | 131 | n_atom = len(G[0]) 132 | r = [] 133 | j_indices = [] 134 | for r_, j_idx in structure.get_neighbor_info( 135 | Rc, ['distance_vector', 'j_indices']): 136 | r.append(r_) 137 | j_indices.append(j_idx) 138 | 139 | differentiate_more = self._order > 1 140 | with chainer.using_config('enable_backprop', differentiate_more): 141 | dG = [] 142 | for g in G: 143 | with chainer.force_backprop_mode(): 144 | grad = chainer.grad( 145 | g, r, enable_double_backprop=differentiate_more) 146 | dg = [F.concat([F.sum(dg_, axis=0) for dg_ 147 | in F.split_axis(grad_, j_idx[1:], 148 | axis=0)], 149 | axis=0) 150 | for grad_, j_idx in zip(grad, j_indices)] 151 | dG.append(dg) 152 | yield F.stack([F.stack(dg) for dg in dG]) 153 | 154 | differentiate_more = self._order > 2 155 | with chainer.using_config('enable_backprop', differentiate_more): 156 | d2G = [] 157 | for dg in dG: 158 | d2g = [] 159 | for i in range(3 * n_atom): 160 | with chainer.force_backprop_mode(): 161 | grad = chainer.grad( 162 | [dg_[i] for dg_ in dg], r, 163 | enable_double_backprop=differentiate_more) 164 | d2g_ = [F.concat([F.sum(d2g_, axis=0) for d2g_ 165 | in F.split_axis(grad_, j_idx[1:], 166 | axis=0)], 167 | axis=0) 168 | for grad_, j_idx in zip(grad, j_indices)] 169 | d2g.append(d2g_) 170 | d2G.append(d2g) 171 | yield F.stack([F.stack([F.stack(d2g_) for d2g_ in d2g]) 172 | for d2g in d2G]).transpose(0, 2, 1, 3) 173 | 174 | return wrapper 175 | 176 | @differentiate 177 | def _symmetry_function_type1(self, structure, Rc): 178 | """Symmetry function type1 for specific parameters.""" 179 | G = [] 180 | for fc, element_indices in structure.get_neighbor_info( 181 | Rc, ['cutoff_function', 'element_indices']): 182 | g = fc 183 | g = [F.sum(g_) for g_ 184 | in F.split_axis(g, element_indices[1:], axis=0)] 185 | G.append(g) 186 | return list(zip(*G)) 187 | 188 | @differentiate 189 | def _symmetry_function_type2(self, structure, Rc, eta, Rs): 190 | """Symmetry function type2 for specific parameters.""" 191 | G = [] 192 | for R, fc, element_indices in structure.get_neighbor_info( 193 | Rc, ['distance', 'cutoff_function', 'element_indices']): 194 | g = F.exp(-eta*(R-Rs)**2) * fc 195 | g = [F.sum(g_) for g_ 196 | in F.split_axis(g, element_indices[1:], axis=0)] 197 | G.append(g) 198 | return list(zip(*G)) 199 | 200 | @differentiate 201 | def _symmetry_function_type4(self, structure, Rc, eta, lambda_, zeta): 202 | """Symmetry function type4 for specific parameters.""" 203 | G = [] 204 | for r, R, fc, element_indices in structure.get_neighbor_info( 205 | Rc, ['distance_vector', 
'distance', 'cutoff_function', 206 | 'element_indices']): 207 | cos = (r/F.expand_dims(R, axis=1)) @ (r.T/R) 208 | if zeta == 1: 209 | ang = (1.0 + lambda_*cos) 210 | else: 211 | ang = (1.0 + lambda_*cos) ** zeta 212 | g = (2.0 ** (1-zeta) 213 | * ang 214 | * F.expand_dims(F.exp(-eta*R**2) * fc, axis=1) 215 | * F.expand_dims(F.exp(-eta*R**2) * fc, axis=0)) 216 | triu = np.triu(np.ones_like(cos.data), k=1) 217 | g = F.where(triu.astype(np.bool), g, triu) 218 | g = [F.sum(g__) 219 | for j, g_ 220 | in enumerate(F.split_axis(g, element_indices[1:], axis=0)) 221 | for k, g__ 222 | in enumerate(F.split_axis(g_, element_indices[1:], axis=1)) 223 | if j <= k] 224 | G.append(g) 225 | return list(zip(*G)) 226 | -------------------------------------------------------------------------------- /hdnnpy/dataset/descriptor/weighted_symmetry_function_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Weighted symmetry function dataset for descriptor of HDNNP.""" 4 | 5 | import chainer 6 | import chainer.functions as F 7 | import numpy as np 8 | 9 | from hdnnpy.dataset.descriptor.descriptor_dataset_base import ( 10 | DescriptorDatasetBase) 11 | 12 | 13 | class WeightedSymmetryFunctionDataset(DescriptorDatasetBase): 14 | """Weighted symmetry function dataset for descriptor of HDNNP.""" 15 | DESCRIPTORS = ['sym_func', 'derivative', 'second_derivative'] 16 | """list [str]: Names of descriptors for each derivative order.""" 17 | name = 'weighted_symmetry_function' 18 | """str: Name of this descriptor class.""" 19 | 20 | def __init__(self, order, structures, **func_param_map): 21 | """ 22 | It accepts 0 or 2 for ``order``. 23 | 24 | | Each weighted symmetry function requires following parameters. 25 | | Pass parameters you want to use for the dataset as keyword 26 | arguments ``func_param_map``. 27 | 28 | * type1: :math:`R_c` 29 | * type2: :math:`R_c, \eta, R_s` 30 | * type4: :math:`R_c, \eta, \lambda, \zeta` 31 | 32 | Args: 33 | order (int): passed to super class. 34 | structures (list [AtomicStructure]): passed to super class. 35 | **func_param_map (list [tuple]): 36 | parameter sets for each type of weighted symmetry function. 37 | 38 | References: 39 | Weighted symmetry function was proposed by Gastegger *et al.* in 40 | `this paper`_ as a descriptor of HDNNP. Please see here for 41 | details of weighted symmetry function. 42 | 43 | .. _`this paper`: 44 | https://doi.org/10.1063/1.5019667 45 | """ 46 | assert 0 <= order <= 2 47 | assert func_param_map 48 | super().__init__(order, structures) 49 | self._func_param_map = func_param_map.copy() 50 | self._feature_keys = self.generate_feature_keys(self._elements) 51 | 52 | @property 53 | def function_names(self): 54 | """list [str]: Names of weighted symmetry functions this 55 | instance calculates or has calculated.""" 56 | return list(self._func_param_map.keys()) 57 | 58 | @property 59 | def params(self): 60 | """dict [list [tuple]]]: Mapping from weighted symmetry function 61 | name to its parameters.""" 62 | return self._func_param_map 63 | 64 | def calculate_descriptors(self, structure): 65 | """Calculate required descriptors for a structure data. 66 | 67 | Args: 68 | structure (AtomicStructure): 69 | A structure data to calculate descriptors. 70 | 71 | Returns: 72 | list [~numpy.ndarray]: Calculated descriptors. 73 | The length is the same as ``order`` given at initialization. 
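
Usage is identical to `SymmetryFunctionDataset` above; the practical difference is that neighbor contributions are weighted by atomic number instead of being resolved per element, so the feature dimension does not grow with the number of element combinations. For example (parameter values are illustrative):

```
dataset = WeightedSymmetryFunctionDataset(
    0, structures, type2=[(6.0, 0.01, 2.0)])
print(dataset.feature_keys)   # ['type2:6.0/0.01/2.0'], independent of the elements
```
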
74 | """ 75 | generators = [] 76 | for name, params_set in self._func_param_map.items(): 77 | for params in params_set: 78 | generators.append(eval( 79 | f'self._weighted_symmetry_function_{name}' 80 | )(structure, *params)) 81 | 82 | dataset = [np.stack([next(gen).data 83 | for gen in generators]).swapaxes(0, 1) 84 | for _ in range(self._order + 1)] 85 | 86 | structure.clear_cache() 87 | 88 | return dataset 89 | 90 | def generate_feature_keys(self, _): 91 | """Generate feature keys from given elements and parameters. 92 | 93 | | parameters given at initialization are used. 94 | | This method is used to initialize instance and expand feature 95 | dimension in 96 | :class:`~hdnnpy.dataset.hdnnp_dataset.HDNNPDataset`. 97 | 98 | Returns: 99 | list [str]: Generated feature keys in a format 100 | like ``:``. 101 | """ 102 | feature_keys = [] 103 | for function_name, params_set in self._func_param_map.items(): 104 | for params in params_set: 105 | param_key = '/'.join(map(str, params)) 106 | key = ':'.join([function_name, param_key]) 107 | feature_keys.append(key) 108 | return feature_keys 109 | 110 | def differentiate(func): 111 | """Decorator function to differentiate weighted symmetry 112 | function.""" 113 | def wrapper(self, structure, Rc, *params): 114 | differentiate_more = self._order > 0 115 | with chainer.using_config('enable_backprop', differentiate_more): 116 | G = func(self, structure, Rc, *params) 117 | yield F.stack(G) 118 | 119 | n_atom = len(G) 120 | r = [] 121 | j_indices = [] 122 | for r_, j_idx in structure.get_neighbor_info( 123 | Rc, ['distance_vector', 'j_indices']): 124 | r.append(r_) 125 | j_indices.append(j_idx) 126 | 127 | differentiate_more = self._order > 1 128 | with chainer.using_config('enable_backprop', differentiate_more): 129 | with chainer.force_backprop_mode(): 130 | grad = chainer.grad( 131 | G, r, enable_double_backprop=differentiate_more) 132 | dG = [F.concat([F.sum(dg_, axis=0) for dg_ 133 | in F.split_axis(grad_, j_idx[1:], axis=0)], 134 | axis=0) 135 | for grad_, j_idx in zip(grad, j_indices)] 136 | yield F.stack(dG) 137 | 138 | differentiate_more = self._order > 2 139 | with chainer.using_config('enable_backprop', differentiate_more): 140 | d2G = [] 141 | for i in range(3 * n_atom): 142 | with chainer.force_backprop_mode(): 143 | grad = chainer.grad( 144 | [dg[i] for dg in dG], r, 145 | enable_double_backprop=differentiate_more) 146 | d2g = [F.concat([F.sum(d2g_, axis=0) for d2g_ 147 | in F.split_axis(grad_, j_idx[1:], 148 | axis=0)], 149 | axis=0) 150 | for grad_, j_idx in zip(grad, j_indices)] 151 | d2G.append(d2g) 152 | yield F.stack([F.stack(d2g) for d2g in d2G]).transpose(1, 0, 2) 153 | 154 | return wrapper 155 | 156 | @differentiate 157 | def _weighted_symmetry_function_type1(self, structure, Rc): 158 | """Weighted symmetry function type1 for specific parameters.""" 159 | G = [] 160 | for z, fc in structure.get_neighbor_info( 161 | Rc, ['atomic_number', 'cutoff_function']): 162 | g = z * fc 163 | G.append(F.sum(g)) 164 | return G 165 | 166 | @differentiate 167 | def _weighted_symmetry_function_type2(self, structure, Rc, eta, Rs): 168 | """Weighted symmetry function type2 for specific parameters.""" 169 | G = [] 170 | for z, R, fc in structure.get_neighbor_info( 171 | Rc, ['atomic_number', 'distance', 'cutoff_function']): 172 | g = z * F.exp(-eta*(R-Rs)**2) * fc 173 | G.append(F.sum(g)) 174 | return G 175 | 176 | @differentiate 177 | def _weighted_symmetry_function_type4( 178 | self, structure, Rc, eta, lambda_, zeta): 179 | """Weighted symmetry 
function type4 for specific parameters.""" 180 | G = [] 181 | for z, r, R, fc in structure.get_neighbor_info( 182 | Rc, ['atomic_number', 'distance_vector', 'distance', 183 | 'cutoff_function']): 184 | cos = (r/F.expand_dims(R, axis=1)) @ (r.T/R) 185 | if zeta == 1: 186 | ang = (1.0 + lambda_*cos) 187 | else: 188 | ang = (1.0 + lambda_*cos) ** zeta 189 | g = (2.0 ** (1-zeta) 190 | * z[:, None] * z[None, :] 191 | * ang 192 | * F.expand_dims(F.exp(-eta*R**2) * fc, axis=1) 193 | * F.expand_dims(F.exp(-eta*R**2) * fc, axis=0)) 194 | triu = np.triu(np.ones_like(cos.data), k=1) 195 | g = F.where(triu.astype(np.bool), g, triu) 196 | G.append(F.sum(g)) 197 | return G 198 | -------------------------------------------------------------------------------- /hdnnpy/dataset/hdnnp_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Combine and preprocess descriptor and property dataset.""" 4 | 5 | import numpy as np 6 | 7 | from hdnnpy.utils import (MPI, recv_chunk, send_chunk) 8 | 9 | 10 | RANDOMSTATE = np.random.get_state() 11 | 12 | 13 | class HDNNPDataset(object): 14 | """Combine and preprocess descriptor and property dataset.""" 15 | def __init__(self, descriptor, property_, dataset=None): 16 | """ 17 | | It is desirable that the type of descriptor and property used 18 | for HDNNP is fixed at initialization. 19 | | Also, an instance itself does not have any dataset at 20 | initialization and you need to execute :meth:`construct`. 21 | | If ``dataset`` is given it will be an instance's own dataset. 22 | 23 | Args: 24 | descriptor (DescriptorDatasetBase): 25 | Descriptor instance you want to use as HDNNP input. 26 | property\_ (PropertyDatasetBase): 27 | Property instance you want to use as HDNNP label. 28 | dataset (dict [~numpy.ndarray], optional): 29 | If specified, dataset will be initialized with this. 
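        Example:
            A hedged sketch of the usual composition (``structures`` is a
            placeholder list of ``AtomicStructure`` objects; both datasets
            are assumed to already hold data, e.g. via their ``make`` or
            ``load`` methods)::

                >>> descriptor = WeightedSymmetryFunctionDataset(
                ...     order=1, structures=structures,
                ...     type2=[(6.0, 0.01, 2.0)])
                >>> property_ = InteratomicPotentialDataset(
                ...     order=1, structures=structures)
                >>> dataset = HDNNPDataset(descriptor, property_)
                >>> dataset.construct(shuffle=True)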
30 | """ 31 | if dataset is None: 32 | dataset = {} 33 | self._descriptor = descriptor 34 | self._property = property_ 35 | self._dataset = dataset.copy() 36 | 37 | def __getitem__(self, item): 38 | """Return indexed or sliced dataset as dict data.""" 39 | batches = {key: data[item] 40 | for key, data in self._dataset.items()} 41 | if isinstance(item, slice): 42 | length = len(list(batches.values())[0]) 43 | return [{key: batch[i] for key, batch in batches.items()} 44 | for i in range(length)] 45 | else: 46 | return batches 47 | 48 | def __len__(self): 49 | """Redicect to :attr:`partial_size`""" 50 | return self.partial_size 51 | 52 | @property 53 | def descriptor(self): 54 | """DescriptorDatasetBase: Descriptor dataset instance.""" 55 | return self._descriptor 56 | 57 | @property 58 | def elemental_composition(self): 59 | """list [str]: Elemental composition of the dataset.""" 60 | return self._descriptor.elemental_composition 61 | 62 | @property 63 | def elements(self): 64 | """list [str]: Elements of the dataset.""" 65 | return self._descriptor.elements 66 | 67 | @property 68 | def n_input(self): 69 | """int: Number of dimensions of input data.""" 70 | if 'inputs/0' in self._dataset: 71 | return self._dataset['inputs/0'].shape[-1] 72 | else: 73 | return self._descriptor.n_feature 74 | 75 | @property 76 | def n_label(self): 77 | """int: Number of dimensions of label data.""" 78 | if 'labels/0' in self._dataset: 79 | return self._dataset['labels/0'].shape[-1] 80 | else: 81 | return self._property.n_property 82 | 83 | @property 84 | def partial_size(self): 85 | """int: Number of data after scattered by MPI communication.""" 86 | return len(list(self._dataset.values())[0]) 87 | 88 | @property 89 | def tag(self): 90 | """str: Unique tag of the dataset. 91 | 92 | Usually, it is a form like `` ``. 93 | (ex. ``CrystalGa2N2``) 94 | """ 95 | return self._descriptor.tag 96 | 97 | @property 98 | def total_size(self): 99 | """int: Number of data before scattered by MPI communication.""" 100 | return len(self._descriptor) 101 | 102 | @property 103 | def property(self): 104 | """PropertyDatasetBase: Property dataset instance.""" 105 | return self._property 106 | 107 | def construct(self, all_elements=None, preprocesses=None, 108 | shuffle=True, verbose=True): 109 | """Construct an instance's own dataset. 110 | 111 | This method does following steps: 112 | 113 | * Check compatibility between descriptor and property datasets. 114 | * Expand feature dimension of descriptor dataset according to 115 | ``all_elements`` and pre-process descriptor dataset in a 116 | given order and add to its own dataset. 117 | * Add property dataset to its own dataset. 118 | * Clear up the original data in descriptor and property dataset. 119 | * Shuffle the order of the data. 120 | 121 | Args: 122 | all_elements (list [str], optional): 123 | If specified, it expands feature dimensions of 124 | descriptor dataset according to this. 125 | preprocesses (list [PreprocessBase], optional): 126 | If specified, it pre-processes descriptor dataset in a 127 | given order. 128 | shuffle (bool, optional): 129 | If specified, it shuffles the order of the data. 130 | verbose (bool, optional): 131 | Print log to stdout. 132 | 133 | Raises: 134 | AssertionError: 135 | If descriptor and property datasets are incompatible. 
136 | """ 137 | if preprocesses is None: 138 | preprocesses = [] 139 | 140 | # check compatibility between descriptor and property datasets 141 | assert len(self._descriptor) == len(self._property) 142 | assert self._descriptor.elemental_composition \ 143 | == self._property.elemental_composition 144 | assert self._descriptor.elements == self._property.elements 145 | assert self._descriptor.tag == self._property.tag 146 | 147 | # add descriptor dataset and delete original data 148 | if self._descriptor.has_data: 149 | inputs = [self._descriptor[key] 150 | for key in self._descriptor.descriptors] 151 | # expand along to feature dimension 152 | if all_elements != self._descriptor.elements: 153 | old_feature_keys = self._descriptor.feature_keys 154 | new_feature_keys = ( 155 | self._descriptor.generate_feature_keys(all_elements)) 156 | inputs = self._expand_feature_dims( 157 | inputs, old_feature_keys, new_feature_keys) 158 | # pre-process descriptor dataset 159 | for preprocess in preprocesses: 160 | inputs = preprocess.apply( 161 | inputs, self.elemental_composition, verbose=verbose) 162 | self._dataset.update( 163 | {f'inputs/{i}': data for i, data in enumerate(inputs)}) 164 | self._descriptor.clear() 165 | 166 | # add property dataset and delete original data 167 | if self._property.has_data: 168 | labels = [self._property[key] for key in self._property.properties] 169 | self._dataset.update( 170 | {f'labels/{i}': data for i, data in enumerate(labels)}) 171 | self._property.clear() 172 | 173 | # shuffle dataset 174 | if shuffle: 175 | self._shuffle() 176 | 177 | def scatter(self, max_buf_len=256 * 1024 * 1024): 178 | """Scatter dataset by MPI communication. 179 | 180 | Each instance is re-initialized with received dataset. 181 | 182 | Args: 183 | max_buf_len (int, optional): 184 | Each data is divided into chunks of this size at 185 | maximum. 186 | """ 187 | if MPI.rank == 0: 188 | new_dataset = {} 189 | MPI.comm.bcast(len(self._dataset), root=0) 190 | while self._dataset: 191 | key, data = self._dataset.popitem() 192 | n_total = self.total_size 193 | n_sub = -(-n_total // MPI.size) 194 | for i in range(MPI.size): 195 | s = n_total*i//MPI.size 196 | e = n_total*i//MPI.size + n_sub 197 | if i == 0: 198 | new_dataset[key] = data[s:e] 199 | else: 200 | MPI.comm.send(key, dest=i) 201 | send_chunk(data[s:e], dest=i, max_buf_len=max_buf_len) 202 | self._dataset.update(new_dataset) 203 | 204 | else: 205 | self._dataset.clear() 206 | n_data = MPI.comm.bcast(None, root=0) 207 | for i in range(n_data): 208 | key = MPI.comm.recv(source=0) 209 | recv_data = recv_chunk(source=0, max_buf_len=max_buf_len) 210 | self._dataset[key] = recv_data 211 | 212 | def take(self, index): 213 | """Return copied object that has sliced dataset. 214 | 215 | Args: 216 | index (int or slice): 217 | Copied object has dataset indexed or sliced by this. 
218 | """ 219 | dataset = {key: data[index] for key, data in self._dataset.items()} 220 | new_dataset = self.__class__(self._descriptor, self._property, dataset) 221 | return new_dataset 222 | 223 | @staticmethod 224 | def _expand_feature_dims(inputs, old_feature_keys, new_feature_keys): 225 | """Expand feature dimension of input dataset according to 226 | ``all_elements``.""" 227 | n_pad = len(new_feature_keys) - len(old_feature_keys) 228 | idx_pad = len(old_feature_keys) 229 | sort_indices = [] 230 | for key in new_feature_keys: 231 | if key in old_feature_keys: 232 | sort_indices.append(old_feature_keys.index(key)) 233 | else: 234 | sort_indices.append(idx_pad) 235 | idx_pad += 1 236 | sort_indices = np.array(sort_indices) 237 | 238 | for i, data in enumerate(inputs): 239 | pad_width = [(0, n_pad) if i == 2 else (0, 0) 240 | for i in range(data.ndim)] 241 | data = np.pad(data, pad_width, 'constant') 242 | inputs[i] = data[:, :, sort_indices] 243 | return inputs 244 | 245 | def _shuffle(self): 246 | """Shuffle the order of the data.""" 247 | for data in self._dataset.values(): 248 | np.random.set_state(RANDOMSTATE) 249 | np.random.shuffle(data) 250 | -------------------------------------------------------------------------------- /hdnnpy/dataset/property/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Property dataset subpackage.""" 4 | 5 | __all__ = [ 6 | 'PROPERTY_DATASET', 7 | ] 8 | 9 | from hdnnpy.dataset.property.interatomic_potential_dataset import ( 10 | InteratomicPotentialDataset) 11 | 12 | PROPERTY_DATASET = { 13 | InteratomicPotentialDataset.name: InteratomicPotentialDataset, 14 | } 15 | -------------------------------------------------------------------------------- /hdnnpy/dataset/property/interatomic_potential_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Interatomic potential dataset for property of HDNNP. """ 4 | 5 | import numpy as np 6 | 7 | from hdnnpy.dataset.property.property_dataset_base import PropertyDatasetBase 8 | 9 | 10 | class InteratomicPotentialDataset(PropertyDatasetBase): 11 | """Interatomic potential dataset for property of HDNNP. """ 12 | PROPERTIES = ['energy', 'force', 'harmonic', 'third_order'] 13 | """list [str]: Names of properties for each derivative order.""" 14 | COEFFICIENTS = [1.0, -1.0, 1.0, 1.0] 15 | """list [float]: Coefficient values of each properties.""" 16 | UNITS = ['eV/atom', 'eV/$\\AA$', 'eV/$\\AA$^2', 'eV/$\\AA$^3'] 17 | """list [str]: Units of properties for each derivative order.""" 18 | name = 'interatomic_potential' 19 | """str: Name of this property class.""" 20 | n_property = 1 21 | """int: Number of dimensions of 0th property.""" 22 | 23 | def __init__(self, order, structures): 24 | """ 25 | It accepts 0 or 3 for ``order``. 26 | 27 | Notes: 28 | Currently you cannot use order = 2 or 3, since it is not 29 | implemented. 30 | 31 | Args: 32 | order (int): passed to super class. 33 | structures (list [AtomicStructure]): passed to super class. 34 | """ 35 | assert 0 <= order <= 3 36 | super().__init__(order, structures) 37 | 38 | def calculate_properties(self, structure): 39 | """Calculate required properties for a structure data. 40 | 41 | Args: 42 | structure (AtomicStructure): 43 | A structure data to calculate properties. 44 | 45 | Returns: 46 | list [~numpy.ndarray]: Calculated properties. 47 | The length is the same as ``order`` given at initialization. 
48 | """ 49 | n_deriv = len(structure) * 3 50 | dataset = [] 51 | if self._order >= 0: 52 | energy = (self._calculate_energy(structure) 53 | .astype(np.float32) 54 | .reshape(self.n_property)) 55 | dataset.append(energy) 56 | if self._order >= 1: 57 | force = (self._calculate_force(structure) 58 | .astype(np.float32) 59 | .reshape(self.n_property, n_deriv)) 60 | dataset.append(force) 61 | if self._order >= 2: 62 | harmonic = (self._calculate_harmonic(structure) 63 | .astype(np.float32) 64 | .reshape(self.n_property, n_deriv, n_deriv)) 65 | dataset.append(harmonic) 66 | if self._order >= 3: 67 | third_order = (self._calculate_third_order(structure) 68 | .astype(np.float32) 69 | .reshape(self.n_property, n_deriv, 70 | n_deriv, n_deriv)) 71 | dataset.append(third_order) 72 | return dataset 73 | 74 | @staticmethod 75 | def _calculate_energy(structure): 76 | """Calculate atomic energy.""" 77 | return structure.get_potential_energy() / len(structure) 78 | 79 | @staticmethod 80 | def _calculate_force(structure): 81 | """Calculate interatomic forces.""" 82 | return structure.get_forces() 83 | 84 | @staticmethod 85 | def _calculate_harmonic(structure): 86 | """Calculate 2nd-order harmonic force constant.""" 87 | raise NotImplementedError 88 | 89 | @staticmethod 90 | def _calculate_third_order(structure): 91 | """Calculate 3rd-order anharmonic force constant.""" 92 | raise NotImplementedError 93 | -------------------------------------------------------------------------------- /hdnnpy/dataset/property/property_dataset_base.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Base class of atomic structure based property dataset. 4 | 5 | If you want to add new property to extend HDNNP, inherits this base 6 | class. 7 | """ 8 | 9 | from abc import (ABC, abstractmethod) 10 | 11 | import numpy as np 12 | from tqdm import tqdm 13 | 14 | from hdnnpy.utils import (MPI, pprint, recv_chunk, send_chunk) 15 | 16 | 17 | class PropertyDatasetBase(ABC): 18 | """Base class of atomic structure based property dataset.""" 19 | PROPERTIES = [] 20 | """list [str]: Names of properties for each derivative order.""" 21 | COEFFICIENTS = [] 22 | """list [float]: Coefficient values of each properties.""" 23 | UNITS = [] 24 | """list [str]: Units of properties for each derivative order.""" 25 | name = None 26 | """str: Name of this property class.""" 27 | n_property = None 28 | """int: Number of dimensions of 0th property.""" 29 | 30 | def __init__(self, order, structures): 31 | """ 32 | Common instance variables for property datasets are initialized. 33 | 34 | Args: 35 | order (int): Derivative order of property to calculate. 36 | structures (list [AtomicStructure]): 37 | Properties are calculated for these atomic structures. 38 | """ 39 | self._order = order 40 | self._properties = self.PROPERTIES[: order+1] 41 | self._elemental_composition = structures[0].get_chemical_symbols() 42 | self._elements = sorted(set(self._elemental_composition)) 43 | self._length = len(structures) 44 | self._slices = [slice(i[0], i[-1]+1) 45 | for i in np.array_split(range(self._length), MPI.size)] 46 | self._structures = structures[self._slices[MPI.rank]] 47 | self._tag = structures[0].info['tag'] 48 | self._coefficients = self.COEFFICIENTS[: order+1] 49 | self._units = self.UNITS[: order+1] 50 | self._dataset = [] 51 | 52 | def __getitem__(self, item): 53 | """Return property data this instance has. 54 | 55 | If ``item`` is string, it returns corresponding property. 
56 | Available keys can be obtained by ``properties`` attribute. 57 | Otherwise, it returns a list of property sliced by ``item``. 58 | """ 59 | if isinstance(item, str): 60 | try: 61 | index = self._properties.index(item) 62 | except ValueError: 63 | raise KeyError(item) from None 64 | return self._dataset[index] 65 | else: 66 | return [data[item] for data in self._dataset] 67 | 68 | def __len__(self): 69 | """Number of atomic structures given at initialization.""" 70 | return self._length 71 | 72 | @property 73 | def coefficients(self): 74 | """list [float]: Coefficient values this instance have.""" 75 | return self._coefficients 76 | 77 | @property 78 | def elemental_composition(self): 79 | """list [str]: Elemental composition of atomic structures given 80 | at initialization.""" 81 | return self._elemental_composition 82 | 83 | @property 84 | def elements(self): 85 | """list [str]: Elements of atomic structures given at 86 | initialization.""" 87 | return self._elements 88 | 89 | @property 90 | def has_data(self): 91 | """bool: True if success to load or make dataset, 92 | False otherwise.""" 93 | return len(self._dataset) == self._order + 1 94 | 95 | @property 96 | def order(self): 97 | """int: Derivative order of property to calculate.""" 98 | return self._order 99 | 100 | @property 101 | def properties(self): 102 | """list [str]: Names of properties this instance have.""" 103 | return self._properties 104 | 105 | @property 106 | def tag(self): 107 | """str: Unique tag of atomic structures given at 108 | initialization. 109 | 110 | Usually, it is a form like `` ``. 111 | (ex. ``CrystalGa2N2``) 112 | """ 113 | return self._tag 114 | 115 | @property 116 | def units(self): 117 | """list [str]: Units of properties this instance have.""" 118 | return self._units 119 | 120 | def clear(self): 121 | """Clear up instance variables to initial state.""" 122 | self._dataset.clear() 123 | 124 | def load(self, file_path, verbose=True, remake=False): 125 | """Load dataset from .npz format file. 126 | 127 | Only root MPI process load dataset. 128 | 129 | It validates following compatibility between loaded dataset and 130 | atomic structures given at initialization. 131 | 132 | * length of data 133 | * elemental composition 134 | * elements 135 | * tag 136 | 137 | It also validates that loaded dataset satisfies requirements. 138 | 139 | * order 140 | 141 | Args: 142 | file_path (~pathlib.Path): File path to load dataset. 143 | verbose (bool, optional): Print log to stdout. 144 | remake (bool, optional): If loaded dataset is lacking in 145 | any property, recalculate dataset from scratch and 146 | overwrite it to ``file_path``. Otherwise, it raises 147 | ValueError. 148 | 149 | Raises: 150 | AssertionError: If loaded dataset is incompatible with 151 | atomic structures given at initialization. 152 | ValueError: If loaded dataset is lacking in any property and 153 | ``remake=False``. 
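        Example:
            A hedged sketch of the usual load-or-make pattern (the file
            path is arbitrary)::

                >>> file_path = Path('CrystalGa2N2/interatomic_potential.npz')
                >>> if file_path.exists():
                ...     dataset.load(file_path, remake=True)
                ... else:
                ...     dataset.make()
                ...     dataset.save(file_path)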
154 | """ 155 | # validate compatibility between my structures and loaded dataset 156 | ndarray = np.load(file_path) 157 | assert list(ndarray['elemental_composition']) \ 158 | == self._elemental_composition 159 | assert list(ndarray['elements']) == self._elements 160 | assert ndarray['tag'].item() == self._tag 161 | assert len(ndarray[self._properties[0]]) == len(self) 162 | 163 | # validate lacking properties 164 | lacking_properties = set(self._properties) - set(ndarray) 165 | if lacking_properties: 166 | if verbose: 167 | lacking = ('\n'+' '*20).join(sorted(lacking_properties)) 168 | pprint(f''' 169 | Following properties are lacked in {file_path}. 170 | {lacking} 171 | ''') 172 | if remake: 173 | if verbose: 174 | pprint('Start to recalculate dataset from scratch.') 175 | self.make(verbose=verbose) 176 | self.save(file_path, verbose=verbose) 177 | return 178 | else: 179 | raise ValueError('Please recalculate dataset from scratch.') 180 | 181 | # load dataset as much as needed 182 | if MPI.rank == 0: 183 | for i in range(self._order + 1): 184 | self._dataset.append(ndarray[self._properties[i]]) 185 | 186 | if verbose: 187 | pprint(f'Successfully loaded & made needed {self.name} dataset' 188 | f' from {file_path}') 189 | 190 | def make(self, verbose=True): 191 | """Calculate & retain property dataset 192 | 193 | | It calculates property dataset by data-parallel using MPI 194 | communication. 195 | | The calculated dataset is retained in only root MPI process. 196 | 197 | Each property values are divided by ``COEFFICIENTS`` which is 198 | unique to each property dataset class. 199 | 200 | Args: 201 | verbose (bool, optional): Print log to stdout. 202 | """ 203 | dataset = [] 204 | for structure in tqdm(self._structures, 205 | ascii=True, desc=f'Process #{MPI.rank}', 206 | leave=False, position=MPI.rank): 207 | dataset.append(self.calculate_properties(structure)) 208 | 209 | for data_list, coefficient in zip(zip(*dataset), self._coefficients): 210 | shape = data_list[0].shape 211 | send_data = np.stack(data_list) / coefficient 212 | del data_list 213 | if MPI.rank == 0: 214 | recv_data = np.empty((self._length, *shape), dtype=np.float32) 215 | recv_data[self._slices[0]] = send_data 216 | del send_data 217 | for i in range(1, MPI.size): 218 | recv_data[self._slices[i]] = recv_chunk(source=i) 219 | self._dataset.append(recv_data) 220 | else: 221 | send_chunk(send_data, dest=0) 222 | del send_data 223 | 224 | if verbose: 225 | pprint(f'Calculated {self.name} dataset.') 226 | 227 | def save(self, file_path, verbose=True): 228 | """Save dataset to .npz format file. 229 | 230 | Only root MPI process save dataset. 231 | 232 | Args: 233 | file_path (~pathlib.Path): File path to save dataset. 234 | verbose (bool, optional): Print log to stdout. 235 | 236 | Raises: 237 | RuntimeError: If this instance do not have any data. 238 | """ 239 | if not MPI.comm.bcast(self.has_data, root=0): 240 | raise RuntimeError(''' 241 | Cannot save dataset, since this dataset does not have any data. 
242 | ''') 243 | 244 | if MPI.rank == 0: 245 | data = {property_: data for property_, data 246 | in zip(self._properties, self._dataset)} 247 | info = { 248 | 'elemental_composition': self._elemental_composition, 249 | 'elements': self._elements, 250 | 'tag': self._tag, 251 | } 252 | np.savez(file_path, **data, **info) 253 | if verbose: 254 | pprint(f'Successfully saved {self.name} dataset to {file_path}.') 255 | 256 | @abstractmethod 257 | def calculate_properties(self, structure): 258 | """Calculate required properties for a structure data. 259 | 260 | This is abstract method. 261 | Subclass of this base class have to override. 262 | 263 | Args: 264 | structure (AtomicStructure): 265 | A structure data to calculate properties. 266 | 267 | Returns: 268 | list [~numpy.ndarray]: Calculated properties. 269 | The length is the same as ``order`` given at initialization. 270 | """ 271 | return 272 | -------------------------------------------------------------------------------- /hdnnpy/format/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """File format support subpackage.""" 4 | 5 | __all__ = [ 6 | 'parse_xyz', 7 | ] 8 | 9 | from hdnnpy.format.xyz import parse_xyz 10 | -------------------------------------------------------------------------------- /hdnnpy/format/xyz.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Functions to handle xyz file format.""" 4 | 5 | from pathlib import Path 6 | from tempfile import NamedTemporaryFile 7 | 8 | import ase.io 9 | 10 | from hdnnpy.utils import pprint 11 | 12 | 13 | def parse_xyz(file_path, save=True, verbose=True): 14 | """Parse a xyz format file and bunch structures by the same tag. 15 | 16 | Args: 17 | file_path (~pathlib.Path): File path to parse. 18 | save (bool, optional): 19 | If True, save the structures bunched by the same tag into 20 | files. Otherwise, save into temporarily files. 21 | verbose (bool, optional): Print log to stdout. 22 | 23 | Returns: 24 | tuple: 2-element tuple containing: 25 | 26 | - tag_xyz_map (dict): Tag to file path mapping. 27 | - elements (list [str]): 28 | All elements contained in the parsed file. 
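    Example:
        Assuming an input file whose frames carry a ``tag`` entry in
        ``atoms.info`` (the path and tag names below are placeholders)::

            >>> tag_xyz_map, elements = parse_xyz(Path('data/GaN.xyz'))
            >>> elements
            ['Ga', 'N']
            >>> sorted(tag_xyz_map)
            ['CrystalGa16N16', 'CrystalGa2N2']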
29 | """ 30 | tag_xyz_map = {} 31 | elements = set() 32 | 33 | info_file = file_path.with_name(f'{file_path.name}.dat') 34 | if info_file.exists(): 35 | elements, *tags = info_file.read_text().strip().split('\n') 36 | elements = set(elements.split()) 37 | for tag in tags: 38 | tag_xyz_map[tag] = (Path(file_path.with_name(tag)) 39 | / 'structure.xyz') 40 | else: 41 | for atoms in ase.io.iread(str(file_path), index=':', format='xyz'): 42 | tag = atoms.info['tag'] 43 | try: 44 | xyz_path = tag_xyz_map[tag] 45 | except KeyError: 46 | if save: 47 | file_path.with_name(tag).mkdir(parents=True, exist_ok=True) 48 | xyz_path = file_path.with_name(tag)/'structure.xyz' 49 | if verbose: 50 | pprint(f'Sub dataset tagged as "{tag}" is saved to' 51 | f' {xyz_path}.') 52 | 53 | else: 54 | xyz_path = Path(NamedTemporaryFile('w', delete=False).name) 55 | if verbose: 56 | pprint(f'Sub dataset tagged as "{tag}" is temporarily' 57 | f' saved to {xyz_path}.\n' 58 | 'If ABEND and this file remains, delete it' 59 | ' manually.') 60 | tag_xyz_map[tag] = xyz_path 61 | ase.io.write(str(xyz_path), atoms, format='xyz', append=True) 62 | elements.update(atoms.get_chemical_symbols()) 63 | if save: 64 | info_file.write_text(' '.join(sorted(elements)) + '\n' 65 | + '\n'.join(sorted(tag_xyz_map)) + '\n') 66 | 67 | return tag_xyz_map, sorted(elements) 68 | -------------------------------------------------------------------------------- /hdnnpy/model/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Neural network structure models subpackage.""" 4 | 5 | __all__ = [ 6 | 'HighDimensionalNNP', 7 | 'MasterNNP', 8 | ] 9 | 10 | from hdnnpy.model.models import (HighDimensionalNNP, MasterNNP) 11 | -------------------------------------------------------------------------------- /hdnnpy/model/models.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Neural network potential models.""" 4 | 5 | import chainer 6 | import chainer.functions as F 7 | import chainer.links as L 8 | from chainer import Variable 9 | 10 | 11 | class HighDimensionalNNP(chainer.ChainList): 12 | """High dimensional neural network potential. 13 | 14 | This is one implementation of HDNNP that is proposed by Behler 15 | *et al* [Ref]_. 16 | It has a structure in which simple neural networks are arranged in 17 | parallel. 18 | Each neural network corresponds to one atom and inputs descriptor 19 | and outputs property per atom. 20 | Total value or property is predicted to sum them up. 21 | """ 22 | def __init__(self, elemental_composition, *args): 23 | """ 24 | Args: 25 | elemental_composition (list [str]): 26 | Create the same number of :class:`SubNNP` instances as 27 | this. A :class:`SubNNP` with the same element has the 28 | same parameters synchronized. 29 | *args: Positional arguments that is passed to `SubNNP`. 30 | """ 31 | super().__init__( 32 | *[SubNNP(element, *args) for element in elemental_composition]) 33 | 34 | def predict(self, inputs, order): 35 | """Get prediction from input data in a feed-forward way. 36 | 37 | It accepts 0 or 2 for ``order``. 38 | 39 | Notes: 40 | 0th-order predicted value is not total value, but per-atom 41 | value. 42 | 43 | Args: 44 | inputs (list [~numpy.ndarray]): 45 | Length have to equal to ``order + 1``. Each element is 46 | correspond to ``0th-order``, ``1st-order``, ... 47 | order (int): 48 | Derivative order of prediction by this model. 
49 | 50 | Returns: 51 | list [~chainer.Variable]: 52 | Predicted values. Each elements is correspond to 53 | ``0th-order``, ``1st-order``, ... 54 | """ 55 | assert 0 <= order <= 2 56 | input_variables = [[Variable(x) for x in data.swapaxes(0, 1)] 57 | for data in inputs] 58 | for nnp in self: 59 | nnp.results.clear() 60 | 61 | xs = input_variables.pop(0) 62 | with chainer.force_backprop_mode(): 63 | y_pred = self._predict_y(xs) 64 | if order == 0: 65 | return [y_pred] 66 | 67 | dxs = input_variables.pop(0) 68 | differentiate_more = chainer.config.train or order > 1 69 | with chainer.force_backprop_mode(): 70 | dy_pred = self._predict_dy(xs, dxs, differentiate_more) 71 | if order == 1: 72 | return [y_pred, dy_pred] 73 | 74 | d2xs = input_variables.pop(0) 75 | differentiate_more = chainer.config.train or order > 2 76 | with chainer.force_backprop_mode(): 77 | d2y_pred = self._predict_d2y(xs, dxs, d2xs, differentiate_more) 78 | if order == 2: 79 | return [y_pred, dy_pred, d2y_pred] 80 | 81 | def get_by_element(self, element): 82 | """Get all `SubNNP` instances that represent the same element. 83 | 84 | Args: 85 | element (str): Element symbol that you want to get. 86 | 87 | Returns: 88 | list [SubNNP]: 89 | All `SubNNP` instances which represent the same 90 | ``element`` in this HDNNP instance. 91 | """ 92 | return [nnp for nnp in self if nnp.element == element] 93 | 94 | def reduce_grad_to(self, master_nnp): 95 | """Collect calculated gradient of parameters into `MasterNNP` 96 | for each element. 97 | 98 | Args: 99 | master_nnp (MasterNNP): 100 | `MasterNNP` instance where you manage parameters. 101 | """ 102 | for master in master_nnp.children(): 103 | for nnp in self.get_by_element(master.element): 104 | master.addgrads(nnp) 105 | 106 | def sync_param_with(self, master_nnp): 107 | """Synchronize the parameters with `MasterNNP` for each element. 108 | 109 | Args: 110 | master_nnp (MasterNNP): 111 | `MasterNNP` instance where you manage parameters. 112 | """ 113 | for master in master_nnp.children(): 114 | for nnp in self.get_by_element(master.element): 115 | nnp.copyparams(master) 116 | 117 | def _predict_y(self, xs): 118 | """Calculate 0th-order prediction for each `SubNNP`. 119 | 120 | Args: 121 | xs (list [~chainer.Variable]): 122 | Input data for each `SubNNP` constituting this HDNNP 123 | instance. The shape of data is 124 | ``n_atom x (n_sample, n_input)``. 125 | 126 | Returns: 127 | ~chainer.Variable: 128 | Output data (per atom) for each `SubNNP` constituting 129 | this HDNNP instance. The shape of data is 130 | ``(n_sample, n_output)``. 131 | """ 132 | for nnp, x in zip(self, xs): 133 | nnp.feedforward(x) 134 | return sum([nnp.results['y'] for nnp in self]) / len(self) 135 | 136 | def _predict_dy(self, xs, dxs, differentiate_more): 137 | """Calculate 1st-order prediction for each `SubNNP`. 138 | 139 | Args: 140 | xs (list [~chainer.Variable]): 141 | Input data for each `SubNNP` constituting this HDNNP 142 | instance. The shape of data is 143 | ``n_atom x (n_sample, n_input)``. 144 | dxs (list [~chainer.Variable]): 145 | Differentiated input data. The shape of data is 146 | ``n_atom x (n_sample, n_input, n_deriv)``. 147 | differentiate_more (bool): 148 | If True, more deep calculation graph will be created for 149 | back-propagation or higher-order differentiation. 150 | 151 | Returns: 152 | ~chainer.Variable: 153 | Differentiated output data. The shape of data is 154 | ``(n_sample, n_output, n_deriv)``. 
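        Notes:
            This is the chain rule summed over atoms,
            :math:`\partial y / \partial x_\mu = \sum_a \sum_i
            (\partial y_a / \partial G_{a,i})
            (\partial G_{a,i} / \partial x_\mu)`,
            which the ``F.einsum('soi,six->sox', ...)`` contraction below
            implements.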
155 | """ 156 | for nnp, x in zip(self, xs): 157 | nnp.differentiate(x, differentiate_more) 158 | return sum([F.einsum('soi,six->sox', nnp.results['dy'], dx) 159 | for nnp, dx in zip(self, dxs)]) 160 | 161 | def _predict_d2y(self, xs, dxs, d2xs, differentiate_more): 162 | """Calculate 2nd-order prediction for each `SubNNP`. 163 | 164 | Args: 165 | xs (list [~chainer.Variable]): 166 | Input data for each `SubNNP` constituting this HDNNP 167 | instance. The shape of data is 168 | ``n_atom x (n_sample, n_input)``. 169 | dxs (list [~chainer.Variable]): 170 | Differentiated input data. The shape of data is 171 | ``n_atom x (n_sample, n_input, n_deriv)``. 172 | d2xs (list [~chainer.Variable]): 173 | Double differentiated input data. The shape of data is 174 | ``n_atom x (n_sample, n_input, n_deriv, n_deriv)``. 175 | differentiate_more (bool): 176 | If True, more deep calculation graph will be created for 177 | back-propagation or higher-order differentiation. 178 | 179 | Returns: 180 | ~chainer.Variable: 181 | Double differentiated output data. The shape of data is 182 | ``(n_sample, n_output, n_deriv, n_deriv)``. 183 | """ 184 | for nnp, x in zip(self, xs): 185 | nnp.second_differentiate(x, differentiate_more) 186 | return sum([F.einsum('soij,six,sjy->soxy', nnp.results['d2y'], dx, dx) 187 | + F.einsum('soi,sixy->soxy', nnp.results['dy'], d2x) 188 | for nnp, dx, d2x in zip(self, dxs, d2xs)]) 189 | 190 | 191 | class MasterNNP(chainer.ChainList): 192 | """Responsible for managing the parameters of each element.""" 193 | def __init__(self, elements, *args): 194 | """ 195 | It is implemented as a simple :class:`~chainer.ChainList` of 196 | `SubNNP`. 197 | 198 | Args: 199 | elements (list [str]): Element symbols must be unique. 200 | *args: Positional arguments that is passed to `SubNNP`. 201 | """ 202 | super().__init__(*[SubNNP(element, *args) for element in elements]) 203 | 204 | def dump_params(self): 205 | """Dump its own parameters as :obj:`str`. 206 | 207 | Returns: 208 | str: Formed parameters. 209 | """ 210 | params_str = '' 211 | for nnp in self: 212 | element = nnp.element 213 | depth = len(nnp) 214 | for i in range(depth): 215 | weight = getattr(nnp, f'fc_layer{i}').W.data 216 | bias = getattr(nnp, f'fc_layer{i}').b.data 217 | activation = getattr(nnp, f'activation_function{i}').__name__ 218 | weight_str = ('\n'+' '*16).join([' '.join(map(str, row)) 219 | for row in weight.T]) 220 | bias_str = ' '.join(map(str, bias)) 221 | 222 | params_str += f''' 223 | {element} {i} {weight.shape[1]} {weight.shape[0]} {activation} 224 | # weight 225 | {weight_str} 226 | # bias 227 | {bias_str} 228 | ''' 229 | 230 | return params_str 231 | 232 | 233 | class SubNNP(chainer.Chain): 234 | """Feed-forward neural network representing one element or atom.""" 235 | def __init__(self, element, n_feature, hidden_layers, n_property): 236 | """ 237 | | ``element`` is registered as a persistent value. 238 | | It consists of repetition of fully connected layer and 239 | activation function. 240 | | Weight initializer is :obj:`chainer.initializers.HeNormal`. 241 | 242 | Args: 243 | element (str): Element symbol represented by an instance. 244 | n_feature (int): Number of nodes of input layer. 245 | hidden_layers (list [tuple [int, str]]): 246 | A neural network structure. Last one is output layer, 247 | and the remains are hidden layers. Each element is a 248 | tuple ``(# of nodes, activation function)``, for example 249 | ``(50, 'sigmoid')``. 
Only activation functions 250 | implemented in `chainer.functions`_ can be used. 251 | n_property (int): Number of nodes of output layer. 252 | 253 | .. _`chainer.functions`: 254 | https://docs.chainer.org/en/stable/reference/functions.html 255 | """ 256 | super().__init__() 257 | self.add_persistent('element', element) 258 | self._n_layer = len(hidden_layers) + 1 259 | nodes = [n_feature, *[layer[0] for layer in hidden_layers], n_property] 260 | activations = [*[layer[1] for layer in hidden_layers], 'identity'] 261 | with self.init_scope(): 262 | w = chainer.initializers.HeNormal() 263 | for i, (in_size, out_size, activation) in enumerate(zip( 264 | nodes[:-1], nodes[1:], activations)): 265 | setattr(self, f'activation_function{i}', 266 | eval(f'F.{activation}')) 267 | setattr(self, f'fc_layer{i}', 268 | L.Linear(in_size, out_size, initialW=w)) 269 | self.results = {} 270 | 271 | def __len__(self): 272 | """Return the number of hidden_layers.""" 273 | return self._n_layer 274 | 275 | def feedforward(self, x): 276 | """Propagate input data in a feed-forward way. 277 | 278 | Args: 279 | x (~chainer.Variable): 280 | Input data which has the shape ``(n_sample, n_input)``. 281 | """ 282 | h = x 283 | for i in range(self._n_layer): 284 | h = eval(f'self.activation_function{i}(self.fc_layer{i}(h))') 285 | y = h 286 | self.results['y'] = y 287 | 288 | def differentiate(self, x, enable_double_backprop): 289 | """Calculate derivative of the output data w.r.t. input data. 290 | 291 | Args: 292 | x (~chainer.Variable): 293 | Input data which has the shape ``(n_sample, n_input)``. 294 | enable_double_backprop (bool): 295 | Passed to :func:`chainer.grad` to determine whether to 296 | create more deep calculation graph or not. 297 | """ 298 | dy = [chainer.grad([output_node], [x], 299 | enable_double_backprop=enable_double_backprop)[0] 300 | for output_node in F.moveaxis(self.results['y'], 0, -1)] 301 | dy = F.stack(dy, axis=1) 302 | self.results['dy'] = dy 303 | 304 | def second_differentiate(self, x, enable_double_backprop): 305 | """Calculate 2nd derivative of the output data w.r.t. input 306 | data. 307 | 308 | Args: 309 | x (~chainer.Variable): 310 | Input data which has the shape ``(n_sample, n_input)``. 311 | enable_double_backprop (bool): 312 | Passed to :func:`chainer.grad` to determine whether to 313 | create more deep calculation graph or not. 
314 | """ 315 | d2y = [[chainer.grad([derivative], [x], 316 | enable_double_backprop=enable_double_backprop)[0] 317 | for derivative in dy_] 318 | for dy_ in F.moveaxis(self.results['dy'], 0, -1)] 319 | d2y = F.stack([F.stack(d2y_, axis=1) for d2y_ in d2y], axis=1) 320 | self.results['d2y'] = d2y 321 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Pre-processing of input dataset subpackage.""" 4 | 5 | __all__ = [ 6 | 'PREPROCESS', 7 | ] 8 | 9 | from hdnnpy.preprocess.pca import PCA 10 | from hdnnpy.preprocess.scaling import Scaling 11 | from hdnnpy.preprocess.standardization import Standardization 12 | 13 | PREPROCESS = { 14 | PCA.name: PCA, 15 | Scaling.name: Scaling, 16 | Standardization.name: Standardization, 17 | } 18 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/pca.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Principal component analysis (PCA).""" 4 | 5 | import numpy as np 6 | from sklearn import decomposition 7 | 8 | from hdnnpy.preprocess.preprocess_base import PreprocessBase 9 | from hdnnpy.utils import (MPI, pprint) 10 | 11 | 12 | class PCA(PreprocessBase): 13 | """Principal component analysis (PCA). 14 | 15 | The core part of this class uses `sklearn.decomposition.PCA` 16 | implementation. 17 | """ 18 | name = 'pca' 19 | """str: Name of this class.""" 20 | 21 | def __init__(self, n_components=None): 22 | """ 23 | Args: 24 | n_components (int, optional): 25 | Number of features to keep in decomposition. If 26 | ``None``, decomposition is not performed. 27 | """ 28 | super().__init__() 29 | self._n_components = n_components 30 | self._mean = {} 31 | self._transform = {} 32 | 33 | @property 34 | def n_components(self): 35 | """int or None: Number of features to keep in decomposition.""" 36 | return self._n_components 37 | 38 | @property 39 | def mean(self): 40 | """dict [~numpy.ndarray]: Initialized mean values in each 41 | feature dimension and each element.""" 42 | return self._mean 43 | 44 | @property 45 | def transform(self): 46 | """dict [~numpy.ndarray]: Initialized transformation matrix in 47 | each feature dimension and each element.""" 48 | return self._transform 49 | 50 | def apply(self, dataset, elemental_composition, verbose=True): 51 | """Apply the same pre-processing for each element to dataset. 52 | 53 | It accepts 1 or 2 for length of ``dataset``, each element of 54 | which is regarded as ``0th-order``, ``1st-order``, ... 55 | 56 | Args: 57 | dataset (list [~numpy.ndarray]): Input dataset to be scaled. 58 | elemental_composition (list [str]): 59 | Element symbols corresponding to 1st dimension of 60 | ``dataset``. 61 | verbose (bool, optional): Print log to stdout. 62 | 63 | Returns: 64 | list [~numpy.ndarray]: 65 | Processed dataset to be zero-mean and unit-variance. 
66 | """ 67 | order = len(dataset) - 1 68 | assert 0 <= order <= 2 69 | 70 | self._initialize_params(dataset[0], elemental_composition, verbose) 71 | 72 | mean = np.array( 73 | [self._mean[element] for element in elemental_composition]) 74 | transform = np.array( 75 | [self._transform[element] for element in elemental_composition]) 76 | 77 | if order >= 0: 78 | dataset[0] = np.einsum('saf,aft->sat', dataset[0]-mean, transform) 79 | if order >= 1: 80 | dataset[1] = np.einsum('safx,aft->satx', dataset[1], transform) 81 | if order >= 2: 82 | dataset[2] = np.einsum('safxy,aft->satxy', dataset[2], transform) 83 | 84 | return dataset 85 | 86 | def dump_params(self): 87 | """Dump its own parameters as :obj:`str`. 88 | 89 | Returns: 90 | str: Formed parameters. 91 | """ 92 | params_str = '' 93 | for element in self._elements: 94 | transform = self._transform[element] 95 | mean = self._mean[element] 96 | transform_str = ('\n'+' '*12).join([' '.join(map(str, row)) 97 | for row in transform.T]) 98 | mean_str = ' '.join(map(str, mean)) 99 | 100 | params_str += f''' 101 | {element} {transform.shape[1]} {transform.shape[0]} 102 | # transformation matrix 103 | {transform_str} 104 | # mean 105 | {mean_str} 106 | ''' 107 | 108 | return params_str 109 | 110 | def load(self, file_path, verbose=True): 111 | """Load internal parameters for each element. 112 | 113 | Only root MPI process loads parameters. 114 | 115 | Args: 116 | file_path (~pathlib.Path): File path to load parameters. 117 | verbose (bool, optional): Print log to stdout. 118 | """ 119 | if MPI.rank == 0: 120 | ndarray = np.load(file_path) 121 | self._elements = ndarray['elements'].item() 122 | self._n_components = ndarray['n_components'].item() 123 | self._mean = {element: ndarray[f'mean:{element}'] 124 | for element in self._elements} 125 | self._transform = {element: ndarray[f'transform:{element}'] 126 | for element in self._elements} 127 | if verbose: 128 | pprint(f'Loaded PCA parameters from {file_path}.') 129 | 130 | def save(self, file_path, verbose=True): 131 | """Save internal parameters for each element. 132 | 133 | Only root MPI process saves parameters. 134 | 135 | Args: 136 | file_path (~pathlib.Path): File path to save parameters. 137 | verbose (bool, optional): Print log to stdout. 
138 | """ 139 | if MPI.rank == 0: 140 | info = { 141 | 'elements': self._elements, 142 | 'n_components': self._n_components, 143 | } 144 | mean = {f'mean:{k}': v for k, v in self._mean.items()} 145 | transform = {f'transform:{k}': v 146 | for k, v in self._transform.items()} 147 | np.savez(file_path, **info, **mean, **transform) 148 | if verbose: 149 | pprint(f'Saved PCA parameters to {file_path}.') 150 | 151 | def _initialize_params(self, data, elemental_composition, verbose): 152 | """Initialize parameters only once for new elements.""" 153 | for element in set(elemental_composition) - self._elements: 154 | n_feature = data.shape[2] 155 | mask = np.array(elemental_composition) == element 156 | X = data[:, mask].reshape(-1, n_feature) 157 | pca = decomposition.PCA(n_components=self._n_components) 158 | pca.fit(X) 159 | if self._n_components is None: 160 | self._n_components = pca.n_components_ 161 | self._elements.add(element) 162 | self._mean[element] = pca.mean_.astype(np.float32) 163 | self._transform[element] = pca.components_.T.astype(np.float32) 164 | if verbose: 165 | pprint(f''' 166 | Initialized PCA parameters for {element} 167 | Feature dimension: {n_feature} => {self._n_components} 168 | Cumulative contribution rate = {np.sum(pca.explained_variance_ratio_)} 169 | ''') 170 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/preprocess_base.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Base class of pre-processing. 4 | 5 | If you want to add new pre-processing method to extend HDNNP, inherits 6 | this base class. 7 | """ 8 | 9 | from abc import (ABC, abstractmethod) 10 | 11 | 12 | class PreprocessBase(ABC): 13 | """Base class of pre-processing.""" 14 | name = None 15 | """str: Name of this class.""" 16 | 17 | def __init__(self): 18 | """ 19 | Initialize private variable :attr:`_elements` as a empty `set`. 20 | """ 21 | self._elements = set() 22 | 23 | @property 24 | def elements(self): 25 | """list [str]: List of elements whose parameters have already 26 | been initialized.""" 27 | return sorted(self._elements) 28 | 29 | @abstractmethod 30 | def apply(self, *args, **kwargs): 31 | """Apply the same pre-processing for each element to dataset. 32 | 33 | This is abstract method. 34 | Subclass of this base class have to override. 35 | """ 36 | pass 37 | 38 | @abstractmethod 39 | def dump_params(self): 40 | """Dump its own parameters as :obj:`str`. 41 | 42 | This is abstract method. 43 | Subclass of this base class have to override. 44 | """ 45 | pass 46 | 47 | @abstractmethod 48 | def load(self, *args, **kwargs): 49 | """Load internal parameters for each element. 50 | 51 | This is abstract method. 52 | Subclass of this base class have to override. 53 | """ 54 | pass 55 | 56 | @abstractmethod 57 | def save(self, *args, **kwargs): 58 | """Save internal parameters for each element. 59 | 60 | This is abstract method. 61 | Subclass of this base class have to override. 
62 | """ 63 | pass 64 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/scaling.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Scale all feature values into the certain range.""" 4 | 5 | import numpy as np 6 | 7 | from hdnnpy.preprocess.preprocess_base import PreprocessBase 8 | from hdnnpy.utils import (MPI, pprint) 9 | 10 | 11 | class Scaling(PreprocessBase): 12 | """Scale all feature values into the certain range.""" 13 | name = 'scaling' 14 | """str: Name of this class.""" 15 | 16 | def __init__(self, min_=-1.0, max_=1.0): 17 | """ 18 | Args: 19 | min\_ (float): Target minimum value of scaling. 20 | max\_ (float): Target maximum value of scaling. 21 | """ 22 | assert isinstance(min_, float) 23 | assert isinstance(max_, float) 24 | assert min_ < max_ 25 | super().__init__() 26 | self._max = {} 27 | self._min = {} 28 | self._target_max = max_ 29 | self._target_min = min_ 30 | 31 | @property 32 | def max(self): 33 | """dict [~numpy.ndarray]: Initialized maximum values in each 34 | feature dimension and each element.""" 35 | return self._max 36 | 37 | @property 38 | def min(self): 39 | """dict [~numpy.ndarray]: Initialized minimum values in each 40 | feature dimension and each element.""" 41 | return self._min 42 | 43 | @property 44 | def target(self): 45 | """tuple [float, float]: Target min & max values of scaling.""" 46 | return self._target_min, self._target_max 47 | 48 | def apply(self, dataset, elemental_composition, verbose=True): 49 | """Apply the same pre-processing for each element to dataset. 50 | 51 | It accepts 1 or 2 for length of ``dataset``, each element of 52 | which is regarded as ``0th-order``, ``1st-order``, ... 53 | 54 | Args: 55 | dataset (list [~numpy.ndarray]): Input dataset to be scaled. 56 | elemental_composition (list [str]): 57 | Element symbols corresponding to 1st dimension of 58 | ``dataset``. 59 | verbose (bool, optional): Print log to stdout. 60 | 61 | Returns: 62 | list [~numpy.ndarray]: 63 | Processed dataset into the same min-max range. 64 | """ 65 | order = len(dataset) - 1 66 | assert 0 <= order <= 2 67 | 68 | self._initialize_params(dataset[0], elemental_composition, verbose) 69 | 70 | max_ = np.array( 71 | [self._max[element] for element in elemental_composition]) 72 | min_ = np.array( 73 | [self._min[element] for element in elemental_composition]) 74 | 75 | if order >= 0: 76 | dataset[0] = ((dataset[0] - min_) 77 | / (max_ - min_) 78 | * (self._target_max - self._target_min) 79 | + self._target_min) 80 | if order >= 1: 81 | dataset[1] = (dataset[1] 82 | / (max_ - min_)[..., None] 83 | * (self._target_max - self._target_min)) 84 | if order >= 2: 85 | dataset[2] = (dataset[2] 86 | / (max_ - min_)[..., None, None] 87 | * (self._target_max - self._target_min)) 88 | 89 | return dataset 90 | 91 | def dump_params(self): 92 | """Dump its own parameters as :obj:`str`. 93 | 94 | Returns: 95 | str: Formed parameters. 
96 | """ 97 | params_str = (f''' 98 | # target range 99 | {self._target_max} {self._target_min} 100 | ''') 101 | for element in self._elements: 102 | max_ = self._max[element] 103 | min_ = self._min[element] 104 | max_str = ' '.join(map(str, max_)) 105 | min_str = ' '.join(map(str, min_)) 106 | 107 | params_str += f''' 108 | {element} {max_.shape[0]} 109 | # max 110 | {max_str} 111 | # min 112 | {min_str} 113 | ''' 114 | 115 | return params_str 116 | 117 | def load(self, file_path, verbose=True): 118 | """Load internal parameters for each element. 119 | 120 | Only root MPI process loads parameters. 121 | 122 | Args: 123 | file_path (~pathlib.Path): File path to load parameters. 124 | verbose (bool, optional): Print log to stdout. 125 | """ 126 | if MPI.rank == 0: 127 | ndarray = np.load(file_path) 128 | self._elements = ndarray['elements'].item() 129 | self._max = {element: ndarray[f'max:{element}'] 130 | for element in self._elements} 131 | self._min = {element: ndarray[f'min:{element}'] 132 | for element in self._elements} 133 | if verbose: 134 | pprint(f'Loaded Scaling parameters from {file_path}.') 135 | 136 | def save(self, file_path, verbose=True): 137 | """Save internal parameters for each element. 138 | 139 | Only root MPI process saves parameters. 140 | 141 | Args: 142 | file_path (~pathlib.Path): File path to save parameters. 143 | verbose (bool, optional): Print log to stdout. 144 | """ 145 | if MPI.rank == 0: 146 | info = {'elements': self._elements} 147 | max_ = {f'max:{k}': v for k, v in self._max.items()} 148 | min_ = {f'min:{k}': v for k, v in self._min.items()} 149 | np.savez(file_path, **info, **max_, **min_) 150 | if verbose: 151 | pprint(f'Saved Scaling parameters to {file_path}.') 152 | 153 | def _initialize_params(self, data, elemental_composition, verbose): 154 | """Initialize parameters only once for new elements.""" 155 | for element in set(elemental_composition) - self._elements: 156 | n_feature = data.shape[2] 157 | mask = np.array(elemental_composition) == element 158 | X = data[:, mask].reshape(-1, n_feature) 159 | self._elements.add(element) 160 | self._max[element] = X.max(axis=0) 161 | self._min[element] = X.min(axis=0) 162 | if verbose: 163 | pprint(f'Initialized Scaling parameters for {element}') 164 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/standardization.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Scale all feature values to be zero-mean and unit-variance.""" 4 | 5 | import numpy as np 6 | 7 | from hdnnpy.preprocess.preprocess_base import PreprocessBase 8 | from hdnnpy.utils import (MPI, pprint) 9 | 10 | 11 | class Standardization(PreprocessBase): 12 | """Scale all feature values to be zero-mean and unit-variance.""" 13 | name = 'standardization' 14 | """str: Name of this class.""" 15 | 16 | def __init__(self): 17 | super().__init__() 18 | self._mean = {} 19 | self._std = {} 20 | 21 | @property 22 | def mean(self): 23 | """dict [~numpy.ndarray]: Initialized mean values in each 24 | feature dimension and each element.""" 25 | return self._mean 26 | 27 | @property 28 | def std(self): 29 | """dict [~numpy.ndarray]: Initialized standard deviation values 30 | in each feature dimension and each element.""" 31 | return self._std 32 | 33 | def apply(self, dataset, elemental_composition, verbose=True): 34 | """Apply the same pre-processing for each element to dataset. 
35 | 36 | It accepts 1 or 2 for length of ``dataset``, each element of 37 | which is regarded as ``0th-order``, ``1st-order``, ... 38 | 39 | Args: 40 | dataset (list [~numpy.ndarray]): Input dataset to be scaled. 41 | elemental_composition (list [str]): 42 | Element symbols corresponding to 1st dimension of 43 | ``dataset``. 44 | verbose (bool, optional): Print log to stdout. 45 | 46 | Returns: 47 | list [~numpy.ndarray]: 48 | Processed dataset to be zero-mean and unit-variance. 49 | """ 50 | order = len(dataset) - 1 51 | assert 0 <= order <= 2 52 | 53 | self._initialize_params(dataset[0], elemental_composition, verbose) 54 | 55 | mean = np.array( 56 | [self._mean[element] for element in elemental_composition]) 57 | std = np.array( 58 | [self._std[element] for element in elemental_composition]) 59 | 60 | if order >= 0: 61 | dataset[0] -= mean 62 | dataset[0] /= std 63 | if order >= 1: 64 | dataset[1] /= std[..., None] 65 | if order >= 2: 66 | dataset[2] /= std[..., None, None] 67 | 68 | return dataset 69 | 70 | def dump_params(self): 71 | """Dump its own parameters as :obj:`str`. 72 | 73 | Returns: 74 | str: Formed parameters. 75 | """ 76 | params_str = '' 77 | for element in self._elements: 78 | mean = self._mean[element] 79 | std = self._std[element] 80 | mean_str = ' '.join(map(str, mean)) 81 | std_str = ' '.join(map(str, std)) 82 | 83 | params_str += f''' 84 | {element} {mean.shape[0]} 85 | # mean 86 | {mean_str} 87 | # standard deviation 88 | {std_str} 89 | ''' 90 | 91 | return params_str 92 | 93 | def load(self, file_path, verbose=True): 94 | """Load internal parameters for each element. 95 | 96 | Only root MPI process loads parameters. 97 | 98 | Args: 99 | file_path (~pathlib.Path): File path to load parameters. 100 | verbose (bool, optional): Print log to stdout. 101 | """ 102 | if MPI.rank == 0: 103 | ndarray = np.load(file_path) 104 | self._elements = ndarray['elements'].item() 105 | self._mean = {element: ndarray[f'mean:{element}'] 106 | for element in self._elements} 107 | self._std = {element: ndarray[f'std:{element}'] 108 | for element in self._elements} 109 | if verbose: 110 | pprint(f'Loaded Standardization parameters from {file_path}.') 111 | 112 | def save(self, file_path, verbose=True): 113 | """Save internal parameters for each element. 114 | 115 | Only root MPI process saves parameters. 116 | 117 | Args: 118 | file_path (~pathlib.Path): File path to save parameters. 119 | verbose (bool, optional): Print log to stdout. 
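        Example:
            A hedged sketch of the usual fit-then-save flow during
            training (the file path is arbitrary)::

                >>> std = Standardization()
                >>> inputs = std.apply(inputs, ['Ga', 'Ga', 'N', 'N'])
                >>> std.save(Path('output/standardization.npz'))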
120 | """ 121 | if MPI.rank == 0: 122 | info = {'elements': self._elements} 123 | mean = {f'mean:{k}': v for k, v in self._mean.items()} 124 | std = {f'std:{k}': v for k, v in self._std.items()} 125 | np.savez(file_path, **info, **mean, **std) 126 | if verbose: 127 | pprint(f'Saved Standardization parameters to {file_path}.') 128 | 129 | def _initialize_params(self, data, elemental_composition, verbose): 130 | """Initialize parameters only once for new elements.""" 131 | for element in set(elemental_composition) - self._elements: 132 | n_feature = data.shape[2] 133 | mask = np.array(elemental_composition) == element 134 | X = data[:, mask].reshape(-1, n_feature) 135 | self._elements.add(element) 136 | self._mean[element] = X.mean(axis=0) 137 | self._std[element] = X.std(axis=0, ddof=1) 138 | if verbose: 139 | pprint(f'Initialized Standardization parameters for {element}') 140 | -------------------------------------------------------------------------------- /hdnnpy/training/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Training tools subpackage.""" 4 | 5 | __all__ = [ 6 | 'Manager', 7 | 'Updater', 8 | 'ScatterPlot', 9 | 'set_log_scale', 10 | ] 11 | 12 | from hdnnpy.training.extensions import (ScatterPlot, 13 | set_log_scale, 14 | ) 15 | from hdnnpy.training.manager import Manager 16 | from hdnnpy.training.updater import Updater 17 | -------------------------------------------------------------------------------- /hdnnpy/training/extensions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Custom chainer training extensions.""" 4 | 5 | import chainer 6 | from chainer.training import Extension 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | 11 | class ScatterPlot(Extension): 12 | """Trainer extension to output predictions/labels scatter plots.""" 13 | def __init__(self, dataset, model, comm): 14 | """ 15 | Args: 16 | dataset (HDNNPDataset): 17 | Test dataset to plot a scatter plot. It has to have both 18 | input dataset and label dataset. 19 | model (HighDimensionalNNP): HDNNP model to evaluate. 20 | comm (~chainermn.CommunicatorBase): 21 | ChainerMN communicator instance. 22 | """ 23 | self._order = dataset.property.order 24 | self._model = model 25 | self._comm = comm.mpi_comm 26 | 27 | self._properties = [] 28 | self._coefficients = [] 29 | self._units = [] 30 | self._inputs = [] 31 | self._labels = [] 32 | self._predictions = [] 33 | self._init_labels(dataset) 34 | 35 | def __call__(self, trainer): 36 | """Execute scatter plot extension. 37 | 38 | | Perform prediction with the parameters of the model when this 39 | extension was executed, using the data set at initialization. 40 | | Horizontal axis shows the predicted values and vertical axis 41 | shows the true values. 42 | | Plot configurations are written in :meth:`_plot`. 43 | 44 | Args: 45 | trainer (~chainer.training.Trainer): 46 | Trainer object that invokes this extension. 
47 | """ 48 | with chainer.using_config('train', False), \ 49 | chainer.using_config('enable_backprop', False): 50 | predictions = self._model.predict(self._inputs, self._order) 51 | 52 | for i in range(self._order + 1): 53 | pred_send = predictions[i].data 54 | if self._comm.Get_rank() == 0: 55 | self._comm.Gatherv(pred_send, self._predictions[i], root=0) 56 | self._plot(trainer, 57 | self._coefficients[i] * self._predictions[i], 58 | self._coefficients[i] * self._labels[i], 59 | self._properties[i], self._units[i]) 60 | else: 61 | self._comm.Gatherv(pred_send, None, root=0) 62 | 63 | plt.close('all') 64 | 65 | def _init_labels(self, dataset): 66 | """Gather label dataset to root process and initialize other 67 | instance variables.""" 68 | self._properties = dataset.property.properties 69 | self._coefficients = dataset.property.coefficients 70 | self._units = dataset.property.units 71 | batch = chainer.dataset.concat_examples(dataset) 72 | self._inputs = [batch[f'inputs/{i}'] for i in range(self._order + 1)] 73 | labels = [batch[f'labels/{i}'] for i in range(self._order + 1)] 74 | self._count = np.array(self._comm.gather(len(labels[0]), root=0)) 75 | 76 | for i in range(self._order + 1): 77 | label_send = labels[i] 78 | if self._comm.Get_rank() == 0: 79 | total_size = sum(self._count) 80 | prediction = np.empty((total_size,) + label_send[0].shape, 81 | dtype=np.float32) 82 | self._predictions.append(prediction) 83 | 84 | label = np.empty((total_size,) + label_send[0].shape, 85 | dtype=np.float32) 86 | label_recv = (label, self._count * label_send[0].size) 87 | self._comm.Gatherv(label_send, label_recv, root=0) 88 | self._labels.append(label) 89 | else: 90 | self._comm.Gatherv(label_send, None, root=0) 91 | 92 | @staticmethod 93 | def _plot(trainer, prediction, label, property_, unit): 94 | """Plot and save a scatter plot.""" 95 | fig = plt.figure(figsize=(10, 10)) 96 | min_ = np.min(label) 97 | max_ = np.max(label) 98 | plt.scatter(prediction, label, c='blue'), 99 | plt.xlabel(f'Prediction ({unit})'), 100 | plt.ylabel(f'Label ({unit})'), 101 | plt.xlim(min_, max_), 102 | plt.ylim(min_, max_), 103 | plt.text(0.5, 0.9, 104 | f'{property_} @ epoch={trainer.updater.epoch}', 105 | ha='center', transform=plt.gcf().transFigure) 106 | fig.savefig(trainer.out/f'{property_}.png') 107 | 108 | 109 | def set_log_scale(_, a, __): 110 | """Change y axis scale as log scale.""" 111 | a.set_yscale('log') 112 | -------------------------------------------------------------------------------- /hdnnpy/training/loss_function/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Loss function classses for HDNNP training.""" 4 | 5 | __all__ = [ 6 | 'LOSS_FUNCTION', 7 | ] 8 | 9 | from hdnnpy.training.loss_function.first import First 10 | from hdnnpy.training.loss_function.potential import Potential 11 | from hdnnpy.training.loss_function.zeroth import Zeroth 12 | 13 | LOSS_FUNCTION = { 14 | First.name: First, 15 | Potential.name: Potential, 16 | Zeroth.name: Zeroth, 17 | } 18 | -------------------------------------------------------------------------------- /hdnnpy/training/loss_function/first.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Loss function to optimize 0th and 1st-order property.""" 4 | 5 | import warnings 6 | 7 | import chainer 8 | import chainer.functions as F 9 | 10 | from hdnnpy.training.loss_function.loss_functions_base import ( 11 | LossFunctionBase) 12 | 
13 | 
14 | class First(LossFunctionBase):
15 |     """Loss function to optimize 0th- and 1st-order properties."""
16 |     name = 'first'
17 |     """str: Name of this loss function class."""
18 |     order = {
19 |         'descriptor': 1,
20 |         'property': 1,
21 |         }
22 |     """dict: Required orders of each dataset to calculate loss function.
23 |     """
24 | 
25 |     def __init__(self, model, properties, mixing_beta, **_):
26 |         """
27 |         Args:
28 |             model (HighDimensionalNNP):
29 |                 HDNNP object to optimize parameters.
30 |             properties (list [str]): Names of properties to optimize.
31 |             mixing_beta (float):
32 |                 Mixing parameter of errors of 0th and 1st order.
33 |                 It accepts 0.0 to 1.0. If it is 0.0, the HDNNP is
34 |                 optimized using only the 0th-order property, which is
35 |                 equivalent to the loss function ``Zeroth``. If it is
36 |                 1.0, the HDNNP is optimized using only the 1st-order
37 |                 property.
38 |         """
39 |         assert 0.0 <= mixing_beta <= 1.0
40 |         super().__init__(model)
41 |         self._observation_keys = [
42 |             f'RMSE/{properties[0]}', f'RMSE/{properties[1]}', 'total']
43 |         self._mixing_beta = mixing_beta
44 | 
45 |         if mixing_beta == 0.0:
46 |             warnings.warn(
47 |                 'If mixing_beta=0.0, you should use loss function type '
48 |                 '`zeroth` instead of `first`.')
49 | 
50 |     def eval(self, **dataset):
51 |         """Calculate loss function from given datasets and model.
52 | 
53 |         Args:
54 |             **dataset (~numpy.ndarray):
55 |                 Datasets passed as kwargs. Name of each key is in the
56 |                 format 'inputs/N' or 'labels/N'. 'N' is the order of
57 |                 the dataset.
58 | 
59 |         Returns:
60 |             ~chainer.Variable:
61 |                 A scalar value calculated with loss function.
62 |         """
63 |         inputs = [dataset[f'inputs/{i}'] for i
64 |                   in range(self.order['descriptor'] + 1)]
65 |         labels = [dataset[f'labels/{i}'] for i
66 |                   in range(self.order['property'] + 1)]
67 |         predictions = self._model.predict(inputs, self.order['descriptor'])
68 |         loss0 = F.mean_squared_error(predictions[0], labels[0])
69 |         loss1 = F.mean_squared_error(predictions[1], labels[1])
70 |         total_loss = ((1.0 - self._mixing_beta) * loss0
71 |                       + self._mixing_beta * loss1)
72 | 
73 |         RMSE0 = F.sqrt(loss0)
74 |         RMSE1 = F.sqrt(loss1)
75 |         total = ((1.0 - self._mixing_beta) * RMSE0
76 |                  + self._mixing_beta * RMSE1)
77 | 
78 |         observation = {
79 |             self._observation_keys[0]: RMSE0,
80 |             self._observation_keys[1]: RMSE1,
81 |             self._observation_keys[2]: total,
82 |             }
83 |         chainer.report(observation, observer=self._model)
84 |         return total_loss
85 | 
--------------------------------------------------------------------------------
/hdnnpy/training/loss_function/loss_functions_base.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | """Base class for loss functions."""
4 | 
5 | from abc import (ABC, abstractmethod)
6 | 
7 | 
8 | class LossFunctionBase(ABC):
9 |     """Base class for loss functions."""
10 |     name = None
11 |     """str: Name of this loss function class."""
12 |     order = {
13 |         'descriptor': None,
14 |         'property': None,
15 |         }
16 |     """dict: Required orders of each dataset to calculate loss function.
17 |     """
18 | 
19 |     def __init__(self, model):
20 |         """
21 |         Args:
22 |             model (HighDimensionalNNP):
23 |                 HDNNP object to optimize parameters.
24 |         """
25 |         self._model = model
26 |         self._observation_keys = []
27 | 
28 |     @property
29 |     def observation_keys(self):
30 |         """list [str]: Names of metrics which trainer observes."""
31 |         return self._observation_keys
32 | 
33 |     @abstractmethod
34 |     def eval(self, **dataset):
35 |         """Calculate loss function from given datasets and model.
36 | 
37 |         This is an abstract method.
38 |         Subclasses of this base class have to override it.
39 | 
40 |         Args:
41 |             **dataset (~numpy.ndarray):
42 |                 Datasets passed as kwargs. Name of each key is in the
43 |                 format 'inputs/N' or 'labels/N'. 'N' is the order of
44 |                 the dataset.
45 | 
46 |         Returns:
47 |             ~chainer.Variable:
48 |                 A scalar value calculated with loss function.
49 |         """
50 |         pass
51 | 
--------------------------------------------------------------------------------
/hdnnpy/training/loss_function/potential.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | """Loss function to optimize the 0th-order property as a scalar potential."""
4 | 
5 | import warnings
6 | 
7 | import chainer
8 | import chainer.functions as F
9 | 
10 | from hdnnpy.training.loss_function.loss_functions_base import (
11 |     LossFunctionBase)
12 | 
13 | 
14 | class Potential(LossFunctionBase):
15 |     """Loss function to optimize the 0th-order property as a scalar
16 |     potential."""
17 |     name = 'potential'
18 |     """str: Name of this loss function class."""
19 |     order = {
20 |         'descriptor': 2,
21 |         'property': 1,
22 |         }
23 |     """dict: Required orders of each dataset to calculate loss function.
24 |     """
25 | 
26 |     def __init__(
27 |             self, model, properties, mixing_beta, summation, rotation, **_):
28 |         r"""
29 |         Args:
30 |             model (HighDimensionalNNP):
31 |                 HDNNP object to optimize parameters.
32 |             properties (list [str]): Names of properties to optimize.
33 |             mixing_beta (float):
34 |                 Mixing parameter of errors of 0th and 1st order.
35 |                 It accepts 0.0 to 1.0. If it is 0.0, the HDNNP is
36 |                 optimized using only the 0th-order property, which is
37 |                 equivalent to the loss function ``Zeroth``. If it is
38 |                 1.0, the HDNNP is optimized using only the 1st-order
39 |                 property.
40 |             summation (float):
41 |                 Coefficient of the penalty term on the summation of the
42 |                 1st-order property. This loss function adds the
43 |                 following penalty on the 1st-order property vector:
44 |                 :math:`\sum_{i,\alpha} F_{i,\alpha} = 0`
45 |             rotation (float):
46 |                 Coefficient of the penalty term on the rotation of the
47 |                 1st-order property. This loss function adds the
48 |                 following penalty on the 1st-order property vector:
49 |                 :math:`\nabla \times \boldsymbol{F} = 0`
50 |         """
51 |         assert 0.0 <= mixing_beta <= 1.0
52 |         assert 0.0 <= summation
53 |         assert 0.0 <= rotation
54 |         super().__init__(model)
55 |         self._observation_keys = [
56 |             f'RMSE/{properties[0]}', f'RMSE/{properties[1]}',
57 |             f'AbsMean/{properties[1]}', f'RMS/rot-{properties[1]}',
58 |             'total']
59 |         self._mixing_beta = mixing_beta
60 |         self._summation = summation
61 |         self._rotation = rotation
62 | 
63 |         if mixing_beta == 0.0:
64 |             warnings.warn(
65 |                 'If mixing_beta=0.0, you should use loss function type '
66 |                 '`zeroth` instead of `potential`.')
67 |         if rotation == 0.0:
68 |             warnings.warn(
69 |                 'If rotation=0.0, you should use loss function type '
70 |                 '`first` instead of `potential`.')
71 | 
72 |     def eval(self, **dataset):
73 |         """Calculate loss function from given datasets and model.
74 | 
75 |         Args:
76 |             **dataset (~numpy.ndarray):
77 |                 Datasets passed as kwargs. Name of each key is in the
78 |                 format 'inputs/N' or 'labels/N'. 'N' is the order of
79 |                 the dataset.
80 | 
81 |         Returns:
82 |             ~chainer.Variable:
83 |                 A scalar value calculated with loss function.
82 | """ 83 | inputs = [dataset[f'inputs/{i}'] for i 84 | in range(self.order['descriptor'] + 1)] 85 | labels = [dataset[f'labels/{i}'] for i 86 | in range(self.order['property'] + 1)] 87 | predictions = self._model.predict(inputs, self.order['descriptor']) 88 | 89 | loss0 = F.mean_squared_error(predictions[0], labels[0]) 90 | loss1 = F.mean_squared_error(predictions[1], labels[1]) 91 | loss_sum1 = F.mean(predictions[1]) 92 | transverse = F.swapaxes(predictions[2], 2, 3) 93 | loss_rot = F.mean(F.square((predictions[2] - transverse) 94 | / (predictions[2] + transverse))) 95 | total_loss = ((1.0 - self._mixing_beta) * loss0 96 | + self._mixing_beta * loss1 97 | + self._summation * loss_sum1 98 | + self._rotation * loss_rot) 99 | 100 | RMSE0 = F.sqrt(loss0) 101 | RMSE1 = F.sqrt(loss1) 102 | AbsMean1 = F.absolute(loss_sum1) 103 | RMS_rot = F.sqrt(loss_rot) 104 | total = ((1.0 - self._mixing_beta) * RMSE0 105 | + self._mixing_beta * RMSE1 106 | + self._summation * AbsMean1 107 | + self._rotation * RMS_rot) 108 | 109 | observation = { 110 | self._observation_keys[0]: RMSE0, 111 | self._observation_keys[1]: RMSE1, 112 | self._observation_keys[2]: AbsMean1, 113 | self._observation_keys[3]: RMS_rot, 114 | self._observation_keys[4]: total, 115 | } 116 | chainer.report(observation, observer=self._model) 117 | return total_loss 118 | -------------------------------------------------------------------------------- /hdnnpy/training/loss_function/zeroth.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Loss function to optimize 0th-order property.""" 4 | 5 | import chainer 6 | import chainer.functions as F 7 | 8 | from hdnnpy.training.loss_function.loss_functions_base import ( 9 | LossFunctionBase) 10 | 11 | 12 | class Zeroth(LossFunctionBase): 13 | """Loss function to optimize 0th-order property.""" 14 | name = 'zeroth' 15 | """str: Name of this loss function class.""" 16 | order = { 17 | 'descriptor': 0, 18 | 'property': 0, 19 | } 20 | """dict: Required orders of each dataset to calculate loss function. 21 | """ 22 | 23 | def __init__(self, model, properties, **_): 24 | """ 25 | Args: 26 | model (HighDimensionalNNP): 27 | HDNNP object to optimize parameters. 28 | properties (list [str]): Names of properties to optimize. 29 | """ 30 | super().__init__(model) 31 | self._observation_keys = [f'RMSE/{properties[0]}', 'total'] 32 | 33 | def eval(self, **dataset): 34 | """Calculate loss function from given datasets and model. 35 | 36 | Args: 37 | **dataset (~numpy.ndarray): 38 | Datasets passed as kwargs. Name of each key is in the 39 | format 'inputs/N' or 'labels/N'. 'N' is the order of 40 | the dataset. 41 | 42 | Returns: 43 | ~chainer.Variable: 44 | A scalar value calculated with loss function. 
45 | """ 46 | inputs = [dataset[f'inputs/{i}'] for i 47 | in range(self.order['descriptor'] + 1)] 48 | labels = [dataset[f'labels/{i}'] for i 49 | in range(self.order['property'] + 1)] 50 | predictions = self._model.predict(inputs, self.order['descriptor']) 51 | loss0 = F.mean_squared_error(predictions[0], labels[0]) 52 | RMSE0 = F.sqrt(loss0) 53 | 54 | observation = { 55 | self._observation_keys[0]: RMSE0, 56 | self._observation_keys[1]: RMSE0, 57 | } 58 | chainer.report(observation, observer=self._model) 59 | return loss0 60 | -------------------------------------------------------------------------------- /hdnnpy/training/manager.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Context manager to take trainer snapshot and decide whether to train 4 | or not.""" 5 | 6 | from contextlib import AbstractContextManager 7 | import pickle 8 | import signal 9 | 10 | import chainer 11 | import numpy as np 12 | 13 | from hdnnpy.utils import (MPI, pprint) 14 | 15 | 16 | class Manager(AbstractContextManager): 17 | """Context manager to take trainer snapshot and decide whether to 18 | train or not.""" 19 | def __init__(self, tag, trainer, result, is_snapshot=True): 20 | """ 21 | Args: 22 | tag (str): Tag of dataset used for training. 23 | trainer (~chainer.training.Trainer): 24 | Trainer object to be managed. 25 | result (dict): 26 | Dictionary object containing total elapsed time and 27 | metrics value corresponding to the type of loss 28 | function. Even when training is stopped / resumed, it is 29 | retained. 30 | is_snapshot (bool, optional): Take trainer snapshot if True. 31 | """ 32 | self._tag = tag 33 | self._trainer = trainer 34 | self._result = result 35 | self._is_snapshot = is_snapshot 36 | self._is_allow = True 37 | self._trainer_snapshot = trainer.out / 'trainer_snapshot.npz' 38 | self._interim_result = trainer.out / 'interim_result.pickle' 39 | self._signum = None 40 | 41 | def __enter__(self): 42 | """Replace signal handler of SIGINT and SIGTERM.""" 43 | self._old_sigint_handler = signal.signal( 44 | signal.SIGINT, self._snapshot) 45 | self._old_sigterm_handler = signal.signal( 46 | signal.SIGTERM, self._snapshot) 47 | 48 | def __exit__(self, type_, value, traceback): 49 | """Restore signal handler of SIGINT and SIGTERM, and record the 50 | result of training.""" 51 | signal.signal(signal.SIGINT, self._old_sigint_handler) 52 | signal.signal(signal.SIGTERM, self._old_sigterm_handler) 53 | if not self._signum: 54 | self._result['training_time'] += self._trainer.elapsed_time 55 | observation = { 56 | k: v.data.item() if isinstance(v, chainer.Variable) 57 | else v.item() if isinstance(v, np.float64) 58 | else v 59 | for k, v in self._trainer.observation.items()} 60 | self._result['observation'].append( 61 | {'tag': self._tag, **observation}) 62 | 63 | @property 64 | def allow_to_run(self): 65 | """Whether the given trainer can train with the dataset.""" 66 | return self._is_allow 67 | 68 | def check_to_resume(self, resume_tag): 69 | """Decide whether to train or not. 70 | 71 | If current tag of dataset is equal to ``resume_tag``, restore 72 | the state of trainer from snapshot file. 73 | 74 | Args: 75 | resume_tag (str): 76 | Tag of dataset when snapshot was taken last time. 
77 | """ 78 | if self._tag == resume_tag: 79 | self._resume() 80 | self._is_allow = True 81 | elif self._trainer_snapshot.exists(): 82 | self._is_allow = False 83 | else: 84 | self._is_allow = True 85 | 86 | def _resume(self): 87 | """Restore the state of trainer from snapshot file.""" 88 | pprint(f'Resume training loop from dataset tagged "{self._tag}"') 89 | chainer.serializers.load_npz(self._trainer_snapshot, self._trainer) 90 | interim_result = pickle.loads(self._interim_result.read_bytes()) 91 | self._result['training_time'] += interim_result['training_time'] 92 | self._result['observation'].extend(interim_result['observation']) 93 | # remove snapshot 94 | if MPI.rank == 0: 95 | self._trainer_snapshot.unlink() 96 | self._interim_result.unlink() 97 | 98 | def _snapshot(self, signum, _): 99 | """Take trainer snapshot.""" 100 | self._signum = signal.Signals(signum) 101 | if self._is_snapshot and MPI.rank == 0: 102 | pprint(f'Stop {self._tag} training by signal:' 103 | f' {self._signum.name}!\n' 104 | f'Take trainer snapshot at epoch:' 105 | f' {self._trainer.updater.epoch}') 106 | chainer.serializers.save_npz(self._trainer_snapshot, self._trainer) 107 | self._interim_result.write_bytes(pickle.dumps(self._result)) 108 | 109 | # must raise any Exception to stop trainer.run() 110 | raise InterruptedError( 111 | f'Chainer training loop is interrupted by {self._signum.name}') 112 | -------------------------------------------------------------------------------- /hdnnpy/training/updater.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Updater for HDNNP training.""" 4 | 5 | import chainer 6 | 7 | 8 | class Updater(chainer.training.updaters.StandardUpdater): 9 | """Updater for HDNNP training using `HighDimensionalNNP` and 10 | `MasterNNP`.""" 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | 14 | def update_core(self): 15 | """Calculate gradient of parameters using `HighDimensionalNNP` 16 | and collect them in `MasterNNP` and update parameters.""" 17 | master_opt = self.get_optimizer('master') 18 | main_opt = self.get_optimizer('main') 19 | master_nnp = master_opt.target 20 | hdnnp = main_opt.target 21 | 22 | batch = self.converter(self.get_iterator('main').next(), self.device) 23 | 24 | master_nnp.cleargrads() 25 | hdnnp.cleargrads() 26 | 27 | loss = self.loss_func(**batch) 28 | loss.backward() 29 | 30 | hdnnp.reduce_grad_to(master_nnp) 31 | master_opt.update() 32 | hdnnp.sync_param_with(master_nnp) 33 | -------------------------------------------------------------------------------- /hdnnpy/utils.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Utility functions used in various subpackages.""" 4 | 5 | __all__ = [ 6 | 'MPI', 7 | 'pprint', 8 | 'pyyaml_path_constructor', 9 | 'pyyaml_path_representer', 10 | 'recv_chunk', 11 | 'send_chunk', 12 | ] 13 | 14 | from pathlib import Path 15 | import pickle 16 | from pprint import pprint as pretty_print 17 | import sys 18 | import textwrap 19 | 20 | from mpi4py import MPI as MPI4PY 21 | 22 | 23 | INT_MAX = 2147483647 24 | 25 | 26 | class MPI: 27 | """MPI world communicator and aliases.""" 28 | comm = MPI4PY.COMM_WORLD 29 | rank = MPI4PY.COMM_WORLD.Get_rank() 30 | size = MPI4PY.COMM_WORLD.Get_size() 31 | 32 | 33 | def pprint(data=None, flush=True, **options): 34 | """Pretty print function. 35 | 36 | Args: 37 | data (str, optional): Data to output into stdout. 
38 |         flush (bool, optional): Flush the stream after output if True.
39 |         **options: Other options passed to :func:`print`.
40 |     """
41 |     if data is None:
42 |         data = ''
43 |     if isinstance(data, (list, dict)):
44 |         pretty_print(data, **options)
45 |     else:
46 |         data = textwrap.dedent(data)
47 |         if 'stream' in options:
48 |             options['file'] = options.pop('stream')
49 |         print(data, **options)
50 |     if flush:
51 |         sys.stdout.flush()
52 | 
53 | 
54 | def pyyaml_path_constructor(loader, node):
55 |     """Helper function to load a Path tag in PyYAML."""
56 |     value = loader.construct_scalar(node)
57 |     return Path(value)
58 | 
59 | 
60 | def pyyaml_path_representer(dumper, instance):
61 |     """Helper function to dump :class:`~pathlib.Path` in PyYAML."""
62 |     return dumper.represent_scalar('Path', f'{instance}')
63 | 
64 | 
65 | def recv_chunk(source, max_buf_len=256 * 1024 * 1024):
66 |     """Receive data divided into small chunks with MPI communication.
67 | 
68 |     Args:
69 |         source (int): MPI source process that sends data.
70 |         max_buf_len (int, optional): Maximum size of each chunk.
71 | 
72 |     Returns:
73 |         object: Received data.
74 |     """
75 |     assert max_buf_len < INT_MAX
76 |     assert max_buf_len > 0
77 |     data = MPI.comm.recv(source=source, tag=1)
78 |     assert data is not None
79 |     total_chunk_num, max_buf_len, total_bytes = data
80 |     pickled_bytes = bytearray()
81 | 
82 |     for i in range(total_chunk_num):
83 |         b = i * max_buf_len
84 |         e = min(b + max_buf_len, total_bytes)
85 |         buf = bytearray(e - b)
86 |         MPI.comm.Recv(buf, source=source, tag=2)
87 |         pickled_bytes[b:e] = buf
88 | 
89 |     obj = pickle.loads(pickled_bytes)
90 |     return obj
91 | 
92 | 
93 | def send_chunk(obj, dest, max_buf_len=256 * 1024 * 1024):
94 |     """Send data divided into small chunks with MPI communication.
95 | 
96 |     Args:
97 |         obj (object): Any data to send, which can be pickled.
98 |         dest (int): MPI destination process that receives data.
99 |         max_buf_len (int, optional): Maximum size of each chunk.
100 | """ 101 | assert max_buf_len < INT_MAX 102 | assert max_buf_len > 0 103 | pickled_bytes = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL) 104 | total_bytes = len(pickled_bytes) 105 | total_chunk_num = -(-total_bytes // max_buf_len) 106 | MPI.comm.send( 107 | (total_chunk_num, max_buf_len, total_bytes), dest=dest, tag=1) 108 | 109 | for i in range(total_chunk_num): 110 | b = i * max_buf_len 111 | e = min(b + max_buf_len, total_bytes) 112 | buf = pickled_bytes[b:e] 113 | MPI.comm.Send(buf, dest=dest, tag=2) 114 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | 3 | build: 4 | image: latest 5 | 6 | conda: 7 | file: condaenv.yaml 8 | 9 | python: 10 | version: 3.6 11 | pip_install: true 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ase 2 | chainer 3 | matplotlib 4 | mpi4py 5 | numpy 6 | PyYAML 7 | scikit-learn 8 | scipy 9 | tqdm 10 | traitlets 11 | -------------------------------------------------------------------------------- /scripts/merge_xyz: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from pathlib import Path 5 | import sys 6 | 7 | import ase.io 8 | 9 | args = sys.argv 10 | step = int(args[1]) 11 | in_dir = args[2] 12 | output = args[3] 13 | 14 | for f in Path(in_dir).glob('*.xyz'): 15 | images = ase.io.read(f, index=f'::{step}', format='xyz') 16 | ase.io.write(output, images, format='xyz', append=True) 17 | -------------------------------------------------------------------------------- /scripts/outcar2xyz: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import sys 5 | 6 | import ase.io 7 | 8 | args = sys.argv 9 | 10 | if len(args) != 4: 11 | print(f""" 12 | Error: args should have 3 but has {len(args)-1} 13 | 14 | The format should be 15 | 16 | $ outcar2xyz [PREFIX] [OUTCAR] [XYZFILE] 17 | """) 18 | sys.exit(1) 19 | 20 | prefix = args[1] 21 | infile = args[2] 22 | outfile = args[3] 23 | 24 | for atoms in ase.io.iread(infile, index=':', format='vasp-out'): 25 | # stress = atoms.get_stress(voigt=False) 26 | # atoms.set_param_value('stress', stress) 27 | atoms.info['tag'] = prefix + atoms.get_chemical_formula() 28 | ase.io.write(outfile, atoms, format='xyz', append=True) 29 | -------------------------------------------------------------------------------- /scripts/poscars2xyz: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import sys 5 | 6 | import ase.io 7 | 8 | args = sys.argv 9 | 10 | if len(args) < 4: 11 | print(f""" 12 | Error: args should have 4 or more but has {len(args)-1} 13 | 14 | The format should be 15 | 16 | $ poscars2xyz [PREFIX] [POSCARs]+ [XYZFILE] 17 | """) 18 | sys.exit(1) 19 | 20 | prefix = args[1] 21 | poscars = args[2:-1] 22 | xyz = args[-1] 23 | 24 | for poscar in poscars: 25 | atoms = ase.io.read(poscar, format='vasp') 26 | atoms.info['tag'] = prefix + atoms.get_chemical_formula() 27 | ase.io.write(xyz, atoms, format='xyz', append=True) 28 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from setuptools import setup 4 | from hdnnpy import __version__ 5 | 6 | setup( 7 | name='hdnnpy', 8 | version=__version__, 9 | description='High Dimensional Neural Network Potential package', 10 | long_description=open('README.md').read(), 11 | author='Masayoshi Ogura', 12 | author_email='ogura@cello.t.u-tokyo.ac.jp', 13 | url='https://github.com/ogura-edu/HDNNP', 14 | license='MIT', 15 | packages=[ 16 | 'hdnnpy', 17 | 'hdnnpy.cli', 18 | 'hdnnpy.dataset', 19 | 'hdnnpy.dataset.descriptor', 20 | 'hdnnpy.dataset.property', 21 | 'hdnnpy.format', 22 | 'hdnnpy.model', 23 | 'hdnnpy.preprocess', 24 | 'hdnnpy.training', 25 | 'hdnnpy.training.loss_function', 26 | ], 27 | scripts=[ 28 | 'scripts/merge_xyz', 29 | 'scripts/outcar2xyz', 30 | 'scripts/poscars2xyz', 31 | ], 32 | entry_points={ 33 | 'console_scripts': ['hdnnpy = hdnnpy.cli:main'], 34 | }, 35 | zip_safe=False, 36 | ) 37 | --------------------------------------------------------------------------------
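
A closing note on the loss-function subpackage above: LOSS_FUNCTION in hdnnpy/training/loss_function/__init__.py maps each class's `name` attribute to the class itself, so adding a new loss term only requires subclassing LossFunctionBase and registering the subclass in that dict. The following is a minimal, hypothetical sketch, not part of the repository: the class name `MAE`, its `name` string `'mae'`, and the choice of mean absolute error are illustrative assumptions; it only reuses the `model.predict(inputs, order)` and `chainer.report` interface that `Zeroth` above already uses.

# coding: utf-8

"""Hypothetical example: a mean-absolute-error loss for the 0th-order property.

This mirrors ``Zeroth`` and is only a sketch; it is not shipped with hdnnpy.
"""

import chainer
import chainer.functions as F

from hdnnpy.training.loss_function.loss_functions_base import (
    LossFunctionBase)


class MAE(LossFunctionBase):
    """Optimize the 0th-order property with a mean absolute error."""
    name = 'mae'
    order = {
        'descriptor': 0,
        'property': 0,
        }

    def __init__(self, model, properties, **_):
        super().__init__(model)
        self._observation_keys = [f'MAE/{properties[0]}', 'total']

    def eval(self, **dataset):
        inputs = [dataset[f'inputs/{i}'] for i
                  in range(self.order['descriptor'] + 1)]
        labels = [dataset[f'labels/{i}'] for i
                  in range(self.order['property'] + 1)]
        predictions = self._model.predict(inputs, self.order['descriptor'])
        # Mean absolute error of the 0th-order property (e.g. total energy)
        loss0 = F.mean_absolute_error(predictions[0], labels[0])
        chainer.report({self._observation_keys[0]: loss0,
                        self._observation_keys[1]: loss0},
                       observer=self._model)
        return loss0


# Registration (hypothetical): add the class to LOSS_FUNCTION in
# hdnnpy/training/loss_function/__init__.py so that configuration files can
# select it by name:
#     LOSS_FUNCTION[MAE.name] = MAE

If such a class existed, selecting it would work the same way as the built-in types: the training configuration refers to a loss function by the key under which its class is registered in LOSS_FUNCTION.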