├── .gitignore ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.ja.md ├── README.md ├── condaenv.yaml ├── docs ├── Makefile ├── make.bat └── source │ ├── _templates │ └── autosummary │ │ └── class.rst │ ├── conf.py │ ├── description.rst │ ├── example.rst │ ├── extend.rst │ ├── index.rst │ ├── install.rst │ ├── modules │ ├── dataset.rst │ ├── format.rst │ ├── index.rst │ ├── model.rst │ ├── preprocess.rst │ ├── training.rst │ └── utils.rst │ └── usage.rst ├── examples ├── prediction_config.py └── training_config.py ├── hdnnpy ├── __init__.py ├── __main__.py ├── _version.py ├── cli │ ├── __init__.py │ ├── configurables.py │ ├── conversion_application.py │ ├── main.py │ ├── prediction_application.py │ └── training_application.py ├── dataset │ ├── __init__.py │ ├── atomic_structure.py │ ├── dataset_generator.py │ ├── descriptor │ │ ├── __init__.py │ │ ├── descriptor_dataset_base.py │ │ ├── symmetry_function_dataset.py │ │ └── weighted_symmetry_function_dataset.py │ ├── hdnnp_dataset.py │ └── property │ │ ├── __init__.py │ │ ├── interatomic_potential_dataset.py │ │ └── property_dataset_base.py ├── format │ ├── __init__.py │ └── xyz.py ├── model │ ├── __init__.py │ └── models.py ├── preprocess │ ├── __init__.py │ ├── pca.py │ ├── preprocess_base.py │ ├── scaling.py │ └── standardization.py ├── training │ ├── __init__.py │ ├── extensions.py │ ├── loss_function │ │ ├── __init__.py │ │ ├── first.py │ │ ├── loss_functions_base.py │ │ ├── potential.py │ │ └── zeroth.py │ ├── manager.py │ └── updater.py └── utils.py ├── readthedocs.yml ├── requirements.txt ├── scripts ├── merge_xyz ├── outcar2xyz └── poscars2xyz └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | # created by `pip install --editable .` 4 | /hdnnpy.egg-info/ 5 | /build/ 6 | /dist/ 7 | 8 | # html built by sphinx 9 | /docs/build/ 10 | # autosummary generated rst files 11 | /docs/source/modules/**/generated 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 masayoshi.ogura 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | ase = "*" 8 | chainer = "*" 9 | matplotlib = "*" 10 | "mpi4py" = "*" 11 | numpy = "*" 12 | pyyaml = "*" 13 | scikit-learn = "*" 14 | scipy = "*" 15 | tqdm = "*" 16 | traitlets = "*" 17 | 18 | [dev-packages] 19 | sphinx = "*" 20 | sphinx-rtd-theme = "*" 21 | sphinx-autobuild = "*" 22 | hdnnpy = {editable = true, path = "."} 23 | 24 | [requires] 25 | python_version = "3.6.7" 26 | -------------------------------------------------------------------------------- /README.ja.md: -------------------------------------------------------------------------------- 1 | # environment construction 2 | 3 | 基本的にpythonのバージョン管理はpyenvを使うこと。 4 | 必要なコマンド(pyenv,pipenv,conda)のインストールについては省略する。 5 | 6 | 注意1: 7 | 2018/11/16時点で、Anaconda Cloud上に 8 | 9 | - ChainerMN 10 | - Chainer v5 (v5からChainerMNがマージされた) 11 | 12 | が存在しないため、anacondaを使う場合でもpipを使用してChainerMNをインストールする必要がある。 13 | 環境が壊れる可能性を承知した上で使用すること。 14 | 参考:http://onoz000.hatenablog.com/entry/2018/02/11/142347 15 | 16 | 注意2: 17 | 2018/11/16時点で、pyenvを使ってanacondaをインストールしてある場合、pipenvによるインストールは失敗するという情報がある。 18 | このバグの修正版は将来的にリリースされるらしい。 19 | 参考:https://github.com/pypa/pipenv/issues/3044 20 | 21 | ## pipenv (recommended) 22 | 23 | 簡単かつ確実なインストール方法。 24 | 25 | 環境変数`PIPENV_VENV_IN_PROJECT`を1に設定すると、 26 | pipenvで作成されるpythonの仮想環境がこのプロジェクトの直下に作成される。(`/path/to/HDNNP/.venv/`) 27 | 以下のコマンドを実行するか、`~/.bashrc`に追記してプロセスを再起動することで変更が適用される。 28 | ``` 29 | export PIPENV_VENV_IN_PROJECT=1 30 | ``` 31 | 32 | ``` 33 | $ git clone https://github.com/ogura-edu/HDNNP.git 34 | $ cd HDNNP/ 35 | $ pyenv install 3.6.7 36 | $ pyenv local 3.6.7 37 | $ pipenv install 38 | 39 | # activate 40 | $ pipenv shell 41 | 42 | (HDNNP) $ hdnnpy training 43 | 44 | # deactivate 45 | (HDNNP) $ exit 46 | ``` 47 | 48 | ## anaconda 49 | 50 | 最適化されたバイナリを取得できるので、実行速度が速い。 51 | しかし、上記の理由からpipと混在した形になることや、 52 | マシンによってはインストールがうまくいかないことがあるため注意すること。 53 | 54 | `conda env create --file condaenv.yaml`の実行が終了すると、 55 | 各々の環境に合わせてactivationの仕方がいくつか提示されるので好きなものを選ぶ。 56 | 以下の例では`~/.bashrc`に1文追記する方法を選択している。 57 | 58 | ``` 59 | $ git clone https://github.com/ogura-edu/HDNNP.git 60 | $ cd HDNNP/ 61 | $ pyenv install anaconda-x.x.x 62 | $ pyenv local anaconda-x.x.x 63 | $ conda env create -n HDNNP --file condaenv.yaml 64 | $ echo ". ${HOME}/.pyenv/versions/anaconda-x.x.x/etc/profile.d/conda.sh" > ~/.bashrc 65 | 66 | # activate 67 | $ conda activate HDNNP 68 | 69 | # install this program using pip 70 | (HDNNP) $ pip install --editable . 71 | 72 | (HDNNP) $ hdnnpy training 73 | 74 | # deactivate 75 | (HDNNP) $ conda deactivate 76 | ``` 77 | 78 | ## pip install only 79 | 80 | `Pipfile`または`condaenv.yaml`に記述されている依存関係を元に、 81 | パッケージを個別に`pip install`することももちろん可能。 82 | この場合は`virtualenv`を使って自分で仮想環境を管理することを推奨する。 83 | ``` 84 | $ git clone https://github.com/ogura-edu/HDNNP.git 85 | $ cd HDNNP/ 86 | $ pip install PKG1 PKG2 ... 87 | $ pip install -e . 88 | ``` 89 | 90 | または、慣れた人であれば依存関係を`setup.py`に書き加えるだけで済む。 91 | ``` 92 | $ git clone https://github.com/ogura-edu/HDNNP.git 93 | $ cd HDNNP/ 94 | $ vim setup.py #=> setup()の引数にinstall_requiresを追加 95 | $ pip install -e . 
96 | ``` 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Documentation Status](https://readthedocs.org/projects/hdnnp/badge/?version=latest)](https://hdnnp.readthedocs.io/en/latest/?badge=latest) 2 | 3 | # High Dimensional Neural Network Potential (HDNNP) 4 | 5 | > This is an implementation of the High Dimensional Neural Network Potential (HDNNP), designed to reproduce Density Functional Theory (DFT) calculations *efficiently* with high *flexibility* and *reactivity*. 6 | 7 | There is an equivalent document in Japanese: [README.ja.md](https://github.com/ogura-edu/HDNNP/blob/master/README.ja.md). 8 | 9 | ## Install 10 | 11 | Install this project by `git`. 12 | 13 | ```shell 14 | $ git clone https://github.com/ogura-edu/HDNNP.git 15 | 16 | # or if using ssh 17 | 18 | $ git clone git@github.com:ogura-edu/HDNNP.git 19 | ``` 20 | 21 | This project uses [Pipenv](https://github.com/pypa/pipenv) for the development workflow. If you don't have it, run the following command to install it. 22 | 23 | 24 | **macOS** 25 | 26 | ```shell 27 | $ brew install pipenv 28 | ``` 29 | 30 | **other** 31 | 32 | ```shell 33 | # please run this after installing Python 34 | $ pip install pipenv 35 | ``` 36 | 37 | ## Setup 38 | ### By Pipenv (Preferred) 39 | 40 | The setup is almost the same as with Anaconda, but install plain Python with pyenv rather than Anaconda, because `pipenv` currently fails to start when Anaconda is installed via `pyenv`. 41 | 42 | This bug will be fixed in a future release (ref: [pythonfinder + pyenv + anaconda issue](https://github.com/pypa/pipenv/issues/3044)). 43 | 44 | Set the environment variable `PIPENV_VENV_IN_PROJECT` to `1` to create the virtual environment inside this project directory (`/path/to/HDNNP/.venv`). 45 | 46 | ```shell 47 | export PIPENV_VENV_IN_PROJECT=1 48 | ``` 49 | 50 | For macOS users, you need to install `mpich` (or `openmpi`) before installing the dependencies. 51 | 52 | ```shell 53 | # Only for macOS users. 54 | # 55 | # NOTE: Installing both mpich and openmpi will cause a conflict 56 | # 57 | $ brew install mpich 58 | 59 | # or 60 | 61 | $ brew install openmpi 62 | ``` 63 | 64 | Set up your environment. 65 | 66 | ```shell 67 | # Install dependencies 68 | $ pipenv install 69 | 70 | # activate the virtual environment 71 | $ pipenv shell 72 | 73 | # For example... 74 | (HDNNP) $ hdnnpy train 75 | 76 | # deactivate 77 | (HDNNP) $ exit 78 | ``` 79 | 80 | ### By Anaconda 81 | 82 | Anaconda provides optimized binaries, so execution is basically faster; however, note the caveats below. 83 | 84 | Install Anaconda via pyenv and activate the environment. 85 | 86 | ```shell 87 | $ ANACONDA_VERSION=[YOUR_ANACONDA_VERSION] 88 | $ pyenv install $ANACONDA_VERSION 89 | $ pyenv local $ANACONDA_VERSION 90 | $ conda env create -n HDNNP --file condaenv.yaml 91 | $ echo ". ${HOME}/.pyenv/versions/${ANACONDA_VERSION}/etc/profile.d/conda.sh" >> ~/.bashrc 92 | 93 | # activate 94 | $ conda activate HDNNP 95 | 96 | # install this program using pip 97 | (HDNNP) $ pip install --editable . 98 | 99 | # For example... 100 | (HDNNP) $ hdnnpy train 101 | 102 | # deactivate 103 | (HDNNP) $ conda deactivate 104 | ``` 105 | 106 | **NOTE** 107 | 108 | There is no 109 | 110 | - ChainerMN 111 | - Chainer v5 (ChainerMN has been merged into Chainer since v5) 112 | 113 | on the Anaconda Cloud, so you still have to install these packages with `pip`. 114 | 115 | Also, there is a bug such that if you install Anaconda via `pyenv`, `pipenv` will fail to start (ref: [pythonfinder + pyenv + anaconda issue](https://github.com/pypa/pipenv/issues/3044)). 116 | 117 | ## Reference 118 | 119 | - Jörg Behler.
First Principle Neural Network Potentials for Reactive Simulations of Large Molecular and Condensed System, 2007 120 | -------------------------------------------------------------------------------- /condaenv.yaml: -------------------------------------------------------------------------------- 1 | name: HDNNP 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python==3.6.7 7 | - ase 8 | - chainer 9 | - matplotlib 10 | - mpi4py 11 | - numpy 12 | - pyyaml 13 | - scikit-learn 14 | - scipy 15 | - tqdm 16 | - traitlets 17 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {{ objname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | {% if module.startswith(('hdnnpy.model', 'hdnnpy.training')) %} 7 | .. autoclass:: {{ objname }} 8 | :no-inherited-members: 9 | {% else %} 10 | .. autoclass:: {{ objname }} 11 | :inherited-members: 12 | {% endif %} 13 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. 
For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | from hdnnpy import __version__ 18 | sys.path.insert(0, os.path.abspath('../..')) 19 | 20 | 21 | # -- Project information ----------------------------------------------------- 22 | 23 | project = 'HDNNP' 24 | copyright = '2018, masayoshi.ogura' 25 | author = 'masayoshi.ogura' 26 | 27 | # The short X.Y version 28 | version = __version__ 29 | # The full version, including alpha/beta/rc tags 30 | release = __version__ 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # If your documentation needs a minimal Sphinx version, state it here. 36 | # 37 | # needs_sphinx = '1.0' 38 | 39 | # Add any Sphinx extension module names here, as strings. They can be 40 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 41 | # ones. 42 | extensions = [ 43 | 'sphinx.ext.autosummary', 44 | 'sphinx.ext.autodoc', 45 | 'sphinx.ext.doctest', 46 | 'sphinx.ext.todo', 47 | 'sphinx.ext.coverage', 48 | 'sphinx.ext.mathjax', 49 | 'sphinx.ext.ifconfig', 50 | 'sphinx.ext.viewcode', 51 | 'sphinx.ext.napoleon', 52 | 'sphinx.ext.intersphinx', 53 | ] 54 | 55 | # Add any paths that contain templates here, relative to this directory. 56 | templates_path = ['_templates'] 57 | 58 | # The suffix(es) of source filenames. 59 | # You can specify multiple suffix as a list of string: 60 | # 61 | # source_suffix = ['.rst', '.md'] 62 | source_suffix = '.rst' 63 | 64 | # The master toctree document. 65 | master_doc = 'index' 66 | 67 | # The language for content autogenerated by Sphinx. Refer to documentation 68 | # for a list of supported languages. 69 | # 70 | # This is also used if you do content translation via gettext catalogs. 71 | # Usually you set "language" from the command line for these cases. 72 | language = None 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This pattern also affects html_static_path and html_extra_path. 77 | exclude_patterns = [] 78 | 79 | # The name of the Pygments (syntax highlighting) style to use. 80 | pygments_style = 'sphinx' 81 | 82 | # The reST default role (used for this markup: `text`) to use for all 83 | # documents. 84 | default_role = 'any' 85 | 86 | 87 | # -- Options for HTML output ------------------------------------------------- 88 | 89 | # The theme to use for HTML and HTML Help pages. See the documentation for 90 | # a list of builtin themes. 91 | # 92 | html_theme = 'sphinx_rtd_theme' 93 | 94 | # Theme options are theme-specific and customize the look and feel of a theme 95 | # further. For a list of options available for each theme, see the 96 | # documentation. 97 | # 98 | # html_theme_options = {} 99 | 100 | # Add any paths that contain custom static files (such as style sheets) here, 101 | # relative to this directory. They are copied after the builtin static files, 102 | # so a file named "default.css" will overwrite the builtin "default.css". 
103 | html_static_path = ['_static'] 104 | 105 | # Custom sidebar templates, must be a dictionary that maps document names 106 | # to template names. 107 | # 108 | # The default sidebars (for documents that don't match any pattern) are 109 | # defined by theme itself. Builtin themes are using these templates by 110 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 111 | # 'searchbox.html']``. 112 | # 113 | # html_sidebars = {} 114 | 115 | 116 | # -- Options for HTMLHelp output --------------------------------------------- 117 | 118 | # Output file base name for HTML help builder. 119 | htmlhelp_basename = 'HDNNPdoc' 120 | 121 | 122 | # -- Options for LaTeX output ------------------------------------------------ 123 | 124 | latex_elements = { 125 | # The paper size ('letterpaper' or 'a4paper'). 126 | # 127 | # 'papersize': 'letterpaper', 128 | 129 | # The font size ('10pt', '11pt' or '12pt'). 130 | # 131 | # 'pointsize': '10pt', 132 | 133 | # Additional stuff for the LaTeX preamble. 134 | # 135 | # 'preamble': '', 136 | 137 | # Latex figure (float) alignment 138 | # 139 | # 'figure_align': 'htbp', 140 | } 141 | 142 | # Grouping the document tree into LaTeX files. List of tuples 143 | # (source start file, target name, title, 144 | # author, documentclass [howto, manual, or own class]). 145 | latex_documents = [ 146 | (master_doc, 'HDNNP.tex', 'HDNNP Documentation', 147 | 'masayoshi.ogura', 'manual'), 148 | ] 149 | 150 | 151 | # -- Options for manual page output ------------------------------------------ 152 | 153 | # One entry per manual page. List of tuples 154 | # (source start file, name, description, authors, manual section). 155 | man_pages = [ 156 | (master_doc, 'hdnnp', 'HDNNP Documentation', 157 | [author], 1) 158 | ] 159 | 160 | 161 | # -- Options for Texinfo output ---------------------------------------------- 162 | 163 | # Grouping the document tree into Texinfo files. List of tuples 164 | # (source start file, target name, title, author, 165 | # dir menu entry, description, category) 166 | texinfo_documents = [ 167 | (master_doc, 'HDNNP', 'HDNNP Documentation', 168 | author, 'HDNNP', 'One line description of project.', 169 | 'Miscellaneous'), 170 | ] 171 | 172 | 173 | # -- Options for Epub output ------------------------------------------------- 174 | 175 | # Bibliographic Dublin Core info. 176 | epub_title = project 177 | 178 | # The unique identifier of the text. This can be a ISBN number 179 | # or the project homepage. 180 | # 181 | # epub_identifier = '' 182 | 183 | # A unique identification for the text. 184 | # 185 | # epub_uid = '' 186 | 187 | # A list of files that should not be packed into the epub file. 188 | epub_exclude_files = ['search.html'] 189 | 190 | 191 | # -- Extension configuration ------------------------------------------------- 192 | 193 | # -- Options for todo extension ---------------------------------------------- 194 | 195 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
196 | todo_include_todos = True 197 | 198 | # -- Options for napoleon extension ------------------------------------------ 199 | napoleon_include_special_with_doc = True 200 | napoleon_use_admonition_for_examples = True 201 | napoleon_use_admonition_for_notes = True 202 | napoleon_use_admonition_for_references = True 203 | 204 | # -- Options for autodoc extension ------------------------------------------- 205 | autoclass_content = 'both' 206 | autodoc_member_order = 'groupwise' 207 | autodoc_default_options = { 208 | 'show-inheritance': None, 209 | 'members': None, 210 | } 211 | 212 | # -- Options for intersphinx extension --------------------------------------- 213 | intersphinx_mapping = { 214 | 'python': ('https://docs.python.org/3.6/', None), 215 | 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 216 | 'chainer': ('https://docs.chainer.org/en/stable/', None), 217 | 'ase': ('https://wiki.fysik.dtu.dk/ase/', None), 218 | 'sklearn': ('https://scikit-learn.org/stable/', None), 219 | } 220 | 221 | # -- Options for autosummary extension --------------------------------------- 222 | autosummary_generate = True 223 | -------------------------------------------------------------------------------- /docs/source/description.rst: -------------------------------------------------------------------------------- 1 | What is HDNNP? 2 | ============== 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | 10 | | This program is an implementation of HDNNP that was 11 | proposed by Behler *et al.* [Ref]_. 12 | | HDNNP stands for **High Dimensional Neural Network Potential**. 13 | | HDNNP is one of the machine learning potentials that are used to 14 | reduce the computational cost of DFT (Density Functional Theory) calculations. 15 | 16 | | Currently, energy and force prediction using symmetry functions has been implemented. 17 | 18 | 19 | 20 | .. [Ref] https://onlinelibrary.wiley.com/doi/full/10.1002/qua.24890 -------------------------------------------------------------------------------- /docs/source/example.rst: -------------------------------------------------------------------------------- 1 | Execution example 2 | ================= 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | 10 | GaN interatomic potential 11 | ------------------------- 12 | 13 | | In this section, we show an execution example of HDNNP training 14 | using the 1st-order derivative of the interatomic potential 15 | (i.e. interatomic forces) of GaN. 16 | 17 | Data file 18 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | Prepare a .xyz format file which contains some structures with energy and force data. 21 | 22 | ``GaN.xyz`` 23 | 24 | ..
code-block:: text 25 | 26 | 32 27 | Lattice="6.46474316 0.0 0.0 -3.23237159 5.5986318 0.0 0.0 0.0 10.53232454" Properties=species:S:1:pos:R:3:forces:R:3 energy=-194.5164333 tag=CrystalGa16N16 pbc="T T T" 28 | Ga 1.61619000 0.93311000 2.62845000 0.00000300 0.00001200 -0.00570900 29 | Ga 3.23237000 3.73242000 2.62845000 0.00003900 -0.00004700 -0.00571500 30 | Ga 4.84856000 0.93311000 2.62845000 0.00000400 -0.00001100 -0.00563600 31 | Ga -0.00000000 3.73242000 7.89461000 -0.00003800 0.00003200 -0.00564200 32 | Ga 1.61619000 0.93311000 7.89461000 0.00006100 -0.00001800 -0.00571100 33 | Ga 3.23237000 3.73242000 7.89461000 0.00002100 -0.00006400 -0.00572000 34 | Ga 4.84856000 0.93311000 7.89461000 -0.00003200 -0.00002300 -0.00565600 35 | Ga -0.00000000 3.73242000 2.62845000 0.00002100 -0.00002000 -0.00565100 36 | Ga -0.00000000 1.86621000 5.26153000 -0.00006900 0.00005900 -0.00572300 37 | Ga 1.61619000 4.66553000 5.26153000 -0.00002700 0.00008200 -0.00571900 38 | Ga 3.23237000 1.86621000 5.26153000 0.00001800 -0.00001400 -0.00566500 39 | Ga -1.61619000 4.66553000 10.52769000 -0.00002700 -0.00002600 -0.00566900 40 | Ga -0.00000000 1.86621000 10.52769000 -0.00002200 0.00008500 -0.00568700 41 | Ga 1.61619000 4.66553000 10.52769000 0.00000600 -0.00002400 -0.00574300 42 | Ga 3.23237000 1.86621000 10.52769000 0.00000100 0.00007600 -0.00564000 43 | Ga -1.61619000 4.66553000 5.26153000 0.00002200 -0.00000200 -0.00568800 44 | N 1.61619000 0.93311000 4.61253000 0.00005500 -0.00002000 -0.00041000 45 | N 3.23237000 3.73242000 4.61253000 0.00003600 -0.00000900 -0.00037900 46 | N 4.84856000 0.93311000 4.61253000 -0.00004100 0.00000700 -0.00041100 47 | N -0.00000000 3.73242000 9.87869000 -0.00001300 -0.00003500 -0.00042500 48 | N 1.61619000 0.93311000 9.87869000 0.00001200 0.00002900 -0.00040900 49 | N 3.23237000 3.73242000 9.87869000 0.00002700 -0.00006200 -0.00041700 50 | N 4.84856000 0.93311000 9.87869000 -0.00000400 0.00002500 -0.00041500 51 | N -0.00000000 3.73242000 4.61253000 -0.00004500 -0.00000400 -0.00041800 52 | N -0.00000000 1.86621000 1.97945000 0.00000000 -0.00000800 -0.00034400 53 | N 1.61619000 4.66553000 1.97945000 -0.00000200 0.00000500 -0.00033700 54 | N 3.23237000 1.86621000 1.97945000 0.00001700 0.00001600 -0.00036100 55 | N -1.61619000 4.66553000 7.24561000 0.00002800 -0.00002300 -0.00036000 56 | N -0.00000000 1.86621000 7.24561000 -0.00008200 0.00001500 -0.00043200 57 | N 1.61619000 4.66553000 7.24561000 -0.00002200 0.00004200 -0.00040100 58 | N 3.23237000 1.86621000 7.24561000 0.00001900 -0.00001200 -0.00039500 59 | N -1.61619000 4.66553000 1.97945000 0.00000400 -0.00001800 -0.00046000 60 | 32 61 | Lattice="6.46474316 0.0 0.0 -3.23237159 5.5986318 0.0 0.0 0.0 10.53232454" Properties=species:S:1:pos:R:3:forces:R:3 energy=-169.96635976 tag=CrystalGa16N16 pbc="T T T" 62 | Ga 1.44265000 1.46790000 2.04947000 -0.95595000 -3.56110800 2.54045000 63 | Ga 2.88538000 4.34404000 2.89380000 4.75932000 -2.04809500 -1.43108200 64 | Ga 4.38372000 0.68215000 2.61606000 0.15090500 6.97113700 2.40537400 65 | Ga 0.47836000 3.95213000 7.90284000 -3.31821700 -0.13409600 -0.21437100 66 | Ga 1.82415000 1.43420000 8.18380000 -0.78327100 -2.70531000 -3.50469000 67 | Ga 3.49351000 3.96284000 7.92622000 1.84595600 -0.42627100 -0.16593100 68 | Ga 5.17229000 0.83662000 7.71745000 -0.46937900 1.21688400 1.11923500 69 | Ga -0.04508000 3.95689000 2.71946000 -3.88117900 -1.84159800 0.64959300 70 | Ga -0.96518000 1.98086000 5.22137000 1.12890800 -1.31857500 -0.37168600 71 | Ga 1.18573000 3.20454000 5.22045000 1.58317800 
1.58466500 0.77557000 72 | Ga 2.91073000 1.45415000 5.60119000 -0.29420600 -1.79185700 -2.55652100 73 | Ga -0.99634000 4.45389000 0.07004000 -2.39983600 3.43545000 1.27018200 74 | Ga 0.17764000 1.60544000 10.36435000 6.30208700 4.30252400 2.73199900 75 | Ga 2.35420000 4.13573000 0.39168000 -1.28509600 -0.64262000 -3.92936300 76 | ... 77 | 4 78 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.3605335 tag=CrystalGa2N2 pbc="T T T" 79 | Ga 1.60815000 0.92846000 2.61537000 0.00057000 -0.00032400 -0.00131800 80 | Ga 0.00000000 1.85693000 5.23535000 -0.00055000 0.00030900 -0.00128000 81 | N 1.60815000 0.92846000 4.58958000 0.00038300 -0.00020300 0.00049500 82 | N 0.00000000 1.85693000 1.96960000 -0.00030900 0.00021200 0.00050600 83 | 4 84 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.04284841 tag=CrystalGa2N2 pbc="T T T" 85 | Ga 1.56998000 1.01961000 2.64712000 0.37879200 -0.65345000 -0.84588100 86 | Ga 0.00233000 1.78610000 5.21359000 1.53422400 0.01126800 0.83092200 87 | N 1.80998000 0.78162000 4.55671000 -1.91098000 0.49960800 -0.07141600 88 | N -0.02338000 1.90257000 1.95274000 0.00855700 0.14604000 0.09234500 89 | 4 90 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.07370026 tag=CrystalGa2N2 pbc="T T T" 91 | Ga 1.68022000 0.78468000 2.59601000 -0.77026300 1.15126700 0.71828100 92 | Ga -0.04831000 1.97869000 0.01593000 -1.05203000 0.42443800 -0.31339000 93 | N 1.47544000 1.12447000 4.57171000 1.50854300 -1.32922700 -0.04524600 94 | N 0.01431000 1.77059000 1.98155000 0.31937700 -0.24596800 -0.35639000 95 | 4 96 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.06789171 tag=CrystalGa2N2 pbc="T T T" 97 | Ga 1.55216000 1.03346000 2.59780000 1.76477100 -1.33788800 0.62275500 98 | Ga 0.04645000 1.78043000 0.02483000 -0.39888700 -0.84820500 -0.84426800 99 | N 1.59299000 0.75442000 4.54056000 0.36047300 1.45854900 0.51138400 100 | N 0.06265000 1.88907000 1.95951000 -1.73396900 0.72932900 -0.27762300 101 | 4 102 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.10933618 tag=CrystalGa2N2 pbc="T T T" 103 | Ga 1.62285000 0.92354000 2.56898000 -0.87387700 0.84344000 1.29437700 104 | Ga -0.00655000 1.82730000 0.04373000 0.63633100 1.10065300 -1.07564600 105 | N 1.65007000 1.03662000 4.56438000 -0.83168500 -1.16592600 0.26072300 106 | N -0.08253000 1.92082000 1.98507000 1.07124400 -0.78418500 -0.47994500 107 | 4 108 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.15961153 tag=CrystalGa2N2 pbc="T T T" 109 | Ga 1.61929000 0.86275000 2.60668000 0.91655600 0.12884500 0.02524600 110 | Ga -0.02746000 1.90759000 0.02534000 -0.00425900 0.48361500 -1.32527900 111 | N 1.57325000 1.05930000 4.54898000 0.29235100 -0.94998800 0.25695700 112 | N 0.11613000 1.80106000 1.90435000 -1.21017800 0.33509300 1.05032200 113 | 4 114 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-23.90497111 tag=CrystalGa2N2 pbc="T T T" 115 | Ga 1.57753000 1.01962000 2.53889000 -0.58498700 0.38561600 1.95812800 116 | Ga 0.05221000 1.77667000 0.06084000 -0.50913400 -1.39207300 -1.16507600 
117 | N 1.60109000 0.71987000 4.62834000 0.25821000 2.35785600 -0.69708500 118 | N -0.10050000 2.01120000 1.98576000 0.83273600 -1.35617800 -0.10520400 119 | 4 120 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-24.17936965 tag=CrystalGa2N2 pbc="T T T" 121 | Ga 1.65588000 0.84325000 2.61391000 -0.48280700 0.58352400 -0.06140200 122 | Ga -0.05236000 1.91994000 0.00989000 1.13163900 0.73695700 -0.46324400 123 | N 1.63413000 1.09260000 4.55873000 -1.08709100 -1.30806300 0.05205700 124 | N -0.00295000 1.80336000 1.93549000 0.44154800 -0.01662100 0.47920500 125 | 4 126 | Lattice="3.21629013 0.0 0.0 -1.60814507 2.78538896 0.0 0.0 0.0 5.23996246" Properties=species:S:1:pos:R:3:forces:R:3 energy=-23.82707164 tag=CrystalGa2N2 pbc="T T T" 127 | ... 128 | 129 | 130 | Config file 131 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 132 | 133 | ``training_config.py`` 134 | (necessary parts picked up) 135 | 136 | .. code-block:: python 137 | 138 | c.TrainingApplication.verbose = True 139 | 140 | c.DatasetConfig.parameters = { 141 | 'type1': [ 142 | (5.0,), 143 | ], 144 | 'type2': [ 145 | (5.0, 0.01, 2.0), 146 | (5.0, 0.01, 3.2), 147 | (5.0, 0.01, 3.8), 148 | (5.0, 0.1, 2.0), 149 | (5.0, 0.1, 3.2), 150 | (5.0, 0.1, 3.8), 151 | (5.0, 1.0, 2.0), 152 | (5.0, 1.0, 3.2), 153 | (5.0, 1.0, 3.8), 154 | ], 155 | 'type4': [ 156 | (5.0, 0.01, -1, 1), 157 | (5.0, 0.01, -1, 2), 158 | (5.0, 0.01, -1, 4), 159 | (5.0, 0.01, 1, 1), 160 | (5.0, 0.01, 1, 2), 161 | (5.0, 0.01, 1, 4), 162 | (5.0, 0.1, -1, 1), 163 | (5.0, 0.1, -1, 2), 164 | (5.0, 0.1, -1, 4), 165 | (5.0, 0.1, 1, 1), 166 | (5.0, 0.1, 1, 2), 167 | (5.0, 0.1, 1, 4), 168 | (5.0, 1.0, -1, 1), 169 | (5.0, 1.0, -1, 2), 170 | (5.0, 1.0, -1, 4), 171 | (5.0, 1.0, 1, 1), 172 | (5.0, 1.0, 1, 2), 173 | (5.0, 1.0, 1, 4), 174 | ], 175 | } 176 | 177 | c.DatasetConfig.preprocesses = [ 178 | ('pca', (), {}), 179 | ] 180 | 181 | c.ModelConfig.layers = [ 182 | (90, 'tanh'), 183 | (90, 'tanh'), 184 | (1, 'identity'), 185 | ] 186 | 187 | c.TrainingConfig.batch_size = 100 188 | 189 | c.TrainingConfig.data_file = 'data/GaN.xyz' 190 | 191 | c.TrainingConfig.epoch = 1000 192 | 193 | c.TrainingConfig.interval = 10 194 | 195 | c.TrainingConfig.loss_function = ( 196 | 'first_only', 197 | {} 198 | ) 199 | 200 | c.TrainingConfig.lr_decay = 1.0e-6 201 | 202 | c.TrainingConfig.order = 1 203 | 204 | c.TrainingConfig.out_dir = 'output' 205 | 206 | c.TrainingConfig.patients = 5 207 | 208 | c.TrainingConfig.scatter_plot = True 209 | 210 | command line log 211 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 212 | 213 | Once edited configuration file ``training_config.py``, you just do one command ``hdnnpy trian``. 214 | 215 | .. 
code-block:: none 216 | 217 | $ hdnnpy train 218 | 219 | Construct sub dataset tagged as "CrystalGa16N16" 220 | Successfully loaded & made needed symmetry_function dataset from /data/CrystalGa16N16/symmetry_function.npz 221 | Successfully loaded & made needed interatomic_potential dataset from /data/CrystalGa16N16/interatomic_potential.npz 222 | 223 | Initialized PCA parameters for Ga 224 | Feature dimension: 74 => 74 225 | Cumulative contribution rate = 0.9999999403953552 226 | 227 | 228 | Initialized PCA parameters for N 229 | Feature dimension: 74 => 74 230 | Cumulative contribution rate = 1.0000001192092896 231 | 232 | Construct sub dataset tagged as "CrystalGa2N2" 233 | Successfully loaded & made needed symmetry_function dataset from /data/CrystalGa2N2/symmetry_function.npz 234 | Successfully loaded & made needed interatomic_potential dataset from /data/CrystalGa2N2/interatomic_potential.npz 235 | Saved PCA parameters to /output/preprocess/pca.npz. 236 | early stopping: operator is less 237 | epoch iteration main/RMSE/force main/RMSE/total val/main/RMSE/force val/main/RMSE/total 238 | 1 14 1.20575 1.20575 1.21576 1.21576 239 | 2 28 1.08758 1.08758 1.06121 1.06121 240 | 3 42 0.895798 0.895798 0.865482 0.865482 241 | 4 55 0.685623 0.685623 0.694789 0.694789 242 | 5 69 0.560702 0.560702 0.603832 0.603832 243 | 6 83 0.509542 0.509542 0.570984 0.570984 244 | 7 97 0.486743 0.486743 0.552533 0.552533 245 | 8 110 0.468966 0.468966 0.540375 0.540375 246 | 9 124 0.458917 0.458917 0.531327 0.531327 247 | 10 138 0.448132 0.448132 0.524466 0.524466 248 | ... 249 | 250 | Directory tree 251 | ^^^^^^^^^^^^^^^^^^^^^^^^ 252 | 253 | After training, directory tree becomes as follows: 254 | 255 | .. code-block:: text 256 | 257 | workdir 258 | ├── data/ 259 | │   ├── GaN.xyz 260 | │   ... 261 | ├── output/ 262 | │   ├── CrystalGa16N16/ 263 | │   │   ├── energy.png 264 | │   │   ├── force.png 265 | │   │   └── training.log 266 | │   ├── CrystalGa2N2/ 267 | │   │   ├── energy.png 268 | │   │   ├── force.png 269 | │   │   └── training.log 270 | │   ├── master_nnp.npz 271 | │   ├── preprocess/ 272 | │   │   └── pca.npz 273 | │   ├── training_config.py 274 | │   └── training_result.yaml 275 | └── training_config.py 276 | -------------------------------------------------------------------------------- /docs/source/extend.rst: -------------------------------------------------------------------------------- 1 | How to extend HDNNP 2 | =================== 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | Dataset 10 | ------------------- 11 | 12 | HDNNP dataset consists of **Descriptor dataset** and **Property dataset**. 13 | 14 | 15 | 16 | 17 | Descriptor dataset 18 | ^^^^^^^^^^^^^^^^^^^ 19 | 20 | | Currently, we have implemented only **symmetry function** dataset. 21 | | If you want to use other descriptor dataset, define a class that inherits 22 | | ``hdnnpy.dataset.descriptor.descriptor_dataset_base.DescriptorDatasetBase`` 23 | | It defines several instance variables, properties and instance methods for creating a HDNNP dataset. 24 | 25 | In addition, override the following abstract method. 26 | 27 | * generate_feature_keys 28 | 29 | | It returns a list of unique keys in feature dimension. 30 | | In addition to being able to use it internally, 31 | it is also used to expand feature dimension and zero-fill in ``hdnnpy.dataset.HDNNPDataset`` 32 | 33 | * calculate_descriptors 34 | 35 | | It is main function for calculating descriptors from a atomic structure, which is a wrapper of ase.Atoms object. 
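| The following is a minimal sketch of such a subclass.
| The class name, attribute names and exact method signatures below are illustrative assumptions rather than the real API;
  use ``hdnnpy.dataset.descriptor.symmetry_function_dataset.SymmetryFunctionDataset`` as the reference implementation for the actual interface.

.. code-block:: python

    from hdnnpy.dataset.descriptor.descriptor_dataset_base import (
        DescriptorDatasetBase)


    class MyDescriptorDataset(DescriptorDatasetBase):
        """Hypothetical custom descriptor dataset (illustrative sketch)."""

        def generate_feature_keys(self, elements):
            # Return a list of unique keys, one per feature dimension.
            # These keys are also used by `hdnnpy.dataset.HDNNPDataset`
            # to expand the feature dimension and zero-fill.
            return [f'my_descriptor:{element}:{i}'
                    for element in elements
                    for i in range(self.n_feature)]  # `n_feature` is an assumed attribute

        def calculate_descriptors(self, structure):
            # `structure` is a wrapper of an `ase.Atoms` object.
            # Compute and return the descriptor values (and their
            # derivatives, if required) for every atom in the structure.
            raise NotImplementedError('fill in your descriptor calculation')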
36 | 37 | 38 | 39 | 40 | 41 | Property dataset 42 | ^^^^^^^^^^^^^^^^^^^ 43 | 44 | | Currently, we have implemented only the **interatomic potential** dataset. 45 | | If you want to use another property dataset, define a class that inherits 46 | | ``hdnnpy.dataset.property.property_dataset_base.PropertyDatasetBase`` 47 | | It defines several instance variables, properties and instance methods for creating an HDNNP dataset. 48 | 49 | In addition, override the following abstract method. 50 | 51 | * calculate_properties 52 | 53 | | It is the main function for getting properties from an atomic structure, which is a wrapper of an ase.Atoms object. 54 | 55 | 56 | Preprocess 57 | ------------------- 58 | 59 | * PCA 60 | * Scaling 61 | * Standardization 62 | 63 | 64 | Loss function 65 | ------------------- 66 | 67 | Currently, we have implemented the following loss functions for HDNNP training. 68 | 69 | * Zeroth 70 | * First 71 | 72 | Each loss function uses the 0th/1st-order error of the property to optimize HDNNP. 73 | ``First`` uses both the 0th- and 1st-order errors of the property, weighted by the parameter ``mixing_beta``, to optimize HDNNP. 74 | 75 | * Potential 76 | 77 | It uses the 2nd-order derivative of the descriptor dataset to optimize HDNNP so as to satisfy the following condition: 78 | 79 | .. math:: 80 | 81 | \nabla \times \boldsymbol{F} = 0 82 | 83 | Then there exists a scalar potential :math:`\varphi` such that: 84 | 85 | .. math:: 86 | 87 | \boldsymbol{F} = \nabla \varphi 88 | 89 | | If you want to use another loss function, define a class that inherits 90 | | ``hdnnpy.training.loss_function.loss_function_base.LossFunctionBase``. 91 | | It defines several instance variables, properties and instance methods. 92 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. HDNNP documentation master file, created by 2 | sphinx-quickstart on Thu Dec 20 09:39:20 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to HDNNP's documentation! 7 | ================================= 8 | 9 | .. toctree:: 10 | :glob: 11 | :maxdepth: 2 12 | :caption: Contents: 13 | 14 | description 15 | install 16 | usage 17 | example 18 | modules/index 19 | extend 20 | 21 | 22 | 23 | Indices and tables 24 | ================== 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | How to install HDNNP 2 | ==================== 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | 10 | 11 | Python installation 12 | --------------------- 13 | 14 | | We recommend that you install Python using pyenv, 15 | because a non-sudo user can install any Python version on any computer. 16 | | We have confirmed that this program works only with Python 3.6.7. 17 | 18 | .. code-block:: shell 19 | 20 | (on Linux) 21 | $ git clone https://github.com/yyuu/pyenv.git ~/.pyenv 22 | (on MacOS) 23 | $ brew install pyenv 24 | 25 | $ echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bash_profile 26 | $ echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bash_profile 27 | $ echo 'eval "$(pyenv init -)"' >> ~/.bash_profile 28 | $ source ~/.bash_profile 29 | 30 | $ pyenv install 3.6.7 31 | 32 | 33 | 34 | Get source code 35 | --------------------- 36 | 37 | ..
note:: 38 | 39 | | This program is now under development, not uploaded to PyPI. 40 | | You have to get source code and install it manually. 41 | 42 | .. code-block:: shell 43 | 44 | $ git clone https://github.com/ogura-edu/HDNNP.git 45 | 46 | Install dependencies and this program 47 | ------------------------------------- 48 | 49 | Via pipenv 50 | ^^^^^^^^^^^^^^^^^^^^^ 51 | 52 | .. code-block:: shell 53 | 54 | $ cd HDNNP/ 55 | $ pyenv local 3.6.7 56 | $ pip install pipenv 57 | $ pipenv install --dev 58 | 59 | (activate) 60 | $ pipenv shell 61 | 62 | (for example:) 63 | (HDNNP) $ hdnnpy train 64 | 65 | (deactivate) 66 | (HDNNP) $ exit 67 | 68 | 69 | Via anaconda 70 | ^^^^^^^^^^^^^^^^^^^^^ 71 | 72 | Anaconda also can be installed by pyenv. 73 | 74 | .. code-block:: shell 75 | 76 | $ cd HDNNP/ 77 | $ pyenv install anaconda3-xxx 78 | $ pyenv local anaconda3-xxx 79 | $ conda env create -n HDNNP --file condaenv.yaml 80 | 81 | (activate) 82 | $ conda activate HDNNP 83 | 84 | (for example:) 85 | (HDNNP) $ hdnnpy train 86 | 87 | (deactivate) 88 | (HDNNP) $ conda deactivate 89 | 90 | 91 | 92 | Via raw pip 93 | ^^^^^^^^^^^^^^^^^^^^^ 94 | 95 | You can install all dependent packages manually. 96 | The dependent packages are written in ``Pipfile``, ``condaenv.yaml`` or ``requirements.txt``. 97 | 98 | .. code-block:: shell 99 | 100 | $ cd HDNNP/ 101 | $ pip install PKG1 PKG2 ... 102 | $ pip install --editable . 103 | -------------------------------------------------------------------------------- /docs/source/modules/dataset.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.dataset 2 | 3 | Dataset tools 4 | ============= 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | ~dataset_generator.DatasetGenerator 11 | ~hdnnp_dataset.HDNNPDataset 12 | 13 | 14 | Descriptor datasets 15 | ------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated/ 19 | :nosignatures: 20 | 21 | ~descriptor.symmetry_function_dataset.SymmetryFunctionDataset 22 | 23 | 24 | Property datasets 25 | ----------------- 26 | 27 | .. autosummary:: 28 | :toctree: generated/ 29 | :nosignatures: 30 | 31 | ~property.interatomic_potential_dataset.InteratomicPotentialDataset 32 | 33 | Dataset base classes 34 | -------------------- 35 | 36 | .. autosummary:: 37 | :toctree: generated/ 38 | :nosignatures: 39 | 40 | ~descriptor.descriptor_dataset_base.DescriptorDatasetBase 41 | ~property.property_dataset_base.PropertyDatasetBase 42 | 43 | Atomic structure 44 | ---------------- 45 | 46 | .. autosummary:: 47 | :toctree: generated/ 48 | :nosignatures: 49 | 50 | ~atomic_structure.AtomicStructure 51 | -------------------------------------------------------------------------------- /docs/source/modules/format.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.format 2 | 3 | File parsing tools 4 | ================== 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | ~xyz.parse_xyz 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/source/modules/index.rst: -------------------------------------------------------------------------------- 1 | ******* 2 | Modules 3 | ******* 4 | 5 | .. module:: hdnnpy 6 | 7 | .. 
toctree:: 8 | :maxdepth: 1 9 | 10 | dataset 11 | format 12 | model 13 | preprocess 14 | training 15 | utils 16 | -------------------------------------------------------------------------------- /docs/source/modules/model.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.model 2 | 3 | Neural network potential models 4 | =============================== 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | ~models.HighDimensionalNNP 11 | ~models.MasterNNP 12 | ~models.SubNNP 13 | -------------------------------------------------------------------------------- /docs/source/modules/preprocess.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.preprocess 2 | 3 | Pre-processing of dataset 4 | ========================= 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | ~pca.PCA 11 | ~scaling.Scaling 12 | ~standardization.Standardization 13 | 14 | Pre-processing base class 15 | ------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated/ 19 | :nosignatures: 20 | 21 | ~preprocess_base.PreprocessBase 22 | -------------------------------------------------------------------------------- /docs/source/modules/training.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.training 2 | 3 | Chainer-based training tools 4 | ============================ 5 | 6 | Custom training extensions 7 | -------------------------- 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | :nosignatures: 12 | 13 | ~extensions.ScatterPlot 14 | ~extensions.set_log_scale 15 | 16 | 17 | Loss functions 18 | -------------- 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | :nosignatures: 23 | 24 | ~loss_function.Zeroth 25 | ~loss_function.First 26 | ~loss_function.Potential 27 | 28 | Loss function base class 29 | ------------------------ 30 | 31 | .. autosummary:: 32 | :toctree: generated/ 33 | :nosignatures: 34 | 35 | ~loss_function.loss_function_base.LossFunctionBase 36 | 37 | 38 | Training manager 39 | ---------------- 40 | 41 | .. autosummary:: 42 | :toctree: generated/ 43 | :nosignatures: 44 | 45 | ~manager.Manager 46 | 47 | 48 | Updater 49 | ------- 50 | 51 | .. autosummary:: 52 | :toctree: generated/ 53 | :nosignatures: 54 | 55 | ~updater.Updater 56 | -------------------------------------------------------------------------------- /docs/source/modules/utils.rst: -------------------------------------------------------------------------------- 1 | .. module:: hdnnpy.utils 2 | 3 | Utilities 4 | ========= 5 | 6 | .. autosummary:: 7 | :toctree: generated/ 8 | :nosignatures: 9 | 10 | MPI 11 | pprint 12 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | How to use HDNNP 2 | ================ 3 | 4 | .. contents:: 5 | :local: 6 | :depth: 2 7 | 8 | 9 | Data generation 10 | ----------------- 11 | 12 | | Usually, HDNNP is used to reduce computational cost by learning the results of 13 | DFT (Density Functional Theory) calculations, which are highly accurate but expensive. 14 | | Therefore, the first step is to generate a training dataset using DFT calculations such as ab-initio MD. 15 | 16 | 17 | 18 | Pre-processing 19 | ----------------- 20 | 21 | | The HDNNP training application supports only the .xyz file format.
22 | | We prepare a python script to convert the output file of VASP such as ``OUTCAR`` to .xyz format file, 23 | but in the same way you can convert the output of other DFT calculation program to .xyz format file. 24 | | Inside this program, file format conversion is performed using `ASE`_ package. 25 | 26 | .. _ASE: https://wiki.fysik.dtu.dk/ase/ase/io/io.html 27 | 28 | 29 | 30 | 31 | Training 32 | ----------------- 33 | 34 | Configuration 35 | ^^^^^^^^^^^^^^^^^ 36 | 37 | A default configuration file for training is located in ``examples/training_config.py``. 38 | 39 | ``training_config.py`` consists of some subclasses that inherits ``traitlets.config.Configurable``: 40 | 41 | * c.Application.xxx 42 | * c.TrainingApplication.xxx 43 | * c.DatasetConfig.xxx 44 | * c.ModelConfig.xxx 45 | * c.TrainingConfig.xxx 46 | 47 | 48 | Following configurations are required, and remaining configurations are optional. 49 | 50 | * c.DatasetConfig.parameters 51 | * c.ModelConfig.layers 52 | * c.TrainingConfig.data_file 53 | * c.TrainingConfig.batch_size 54 | * c.TrainingConfig.epoch 55 | * c.TrainingConfig.order 56 | * c.TrainingConfig.loss_function 57 | * c.TrainingConfig.interval 58 | * c.TrainingConfig.patients 59 | 60 | For details of each setting, see ``training_config.py`` 61 | 62 | 63 | Command line interface 64 | ^^^^^^^^^^^^^^^^^^^^^^ 65 | 66 | Execute the following command in the directory where ``training_config.py`` is located. 67 | 68 | :: 69 | 70 | $ hdnnpy train 71 | 72 | .. note:: 73 | 74 | | Currently, if output directory set by ``c.TrainingConfig.out_dir`` already exists, it overwrites the existing file in the directory. 75 | | If you want to avoid this, please change ``c.TrainingConfig.out_dir`` for each execution. 76 | 77 | 78 | 79 | 80 | 81 | Prediction 82 | ----------------- 83 | 84 | Configuration 85 | ^^^^^^^^^^^^^^^^^ 86 | 87 | A default configuration file for prediction is located in ``examples/prediction_config.py``. 88 | 89 | ``prediction_config.py`` consists of some subclasses that inherits ``traitlets.config.Configurable``: 90 | 91 | * c.Application.xxx 92 | * c.PredictionApplication.xxx 93 | * c.PredictionConfig.xxx 94 | 95 | 96 | Following configurations are required, and remaining configurations are optional. 97 | 98 | * c.PredictionConfig.data_file 99 | * c.PredictionConfig.order 100 | 101 | For details of each setting, see ``prediction_config.py`` 102 | 103 | 104 | Command line interface 105 | ^^^^^^^^^^^^^^^^^^^^^^ 106 | 107 | Execute the following command in the directory where ``prediction_config.py`` is located. 108 | 109 | :: 110 | 111 | $ hdnnpy predict 112 | 113 | 114 | Post-processing 115 | ----------------- 116 | 117 | | It is possible to calculate MD simulation with LAMMPS using trained HDNNP. 118 | | However, it is also under development. 119 | | We welcome your comments and suggestions. 120 | 121 | `HDNNP-LAMMPS interface program `_ 122 | 123 | 124 | Command line interface 125 | ^^^^^^^^^^^^^^^^^^^^^^ 126 | 127 | Execute the following command. 128 | 129 | :: 130 | 131 | $ hdnnpy convert 132 | 133 | | 2 command line options are available, and no config file is used in this command. 134 | | To see details of these options, use 135 | 136 | :: 137 | 138 | $ hdnnpy convert -h 139 | -------------------------------------------------------------------------------- /examples/prediction_config.py: -------------------------------------------------------------------------------- 1 | # Configuration file for hdnnpy predict. 
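#
# Example (illustrative only, not generated by hdnnpy): as described in the
# usage documentation, a minimal prediction run typically only sets the two
# required options below. The data file path is a hypothetical example.
#
# c.PredictionConfig.data_file = 'data/GaN.xyz'
# c.PredictionConfig.order = 1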
2 | 3 | #------------------------------------------------------------------------------ 4 | # Application(SingletonConfigurable) configuration 5 | #------------------------------------------------------------------------------ 6 | 7 | ## This is an application. 8 | 9 | ## The date format used by logging formatters for %(asctime)s 10 | #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' 11 | 12 | ## The Logging format template 13 | #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' 14 | 15 | ## Set the log level by value or name. 16 | #c.Application.log_level = 30 17 | 18 | #------------------------------------------------------------------------------ 19 | # PredictionApplication(Application) configuration 20 | #------------------------------------------------------------------------------ 21 | 22 | ## Set verbose mode 23 | #c.PredictionApplication.verbose = False 24 | 25 | #------------------------------------------------------------------------------ 26 | # PredictionConfig(Configurable) configuration 27 | #------------------------------------------------------------------------------ 28 | 29 | ## Path to a data file used for HDNNP prediction. Only .xyz file format is 30 | # supported. 31 | #c.PredictionConfig.data_file = '.' 32 | 33 | ## File format to output HDNNP predition result 34 | #c.PredictionConfig.dump_format = '.npz' 35 | 36 | ## Path to directory to load training output files 37 | #c.PredictionConfig.load_dir = 'output' 38 | 39 | ## Order of differentiation used for calculation of descriptor & property 40 | # datasets and HDNNP prediction. ex.) 0: energy, 1: force, for interatomic 41 | # potential 42 | #c.PredictionConfig.order = 0 43 | 44 | ## List of dataset tags. Use dataset for HDNNP training in this order. Pattern 45 | # matching is available. 46 | #c.PredictionConfig.tags = ['*'] 47 | -------------------------------------------------------------------------------- /examples/training_config.py: -------------------------------------------------------------------------------- 1 | # Configuration file for hdnnpy train. 2 | 3 | #------------------------------------------------------------------------------ 4 | # Application(SingletonConfigurable) configuration 5 | #------------------------------------------------------------------------------ 6 | 7 | ## This is an application. 8 | 9 | ## The date format used by logging formatters for %(asctime)s 10 | #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' 11 | 12 | ## The Logging format template 13 | #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' 14 | 15 | ## Set the log level by value or name. 16 | #c.Application.log_level = 30 17 | 18 | #------------------------------------------------------------------------------ 19 | # TrainingApplication(Application) configuration 20 | #------------------------------------------------------------------------------ 21 | 22 | ## Set verbose mode 23 | #c.TrainingApplication.verbose = False 24 | 25 | #------------------------------------------------------------------------------ 26 | # DatasetConfig(Configurable) configuration 27 | #------------------------------------------------------------------------------ 28 | 29 | ## Name of descriptor dataset used for input of HDNNP 30 | #c.DatasetConfig.descriptor = 'symmetry_function' 31 | 32 | ## Parameters used for the specified descriptor dataset. Set as Dict{key: 33 | # List[Tuple(parameters)]}. This will be passed to descriptor dataset as keyword 34 | # arguments. ex.) 
{"type2": [(5.0, 0.01, 2.0)]} 35 | #c.DatasetConfig.parameters = {} 36 | 37 | ## Preprocess to be applied for input of HDNNP (=descriptor). Set as 38 | # List[Tuple(Str(name), Tuple(args), Dict{kwargs})]. Each preprocess instance 39 | # will be initialized with (*args, **kwargs). ex.) [("pca", (20,), {})] 40 | #c.DatasetConfig.preprocesses = [] 41 | 42 | ## Name of property dataset to be optimized by HDNNP 43 | #c.DatasetConfig.property_ = 'interatomic_potential' 44 | 45 | ## If the given data file and the loaded dataset are not compatible, 46 | # automatically recalculate and overwrite it. 47 | #c.DatasetConfig.remake = False 48 | 49 | #------------------------------------------------------------------------------ 50 | # ModelConfig(Configurable) configuration 51 | #------------------------------------------------------------------------------ 52 | 53 | ## Hidden layers of a neural network constituting HDNNP. Set as List[Tuple(Int(# 54 | # of nodes), Str(activation function))]. 55 | #c.ModelConfig.hidden_layers = [] 56 | 57 | #------------------------------------------------------------------------------ 58 | # TrainingConfig(Configurable) configuration 59 | #------------------------------------------------------------------------------ 60 | 61 | ## Number of data within each batch 62 | #c.TrainingConfig.batch_size = 0 63 | 64 | ## Path to a data file used for HDNNP training. Only .xyz file format is 65 | # supported. 66 | #c.TrainingConfig.data_file = '.' 67 | 68 | ## Upper bound of the number of training loops 69 | #c.TrainingConfig.epoch = 0 70 | 71 | ## Lower limit of learning rate when it decays 72 | #c.TrainingConfig.final_lr = 1e-06 73 | 74 | ## Initial learning rate 75 | #c.TrainingConfig.init_lr = 0.001 76 | 77 | ## Length of interval of training epochs used for checking metrics value 78 | #c.TrainingConfig.interval = 0 79 | 80 | ## Coefficient for the weight decay in L1 regularization 81 | #c.TrainingConfig.l1_norm = 0.0 82 | 83 | ## Coefficient for the weight decay in L2 regularization 84 | #c.TrainingConfig.l2_norm = 0.0 85 | 86 | ## Set chainer training extension `LogReport` if this flag is set 87 | #c.TrainingConfig.log_report = True 88 | 89 | ## Name of loss function and parameters of it. Set as Tuple(Str(name), 90 | # Dict{parameters}). ex.) ("first", {"mixing_beta": 0.5}) 91 | #c.TrainingConfig.loss_function = () 92 | 93 | ## Rate of exponential decay of learning rate 94 | #c.TrainingConfig.lr_decay = 0.0 95 | 96 | ## Path to output directory. NOTE: Currently, all output files will be 97 | # overwritten. 98 | #c.TrainingConfig.out_dir = 'output' 99 | 100 | ## Counts to let `chainer.training.triggers.EarlyStoppingTrigger` be patient 101 | #c.TrainingConfig.patients = 0 102 | 103 | ## Set chainer training extension `PlotReport` if this flag is set 104 | #c.TrainingConfig.plot_report = False 105 | 106 | ## Set chainer training extension `PrintReport` if this flag is set 107 | #c.TrainingConfig.print_report = True 108 | 109 | ## Set chainer training extension `ScatterPlot` if this flag is set 110 | #c.TrainingConfig.scatter_plot = False 111 | 112 | ## List of dataset tags. Use dataset for HDNNP training in this order. Pattern 113 | # matching is available. 114 | #c.TrainingConfig.tags = ['*'] 115 | 116 | ## Ratio to use for training data. The rest are used for test data. 
117 | #c.TrainingConfig.train_test_ratio = 0.9 118 | -------------------------------------------------------------------------------- /hdnnpy/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """High Dimensional Neural Network Potential package.""" 4 | 5 | __all__ = [ 6 | '__version__', 7 | ] 8 | 9 | from ._version import __version__ 10 | -------------------------------------------------------------------------------- /hdnnpy/__main__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from hdnnpy.cli import main 4 | 5 | 6 | if __name__ == '__main__': 7 | main() 8 | -------------------------------------------------------------------------------- /hdnnpy/_version.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | __version__ = '0.5.1.dev' 4 | -------------------------------------------------------------------------------- /hdnnpy/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | __all__ = [ 4 | 'main', 5 | ] 6 | 7 | from hdnnpy.cli.main import main 8 | -------------------------------------------------------------------------------- /hdnnpy/cli/configurables.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import pathlib 4 | 5 | from traitlets import ( 6 | Bool, CaselessStrEnum, Dict, Float, 7 | Integer, List, TraitType, Tuple, Unicode, 8 | ) 9 | import traitlets.config 10 | 11 | 12 | class Path(TraitType): 13 | default_value = '.' 14 | info_text = 'a pathlib.Path instance' 15 | 16 | def validate(self, obj, value): 17 | if isinstance(value, pathlib.Path): 18 | return value.absolute() 19 | elif isinstance(value, str): 20 | return pathlib.Path(value).absolute() 21 | else: 22 | self.error(obj, value) 23 | 24 | 25 | class Configurable(traitlets.config.Configurable): 26 | def dump(self): 27 | dic = {key: value for key, value in self._trait_values.items() 28 | if key not in ['config', 'parent']} 29 | return dic 30 | 31 | 32 | class DatasetConfig(Configurable): 33 | # not configurable 34 | n_sample = Integer( 35 | help='Total number of data counted internally') 36 | 37 | # configurable 38 | descriptor = CaselessStrEnum( 39 | ['symmetry_function', 'weighted_symmetry_function'], 40 | default_value='symmetry_function', 41 | help='Name of descriptor dataset used for input of HDNNP' 42 | ).tag(config=True) 43 | parameters = Dict( 44 | trait=List, 45 | help='Parameters used for the specified descriptor dataset. ' 46 | 'Set as Dict{key: List[Tuple(parameters)]}. ' 47 | 'This will be passed to descriptor dataset as keyword arguments. ' 48 | 'ex.) {"type2": [(5.0, 0.01, 2.0)]}' 49 | ).tag(config=True) 50 | property_ = CaselessStrEnum( 51 | ['interatomic_potential'], 52 | default_value='interatomic_potential', 53 | help='Name of property dataset to be optimized by HDNNP' 54 | ).tag(config=True) 55 | preprocesses = List( 56 | trait=Tuple( 57 | CaselessStrEnum(['pca', 'scaling', 'standardization']), 58 | Tuple(), 59 | Dict() 60 | ), 61 | help='Preprocess to be applied for input of HDNNP (=descriptor). ' 62 | 'Set as List[Tuple(Str(name), Tuple(args), Dict{kwargs})]. ' 63 | 'Each preprocess instance will be initialized with ' 64 | '(*args, **kwargs). ' 65 | 'ex.) 
[("pca", (20,), {})]' 66 | ).tag(config=True) 67 | remake = Bool( 68 | default_value=False, 69 | help='If the given data file and the loaded dataset are not ' 70 | 'compatible, automatically recalculate and overwrite it.' 71 | ).tag(config=True) 72 | 73 | 74 | class ModelConfig(Configurable): 75 | # not configurable 76 | n_input = Integer( 77 | help='Number of nodes of input layer.') 78 | n_output = Integer( 79 | help='Number of nodes of output layer.') 80 | 81 | # configurable 82 | hidden_layers = List( 83 | trait=Tuple(Integer, Unicode), 84 | help='Hidden layers of a neural network constituting HDNNP. ' 85 | 'Set as List[Tuple(Int(# of nodes), Str(activation function))]. ' 86 | ).tag(config=True) 87 | 88 | 89 | class TrainingConfig(Configurable): 90 | # not configurable 91 | elements = List( 92 | trait=Unicode, 93 | help='All elements contained in the dataset listed internally') 94 | 95 | # configurable 96 | data_file = Path( 97 | help='Path to a data file used for HDNNP training. ' 98 | 'Only .xyz file format is supported.' 99 | ).tag(config=True) 100 | tags = List( 101 | trait=Unicode, 102 | default_value=['*'], 103 | help='List of dataset tags. ' 104 | 'Use dataset for HDNNP training in this order. ' 105 | 'Pattern matching is available.' 106 | ).tag(config=True) 107 | out_dir = Path( 108 | default_value='output', 109 | help='Path to output directory. ' 110 | 'NOTE: Currently, all output files will be overwritten.' 111 | ).tag(config=True) 112 | train_test_ratio = Float( 113 | default_value=0.9, 114 | help='Ratio to use for training data. ' 115 | 'The rest are used for test data.' 116 | ).tag(config=True) 117 | # chainer training 118 | loss_function = Tuple( 119 | CaselessStrEnum(['zeroth', 'first', 'potential']), 120 | Dict(), 121 | help='Name of loss function and parameters of it. ' 122 | 'Set as Tuple(Str(name), Dict{parameters}). ' 123 | 'ex.) 
("first", {"mixing_beta": 0.5})' 124 | ).tag(config=True) 125 | init_lr = Float( 126 | default_value=1.0e-3, 127 | help='Initial learning rate' 128 | ).tag(config=True) 129 | final_lr = Float( 130 | default_value=1.0e-6, 131 | help='Lower limit of learning rate when it decays' 132 | ).tag(config=True) 133 | lr_decay = Float( 134 | help='Rate of exponential decay of learning rate' 135 | ).tag(config=True) 136 | l1_norm = Float( 137 | help='Coefficient for the weight decay in L1 regularization' 138 | ).tag(config=True) 139 | l2_norm = Float( 140 | help='Coefficient for the weight decay in L2 regularization' 141 | ).tag(config=True) 142 | interval = Integer( 143 | help='Length of interval of training epochs used for checking metrics' 144 | ' value' 145 | ).tag(config=True) 146 | patients = Integer( 147 | help='Counts to let `chainer.training.triggers.EarlyStoppingTrigger`' 148 | ' be patient' 149 | ).tag(config=True) 150 | epoch = Integer( 151 | help='Upper bound of the number of training loops' 152 | ).tag(config=True) 153 | batch_size = Integer( 154 | help='Number of data within each batch' 155 | ).tag(config=True) 156 | # chainer extension flags 157 | scatter_plot = Bool( 158 | False, 159 | help='Set chainer training extension `ScatterPlot` if this flag is set' 160 | ).tag(config=True) 161 | log_report = Bool( 162 | True, 163 | help='Set chainer training extension `LogReport` if this flag is set' 164 | ).tag(config=True) 165 | print_report = Bool( 166 | True, 167 | help='Set chainer training extension `PrintReport` if this flag is set' 168 | ).tag(config=True) 169 | plot_report = Bool( 170 | False, 171 | help='Set chainer training extension `PlotReport` if this flag is set' 172 | ).tag(config=True) 173 | 174 | 175 | class PredictionConfig(Configurable): 176 | # not configurable 177 | elements = List( 178 | trait=Unicode, 179 | help='All elements contained in the dataset listed internally') 180 | 181 | # configurable 182 | data_file = Path( 183 | help='Path to a data file used for HDNNP prediction. ' 184 | 'Only .xyz file format is supported.' 185 | ).tag(config=True) 186 | tags = List( 187 | trait=Unicode, 188 | default_value=['*'], 189 | help='List of dataset tags. ' 190 | 'Use dataset for HDNNP training in this order. ' 191 | 'Pattern matching is available.' 192 | ).tag(config=True) 193 | load_dir = Path( 194 | default_value='output', 195 | help='Path to directory to load training output files' 196 | ).tag(config=True) 197 | order = Integer( 198 | help='Order of differentiation used for calculation ' 199 | 'of descriptor & property datasets and HDNNP prediction. ' 200 | 'ex.) 
0: energy, 1: force, for interatomic potential' 201 | ).tag(config=True) 202 | dump_format = CaselessStrEnum( 203 | ['.npz'], 204 | default_value='.npz', 205 | help='File format to output HDNNP predition result' 206 | ).tag(config=True) 207 | -------------------------------------------------------------------------------- /hdnnpy/cli/conversion_application.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import datetime 4 | import socket 5 | import textwrap 6 | 7 | import chainer 8 | from traitlets import (CaselessStrEnum, Dict, Unicode) 9 | from traitlets.config import Application 10 | import yaml 11 | 12 | from hdnnpy import __version__ 13 | from hdnnpy.cli.configurables import (DatasetConfig, ModelConfig, Path) 14 | from hdnnpy.model import MasterNNP 15 | from hdnnpy.preprocess import PREPROCESS 16 | from hdnnpy.utils import (pprint, pyyaml_path_constructor) 17 | 18 | 19 | class ConversionApplication(Application): 20 | name = Unicode(u'hdnnpy convert') 21 | description = 'Convert output files of training to required format.' 22 | 23 | format = CaselessStrEnum( 24 | ['lammps'], 25 | default_value='lammps', 26 | help='Name of the destination format.', 27 | ).tag(config=True) 28 | load_dir = Path( 29 | default_value='output', 30 | help='Path to directory to load training output files.', 31 | ).tag(config=True) 32 | 33 | aliases = Dict({ 34 | 'format': 'ConversionApplication.format', 35 | 'load_dir': 'ConversionApplication.load_dir', 36 | }) 37 | 38 | def __init__(self, **kwargs): 39 | super().__init__(**kwargs) 40 | self.training_result = None 41 | self.dataset_config = None 42 | self.model_config = None 43 | 44 | def initialize(self, argv=None): 45 | self.parse_command_line(argv) 46 | 47 | yaml.add_constructor('Path', pyyaml_path_constructor) 48 | self.training_result = yaml.load( 49 | (self.load_dir / 'training_result.yaml').open()) 50 | self.dataset_config = DatasetConfig(**self.training_result['dataset']) 51 | self.model_config = ModelConfig(**self.training_result['model']) 52 | 53 | def start(self): 54 | tr = self.training_result 55 | dc = self.dataset_config 56 | mc = self.model_config 57 | 58 | # load preprocesses 59 | preprocesses = [] 60 | for (name, args, kwargs) in dc.preprocesses: 61 | preprocess = PREPROCESS[name](*args, **kwargs) 62 | preprocess.load( 63 | self.load_dir / 'preprocess' / f'{name}.npz', verbose=False) 64 | preprocesses.append(preprocess) 65 | # load master nnp 66 | master_nnp = MasterNNP(tr['training']['elements'], 67 | tr['model']['n_input'], 68 | mc.hidden_layers, 69 | tr['model']['n_output']) 70 | chainer.serializers.load_npz( 71 | self.load_dir / 'master_nnp.npz', master_nnp) 72 | 73 | if self.format == 'lammps': 74 | self.dump_for_lammps(preprocesses, master_nnp) 75 | 76 | def dump_for_lammps(self, preprocesses, master_nnp): 77 | dc = self.dataset_config 78 | potential_file = self.load_dir / 'lammps.nnp' 79 | with potential_file.open('w') as f: 80 | # information 81 | now = datetime.datetime.now() 82 | machine = socket.gethostname() 83 | pprint(f''' 84 | # Created by hdnnpy {__version__} ({now}). 85 | # All parameters are read from [{machine}] {self.load_dir}. 
86 | # Ref: https://github.com/ogura-edu/HDNNP 87 | ''', stream=f) 88 | 89 | # descriptor 90 | pprint(f''' 91 | # {dc.descriptor} parameters 92 | {len(dc.parameters)} 93 | ''', stream=f) 94 | for name, params in dc.parameters.items(): 95 | params_str = ('\n'+' '*16).join([' '.join(map(str, row)) 96 | for row in params]) 97 | pprint(f''' 98 | {name} {len(params)} 99 | {params_str} 100 | ''', stream=f) 101 | 102 | # preprocess 103 | pprint(f''' 104 | # pre-processing parameters 105 | {len(preprocesses)} 106 | ''', stream=f) 107 | for preprocess in preprocesses: 108 | pprint(f''' 109 | {preprocess.name} 110 | 111 | {textwrap.indent( 112 | textwrap.dedent(preprocess.dump_params()), ' '*16)} 113 | ''', stream=f) 114 | 115 | # model 116 | pprint(f''' 117 | # neural network parameters 118 | {len(master_nnp[0])} 119 | 120 | {textwrap.indent( 121 | textwrap.dedent(master_nnp.dump_params()), ' '*12)} 122 | ''', stream=f) 123 | -------------------------------------------------------------------------------- /hdnnpy/cli/main.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import os 4 | from pathlib import Path 5 | import sys 6 | 7 | from traitlets import Unicode 8 | from traitlets.config import Application 9 | 10 | from hdnnpy.cli.conversion_application import ConversionApplication 11 | from hdnnpy.cli.prediction_application import PredictionApplication 12 | from hdnnpy.cli.training_application import TrainingApplication 13 | from hdnnpy.utils import MPI 14 | 15 | 16 | class HDNNPApplication(Application): 17 | name = Unicode(u'hdnnpy') 18 | 19 | classes = [ 20 | ConversionApplication, 21 | PredictionApplication, 22 | TrainingApplication, 23 | ] 24 | 25 | subcommands = { 26 | 'convert': (ConversionApplication, ConversionApplication.description), 27 | 'predict': (PredictionApplication, PredictionApplication.description), 28 | 'train': (TrainingApplication, TrainingApplication.description), 29 | } 30 | 31 | def initialize(self, argv=None): 32 | if MPI.rank != 0: 33 | sys.stdout = Path(os.devnull).open('w') 34 | assert sys.argv[1] in self.subcommands, \ 35 | 'Only `hdnnpy train` and `hdnnpy predict` `hdnnpy convert` are' \ 36 | ' available.' 
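
As a usage note for the conversion application defined above: once a training run has produced `training_result.yaml`, `master_nnp.npz` and the preprocess files in its output directory, the command `hdnnpy convert --format lammps --load_dir output` writes `lammps.nnp` into that directory. A rough programmatic equivalent (a simplified sketch, not an officially supported API) would be:

```
from hdnnpy.cli.conversion_application import ConversionApplication

app = ConversionApplication()
app.initialize(['--format', 'lammps', '--load_dir', 'output'])
app.start()   # writes <load_dir>/lammps.nnp from the stored training results
```
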
37 | super().initialize(argv) 38 | 39 | 40 | main = HDNNPApplication.launch_instance 41 | -------------------------------------------------------------------------------- /hdnnpy/cli/prediction_application.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import fnmatch 4 | import shutil 5 | 6 | import chainer 7 | import numpy as np 8 | from traitlets import (Bool, Dict, List, Unicode) 9 | from traitlets.config import Application 10 | import yaml 11 | 12 | from hdnnpy.cli.configurables import ( 13 | DatasetConfig, ModelConfig, Path, PredictionConfig, 14 | ) 15 | from hdnnpy.dataset import (AtomicStructure, DatasetGenerator, HDNNPDataset) 16 | from hdnnpy.dataset.descriptor import DESCRIPTOR_DATASET 17 | from hdnnpy.dataset.property import PROPERTY_DATASET 18 | from hdnnpy.format import parse_xyz 19 | from hdnnpy.model import (HighDimensionalNNP, MasterNNP) 20 | from hdnnpy.preprocess import PREPROCESS 21 | from hdnnpy.utils import (MPI, pprint, pyyaml_path_constructor) 22 | 23 | 24 | class PredictionApplication(Application): 25 | name = Unicode(u'hdnnpy predict') 26 | description = ('Predict properties for atomic structures using trained' 27 | ' HDNNP.') 28 | 29 | verbose = Bool( 30 | False, 31 | help='Set verbose mode' 32 | ).tag(config=True) 33 | 34 | classes = List([PredictionConfig]) 35 | 36 | config_file = Path( 37 | 'prediction_config.py', 38 | help='Load this config file') 39 | 40 | aliases = Dict({ 41 | 'log_level': 'Application.log_level', 42 | }) 43 | 44 | flags = Dict({ 45 | 'verbose': ({ 46 | 'PredictionApplication': { 47 | 'verbose': True, 48 | }, 49 | }, 'Set verbose mode'), 50 | 'v': ({ 51 | 'PredictionApplication': { 52 | 'verbose': True, 53 | }, 54 | }, 'Set verbose mode'), 55 | 'debug': ({ 56 | 'Application': { 57 | 'log_level': 10, 58 | }, 59 | }, 'Set log level to DEBUG'), 60 | }) 61 | 62 | def __init__(self, **kwargs): 63 | super().__init__(**kwargs) 64 | self.dataset_config = None 65 | self.model_config = None 66 | self.prediction_config = None 67 | 68 | def initialize(self, argv=None): 69 | self.parse_command_line(argv) 70 | self.load_config_file(self.config_file) 71 | self.prediction_config = PredictionConfig(config=self.config) 72 | 73 | yaml.add_constructor('Path', pyyaml_path_constructor) 74 | training_result = yaml.load( 75 | (self.prediction_config.load_dir / 'training_result.yaml').open()) 76 | self.dataset_config = DatasetConfig(**training_result['dataset']) 77 | self.model_config = ModelConfig(**training_result['model']) 78 | 79 | def start(self): 80 | pc = self.prediction_config 81 | shutil.copy(self.config_file, pc.load_dir / self.config_file.name) 82 | tag_xyz_map, pc.elements = parse_xyz( 83 | pc.data_file, save=False, verbose=self.verbose) 84 | datasets = self.construct_datasets(tag_xyz_map) 85 | datasets = DatasetGenerator(*datasets).all() 86 | if MPI.rank == 0: 87 | results = self.predict(datasets) 88 | self.dump_result(results) 89 | 90 | def construct_datasets(self, tag_xyz_map): 91 | dc = self.dataset_config 92 | mc = self.model_config 93 | pc = self.prediction_config 94 | 95 | preprocesses = [] 96 | for (name, args, kwargs) in dc.preprocesses: 97 | preprocess = PREPROCESS[name](*args, **kwargs) 98 | preprocess.load( 99 | pc.load_dir / 'preprocess' / f'{name}.npz', 100 | verbose=self.verbose) 101 | preprocesses.append(preprocess) 102 | 103 | datasets = [] 104 | for pattern in pc.tags: 105 | for tag in fnmatch.filter(tag_xyz_map, pattern): 106 | if self.verbose: 107 | pprint(f'Construct 
sub dataset tagged as "{tag}"') 108 | tagged_xyz = tag_xyz_map.pop(tag) 109 | structures = AtomicStructure.read_xyz(tagged_xyz) 110 | 111 | # prepare descriptor dataset 112 | descriptor = DESCRIPTOR_DATASET[dc.descriptor]( 113 | pc.order, structures, **dc.parameters) 114 | descriptor.make(verbose=self.verbose) 115 | 116 | # prepare empty property dataset 117 | property_ = PROPERTY_DATASET[dc.property_]( 118 | pc.order, structures) 119 | 120 | # construct test dataset from descriptor & property datasets 121 | dataset = HDNNPDataset(descriptor, property_) 122 | dataset.construct( 123 | all_elements=pc.elements, preprocesses=preprocesses, 124 | shuffle=False, verbose=self.verbose) 125 | datasets.append(dataset) 126 | dc.n_sample += dataset.total_size 127 | mc.n_input = dataset.n_input 128 | mc.n_output = dataset.n_label 129 | 130 | return datasets 131 | 132 | def predict(self, datasets): 133 | mc = self.model_config 134 | pc = self.prediction_config 135 | results = [] 136 | 137 | # master model 138 | master_nnp = MasterNNP( 139 | pc.elements, mc.n_input, mc.hidden_layers, mc.n_output) 140 | chainer.serializers.load_npz( 141 | pc.load_dir / 'master_nnp.npz', master_nnp) 142 | 143 | for dataset in datasets: 144 | # hdnnp model 145 | hdnnp = HighDimensionalNNP( 146 | dataset.elemental_composition, 147 | mc.n_input, mc.hidden_layers, mc.n_output) 148 | hdnnp.sync_param_with(master_nnp) 149 | 150 | batch = chainer.dataset.concat_examples(dataset) 151 | inputs = [batch[f'inputs/{i}'] for i in range(pc.order + 1)] 152 | with chainer.using_config('train', False), \ 153 | chainer.using_config('enable_backprop', False): 154 | predictions = hdnnp.predict(inputs, pc.order) 155 | 156 | result = { 157 | **{'tag': dataset.tag}, 158 | **{property_: coefficient * prediction.data 159 | for property_, coefficient, prediction 160 | in zip(dataset.property.properties, 161 | dataset.property.coefficients, 162 | predictions)}, 163 | } 164 | results.append(result) 165 | return results 166 | 167 | def dump_result(self, results): 168 | pc = self.prediction_config 169 | result_file = pc.load_dir / f'prediction_result{pc.dump_format}' 170 | if pc.dump_format == '.npz': 171 | kv_result = {} 172 | for result in results: 173 | tag = result.pop('tag') 174 | kv_result.update({tag + '/' + key: value 175 | for key, value in result.items()}) 176 | np.savez(result_file, **kv_result) 177 | 178 | 179 | def generate_config_file(): 180 | prediction_app = PredictionApplication() 181 | prediction_app.config_file.write_text( 182 | prediction_app.generate_config_file()) 183 | -------------------------------------------------------------------------------- /hdnnpy/cli/training_application.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import fnmatch 4 | import pathlib 5 | import shutil 6 | 7 | import chainer 8 | import chainer.training.extensions as ext 9 | from chainer.training.triggers import EarlyStoppingTrigger 10 | import chainermn 11 | from traitlets import (Bool, Dict, List, Unicode) 12 | from traitlets.config import Application 13 | import yaml 14 | 15 | from hdnnpy.cli.configurables import ( 16 | DatasetConfig, ModelConfig, Path, TrainingConfig, 17 | ) 18 | from hdnnpy.dataset import (AtomicStructure, DatasetGenerator, HDNNPDataset) 19 | from hdnnpy.dataset.descriptor import DESCRIPTOR_DATASET 20 | from hdnnpy.dataset.property import PROPERTY_DATASET 21 | from hdnnpy.format import parse_xyz 22 | from hdnnpy.model import (HighDimensionalNNP, MasterNNP) 23 | from 
hdnnpy.preprocess import PREPROCESS 24 | from hdnnpy.training import ( 25 | Manager, Updater, ScatterPlot, set_log_scale, 26 | ) 27 | from hdnnpy.training.loss_function import LOSS_FUNCTION 28 | from hdnnpy.utils import (MPI, pprint, pyyaml_path_representer) 29 | 30 | 31 | class TrainingApplication(Application): 32 | name = Unicode(u'hdnnpy train') 33 | description = 'Train a HDNNP to optimize given properties.' 34 | 35 | is_resume = Bool( 36 | False, 37 | help='Resume flag used internally.') 38 | resume_dir = Path( 39 | None, 40 | allow_none=True, 41 | help='This option can be set only by command line.') 42 | verbose = Bool( 43 | False, 44 | help='Set verbose mode' 45 | ).tag(config=True) 46 | 47 | classes = List([DatasetConfig, ModelConfig, TrainingConfig]) 48 | 49 | config_file = Path( 50 | 'training_config.py', 51 | help='Load this config file') 52 | 53 | aliases = Dict({ 54 | 'resume': 'TrainingApplication.resume_dir', 55 | 'log_level': 'Application.log_level', 56 | }) 57 | 58 | flags = Dict({ 59 | 'verbose': ({ 60 | 'TrainingApplication': { 61 | 'verbose': True, 62 | }, 63 | }, 'Set verbose mode'), 64 | 'v': ({ 65 | 'TrainingApplication': { 66 | 'verbose': True, 67 | }, 68 | }, 'Set verbose mode'), 69 | 'debug': ({ 70 | 'Application': { 71 | 'log_level': 10, 72 | }, 73 | }, 'Set log level to DEBUG'), 74 | }) 75 | 76 | def __init__(self, **kwargs): 77 | super().__init__(**kwargs) 78 | self.dataset_config = None 79 | self.model_config = None 80 | self.training_config = None 81 | self.loss_function = None 82 | 83 | def initialize(self, argv=None): 84 | # temporarily set `resume_dir` configurable 85 | self.__class__.resume_dir.tag(config=True) 86 | self.parse_command_line(argv) 87 | 88 | if self.resume_dir is not None: 89 | self.is_resume = True 90 | self.config_file = self.resume_dir.with_name(self.config_file.name) 91 | self.load_config_file(self.config_file) 92 | 93 | self.dataset_config = DatasetConfig(config=self.config) 94 | self.model_config = ModelConfig(config=self.config) 95 | self.training_config = TrainingConfig(config=self.config) 96 | if self.is_resume: 97 | self.training_config.out_dir = self.resume_dir.parent 98 | name, _ = self.training_config.loss_function 99 | self.loss_function = LOSS_FUNCTION[name] 100 | 101 | def start(self): 102 | tc = self.training_config 103 | tc.out_dir.mkdir(parents=True, exist_ok=True) 104 | if not self.is_resume: 105 | shutil.copy(self.config_file, 106 | tc.out_dir / self.config_file.name) 107 | tag_xyz_map, tc.elements = parse_xyz( 108 | tc.data_file, verbose=self.verbose) 109 | datasets = self.construct_datasets(tag_xyz_map) 110 | dataset = DatasetGenerator(*datasets).holdout(tc.train_test_ratio) 111 | result = self.train(dataset) 112 | if MPI.rank == 0: 113 | self.dump_result(result) 114 | 115 | def construct_datasets(self, tag_xyz_map): 116 | dc = self.dataset_config 117 | mc = self.model_config 118 | tc = self.training_config 119 | 120 | preprocess_dir = tc.out_dir / 'preprocess' 121 | preprocess_dir.mkdir(parents=True, exist_ok=True) 122 | preprocesses = [] 123 | for (name, args, kwargs) in dc.preprocesses: 124 | preprocess = PREPROCESS[name](*args, **kwargs) 125 | if self.is_resume: 126 | preprocess.load( 127 | preprocess_dir / f'{name}.npz', verbose=self.verbose) 128 | preprocesses.append(preprocess) 129 | 130 | datasets = [] 131 | for pattern in tc.tags: 132 | for tag in fnmatch.filter(tag_xyz_map, pattern): 133 | if self.verbose: 134 | pprint(f'Construct sub dataset tagged as "{tag}"') 135 | tagged_xyz = tag_xyz_map.pop(tag) 136 | 
structures = AtomicStructure.read_xyz(tagged_xyz) 137 | 138 | # prepare descriptor dataset 139 | descriptor = DESCRIPTOR_DATASET[dc.descriptor]( 140 | self.loss_function.order['descriptor'], 141 | structures, **dc.parameters) 142 | descriptor_npz = tagged_xyz.with_name(f'{dc.descriptor}.npz') 143 | if descriptor_npz.exists(): 144 | descriptor.load( 145 | descriptor_npz, verbose=self.verbose, remake=dc.remake) 146 | else: 147 | descriptor.make(verbose=self.verbose) 148 | descriptor.save(descriptor_npz, verbose=self.verbose) 149 | 150 | # prepare property dataset 151 | property_ = PROPERTY_DATASET[dc.property_]( 152 | self.loss_function.order['property'], structures) 153 | property_npz = tagged_xyz.with_name(f'{dc.property_}.npz') 154 | if property_npz.exists(): 155 | property_.load( 156 | property_npz, verbose=self.verbose, remake=dc.remake) 157 | else: 158 | property_.make(verbose=self.verbose) 159 | property_.save(property_npz, verbose=self.verbose) 160 | 161 | # construct HDNNP dataset from descriptor & property datasets 162 | dataset = HDNNPDataset(descriptor, property_) 163 | dataset.construct( 164 | all_elements=tc.elements, preprocesses=preprocesses, 165 | shuffle=True, verbose=self.verbose) 166 | dataset.scatter() 167 | datasets.append(dataset) 168 | dc.n_sample += dataset.total_size 169 | mc.n_input = dataset.n_input 170 | mc.n_output = dataset.n_label 171 | 172 | for preprocess in preprocesses: 173 | preprocess.save( 174 | preprocess_dir / f'{preprocess.name}.npz', 175 | verbose=self.verbose) 176 | 177 | return datasets 178 | 179 | def train(self, dataset, comm=None): 180 | mc = self.model_config 181 | tc = self.training_config 182 | if comm is None: 183 | comm = chainermn.create_communicator('naive', MPI.comm) 184 | result = {'training_time': 0.0, 'observation': []} 185 | 186 | # model and optimizer 187 | master_nnp = MasterNNP( 188 | tc.elements, mc.n_input, mc.hidden_layers, mc.n_output) 189 | master_opt = chainer.optimizers.Adam(tc.init_lr) 190 | master_opt = chainermn.create_multi_node_optimizer(master_opt, comm) 191 | master_opt.setup(master_nnp) 192 | master_opt.add_hook(chainer.optimizer_hooks.Lasso(tc.l1_norm)) 193 | master_opt.add_hook(chainer.optimizer_hooks.WeightDecay(tc.l2_norm)) 194 | 195 | for training, test in dataset: 196 | tag = training.tag 197 | properties = training.property.properties 198 | 199 | # iterators 200 | train_iter = chainer.iterators.SerialIterator( 201 | training, tc.batch_size // MPI.size, repeat=True, shuffle=True) 202 | test_iter = chainer.iterators.SerialIterator( 203 | test, tc.batch_size // MPI.size, repeat=False, shuffle=False) 204 | 205 | # model 206 | hdnnp = HighDimensionalNNP( 207 | training.elemental_composition, 208 | mc.n_input, mc.hidden_layers, mc.n_output) 209 | hdnnp.sync_param_with(master_nnp) 210 | main_opt = chainer.Optimizer() 211 | main_opt = chainermn.create_multi_node_optimizer(main_opt, comm) 212 | main_opt.setup(hdnnp) 213 | 214 | # loss function 215 | _, kwargs = tc.loss_function 216 | loss_function = self.loss_function(hdnnp, properties, **kwargs) 217 | observation_keys = loss_function.observation_keys 218 | 219 | # triggers 220 | interval = (tc.interval, 'epoch') 221 | stop_trigger = EarlyStoppingTrigger( 222 | check_trigger=interval, 223 | monitor=f'val/main/{observation_keys[-1]}', 224 | patients=tc.patients, mode='min', 225 | verbose=self.verbose, max_trigger=(tc.epoch, 'epoch')) 226 | 227 | # updater and trainer 228 | updater = Updater(train_iter, 229 | {'main': main_opt, 'master': master_opt}, 230 | 
loss_func=loss_function.eval) 231 | out_dir = tc.out_dir / tag 232 | trainer = chainer.training.Trainer(updater, stop_trigger, out_dir) 233 | 234 | # extensions 235 | trainer.extend(ext.ExponentialShift('alpha', 1 - tc.lr_decay, 236 | target=tc.final_lr, 237 | optimizer=master_opt)) 238 | evaluator = chainermn.create_multi_node_evaluator( 239 | ext.Evaluator(test_iter, hdnnp, eval_func=loss_function.eval), 240 | comm) 241 | trainer.extend(evaluator, name='val') 242 | if tc.scatter_plot: 243 | trainer.extend(ScatterPlot(test, hdnnp, comm), 244 | trigger=interval) 245 | if MPI.rank == 0: 246 | if tc.log_report: 247 | trainer.extend(ext.LogReport(log_name='training.log')) 248 | if tc.print_report: 249 | trainer.extend(ext.PrintReport( 250 | ['epoch', 'iteration'] 251 | + [f'main/{key}' for key in observation_keys] 252 | + [f'val/main/{key}' for key in observation_keys])) 253 | if tc.plot_report: 254 | trainer.extend(ext.PlotReport( 255 | [f'main/{key}' for key in observation_keys], 256 | x_key='epoch', postprocess=set_log_scale, 257 | file_name='training_set.png', marker=None)) 258 | trainer.extend(ext.PlotReport( 259 | [f'val/main/{key}' for key in observation_keys], 260 | x_key='epoch', postprocess=set_log_scale, 261 | file_name='validation_set.png', marker=None)) 262 | 263 | manager = Manager(tag, trainer, result, is_snapshot=True) 264 | if self.is_resume: 265 | manager.check_to_resume(self.resume_dir.name) 266 | if manager.allow_to_run: 267 | with manager: 268 | trainer.run() 269 | 270 | if MPI.rank == 0: 271 | chainer.serializers.save_npz( 272 | tc.out_dir / 'master_nnp.npz', master_nnp) 273 | 274 | return result 275 | 276 | def dump_result(self, result): 277 | yaml.add_representer(pathlib.PosixPath, pyyaml_path_representer) 278 | result_file = self.training_config.out_dir / 'training_result.yaml' 279 | with result_file.open('w') as f: 280 | yaml.dump({ 281 | 'dataset': self.dataset_config.dump(), 282 | 'model': self.model_config.dump(), 283 | 'training': self.training_config.dump(), 284 | }, f, default_flow_style=False) 285 | yaml.dump({ 286 | 'result': result, 287 | }, f, default_flow_style=False) 288 | 289 | 290 | def generate_config_file(): 291 | training_app = TrainingApplication() 292 | training_app.config_file.write_text(training_app.generate_config_file()) 293 | -------------------------------------------------------------------------------- /hdnnpy/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Dataset tools subpackage for HDNNP.""" 4 | 5 | __all__ = [ 6 | 'AtomicStructure', 7 | 'DatasetGenerator', 8 | 'HDNNPDataset', 9 | ] 10 | 11 | from hdnnpy.dataset.atomic_structure import AtomicStructure 12 | from hdnnpy.dataset.dataset_generator import DatasetGenerator 13 | from hdnnpy.dataset.hdnnp_dataset import HDNNPDataset 14 | -------------------------------------------------------------------------------- /hdnnpy/dataset/atomic_structure.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Wrapper class of ase.Atoms.""" 4 | 5 | from ase.calculators.singlepoint import SinglePointCalculator 6 | import ase.io 7 | import ase.neighborlist 8 | import chainer 9 | import chainer.functions as F 10 | import numpy as np 11 | 12 | 13 | class AtomicStructure(object): 14 | """Wrapper class of ase.Atoms.""" 15 | def __init__(self, atoms): 16 | """ 17 | | It wraps :obj:`ase.Atoms` object to define additional methods 18 | and attributes. 
19 | | Before wrapping, it sorts atoms by element alphabetically. 20 | | It stores calculated neighbor information such as distance, 21 | indices. 22 | 23 | Args: 24 | atoms (~ase.Atoms): an object to wrap. 25 | """ 26 | tags = atoms.get_chemical_symbols() 27 | deco = sorted([(tag, i) for i, tag in enumerate(tags)]) 28 | indices = [i for tag, i in deco] 29 | self._atoms = atoms[indices] 30 | 31 | results = {} 32 | calculator = atoms.get_calculator() 33 | if calculator: 34 | for key, value in calculator.results.items(): 35 | if key in atoms.arrays: 36 | results[key] = value[indices] 37 | else: 38 | results[key] = value 39 | self._atoms.set_calculator( 40 | SinglePointCalculator(self._atoms, **results)) 41 | 42 | self._cache = {} 43 | 44 | def __getattr__(self, item): 45 | return getattr(self._atoms, item) 46 | 47 | def __getstate__(self): 48 | return self._atoms 49 | 50 | def __len__(self): 51 | return len(self._atoms) 52 | 53 | def __setstate__(self, state): 54 | self._atoms = state 55 | self._cache = {} 56 | 57 | @property 58 | def elements(self): 59 | """list [str]: Elements included in a cell.""" 60 | return sorted(set(self._atoms.get_chemical_symbols())) 61 | 62 | def clear_cache(self, cutoff_distance=None): 63 | """Clear up cached neighbor information in this instance. 64 | 65 | Args: 66 | cutoff_distance (float, optional): 67 | It clears the corresponding cached data if specified, 68 | otherwise it clears all cached data. 69 | """ 70 | if cutoff_distance: 71 | self._cache[cutoff_distance].clear() 72 | else: 73 | self._cache.clear() 74 | 75 | def get_neighbor_info(self, cutoff_distance, geometry_keys): 76 | """Calculate or return cached data. 77 | 78 | | If there is no cached data, calculate it as necessary. 79 | | The calculated result is cached, and retained unless 80 | you use :meth:`clear_cache` method. 81 | 82 | Args: 83 | cutoff_distance (float): 84 | It calculates the geometry for the neighboring atoms 85 | within this value of each atom in a cell. 86 | geometry_keys (list [str]): 87 | A list of atomic geometries to calculate between an atom 88 | and its neighboring atoms. 89 | 90 | Returns: 91 | Iterator [tuple]: Neighbor information required by 92 | ``geometry_keys`` for each atom in a cell. 93 | """ 94 | ret = [] 95 | for key in geometry_keys: 96 | if (cutoff_distance not in self._cache 97 | or key not in self._cache[cutoff_distance]): 98 | self._calculate_neighbors(cutoff_distance) 99 | ret.append(self._cache[cutoff_distance][key]) 100 | for neighbor_info in zip(*ret): 101 | yield neighbor_info 102 | 103 | @classmethod 104 | def read_xyz(cls, file_path): 105 | """Read .xyz format file and make a list of instances. 106 | 107 | Parses .xyz format file using :func:`ase.io.iread` and wraps it 108 | by this class. 109 | 110 | Args: 111 | file_path (~pathlib.Path): 112 | File path to read atomic structures. 113 | 114 | Returns: 115 | list [AtomicStructure]: Initialized instances. 
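
A short usage sketch for the two entry points above, `read_xyz` and `get_neighbor_info`; the file name is a placeholder, and the available geometry keys are the ones cached by `_calculate_neighbors` shown below:

```
from pathlib import Path
from hdnnpy.dataset import AtomicStructure

structures = AtomicStructure.read_xyz(Path('GaN.xyz'))  # placeholder path
s0 = structures[0]
# One tuple per atom in the cell; R and fc are chainer Variables holding the
# distances and cutoff-function values of that atom's neighbors within 6.0.
for R, fc in s0.get_neighbor_info(6.0, ['distance', 'cutoff_function']):
    pass
s0.clear_cache()  # drop the cached neighbor lists when finished
```
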
116 | """ 117 | return [cls(atoms) for atoms 118 | in ase.io.iread(str(file_path), index=':', format='xyz')] 119 | 120 | def _calculate_neighbors(self, cutoff_distance): 121 | """Calculate distance to one neighboring atom and store indices 122 | of neighboring atoms.""" 123 | symbols = self._atoms.get_chemical_symbols() 124 | elements = sorted(set(symbols)) 125 | atomic_numbers = self._atoms.get_atomic_numbers() 126 | index_element_map = [elements.index(element) for element in symbols] 127 | 128 | i_list, j_list, D_list = ase.neighborlist.neighbor_list( 129 | 'ijD', self._atoms, cutoff_distance) 130 | 131 | sort_indices = np.lexsort((j_list, i_list)) 132 | i_list = i_list[sort_indices] 133 | j_list = j_list[sort_indices] 134 | D_list = D_list[sort_indices] 135 | elem_list = np.array([index_element_map[idx] for idx in j_list]) 136 | 137 | i_indices = np.unique(i_list, return_index=True)[1] 138 | j_list = np.split(j_list, i_indices[1:]) 139 | distance_vector = [chainer.Variable(r.astype(np.float32)) 140 | for r in np.split(D_list, i_indices[1:])] 141 | distance = [F.sqrt(F.sum(r**2, axis=1)) for r in distance_vector] 142 | cutoff_function = [F.tanh(1.0 - R/cutoff_distance)**3 143 | for R in distance] 144 | elem_list = np.split(elem_list, i_indices[1:]) 145 | 146 | self._cache[cutoff_distance] = { 147 | 'distance_vector': distance_vector, 148 | 'distance': distance, 149 | 'cutoff_function': cutoff_function, 150 | 'element_indices': [np.searchsorted(elem, range(len(elements))) 151 | for elem in elem_list], 152 | 'j_indices': [np.searchsorted(j, range(len(symbols))) 153 | for j in j_list], 154 | 'atomic_number': [ 155 | np.apply_along_axis(lambda x: atomic_numbers[x], 0, j) 156 | for j in j_list], 157 | } 158 | -------------------------------------------------------------------------------- /hdnnpy/dataset/dataset_generator.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Deal out datasets as needed.""" 4 | 5 | from sklearn.model_selection import KFold 6 | 7 | from hdnnpy.dataset.hdnnp_dataset import HDNNPDataset 8 | 9 | 10 | class DatasetGenerator(object): 11 | """Deal out datasets as needed.""" 12 | def __init__(self, *datasets): 13 | """ 14 | Args: 15 | *datasets (HDNNPDataset): What you want to unite. 16 | """ 17 | if not datasets: 18 | raise ValueError('No dataset are given') 19 | for dataset in datasets: 20 | assert isinstance(dataset, HDNNPDataset) 21 | self._datasets = list(datasets) 22 | 23 | def all(self): 24 | """Pass all datasets an instance have. 25 | 26 | Returns: 27 | list [HDNNPDataset]: All stored datasets. 28 | """ 29 | return self._datasets 30 | 31 | def foreach(self): 32 | """Pass all datasets an instance have one by one. 33 | 34 | Returns: 35 | Iterator [HDNNPDataset]: a stored dataset object. 36 | """ 37 | for dataset in self._datasets: 38 | yield dataset 39 | 40 | def holdout(self, ratio): 41 | """Split each dataset at a certain rate and pass it 42 | 43 | Args: 44 | ratio (float): 45 | Specify the rate you want to use as training data. 46 | Remains are test data. 47 | 48 | Returns: 49 | list [tuple [HDNNPDataset, HDNNPDataset]]: 50 | All stored dataset split by specified ratio into training 51 | and test data. 
52 | """ 53 | split = [] 54 | for dataset in self._datasets: 55 | s = int(dataset.partial_size * ratio) 56 | train = dataset.take(slice(None, s, None)) 57 | test = dataset.take(slice(s, None, None)) 58 | assert len(train) > 0 59 | assert len(test) > 0 60 | split.append((train, test)) 61 | return split 62 | 63 | def kfold(self, kfold): 64 | """Split each dataset almost equally and pass it for cross 65 | validation. 66 | 67 | Args: 68 | kfold (int): Number of folds to split dataset. 69 | 70 | Returns: 71 | Iterator [list [tuple [HDNNPDataset, HDNNPDataset]]]: 72 | All stored dataset split into training and test data. 73 | It iterates k times while changing parts used for test data. 74 | """ 75 | kf = KFold(n_splits=kfold) 76 | kfold_indices = [kf.split(range(dataset.partial_size)) 77 | for dataset in self._datasets] 78 | 79 | for indices in zip(*kfold_indices): 80 | split = [] 81 | for dataset, (train_idx, test_idx) in zip(self._datasets, indices): 82 | train = dataset.take(train_idx) 83 | test = dataset.take(test_idx) 84 | assert len(train) > 0 85 | assert len(test) > 0 86 | split.append((train, test)) 87 | yield split 88 | -------------------------------------------------------------------------------- /hdnnpy/dataset/descriptor/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Descriptor dataset subpackage.""" 4 | 5 | __all__ = [ 6 | 'DESCRIPTOR_DATASET', 7 | ] 8 | 9 | from hdnnpy.dataset.descriptor.symmetry_function_dataset import ( 10 | SymmetryFunctionDataset) 11 | from hdnnpy.dataset.descriptor.weighted_symmetry_function_dataset import ( 12 | WeightedSymmetryFunctionDataset) 13 | 14 | DESCRIPTOR_DATASET = { 15 | SymmetryFunctionDataset.name: SymmetryFunctionDataset, 16 | WeightedSymmetryFunctionDataset.name: WeightedSymmetryFunctionDataset, 17 | } 18 | -------------------------------------------------------------------------------- /hdnnpy/dataset/descriptor/descriptor_dataset_base.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Base class of atomic structure based descriptor dataset. 4 | 5 | If you want to add new descriptor to extend HDNNP, inherits this base 6 | class. 7 | """ 8 | 9 | from abc import (ABC, abstractmethod) 10 | 11 | import numpy as np 12 | from tqdm import tqdm 13 | 14 | from hdnnpy.utils import (MPI, pprint, recv_chunk, send_chunk) 15 | 16 | 17 | class DescriptorDatasetBase(ABC): 18 | """Base class of atomic structure based descriptor dataset.""" 19 | DESCRIPTORS = [] 20 | """list [str]: Names of descriptors for each derivative order.""" 21 | name = None 22 | """str: Name of this descriptor class.""" 23 | 24 | def __init__(self, order, structures): 25 | """ 26 | Common instance variables for descriptor datasets are 27 | initialized. 28 | 29 | Args: 30 | order (int): Derivative order of descriptor to calculate. 31 | structures (list [AtomicStructure]): 32 | Descriptors are calculated for these atomic structures. 
33 | """ 34 | self._order = order 35 | self._descriptors = self.DESCRIPTORS[: order+1] 36 | self._elemental_composition = structures[0].get_chemical_symbols() 37 | self._elements = sorted(set(self._elemental_composition)) 38 | self._length = len(structures) 39 | self._slices = [slice(i[0], i[-1]+1) 40 | for i in np.array_split(range(self._length), MPI.size)] 41 | self._structures = structures[self._slices[MPI.rank]] 42 | self._tag = structures[0].info['tag'] 43 | self._dataset = [] 44 | self._feature_keys = [] 45 | 46 | def __getitem__(self, item): 47 | """Return descriptor data this instance has. 48 | 49 | If ``item`` is string, it returns corresponding descriptor. 50 | Available keys can be obtained by ``descriptors`` attribute. 51 | Otherwise, it returns a list of descriptor sliced by ``item``. 52 | """ 53 | if isinstance(item, str): 54 | try: 55 | index = self._descriptors.index(item) 56 | except ValueError: 57 | raise KeyError(item) from None 58 | return self._dataset[index] 59 | else: 60 | return [data[item] for data in self._dataset] 61 | 62 | def __len__(self): 63 | """Number of atomic structures given at initialization.""" 64 | return self._length 65 | 66 | @property 67 | def descriptors(self): 68 | """list [str]: Names of descriptors this instance have.""" 69 | return self._descriptors 70 | 71 | @property 72 | def elemental_composition(self): 73 | """list [str]: Elemental composition of atomic structures given 74 | at initialization.""" 75 | return self._elemental_composition 76 | 77 | @property 78 | def elements(self): 79 | """list [str]: Elements of atomic structures given at 80 | initialization.""" 81 | return self._elements 82 | 83 | @property 84 | def feature_keys(self): 85 | """list [str]: Unique keys of feature dimension.""" 86 | return self._feature_keys 87 | 88 | @property 89 | def has_data(self): 90 | """bool: True if success to load or make dataset, 91 | False otherwise.""" 92 | return len(self._dataset) == self._order + 1 93 | 94 | @property 95 | def n_feature(self): 96 | """int: Length of feature dimension.""" 97 | return len(self._feature_keys) 98 | 99 | @property 100 | def order(self): 101 | """int: Derivative order of descriptor to calculate.""" 102 | return self._order 103 | 104 | @property 105 | def tag(self): 106 | """str: Unique tag of atomic structures given at 107 | initialization. 108 | 109 | Usually, it is a form like `` ``. 110 | (ex. ``CrystalGa2N2``) 111 | """ 112 | return self._tag 113 | 114 | def clear(self): 115 | """Clear up instance variables to initial state.""" 116 | self._dataset.clear() 117 | self._feature_keys.clear() 118 | 119 | def load(self, file_path, verbose=True, remake=False): 120 | """Load dataset from .npz format file. 121 | 122 | Only root MPI process load dataset. 123 | 124 | It validates following compatibility between loaded dataset and 125 | atomic structures given at initialization. 126 | 127 | * length of data 128 | * elemental composition 129 | * elements 130 | * tag 131 | 132 | It also validates that loaded dataset satisfies requirements. 133 | 134 | * feature keys 135 | * order 136 | 137 | Args: 138 | file_path (~pathlib.Path): File path to load dataset. 139 | verbose (bool, optional): Print log to stdout. 140 | remake (bool, optional): If loaded dataset is lacking in 141 | any feature key or any descriptor, recalculate dataset 142 | from scratch and overwrite it to ``file_path``. 143 | Otherwise, it raises ValueError. 
144 | 145 | Raises: 146 | AssertionError: If loaded dataset is incompatible with 147 | atomic structures given at initialization. 148 | ValueError: If loaded dataset is lacking in any feature key 149 | or any descriptor and ``remake=False``. 150 | """ 151 | # validate compatibility between my structures and loaded dataset 152 | ndarray = np.load(file_path) 153 | assert list(ndarray['elemental_composition']) \ 154 | == self._elemental_composition 155 | assert list(ndarray['elements']) == self._elements 156 | assert ndarray['tag'].item() == self._tag 157 | assert len(ndarray[self._descriptors[0]]) == len(self) 158 | 159 | # validate lacking feature keys 160 | loaded_keys = list(ndarray['feature_keys']) 161 | lacking_keys = set(self._feature_keys) - set(loaded_keys) 162 | lacking_descriptors = set(self._descriptors) - set(ndarray) 163 | if lacking_keys or lacking_descriptors: 164 | if verbose and lacking_keys: 165 | lacking = ('\n'+' '*20).join(sorted(lacking_keys)) 166 | pprint(f''' 167 | Following feature keys are lacked in {file_path}. 168 | {lacking} 169 | ''') 170 | if verbose and lacking_descriptors: 171 | lacking = ('\n'+' '*20).join(sorted(lacking_descriptors)) 172 | pprint(f''' 173 | Following descriptors are lacked in {file_path}. 174 | {lacking} 175 | ''') 176 | if remake: 177 | if verbose: 178 | pprint('Start to recalculate dataset from scratch.') 179 | self.make(verbose=verbose) 180 | self.save(file_path, verbose=verbose) 181 | return 182 | else: 183 | raise ValueError('Please recalculate dataset from scratch.') 184 | 185 | # load dataset as much as needed 186 | if MPI.rank == 0: 187 | for i in range(self._order + 1): 188 | indices = np.array([loaded_keys.index(key) 189 | for key in self._feature_keys]) 190 | data = np.take(ndarray[self._descriptors[i]], indices, axis=2) 191 | self._dataset.append(data) 192 | 193 | if verbose: 194 | pprint(f'Successfully loaded & made needed {self.name} dataset' 195 | f' from {file_path}') 196 | 197 | def make(self, verbose=True): 198 | """Calculate & retain descriptor dataset 199 | 200 | | It calculates descriptor dataset by data-parallel using MPI 201 | communication. 202 | | The calculated dataset is retained in only root MPI process. 203 | 204 | Args: 205 | verbose (bool, optional): Print log to stdout. 206 | """ 207 | dataset = [] 208 | for structure in tqdm(self._structures, 209 | ascii=True, desc=f'Process #{MPI.rank}', 210 | leave=False, position=MPI.rank): 211 | dataset.append(self.calculate_descriptors(structure)) 212 | 213 | for data_list in zip(*dataset): 214 | shape = data_list[0].shape 215 | send_data = np.stack(data_list) 216 | del data_list 217 | if MPI.rank == 0: 218 | recv_data = np.empty((self._length, *shape), dtype=np.float32) 219 | recv_data[self._slices[0]] = send_data 220 | del send_data 221 | for i in range(1, MPI.size): 222 | recv_data[self._slices[i]] = recv_chunk(source=i) 223 | self._dataset.append(recv_data) 224 | else: 225 | send_chunk(send_data, dest=0) 226 | del send_data 227 | 228 | if verbose: 229 | pprint(f'Calculated {self.name} dataset.') 230 | 231 | def save(self, file_path, verbose=True): 232 | """Save dataset to .npz format file. 233 | 234 | Only root MPI process save dataset. 235 | 236 | Args: 237 | file_path (~pathlib.Path): File path to save dataset. 238 | verbose (bool, optional): Print log to stdout. 239 | 240 | Raises: 241 | RuntimeError: If this instance do not have any data. 
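
Since this base class is the extension point mentioned in the module docstring, a new descriptor only needs the two class attributes plus the two abstract methods defined at the end of the class. The following is a bare-bones placeholder sketch (the zero-valued descriptor is obviously not useful, and only the zeroth derivative order is handled); a real implementation would also be registered in `DESCRIPTOR_DATASET` in `hdnnpy/dataset/descriptor/__init__.py`:

```
import numpy as np
from hdnnpy.dataset.descriptor.descriptor_dataset_base import (
    DescriptorDatasetBase)


class MyDescriptorDataset(DescriptorDatasetBase):
    DESCRIPTORS = ['my_descriptor']   # one name per derivative order (only 0 here)
    name = 'my_descriptor'            # value used for DatasetConfig.descriptor

    def __init__(self, order, structures, **params):
        assert order == 0             # this sketch only provides the 0th order
        super().__init__(order, structures)
        self._params = params
        self._feature_keys = self.generate_feature_keys(self._elements)

    def calculate_descriptors(self, structure):
        # Return one array per derivative order; order 0 is (n_atom, n_feature).
        return [np.zeros((len(structure), self.n_feature), dtype=np.float32)]

    def generate_feature_keys(self, elements):
        return [f'my_descriptor:{element}' for element in elements]
```
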
242 | """ 243 | if not MPI.comm.bcast(self.has_data, root=0): 244 | raise RuntimeError(''' 245 | Cannot save dataset, since this dataset does not have any data. 246 | ''') 247 | 248 | if MPI.rank == 0: 249 | data = {descriptor: data for descriptor, data 250 | in zip(self._descriptors, self._dataset)} 251 | info = { 252 | 'elemental_composition': self._elemental_composition, 253 | 'elements': self._elements, 254 | 'feature_keys': self._feature_keys, 255 | 'tag': self._tag, 256 | } 257 | np.savez(file_path, **data, **info) 258 | if verbose: 259 | pprint(f'Successfully saved {self.name} dataset to {file_path}.') 260 | 261 | @abstractmethod 262 | def calculate_descriptors(self, structure): 263 | """Calculate required descriptors for a structure data. 264 | 265 | This is abstract method. 266 | Subclass of this base class have to override. 267 | 268 | Args: 269 | structure (AtomicStructure): 270 | A structure data to calculate descriptors. 271 | 272 | Returns: 273 | list [~numpy.ndarray]: Calculated descriptors. 274 | The length is the same as ``order`` given at initialization. 275 | """ 276 | return 277 | 278 | @abstractmethod 279 | def generate_feature_keys(self, *args, **kwargs): 280 | """Generate feature keys of current state. 281 | 282 | This is abstract method. 283 | Subclass of this base class have to override. 284 | 285 | Returns: 286 | list [str]: Unique keys of feature dimension. 287 | """ 288 | return 289 | -------------------------------------------------------------------------------- /hdnnpy/dataset/descriptor/symmetry_function_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Symmetry function dataset for descriptor of HDNNP.""" 4 | 5 | from itertools import combinations_with_replacement 6 | 7 | import chainer 8 | import chainer.functions as F 9 | import numpy as np 10 | 11 | from hdnnpy.dataset.descriptor.descriptor_dataset_base import ( 12 | DescriptorDatasetBase) 13 | 14 | 15 | class SymmetryFunctionDataset(DescriptorDatasetBase): 16 | """Symmetry function dataset for descriptor of HDNNP.""" 17 | DESCRIPTORS = ['sym_func', 'derivative', 'second_derivative'] 18 | """list [str]: Names of descriptors for each derivative order.""" 19 | name = 'symmetry_function' 20 | """str: Name of this descriptor class.""" 21 | 22 | def __init__(self, order, structures, **func_param_map): 23 | """ 24 | It accepts 0 or 2 for ``order``. 25 | 26 | | Each symmetry function requires following parameters. 27 | | Pass parameters you want to use for the dataset as keyword 28 | arguments ``func_param_map``. 29 | 30 | * type1: :math:`R_c` 31 | * type2: :math:`R_c, \eta, R_s` 32 | * type4: :math:`R_c, \eta, \lambda, \zeta` 33 | 34 | Args: 35 | order (int): passed to super class. 36 | structures (list [AtomicStructure]): passed to super class. 37 | **func_param_map (list [tuple]): 38 | parameter sets for each type of symmetry function. 39 | 40 | References: 41 | Symmetry function was proposed by Behler *et al.* in 42 | `this paper`_ as a descriptor of HDNNP. Please see here for 43 | details of each symmetry function. 44 | 45 | .. 
_`this paper`: 46 | https://onlinelibrary.wiley.com/doi/full/10.1002/qua.24890 47 | """ 48 | assert 0 <= order <= 2 49 | assert func_param_map 50 | super().__init__(order, structures) 51 | self._func_param_map = func_param_map.copy() 52 | self._feature_keys = self.generate_feature_keys(self._elements) 53 | 54 | @property 55 | def function_names(self): 56 | """list [str]: Names of symmetry functions this instance 57 | calculates or has calculated.""" 58 | return list(self._func_param_map.keys()) 59 | 60 | @property 61 | def params(self): 62 | """dict [list [tuple]]]: Mapping from symmetry function name to 63 | its parameters.""" 64 | return self._func_param_map 65 | 66 | def calculate_descriptors(self, structure): 67 | """Calculate required descriptors for a structure data. 68 | 69 | Args: 70 | structure (AtomicStructure): 71 | A structure data to calculate descriptors. 72 | 73 | Returns: 74 | list [~numpy.ndarray]: Calculated descriptors. 75 | The length is the same as ``order`` given at initialization. 76 | """ 77 | generators = [] 78 | for name, params_set in self._func_param_map.items(): 79 | for params in params_set: 80 | generators.append(eval( 81 | f'self._symmetry_function_{name}')(structure, *params)) 82 | 83 | dataset = [np.concatenate([next(gen).data 84 | for gen in generators]).swapaxes(0, 1) 85 | for _ in range(self._order + 1)] 86 | 87 | structure.clear_cache() 88 | 89 | return dataset 90 | 91 | def generate_feature_keys(self, elements): 92 | """Generate feature keys from given elements and parameters. 93 | 94 | | parameters given at initialization are used. 95 | | This method is used to initialize instance and expand feature 96 | dimension in 97 | :class:`~hdnnpy.dataset.hdnnp_dataset.HDNNPDataset`. 98 | 99 | Args: 100 | elements (list [str]): Unique list of elements. It should be 101 | sorted alphabetically. 102 | 103 | Returns: 104 | list [str]: Generated feature keys in a format 105 | like ``::``. 
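
The keyword parameters correspond directly to the `DatasetConfig.parameters` mapping of the configuration file, and each tuple follows the signatures listed in the class docstring (type1: R_c; type2: R_c, eta, R_s; type4: R_c, eta, lambda, zeta). A small sketch with illustrative parameter values, where `structures` is a list of `AtomicStructure` objects carrying a `tag` info field:

```
dataset = SymmetryFunctionDataset(
    1, structures,
    type2=[(6.0, 0.01, 2.0), (6.0, 0.1, 0.0)],
    type4=[(6.0, 0.01, -1.0, 1.0)])
dataset.make(verbose=True)          # MPI data-parallel; result kept on rank 0
sym_func = dataset['sym_func']      # (n_structure, n_atom, n_feature) on rank 0
derivative = dataset['derivative']  # first derivatives, since order=1
```
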
106 | """ 107 | feature_keys = [] 108 | for function_name, params_set in self._func_param_map.items(): 109 | for params in params_set: 110 | param_key = '/'.join(map(str, params)) 111 | if function_name in ['type1', 'type2']: 112 | for element_key in elements: 113 | key = ':'.join([function_name, param_key, element_key]) 114 | feature_keys.append(key) 115 | 116 | elif function_name in ['type4']: 117 | for combo in combinations_with_replacement(elements, 2): 118 | element_key = '/'.join(combo) 119 | key = ':'.join([function_name, param_key, element_key]) 120 | feature_keys.append(key) 121 | return feature_keys 122 | 123 | def differentiate(func): 124 | """Decorator function to differentiate symmetry function.""" 125 | def wrapper(self, structure, Rc, *params): 126 | differentiate_more = self._order > 0 127 | with chainer.using_config('enable_backprop', differentiate_more): 128 | G = func(self, structure, Rc, *params) 129 | yield F.stack([F.stack(g) for g in G]) 130 | 131 | n_atom = len(G[0]) 132 | r = [] 133 | j_indices = [] 134 | for r_, j_idx in structure.get_neighbor_info( 135 | Rc, ['distance_vector', 'j_indices']): 136 | r.append(r_) 137 | j_indices.append(j_idx) 138 | 139 | differentiate_more = self._order > 1 140 | with chainer.using_config('enable_backprop', differentiate_more): 141 | dG = [] 142 | for g in G: 143 | with chainer.force_backprop_mode(): 144 | grad = chainer.grad( 145 | g, r, enable_double_backprop=differentiate_more) 146 | dg = [F.concat([F.sum(dg_, axis=0) for dg_ 147 | in F.split_axis(grad_, j_idx[1:], 148 | axis=0)], 149 | axis=0) 150 | for grad_, j_idx in zip(grad, j_indices)] 151 | dG.append(dg) 152 | yield F.stack([F.stack(dg) for dg in dG]) 153 | 154 | differentiate_more = self._order > 2 155 | with chainer.using_config('enable_backprop', differentiate_more): 156 | d2G = [] 157 | for dg in dG: 158 | d2g = [] 159 | for i in range(3 * n_atom): 160 | with chainer.force_backprop_mode(): 161 | grad = chainer.grad( 162 | [dg_[i] for dg_ in dg], r, 163 | enable_double_backprop=differentiate_more) 164 | d2g_ = [F.concat([F.sum(d2g_, axis=0) for d2g_ 165 | in F.split_axis(grad_, j_idx[1:], 166 | axis=0)], 167 | axis=0) 168 | for grad_, j_idx in zip(grad, j_indices)] 169 | d2g.append(d2g_) 170 | d2G.append(d2g) 171 | yield F.stack([F.stack([F.stack(d2g_) for d2g_ in d2g]) 172 | for d2g in d2G]).transpose(0, 2, 1, 3) 173 | 174 | return wrapper 175 | 176 | @differentiate 177 | def _symmetry_function_type1(self, structure, Rc): 178 | """Symmetry function type1 for specific parameters.""" 179 | G = [] 180 | for fc, element_indices in structure.get_neighbor_info( 181 | Rc, ['cutoff_function', 'element_indices']): 182 | g = fc 183 | g = [F.sum(g_) for g_ 184 | in F.split_axis(g, element_indices[1:], axis=0)] 185 | G.append(g) 186 | return list(zip(*G)) 187 | 188 | @differentiate 189 | def _symmetry_function_type2(self, structure, Rc, eta, Rs): 190 | """Symmetry function type2 for specific parameters.""" 191 | G = [] 192 | for R, fc, element_indices in structure.get_neighbor_info( 193 | Rc, ['distance', 'cutoff_function', 'element_indices']): 194 | g = F.exp(-eta*(R-Rs)**2) * fc 195 | g = [F.sum(g_) for g_ 196 | in F.split_axis(g, element_indices[1:], axis=0)] 197 | G.append(g) 198 | return list(zip(*G)) 199 | 200 | @differentiate 201 | def _symmetry_function_type4(self, structure, Rc, eta, lambda_, zeta): 202 | """Symmetry function type4 for specific parameters.""" 203 | G = [] 204 | for r, R, fc, element_indices in structure.get_neighbor_info( 205 | Rc, ['distance_vector', 
'distance', 'cutoff_function', 206 | 'element_indices']): 207 | cos = (r/F.expand_dims(R, axis=1)) @ (r.T/R) 208 | if zeta == 1: 209 | ang = (1.0 + lambda_*cos) 210 | else: 211 | ang = (1.0 + lambda_*cos) ** zeta 212 | g = (2.0 ** (1-zeta) 213 | * ang 214 | * F.expand_dims(F.exp(-eta*R**2) * fc, axis=1) 215 | * F.expand_dims(F.exp(-eta*R**2) * fc, axis=0)) 216 | triu = np.triu(np.ones_like(cos.data), k=1) 217 | g = F.where(triu.astype(np.bool), g, triu) 218 | g = [F.sum(g__) 219 | for j, g_ 220 | in enumerate(F.split_axis(g, element_indices[1:], axis=0)) 221 | for k, g__ 222 | in enumerate(F.split_axis(g_, element_indices[1:], axis=1)) 223 | if j <= k] 224 | G.append(g) 225 | return list(zip(*G)) 226 | -------------------------------------------------------------------------------- /hdnnpy/dataset/descriptor/weighted_symmetry_function_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Weighted symmetry function dataset for descriptor of HDNNP.""" 4 | 5 | import chainer 6 | import chainer.functions as F 7 | import numpy as np 8 | 9 | from hdnnpy.dataset.descriptor.descriptor_dataset_base import ( 10 | DescriptorDatasetBase) 11 | 12 | 13 | class WeightedSymmetryFunctionDataset(DescriptorDatasetBase): 14 | """Weighted symmetry function dataset for descriptor of HDNNP.""" 15 | DESCRIPTORS = ['sym_func', 'derivative', 'second_derivative'] 16 | """list [str]: Names of descriptors for each derivative order.""" 17 | name = 'weighted_symmetry_function' 18 | """str: Name of this descriptor class.""" 19 | 20 | def __init__(self, order, structures, **func_param_map): 21 | """ 22 | It accepts 0 or 2 for ``order``. 23 | 24 | | Each weighted symmetry function requires following parameters. 25 | | Pass parameters you want to use for the dataset as keyword 26 | arguments ``func_param_map``. 27 | 28 | * type1: :math:`R_c` 29 | * type2: :math:`R_c, \eta, R_s` 30 | * type4: :math:`R_c, \eta, \lambda, \zeta` 31 | 32 | Args: 33 | order (int): passed to super class. 34 | structures (list [AtomicStructure]): passed to super class. 35 | **func_param_map (list [tuple]): 36 | parameter sets for each type of weighted symmetry function. 37 | 38 | References: 39 | Weighted symmetry function was proposed by Gastegger *et al.* in 40 | `this paper`_ as a descriptor of HDNNP. Please see here for 41 | details of weighted symmetry function. 42 | 43 | .. _`this paper`: 44 | https://doi.org/10.1063/1.5019667 45 | """ 46 | assert 0 <= order <= 2 47 | assert func_param_map 48 | super().__init__(order, structures) 49 | self._func_param_map = func_param_map.copy() 50 | self._feature_keys = self.generate_feature_keys(self._elements) 51 | 52 | @property 53 | def function_names(self): 54 | """list [str]: Names of weighted symmetry functions this 55 | instance calculates or has calculated.""" 56 | return list(self._func_param_map.keys()) 57 | 58 | @property 59 | def params(self): 60 | """dict [list [tuple]]]: Mapping from weighted symmetry function 61 | name to its parameters.""" 62 | return self._func_param_map 63 | 64 | def calculate_descriptors(self, structure): 65 | """Calculate required descriptors for a structure data. 66 | 67 | Args: 68 | structure (AtomicStructure): 69 | A structure data to calculate descriptors. 70 | 71 | Returns: 72 | list [~numpy.ndarray]: Calculated descriptors. 73 | The length is the same as ``order`` given at initialization. 
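
Usage is identical to `SymmetryFunctionDataset` above; the practical difference is that neighbor contributions are weighted by atomic number instead of being resolved per element, so the feature dimension does not grow with the number of element combinations. For example (parameter values are illustrative):

```
dataset = WeightedSymmetryFunctionDataset(
    0, structures, type2=[(6.0, 0.01, 2.0)])
print(dataset.feature_keys)   # ['type2:6.0/0.01/2.0'], independent of the elements
```
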
74 | """ 75 | generators = [] 76 | for name, params_set in self._func_param_map.items(): 77 | for params in params_set: 78 | generators.append(eval( 79 | f'self._weighted_symmetry_function_{name}' 80 | )(structure, *params)) 81 | 82 | dataset = [np.stack([next(gen).data 83 | for gen in generators]).swapaxes(0, 1) 84 | for _ in range(self._order + 1)] 85 | 86 | structure.clear_cache() 87 | 88 | return dataset 89 | 90 | def generate_feature_keys(self, _): 91 | """Generate feature keys from given elements and parameters. 92 | 93 | | parameters given at initialization are used. 94 | | This method is used to initialize instance and expand feature 95 | dimension in 96 | :class:`~hdnnpy.dataset.hdnnp_dataset.HDNNPDataset`. 97 | 98 | Returns: 99 | list [str]: Generated feature keys in a format 100 | like ``:``. 101 | """ 102 | feature_keys = [] 103 | for function_name, params_set in self._func_param_map.items(): 104 | for params in params_set: 105 | param_key = '/'.join(map(str, params)) 106 | key = ':'.join([function_name, param_key]) 107 | feature_keys.append(key) 108 | return feature_keys 109 | 110 | def differentiate(func): 111 | """Decorator function to differentiate weighted symmetry 112 | function.""" 113 | def wrapper(self, structure, Rc, *params): 114 | differentiate_more = self._order > 0 115 | with chainer.using_config('enable_backprop', differentiate_more): 116 | G = func(self, structure, Rc, *params) 117 | yield F.stack(G) 118 | 119 | n_atom = len(G) 120 | r = [] 121 | j_indices = [] 122 | for r_, j_idx in structure.get_neighbor_info( 123 | Rc, ['distance_vector', 'j_indices']): 124 | r.append(r_) 125 | j_indices.append(j_idx) 126 | 127 | differentiate_more = self._order > 1 128 | with chainer.using_config('enable_backprop', differentiate_more): 129 | with chainer.force_backprop_mode(): 130 | grad = chainer.grad( 131 | G, r, enable_double_backprop=differentiate_more) 132 | dG = [F.concat([F.sum(dg_, axis=0) for dg_ 133 | in F.split_axis(grad_, j_idx[1:], axis=0)], 134 | axis=0) 135 | for grad_, j_idx in zip(grad, j_indices)] 136 | yield F.stack(dG) 137 | 138 | differentiate_more = self._order > 2 139 | with chainer.using_config('enable_backprop', differentiate_more): 140 | d2G = [] 141 | for i in range(3 * n_atom): 142 | with chainer.force_backprop_mode(): 143 | grad = chainer.grad( 144 | [dg[i] for dg in dG], r, 145 | enable_double_backprop=differentiate_more) 146 | d2g = [F.concat([F.sum(d2g_, axis=0) for d2g_ 147 | in F.split_axis(grad_, j_idx[1:], 148 | axis=0)], 149 | axis=0) 150 | for grad_, j_idx in zip(grad, j_indices)] 151 | d2G.append(d2g) 152 | yield F.stack([F.stack(d2g) for d2g in d2G]).transpose(1, 0, 2) 153 | 154 | return wrapper 155 | 156 | @differentiate 157 | def _weighted_symmetry_function_type1(self, structure, Rc): 158 | """Weighted symmetry function type1 for specific parameters.""" 159 | G = [] 160 | for z, fc in structure.get_neighbor_info( 161 | Rc, ['atomic_number', 'cutoff_function']): 162 | g = z * fc 163 | G.append(F.sum(g)) 164 | return G 165 | 166 | @differentiate 167 | def _weighted_symmetry_function_type2(self, structure, Rc, eta, Rs): 168 | """Weighted symmetry function type2 for specific parameters.""" 169 | G = [] 170 | for z, R, fc in structure.get_neighbor_info( 171 | Rc, ['atomic_number', 'distance', 'cutoff_function']): 172 | g = z * F.exp(-eta*(R-Rs)**2) * fc 173 | G.append(F.sum(g)) 174 | return G 175 | 176 | @differentiate 177 | def _weighted_symmetry_function_type4( 178 | self, structure, Rc, eta, lambda_, zeta): 179 | """Weighted symmetry 
function type4 for specific parameters.""" 180 | G = [] 181 | for z, r, R, fc in structure.get_neighbor_info( 182 | Rc, ['atomic_number', 'distance_vector', 'distance', 183 | 'cutoff_function']): 184 | cos = (r/F.expand_dims(R, axis=1)) @ (r.T/R) 185 | if zeta == 1: 186 | ang = (1.0 + lambda_*cos) 187 | else: 188 | ang = (1.0 + lambda_*cos) ** zeta 189 | g = (2.0 ** (1-zeta) 190 | * z[:, None] * z[None, :] 191 | * ang 192 | * F.expand_dims(F.exp(-eta*R**2) * fc, axis=1) 193 | * F.expand_dims(F.exp(-eta*R**2) * fc, axis=0)) 194 | triu = np.triu(np.ones_like(cos.data), k=1) 195 | g = F.where(triu.astype(np.bool), g, triu) 196 | G.append(F.sum(g)) 197 | return G 198 | -------------------------------------------------------------------------------- /hdnnpy/dataset/hdnnp_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Combine and preprocess descriptor and property dataset.""" 4 | 5 | import numpy as np 6 | 7 | from hdnnpy.utils import (MPI, recv_chunk, send_chunk) 8 | 9 | 10 | RANDOMSTATE = np.random.get_state() 11 | 12 | 13 | class HDNNPDataset(object): 14 | """Combine and preprocess descriptor and property dataset.""" 15 | def __init__(self, descriptor, property_, dataset=None): 16 | """ 17 | | It is desirable that the type of descriptor and property used 18 | for HDNNP is fixed at initialization. 19 | | Also, an instance itself does not have any dataset at 20 | initialization and you need to execute :meth:`construct`. 21 | | If ``dataset`` is given it will be an instance's own dataset. 22 | 23 | Args: 24 | descriptor (DescriptorDatasetBase): 25 | Descriptor instance you want to use as HDNNP input. 26 | property\_ (PropertyDatasetBase): 27 | Property instance you want to use as HDNNP label. 28 | dataset (dict [~numpy.ndarray], optional): 29 | If specified, dataset will be initialized with this. 
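        Example:
            A hedged sketch of the usual composition (``structures`` is a
            placeholder list of ``AtomicStructure`` objects; both datasets
            are assumed to already hold data, e.g. via their ``make`` or
            ``load`` methods)::

                >>> descriptor = WeightedSymmetryFunctionDataset(
                ...     order=1, structures=structures,
                ...     type2=[(6.0, 0.01, 2.0)])
                >>> property_ = InteratomicPotentialDataset(
                ...     order=1, structures=structures)
                >>> dataset = HDNNPDataset(descriptor, property_)
                >>> dataset.construct(shuffle=True)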
30 | """ 31 | if dataset is None: 32 | dataset = {} 33 | self._descriptor = descriptor 34 | self._property = property_ 35 | self._dataset = dataset.copy() 36 | 37 | def __getitem__(self, item): 38 | """Return indexed or sliced dataset as dict data.""" 39 | batches = {key: data[item] 40 | for key, data in self._dataset.items()} 41 | if isinstance(item, slice): 42 | length = len(list(batches.values())[0]) 43 | return [{key: batch[i] for key, batch in batches.items()} 44 | for i in range(length)] 45 | else: 46 | return batches 47 | 48 | def __len__(self): 49 | """Redicect to :attr:`partial_size`""" 50 | return self.partial_size 51 | 52 | @property 53 | def descriptor(self): 54 | """DescriptorDatasetBase: Descriptor dataset instance.""" 55 | return self._descriptor 56 | 57 | @property 58 | def elemental_composition(self): 59 | """list [str]: Elemental composition of the dataset.""" 60 | return self._descriptor.elemental_composition 61 | 62 | @property 63 | def elements(self): 64 | """list [str]: Elements of the dataset.""" 65 | return self._descriptor.elements 66 | 67 | @property 68 | def n_input(self): 69 | """int: Number of dimensions of input data.""" 70 | if 'inputs/0' in self._dataset: 71 | return self._dataset['inputs/0'].shape[-1] 72 | else: 73 | return self._descriptor.n_feature 74 | 75 | @property 76 | def n_label(self): 77 | """int: Number of dimensions of label data.""" 78 | if 'labels/0' in self._dataset: 79 | return self._dataset['labels/0'].shape[-1] 80 | else: 81 | return self._property.n_property 82 | 83 | @property 84 | def partial_size(self): 85 | """int: Number of data after scattered by MPI communication.""" 86 | return len(list(self._dataset.values())[0]) 87 | 88 | @property 89 | def tag(self): 90 | """str: Unique tag of the dataset. 91 | 92 | Usually, it is a form like `` ``. 93 | (ex. ``CrystalGa2N2``) 94 | """ 95 | return self._descriptor.tag 96 | 97 | @property 98 | def total_size(self): 99 | """int: Number of data before scattered by MPI communication.""" 100 | return len(self._descriptor) 101 | 102 | @property 103 | def property(self): 104 | """PropertyDatasetBase: Property dataset instance.""" 105 | return self._property 106 | 107 | def construct(self, all_elements=None, preprocesses=None, 108 | shuffle=True, verbose=True): 109 | """Construct an instance's own dataset. 110 | 111 | This method does following steps: 112 | 113 | * Check compatibility between descriptor and property datasets. 114 | * Expand feature dimension of descriptor dataset according to 115 | ``all_elements`` and pre-process descriptor dataset in a 116 | given order and add to its own dataset. 117 | * Add property dataset to its own dataset. 118 | * Clear up the original data in descriptor and property dataset. 119 | * Shuffle the order of the data. 120 | 121 | Args: 122 | all_elements (list [str], optional): 123 | If specified, it expands feature dimensions of 124 | descriptor dataset according to this. 125 | preprocesses (list [PreprocessBase], optional): 126 | If specified, it pre-processes descriptor dataset in a 127 | given order. 128 | shuffle (bool, optional): 129 | If specified, it shuffles the order of the data. 130 | verbose (bool, optional): 131 | Print log to stdout. 132 | 133 | Raises: 134 | AssertionError: 135 | If descriptor and property datasets are incompatible. 
136 | """ 137 | if preprocesses is None: 138 | preprocesses = [] 139 | 140 | # check compatibility between descriptor and property datasets 141 | assert len(self._descriptor) == len(self._property) 142 | assert self._descriptor.elemental_composition \ 143 | == self._property.elemental_composition 144 | assert self._descriptor.elements == self._property.elements 145 | assert self._descriptor.tag == self._property.tag 146 | 147 | # add descriptor dataset and delete original data 148 | if self._descriptor.has_data: 149 | inputs = [self._descriptor[key] 150 | for key in self._descriptor.descriptors] 151 | # expand along to feature dimension 152 | if all_elements != self._descriptor.elements: 153 | old_feature_keys = self._descriptor.feature_keys 154 | new_feature_keys = ( 155 | self._descriptor.generate_feature_keys(all_elements)) 156 | inputs = self._expand_feature_dims( 157 | inputs, old_feature_keys, new_feature_keys) 158 | # pre-process descriptor dataset 159 | for preprocess in preprocesses: 160 | inputs = preprocess.apply( 161 | inputs, self.elemental_composition, verbose=verbose) 162 | self._dataset.update( 163 | {f'inputs/{i}': data for i, data in enumerate(inputs)}) 164 | self._descriptor.clear() 165 | 166 | # add property dataset and delete original data 167 | if self._property.has_data: 168 | labels = [self._property[key] for key in self._property.properties] 169 | self._dataset.update( 170 | {f'labels/{i}': data for i, data in enumerate(labels)}) 171 | self._property.clear() 172 | 173 | # shuffle dataset 174 | if shuffle: 175 | self._shuffle() 176 | 177 | def scatter(self, max_buf_len=256 * 1024 * 1024): 178 | """Scatter dataset by MPI communication. 179 | 180 | Each instance is re-initialized with received dataset. 181 | 182 | Args: 183 | max_buf_len (int, optional): 184 | Each data is divided into chunks of this size at 185 | maximum. 186 | """ 187 | if MPI.rank == 0: 188 | new_dataset = {} 189 | MPI.comm.bcast(len(self._dataset), root=0) 190 | while self._dataset: 191 | key, data = self._dataset.popitem() 192 | n_total = self.total_size 193 | n_sub = -(-n_total // MPI.size) 194 | for i in range(MPI.size): 195 | s = n_total*i//MPI.size 196 | e = n_total*i//MPI.size + n_sub 197 | if i == 0: 198 | new_dataset[key] = data[s:e] 199 | else: 200 | MPI.comm.send(key, dest=i) 201 | send_chunk(data[s:e], dest=i, max_buf_len=max_buf_len) 202 | self._dataset.update(new_dataset) 203 | 204 | else: 205 | self._dataset.clear() 206 | n_data = MPI.comm.bcast(None, root=0) 207 | for i in range(n_data): 208 | key = MPI.comm.recv(source=0) 209 | recv_data = recv_chunk(source=0, max_buf_len=max_buf_len) 210 | self._dataset[key] = recv_data 211 | 212 | def take(self, index): 213 | """Return copied object that has sliced dataset. 214 | 215 | Args: 216 | index (int or slice): 217 | Copied object has dataset indexed or sliced by this. 
218 | """ 219 | dataset = {key: data[index] for key, data in self._dataset.items()} 220 | new_dataset = self.__class__(self._descriptor, self._property, dataset) 221 | return new_dataset 222 | 223 | @staticmethod 224 | def _expand_feature_dims(inputs, old_feature_keys, new_feature_keys): 225 | """Expand feature dimension of input dataset according to 226 | ``all_elements``.""" 227 | n_pad = len(new_feature_keys) - len(old_feature_keys) 228 | idx_pad = len(old_feature_keys) 229 | sort_indices = [] 230 | for key in new_feature_keys: 231 | if key in old_feature_keys: 232 | sort_indices.append(old_feature_keys.index(key)) 233 | else: 234 | sort_indices.append(idx_pad) 235 | idx_pad += 1 236 | sort_indices = np.array(sort_indices) 237 | 238 | for i, data in enumerate(inputs): 239 | pad_width = [(0, n_pad) if i == 2 else (0, 0) 240 | for i in range(data.ndim)] 241 | data = np.pad(data, pad_width, 'constant') 242 | inputs[i] = data[:, :, sort_indices] 243 | return inputs 244 | 245 | def _shuffle(self): 246 | """Shuffle the order of the data.""" 247 | for data in self._dataset.values(): 248 | np.random.set_state(RANDOMSTATE) 249 | np.random.shuffle(data) 250 | -------------------------------------------------------------------------------- /hdnnpy/dataset/property/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Property dataset subpackage.""" 4 | 5 | __all__ = [ 6 | 'PROPERTY_DATASET', 7 | ] 8 | 9 | from hdnnpy.dataset.property.interatomic_potential_dataset import ( 10 | InteratomicPotentialDataset) 11 | 12 | PROPERTY_DATASET = { 13 | InteratomicPotentialDataset.name: InteratomicPotentialDataset, 14 | } 15 | -------------------------------------------------------------------------------- /hdnnpy/dataset/property/interatomic_potential_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Interatomic potential dataset for property of HDNNP. """ 4 | 5 | import numpy as np 6 | 7 | from hdnnpy.dataset.property.property_dataset_base import PropertyDatasetBase 8 | 9 | 10 | class InteratomicPotentialDataset(PropertyDatasetBase): 11 | """Interatomic potential dataset for property of HDNNP. """ 12 | PROPERTIES = ['energy', 'force', 'harmonic', 'third_order'] 13 | """list [str]: Names of properties for each derivative order.""" 14 | COEFFICIENTS = [1.0, -1.0, 1.0, 1.0] 15 | """list [float]: Coefficient values of each properties.""" 16 | UNITS = ['eV/atom', 'eV/$\\AA$', 'eV/$\\AA$^2', 'eV/$\\AA$^3'] 17 | """list [str]: Units of properties for each derivative order.""" 18 | name = 'interatomic_potential' 19 | """str: Name of this property class.""" 20 | n_property = 1 21 | """int: Number of dimensions of 0th property.""" 22 | 23 | def __init__(self, order, structures): 24 | """ 25 | It accepts 0 or 3 for ``order``. 26 | 27 | Notes: 28 | Currently you cannot use order = 2 or 3, since it is not 29 | implemented. 30 | 31 | Args: 32 | order (int): passed to super class. 33 | structures (list [AtomicStructure]): passed to super class. 34 | """ 35 | assert 0 <= order <= 3 36 | super().__init__(order, structures) 37 | 38 | def calculate_properties(self, structure): 39 | """Calculate required properties for a structure data. 40 | 41 | Args: 42 | structure (AtomicStructure): 43 | A structure data to calculate properties. 44 | 45 | Returns: 46 | list [~numpy.ndarray]: Calculated properties. 47 | The length is the same as ``order`` given at initialization. 
48 | """ 49 | n_deriv = len(structure) * 3 50 | dataset = [] 51 | if self._order >= 0: 52 | energy = (self._calculate_energy(structure) 53 | .astype(np.float32) 54 | .reshape(self.n_property)) 55 | dataset.append(energy) 56 | if self._order >= 1: 57 | force = (self._calculate_force(structure) 58 | .astype(np.float32) 59 | .reshape(self.n_property, n_deriv)) 60 | dataset.append(force) 61 | if self._order >= 2: 62 | harmonic = (self._calculate_harmonic(structure) 63 | .astype(np.float32) 64 | .reshape(self.n_property, n_deriv, n_deriv)) 65 | dataset.append(harmonic) 66 | if self._order >= 3: 67 | third_order = (self._calculate_third_order(structure) 68 | .astype(np.float32) 69 | .reshape(self.n_property, n_deriv, 70 | n_deriv, n_deriv)) 71 | dataset.append(third_order) 72 | return dataset 73 | 74 | @staticmethod 75 | def _calculate_energy(structure): 76 | """Calculate atomic energy.""" 77 | return structure.get_potential_energy() / len(structure) 78 | 79 | @staticmethod 80 | def _calculate_force(structure): 81 | """Calculate interatomic forces.""" 82 | return structure.get_forces() 83 | 84 | @staticmethod 85 | def _calculate_harmonic(structure): 86 | """Calculate 2nd-order harmonic force constant.""" 87 | raise NotImplementedError 88 | 89 | @staticmethod 90 | def _calculate_third_order(structure): 91 | """Calculate 3rd-order anharmonic force constant.""" 92 | raise NotImplementedError 93 | -------------------------------------------------------------------------------- /hdnnpy/dataset/property/property_dataset_base.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Base class of atomic structure based property dataset. 4 | 5 | If you want to add new property to extend HDNNP, inherits this base 6 | class. 7 | """ 8 | 9 | from abc import (ABC, abstractmethod) 10 | 11 | import numpy as np 12 | from tqdm import tqdm 13 | 14 | from hdnnpy.utils import (MPI, pprint, recv_chunk, send_chunk) 15 | 16 | 17 | class PropertyDatasetBase(ABC): 18 | """Base class of atomic structure based property dataset.""" 19 | PROPERTIES = [] 20 | """list [str]: Names of properties for each derivative order.""" 21 | COEFFICIENTS = [] 22 | """list [float]: Coefficient values of each properties.""" 23 | UNITS = [] 24 | """list [str]: Units of properties for each derivative order.""" 25 | name = None 26 | """str: Name of this property class.""" 27 | n_property = None 28 | """int: Number of dimensions of 0th property.""" 29 | 30 | def __init__(self, order, structures): 31 | """ 32 | Common instance variables for property datasets are initialized. 33 | 34 | Args: 35 | order (int): Derivative order of property to calculate. 36 | structures (list [AtomicStructure]): 37 | Properties are calculated for these atomic structures. 38 | """ 39 | self._order = order 40 | self._properties = self.PROPERTIES[: order+1] 41 | self._elemental_composition = structures[0].get_chemical_symbols() 42 | self._elements = sorted(set(self._elemental_composition)) 43 | self._length = len(structures) 44 | self._slices = [slice(i[0], i[-1]+1) 45 | for i in np.array_split(range(self._length), MPI.size)] 46 | self._structures = structures[self._slices[MPI.rank]] 47 | self._tag = structures[0].info['tag'] 48 | self._coefficients = self.COEFFICIENTS[: order+1] 49 | self._units = self.UNITS[: order+1] 50 | self._dataset = [] 51 | 52 | def __getitem__(self, item): 53 | """Return property data this instance has. 54 | 55 | If ``item`` is string, it returns corresponding property. 
56 | Available keys can be obtained by ``properties`` attribute. 57 | Otherwise, it returns a list of property sliced by ``item``. 58 | """ 59 | if isinstance(item, str): 60 | try: 61 | index = self._properties.index(item) 62 | except ValueError: 63 | raise KeyError(item) from None 64 | return self._dataset[index] 65 | else: 66 | return [data[item] for data in self._dataset] 67 | 68 | def __len__(self): 69 | """Number of atomic structures given at initialization.""" 70 | return self._length 71 | 72 | @property 73 | def coefficients(self): 74 | """list [float]: Coefficient values this instance have.""" 75 | return self._coefficients 76 | 77 | @property 78 | def elemental_composition(self): 79 | """list [str]: Elemental composition of atomic structures given 80 | at initialization.""" 81 | return self._elemental_composition 82 | 83 | @property 84 | def elements(self): 85 | """list [str]: Elements of atomic structures given at 86 | initialization.""" 87 | return self._elements 88 | 89 | @property 90 | def has_data(self): 91 | """bool: True if success to load or make dataset, 92 | False otherwise.""" 93 | return len(self._dataset) == self._order + 1 94 | 95 | @property 96 | def order(self): 97 | """int: Derivative order of property to calculate.""" 98 | return self._order 99 | 100 | @property 101 | def properties(self): 102 | """list [str]: Names of properties this instance have.""" 103 | return self._properties 104 | 105 | @property 106 | def tag(self): 107 | """str: Unique tag of atomic structures given at 108 | initialization. 109 | 110 | Usually, it is a form like `` ``. 111 | (ex. ``CrystalGa2N2``) 112 | """ 113 | return self._tag 114 | 115 | @property 116 | def units(self): 117 | """list [str]: Units of properties this instance have.""" 118 | return self._units 119 | 120 | def clear(self): 121 | """Clear up instance variables to initial state.""" 122 | self._dataset.clear() 123 | 124 | def load(self, file_path, verbose=True, remake=False): 125 | """Load dataset from .npz format file. 126 | 127 | Only root MPI process load dataset. 128 | 129 | It validates following compatibility between loaded dataset and 130 | atomic structures given at initialization. 131 | 132 | * length of data 133 | * elemental composition 134 | * elements 135 | * tag 136 | 137 | It also validates that loaded dataset satisfies requirements. 138 | 139 | * order 140 | 141 | Args: 142 | file_path (~pathlib.Path): File path to load dataset. 143 | verbose (bool, optional): Print log to stdout. 144 | remake (bool, optional): If loaded dataset is lacking in 145 | any property, recalculate dataset from scratch and 146 | overwrite it to ``file_path``. Otherwise, it raises 147 | ValueError. 148 | 149 | Raises: 150 | AssertionError: If loaded dataset is incompatible with 151 | atomic structures given at initialization. 152 | ValueError: If loaded dataset is lacking in any property and 153 | ``remake=False``. 
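        Example:
            A hedged sketch of the usual load-or-make pattern (the file
            path is arbitrary)::

                >>> file_path = Path('CrystalGa2N2/interatomic_potential.npz')
                >>> if file_path.exists():
                ...     dataset.load(file_path, remake=True)
                ... else:
                ...     dataset.make()
                ...     dataset.save(file_path)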
154 | """ 155 | # validate compatibility between my structures and loaded dataset 156 | ndarray = np.load(file_path) 157 | assert list(ndarray['elemental_composition']) \ 158 | == self._elemental_composition 159 | assert list(ndarray['elements']) == self._elements 160 | assert ndarray['tag'].item() == self._tag 161 | assert len(ndarray[self._properties[0]]) == len(self) 162 | 163 | # validate lacking properties 164 | lacking_properties = set(self._properties) - set(ndarray) 165 | if lacking_properties: 166 | if verbose: 167 | lacking = ('\n'+' '*20).join(sorted(lacking_properties)) 168 | pprint(f''' 169 | Following properties are lacked in {file_path}. 170 | {lacking} 171 | ''') 172 | if remake: 173 | if verbose: 174 | pprint('Start to recalculate dataset from scratch.') 175 | self.make(verbose=verbose) 176 | self.save(file_path, verbose=verbose) 177 | return 178 | else: 179 | raise ValueError('Please recalculate dataset from scratch.') 180 | 181 | # load dataset as much as needed 182 | if MPI.rank == 0: 183 | for i in range(self._order + 1): 184 | self._dataset.append(ndarray[self._properties[i]]) 185 | 186 | if verbose: 187 | pprint(f'Successfully loaded & made needed {self.name} dataset' 188 | f' from {file_path}') 189 | 190 | def make(self, verbose=True): 191 | """Calculate & retain property dataset 192 | 193 | | It calculates property dataset by data-parallel using MPI 194 | communication. 195 | | The calculated dataset is retained in only root MPI process. 196 | 197 | Each property values are divided by ``COEFFICIENTS`` which is 198 | unique to each property dataset class. 199 | 200 | Args: 201 | verbose (bool, optional): Print log to stdout. 202 | """ 203 | dataset = [] 204 | for structure in tqdm(self._structures, 205 | ascii=True, desc=f'Process #{MPI.rank}', 206 | leave=False, position=MPI.rank): 207 | dataset.append(self.calculate_properties(structure)) 208 | 209 | for data_list, coefficient in zip(zip(*dataset), self._coefficients): 210 | shape = data_list[0].shape 211 | send_data = np.stack(data_list) / coefficient 212 | del data_list 213 | if MPI.rank == 0: 214 | recv_data = np.empty((self._length, *shape), dtype=np.float32) 215 | recv_data[self._slices[0]] = send_data 216 | del send_data 217 | for i in range(1, MPI.size): 218 | recv_data[self._slices[i]] = recv_chunk(source=i) 219 | self._dataset.append(recv_data) 220 | else: 221 | send_chunk(send_data, dest=0) 222 | del send_data 223 | 224 | if verbose: 225 | pprint(f'Calculated {self.name} dataset.') 226 | 227 | def save(self, file_path, verbose=True): 228 | """Save dataset to .npz format file. 229 | 230 | Only root MPI process save dataset. 231 | 232 | Args: 233 | file_path (~pathlib.Path): File path to save dataset. 234 | verbose (bool, optional): Print log to stdout. 235 | 236 | Raises: 237 | RuntimeError: If this instance do not have any data. 238 | """ 239 | if not MPI.comm.bcast(self.has_data, root=0): 240 | raise RuntimeError(''' 241 | Cannot save dataset, since this dataset does not have any data. 
242 | ''') 243 | 244 | if MPI.rank == 0: 245 | data = {property_: data for property_, data 246 | in zip(self._properties, self._dataset)} 247 | info = { 248 | 'elemental_composition': self._elemental_composition, 249 | 'elements': self._elements, 250 | 'tag': self._tag, 251 | } 252 | np.savez(file_path, **data, **info) 253 | if verbose: 254 | pprint(f'Successfully saved {self.name} dataset to {file_path}.') 255 | 256 | @abstractmethod 257 | def calculate_properties(self, structure): 258 | """Calculate required properties for a structure data. 259 | 260 | This is abstract method. 261 | Subclass of this base class have to override. 262 | 263 | Args: 264 | structure (AtomicStructure): 265 | A structure data to calculate properties. 266 | 267 | Returns: 268 | list [~numpy.ndarray]: Calculated properties. 269 | The length is the same as ``order`` given at initialization. 270 | """ 271 | return 272 | -------------------------------------------------------------------------------- /hdnnpy/format/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """File format support subpackage.""" 4 | 5 | __all__ = [ 6 | 'parse_xyz', 7 | ] 8 | 9 | from hdnnpy.format.xyz import parse_xyz 10 | -------------------------------------------------------------------------------- /hdnnpy/format/xyz.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Functions to handle xyz file format.""" 4 | 5 | from pathlib import Path 6 | from tempfile import NamedTemporaryFile 7 | 8 | import ase.io 9 | 10 | from hdnnpy.utils import pprint 11 | 12 | 13 | def parse_xyz(file_path, save=True, verbose=True): 14 | """Parse a xyz format file and bunch structures by the same tag. 15 | 16 | Args: 17 | file_path (~pathlib.Path): File path to parse. 18 | save (bool, optional): 19 | If True, save the structures bunched by the same tag into 20 | files. Otherwise, save into temporarily files. 21 | verbose (bool, optional): Print log to stdout. 22 | 23 | Returns: 24 | tuple: 2-element tuple containing: 25 | 26 | - tag_xyz_map (dict): Tag to file path mapping. 27 | - elements (list [str]): 28 | All elements contained in the parsed file. 
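    Example:
        Assuming an input file whose frames carry a ``tag`` entry in
        ``atoms.info`` (the path and tag names below are placeholders)::

            >>> tag_xyz_map, elements = parse_xyz(Path('data/GaN.xyz'))
            >>> elements
            ['Ga', 'N']
            >>> sorted(tag_xyz_map)
            ['CrystalGa16N16', 'CrystalGa2N2']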
29 | """ 30 | tag_xyz_map = {} 31 | elements = set() 32 | 33 | info_file = file_path.with_name(f'{file_path.name}.dat') 34 | if info_file.exists(): 35 | elements, *tags = info_file.read_text().strip().split('\n') 36 | elements = set(elements.split()) 37 | for tag in tags: 38 | tag_xyz_map[tag] = (Path(file_path.with_name(tag)) 39 | / 'structure.xyz') 40 | else: 41 | for atoms in ase.io.iread(str(file_path), index=':', format='xyz'): 42 | tag = atoms.info['tag'] 43 | try: 44 | xyz_path = tag_xyz_map[tag] 45 | except KeyError: 46 | if save: 47 | file_path.with_name(tag).mkdir(parents=True, exist_ok=True) 48 | xyz_path = file_path.with_name(tag)/'structure.xyz' 49 | if verbose: 50 | pprint(f'Sub dataset tagged as "{tag}" is saved to' 51 | f' {xyz_path}.') 52 | 53 | else: 54 | xyz_path = Path(NamedTemporaryFile('w', delete=False).name) 55 | if verbose: 56 | pprint(f'Sub dataset tagged as "{tag}" is temporarily' 57 | f' saved to {xyz_path}.\n' 58 | 'If ABEND and this file remains, delete it' 59 | ' manually.') 60 | tag_xyz_map[tag] = xyz_path 61 | ase.io.write(str(xyz_path), atoms, format='xyz', append=True) 62 | elements.update(atoms.get_chemical_symbols()) 63 | if save: 64 | info_file.write_text(' '.join(sorted(elements)) + '\n' 65 | + '\n'.join(sorted(tag_xyz_map)) + '\n') 66 | 67 | return tag_xyz_map, sorted(elements) 68 | -------------------------------------------------------------------------------- /hdnnpy/model/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Neural network structure models subpackage.""" 4 | 5 | __all__ = [ 6 | 'HighDimensionalNNP', 7 | 'MasterNNP', 8 | ] 9 | 10 | from hdnnpy.model.models import (HighDimensionalNNP, MasterNNP) 11 | -------------------------------------------------------------------------------- /hdnnpy/model/models.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Neural network potential models.""" 4 | 5 | import chainer 6 | import chainer.functions as F 7 | import chainer.links as L 8 | from chainer import Variable 9 | 10 | 11 | class HighDimensionalNNP(chainer.ChainList): 12 | """High dimensional neural network potential. 13 | 14 | This is one implementation of HDNNP that is proposed by Behler 15 | *et al* [Ref]_. 16 | It has a structure in which simple neural networks are arranged in 17 | parallel. 18 | Each neural network corresponds to one atom and inputs descriptor 19 | and outputs property per atom. 20 | Total value or property is predicted to sum them up. 21 | """ 22 | def __init__(self, elemental_composition, *args): 23 | """ 24 | Args: 25 | elemental_composition (list [str]): 26 | Create the same number of :class:`SubNNP` instances as 27 | this. A :class:`SubNNP` with the same element has the 28 | same parameters synchronized. 29 | *args: Positional arguments that is passed to `SubNNP`. 30 | """ 31 | super().__init__( 32 | *[SubNNP(element, *args) for element in elemental_composition]) 33 | 34 | def predict(self, inputs, order): 35 | """Get prediction from input data in a feed-forward way. 36 | 37 | It accepts 0 or 2 for ``order``. 38 | 39 | Notes: 40 | 0th-order predicted value is not total value, but per-atom 41 | value. 42 | 43 | Args: 44 | inputs (list [~numpy.ndarray]): 45 | Length have to equal to ``order + 1``. Each element is 46 | correspond to ``0th-order``, ``1st-order``, ... 47 | order (int): 48 | Derivative order of prediction by this model. 
49 | 50 | Returns: 51 | list [~chainer.Variable]: 52 | Predicted values. Each elements is correspond to 53 | ``0th-order``, ``1st-order``, ... 54 | """ 55 | assert 0 <= order <= 2 56 | input_variables = [[Variable(x) for x in data.swapaxes(0, 1)] 57 | for data in inputs] 58 | for nnp in self: 59 | nnp.results.clear() 60 | 61 | xs = input_variables.pop(0) 62 | with chainer.force_backprop_mode(): 63 | y_pred = self._predict_y(xs) 64 | if order == 0: 65 | return [y_pred] 66 | 67 | dxs = input_variables.pop(0) 68 | differentiate_more = chainer.config.train or order > 1 69 | with chainer.force_backprop_mode(): 70 | dy_pred = self._predict_dy(xs, dxs, differentiate_more) 71 | if order == 1: 72 | return [y_pred, dy_pred] 73 | 74 | d2xs = input_variables.pop(0) 75 | differentiate_more = chainer.config.train or order > 2 76 | with chainer.force_backprop_mode(): 77 | d2y_pred = self._predict_d2y(xs, dxs, d2xs, differentiate_more) 78 | if order == 2: 79 | return [y_pred, dy_pred, d2y_pred] 80 | 81 | def get_by_element(self, element): 82 | """Get all `SubNNP` instances that represent the same element. 83 | 84 | Args: 85 | element (str): Element symbol that you want to get. 86 | 87 | Returns: 88 | list [SubNNP]: 89 | All `SubNNP` instances which represent the same 90 | ``element`` in this HDNNP instance. 91 | """ 92 | return [nnp for nnp in self if nnp.element == element] 93 | 94 | def reduce_grad_to(self, master_nnp): 95 | """Collect calculated gradient of parameters into `MasterNNP` 96 | for each element. 97 | 98 | Args: 99 | master_nnp (MasterNNP): 100 | `MasterNNP` instance where you manage parameters. 101 | """ 102 | for master in master_nnp.children(): 103 | for nnp in self.get_by_element(master.element): 104 | master.addgrads(nnp) 105 | 106 | def sync_param_with(self, master_nnp): 107 | """Synchronize the parameters with `MasterNNP` for each element. 108 | 109 | Args: 110 | master_nnp (MasterNNP): 111 | `MasterNNP` instance where you manage parameters. 112 | """ 113 | for master in master_nnp.children(): 114 | for nnp in self.get_by_element(master.element): 115 | nnp.copyparams(master) 116 | 117 | def _predict_y(self, xs): 118 | """Calculate 0th-order prediction for each `SubNNP`. 119 | 120 | Args: 121 | xs (list [~chainer.Variable]): 122 | Input data for each `SubNNP` constituting this HDNNP 123 | instance. The shape of data is 124 | ``n_atom x (n_sample, n_input)``. 125 | 126 | Returns: 127 | ~chainer.Variable: 128 | Output data (per atom) for each `SubNNP` constituting 129 | this HDNNP instance. The shape of data is 130 | ``(n_sample, n_output)``. 131 | """ 132 | for nnp, x in zip(self, xs): 133 | nnp.feedforward(x) 134 | return sum([nnp.results['y'] for nnp in self]) / len(self) 135 | 136 | def _predict_dy(self, xs, dxs, differentiate_more): 137 | """Calculate 1st-order prediction for each `SubNNP`. 138 | 139 | Args: 140 | xs (list [~chainer.Variable]): 141 | Input data for each `SubNNP` constituting this HDNNP 142 | instance. The shape of data is 143 | ``n_atom x (n_sample, n_input)``. 144 | dxs (list [~chainer.Variable]): 145 | Differentiated input data. The shape of data is 146 | ``n_atom x (n_sample, n_input, n_deriv)``. 147 | differentiate_more (bool): 148 | If True, more deep calculation graph will be created for 149 | back-propagation or higher-order differentiation. 150 | 151 | Returns: 152 | ~chainer.Variable: 153 | Differentiated output data. The shape of data is 154 | ``(n_sample, n_output, n_deriv)``. 
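        Notes:
            This is the chain rule summed over atoms,
            :math:`\partial y / \partial x_\mu = \sum_a \sum_i
            (\partial y_a / \partial G_{a,i})
            (\partial G_{a,i} / \partial x_\mu)`,
            which the ``F.einsum('soi,six->sox', ...)`` contraction below
            implements.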
155 | """ 156 | for nnp, x in zip(self, xs): 157 | nnp.differentiate(x, differentiate_more) 158 | return sum([F.einsum('soi,six->sox', nnp.results['dy'], dx) 159 | for nnp, dx in zip(self, dxs)]) 160 | 161 | def _predict_d2y(self, xs, dxs, d2xs, differentiate_more): 162 | """Calculate 2nd-order prediction for each `SubNNP`. 163 | 164 | Args: 165 | xs (list [~chainer.Variable]): 166 | Input data for each `SubNNP` constituting this HDNNP 167 | instance. The shape of data is 168 | ``n_atom x (n_sample, n_input)``. 169 | dxs (list [~chainer.Variable]): 170 | Differentiated input data. The shape of data is 171 | ``n_atom x (n_sample, n_input, n_deriv)``. 172 | d2xs (list [~chainer.Variable]): 173 | Double differentiated input data. The shape of data is 174 | ``n_atom x (n_sample, n_input, n_deriv, n_deriv)``. 175 | differentiate_more (bool): 176 | If True, more deep calculation graph will be created for 177 | back-propagation or higher-order differentiation. 178 | 179 | Returns: 180 | ~chainer.Variable: 181 | Double differentiated output data. The shape of data is 182 | ``(n_sample, n_output, n_deriv, n_deriv)``. 183 | """ 184 | for nnp, x in zip(self, xs): 185 | nnp.second_differentiate(x, differentiate_more) 186 | return sum([F.einsum('soij,six,sjy->soxy', nnp.results['d2y'], dx, dx) 187 | + F.einsum('soi,sixy->soxy', nnp.results['dy'], d2x) 188 | for nnp, dx, d2x in zip(self, dxs, d2xs)]) 189 | 190 | 191 | class MasterNNP(chainer.ChainList): 192 | """Responsible for managing the parameters of each element.""" 193 | def __init__(self, elements, *args): 194 | """ 195 | It is implemented as a simple :class:`~chainer.ChainList` of 196 | `SubNNP`. 197 | 198 | Args: 199 | elements (list [str]): Element symbols must be unique. 200 | *args: Positional arguments that is passed to `SubNNP`. 201 | """ 202 | super().__init__(*[SubNNP(element, *args) for element in elements]) 203 | 204 | def dump_params(self): 205 | """Dump its own parameters as :obj:`str`. 206 | 207 | Returns: 208 | str: Formed parameters. 209 | """ 210 | params_str = '' 211 | for nnp in self: 212 | element = nnp.element 213 | depth = len(nnp) 214 | for i in range(depth): 215 | weight = getattr(nnp, f'fc_layer{i}').W.data 216 | bias = getattr(nnp, f'fc_layer{i}').b.data 217 | activation = getattr(nnp, f'activation_function{i}').__name__ 218 | weight_str = ('\n'+' '*16).join([' '.join(map(str, row)) 219 | for row in weight.T]) 220 | bias_str = ' '.join(map(str, bias)) 221 | 222 | params_str += f''' 223 | {element} {i} {weight.shape[1]} {weight.shape[0]} {activation} 224 | # weight 225 | {weight_str} 226 | # bias 227 | {bias_str} 228 | ''' 229 | 230 | return params_str 231 | 232 | 233 | class SubNNP(chainer.Chain): 234 | """Feed-forward neural network representing one element or atom.""" 235 | def __init__(self, element, n_feature, hidden_layers, n_property): 236 | """ 237 | | ``element`` is registered as a persistent value. 238 | | It consists of repetition of fully connected layer and 239 | activation function. 240 | | Weight initializer is :obj:`chainer.initializers.HeNormal`. 241 | 242 | Args: 243 | element (str): Element symbol represented by an instance. 244 | n_feature (int): Number of nodes of input layer. 245 | hidden_layers (list [tuple [int, str]]): 246 | A neural network structure. Last one is output layer, 247 | and the remains are hidden layers. Each element is a 248 | tuple ``(# of nodes, activation function)``, for example 249 | ``(50, 'sigmoid')``. 
Only activation functions 250 | implemented in `chainer.functions`_ can be used. 251 | n_property (int): Number of nodes of output layer. 252 | 253 | .. _`chainer.functions`: 254 | https://docs.chainer.org/en/stable/reference/functions.html 255 | """ 256 | super().__init__() 257 | self.add_persistent('element', element) 258 | self._n_layer = len(hidden_layers) + 1 259 | nodes = [n_feature, *[layer[0] for layer in hidden_layers], n_property] 260 | activations = [*[layer[1] for layer in hidden_layers], 'identity'] 261 | with self.init_scope(): 262 | w = chainer.initializers.HeNormal() 263 | for i, (in_size, out_size, activation) in enumerate(zip( 264 | nodes[:-1], nodes[1:], activations)): 265 | setattr(self, f'activation_function{i}', 266 | eval(f'F.{activation}')) 267 | setattr(self, f'fc_layer{i}', 268 | L.Linear(in_size, out_size, initialW=w)) 269 | self.results = {} 270 | 271 | def __len__(self): 272 | """Return the number of hidden_layers.""" 273 | return self._n_layer 274 | 275 | def feedforward(self, x): 276 | """Propagate input data in a feed-forward way. 277 | 278 | Args: 279 | x (~chainer.Variable): 280 | Input data which has the shape ``(n_sample, n_input)``. 281 | """ 282 | h = x 283 | for i in range(self._n_layer): 284 | h = eval(f'self.activation_function{i}(self.fc_layer{i}(h))') 285 | y = h 286 | self.results['y'] = y 287 | 288 | def differentiate(self, x, enable_double_backprop): 289 | """Calculate derivative of the output data w.r.t. input data. 290 | 291 | Args: 292 | x (~chainer.Variable): 293 | Input data which has the shape ``(n_sample, n_input)``. 294 | enable_double_backprop (bool): 295 | Passed to :func:`chainer.grad` to determine whether to 296 | create more deep calculation graph or not. 297 | """ 298 | dy = [chainer.grad([output_node], [x], 299 | enable_double_backprop=enable_double_backprop)[0] 300 | for output_node in F.moveaxis(self.results['y'], 0, -1)] 301 | dy = F.stack(dy, axis=1) 302 | self.results['dy'] = dy 303 | 304 | def second_differentiate(self, x, enable_double_backprop): 305 | """Calculate 2nd derivative of the output data w.r.t. input 306 | data. 307 | 308 | Args: 309 | x (~chainer.Variable): 310 | Input data which has the shape ``(n_sample, n_input)``. 311 | enable_double_backprop (bool): 312 | Passed to :func:`chainer.grad` to determine whether to 313 | create more deep calculation graph or not. 
314 | """ 315 | d2y = [[chainer.grad([derivative], [x], 316 | enable_double_backprop=enable_double_backprop)[0] 317 | for derivative in dy_] 318 | for dy_ in F.moveaxis(self.results['dy'], 0, -1)] 319 | d2y = F.stack([F.stack(d2y_, axis=1) for d2y_ in d2y], axis=1) 320 | self.results['d2y'] = d2y 321 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Pre-processing of input dataset subpackage.""" 4 | 5 | __all__ = [ 6 | 'PREPROCESS', 7 | ] 8 | 9 | from hdnnpy.preprocess.pca import PCA 10 | from hdnnpy.preprocess.scaling import Scaling 11 | from hdnnpy.preprocess.standardization import Standardization 12 | 13 | PREPROCESS = { 14 | PCA.name: PCA, 15 | Scaling.name: Scaling, 16 | Standardization.name: Standardization, 17 | } 18 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/pca.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Principal component analysis (PCA).""" 4 | 5 | import numpy as np 6 | from sklearn import decomposition 7 | 8 | from hdnnpy.preprocess.preprocess_base import PreprocessBase 9 | from hdnnpy.utils import (MPI, pprint) 10 | 11 | 12 | class PCA(PreprocessBase): 13 | """Principal component analysis (PCA). 14 | 15 | The core part of this class uses `sklearn.decomposition.PCA` 16 | implementation. 17 | """ 18 | name = 'pca' 19 | """str: Name of this class.""" 20 | 21 | def __init__(self, n_components=None): 22 | """ 23 | Args: 24 | n_components (int, optional): 25 | Number of features to keep in decomposition. If 26 | ``None``, decomposition is not performed. 27 | """ 28 | super().__init__() 29 | self._n_components = n_components 30 | self._mean = {} 31 | self._transform = {} 32 | 33 | @property 34 | def n_components(self): 35 | """int or None: Number of features to keep in decomposition.""" 36 | return self._n_components 37 | 38 | @property 39 | def mean(self): 40 | """dict [~numpy.ndarray]: Initialized mean values in each 41 | feature dimension and each element.""" 42 | return self._mean 43 | 44 | @property 45 | def transform(self): 46 | """dict [~numpy.ndarray]: Initialized transformation matrix in 47 | each feature dimension and each element.""" 48 | return self._transform 49 | 50 | def apply(self, dataset, elemental_composition, verbose=True): 51 | """Apply the same pre-processing for each element to dataset. 52 | 53 | It accepts 1 or 2 for length of ``dataset``, each element of 54 | which is regarded as ``0th-order``, ``1st-order``, ... 55 | 56 | Args: 57 | dataset (list [~numpy.ndarray]): Input dataset to be scaled. 58 | elemental_composition (list [str]): 59 | Element symbols corresponding to 1st dimension of 60 | ``dataset``. 61 | verbose (bool, optional): Print log to stdout. 62 | 63 | Returns: 64 | list [~numpy.ndarray]: 65 | Processed dataset to be zero-mean and unit-variance. 
66 | """ 67 | order = len(dataset) - 1 68 | assert 0 <= order <= 2 69 | 70 | self._initialize_params(dataset[0], elemental_composition, verbose) 71 | 72 | mean = np.array( 73 | [self._mean[element] for element in elemental_composition]) 74 | transform = np.array( 75 | [self._transform[element] for element in elemental_composition]) 76 | 77 | if order >= 0: 78 | dataset[0] = np.einsum('saf,aft->sat', dataset[0]-mean, transform) 79 | if order >= 1: 80 | dataset[1] = np.einsum('safx,aft->satx', dataset[1], transform) 81 | if order >= 2: 82 | dataset[2] = np.einsum('safxy,aft->satxy', dataset[2], transform) 83 | 84 | return dataset 85 | 86 | def dump_params(self): 87 | """Dump its own parameters as :obj:`str`. 88 | 89 | Returns: 90 | str: Formed parameters. 91 | """ 92 | params_str = '' 93 | for element in self._elements: 94 | transform = self._transform[element] 95 | mean = self._mean[element] 96 | transform_str = ('\n'+' '*12).join([' '.join(map(str, row)) 97 | for row in transform.T]) 98 | mean_str = ' '.join(map(str, mean)) 99 | 100 | params_str += f''' 101 | {element} {transform.shape[1]} {transform.shape[0]} 102 | # transformation matrix 103 | {transform_str} 104 | # mean 105 | {mean_str} 106 | ''' 107 | 108 | return params_str 109 | 110 | def load(self, file_path, verbose=True): 111 | """Load internal parameters for each element. 112 | 113 | Only root MPI process loads parameters. 114 | 115 | Args: 116 | file_path (~pathlib.Path): File path to load parameters. 117 | verbose (bool, optional): Print log to stdout. 118 | """ 119 | if MPI.rank == 0: 120 | ndarray = np.load(file_path) 121 | self._elements = ndarray['elements'].item() 122 | self._n_components = ndarray['n_components'].item() 123 | self._mean = {element: ndarray[f'mean:{element}'] 124 | for element in self._elements} 125 | self._transform = {element: ndarray[f'transform:{element}'] 126 | for element in self._elements} 127 | if verbose: 128 | pprint(f'Loaded PCA parameters from {file_path}.') 129 | 130 | def save(self, file_path, verbose=True): 131 | """Save internal parameters for each element. 132 | 133 | Only root MPI process saves parameters. 134 | 135 | Args: 136 | file_path (~pathlib.Path): File path to save parameters. 137 | verbose (bool, optional): Print log to stdout. 
138 | """ 139 | if MPI.rank == 0: 140 | info = { 141 | 'elements': self._elements, 142 | 'n_components': self._n_components, 143 | } 144 | mean = {f'mean:{k}': v for k, v in self._mean.items()} 145 | transform = {f'transform:{k}': v 146 | for k, v in self._transform.items()} 147 | np.savez(file_path, **info, **mean, **transform) 148 | if verbose: 149 | pprint(f'Saved PCA parameters to {file_path}.') 150 | 151 | def _initialize_params(self, data, elemental_composition, verbose): 152 | """Initialize parameters only once for new elements.""" 153 | for element in set(elemental_composition) - self._elements: 154 | n_feature = data.shape[2] 155 | mask = np.array(elemental_composition) == element 156 | X = data[:, mask].reshape(-1, n_feature) 157 | pca = decomposition.PCA(n_components=self._n_components) 158 | pca.fit(X) 159 | if self._n_components is None: 160 | self._n_components = pca.n_components_ 161 | self._elements.add(element) 162 | self._mean[element] = pca.mean_.astype(np.float32) 163 | self._transform[element] = pca.components_.T.astype(np.float32) 164 | if verbose: 165 | pprint(f''' 166 | Initialized PCA parameters for {element} 167 | Feature dimension: {n_feature} => {self._n_components} 168 | Cumulative contribution rate = {np.sum(pca.explained_variance_ratio_)} 169 | ''') 170 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/preprocess_base.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Base class of pre-processing. 4 | 5 | If you want to add new pre-processing method to extend HDNNP, inherits 6 | this base class. 7 | """ 8 | 9 | from abc import (ABC, abstractmethod) 10 | 11 | 12 | class PreprocessBase(ABC): 13 | """Base class of pre-processing.""" 14 | name = None 15 | """str: Name of this class.""" 16 | 17 | def __init__(self): 18 | """ 19 | Initialize private variable :attr:`_elements` as a empty `set`. 20 | """ 21 | self._elements = set() 22 | 23 | @property 24 | def elements(self): 25 | """list [str]: List of elements whose parameters have already 26 | been initialized.""" 27 | return sorted(self._elements) 28 | 29 | @abstractmethod 30 | def apply(self, *args, **kwargs): 31 | """Apply the same pre-processing for each element to dataset. 32 | 33 | This is abstract method. 34 | Subclass of this base class have to override. 35 | """ 36 | pass 37 | 38 | @abstractmethod 39 | def dump_params(self): 40 | """Dump its own parameters as :obj:`str`. 41 | 42 | This is abstract method. 43 | Subclass of this base class have to override. 44 | """ 45 | pass 46 | 47 | @abstractmethod 48 | def load(self, *args, **kwargs): 49 | """Load internal parameters for each element. 50 | 51 | This is abstract method. 52 | Subclass of this base class have to override. 53 | """ 54 | pass 55 | 56 | @abstractmethod 57 | def save(self, *args, **kwargs): 58 | """Save internal parameters for each element. 59 | 60 | This is abstract method. 61 | Subclass of this base class have to override. 
62 | """ 63 | pass 64 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/scaling.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Scale all feature values into the certain range.""" 4 | 5 | import numpy as np 6 | 7 | from hdnnpy.preprocess.preprocess_base import PreprocessBase 8 | from hdnnpy.utils import (MPI, pprint) 9 | 10 | 11 | class Scaling(PreprocessBase): 12 | """Scale all feature values into the certain range.""" 13 | name = 'scaling' 14 | """str: Name of this class.""" 15 | 16 | def __init__(self, min_=-1.0, max_=1.0): 17 | """ 18 | Args: 19 | min\_ (float): Target minimum value of scaling. 20 | max\_ (float): Target maximum value of scaling. 21 | """ 22 | assert isinstance(min_, float) 23 | assert isinstance(max_, float) 24 | assert min_ < max_ 25 | super().__init__() 26 | self._max = {} 27 | self._min = {} 28 | self._target_max = max_ 29 | self._target_min = min_ 30 | 31 | @property 32 | def max(self): 33 | """dict [~numpy.ndarray]: Initialized maximum values in each 34 | feature dimension and each element.""" 35 | return self._max 36 | 37 | @property 38 | def min(self): 39 | """dict [~numpy.ndarray]: Initialized minimum values in each 40 | feature dimension and each element.""" 41 | return self._min 42 | 43 | @property 44 | def target(self): 45 | """tuple [float, float]: Target min & max values of scaling.""" 46 | return self._target_min, self._target_max 47 | 48 | def apply(self, dataset, elemental_composition, verbose=True): 49 | """Apply the same pre-processing for each element to dataset. 50 | 51 | It accepts 1 or 2 for length of ``dataset``, each element of 52 | which is regarded as ``0th-order``, ``1st-order``, ... 53 | 54 | Args: 55 | dataset (list [~numpy.ndarray]): Input dataset to be scaled. 56 | elemental_composition (list [str]): 57 | Element symbols corresponding to 1st dimension of 58 | ``dataset``. 59 | verbose (bool, optional): Print log to stdout. 60 | 61 | Returns: 62 | list [~numpy.ndarray]: 63 | Processed dataset into the same min-max range. 64 | """ 65 | order = len(dataset) - 1 66 | assert 0 <= order <= 2 67 | 68 | self._initialize_params(dataset[0], elemental_composition, verbose) 69 | 70 | max_ = np.array( 71 | [self._max[element] for element in elemental_composition]) 72 | min_ = np.array( 73 | [self._min[element] for element in elemental_composition]) 74 | 75 | if order >= 0: 76 | dataset[0] = ((dataset[0] - min_) 77 | / (max_ - min_) 78 | * (self._target_max - self._target_min) 79 | + self._target_min) 80 | if order >= 1: 81 | dataset[1] = (dataset[1] 82 | / (max_ - min_)[..., None] 83 | * (self._target_max - self._target_min)) 84 | if order >= 2: 85 | dataset[2] = (dataset[2] 86 | / (max_ - min_)[..., None, None] 87 | * (self._target_max - self._target_min)) 88 | 89 | return dataset 90 | 91 | def dump_params(self): 92 | """Dump its own parameters as :obj:`str`. 93 | 94 | Returns: 95 | str: Formed parameters. 
96 | """ 97 | params_str = (f''' 98 | # target range 99 | {self._target_max} {self._target_min} 100 | ''') 101 | for element in self._elements: 102 | max_ = self._max[element] 103 | min_ = self._min[element] 104 | max_str = ' '.join(map(str, max_)) 105 | min_str = ' '.join(map(str, min_)) 106 | 107 | params_str += f''' 108 | {element} {max_.shape[0]} 109 | # max 110 | {max_str} 111 | # min 112 | {min_str} 113 | ''' 114 | 115 | return params_str 116 | 117 | def load(self, file_path, verbose=True): 118 | """Load internal parameters for each element. 119 | 120 | Only root MPI process loads parameters. 121 | 122 | Args: 123 | file_path (~pathlib.Path): File path to load parameters. 124 | verbose (bool, optional): Print log to stdout. 125 | """ 126 | if MPI.rank == 0: 127 | ndarray = np.load(file_path) 128 | self._elements = ndarray['elements'].item() 129 | self._max = {element: ndarray[f'max:{element}'] 130 | for element in self._elements} 131 | self._min = {element: ndarray[f'min:{element}'] 132 | for element in self._elements} 133 | if verbose: 134 | pprint(f'Loaded Scaling parameters from {file_path}.') 135 | 136 | def save(self, file_path, verbose=True): 137 | """Save internal parameters for each element. 138 | 139 | Only root MPI process saves parameters. 140 | 141 | Args: 142 | file_path (~pathlib.Path): File path to save parameters. 143 | verbose (bool, optional): Print log to stdout. 144 | """ 145 | if MPI.rank == 0: 146 | info = {'elements': self._elements} 147 | max_ = {f'max:{k}': v for k, v in self._max.items()} 148 | min_ = {f'min:{k}': v for k, v in self._min.items()} 149 | np.savez(file_path, **info, **max_, **min_) 150 | if verbose: 151 | pprint(f'Saved Scaling parameters to {file_path}.') 152 | 153 | def _initialize_params(self, data, elemental_composition, verbose): 154 | """Initialize parameters only once for new elements.""" 155 | for element in set(elemental_composition) - self._elements: 156 | n_feature = data.shape[2] 157 | mask = np.array(elemental_composition) == element 158 | X = data[:, mask].reshape(-1, n_feature) 159 | self._elements.add(element) 160 | self._max[element] = X.max(axis=0) 161 | self._min[element] = X.min(axis=0) 162 | if verbose: 163 | pprint(f'Initialized Scaling parameters for {element}') 164 | -------------------------------------------------------------------------------- /hdnnpy/preprocess/standardization.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Scale all feature values to be zero-mean and unit-variance.""" 4 | 5 | import numpy as np 6 | 7 | from hdnnpy.preprocess.preprocess_base import PreprocessBase 8 | from hdnnpy.utils import (MPI, pprint) 9 | 10 | 11 | class Standardization(PreprocessBase): 12 | """Scale all feature values to be zero-mean and unit-variance.""" 13 | name = 'standardization' 14 | """str: Name of this class.""" 15 | 16 | def __init__(self): 17 | super().__init__() 18 | self._mean = {} 19 | self._std = {} 20 | 21 | @property 22 | def mean(self): 23 | """dict [~numpy.ndarray]: Initialized mean values in each 24 | feature dimension and each element.""" 25 | return self._mean 26 | 27 | @property 28 | def std(self): 29 | """dict [~numpy.ndarray]: Initialized standard deviation values 30 | in each feature dimension and each element.""" 31 | return self._std 32 | 33 | def apply(self, dataset, elemental_composition, verbose=True): 34 | """Apply the same pre-processing for each element to dataset. 
35 | 36 | It accepts 1 or 2 for length of ``dataset``, each element of 37 | which is regarded as ``0th-order``, ``1st-order``, ... 38 | 39 | Args: 40 | dataset (list [~numpy.ndarray]): Input dataset to be scaled. 41 | elemental_composition (list [str]): 42 | Element symbols corresponding to 1st dimension of 43 | ``dataset``. 44 | verbose (bool, optional): Print log to stdout. 45 | 46 | Returns: 47 | list [~numpy.ndarray]: 48 | Processed dataset to be zero-mean and unit-variance. 49 | """ 50 | order = len(dataset) - 1 51 | assert 0 <= order <= 2 52 | 53 | self._initialize_params(dataset[0], elemental_composition, verbose) 54 | 55 | mean = np.array( 56 | [self._mean[element] for element in elemental_composition]) 57 | std = np.array( 58 | [self._std[element] for element in elemental_composition]) 59 | 60 | if order >= 0: 61 | dataset[0] -= mean 62 | dataset[0] /= std 63 | if order >= 1: 64 | dataset[1] /= std[..., None] 65 | if order >= 2: 66 | dataset[2] /= std[..., None, None] 67 | 68 | return dataset 69 | 70 | def dump_params(self): 71 | """Dump its own parameters as :obj:`str`. 72 | 73 | Returns: 74 | str: Formed parameters. 75 | """ 76 | params_str = '' 77 | for element in self._elements: 78 | mean = self._mean[element] 79 | std = self._std[element] 80 | mean_str = ' '.join(map(str, mean)) 81 | std_str = ' '.join(map(str, std)) 82 | 83 | params_str += f''' 84 | {element} {mean.shape[0]} 85 | # mean 86 | {mean_str} 87 | # standard deviation 88 | {std_str} 89 | ''' 90 | 91 | return params_str 92 | 93 | def load(self, file_path, verbose=True): 94 | """Load internal parameters for each element. 95 | 96 | Only root MPI process loads parameters. 97 | 98 | Args: 99 | file_path (~pathlib.Path): File path to load parameters. 100 | verbose (bool, optional): Print log to stdout. 101 | """ 102 | if MPI.rank == 0: 103 | ndarray = np.load(file_path) 104 | self._elements = ndarray['elements'].item() 105 | self._mean = {element: ndarray[f'mean:{element}'] 106 | for element in self._elements} 107 | self._std = {element: ndarray[f'std:{element}'] 108 | for element in self._elements} 109 | if verbose: 110 | pprint(f'Loaded Standardization parameters from {file_path}.') 111 | 112 | def save(self, file_path, verbose=True): 113 | """Save internal parameters for each element. 114 | 115 | Only root MPI process saves parameters. 116 | 117 | Args: 118 | file_path (~pathlib.Path): File path to save parameters. 119 | verbose (bool, optional): Print log to stdout. 
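        Example:
            A hedged sketch of the usual fit-then-save flow during
            training (the file path is arbitrary)::

                >>> std = Standardization()
                >>> inputs = std.apply(inputs, ['Ga', 'Ga', 'N', 'N'])
                >>> std.save(Path('output/standardization.npz'))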
120 | """ 121 | if MPI.rank == 0: 122 | info = {'elements': self._elements} 123 | mean = {f'mean:{k}': v for k, v in self._mean.items()} 124 | std = {f'std:{k}': v for k, v in self._std.items()} 125 | np.savez(file_path, **info, **mean, **std) 126 | if verbose: 127 | pprint(f'Saved Standardization parameters to {file_path}.') 128 | 129 | def _initialize_params(self, data, elemental_composition, verbose): 130 | """Initialize parameters only once for new elements.""" 131 | for element in set(elemental_composition) - self._elements: 132 | n_feature = data.shape[2] 133 | mask = np.array(elemental_composition) == element 134 | X = data[:, mask].reshape(-1, n_feature) 135 | self._elements.add(element) 136 | self._mean[element] = X.mean(axis=0) 137 | self._std[element] = X.std(axis=0, ddof=1) 138 | if verbose: 139 | pprint(f'Initialized Standardization parameters for {element}') 140 | -------------------------------------------------------------------------------- /hdnnpy/training/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Training tools subpackage.""" 4 | 5 | __all__ = [ 6 | 'Manager', 7 | 'Updater', 8 | 'ScatterPlot', 9 | 'set_log_scale', 10 | ] 11 | 12 | from hdnnpy.training.extensions import (ScatterPlot, 13 | set_log_scale, 14 | ) 15 | from hdnnpy.training.manager import Manager 16 | from hdnnpy.training.updater import Updater 17 | -------------------------------------------------------------------------------- /hdnnpy/training/extensions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Custom chainer training extensions.""" 4 | 5 | import chainer 6 | from chainer.training import Extension 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | 11 | class ScatterPlot(Extension): 12 | """Trainer extension to output predictions/labels scatter plots.""" 13 | def __init__(self, dataset, model, comm): 14 | """ 15 | Args: 16 | dataset (HDNNPDataset): 17 | Test dataset to plot a scatter plot. It has to have both 18 | input dataset and label dataset. 19 | model (HighDimensionalNNP): HDNNP model to evaluate. 20 | comm (~chainermn.CommunicatorBase): 21 | ChainerMN communicator instance. 22 | """ 23 | self._order = dataset.property.order 24 | self._model = model 25 | self._comm = comm.mpi_comm 26 | 27 | self._properties = [] 28 | self._coefficients = [] 29 | self._units = [] 30 | self._inputs = [] 31 | self._labels = [] 32 | self._predictions = [] 33 | self._init_labels(dataset) 34 | 35 | def __call__(self, trainer): 36 | """Execute scatter plot extension. 37 | 38 | | Perform prediction with the parameters of the model when this 39 | extension was executed, using the data set at initialization. 40 | | Horizontal axis shows the predicted values and vertical axis 41 | shows the true values. 42 | | Plot configurations are written in :meth:`_plot`. 43 | 44 | Args: 45 | trainer (~chainer.training.Trainer): 46 | Trainer object that invokes this extension. 
47 | """ 48 | with chainer.using_config('train', False), \ 49 | chainer.using_config('enable_backprop', False): 50 | predictions = self._model.predict(self._inputs, self._order) 51 | 52 | for i in range(self._order + 1): 53 | pred_send = predictions[i].data 54 | if self._comm.Get_rank() == 0: 55 | self._comm.Gatherv(pred_send, self._predictions[i], root=0) 56 | self._plot(trainer, 57 | self._coefficients[i] * self._predictions[i], 58 | self._coefficients[i] * self._labels[i], 59 | self._properties[i], self._units[i]) 60 | else: 61 | self._comm.Gatherv(pred_send, None, root=0) 62 | 63 | plt.close('all') 64 | 65 | def _init_labels(self, dataset): 66 | """Gather label dataset to root process and initialize other 67 | instance variables.""" 68 | self._properties = dataset.property.properties 69 | self._coefficients = dataset.property.coefficients 70 | self._units = dataset.property.units 71 | batch = chainer.dataset.concat_examples(dataset) 72 | self._inputs = [batch[f'inputs/{i}'] for i in range(self._order + 1)] 73 | labels = [batch[f'labels/{i}'] for i in range(self._order + 1)] 74 | self._count = np.array(self._comm.gather(len(labels[0]), root=0)) 75 | 76 | for i in range(self._order + 1): 77 | label_send = labels[i] 78 | if self._comm.Get_rank() == 0: 79 | total_size = sum(self._count) 80 | prediction = np.empty((total_size,) + label_send[0].shape, 81 | dtype=np.float32) 82 | self._predictions.append(prediction) 83 | 84 | label = np.empty((total_size,) + label_send[0].shape, 85 | dtype=np.float32) 86 | label_recv = (label, self._count * label_send[0].size) 87 | self._comm.Gatherv(label_send, label_recv, root=0) 88 | self._labels.append(label) 89 | else: 90 | self._comm.Gatherv(label_send, None, root=0) 91 | 92 | @staticmethod 93 | def _plot(trainer, prediction, label, property_, unit): 94 | """Plot and save a scatter plot.""" 95 | fig = plt.figure(figsize=(10, 10)) 96 | min_ = np.min(label) 97 | max_ = np.max(label) 98 | plt.scatter(prediction, label, c='blue'), 99 | plt.xlabel(f'Prediction ({unit})'), 100 | plt.ylabel(f'Label ({unit})'), 101 | plt.xlim(min_, max_), 102 | plt.ylim(min_, max_), 103 | plt.text(0.5, 0.9, 104 | f'{property_} @ epoch={trainer.updater.epoch}', 105 | ha='center', transform=plt.gcf().transFigure) 106 | fig.savefig(trainer.out/f'{property_}.png') 107 | 108 | 109 | def set_log_scale(_, a, __): 110 | """Change y axis scale as log scale.""" 111 | a.set_yscale('log') 112 | -------------------------------------------------------------------------------- /hdnnpy/training/loss_function/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Loss function classses for HDNNP training.""" 4 | 5 | __all__ = [ 6 | 'LOSS_FUNCTION', 7 | ] 8 | 9 | from hdnnpy.training.loss_function.first import First 10 | from hdnnpy.training.loss_function.potential import Potential 11 | from hdnnpy.training.loss_function.zeroth import Zeroth 12 | 13 | LOSS_FUNCTION = { 14 | First.name: First, 15 | Potential.name: Potential, 16 | Zeroth.name: Zeroth, 17 | } 18 | -------------------------------------------------------------------------------- /hdnnpy/training/loss_function/first.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Loss function to optimize 0th and 1st-order property.""" 4 | 5 | import warnings 6 | 7 | import chainer 8 | import chainer.functions as F 9 | 10 | from hdnnpy.training.loss_function.loss_functions_base import ( 11 | LossFunctionBase) 12 | 
13 | 
14 | class First(LossFunctionBase):
15 |     """Loss function to optimize 0th- and 1st-order properties."""
16 |     name = 'first'
17 |     """str: Name of this loss function class."""
18 |     order = {
19 |         'descriptor': 1,
20 |         'property': 1,
21 |         }
22 |     """dict: Required orders of each dataset to calculate loss function.
23 |     """
24 | 
25 |     def __init__(self, model, properties, mixing_beta, **_):
26 |         """
27 |         Args:
28 |             model (HighDimensionalNNP):
29 |                 HDNNP object to optimize parameters.
30 |             properties (list [str]): Names of properties to optimize.
31 |             mixing_beta (float):
32 |                 Mixing parameter of errors of 0th and 1st order.
33 |                 It accepts 0.0 to 1.0. If it is 0.0, the HDNNP is
34 |                 optimized using only the 0th-order property, which is
35 |                 equivalent to the loss function ``Zeroth``. If it is
36 |                 1.0, the HDNNP is optimized using only the 1st-order
37 |                 property.
38 |         """
39 |         assert 0.0 <= mixing_beta <= 1.0
40 |         super().__init__(model)
41 |         self._observation_keys = [
42 |             f'RMSE/{properties[0]}', f'RMSE/{properties[1]}', 'total']
43 |         self._mixing_beta = mixing_beta
44 | 
45 |         if mixing_beta == 0.0:
46 |             warnings.warn(
47 |                 'If mixing_beta=0.0, you should use loss function type '
48 |                 '`zeroth` instead of `first`.')
49 | 
50 |     def eval(self, **dataset):
51 |         """Calculate loss function from given datasets and model.
52 | 
53 |         Args:
54 |             **dataset (~numpy.ndarray):
55 |                 Datasets passed as kwargs. Name of each key is in the
56 |                 format 'inputs/N' or 'labels/N'. 'N' is the order of
57 |                 the dataset.
58 | 
59 |         Returns:
60 |             ~chainer.Variable:
61 |                 A scalar value calculated with loss function.
62 |         """
63 |         inputs = [dataset[f'inputs/{i}'] for i
64 |                   in range(self.order['descriptor'] + 1)]
65 |         labels = [dataset[f'labels/{i}'] for i
66 |                   in range(self.order['property'] + 1)]
67 |         predictions = self._model.predict(inputs, self.order['descriptor'])
68 |         loss0 = F.mean_squared_error(predictions[0], labels[0])
69 |         loss1 = F.mean_squared_error(predictions[1], labels[1])
70 |         total_loss = ((1.0 - self._mixing_beta) * loss0
71 |                       + self._mixing_beta * loss1)
72 | 
73 |         RMSE0 = F.sqrt(loss0)
74 |         RMSE1 = F.sqrt(loss1)
75 |         total = ((1.0 - self._mixing_beta) * RMSE0
76 |                  + self._mixing_beta * RMSE1)
77 | 
78 |         observation = {
79 |             self._observation_keys[0]: RMSE0,
80 |             self._observation_keys[1]: RMSE1,
81 |             self._observation_keys[2]: total,
82 |             }
83 |         chainer.report(observation, observer=self._model)
84 |         return total_loss
85 | 
--------------------------------------------------------------------------------
/hdnnpy/training/loss_function/loss_functions_base.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | """Base class for loss functions."""
4 | 
5 | from abc import (ABC, abstractmethod)
6 | 
7 | 
8 | class LossFunctionBase(ABC):
9 |     """Base class for loss functions."""
10 |     name = None
11 |     """str: Name of this loss function class."""
12 |     order = {
13 |         'descriptor': None,
14 |         'property': None,
15 |         }
16 |     """dict: Required orders of each dataset to calculate loss function.
17 |     """
18 | 
19 |     def __init__(self, model):
20 |         """
21 |         Args:
22 |             model (HighDimensionalNNP):
23 |                 HDNNP object to optimize parameters.
24 |         """
25 |         self._model = model
26 |         self._observation_keys = []
27 | 
28 |     @property
29 |     def observation_keys(self):
30 |         """list [str]: Names of metrics which trainer observes."""
31 |         return self._observation_keys
32 | 
33 |     @abstractmethod
34 |     def eval(self, **dataset):
35 |         """Calculate loss function from given datasets and model.
36 | 
37 |         This is an abstract method.
38 |         Subclasses of this base class have to override it.
39 | 
40 |         Args:
41 |             **dataset (~numpy.ndarray):
42 |                 Datasets passed as kwargs. Name of each key is in the
43 |                 format 'inputs/N' or 'labels/N'. 'N' is the order of
44 |                 the dataset.
45 | 
46 |         Returns:
47 |             ~chainer.Variable:
48 |                 A scalar value calculated with loss function.
49 |         """
50 |         pass
51 | 
--------------------------------------------------------------------------------
/hdnnpy/training/loss_function/potential.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | """Loss function to optimize the 0th-order property as a scalar potential."""
4 | 
5 | import warnings
6 | 
7 | import chainer
8 | import chainer.functions as F
9 | 
10 | from hdnnpy.training.loss_function.loss_functions_base import (
11 |     LossFunctionBase)
12 | 
13 | 
14 | class Potential(LossFunctionBase):
15 |     """Loss function to optimize the 0th-order property as a scalar
16 |     potential."""
17 |     name = 'potential'
18 |     """str: Name of this loss function class."""
19 |     order = {
20 |         'descriptor': 2,
21 |         'property': 1,
22 |         }
23 |     """dict: Required orders of each dataset to calculate loss function.
24 |     """
25 | 
26 |     def __init__(
27 |             self, model, properties, mixing_beta, summation, rotation, **_):
28 |         r"""
29 |         Args:
30 |             model (HighDimensionalNNP):
31 |                 HDNNP object to optimize parameters.
32 |             properties (list [str]): Names of properties to optimize.
33 |             mixing_beta (float):
34 |                 Mixing parameter of errors of 0th and 1st order.
35 |                 It accepts 0.0 to 1.0. If it is 0.0, the HDNNP is
36 |                 optimized using only the 0th-order property, which is
37 |                 equivalent to the loss function ``Zeroth``. If it is
38 |                 1.0, the HDNNP is optimized using only the 1st-order
39 |                 property.
40 |             summation (float):
41 |                 Coefficient of the penalty term on the summation of the
42 |                 1st-order property. This loss function adds the
43 |                 following penalty on the 1st-order property vector:
44 |                 :math:`\sum_{i,\alpha} F_{i,\alpha} = 0`
45 |             rotation (float):
46 |                 Coefficient of the penalty term on the rotation of the
47 |                 1st-order property. This loss function adds the
48 |                 following penalty on the 1st-order property vector:
49 |                 :math:`\nabla \times \boldsymbol{F} = 0`
50 |         """
51 |         assert 0.0 <= mixing_beta <= 1.0
52 |         assert 0.0 <= summation
53 |         assert 0.0 <= rotation
54 |         super().__init__(model)
55 |         self._observation_keys = [
56 |             f'RMSE/{properties[0]}', f'RMSE/{properties[1]}',
57 |             f'AbsMean/{properties[1]}', f'RMS/rot-{properties[1]}',
58 |             'total']
59 |         self._mixing_beta = mixing_beta
60 |         self._summation = summation
61 |         self._rotation = rotation
62 | 
63 |         if mixing_beta == 0.0:
64 |             warnings.warn(
65 |                 'If mixing_beta=0.0, you should use loss function type '
66 |                 '`zeroth` instead of `potential`.')
67 |         if rotation == 0.0:
68 |             warnings.warn(
69 |                 'If rotation=0.0, you should use loss function type '
70 |                 '`first` instead of `potential`.')
71 | 
72 |     def eval(self, **dataset):
73 |         """Calculate loss function from given datasets and model.
74 | 
75 |         Args:
76 |             **dataset (~numpy.ndarray):
77 |                 Datasets passed as kwargs. Name of each key is in the
78 |                 format 'inputs/N' or 'labels/N'. 'N' is the order of
79 |                 the dataset.
80 | 
81 |         Returns:
82 |             ~chainer.Variable:
83 |                 A scalar value calculated with loss function.
82 | """ 83 | inputs = [dataset[f'inputs/{i}'] for i 84 | in range(self.order['descriptor'] + 1)] 85 | labels = [dataset[f'labels/{i}'] for i 86 | in range(self.order['property'] + 1)] 87 | predictions = self._model.predict(inputs, self.order['descriptor']) 88 | 89 | loss0 = F.mean_squared_error(predictions[0], labels[0]) 90 | loss1 = F.mean_squared_error(predictions[1], labels[1]) 91 | loss_sum1 = F.mean(predictions[1]) 92 | transverse = F.swapaxes(predictions[2], 2, 3) 93 | loss_rot = F.mean(F.square((predictions[2] - transverse) 94 | / (predictions[2] + transverse))) 95 | total_loss = ((1.0 - self._mixing_beta) * loss0 96 | + self._mixing_beta * loss1 97 | + self._summation * loss_sum1 98 | + self._rotation * loss_rot) 99 | 100 | RMSE0 = F.sqrt(loss0) 101 | RMSE1 = F.sqrt(loss1) 102 | AbsMean1 = F.absolute(loss_sum1) 103 | RMS_rot = F.sqrt(loss_rot) 104 | total = ((1.0 - self._mixing_beta) * RMSE0 105 | + self._mixing_beta * RMSE1 106 | + self._summation * AbsMean1 107 | + self._rotation * RMS_rot) 108 | 109 | observation = { 110 | self._observation_keys[0]: RMSE0, 111 | self._observation_keys[1]: RMSE1, 112 | self._observation_keys[2]: AbsMean1, 113 | self._observation_keys[3]: RMS_rot, 114 | self._observation_keys[4]: total, 115 | } 116 | chainer.report(observation, observer=self._model) 117 | return total_loss 118 | -------------------------------------------------------------------------------- /hdnnpy/training/loss_function/zeroth.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Loss function to optimize 0th-order property.""" 4 | 5 | import chainer 6 | import chainer.functions as F 7 | 8 | from hdnnpy.training.loss_function.loss_functions_base import ( 9 | LossFunctionBase) 10 | 11 | 12 | class Zeroth(LossFunctionBase): 13 | """Loss function to optimize 0th-order property.""" 14 | name = 'zeroth' 15 | """str: Name of this loss function class.""" 16 | order = { 17 | 'descriptor': 0, 18 | 'property': 0, 19 | } 20 | """dict: Required orders of each dataset to calculate loss function. 21 | """ 22 | 23 | def __init__(self, model, properties, **_): 24 | """ 25 | Args: 26 | model (HighDimensionalNNP): 27 | HDNNP object to optimize parameters. 28 | properties (list [str]): Names of properties to optimize. 29 | """ 30 | super().__init__(model) 31 | self._observation_keys = [f'RMSE/{properties[0]}', 'total'] 32 | 33 | def eval(self, **dataset): 34 | """Calculate loss function from given datasets and model. 35 | 36 | Args: 37 | **dataset (~numpy.ndarray): 38 | Datasets passed as kwargs. Name of each key is in the 39 | format 'inputs/N' or 'labels/N'. 'N' is the order of 40 | the dataset. 41 | 42 | Returns: 43 | ~chainer.Variable: 44 | A scalar value calculated with loss function. 
45 | """ 46 | inputs = [dataset[f'inputs/{i}'] for i 47 | in range(self.order['descriptor'] + 1)] 48 | labels = [dataset[f'labels/{i}'] for i 49 | in range(self.order['property'] + 1)] 50 | predictions = self._model.predict(inputs, self.order['descriptor']) 51 | loss0 = F.mean_squared_error(predictions[0], labels[0]) 52 | RMSE0 = F.sqrt(loss0) 53 | 54 | observation = { 55 | self._observation_keys[0]: RMSE0, 56 | self._observation_keys[1]: RMSE0, 57 | } 58 | chainer.report(observation, observer=self._model) 59 | return loss0 60 | -------------------------------------------------------------------------------- /hdnnpy/training/manager.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Context manager to take trainer snapshot and decide whether to train 4 | or not.""" 5 | 6 | from contextlib import AbstractContextManager 7 | import pickle 8 | import signal 9 | 10 | import chainer 11 | import numpy as np 12 | 13 | from hdnnpy.utils import (MPI, pprint) 14 | 15 | 16 | class Manager(AbstractContextManager): 17 | """Context manager to take trainer snapshot and decide whether to 18 | train or not.""" 19 | def __init__(self, tag, trainer, result, is_snapshot=True): 20 | """ 21 | Args: 22 | tag (str): Tag of dataset used for training. 23 | trainer (~chainer.training.Trainer): 24 | Trainer object to be managed. 25 | result (dict): 26 | Dictionary object containing total elapsed time and 27 | metrics value corresponding to the type of loss 28 | function. Even when training is stopped / resumed, it is 29 | retained. 30 | is_snapshot (bool, optional): Take trainer snapshot if True. 31 | """ 32 | self._tag = tag 33 | self._trainer = trainer 34 | self._result = result 35 | self._is_snapshot = is_snapshot 36 | self._is_allow = True 37 | self._trainer_snapshot = trainer.out / 'trainer_snapshot.npz' 38 | self._interim_result = trainer.out / 'interim_result.pickle' 39 | self._signum = None 40 | 41 | def __enter__(self): 42 | """Replace signal handler of SIGINT and SIGTERM.""" 43 | self._old_sigint_handler = signal.signal( 44 | signal.SIGINT, self._snapshot) 45 | self._old_sigterm_handler = signal.signal( 46 | signal.SIGTERM, self._snapshot) 47 | 48 | def __exit__(self, type_, value, traceback): 49 | """Restore signal handler of SIGINT and SIGTERM, and record the 50 | result of training.""" 51 | signal.signal(signal.SIGINT, self._old_sigint_handler) 52 | signal.signal(signal.SIGTERM, self._old_sigterm_handler) 53 | if not self._signum: 54 | self._result['training_time'] += self._trainer.elapsed_time 55 | observation = { 56 | k: v.data.item() if isinstance(v, chainer.Variable) 57 | else v.item() if isinstance(v, np.float64) 58 | else v 59 | for k, v in self._trainer.observation.items()} 60 | self._result['observation'].append( 61 | {'tag': self._tag, **observation}) 62 | 63 | @property 64 | def allow_to_run(self): 65 | """Whether the given trainer can train with the dataset.""" 66 | return self._is_allow 67 | 68 | def check_to_resume(self, resume_tag): 69 | """Decide whether to train or not. 70 | 71 | If current tag of dataset is equal to ``resume_tag``, restore 72 | the state of trainer from snapshot file. 73 | 74 | Args: 75 | resume_tag (str): 76 | Tag of dataset when snapshot was taken last time. 
77 | """ 78 | if self._tag == resume_tag: 79 | self._resume() 80 | self._is_allow = True 81 | elif self._trainer_snapshot.exists(): 82 | self._is_allow = False 83 | else: 84 | self._is_allow = True 85 | 86 | def _resume(self): 87 | """Restore the state of trainer from snapshot file.""" 88 | pprint(f'Resume training loop from dataset tagged "{self._tag}"') 89 | chainer.serializers.load_npz(self._trainer_snapshot, self._trainer) 90 | interim_result = pickle.loads(self._interim_result.read_bytes()) 91 | self._result['training_time'] += interim_result['training_time'] 92 | self._result['observation'].extend(interim_result['observation']) 93 | # remove snapshot 94 | if MPI.rank == 0: 95 | self._trainer_snapshot.unlink() 96 | self._interim_result.unlink() 97 | 98 | def _snapshot(self, signum, _): 99 | """Take trainer snapshot.""" 100 | self._signum = signal.Signals(signum) 101 | if self._is_snapshot and MPI.rank == 0: 102 | pprint(f'Stop {self._tag} training by signal:' 103 | f' {self._signum.name}!\n' 104 | f'Take trainer snapshot at epoch:' 105 | f' {self._trainer.updater.epoch}') 106 | chainer.serializers.save_npz(self._trainer_snapshot, self._trainer) 107 | self._interim_result.write_bytes(pickle.dumps(self._result)) 108 | 109 | # must raise any Exception to stop trainer.run() 110 | raise InterruptedError( 111 | f'Chainer training loop is interrupted by {self._signum.name}') 112 | -------------------------------------------------------------------------------- /hdnnpy/training/updater.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Updater for HDNNP training.""" 4 | 5 | import chainer 6 | 7 | 8 | class Updater(chainer.training.updaters.StandardUpdater): 9 | """Updater for HDNNP training using `HighDimensionalNNP` and 10 | `MasterNNP`.""" 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | 14 | def update_core(self): 15 | """Calculate gradient of parameters using `HighDimensionalNNP` 16 | and collect them in `MasterNNP` and update parameters.""" 17 | master_opt = self.get_optimizer('master') 18 | main_opt = self.get_optimizer('main') 19 | master_nnp = master_opt.target 20 | hdnnp = main_opt.target 21 | 22 | batch = self.converter(self.get_iterator('main').next(), self.device) 23 | 24 | master_nnp.cleargrads() 25 | hdnnp.cleargrads() 26 | 27 | loss = self.loss_func(**batch) 28 | loss.backward() 29 | 30 | hdnnp.reduce_grad_to(master_nnp) 31 | master_opt.update() 32 | hdnnp.sync_param_with(master_nnp) 33 | -------------------------------------------------------------------------------- /hdnnpy/utils.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """Utility functions used in various subpackages.""" 4 | 5 | __all__ = [ 6 | 'MPI', 7 | 'pprint', 8 | 'pyyaml_path_constructor', 9 | 'pyyaml_path_representer', 10 | 'recv_chunk', 11 | 'send_chunk', 12 | ] 13 | 14 | from pathlib import Path 15 | import pickle 16 | from pprint import pprint as pretty_print 17 | import sys 18 | import textwrap 19 | 20 | from mpi4py import MPI as MPI4PY 21 | 22 | 23 | INT_MAX = 2147483647 24 | 25 | 26 | class MPI: 27 | """MPI world communicator and aliases.""" 28 | comm = MPI4PY.COMM_WORLD 29 | rank = MPI4PY.COMM_WORLD.Get_rank() 30 | size = MPI4PY.COMM_WORLD.Get_size() 31 | 32 | 33 | def pprint(data=None, flush=True, **options): 34 | """Pretty print function. 35 | 36 | Args: 37 | data (str, optional): Data to output into stdout. 
38 |         flush (bool, optional): Flush the stream after output if True.
39 |         **options: Other options passed to :func:`print`.
40 |     """
41 |     if data is None:
42 |         data = ''
43 |     if isinstance(data, (list, dict)):
44 |         pretty_print(data, **options)
45 |     else:
46 |         data = textwrap.dedent(data)
47 |         if 'stream' in options:
48 |             options['file'] = options.pop('stream')
49 |         print(data, **options)
50 |     if flush:
51 |         sys.stdout.flush()
52 | 
53 | 
54 | def pyyaml_path_constructor(loader, node):
55 |     """Helper function to load a Path tag in PyYAML."""
56 |     value = loader.construct_scalar(node)
57 |     return Path(value)
58 | 
59 | 
60 | def pyyaml_path_representer(dumper, instance):
61 |     """Helper function to dump :class:`~pathlib.Path` in PyYAML."""
62 |     return dumper.represent_scalar('Path', f'{instance}')
63 | 
64 | 
65 | def recv_chunk(source, max_buf_len=256 * 1024 * 1024):
66 |     """Receive data divided into small chunks with MPI communication.
67 | 
68 |     Args:
69 |         source (int): MPI source process that sends data.
70 |         max_buf_len (int, optional): Maximum size of each chunk.
71 | 
72 |     Returns:
73 |         object: Received data.
74 |     """
75 |     assert max_buf_len < INT_MAX
76 |     assert max_buf_len > 0
77 |     data = MPI.comm.recv(source=source, tag=1)
78 |     assert data is not None
79 |     total_chunk_num, max_buf_len, total_bytes = data
80 |     pickled_bytes = bytearray()
81 | 
82 |     for i in range(total_chunk_num):
83 |         b = i * max_buf_len
84 |         e = min(b + max_buf_len, total_bytes)
85 |         buf = bytearray(e - b)
86 |         MPI.comm.Recv(buf, source=source, tag=2)
87 |         pickled_bytes[b:e] = buf
88 | 
89 |     obj = pickle.loads(pickled_bytes)
90 |     return obj
91 | 
92 | 
93 | def send_chunk(obj, dest, max_buf_len=256 * 1024 * 1024):
94 |     """Send data divided into small chunks with MPI communication.
95 | 
96 |     Args:
97 |         obj (object): Any data to send, which can be pickled.
98 |         dest (int): MPI destination process that receives data.
99 |         max_buf_len (int, optional): Maximum size of each chunk.
100 | """ 101 | assert max_buf_len < INT_MAX 102 | assert max_buf_len > 0 103 | pickled_bytes = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL) 104 | total_bytes = len(pickled_bytes) 105 | total_chunk_num = -(-total_bytes // max_buf_len) 106 | MPI.comm.send( 107 | (total_chunk_num, max_buf_len, total_bytes), dest=dest, tag=1) 108 | 109 | for i in range(total_chunk_num): 110 | b = i * max_buf_len 111 | e = min(b + max_buf_len, total_bytes) 112 | buf = pickled_bytes[b:e] 113 | MPI.comm.Send(buf, dest=dest, tag=2) 114 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | 3 | build: 4 | image: latest 5 | 6 | conda: 7 | file: condaenv.yaml 8 | 9 | python: 10 | version: 3.6 11 | pip_install: true 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ase 2 | chainer 3 | matplotlib 4 | mpi4py 5 | numpy 6 | PyYAML 7 | scikit-learn 8 | scipy 9 | tqdm 10 | traitlets 11 | -------------------------------------------------------------------------------- /scripts/merge_xyz: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from pathlib import Path 5 | import sys 6 | 7 | import ase.io 8 | 9 | args = sys.argv 10 | step = int(args[1]) 11 | in_dir = args[2] 12 | output = args[3] 13 | 14 | for f in Path(in_dir).glob('*.xyz'): 15 | images = ase.io.read(f, index=f'::{step}', format='xyz') 16 | ase.io.write(output, images, format='xyz', append=True) 17 | -------------------------------------------------------------------------------- /scripts/outcar2xyz: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import sys 5 | 6 | import ase.io 7 | 8 | args = sys.argv 9 | 10 | if len(args) != 4: 11 | print(f""" 12 | Error: args should have 3 but has {len(args)-1} 13 | 14 | The format should be 15 | 16 | $ outcar2xyz [PREFIX] [OUTCAR] [XYZFILE] 17 | """) 18 | sys.exit(1) 19 | 20 | prefix = args[1] 21 | infile = args[2] 22 | outfile = args[3] 23 | 24 | for atoms in ase.io.iread(infile, index=':', format='vasp-out'): 25 | # stress = atoms.get_stress(voigt=False) 26 | # atoms.set_param_value('stress', stress) 27 | atoms.info['tag'] = prefix + atoms.get_chemical_formula() 28 | ase.io.write(outfile, atoms, format='xyz', append=True) 29 | -------------------------------------------------------------------------------- /scripts/poscars2xyz: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import sys 5 | 6 | import ase.io 7 | 8 | args = sys.argv 9 | 10 | if len(args) < 4: 11 | print(f""" 12 | Error: args should have 4 or more but has {len(args)-1} 13 | 14 | The format should be 15 | 16 | $ poscars2xyz [PREFIX] [POSCARs]+ [XYZFILE] 17 | """) 18 | sys.exit(1) 19 | 20 | prefix = args[1] 21 | poscars = args[2:-1] 22 | xyz = args[-1] 23 | 24 | for poscar in poscars: 25 | atoms = ase.io.read(poscar, format='vasp') 26 | atoms.info['tag'] = prefix + atoms.get_chemical_formula() 27 | ase.io.write(xyz, atoms, format='xyz', append=True) 28 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from setuptools import setup 4 | from hdnnpy import __version__ 5 | 6 | setup( 7 | name='hdnnpy', 8 | version=__version__, 9 | description='High Dimensional Neural Network Potential package', 10 | long_description=open('README.md').read(), 11 | author='Masayoshi Ogura', 12 | author_email='ogura@cello.t.u-tokyo.ac.jp', 13 | url='https://github.com/ogura-edu/HDNNP', 14 | license='MIT', 15 | packages=[ 16 | 'hdnnpy', 17 | 'hdnnpy.cli', 18 | 'hdnnpy.dataset', 19 | 'hdnnpy.dataset.descriptor', 20 | 'hdnnpy.dataset.property', 21 | 'hdnnpy.format', 22 | 'hdnnpy.model', 23 | 'hdnnpy.preprocess', 24 | 'hdnnpy.training', 25 | 'hdnnpy.training.loss_function', 26 | ], 27 | scripts=[ 28 | 'scripts/merge_xyz', 29 | 'scripts/outcar2xyz', 30 | 'scripts/poscars2xyz', 31 | ], 32 | entry_points={ 33 | 'console_scripts': ['hdnnpy = hdnnpy.cli:main'], 34 | }, 35 | zip_safe=False, 36 | ) 37 | --------------------------------------------------------------------------------
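
A closing note on the loss-function subpackage above: LOSS_FUNCTION in hdnnpy/training/loss_function/__init__.py maps each class's `name` attribute to the class itself, so adding a new loss term only requires subclassing LossFunctionBase and registering the subclass in that dict. The following is a minimal, hypothetical sketch, not part of the repository: the class name `MAE`, its `name` string `'mae'`, and the choice of mean absolute error are illustrative assumptions; it only reuses the `model.predict(inputs, order)` and `chainer.report` interface that `Zeroth` above already uses.

# coding: utf-8

"""Hypothetical example: a mean-absolute-error loss for the 0th-order property.

This mirrors ``Zeroth`` and is only a sketch; it is not shipped with hdnnpy.
"""

import chainer
import chainer.functions as F

from hdnnpy.training.loss_function.loss_functions_base import (
    LossFunctionBase)


class MAE(LossFunctionBase):
    """Optimize the 0th-order property with a mean absolute error."""
    name = 'mae'
    order = {
        'descriptor': 0,
        'property': 0,
        }

    def __init__(self, model, properties, **_):
        super().__init__(model)
        self._observation_keys = [f'MAE/{properties[0]}', 'total']

    def eval(self, **dataset):
        inputs = [dataset[f'inputs/{i}'] for i
                  in range(self.order['descriptor'] + 1)]
        labels = [dataset[f'labels/{i}'] for i
                  in range(self.order['property'] + 1)]
        predictions = self._model.predict(inputs, self.order['descriptor'])
        # Mean absolute error of the 0th-order property (e.g. total energy)
        loss0 = F.mean_absolute_error(predictions[0], labels[0])
        chainer.report({self._observation_keys[0]: loss0,
                        self._observation_keys[1]: loss0},
                       observer=self._model)
        return loss0


# Registration (hypothetical): add the class to LOSS_FUNCTION in
# hdnnpy/training/loss_function/__init__.py so that configuration files can
# select it by name:
#     LOSS_FUNCTION[MAE.name] = MAE

If such a class existed, selecting it would work the same way as the built-in types: the training configuration refers to a loss function by the key under which its class is registered in LOSS_FUNCTION.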