├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── dco.yml ├── .gitignore ├── .travis.yml ├── .whitesource ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MAINTAINERS.md ├── MANIFEST.in ├── Pipfile ├── README.md ├── example_data ├── Cu-BTT_500165.0_198.000000.csv └── enthalpy_data.csv ├── example_workflow ├── README.md ├── environment.yml ├── modules │ ├── raspa_input.py │ ├── raspa_output.py │ └── tools.py └── run.py ├── pymser_tutorial.ipynb ├── pyproject.toml ├── renovate.json ├── setup.cfg ├── setup.py ├── src └── pymser │ ├── __init__.py │ └── pymser.py └── version.py /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **As a** ... 2 | **I need** ... 3 | **So that** ... 4 | 5 | **Assumptions:** 6 | * ... 7 | * ... 8 | 9 | **Acceptance criteria:** 10 | ``` 11 | Given ... 12 | When ... 13 | Then ... 14 | ``` 15 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | ... 4 | 5 | ## Checklist 6 | 7 | - [ ] I ran the appropriate tests. 8 | - [ ] I ran `flake8 --max-line-length=100`. 9 | - [ ] I wrote documentation for all new features. 10 | - [ ] I have added any new dependencies (libraries and/or tools) to `setup.py`, `Pipfile` and `README.md`. 11 | 12 | ## Related Issue(s) 13 | 14 | Closes # 15 | 16 | ## Notes to Reviewer 17 | 18 | ... 19 | -------------------------------------------------------------------------------- /.github/dco.yml: -------------------------------------------------------------------------------- 1 | # This enables DCO bot for you, please take a look https://github.com/probot/dco 2 | # for more details. 3 | require: 4 | members: false 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # MacOS 2 | .DS_Store 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | 109 | # IBM Cloud dev plugins 110 | .ibm-project 111 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: focal 2 | 3 | language: python 4 | 5 | python: 6 | - "3.9" 7 | 8 | git: 9 | depth: 5 10 | 11 | branches: 12 | except: 13 | - /^v(\d+\.?)+/ 14 | 15 | addons: 16 | apt: 17 | update: true 18 | packages: 19 | - gettext 20 | 21 | install: 22 | - pip install --quiet --upgrade pip bump2version flake8 23 | 24 | before_script: 25 | # Define branch-dependent environment variables 26 | - TAG="${TRAVIS_PULL_REQUEST_BRANCH:-$TRAVIS_BRANCH}"; 27 | - if [ "$TRAVIS_BRANCH" == "release" ]; then 28 | UPDATE=minor; 29 | elif [ "$TRAVIS_BRANCH" == "master" ]; then 30 | UPDATE=patch; 31 | elif [ "$TRAVIS_BRANCH" == "main" ]; then 32 | UPDATE=patch; 33 | else 34 | UPDATE=none; 35 | fi; 36 | 37 | script: 38 | # Run Python linters 39 | - flake8 --max-line-length=100 40 | 41 | # Increment package version and prepare for release 42 | - CURRENT_VERSION=$(python version.py) 43 | - if [ "$UPDATE" != "none" ]; then 44 | bump2version --current-version $CURRENT_VERSION $UPDATE setup.py --tag --commit --message $'{new_version} Release\n\n[skip ci]' --verbose; 45 | fi; 46 | 47 | after_script: 48 | - pip list 49 | 50 | deploy: 51 | - provider: pypi 52 | username: "$ARTIFACTORY_USERNAME" 53 | password: "$ARTIFACTORY_ACCESS_TOKEN" 54 | server: "$ARTIFACTORY_URL" 55 | on: 56 | branch: 57 | - master 58 | - release 59 | - provider: pypi 60 | username: "__token__" 61 | password: "$PYPI_API_TOKEN" 62 | on: 63 | branch: 64 | - main 65 | - provider: script 66 | script: git push origin HEAD:"$TAG" --follow-tags 67 | skip_cleanup: true 68 | on: 69 | branch: 70 | - master 71 | - release 72 | - provider: script 73 | script: git remote add public https://oauth:${GITHUB_PERSONAL_ACCESS_TOKEN}@github.com/IBM/pymser.git && git push public HEAD:"$TAG" --follow-tags 74 | skip_cleanup: true 75 | on: 76 | branch: 77 | - main 78 | 79 | env: 80 | global: 81 | # ARTIFACTORY_USERNAME defined via web UI 82 | # ARTIFACTORY_URL defined via web UI 83 | # ARTIFACTORY_ACCESS_TOKEN defined via web UI 84 | # PYPI_API_TOKEN defined via web UI 85 | # GITHUB_PERSONAL_ACCESS_TOKEN defined via web UI 86 | -------------------------------------------------------------------------------- /.whitesource: 
--------------------------------------------------------------------------------
1 | {
2 | "settingsInheritedFrom": "whitesource-config/whitesource-config@python3"
3 | }
4 | 
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | All notable changes to this project will be documented in this file.
4 | 
5 | ## v1.0.20
6 | 
7 | - Use `pyTorch` to calculate the MSE curve, reducing the overall time of the calculation by approximately one order of magnitude on large arrays
8 | - Use `scipy` to calculate the autocorrelation time, reducing the overall time of the calculation by approximately two orders of magnitude on large arrays
9 | 
10 | ## v1.0.18
11 | 
12 | - Use `nanmean` and `nanstd` instead of `mean` and `std` to avoid errors when there are `NaN` values in the data
13 | - Add equilibration status to the printed report and a warning if equilibration is not reached
14 | 
15 | ## v1.0.8
16 | 
17 | - Downgrade requirements for Python from python>=3.10 to python>=3.9
18 | - Add the Standard Error (SE) as a possible uncertainty of the average
19 | - Add the uncorrelated Standard Error (uSE) as a possible uncertainty of the average
20 | - Add the uncorrelated Standard Deviation (uSD) as a possible uncertainty of the average and set it as the default
21 | - Small bug fixes
22 | 
23 | ## v1.0.2
24 | 
25 | - Add files to GitHub repository
26 | - Prepare for PyPI.org release
27 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Contributing In General
2 | Our project welcomes external contributions. If you have an itch, please feel
3 | free to scratch it.
4 | 
5 | To contribute code or documentation, please submit a [pull request](https://github.com/IBM/pymser/pulls).
6 | 
7 | A good way to familiarize yourself with the codebase and contribution process is
8 | to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/IBM/pymser/issues).
9 | Before embarking on a more ambitious contribution, please quickly get in touch with us.
10 | 
11 | **Note: We appreciate your effort, and want to avoid a situation where a contribution
12 | requires extensive rework (by you or by us), sits in backlog for a long time, or
13 | cannot be accepted at all!**
14 | 
15 | ### Proposing new features
16 | 
17 | If you would like to implement a new feature, please [raise an issue](https://github.com/IBM/pymser/issues)
18 | before sending a pull request so the feature can be discussed. This is to avoid
19 | you wasting your valuable time working on a feature that the project developers
20 | are not interested in accepting into the code base.
21 | 
22 | ### Fixing bugs
23 | 
24 | If you would like to fix a bug, please [raise an issue](https://github.com/IBM/pymser/issues) before sending a
25 | pull request so it can be tracked.
26 | 
27 | ### Merge approval
28 | 
29 | The project maintainers use LGTM (Looks Good To Me) in comments on the code
30 | review to indicate acceptance. A change requires LGTMs from two of the
31 | maintainers of each component affected.
32 | 
33 | For a list of the maintainers, see the [MAINTAINERS.md](MAINTAINERS.md) page.
34 | 
35 | ## Legal
36 | 
37 | We have tried to make it as easy as possible to make contributions. This
38 | applies to how we handle the legal aspects of contribution.
We use the
39 | same approach - the [Developer's Certificate of Origin 1.1 (DCO)](https://github.com/hyperledger/fabric/blob/master/docs/source/DCO1.1.txt) - that the Linux® Kernel [community](https://elinux.org/Developer_Certificate_Of_Origin)
40 | uses to manage code contributions.
41 | 
42 | We simply ask that when submitting a patch for review, the developer
43 | must include a sign-off statement in the commit message.
44 | 
45 | Here is an example Signed-off-by line, which indicates that the
46 | submitter accepts the DCO:
47 | 
48 | ```
49 | Signed-off-by: John Doe <john.doe@example.com>
50 | ```
51 | 
52 | You can include this automatically when you commit a change to your
53 | local git repository using the following command:
54 | 
55 | ```
56 | git commit -s
57 | ```
58 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 | 
3 | Copyright (c) 2022, International Business Machines
4 | All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 | 
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
--------------------------------------------------------------------------------
/MAINTAINERS.md:
--------------------------------------------------------------------------------
1 | # MAINTAINERS
2 | 
3 | * Felipe Lopes de Oliveira
4 | * Rodrigo Neumann Barros Ferreira
5 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # Include the README
2 | include README.md
3 | 
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 | 
6 | [requires]
7 | python_version = "3.9"
8 | 
9 | [dev-packages]
10 | autopep8 = "*"
11 | flake8 = "*"
12 | 
13 | [packages]
14 | numpy = "*"
15 | scipy = "*"
16 | statsmodels = "*"
17 | pip = "*"
18 | torch = "*"
19 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pyMSER
2 | 
3 | A Python library to apply the [Marginal Standard Error Rule (MSER)](https://doi.org/10.1177/003754979706900601) for transient regime detection and truncation on Grand Canonical Monte Carlo adsorption simulations.
4 | 
5 | > Oliveira, Felipe L., et al. "pyMSER - An Open-Source Library for Automatic Equilibration Detection in Molecular Simulations." *Journal of Chemical Theory and Computation* **20.19** (2024): 8559-8568.
6 | > 
7 | > https://doi.org/10.1021/acs.jctc.4c00417
8 | 
9 | ## Dependencies
10 | 
11 | * [NumPy](https://numpy.org) is the fundamental package for scientific computing with Python.
12 | * [SciPy](https://scipy.org/) is a collection of fundamental algorithms for scientific computing in Python.
13 | * [statsmodels](https://www.statsmodels.org/) is a Python module that provides classes and functions for the estimation of many different statistical models, as well as for conducting statistical tests and statistical data exploration.
14 | * [pyTorch](https://pytorch.org/) is an open source machine learning framework that uses tensor computations and automatic differentiation on GPU and CPU.
15 | 
16 | ## Developer tips
17 | 
18 | These tips are not mandatory, but they are a sure way of helping you develop the code while maintaining consistency with the current style, structure, and formatting choices.
19 | 
20 | ### Coding style guide
21 | 
22 | We recommend these tools to ensure code style compatibility.
23 | 
24 | * [autopep8](https://pypi.org/project/autopep8/) automatically formats Python code to conform to the PEP8 style guide.
25 | * [Flake8](https://flake8.pycqa.org) is your tool for style guide enforcement.
26 | 
27 | ## Installation
28 | 
29 | ### Option 1: Using `setup.py`
30 | 
31 | Clone the `pymser` repository if you haven't done it yet.
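If needed, you can clone it directly from GitHub with:

```Shell
git clone https://github.com/IBM/pymser.git
```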
32 | 
33 | Go to `pymser`'s root folder, where you will find the `setup.py` file, and run the command below:
34 | 
35 | ```Shell
36 | python setup.py install
37 | ```
38 | 
39 | ### Option 2: Using pip/pipenv to install from PyPI.org
40 | 
41 | If you intend to use `pipenv`, please add the following to your `Pipfile`:
42 | 
43 | ```Pipfile
44 | [[source]]
45 | url = "https://pypi.org/simple"
46 | verify_ssl = true
47 | name = "pypi"
48 | 
49 | [packages]
50 | pymser = "*"
51 | ```
52 | 
53 | If you intend to use `pip`, please run the command below:
54 | 
55 | ```Shell
56 | pip install pymser
57 | ```
58 | 
59 | ### Option 3: Using pip to install directly from the GitHub repo
60 | 
61 | You can run
62 | 
63 | ```Shell
64 | pip install git+https://github.com/IBM/pymser.git
65 | ```
66 | 
67 | and then you will be prompted to enter your GitHub username and password/access token.
68 | 
69 | If you already have an SSH key configured, you can run
70 | 
71 | ```Shell
72 | pip install git+ssh://git@github.com/IBM/pymser.git
73 | ```
74 | 
75 | ### Option 4: Using pip/pipenv to install from Artifactory
76 | 
77 | Log into Artifactory and access your user profile. There you will find your API key and username. Then export your credentials as environment variables for later use in the installation process.
78 | 
79 | ```Shell
80 | export ARTIFACTORY_USERNAME=username@email.com
81 | export ARTIFACTORY_ACCESS_TOKEN=your-access-token
82 | export ARTIFACTORY_URL=your-artifactory-url
83 | ```
84 | 
85 | If you intend to use `pipenv`, please add the following to your `Pipfile`:
86 | 
87 | ```Pipfile
88 | [[source]]
89 | url = "https://$ARTIFACTORY_USERNAME:$ARTIFACTORY_ACCESS_TOKEN@$ARTIFACTORY_URL"
90 | verify_ssl = true
91 | name = "artifactory"
92 | 
93 | [packages]
94 | pymser = {version="*", index="artifactory"}
95 | ```
96 | 
97 | If you intend to use `pip`, please run the command below:
98 | 
99 | ```Shell
100 | pip install pymser --extra-index-url=https://$ARTIFACTORY_USERNAME:$ARTIFACTORY_ACCESS_TOKEN@$ARTIFACTORY_URL
101 | ```
102 | 
103 | ## Usage example
104 | 
105 | This is a small example of how to use our package:
106 | 
107 | ```Python
108 | >>> import pymser
109 | >>> import pandas as pd
110 | >>>
111 | >>> # Reads the example file using pandas
112 | >>> df = pd.read_csv('example_data/Cu-BTT_500165.0_198.000000.csv')
113 | >>>
114 | >>> # Apply the MSER to get the index of the start of equilibrated data
115 | >>> results = pymser.equilibrate(df['mol/kg'], LLM=False, batch_size=1, ADF_test=True, uncertainty='uSD', print_results=True)
116 | 
117 | pyMSER Equilibration Results
118 | ==============================================================================
119 | Start of equilibrated data: 13368 of 48613
120 | Total equilibrated steps: 35245 (72.50%)
121 | Equilibrated: Yes
122 | Average over equilibrated data: 22.4197 ± 0.1905
123 | Number of uncorrelated samples: 22.3
124 | Autocorrelation time: 1579.0
125 | ==============================================================================
126 | 
127 | Augmented Dickey-Fuller Test
128 | ==============================================================================
129 | Test statistic for observable: -3.926148246630434
130 | P-value for observable: 0.001850619485090052
131 | The number of lags used: 46
132 | The number of observations used for the ADF regression: 35198
133 | Cutoff Metrics :
134 | 1%: -3.430536 | The data is stationary with 99 % confidence
135 | 5%: -2.861622 | The data is stationary with 95 % confidence
136 | 10%: -2.566814 | The data is stationary with 90 % confidence
137 | ```
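Besides printing the report above, `equilibrate` returns a dictionary of results. The sketch below shows how to reuse it, following the example workflow shipped in this repository; `t0` (start of the equilibrated data) and `ac_time` (autocorrelation time) are the two keys that workflow relies on:

```Python
>>> # Reuse the detected equilibration index and autocorrelation time
>>> average, uncertainty = pymser.calc_equilibrated_average(
...     data=df['mol/kg'],
...     eq_index=results['t0'],
...     uncertainty='uSD',
...     ac_time=results['ac_time'])
```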
138 | 
139 | You can also access our [tutorial](pymser_tutorial.ipynb).
140 | 
141 | ## Python package deployment
142 | 
143 | ### Deploying to Artifactory
144 | 
145 | We have an automated CI/CD pipeline running on TravisCI that takes every single `git push` event and executes the build/test/deploy instructions in the `.travis.yml`. If you are deploying the `master` or `release` branches, a Python package will be generated and published to a private PyPI registry on Artifactory.
146 | 
147 | ### Deploying to PyPI
148 | 
149 | We have an automated CI/CD pipeline running on TravisCI that takes every single `git push` event and executes the build/test/deploy instructions in the `.travis.yml`. If you are deploying the `main` branch, a Python package will be generated and published to the PyPI.org registry.
150 | 
--------------------------------------------------------------------------------
/example_workflow/README.md:
--------------------------------------------------------------------------------
1 | # Tutorial: Using pyMSER with RASPA2 to Run a Fixed Number of Production Cycles
2 | 
3 | ## Introduction
4 | 
5 | Welcome to this tutorial on using pyMSER with RASPA2 to run a fixed number of production cycles after automatically detecting the equilibrated portion of the simulation. This guide assumes that you have a basic understanding of Python and Grand Canonical Monte Carlo (GCMC) simulations using RASPA. The aim is to streamline simulation workflows by leveraging the capabilities of pyMSER for equilibration detection, ensuring accurate and efficient production runs.
6 | 
7 | ## Installation
8 | 
9 | To get started, ensure that your environment is set up with the necessary dependencies. The provided `environment.yml` file includes all the required packages:
10 | 
11 | * [NumPy](https://numpy.org) is the fundamental package for scientific computing with Python.
12 | * [Pandas](https://pandas.pydata.org) is a fast, powerful, flexible, and easy-to-use open-source data analysis and data manipulation library built on top of NumPy.
13 | * [Gemmi](https://gemmi.readthedocs.io/en/latest/) is a Python library for handling macromolecular structures.
14 | * [pyMSER](https://pypi.org/project/pymser/) is a Python library to apply the Marginal Standard Error Rule (MSER) for transient regime detection and truncation on GCMC adsorption simulations.
15 | * [RASPA2](https://pypi.org/project/RASPA2/) is a Python interface to the RASPA2 molecular simulation package.
16 | 
17 | To create the environment, run the following command:
18 | 
19 | ```sh
20 | conda env create -f environment.yml
21 | ```
22 | 
23 | Activate the environment with:
24 | 
25 | ```sh
26 | conda activate pymser
27 | ```
28 | 
29 | ## How to Run
30 | 
31 | To run a fixed number of production cycles using pyMSER with RASPA2, you can use the provided `run.py` script. This script automates the process of equilibration detection and production running based on the MSER rule. The script takes the output directory, the framework name, and the external pressure as input arguments.
32 | 
33 | ```sh
34 | python run.py --FrameworkName 'MgMOF-74' --ExternalPressure 1e4 --NumberOfProdCycles 1500 --AddCycles 1000 --GasComposition '{"CO2":0.5,"N2":0.5}' 'GCMC'
35 | ```
36 | 
37 | The script will perform the following steps (a condensed sketch of the main loop is shown after the list):
38 | 
39 | 1. Create the necessary directories and input files for the simulation.
40 | 2. Run RASPA for `AddCycles` cycles.
41 | 3. Apply the MSER rule to detect the equilibrated portion of the simulation.
42 | 4. If the desired number of production cycles is not reached, run additional `AddCycles` cycles until the target is achieved.
43 | 5. Parse the output files and save the results.
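This is a condensed sketch of the control loop that `run.py` implements; `run_raspa` is a hypothetical stand-in for the actual RASPA system call, while all other names match the script:

```python
# Minimal sketch of the run.py loop (not the full script)
equilibrated = False
while not equilibrated:
    run_raspa(arg.AddCycles)  # hypothetical helper: runs RASPA for AddCycles more cycles
    eqDict = pymser.equilibrate(dataFrame['N_ads'], print_results=False)
    # Everything after the detected transient (index t0) counts as production data
    equilibrated = len(dataFrame['N_ads']) - eqDict['t0'] > arg.NumberOfProdCycles
```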
44 | 
45 | The output should be:
46 | 
47 | ```sh
48 | ==============================================================================
49 | Automatic GCMC Simulation with pyMSER
50 | ==============================================================================
51 | Framework Name : MgMOF-74
52 | External Temperature : 298.0 K
53 | External Pressure : 10000.0 Pa
54 | Gas Composition : {'CO2': 0.5, 'N2': 0.5}
55 | Desired Prod. Cycles : 1500
56 | Output Path : GCMC
57 | ==============================================================================
58 | 
59 | Running RASPA simulation...
60 | 
61 |  > Running iteration 1...
62 |  > Found only 999/1500 production cycles. Running 1000 more cycles.
63 |  > Running iteration 2...
64 |  > Success! Found 1999 production cycles. Analyzing final data...
65 | 
66 | 
67 | pyMSER Equilibration Results
68 | ==============================================================================
69 | Start of equilibrated data: 1 of 2000
70 | Total equilibrated steps: 1999 (99.97%)
71 | Equilibrated: Yes
72 | Average over equilibrated data: 3.5916 ± 1.9509
73 | Number of uncorrelated samples: 1999.0
74 | Autocorrelation time: 1.0
75 | ==============================================================================
76 | 
77 | Augmented Dickey-Fuller Test
78 | ==============================================================================
79 | Test statistic for observable: -40.707108860029564
80 | P-value for observable: 0.0
81 | The number of lags used: 0
82 | The number of observations used for the ADF regression: 3998
83 | Cutoff Metrics :
84 | 1%: -3.431987 | The data is stationary with 99 % confidence
85 | 5%: -2.862263 | The data is stationary with 95 % confidence
86 | 10%: -2.567155 | The data is stationary with 90 % confidence
87 | 
88 | ==============================================================================
89 | Component 0 [CO2]
90 | ---------------------------------------------------------------------------
91 | Average loading absolute [molecules/unit cell] 0.1947205551 +/- 0.1135451716
92 | Average loading absolute [mol/kg framework] 0.0891424690 +/- 0.0519806290
93 | Average loading absolute [mg/g framework] 3.9231154891 +/- 2.2876414927
94 | Average loading absolute [cm^3 STP/gr] 1.9980371381 +/- 1.1650925581
95 | Average loading absolute [cm^3 STP/cm^3] 1.7647568024 +/- 1.0290624625
96 | Enthalpy of adsorption [kJ/mol] -17.8534776591 +/- 0.3852622588
97 | ==============================================================================
98 | Component 1 [N2]
99 | ---------------------------------------------------------------------------
100 | Average loading absolute [molecules/unit cell] 0.0297574394 +/- 0.0428471501
101 | Average loading absolute [mol/kg framework] 0.0136228639 +/- 0.0196152930
102 | Average loading absolute [mg/g framework] 0.3816227347 +/- 0.5494910502
103 | Average loading absolute [cm^3 STP/gr] 0.3053425404 +/- 0.4396567026
104 | Average loading absolute [cm^3 STP/cm^3] 0.2696923470 +/- 0.3883246922
105 | Enthalpy of adsorption [kJ/mol] -11.9135661990 +/- 3.2649508929
106 | ==============================================================================
107 | ```
108 | 
109 | ### Command Line Options
110 | 
111 | You have several options to control the simulation parameters. 
Below is a detailed explanation of each option available in the `run.py` script: 112 | 113 | - **output_folder (required)** 114 | - Type: `str` 115 | - Help: Directory to save the files of the calculations. This directory should contain the `cif` file of the framework and the force field files. 116 | 117 | - **--FrameworkName (required)** 118 | - Type: `str` 119 | - Help: Name of the framework to be simulated 120 | 121 | - **--ExternalPressure (required)** 122 | - Type: `float` 123 | - Help: External pressure in Pascal 124 | 125 | #### Optional Parameters 126 | 127 | - **--NumberOfProdCycles** 128 | - Type: `int` 129 | - Default: `5000` 130 | - Help: Number of desired production cycles 131 | 132 | - **--AddCycles** 133 | - Type: `int` 134 | - Default: `1000` 135 | - Help: Number of additional tentative cycles if the desired number of production cycles has not been achieved 136 | 137 | - **--ExternalTemperature** 138 | - Type: `float` 139 | - Default: `298.0` 140 | - Help: Temperature of the simulation in Kelvin 141 | 142 | - **--UnitCells** 143 | - Type: `str` 144 | - Default: `auto` 145 | - Help: Number of unit cells to be simulated. Can be "auto" or a string of comma-separated values. E.g., "3,3,1" 146 | 147 | - **--GasComposition** 148 | - Type: `str` 149 | - Default: `{"CO2": 1.0}` 150 | - Help: Type of gas composition for the simulation as a dictionary. E.g., '{"CO2": 0.5, "N2": 0.5}' 151 | 152 | - **--UseChargesFromCIFFile** 153 | - Help: Use charges from CIF file. 154 | 155 | 156 | With these options, you can customize your simulation to fit your specific needs, ensuring an efficient and accurate GCMC simulation workflow. 157 | -------------------------------------------------------------------------------- /example_workflow/environment.yml: -------------------------------------------------------------------------------- 1 | name: pymser 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python >= 3.10 6 | - pip 7 | - numpy 8 | - pandas 9 | - gemmi 10 | - pip: 11 | - pymser 12 | - RASPA2 13 | -------------------------------------------------------------------------------- /example_workflow/modules/raspa_input.py: -------------------------------------------------------------------------------- 1 | import os 2 | from textwrap import dedent 3 | from modules.tools import calculate_UnitCells, get_pseudoatoms 4 | 5 | 6 | def create_GCMC_input(path: str, FrameworkName: str, **kwargs): 7 | """ 8 | Create the RASPA GCMC simulation input. 9 | Parameters 10 | ---------- 11 | path : string 12 | Path where the file will be saved. 13 | FrameworkName : string 14 | Name of the structure. Must be the same name in the `.cif` file. 
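    **kwargs
        Optional keyword arguments that override the default calculation
        parameters defined in the CALC_DICT dictionary below (e.g.
        NumberOfCycles, ExternalTemperature, GasComposition).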
15 |     """
16 | 
17 |     # Calculation parameters dictionary
18 |     CALC_DICT = {
19 |         'FrameworkName': FrameworkName,  # string
20 |         'NumberOfCycles': 10000,  # int
21 |         'NumberOfInitializationCycles': 0,  # int
22 |         'PrintEvery': 1,  # int
23 |         'PrintPropertiesEvery': 1,  # int
24 |         'ForceField': 'local',  # string
25 |         'CutOffVDW': 12.8,  # float
26 |         'CutOffChargeCharge': 12.8,  # float
27 |         'CutOffChargeBondDipole': 12.8,  # float
28 |         'CutOffBondDipoleBondDipole': 12.8,  # float
29 |         'EwaldPrecision': 1.0e-6,  # float
30 |         'HeliumVoidFraction': 0.0,  # float
31 |         'ExternalTemperature': 298.15,  # float
32 |         'ExternalPressure': '100000',  # float or csv
33 |         'UseChargesFromCIFFile': 'yes',  # yes / no
34 |         'UnitCells': '1 1 1',  # int int int
35 |         'GasComposition': {'CO2': 1.0},  # dict
36 |         'SpacingVDWGrid': 0.1,  # float
37 |         'SpacingCoulombGrid': 0.1,  # float
38 |         'UseTabularGrid': 'no',  # yes / no
39 |         'NumberOfGrids': 0,  # int
40 |         'GridTypes': '',  # string
41 |         'Movies': 'no',  # yes / no
42 |         'WriteMoviesEvery': 0,  # int
43 |         'ComputeDensityProfile3DVTKGrid': 'no',  # yes / no
44 |         'DensityProfile3DVTKGridPoints': '100 100 100',  # int int int
45 |         'WriteDensityProfile3DVTKGridEvery': 100,  # int
46 |         'RestartFile': 'no',  # yes / no
47 |     }
48 | 
49 |     # Update the dictionary with the kwargs
50 |     CALC_DICT.update(kwargs)
51 | 
52 |     if 'UnitCells' in kwargs:
53 |         if isinstance(kwargs['UnitCells'], list):
54 |             CALC_DICT['UnitCells'] = ' '.join(map(str, kwargs['UnitCells']))
55 |         if isinstance(kwargs['UnitCells'], int):
56 |             CALC_DICT['UnitCells'] = ' '.join(map(str, [kwargs['UnitCells']] * 3))
57 |         if isinstance(kwargs['UnitCells'], str) and kwargs['UnitCells'].lower() == 'auto':
58 | 
59 |             maxCutOff = max([CALC_DICT['CutOffVDW'],
60 |                              CALC_DICT['CutOffChargeCharge'],
61 |                              CALC_DICT['CutOffChargeBondDipole'],
62 |                              CALC_DICT['CutOffBondDipoleBondDipole']])
63 | 
64 |             CALC_DICT['UnitCells'] = calculate_UnitCells(
65 |                 os.path.join(path, FrameworkName.removesuffix('.cif') + '.cif'),
66 |                 maxCutOff)
67 | 
68 |     if 'ExternalPressure' in kwargs:
69 |         if isinstance(kwargs['ExternalPressure'], list):
70 |             CALC_DICT['ExternalPressure'] = ' '.join(map(str, kwargs['ExternalPressure']))
71 | 
72 |         elif isinstance(kwargs['ExternalPressure'], int):
73 |             CALC_DICT['ExternalPressure'] = float(kwargs['ExternalPressure'])
74 | 
75 |         elif isinstance(kwargs['ExternalPressure'], float):
76 |             CALC_DICT['ExternalPressure'] = kwargs['ExternalPressure']
77 | 
78 |         elif isinstance(kwargs['ExternalPressure'], str):
79 |             CALC_DICT['ExternalPressure'] = ' '.join(kwargs['ExternalPressure'].split(','))
80 | 
81 |     # Create file header as string
82 |     GCMC_InputFile = dedent("""\
83 |         SimulationType MonteCarlo
84 |         NumberOfCycles {NumberOfCycles}
85 |         NumberOfInitializationCycles {NumberOfInitializationCycles}
86 |         PrintEvery {PrintEvery}
87 |         PrintPropertiesEvery {PrintPropertiesEvery}
88 | 
89 |         RestartFile {RestartFile}
90 | 
91 |         ForceField {ForceField}
92 |         CutOffVDW {CutOffVDW}
93 |         CutOffChargeCharge {CutOffChargeCharge}
94 |         CutOffChargeBondDipole {CutOffChargeBondDipole}
95 |         CutOffBondDipoleBondDipole {CutOffBondDipoleBondDipole}
96 |         ChargeMethod Ewald
97 |         EwaldPrecision {EwaldPrecision}
98 | 
99 |         Framework 0
100 |         FrameworkName {FrameworkName}
101 |         HeliumVoidFraction {HeliumVoidFraction}
102 |         ExternalTemperature {ExternalTemperature}
103 |         ExternalPressure {ExternalPressure}
104 |         UseChargesFromCIFFile {UseChargesFromCIFFile}
105 |         UnitCells {UnitCells}
106 | 
107 |         """).format(**CALC_DICT)
108 | 
109 |     if CALC_DICT['UseTabularGrid'] == 'yes':
110 | 
111 |         # 
Get the pseudoatoms number and types 112 | for gas in list(CALC_DICT['GasComposition'].keys()): 113 | pseudo_atoms = get_pseudoatoms(gas) 114 | CALC_DICT['NumberOfGrids'] += len(pseudo_atoms) # int 115 | CALC_DICT['GridTypes'] += ' '.join(pseudo_atoms) + ' ' # string 116 | 117 | GCMC_InputFile += dedent("""\ 118 | NumberOfGrids {NumberOfGrids} 119 | GridTypes {GridTypes} 120 | SpacingVDWGrid {SpacingVDWGrid} 121 | SpacingCoulombGrid {SpacingCoulombGrid} 122 | UseTabularGrid {UseTabularGrid} 123 | 124 | """).format(**CALC_DICT) 125 | 126 | if CALC_DICT['ComputeDensityProfile3DVTKGrid'] == 'yes': 127 | GCMC_InputFile += dedent("""\ 128 | ComputeDensityProfile3DVTKGrid yes 129 | DensityProfile3DVTKGridPoints {DensityProfile3DVTKGridPoints} 130 | WriteDensityProfile3DVTKGridEvery {WriteDensityProfile3DVTKGridEvery} 131 | 132 | """).format(**CALC_DICT) 133 | 134 | if CALC_DICT['Movies'] == 'yes': 135 | GCMC_InputFile += dedent("""\ 136 | Movies yes 137 | WriteMoviesEvery {WriteMoviesEvery} 138 | 139 | """).format(**CALC_DICT) 140 | 141 | # Create component list as string 142 | for name, fraction in CALC_DICT['GasComposition'].items(): 143 | 144 | number_of_components = len(CALC_DICT['GasComposition']) 145 | index_of_component = list(CALC_DICT['GasComposition']).index(name) 146 | 147 | # Append component string block to input file 148 | GCMC_InputFile += dedent(f"""\ 149 | Component {index_of_component} MoleculeName {name} 150 | MolFraction {fraction} 151 | MoleculeDefinition TraPPE 152 | SwapProbability 0.5 153 | TranslationProbability 0.3 154 | RotationProbability 0.2 155 | IdentityChangeProbability 0.1 156 | NumberOfIdentityChanges {number_of_components} 157 | IdentityChangesList {' '.join(map(str, range(number_of_components)))} 158 | CreateNumberOfMolecules 0 159 | 160 | """) 161 | 162 | # Write input to file 163 | with open(os.path.join(path, 164 | "simulation.input"), 'w') as f: 165 | f.write(GCMC_InputFile) 166 | -------------------------------------------------------------------------------- /example_workflow/modules/raspa_output.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | from RASPA2 import parse 4 | from glob import glob 5 | 6 | 7 | def parse_GCMC(output_folder: str, 8 | FrameworkName: str, 9 | ExternalTemperature: float, 10 | ExternalPressure: float, 11 | GasComposition: dict, 12 | NumberOfCycles: int, 13 | PrintEvery: int): 14 | # Read file into string 15 | input_file_name = glob('{0}/output_{1}_*_{2:.6f}_{3:g}.data'.format(output_folder, 16 | FrameworkName, 17 | ExternalTemperature, 18 | ExternalPressure))[0] 19 | with open(os.path.join(input_file_name), 'r') as f: 20 | raspa_string = f.read() 21 | 22 | # Parse string into dictionary and list 23 | raspa_dict = parse(raspa_string) 24 | raspa_list = raspa_string.split('\n') 25 | 26 | # Extract number of unit cells in the supercell 27 | unit_cells = [int(line.split(':')[1]) for line in raspa_list if 'Number of unitcells' in line] 28 | 29 | # Calculate mol/kg conversion factor for the supercell 30 | conversion_string = 'Conversion factor molecules/unit cell -> mol/kg' 31 | to_mol_kg = raspa_dict['MoleculeDefinitions'][conversion_string][0] 32 | to_mol_kg /= math.prod(unit_cells) 33 | 34 | output_file_name = f'raspa_{ExternalTemperature:.6f}_{ExternalPressure}.csv' 35 | if os.path.isfile(os.path.join(output_folder, output_file_name)): 36 | append_data = True 37 | # Open the file 38 | with open(os.path.join(output_folder, output_file_name), 'r') as f: 39 | lines = 
f.readlines() 40 | # Check if the last line is the same as the last cycle 41 | last_line = lines[-1].split(',') 42 | base_cycle = int(last_line[0]) 43 | base_step = int(last_line[1]) 44 | 45 | else: 46 | append_data = False 47 | base_cycle = 0 48 | base_step = 0 49 | 50 | # Build header string 51 | if not append_data: 52 | header = 'cycle,step,N_ads' 53 | for component in range(len(GasComposition)): 54 | cycle_key = f'Current cycle: 0 out of {NumberOfCycles}' 55 | component_key = f'Component {component}' 56 | molecule_name = raspa_dict[cycle_key][component_key][0] 57 | header += ( 58 | f',{molecule_name}_[N_ads]' 59 | f',{molecule_name}_[molecules/uc]' 60 | f',{molecule_name}_[mol/kg]' 61 | ) 62 | 63 | header += ( 64 | ',total_[K]' 65 | ',host-host_[K]' 66 | ',host-adsorbate_[K]' 67 | ',host-cation_[K]' 68 | ',adsorbate-adsorbate_[K]' 69 | ',cation-cation_[K]' 70 | ',adsorbate-cation_[K]' 71 | ) 72 | 73 | csv_output = header + '\n' 74 | else: 75 | csv_output = '' 76 | 77 | # For each cycle 78 | steps = base_step 79 | for cycle in range(0, NumberOfCycles, PrintEvery): 80 | cycle_key = f'Current cycle: {cycle} out of {NumberOfCycles}' 81 | number_of_adsorbates = int(raspa_dict[cycle_key]['Number of Adsorbates'][0]) 82 | steps += max(20, number_of_adsorbates) 83 | line = ( 84 | f'{cycle + base_cycle},' 85 | f' {steps},' 86 | f' {number_of_adsorbates}' 87 | ) 88 | 89 | # For each component 90 | for component in range(len(GasComposition)): 91 | component_key = f'Component {component}' 92 | number_of_molecules = int(raspa_dict[cycle_key][component_key][2].split('/')[0]) 93 | line += ( 94 | f', {number_of_molecules:7}' 95 | f', {number_of_molecules / math.prod(unit_cells):7}' 96 | f', {number_of_molecules * to_mol_kg:.7f}' 97 | ) 98 | 99 | for energy_term in [ 100 | 'Current total potential energy', 101 | 'Current Host-Host energy', 102 | 'Current Host-Adsorbate energy', 103 | 'Current Host-Cation energy', 104 | 'Current Adsorbate-Adsorbate energy', 105 | 'Current Cation-Cation energy', 106 | 'Current Adsorbate-Cation energy' 107 | ]: 108 | 109 | line += f',{raspa_dict[cycle_key][energy_term][0]:.7f}' 110 | 111 | csv_output += line + '\n' 112 | 113 | # Write string into file 114 | with open(os.path.join(output_folder, output_file_name), 'a') as f: 115 | f.write(csv_output) 116 | -------------------------------------------------------------------------------- /example_workflow/modules/tools.py: -------------------------------------------------------------------------------- 1 | import gemmi 2 | from glob import glob 3 | import re 4 | import numpy as np 5 | 6 | 7 | def calculate_Perpendicular_Widths(cif_filename: str) -> tuple: 8 | """ 9 | Calculate the perpendicular widths of the unit cell. 10 | RASPA considers the perpendicular directions as the directions perpendicular to the `ab`, 11 | `bc`, and `ca` planes. Thus, the directions depend on the crystallographic vectors `a`, `b`, 12 | and `c`. 13 | The length in the perpendicular directions are the projections of the crystallographic vectors 14 | on the vectors `a x b`, `b x c`, and `c x a`. 
(here `x` means cross product)
15 |     """
16 |     # Read data from CIF file
17 |     cif = gemmi.cif.read_file(cif_filename).sole_block()
18 |     a = float(cif.find_value('_cell_length_a').split('(')[0])
19 |     b = float(cif.find_value('_cell_length_b').split('(')[0])
20 |     c = float(cif.find_value('_cell_length_c').split('(')[0])
21 |     beta = float(cif.find_value('_cell_angle_beta').split('(')[0]) * np.pi / 180.0
22 |     gamma = float(cif.find_value('_cell_angle_gamma').split('(')[0]) * np.pi / 180.0
23 |     alpha = float(cif.find_value('_cell_angle_alpha').split('(')[0]) * np.pi / 180.0
24 | 
25 |     # Calculate the nu value
26 |     nu = (np.cos(alpha) - np.cos(gamma) * np.cos(beta)) / np.sin(gamma)
27 | 
28 |     # Build the transformation matrix as a numpy array
29 |     CellBox = np.array([[a, 0.0, 0.0],
30 |                         [b * np.cos(gamma), b * np.sin(gamma), 0.0],
31 |                         [c * np.cos(beta), c * nu, c * np.sqrt(1.0 - np.cos(beta)**2 - nu**2)]])
32 | 
33 |     # Calculate the cross products
34 |     axb = np.cross(CellBox[0], CellBox[1])
35 |     bxc = np.cross(CellBox[1], CellBox[2])
36 |     cxa = np.cross(CellBox[2], CellBox[0])
37 | 
38 |     # Calculate the volume of the unit cell
39 |     V = np.dot(np.cross(CellBox[0], CellBox[1]), CellBox[2])
40 | 
41 |     # Calculate perpendicular widths
42 |     p_width_1 = V / np.linalg.norm(bxc)
43 |     p_width_2 = V / np.linalg.norm(cxa)
44 |     p_width_3 = V / np.linalg.norm(axb)
45 | 
46 |     return p_width_1, p_width_2, p_width_3
47 | 
48 | 
49 | def calculate_UnitCells(cif_filename: str, cutoff: float) -> str:
50 |     """
51 |     Calculate the number of unit cell repetitions so that all supercell lengths are larger than
52 |     twice the interaction potential cut-off radius.
53 |     """
54 | 
55 |     # Calculate the perpendicular widths
56 |     p_width_1, p_width_2, p_width_3 = calculate_Perpendicular_Widths(cif_filename)
57 | 
58 |     # Calculate UnitCells string
59 |     uc_array = np.ceil(2.0 * cutoff / np.array([p_width_1, p_width_2, p_width_3])).astype(int)
60 |     unit_cells = ' '.join(map(str, uc_array))
61 | 
62 |     return unit_cells
63 | 
64 | 
65 | def get_pseudoatoms(molecule: str) -> list:
66 |     """
67 |     Returns the pseudoatoms of a given molecule.
68 |     If the molecule is not in the supported list, returns `None`.
69 |     Parameters
70 |     ----------
71 |     molecule : string
72 |         Molecule name. Can be CO2, N2, O2, H2, CH4, CO, or H2O.
73 |     Returns
74 |     ----------
75 |     pseudoatoms : list
76 |         List containing the strings with the pseudoatoms.
77 |     """
78 | 
79 |     pseudoatoms_dict = {'CO2': ['C_co2', 'O_co2'],
80 |                         'N2': ['N_n2', 'N_com'],
81 |                         'O2': ['O_o2', 'O_com'],
82 |                         'H2': ['H_h2', 'H_com'],
83 |                         'CH4': ['CH4'],
84 |                         'CO': ['C_co', 'CO_com', 'O_co'],
85 |                         'H2O': ['Ow', 'Hw', 'Lw']}
86 | 
87 |     if molecule in pseudoatoms_dict:
88 |         return pseudoatoms_dict[molecule]
89 |     else:
90 |         return None
91 | 
92 | 
93 | def get_conversion_factors(output_folder: str,
94 |                            FrameworkName: str,
95 |                            ExternalTemperature: float,
96 |                            ExternalPressure: float):
97 |     """
98 |     Get the conversion factors for the units in the RASPA simulation.
99 | 
100 |     Parameters
101 |     ----------
102 |     output_folder, FrameworkName : string
103 |         Path to the folder containing the RASPA output file and name of the framework.
104 |     ExternalTemperature, ExternalPressure : float
105 |         Temperature (K) and pressure (Pa) used to locate the RASPA output file.
106 |     Returns
107 |     ----------
108 |     conversion_factors : dict
109 |         Dictionary mapping each unit label ('mol/kg', 'mg/g', 'cm^3 STP/gr', 'cm^3 STP/cm^3') to a list with one conversion factor per component. 
110 |     """
111 | 
112 |     # Read file into string
113 |     filename = glob('{0}/output_{1}_*_{2:.6f}_{3:g}.data'.format(output_folder,
114 |                                                                  FrameworkName,
115 |                                                                  ExternalTemperature,
116 |                                                                  ExternalPressure))[0]
117 | 
118 |     pattern = re.compile(r'Conversion factor molecules/unit cell -> (.+?):\s+(\d+\.\d+)')
119 | 
120 |     with open(filename, 'r') as f:
121 |         lines = f.readlines()
122 | 
123 |     conversion_factors = {
124 |         'mol/kg': [],
125 |         'mg/g': [],
126 |         'cm^3 STP/gr': [],
127 |         'cm^3 STP/cm^3': []
128 |     }
129 |     for line in lines:
130 |         if 'Conversion factor molecules/unit cell' in line:
131 |             match = re.search(pattern, line)
132 | 
133 |             conversion_factors[match.group(1)].append(float(match.group(2)))
134 | 
135 |     return conversion_factors
136 | 
--------------------------------------------------------------------------------
/example_workflow/run.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import json
4 | import pymser
5 | import numpy as np
6 | import pandas as pd
7 | 
8 | from modules.raspa_input import create_GCMC_input
9 | from modules.raspa_output import parse_GCMC
10 | from modules.tools import get_conversion_factors
11 | 
12 | # Ignore warnings
13 | import warnings
14 | warnings.filterwarnings("ignore")
15 | 
16 | 
17 | # Required parameters
18 | parser = argparse.ArgumentParser(
19 |     description='on-the-fly RASPA simulations with pyMSER')
20 | parser.add_argument('output_folder',
21 |                     type=str,
22 |                     action='store',
23 |                     metavar='OUTPUT_FOLDER',
24 |                     help='Directory to save the files of the calculations')
25 | parser.add_argument('--FrameworkName',
26 |                     type=str,
27 |                     required=True,
28 |                     action='store',
29 |                     metavar='FRAMEWORK_NAME',
30 |                     help='Name of the framework to be simulated')
31 | parser.add_argument('--ExternalPressure',
32 |                     type=float,
33 |                     required=True,
34 |                     action='store',
35 |                     metavar='PRESSURE',
36 |                     help='External pressure in Pascal.')
37 | # Optional parameters
38 | parser.add_argument('--NumberOfProdCycles',
39 |                     type=int,
40 |                     required=False,
41 |                     action='store',
42 |                     metavar='NUMBER_OF_CYCLES',
43 |                     default=5000,
44 |                     help='Number of desired production cycles.')
45 | parser.add_argument('--AddCycles',
46 |                     type=int,
47 |                     required=False,
48 |                     action='store',
49 |                     metavar='ADD_CYCLES',
50 |                     default=1000,
51 |                     help='Number of additional cycles if NumberOfProdCycles was not achieved.')
52 | parser.add_argument('--ExternalTemperature',
53 |                     type=float,
54 |                     required=False,
55 |                     action='store',
56 |                     metavar='TEMPERATURE',
57 |                     default=298.0,
58 |                     help='Temperature of the simulation in Kelvin')
59 | parser.add_argument('--UnitCells',
60 |                     type=str,
61 |                     required=False,
62 |                     action='store',
63 |                     metavar='UNIT_CELLS',
64 |                     default='auto',
65 |                     help='Number of unit cells to simulate. 
Can be "auto" or "NX,NY,NZ" string.') 66 | parser.add_argument('--UseChargesFromCIFFile', 67 | default=False, 68 | required=False, 69 | action='store_true', 70 | help='Use charges from CIF file.') 71 | parser.add_argument('--GasComposition', 72 | type=str, 73 | required=False, 74 | action='store', 75 | metavar='GAS_COMPOSITION', 76 | default='{"CO2": 1.0}', 77 | help='Type of dispersion correction used for all calculations.') 78 | 79 | 80 | arg = parser.parse_args() 81 | 82 | arg.GasComposition = json.loads(arg.GasComposition) 83 | 84 | header = f""" 85 | ============================================================================== 86 | Automatic GCMC Simulation with pyMSER 87 | ============================================================================== 88 | Framework Name : {arg.FrameworkName} 89 | External Temperature : {arg.ExternalTemperature} K 90 | External Pressure : {arg.ExternalPressure} Pa 91 | Gas Composition : {arg.GasComposition} 92 | Desired Prod. Cycles : {arg.NumberOfProdCycles} 93 | Output Path : {arg.output_folder} 94 | ============================================================================== 95 | 96 | Running RASPA simulation... 97 | """ 98 | 99 | print(header) 100 | 101 | os.chdir(arg.output_folder) 102 | 103 | equilibrated = False 104 | nstep = 0 105 | maxSteps = 20 106 | 107 | while not equilibrated and nstep < maxSteps: 108 | 109 | nstep += 1 110 | print(f' > Running iteration {nstep}...') 111 | 112 | create_GCMC_input( 113 | path='.', 114 | FrameworkName=arg.FrameworkName, 115 | UnitCells=arg.UnitCells, 116 | NumberOfCycles=arg.AddCycles, 117 | ForceField='local', 118 | UseChargesFromCIFFile=arg.UseChargesFromCIFFile, 119 | GasComposition=arg.GasComposition, 120 | ExternalTemperature=arg.ExternalTemperature, 121 | ExternalPressure=arg.ExternalPressure, 122 | RestartFile='no' if nstep == 1 else 'yes', 123 | ) 124 | 125 | os.system('${RASPA_DIR}/bin/simulate simulation.input > raspalog.txt 2>&1') 126 | 127 | parse_GCMC( 128 | output_folder='Output/System_0', 129 | FrameworkName=arg.FrameworkName, 130 | GasComposition=arg.GasComposition, 131 | ExternalTemperature=arg.ExternalTemperature, 132 | ExternalPressure=arg.ExternalPressure, 133 | NumberOfCycles=arg.AddCycles, 134 | PrintEvery=1 135 | ) 136 | 137 | csv_file = f'Output/System_0/raspa_{arg.ExternalTemperature:.6f}_{arg.ExternalPressure}.csv' 138 | dataFrame = pd.read_csv(csv_file) 139 | 140 | eqDict = pymser.equilibrate(dataFrame['N_ads'], print_results=False) 141 | 142 | equilibrated = len(dataFrame['N_ads']) - eqDict['t0'] > arg.NumberOfProdCycles 143 | 144 | if equilibrated: 145 | 146 | log_text = '=============================================================================\n' 147 | 148 | print(" > Success! Found {} production cycles. 
Analyzing final data...\n\n".format(
149 |             len(dataFrame['N_ads']) - eqDict['t0']))
150 | 
151 |         eqDict = pymser.equilibrate(dataFrame['N_ads'], print_results=True)
152 | 
153 |         log_text = '=============================================================================\n'
154 | 
155 |         convFactors = get_conversion_factors(
156 |             output_folder='Output/System_0',
157 |             FrameworkName=arg.FrameworkName,
158 |             ExternalTemperature=arg.ExternalTemperature,
159 |             ExternalPressure=arg.ExternalPressure)
160 | 
161 |         for i, gas in enumerate(arg.GasComposition.keys()):
162 |             eq_data = pymser.calc_equilibrated_average(
163 |                 data=dataFrame[f'{gas}_[molecules/uc]'],
164 |                 eq_index=eqDict['t0'],
165 |                 uncertainty='uSD',
166 |                 ac_time=eqDict['ac_time']
167 |             )
168 | 
169 |             eq_data = np.array(eq_data)
170 | 
171 |             enthalpy_data = pymser.calc_equilibrated_enthalpy(
172 |                 energy=dataFrame['total_[K]'],
173 |                 number_of_molecules=dataFrame[f'{gas}_[N_ads]'],
174 |                 temperature=arg.ExternalTemperature,
175 |                 eq_index=eqDict['t0'],
176 |                 uncertainty='uSD',
177 |                 ac_time=int(arg.NumberOfProdCycles/5))
178 | 
179 |             log_text += f'Component {i} [{gas}]\n'
180 |             log_text += '-'*75 + '\n'
181 |             log_text += 'Average loading absolute [molecules/unit cell] {:20.10f} +/- {:20.10f}\n'\
182 |                 .format(*eq_data)
183 |             log_text += 'Average loading absolute [mol/kg framework] {:20.10f} +/- {:20.10f}\n'\
184 |                 .format(*eq_data * convFactors['mol/kg'][i])
185 |             log_text += 'Average loading absolute [mg/g framework] {:20.10f} +/- {:20.10f}\n'\
186 |                 .format(*eq_data * convFactors['mg/g'][i])
187 |             log_text += 'Average loading absolute [cm^3 STP/gr] {:20.10f} +/- {:20.10f}\n'\
188 |                 .format(*eq_data * convFactors['cm^3 STP/gr'][i])
189 |             log_text += 'Average loading absolute [cm^3 STP/cm^3] {:20.10f} +/- {:20.10f}\n'\
190 |                 .format(*eq_data * convFactors['cm^3 STP/cm^3'][i])
191 |             log_text += 'Enthalpy of adsorption [kJ/mol] {:20.10f} +/- {:20.10f}\n'\
192 |                 .format(*enthalpy_data)
193 |             log_text += '========================================================================\n'
194 | 
195 |         print(log_text)
196 |         log = f'{arg.FrameworkName}_{arg.ExternalTemperature:.6f}_{arg.ExternalPressure}.log'
197 |         with open(log, 'a') as f:
198 |             f.write(log_text)
199 |     else:
200 |         print(" > Found only {}/{} production cycles. 
Running {} more cycles.".format(
201 |             len(dataFrame['N_ads']) - eqDict['t0'], arg.NumberOfProdCycles, arg.AddCycles)
202 |         )
203 | 
204 |         os.makedirs('RestartInitial/System_0', exist_ok=True)
205 | 
206 |         # Copy the file from Restart/System_0 to RestartInitial/System_0
207 |         os.system('cp -r Restart/System_0/* RestartInitial/System_0/')
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools>=42",
4 |     "wheel"
5 | ]
6 | build-backend = "setuptools.build_meta"
--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 |   "$schema": "https://docs.renovatebot.com/renovate-schema.json",
3 |   "extends": [
4 |     "config:recommended"
5 |   ]
6 | }
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 1.0.22
3 | 
4 | [metadata]
5 | description_file = README.md
6 | license_files = LICENSE
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | with open("README.md", "r", encoding="utf-8") as fh:
4 |     long_description = fh.read()
5 | 
6 | setuptools.setup(
7 |     name="pymser",
8 |     version="1.0.22",
9 |     author="Felipe Lopes de Oliveira",
10 |     author_email="felipe.lopes@nano.ufrj.br",
11 |     description="Library to apply the Marginal Standard Error Rule \
12 | for transient regime detection and truncation on Grand Canonical \
13 | Monte Carlo adsorption simulations",
14 |     long_description=long_description,
15 |     long_description_content_type="text/markdown",
16 |     url="https://github.com/IBM/pymser",
17 |     classifiers=[
18 |         "Programming Language :: Python :: 3",
19 |         "Operating System :: OS Independent",
20 |         "License :: OSI Approved :: BSD License",
21 |     ],
22 |     package_dir={"": "src"},
23 |     packages=setuptools.find_packages(where="src"),
24 |     python_requires=">=3.9",
25 |     include_package_data=True,
26 |     install_requires=['numpy',
27 |                       'scipy',
28 |                       'statsmodels',
29 |                       'torch'],
30 |     license='BSD 3-Clause License'
31 | )
--------------------------------------------------------------------------------
/src/pymser/__init__.py:
--------------------------------------------------------------------------------
1 | from .pymser import (exp_decay,
2 |                      check_consistency,
3 |                      batch_average_data,
4 |                      calculate_MSEm,
5 |                      MSERm_index,
6 |                      MSERm_LLM_index,
7 |                      enthalpy_of_adsorption,
8 |                      calc_equilibrated_average,
9 |                      calc_equilibrated_enthalpy,
10 |                      calc_autocorrelation_time,
11 |                      apply_ADF_test,
12 |                      equilibrate,
13 |                      equilibrate_enthalpy)
14 | 
15 | __all__ = ['exp_decay',
16 |            'check_consistency',
17 |            'batch_average_data',
18 |            'calculate_MSEm',
19 |            'MSERm_index',
20 |            'MSERm_LLM_index',
21 |            'enthalpy_of_adsorption',
22 |            'calc_equilibrated_average',
23 |            'calc_equilibrated_enthalpy',
24 |            'calc_autocorrelation_time',
25 |            'apply_ADF_test',
26 |            'equilibrate',
27 |            'equilibrate_enthalpy']
--------------------------------------------------------------------------------
/src/pymser/pymser.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from 
scipy.optimize import curve_fit
4 | from scipy.signal import correlate as sp_corr
5 | from statsmodels.tsa.stattools import adfuller
6 | 
7 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8 | 
9 | 
10 | def exp_decay(t, tau):
11 |     """
12 |     Simple function to model an exponential decay.
13 | 
14 |     Parameters
15 |     ----------
16 |     t : array
17 |         Time data
18 |     tau : float
19 |         Decay rate of the exponential
20 |     Returns
21 |     -------
22 |     Exponential curve as a NumPy array.
23 |     """
24 | 
25 |     return np.exp(-t/tau)
26 | 
27 | 
28 | def check_consistency(data):
29 |     """
30 |     Checks the consistency of the input data.
31 | 
32 |     Parameters
33 |     ----------
34 |     data : array
35 |         Array with the data
36 |     Returns
37 |     -------
38 |     is_all_finite, is_all_zero : bool
39 |         Flags indicating whether all values are finite and whether all values are zero
40 |     data_array : array
41 |         NumPy array containing the data in the correct format for the next steps.
42 |     """
43 | 
44 |     # Try to convert the data to a NumPy Array
45 |     try:
46 |         data_array = np.array(data).astype(float)
47 |         # Remove wrongly nested lists
48 |         data_array = data_array.squeeze()
49 |     except ValueError:
50 |         print('Input data must be an array of float numbers!')
51 |         print('The following data was passed:')
52 |         print(data)
53 | 
54 |         # Replace the incorrect data with an array with a NaN value
55 |         data_array = np.array(np.nan)
56 | 
57 |     # Check if input data is unidimensional
58 |     if data_array.ndim != 1:
59 |         raise Exception(f'Input data must be 1D. {data_array.ndim}D data used instead!')
60 | 
61 |     # Check if all the data is finite
62 |     is_all_finite = np.all(np.isfinite(data_array))
63 | 
64 |     # Check if the data is not an array filled with zeros
65 |     is_all_zero = np.all((data_array == 0))
66 | 
67 |     return is_all_finite, is_all_zero, data_array
68 | 
69 | 
70 | def batch_average_data(data, batch_size=1):
71 |     """
72 |     Converts the data to batch averages with a given batch size.
73 | 
74 |     Parameters
75 |     ----------
76 |     data : array
77 |         Array with the data
78 |     batch_size : int
79 |         Size of the batch to take the averages
80 |     Returns
81 |     -------
82 |     averaged_batches : array
83 |         Array containing the batch-averaged data
84 |     """
85 | 
86 |     if batch_size > 1:
87 |         # Truncate the data to allow a closed batch.
88 |         # Be aware that this will remove the last points to make a closed batch
89 |         truncated_data = data[:int(len(data) / batch_size) * batch_size]
90 | 
91 |         # Reshape the data to create batches of size batch_size.
92 |         reshaped_data = torch.reshape(truncated_data, (-1, batch_size))
93 | 
94 |         # Get the average of each batch
95 |         averaged_batches = torch.tensor([torch.mean(i) for i in reshaped_data])
96 | 
97 |         return averaged_batches
98 | 
99 |     else:
100 |         return data
101 | 
102 | 
103 | def calculate_MSEm(data, batch_size=1):
104 |     """
105 |     Calculates the m-Marginal Standard Error (MSEm) for simulation data
106 |     with batch size equal to m. m=1 reduces to the original MSER. 
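    For the (batch-averaged) series Y_1..Y_n, the loop below computes, for each
    candidate truncation index k,

        g(k) = sum_{i=k}^{n} (Y_i - mean(Y_k..Y_n))^2 / (n - k)^2,

    and MSERm_index later selects the k that minimizes g(k).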
107 | 
108 |     Parameters
109 |     ----------
110 |     data : array
111 |         Array with the data
112 |     batch_size : int
113 |         Size of the batch to take the averages
114 |     Returns
115 |     -------
116 |     MSE : array
117 |         Array containing the Marginal Standard Error data
118 |     """
119 | 
120 |     # Convert data to n-blocked average
121 |     batch_tensor = batch_average_data(torch.from_numpy(data).float().to(device), batch_size)
122 | 
123 |     # Get the size of the data
124 |     n = len(batch_tensor)
125 | 
126 |     # Create an empty tensor to store the MSE values
127 |     MSE = torch.zeros(n - 2, device=device)
128 | 
129 |     # Iterate over the data indices and calculate the average from k to n-2
130 |     for k in range(n - 2):
131 |         # Truncate the data at index k
132 |         truncated_data = batch_tensor[k:]
133 | 
134 |         # Get the average of the truncated data
135 |         Y_nk = truncated_data.mean()
136 | 
137 |         # Calculate the sum of the squared differences
138 |         sum_sq_diff = torch.sum((truncated_data - Y_nk)**2)
139 | 
140 |         # Calculate the k-th Marginal Standard Error
141 |         g_k = sum_sq_diff / (n - k)**2
142 | 
143 |         # Store the k-th value in the MSE array
144 |         MSE[k] = g_k
145 | 
146 |     return MSE
147 | 
148 | 
149 | def MSERm_index(MSEm, batch_size=1):
150 |     """
151 |     Applies the m-Marginal Standard Error Rule (MSERm) to the MSEm data to get
152 |     the position where equilibrated data starts.
153 | 
154 |     Parameters
155 |     ----------
156 |     MSEm : array
157 |         Marginal Standard Error applied to the data
158 |     batch_size : int
159 |         Size of the batch to take the average
160 |     Returns
161 |     -------
162 |     equilibrated_index : int
163 |         Index of the start of equilibrated data
164 |     """
165 |     # Replace spuriously low values that appear artificially at the last points
166 |     MSEm = torch.where(MSEm < 1e-9,  # where value < 1e-9
167 |                        max(MSEm),    # replace with max(MSEm)
168 |                        MSEm)         # on the MSEm array
169 | 
170 |     equilibrated_index = torch.argmin(MSEm)*batch_size
171 | 
172 |     return equilibrated_index
173 | 
174 | 
175 | def MSERm_LLM_index(MSEm, batch_size=1):
176 |     """
177 |     Applies the LLM version of the m-Marginal Standard Error Rule (MSERm) to the
178 |     MSEm data to get the position where equilibrated data starts. This method gets
179 |     the first minimum on the Marginal Standard Error curve and assumes it is the
180 |     start of equilibration. It is a better option for complicated adsorptions
181 |     like water close to condensation.
182 | 
183 |     Parameters
184 |     ----------
185 |     MSEm : array
186 |         Marginal Standard Error applied to the data
187 |     batch_size : int
188 |         Size of the batch to take the average
189 |     Returns
190 |     -------
191 |     t0 : int
192 |         Start of the LLM equilibrated data
193 |     """
194 | 
195 |     # Search for the first minimum on the MSEm data
196 |     i = 0
197 |     while MSEm[i+1] < MSEm[i]:
198 |         i += 1
199 |     # Correct for the batch size
200 |     t0 = i*batch_size
201 | 
202 |     return t0
203 | 
204 | 
205 | def enthalpy_of_adsorption(energy, number_of_molecules, temperature):
206 |     """
207 |     Calculates the enthalpy of adsorption as
208 | 
209 |     H = (<E*N> - <E>*<N>) / (<N^2> - <N>^2) - R*T
210 | 
211 |     adapted from J. Phys. Chem. 1993, 97, 51, 13742-13752.
212 | 
213 |     Please note that Heat of adsorption (Q_iso) = -Enthalpy of adsorption (H).
214 | 
215 |     The isosteric enthalpy of adsorption, H, is defined as the heat which is released
216 |     when an adsorptive binds to a surface. The enthalpy of adsorption (H) is a negative
217 |     number and the isosteric heat (Q_iso) of adsorption is a positive number.
218 |     For a deeper discussion see: Dalton Trans., 2020, 49, 10295.
219 | 
220 |     Parameters
221 |     ----------
222 |     energy : 1D array
223 |         List with the potential energy of the adsorbed phase for each MC cycle in units of Kelvin.
224 | 
225 |     number_of_molecules : 1D array
226 |         List with the number of molecules in the simulation system for each MC cycle.
227 | 
228 |     temperature : float
229 |         Temperature of the simulation in Kelvin
230 | 
231 |     Returns
232 |     ----------
233 | 
234 |     H : float
235 |         Enthalpy of adsorption in units of kJ⋅mol−1
236 |     """
237 |     # Define basic constants
238 |     R = 8.31446261815324 * 1e-3  # kJ⋅K−1⋅mol−1
239 | 
240 |     # Convert energy from Kelvin to kJ/mol
241 |     E = np.array(energy) * R
242 |     N = np.array(number_of_molecules)
243 | 
244 |     EN = E * N
245 | 
246 |     # Calculate the enthalpy of adsorption. Here <N^2> - <N>^2 = VAR(N)
247 |     H = (EN.mean() - E.mean() * N.mean()) / np.var(N) - R * temperature
248 | 
249 |     return H
250 | 
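# Usage sketch (hypothetical names): E_total and N_ads would be the parsed
# potential-energy trace (in Kelvin) and molecule-count trace of a GCMC run,
# as produced by the example workflow in this repository:
#
#     H = enthalpy_of_adsorption(energy=E_total,
#                                number_of_molecules=N_ads,
#                                temperature=298.0)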

    Parameters
    ----------
    energy : 1D array
        List with the potential energy of the adsorbed phase for each MC cycle in units of Kelvin.

    number_of_molecules : 1D array
        List with the number of molecules in the simulation system for each MC cycle.

    temperature : float
        Temperature of the simulation in Kelvin

    Returns
    -------
    H : float
        Enthalpy of adsorption in units of kJ/mol
    """
    # Define basic constants
    R = 8.31446261815324 * 1e-3  # kJ⋅K⁻¹⋅mol⁻¹

    # Convert energy from Kelvin to kJ/mol
    E = np.array(energy) * R
    N = np.array(number_of_molecules)

    EN = E * N

    # Calculate the enthalpy of adsorption. Here <N^2> - <N>^2 = Var(N)
    H = (EN.mean() - E.mean() * N.mean()) / np.var(N) - R * temperature

    return H


def calc_equilibrated_average(data, eq_index, uncertainty='uSD', ac_time=1):
    """
    Calculates the average and uncertainty on the equilibrated part
    of the data.

    Parameters
    ----------
    data : array
        Array with the data
    eq_index : int
        Index of the start of the equilibrated data.
    uncertainty : str
        String for selecting Standard Error (SE), Standard Deviation (SD), or their
        uncorrelated versions uSD and uSE as the default uncertainty of the average.
    ac_time : int
        Autocorrelation time
    Returns
    -------
    equilibrated_average : float
        Average over the equilibrated data
    equilibrated_uncertainty : float
        Uncertainty of the average calculation
    """

    if uncertainty not in ['SD', 'SE', 'uSD', 'uSE']:
        raise Exception(f"""{uncertainty} is not a valid option!
        Only Standard Deviation (SD), Standard Error (SE), uncorrelated
        Standard Deviation (uSD), and uncorrelated Standard Error (uSE)
        are valid options.""")

    # Remove the initial transient of the data
    equilibrated_data = data[eq_index:]

    # Calculate the average of the equilibrated data
    equilibrated_average = np.average(equilibrated_data)

    # Calculate the Standard Deviation of the equilibrated data
    if uncertainty == 'SD':
        equilibrated_uncertainty = np.std(equilibrated_data)

    # Calculate the Standard Error
    elif uncertainty == 'SE':
        equilibrated_uncertainty = np.std(equilibrated_data) / np.sqrt(len(equilibrated_data))

    # Calculate the uncorrelated Standard Deviation
    elif uncertainty == 'uSD':
        # Divide the equilibrated data into uncorrelated chunks
        uncorr_batches = batch_average_data(torch.from_numpy(equilibrated_data).float().to(device),
                                            np.ceil(ac_time).astype(int))

        # Calculate the standard deviation over the uncorrelated chunks
        equilibrated_uncertainty = torch.std(uncorr_batches).item()

    # Calculate the uncorrelated Standard Error
    elif uncertainty == 'uSE':
        # Divide the equilibrated data into uncorrelated chunks
        uncorr_batches = batch_average_data(torch.from_numpy(equilibrated_data).float().to(device),
                                            np.ceil(ac_time).astype(int))

        # Calculate the standard error of the mean over the uncorrelated chunks
        equilibrated_uncertainty = (torch.std(uncorr_batches) /
                                    np.sqrt(len(uncorr_batches))).item()

    return equilibrated_average, equilibrated_uncertainty
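

# Illustrative sketch of the uncorrelated options above (hypothetical values):
# with ac_time=12.3 the series is averaged over batches of length ceil(12.3)=13,
# so that successive batch means are approximately independent, e.g.
#
#     avg, err = calc_equilibrated_average(data, eq_index=100,
#                                          uncertainty='uSE', ac_time=12.3)
#
# and the standard error is then taken over the resulting batch means.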


def calc_equilibrated_enthalpy(energy,
                               number_of_molecules,
                               temperature,
                               eq_index,
                               uncertainty='uSD',
                               ac_time=1):
    """
    Calculates the average enthalpy of adsorption and its uncertainty on the
    equilibrated part of the data.

    Parameters
    ----------
    energy : 1D array
        List with the potential energy of the adsorbed phase for each MC cycle in units of Kelvin.
    number_of_molecules : 1D array
        List with the number of molecules in the simulation system for each MC cycle.
    temperature : float
        Temperature of the simulation in Kelvin
    eq_index : int
        Index of the start of the equilibrated data.
    uncertainty : str
        String for selecting Standard Error (SE), Standard Deviation (SD), or their
        uncorrelated versions uSD and uSE as the default uncertainty of the average.
    ac_time : int
        Autocorrelation time
    Returns
    -------
    equilibrated_H : float
        Average enthalpy of adsorption over the equilibrated data
    eq_uncertainty : float
        Uncertainty of the average calculation
    """

    if uncertainty not in ['SD', 'SE', 'uSD', 'uSE']:
        raise Exception(f"""{uncertainty} is not a valid option!
        Only Standard Deviation (SD), Standard Error (SE), uncorrelated
        Standard Deviation (uSD), and uncorrelated Standard Error (uSE)
        are valid options.""")

    # Remove the initial transient of the data
    equilibrated_E = energy[eq_index:]
    equilibrated_N = number_of_molecules[eq_index:]

    if uncertainty in ['SD', 'SE']:

        # Truncate and reshape the data into 5 closed batches.
        truncated_E = equilibrated_E[:int(np.floor(len(equilibrated_E) / 5) * 5)]
        reshaped_E = np.reshape(truncated_E, (5, -1))

        truncated_N = equilibrated_N[:int(np.floor(len(equilibrated_N) / 5) * 5)]
        reshaped_N = np.reshape(truncated_N, (5, -1))

    elif uncertainty in ['uSD', 'uSE']:

        ac_time = np.ceil(ac_time).astype(int)

        # Truncate and reshape the data into closed batches of length ac_time.
        truncated_E = equilibrated_E[:int(np.floor(len(equilibrated_E) / ac_time) * ac_time)]
        reshaped_E = np.reshape(truncated_E, (-1, ac_time))

        truncated_N = equilibrated_N[:int(np.floor(len(equilibrated_N) / ac_time) * ac_time)]
        reshaped_N = np.reshape(truncated_N, (-1, ac_time))

    # Calculate the enthalpy of adsorption on each batch
    equilibrated_H_list = []

    for i in range(len(reshaped_E)):
        H = enthalpy_of_adsorption(reshaped_E[i], reshaped_N[i], temperature)
        equilibrated_H_list.append(H)

    equilibrated_H = np.nanmean(np.array(equilibrated_H_list))

    # Calculate the Standard Deviation over the batch enthalpies
    if uncertainty in ['SD', 'uSD']:

        eq_uncertainty = np.nanstd(equilibrated_H_list)

    # Calculate the Standard Error of the mean over the batch enthalpies
    elif uncertainty in ['SE', 'uSE']:

        eq_uncertainty = np.nanstd(equilibrated_H_list) / np.sqrt(len(equilibrated_H_list))

    return equilibrated_H, eq_uncertainty


def calc_autocorrelation_time(data):
    """
    Calculates the autocorrelation time of equilibrated data. The
    autocorrelation is expected to fall off exponentially at long times.
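
    The normalized autocorrelation function computed below,

        ACF(t) = sum_s (x_s - <x>) * (x_{s+t} - <x>) / sum_s (x_s - <x>)^2,

    is fitted to exp(-t/tau), and the autocorrelation time is reported as the
    half-life of that exponential decay, ceil(tau * ln 2).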

    Parameters
    ----------
    data : array
        Array of data to calculate the integrated autocorrelation time
    Returns
    -------
    autocorrelation_time : float
        Autocorrelation time
    uncorrelated_samples : float
        Number of uncorrelated samples
    """

    # Check the consistency of the time series
    is_all_finite, is_all_zero, data_array = check_consistency(data)

    if is_all_finite is False or is_all_zero is True:
        return 0, 0

    try:
        # Calculate the ACF using NumPy/SciPy
        data_std = data_array - np.mean(data_array)
        data_norm = np.sum(data_std ** 2)

        ACF = sp_corr(data_std, data_std, mode='full', method='fft') / data_norm
        ACF = ACF[int(ACF.size/2):]

        # Filter the ACF to remove values below 0.1 and improve the fit
        idx = np.argmax(ACF <= 0.1)
        ACF = ACF[:idx]

        # Fit an exponential decay to the ACF
        x = np.arange(len(ACF))
        [tau], _ = curve_fit(exp_decay, x, ACF)

        # Calculate the autocorrelation time as the half-life of the ACF exponential decay
        autocorrelation_time = np.ceil(tau*np.log(2))

    except (RuntimeError, ValueError) as error:
        # If the least-squares minimization fails, set the autocorrelation time to 1.
        # This can happen if the ACF data do not present an exponential decay
        autocorrelation_time = 1
        print('The least-squares minimization failed! Please check the data.')
        print(error)

    # Calculate the number of uncorrelated samples
    uncorrelated_samples = data_array.size / autocorrelation_time

    return autocorrelation_time, uncorrelated_samples


def apply_ADF_test(equilibrated_data, verbosity=True):
    """
    Applies the Augmented Dickey-Fuller test on the equilibrated data.

    Parameters
    ----------
    equilibrated_data : array
        Array with the equilibrated data
    verbosity : bool
        Boolean to control the output printing
    Returns
    -------
    ADFTestResults : dict
        Dictionary containing the ADF test results
    output : str
        String containing the formatted test report
    """
    adf, p, usedlag, n_obs, cv, icbest = adfuller(equilibrated_data, autolag='AIC')

    ADFTestResults = {'adf': adf,
                      'pvalue': p,
                      'usedlag': usedlag,
                      'n_obs': n_obs,
                      'critical_values': cv,
                      'icbest': icbest}

    output = f"""
Augmented Dickey-Fuller Test
==============================================================================
Test statistic for observable: {adf}
P-value for observable: {p}
The number of lags used: {usedlag}
The number of observations used for the ADF regression: {n_obs}
Cutoff Metrics :
"""
    for k, v in cv.items():
        conf = 100 - int(k.rstrip('%'))
        if v < adf:
            output += f"{k:>4}: {v:9.6f} | The data is not stationary with {conf} % confidence\n"
        else:
            output += f"{k:>4}: {v:9.6f} | The data is stationary with {conf} % confidence\n"

    if verbosity:
        print(output)

    return ADFTestResults, output


def equilibrate(input_data,
                LLM=False,
                batch_size=1,
                ADF_test=True,
                uncertainty='uSD',
                print_results=True):
    """
    Wrapper function that applies MSER to an input_data array.
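
    Illustrative usage (synthetic data; assumes this function is importable,
    e.g. from pymser import equilibrate):

    >>> import numpy as np
    >>> rng = np.random.default_rng(42)
    >>> t = np.arange(2000)
    >>> data = 10.0 * (1.0 - np.exp(-t / 100.0)) + rng.normal(0.0, 0.1, 2000)
    >>> results = equilibrate(data, batch_size=10, print_results=False)
    >>> t0 = results['t0']    # index where the equilibrated window starts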

    Parameters
    ----------
    input_data : array
        Array with the original data
    LLM : bool
        Boolean to control the usage of the LLM variation of MSER
    batch_size : int
        Size of the batch to take the average
    ADF_test : bool
        Boolean to control the usage of the ADF test
    uncertainty : str
        String for selecting Standard Error (SE), Standard Deviation (SD), or their
        uncorrelated versions uSD and uSE as the default uncertainty of the average.
    print_results : bool
        Boolean to control the printing of the results
    Returns
    -------
    results_dict : dict
        Dictionary containing the results of MSER
    """

    # Check the consistency of the time series
    is_all_finite, is_all_zero, array_data = check_consistency(input_data)

    # Return NaN if any value of the time series is not a finite number
    if is_all_finite is False:
        results_dict = {'MSE': np.nan,
                        't0': np.nan,
                        'average': np.nan,
                        'uncertainty': np.nan,
                        'equilibrated': np.nan,
                        'ac_time': np.nan,
                        'uncorr_samples': np.nan}
        return results_dict

    # Return zero if all the data in the time series is zero
    if is_all_zero:
        results_dict = {'MSE': np.zeros(len(array_data)),
                        't0': 0,
                        'average': 0,
                        'uncertainty': 0,
                        'equilibrated': np.zeros(len(array_data)),
                        'ac_time': 0,
                        'uncorr_samples': 0}
        return results_dict

    # Check if the input parameters are what is expected
    assert isinstance(LLM, bool), 'LLM should be True or False'
    assert isinstance(batch_size, int), 'batch_size should be an int'
    assert isinstance(ADF_test, bool), 'ADF_test should be True or False'
    assert isinstance(print_results, bool), 'print_results should be True or False'

    # Check if the uncertainty is a valid option
    if uncertainty not in ['SD', 'SE', 'uSD', 'uSE']:
        raise Exception(f"""{uncertainty} is not a valid option!
        Only Standard Deviation (SD), Standard Error (SE), uncorrelated
        Standard Deviation (uSD), and uncorrelated Standard Error (uSE)
        are valid options.""")

    # Calculate the Marginal Standard Error curve
    MSEm_curve = calculate_MSEm(array_data, batch_size=batch_size)

    if LLM is True:
        # Apply MSER-LLM to get the index of the start of the equilibrated data
        t0 = MSERm_LLM_index(MSEm_curve, batch_size=batch_size)

    else:
        # Apply MSER to get the index of the start of the equilibrated data
        t0 = MSERm_index(MSEm_curve, batch_size=batch_size)

    # Check if t0 falls within the first 75% of the data
    if t0 < 0.75 * len(array_data):
        eq_status = 'Yes'
    else:
        eq_status = 'No. t0 > 75% of the data!'
        print('Warning: t0 is too close to the end of the data!')
        print('The results may not be reliable!')
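
    # As a concrete example, in a 10000-step series the run is only flagged as
    # equilibrated when t0 falls before step 7500 (75% of the data).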

    # Calculate the autocorrelation time and the number of uncorrelated samples
    equilibrated = array_data[t0:]
    ac_time, uncorr_samples = calc_autocorrelation_time(equilibrated)

    # Calculate the average and its uncertainty on the equilibrated data
    average, avg_uncertainty = calc_equilibrated_average(array_data,
                                                         t0,
                                                         uncertainty,
                                                         ac_time)

    # Create a dictionary with the results
    results_dict = {'MSE': MSEm_curve,
                    't0': t0,
                    'average': average,
                    'uncertainty': avg_uncertainty,
                    'equilibrated': equilibrated,
                    'ac_time': ac_time,
                    'uncorr_samples': uncorr_samples}

    eq_ratio = 100 * (len(array_data) - t0) / len(array_data)

    if print_results:
        print(f""" pyMSER Equilibration Results
==============================================================================
Start of equilibrated data: {t0} of {len(array_data)}
Total equilibrated steps: {len(array_data) - t0} ({eq_ratio:.2f}%)
Equilibrated: {eq_status}
Average over equilibrated data: {average:.4f} ± {avg_uncertainty:.4f}
Number of uncorrelated samples: {uncorr_samples:.1f}
Autocorrelation time: {ac_time:.1f}
==============================================================================""")

    if ADF_test:
        # Apply the Augmented Dickey-Fuller test on the equilibrated data
        ADFTestResults, output_text = apply_ADF_test(equilibrated, verbosity=print_results)
        results_dict.update(ADFTestResults)

    return results_dict


def equilibrate_enthalpy(energy,
                         number_of_molecules,
                         temperature,
                         LLM=False,
                         batch_size=1,
                         ADF_test=True,
                         uncertainty='uSD',
                         print_results=True):
    """
    Wrapper function that applies MSER and calculates the equilibrated enthalpy
    of adsorption as

        H = (<E*N> - <E>*<N>) / (<N^2> - <N>^2) - R*T

    adapted from J. Phys. Chem. 1993, 97, 51, 13742-13752.

    Please note that the heat of adsorption (Q_iso) = -enthalpy of adsorption (H).

    The isosteric enthalpy of adsorption, H, is defined as the heat which is released
    when an adsorptive binds to a surface. The enthalpy of adsorption (H) is a negative
    number and the isosteric heat of adsorption (Q_iso) is a positive number.
    For a deeper discussion see: Dalton Trans., 2020, 49, 10295.

    Parameters
    ----------
    energy : 1D array
        List with the potential energy of the adsorbed phase for each MC cycle in units of Kelvin.
    number_of_molecules : 1D array
        List with the total number of molecules in the simulation box for each MC cycle.
    temperature : float
        Temperature of the simulation in Kelvin
    LLM : bool
        Boolean to control the usage of the LLM variation of MSER
    batch_size : int
        Size of the batch to take the average
    ADF_test : bool
        Boolean to control the usage of the ADF test
    uncertainty : str
        String for selecting Standard Error (SE), Standard Deviation (SD), or their
        uncorrelated versions uSD and uSE as the default uncertainty of the average.
    print_results : bool
        Boolean to control the printing of the results

    Returns
    -------
    results_dict : dict
        Dictionary containing the results of MSER
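
    Illustrative usage (synthetic data, not from a real simulation):

    >>> import numpy as np
    >>> rng = np.random.default_rng(1)
    >>> E = -2000.0 + rng.normal(0.0, 50.0, 5000)   # energies in Kelvin
    >>> N = rng.poisson(10.0, 5000).astype(float)   # molecules per MC cycle
    >>> res = equilibrate_enthalpy(E, N, temperature=300.0, print_results=False)
    >>> H = res['average']                          # enthalpy in kJ/mol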
    """

    # Check if energy and number_of_molecules have the same length
    if len(energy) != len(number_of_molecules):
        print('The energy and number_of_molecules arrays must have the same length!')
        results_dict = {'MSE_E': np.nan,
                        'MSE_N': np.nan,
                        't0_E': np.nan,
                        't0_N': np.nan,
                        'average': np.nan,
                        'uncertainty': np.nan,
                        'equilibrated_E': np.nan,
                        'equilibrated_N': np.nan,
                        'ac_time_E': np.nan,
                        'ac_time_N': np.nan,
                        'uncorr_samples_E': np.nan,
                        'uncorr_samples_N': np.nan}
        return results_dict

    # Check the consistency of the energy values
    is_all_finite_E, is_all_zero_E, array_data_E = check_consistency(energy)

    # Check the consistency of the number of molecules
    is_all_finite_N, is_all_zero_N, array_data_N = check_consistency(number_of_molecules)

    # Return NaN if any value of the time series is not a finite number
    if not all([is_all_finite_E, is_all_finite_N]):
        results_dict = {'MSE_E': np.nan,
                        'MSE_N': np.nan,
                        't0_E': np.nan,
                        't0_N': np.nan,
                        'average': np.nan,
                        'uncertainty': np.nan,
                        'equilibrated_E': np.nan,
                        'equilibrated_N': np.nan,
                        'ac_time_E': np.nan,
                        'ac_time_N': np.nan,
                        'uncorr_samples_E': np.nan,
                        'uncorr_samples_N': np.nan}
        return results_dict

    # Return zero if all the data in the time series is zero
    if all([is_all_zero_E, is_all_zero_N]):
        results_dict = {'MSE_E': np.zeros(len(number_of_molecules)),
                        'MSE_N': np.zeros(len(number_of_molecules)),
                        't0_E': 0,
                        't0_N': 0,
                        'average': 0,
                        'uncertainty': 0,
                        'equilibrated_E': np.zeros(len(number_of_molecules)),
                        'equilibrated_N': np.zeros(len(number_of_molecules)),
                        'ac_time_E': 0,
                        'ac_time_N': 0,
                        'uncorr_samples_E': 0,
                        'uncorr_samples_N': 0}

        return results_dict

    # Check if the input parameters are what is expected
    assert isinstance(temperature, float), 'Temperature should be a float'
    assert isinstance(LLM, bool), 'LLM should be True or False'
    assert isinstance(batch_size, int), 'batch_size should be an int'
    assert isinstance(ADF_test, bool), 'ADF_test should be True or False'
    assert isinstance(print_results, bool), 'print_results should be True or False'

    # Check if the uncertainty is a valid option
    if uncertainty not in ['SD', 'SE', 'uSD', 'uSE']:
        raise Exception(f"""{uncertainty} is not a valid option!
        Only Standard Deviation (SD), Standard Error (SE), uncorrelated
        Standard Deviation (uSD), and uncorrelated Standard Error (uSE)
        are valid options.""")

    # Calculate the Marginal Standard Error for the energy
    MSEm_E = calculate_MSEm(array_data_E, batch_size=batch_size)

    if LLM is False:
        # Apply MSER to get the index of the start of the equilibrated data
        t0_E = MSERm_index(MSEm_E, batch_size=batch_size)

    else:
        # Apply MSER-LLM to get the index of the start of the equilibrated data
        t0_E = MSERm_LLM_index(MSEm_E, batch_size=batch_size)

    # Calculate the autocorrelation time and the number of uncorrelated samples
    equilibrated_E = array_data_E[t0_E:]
    ac_time_E, uncorr_samples_E = calc_autocorrelation_time(equilibrated_E)

    # Calculate the Marginal Standard Error for the number of molecules
    MSEm_N = calculate_MSEm(array_data_N, batch_size=batch_size)

    if LLM is False:
        # Apply MSER to get the index of the start of the equilibrated data
        t0_N = MSERm_index(MSEm_N, batch_size=batch_size)

    else:
        # Apply MSER-LLM to get the index of the start of the equilibrated data
        t0_N = MSERm_LLM_index(MSEm_N, batch_size=batch_size)

    # Check if t0_E falls within the first 75% of the data
    if t0_E < 0.75 * len(energy):
        eq_status = 'Yes'
    else:
        eq_status = 'No. t0 > 75% of the data!'
        print('Warning: t0 is too close to the end of the data!')
        print('The results may not be reliable!')

    # Calculate the autocorrelation time and the number of uncorrelated samples
    equilibrated_N = array_data_N[t0_N:]
    ac_time_N, uncorr_samples_N = calc_autocorrelation_time(equilibrated_N)

    # Calculate the enthalpy of adsorption and its uncertainty
    average, avg_uncertainty = calc_equilibrated_enthalpy(array_data_E,
                                                          array_data_N,
                                                          temperature,
                                                          eq_index=t0_E,
                                                          uncertainty=uncertainty,
                                                          ac_time=ac_time_E)

    # Create a dictionary with the results
    results_dict = {'MSE_E': MSEm_E,
                    'MSE_N': MSEm_N,
                    't0_E': t0_E,
                    't0_N': t0_N,
                    'average': average,
                    'uncertainty': avg_uncertainty,
                    'equilibrated_E': equilibrated_E,
                    'equilibrated_N': equilibrated_N,
                    'ac_time_E': ac_time_E,
                    'ac_time_N': ac_time_N,
                    'uncorr_samples_E': uncorr_samples_E,
                    'uncorr_samples_N': uncorr_samples_N}

    eq_ratio = 100 * (len(energy) - t0_E) / len(energy)

    if print_results:
        print(f""" pyMSER Equilibration Results
==============================================================================
Start of equilibrated data: {t0_E} of {len(energy)}
Total equilibrated steps: {len(energy) - t0_E} ({eq_ratio:.2f}%)
Equilibrated: {eq_status}
Average over equilibrated data: {average:.4f} ± {avg_uncertainty:.4f} kJ/mol
Number of uncorrelated samples: {uncorr_samples_E:.1f}
Autocorrelation time: {ac_time_E:.1f}
==============================================================================""")

    if ADF_test:
        # Apply the Augmented Dickey-Fuller test on the equilibrated data
        ADFTestResults, output_text = apply_ADF_test(equilibrated_E, verbosity=print_results)
        results_dict.update(ADFTestResults)

    return results_dict
--------------------------------------------------------------------------------
/version.py:
--------------------------------------------------------------------------------
import codecs
import os.path


def read(rel_path):
    here = os.path.abspath(os.path.dirname(__file__))
    with codecs.open(os.path.join(here, rel_path), 'r') as fp:
        return fp.read()


def get_version(rel_path):
    for line in read(rel_path).splitlines():
        if line.startswith(' version'):
            delim = '"' if '"' in line else "'"
            return line.split(delim)[1]
    else:
        raise RuntimeError("Unable to find version string.")


print(get_version("setup.py"))
--------------------------------------------------------------------------------