├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── dco.yml ├── .gitignore ├── .travis.yml ├── .whitesource ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MAINTAINERS.md ├── MANIFEST.in ├── Pipfile ├── README.md ├── example_data ├── Cu-BTT_500165.0_198.000000.csv └── enthalpy_data.csv ├── example_workflow ├── README.md ├── environment.yml ├── modules │ ├── raspa_input.py │ ├── raspa_output.py │ └── tools.py └── run.py ├── pymser_tutorial.ipynb ├── pyproject.toml ├── renovate.json ├── setup.cfg ├── setup.py ├── src └── pymser │ ├── __init__.py │ └── pymser.py └── version.py /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **As a** ... 2 | **I need** ... 3 | **So that** ... 4 | 5 | **Assumptions:** 6 | * ... 7 | * ... 8 | 9 | **Acceptance criteria:** 10 | ``` 11 | Given ... 12 | When ... 13 | Then ... 14 | ``` 15 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | ... 4 | 5 | ## Checklist 6 | 7 | - [ ] I ran the appropriate tests. 8 | - [ ] I ran `flake8 --max-line-length=100`. 9 | - [ ] I wrote documentation for all new features. 10 | - [ ] I have added any new dependencies (libraries and/or tools) to `setup.py`, `Pipfile` and `README.md`. 11 | 12 | ## Related Issue(s) 13 | 14 | Closes # 15 | 16 | ## Notes to Reviewer 17 | 18 | ... 19 | -------------------------------------------------------------------------------- /.github/dco.yml: -------------------------------------------------------------------------------- 1 | # This enables DCO bot for you, please take a look https://github.com/probot/dco 2 | # for more details. 3 | require: 4 | members: false 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # MacOS 2 | .DS_Store 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | 109 | # IBM Cloud dev plugins 110 | .ibm-project 111 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: focal 2 | 3 | language: python 4 | 5 | python: 6 | - "3.9" 7 | 8 | git: 9 | depth: 5 10 | 11 | branches: 12 | except: 13 | - /^v(\d+\.?)+/ 14 | 15 | addons: 16 | apt: 17 | update: true 18 | packages: 19 | - gettext 20 | 21 | install: 22 | - pip install --quiet --upgrade pip bump2version flake8 23 | 24 | before_script: 25 | # Define branch-dependent environment variables 26 | - TAG="${TRAVIS_PULL_REQUEST_BRANCH:-$TRAVIS_BRANCH}"; 27 | - if [ "$TRAVIS_BRANCH" == "release" ]; then 28 | UPDATE=minor; 29 | elif [ "$TRAVIS_BRANCH" == "master" ]; then 30 | UPDATE=patch; 31 | elif [ "$TRAVIS_BRANCH" == "main" ]; then 32 | UPDATE=patch; 33 | else 34 | UPDATE=none; 35 | fi; 36 | 37 | script: 38 | # Run Python linters 39 | - flake8 --max-line-length=100 40 | 41 | # Increment package version and prepare for release 42 | - CURRENT_VERSION=$(python version.py) 43 | - if [ "$UPDATE" != "none" ]; then 44 | bump2version --current-version $CURRENT_VERSION $UPDATE setup.py --tag --commit --message $'{new_version} Release\n\n[skip ci]' --verbose; 45 | fi; 46 | 47 | after_script: 48 | - pip list 49 | 50 | deploy: 51 | - provider: pypi 52 | username: "$ARTIFACTORY_USERNAME" 53 | password: "$ARTIFACTORY_ACCESS_TOKEN" 54 | server: "$ARTIFACTORY_URL" 55 | on: 56 | branch: 57 | - master 58 | - release 59 | - provider: pypi 60 | username: "__token__" 61 | password: "$PYPI_API_TOKEN" 62 | on: 63 | branch: 64 | - main 65 | - provider: script 66 | script: git push origin HEAD:"$TAG" --follow-tags 67 | skip_cleanup: true 68 | on: 69 | branch: 70 | - master 71 | - release 72 | - provider: script 73 | script: git remote add public https://oauth:${GITHUB_PERSONAL_ACCESS_TOKEN}@github.com/IBM/pymser.git && git push public HEAD:"$TAG" --follow-tags 74 | skip_cleanup: true 75 | on: 76 | branch: 77 | - main 78 | 79 | env: 80 | global: 81 | # ARTIFACTORY_USERNAME defined via web UI 82 | # ARTIFACTORY_URL defined via web UI 83 | # ARTIFACTORY_ACCESS_TOKEN defined via web UI 84 | # PYPI_API_TOKEN defined via web UI 85 | # GITHUB_PERSONAL_ACCESS_TOKEN defined via web UI 86 | -------------------------------------------------------------------------------- /.whitesource: 
--------------------------------------------------------------------------------
1 | {
2 | "settingsInheritedFrom": "whitesource-config/whitesource-config@python3"
3 | }
4 | 
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | All notable changes to this project will be documented in this file.
4 | 
5 | ## v1.0.20
6 | 
7 | - Use `pyTorch` to calculate the MSE curve, reducing the overall time of the calculation by approximately one order of magnitude on large arrays
8 | - Use `scipy` to calculate the autocorrelation time, reducing the overall time of the calculation by approximately two orders of magnitude on large arrays
9 | 
10 | ## v1.0.18
11 | 
12 | - Use `nanmean` and `nanstd` instead of `mean` and `std` to avoid errors when there are `NaN` values in the data
13 | - Add equilibration status to the printed report and a warning if equilibration is not reached
14 | 
15 | ## v1.0.8
16 | 
17 | - Downgrade requirements for Python from python>=3.10 to python>=3.9
18 | - Add the Standard Error (SE) as a possible uncertainty of the average
19 | - Add the uncorrelated Standard Error (uSE) as a possible uncertainty of the average
20 | - Add the uncorrelated Standard Deviation (uSD) as a possible uncertainty of the average and set it as the default
21 | - Small bug fixes
22 | 
23 | ## v1.0.2
24 | 
25 | - Add files to GitHub repository
26 | - Prepare for PyPI.org release
27 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Contributing In General
2 | Our project welcomes external contributions. If you have an itch, please feel
3 | free to scratch it.
4 | 
5 | To contribute code or documentation, please submit a [pull request](https://github.com/IBM/pymser/pulls).
6 | 
7 | A good way to familiarize yourself with the codebase and contribution process is
8 | to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/IBM/pymser/issues).
9 | Before embarking on a more ambitious contribution, please quickly get in touch with us.
10 | 
11 | **Note: We appreciate your effort, and want to avoid a situation where a contribution
12 | requires extensive rework (by you or by us), sits in backlog for a long time, or
13 | cannot be accepted at all!**
14 | 
15 | ### Proposing new features
16 | 
17 | If you would like to implement a new feature, please [raise an issue](https://github.com/IBM/pymser/issues)
18 | before sending a pull request so the feature can be discussed. This is to avoid
19 | you wasting your valuable time working on a feature that the project developers
20 | are not interested in accepting into the code base.
21 | 
22 | ### Fixing bugs
23 | 
24 | If you would like to fix a bug, please [raise an issue](https://github.com/IBM/pymser/issues) before sending a
25 | pull request so it can be tracked.
26 | 
27 | ### Merge approval
28 | 
29 | The project maintainers use LGTM (Looks Good To Me) in comments on the code
30 | review to indicate acceptance. A change requires LGTMs from two of the
31 | maintainers of each component affected.
32 | 
33 | For a list of the maintainers, see the [MAINTAINERS.md](MAINTAINERS.md) page.
34 | 
35 | ## Legal
36 | 
37 | We have tried to make it as easy as possible to make contributions. This
38 | applies to how we handle the legal aspects of contribution.
We use the
39 | same approach - the [Developer's Certificate of Origin 1.1 (DCO)](https://github.com/hyperledger/fabric/blob/master/docs/source/DCO1.1.txt) - that the Linux® Kernel [community](https://elinux.org/Developer_Certificate_Of_Origin)
40 | uses to manage code contributions.
41 | 
42 | We simply ask that when submitting a patch for review, the developer
43 | must include a sign-off statement in the commit message.
44 | 
45 | Here is an example Signed-off-by line, which indicates that the
46 | submitter accepts the DCO:
47 | 
48 | ```
49 | Signed-off-by: John Doe <john.doe@example.com>
50 | ```
51 | 
52 | You can include this automatically when you commit a change to your
53 | local git repository using the following command:
54 | 
55 | ```
56 | git commit -s
57 | ```
58 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 | 
3 | Copyright (c) 2022, International Business Machines
4 | All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 | 
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
--------------------------------------------------------------------------------
/MAINTAINERS.md:
--------------------------------------------------------------------------------
1 | # MAINTAINERS
2 | 
3 | * Felipe Lopes de Oliveira
4 | * Rodrigo Neumann Barros Ferreira
5 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # Include the README
2 | include README.md
3 | 
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 | 
6 | [requires]
7 | python_version = "3.9"
8 | 
9 | [dev-packages]
10 | autopep8 = "*"
11 | flake8 = "*"
12 | 
13 | [packages]
14 | numpy = "*"
15 | scipy = "*"
16 | statsmodels = "*"
17 | pip = "*"
18 | torch = "*"
19 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pyMSER
2 | 
3 | A Python library to apply the [Marginal Standard Error Rule (MSER)](https://doi.org/10.1177/003754979706900601) for transient regime detection and truncation on Grand Canonical Monte Carlo adsorption simulations.
4 | 
5 | > Oliveira, Felipe L., et al. "pyMSER - An Open-Source Library for Automatic Equilibration Detection in Molecular Simulations." *Journal of Chemical Theory and Computation* **20.19** (2024): 8559-8568.
6 | > 
7 | > https://doi.org/10.1021/acs.jctc.4c00417
8 | 
9 | ## Dependencies
10 | 
11 | * [NumPy](https://numpy.org) is the fundamental package for scientific computing with Python.
12 | * [SciPy](https://scipy.org/) is a collection of fundamental algorithms for scientific computing in Python.
13 | * [statsmodels](https://www.statsmodels.org/) is a Python module that provides classes and functions for the estimation of many different statistical models, as well as for conducting statistical tests and statistical data exploration.
14 | * [pyTorch](https://pytorch.org/) is an open source machine learning framework that uses tensor computations and automatic differentiation on GPU and CPU.
15 | 
16 | ## Developer tips
17 | 
18 | These tips are not mandatory, but they are a sure way of helping you develop the code while maintaining consistency with the current style, structure, and formatting choices.
19 | 
20 | ### Coding style guide
21 | 
22 | We recommend these tools to ensure code style compatibility.
23 | 
24 | * [autopep8](https://pypi.org/project/autopep8/) automatically formats Python code to conform to the PEP8 style guide.
25 | * [Flake8](https://flake8.pycqa.org) is your tool for style guide enforcement.
26 | 
27 | ## Installation
28 | 
29 | ### Option 1: Using `setup.py`
30 | 
31 | Clone the `pymser` repository if you haven't done it yet.
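If needed, you can clone it directly from GitHub with:

```Shell
git clone https://github.com/IBM/pymser.git
```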
32 | 
33 | Go to `pymser`'s root folder, where you will find the `setup.py` file, and run the command below:
34 | 
35 | ```Shell
36 | python setup.py install
37 | ```
38 | 
39 | ### Option 2: Using pip/pipenv to install from PyPI.org
40 | 
41 | If you intend to use `pipenv`, please add the following to your `Pipfile`:
42 | 
43 | ```Pipfile
44 | [[source]]
45 | url = "https://pypi.org/simple"
46 | verify_ssl = true
47 | name = "pypi"
48 | 
49 | [packages]
50 | pymser = "*"
51 | ```
52 | 
53 | If you intend to use `pip`, please run the command below:
54 | 
55 | ```Shell
56 | pip install pymser
57 | ```
58 | 
59 | ### Option 3: Using pip to install directly from the GitHub repo
60 | 
61 | You can run
62 | 
63 | ```Shell
64 | pip install git+https://github.com/IBM/pymser.git
65 | ```
66 | 
67 | and then you will be prompted to enter your GitHub username and password/access token.
68 | 
69 | If you already have an SSH key configured, you can run
70 | 
71 | ```Shell
72 | pip install git+ssh://git@github.com/IBM/pymser.git
73 | ```
74 | 
75 | ### Option 4: Using pip/pipenv to install from Artifactory
76 | 
77 | Log into Artifactory and access your user profile. There you will find your API key and username. Then export your credentials as environment variables for later use in the installation process.
78 | 
79 | ```Shell
80 | export ARTIFACTORY_USERNAME=username@email.com
81 | export ARTIFACTORY_ACCESS_TOKEN=your-access-token
82 | export ARTIFACTORY_URL=your-artifactory-url
83 | ```
84 | 
85 | If you intend to use `pipenv`, please add the following to your `Pipfile`:
86 | 
87 | ```Pipfile
88 | [[source]]
89 | url = "https://$ARTIFACTORY_USERNAME:$ARTIFACTORY_ACCESS_TOKEN@$ARTIFACTORY_URL"
90 | verify_ssl = true
91 | name = "artifactory"
92 | 
93 | [packages]
94 | pymser = {version="*", index="artifactory"}
95 | ```
96 | 
97 | If you intend to use `pip`, please run the command below:
98 | 
99 | ```Shell
100 | pip install pymser --extra-index-url=https://$ARTIFACTORY_USERNAME:$ARTIFACTORY_ACCESS_TOKEN@$ARTIFACTORY_URL
101 | ```
102 | 
103 | ## Usage example
104 | 
105 | This is a small example of how to use our package:
106 | 
107 | ```Python
108 | >>> import pymser
109 | >>> import pandas as pd
110 | >>>
111 | >>> # Reads the example file using pandas
112 | >>> df = pd.read_csv('example_data/Cu-BTT_500165.0_198.000000.csv')
113 | >>>
114 | >>> # Apply the MSER to get the index of the start of equilibrated data
115 | >>> results = pymser.equilibrate(df['mol/kg'], LLM=False, batch_size=1, ADF_test=True, uncertainty='uSD', print_results=True)
116 | 
117 | pyMSER Equilibration Results
118 | ==============================================================================
119 | Start of equilibrated data: 13368 of 48613
120 | Total equilibrated steps: 35245 (72.50%)
121 | Equilibrated: Yes
122 | Average over equilibrated data: 22.4197 ± 0.1905
123 | Number of uncorrelated samples: 22.3
124 | Autocorrelation time: 1579.0
125 | ==============================================================================
126 | 
127 | Augmented Dickey-Fuller Test
128 | ==============================================================================
129 | Test statistic for observable: -3.926148246630434
130 | P-value for observable: 0.001850619485090052
131 | The number of lags used: 46
132 | The number of observations used for the ADF regression: 35198
133 | Cutoff Metrics :
134 | 1%: -3.430536 | The data is stationary with 99 % confidence
135 | 5%: -2.861622 | The data is stationary with 95 % confidence
136 | 10%: -2.566814 | The data is stationary with 90 % confidence
137 | ```
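Besides printing the report above, `equilibrate` returns a dictionary of results. The sketch below shows how to reuse it, following the example workflow shipped in this repository; `t0` (start of the equilibrated data) and `ac_time` (autocorrelation time) are the two keys that workflow relies on:

```Python
>>> # Reuse the detected equilibration index and autocorrelation time
>>> average, uncertainty = pymser.calc_equilibrated_average(
...     data=df['mol/kg'],
...     eq_index=results['t0'],
...     uncertainty='uSD',
...     ac_time=results['ac_time'])
```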
138 | 
139 | You can also access our [tutorial](pymser_tutorial.ipynb).
140 | 
141 | ## Python package deployment
142 | 
143 | ### Deploying to Artifactory
144 | 
145 | We have an automated CI/CD pipeline running on TravisCI that takes every single `git push` event and executes the build/test/deploy instructions in the `.travis.yml`. If you are deploying the `master` or `release` branches, a Python package will be generated and published to a private PyPI registry on Artifactory.
146 | 
147 | ### Deploying to PyPI
148 | 
149 | We have an automated CI/CD pipeline running on TravisCI that takes every single `git push` event and executes the build/test/deploy instructions in the `.travis.yml`. If you are deploying the `main` branch, a Python package will be generated and published to the PyPI.org registry.
150 | 
--------------------------------------------------------------------------------
/example_workflow/README.md:
--------------------------------------------------------------------------------
1 | # Tutorial: Using pyMSER with RASPA2 to Run a Fixed Number of Production Cycles
2 | 
3 | ## Introduction
4 | 
5 | Welcome to this tutorial on using pyMSER with RASPA2 to run a fixed number of production cycles after automatically detecting the equilibrated portion of the simulation. This guide assumes that you have a basic understanding of Python and Grand Canonical Monte Carlo (GCMC) simulations using RASPA. The aim is to streamline simulation workflows by leveraging the capabilities of pyMSER for equilibration detection, ensuring accurate and efficient production runs.
6 | 
7 | ## Installation
8 | 
9 | To get started, ensure that your environment is set up with the necessary dependencies. The provided `environment.yml` file includes all the required packages:
10 | 
11 | * [NumPy](https://numpy.org) is the fundamental package for scientific computing with Python.
12 | * [Pandas](https://pandas.pydata.org) is a fast, powerful, flexible, and easy-to-use open-source data analysis and data manipulation library built on top of NumPy.
13 | * [Gemmi](https://gemmi.readthedocs.io/en/latest/) is a Python library for handling macromolecular structures.
14 | * [pyMSER](https://pypi.org/project/pymser/) is a Python library to apply the Marginal Standard Error Rule (MSER) for transient regime detection and truncation on GCMC adsorption simulations.
15 | * [RASPA2](https://pypi.org/project/RASPA2/) is a Python interface to the RASPA2 molecular simulation package.
16 | 
17 | To create the environment, run the following command:
18 | 
19 | ```sh
20 | conda env create -f environment.yml
21 | ```
22 | 
23 | Activate the environment with:
24 | 
25 | ```sh
26 | conda activate pymser
27 | ```
28 | 
29 | ## How to Run
30 | 
31 | To run a fixed number of production cycles using pyMSER with RASPA2, you can use the provided `run.py` script. This script automates the process of equilibration detection and production running based on the MSER rule. The script takes the output directory, the framework name, and the external pressure as input arguments.
32 | 
33 | ```sh
34 | python run.py --FrameworkName 'MgMOF-74' --ExternalPressure 1e4 --NumberOfProdCycles 1500 --AddCycles 1000 --GasComposition '{"CO2":0.5,"N2":0.5}' 'GCMC'
35 | ```
36 | 
37 | The script will perform the following steps (a condensed sketch of the main loop is shown after the list):
38 | 
39 | 1. Create the necessary directories and input files for the simulation.
40 | 2. Run RASPA for `AddCycles` cycles.
41 | 3. Apply the MSER rule to detect the equilibrated portion of the simulation.
42 | 4. If the desired number of production cycles is not reached, run additional `AddCycles` cycles until the target is achieved.
43 | 5. Parse the output files and save the results.
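This is a condensed sketch of the control loop that `run.py` implements; `run_raspa` is a hypothetical stand-in for the actual RASPA system call, while all other names match the script:

```python
# Minimal sketch of the run.py loop (not the full script)
equilibrated = False
while not equilibrated:
    run_raspa(arg.AddCycles)  # hypothetical helper: runs RASPA for AddCycles more cycles
    eqDict = pymser.equilibrate(dataFrame['N_ads'], print_results=False)
    # Everything after the detected transient (index t0) counts as production data
    equilibrated = len(dataFrame['N_ads']) - eqDict['t0'] > arg.NumberOfProdCycles
```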
44 | 
45 | The output should be:
46 | 
47 | ```sh
48 | ==============================================================================
49 | Automatic GCMC Simulation with pyMSER
50 | ==============================================================================
51 | Framework Name : MgMOF-74
52 | External Temperature : 298.0 K
53 | External Pressure : 10000.0 Pa
54 | Gas Composition : {'CO2': 0.5, 'N2': 0.5}
55 | Desired Prod. Cycles : 1500
56 | Output Path : GCMC
57 | ==============================================================================
58 | 
59 | Running RASPA simulation...
60 | 
61 |  > Running iteration 1...
62 |  > Found only 999/1500 production cycles. Running 1000 more cycles.
63 |  > Running iteration 2...
64 |  > Success! Found 1999 production cycles. Analyzing final data...
65 | 
66 | 
67 | pyMSER Equilibration Results
68 | ==============================================================================
69 | Start of equilibrated data: 1 of 2000
70 | Total equilibrated steps: 1999 (99.97%)
71 | Equilibrated: Yes
72 | Average over equilibrated data: 3.5916 ± 1.9509
73 | Number of uncorrelated samples: 1999.0
74 | Autocorrelation time: 1.0
75 | ==============================================================================
76 | 
77 | Augmented Dickey-Fuller Test
78 | ==============================================================================
79 | Test statistic for observable: -40.707108860029564
80 | P-value for observable: 0.0
81 | The number of lags used: 0
82 | The number of observations used for the ADF regression: 3998
83 | Cutoff Metrics :
84 | 1%: -3.431987 | The data is stationary with 99 % confidence
85 | 5%: -2.862263 | The data is stationary with 95 % confidence
86 | 10%: -2.567155 | The data is stationary with 90 % confidence
87 | 
88 | ==============================================================================
89 | Component 0 [CO2]
90 | ---------------------------------------------------------------------------
91 | Average loading absolute [molecules/unit cell] 0.1947205551 +/- 0.1135451716
92 | Average loading absolute [mol/kg framework] 0.0891424690 +/- 0.0519806290
93 | Average loading absolute [mg/g framework] 3.9231154891 +/- 2.2876414927
94 | Average loading absolute [cm^3 STP/gr] 1.9980371381 +/- 1.1650925581
95 | Average loading absolute [cm^3 STP/cm^3] 1.7647568024 +/- 1.0290624625
96 | Enthalpy of adsorption [kJ/mol] -17.8534776591 +/- 0.3852622588
97 | ==============================================================================
98 | Component 1 [N2]
99 | ---------------------------------------------------------------------------
100 | Average loading absolute [molecules/unit cell] 0.0297574394 +/- 0.0428471501
101 | Average loading absolute [mol/kg framework] 0.0136228639 +/- 0.0196152930
102 | Average loading absolute [mg/g framework] 0.3816227347 +/- 0.5494910502
103 | Average loading absolute [cm^3 STP/gr] 0.3053425404 +/- 0.4396567026
104 | Average loading absolute [cm^3 STP/cm^3] 0.2696923470 +/- 0.3883246922
105 | Enthalpy of adsorption [kJ/mol] -11.9135661990 +/- 3.2649508929
106 | ==============================================================================
107 | ```
108 | 
109 | ### Command Line Options
110 | 
111 | You have several options to control the simulation parameters. 
Below is a detailed explanation of each option available in the `run.py` script: 112 | 113 | - **output_folder (required)** 114 | - Type: `str` 115 | - Help: Directory to save the files of the calculations. This directory should contain the `cif` file of the framework and the force field files. 116 | 117 | - **--FrameworkName (required)** 118 | - Type: `str` 119 | - Help: Name of the framework to be simulated 120 | 121 | - **--ExternalPressure (required)** 122 | - Type: `float` 123 | - Help: External pressure in Pascal 124 | 125 | #### Optional Parameters 126 | 127 | - **--NumberOfProdCycles** 128 | - Type: `int` 129 | - Default: `5000` 130 | - Help: Number of desired production cycles 131 | 132 | - **--AddCycles** 133 | - Type: `int` 134 | - Default: `1000` 135 | - Help: Number of additional tentative cycles if the desired number of production cycles has not been achieved 136 | 137 | - **--ExternalTemperature** 138 | - Type: `float` 139 | - Default: `298.0` 140 | - Help: Temperature of the simulation in Kelvin 141 | 142 | - **--UnitCells** 143 | - Type: `str` 144 | - Default: `auto` 145 | - Help: Number of unit cells to be simulated. Can be "auto" or a string of comma-separated values. E.g., "3,3,1" 146 | 147 | - **--GasComposition** 148 | - Type: `str` 149 | - Default: `{"CO2": 1.0}` 150 | - Help: Type of gas composition for the simulation as a dictionary. E.g., '{"CO2": 0.5, "N2": 0.5}' 151 | 152 | - **--UseChargesFromCIFFile** 153 | - Help: Use charges from CIF file. 154 | 155 | 156 | With these options, you can customize your simulation to fit your specific needs, ensuring an efficient and accurate GCMC simulation workflow. 157 | -------------------------------------------------------------------------------- /example_workflow/environment.yml: -------------------------------------------------------------------------------- 1 | name: pymser 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python >= 3.10 6 | - pip 7 | - numpy 8 | - pandas 9 | - gemmi 10 | - pip: 11 | - pymser 12 | - RASPA2 13 | -------------------------------------------------------------------------------- /example_workflow/modules/raspa_input.py: -------------------------------------------------------------------------------- 1 | import os 2 | from textwrap import dedent 3 | from modules.tools import calculate_UnitCells, get_pseudoatoms 4 | 5 | 6 | def create_GCMC_input(path: str, FrameworkName: str, **kwargs): 7 | """ 8 | Create the RASPA GCMC simulation input. 9 | Parameters 10 | ---------- 11 | path : string 12 | Path where the file will be saved. 13 | FrameworkName : string 14 | Name of the structure. Must be the same name in the `.cif` file. 
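    **kwargs
        Optional keyword arguments that override the default calculation
        parameters defined in the CALC_DICT dictionary below (e.g.
        NumberOfCycles, ExternalTemperature, GasComposition).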
15 |     """
16 | 
17 |     # Calculation parameters dictionary
18 |     CALC_DICT = {
19 |         'FrameworkName': FrameworkName,  # string
20 |         'NumberOfCycles': 10000,  # int
21 |         'NumberOfInitializationCycles': 0,  # int
22 |         'PrintEvery': 1,  # int
23 |         'PrintPropertiesEvery': 1,  # int
24 |         'ForceField': 'local',  # string
25 |         'CutOffVDW': 12.8,  # float
26 |         'CutOffChargeCharge': 12.8,  # float
27 |         'CutOffChargeBondDipole': 12.8,  # float
28 |         'CutOffBondDipoleBondDipole': 12.8,  # float
29 |         'EwaldPrecision': 1.0e-6,  # float
30 |         'HeliumVoidFraction': 0.0,  # float
31 |         'ExternalTemperature': 298.15,  # float
32 |         'ExternalPressure': '100000',  # float or csv
33 |         'UseChargesFromCIFFile': 'yes',  # yes / no
34 |         'UnitCells': '1 1 1',  # int int int
35 |         'GasComposition': {'CO2': 1.0},  # dict
36 |         'SpacingVDWGrid': 0.1,  # float
37 |         'SpacingCoulombGrid': 0.1,  # float
38 |         'UseTabularGrid': 'no',  # yes / no
39 |         'NumberOfGrids': 0,  # int
40 |         'GridTypes': '',  # string
41 |         'Movies': 'no',  # yes / no
42 |         'WriteMoviesEvery': 0,  # int
43 |         'ComputeDensityProfile3DVTKGrid': 'no',  # yes / no
44 |         'DensityProfile3DVTKGridPoints': '100 100 100',  # int int int
45 |         'WriteDensityProfile3DVTKGridEvery': 100,  # int
46 |         'RestartFile': 'no',  # yes / no
47 |     }
48 | 
49 |     # Update the dictionary with the kwargs
50 |     CALC_DICT.update(kwargs)
51 | 
52 |     if 'UnitCells' in kwargs:
53 |         if isinstance(kwargs['UnitCells'], list):
54 |             CALC_DICT['UnitCells'] = ' '.join(map(str, kwargs['UnitCells']))
55 |         if isinstance(kwargs['UnitCells'], int):
56 |             CALC_DICT['UnitCells'] = ' '.join(map(str, [kwargs['UnitCells']] * 3))
57 |         if isinstance(kwargs['UnitCells'], str) and kwargs['UnitCells'].lower() == 'auto':
58 | 
59 |             maxCutOff = max([CALC_DICT['CutOffVDW'],
60 |                              CALC_DICT['CutOffChargeCharge'],
61 |                              CALC_DICT['CutOffChargeBondDipole'],
62 |                              CALC_DICT['CutOffBondDipoleBondDipole']])
63 | 
64 |             CALC_DICT['UnitCells'] = calculate_UnitCells(
65 |                 os.path.join(path, FrameworkName.removesuffix('.cif') + '.cif'),
66 |                 maxCutOff)
67 | 
68 |     if 'ExternalPressure' in kwargs:
69 |         if isinstance(kwargs['ExternalPressure'], list):
70 |             CALC_DICT['ExternalPressure'] = ' '.join(map(str, kwargs['ExternalPressure']))
71 | 
72 |         elif isinstance(kwargs['ExternalPressure'], int):
73 |             CALC_DICT['ExternalPressure'] = float(kwargs['ExternalPressure'])
74 | 
75 |         elif isinstance(kwargs['ExternalPressure'], float):
76 |             CALC_DICT['ExternalPressure'] = kwargs['ExternalPressure']
77 | 
78 |         elif isinstance(kwargs['ExternalPressure'], str):
79 |             CALC_DICT['ExternalPressure'] = ' '.join(kwargs['ExternalPressure'].split(','))
80 | 
81 |     # Create file header as string
82 |     GCMC_InputFile = dedent("""\
83 |         SimulationType MonteCarlo
84 |         NumberOfCycles {NumberOfCycles}
85 |         NumberOfInitializationCycles {NumberOfInitializationCycles}
86 |         PrintEvery {PrintEvery}
87 |         PrintPropertiesEvery {PrintPropertiesEvery}
88 | 
89 |         RestartFile {RestartFile}
90 | 
91 |         ForceField {ForceField}
92 |         CutOffVDW {CutOffVDW}
93 |         CutOffChargeCharge {CutOffChargeCharge}
94 |         CutOffChargeBondDipole {CutOffChargeBondDipole}
95 |         CutOffBondDipoleBondDipole {CutOffBondDipoleBondDipole}
96 |         ChargeMethod Ewald
97 |         EwaldPrecision {EwaldPrecision}
98 | 
99 |         Framework 0
100 |         FrameworkName {FrameworkName}
101 |         HeliumVoidFraction {HeliumVoidFraction}
102 |         ExternalTemperature {ExternalTemperature}
103 |         ExternalPressure {ExternalPressure}
104 |         UseChargesFromCIFFile {UseChargesFromCIFFile}
105 |         UnitCells {UnitCells}
106 | 
107 |         """).format(**CALC_DICT)
108 | 
109 |     if CALC_DICT['UseTabularGrid'] == 'yes':
110 | 
111 |         # 
Get the pseudoatoms number and types 112 | for gas in list(CALC_DICT['GasComposition'].keys()): 113 | pseudo_atoms = get_pseudoatoms(gas) 114 | CALC_DICT['NumberOfGrids'] += len(pseudo_atoms) # int 115 | CALC_DICT['GridTypes'] += ' '.join(pseudo_atoms) + ' ' # string 116 | 117 | GCMC_InputFile += dedent("""\ 118 | NumberOfGrids {NumberOfGrids} 119 | GridTypes {GridTypes} 120 | SpacingVDWGrid {SpacingVDWGrid} 121 | SpacingCoulombGrid {SpacingCoulombGrid} 122 | UseTabularGrid {UseTabularGrid} 123 | 124 | """).format(**CALC_DICT) 125 | 126 | if CALC_DICT['ComputeDensityProfile3DVTKGrid'] == 'yes': 127 | GCMC_InputFile += dedent("""\ 128 | ComputeDensityProfile3DVTKGrid yes 129 | DensityProfile3DVTKGridPoints {DensityProfile3DVTKGridPoints} 130 | WriteDensityProfile3DVTKGridEvery {WriteDensityProfile3DVTKGridEvery} 131 | 132 | """).format(**CALC_DICT) 133 | 134 | if CALC_DICT['Movies'] == 'yes': 135 | GCMC_InputFile += dedent("""\ 136 | Movies yes 137 | WriteMoviesEvery {WriteMoviesEvery} 138 | 139 | """).format(**CALC_DICT) 140 | 141 | # Create component list as string 142 | for name, fraction in CALC_DICT['GasComposition'].items(): 143 | 144 | number_of_components = len(CALC_DICT['GasComposition']) 145 | index_of_component = list(CALC_DICT['GasComposition']).index(name) 146 | 147 | # Append component string block to input file 148 | GCMC_InputFile += dedent(f"""\ 149 | Component {index_of_component} MoleculeName {name} 150 | MolFraction {fraction} 151 | MoleculeDefinition TraPPE 152 | SwapProbability 0.5 153 | TranslationProbability 0.3 154 | RotationProbability 0.2 155 | IdentityChangeProbability 0.1 156 | NumberOfIdentityChanges {number_of_components} 157 | IdentityChangesList {' '.join(map(str, range(number_of_components)))} 158 | CreateNumberOfMolecules 0 159 | 160 | """) 161 | 162 | # Write input to file 163 | with open(os.path.join(path, 164 | "simulation.input"), 'w') as f: 165 | f.write(GCMC_InputFile) 166 | -------------------------------------------------------------------------------- /example_workflow/modules/raspa_output.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | from RASPA2 import parse 4 | from glob import glob 5 | 6 | 7 | def parse_GCMC(output_folder: str, 8 | FrameworkName: str, 9 | ExternalTemperature: float, 10 | ExternalPressure: float, 11 | GasComposition: dict, 12 | NumberOfCycles: int, 13 | PrintEvery: int): 14 | # Read file into string 15 | input_file_name = glob('{0}/output_{1}_*_{2:.6f}_{3:g}.data'.format(output_folder, 16 | FrameworkName, 17 | ExternalTemperature, 18 | ExternalPressure))[0] 19 | with open(os.path.join(input_file_name), 'r') as f: 20 | raspa_string = f.read() 21 | 22 | # Parse string into dictionary and list 23 | raspa_dict = parse(raspa_string) 24 | raspa_list = raspa_string.split('\n') 25 | 26 | # Extract number of unit cells in the supercell 27 | unit_cells = [int(line.split(':')[1]) for line in raspa_list if 'Number of unitcells' in line] 28 | 29 | # Calculate mol/kg conversion factor for the supercell 30 | conversion_string = 'Conversion factor molecules/unit cell -> mol/kg' 31 | to_mol_kg = raspa_dict['MoleculeDefinitions'][conversion_string][0] 32 | to_mol_kg /= math.prod(unit_cells) 33 | 34 | output_file_name = f'raspa_{ExternalTemperature:.6f}_{ExternalPressure}.csv' 35 | if os.path.isfile(os.path.join(output_folder, output_file_name)): 36 | append_data = True 37 | # Open the file 38 | with open(os.path.join(output_folder, output_file_name), 'r') as f: 39 | lines = 
f.readlines() 40 | # Check if the last line is the same as the last cycle 41 | last_line = lines[-1].split(',') 42 | base_cycle = int(last_line[0]) 43 | base_step = int(last_line[1]) 44 | 45 | else: 46 | append_data = False 47 | base_cycle = 0 48 | base_step = 0 49 | 50 | # Build header string 51 | if not append_data: 52 | header = 'cycle,step,N_ads' 53 | for component in range(len(GasComposition)): 54 | cycle_key = f'Current cycle: 0 out of {NumberOfCycles}' 55 | component_key = f'Component {component}' 56 | molecule_name = raspa_dict[cycle_key][component_key][0] 57 | header += ( 58 | f',{molecule_name}_[N_ads]' 59 | f',{molecule_name}_[molecules/uc]' 60 | f',{molecule_name}_[mol/kg]' 61 | ) 62 | 63 | header += ( 64 | ',total_[K]' 65 | ',host-host_[K]' 66 | ',host-adsorbate_[K]' 67 | ',host-cation_[K]' 68 | ',adsorbate-adsorbate_[K]' 69 | ',cation-cation_[K]' 70 | ',adsorbate-cation_[K]' 71 | ) 72 | 73 | csv_output = header + '\n' 74 | else: 75 | csv_output = '' 76 | 77 | # For each cycle 78 | steps = base_step 79 | for cycle in range(0, NumberOfCycles, PrintEvery): 80 | cycle_key = f'Current cycle: {cycle} out of {NumberOfCycles}' 81 | number_of_adsorbates = int(raspa_dict[cycle_key]['Number of Adsorbates'][0]) 82 | steps += max(20, number_of_adsorbates) 83 | line = ( 84 | f'{cycle + base_cycle},' 85 | f' {steps},' 86 | f' {number_of_adsorbates}' 87 | ) 88 | 89 | # For each component 90 | for component in range(len(GasComposition)): 91 | component_key = f'Component {component}' 92 | number_of_molecules = int(raspa_dict[cycle_key][component_key][2].split('/')[0]) 93 | line += ( 94 | f', {number_of_molecules:7}' 95 | f', {number_of_molecules / math.prod(unit_cells):7}' 96 | f', {number_of_molecules * to_mol_kg:.7f}' 97 | ) 98 | 99 | for energy_term in [ 100 | 'Current total potential energy', 101 | 'Current Host-Host energy', 102 | 'Current Host-Adsorbate energy', 103 | 'Current Host-Cation energy', 104 | 'Current Adsorbate-Adsorbate energy', 105 | 'Current Cation-Cation energy', 106 | 'Current Adsorbate-Cation energy' 107 | ]: 108 | 109 | line += f',{raspa_dict[cycle_key][energy_term][0]:.7f}' 110 | 111 | csv_output += line + '\n' 112 | 113 | # Write string into file 114 | with open(os.path.join(output_folder, output_file_name), 'a') as f: 115 | f.write(csv_output) 116 | -------------------------------------------------------------------------------- /example_workflow/modules/tools.py: -------------------------------------------------------------------------------- 1 | import gemmi 2 | from glob import glob 3 | import re 4 | import numpy as np 5 | 6 | 7 | def calculate_Perpendicular_Widths(cif_filename: str) -> tuple: 8 | """ 9 | Calculate the perpendicular widths of the unit cell. 10 | RASPA considers the perpendicular directions as the directions perpendicular to the `ab`, 11 | `bc`, and `ca` planes. Thus, the directions depend on the crystallographic vectors `a`, `b`, 12 | and `c`. 13 | The length in the perpendicular directions are the projections of the crystallographic vectors 14 | on the vectors `a x b`, `b x c`, and `c x a`. 
(here `x` means cross product)
15 |     """
16 |     # Read data from CIF file
17 |     cif = gemmi.cif.read_file(cif_filename).sole_block()
18 |     a = float(cif.find_value('_cell_length_a').split('(')[0])
19 |     b = float(cif.find_value('_cell_length_b').split('(')[0])
20 |     c = float(cif.find_value('_cell_length_c').split('(')[0])
21 |     beta = float(cif.find_value('_cell_angle_beta').split('(')[0]) * np.pi / 180.0
22 |     gamma = float(cif.find_value('_cell_angle_gamma').split('(')[0]) * np.pi / 180.0
23 |     alpha = float(cif.find_value('_cell_angle_alpha').split('(')[0]) * np.pi / 180.0
24 | 
25 |     # Calculate the nu value
26 |     nu = (np.cos(alpha) - np.cos(gamma) * np.cos(beta)) / np.sin(gamma)
27 | 
28 |     # Build the transformation matrix as a numpy array
29 |     CellBox = np.array([[a, 0.0, 0.0],
30 |                         [b * np.cos(gamma), b * np.sin(gamma), 0.0],
31 |                         [c * np.cos(beta), c * nu, c * np.sqrt(1.0 - np.cos(beta)**2 - nu**2)]])
32 | 
33 |     # Calculate the cross products
34 |     axb = np.cross(CellBox[0], CellBox[1])
35 |     bxc = np.cross(CellBox[1], CellBox[2])
36 |     cxa = np.cross(CellBox[2], CellBox[0])
37 | 
38 |     # Calculate the volume of the unit cell
39 |     V = np.dot(np.cross(CellBox[0], CellBox[1]), CellBox[2])
40 | 
41 |     # Calculate perpendicular widths
42 |     p_width_1 = V / np.linalg.norm(bxc)
43 |     p_width_2 = V / np.linalg.norm(cxa)
44 |     p_width_3 = V / np.linalg.norm(axb)
45 | 
46 |     return p_width_1, p_width_2, p_width_3
47 | 
48 | 
49 | def calculate_UnitCells(cif_filename: str, cutoff: float) -> str:
50 |     """
51 |     Calculate the number of unit cell repetitions so that all supercell lengths are larger than
52 |     twice the interaction potential cut-off radius.
53 |     """
54 | 
55 |     # Calculate the perpendicular widths
56 |     p_width_1, p_width_2, p_width_3 = calculate_Perpendicular_Widths(cif_filename)
57 | 
58 |     # Calculate UnitCells string
59 |     uc_array = np.ceil(2.0 * cutoff / np.array([p_width_1, p_width_2, p_width_3])).astype(int)
60 |     unit_cells = ' '.join(map(str, uc_array))
61 | 
62 |     return unit_cells
63 | 
64 | 
65 | def get_pseudoatoms(molecule: str) -> list:
66 |     """
67 |     Returns the pseudoatoms of a given molecule.
68 |     If the molecule is not in the supported list, returns `None`.
69 |     Parameters
70 |     ----------
71 |     molecule : string
72 |         Molecule name. Can be CO2, N2, O2, H2, CH4, CO, or H2O.
73 |     Returns
74 |     ----------
75 |     pseudoatoms : list
76 |         List containing the strings with the pseudoatoms.
77 |     """
78 | 
79 |     pseudoatoms_dict = {'CO2': ['C_co2', 'O_co2'],
80 |                         'N2': ['N_n2', 'N_com'],
81 |                         'O2': ['O_o2', 'O_com'],
82 |                         'H2': ['H_h2', 'H_com'],
83 |                         'CH4': ['CH4'],
84 |                         'CO': ['C_co', 'CO_com', 'O_co'],
85 |                         'H2O': ['Ow', 'Hw', 'Lw']}
86 | 
87 |     if molecule in pseudoatoms_dict:
88 |         return pseudoatoms_dict[molecule]
89 |     else:
90 |         return None
91 | 
92 | 
93 | def get_conversion_factors(output_folder: str,
94 |                            FrameworkName: str,
95 |                            ExternalTemperature: float,
96 |                            ExternalPressure: float):
97 |     """
98 |     Get the conversion factors for the units in the RASPA simulation.
99 | 
100 |     Parameters
101 |     ----------
102 |     output_folder, FrameworkName : string
103 |         Path to the folder containing the RASPA output file and name of the framework.
104 |     ExternalTemperature, ExternalPressure : float
105 |         Temperature (K) and pressure (Pa) used to locate the RASPA output file.
106 |     Returns
107 |     ----------
108 |     conversion_factors : dict
109 |         Dictionary mapping each unit label ('mol/kg', 'mg/g', 'cm^3 STP/gr', 'cm^3 STP/cm^3') to a list with one conversion factor per component. 
110 |     """
111 | 
112 |     # Read file into string
113 |     filename = glob('{0}/output_{1}_*_{2:.6f}_{3:g}.data'.format(output_folder,
114 |                                                                  FrameworkName,
115 |                                                                  ExternalTemperature,
116 |                                                                  ExternalPressure))[0]
117 | 
118 |     pattern = re.compile(r'Conversion factor molecules/unit cell -> (.+?):\s+(\d+\.\d+)')
119 | 
120 |     with open(filename, 'r') as f:
121 |         lines = f.readlines()
122 | 
123 |     conversion_factors = {
124 |         'mol/kg': [],
125 |         'mg/g': [],
126 |         'cm^3 STP/gr': [],
127 |         'cm^3 STP/cm^3': []
128 |     }
129 |     for line in lines:
130 |         if 'Conversion factor molecules/unit cell' in line:
131 |             match = re.search(pattern, line)
132 | 
133 |             conversion_factors[match.group(1)].append(float(match.group(2)))
134 | 
135 |     return conversion_factors
136 | 
--------------------------------------------------------------------------------
/example_workflow/run.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import json
4 | import pymser
5 | import numpy as np
6 | import pandas as pd
7 | 
8 | from modules.raspa_input import create_GCMC_input
9 | from modules.raspa_output import parse_GCMC
10 | from modules.tools import get_conversion_factors
11 | 
12 | # Ignore warnings
13 | import warnings
14 | warnings.filterwarnings("ignore")
15 | 
16 | 
17 | # Required parameters
18 | parser = argparse.ArgumentParser(
19 |     description='on-the-fly RASPA simulations with pyMSER')
20 | parser.add_argument('output_folder',
21 |                     type=str,
22 |                     action='store',
23 |                     metavar='OUTPUT_FOLDER',
24 |                     help='Directory to save the files of the calculations')
25 | parser.add_argument('--FrameworkName',
26 |                     type=str,
27 |                     required=True,
28 |                     action='store',
29 |                     metavar='FRAMEWORK_NAME',
30 |                     help='Name of the framework to be simulated')
31 | parser.add_argument('--ExternalPressure',
32 |                     type=float,
33 |                     required=True,
34 |                     action='store',
35 |                     metavar='PRESSURE',
36 |                     help='External pressure in Pascal.')
37 | # Optional parameters
38 | parser.add_argument('--NumberOfProdCycles',
39 |                     type=int,
40 |                     required=False,
41 |                     action='store',
42 |                     metavar='NUMBER_OF_CYCLES',
43 |                     default=5000,
44 |                     help='Number of desired production cycles.')
45 | parser.add_argument('--AddCycles',
46 |                     type=int,
47 |                     required=False,
48 |                     action='store',
49 |                     metavar='ADD_CYCLES',
50 |                     default=1000,
51 |                     help='Number of additional cycles if NumberOfProdCycles was not achieved.')
52 | parser.add_argument('--ExternalTemperature',
53 |                     type=float,
54 |                     required=False,
55 |                     action='store',
56 |                     metavar='TEMPERATURE',
57 |                     default=298.0,
58 |                     help='Temperature of the simulation in Kelvin')
59 | parser.add_argument('--UnitCells',
60 |                     type=str,
61 |                     required=False,
62 |                     action='store',
63 |                     metavar='UNIT_CELLS',
64 |                     default='auto',
65 |                     help='Number of unit cells to simulate. 
Can be "auto" or "NX,NY,NZ" string.') 66 | parser.add_argument('--UseChargesFromCIFFile', 67 | default=False, 68 | required=False, 69 | action='store_true', 70 | help='Use charges from CIF file.') 71 | parser.add_argument('--GasComposition', 72 | type=str, 73 | required=False, 74 | action='store', 75 | metavar='GAS_COMPOSITION', 76 | default='{"CO2": 1.0}', 77 | help='Type of dispersion correction used for all calculations.') 78 | 79 | 80 | arg = parser.parse_args() 81 | 82 | arg.GasComposition = json.loads(arg.GasComposition) 83 | 84 | header = f""" 85 | ============================================================================== 86 | Automatic GCMC Simulation with pyMSER 87 | ============================================================================== 88 | Framework Name : {arg.FrameworkName} 89 | External Temperature : {arg.ExternalTemperature} K 90 | External Pressure : {arg.ExternalPressure} Pa 91 | Gas Composition : {arg.GasComposition} 92 | Desired Prod. Cycles : {arg.NumberOfProdCycles} 93 | Output Path : {arg.output_folder} 94 | ============================================================================== 95 | 96 | Running RASPA simulation... 97 | """ 98 | 99 | print(header) 100 | 101 | os.chdir(arg.output_folder) 102 | 103 | equilibrated = False 104 | nstep = 0 105 | maxSteps = 20 106 | 107 | while not equilibrated and nstep < maxSteps: 108 | 109 | nstep += 1 110 | print(f' > Running iteration {nstep}...') 111 | 112 | create_GCMC_input( 113 | path='.', 114 | FrameworkName=arg.FrameworkName, 115 | UnitCells=arg.UnitCells, 116 | NumberOfCycles=arg.AddCycles, 117 | ForceField='local', 118 | UseChargesFromCIFFile=arg.UseChargesFromCIFFile, 119 | GasComposition=arg.GasComposition, 120 | ExternalTemperature=arg.ExternalTemperature, 121 | ExternalPressure=arg.ExternalPressure, 122 | RestartFile='no' if nstep == 1 else 'yes', 123 | ) 124 | 125 | os.system('${RASPA_DIR}/bin/simulate simulation.input > raspalog.txt 2>&1') 126 | 127 | parse_GCMC( 128 | output_folder='Output/System_0', 129 | FrameworkName=arg.FrameworkName, 130 | GasComposition=arg.GasComposition, 131 | ExternalTemperature=arg.ExternalTemperature, 132 | ExternalPressure=arg.ExternalPressure, 133 | NumberOfCycles=arg.AddCycles, 134 | PrintEvery=1 135 | ) 136 | 137 | csv_file = f'Output/System_0/raspa_{arg.ExternalTemperature:.6f}_{arg.ExternalPressure}.csv' 138 | dataFrame = pd.read_csv(csv_file) 139 | 140 | eqDict = pymser.equilibrate(dataFrame['N_ads'], print_results=False) 141 | 142 | equilibrated = len(dataFrame['N_ads']) - eqDict['t0'] > arg.NumberOfProdCycles 143 | 144 | if equilibrated: 145 | 146 | log_text = '=============================================================================\n' 147 | 148 | print(" > Success! Found {} production cycles. 
Analyzing final data...\n\n".format(
149 |             len(dataFrame['N_ads']) - eqDict['t0']))
150 | 
151 |         eqDict = pymser.equilibrate(dataFrame['N_ads'], print_results=True)
152 | 
153 |         log_text = '=============================================================================\n'
154 | 
155 |         convFactors = get_conversion_factors(
156 |             output_folder='Output/System_0',
157 |             FrameworkName=arg.FrameworkName,
158 |             ExternalTemperature=arg.ExternalTemperature,
159 |             ExternalPressure=arg.ExternalPressure)
160 | 
161 |         for i, gas in enumerate(arg.GasComposition.keys()):
162 |             eq_data = pymser.calc_equilibrated_average(
163 |                 data=dataFrame[f'{gas}_[molecules/uc]'],
164 |                 eq_index=eqDict['t0'],
165 |                 uncertainty='uSD',
166 |                 ac_time=eqDict['ac_time']
167 |             )
168 | 
169 |             eq_data = np.array(eq_data)
170 | 
171 |             enthalpy_data = pymser.calc_equilibrated_enthalpy(
172 |                 energy=dataFrame['total_[K]'],
173 |                 number_of_molecules=dataFrame[f'{gas}_[N_ads]'],
174 |                 temperature=arg.ExternalTemperature,
175 |                 eq_index=eqDict['t0'],
176 |                 uncertainty='uSD',
177 |                 ac_time=int(arg.NumberOfProdCycles/5))
178 | 
179 |             log_text += f'Component {i} [{gas}]\n'
180 |             log_text += '-'*75 + '\n'
181 |             log_text += 'Average loading absolute [molecules/unit cell] {:20.10f} +/- {:20.10f}\n'\
182 |                 .format(*eq_data)
183 |             log_text += 'Average loading absolute [mol/kg framework] {:20.10f} +/- {:20.10f}\n'\
184 |                 .format(*eq_data * convFactors['mol/kg'][i])
185 |             log_text += 'Average loading absolute [mg/g framework] {:20.10f} +/- {:20.10f}\n'\
186 |                 .format(*eq_data * convFactors['mg/g'][i])
187 |             log_text += 'Average loading absolute [cm^3 STP/gr] {:20.10f} +/- {:20.10f}\n'\
188 |                 .format(*eq_data * convFactors['cm^3 STP/gr'][i])
189 |             log_text += 'Average loading absolute [cm^3 STP/cm^3] {:20.10f} +/- {:20.10f}\n'\
190 |                 .format(*eq_data * convFactors['cm^3 STP/cm^3'][i])
191 |             log_text += 'Enthalpy of adsorption [kJ/mol] {:20.10f} +/- {:20.10f}\n'\
192 |                 .format(*enthalpy_data)
193 |             log_text += '========================================================================\n'
194 | 
195 |         print(log_text)
196 |         log = f'{arg.FrameworkName}_{arg.ExternalTemperature:.6f}_{arg.ExternalPressure}.log'
197 |         with open(log, 'a') as f:
198 |             f.write(log_text)
199 |     else:
200 |         print(" > Found only {}/{} production cycles. 
Running {} more cycles.".format(
201 |             len(dataFrame['N_ads']) - eqDict['t0'], arg.NumberOfProdCycles, arg.AddCycles)
202 |         )
203 | 
204 |         os.makedirs('RestartInitial/System_0', exist_ok=True)
205 | 
206 |         # Copy the file from Restart/System_0 to RestartInitial/System_0
207 |         os.system('cp -r Restart/System_0/* RestartInitial/System_0/')
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools>=42",
4 |     "wheel"
5 | ]
6 | build-backend = "setuptools.build_meta"
--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 |   "$schema": "https://docs.renovatebot.com/renovate-schema.json",
3 |   "extends": [
4 |     "config:recommended"
5 |   ]
6 | }
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 1.0.22
3 | 
4 | [metadata]
5 | description_file = README.md
6 | license_files = LICENSE
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | with open("README.md", "r", encoding="utf-8") as fh:
4 |     long_description = fh.read()
5 | 
6 | setuptools.setup(
7 |     name="pymser",
8 |     version="1.0.22",
9 |     author="Felipe Lopes de Oliveira",
10 |     author_email="felipe.lopes@nano.ufrj.br",
11 |     description="Library to apply the Marginal Standard Error Rule \
12 | for transient regime detection and truncation on Grand Canonical \
13 | Monte Carlo adsorption simulations",
14 |     long_description=long_description,
15 |     long_description_content_type="text/markdown",
16 |     url="https://github.com/IBM/pymser",
17 |     classifiers=[
18 |         "Programming Language :: Python :: 3",
19 |         "Operating System :: OS Independent",
20 |         "License :: OSI Approved :: BSD License",
21 |     ],
22 |     package_dir={"": "src"},
23 |     packages=setuptools.find_packages(where="src"),
24 |     python_requires=">=3.9",
25 |     include_package_data=True,
26 |     install_requires=['numpy',
27 |                       'scipy',
28 |                       'statsmodels',
29 |                       'torch'],
30 |     license='BSD 3-Clause License'
31 | )
--------------------------------------------------------------------------------
/src/pymser/__init__.py:
--------------------------------------------------------------------------------
1 | from .pymser import (exp_decay,
2 |                      check_consistency,
3 |                      batch_average_data,
4 |                      calculate_MSEm,
5 |                      MSERm_index,
6 |                      MSERm_LLM_index,
7 |                      enthalpy_of_adsorption,
8 |                      calc_equilibrated_average,
9 |                      calc_equilibrated_enthalpy,
10 |                      calc_autocorrelation_time,
11 |                      apply_ADF_test,
12 |                      equilibrate,
13 |                      equilibrate_enthalpy)
14 | 
15 | __all__ = ['exp_decay',
16 |            'check_consistency',
17 |            'batch_average_data',
18 |            'calculate_MSEm',
19 |            'MSERm_index',
20 |            'MSERm_LLM_index',
21 |            'enthalpy_of_adsorption',
22 |            'calc_equilibrated_average',
23 |            'calc_equilibrated_enthalpy',
24 |            'calc_autocorrelation_time',
25 |            'apply_ADF_test',
26 |            'equilibrate',
27 |            'equilibrate_enthalpy']
--------------------------------------------------------------------------------
/src/pymser/pymser.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from 
scipy.optimize import curve_fit
4 | from scipy.signal import correlate as sp_corr
5 | from statsmodels.tsa.stattools import adfuller
6 | 
7 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8 | 
9 | 
10 | def exp_decay(t, tau):
11 |     """
12 |     Simple function to model an exponential decay.
13 | 
14 |     Parameters
15 |     ----------
16 |     t : array
17 |         Time data
18 |     tau : float
19 |         Decay rate of the exponential
20 |     Returns
21 |     -------
22 |     Exponential curve as a NumPy array.
23 |     """
24 | 
25 |     return np.exp(-t/tau)
26 | 
27 | 
28 | def check_consistency(data):
29 |     """
30 |     Checks the consistency of the input data.
31 | 
32 |     Parameters
33 |     ----------
34 |     data : array
35 |         Array with the data
36 |     Returns
37 |     -------
38 |     is_all_finite, is_all_zero : bool
39 |         Flags indicating whether all values are finite and whether all values are zero
40 |     data_array : array
41 |         NumPy array containing the data in the correct format for the next steps.
42 |     """
43 | 
44 |     # Try to convert the data to a NumPy Array
45 |     try:
46 |         data_array = np.array(data).astype(float)
47 |         # Remove wrongly nested lists
48 |         data_array = data_array.squeeze()
49 |     except ValueError:
50 |         print('Input data must be an array of float numbers!')
51 |         print('The following data was passed:')
52 |         print(data)
53 | 
54 |         # Replace the incorrect data with an array with a NaN value
55 |         data_array = np.array(np.nan)
56 | 
57 |     # Check if input data is unidimensional
58 |     if data_array.ndim != 1:
59 |         raise Exception(f'Input data must be 1D. {data_array.ndim}D data used instead!')
60 | 
61 |     # Check if all the data is finite
62 |     is_all_finite = np.all(np.isfinite(data_array))
63 | 
64 |     # Check if the data is not an array filled with zeros
65 |     is_all_zero = np.all((data_array == 0))
66 | 
67 |     return is_all_finite, is_all_zero, data_array
68 | 
69 | 
70 | def batch_average_data(data, batch_size=1):
71 |     """
72 |     Converts the data to batch averages with a given batch size.
73 | 
74 |     Parameters
75 |     ----------
76 |     data : array
77 |         Array with the data
78 |     batch_size : int
79 |         Size of the batch to take the averages
80 |     Returns
81 |     -------
82 |     averaged_batches : array
83 |         Array containing the batch-averaged data
84 |     """
85 | 
86 |     if batch_size > 1:
87 |         # Truncate the data to allow a closed batch.
88 |         # Be aware that this will remove the last points to make a closed batch
89 |         truncated_data = data[:int(len(data) / batch_size) * batch_size]
90 | 
91 |         # Reshape the data to create batches of size batch_size.
92 |         reshaped_data = torch.reshape(truncated_data, (-1, batch_size))
93 | 
94 |         # Get the average of each batch
95 |         averaged_batches = torch.tensor([torch.mean(i) for i in reshaped_data])
96 | 
97 |         return averaged_batches
98 | 
99 |     else:
100 |         return data
101 | 
102 | 
103 | def calculate_MSEm(data, batch_size=1):
104 |     """
105 |     Calculates the m-Marginal Standard Error (MSEm) for simulation data
106 |     with batch size equal to m. m=1 reduces to the original MSER. 
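    For the (batch-averaged) series Y_1..Y_n, the loop below computes, for each
    candidate truncation index k,

        g(k) = sum_{i=k}^{n} (Y_i - mean(Y_k..Y_n))^2 / (n - k)^2,

    and MSERm_index later selects the k that minimizes g(k).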
107 | 
108 |     Parameters
109 |     ----------
110 |     data : array
111 |         Array with the data
112 |     batch_size : int
113 |         Size of the batch to take the averages
114 |     Returns
115 |     -------
116 |     MSE : array
117 |         Array containing the Marginal Standard Error data
118 |     """
119 | 
120 |     # Convert data to n-blocked average
121 |     batch_tensor = batch_average_data(torch.from_numpy(data).float().to(device), batch_size)
122 | 
123 |     # Get the size of the data
124 |     n = len(batch_tensor)
125 | 
126 |     # Create an empty tensor to store the MSE values
127 |     MSE = torch.zeros(n - 2, device=device)
128 | 
129 |     # Iterate over the data indices and calculate the average from k to n-2
130 |     for k in range(n - 2):
131 |         # Truncate the data at index k
132 |         truncated_data = batch_tensor[k:]
133 | 
134 |         # Get the average of the truncated data
135 |         Y_nk = truncated_data.mean()
136 | 
137 |         # Calculate the sum of the squared differences
138 |         sum_sq_diff = torch.sum((truncated_data - Y_nk)**2)
139 | 
140 |         # Calculate the k-th Marginal Standard Error
141 |         g_k = sum_sq_diff / (n - k)**2
142 | 
143 |         # Store the k-th value in the MSE array
144 |         MSE[k] = g_k
145 | 
146 |     return MSE
147 | 
148 | 
149 | def MSERm_index(MSEm, batch_size=1):
150 |     """
151 |     Applies the m-Marginal Standard Error Rule (MSERm) to the MSEm data to get
152 |     the position where equilibrated data starts.
153 | 
154 |     Parameters
155 |     ----------
156 |     MSEm : array
157 |         Marginal Standard Error applied to the data
158 |     batch_size : int
159 |         Size of the batch to take the average
160 |     Returns
161 |     -------
162 |     equilibrated_index : int
163 |         Index of the start of equilibrated data
164 |     """
165 |     # Replace spuriously low values that appear artificially at the last points
166 |     MSEm = torch.where(MSEm < 1e-9,  # where value < 1e-9
167 |                        max(MSEm),    # replace with max(MSEm)
168 |                        MSEm)         # on the MSEm array
169 | 
170 |     equilibrated_index = torch.argmin(MSEm)*batch_size
171 | 
172 |     return equilibrated_index
173 | 
174 | 
175 | def MSERm_LLM_index(MSEm, batch_size=1):
176 |     """
177 |     Applies the LLM version of the m-Marginal Standard Error Rule (MSERm) to the
178 |     MSEm data to get the position where equilibrated data starts. This method gets
179 |     the first minimum on the Marginal Standard Error curve and assumes it is the
180 |     start of equilibration. It is a better option for complicated adsorptions
181 |     like water close to condensation.
182 | 
183 |     Parameters
184 |     ----------
185 |     MSEm : array
186 |         Marginal Standard Error applied to the data
187 |     batch_size : int
188 |         Size of the batch to take the average
189 |     Returns
190 |     -------
191 |     t0 : int
192 |         Start of the LLM equilibrated data
193 |     """
194 | 
195 |     # Search for the first minimum on the MSEm data
196 |     i = 0
197 |     while MSEm[i+1] < MSEm[i]:
198 |         i += 1
199 |     # Correct for the batch size
200 |     t0 = i*batch_size
201 | 
202 |     return t0
203 | 
204 | 
205 | def enthalpy_of_adsorption(energy, number_of_molecules, temperature):
206 |     """
207 |     Calculates the enthalpy of adsorption as
208 | 
209 |     H = (<E*N> - <E>*<N>) / (<N^2> - <N>^2) - R*T
210 | 
211 |     adapted from J. Phys. Chem. 1993, 97, 51, 13742-13752.
212 | 
213 |     Please note that Heat of adsorption (Q_iso) = -Enthalpy of adsorption (H).
214 | 
215 |     The isosteric enthalpy of adsorption, H, is defined as the heat which is released
216 |     when an adsorptive binds to a surface. The enthalpy of adsorption (H) is a negative
217 |     number and the isosteric heat (Q_iso) of adsorption is a positive number.
218 |     For a deeper discussion see: Dalton Trans., 2020, 49, 10295.
219 | 
220 |     Parameters
221 |     ----------
222 |     energy : 1D array
223 |         List with the potential energy of the adsorbed phase for each MC cycle in units of Kelvin.
224 | 
225 |     number_of_molecules : 1D array
226 |         List with the number of molecules in the simulation system for each MC cycle.
227 | 
228 |     temperature : float
229 |         Temperature of the simulation in Kelvin
230 | 
231 |     Returns
232 |     ----------
233 | 
234 |     H : float
235 |         Enthalpy of adsorption in units of kJ⋅mol−1
236 |     """
237 |     # Define basic constants
238 |     R = 8.31446261815324 * 1e-3  # kJ⋅K−1⋅mol−1
239 | 
240 |     # Convert energy from Kelvin to kJ/mol
241 |     E = np.array(energy) * R
242 |     N = np.array(number_of_molecules)
243 | 
244 |     EN = E * N
245 | 
246 |     # Calculate the enthalpy of adsorption. Here <N^2> - <N>^2 = VAR(N)
247 |     H = (EN.mean() - E.mean() * N.mean()) / np.var(N) - R * temperature
248 | 
249 |     return H
250 | 
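# Usage sketch (hypothetical names): E_total and N_ads would be the parsed
# potential-energy trace (in Kelvin) and molecule-count trace of a GCMC run,
# as produced by the example workflow in this repository:
#
#     H = enthalpy_of_adsorption(energy=E_total,
#                                number_of_molecules=N_ads,
#                                temperature=298.0)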

    Parameters
    ----------
    energy : 1D array
        List with the potential energy of the adsorbed phase for each MC cycle in units of Kelvin.

    number_of_molecules : 1D array
        List with the number of molecules in the simulation system for each MC cycle.

    temperature : float
        Temperature of the simulation in Kelvin

    Returns
    -------
    H : float
        Enthalpy of adsorption in units of kJ/mol
    """
    # Define basic constants
    R = 8.31446261815324 * 1e-3  # kJ⋅K⁻¹⋅mol⁻¹

    # Convert energy from Kelvin to kJ/mol
    E = np.array(energy) * R
    N = np.array(number_of_molecules)

    EN = E * N

    # Calculate the enthalpy of adsorption. Here <N^2> - <N>^2 = Var(N)
    H = (EN.mean() - E.mean() * N.mean()) / np.var(N) - R * temperature

    return H


def calc_equilibrated_average(data, eq_index, uncertainty='uSD', ac_time=1):
    """
    Calculates the average and uncertainty on the equilibrated part
    of the data.

    Parameters
    ----------
    data : array
        Array with the data
    eq_index : int
        Index of the start of the equilibrated data.
    uncertainty : str
        String for selecting Standard Error (SE), Standard Deviation (SD), or their
        uncorrelated versions uSD and uSE as the default uncertainty of the average.
    ac_time : int
        Autocorrelation time
    Returns
    -------
    equilibrated_average : float
        Average over the equilibrated data
    equilibrated_uncertainty : float
        Uncertainty of the average calculation
    """

    if uncertainty not in ['SD', 'SE', 'uSD', 'uSE']:
        raise Exception(f"""{uncertainty} is not a valid option!
        Only Standard Deviation (SD), Standard Error (SE), uncorrelated
        Standard Deviation (uSD), and uncorrelated Standard Error (uSE)
        are valid options.""")

    # Remove the initial transient of the data
    equilibrated_data = data[eq_index:]

    # Calculate the average of the equilibrated data
    equilibrated_average = np.average(equilibrated_data)

    # Calculate the Standard Deviation of the equilibrated data
    if uncertainty == 'SD':
        equilibrated_uncertainty = np.std(equilibrated_data)

    # Calculate the Standard Error
    elif uncertainty == 'SE':
        equilibrated_uncertainty = np.std(equilibrated_data) / np.sqrt(len(equilibrated_data))

    # Calculate the uncorrelated Standard Deviation
    elif uncertainty == 'uSD':
        # Divide the equilibrated data into uncorrelated chunks
        uncorr_batches = batch_average_data(torch.from_numpy(equilibrated_data).float().to(device),
                                            np.ceil(ac_time).astype(int))

        # Calculate the standard deviation over the uncorrelated chunks
        equilibrated_uncertainty = torch.std(uncorr_batches).item()

    # Calculate the uncorrelated Standard Error
    elif uncertainty == 'uSE':
        # Divide the equilibrated data into uncorrelated chunks
        uncorr_batches = batch_average_data(torch.from_numpy(equilibrated_data).float().to(device),
                                            np.ceil(ac_time).astype(int))

        # Calculate the standard error of the mean over the uncorrelated chunks
        equilibrated_uncertainty = (torch.std(uncorr_batches) /
                                    np.sqrt(len(uncorr_batches))).item()

    return equilibrated_average, equilibrated_uncertainty
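

# Illustrative sketch of the uncorrelated options above (hypothetical values):
# with ac_time=12.3 the series is averaged over batches of length ceil(12.3)=13,
# so that successive batch means are approximately independent, e.g.
#
#     avg, err = calc_equilibrated_average(data, eq_index=100,
#                                          uncertainty='uSE', ac_time=12.3)
#
# and the standard error is then taken over the resulting batch means.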


def calc_equilibrated_enthalpy(energy,
                               number_of_molecules,
                               temperature,
                               eq_index,
                               uncertainty='uSD',
                               ac_time=1):
    """
    Calculates the average enthalpy of adsorption and its uncertainty on the
    equilibrated part of the data.

    Parameters
    ----------
    energy : 1D array
        List with the potential energy of the adsorbed phase for each MC cycle in units of Kelvin.
    number_of_molecules : 1D array
        List with the number of molecules in the simulation system for each MC cycle.
    temperature : float
        Temperature of the simulation in Kelvin
    eq_index : int
        Index of the start of the equilibrated data.
    uncertainty : str
        String for selecting Standard Error (SE), Standard Deviation (SD), or their
        uncorrelated versions uSD and uSE as the default uncertainty of the average.
    ac_time : int
        Autocorrelation time
    Returns
    -------
    equilibrated_H : float
        Average enthalpy of adsorption over the equilibrated data
    eq_uncertainty : float
        Uncertainty of the average calculation
    """

    if uncertainty not in ['SD', 'SE', 'uSD', 'uSE']:
        raise Exception(f"""{uncertainty} is not a valid option!
        Only Standard Deviation (SD), Standard Error (SE), uncorrelated
        Standard Deviation (uSD), and uncorrelated Standard Error (uSE)
        are valid options.""")

    # Remove the initial transient of the data
    equilibrated_E = energy[eq_index:]
    equilibrated_N = number_of_molecules[eq_index:]

    if uncertainty in ['SD', 'SE']:

        # Truncate and reshape the data into 5 closed batches.
        truncated_E = equilibrated_E[:int(np.floor(len(equilibrated_E) / 5) * 5)]
        reshaped_E = np.reshape(truncated_E, (5, -1))

        truncated_N = equilibrated_N[:int(np.floor(len(equilibrated_N) / 5) * 5)]
        reshaped_N = np.reshape(truncated_N, (5, -1))

    elif uncertainty in ['uSD', 'uSE']:

        ac_time = np.ceil(ac_time).astype(int)

        # Truncate and reshape the data into closed batches of length ac_time.
        truncated_E = equilibrated_E[:int(np.floor(len(equilibrated_E) / ac_time) * ac_time)]
        reshaped_E = np.reshape(truncated_E, (-1, ac_time))

        truncated_N = equilibrated_N[:int(np.floor(len(equilibrated_N) / ac_time) * ac_time)]
        reshaped_N = np.reshape(truncated_N, (-1, ac_time))

    # Calculate the enthalpy of adsorption on each batch
    equilibrated_H_list = []

    for i in range(len(reshaped_E)):
        H = enthalpy_of_adsorption(reshaped_E[i], reshaped_N[i], temperature)
        equilibrated_H_list.append(H)

    equilibrated_H = np.nanmean(np.array(equilibrated_H_list))

    # Calculate the Standard Deviation over the batch enthalpies
    if uncertainty in ['SD', 'uSD']:

        eq_uncertainty = np.nanstd(equilibrated_H_list)

    # Calculate the Standard Error of the mean over the batch enthalpies
    elif uncertainty in ['SE', 'uSE']:

        eq_uncertainty = np.nanstd(equilibrated_H_list) / np.sqrt(len(equilibrated_H_list))

    return equilibrated_H, eq_uncertainty


def calc_autocorrelation_time(data):
    """
    Calculates the autocorrelation time of equilibrated data. The
    autocorrelation is expected to fall off exponentially at long times.
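
    The normalized autocorrelation function computed below,

        ACF(t) = sum_s (x_s - <x>) * (x_{s+t} - <x>) / sum_s (x_s - <x>)^2,

    is fitted to exp(-t/tau), and the autocorrelation time is reported as the
    half-life of that exponential decay, ceil(tau * ln 2).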

    Parameters
    ----------
    data : array
        Array of data to calculate the integrated autocorrelation time
    Returns
    -------
    autocorrelation_time : float
        Autocorrelation time
    uncorrelated_samples : float
        Number of uncorrelated samples
    """

    # Check the consistency of the time series
    is_all_finite, is_all_zero, data_array = check_consistency(data)

    if is_all_finite is False or is_all_zero is True:
        return 0, 0

    try:
        # Calculate the ACF using NumPy/SciPy
        data_std = data_array - np.mean(data_array)
        data_norm = np.sum(data_std ** 2)

        ACF = sp_corr(data_std, data_std, mode='full', method='fft') / data_norm
        ACF = ACF[int(ACF.size/2):]

        # Filter the ACF to remove values below 0.1 and improve the fit
        idx = np.argmax(ACF <= 0.1)
        ACF = ACF[:idx]

        # Fit an exponential decay to the ACF
        x = np.arange(len(ACF))
        [tau], _ = curve_fit(exp_decay, x, ACF)

        # Calculate the autocorrelation time as the half-life of the ACF exponential decay
        autocorrelation_time = np.ceil(tau*np.log(2))

    except (RuntimeError, ValueError) as error:
        # If the least-squares minimization fails, set the autocorrelation time to 1.
        # This can happen if the ACF data do not present an exponential decay
        autocorrelation_time = 1
        print('The least-squares minimization failed! Please check the data.')
        print(error)

    # Calculate the number of uncorrelated samples
    uncorrelated_samples = data_array.size / autocorrelation_time

    return autocorrelation_time, uncorrelated_samples


def apply_ADF_test(equilibrated_data, verbosity=True):
    """
    Applies the Augmented Dickey-Fuller test on the equilibrated data.

    Parameters
    ----------
    equilibrated_data : array
        Array with the equilibrated data
    verbosity : bool
        Boolean to control the output printing
    Returns
    -------
    ADFTestResults : dict
        Dictionary containing the ADF test results
    output : str
        String containing the formatted test report
    """
    adf, p, usedlag, n_obs, cv, icbest = adfuller(equilibrated_data, autolag='AIC')

    ADFTestResults = {'adf': adf,
                      'pvalue': p,
                      'usedlag': usedlag,
                      'n_obs': n_obs,
                      'critical_values': cv,
                      'icbest': icbest}

    output = f"""
Augmented Dickey-Fuller Test
==============================================================================
Test statistic for observable: {adf}
P-value for observable: {p}
The number of lags used: {usedlag}
The number of observations used for the ADF regression: {n_obs}
Cutoff Metrics :
"""
    for k, v in cv.items():
        conf = 100 - int(k.rstrip('%'))
        if v < adf:
            output += f"{k:>4}: {v:9.6f} | The data is not stationary with {conf} % confidence\n"
        else:
            output += f"{k:>4}: {v:9.6f} | The data is stationary with {conf} % confidence\n"

    if verbosity:
        print(output)

    return ADFTestResults, output


def equilibrate(input_data,
                LLM=False,
                batch_size=1,
                ADF_test=True,
                uncertainty='uSD',
                print_results=True):
    """
    Wrapper function that applies MSER to an input_data array.
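
    Illustrative usage (synthetic data; assumes this function is importable,
    e.g. from pymser import equilibrate):

    >>> import numpy as np
    >>> rng = np.random.default_rng(42)
    >>> t = np.arange(2000)
    >>> data = 10.0 * (1.0 - np.exp(-t / 100.0)) + rng.normal(0.0, 0.1, 2000)
    >>> results = equilibrate(data, batch_size=10, print_results=False)
    >>> t0 = results['t0']    # index where the equilibrated window starts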

    Parameters
    ----------
    input_data : array
        Array with the original data
    LLM : bool
        Boolean to control the usage of the LLM variation of MSER
    batch_size : int
        Size of the batch to take the average
    ADF_test : bool
        Boolean to control the usage of the ADF test
    uncertainty : str
        String for selecting Standard Error (SE), Standard Deviation (SD), or their
        uncorrelated versions uSD and uSE as the default uncertainty of the average.
    print_results : bool
        Boolean to control the printing of the results
    Returns
    -------
    results_dict : dict
        Dictionary containing the results of MSER
    """

    # Check the consistency of the time series
    is_all_finite, is_all_zero, array_data = check_consistency(input_data)

    # Return NaN if any value of the time series is not a finite number
    if is_all_finite is False:
        results_dict = {'MSE': np.nan,
                        't0': np.nan,
                        'average': np.nan,
                        'uncertainty': np.nan,
                        'equilibrated': np.nan,
                        'ac_time': np.nan,
                        'uncorr_samples': np.nan}
        return results_dict

    # Return zero if all the data in the time series is zero
    if is_all_zero:
        results_dict = {'MSE': np.zeros(len(array_data)),
                        't0': 0,
                        'average': 0,
                        'uncertainty': 0,
                        'equilibrated': np.zeros(len(array_data)),
                        'ac_time': 0,
                        'uncorr_samples': 0}
        return results_dict

    # Check if the input parameters are what is expected
    assert isinstance(LLM, bool), 'LLM should be True or False'
    assert isinstance(batch_size, int), 'batch_size should be an int'
    assert isinstance(ADF_test, bool), 'ADF_test should be True or False'
    assert isinstance(print_results, bool), 'print_results should be True or False'

    # Check if the uncertainty is a valid option
    if uncertainty not in ['SD', 'SE', 'uSD', 'uSE']:
        raise Exception(f"""{uncertainty} is not a valid option!
        Only Standard Deviation (SD), Standard Error (SE), uncorrelated
        Standard Deviation (uSD), and uncorrelated Standard Error (uSE)
        are valid options.""")

    # Calculate the Marginal Standard Error curve
    MSEm_curve = calculate_MSEm(array_data, batch_size=batch_size)

    if LLM is True:
        # Apply MSER-LLM to get the index of the start of the equilibrated data
        t0 = MSERm_LLM_index(MSEm_curve, batch_size=batch_size)

    else:
        # Apply MSER to get the index of the start of the equilibrated data
        t0 = MSERm_index(MSEm_curve, batch_size=batch_size)

    # Check if t0 falls within the first 75% of the data
    if t0 < 0.75 * len(array_data):
        eq_status = 'Yes'
    else:
        eq_status = 'No. t0 > 75% of the data!'
        print('Warning: t0 is too close to the end of the data!')
        print('The results may not be reliable!')
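
    # As a concrete example, in a 10000-step series the run is only flagged as
    # equilibrated when t0 falls before step 7500 (75% of the data).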

    # Calculate the autocorrelation time and the number of uncorrelated samples
    equilibrated = array_data[t0:]
    ac_time, uncorr_samples = calc_autocorrelation_time(equilibrated)

    # Calculate the average and its uncertainty on the equilibrated data
    average, avg_uncertainty = calc_equilibrated_average(array_data,
                                                         t0,
                                                         uncertainty,
                                                         ac_time)

    # Create a dictionary with the results
    results_dict = {'MSE': MSEm_curve,
                    't0': t0,
                    'average': average,
                    'uncertainty': avg_uncertainty,
                    'equilibrated': equilibrated,
                    'ac_time': ac_time,
                    'uncorr_samples': uncorr_samples}

    eq_ratio = 100 * (len(array_data) - t0) / len(array_data)

    if print_results:
        print(f""" pyMSER Equilibration Results
==============================================================================
Start of equilibrated data: {t0} of {len(array_data)}
Total equilibrated steps: {len(array_data) - t0} ({eq_ratio:.2f}%)
Equilibrated: {eq_status}
Average over equilibrated data: {average:.4f} ± {avg_uncertainty:.4f}
Number of uncorrelated samples: {uncorr_samples:.1f}
Autocorrelation time: {ac_time:.1f}
==============================================================================""")

    if ADF_test:
        # Apply the Augmented Dickey-Fuller test on the equilibrated data
        ADFTestResults, output_text = apply_ADF_test(equilibrated, verbosity=print_results)
        results_dict.update(ADFTestResults)

    return results_dict


def equilibrate_enthalpy(energy,
                         number_of_molecules,
                         temperature,
                         LLM=False,
                         batch_size=1,
                         ADF_test=True,
                         uncertainty='uSD',
                         print_results=True):
    """
    Wrapper function that applies MSER and calculates the equilibrated enthalpy
    of adsorption as

        H = (<E*N> - <E>*<N>) / (<N^2> - <N>^2) - R*T

    adapted from J. Phys. Chem. 1993, 97, 51, 13742-13752.

    Please note that the heat of adsorption (Q_iso) = -enthalpy of adsorption (H).

    The isosteric enthalpy of adsorption, H, is defined as the heat which is released
    when an adsorptive binds to a surface. The enthalpy of adsorption (H) is a negative
    number and the isosteric heat of adsorption (Q_iso) is a positive number.
    For a deeper discussion see: Dalton Trans., 2020, 49, 10295.

    Parameters
    ----------
    energy : 1D array
        List with the potential energy of the adsorbed phase for each MC cycle in units of Kelvin.
    number_of_molecules : 1D array
        List with the total number of molecules in the simulation box for each MC cycle.
    temperature : float
        Temperature of the simulation in Kelvin
    LLM : bool
        Boolean to control the usage of the LLM variation of MSER
    batch_size : int
        Size of the batch to take the average
    ADF_test : bool
        Boolean to control the usage of the ADF test
    uncertainty : str
        String for selecting Standard Error (SE), Standard Deviation (SD), or their
        uncorrelated versions uSD and uSE as the default uncertainty of the average.
    print_results : bool
        Boolean to control the printing of the results

    Returns
    -------
    results_dict : dict
        Dictionary containing the results of MSER
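
    Illustrative usage (synthetic data, not from a real simulation):

    >>> import numpy as np
    >>> rng = np.random.default_rng(1)
    >>> E = -2000.0 + rng.normal(0.0, 50.0, 5000)   # energies in Kelvin
    >>> N = rng.poisson(10.0, 5000).astype(float)   # molecules per MC cycle
    >>> res = equilibrate_enthalpy(E, N, temperature=300.0, print_results=False)
    >>> H = res['average']                          # enthalpy in kJ/mol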
    """

    # Check if energy and number_of_molecules have the same length
    if len(energy) != len(number_of_molecules):
        print('The energy and number_of_molecules arrays must have the same length!')
        results_dict = {'MSE_E': np.nan,
                        'MSE_N': np.nan,
                        't0_E': np.nan,
                        't0_N': np.nan,
                        'average': np.nan,
                        'uncertainty': np.nan,
                        'equilibrated_E': np.nan,
                        'equilibrated_N': np.nan,
                        'ac_time_E': np.nan,
                        'ac_time_N': np.nan,
                        'uncorr_samples_E': np.nan,
                        'uncorr_samples_N': np.nan}
        return results_dict

    # Check the consistency of the energy values
    is_all_finite_E, is_all_zero_E, array_data_E = check_consistency(energy)

    # Check the consistency of the number of molecules
    is_all_finite_N, is_all_zero_N, array_data_N = check_consistency(number_of_molecules)

    # Return NaN if any value of the time series is not a finite number
    if not all([is_all_finite_E, is_all_finite_N]):
        results_dict = {'MSE_E': np.nan,
                        'MSE_N': np.nan,
                        't0_E': np.nan,
                        't0_N': np.nan,
                        'average': np.nan,
                        'uncertainty': np.nan,
                        'equilibrated_E': np.nan,
                        'equilibrated_N': np.nan,
                        'ac_time_E': np.nan,
                        'ac_time_N': np.nan,
                        'uncorr_samples_E': np.nan,
                        'uncorr_samples_N': np.nan}
        return results_dict

    # Return zero if all the data in the time series is zero
    if all([is_all_zero_E, is_all_zero_N]):
        results_dict = {'MSE_E': np.zeros(len(number_of_molecules)),
                        'MSE_N': np.zeros(len(number_of_molecules)),
                        't0_E': 0,
                        't0_N': 0,
                        'average': 0,
                        'uncertainty': 0,
                        'equilibrated_E': np.zeros(len(number_of_molecules)),
                        'equilibrated_N': np.zeros(len(number_of_molecules)),
                        'ac_time_E': 0,
                        'ac_time_N': 0,
                        'uncorr_samples_E': 0,
                        'uncorr_samples_N': 0}

        return results_dict

    # Check if the input parameters are what is expected
    assert isinstance(temperature, float), 'Temperature should be a float'
    assert isinstance(LLM, bool), 'LLM should be True or False'
    assert isinstance(batch_size, int), 'batch_size should be an int'
    assert isinstance(ADF_test, bool), 'ADF_test should be True or False'
    assert isinstance(print_results, bool), 'print_results should be True or False'

    # Check if the uncertainty is a valid option
    if uncertainty not in ['SD', 'SE', 'uSD', 'uSE']:
        raise Exception(f"""{uncertainty} is not a valid option!
        Only Standard Deviation (SD), Standard Error (SE), uncorrelated
        Standard Deviation (uSD), and uncorrelated Standard Error (uSE)
        are valid options.""")

    # Calculate the Marginal Standard Error for the energy
    MSEm_E = calculate_MSEm(array_data_E, batch_size=batch_size)

    if LLM is False:
        # Apply MSER to get the index of the start of the equilibrated data
        t0_E = MSERm_index(MSEm_E, batch_size=batch_size)

    else:
        # Apply MSER-LLM to get the index of the start of the equilibrated data
        t0_E = MSERm_LLM_index(MSEm_E, batch_size=batch_size)

    # Calculate the autocorrelation time and the number of uncorrelated samples
    equilibrated_E = array_data_E[t0_E:]
    ac_time_E, uncorr_samples_E = calc_autocorrelation_time(equilibrated_E)

    # Calculate the Marginal Standard Error for the number of molecules
    MSEm_N = calculate_MSEm(array_data_N, batch_size=batch_size)

    if LLM is False:
        # Apply MSER to get the index of the start of the equilibrated data
        t0_N = MSERm_index(MSEm_N, batch_size=batch_size)

    else:
        # Apply MSER-LLM to get the index of the start of the equilibrated data
        t0_N = MSERm_LLM_index(MSEm_N, batch_size=batch_size)

    # Check if t0_E falls within the first 75% of the data
    if t0_E < 0.75 * len(energy):
        eq_status = 'Yes'
    else:
        eq_status = 'No. t0 > 75% of the data!'
        print('Warning: t0 is too close to the end of the data!')
        print('The results may not be reliable!')

    # Calculate the autocorrelation time and the number of uncorrelated samples
    equilibrated_N = array_data_N[t0_N:]
    ac_time_N, uncorr_samples_N = calc_autocorrelation_time(equilibrated_N)

    # Calculate the enthalpy of adsorption and its uncertainty
    average, avg_uncertainty = calc_equilibrated_enthalpy(array_data_E,
                                                          array_data_N,
                                                          temperature,
                                                          eq_index=t0_E,
                                                          uncertainty=uncertainty,
                                                          ac_time=ac_time_E)

    # Create a dictionary with the results
    results_dict = {'MSE_E': MSEm_E,
                    'MSE_N': MSEm_N,
                    't0_E': t0_E,
                    't0_N': t0_N,
                    'average': average,
                    'uncertainty': avg_uncertainty,
                    'equilibrated_E': equilibrated_E,
                    'equilibrated_N': equilibrated_N,
                    'ac_time_E': ac_time_E,
                    'ac_time_N': ac_time_N,
                    'uncorr_samples_E': uncorr_samples_E,
                    'uncorr_samples_N': uncorr_samples_N}

    eq_ratio = 100 * (len(energy) - t0_E) / len(energy)

    if print_results:
        print(f""" pyMSER Equilibration Results
==============================================================================
Start of equilibrated data: {t0_E} of {len(energy)}
Total equilibrated steps: {len(energy) - t0_E} ({eq_ratio:.2f}%)
Equilibrated: {eq_status}
Average over equilibrated data: {average:.4f} ± {avg_uncertainty:.4f} kJ/mol
Number of uncorrelated samples: {uncorr_samples_E:.1f}
Autocorrelation time: {ac_time_E:.1f}
==============================================================================""")

    if ADF_test:
        # Apply the Augmented Dickey-Fuller test on the equilibrated data
        ADFTestResults, output_text = apply_ADF_test(equilibrated_E, verbosity=print_results)
        results_dict.update(ADFTestResults)

    return results_dict
--------------------------------------------------------------------------------
/version.py:
--------------------------------------------------------------------------------
import codecs
import os.path


def read(rel_path):
    here = os.path.abspath(os.path.dirname(__file__))
    with codecs.open(os.path.join(here, rel_path), 'r') as fp:
        return fp.read()


def get_version(rel_path):
    for line in read(rel_path).splitlines():
        if line.startswith(' version'):
            delim = '"' if '"' in line else "'"
            return line.split(delim)[1]
    else:
        raise RuntimeError("Unable to find version string.")


print(get_version("setup.py"))
--------------------------------------------------------------------------------