├── .git_archival.txt
├── .gitattributes
├── .github
└── workflows
│ └── codeql.yml
├── .gitignore
├── COPYING
├── COPYING.LESSER
├── README.md
├── bin
├── c2anmr
├── censo
├── nmrplot
└── uvvisplot
├── environment.yaml
├── pyproject.toml
├── src
└── censo
│ ├── __init__.py
│ ├── __main__.py
│ ├── assets
│ ├── basis_sets.json
│ ├── censo_dfa_settings.json
│ ├── censo_nmr_ref.json
│ ├── censo_solvents_db.json
│ ├── dfa.bu
│ ├── hexadecane_25.pot
│ ├── octanol_25.pot
│ ├── old_solvents_db.json
│ ├── solvents.json
│ ├── solvents_dc.json
│ ├── supporting_info.json
│ └── wet-octanol_25.pot
│ ├── cli
│ ├── __init__.py
│ ├── cml_parser.py
│ └── interface.py
│ ├── configuration.py
│ ├── datastructure.py
│ ├── ensembledata.py
│ ├── ensembleopt
│ ├── __init__.py
│ ├── optimization.py
│ ├── optimizer.py
│ ├── prescreening.py
│ ├── refinement.py
│ └── screening.py
│ ├── logging.py
│ ├── orca_processor.py
│ ├── parallel.py
│ ├── params.py
│ ├── part.py
│ ├── properties
│ ├── __init__.py
│ ├── nmr.py
│ ├── property_calculator.py
│ └── uvvis.py
│ ├── qm_processor.py
│ ├── tm_processor.py
│ └── utilities.py
└── test
├── __init__.py
├── conftest.py
├── fixtures
├── crest_conformers.xyz
├── inp
├── inp2
├── test.template
└── testinp
├── test_cli
└── test_interface.py
└── test_ensembledata.py
/.git_archival.txt:
--------------------------------------------------------------------------------
1 | node: 3326db4579d1f630c28fce17e3b3e2d66070b8bd
2 | node-date: 2025-05-19T11:34:57+02:00
3 | describe-name: v2.1.3-6-g3326db45e
4 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | .git_archival.txt export-subst
2 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
# CodeQL static analysis for the Python sources.
# Runs on pushes and pull requests targeting main, plus a weekly scheduled scan.
name: "CodeQL"

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]
  schedule:
    - cron: "42 13 * * 6"

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ python ]

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # codeql-action v2 has been deprecated; v3 is the supported major version.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          queries: +security-and-quality

      - name: Autobuild
        uses: github/codeql-action/autobuild@v3

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:${{ matrix.language }}"
42 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/vscode
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=vscode
3 |
4 | ### vscode ###
5 | .vscode/*
6 | !.vscode/settings.json
7 | !.vscode/tasks.json
8 | !.vscode/launch.json
9 | !.vscode/extensions.json
10 | *.code-workspace
11 |
12 | # End of https://www.toptal.com/developers/gitignore/api/vscode
13 |
14 | ###pycache##
15 | __pycache__/
16 |
17 |
18 | # packaging
19 | *.egg-info/
20 |
21 | # venv
22 | venv/*
23 |
24 | # pycharm
25 | .idea/*
26 |
--------------------------------------------------------------------------------
/COPYING.LESSER:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CENSO - Commandline ENergetic SOrting of Conformer-Rotamer Ensembles
2 | 
3 | CENSO is a Python package meant to automate refinement of Conformer-Rotamer ensembles on DFT level, as well as calculation of ensemble properties, e.g. NMR parameters.
4 | It can be used from the command line as well as using custom wrapper scripts.
5 |
6 | ## NEW: CENSO 2.0
7 | This is the updated version of the former CENSO 1.3 program. New features include the possibility to use CENSO as a package from within Python, template files, json outputs, and more! For more information about the use and the capabilities of CENSO 2.0 visit the documentation [here](https://xtb-docs.readthedocs.io/en/latest/CENSO_docs/censo.html).
8 |
9 | ## CENSO 2.1.3
10 | In the most recent version of CENSO 2.1, the code was cleaned up and many bug fixes were implemented. When it comes to functionality, the multitemp mode and constraints for the geometry optimization were removed, since they did not work (reliably). Also, a new system for solvent lookup was implemented, which is also going to be used for CENSO 2.2, and completes all solvent name mappings. This way, all solvents should be callable using a multitude of different aliases (if they are available for the respective solvation model). Also, printouts at the end of the UV/Vis and NMR calculations were added.
11 |
12 | For usage via the CLI, it is now no longer necessary to provide `--maxcores` or `-i`, since default values are now defined for both cases. It is now also possible to change the minimum number of threads when calling external programs using `--omp-min`. The same can be achieved by modifying `Config.OMPMIN` in Python.
13 |
14 | # Installation
15 | Can be installed using `pip` by running
16 |
17 | pip install .
18 |
19 | If you want to install and run `CENSO` without `pip` you can add the `CENSO/src` directory to your `$PYTHONPATH` and add `CENSO/bin` to your `$PATH`.
20 |
21 | # Usage
22 | After installing CENSO via `pip`, it can be called using either
23 | ```
24 | python -m censo
25 | ```
26 | or
27 | ```
28 | censo
29 | ```
30 | since the CLI is now implemented as an entry point. As of version 2.1.3, it is also no longer necessary to pass `--maxcores` or `-i` when calling CENSO, since both now have default values.
31 |
32 | For information about command line options use the `-h` option.
33 |
34 | If you chose not to install it using `pip` and you added the `bin` directory to your `$PATH`, you can also just invoke `censo`.
35 |
36 | CENSO can also be used as a package. A basic setup for a CENSO run in a Python file could look like this:
37 | ```python
38 | from censo.ensembledata import EnsembleData
39 | from censo.configuration import configure
40 | from censo.ensembleopt import Prescreening, Screening, Optimization
41 | from censo.properties import NMR
42 | from censo.params import Config
43 |
44 | # CENSO will put all files in the current working directory (os.getcwd())
45 | input_path = "rel/path/to/your/inputfile" # path relative to the working directory
46 | ensemble = EnsembleData(input_file=input_path)
47 | # the above can be used if your molecule is neutral and closed shell, otherwise
48 | # it is necessary to proceed with e.g.
49 | # ensemble = EnsembleData()
50 | # ensemble.read_input(input_path, charge=-1, unpaired=1)
51 |
52 | # If the user wants to use a specific rcfile:
53 | configure("/abs/path/to/rcfile")
54 |
55 | # Get the number of available cpu cores on this machine
56 | # This is also the default value that CENSO uses
57 | # This number can also be set to any other integer value and is automatically checked for validity
58 | Config.NCORES = os.cpu_count()
59 |
60 | # Another possibly important setting is OMP, which will get used if you disabled the automatic
61 | # load balancing in the settings
62 | Config.OMP = 4
63 |
64 | # The user can also choose to change specific settings of the parts
65 | # Please take note of the following:
66 | # - the settings of certain parts, e.g. Prescreening are changed using set_setting(name, value)
67 | # - general settings are changed by using set_general_setting(name, value) (it does not matter which part you call it from)
68 | # - the values you want to set must comply with limits and the type of the setting
69 | Prescreening.set_setting("threshold", 5.0)
70 | Prescreening.set_general_setting("solvent", "dmso")
71 |
72 | # It is also possible to use a dict to set multiple values in one step
73 | settings = {
74 | "threshold": 3.5,
75 | "func": "pbeh-3c",
76 | "implicit": True,
77 | }
78 | Screening.set_settings(settings, complete=False)
79 | # the complete kwarg tells the method whether to set the undefined settings using defaults or leave them on their current value
80 |
81 |
82 | # Setup and run all the parts that the user wants to run
83 | # Running the parts in order here, while it is also possible to use a custom order or run some parts multiple times
84 | # Running a part will return an instance of the respective type
85 | # References to the resulting part instances will be appended to a list in the EnsembleData object (ensemble.results)
86 | # Note though, that currently this will lead to results being overwritten in your working directory
87 | # (you could circumvent this by moving/renaming the folders)
88 | results, timings = zip(*[part.run(ensemble) for part in [Prescreening, Screening, Optimization, NMR]])
89 |
90 | # You access the results using the ensemble object
91 | # You can also find all the results in the .json output files
92 | print(ensemble.results[0].data["results"]["CONF5"]["sp"]["energy"])
93 | ```
94 |
95 | # License
96 |
97 | ``CENSO`` is free software: you can redistribute it and/or modify it under
98 | the terms of the GNU Lesser General Public License as published by
99 | the Free Software Foundation, either version 3 of the License, or
100 | (at your option) any later version.
101 |
102 | ``CENSO`` is distributed in the hope that it will be useful,
103 | but without any warranty; without even the implied warranty of
104 | merchantability or fitness for a particular purpose. See the
105 | GNU Lesser General Public License for more details.
106 |
--------------------------------------------------------------------------------
/bin/c2anmr:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import shutil
3 | from pathlib import Path
4 | import sys
5 |
def main():
    """Collect the files of a CENSO NMR run into an ``anmr`` directory.

    Working relative to the current directory, this:

    * creates ``anmr`` (if missing),
    * copies every regular file named ``anmr_*`` from the current directory
      into ``anmr``,
    * for each ``4_NMR/CONF*`` directory, creates ``anmr/<CONF>/NMR`` and
      copies ``nmrprop.dat`` and ``coord`` into it, printing a warning to
      stderr for each of the two files that is missing.
    """
    source_root = Path("4_NMR")
    target_root = Path("anmr")
    wanted_files = ("nmrprop.dat", "coord")

    target_root.mkdir(parents=True, exist_ok=True)

    # Top-level anmr_* files go directly into the target directory
    for entry in Path.cwd().glob("anmr_*"):
        if entry.is_file():
            shutil.copy(entry, target_root / entry.name)

    # One <target>/CONF#/NMR directory per conformer directory in the source
    for conformer in source_root.glob("CONF*"):
        if conformer.is_dir():
            nmr_target = target_root / conformer.name / "NMR"
            nmr_target.mkdir(parents=True, exist_ok=True)

            for name in wanted_files:
                candidate = conformer / name
                if not candidate.exists():
                    print(f"Warning: {candidate} not found", file=sys.stderr)
                    continue
                shutil.copy(candidate, nmr_target / name)
35 |
36 | if __name__ == "__main__":
37 | main()
38 |
39 |
--------------------------------------------------------------------------------
/bin/censo:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
import sys
import os

# Put the sibling ``src`` directory (relative to this script) first on the
# module search path so ``censo`` is importable from a plain checkout.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from censo.cli.interface import entry_point

entry_point()
10 |
--------------------------------------------------------------------------------
/bin/uvvisplot:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | #
4 | # Copyright (C) 2024 Leopold M. Seidler
5 | #
6 | # UVVISPLOT is free software: you can redistribute it and/or modify it under
7 | # the terms of the GNU Lesser General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # UVVISPLOT is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU Lesser General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU Lesser General Public License
17 | # along with UVVISPLOT. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | """
20 | Created on Mar 17, 2024
21 | last updated on 17-March-2024
22 | @author: lmseidler
23 | """
24 | import matplotlib.pyplot as plt
25 | import os
26 | import argparse
27 | import json
28 | import numpy as np
29 | import pandas as pd
30 |
# Physical constants used by gaussian_signal (energy mode) to convert an
# energy on the x-axis into 1/lambda via E / (h c).
# NOTE(review): values look like SI units (J s, m/s, C) — confirm; C and
# COULOMB are rounded to four significant digits.
PLANCK = 6.62607015e-34
C = 2.998e8
COULOMB = 1.602e-19
34 |
35 |
36 | descr = """
37 | __________________________________________________
38 | | |
39 | | UVVISPLOT |
40 | | Plotting of ensemble UV/Vis spectra |
41 | | University of Bonn, MCTC |
42 | | March 2024 |
43 | | v 1.0.0 |
44 | | L. M. Seidler |
45 | |__________________________________________________|
46 | """
47 |
48 |
def get_args():
    """Parse the uvvisplot command line (``sys.argv``).

    Returns:
        argparse.Namespace with the attributes ``mode``, ``start``, ``end``,
        ``title``, ``lw``, ``inp``, ``fontsize`` and ``out``. ``start`` and
        ``end`` are ``None`` when not given on the command line.
    """
    parser = argparse.ArgumentParser(
        description="",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage=argparse.SUPPRESS,
    )
    parser.add_argument(
        "-mode",
        dest="mode",
        action="store",
        required=False,
        default="wavenumber",
        type=str,
        choices=["wavenumber", "energy", "wavelength"],
        help="Set the unit of the x-axis. Can be wavenumber [cm-1], energy [eV] or wavelength [nm].",
    )
    parser.add_argument(
        "-start",
        dest="start",
        action="store",
        required=False,
        type=float,
        help="Start plotting from '<start>'. Default values: 300nm/1.8eV/14000cm-1.",
    )
    parser.add_argument(
        "-end",
        dest="end",
        action="store",
        required=False,
        type=float,
        help="End plotting at '<end>'. '<end>' must be larger than '<start>'. "
        "Default values: 700nm/4.2eV/33000cm-1.",
    )
    parser.add_argument(
        "-title",
        "--title",
        dest="title",
        action="store",
        required=False,
        default="UVVis-PLOT",
        type=str,
        help="Set title of entire plot. If no title is required use "
        "'<--title ''>'.",
    )
    parser.add_argument(
        "-lw",
        "--linewidth",
        dest="lw",
        action="store",
        required=False,
        default=1.6131e3,
        type=float,
        help="Set linewidth in cm-1.",
    )
    parser.add_argument(
        "-i",
        "--inp",
        dest="inp",
        action="store",
        required=True,
        help="Provide input file.",
    )
    parser.add_argument(
        "-fontsize",
        "--fontsize",
        dest="fontsize",
        action="store",
        required=False,
        default=15,
        type=float,
        help="Set fontsize for entire plot.",
    )
    # NOTE(review): the default output name "nmrplot" looks like a leftover
    # from the nmrplot script; it is kept so existing workflows do not break.
    parser.add_argument(
        "-o",
        "--out",
        dest="out",
        action="store",
        required=False,
        default="nmrplot",
        help="Provide name of the output file (including ending).",
    )
    return parser.parse_args()
131 |
132 |
def read_data(inp):
    """Load and return the JSON content of *inp*.

    *inp* is joined onto the current working directory before opening, so a
    relative path is resolved against it.
    """
    path = os.path.join(os.getcwd(), inp)
    with open(path, "r") as handle:
        return json.load(handle)
139 |
140 |
def plot(data, args):
    """Build the summed UV/Vis spectrum figure and dump the single contributions.

    Every record in *data* is broadened with ``gaussian_signal`` on a grid of
    10000 points between the start and end value. All individual curves are
    written to ``contributions.csv`` in the current working directory, and
    their sum is plotted.

    Args:
        data: sequence of excitation records; ``exc[0]`` and ``exc[1]`` are
            passed to ``gaussian_signal`` as ``center_wl`` and ``eps_max``,
            ``exc[2]`` is the conformer label used for the column names.
        args: parsed command line namespace; ``mode``, ``start``, ``end``,
            ``lw`` and ``title`` are read.

    Returns:
        The matplotlib figure containing the summed spectrum.

    Raises:
        ValueError: if the end value is not larger than the start value.
    """
    mode = args.mode

    # Fall back to the per-mode defaults when no limits were given
    default_start = {"wavelength": 300, "wavenumber": 14000, "energy": 1.8}
    default_end = {"wavelength": 700, "wavenumber": 33000, "energy": 4.2}
    start = args.start if args.start is not None else default_start[mode]
    end = args.end if args.end is not None else default_end[mode]

    # Explicit check instead of assert, which is stripped under ``python -O``
    if not end > start:
        raise ValueError(
            f"End value ({end}) must be larger than start value ({start})."
        )
    xrange = np.linspace(start, end, 10000)

    # Dump single contributions to csv file; columns are named
    # "<conformer>_S<running index of the excitation within that conformer>"
    exc_number = {conf: 0 for conf in {d[2] for d in data}}
    contributions = {}
    for exc in data:
        yrange = gaussian_signal(xrange, exc[0], exc[1], args.lw, mode=mode)
        contributions[f"{exc[2]}_S{exc_number[exc[2]]}"] = yrange
        exc_number[exc[2]] += 1

    cwd = os.getcwd()
    contributions = pd.DataFrame.from_dict(contributions)
    contributions.to_csv(os.path.join(cwd, "contributions.csv"))
    print("All contributions written to contributions.csv.")

    # Plot the whole spectrum
    fig, ax = plt.subplots()
    yrange = contributions.sum(axis=1)
    ax.plot(xrange, yrange)
    ax.set_title(args.title)
    # Raw strings: "\m" and "\e" are invalid escape sequences in normal
    # string literals (SyntaxWarning on Python >= 3.12); the values are unchanged.
    labels = {
        "wavelength": r"$\mathrm{nm}$",
        "wavenumber": r"$\mathrm{cm-1}$",
        "energy": r"$\mathrm{eV}$",
    }
    ax.set_xlabel(f"{args.mode} [{labels[args.mode]}]")
    ax.set_ylabel(r"$\epsilon$ [a. u.]")

    return fig
200 |
201 |
def gaussian_signal(xrange, center_wl, eps_max, lw, mode="wavelength"):
    """Return a Gaussian band ``eps_max * exp(-(delta / lw)**2)`` on *xrange*.

    ``delta`` is the distance between each x value and the band centre,
    measured in reciprocal wavelength, so the same *lw* describes the same
    band in every mode.

    Args:
        xrange: x values (numpy array or scalar) in the unit selected by
            *mode*: nm ("wavelength"), cm-1 ("wavenumber") or eV ("energy").
        center_wl: band centre as a wavelength in nm.
        eps_max: value of the band at its centre.
        lw: line width in cm-1.
        mode: one of "wavelength", "wavenumber" or "energy".

    Returns:
        The band evaluated on *xrange*.

    Raises:
        ValueError: if *mode* is not one of the supported values.
    """
    # E = h ν = h c/λ
    # <=> 1/λ = E / (h c)
    # 1 cm = 1e7 nm  =>  1 nm-1 = 1e7 cm-1  and  1 cm-1 = 1e-7 nm-1
    # 1 nm-1 = 1e9 m-1, 1 cm-1 = 1e2 m-1
    if mode == "wavelength":
        # Difference in nm-1, converted to cm-1 before dividing by lw.
        # (Dividing by lw * 1e7 instead made the band ~1e14 times too broad.)
        return eps_max * np.exp(- ((1 / xrange - 1 / center_wl) * 1e7 / lw)**2)
    elif mode == "wavenumber":
        return eps_max * np.exp(- ((xrange - 1 / center_wl * 1e7) / lw)**2)
    elif mode == "energy":
        # Both terms and the line width are expressed in m-1 here
        return eps_max * np.exp(- ((xrange * COULOMB / (PLANCK * C) - 1 / center_wl * 1e9) / (lw * 1e2))**2)
    else:
        raise ValueError(
            f"Unknown mode '{mode}'. Must be 'wavelength', 'wavenumber' or 'energy'."
        )
213 |
214 |
def save_plot(fig, out):
    """Save *fig* in PDF format to *out*, joined onto the current working directory."""
    target = os.path.join(os.getcwd(), out)
    fig.savefig(target, format="pdf")
218 |
219 |
def main():
    """Entry point: print the banner, parse arguments, read, plot and save."""
    print(descr)

    # Command line arguments drive every following step
    cli_args = get_args()

    # Load the input data, turn it into a figure and write the figure to disk
    excitations = read_data(cli_args.inp)
    spectrum = plot(excitations, cli_args)
    save_plot(spectrum, cli_args.out)
234 |
235 |
236 | if __name__ == "__main__":
237 | main()
238 |
--------------------------------------------------------------------------------
/environment.yaml:
--------------------------------------------------------------------------------
1 | name: censo
2 | channels:
3 | - defaults
4 | - conda-forge
5 | dependencies:
6 | - python=3.10
7 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "setuptools_scm[toml]"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "censo"
7 | dynamic = ["version", "readme"]
8 | requires-python = ">= 3.10"
9 |
10 | [project.urls]
11 | homepage = "https://github.com/grimme-lab/CENSO"
12 | documentation = "https://xtb-docs.readthedocs.io/en/latest/CENSO_docs/censo.html"
13 |
14 | [project.optional-dependencies]
15 | scripts = [
16 | "numpy",
17 | "matplotlib",
18 | "pandas"
19 | ]
20 |
21 | [project.scripts]
22 | censo = "censo.cli.interface:entry_point"
23 | c2anmr = "bin.c2anmr:main"
24 | uvvisplot = "bin.uvvisplot:main"
25 | nmrplot = "bin.nmrplot:main"
26 |
27 | [tool.setuptools.packages.find]
28 | where = ["src", "."]
29 | include = ["censo*", "bin"]
30 |
31 | [tool.setuptools.dynamic]
32 | readme = {file = "README.md"}
33 |
34 | [tool.setuptools_scm]
35 | version_file = "src/censo/__version__.py"
36 |
--------------------------------------------------------------------------------
/src/censo/__init__.py:
--------------------------------------------------------------------------------
from .configuration import configure
from .params import DESCR
from .__version__ import __version__

# Import-time side effects: importing the package prints DESCR and calls
# configure() with no arguments. Both run before the imports below.
print(DESCR)
configure()

from .cli import interface, cml_parser
from . import (
    configuration,
    ensembledata,
    datastructure,
    orca_processor,
    parallel,
    part,
    qm_processor,
    utilities,
    ensembleopt,
    properties,
)
21 |
--------------------------------------------------------------------------------
/src/censo/__main__.py:
--------------------------------------------------------------------------------
1 | from censo.cli.interface import entry_point
2 |
3 | if __name__ == "__main__":
4 | entry_point()
5 |
--------------------------------------------------------------------------------
/src/censo/assets/basis_sets.json:
--------------------------------------------------------------------------------
1 | [
2 | "SVP",
3 | "SV(P)",
4 | "TZVP",
5 | "TZVPP",
6 | "QZVP",
7 | "QZVPP",
8 | "def2-SV(P)",
9 | "def2-mSVP",
10 | "def2-SVP",
11 | "def2-TZVP",
12 | "def2-TZVPP",
13 | "def2-mTZVP",
14 | "def2-mTZVPP",
15 | "def2-TZVPD",
16 | "def2-SVPD",
17 | "def-SVP",
18 | "def-SV(P)",
19 | "def2-QZVP",
20 | "DZ",
21 | "QZV",
22 | "cc-pVDZ",
23 | "cc-pVTZ",
24 | "cc-pVQZ",
25 | "cc-pV5Z",
26 | "aug-cc-pVDZ",
27 | "aug-cc-pVTZ",
28 | "aug-cc-pVQZ",
29 | "aug-cc-pV5Z",
30 | "def2-QZVPP",
31 | "minix",
32 | "pcJ-0",
33 | "pcJ-1",
34 | "pcJ-2",
35 | "pcSseg-0",
36 | "pcSseg-1",
37 | "pcSseg-2",
38 | "pcSseg-3",
39 | "x2c-SVPall-s",
40 | "x2c-TZVPall-s",
41 | "def2-TZVP(-f)",
42 | "def2-QZVP(-gf)",
43 | "def2-TZVPD(-f)"
44 | ]
--------------------------------------------------------------------------------
/src/censo/assets/censo_dfa_settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "composite_method_basis": {
3 | "pbeh-3c": "def2-mSVP",
4 | "b97-3c": "def2-mTZVP",
5 | "hf-3c": "minix",
6 | "r2scan-3c": "def2-mTZVPP"
7 | },
8 | "relay_functionals": {
9 | "pbe": "pbe-d4",
10 | "tpss": "tpss-d4",
11 | "b97-d": "b97-d3(0)",
12 | "kt1": "kt1-novdw",
13 | "kt2": "kt2-novdw",
14 | "pbe0": "pbe0-d4",
15 | "pw6b95": "pw6b95-d4",
16 | "b3lyp": "b3lyp-d4",
17 | "b3-lyp": "b3lyp-d4",
18 | "dsd-blyp": "dsd-blyp-d3"
19 | },
20 | "functionals": {
21 | "dummy": {
22 | "tm": null,
23 | "orca": "dummy",
24 | "disp": "dummy",
25 | "type": "dummy"
26 | },
27 | "pbeh-3c": {
28 | "tm": "pbeh-3c",
29 | "orca": "pbeh-3c",
30 | "disp": "composite",
31 | "type": "composite_hybrid"
32 | },
33 | "b97-3c": {
34 | "tm": "b97-3c",
35 | "orca": "b97-3c",
36 | "disp": "composite",
37 | "type": "composite_gga"
38 | },
39 | "r2scan-3c": {
40 | "tm": "r2scan-3c",
41 | "orca": "r2scan-3c",
42 | "disp": "composite",
43 | "type": "composite_mgga"
44 | },
45 | "r2scan-novdw": {
46 | "tm": "r2scan",
47 | "orca": "r2scan",
48 | "disp": "novdw",
49 | "type": "mgga"
50 | },
51 | "r2scan-d3": {
52 | "tm": "r2scan",
53 | "orca": "r2scan",
54 | "disp": "d3bj",
55 | "type": "mgga"
56 | },
57 | "r2scan-d3(0)": {
58 | "tm": "r2scan",
59 | "orca": "r2scan",
60 | "disp": "d3(0)",
61 | "type": "mgga"
62 | },
63 | "r2scan-d4": {
64 | "tm": "r2scan",
65 | "orca": "r2scan",
66 | "disp": "d4",
67 | "type": "mgga"
68 | },
69 | "pbe-novdw": {
70 | "tm": "pbe",
71 | "orca": "pbe",
72 | "disp": "novdw",
73 | "type": "gga"
74 | },
75 | "pbe-d3": {
76 | "tm": "pbe",
77 | "orca": "pbe",
78 | "disp": "d3bj",
79 | "type": "gga"
80 | },
81 | "pbe-d3(0)": {
82 | "tm": "pbe",
83 | "orca": "pbe",
84 | "disp": "d3(0)",
85 | "type": "gga"
86 | },
87 | "pbe-d4": {
88 | "tm": "pbe",
89 | "orca": "pbe",
90 | "disp": "d4",
91 | "type": "gga"
92 | },
93 | "pbe-nl": {
94 | "tm": "pbe",
95 | "orca": null,
96 | "disp": "nl",
97 | "type": "gga"
98 | },
99 | "tpss-novdw": {
100 | "tm": "tpss",
101 | "orca": "tpss",
102 | "disp": "novdw",
103 | "type": "mgga"
104 | },
105 | "tpss-d3": {
106 | "tm": "tpss",
107 | "orca": "tpss",
108 | "disp": "d3bj",
109 | "type": "mgga"
110 | },
111 | "tpss-d3(0)": {
112 | "tm": "tpss",
113 | "orca": "tpss",
114 | "disp": "d3(0)",
115 | "type": "mgga"
116 | },
117 | "tpss-d4": {
118 | "tm": "tpss",
119 | "orca": "tpss",
120 | "disp": "d4",
121 | "type": "mgga"
122 | },
123 | "tpss-nl": {
124 | "tm": "tpss",
125 | "orca": null,
126 | "disp": "nl",
127 | "type": "mgga"
128 | },
129 | "revtpss-novdw": {
130 | "tm": "revtpss",
131 | "orca": "revTPSS",
132 | "disp": "novdw",
133 | "type": "mgga"
134 | },
135 | "tpssh-novdw": {
136 | "tm": null,
137 | "orca": "tpssh",
138 | "disp": "novdw",
139 | "type": "global_hybrid"
140 | },
141 | "tpssh-d3": {
142 | "tm": null,
143 | "orca": "tpssh",
144 | "disp": "d3",
145 | "type": "global_hybrid"
146 | },
147 | "tpssh-d3(0)": {
148 | "tm": null,
149 | "orca": "tpssh",
150 | "disp": "d3(0)",
151 | "type": "global_hybrid"
152 | },
153 | "tpssh-d4": {
154 | "tm": null,
155 | "orca": "tpssh",
156 | "disp": "d4",
157 | "type": "global_hybrid"
158 | },
159 | "b97-d3": {
160 | "tm": "b97-d",
161 | "orca": "b97-d3",
162 | "disp": "included",
163 | "type": "gga"
164 | },
165 | "b97-d4": {
166 | "tm": null,
167 | "orca": "b97",
168 | "disp": "d4",
169 | "type": "gga"
170 | },
171 | "kt1-novdw": {
172 | "tm": "kt1",
173 | "orca": null,
174 | "disp": "novdw",
175 | "type": "gga"
176 | },
177 | "kt2-novdw": {
178 | "tm": "kt2",
179 | "orca": "kt2",
180 | "disp": "novdw",
181 | "type": "gga"
182 | },
183 | "pbe0-novdw": {
184 | "tm": "pbe0",
185 | "orca": "pbe0",
186 | "disp": "novdw",
187 | "type": "global_hybrid"
188 | },
189 | "pbe0-d3": {
190 | "tm": "pbe0",
191 | "orca": "pbe0",
192 | "disp": "d3bj",
193 | "type": "global_hybrid"
194 | },
195 | "pbe0-d3(0)": {
196 | "tm": "pbe0",
197 | "orca": "pbe0",
198 | "disp": "d3(0)",
199 | "type": "global_hybrid"
200 | },
201 | "pbe0-d4": {
202 | "tm": "pbe0",
203 | "orca": "pbe0",
204 | "disp": "d4",
205 | "type": "global_hybrid"
206 | },
207 | "pbe0-nl": {
208 | "tm": "pbe0",
209 | "orca": null,
210 | "disp": "nl",
211 | "type": "global_hybrid"
212 | },
213 | "pw6b95-novdw": {
214 | "tm": "pw6b95",
215 | "orca": "pw6b95",
216 | "disp": "novdw",
217 | "type": "global_hybrid"
218 | },
219 | "pw6b95-d3": {
220 | "tm": "pw6b95",
221 | "orca": "pw6b95",
222 | "disp": "d3bj",
223 | "type": "global_hybrid"
224 | },
225 | "pw6b95-d3(0)": {
226 | "tm": "pw6b95",
227 | "orca": "pw6b95",
228 | "disp": "d3(0)",
229 | "type": "global_hybrid"
230 | },
231 | "pw6b95-d4": {
232 | "tm": "pw6b95",
233 | "orca": "pw6b95",
234 | "disp": "d4",
235 | "type": "global_hybrid"
236 | },
237 | "b3lyp-novdw": {
238 | "tm": "b3-lyp",
239 | "orca": "b3lyp",
240 | "disp": "novdw",
241 | "type": "global_hybrid"
242 | },
243 | "b3lyp-d3": {
244 | "tm": "b3-lyp",
245 | "orca": "b3lyp",
246 | "disp": "d3bj",
247 | "type": "global_hybrid"
248 | },
249 | "b3lyp-d3(0)": {
250 | "tm": "b3-lyp",
251 | "orca": "b3lyp",
252 | "disp": "d3(0)",
253 | "type": "global_hybrid"
254 | },
255 | "b3lyp-d4": {
256 | "tm": "b3-lyp",
257 | "orca": "b3lyp",
258 | "disp": "d4",
259 | "type": "global_hybrid"
260 | },
261 | "b3lyp-nl": {
262 | "tm": "b3-lyp",
263 | "orca": "b3lyp",
264 | "disp": "nl",
265 | "type": "global_hybrid"
266 | },
267 | "wb97x-v": {
268 | "tm": "wb97x-v",
269 | "orca": "wb97x-v",
270 | "disp": "included",
271 | "type": "rs_hybrid"
272 | },
273 | "wb97x-d3": {
274 | "tm": null,
275 | "orca": "wb97x-d3",
276 | "disp": "included",
277 | "type": "rs_hybrid"
278 | },
279 | "wb97x-d3bj": {
280 | "tm": null,
281 | "orca": "wb97x-d3bj",
282 | "disp": "included",
283 | "type": "rs_hybrid"
284 | },
285 | "wb97x-d4": {
286 | "tm": null,
287 | "orca": "wb97x-d4",
288 | "disp": "included",
289 | "type": "rs_hybrid"
290 | },
291 | "wb97m-v": {
292 | "tm": null,
293 | "orca": "wb97m-v",
294 | "disp": "included",
295 | "type": "rs_hybrid"
296 | },
297 | "chyf-b95-novdw": {
298 | "tm": "chyf-b95",
299 | "orca": null,
300 | "disp": "novdw",
301 | "type": "local_hybrid"
302 | },
303 | "chyf-b95-d3": {
304 | "tm": "chyf-b95",
305 | "orca": null,
306 | "disp": "d3bj",
307 | "type": "local_hybrid"
308 | },
309 | "chyf-b95-d4": {
310 | "tm": "chyf-b95",
311 | "orca": null,
312 | "disp": "d4",
313 | "type": "local_hybrid"
314 | },
315 | "dsd-blyp-d3": {
316 | "tm": null,
317 | "orca": "ri-dsd-blyp",
318 | "disp": "d3bj",
319 | "type": "double"
320 | },
321 | "dsd-pbep86-d3": {
322 | "tm": null,
323 | "orca": "dsd-pbep86",
324 | "disp": "d3bj",
325 | "type": "double"
326 | }
327 | }
328 | }
329 |
--------------------------------------------------------------------------------
/src/censo/assets/old_solvents_db.json:
--------------------------------------------------------------------------------
1 | {
2 | "smd": {
3 | "1,1,1-trichloroethane": ["1,1,1-trichloroethane"],
4 | "1,1,2-trichloroethane": ["1,1,2-trichloroethane"],
5 | "1,2,4-trimethylbenzene": ["1,2,4-trimethylbenzene"],
6 | "1,2-dibromoethane": ["1,2-dibromoethane"],
7 | "1,2-dichloroethane": ["1,2-dichloroethane"],
8 | "1,2-ethanediol": ["1,2-ethanediol"],
9 | "1,4-dioxane": ["1,4-dioxane"],
10 | "1-bromo-2-methylpropane": ["1-bromo-2-methylpropane"],
11 | "1-bromooctane": ["1-bromooctane"],
12 | "1-bromopentane": ["1-bromopentane"],
13 | "1-bromopropane": ["1-bromopropane"],
14 | "1-butanol": ["1-butanol"],
15 | "1-chlorohexane": ["1-chlorohexane"],
16 | "1-chloropentane": ["1-chloropentane"],
17 | "1-chloropropane": ["1-chloropropane"],
18 | "1-decanol": ["1-decanol"],
19 | "1-fluorooctane": ["1-fluorooctane"],
20 | "1-heptanol": ["1-heptanol"],
21 | "1-hexanol": ["1-hexanol"],
22 | "1-hexene": ["1-hexene"],
23 | "1-hexyne": ["1-hexyne"],
24 | "1-iodobutane": ["1-iodobutane"],
25 | "1-iodohexadecane": ["1-iodohexadecane"],
26 | "1-iodopentane": ["1-iodopentane"],
27 | "1-iodopropane": ["1-iodopropane"],
28 | "1-nitropropane": ["1-nitropropane"],
29 | "1-nonanol": ["1-nonanol"],
30 | "1-octanol": ["1-octanol"],
31 | "1-pentanol": ["1-pentanol"],
32 | "1-pentene": ["1-pentene"],
33 | "1-propanol": ["1-propanol"],
34 | "2,2,2-trifluoroethanol": ["2,2,2-trifluoroethanol"],
35 | "2,2,4-trimethylpentane": ["2,2,4-trimethylpentane"],
36 | "2,4-dimethylpentane": ["2,4-dimethylpentane"],
37 | "2,4-dimethylpyridine": ["2,4-dimethylpyridine"],
38 | "2,6-dimethylpyridine": ["2,6-dimethylpyridine"],
39 | "2-bromopropane": ["2-bromopropane"],
40 | "2-butanol": ["2-butanol"],
41 | "2-chlorobutane": ["2-chlorobutane"],
42 | "2-heptanone": ["2-heptanone"],
43 | "2-hexanone": ["2-hexanone"],
44 | "2-methoxyethanol": ["2-methoxyethanol"],
45 | "2-methyl-1-propanol": ["2-methyl-1-propanol"],
46 | "2-methyl-2-propanol": ["2-methyl-2-propanol"],
47 | "2-methylpentane": ["2-methylpentane"],
48 | "2-methylpyridine": ["2-methylpyridine"],
49 | "2-nitropropane": ["2-nitropropane"],
50 | "2-octanone": ["2-octanone"],
51 | "2-pentanone": ["2-pentanone"],
52 | "2-propanol": ["2-propanol"],
53 | "2-propen-1-ol": ["2-propen-1-ol"],
54 | "e-2-pentene": ["e-2-pentene"],
55 | "3-methylpyridine": ["3-methylpyridine"],
56 | "3-pentanone": ["3-pentanone"],
57 | "4-heptanone": ["4-heptanone"],
58 | "4-methyl-2-pentanone": ["4-methyl-2-pentanone"],
59 | "4-methylpyridine": ["4-methylpyridine"],
60 | "5-nonanone": ["5-nonanone"],
61 | "acetic acid": ["acetic_acid"],
62 | "acetone": ["acetone"],
63 | "acetonitrile": ["mecn", "acetonitrile", "cyanomethane"],
64 | "acetophenone": ["acetophenone"],
65 | "aniline": ["aniline"],
66 | "anisole": ["anisole"],
67 | "benzaldehyde": ["benzaldehyde"],
68 | "benzene": ["benzene"],
69 | "benzonitrile": ["benzonitrile"],
70 | "benzyl alcohol": ["benzyl_alcohol"],
71 | "bromobenzene": ["bromobenzene"],
72 | "bromoethane": ["bromoethane"],
73 | "bromoform": ["bromoform"],
74 | "butanal": ["butanal"],
75 | "butanoic acid": ["butanoic_acid"],
76 | "butanone": ["butanone"],
77 | "butanonitrile": ["butanonitrile"],
78 | "butyl ethanoate": ["butyl_ethanoate"],
79 | "butylamine": ["butylamine"],
80 | "n-butylbenzene": ["n-butylbenzene"],
81 | "sec-butylbenzene": ["sec-butylbenzene"],
82 | "tert-butylbenzene": ["tert-butylbenzene"],
83 | "carbon disulfide": ["carbon_disulfide"],
84 | "carbon tetrachloride": ["carbon_tetrachloride"],
85 | "chlorobenzene": ["chlorobenzene"],
86 | "chloroform": ["chloroform", "chcl3"],
87 | "a-chlorotoluene": ["a-chlorotoluene"],
88 | "o-chlorotoluene": ["o-chlorotoluene"],
89 | "m-cresol": ["m-cresol"],
90 | "o-cresol": ["o-cresol"],
91 | "cyclohexane": ["cyclohexane"],
92 | "cyclohexanone": ["cyclohexanone"],
93 | "mecn": ["mecn", "acetonitrile", "cyanomethane"],
94 | "ccl4": ["ccl4"],
95 | "cyclopentane": ["cyclopentane"],
96 | "cyclopentanol": ["cyclopentanol"],
97 | "cyclopentanone": ["cyclopentanone"],
98 | "decalin (cis/trans mixture)": ["decalin_mix"],
99 | "cis-decalin": ["cis-decalin"],
100 | "n-decane": ["n-decane"],
101 | "dibromomethane": ["dibromomethane"],
102 | "dibutylether": ["dibutylether"],
103 | "o-dichlorobenzene": ["o-dichlorobenzene"],
104 | "e-1,2-dichloroethene": ["e-1,2-dichloroethene"],
105 | "z-1,2-dichloroethene": ["z-1,2-dichloroethene"],
106 | "dichloromethane": ["dichloromethane"],
107 | "diethyl ether": ["diethyl_ether"],
108 | "diethyl sulfide": ["diethyl_sulfide"],
109 | "diethylamine": ["diethylamine"],
110 | "diiodomethane": ["diiodomethane"],
111 | "diisopropyl ether": ["diisopropyl_ether"],
112 | "cis-1,2-dimethylcyclohexane": ["cis-1,2-dimethylcyclohexane"],
113 | "dimethyl disulfide": ["dimethyl_disulfide"],
114 | "n,n-dimethylacetamide": ["n,n-dimethylacetamide"],
115 | "n,n-dimethylformamide": ["n,n-dimethylformamide"],
116 | "dimethylsulfoxide": ["dimethylsulfoxide"],
117 | "diphenylether": ["diphenylether"],
118 | "dipropylamine": ["dipropylamine"],
119 | "n-dodecane": ["n-dodecane"],
120 | "ethanethiol": ["ethanethiol"],
121 | "ethanol": ["ethanol"],
122 | "ethyl ethanoate": ["ethyl_ethanoate"],
123 | "ethyl methanoate": ["ethyl_methanoate"],
124 | "ethyl phenyl ether": ["ethyl_phenyl_ether"],
125 | "ethylbenzene": ["ethylbenzene"],
126 | "fluorobenzene": ["fluorobenzene"],
127 | "formamide": ["formamide"],
128 | "formic acid": ["formic_acid"],
129 | "n-heptane": ["n-heptane"],
130 | "n-hexadecane": ["n-hexadecane"],
131 | "n-hexane": ["n-hexane"],
132 | "hexanoic acid": ["hexanoic_acid"],
133 | "iodobenzene": ["iodobenzene"],
134 | "iodoethane": ["iodoethane"],
135 | "iodomethane": ["iodomethane"],
136 | "isopropylbenzene": ["isopropylbenzene"],
137 | "p-isopropyltoluene": ["p-isopropyltoluene"],
138 | "mesitylene": ["mesitylene"],
139 | "methanol": ["methanol"],
140 | "methyl benzoate": ["methyl_benzoate"],
141 | "methyl butanoate": ["methyl_butanoate"],
142 | "methyl ethanoate": ["methyl_ethanoate"],
143 | "methyl methanoate": ["methyl_methanoate"],
144 | "methyl propanoate": ["methyl_propanoate"],
145 | "n-methylaniline": ["n-methylaniline"],
146 | "methylcyclohexane": ["methylcyclohexane"],
147 | "n-methylformamide": ["n-methylformamide"],
148 | "nitrobenzene": ["nitrobenzene"],
149 | "nitroethane": ["nitroethane"],
150 | "nitromethane": ["nitromethane"],
151 | "o-nitrotoluene": ["o-nitrotoluene"],
152 | "n-nonane": ["n-nonane"],
153 | "n-octane": ["n-octane"],
154 | "n-pentadecane": ["n-pentadecane"],
155 | "pentanal": ["pentanal"],
156 | "n-pentane": ["n-pentane"],
157 | "pentanoic acid": ["pentanoic_acid"],
158 | "pentyl ethanoate": ["pentyl_ethanoate"],
159 | "pentylamine": ["pentylamine"],
160 | "perfluorobenzene": ["perfluorobenzene"],
161 | "propanal": ["propanal"],
162 | "propanoic acid": ["propanoic_acid"],
163 | "propanonitrile": ["propanonitrile"],
164 | "propyl ethanoate": ["propyl_ethanoate"],
165 | "propylamine": ["propylamine"],
166 | "pyridine": ["pyridine"],
167 | "tetrachloroethene": ["tetrachloroethene"],
168 | "tetrahydrofuran": ["tetrahydrofuran"],
169 | "tetrahydrothiophene-s,s-dioxide": ["tetrahydrothiophene-s,s-dioxide"],
170 | "tetralin": ["tetralin"],
171 | "thiophene": ["thiophene"],
172 | "thiophenol": ["thiophenol"],
173 | "toluene": ["toluene"],
174 | "trans-decalin": ["trans-decalin"],
175 | "tributylphosphate": ["tributylphosphate"],
176 | "trichloroethene": ["trichloroethene"],
177 | "triethylamine": ["triethylamine"],
178 | "n-undecane": ["n-undecane"],
179 | "water": ["h2o", "water"],
180 | "xylene (mixture)": ["xylene_mix"],
181 | "m-xylene": ["m-xylene"],
182 | "o-xylene": ["o-xylene"],
183 | "p-xylene": ["p-xylene"],
184 | "dmf": ["dmf"],
185 | "dmso": ["dmso"],
186 | "phno2": ["phno2"],
187 | "meno2": ["meno2"],
188 | "thf": ["thf"]
189 | },
190 | "gbsa": {
191 | "acetone": ["propanone", "acetone"],
192 | "acetonitrile": ["mecn", "acetonitrile", "cyanomethane"],
193 | "aniline": ["aniline"],
194 | "benzaldehyde": ["benzaldehyde"],
195 | "benzene": ["benzene"],
196 | "chcl3": ["chloroform", "chcl3"],
197 | "ch2cl2": ["ch2cl2"],
198 | "ccl4": ["ccl4"],
199 | "cs2": ["cs2"],
200 | "dioxane": ["dioxane"],
201 | "dmf": ["dmf"],
202 | "dmso": ["dmso"],
203 | "ether": ["ether"],
204 | "ethanol": ["ethanol"],
205 | "ethylacetate": ["ethylacetate"],
206 | "furane": ["furane"],
207 | "hexadecane": ["hexadecane"],
208 | "hexane": ["hexane"],
209 | "h2o": ["h2o"],
210 | "water": ["water"],
211 | "methanol": ["methanol"],
212 | "nitromethane": ["nitromethane"],
213 | "thf": ["thf"],
214 | "toluene": ["toluene"],
215 | "octanol": ["octanol"],
216 | "woctanol": ["woctanol", "wet_octanol"],
217 | "phenol": ["phenol"]
218 | },
219 | "alpb": {
220 | "acetone": ["propanone", "acetone"],
221 | "acetonitrile": ["cyanomethane", "acetonitrile"],
222 | "aniline": ["aniline"],
223 | "benzaldehyde": ["benzaldehyde"],
224 | "benzene": ["benzene"],
225 | "chcl3": ["chloroform", "chcl3"],
226 | "ch2cl2": ["ch2cl2", "dcm"],
227 | "ccl4": ["ccl4"],
228 | "cs2": ["cs2"],
229 | "dioxane": ["dioxane"],
230 | "dmf": ["dmf"],
231 | "dmso": ["dmso"],
232 | "ether": ["ether"],
233 | "ethanol": ["ethanol"],
234 | "ethylacetate": ["ethylacetate"],
235 | "furane": ["furane"],
236 | "hexadecane": ["hexadecane"],
237 | "hexane": ["hexane"],
238 | "water": ["water", "h2o"],
239 | "methanol": ["methanol"],
240 | "nitromethane": ["nitromethane"],
241 | "thf": ["thf"],
242 | "toluene": ["toluene"],
243 | "octanol": ["octanol"],
244 | "woctanol": ["woctanol", "wet_octanol"],
245 | "phenol": ["phenol"]
246 | },
247 | "cpcm": {
248 | "water": ["h2o", "water"],
249 | "acetone": ["propanone", "acetone"],
250 | "acetonitrile": ["cyanomethane", "acetonitrile"],
251 | "ammonia": ["ammonia"],
252 | "benzene": ["benzene"],
253 | "chloroform": ["chloroform"],
254 | "ch2cl2": ["ch2cl2", "dcm"],
255 | "ccl4": ["ccl4"],
256 | "cyclohexane": ["cyclohexane"],
257 | "dmf": ["dmf"],
258 | "dmso": ["dmso"],
259 | "ethanol": ["ethanol"],
260 | "hexane": ["hexane"],
261 | "methanol": ["methanol"],
262 | "octanol": ["octanol"],
263 | "pyridine": ["pyridine"],
264 | "thf": ["thf"],
265 | "toluene": ["toluene"]
266 | },
267 | "cosmors-fine": {
268 | "propanone_c0": ["acetone", "propanone", "propanone_c0"],
269 | "chcl3_c0": ["chloroform", "trichloromethane", "chcl3", "chcl3_c0"],
270 | "acetonitrile_c0": [
271 | "acetonitrile",
272 | "mecn",
273 | "cyanomethane",
274 | "acetonitrile_c0"
275 | ],
276 | "ch2cl2_c0": ["ch2cl2", "ch2cl2_c0"],
277 | "dimethylsulfoxide_c0": ["dimethylsulfoxide", "dimethylsulfoxide_c0"],
278 | "h2o_c0": ["h2o", "h2o_c0"],
279 | "methanol_c0": ["methanol", "methanol_c0"],
280 | "thf_c0": ["thf", "thf_c0"],
281 | "toluene_c0": ["toluene_c0", "toluene"],
282 | "1-octanol_c0": ["1-octanol_c0", "1-octanol"],
283 | "woctanol": ["woctanol", "wet_octanol"],
284 | "n-hexadecane_c0": ["n-hexadecane_c0", "n-hexadecane"],
285 | "dimethylformamide_c0": ["dimethylformamide_c0", "dimethylformamide"],
286 | "aniline_c0": ["aniline_c0", "aniline"],
287 | "cyclohexane_c0": ["cyclohexane_c0", "cyclohexane"],
288 | "ccl4_c0": ["ccl4_c0", "ccl4"],
289 | "diethylether_c0": ["diethylether_c0", "diethylether"],
290 | "ethanol_c0": ["ethanol_c0", "ethanol"],
291 | "hexane_c0": ["hexane_c0", "hexane"],
292 | "nitromethane_c0": ["nitromethane_c0", "nitromethane"],
293 | "benzaldehyde_c0": ["benzaldehyde", "benzaldehyde_c0"],
294 | "benzene_c0": ["benzene", "benzene_c0"],
295 | "cs2_c0": ["cs2", "cs2_c0"],
296 | "dioxane_c0": ["dioxane", "1,4-dioxane", "dioxane_c0"],
297 | "ethylacetate_c0": ["ethylacetate_c0", "ethylacetate"],
298 | "furane_c0": ["furane", "furane_c0"],
299 | "phenol_c0": ["phenol_c0", "phenol"],
300 | "1,2-dichloroethane_c0": ["1,2-dichloroethane_c0", "1,2-dichloroethane"]
301 | },
302 | "cosmors": {
303 | "propanone_c0": ["acetone", "propanone", "propanone_c0"],
304 | "chcl3_c0": ["chloroform", "trichloromethane", "chcl3", "chcl3_c0"],
305 | "acetonitrile_c0": [
306 | "acetonitrile",
307 | "mecn",
308 | "cyanomethane",
309 | "acetonitrile_c0"
310 | ],
311 | "ch2cl2_c0": ["ch2cl2", "ch2cl2_c0"],
312 | "dimethylsulfoxide_c0": ["dimethylsulfoxide", "dimethylsulfoxide_c0"],
313 | "h2o_c0": ["h2o", "h2o_c0"],
314 | "methanol_c0": ["methanol", "methanol_c0"],
315 | "thf_c0": ["thf", "thf_c0"],
316 | "toluene_c0": ["toluene_c0", "toluene"],
317 | "1-octanol_c0": ["1-octanol_c0", "1-octanol"],
318 | "woctanol": ["woctanol", "wet_octanol"],
319 | "n-hexadecane_c0": ["n-hexadecane_c0", "n-hexadecane"],
320 | "dimethylformamide_c0": ["dimethylformamide_c0", "dimethylformamide"],
321 | "aniline_c0": ["aniline_c0", "aniline"],
322 | "cyclohexane_c0": ["cyclohexane_c0", "cyclohexane"],
323 | "ccl4_c0": ["ccl4_c0", "ccl4"],
324 | "diethylether_c0": ["diethylether_c0", "diethylether"],
325 | "ethanol_c0": ["ethanol_c0", "ethanol"],
326 | "hexane_c0": ["hexane_c0", "hexane"],
327 | "nitromethane_c0": ["nitromethane_c0", "nitromethane"],
328 | "benzaldehyde_c0": ["benzaldehyde", "benzaldehyde_c0"],
329 | "benzene_c0": ["benzene", "benzene_c0"],
330 | "cs2_c0": ["cs2", "cs2_c0"],
331 | "dioxane_c0": ["dioxane", "1,4-dioxane", "dioxane_c0"],
332 | "ethylacetate_c0": ["ethylacetate_c0", "ethylacetate"],
333 | "furane_c0": ["furane", "furane_c0"],
334 | "phenol_c0": ["phenol_c0", "phenol"],
335 | "1,2-dichloroethane_c0": ["1,2-dichloroethane_c0", "1,2-dichloroethane"]
336 | },
337 | "dcosmors": {
338 | "acetonitrile": ["cyanomethane", "acetonitrile", "mecn"],
339 | "aniline": ["aminobenzene", "phenylamine", "aniline"],
340 | "benzene": ["benzene"],
341 | "ccl4": ["ccl4"],
342 | "chcl3": ["chloroform", "chcl3"],
343 | "cyclohexane": ["cyclohexane"],
344 | "diethylether": ["diethylether"],
345 | "dimethylsulfoxide": ["dimethylsulfoxide"],
346 | "ethanol": ["ethanol"],
347 | "h2o": ["h2o", "water"],
348 | "hexadecane": ["hexadecane"],
349 | "hexane": ["hexane"],
350 | "methanol": ["methanol"],
351 | "nitromethane": ["nitromethane"],
352 | "octanol": ["octanol"],
353 | "propanone": ["propanone"],
354 | "thf": ["thf"],
355 | "toluene": ["toluene"],
356 | "wet-octanol": ["wet_octanol", "woctanol"]
357 | },
358 | "cosmo": {
359 | "acetonitrile": ["cyanomethane", "acetonitrile", "mecn"],
360 | "aniline": ["aminobenzene", "phenylamine", "aniline"],
361 | "benzene": ["benzene"],
362 | "ccl4": ["ccl4"],
363 | "chcl3": ["chloroform", "chcl3"],
364 | "cyclohexane": ["cyclohexane"],
365 | "diethylether": ["diethylether"],
366 | "dimethylsulfoxide": ["dimethylsulfoxide"],
367 | "ethanol": ["ethanol"],
368 | "h2o": ["h2o", "water"],
369 | "hexadecane": ["hexadecane"],
370 | "hexane": ["hexane"],
371 | "methanol": ["methanol"],
372 | "nitromethane": ["nitromethane"],
373 | "octanol": ["octanol"],
374 | "propanone": ["propanone"],
375 | "thf": ["thf"],
376 | "toluene": ["toluene"],
377 | "wet-octanol": ["wet_octanol", "woctanol"]
378 | }
379 | }
380 |
--------------------------------------------------------------------------------
/src/censo/assets/solvents.json:
--------------------------------------------------------------------------------
1 | {
2 | "smd": [
3 | "1,1,1-trichloroethane",
4 | "1,1,2-trichloroethane",
5 | "1,2,4-trimethylbenzene",
6 | "1,2-dibromoethane",
7 | "1,2-dichloroethane",
8 | "1,2-ethanediol",
9 | "1,4-dioxane",
10 | "1-bromo-2-methylpropane",
11 | "1-bromooctane",
12 | "1-bromopentane",
13 | "1-bromopropane",
14 | "1-butanol",
15 | "1-chlorohexane",
16 | "1-chloropentane",
17 | "1-chloropropane",
18 | "1-decanol",
19 | "1-fluorooctane",
20 | "1-heptanol",
21 | "1-hexanol",
22 | "1-hexene",
23 | "1-hexyne",
24 | "1-iodobutane",
25 | "1-iodohexadecane",
26 | "1-iodopentane",
27 | "1-iodopropane",
28 | "1-nitropropane",
29 | "1-nonanol",
30 | "1-octanol",
31 | "1-pentanol",
32 | "1-pentene",
33 | "1-propanol",
34 | "2,2,2-trifluoroethanol",
35 | "2,2,4-trimethylpentane",
36 | "2,4-dimethylpentane",
37 | "2,4-dimethylpyridine",
38 | "2,6-dimethylpyridine",
39 | "2-bromopropane",
40 | "2-butanol",
41 | "2-chlorobutane",
42 | "2-heptanone",
43 | "2-hexanone",
44 | "2-methoxyethanol",
45 | "2-methyl-1-propanol",
46 | "2-methyl-2-propanol",
47 | "2-methylpentane",
48 | "2-methylpyridine",
49 | "2-nitropropane",
50 | "2-octanone",
51 | "2-pentanone",
52 | "2-propanol",
53 | "2-propen-1-ol",
54 | "e-2-pentene",
55 | "3-methylpyridine",
56 | "3-pentanone",
57 | "4-heptanone",
58 | "4-methyl-2-pentanone",
59 | "4-methylpyridine",
60 | "5-nonanone",
61 | "acetic acid",
62 | "acetone",
63 | "acetonitrile",
64 | "acetophenone",
65 | "aniline",
66 | "anisole",
67 | "benzaldehyde",
68 | "benzene",
69 | "benzonitrile",
70 | "benzyl alcohol",
71 | "bromobenzene",
72 | "bromoethane",
73 | "bromoform",
74 | "butanal",
75 | "butanoic acid",
76 | "butanone",
77 | "butanonitrile",
78 | "butyl ethanoate",
79 | "butylamine",
80 | "n-butylbenzene",
81 | "sec-butylbenzene",
82 | "tert-butylbenzene",
83 | "carbon disulfide",
84 | "carbon tetrachloride",
85 | "chlorobenzene",
86 | "chloroform",
87 | "a-chlorotoluene",
88 | "o-chlorotoluene",
89 | "m-cresol", "o-cresol",
90 | "cyclohexane",
91 | "cyclohexanone",
92 | "mecn",
93 | "ccl4",
94 | "cyclopentane",
95 | "cyclopentanol",
96 | "cyclopentanone",
97 | "decalin (cis/trans mixture)",
98 | "cis-decalin",
99 | "n-decane",
100 | "dibromomethane",
101 | "dibutylether",
102 | "o-dichlorobenzene",
103 | "e-1,2-dichloroethene",
104 | "z-1,2-dichloroethene",
105 | "dichloromethane",
106 | "diethyl ether",
107 | "diethyl sulfide",
108 | "diethylamine",
109 | "diiodomethane",
110 | "diisopropyl ether",
111 | "cis-1,2-dimethylcyclohexane",
112 | "dimethyl disulfide",
113 | "n,n-dimethylacetamide",
114 | "n,n-dimethylformamide",
115 | "dimethylsulfoxide",
116 | "diphenylether",
117 | "dipropylamine",
118 | "n-dodecane",
119 | "ethanethiol",
120 | "ethanol",
121 | "ethyl ethanoate",
122 | "ethyl methanoate",
123 | "ethyl phenyl ether",
124 | "ethylbenzene",
125 | "fluorobenzene",
126 | "formamide",
127 | "formic acid",
128 | "n-heptane",
129 | "n-hexadecane",
130 | "n-hexane",
131 | "hexanoic acid",
132 | "iodobenzene",
133 | "iodoethane",
134 | "iodomethane",
135 | "isopropylbenzene",
136 | "p-isopropyltoluene",
137 | "mesitylene",
138 | "methanol",
139 | "methyl benzoate",
140 | "methyl butanoate",
141 | "methyl ethanoate",
142 | "methyl methanoate",
143 | "methyl propanoate",
144 | "n-methylaniline",
145 | "methylcyclohexane",
146 | "n-methylformamide",
147 | "nitrobenzene",
148 | "nitroethane",
149 | "nitromethane",
150 | "o-nitrotoluene",
151 | "n-nonane",
152 | "n-octane",
153 | "n-pentadecane",
154 | "pentanal",
155 | "n-pentane",
156 | "pentanoic acid",
157 | "pentyl ethanoate",
158 | "pentylamine",
159 | "perfluorobenzene",
160 | "propanal",
161 | "propanoic acid",
162 | "propanonitrile",
163 | "propyl ethanoate",
164 | "propylamine",
165 | "pyridine",
166 | "tetrachloroethene",
167 | "tetrahydrofuran",
168 | "tetrahydrothiophene-s,s-dioxide",
169 | "tetralin",
170 | "thiophene",
171 | "thiophenol",
172 | "toluene",
173 | "trans-decalin",
174 | "tributylphosphate",
175 | "trichloroethene",
176 | "triethylamine",
177 | "n-undecane",
178 | "water",
179 | "xylene (mixture)",
180 | "m-xylene",
181 | "o-xylene",
182 | "p-xylene",
183 | "dmf",
184 | "dmso",
185 | "phno2",
186 | "meno2",
187 | "thf"
188 | ],
189 | "xtb": [
190 | "acetone",
191 | "acetonitrile",
192 | "aniline",
193 | "benzaldehyde",
194 | "benzene",
195 | "chcl3",
196 | "ch2cl2",
197 | "ccl4",
198 | "cs2",
199 | "dioxane",
200 | "dmf",
201 | "dmso",
202 | "ether",
203 | "ethanol",
204 | "ethylacetate",
205 | "furane",
206 | "hexadecane",
207 | "hexane",
208 | "h2o",
209 | "water",
210 | "methanol",
211 | "nitromethane",
212 | "thf",
213 | "toluene",
214 | "octanol",
215 | "woctanol",
216 | "phenol"
217 | ],
218 | "cpcm": [
219 | "water",
220 | "acetone",
221 | "acetonitrile",
222 | "ammonia",
223 | "benzene",
224 | "chloroform",
225 | "ch2cl2",
226 | "ccl4",
227 | "cyclohexane",
228 | "dmf",
229 | "dmso",
230 | "ethanol",
231 | "hexane",
232 | "methanol",
233 | "octanol",
234 | "pyridine",
235 | "thf",
236 | "toluene"
237 | ],
238 | "cosmors": [
239 | "propanone_c0",
240 | "chcl3_c0",
241 | "acetonitrile_c0",
242 | "ch2cl2_c0",
243 | "dimethylsulfoxide_c0",
244 | "h2o_c0",
245 | "methanol_c0",
246 | "thf_c0",
247 | "toluene_c0",
248 | "1-octanol_c0",
249 | "woctanol",
250 | "n-hexadecane_c0",
251 | "dimethylformamide_c0",
252 | "aniline_c0",
253 | "cyclohexane_c0",
254 | "ccl4_c0",
255 | "diethylether_c0",
256 | "ethanol_c0",
257 | "hexane_c0",
258 | "nitromethane_c0",
259 | "benzaldehyde_c0",
260 | "benzene_c0",
261 | "cs2_c0",
262 | "dioxane_c0",
263 | "ethylacetate_c0",
264 | "furane_c0",
265 | "phenol_c0",
266 | "1,2-dichloroethane_c0"
267 | ],
268 | "dcosmors": [
269 | "acetonitrile",
270 | "aniline",
271 | "benzene",
272 | "ccl4",
273 | "chcl3",
274 | "cyclohexane",
275 | "diethylether",
276 | "dimethylsulfoxide",
277 | "ethanol",
278 | "h2o",
279 | "hexadecane",
280 | "hexane",
281 | "methanol",
282 | "nitromethane",
283 | "octanol",
284 | "propanone",
285 | "thf",
286 | "toluene",
287 | "wet-octanol"
288 | ]
289 | }
--------------------------------------------------------------------------------
/src/censo/assets/solvents_dc.json:
--------------------------------------------------------------------------------
1 | {
2 | "acetone": 20.7,
3 | "acetonitrile": 36.6,
4 | "aniline": 6.9,
5 | "benzaldehyde": 18.2,
6 | "benzene": 2.3,
7 | "ccl4": 2.2,
8 | "ch2cl2": 9.1,
9 | "chcl3": 4.8,
10 | "cs2": 2.6,
11 | "cyclohexane": 2.0,
12 | "dichloroethane": 10.125,
13 | "diethylether": 4.4,
14 | "dioxane": 2.2,
15 | "dmf": 38.3,
16 | "dmso": 47.2,
17 | "ethanol": 24.6,
18 | "ethylacetate": 5.9,
19 | "furan": 3.0,
20 | "h2o": 80.1,
21 | "hexadecane": 2.1,
22 | "hexane": 1.9,
23 | "methanol": 32.7,
24 | "nitromethane": 38.2,
25 | "octane": 1.94,
26 | "octanol": 9.9,
27 | "phenol": 8.0,
28 | "thf": 7.6,
29 | "toluene": 2.4,
30 | "woctanol": 8.1
31 | }
--------------------------------------------------------------------------------
/src/censo/assets/supporting_info.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grimme-lab/CENSO/3326db4579d1f630c28fce17e3b3e2d66070b8bd/src/censo/assets/supporting_info.json
--------------------------------------------------------------------------------
/src/censo/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grimme-lab/CENSO/3326db4579d1f630c28fce17e3b3e2d66070b8bd/src/censo/cli/__init__.py
--------------------------------------------------------------------------------
/src/censo/cli/cml_parser.py:
--------------------------------------------------------------------------------
1 | """
2 | definition of internal defaults, checking of logic for parameter combinations,
3 | cml parsing
4 | """
5 |
6 | import os
7 | from ..params import START_DESCR
8 | import argparse
9 |
10 |
11 | def parse(argv=None) -> argparse.Namespace:
12 | """
13 | Process commandline arguments
14 |
15 | NOTE: on args with the action 'store_const' with const=True, this is on purpose so as long as the flag is not set,
16 | the arg Namespace evaluates to None.
17 | """
18 |
19 | parser = argparse.ArgumentParser(
20 | description=START_DESCR,
21 | prog="censo",
22 | )
23 |
24 | groups = []
25 |
26 | # RUN SETTINGS
27 | groups.append(parser.add_argument_group("RUN SETTINGS"))
28 | groups[0].add_argument(
29 | "-i",
30 | "--input",
31 | dest="inp",
32 | type=str,
33 | help="Relative path to ensemble file, e.g. crest_conformers.xyz (default). ",
34 | default="crest_conformers.xyz",
35 | )
36 | groups[0].add_argument(
37 | "-n",
38 | "--nconf",
39 | dest="nconf",
40 | type=int,
41 | help="The first 'nconf' conformers will be considered.",
42 | )
43 | groups[0].add_argument(
44 | "-c",
45 | "--charge",
46 | dest="charge",
47 | default=0,
48 | type=int,
49 | help="Integer charge of the investigated molecule.",
50 | )
51 | groups[0].add_argument(
52 | "-u",
53 | "--unpaired",
54 | dest="unpaired",
55 | default=0,
56 | type=int,
57 | help="Integer number of unpaired electrons of the investigated molecule.",
58 | )
59 | groups[0].add_argument(
60 | "-v",
61 | "--version",
62 | dest="version",
63 | action="store_true",
64 | help="Print CENSO version and exit.",
65 | )
66 | groups[0].add_argument(
67 | "--cleanup",
68 | dest="cleanup",
69 | action="store_true",
70 | help="Delete unneeded files from current working directory.",
71 | )
72 | groups[0].add_argument(
73 | "--cleanup_all",
74 | dest="cleanup_all",
75 | action="store_true",
76 | help="Delete all CENSO files from previous runs from current working directory. "
77 | "Stronger than -cleanup !",
78 | )
79 | groups[0].add_argument(
80 | "--new-config",
81 | dest="writeconfig",
82 | action="store_true",
83 | help="Write new configuration file, which is placed into the current "
84 | "directory.",
85 | )
86 | groups[0].add_argument(
87 | "--inprc",
88 | dest="inprcpath",
89 | help="Use to provide a path to the CENSO configuration file if you want to use a different one"
90 | " than the default (~/.censo2rc).",
91 | )
92 | groups[0].add_argument(
93 | "--maxcores",
94 | dest="maxcores",
95 | type=int,
96 | help="Number of cores that should be used for CENSO on the machine. If this is not provided CENSO will use "
97 | "the maximum number available. By default this is determined by os.cpu_count().",
98 | default=os.cpu_count(),
99 | )
100 | groups[0].add_argument(
101 | "-O",
102 | "--omp",
103 | dest="omp",
104 | type=int,
105 | help="Number of OpenMP threads, e.g. 4. Effectively translates to the number of cores used per calculation "
106 | "if load balancing is disabled.",
107 | )
108 | groups[0].add_argument(
109 | "--omp-min",
110 | dest="ompmin",
111 | type=int,
112 | help="Minimum number of OpenMP threads per process, default is 4. This is mostly important if load balancing is enabled.",
113 | )
114 | groups[0].add_argument(
115 | "--loglevel",
116 | dest="loglevel",
117 | help="Set the loglevel for all modules to a specified level.",
118 | choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
119 | )
120 | groups[0].add_argument(
121 | "--reload",
122 | dest="reload",
123 | nargs="+",
124 | help="Reload data from json output files. List all file names separated by spaces. "
125 | "Note that all conformers from the current ensemble need to be included in the output data keys.",
126 | )
127 |
128 | # GENERAL SETTINGS
129 | groups.append(parser.add_argument_group("GENERAL SETTINGS"))
130 | groups[1].add_argument(
131 | "-T",
132 | "--temperature",
133 | dest="temperature",
134 | type=float,
135 | help="Temperature in Kelvin for thermostatistical evaluation.",
136 | )
137 | # groups[1].add_argument(
138 | # "--trange",
139 | # dest="trange",
140 | # nargs=3,
141 | # metavar=("start", "end", "step"),
142 | # type=float,
143 | # help="specify a temperature range [start, end, step] e.g.: 250.0 300.0 10.0"
144 | # " resulting in the range [250.0, 260.0, 270.0, 280.0, 290.0, 300.0].",
145 | # )
146 | groups[1].add_argument(
147 | "--bhess",
148 | dest="bhess",
149 | action="store_const",
150 | const=True,
151 | help="Uses SPH and applies structure constraint to input/DFT geometry "
152 | "for mRRHO calcuation. ",
153 | )
154 | groups[1].add_argument(
155 | "--consider-sym",
156 | dest="consider_sym",
157 | action="store_const",
158 | const=True,
159 | help="Consider symmetry in mRRHO calcuation (based on desy xtb threshold). ",
160 | )
161 | groups[1].add_argument(
162 | "--rmsdbias",
163 | dest="rmsdbias",
164 | action="store_const",
165 | const=True,
166 | help="Applies constraint to rmsdpot.xyz to be consistent to CREST. ",
167 | )
168 | groups[1].add_argument(
169 | "--sm-rrho",
170 | dest="sm_rrho",
171 | type=str,
172 | help="Solvation model used in xTB GmRRHO calculation. Applied if not in "
173 | "gas-phase. Options are 'gbsa' or 'alpb'.",
174 | )
175 | groups[1].add_argument(
176 | "--evaluate-rrho",
177 | dest="evaluate_rrho",
178 | action="store_const",
179 | const=True,
180 | help="Evaluate mRRHO contribution.",
181 | )
182 | groups[1].add_argument(
183 | "-s",
184 | "--solvent",
185 | dest="solvent",
186 | type=str,
187 | help="Solvent to be used for Gsolv calculation.",
188 | )
189 | groups[1].add_argument(
190 | "--gas-phase",
191 | dest="gas-phase",
192 | action="store_const",
193 | const=True,
194 | help="Run calculation in gas-phase, overriding all solvation settings.",
195 | )
196 | groups[1].add_argument(
197 | "--imagthr",
198 | dest="imagthr",
199 | type=float,
200 | help="threshold for inverting imaginary frequencies for thermo in cm-1,"
201 | " e.g. -30.0.",
202 | )
203 | groups[1].add_argument(
204 | "--sthr",
205 | dest="sthr",
206 | type=float,
207 | help="Rotor cut-off for thermo in cm-1, e.g. 50.0.",
208 | )
209 | groups[1].add_argument(
210 | "--scale",
211 | dest="scale",
212 | type=float,
213 | help="Scaling factor for frequencies, e.g. 1.0.",
214 | )
215 | """
216 | groups[1].add_argument(
217 | "--vapor_pressure",
218 | "-vp",
219 | dest="vapor_pressure",
220 | action="store_true",
221 | help="Gsolv is evaluated for the input molecule in its solution (same). "
222 | "Only possible with COSMO-RS.",
223 | )
224 | """
225 |
226 | # PRESCREENING SETTINGS
227 | # groups.append(parser.add_argument_group("PRESCREENING SETTINGS"))
228 |
229 | # SCREENING SETTINGS
230 | # groups.append(parser.add_argument_group("SCREENING SETTINGS"))
231 |
232 | # OPTIMIZATION SETTINGS
233 | # groups.append(parser.add_argument_group("OPTIMIZATION SETTINGS"))
234 |
235 | # REFINEMENT SETTINGS
236 | # groups.append(parser.add_argument_group("REFINEMENT SETTINGS"))
237 |
238 | # NMR SETTINGS
239 | # groups.append(parser.add_argument_group("NMR SETTINGS"))
240 |
241 | # OPTROT SETTINGS
242 | # groups.append(parser.add_argument_group("OPTROT SETTINGS"))
243 |
244 | # UVVIS SETTINGS
245 | # groups.append(parser.add_argument_group("UVVIS SETTINGS"))
246 |
247 | # leave these options out for now, implementation for cml complicated
248 | """
249 | groups[7].add_argument(
250 | "-freqOR",
251 | "--freqOR",
252 | dest="freq_or",
253 | nargs="*",
254 | required=False,
255 | type=float,
256 | help="Frequencies to evaluate specific rotation at in nm, e.g. 589 "
257 | "or 589 700 to evaluate at 598 nm and 700 nm.",
258 | )
259 | groups[6].add_argument(
260 | "-couplings",
261 | "--couplings",
262 | dest="couplings",
263 | action="store_true",
264 | required=False,
265 | help="Option to run coupling constant calculations. Options are ???.",
266 | )
267 | groups[6].add_argument(
268 | "-shieldings",
269 | "--shieldings",
270 | dest="shieldings",
271 | action="store_true",
272 | required=False,
273 | help="Option to run shielding constant calculations. Options are ???.",
274 | )
275 | groups[6].add_argument(
276 | "-hactive",
277 | "--hactive",
278 | dest="h_active",
279 | action="store_true",
280 | required=False,
281 | help="Investigates hydrogen nuclei in coupling and shielding calculations."
282 | "choices=[???]",
283 | )
284 | groups[6].add_argument(
285 | "-cactive",
286 | "--cactive",
287 | dest="c_active",
288 | action="store_true",
289 | required=False,
290 | help="Investigates carbon nuclei in coupling and shielding calculations."
291 | "choices=[???]",
292 | )
293 | groups[6].add_argument(
294 | "-factive",
295 | "--factive",
296 | dest="f_active",
297 | action="store_true",
298 | required=False,
299 | help="Investigates fluorine nuclei in coupling and shielding calculations."
300 | "choices=[???]",
301 | )
302 | groups[6].add_argument(
303 | "-siactive",
304 | "--siactive",
305 | dest="si_active",
306 | action="store_true",
307 | required=False,
308 | help="Investigates silicon nuclei in coupling and shielding calculations."
309 | "choices=[???]",
310 | )
311 | groups[6].add_argument(
312 | "-pactive",
313 | "--pactive",
314 | dest="p_active",
315 | action="store_true",
316 | required=False,
317 | help="Investigates phosophorus nuclei in coupling and shielding calculations."
318 | "choices=[???]",
319 | )
320 | groups[4].add_argument(
321 | "-crestcheck",
322 | "--crestcheck",
323 | dest="crestcheck",
324 | action="store_true",
325 | required=False,
326 |
327 | help="Option to sort out conformers after DFT ensembleopt which CREST "
328 | "identifies as identical or rotamers of each other. \nThe identification/"
329 | "analysis is always performed, but the removal of conformers has to "
330 | "be the choice of the user. Options are: [???]", # TODO
331 | )
332 | groups[4].add_argument(
333 | "-macro",
334 | dest="macrocycles",
335 | action="store_const",
336 | const=True,
337 | required=False,
338 | help="Option to use macrocycles for geometry optimization."
339 | )
340 | groups[4].add_argument(
341 | "-optlevel2",
342 | "--optlevel2",
343 | dest="optlevel2",
344 | default=None,
345 | required=False,
346 | help="Option to set the optlevel in part2, only if optimizing with the xTB-optimizer!"
347 | "Allowed values are ***", # TODO
348 | )
349 | groups[4].add_argument(
350 | "-optcycles",
351 | "--optcycles",
352 | dest="optcycles",
353 | required=False,
354 | type=int,
355 | help="number of cycles in ensemble optimizer.",
356 | )
357 | groups[4].add_argument(
358 | "-hlow",
359 | "--hlow",
360 | dest="hlow",
361 | required=False,
362 | type=float,
363 | help="Lowest force constant in ANC generation (real), used by xTB-optimizer.",
364 | )
365 | groups[4].add_argument(
366 | "-spearmanthr",
367 | "--spearmanthr",
368 | dest="spearmanthr",
369 | required=False,
370 | help="Value between -1 and 1 for the spearman correlation coeffient threshold, "
371 | "employed in the ensemlbe optimizer",
372 | )
373 | groups[4].add_argument(
374 | "-radsize",
375 | "--radsize",
376 | dest="radsize",
377 | required=False,
378 | type=int,
379 | help="Radsize used in the ensembleopt and only for r2scan-3c!",
380 | )
381 | group1.add_argument(
382 | "-func",
383 | "--functional",
384 | dest="func",
385 | choices=options.value_options["func"],
386 | action="store",
387 | required=False,
388 |
389 | help="Functional for geometry ensembleopt (used in part2) and "
390 | "single-points in part1",
391 | )
392 | group1.add_argument(
393 | "-basis",
394 | "--basis",
395 | dest="basis",
396 | action="store",
397 | required=False,
398 |
399 | help="Basis set employed together with the functional (func) for the "
400 | "low level single point in part1 und ensembleopt in part2.",
401 | )
402 | group1.add_argument(
403 | "-prog",
404 | "--prog",
405 | choices=options.value_options["prog"],
406 | dest="prog",
407 | required=False,
408 |
409 | help="QM-program used in part0, part1 and part2 either 'orca' or 'tm'.",
410 | )
411 | group10.add_argument(
412 | "-part0_gfnv",
413 | "--part0_gfnv",
414 | dest="part0_gfnv",
415 | choices=options.value_options["part0_gfnv"],
416 |
417 | action="store",
418 | required=False,
419 | help="GFNn-xTB version employed for calculating the GFNn-xTB "
420 | "single point in part0. "
421 | f"Allowed values are [{', '.join(options.value_options['part0_gfnv'])}]",
422 | )
423 | group3.add_argument(
424 | "-part1",
425 | "--part1",
426 | choices=["on", "off"],
427 | dest="part1",
428 | action="store",
429 | required=False,
430 |
431 | help="Option to turn the prescreening evaluation (part1) 'on' or 'off'.",
432 | )
433 | group3.add_argument(
434 | "-smgsolv1",
435 | "--smgsolv1",
436 | choices=options.value_options["smgsolv1"],
437 | dest="smgsolv1",
438 | action="store",
439 | required=False,
440 |
441 | help="Solvent model for the Gsolv evaluation in part1. This can either be"
442 | " an implicit solvation or an additive solvation model. "
443 | f"Allowed values are [{', '.join(options.value_options['smgsolv1'])}]",
444 | )
445 | group10.add_argument(
446 | "-prescreening_threshold",
447 | "-prethr",
448 | "--thresholdpre",
449 | dest="prescreening_threshold",
450 |
451 | action="store",
452 | type=float,
453 | required=False,
454 | help=(
455 | "Threshold in kcal/mol. All conformers in part0 (prescreening)"
456 | " with a relativ energy below the threshold are considered for part1."
457 | ),
458 | )
459 | group4.add_argument(
460 | "-sm2",
461 | "--solventmodel2",
462 | choices=options.value_options.get("sm2"),
463 | dest="sm2",
464 | action="store",
465 | required=False,
466 |
467 | help="Solvent model employed during the geometry ensembleopt in part2."
468 | "The solvent model sm2 is not used for Gsolv evaluation, but for the "
469 | "implicit effect on a property (e.g. the geometry in the ensembleopt).",
470 | )
471 | group4.add_argument(
472 | "-smgsolv2",
473 | "--smgsolv2",
474 | choices=options.value_options["smgsolv2"],
475 | dest="smgsolv2",
476 | action="store",
477 | required=False,
478 |
479 | help="Solvent model for the Gsolv (solvation contribution to free energy) "
480 | "calculation in part2. Either the solvent"
481 | " model of the ensembleopt (sm2) or an additive solvation model. "
482 | f"Allowed values are [{', '.join(options.value_options['smgsolv2'])}]",
483 | ) """
484 |
485 | # TODO - keep this?
486 | """ group1.add_argument(
487 | "-prog_rrho",
488 | "--prog_rrho",
489 | choices=options.value_options["prog_rrho"],
490 | dest="prog_rrho",
491 | required=False,
492 |
493 | help="QM-program for mRRHO contribution in part1 2 and 3, currently only 'xtb'.",
494 | ) """
495 |
496 | # TODO - keep?
497 | """ group4.add_argument(
498 | "-ancopt",
499 | choices=["on"], # there is no other option right now!
500 | dest="ancopt",
501 | required=False,
502 |
503 | help="Option to use xtb as driver for the xTB-optimizer in part2. "
504 | "Which is currently not changeable!",
505 | ) """
506 |
507 | args = parser.parse_args(argv)
508 |
509 | return args
510 |
--------------------------------------------------------------------------------
/src/censo/cli/interface.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import sys
4 | from os import getcwd
5 | from argparse import ArgumentError
6 | from datetime import timedelta
7 | from typing import cast
8 |
9 | from .cml_parser import parse
10 | from ..configuration import configure, override_rc
11 | from ..ensembledata import EnsembleData
12 | from ..ensembleopt import Prescreening, Screening, Optimization, Refinement
13 | from ..part import CensoPart
14 | from ..properties import NMR, UVVis
15 | from ..params import __version__, Config
16 | from ..utilities import print
17 | from ..logging import setup_logger, set_loglevel
18 |
19 | logger = setup_logger(__name__)
20 |
21 |
def _exit_status(code) -> int:
    """
    Translate the payload of a SystemExit into an integer exit status.

    Follows the interpreter's own convention: None means success (0), an
    integer is used as is, anything else is printed and counts as failure (1).
    """
    if code is None:
        return 0
    if isinstance(code, int):
        return code
    print(code)
    return 1


def entry_point(argv: list[str] | None = None) -> int:
    """
    Console entry point to execute CENSO from the command line.

    Args:
        argv (list[str], optional): Command line arguments. If None, the
            parser falls back to its default argument source.

    Returns:
        int: Exit status, 0 on success and non-zero otherwise.
    """
    try:
        args = parse(argv=argv)
    except ArgumentError as e:
        print(e.message)
        return 1
    except SystemExit as e:
        return _exit_status(e.code)

    if not any(vars(args).values()):
        print("CENSO needs at least one argument!")
        return 1

    # Print program call (the arguments that were actually parsed)
    print("CALL: " + " ".join(sys.argv if argv is None else argv))

    try:
        ensemble = startup(args)
    except SystemExit as e:
        # startup exits on purpose for actions like --version or --cleanup
        return _exit_status(e.code)

    # Print general settings once
    CensoPart(ensemble, print_info=True)

    # Evaluated lazily, so the "run" setting of a part is looked up right before
    # it is executed
    run = (
        part
        for part in (Prescreening, Screening, Optimization, Refinement, NMR, UVVis)
        if part.get_settings()["run"]
    )

    total_runtime = 0.0
    for part in run:
        res, runtime = part.run(ensemble)
        print(f"Ran {res.name} in {runtime:.2f} seconds!")
        total_runtime += runtime

    elapsed = timedelta(seconds=int(total_runtime))
    hours, remainder = divmod(elapsed.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    # timedelta.seconds only covers the part below one day
    hours += elapsed.days * 24

    print(f"\nRan CENSO in {hours:02d}:{minutes:02d}:{seconds:02d}")

    print("\nCENSO all done!")
    return 0
70 |
71 |
def startup(args) -> EnsembleData:
    """
    Set up an ensemble object using the given command line arguments.

    Actions that need no complete setup (version, cleanup, cleanup_all,
    writeconfig) are executed first and terminate via SystemExit with status 0.
    If args.inprcpath is given, that file is passed to configure.

    Args:
        args: Namespace generated by the command line parser.

    Returns:
        EnsembleData: Ensemble with the conformers read from args.inp and, if
            requested via args.reload, the reloaded output data.
    """
    # get most important infos for current run
    cwd = getcwd()

    # run actions for which no complete setup is needed
    if args.version:
        print(__version__)
        sys.exit(0)
    elif args.cleanup:
        cleanup_run(cwd)
        print("Removed files and going to exit!")
        sys.exit(0)
    elif args.cleanup_all:
        cleanup_run(cwd, complete=True)
        print("Removed files and going to exit!")
        sys.exit(0)
    elif args.writeconfig:
        configure(rcpath=cwd, create_new=True)
        sys.exit(0)
    elif args.inprcpath is not None:
        configure(args.inprcpath)

    if args.loglevel:
        set_loglevel(args.loglevel)

    # Override settings with command line arguments
    override_rc(args)

    # initialize ensemble, runinfo is filled in by read_input
    ensemble = EnsembleData()

    # read input and setup conformers
    ensemble.read_input(
        args.inp, charge=args.charge, unpaired=args.unpaired, nconf=args.nconf
    )

    if args.maxcores:
        Config.NCORES = args.maxcores

    if args.omp:
        Config.OMP = args.omp

    if args.ompmin:
        Config.OMPMIN = args.ompmin

    # if data should be reloaded, do it here (exactly once per file)
    if args.reload:
        for filename in args.reload:
            ensemble.read_output(os.path.join(cwd, filename))

    # END of setup
    # -> ensemble.conformers contains all conformers with their info from input (sorted by CREST energy if possible)
    # -> output data is reloaded if wanted

    return ensemble
133 |
134 |
def cleanup_run(cwd, complete=False):
    """
    Delete all unneeded files after asking the user for confirmation.

    Directories and files below cwd whose *names* contain one of the known
    patterns are removed. Only names are matched, never the path leading to
    cwd, so a pattern occurring in cwd itself or one of its parents cannot
    cause cwd to be deleted.

    Args:
        cwd: Directory to clean up (searched recursively).
        complete (bool): If True, matching files with 'ensemble' in their name
            are removed as well.

    Raises:
        SystemExit: With status 0 if the user does not confirm with 'yes' or 'y'.
    """

    # files containing these patterns are deleted
    to_delete = [
        "censo.log",
        "0_PRESCREENING",
        "1_SCREENING",
        "2_OPTIMIZATION",
        "3_REFINEMENT",
        "4_NMR",
        "6_UVVIS",
    ]

    if complete:
        print(
            "Removing ALL files generated by previous CENSO runs, including ensembles!"
        )

    print(
        f"Be aware that files in {cwd} and subdirectories with names containing the following substrings "
        f"will be deleted:"
    )
    for sub in to_delete:
        print(sub)

    print("Do you wish to continue?")
    print("Please type 'yes' or 'no':")

    ui = input()
    if ui.strip().lower() not in ["yes", "y"]:
        print("Aborting cleanup!")
        sys.exit(0)

    # iterate over files in cwd and subdirs recursively and remove them if to delete
    for subdir, dirs, files in os.walk(cwd):
        remaining = []
        for dirname in dirs:
            dirpath = os.path.join(subdir, dirname)
            # symlinks are left alone: os.walk does not follow them and rmtree
            # refuses to operate on them
            if any(s in dirname for s in to_delete) and not os.path.islink(dirpath):
                print(f"Removing: {dirpath}")
                shutil.rmtree(dirpath)
            else:
                remaining.append(dirname)

        # prune in place so that os.walk does not descend into removed directories
        dirs[:] = remaining

        for file in files:
            if any(s in file for s in to_delete) and (
                complete or "ensemble" not in file
            ):
                print(f"Removing: {file}")
                os.remove(os.path.join(subdir, file))
182 |
--------------------------------------------------------------------------------
/src/censo/configuration.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import configparser
4 | from argparse import Namespace
5 |
6 | from .params import Config
7 | from .qm_processor import QmProc
8 | from .utilities import DfaHelper, SolventHelper, print
9 |
10 | parts = {}
11 |
12 |
def configure(rcpath: str | None = None, create_new: bool = False):
    """
    Configures the application based on the provided configuration file path.
    If no configuration file path is provided, it searches for the default configuration file.
    If none is found either, the default settings are used and the program paths
    are determined automatically (as far as possible).

    Args:
        rcpath (str): Path to the configuration file. If create_new is True, this is
            the directory the new configuration file is written into.
        create_new (bool): If True, a new configuration file will be created at rcpath.

    Returns:
        None

    Raises:
        FileNotFoundError: If rcpath is given, is not a file and create_new is False.
    """
    # Try to find the rcfile in the user's home directory
    # if no configuration file path is provided
    if rcpath is None:
        censorc_path = find_rcfile()
    else:
        if not os.path.isfile(rcpath) and not create_new:
            raise FileNotFoundError(f"No configuration file found at {rcpath}.")
        censorc_path = rcpath

    # Set up the DFAHelper
    DfaHelper.set_dfa_dict(os.path.join(Config.ASSETS_PATH, "censo_dfa_settings.json"))

    # Set up the SolventHelper
    SolventHelper.set_solvent_dict(
        os.path.join(Config.ASSETS_PATH, "censo_solvents_db.json")
    )

    # map the part names to their respective classes
    # NOTE: the DFAHelper and the databases should be setup before the parts are imported,
    # otherwise there will be errors in the CensoPart._options
    from .part import CensoPart
    from .ensembleopt import Prescreening, Screening, Optimization, Refinement
    from .properties import NMR, UVVis

    global parts
    parts = {
        "prescreening": Prescreening,
        "screening": Screening,
        "optimization": Optimization,
        "refinement": Refinement,
        "nmr": NMR,
        "uvvis": UVVis,
    }

    # if explicitly told to create a new configuration file, do so
    if create_new:
        if rcpath is None:
            # If not chosen otherwise, the new rcfile is written in the home dir
            censorc_path = os.path.join(os.path.expanduser("~"), "censo2rc_NEW")
        else:
            censorc_path = os.path.join(rcpath, "censo2rc_NEW")
        write_rcfile(censorc_path)
    else:
        # Initialize default settings
        # Make sure that settings are initialized even if there is no section for this part in the rcfile
        # General settings should always be configured first
        CensoPart.set_general_settings({})
        for part in parts.values():
            part.set_settings({}, complete=True)

        paths = None

        # Read rcfile if it exists
        if censorc_path is not None:
            # Read the actual configuration file once (located at rcpath if not None,
            # otherwise rcfile in home dir)
            settings_dict = read_rcfile(censorc_path, silent=False)

            # first set general settings (the defaults from above stay in place
            # if the rcfile has no 'general' section)
            if "general" in settings_dict:
                CensoPart.set_general_settings(settings_dict["general"])

            # Then the remaining settings for each part
            for section, settings in settings_dict.items():
                if section in parts:
                    parts[section].set_settings(settings)
                # NOTE: if section is not in the parts names, it will be ignored

            paths = settings_dict.get("paths")
            if paths is None:
                print(f"WARNING: No paths found in {censorc_path}")

        if paths is None:
            # Try to automatically determine program paths (not guaranteed to succeed)
            paths = find_program_paths()

        # Update the paths for the processors
        QmProc._paths.update(paths)

    # create user assets folder if it does not exist
    os.makedirs(Config.USER_ASSETS_PATH, exist_ok=True)
101 |
102 |
def read_rcfile(path: str, silent: bool = True) -> dict[str, dict[str, str]]:
    """
    Read the configuration file at 'path' and return the settings as a dictionary.

    All values are returned as the raw strings found in the file; no type
    conversion is performed here.

    Args:
        path (str): Path to the configuration file.
        silent (bool): If True, no messages will be printed.

    Returns:
        dict[str, dict[str, str]]: Maps every section name to a dictionary of the
            settings in that section.
    """
    if not silent:
        print(f"Reading configuration file from {path}.")

    parser = configparser.ConfigParser()
    with open(path, "r") as file:
        parser.read_file(file)

    return {section: dict(parser[section]) for section in parser.sections()}
124 |
125 |
def write_rcfile(path: str) -> None:
    """
    Write new configuration file with default settings into file at 'path'.
    Also reads program paths from preexisting configuration file or tries to
    determine the paths automatically.

    A preexisting file at 'path' is renamed to '<path>_OLD' first.

    Args:
        path (str): Path to the new configuration file.

    Returns:
        None
    """
    # what to do if there is an existing configuration file
    external_paths = None
    if os.path.isfile(path):
        backup_path = f"{path}_OLD"
        print(
            f"An existing configuration file has been found at {path}.\n",
            f"Renaming existing file to {backup_path}.\n",
        )
        # Read program paths from the existing configuration file
        print("Reading program paths from existing configuration file ...")
        external_paths = read_program_paths(path)

        # Rename existing file (replaces an older backup, if any)
        os.replace(path, backup_path)

    # collect all default settings from parts and feed them into the parser
    from .part import CensoPart

    # work on a copy so that the module level 'parts' mapping is not altered
    all_parts = {**parts, "general": CensoPart}

    # The content is assembled before the file is opened, so that a failure here
    # does not leave an empty configuration file behind
    parser = configparser.ConfigParser()
    parser.read_dict(
        {
            partname: {
                settingname: setting["default"]
                for settingname, setting in part.get_options().items()
            }
            for partname, part in all_parts.items()
        }
    )

    # Try to get paths from 'which'
    if external_paths is None:
        print("Trying to determine program paths automatically ...")
        external_paths = find_program_paths()

    parser["paths"] = external_paths

    print(f"Writing new configuration file to {path} ...")
    with open(path, "w", newline=None) as rcfile:
        parser.write(rcfile)

    print(
        f"\nA new configuration file was written into {path}.\n"
        "You should adjust the settings to your needs and set the program paths.\n"
        "Right now the settings are at their default values.\n"
    )

    if Config.CENSORCNAME not in path:
        print(
            f"Additionally make sure that the file name is '{Config.CENSORCNAME}'.\n"
            f"Currently it is '{os.path.split(path)[-1]}'.\n"
        )
190 |
191 |
192 | def read_program_paths(path: str) -> dict[str, str] | None:
193 | """
194 | Read program paths from the configuration file at 'path'
195 | """
196 | with open(path, "r") as inp:
197 | parser = configparser.ConfigParser()
198 | parser.read_file(inp)
199 |
200 | try:
201 | return dict(parser["paths"])
202 | except KeyError:
203 | print(f"WARNING: No paths found in {path}")
204 | return None
205 |
206 |
def find_program_paths() -> dict[str, str]:
    """
    Try to determine program paths automatically.

    Every program is looked up using shutil.which. Programs that cannot be found
    get an empty string as path. If orca is found, 'orcaversion' is additionally
    guessed from the name of the directory containing the executable.

    Returns:
        dict[str, str]: Maps the path setting names to the determined paths.
    """
    # TODO - for now only the most important ones are implemented
    mapping = {
        "orcapath": "orca",
        "xtbpath": "xtb",
        "mpshiftpath": "mpshift",
        "escfpath": "escf",
        # "crestpath": "crest",
        # "cosmorssetup": None,
        # "dbpath": None,
        # "cosmothermversion": None,
    }

    paths = {}
    for pathname, program in mapping.items():
        # entries without a program name cannot be looked up
        found = shutil.which(program) if program is not None else None
        paths[pathname] = found or ""

    # if orca was found try to determine orca version from the path (kinda hacky)
    # expects a parent directory named like 'orca_5_0_3...'
    if paths["orcapath"] != "":
        try:
            parent_dir = paths["orcapath"].split(os.sep)[-2]
        except IndexError:
            # path consists of a single component, there is no parent directory
            paths["orcaversion"] = ""
        else:
            paths["orcaversion"] = parent_dir[5:10].replace("_", ".")

    return paths
245 |
246 |
def find_rcfile() -> str | None:
    """
    Look for a configuration file named Config.CENSORCNAME in the home directory.

    Returns the path to that file if it exists, otherwise None.
    """
    candidate = os.path.join(os.path.expanduser("~"), Config.CENSORCNAME)
    return candidate if os.path.isfile(candidate) else None
258 |
259 |
def override_rc(args: Namespace) -> None:
    """
    Apply settings given on the command line on top of the settings from the
    rcfile (or the default settings).

    A setting is only overridden if the namespace has an attribute of the same
    name whose value is not None.

    Args:
        args(Namespace): Namespace generated by command line parser.

    Returns:
        None
    """
    from .part import CensoPart

    # part specific settings first, general settings (CensoPart) last
    targets = [*parts.values(), CensoPart]
    for target in targets:
        for name in target.get_settings():
            value = getattr(args, name, None)
            if value is None:
                continue
            target.set_setting(name, value)
278 |
--------------------------------------------------------------------------------
/src/censo/datastructure.py:
--------------------------------------------------------------------------------
1 | from functools import reduce
2 | from typing import TypedDict
3 |
4 | from .params import BOHR2ANG, Config
5 |
6 |
class Atom(TypedDict):
    """
    Entry for a single atom as stored in GeometryData.xyz.
    """

    # element symbol of the atom
    element: str
    # cartesian coordinates of the atom
    xyz: list[float]
10 |
11 |
12 | class GeometryData:
13 | """
14 | Geometry contains geometry information as well as identifier to match it to a MoleculeData object
15 | in order to keep the object small, since it has to be pickled for multiprocessing
16 | """
17 |
18 | def __init__(self, name: str, xyz: list[str]):
19 | """
20 | takes an identifier and the geometry lines from the xyz-file as input
21 | """
22 |
23 | # name of the linked MoleculeData
24 | self.name: str = name
25 |
26 | # list of dicts preserving the order of the input file for easy mapping
27 | # the coordinates should be given in Angstrom
28 | # self.xyz = [{"element": "H", "xyz": [0.0, 0.0, 0.0]}, {"element": "C", "xyz": [0.0, 0.0 0.7]}, ...]
29 | self.xyz: list[Atom] = []
30 |
31 | # set up xyz dict from the input lines
32 | for line in xyz:
33 | spl = [s.strip() for s in line.split()]
34 | element = spl[0].capitalize()
35 | self.xyz.append({"element": element, "xyz": [float(i) for i in spl[1:]]})
36 |
37 | # Count atoms
38 | self.nat: int = len(self.xyz)
39 |
40 | def toorca(self) -> list[str | float]:
41 | """
42 | method to convert the internal cartesian coordinates to a data format usable by the OrcaParser
43 | """
44 | coord = []
45 | for atom in self.xyz:
46 | coord.append(" ".join([atom["element"]] + [str(c) for c in atom["xyz"]]))
47 |
48 | return coord
49 |
50 | def tocoord(self) -> list[str]:
51 | """
52 | method to convert the internal cartesian coordinates (self.xyz) to coord file format (for tm or xtb)
53 | """
54 | coord = ["$coord\n"]
55 | for atom in self.xyz:
56 | coord.append(
57 | reduce(
58 | lambda x, y: f"{x} {y}",
59 | list(map(lambda x: float(x) / BOHR2ANG, atom["xyz"]))
60 | + [f"{atom['element']}\n"],
61 | )
62 | )
63 |
64 | coord.append("$end\n")
65 |
66 | return coord
67 |
68 | def fromcoord(self, path: str) -> None:
69 | """
70 | method to convert the content of a coord file to cartesian coordinates for the 'xyz' attribute
71 | """
72 | with open(path, "r") as file:
73 | lines = file.readlines()
74 |
75 | self.xyz = []
76 | for line in lines:
77 | if not line.startswith("$"):
78 | coords = line.split()
79 | element = coords[-1]
80 | cartesian_coords = [float(x) * BOHR2ANG for x in coords[:-1]]
81 | self.xyz.append({"element": element, "xyz": cartesian_coords})
82 | elif line.startswith("$end"):
83 | break
84 |
85 | def fromxyz(self, path: str) -> None:
86 | """
87 | Method to convert the content of an xyz file to cartesian coordinates for the 'xyz' attribute
88 | """
89 | with open(path, "r") as file:
90 | lines = file.readlines()
91 |
92 | self.xyz = []
93 | # Just skip the first two lines
94 | for line in lines[2:]:
95 | split = line.split()
96 | element = split[0]
97 | coords = [float(x) for x in split[1:]]
98 | self.xyz.append({"element": element, "xyz": coords})
99 |
100 | def toxyz(self) -> list[str]:
101 | """
102 | method to convert self.xyz to xyz-file format
103 | """
104 | lines = [
105 | f"{self.nat}\n",
106 | f"{self.name}\n",
107 | ]
108 | for atom in self.xyz:
109 | lines.append(
110 | f"{atom['element']} {atom['xyz'][0]:.10f} {atom['xyz'][1]:.10f} {atom['xyz'][2]:.10f}\n"
111 | )
112 |
113 | return lines
114 |
115 |
class MoleculeData:
    """
    Stores the data belonging to one conformer: its name, geometry, degeneration factor,
    initial xtb energy and the paths to its MO-files.

    The conformers' MoleculeData are set up in censo.ensembledata.EnsembleData.setup_conformers
    """

    def __init__(self, name: str, xyz: list[str]):
        """
        takes geometry lines from the xyz-file as input to pass it to the GeometryData constructor

        Args:
            name (str): Name of the conformer, also used as name of its GeometryData.
            xyz (list[str]): Geometry lines from the xyz-file.
        """

        # stores a name for printing and (limited) between-run comparisons
        self.name: str = name

        # stores the geometry info to have a small object to be used for multiprocessing
        self.geom: GeometryData = GeometryData(self.name, xyz)

        # stores the degeneration factor of the conformer
        self.degen: int = 1

        # stores the initial (biased) xtb energy from CREST (or whatever was used before)
        # None until a value is assigned
        self.xtb_energy: float | None = None

        # list to store the paths to all MO-files from the jobs run for this conformer
        # might also include tuples if open shell and tm is used
        self.mo_paths: list[str | tuple] = []
141 |
142 |
class ParallelJob:
    """
    Bundles everything belonging to one job: the conformer geometry, the jobtypes to
    execute, the number of cores, as well as preparation info, metadata, results and flags.
    """

    def __init__(self, conf: GeometryData, jobtype: list[str]):
        """
        Args:
            conf (GeometryData): Geometry of the conformer the job is run for.
            jobtype (list[str]): Jobtypes to execute for the processor.
        """
        # conformer for the job
        self.conf: GeometryData = conf

        # list of jobtypes to execute for the processor
        self.jobtype: list[str] = jobtype

        # number of cores to use
        # starts out at the configured minimum (Config.OMPMIN)
        self.omp = Config.OMPMIN

        # stores path to an mo file which is supposed to be used as a guess
        # In case of open shell tm calculation this can be a tuple of files
        self.mo_guess = None

        # Stores all the important information for preparation of the input files for every jobtype
        # Always contains the 'general' key, which basically stores settings from the general section
        # that are supposed to be applied for every job
        # Also should always contain the name of the part where the job is launched from, as well as charge and
        # number of unpaired electrons
        # NOTE: prepinfo.keys() and items in jobtype are not necessarily the same! E.g. for NMR
        # jobtype = ["nmr"], prepinfo.keys() = ["nmr_s"], or prepinfo.keys() = ["nmr_s", "nmr_j"], ...
        self.prepinfo: dict[str, dict[str, any]] = {
            "general": {},
            "partname": "",
            "charge": 0,
            "unpaired": 0,
        }

        # store metadata, is updated by the processor
        # structure e.g.: {"sp": {"success": True, "error": None}, "xtb_rrho": {"success": False, ...}, ...}
        # always contains the "mo_path" key
        self.meta: dict[str, any] = {"mo_path": None}

        # store the results of the job
        self.results: dict[str, any] = {}

        # stores all flags for the jobtypes
        self.flags: dict[str, any] = {}
183 |
--------------------------------------------------------------------------------
/src/censo/ensembledata.py:
--------------------------------------------------------------------------------
1 | """
2 | stores ensembledata and conformers
3 | functionality for program setup
4 | """
5 |
6 | import os
7 | import re
8 | import json
9 |
10 | from .datastructure import MoleculeData
11 | from .logging import setup_logger
12 | from .params import DIGILEN
13 | from .utilities import check_for_float, print, t2x, Factory
14 |
15 | logger = setup_logger(__name__)
16 |
17 |
18 | class EnsembleData:
19 | """
20 | Class to store conformer rotamer ensembles for use in CENSO.
21 | """
22 |
    def __init__(self, input_file: str | None = None):
        """
        Setup an EnsembleData object, which contains a list of conformers, read from
        input_file. If input_file is not passed here, conformers can be read using
        read_input.

        Args:
            input_file (str, optional): Path to the ensemble input file. Defaults to None.
                If this is provided, read_input is called with charge=0 and unpaired=0,
                i.e. the charge and unpaired electron count are assumed to be 0.
        """
        # contains run-specific info that may change during runtime
        # initialized in EnsembleData.read_input
        self.runinfo = {
            "charge": None,
            "unpaired": None,
        }

        # stores the conformers with all info
        # NOTE: this is deliberately chosen to be a list since lists are ordered
        self.__conformers: list[MoleculeData] = []

        # stores the conformers which were sorted out
        self.rem: list[MoleculeData] = []

        # A list containing all part references in order of execution or loading
        self.results = []

        # read the conformers right away if an input file was passed
        if input_file is not None:
            self.read_input(input_file, charge=0, unpaired=0)
52 |
53 | @property
54 | def conformers(self):
55 | """
56 | Returns the conformers list. Includes a check wether there are any conformers left.
57 | """
58 | # TODO - no checks for now
59 | return self.__conformers
60 |
61 | @conformers.setter
62 | def conformers(self, confs):
63 | assert all(isinstance(conf, MoleculeData) for conf in confs)
64 | self.__conformers = confs
65 |
66 | def read_output(self, outpath: str) -> None:
67 | """
68 | Read json output file of a previous execution. Will try to load data into current conformer ensemble, matching
69 | based on names. If a conformer name does not exist in the current ensemble it will be ignored. If a conformer
70 | does not exist in the output data RuntimeError will be raised.
71 |
72 | Args:
73 | outpath (str): Path to the output file.
74 |
75 | Returns:
76 | None
77 | """
78 |
79 | with open(outpath, "r") as file:
80 | data = json.load(file)
81 |
82 | # Check if all conformers from the current ensemble are also found in the output data
83 | if not all(conf.name in data["results"] for conf in self.conformers):
84 | raise RuntimeError(
85 | "Not all conformers from the current ensemble are found in the output data."
86 | )
87 |
88 | # Create a part instance and load in the results
89 | part = Factory.create(data["partname"], self)
90 | part.data.update(data)
91 |
92 | logger.info(f"Reloaded results from {outpath}.")
93 |
94 | self.results.append(part)
95 |
96 | def read_input(
97 | self,
98 | input_path: str,
99 | charge: int = None,
100 | unpaired: int = None,
101 | nconf: int = None,
102 | append: bool = False,
103 | ) -> None:
104 | """
105 | Read ensemble input file. Should be a file in xyz-file format with all the conformers in consecutive order.
106 |
107 | Args:
108 | input_path (str): Path to the ensemble input file.
109 | charge (int, optional): Charge of the system. Defaults to None. Overwrites preexisting values.
110 | unpaired (int, optional): Number of unpaired electrons. Defaults to None. Overwrites preexisting values.
111 | nconf (int, optional): Number of conformers to consider. Defaults to None, so all conformers are read.
112 | append (bool, optional): If True, the conformers will be appended to the existing ensemble. Defaults to False.
113 |
114 | Returns:
115 | None
116 |
117 | Raises:
118 | RuntimeError: If the charge or the number of unpaired electrons is not defined.
119 | """
120 | # If $coord in file => tm format, needs to be converted to xyz
121 | with open(input_path, "r") as inp:
122 | lines = inp.readlines()
123 | if any("$coord" in line for line in lines):
124 | _, nat, input_path = t2x(
125 | input_path, writexyz=True, outfile="converted.xyz"
126 | )
127 | else:
128 | nat = int(lines[0].split()[0])
129 |
130 | # Set charge and unpaired via funtion args
131 | self.runinfo["charge"] = charge
132 | self.runinfo["unpaired"] = unpaired
133 |
134 | if self.runinfo["charge"] is None or self.runinfo["unpaired"] is None:
135 | raise RuntimeError("Charge or number of unpaired electrons not defined.")
136 |
137 | confs = self.__setup_conformers(input_path)
138 | if len(confs) == 0:
139 | logger.warning("Input file is empty!")
140 |
141 | if nconf is None:
142 | nconf = len(confs)
143 |
144 | if append:
145 | self.conformers.append(confs[:nconf])
146 | else:
147 | self.conformers = confs[:nconf]
148 |
149 | try:
150 | self.conformers.sort(key=lambda x: x.xtb_energy)
151 | except TypeError:
152 | # Only sort if all conformers have a defined precalculated energy
153 | pass
154 |
155 | # Print information about read ensemble
156 | print(
157 | f"Read {len(self.conformers)} conformers.\n",
158 | "Number of atoms:".ljust(DIGILEN // 2, " ") + f"{nat}" + "\n",
159 | "Charge:".ljust(DIGILEN // 2, " ") + f"{self.runinfo['charge']}" + "\n",
160 | "Unpaired electrons:".ljust(DIGILEN // 2, " ")
161 | + f"{self.runinfo['unpaired']}"
162 | + "\n",
163 | sep="",
164 | )
165 |
166 | def __setup_conformers(self, input_path: str) -> list[MoleculeData]:
167 | """
168 | open ensemble input
169 | split into conformers
170 | create MoleculeData objects out of coord input
171 | read out energy from xyz file if possible
172 | In principle this can also read xyz-files with molecules of different sizes.
173 |
174 | Args:
175 | input_path (str): Path to the ensemble input file.
176 |
177 | Returns:
178 | list[MoleculeData]: A list of MoleculeData objects.
179 | """
180 | # open ensemble input
181 | with open(input_path, "r") as file:
182 | lines = file.readlines()
183 |
184 | # Get rid of unnecessary empty lines
185 | # Basically this filters out all only-whitespace lines except the comment lines after the number of atoms is declared
186 | lines = list(
187 | filter(
188 | lambda line: not (
189 | bool(re.match(r"^\s*$", line)) # matches only whitespace chars
190 | and len(lines[lines.index(line) - 1].split()) != 1
191 | ),
192 | lines,
193 | )
194 | )
195 |
196 | # assuming consecutive xyz-file format
197 | # (every conf geometry is separated by a line with split length of 4 followed by a line of split length 1)
198 | #
199 | # 14 <-- split_index refers to this line (this is line 0 for the first conf)
200 | # CONF12 -22.521386
201 | # H x.xxxxxxxx x.xxxxxxx x.xxxxxx
202 | # ...
203 | split_indices = [
204 | i
205 | for i in range(len(lines))
206 | if i == 0 or (len(lines[i].split()) == 1 and len(lines[i - 1].split()) == 4)
207 | ]
208 |
209 | conformers = []
210 | for i, split_index in enumerate(split_indices):
211 | # Check whether the names are stored in the ensemble file,
212 | # use those if possible because of crest rotamer files
213 | if "CONF" in lines[split_index + 1]:
214 | confname = next(
215 | s for s in lines[split_index + 1].split() if "CONF" in s
216 | )
217 | else:
218 | # Start counting from 1
219 | confname = f"CONF{i + 1}"
220 |
221 | # Determine end of geometry definition for this conf
222 | # which is either the next conf definition or EOF
223 | conf_end_index = (
224 | split_indices[i + 1] if i + 1 < len(split_indices) else len(lines)
225 | )
226 |
227 | # Create a new conformer object and append it to the ensemble
228 | conformers.append(
229 | MoleculeData(
230 | confname,
231 | lines[split_index + 2 : conf_end_index],
232 | )
233 | )
234 |
235 | # get precalculated energies if possible
236 | # precalculated energy set to 0.0 if it cannot be found
237 | conformers[i].xtb_energy = check_for_float(lines[split_index + 1]) or 0.0
238 |
239 | return conformers
240 |
241 | def remove_conformers(self, confnames: list[str]) -> None:
242 | """
243 | Remove the conformers with the names listed in 'confnames' from further consideration.
244 | The removed conformers will be stored in self.rem.
245 |
246 | Args:
247 | confnames (list[str]): A list of conformer names.
248 |
249 | Returns:
250 | None
251 | """
252 | if len(confnames) > 0:
253 | for confname in confnames:
254 | remove = next(c for c in self.conformers if c.name == confname)
255 |
256 | # pop item from conformers and insert this item at index 0 in rem
257 | self.rem.insert(0, self.conformers.pop(self.conformers.index(remove)))
258 |
259 | # Log removed conformers
260 | logger.debug(f"Removed {remove.name}.")
261 |
262 | def dump(self, filename: str) -> None:
263 | """
264 | dump the conformers to a file
265 | """
266 | with open(os.path.join(f"{os.getcwd()}", f"{filename}.xyz"), "w") as file:
267 | for conf in self.conformers:
268 | file.writelines(conf.geom.toxyz())
269 |
--------------------------------------------------------------------------------
/src/censo/ensembleopt/__init__.py:
--------------------------------------------------------------------------------
1 | from .prescreening import Prescreening
2 | from .screening import Screening
3 | from .optimization import Optimization
4 | from .refinement import Refinement
5 | from .optimizer import EnsembleOptimizer
6 |
--------------------------------------------------------------------------------
/src/censo/ensembleopt/optimizer.py:
--------------------------------------------------------------------------------
1 | from ..logging import setup_logger
2 | from ..params import AU2KCAL, DIGILEN, PLENGTH
3 | from ..part import CensoPart
4 | from ..utilities import (
5 | DfaHelper,
6 | SolventHelper,
7 | format_data,
8 | h1,
9 | print,
10 | timeit,
11 | )
12 |
13 | logger = setup_logger(__name__)
14 |
15 |
class EnsembleOptimizer(CensoPart):
    """
    Boilerplate class for all ensemble optimization steps.
    """

    _grid = ""

    @classmethod
    def _validate(cls, tovalidate: dict[str, any]) -> None:
        """
        Validates the type of each setting in the given dict. Also potentially validate if the setting is allowed by
        checking with cls._options.
        This is the part-specific version of the method. It will run the general validation first and then
        check part-specific logic.

        Args:
            tovalidate (dict[str, any]): The dict containing the settings to be validated.

        Returns:
            None

        Raises:
            ValueError: If the setting is not allowed or the value is not within the allowed options.
            NotImplementedError: If the 'dummy' functional is requested together with TURBOMOLE.
        """
        # General validation
        super()._validate(tovalidate)

        # Part-specific validation
        # NOTE: tovalidate is always complete
        prog = tovalidate["prog"]
        func = tovalidate["func"]

        # Check availability of func for prog
        if func not in cls._options["func"]["options"][prog]:
            raise ValueError(
                f"Functional {func} is not available for {prog}. "
                "Check spelling w.r.t. CENSO functional naming convention (case insensitive)."
            )

        # Not every part defines a solvent model, so the remaining checks are optional
        sm = tovalidate.get("sm", None)
        if sm is not None:
            # Check sm availability for prog
            if sm not in cls._options["sm"]["options"][prog]:
                raise ValueError(f"Solvent model {sm} not available for {prog}.")

            # Check solvent availability for sm
            solvent = cls.get_general_settings()["solvent"]
            if solvent not in CensoPart._options["solvent"]["options"][sm]:
                raise ValueError(f"Solvent {solvent} is not available for {sm}. ")

        # dummy/template functionality not implemented yet for TM
        if prog == "tm" and func == "dummy":
            raise NotImplementedError(
                "Dummy functionality is not implemented for use with TURBOMOLE."
            )

    @timeit
    @CensoPart._create_dir
    def __call__(self, cut: bool = True) -> None:
        """
        Boilerplate run logic for any ensemble optimization step. The '_optimize' method should be implemented for every
        class respectively.

        Args:
            cut (bool, optional): Passed on to '_optimize'. Defaults to True.

        Returns:
            None
        """
        # print instructions
        self._print_info()

        # Store number of conformer put in
        self.data["nconf_in"] = len(self._ensemble.conformers)

        # Perform the actual optimization logic
        self._optimize(cut=cut)
        self.data["nconf_out"] = len(self._ensemble.conformers)

        # Resort the ensemble by the gtot values stored in the results of this part
        self._ensemble.conformers.sort(
            key=lambda conf: self.data["results"][conf.name]["gtot"],
        )

        # DONE

    def _optimize(self, cut: bool = True):
        """
        Part-specific optimization logic, to be implemented by every subclass.
        """
        raise NotImplementedError

    def _write_results(self):
        """
        Part-specific result output, to be implemented by every subclass.
        """
        raise NotImplementedError

    def _output(self) -> None:
        """
        Implements printouts and writes for any output data.
        Necessary to implement for each part.
        """
        # Write out results
        self._write_results()

        # Print comparison with previous parts
        if len(self._ensemble.results) > 1:
            self._print_comparison()

        # Print information about ensemble after optimization
        self._print_update()

        # dump ensemble
        self._ensemble.dump(f"{self._part_nos[self.name]}_{self.name.upper()}")

    def _dft_prepinfo(
        self, with_opt_settings: bool = False, sm_required: bool = False
    ) -> dict:
        """
        Builds the lookup information shared by all DFT based jobtypes (sp, gsolv, opt, xtb_opt).

        Args:
            with_opt_settings (bool, optional): Also include the geometry optimization settings
                (optcycles, hlow, optlevel, macrocycles). Defaults to False.
            sm_required (bool, optional): If True, the 'sm' setting is read unconditionally when
                solvation is used, so a missing setting raises KeyError instead of silently
                skipping solvation. Defaults to False.

        Returns:
            dict: Lookup information for a single DFT jobtype.
        """
        settings = self.get_settings()
        general = self.get_general_settings()

        info = {
            "func_name": DfaHelper.get_name(settings["func"], settings["prog"]),
            "func_type": DfaHelper.get_type(settings["func"]),
            "disp": DfaHelper.get_disp(settings["func"]),
            "basis": settings["basis"],
            "grid": self._grid,
            "template": settings["template"],
            "gcp": True,
        }

        if with_opt_settings:
            info["optcycles"] = settings["optcycles"]
            info["hlow"] = settings["hlow"]
            info["optlevel"] = settings["optlevel"]
            info["macrocycles"] = settings["macrocycles"]
            # "constraints": self.constraints,
            # this is set to a path if constraints should be used, otherwise None

        # Only look up solvent if solvation is used
        # NOTE: the membership test only matches the exact key 'sm'
        if not general["gas-phase"] and (sm_required or "sm" in settings):
            info["sm"] = settings["sm"]
            info["solvent_key_prog"] = SolventHelper.get_solvent(
                settings["sm"], general["solvent"]
            )

        self._apply_tm_d4_gcp_workaround(info)

        return info

    def _apply_tm_d4_gcp_workaround(self, info: dict) -> None:
        """
        Resolves the D4/GCP conflict for TURBOMOLE in place: small basis sets keep GCP and
        switch to D3, all other basis sets keep D4 and turn off GCP.

        Args:
            info (dict): Lookup information of a single DFT jobtype, needs the keys
                'disp', 'gcp' and 'basis'.

        Returns:
            None
        """
        settings = self.get_settings()
        if not (settings["prog"] == "tm" and info["disp"] == "d4" and info["gcp"]):
            return

        # Basis sets including the following naming patterns should definitely use GCP
        # The patterns are lowercase, so the basis name is compared case-insensitively
        # (otherwise e.g. 'def2-SV(P)' or 'def2-TZVP' would never match)
        gcp_basis_patterns = ["sv", "dz", "tz", "mini", "6-31g(d)"]
        basis = info["basis"].lower()
        if any(pattern in basis for pattern in gcp_basis_patterns):
            logger.warning(
                "Due to a bug in TURBOMOLE it is currently not possible to use GCP "
                "together with the D4 correction. Switching to D3."
            )
            info["disp"] = DfaHelper.get_disp(settings["func"].replace("d4", "d3"))
        else:
            logger.warning(
                "Due to a bug in TURBOMOLE it is currently not possible to use GCP "
                "together with the D4 correction. Turning off GCP."
            )
            info["gcp"] = False

    def _setup_prepinfo(self, jobtype: list[str]) -> dict[str, dict]:
        """
        Sets up lookup information to be used by the processor in parallel execution. Returns a dictionary
        containing all information for all jobtypes provided.

        Args:
            jobtype (list[str]): list of jobtypes to be run.

        Returns:
            dict[str, dict]: dictionary containing all information for all jobtypes provided.
        """
        prepinfo = {jt: {} for jt in jobtype}

        prepinfo["partname"] = self.name
        prepinfo["charge"] = self._ensemble.runinfo.get("charge")
        prepinfo["unpaired"] = self._ensemble.runinfo.get("unpaired")
        prepinfo["general"] = self.get_general_settings()

        # 'gsolv' jobs reuse the single-point settings, which are stored under 'sp'
        if "sp" in jobtype or "gsolv" in jobtype:
            prepinfo["sp"] = self._dft_prepinfo()

        # TODO - this doesn't look very nice
        if "xtb_gsolv" in jobtype:
            prepinfo["xtb_sp"] = {
                "gfnv": self.get_settings()["gfnv"],
                "solvent_key_xtb": SolventHelper.get_solvent(
                    self.get_general_settings()["sm_rrho"],
                    self.get_general_settings()["solvent"],
                ),
            }
            # gsolv implies that solvation should be used, so no check here
            assert prepinfo["xtb_sp"]["solvent_key_xtb"] is not None

        if "xtb_rrho" in jobtype:
            prepinfo["xtb_rrho"] = {
                "gfnv": self.get_settings()["gfnv"],
            }
            # Only look up solvent if solvation is used
            if not self.get_general_settings()["gas-phase"]:
                prepinfo["xtb_rrho"]["solvent_key_xtb"] = SolventHelper.get_solvent(
                    self.get_general_settings()["sm_rrho"],
                    self.get_general_settings()["solvent"],
                )

        # Only the first matching optimization jobtype is set up
        for jt in ["xtb_opt", "opt"]:
            if jt in jobtype:
                prepinfo[jt] = self._dft_prepinfo(
                    with_opt_settings=True, sm_required=True
                )
                break

        return prepinfo

    def _print_update(self) -> None:
        """
        Prints the number of conformers and the name of the first conformer of the ensemble.
        """
        print("\n")
        print(
            "Number of conformers:".ljust(DIGILEN // 2, " ")
            + f"{len(self._ensemble.conformers)}"
        )

        print(
            "Highest ranked conformer:".ljust(DIGILEN // 2, " ")
            + f"{self._ensemble.conformers[0].name}"
        )
        print("\n")

    def _print_comparison(self) -> None:
        """
        Prints a table with the relative gtot values of the current conformers for every
        ensemble optimization part stored in the ensemble results.
        """
        print(h1(f"{self.name.upper()} RANKING COMPARISON"))

        parts = [p for p in self._ensemble.results if isinstance(p, EnsembleOptimizer)]

        headers = ["CONF#"]
        headers.extend([f"ΔGtot {part.name}" for part in parts])

        # column units
        units = [""]
        units.extend(["[kcal/mol]" for _ in parts])

        # variables for printmap
        # lowest gtot of the current conformers for every part, keyed by part name
        gtotmin = {
            part.name: min(
                part.data["results"][conf.name]["gtot"]
                for conf in self._ensemble.conformers
            )
            for part in parts
        }

        # determines what to print for each conformer in each column
        printmap = {
            "CONF#": lambda conf: conf.name,
        }
        for header, part in zip(headers[1:], parts):
            # The part is bound as a default argument, otherwise every lambda would
            # use the part of the last loop iteration (late binding of closures)
            printmap[header] = (
                lambda conf, partl=part: f"{(partl.data['results'][conf.name]['gtot'] - gtotmin[partl.name]) * AU2KCAL:.2f}"
            )

        rows = [
            [printmap[header](conf) for header in headers]
            for conf in self._ensemble.conformers
        ]

        lines = format_data(headers, rows, units=units)

        # Print everything
        for line in lines:
            print(line, flush=True, end="")

        print("".ljust(int(PLENGTH), "-") + "\n")
336 |
--------------------------------------------------------------------------------
/src/censo/ensembleopt/prescreening.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from ..datastructure import MoleculeData
4 | from ..ensembledata import EnsembleData
5 | from ..logging import setup_logger
6 | from ..parallel import execute
7 | from ..params import AU2KCAL, PLENGTH, Config
8 | from ..utilities import format_data, h1, print, DfaHelper, Factory
9 | from .optimizer import EnsembleOptimizer
10 |
11 | logger = setup_logger(__name__)
12 |
13 |
class Prescreening(EnsembleOptimizer):
    """
    This is supposed to implement a cheap prescreening step using low-cost DFT and possibly
    solvation contributions calculated using xtb.

    The list of conformers is then updated using Gtot (only DFT single-point energy if in gas-phase).
    """

    _grid = "low"

    _options = {
        "threshold": {"default": 4.0},
        "func": {
            "default": "pbe-d4",
            "options": {prog: DfaHelper.get_funcs(prog) for prog in Config.PROGS},
        },
        "basis": {"default": "def2-SV(P)"},
        "prog": {"default": "tm", "options": Config.PROGS},
        "gfnv": {"default": "gfn2", "options": Config.GFNOPTIONS},
        "run": {"default": True},
        "template": {"default": False},
    }

    _settings = {}

    def _run_jobtype(self, jobtype: list[str]) -> None:
        """
        Runs the given jobtypes for all conformers of the ensemble, removes the conformers
        reported as failed and stores the results.

        Args:
            jobtype (list[str]): list of jobtypes to be run.

        Returns:
            None
        """
        # Compile all information required for the preparation of input files in parallel execution step
        prepinfo = self._setup_prepinfo(jobtype)

        # compute results
        # for structure of results from handler.execute look there
        results, failed = execute(
            self._ensemble.conformers,
            self._dir,
            self.get_settings()["prog"],
            prepinfo,
            jobtype,
            copy_mo=self.get_general_settings()["copy_mo"],
            balance=self.get_general_settings()["balance"],
            retry_failed=self.get_general_settings()["retry_failed"],
        )

        # Remove failed conformers
        self._ensemble.remove_conformers(failed)

        # Update results
        self._update_results(results)

    def _optimize(self, cut: bool = True) -> None:
        """
        Runs the calculations required for this part, computes gtot and the Boltzmann
        weights for every remaining conformer and optionally removes all conformers whose
        gtot lies more than the 'threshold' setting (kcal/mol) above the lowest gtot.

        Args:
            cut (bool, optional): If True, apply the threshold cut. Defaults to True.

        Returns:
            None
        """
        # set jobtype to pass to handler
        # TODO - it is not very nice to partially handle 'Screening' settings here
        if self.get_general_settings()["gas-phase"]:
            jobtype = ["sp"]
        elif self.get_settings().get("implicit", False):
            if self.get_settings().get("sm", None) in [
                "cosmors",
                "cosmors-fine",
            ]:
                # If cosmors is used as solvent model the gsolv calculation needs to be done explicitely
                logger.warning(
                    "COSMORS detected as solvation model, this requires explicit calculation of ΔGsolv."
                )
                jobtype = ["gsolv"]
            else:
                # 'implicit' is a special option of Screening that makes CENSO skip the explicit computation of Gsolv
                # Gsolv will still be included in the DFT energy though
                jobtype = ["sp"]
        elif self.name == "prescreening":
            # Only for prescreening the solvation should be calculated with xtb,
            # this is done in a separate run before the DFT single-points
            self._run_jobtype(["xtb_gsolv"])
            jobtype = ["sp"]
        else:
            jobtype = ["gsolv"]

        self._run_jobtype(jobtype)

        # update results for each conformer
        for conf in self._ensemble.conformers:
            # calculate free enthalpy
            self.data["results"][conf.name]["gtot"] = self._gsolv(conf)

        # calculate boltzmann weights from gtot values calculated here
        self._update_results(self._calc_boltzmannweights())

        if cut:
            print("\n")
            # update conformers with threshold
            # threshold is given in kcal/mol, gtot is compared in the units of the results
            threshold = self.get_settings()["threshold"] / AU2KCAL
            limit = min(
                self.data["results"][conf.name]["gtot"]
                for conf in self._ensemble.conformers
            )
            filtered = [
                conf
                for conf in self._ensemble.conformers
                if self.data["results"][conf.name]["gtot"] - limit > threshold
            ]

            # update the conformer list in ensemble (remove confs above the threshold)
            self._ensemble.remove_conformers([conf.name for conf in filtered])
            for conf in filtered:
                print(f"No longer considering {conf.name}.")

    def _gsolv(self, conf: MoleculeData) -> float:
        """
        Prescreening key for conformer sorting
        Calculates Gtot = E (DFT) + Gsolv (xtb) for a given conformer
        In gas-phase only E (DFT) is returned.

        Args:
            conf (MoleculeData): The conformer to calculate Gtot for.

        Returns:
            float: Gtot of the conformer.
        """
        results = self.data["results"][conf.name]

        # Gtot = E (DFT) + Gsolv (xtb)
        if not self.get_general_settings()["gas-phase"]:
            gtot = results["sp"]["energy"] + results["xtb_gsolv"]["gsolv"]
        else:
            gtot = results["sp"]["energy"]

        return gtot

    def _write_results(self) -> None:
        """
        writes:
            E (xtb),
            δE (xtb),
            G_solv (xtb),
            δG_solv,

            E(DFT),
            δE(DFT),

            E(DFT) + G_solv,
            δ(E(DFT) + G_solv)

        also writes data in easily digestible format

        Raises:
            RuntimeError: If solvation is used but neither 'xtb_gsolv' nor 'gsolv' results
                exist for all conformers.
        """
        print(h1(f"{self.name.upper()} SINGLE-POINT RESULTS"))

        results = self.data["results"]
        conformers = self._ensemble.conformers

        # column headers
        headers = [
            "CONF#",
            "E (xTB)",
            "ΔE (xTB)",
            "E (DFT)",
            "ΔE (DFT)",
            "ΔGsolv (xTB)",
            # "δΔGsolv",
            "Gtot",
            "ΔGtot",
            "Boltzmann weight",
        ]

        # column units
        units = [
            "",
            "[Eh]",
            "[kcal/mol]",
            "[Eh]",
            "[kcal/mol]",
            "[kcal/mol]",
            # "[kcal/mol]",
            "[Eh]",
            "[kcal/mol]",
            f"% at {self.get_general_settings().get('temperature', 298.15)} K",
        ]

        # variables for printmap
        has_xtb_gsolv = all("xtb_gsolv" in results[conf.name] for conf in conformers)
        has_gsolv = all("gsolv" in results[conf.name] for conf in conformers)

        # minimal xtb single-point energy
        # Taken over all conformers that have the value, so it is always defined
        # when the 'ΔE (xTB)' column needs it for an individual conformer
        xtb_energies = [
            results[conf.name]["xtb_gsolv"]["energy_xtb_gas"]
            for conf in conformers
            if "xtb_gsolv" in results[conf.name]
        ]
        xtbmin = min(xtb_energies) if xtb_energies else 0.0

        # minimal dft single-point energy
        if has_gsolv:
            dft_energies = {
                conf.name: results[conf.name]["gsolv"]["energy_gas"]
                for conf in conformers
            }
        else:
            dft_energies = {
                conf.name: results[conf.name]["sp"]["energy"] for conf in conformers
            }

        dftmin = min(dft_energies.values())

        # NOTE: there might still be an error if a (xtb_)gsolv calculation failed for a conformer, therefore this should be handled before this step
        if (
            not self.get_general_settings()["gas-phase"]
            and not has_xtb_gsolv
            and not has_gsolv
        ):
            raise RuntimeError(
                "The calculations should have used implicit or additive solvation for all conformers, "
                "but it is missing for at least some conformers."
            )

        # minimal total free enthalpy
        gtotmin = min(self._gsolv(conf) for conf in conformers)

        # determines what to print for each conformer in each column
        printmap = {
            "CONF#": lambda conf: conf.name,
            "E (xTB)": lambda conf: (
                f"{results[conf.name]['xtb_gsolv']['energy_xtb_gas']:.6f}"
                if "xtb_gsolv" in results[conf.name]
                else "---"
            ),
            "ΔE (xTB)": lambda conf: (
                f"{(results[conf.name]['xtb_gsolv']['energy_xtb_gas'] - xtbmin) * AU2KCAL:.2f}"
                if "xtb_gsolv" in results[conf.name]
                else "---"
            ),
            "E (DFT)": lambda conf: f"{dft_energies[conf.name]:.6f}",
            "ΔE (DFT)": lambda conf: f"{(dft_energies[conf.name] - dftmin) * AU2KCAL:.2f}",
            "ΔGsolv (xTB)": lambda conf: (
                f"{results[conf.name]['xtb_gsolv']['gsolv'] * AU2KCAL:.6f}"
                if "xtb_gsolv" in results[conf.name]
                else "---"
            ),
            "Gtot": lambda conf: f"{self._gsolv(conf):.6f}",
            "ΔGtot": lambda conf: f"{(self._gsolv(conf) - gtotmin) * AU2KCAL:.2f}",
            "Boltzmann weight": lambda conf: f"{results[conf.name]['bmw'] * 100:.2f}",
        }

        rows = [[printmap[header](conf) for header in headers] for conf in conformers]

        lines = format_data(headers, rows, units=units)

        # list the averaged free enthalpy of the ensemble
        lines.append(
            "\nBoltzmann averaged free energy/enthalpy of ensemble on input geometries (not DFT optimized):\n"
        )
        lines.append(
            f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} {'avG(T) /a.u.':>14}\n"
        )

        # calculate averaged free enthalpy
        avG = sum(
            results[conf.name]["bmw"] * results[conf.name]["gtot"]
            for conf in conformers
        )

        # calculate averaged free energy
        avE = sum(
            results[conf.name]["bmw"] * results[conf.name]["sp"]["energy"]
            for conf in conformers
        )

        # append the lines for the free energy/enthalpy
        lines.append(
            f"{self.get_general_settings().get('temperature', 298.15):^15} {avE:>14.7f} {avG:>14.7f} <<==part0==\n"
        )
        lines.append("".ljust(int(PLENGTH), "-"))

        # Print everything
        for line in lines:
            print(line, flush=True, end="")

        # write everything to a file
        filename = f"{self._part_nos[self.name]}_{self.name.upper()}.out"
        logger.debug(f"Writing to {os.path.join(os.getcwd(), filename)}.")
        with open(os.path.join(os.getcwd(), filename), "w", newline=None) as outfile:
            outfile.writelines(lines)

        # Additionally, write results in json format
        self._write_json()
346 |
347 |
348 | Factory.register_builder("prescreening", Prescreening)
349 |
--------------------------------------------------------------------------------
/src/censo/ensembleopt/refinement.py:
--------------------------------------------------------------------------------
1 | import os
2 | from ..logging import setup_logger
3 | from ..parallel import execute
4 | from ..params import AU2KCAL, PLENGTH, Config
5 | from ..utilities import format_data, h1, print, DfaHelper, Factory
6 | from .prescreening import Prescreening
7 | from .screening import Screening
8 | from .optimization import Optimization
9 |
10 | logger = setup_logger(__name__)
11 |
12 |
class Refinement(Screening):
    """
    Similar to Screening, however here we use a Boltzmann population cutoff instead of kcal cutoff.
    """

    _grid = "high+"

    __solv_mods = {prog: Config.SOLV_MODS[prog] for prog in Config.PROGS}
    # __gsolv_mods = reduce(lambda x, y: x + y, GConfig.SOLV_MODS.values())

    _options = {
        "threshold": {"default": 0.95},
        "func": {
            "default": "wb97x-v",
            "options": {prog: DfaHelper.get_funcs(prog) for prog in Config.PROGS},
        },
        "basis": {"default": "def2-TZVP"},
        "prog": {"default": "tm", "options": Config.PROGS},
        "sm": {"default": "cosmors", "options": __solv_mods},
        "gfnv": {"default": "gfn2", "options": Config.GFNOPTIONS},
        "run": {"default": True},
        "implicit": {"default": False},
        "template": {"default": False},
    }

    _settings = {}

    def _optimize(self, cut: bool = True) -> None:
        """
        Run the energy evaluation of this part and optionally cut down the ensemble.

        First runs Prescreening._optimize without cutting. If 'evaluate_rrho' is set in the
        general settings, gtot of every conformer is recomputed via self._grrho, using either
        freshly calculated xtb_rrho results or the ones of the most recent Optimization part.
        Afterwards Boltzmann weights are calculated from the gtot values.

        Args:
            cut (bool, optional): Whether to remove conformers from the ensemble based on the
                'threshold' setting (cumulative Boltzmann population). Defaults to True.

        Returns:
            None
        """
        Prescreening._optimize(self, cut=False)

        if self.get_general_settings()["evaluate_rrho"]:
            # Check if evaluate_rrho, then check if optimization was run and use that value, otherwise do xtb_rrho
            if not any(type(p) is Optimization for p in self._ensemble.results):
                jobtype = ["xtb_rrho"]
                prepinfo = self._setup_prepinfo(jobtype)

                # append results to previous results
                results, failed = execute(
                    self._ensemble.conformers,
                    self._dir,
                    self.get_settings()["prog"],
                    prepinfo,
                    jobtype,
                    copy_mo=self.get_general_settings()["copy_mo"],
                    balance=self.get_general_settings()["balance"],
                    retry_failed=self.get_general_settings()["retry_failed"],
                )

                # Remove failed conformers
                self._ensemble.remove_conformers(failed)

                # Update results
                self._update_results(results)

                for conf in self._ensemble.conformers:
                    # calculate new gtot including RRHO contribution
                    self.data["results"][conf.name]["gtot"] = self._grrho(conf)
            else:
                # Use values from most recent optimization rrho
                using_part = [
                    p for p in self._ensemble.results if type(p) is Optimization
                ][-1]

                for conf in self._ensemble.conformers:
                    self.data["results"][conf.name]["xtb_rrho"] = using_part.data[
                        "results"
                    ][conf.name]["xtb_rrho"]
                    self.data["results"][conf.name]["gtot"] = self._grrho(conf)

        # calculate boltzmann weights from gtot values calculated here
        self._update_results(self._calc_boltzmannweights())

        if cut:
            # Get Boltzmann population threshold from settings
            threshold = self.get_settings()["threshold"]

            # Conformer names ordered by ascending gtot
            ranked = [
                conf.name
                for conf in sorted(
                    self._ensemble.conformers,
                    key=lambda x: self.data["results"][x.name]["gtot"],
                )
            ]

            # Keep conformers (in order of ascending gtot) until the accumulated
            # population exceeds the threshold. The names are only counted here and
            # the list is sliced afterwards: removing items from a list while
            # iterating over it skips every second element.
            total_bmw = 0
            nkeep = 0
            for confname in ranked:
                if total_bmw > threshold:
                    break
                total_bmw += self.data["results"][confname]["bmw"]
                nkeep += 1

            # Everything after the kept conformers is removed from the ensemble
            filtered = ranked[nkeep:]

            # Remove conformers
            self._ensemble.remove_conformers(filtered)
            for confname in filtered:
                print(f"No longer considering {confname}.")

            # Recalculate boltzmann weights after cutting down the ensemble
            self._update_results(self._calc_boltzmannweights())

    def _write_results(self) -> None:
        """
        Print the formatted results table of this part and append it to the part's output file.

        Columns written for every conformer in the ensemble:
            CONF#,
            E (DFT),
            ΔGsolv,
            GmRRHO,
            Gtot,
            ΔGtot,
            Boltzmann weight

        Below the table the Boltzmann averaged energy and Gtot of the ensemble are listed.
        Finally, the results are also written via self._write_json().
        """
        print(h1(f"{self.name.upper()} SINGLE-POINT (+ mRRHO) RESULTS"))

        # column headers
        headers = [
            "CONF#",
            "E (DFT)",
            "ΔGsolv",
            "GmRRHO",
            "Gtot",
            "ΔGtot",
            "Boltzmann weight",
        ]

        # column units
        units = [
            "",
            "[Eh]",
            "[Eh]",
            "[Eh]",
            "[Eh]",
            "[kcal/mol]",
            f"% at {self.get_general_settings().get('temperature', 298.15)} K",
        ]

        # minimal gtot, used as reference for ΔGtot
        gtotmin = min(
            self.data["results"][conf.name]["gtot"]
            for conf in self._ensemble.conformers
        )

        # collect all dft energies: the "gsolv" gas-phase energy is only used
        # if every conformer has a "gsolv" entry, otherwise the "sp" energy
        dft_energies = (
            {
                conf.name: self.data["results"][conf.name]["sp"]["energy"]
                for conf in self._ensemble.conformers
            }
            if not all(
                "gsolv" in self.data["results"][conf.name]
                for conf in self._ensemble.conformers
            )
            else {
                conf.name: self.data["results"][conf.name]["gsolv"]["energy_gas"]
                for conf in self._ensemble.conformers
            }
        )

        # maps every column header to a function creating the cell content for a conformer
        printmap = {
            "CONF#": lambda conf: conf.name,
            "E (DFT)": lambda conf: f"{dft_energies[conf.name]:.6f}",
            "ΔGsolv": lambda conf: (
                f"{self._gsolv(conf) - dft_energies[conf.name]:.6f}"
                if "gsolv" in self.data["results"][conf.name]
                else "---"
            ),
            "GmRRHO": lambda conf: (
                f"{self.data['results'][conf.name]['xtb_rrho']['gibbs'][self.get_general_settings()['temperature']]:.6f}"
                if self.get_general_settings()["evaluate_rrho"]
                else "---"
            ),
            "Gtot": lambda conf: f"{self.data['results'][conf.name]['gtot']:.6f}",
            "ΔGtot": lambda conf: f"{(self.data['results'][conf.name]['gtot'] - gtotmin) * AU2KCAL:.2f}",
            "Boltzmann weight": lambda conf: f"{self.data['results'][conf.name]['bmw'] * 100:.2f}",
        }

        rows = [
            [printmap[header](conf) for header in headers]
            for conf in self._ensemble.conformers
        ]

        lines = format_data(headers, rows, units=units)

        # list the averaged free enthalpy of the ensemble
        lines.append(
            "\nBoltzmann averaged free energy/enthalpy of ensemble (high level single-points):\n"
        )
        lines.append(
            f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} {'avG(T) /a.u.':>14}\n"
        )

        # Boltzmann weighted average of gtot
        avG = sum(
            self.data["results"][conf.name]["bmw"]
            * self.data["results"][conf.name]["gtot"]
            for conf in self._ensemble.conformers
        )

        # Boltzmann weighted average of the energy ("sp" energy if available for
        # all conformers, otherwise the "gsolv" gas-phase energy)
        avE = (
            sum(
                self.data["results"][conf.name]["bmw"]
                * self.data["results"][conf.name]["sp"]["energy"]
                for conf in self._ensemble.conformers
            )
            if all(
                "sp" in self.data["results"][conf.name]
                for conf in self._ensemble.conformers
            )
            else sum(
                self.data["results"][conf.name]["bmw"]
                * self.data["results"][conf.name]["gsolv"]["energy_gas"]
                for conf in self._ensemble.conformers
            )
        )

        # append the lines for the free energy/enthalpy
        lines.append(
            f"{self.get_general_settings().get('temperature', 298.15):^15} {avE:>14.7f} {avG:>14.7f} <<==part3==\n"
        )
        lines.append("".ljust(int(PLENGTH), "-") + "\n\n")

        # Print everything
        for line in lines:
            print(line, flush=True, end="")

        # append lines to already existing file
        filename = f"{self._part_nos[self.name]}_{self.name.upper()}.out"
        logger.debug(f"Writing to {os.path.join(os.getcwd(), filename)}.")
        with open(os.path.join(os.getcwd(), filename), "a", newline=None) as outfile:
            outfile.writelines(lines)

        # Additionally, write the results to a json file
        self._write_json()
251 |
252 |
253 | Factory.register_builder("refinement", Refinement)
254 |
--------------------------------------------------------------------------------
/src/censo/logging.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import sys
4 |
__logpath: str = os.path.join(os.getcwd(), "censo.log")
__loglevel = logging.INFO

# All loggers that were configured by setup_logger
__loggers = []

# _loglevel = logging.DEBUG


def setup_logger(name: str, silent: bool = True) -> logging.Logger:
    """
    Initializes and configures a logger with the specified name.

    The logger writes to the logfile and prints messages of level WARNING and above to stdout.
    Calling this function again with the name of an already configured logger returns that
    logger without attaching the handlers a second time.

    Args:
        name (str): The name of the logger.
        silent (bool, optional): Whether to print logpath or not. Defaults to True.

    Returns:
        logging.Logger: The configured logger instance.
    """
    if not silent:
        print(f"LOGFILE CAN BE FOUND AT: {__logpath}")

    # Create a logger instance with the specified name
    logger = logging.getLogger(name)
    logger.setLevel(__loglevel)

    # logging.getLogger returns the same object for the same name, so adding
    # handlers again would duplicate every message and open the logfile once more
    if logger in __loggers:
        return logger

    # Create a FileHandler to log messages to the logpath file
    handler = logging.FileHandler(__logpath)
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(logging.WARNING)

    # Define the log message format
    formatter = logging.Formatter(
        "{asctime:24s}-{name:^24s}-{levelname:^10s}- {message}", style="{"
    )
    stream_formatter = logging.Formatter("{levelname:^10s}- {message}", style="{")
    handler.setFormatter(formatter)
    stream_handler.setFormatter(stream_formatter)

    # Add the FileHandler and StreamHandler to the logger
    logger.addHandler(handler)
    logger.addHandler(stream_handler)

    # Remember the logger so that set_loglevel can update it later
    __loggers.append(logger)

    return logger
51 |
52 |
def set_loglevel(loglevel: str | int) -> None:
    """
    Set the log level for all loggers configured so far and for loggers configured afterwards.

    Args:
        loglevel (str | int): The log level to set. In case of a string the attribute of this
            name is taken from the logging module; the lookup is case insensitive
            (e.g. "debug" and "DEBUG" are equivalent).

    Returns:
        None

    Raises:
        AttributeError: If loglevel is a string that does not name an attribute of logging.
    """
    global __loglevel

    if isinstance(loglevel, str):
        # The level constants are upper-case. The lower-case names (logging.info, ...)
        # are the module-level logging functions and not valid levels.
        __loglevel = getattr(logging, loglevel.upper())
    else:
        __loglevel = loglevel

    for logger in __loggers:
        logger.setLevel(__loglevel)
73 |
--------------------------------------------------------------------------------
/src/censo/parallel.py:
--------------------------------------------------------------------------------
1 | """
2 | Performs the parallel execution of the QM calls.
3 | """
4 |
5 | import multiprocessing
6 | import signal
7 | from concurrent.futures import ProcessPoolExecutor, as_completed
8 |
9 | from .datastructure import MoleculeData, ParallelJob
10 | from .logging import setup_logger
11 | from .params import Config
12 | from .qm_processor import QmProc
13 | from .tm_processor import TmProc
14 | from .utilities import Factory
15 |
16 | logger = setup_logger(__name__)
17 |
18 |
def execute(
    conformers: list[MoleculeData],
    workdir: str,
    prog: str,
    prepinfo: dict[str, dict],
    jobtype: list[str],
    copy_mo: bool = False,
    retry_failed: bool = False,
    balance: bool = True,
) -> tuple[dict, list]:
    """
    Manages parallel execution of external program calls. Sets cores used per job,
    can copy MO-files, and retry failed jobs.

    Args:
        conformers (list[MoleculeData]): List of conformers for which jobs will be created and executed.
        workdir (str): Working directory.
        prog (str): Name of the program to be used.
        prepinfo (dict[str, dict]): Settings that are put into the prepinfo of every job.
        jobtype (list[str]): The jobtypes to be executed for every conformer.
        copy_mo (bool, optional): Whether to copy the MO-files from the previous calculation.
        retry_failed (bool, optional): Whether to retry failed jobs.
        balance (bool, optional): Whether to balance the number of cores used per job.

    Returns:
        tuple[dict, list]: Dictionary containing the results for each conformer and a list of unrecoverable conformers.

    Raises:
        AssertionError: If the list of conformers is empty.
        RuntimeError: If all jobs are reported as failed after the retry step.
    """

    def prepare_jobs(
        conformers: list[MoleculeData], prepinfo: dict[str, dict], jobtype: list[str]
    ) -> list[ParallelJob]:
        # create jobs from conformers
        jobs = [ParallelJob(conf.geom, jobtype) for conf in conformers]

        # put settings into jobs
        for job in jobs:
            job.prepinfo.update(prepinfo)

        return jobs

    def reduce_cores(
        free_cores: multiprocessing.Value,
        omp: int,
        enough_cores: multiprocessing.Condition,
    ):
        # acquire lock on the condition and wait until enough cores are available
        with enough_cores:
            enough_cores.wait_for(lambda: free_cores.value >= omp)
            free_cores.value -= omp
            logger.debug(
                f"Free cores decreased {free_cores.value + omp} -> {free_cores.value}."
            )

    def increase_cores(
        free_cores: multiprocessing.Value,
        omp: int,
        enough_cores: multiprocessing.Condition,
    ):
        # acquire lock on the condition and increase the number of cores, notifying one waiting process
        with enough_cores:
            free_cores.value += omp
            logger.debug(
                f"Free cores increased {free_cores.value - omp} -> {free_cores.value}."
            )
            enough_cores.notify()

    def handle_sigterm(signum, frame, executor):
        logger.critical("Received SIGTERM. Terminating.")
        executor.shutdown(wait=False)

    def dqp(jobs: list[ParallelJob], processor: QmProc) -> list[ParallelJob]:
        """
        D ynamic Q ueue P rocessing

        Submits all jobs to a process pool, never using more than Config.NCORES cores at once.
        The jobs are returned in the order in which they finished, not in the order of the input.
        Returns None if submitting a job raises a RuntimeError.
        """

        with multiprocessing.Manager() as manager:
            # execute calculations for given list of conformers
            with ProcessPoolExecutor(
                max_workers=Config.NCORES // min(job.omp for job in jobs)
            ) as executor:
                # make sure that the executor exits gracefully on termination
                # TODO - is using wait=False a good option here?
                # should be fine since workers will kill programs with SIGTERM
                # wait=True leads to the workers waiting for their current task to be finished before terminating
                # Register the signal handler
                signal.signal(
                    signal.SIGTERM,
                    lambda signum, frame: handle_sigterm(signum, frame, executor),
                )

                # define shared variables that can be safely asynchronously accessed
                free_cores = manager.Value(int, Config.NCORES)
                enough_cores = manager.Condition()

                # sort the jobs by the number of cores used
                # (the first item will be the one with the lowest number of cores)
                jobs.sort(key=lambda x: x.omp)

                tasks = []
                for job in jobs:
                    # TODO - something to readjust omp based on expected time to finish and the timings of other jobs
                    # try to reduce the number of cores by job.omp, if there are not enough cores available we wait
                    reduce_cores(free_cores, job.omp, enough_cores)

                    try:
                        # submit the job
                        tasks.append(executor.submit(processor.run, job))
                        # NOTE: the first argument passed to the done_callback is always the future itself (unused, _).
                        # omp is bound as default argument so that the callback releases the cores of the
                        # job it belongs to and not the cores of whatever job the loop is currently at.
                        tasks[-1].add_done_callback(
                            lambda _, omp=job.omp: increase_cores(
                                free_cores, omp, enough_cores
                            )
                        )
                    except RuntimeError:
                        # Makes this exit gracefully in case that the main process is killed
                        return None

                # wait for all jobs to finish and collect results (exceptions raised in workers propagate)
                results = [task.result() for task in as_completed(tasks)]

        return results

    def set_omp_chunking(jobs: list[ParallelJob]) -> None:
        """
        Determines and sets the number of cores that are supposed to be used for every job.
        This method is efficient if it can be assumed that the jobs take roughly the same amount of time each.
        Each job shouldn't use less than Config.OMPMIN cores.
        """
        # Get the total number of jobs
        jobs_left, tot_jobs = len(jobs), len(jobs)

        # Calculate the maximum and minimum number of processes (number of jobs that can be executed simultaneously)
        maxprocs = (
            Config.NCORES // Config.OMPMIN
        )  # Calculate the maximum number of processes
        # Calculate the minimum number of processes
        minprocs = max(1, Config.NCORES // Config.OMPMAX)

        # Loop until all jobs are distributed
        while jobs_left > 0:
            if jobs_left >= maxprocs:
                p = maxprocs  # Set the number of processes to the maximum if there are enough jobs left
            elif minprocs <= jobs_left < maxprocs:
                # Find the largest number of processes that evenly divides the number of cores
                # and does not exceed the number of remaining jobs.
                # If there is no such number fall back to minprocs (<= jobs_left in this branch)
                # instead of failing on an empty sequence.
                p = max(
                    [
                        j
                        for j in range(minprocs, maxprocs)
                        if Config.NCORES % j == 0 and j <= jobs_left
                    ],
                    default=minprocs,
                )
            else:
                # There are not enough jobs left for at least minprocs processes
                for job in jobs[tot_jobs - jobs_left : tot_jobs]:
                    job.omp = (
                        Config.NCORES // minprocs
                    )  # Set the number of cores for each job to the maximum value
                jobs_left = 0
                continue

            # Set the number of cores for each job for as many jobs as possible before moving onto the next omp value
            while jobs_left - p >= 0:
                for job in jobs[tot_jobs - jobs_left : tot_jobs - jobs_left + p]:
                    job.omp = Config.NCORES // p  # Set the number of cores for each job
                jobs_left -= p  # Decrement the number of remaining jobs

    def retry_failed_jobs(
        jobs: list[ParallelJob], processor: QmProc, balance: bool
    ) -> tuple[list[int], list[str]]:
        """
        Tries to recover failed jobs.

        Args:
            jobs (list[ParallelJob]): List of jobs. Retried jobs are replaced in this list.
            processor (QmProc): Processor object.
            balance (bool): Whether to rebalance the number of cores for the retried jobs.

        Returns:
            tuple[list[int], list[str]]: List of indices of jobs that were retried, list of names of conformers
            that could not be recovered.
        """
        # determine failed jobs
        logger.debug("Checking for failed jobs...")
        failed_jobs = [
            i
            for i, job in enumerate(jobs)
            if any(not job.meta[jt]["success"] for jt in job.jobtype)
        ]

        if len(failed_jobs) == 0:
            logger.info("All jobs executed successfully.")
            return [], []

        # errors for which a restart with special flags is attempted
        handled_errors = ["scf_not_converged", "Previous calculation failed"]

        # indices of jobs that should be retried (depends on whether the error can be handled or not)
        retry = []

        # determine flags for jobs based on error messages
        for failed_job in failed_jobs:
            job = jobs[failed_job]

            # list of jobtypes that should be removed from the jobtype list
            jtremove = []
            for jt in job.jobtype:
                if not job.meta[jt]["success"]:
                    if job.meta[jt]["error"] in handled_errors:
                        # schedule every job only once, even if several of its jobtypes failed
                        if failed_job not in retry:
                            retry.append(failed_job)
                        job.flags[jt] = job.meta[jt]["error"]
                else:
                    # store all successful jobtypes to be removed later
                    jtremove.append(jt)

            # remove all successful jobs from jobtype to avoid re-execution
            for jt in jtremove:
                job.jobtype.remove(jt)

        # execute jobs that should be retried
        logger.info(
            f"Number of failed jobs: {len(failed_jobs)}. Restarting {len(retry)} jobs."
        )

        if len(retry) > 0:
            retry_jobs = [jobs[i] for i in retry]

            # Rebalancing necessary
            if balance:
                set_omp_chunking(retry_jobs)

            # dqp returns the jobs in order of completion, so the finished jobs are
            # matched to their slots by conformer name instead of by position
            finished = {job.conf.name: job for job in dqp(retry_jobs, processor)}
            for i in retry:
                jobs[i] = finished[jobs[i].conf.name]

        # any jobs that still failed will lead to the conformer being marked as unrecoverable
        failed_confs = []
        for i, job in enumerate(jobs):
            still_failed = [jt for jt in job.jobtype if not job.meta[jt]["success"]]
            if still_failed:
                # report the errors of the jobtypes of this job that actually failed
                errors = ", ".join(
                    f"{jt}: {job.meta[jt]['error']}" for jt in still_failed
                )
                logger.warning(
                    f"{job.conf.name} job recovery failed. Error: {errors}. Check output files."
                )
                failed_confs.append(job.conf.name)
            elif i in retry:
                logger.info(f"Successfully retried job for {job.conf.name}.")

        return retry, failed_confs

    # Check first if there are any conformers at all
    # (explicit raise, since assert statements are removed when running with -O)
    if len(conformers) == 0:
        raise AssertionError("No jobs to compute!")

    # Create jobs from conformers data
    jobs = prepare_jobs(conformers, prepinfo, jobtype)

    # initialize the processor for the respective program
    processor = Factory.create(
        prog,
        workdir,
    )

    # processor.check_requirements(jobs)

    # Set processor to copy the MO-files
    processor.copy_mo = copy_mo

    # check for the most recent mo files for each conformer
    # TODO - how would this work when multiple different programs are supported?
    for job in jobs:
        try:
            job.mo_guess = next(
                c for c in conformers if c.name == job.conf.name
            ).mo_paths[-1]
        except IndexError:
            # no MO-files known for this conformer yet
            pass

    # set cores per process for each job
    # NOTE: since parallelization in tm is controlled using environment variables we cannot use automatic load balancing
    if balance and not isinstance(processor, TmProc):
        set_omp_chunking(jobs)
    elif balance and isinstance(processor, TmProc):
        logger.warning(
            "Load balancing 2.0 is not supported for TURBOMOLE. Falling back to old behaviour."
        )

        # If there are not enough cores to use omp = Config.OMPMIN (to avoid unnecessary waiting)
        if len(jobs) < Config.NCORES // Config.OMPMIN:
            omp = Config.NCORES // len(jobs)
        # Otherwise try find the largest number of parallel processors p that
        # is Config.NCORES // Config.OMPMIN at most and Config.NCORES // Config.OMPMAX at least
        # such that at least 75% of processors still work for the remainder jobs
        # or the number of jobs can be evenly distributed between the processors
        else:
            # omp always has a value, also if the search stops in the first iteration
            omp = Config.OMPMIN
            for o in range(Config.OMPMIN, Config.OMPMAX + 1):
                p = Config.NCORES // o
                if p == 1:
                    # keep the last omp value that still allows more than one process
                    break
                # use the omp value that belongs to the tested number of processes
                omp = o
                if len(jobs) % p >= 0.75 * p or len(jobs) % p == 0:
                    break

        # Configure environment variables
        Config.ENVIRON["PARA_ARCH"] = "SMP"
        Config.ENVIRON["PARNODES"] = str(omp)

        for job in jobs:
            job.omp = omp
    else:
        omp = Config.OMP
        if omp < Config.OMPMIN:
            logger.warning(
                f"User OMP setting is below the minimum value of {Config.OMPMIN}. Using {Config.OMPMIN} instead."
            )
            omp = Config.OMPMIN
        elif omp > Config.NCORES:
            logger.warning(
                f"Value of {omp} for OMP is larger than the number of available cores {Config.NCORES}. Using OMP = {Config.NCORES}."
            )
            omp = Config.NCORES

        for job in jobs:
            job.omp = omp

    # execute the jobs
    jobs = dqp(jobs, processor)

    # Try to get the mo_path from metadata and store it in the respective conformer object
    mo_paths = {job.conf.name: job.meta["mo_path"] for job in jobs}
    for conf in conformers:
        if mo_paths[conf.name] is not None:
            conf.mo_paths.append(mo_paths[conf.name])

    failed_confs = []
    if retry_failed:
        retried, failed_confs = retry_failed_jobs(jobs, processor, balance)

        # Again, try to get the mo_path from metadata and store it in the respective conformer object
        mo_paths = {
            job.conf.name: job.meta["mo_path"] for job in [jobs[i] for i in retried]
        }
        for conf in conformers:
            if mo_paths.get(conf.name, None) is not None:
                conf.mo_paths.append(mo_paths[conf.name])

    # RuntimeError if all jobs failed
    if len(jobs) == len(failed_confs):
        raise RuntimeError(
            "Parallel execution of all jobs failed and could not be recovered!"
        )

    # e.g. {"CONF23": {"sp": {"energy": 1231.5}, ...}}
    return {job.conf.name: job.results for job in jobs}, failed_confs
379 |
--------------------------------------------------------------------------------
/src/censo/properties/__init__.py:
--------------------------------------------------------------------------------
1 | from .nmr import NMR
2 | from .uvvis import UVVis
3 | from .property_calculator import PropertyCalculator
4 |
--------------------------------------------------------------------------------
/src/censo/properties/property_calculator.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains boilerplate class for calculating ensemble properties.
3 | """
4 |
5 | from ..logging import setup_logger
6 | from ..part import CensoPart
7 | from ..utilities import timeit, SolventHelper
8 | from ..datastructure import MoleculeData
9 | from ..ensembleopt import EnsembleOptimizer
10 |
11 | logger = setup_logger(__name__)
12 |
13 |
class PropertyCalculator(CensoPart):
    """
    Common base for all ensemble property calculations.

    Subclasses provide the actual calculation in '_property' and the output in '_write_results'.
    """

    _grid = ""

    @timeit
    @CensoPart._create_dir
    def __call__(self, using_part: CensoPart = None) -> None:
        """
        Generic run logic shared by all ensemble property calculations.

        The energies of a previous part are collected, gtot and the Boltzmann populations are
        derived from them and finally '_property' is called.

        Args:
            using_part (CensoPart, optional): Part whose results should be used as energy values.
                If omitted, a suitable part is picked by '_set_energy'.
        """
        # print instructions
        self._print_info()

        # Collect the energy contributions, then sum them up for every conformer
        self._set_energy(using_part=using_part)
        results = self.data["results"]
        for conf in self._ensemble.conformers:
            results[conf.name]["gtot"] = self._gtot(conf)

        # Boltzmann populations based on gtot
        self._update_results(self._calc_boltzmannweights())

        # The property calculation itself
        self._property()

        # DONE

    def _output(self) -> None:
        """
        Creates the output of this part by writing out the results.
        """
        self._write_results()

    def _property(self):
        """Property calculation, has to be provided by the subclass."""
        raise NotImplementedError

    def _write_results(self):
        """Result output, has to be provided by the subclass."""
        raise NotImplementedError

    def _gtot(self, conf: MoleculeData) -> float:
        """
        Returns the sum of the 'energy', 'gsolv' and 'grrho' values stored for the conformer.
        """
        stored = self.data["results"][conf.name]
        return stored["energy"] + stored["gsolv"] + stored["grrho"]

    def _setup_prepinfo_rrho(self) -> dict[str, dict]:
        """
        Assembles the job settings (prepinfo) for an xtb_rrho calculation.
        """
        prepinfo = {
            "partname": self.name,
            "charge": self._ensemble.runinfo.get("charge"),
            "unpaired": self._ensemble.runinfo.get("unpaired"),
            "general": self.get_general_settings(),
            "xtb_rrho": {"gfnv": self.get_settings()["gfnv"]},
        }

        # The solvent is only looked up if this is not a gas-phase calculation
        if self.get_general_settings()["gas-phase"]:
            return prepinfo

        prepinfo["xtb_rrho"]["solvent_key_xtb"] = SolventHelper.get_solvent(
            self.get_general_settings()["sm_rrho"],
            self.get_general_settings()["solvent"],
        )
        return prepinfo

    def _set_energy(self, using_part: CensoPart | None = None):
        """
        Takes 'energy', 'gsolv' and 'grrho' for every conformer from the results of a previous part.

        If no part is given, the ensemble optimization with the lowest number of output conformers
        is used. If there are several of those, the one with the highest part number
        (self._part_nos) is used.

        Values are only set for conformers that do not have an entry in the results yet.

        Raises:
            RuntimeError: If no part is given and no ensemble optimization results are found.
        """
        if using_part is None:
            # All ensemble optimizations, the ones with the fewest output conformers first
            candidates = sorted(
                (
                    part
                    for part in self._ensemble.results
                    if issubclass(type(part), EnsembleOptimizer)
                ),
                key=lambda part: part.data["nconf_out"],
            )
            if not candidates:
                raise RuntimeError(
                    "Calculating an ensemble property requires some kind of energetic ensemble ranking performed beforehand."
                )

            # Every part sharing the lowest number of output conformers
            nconf_min = candidates[0].data["nconf_out"]
            smallest_results = [
                part for part in candidates if part.data["nconf_out"] == nconf_min
            ]

            if len(smallest_results) == 1:
                using_part = smallest_results[0]
            else:
                # Highest part number = highest (assumed) quality
                using_part = max(
                    smallest_results, key=lambda part: self._part_nos[part.name]
                )

        # Work with the entry found in the ensemble results for this part
        assert using_part is not None
        part_index = self._ensemble.results.index(using_part)
        source = self._ensemble.results[part_index]

        def conf_results(conf):
            # results of the used part for the given conformer
            return source.data["results"][conf.name]

        def rrho_energy(res):
            # 0.0 if there are no xtb_rrho results
            return res.get("xtb_rrho", {"energy": 0.0})["energy"]

        def from_prescreening(conf):
            res = conf_results(conf)
            energy = res["sp"]["energy"]
            gsolv = res["xtb_gsolv"]["gsolv"] if "xtb_gsolv" in res else 0.0
            return {"energy": energy, "gsolv": gsolv, "grrho": 0.0}

        def from_dft_ranking(conf):
            # shared by screening and refinement
            res = conf_results(conf)
            if "gsolv" in res:
                energy = res["gsolv"]["energy_gas"]
                gsolv = res["gsolv"]["gsolv"]
            else:
                energy = res["sp"]["energy"]
                gsolv = 0.0
            return {"energy": energy, "gsolv": gsolv, "grrho": rrho_energy(res)}

        def from_optimization(conf):
            res = conf_results(conf)
            return {
                "energy": res["xtb_opt"]["energy"],
                "gsolv": 0.0,
                "grrho": rrho_energy(res),
            }

        energy_values = {
            "prescreening": from_prescreening,
            "screening": from_dft_ranking,
            "optimization": from_optimization,
            "refinement": from_dft_ranking,
        }

        for conf in self._ensemble.conformers:
            self.data["results"].setdefault(
                conf.name, energy_values[source.name](conf)
            )
212 |
--------------------------------------------------------------------------------
/src/censo/properties/uvvis.py:
--------------------------------------------------------------------------------
1 | """
2 | Calculates the ensemble UV/Vis spectrum.
3 | """
4 |
5 | import json
6 | import os
7 |
8 | from ..parallel import execute
9 | from ..params import Config
10 | from ..utilities import SolventHelper, DfaHelper, format_data, print, Factory
11 | from ..logging import setup_logger
12 | from .property_calculator import PropertyCalculator
13 | from ..part import CensoPart
14 |
15 | logger = setup_logger(__name__)
16 |
17 |
class UVVis(PropertyCalculator):
    """
    Calculation of the ensemble UV/Vis spectrum of a (previously) optimized ensemble.

    Conformers whose calculation failed are removed from the ensemble; apart from that
    the ensemble is not modified by this part.
    """

    # Solvent models selectable for this part: everything configured per program
    # except the COSMO-RS variants.
    __solv_mods = {
        prog: tuple(
            t for t in Config.SOLV_MODS[prog] if t not in ("cosmors", "cosmors-fine")
        )
        for prog in Config.PROGS
    }

    _options = {
        "prog": {"default": "orca", "options": ["orca"]},  # required
        "func": {
            "default": "wb97x-d4",
            "options": {prog: DfaHelper.get_funcs(prog) for prog in Config.PROGS},
        },
        "basis": {"default": "def2-TZVP"},
        "sm": {"default": "smd", "options": __solv_mods},
        "gfnv": {"default": "gfn2", "options": Config.GFNOPTIONS},
        "nroots": {"default": 20},
        "run": {"default": False},  # required
        "template": {"default": False},  # required
    }

    _settings = {}

    @classmethod
    def _validate(cls, tovalidate: dict[str, any]) -> None:
        """
        Validates the type of each setting in the given dict. Also potentially validate if the setting is allowed by
        checking with cls._options.
        This is the part-specific version of the method. It will run the general validation first and then
        check part-specific logic.

        Args:
            tovalidate (dict[str, any]): The dict containing the settings to be validated.

        Returns:
            None

        Raises:
            ValueError: If the setting is not allowed or the value is not within the allowed options.
            NotImplementedError: If the dummy functional is requested together with TURBOMOLE.
        """
        # General validation
        super()._validate(tovalidate)

        # Part-specific validation
        # NOTE: tovalidate is always complete
        prog = tovalidate["prog"]

        # Check availability of func for prog
        func = tovalidate["func"]
        if func not in cls._options["func"]["options"][prog]:
            raise ValueError(
                f"Functional {func} is not available for {tovalidate['prog']}. "
                "Check spelling w.r.t. CENSO functional naming convention (case insensitive)."
            )

        # Check sm availability for prog
        # Remember: tovalidate is always complete so we don't need .get with default None here
        sm = tovalidate["sm"]
        if sm not in cls._options["sm"]["options"][prog]:
            raise ValueError(
                f"Solvent model {sm} not available for {tovalidate['prog']}."
            )

        # Check solvent availability for sm
        solvent = cls.get_general_settings()["solvent"]
        if solvent not in CensoPart._options["solvent"]["options"][sm]:
            raise ValueError(
                f"Solvent {cls.get_general_settings()['solvent']} is not available for {sm}. "
            )

        # dummy/template functionality not implemented yet for TM
        if prog == "tm" and (func == "dummy"):
            raise NotImplementedError(
                "Dummy functionality is not implemented for use with TURBOMOLE."
            )

    def _property(self) -> None:
        """
        Runs the UV/Vis calculations for all conformers, drops conformers for which the
        calculation failed, stores the results and performs the ensemble averaging.
        """
        jobtype = ["uvvis"]

        # Compile all information required for the preparation of input files in parallel execution step
        prepinfo = self._setup_prepinfo()

        # compute results
        # for structure of results from handler.execute look there
        results, failed = execute(
            self._ensemble.conformers,
            self._dir,
            self.get_settings()["prog"],
            prepinfo,
            jobtype,
            copy_mo=self.get_general_settings()["copy_mo"],
            balance=self.get_general_settings()["balance"],
            retry_failed=self.get_general_settings()["retry_failed"],
        )

        # Remove failed conformers
        self._ensemble.remove_conformers(failed)

        # Update results
        self._update_results(results)

        # Ensemble averaging of excitations
        self.__excitation_averaging()

    def _setup_prepinfo(self) -> dict[str, dict]:
        """
        Collects everything needed to prepare the input files of the 'uvvis' job.

        Returns:
            dict[str, dict]: The part name, charge, number of unpaired electrons, the general
                settings and the job-specific settings under the key 'uvvis'.
        """
        settings = self.get_settings()
        general = self.get_general_settings()

        prepinfo = {}

        prepinfo["partname"] = self.name
        prepinfo["charge"] = self._ensemble.runinfo.get("charge")
        prepinfo["unpaired"] = self._ensemble.runinfo.get("unpaired")
        prepinfo["general"] = general

        prepinfo["uvvis"] = {
            "func_name": DfaHelper.get_name(settings["func"], settings["prog"]),
            "func_type": DfaHelper.get_type(settings["func"]),
            "disp": DfaHelper.get_disp(settings["func"]),
            "basis": settings["basis"],
            "grid": "high+",  # hardcoded grid settings
            "template": settings["template"],
            "gcp": False,  # GCP is not necessary for spectra calculations
            "nroots": settings["nroots"],
        }
        # Only look up solvent if solvation is used
        if not general["gas-phase"]:
            prepinfo["uvvis"]["sm"] = settings["sm"]
            prepinfo["uvvis"]["solvent_key_prog"] = SolventHelper.get_solvent(
                settings["sm"], general["solvent"]
            )

        return prepinfo

    def __excitation_averaging(self):
        """
        Calculates population weighted excitation parameters.

        The weighted excitations are printed as a table, written to
        '<part number>_<PART NAME>.out' and dumped to 'excitations.json', both in the
        current working directory.
        """
        # Calculate epsilon_max (maximum extinctions) for each excitation, weighted by population
        # eps is a list of tuples that contain each excitation wavelength with the respective epsilon_max
        # NOTE(review): "bmw" is presumably the Boltzmann weight of the conformer - confirm
        eps = []
        for conf in self._ensemble.conformers:
            confdata = self.data["results"][conf.name]
            for excitation in confdata["uvvis"]["excitations"]:
                epsilon_max = confdata["bmw"] * excitation["osc_str"]
                eps.append((excitation["wavelength"], epsilon_max, conf.name))

        if eps:
            # Print table
            headers = ["λ", "ε_max", "Origin. CONF#"]

            units = ["[nm]", "", ""]

            printmap = {
                "λ": lambda exc: f"{exc[0]:.2f}",
                "ε_max": lambda exc: f"{exc[1]:.6f}",
                "Origin. CONF#": lambda exc: f"{exc[2]}",
            }

            rows = [[printmap[header](exc) for header in headers] for exc in eps]

            lines = format_data(headers, rows, units=units)

            # Print everything
            for line in lines:
                print(line, flush=True, end="")

            # write lines to file
            outpath = os.path.join(
                os.getcwd(),
                f"{self._part_nos[self.name]}_{self.name.upper()}.out",
            )
            logger.debug(f"Writing to {outpath}.")
            # The table headers contain Greek letters, so the encoding must not depend
            # on the locale (a non-UTF-8 default would raise UnicodeEncodeError).
            with open(outpath, "w", newline=None, encoding="utf-8") as outfile:
                outfile.writelines(lines)
        else:
            # The table formatter cannot handle an empty table, so skip it entirely
            logger.warning(
                "No excitations found for any conformer. Skipping excitation table."
            )

        # Dump data into json
        with open(
            os.path.join(os.getcwd(), "excitations.json"), "w", encoding="utf-8"
        ) as f:
            json.dump(eps, f, indent=4)

    def _write_results(self) -> None:
        """
        Write result excitations to files.
        """
        # Write results to json file
        self._write_json()
214 |
215 |
216 | Factory.register_builder("uvvis", UVVis)
217 |
--------------------------------------------------------------------------------
/src/censo/utilities.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility functions which are used in the CENSO modules. From creating folders to
3 | printout routines.
4 | """
5 |
6 | import functools
7 | import hashlib
8 | import json
9 | import os
10 | import time
11 | import re
12 | from builtins import print as print_orig
13 | from collections import OrderedDict
14 | from collections.abc import Callable, Sequence
15 | import math
16 |
17 | from .params import BOHR2ANG, PLENGTH, Config
18 | from .logging import setup_logger
19 |
# Module-level logger for the utilities, created via the package's setup_logger helper.
logger = setup_logger(__name__)
21 |
22 |
class Factory:
    """
    Name-based object factory: builders are registered under a name and
    instantiated on request.
    """

    __builders: dict[str, type] = {}

    @classmethod
    def register_builder(cls, name: str, builder: type) -> None:
        """
        Stores a builder under the given name (replacing a previous registration).

        Args:
            name (str): name of the builder.
            builder (type): type of the builder.
        """
        cls.__builders.update({name: builder})

    @classmethod
    def create(cls, name: str, *args, **kwargs) -> object:
        """
        Instantiates the builder registered under 'name' with the given arguments.

        Args:
            name (str): name of the builder.
            *args: positional arguments forwarded to the builder.
            **kwargs: keyword arguments forwarded to the builder.

        Returns:
            object: whatever the builder returns.

        Raises:
            TypeError: If no builder is registered under 'name'.
        """
        builder = cls.__builders.get(name)
        if builder is None:
            raise TypeError(
                f"No type was found for '{name}' in {list(cls.__builders)}."
            )
        return builder(*args, **kwargs)
51 |
52 |
class DfaHelper:
    """
    Helper class to look up functional information in the functional dict
    loaded via set_dfa_dict.
    """

    _dfa_dict: dict

    @classmethod
    def set_dfa_dict(cls, dfadict_path: str):
        """
        Load the functional lookup dict from a json file.

        Args:
            dfadict_path (str): The path to the json file.
        """
        with open(dfadict_path, "r") as dfafile:
            cls._dfa_dict = json.load(dfafile)

    @classmethod
    def get_funcs(cls, prog: str):
        """
        Collects every functional that has a (non-null) entry for the given qm program.

        Args:
            prog (str): The qm program name.

        Returns:
            list[str]: The list of functionals.
        """
        available = []
        for func, entry in cls._dfa_dict["functionals"].items():
            if entry[prog.lower()] is not None:
                available.append(func)
        return available

    @classmethod
    def get_name(cls, func: str, prog: str):
        """
        Looks up how a functional is called in the given qm program. An unknown
        functional is returned literally (lowercased) after logging a warning.

        Args:
            func (str): The functional.
            prog (str): The qm program.

        Returns:
            str: The name of the functional.
        """
        func, prog = func.lower(), prog.lower()
        known = cls._dfa_dict["functionals"]
        if func not in known:
            logger.warning(
                f"Functional {func} not found for program {prog}. Applying name literally."
            )
            return func
        return known[func][prog]

    @classmethod
    def get_disp(cls, func: str):
        """
        Looks up the dispersion correction of a functional. For an unknown
        functional a warning is logged and no dispersion correction is applied.

        Args:
            func (str): The functional.

        Returns:
            str: The dispersion correction name.
        """
        func = func.lower()
        known = cls._dfa_dict["functionals"]
        if func not in known:
            logger.warning(
                f"Could not determine dispersion correction for {func}. Applying none."
            )
            return "novdw"
        return known[func]["disp"]

    @classmethod
    def get_type(cls, func: str):
        """
        Looks up the type of a functional. For an unknown functional a warning
        is logged and a GGA is assumed.

        Args:
            func (str): The functional.

        Returns:
            str: The type of the functional.
        """
        func = func.lower()
        known = cls._dfa_dict["functionals"]
        if func not in known:
            logger.warning(
                f"Could not determine functional type for {func}. Assuming GGA."
            )
            return "GGA"
        return known[func]["type"]

    @classmethod
    def functionals(cls) -> dict[str, dict]:
        """
        Returns the 'functionals' section of the loaded functional dict.
        """
        return cls._dfa_dict["functionals"]
149 |
150 |
151 | class SolventHelper:
152 | """
153 | Helper class to manage solvent lookup.
154 | """
155 |
156 | @classmethod
157 | def set_solvent_dict(cls, solvent_dict_path: str) -> None:
158 | """
159 | Load the solvents lookup dict.
160 |
161 | Args:
162 | solvent_dict_path (str): The path to the solvents lookup dict.
163 | """
164 | with open(solvent_dict_path, "r") as f:
165 | cls._solv_dict = json.load(f)
166 |
167 | @classmethod
168 | def get_solvent(cls, sm: str, name: str) -> str | None:
169 | """
170 | Try to lookup the solvent model keyword for the given solvent name. If it is not found, return None.
171 |
172 | Args:
173 | sm (str): The solvent model.
174 | name (str): The solvent name.
175 |
176 | Returns:
177 | str | None: The solvent model keyword or None if not found.
178 | """
179 | mappings = cls._solv_dict[name]
180 | if sm in mappings["sms"]:
181 | return name
182 | for alias in mappings["alias"]:
183 | if sm in cls._solv_dict[alias]["sms"]:
184 | return alias
185 | return None
186 |
187 | @classmethod
188 | def get_solvent_names(cls, sm: str) -> list[str]:
189 | """
190 | Get all available solvent names for a specified solvent model with the respective internal keyword.
191 |
192 | Args:
193 | sm (str): The solvent model.
194 |
195 | Returns:
196 | list[str]: The available solvent names for the given solvent model.
197 | """
198 | solvents = []
199 | for solventname, mappings in cls._solv_dict.items():
200 | if sm in mappings["sms"]:
201 | solvents.append(solventname)
202 | for alias in mappings["alias"]:
203 | if sm in cls._solv_dict[alias]["sms"]:
204 | solvents.append(solventname)
205 | solvents = list(set(solvents))
206 | return solvents
207 |
208 |
def print(*args, **kwargs):
    """
    Drop-in replacement for the builtin print that flushes by default.

    Only the keywords 'sep', 'end', 'file' and 'flush' are forwarded; any other
    keyword argument is ignored.
    """
    print_orig(
        *args,
        sep=kwargs.get("sep", " "),
        end=kwargs.get("end", "\n"),
        file=kwargs.get("file"),
        flush=kwargs.get("flush", True),
    )
227 |
228 |
229 | def format_data(
230 | headers: list[str],
231 | rows: list[list[str]],
232 | units: list[str] = None,
233 | sortby: int = 0,
234 | padding: int = 6,
235 | ) -> list[str]:
236 | """
237 | Generates a formatted table based on the given headers, rows, units, and sortby index.
238 |
239 | Args:
240 | headers (list[str]): The list of column headers.
241 | rows (list[list[str]]): The list of rows, where each row is a list of values.
242 | units (list[str], optional): The list of units for each column. Defaults to None.
243 | sortby (int, optional): The index of the column to sort by. Defaults to 0. In case of a string column,
244 | use natural sorting.
245 |
246 | Returns:
247 | list[str]: The list of formatted lines representing the table.
248 |
249 | """
250 |
251 | def natural_sort_key(s):
252 | """
253 | Natural sorting key for strings.
254 | """
255 | return [int(text) if text.isdigit() else text for text in re.split(r"(\d+)", s)]
256 |
257 | lines = []
258 |
259 | # First, determine the maximium width for each column
260 | ncols = len(headers)
261 | if units is not None:
262 | maxcolw = [
263 | max(
264 | [
265 | len(headers[i]),
266 | max(len(rows[j][i]) for j in range(len(rows))),
267 | len(units[i]),
268 | ]
269 | )
270 | for i in range(ncols)
271 | ]
272 | else:
273 | maxcolw = [
274 | max(len(headers[i]), max(len(rows[j][i]) for j in range(len(rows))))
275 | for i in range(ncols)
276 | ]
277 |
278 | # add table header
279 | lines.append(
280 | " ".join(f"{headers[i]:^{width + padding}}" for i, width in enumerate(maxcolw))
281 | + "\n"
282 | )
283 |
284 | # Add units
285 | if units is not None:
286 | lines.append(
287 | " ".join(
288 | f"{units[i]:^{width + padding}}" for i, width in enumerate(maxcolw)
289 | )
290 | + "\n"
291 | )
292 |
293 | # TODO - draw an arrow if conformer is the best in current ranking
294 | # (" <------\n" if self.key(conf) == self.key(self.core.conformers[0]) else "\n")
295 |
296 | # Sort rows lexicographically if column sorted by is a number
297 | if rows[0][sortby].replace(".", "", 1).isdigit():
298 | rows = sorted(rows, key=lambda x: x[sortby])
299 | # Otherwise use natural sorting
300 | else:
301 | rows = sorted(rows, key=lambda x: natural_sort_key(x[sortby]))
302 |
303 | # add a line for every row
304 | for row in rows:
305 | lines.append(
306 | " ".join(f"{row[i]:^{width + padding}}" for i, width in enumerate(maxcolw))
307 | + "\n"
308 | )
309 |
310 | # Remove leading whitespace
311 | start = min(len(line) - len(line.lstrip()) for line in lines)
312 | for i in range(len(lines)):
313 | lines[i] = lines[i][start:]
314 |
315 | return lines
316 |
317 |
def frange(start: float, end: float, step: float = 1) -> list[float]:
    """
    Creates a range of floats, adding 'step' to 'start' while it's less or equal than 'end'.

    Args:
        start (float): The start of the range.
        end (float): The end of the range.
        step (float, optional): The step size. Must be positive. Defaults to 1.

    Returns:
        list[float]: The list of floats. Empty if 'start' is greater than 'end'.

    Raises:
        ValueError: If 'step' is not positive while 'start' <= 'end' (the range
            would never terminate).
    """
    if start > end:
        return []
    if step <= 0:
        # Without this guard the loop below would never reach 'end'
        raise ValueError(f"step must be positive, got {step}.")

    result = []
    current = start
    while current <= end:
        result.append(current)
        current += step
    return result
336 |
337 |
def t2x(
    path: str, writexyz: bool = False, outfile: str = "original.xyz"
) -> tuple[list, int, str]:
    """
    Convert a TURBOMOLE coord file to xyz data and/or write *.xyz output.

    Args:
        path (str): Path to the coord file, or to a directory containing a file
            named 'coord'.
        writexyz (bool, optional): Write the converted geometry to 'outfile'.
            Defaults to False.
        outfile (str, optional): Name of the xyz file, written into the directory
            of the coord file. Defaults to 'original.xyz'.

    Returns:
        tuple[list, int, str]: The list of atom strings ("x y z Element", coordinates
            multiplied by BOHR2ANG), the number of atoms and the path of the
            (potentially written) xyz file.
    """
    # Resolve the coord file and the directory the xyz file belongs to
    if os.path.isdir(path):
        outpath = path
        coordfile = os.path.join(path, "coord")
    else:
        outpath = os.path.dirname(path)
        coordfile = path

    # read coordinates with atom labels directly into a string
    # and append the string to a list to be written/returned later
    xyzatom = []
    with open(coordfile, "r", encoding=Config.CODING, newline=None) as f:
        for line in f:
            fields = line.split()
            # skip blank lines and comments
            if not fields or fields[0].startswith("#"):
                continue
            if fields[0].startswith("$"):
                # the header of the coordinate block carries no atom ...
                if fields[0].lower() == "$coord":
                    continue
                # ... and any other data group ($end, $user-defined bonds, ...) ends it
                break
            x, y, z, element = fields[:4]
            xyzatom.append(
                " ".join(
                    [
                        f"{float(x) * BOHR2ANG:.10f}",
                        f"{float(y) * BOHR2ANG:.10f}",
                        f"{float(z) * BOHR2ANG:.10f}",
                        element.lower().capitalize(),
                    ]
                )
            )

    xyzfile = os.path.join(outpath, outfile)

    # write converted coordinates to xyz outfile if wanted
    if writexyz:
        with open(xyzfile, "w", encoding=Config.CODING) as out:
            # xyz layout: atom count, comment line, then one "Element x y z" line per atom
            out.write(f"{len(xyzatom)}\n\n")
            for atom in xyzatom:
                *coords, element = atom.split()
                out.write(f"{element} {' '.join(coords)}\n")

    return xyzatom, len(xyzatom), xyzfile
390 |
391 |
392 | def check_for_float(line: str) -> float | None:
393 | """Go through line and check for float, return first float"""
394 | elements = line.strip().split()
395 | value = None
396 | for element in elements:
397 | try:
398 | value = float(element)
399 | except ValueError:
400 | value = None
401 | if value is not None:
402 | break
403 | return value
404 |
405 |
406 | def average(x: list[int | float]):
407 | assert len(x) > 0
408 | return float(sum(x)) / len(x)
409 |
410 |
def pearson_def(x: list[int | float], y: list[int | float]):
    """
    Pearson correlation coefficient of two number lists. If the lists differ in
    length, only the leading elements up to the shorter length are used.
    Returns 1.0 if the denominator is zero (e.g. one of the series is constant).
    """
    n = min(len(x), len(y))
    assert n > 0
    xs, ys = x[:n], y[:n]
    mean_x = average(xs)
    mean_y = average(ys)

    # accumulate the co-deviation and the squared deviations of both series
    codev = 0
    sqdev_x = 0
    sqdev_y = 0
    for xi, yi in zip(xs, ys):
        dx = xi - mean_x
        dy = yi - mean_y
        codev += dx * dy
        sqdev_x += dx * dx
        sqdev_y += dy * dy

    try:
        return codev / math.sqrt(sqdev_x * sqdev_y)
    except ZeroDivisionError:
        return 1.0
430 |
431 |
def do_md5(path):
    """
    Calculate the md5 hex digest of a file, e.g. to identify if a restart happened
    on the same ensemble input file.
    The file is read in chunks to keep memory consumption low.

    Raises FileNotFoundError if path is not an existing file.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError

    chunk_size = 65536
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops as soon as read() returns no more data
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
450 |
451 |
def timeit(f) -> Callable:
    """
    Decorator measuring the execution time of a function.
    The return value of the decorated function is discarded; calling the
    decorated function returns the time spent for its execution instead.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs) -> float:
        began = time.perf_counter()
        f(*args, **kwargs)
        return time.perf_counter() - began

    return wrapper
467 |
468 |
def od_insert(
    od: OrderedDict[str, any], key: str, value: any, index: int
) -> OrderedDict[str, any]:
    """
    Insert a new key/value pair into an OrderedDict at a specific position.
    If it was a normal dict:
        od[key] = value, with insertion before the 'index'th key.

    The given OrderedDict is not modified, a new one is returned. If 'key' is already
    present, its old entry is dropped so that the key ends up at the requested position
    with the new value.

    Args:
        od: The OrderedDict to insert into.
        key: The key to insert.
        value: The value associated with the key.
        index: The index before which to insert the key/value pair (same semantics
            as list.insert, referring to the key order of 'od').

    Returns:
        The updated OrderedDict.
    """
    items: list[tuple[str, any]] = list(od.items())
    inserted = (key, value)
    items.insert(index, inserted)
    # Building an OrderedDict from pairs with a duplicate key keeps the position of the
    # first occurrence but the value of the last one, so a stale entry located after
    # the insertion point would silently override the new value. Remove stale entries.
    return OrderedDict(
        item for item in items if item is inserted or item[0] != key
    )
490 |
491 |
def h1(text: str) -> str:
    """
    Builds a header of type 1, i.e. the text centered in a line of dashes:
        ---- text ----

    Args:
        text: The text to be formatted.

    Returns:
        The formatted header (PLENGTH wide, with a newline before and after).
    """
    banner = f" {text} ".center(PLENGTH, "-")
    return f"\n{banner}\n"
504 |
505 |
def h2(text: str) -> str:
    """
    Creates a formatted header of type 2, i.e. the text centered between two
    lines of dashes:
        ----------
           text
        ----------

    Args:
        text: The text to be formatted.

    Returns:
        The formatted header. Both dashed lines and the centered text line are
        PLENGTH characters wide.
    """
    return f"""
{'-' * PLENGTH}
{text.center(PLENGTH, " ")}
{'-' * PLENGTH}
"""
524 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grimme-lab/CENSO/3326db4579d1f630c28fce17e3b3e2d66070b8bd/test/__init__.py
--------------------------------------------------------------------------------
/test/conftest.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration file for pytest.
3 | """
4 |
--------------------------------------------------------------------------------
/test/fixtures/crest_conformers.xyz:
--------------------------------------------------------------------------------
1 | 22
2 | -36.24494569
3 | Eu 0.0046135283 0.0000019397 0.0009018604
4 | O -0.7091301781 -2.1926549416 0.6522655995
5 | H -0.1535268869 -2.9824097960 0.7314104880
6 | H -1.6126014988 -2.5040861288 0.8117534927
7 | O 1.9409180491 -1.4560619164 -0.1827958483
8 | H 2.6132125060 -1.6189420204 0.4952852918
9 | H 2.2920839687 -1.8610047732 -0.9898651025
10 | O -0.1351016460 0.5087983503 2.3092454014
11 | H 0.0150560893 -0.0922690962 3.0554620867
12 | H -0.4176088948 1.3440476223 2.7129882785
13 | O -0.6004293364 2.2183222629 -0.6744062753
14 | H -0.0084392969 2.9826022184 -0.7393680822
15 | H -1.4852723177 2.5699723619 -0.8532200401
16 | O -2.4106895860 0.0546905730 -0.0105828403
17 | H -2.9985407313 0.2612603992 0.7311674308
18 | H -2.9997083631 -0.1227271263 -0.7588457491
19 | O -0.1224603279 -0.5059944695 -2.3086940938
20 | H -0.4489089440 -1.3237222813 -2.7151687992
21 | H 0.0751867130 0.0828295267 -3.0536044410
22 | O 1.9983553636 1.3728575371 0.2078343153
23 | H 2.3626293325 1.7651006468 1.0153395082
24 | H 2.6825624998 1.4997322752 -0.4660748530
25 | 22
26 | -36.24490733
27 | Eu 0.0019887047 -0.0070027676 0.0013348105
28 | O 1.7080706294 -1.4142412068 0.9816638746
29 | H 1.9177647051 -1.5651424065 1.9151325237
30 | H 2.3391796071 -1.9628177144 0.4920285899
31 | O -0.6508239399 -2.2909714424 -0.4597686028
32 | H -0.9322831632 -2.9449732718 0.1975590738
33 | H -0.6853147982 -2.7582437492 -1.3074403401
34 | O -0.4373601420 0.3614891289 2.3099909688
35 | H -0.3650771354 1.2170139351 2.7613184362
36 | H -0.7489165479 -0.2532245816 2.9914223372
37 | O 2.0661066296 1.2395055439 -0.1381085256
38 | H 2.5742615626 1.4228209808 -0.9425070717
39 | H 2.6475984769 1.5034358354 0.5901552390
40 | O -2.4131423511 -0.0243830142 -0.0316201606
41 | H -2.9999359914 0.0661084266 0.7341184169
42 | H -2.9917366248 -0.2976440415 -0.7587489233
43 | O 0.3561826905 -0.0699633168 -2.3501775298
44 | H 0.9909921709 -0.5835390208 -2.8722143139
45 | H -0.1157001473 0.4752930362 -2.9989742106
46 | O -0.6486709310 2.2666952669 -0.3243766789
47 | H -0.0703694241 3.0412186551 -0.3910030604
48 | H -1.5485930638 2.6140526031 -0.4153234533
49 | 22
50 | -36.24467174
51 | Eu 0.0029283528 -0.0023450380 0.0044432982
52 | O -1.6844788501 -1.5967934815 0.5002804002
53 | H -1.6620817548 -2.2923932443 1.1754078089
54 | H -2.5596952333 -1.6748661758 0.0899459885
55 | O 2.0991841044 -0.9358500165 0.7881761024
56 | H 2.2150973902 -1.7998801533 1.2108167641
57 | H 2.9968011333 -0.5936456292 0.6630889874
58 | O 0.0033506661 0.2303751068 2.4000656315
59 | H -0.7583354660 0.2698160911 2.9973300450
60 | H 0.7727461301 0.3878823236 2.9672468638
61 | O 1.5088968033 1.7263033759 -0.6138141926
62 | H 1.7354587376 1.9652735921 -1.5262825722
63 | H 1.9577474126 2.3875666865 -0.0655521288
64 | O 0.6239989003 -1.7523255174 -1.5484644055
65 | H 0.2160032647 -2.6267859427 -1.6354173491
66 | H 1.4290687600 -1.7964228534 -2.0853795239
67 | O -1.5070001715 1.8141911780 0.4998681703
68 | H -2.4541784400 1.7774516605 0.6982268306
69 | H -1.2551054906 2.7372572053 0.6528424686
70 | O -1.0682080844 0.5332086198 -2.0679918136
71 | H -1.6666262067 1.2733037974 -2.2491960315
72 | H -1.0233842314 0.0356843847 -2.8982289049
73 | 22
74 | -36.24419579
75 | Eu 0.0006649450 -0.0048693367 -0.0046766397
76 | O 0.6242169873 -0.8197812124 2.1558750267
77 | H 1.4392076824 -0.7689007768 2.6755987327
78 | H 0.0008354138 -1.3038466398 2.7191928356
79 | O 1.4086421404 -1.7697184079 -0.7430827655
80 | H 1.4400487942 -2.6734948377 -0.3928548702
81 | H 2.1961760468 -1.6921850277 -1.3036291631
82 | O -1.9329935822 -1.1609896501 0.8952502336
83 | H -2.6811978331 -0.7779238666 1.3769259919
84 | H -2.2055433350 -2.0667116580 0.6854366845
85 | O -1.2843600975 -0.8048410561 -1.8982900605
86 | H -1.0718701531 -1.5304860976 -2.5040469488
87 | H -2.1324306230 -0.4544981550 -2.2102170531
88 | O -1.1130803385 1.8164948122 1.0330774823
89 | H -1.9470007336 2.2639250227 0.8246578413
90 | H -0.7676086885 2.2805933677 1.8119648116
91 | O 0.2208757376 1.6576208867 -1.7588616056
92 | H -0.1366892892 2.5579116322 -1.7718262654
93 | H 0.5224099593 1.4933901419 -2.6647873535
94 | O 2.0821197045 1.1153007230 0.3571919070
95 | H 2.3791104806 1.8366509648 -0.2187560819
96 | H 2.7782468214 1.0286863869 1.0241020801
97 | 22
98 | -36.24408866
99 | Eu -0.0020929849 0.0024535642 0.0008042944
100 | O -1.4370214132 1.0256339553 -1.6547846662
101 | H -2.4048397653 1.0617130039 -1.6561979199
102 | H -1.1685902270 1.4871282510 -2.4632246761
103 | O 2.0052994350 -1.3350751432 0.0601669581
104 | H 2.0570185422 -2.3010896348 0.0089127423
105 | H 2.9106519795 -1.0453116806 0.2459961968
106 | O -1.1642785326 1.7910491421 1.0964146788
107 | H -1.6340495969 1.7589294735 1.9429577236
108 | H -1.2687388535 2.7029428460 0.7867873563
109 | O 0.7572538345 0.0349707521 2.2975364179
110 | H 1.2383753047 0.7423251257 2.7516145410
111 | H 0.6430253416 -0.6579823352 2.9649023730
112 | O -1.5794623319 -1.5359676697 0.8849185605
113 | H -1.4276269609 -2.4752955182 1.0712057546
114 | H -2.5074038229 -1.3858698094 1.1221856367
115 | O -0.1259762170 -1.5669058531 -1.8285738588
116 | H 0.5516996618 -1.7234313865 -2.5028767065
117 | H -0.8900972968 -2.0881724529 -2.1159735099
118 | O 1.5625637182 1.5653040952 -0.8627608608
119 | H 1.8276628937 2.4242641033 -0.4996350812
120 | H 2.0967505458 1.4631110393 -1.6654931419
121 | 22
122 | -36.24374091
123 | Eu -0.0023141475 0.0025233246 -0.0003014911
124 | O -1.0309325177 -1.7401384080 -1.3518007359
125 | H -1.9707684966 -1.9448270984 -1.4624969464
126 | H -0.5633984165 -2.3915814219 -1.8958123706
127 | O 0.9000729197 -2.0893026805 0.7472748942
128 | H 1.7900104626 -2.4650201591 0.6747615397
129 | H 0.3360011548 -2.8315075200 1.0143378586
130 | O -0.3119154538 2.3296096660 -0.4329174746
131 | H -0.6531167506 3.0212940592 0.1535858264
132 | H -0.1696061264 2.7710766181 -1.2841333120
133 | O -2.3923793286 0.2961613011 0.2121267374
134 | H -2.9813866839 0.8109843908 -0.3595242196
135 | H -2.9332358193 0.0491115952 0.9775974905
136 | O -0.3113367077 0.1792863173 2.3619321693
137 | H -0.2246728213 -0.5505204129 2.9940651470
138 | H -0.3483991481 0.9776509824 2.9097658511
139 | O 1.0159849986 0.1699625353 -2.1582024829
140 | H 1.9591231251 0.1115682900 -2.3743311602
141 | H 0.5609408735 0.2014877811 -3.0132133961
142 | O 2.1470822966 0.8344329669 0.6253005990
143 | H 2.8673552988 0.4160604967 1.1197414977
144 | H 2.4169472033 1.7610526407 0.5321633192
145 | 22
146 | -36.24372527
147 | Eu -0.0001769071 0.0081524112 -0.0054024578
148 | O 1.4108283370 -1.8598675974 -0.4660464342
149 | H 1.2553507582 -2.4423590830 -1.2260256521
150 | H 2.2004739841 -2.2183243012 -0.0352314518
151 | O -0.5077581560 -1.4148359820 1.8611468082
152 | H -0.5588176615 -2.3826655114 1.8602421878
153 | H -0.5227482048 -1.1619420045 2.7969006321
154 | O 1.4812914309 1.1576630807 -1.4689822384
155 | H 1.5484031285 2.0830897704 -1.7463217470
156 | H 2.2884201270 0.7396305442 -1.8076967281
157 | O -0.8056587982 2.1904933815 0.5897101686
158 | H -1.3668015292 2.3259328507 1.3692873540
159 | H -0.8442108435 3.0285996160 0.1059069427
160 | O -2.4355866195 -0.1373768271 0.0884522191
161 | H -2.9609934409 -0.6877241698 0.6880944485
162 | H -3.0826646987 0.3571500393 -0.4355541029
163 | O -0.7413594316 -0.7691315754 -2.1662458301
164 | H -1.3961794473 -1.4641573327 -2.3303699406
165 | H -0.5937040767 -0.3497734958 -3.0273542089
166 | O 1.6017119559 0.7640624975 1.6052753047
167 | H 1.5575559297 1.6420612230 2.0147414696
168 | H 2.4475273993 0.3964656044 1.9004544481
169 |
--------------------------------------------------------------------------------
/test/fixtures/inp:
--------------------------------------------------------------------------------
1 | ! RHF CCSD(T) def2-TZVP TightSCF # test comment
2 |
3 | %paras R= 4.0,0.5,35 end#testcomment2
4 | #end
5 | * xyz 0 1
6 | H 0 0 0
7 | F 0 0 {R}
8 | *
9 |
--------------------------------------------------------------------------------
/test/fixtures/inp2:
--------------------------------------------------------------------------------
1 | ! RHF CCSD(T) def2-TZVP TightSCF
2 | %paras
3 | R= 4.0,0.5,35
4 | end
5 | * xyz 0 1
6 | H 0 0 0
7 | F 0 0 {R}
8 | *
9 |
--------------------------------------------------------------------------------
/test/fixtures/test.template:
--------------------------------------------------------------------------------
1 | {main}
2 | ! OPT
3 |
4 | {postgeom}
5 | %mp2
6 | bla bla
7 | end
8 |
--------------------------------------------------------------------------------
/test/fixtures/testinp:
--------------------------------------------------------------------------------
1 | ! RHF CCSD(T) def2-TZVP TightSCF
2 | %paras
3 | R= 4.0,0.5,35
4 | end
5 | * xyz 0 1
6 | H 0 0 0
7 | F 0 0 {R}
8 | *
9 |
--------------------------------------------------------------------------------
/test/test_cli/test_interface.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import unittest
4 |
5 | os.chdir(os.path.split(__file__)[0])
6 |
7 | from censo.cli.cml_parser import parse
8 | from censo.cli.interface import startup, entry_point
9 | from censo.params import DESCR
10 |
11 |
class CensoTest(unittest.TestCase):
    """
    Smoke tests for the command line entry points of CENSO.
    """

    # Example ensemble shipped with the test suite (test/fixtures). Resolved relative
    # to this file so the tests do not depend on the current working directory.
    ENSEMBLE = os.path.normpath(
        os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            os.pardir,
            "fixtures",
            "crest_conformers.xyz",
        )
    )

    def test_blank_startup(self):
        entry_point("")

    def test_help_startup(self):
        argv = "-h".split()
        entry_point(argv)

    def test_general_startup(self):
        argv = ["-inp", self.ENSEMBLE, "-solvent", "water", "-chrg", "0", "-u", "0"]
        core = startup(parse(DESCR, argv))
        self.assertEqual(core.workdir, os.path.split(__file__)[0])

    def test_partial_req(self):
        argv = ["-inp", self.ENSEMBLE]
        entry_point(argv)

    def test_writeconfig(self):
        argv = "-newconfig".split()
        entry_point(argv)

        self.assertTrue(os.path.isfile("censo2rc_NEW"))

    def test_writereadconfig(self):
        argv = "-newconfig".split()
        entry_point(argv)

        argv = [
            "-inp",
            self.ENSEMBLE,
            "-solvent",
            "water",
            "-chrg",
            "0",
            "-u",
            "0",
            "-inprc",
            "censo2rc_NEW",
        ]
        startup(parse(DESCR, argv))

    def test_rc_override(self):
        argv = "-newconfig".split()
        entry_point(argv)

        argv = [
            "-inprc",
            "censo2rc_NEW",
            "-inp",
            self.ENSEMBLE,
            "-solvent",
            "water",
            "-chrg",
            "0",
            "-u",
            "0",
            "-gp",
        ]
        args = parse(DESCR, argv)
        startup(args)
        from censo.part import CensoPart

        self.assertTrue(CensoPart.get_general_settings()["gas-phase"])

    def tearDown(self):
        # Remove files created by the tests. This is done in tearDown instead of
        # overriding doCleanups, which is the unittest hook that runs the callbacks
        # registered via addCleanup and must not be shadowed.
        delete = ["censo.log", "censo2rc_NEW_OLD", "censo2rc_NEW"]
        for f in delete:
            f = os.path.join(os.path.split(__file__)[0], f)
            if os.path.exists(f):
                if os.path.isdir(f):
                    shutil.rmtree(f)
                else:
                    os.remove(f)
63 |
64 |
65 | if __name__ == "__main__":  # run this module's tests when the file is executed directly
66 | unittest.main()
67 |
--------------------------------------------------------------------------------
/test/test_ensembledata.py:
--------------------------------------------------------------------------------
1 | from censo.cli.cml_parser import parse
2 | from censo.params import DESCR
3 | from censo.ensembledata import EnsembleData
4 | import pytest
5 |
6 |
7 | def test_read_input(self):
8 | # Read input via python instruction
9 |
10 | # Read input passed via cml args
11 | test_args = parse(argv="-i fixtures/crest_conformers.xyz".split())
12 | ensemble = EnsembleData(test_dir, args=test_args)
13 | ensemble.read_input(test_args.inp)
14 | nconf = 7
15 | assert nconf == len(ensemble.conformers)
16 | assert 0 == ensemble.runinfo["charge"]
17 | assert 0 == ensemble.runinfo["unpaired"]
18 |
--------------------------------------------------------------------------------