├── .git_archival.txt ├── .gitattributes ├── .github └── workflows │ └── codeql.yml ├── .gitignore ├── COPYING ├── COPYING.LESSER ├── README.md ├── bin ├── c2anmr ├── censo ├── nmrplot └── uvvisplot ├── environment.yaml ├── pyproject.toml ├── src └── censo │ ├── __init__.py │ ├── __main__.py │ ├── assets │ ├── basis_sets.json │ ├── censo_dfa_settings.json │ ├── censo_nmr_ref.json │ ├── censo_solvents_db.json │ ├── dfa.bu │ ├── hexadecane_25.pot │ ├── octanol_25.pot │ ├── old_solvents_db.json │ ├── solvents.json │ ├── solvents_dc.json │ ├── supporting_info.json │ └── wet-octanol_25.pot │ ├── cli │ ├── __init__.py │ ├── cml_parser.py │ └── interface.py │ ├── configuration.py │ ├── datastructure.py │ ├── ensembledata.py │ ├── ensembleopt │ ├── __init__.py │ ├── optimization.py │ ├── optimizer.py │ ├── prescreening.py │ ├── refinement.py │ └── screening.py │ ├── logging.py │ ├── orca_processor.py │ ├── parallel.py │ ├── params.py │ ├── part.py │ ├── properties │ ├── __init__.py │ ├── nmr.py │ ├── property_calculator.py │ └── uvvis.py │ ├── qm_processor.py │ ├── tm_processor.py │ └── utilities.py └── test ├── __init__.py ├── conftest.py ├── fixtures ├── crest_conformers.xyz ├── inp ├── inp2 ├── test.template └── testinp ├── test_cli └── test_interface.py └── test_ensembledata.py /.git_archival.txt: -------------------------------------------------------------------------------- 1 | node: 3326db4579d1f630c28fce17e3b3e2d66070b8bd 2 | node-date: 2025-05-19T11:34:57+02:00 3 | dscribe-name: v2.1.3-6-g3326db45e 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | .git_archival.txt export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | schedule: 9 | - cron: "42 13 * * 6" 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: [ python ] 24 | 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v3 28 | 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v2 31 | with: 32 | languages: ${{ matrix.language }} 33 | queries: +security-and-quality 34 | 35 | - name: Autobuild 36 | uses: github/codeql-action/autobuild@v2 37 | 38 | - name: Perform CodeQL Analysis 39 | uses: github/codeql-action/analyze@v2 40 | with: 41 | category: "/language:${{ matrix.language }}" 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/vscode 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=vscode 3 | 4 | ### vscode ### 5 | .vscode/* 6 | !.vscode/settings.json 7 | !.vscode/tasks.json 8 | !.vscode/launch.json 9 | !.vscode/extensions.json 10 | *.code-workspace 11 | 12 | # End of https://www.toptal.com/developers/gitignore/api/vscode 13 | 14 | ###pycache## 15 | __pycache__/ 16 | 17 | 18 | # packaging 19 | *.egg-info/ 20 | 21 | # venv 22 | venv/* 23 | 24 | # pycharm 25 | .idea/* 26 | 
-------------------------------------------------------------------------------- /COPYING.LESSER: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. 
You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CENSO - Commandline ENergetic SOrting of Conformer-Rotamer Ensembles 2 | ![censo_logo_300dpi](https://github.com/user-attachments/assets/0e6bac6a-2637-4207-8eca-3122ab90112a) 3 | CENSO is a Python package meant to automate refinement of Conformer-Rotamer ensembles on DFT level, as well as calculation of ensemble properties, e.g. NMR parameters. 4 | It can be used from the command line as well as using custom wrapper scripts. 5 | 6 | ## NEW: CENSO 2.0 7 | This is the updated version of the former CENSO 1.3 program. New features include the possibility to use CENSO as a package from within Python, template files, json outputs, and more! For more information about the use and the capabilities of CENSO 2.0 visit the documentation [here](https://xtb-docs.readthedocs.io/en/latest/CENSO_docs/censo.html). 8 | 9 | ## CENSO 2.1.3 10 | In the most recent version of CENSO 2.1, the code was cleaned up and many bug fixes were implemented. When it comes to functionality, the multitemp mode and constraints for the geometry optimization were removed, since they did not work (reliably). Also, a new system for solvent lookup was implemented, which is also going to be used for CENSO 2.2, and completes all solvent name mappings. 
This way, all solvents should be callable using a multitude of different aliases (if they are available for the respective solvation model). Also, printouts at the end of the UV/Vis and NMR calculations were added. 11 | 12 | For usage via the CLI, it is now no longer necessary to provide `--maxcores` or `-i`, since default values are now defined for both cases. It is now also possible to change the minimum number of threads when calling external programs using `--omp-min`. The same can be achieved by modifying `Config.OMPMIN` in Python. 13 | 14 | # Installation 15 | CENSO can be installed using `pip` by running 16 | 17 | pip install . 18 | 19 | If you want to install and run `CENSO` without `pip`, you can add the `CENSO/src` directory to your `$PYTHONPATH` and add `CENSO/bin` to your `$PATH`. 20 | 21 | # Usage 22 | After installing CENSO via `pip`, it can be called using either 23 | ``` 24 | python -m censo 25 | ``` 26 | or 27 | ``` 28 | censo 29 | ``` 30 | since the CLI is now implemented as an entry point. As of version 2.1.3, it is also no longer necessary to call CENSO using `--maxcores` or `-i`, since both have default values now. 31 | 32 | For information about command line options, use the `-h` option. 33 | 34 | If you chose not to install it using `pip` and you added the `bin` directory to your `$PATH`, you can also just invoke `censo`. 35 | 36 | CENSO can also be used as a package. A basic setup for a CENSO run in a Python file could look like this: 37 | ```python 38 | from censo.ensembledata import EnsembleData 39 | from censo.configuration import configure 40 | from censo.ensembleopt import Prescreening, Screening, Optimization 41 | from censo.properties import NMR 42 | from censo.params import Config 43 | import os 44 | # CENSO will put all files in the current working directory (os.getcwd()) 45 | input_path = "rel/path/to/your/inputfile" # path relative to the working directory 46 | ensemble = EnsembleData(input_file=input_path) 47 | # the above can be used if your molecule is neutral and closed shell, otherwise 48 | # it is necessary to proceed with e.g. 49 | # ensemble = EnsembleData() 50 | # ensemble.read_input(input_path, charge=-1, unpaired=1) 51 | 52 | # If the user wants to use a specific rcfile: 53 | configure("/abs/path/to/rcfile") 54 | 55 | # Get the number of available cpu cores on this machine 56 | # This is also the default value that CENSO uses 57 | # This number can also be set to any other integer value; it will automatically be checked for validity 58 | Config.NCORES = os.cpu_count() 59 | 60 | # Another possibly important setting is OMP, which will get used if you disabled the automatic 61 | # load balancing in the settings 62 | Config.OMP = 4 63 | 64 | # The user can also choose to change specific settings of the parts 65 | # Please take note of the following: 66 | # - the settings of certain parts, e.g.
Prescreening are changed using set_setting(name, value) 67 | # - general settings are changed by using set_general_setting(name, value) (it does not matter which part you call it from) 68 | # - the values you want to set must comply with limits and the type of the setting 69 | Prescreening.set_setting("threshold", 5.0) 70 | Prescreening.set_general_setting("solvent", "dmso") 71 | 72 | # It is also possible to use a dict to set multiple values in one step 73 | settings = { 74 | "threshold": 3.5, 75 | "func": "pbeh-3c", 76 | "implicit": True, 77 | } 78 | Screening.set_settings(settings, complete=False) 79 | # the complete kwarg tells the method whether to set the undefined settings using defaults or leave them at their current value 80 | 81 | 82 | # Set up and run all the parts that the user wants to run 83 | # The parts are run in order here; it is also possible to use a custom order or to run some parts multiple times 84 | # Running a part will return an instance of the respective type 85 | # References to the resulting part instances will be appended to a list in the EnsembleData object (ensemble.results) 86 | # Note, though, that currently this will lead to results being overwritten in your working directory 87 | # (you could circumvent this by moving/renaming the folders) 88 | results, timings = zip(*[part.run(ensemble) for part in [Prescreening, Screening, Optimization, NMR]]) 89 | 90 | # You can access the results using the ensemble object 91 | # You can also find all the results in the .json output files 92 | print(ensemble.results[0].data["results"]["CONF5"]["sp"]["energy"]) 93 | ``` 94 | 95 | # License 96 | 97 | ``CENSO`` is free software: you can redistribute it and/or modify it under 98 | the terms of the GNU Lesser General Public License as published by 99 | the Free Software Foundation, either version 3 of the License, or 100 | (at your option) any later version. 101 | 102 | ``CENSO`` is distributed in the hope that it will be useful, 103 | but without any warranty; without even the implied warranty of 104 | merchantability or fitness for a particular purpose. See the 105 | GNU Lesser General Public License for more details.
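As a complement to the usage example above, here is a minimal sketch of how the per-conformer results could be collected after such a run. It assumes, based on the `CONF5` access pattern shown above, that each part instance in `ensemble.results` stores its per-conformer data in `data["results"]` keyed by conformer name, and that the single-point energies are reported in Hartree; both are assumptions rather than documented API guarantees.
```python
# Hypothetical post-processing sketch, continuing from the usage example above.
# Assumption: part.data["results"] maps conformer names (e.g. "CONF5") to dicts
# that hold the single-point energy under ["sp"]["energy"].
prescreening = ensemble.results[0]  # first part that was run

energies = {
    conf: res["sp"]["energy"]
    for conf, res in prescreening.data["results"].items()
}

# Print the conformers sorted by energy (assumed to be in Hartree)
for conf, energy in sorted(energies.items(), key=lambda item: item[1]):
    print(f"{conf}: {energy:.6f}")
```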
106 | -------------------------------------------------------------------------------- /bin/c2anmr: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import shutil 3 | from pathlib import Path 4 | import sys 5 | 6 | def main(): 7 | # Define source and destination directories 8 | src_dir = Path("4_NMR") 9 | dest_dir = Path("anmr") 10 | 11 | # Create destination directory 12 | dest_dir.mkdir(parents=True, exist_ok=True) 13 | 14 | # Copy all files matching anmr_* into dest_dir 15 | for file in Path.cwd().glob("anmr_*"): 16 | if file.is_file(): 17 | shutil.copy(file, dest_dir / file.name) 18 | 19 | # Loop through CONF* subdirectories in src_dir 20 | for conf_dir in src_dir.glob("CONF*"): 21 | if not conf_dir.is_dir(): 22 | continue 23 | 24 | # Build target NMR directory under dest_dir/CONF#/NMR 25 | new_nmr_dir = dest_dir / conf_dir.name / "NMR" 26 | new_nmr_dir.mkdir(parents=True, exist_ok=True) 27 | 28 | # Copy the two specific files if they exist 29 | for fname in ("nmrprop.dat", "coord"): 30 | src_file = conf_dir / fname 31 | if src_file.exists(): 32 | shutil.copy(src_file, new_nmr_dir / fname) 33 | else: 34 | print(f"Warning: {src_file} not found", file=sys.stderr) 35 | 36 | if __name__ == "__main__": 37 | main() 38 | 39 | -------------------------------------------------------------------------------- /bin/censo: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import os 4 | 5 | parentdir = os.path.split(__file__)[0] 6 | sys.path.insert(0, f"{os.path.join(parentdir, '..', 'src')}") 7 | from censo.cli.interface import entry_point 8 | 9 | entry_point() 10 | -------------------------------------------------------------------------------- /bin/uvvisplot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # Copyright (C) 2024 Leopold M. Seidler 5 | # 6 | # UVVISPLOT is free software: you can redistribute it and/or modify it under 7 | # the terms of the GNU Lesser General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # UVVISPLOT is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU Lesser General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Lesser General Public License 17 | # along with UVVISPLOT. If not, see . 18 | 19 | """ 20 | Created on Mar 17, 2024 21 | last updated on 17-March-2024 22 | @author: lmseidler 23 | """ 24 | import matplotlib.pyplot as plt 25 | import os 26 | import argparse 27 | import json 28 | import numpy as np 29 | import pandas as pd 30 | 31 | PLANCK = 6.62607015e-34 32 | C = 2.998e8 33 | COULOMB = 1.602e-19 34 | 35 | 36 | descr = """ 37 | __________________________________________________ 38 | | | 39 | | UVVISPLOT | 40 | | Plotting of ensemble UV/Vis spectra | 41 | | University of Bonn, MCTC | 42 | | March 2024 | 43 | | v 1.0.0 | 44 | | L. M. 
Seidler | 45 | |__________________________________________________| 46 | """ 47 | 48 | 49 | def get_args(): 50 | parser = argparse.ArgumentParser( 51 | description="", 52 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 53 | usage=argparse.SUPPRESS, 54 | ) # argparse.RawDescriptionHelpFormatter) #, 55 | parser.add_argument( 56 | "-mode", 57 | dest="mode", 58 | action="store", 59 | required=False, 60 | default="wavenumber", 61 | type=str, 62 | choices=["wavenumber", "energy", "wavelength"], 63 | help="Set the unit of the x-axis. Can be wavenumber [cm-1], energy [eV] or wavelength [nm]." 64 | ) 65 | parser.add_argument( 66 | "-start", 67 | dest="start", 68 | action="store", 69 | required=False, 70 | type=float, 71 | help="Start plotting from ''. Default values: 300nm/1.8eV/14000cm-1.", 72 | ) 73 | parser.add_argument( 74 | "-end", 75 | dest="end", 76 | action="store", 77 | required=False, 78 | type=float, 79 | help="End plotting at ''. '' must be larger than ''. Default values: 700nm/4.2eV/33000cm-1.", 80 | ) 81 | parser.add_argument( 82 | "-title", 83 | "--title", 84 | dest="title", 85 | action="store", 86 | required=False, 87 | default="UVVis-PLOT", 88 | type=str, 89 | help="Set title of entire plot. If no title is required use " 90 | "'<--title ''>'.", 91 | ) 92 | parser.add_argument( 93 | "-lw", 94 | "--linewidth", 95 | dest="lw", 96 | action="store", 97 | required=False, 98 | default=1.6131e3, 99 | type=float, 100 | help="Set linewidth in cm-1.", 101 | ) 102 | parser.add_argument( 103 | "-i", 104 | "--inp", 105 | dest="inp", 106 | action="store", 107 | required=True, 108 | help="Provide input file.", 109 | ) 110 | parser.add_argument( 111 | "-fontsize", 112 | "--fontsize", 113 | dest="fontsize", 114 | action="store", 115 | required=False, 116 | default=15, 117 | type=float, 118 | help="Set fontsize for entire plot.", 119 | ) 120 | parser.add_argument( 121 | "-o", 122 | "--out", 123 | dest="out", 124 | action="store", 125 | required=False, 126 | default="nmrplot", 127 | help="Provide name of the output file (including ending).", 128 | ) 129 | args = parser.parse_args() 130 | return args 131 | 132 | 133 | def read_data(inp): 134 | cwd = os.getcwd() 135 | with open(os.path.join(cwd, inp), "r") as f: 136 | data = json.load(f) 137 | 138 | return data 139 | 140 | 141 | def plot(data, args): 142 | # Get plotting mode 143 | mode = args.mode 144 | 145 | # Select start value 146 | if args.start is not None: 147 | start = args.start 148 | else: 149 | defaults = { 150 | "wavelength": 300, 151 | "wavenumber": 14000, 152 | "energy": 1.8 153 | } 154 | start = defaults[mode] 155 | 156 | # Select end value 157 | if args.end is not None: 158 | end = args.end 159 | else: 160 | defaults = { 161 | "wavelength": 700, 162 | "wavenumber": 33000, 163 | "energy": 4.2 164 | } 165 | end = defaults[mode] 166 | 167 | assert end > start 168 | xrange = np.linspace(start, end, 10000) 169 | 170 | # Dump single contributions to csv file 171 | confs = set([d[2] for d in data]) 172 | exc_number = {conf: 0 for conf in confs} 173 | contributions = {} 174 | 175 | for exc in data: 176 | yrange = gaussian_signal(xrange, exc[0], exc[1], args.lw, mode=mode) 177 | contributions[f"{exc[2]}_S{exc_number[exc[2]]}"] = yrange 178 | 179 | exc_number[exc[2]] += 1 180 | 181 | cwd = os.getcwd() 182 | contributions = pd.DataFrame.from_dict(contributions) 183 | contributions.to_csv(os.path.join(cwd, "contributions.csv")) 184 | print("All contributions written to contributions.csv.") 185 | 186 | # Plot the whole spectrum 187 | fig, 
ax = plt.subplots() 188 | yrange = contributions.sum(axis=1) 189 | ax.plot(xrange, yrange) 190 | ax.set_title(args.title) 191 | labels = { 192 | "wavelength": "$\mathrm{nm}$", 193 | "wavenumber": "$\mathrm{cm-1}$", 194 | "energy": "$\mathrm{eV}$", 195 | } 196 | ax.set_xlabel(f"{args.mode} [{labels[args.mode]}]") 197 | ax.set_ylabel("$\epsilon$ [a. u.]") 198 | 199 | return fig 200 | 201 | 202 | def gaussian_signal(xrange, center_wl, eps_max, lw, mode="wavelength"): 203 | # E = h ν = h c/λ 204 | # <=> 1/λ = E / (h c) 205 | # 1 nm = 1e-7 cm 206 | # 1 cm-1 = 1e7 nm-1 207 | if mode == "wavelength": 208 | return eps_max * np.exp(- ((1 / xrange - 1 / center_wl) / (lw * 1e7))**2) 209 | elif mode == "wavenumber": 210 | return eps_max * np.exp(- ((xrange - 1 / center_wl * 1e7) / lw)**2) 211 | elif mode == "energy": 212 | return eps_max * np.exp(- ((xrange * COULOMB / (PLANCK * C) - 1 / center_wl * 1e9) / (lw * 1e2))**2) 213 | 214 | 215 | def save_plot(fig, out): 216 | cwd = os.getcwd() 217 | fig.savefig(os.path.join(cwd, out), format="pdf") 218 | 219 | 220 | def main(): 221 | print(descr) 222 | 223 | # Parse cml args 224 | args = get_args() 225 | 226 | # Read data 227 | data = read_data(args.inp) 228 | 229 | # Plot data 230 | figure = plot(data, args) 231 | 232 | # Save plot 233 | save_plot(figure, args.out) 234 | 235 | 236 | if __name__ == "__main__": 237 | main() 238 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: censo 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python=3.10 7 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools_scm[toml]"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "censo" 7 | dynamic = ["version", "readme"] 8 | requires-python = ">= 3.10" 9 | 10 | [project.urls] 11 | homepage = "https://github.com/grimme-lab/CENSO" 12 | documentation = "https://xtb-docs.readthedocs.io/en/latest/CENSO_docs/censo.html" 13 | 14 | [project.optional-dependencies] 15 | scripts = [ 16 | "numpy", 17 | "matplotlib", 18 | "pandas" 19 | ] 20 | 21 | [project.scripts] 22 | censo = "censo.cli.interface:entry_point" 23 | c2anmr = "bin.c2anmr:main" 24 | uvvisplot = "bin.uvvisplot:main" 25 | nmrplot = "bin.nmrplot:main" 26 | 27 | [tool.setuptools.packages.find] 28 | where = ["src", "."] 29 | include = ["censo*", "bin"] 30 | 31 | [tool.setuptools.dynamic] 32 | readme = {file = "README.md"} 33 | 34 | [tool.setuptools_scm] 35 | version_file = "src/censo/__version__.py" 36 | -------------------------------------------------------------------------------- /src/censo/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration import configure 2 | from .params import DESCR 3 | from .__version__ import __version__ 4 | 5 | print(DESCR) 6 | configure() 7 | 8 | from .cli import interface, cml_parser 9 | from . 
import ( 10 | configuration, 11 | ensembledata, 12 | datastructure, 13 | orca_processor, 14 | parallel, 15 | part, 16 | qm_processor, 17 | utilities, 18 | ensembleopt, 19 | properties, 20 | ) 21 | -------------------------------------------------------------------------------- /src/censo/__main__.py: -------------------------------------------------------------------------------- 1 | from censo.cli.interface import entry_point 2 | 3 | if __name__ == "__main__": 4 | entry_point() 5 | -------------------------------------------------------------------------------- /src/censo/assets/basis_sets.json: -------------------------------------------------------------------------------- 1 | [ 2 | "SVP", 3 | "SV(P)", 4 | "TZVP", 5 | "TZVPP", 6 | "QZVP", 7 | "QZVPP", 8 | "def2-SV(P)", 9 | "def2-mSVP", 10 | "def2-SVP", 11 | "def2-TZVP", 12 | "def2-TZVPP", 13 | "def2-mTZVP", 14 | "def2-mTZVPP", 15 | "def2-TZVPD", 16 | "def2-SVPD", 17 | "def-SVP", 18 | "def-SV(P)", 19 | "def2-QZVP", 20 | "DZ", 21 | "QZV", 22 | "cc-pVDZ", 23 | "cc-pVTZ", 24 | "cc-pVQZ", 25 | "cc-pV5Z", 26 | "aug-cc-pVDZ", 27 | "aug-cc-pVTZ", 28 | "aug-cc-pVQZ", 29 | "aug-cc-pV5Z", 30 | "def2-QZVPP", 31 | "minix", 32 | "pcJ-0", 33 | "pcJ-1", 34 | "pcJ-2", 35 | "pcSseg-0", 36 | "pcSseg-1", 37 | "pcSseg-2", 38 | "pcSseg-3", 39 | "x2c-SVPall-s", 40 | "x2c-TZVPall-s", 41 | "def2-TZVP(-f)", 42 | "def2-QZVP(-gf)", 43 | "def2-TZVPD(-f)" 44 | ] -------------------------------------------------------------------------------- /src/censo/assets/censo_dfa_settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "composite_method_basis": { 3 | "pbeh-3c": "def2-mSVP", 4 | "b97-3c": "def2-mTZVP", 5 | "hf-3c": "minix", 6 | "r2scan-3c": "def2-mTZVPP" 7 | }, 8 | "relay_functionals": { 9 | "pbe": "pbe-d4", 10 | "tpss": "tpss-d4", 11 | "b97-d": "b97-d3(0)", 12 | "kt1": "kt1-novdw", 13 | "kt2": "kt2-novdw", 14 | "pbe0": "pbe0-d4", 15 | "pw6b95": "pw6b95-d4", 16 | "b3lyp": "b3lyp-d4", 17 | "b3-lyp": "b3lyp-d4", 18 | "dsd-blyp": "dsd-blyp-d3" 19 | }, 20 | "functionals": { 21 | "dummy": { 22 | "tm": null, 23 | "orca": "dummy", 24 | "disp": "dummy", 25 | "type": "dummy" 26 | }, 27 | "pbeh-3c": { 28 | "tm": "pbeh-3c", 29 | "orca": "pbeh-3c", 30 | "disp": "composite", 31 | "type": "composite_hybrid" 32 | }, 33 | "b97-3c": { 34 | "tm": "b97-3c", 35 | "orca": "b97-3c", 36 | "disp": "composite", 37 | "type": "composite_gga" 38 | }, 39 | "r2scan-3c": { 40 | "tm": "r2scan-3c", 41 | "orca": "r2scan-3c", 42 | "disp": "composite", 43 | "type": "composite_mgga" 44 | }, 45 | "r2scan-novdw": { 46 | "tm": "r2scan", 47 | "orca": "r2scan", 48 | "disp": "novdw", 49 | "type": "mgga" 50 | }, 51 | "r2scan-d3": { 52 | "tm": "r2scan", 53 | "orca": "r2scan", 54 | "disp": "d3bj", 55 | "type": "mgga" 56 | }, 57 | "r2scan-d3(0)": { 58 | "tm": "r2scan", 59 | "orca": "r2scan", 60 | "disp": "d3(0)", 61 | "type": "mgga" 62 | }, 63 | "r2scan-d4": { 64 | "tm": "r2scan", 65 | "orca": "r2scan", 66 | "disp": "d4", 67 | "type": "mgga" 68 | }, 69 | "pbe-novdw": { 70 | "tm": "pbe", 71 | "orca": "pbe", 72 | "disp": "novdw", 73 | "type": "gga" 74 | }, 75 | "pbe-d3": { 76 | "tm": "pbe", 77 | "orca": "pbe", 78 | "disp": "d3bj", 79 | "type": "gga" 80 | }, 81 | "pbe-d3(0)": { 82 | "tm": "pbe", 83 | "orca": "pbe", 84 | "disp": "d3(0)", 85 | "type": "gga" 86 | }, 87 | "pbe-d4": { 88 | "tm": "pbe", 89 | "orca": "pbe", 90 | "disp": "d4", 91 | "type": "gga" 92 | }, 93 | "pbe-nl": { 94 | "tm": "pbe", 95 | "orca": null, 96 | "disp": "nl", 97 | "type": "gga" 98 | }, 
99 | "tpss-novdw": { 100 | "tm": "tpss", 101 | "orca": "tpss", 102 | "disp": "novdw", 103 | "type": "mgga" 104 | }, 105 | "tpss-d3": { 106 | "tm": "tpss", 107 | "orca": "tpss", 108 | "disp": "d3bj", 109 | "type": "mgga" 110 | }, 111 | "tpss-d3(0)": { 112 | "tm": "tpss", 113 | "orca": "tpss", 114 | "disp": "d3(0)", 115 | "type": "mgga" 116 | }, 117 | "tpss-d4": { 118 | "tm": "tpss", 119 | "orca": "tpss", 120 | "disp": "d4", 121 | "type": "mgga" 122 | }, 123 | "tpss-nl": { 124 | "tm": "tpss", 125 | "orca": null, 126 | "disp": "nl", 127 | "type": "mgga" 128 | }, 129 | "revtpss-novdw": { 130 | "tm": "revtpss", 131 | "orca": "revTPSS", 132 | "disp": "novdw", 133 | "type": "mgga" 134 | }, 135 | "tpssh-novdw": { 136 | "tm": null, 137 | "orca": "tpssh", 138 | "disp": "novdw", 139 | "type": "global_hybrid" 140 | }, 141 | "tpssh-d3": { 142 | "tm": null, 143 | "orca": "tpssh", 144 | "disp": "d3", 145 | "type": "global_hybrid" 146 | }, 147 | "tpssh-d3(0)": { 148 | "tm": null, 149 | "orca": "tpssh", 150 | "disp": "d3(0)", 151 | "type": "global_hybrid" 152 | }, 153 | "tpssh-d4": { 154 | "tm": null, 155 | "orca": "tpssh", 156 | "disp": "d4", 157 | "type": "global_hybrid" 158 | }, 159 | "b97-d3": { 160 | "tm": "b97-d", 161 | "orca": "b97-d3", 162 | "disp": "included", 163 | "type": "gga" 164 | }, 165 | "b97-d4": { 166 | "tm": null, 167 | "orca": "b97", 168 | "disp": "d4", 169 | "type": "gga" 170 | }, 171 | "kt1-novdw": { 172 | "tm": "kt1", 173 | "orca": null, 174 | "disp": "novdw", 175 | "type": "gga" 176 | }, 177 | "kt2-novdw": { 178 | "tm": "kt2", 179 | "orca": "kt2", 180 | "disp": "novdw", 181 | "type": "gga" 182 | }, 183 | "pbe0-novdw": { 184 | "tm": "pbe0", 185 | "orca": "pbe0", 186 | "disp": "novdw", 187 | "type": "global_hybrid" 188 | }, 189 | "pbe0-d3": { 190 | "tm": "pbe0", 191 | "orca": "pbe0", 192 | "disp": "d3bj", 193 | "type": "global_hybrid" 194 | }, 195 | "pbe0-d3(0)": { 196 | "tm": "pbe0", 197 | "orca": "pbe0", 198 | "disp": "d3(0)", 199 | "type": "global_hybrid" 200 | }, 201 | "pbe0-d4": { 202 | "tm": "pbe0", 203 | "orca": "pbe0", 204 | "disp": "d4", 205 | "type": "global_hybrid" 206 | }, 207 | "pbe0-nl": { 208 | "tm": "pbe0", 209 | "orca": null, 210 | "disp": "nl", 211 | "type": "global_hybrid" 212 | }, 213 | "pw6b95-novdw": { 214 | "tm": "pw6b95", 215 | "orca": "pw6b95", 216 | "disp": "novdw", 217 | "type": "global_hybrid" 218 | }, 219 | "pw6b95-d3": { 220 | "tm": "pw6b95", 221 | "orca": "pw6b95", 222 | "disp": "d3bj", 223 | "type": "global_hybrid" 224 | }, 225 | "pw6b95-d3(0)": { 226 | "tm": "pw6b95", 227 | "orca": "pw6b95", 228 | "disp": "d3(0)", 229 | "type": "global_hybrid" 230 | }, 231 | "pw6b95-d4": { 232 | "tm": "pw6b95", 233 | "orca": "pw6b95", 234 | "disp": "d4", 235 | "type": "global_hybrid" 236 | }, 237 | "b3lyp-novdw": { 238 | "tm": "b3-lyp", 239 | "orca": "b3lyp", 240 | "disp": "novdw", 241 | "type": "global_hybrid" 242 | }, 243 | "b3lyp-d3": { 244 | "tm": "b3-lyp", 245 | "orca": "b3lyp", 246 | "disp": "d3bj", 247 | "type": "global_hybrid" 248 | }, 249 | "b3lyp-d3(0)": { 250 | "tm": "b3-lyp", 251 | "orca": "b3lyp", 252 | "disp": "d3(0)", 253 | "type": "global_hybrid" 254 | }, 255 | "b3lyp-d4": { 256 | "tm": "b3-lyp", 257 | "orca": "b3lyp", 258 | "disp": "d4", 259 | "type": "global_hybrid" 260 | }, 261 | "b3lyp-nl": { 262 | "tm": "b3-lyp", 263 | "orca": "b3lyp", 264 | "disp": "nl", 265 | "type": "global_hybrid" 266 | }, 267 | "wb97x-v": { 268 | "tm": "wb97x-v", 269 | "orca": "wb97x-v", 270 | "disp": "included", 271 | "type": "rs_hybrid" 272 | }, 273 | "wb97x-d3": { 274 | 
"tm": null, 275 | "orca": "wb97x-d3", 276 | "disp": "included", 277 | "type": "rs_hybrid" 278 | }, 279 | "wb97x-d3bj": { 280 | "tm": null, 281 | "orca": "wb97x-d3bj", 282 | "disp": "included", 283 | "type": "rs_hybrid" 284 | }, 285 | "wb97x-d4": { 286 | "tm": null, 287 | "orca": "wb97x-d4", 288 | "disp": "included", 289 | "type": "rs_hybrid" 290 | }, 291 | "wb97m-v": { 292 | "tm": null, 293 | "orca": "wb97m-v", 294 | "disp": "included", 295 | "type": "rs_hybrid" 296 | }, 297 | "chyf-b95-novdw": { 298 | "tm": "chyf-b95", 299 | "orca": null, 300 | "disp": "novdw", 301 | "type": "local_hybrid" 302 | }, 303 | "chyf-b95-d3": { 304 | "tm": "chyf-b95", 305 | "orca": null, 306 | "disp": "d3bj", 307 | "type": "local_hybrid" 308 | }, 309 | "chyf-b95-d4": { 310 | "tm": "chyf-b95", 311 | "orca": null, 312 | "disp": "d4", 313 | "type": "local_hybrid" 314 | }, 315 | "dsd-blyp-d3": { 316 | "tm": null, 317 | "orca": "ri-dsd-blyp", 318 | "disp": "d3bj", 319 | "type": "double" 320 | }, 321 | "dsd-pbep86-d3": { 322 | "tm": null, 323 | "orca": "dsd-pbep86", 324 | "disp": "d3bj", 325 | "type": "double" 326 | } 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /src/censo/assets/old_solvents_db.json: -------------------------------------------------------------------------------- 1 | { 2 | "smd": { 3 | "1,1,1-trichloroethane": ["1,1,1-trichloroethane"], 4 | "1,1,2-trichloroethane": ["1,1,2-trichloroethane"], 5 | "1,2,4-trimethylbenzene": ["1,2,4-trimethylbenzene"], 6 | "1,2-dibromoethane": ["1,2-dibromoethane"], 7 | "1,2-dichloroethane": ["1,2-dichloroethane"], 8 | "1,2-ethanediol": ["1,2-ethanediol"], 9 | "1,4-dioxane": ["1,4-dioxane"], 10 | "1-bromo-2-methylpropane": ["1-bromo-2-methylpropane"], 11 | "1-bromooctane": ["1-bromooctane"], 12 | "1-bromopentane": ["1-bromopentane"], 13 | "1-bromopropane": ["1-bromopropane"], 14 | "1-butanol": ["1-butanol"], 15 | "1-chlorohexane": ["1-chlorohexane"], 16 | "1-chloropentane": ["1-chloropentane"], 17 | "1-chloropropane": ["1-chloropropane"], 18 | "1-decanol": ["1-decanol"], 19 | "1-fluorooctane": ["1-fluorooctane"], 20 | "1-heptanol": ["1-heptanol"], 21 | "1-hexanol": ["1-hexanol"], 22 | "1-hexene": ["1-hexene"], 23 | "1-hexyne": ["1-hexyne"], 24 | "1-iodobutane": ["1-iodobutane"], 25 | "1-iodohexadecane": ["1-iodohexadecane"], 26 | "1-iodopentane": ["1-iodopentane"], 27 | "1-iodopropane": ["1-iodopropane"], 28 | "1-nitropropane": ["1-nitropropane"], 29 | "1-nonanol": ["1-nonanol"], 30 | "1-octanol": ["1-octanol"], 31 | "1-pentanol": ["1-pentanol"], 32 | "1-pentene": ["1-pentene"], 33 | "1-propanol": ["1-propanol"], 34 | "2,2,2-trifluoroethanol": ["2,2,2-trifluoroethanol"], 35 | "2,2,4-trimethylpentane": ["2,2,4-trimethylpentane"], 36 | "2,4-dimethylpentane": ["2,4-dimethylpentane"], 37 | "2,4-dimethylpyridine": ["2,4-dimethylpyridine"], 38 | "2,6-dimethylpyridine": ["2,6-dimethylpyridine"], 39 | "2-bromopropane": ["2-bromopropane"], 40 | "2-butanol": ["2-butanol"], 41 | "2-chlorobutane": ["2-chlorobutane"], 42 | "2-heptanone": ["2-heptanone"], 43 | "2-hexanone": ["2-hexanone"], 44 | "2-methoxyethanol": ["2-methoxyethanol"], 45 | "2-methyl-1-propanol": ["2-methyl-1-propanol"], 46 | "2-methyl-2-propanol": ["2-methyl-2-propanol"], 47 | "2-methylpentane": ["2-methylpentane"], 48 | "2-methylpyridine": ["2-methylpyridine"], 49 | "2-nitropropane": ["2-nitropropane"], 50 | "2-octanone": ["2-octanone"], 51 | "2-pentanone": ["2-pentanone"], 52 | "2-propanol": ["2-propanol"], 53 | "2-propen-1-ol": ["2-propen-1-ol"], 54 | 
"e-2-pentene": ["e-2-pentene"], 55 | "3-methylpyridine": ["3-methylpyridine"], 56 | "3-pentanone": ["3-pentanone"], 57 | "4-heptanone": ["4-heptanone"], 58 | "4-methyl-2-pentanone": ["4-methyl-2-pentanone"], 59 | "4-methylpyridine": ["4-methylpyridine"], 60 | "5-nonanone": ["5-nonanone"], 61 | "acetic acid": ["acetic_acid"], 62 | "acetone": ["acetone"], 63 | "acetonitrile": ["mecn", "acetonitrile", "cyanomethane"], 64 | "acetophenone": ["acetophenone"], 65 | "aniline": ["aniline"], 66 | "anisole": ["anisole"], 67 | "benzaldehyde": ["benzaldehyde"], 68 | "benzene": ["benzene"], 69 | "benzonitrile": ["benzonitrile"], 70 | "benzyl alcohol": ["benzyl_alcohol"], 71 | "bromobenzene": ["bromobenzene"], 72 | "bromoethane": ["bromoethane"], 73 | "bromoform": ["bromoform"], 74 | "butanal": ["butanal"], 75 | "butanoic acid": ["butanoic_acid"], 76 | "butanone": ["butanone"], 77 | "butanonitrile": ["butanonitrile"], 78 | "butyl ethanoate": ["butyl_ethanoate"], 79 | "butylamine": ["butylamine"], 80 | "n-butylbenzene": ["n-butylbenzene"], 81 | "sec-butylbenzene": ["sec-butylbenzene"], 82 | "tert-butylbenzene": ["tert-butylbenzene"], 83 | "carbon disulfide": ["carbon_disulfide"], 84 | "carbon tetrachloride": ["carbon_tetrachloride"], 85 | "chlorobenzene": ["chlorobenzene"], 86 | "chloroform": ["chloroform", "chcl3"], 87 | "a-chlorotoluene": ["a-chlorotoluene"], 88 | "o-chlorotoluene": ["o-chlorotoluene"], 89 | "m-cresol": ["m-cresol"], 90 | "o-cresol": ["o-cresol"], 91 | "cyclohexane": ["cyclohexane"], 92 | "cyclohexanone": ["cyclohexanone"], 93 | "mecn": ["mecn", "acetonitrile", "cyanomethane"], 94 | "ccl4": ["ccl4"], 95 | "cyclopentane": ["cyclopentane"], 96 | "cyclopentanol": ["cyclopentanol"], 97 | "cyclopentanone": ["cyclopentanone"], 98 | "decalin (cis/trans mixture)": ["decalin_mix"], 99 | "cis-decalin": ["cis-decalin"], 100 | "n-decane": ["n-decane"], 101 | "dibromomethane": ["dibromomethane"], 102 | "dibutylether": ["dibutylether"], 103 | "o-dichlorobenzene": ["o-dichlorobenzene"], 104 | "e-1,2-dichloroethene": ["e-1,2-dichloroethene"], 105 | "z-1,2-dichloroethene": ["z-1,2-dichloroethene"], 106 | "dichloromethane": ["dichloromethane"], 107 | "diethyl ether": ["diethyl_ether"], 108 | "diethyl sulfide": ["diethyl_sulfide"], 109 | "diethylamine": ["diethylamine"], 110 | "diiodomethane": ["diiodomethane"], 111 | "diisopropyl ether": ["diisopropyl_ether"], 112 | "cis-1,2-dimethylcyclohexane": ["cis-1,2-dimethylcyclohexane"], 113 | "dimethyl disulfide": ["dimethyl_disulfide"], 114 | "n,n-dimethylacetamide": ["n,n-dimethylacetamide"], 115 | "n,n-dimethylformamide": ["n,n-dimethylformamide"], 116 | "dimethylsulfoxide": ["dimethylsulfoxide"], 117 | "diphenylether": ["diphenylether"], 118 | "dipropylamine": ["dipropylamine"], 119 | "n-dodecane": ["n-dodecane"], 120 | "ethanethiol": ["ethanethiol"], 121 | "ethanol": ["ethanol"], 122 | "ethyl ethanoate": ["ethyl_ethanoate"], 123 | "ethyl methanoate": ["ethyl_methanoate"], 124 | "ethyl phenyl ether": ["ethyl_phenyl_ether"], 125 | "ethylbenzene": ["ethylbenzene"], 126 | "fluorobenzene": ["fluorobenzene"], 127 | "formamide": ["formamide"], 128 | "formic acid": ["formic_acid"], 129 | "n-heptane": ["n-heptane"], 130 | "n-hexadecane": ["n-hexadecane"], 131 | "n-hexane": ["n-hexane"], 132 | "hexanoic acid": ["hexanoic_acid"], 133 | "iodobenzene": ["iodobenzene"], 134 | "iodoethane": ["iodoethane"], 135 | "iodomethane": ["iodomethane"], 136 | "isopropylbenzene": ["isopropylbenzene"], 137 | "p-isopropyltoluene": ["p-isopropyltoluene"], 138 | "mesitylene": 
["mesitylene"], 139 | "methanol": ["methanol"], 140 | "methyl benzoate": ["methyl_benzoate"], 141 | "methyl butanoate": ["methyl_butanoate"], 142 | "methyl ethanoate": ["methyl_ethanoate"], 143 | "methyl methanoate": ["methyl_methanoate"], 144 | "methyl propanoate": ["methyl_propanoate"], 145 | "n-methylaniline": ["n-methylaniline"], 146 | "methylcyclohexane": ["methylcyclohexane"], 147 | "n-methylformamide": ["n-methylformamide"], 148 | "nitrobenzene": ["nitrobenzene"], 149 | "nitroethane": ["nitroethane"], 150 | "nitromethane": ["nitromethane"], 151 | "o-nitrotoluene": ["o-nitrotoluene"], 152 | "n-nonane": ["n-nonane"], 153 | "n-octane": ["n-octane"], 154 | "n-pentadecane": ["n-pentadecane"], 155 | "pentanal": ["pentanal"], 156 | "n-pentane": ["n-pentane"], 157 | "pentanoic acid": ["pentanoic_acid"], 158 | "pentyl ethanoate": ["pentyl_ethanoate"], 159 | "pentylamine": ["pentylamine"], 160 | "perfluorobenzene": ["perfluorobenzene"], 161 | "propanal": ["propanal"], 162 | "propanoic acid": ["propanoic_acid"], 163 | "propanonitrile": ["propanonitrile"], 164 | "propyl ethanoate": ["propyl_ethanoate"], 165 | "propylamine": ["propylamine"], 166 | "pyridine": ["pyridine"], 167 | "tetrachloroethene": ["tetrachloroethene"], 168 | "tetrahydrofuran": ["tetrahydrofuran"], 169 | "tetrahydrothiophene-s,s-dioxide": ["tetrahydrothiophene-s,s-dioxide"], 170 | "tetralin": ["tetralin"], 171 | "thiophene": ["thiophene"], 172 | "thiophenol": ["thiophenol"], 173 | "toluene": ["toluene"], 174 | "trans-decalin": ["trans-decalin"], 175 | "tributylphosphate": ["tributylphosphate"], 176 | "trichloroethene": ["trichloroethene"], 177 | "triethylamine": ["triethylamine"], 178 | "n-undecane": ["n-undecane"], 179 | "water": ["h2o", "water"], 180 | "xylene (mixture)": ["xylene_mix"], 181 | "m-xylene": ["m-xylene"], 182 | "o-xylene": ["o-xylene"], 183 | "p-xylene": ["p-xylene"], 184 | "dmf": ["dmf"], 185 | "dmso": ["dmso"], 186 | "phno2": ["phno2"], 187 | "meno2": ["meno2"], 188 | "thf": ["thf"] 189 | }, 190 | "gbsa": { 191 | "acetone": ["propanone", "acetone"], 192 | "acetonitrile": ["mecn", "acetonitrile", "cyanomethane"], 193 | "aniline": ["aniline"], 194 | "benzaldehyde": ["benzaldehyde"], 195 | "benzene": ["benzene"], 196 | "chcl3": ["chloroform", "chcl3"], 197 | "ch2cl2": ["ch2cl2"], 198 | "ccl4": ["ccl4"], 199 | "cs2": ["cs2"], 200 | "dioxane": ["dioxane"], 201 | "dmf": ["dmf"], 202 | "dmso": ["dmso"], 203 | "ether": ["ether"], 204 | "ethanol": ["ethanol"], 205 | "ethylacetate": ["ethylacetate"], 206 | "furane": ["furane"], 207 | "hexadecane": ["hexadecane"], 208 | "hexane": ["hexane"], 209 | "h2o": ["h2o"], 210 | "water": ["water"], 211 | "methanol": ["methanol"], 212 | "nitromethane": ["nitromethane"], 213 | "thf": ["thf"], 214 | "toluene": ["toluene"], 215 | "octanol": ["octanol"], 216 | "woctanol": ["woctanol", "wet_octanol"], 217 | "phenol": ["phenol"] 218 | }, 219 | "alpb": { 220 | "acetone": ["propanone", "acetone"], 221 | "acetonitrile": ["cyanomethane", "acetonitrile"], 222 | "aniline": ["aniline"], 223 | "benzaldehyde": ["benzaldehyde"], 224 | "benzene": ["benzene"], 225 | "chcl3": ["chloroform", "chcl3"], 226 | "ch2cl2": ["ch2cl2", "dcm"], 227 | "ccl4": ["ccl4"], 228 | "cs2": ["cs2"], 229 | "dioxane": ["dioxane"], 230 | "dmf": ["dmf"], 231 | "dmso": ["dmso"], 232 | "ether": ["ether"], 233 | "ethanol": ["ethanol"], 234 | "ethylacetate": ["ethylacetate"], 235 | "furane": ["furane"], 236 | "hexadecane": ["hexadecane"], 237 | "hexane": ["hexane"], 238 | "water": ["water", "h2o"], 239 | "methanol": 
["methanol"], 240 | "nitromethane": ["nitromethane"], 241 | "thf": ["thf"], 242 | "toluene": ["toluene"], 243 | "octanol": ["octanol"], 244 | "woctanol": ["woctanol", "wet_octanol"], 245 | "phenol": ["phenol"] 246 | }, 247 | "cpcm": { 248 | "water": ["h2o", "water"], 249 | "acetone": ["propanone", "acetone"], 250 | "acetonitrile": ["cyanomethane", "acetonitrile"], 251 | "ammonia": ["ammonia"], 252 | "benzene": ["benzene"], 253 | "chloroform": ["chloroform"], 254 | "ch2cl2": ["ch2cl2", "dcm"], 255 | "ccl4": ["ccl4"], 256 | "cyclohexane": ["cyclohexane"], 257 | "dmf": ["dmf"], 258 | "dmso": ["dmso"], 259 | "ethanol": ["ethanol"], 260 | "hexane": ["hexane"], 261 | "methanol": ["methanol"], 262 | "octanol": ["octanol"], 263 | "pyridine": ["pyridine"], 264 | "thf": ["thf"], 265 | "toluene": ["toluene"] 266 | }, 267 | "cosmors-fine": { 268 | "propanone_c0": ["acetone", "propanone", "propanone_c0"], 269 | "chcl3_c0": ["chloroform", "trichloromethane", "chcl3", "chcl3_c0"], 270 | "acetonitrile_c0": [ 271 | "acetonitrile", 272 | "mecn", 273 | "cyanomethane", 274 | "acetonitrile_c0" 275 | ], 276 | "ch2cl2_c0": ["ch2cl2", "ch2cl2_c0"], 277 | "dimethylsulfoxide_c0": ["dimethylsulfoxide", "dimethylsulfoxide_c0"], 278 | "h2o_c0": ["h2o", "h2o_c0"], 279 | "methanol_c0": ["methanol", "methanol_c0"], 280 | "thf_c0": ["thf", "thf_c0"], 281 | "toluene_c0": ["toluene_c0", "toluene"], 282 | "1-octanol_c0": ["1-octanol_c0", "1-octanol"], 283 | "woctanol": ["woctanol", "wet_octanol"], 284 | "n-hexadecane_c0": ["n-hexadecane_c0", "n-hexadecane"], 285 | "dimethylformamide_c0": ["dimethylformamide_c0", "dimethylformamide"], 286 | "aniline_c0": ["aniline_c0", "aniline"], 287 | "cyclohexane_c0": ["cyclohexane_c0", "cyclohexane"], 288 | "ccl4_c0": ["ccl4_c0", "ccl4"], 289 | "diethylether_c0": ["diethylether_c0", "diethylether"], 290 | "ethanol_c0": ["ethanol_c0", "ethanol"], 291 | "hexane_c0": ["hexane_c0", "hexane"], 292 | "nitromethane_c0": ["nitromethane_c0", "nitromethane"], 293 | "benzaldehyde_c0": ["benzaldehyde", "benzaldehyde_c0"], 294 | "benzene_c0": ["benzene", "benzene_c0"], 295 | "cs2_c0": ["cs2", "cs2_c0"], 296 | "dioxane_c0": ["dioxane", "1,4-dioxane", "dioxane_c0"], 297 | "ethylacetate_c0": ["ethylacetate_c0", "ethylacetate"], 298 | "furane_c0": ["furane", "furane_c0"], 299 | "phenol_c0": ["phenol_c0", "phenol"], 300 | "1,2-dichloroethane_c0": ["1,2-dichloroethane_c0", "1,2-dichloroethane"] 301 | }, 302 | "cosmors": { 303 | "propanone_c0": ["acetone", "propanone", "propanone_c0"], 304 | "chcl3_c0": ["chloroform", "trichloromethane", "chcl3", "chcl3_c0"], 305 | "acetonitrile_c0": [ 306 | "acetonitrile", 307 | "mecn", 308 | "cyanomethane", 309 | "acetonitrile_c0" 310 | ], 311 | "ch2cl2_c0": ["ch2cl2", "ch2cl2_c0"], 312 | "dimethylsulfoxide_c0": ["dimethylsulfoxide", "dimethylsulfoxide_c0"], 313 | "h2o_c0": ["h2o", "h2o_c0"], 314 | "methanol_c0": ["methanol", "methanol_c0"], 315 | "thf_c0": ["thf", "thf_c0"], 316 | "toluene_c0": ["toluene_c0", "toluene"], 317 | "1-octanol_c0": ["1-octanol_c0", "1-octanol"], 318 | "woctanol": ["woctanol", "wet_octanol"], 319 | "n-hexadecane_c0": ["n-hexadecane_c0", "n-hexadecane"], 320 | "dimethylformamide_c0": ["dimethylformamide_c0", "dimethylformamide"], 321 | "aniline_c0": ["aniline_c0", "aniline"], 322 | "cyclohexane_c0": ["cyclohexane_c0", "cyclohexane"], 323 | "ccl4_c0": ["ccl4_c0", "ccl4"], 324 | "diethylether_c0": ["diethylether_c0", "diethylether"], 325 | "ethanol_c0": ["ethanol_c0", "ethanol"], 326 | "hexane_c0": ["hexane_c0", "hexane"], 327 | "nitromethane_c0": 
["nitromethane_c0", "nitromethane"], 328 | "benzaldehyde_c0": ["benzaldehyde", "benzaldehyde_c0"], 329 | "benzene_c0": ["benzene", "benzene_c0"], 330 | "cs2_c0": ["cs2", "cs2_c0"], 331 | "dioxane_c0": ["dioxane", "1,4-dioxane", "dioxane_c0"], 332 | "ethylacetate_c0": ["ethylacetate_c0", "ethylacetate"], 333 | "furane_c0": ["furane", "furane_c0"], 334 | "phenol_c0": ["phenol_c0", "phenol"], 335 | "1,2-dichloroethane_c0": ["1,2-dichloroethane_c0", "1,2-dichloroethane"] 336 | }, 337 | "dcosmors": { 338 | "acetonitrile": ["cyanomethane", "acetonitrile", "mecn"], 339 | "aniline": ["aminobenzene", "phenylamine", "aniline"], 340 | "benzene": ["benzene"], 341 | "ccl4": ["ccl4"], 342 | "chcl3": ["chloroform", "chcl3"], 343 | "cyclohexane": ["cyclohexane"], 344 | "diethylether": ["diethylether"], 345 | "dimethylsulfoxide": ["dimethylsulfoxide"], 346 | "ethanol": ["ethanol"], 347 | "h2o": ["h2o", "water"], 348 | "hexadecane": ["hexadecane"], 349 | "hexane": ["hexane"], 350 | "methanol": ["methanol"], 351 | "nitromethane": ["nitromethane"], 352 | "octanol": ["octanol"], 353 | "propanone": ["propanone"], 354 | "thf": ["thf"], 355 | "toluene": ["toluene"], 356 | "wet-octanol": ["wet_octanol", "woctanol"] 357 | }, 358 | "cosmo": { 359 | "acetonitrile": ["cyanomethane", "acetonitrile", "mecn"], 360 | "aniline": ["aminobenzene", "phenylamine", "aniline"], 361 | "benzene": ["benzene"], 362 | "ccl4": ["ccl4"], 363 | "chcl3": ["chloroform", "chcl3"], 364 | "cyclohexane": ["cyclohexane"], 365 | "diethylether": ["diethylether"], 366 | "dimethylsulfoxide": ["dimethylsulfoxide"], 367 | "ethanol": ["ethanol"], 368 | "h2o": ["h2o", "water"], 369 | "hexadecane": ["hexadecane"], 370 | "hexane": ["hexane"], 371 | "methanol": ["methanol"], 372 | "nitromethane": ["nitromethane"], 373 | "octanol": ["octanol"], 374 | "propanone": ["propanone"], 375 | "thf": ["thf"], 376 | "toluene": ["toluene"], 377 | "wet-octanol": ["wet_octanol", "woctanol"] 378 | } 379 | } 380 | -------------------------------------------------------------------------------- /src/censo/assets/solvents.json: -------------------------------------------------------------------------------- 1 | { 2 | "smd": [ 3 | "1,1,1-trichloroethane", 4 | "1,1,2-trichloroethane", 5 | "1,2,4-trimethylbenzene", 6 | "1,2-dibromoethane", 7 | "1,2-dichloroethane", 8 | "1,2-ethanediol", 9 | "1,4-dioxane", 10 | "1-bromo-2-methylpropane", 11 | "1-bromooctane", 12 | "1-bromopentane", 13 | "1-bromopropane", 14 | "1-butanol", 15 | "1-chlorohexane", 16 | "1-chloropentane", 17 | "1-chloropropane", 18 | "1-decanol", 19 | "1-fluorooctane", 20 | "1-heptanol", 21 | "1-hexanol", 22 | "1-hexene", 23 | "1-hexyne", 24 | "1-iodobutane", 25 | "1-iodohexadecane", 26 | "1-iodopentane", 27 | "1-iodopropane", 28 | "1-nitropropane", 29 | "1-nonanol", 30 | "1-octanol", 31 | "1-pentanol", 32 | "1-pentene", 33 | "1-propanol", 34 | "2,2,2-trifluoroethanol", 35 | "2,2,4-trimethylpentane", 36 | "2,4-dimethylpentane", 37 | "2,4-dimethylpyridine", 38 | "2,6-dimethylpyridine", 39 | "2-bromopropane", 40 | "2-butanol", 41 | "2-chlorobutane", 42 | "2-heptanone", 43 | "2-hexanone", 44 | "2-methoxyethanol", 45 | "2-methyl-1-propanol", 46 | "2-methyl-2-propanol", 47 | "2-methylpentane", 48 | "2-methylpyridine", 49 | "2-nitropropane", 50 | "2-octanone", 51 | "2-pentanone", 52 | "2-propanol", 53 | "2-propen-1-ol", 54 | "e-2-pentene", 55 | "3-methylpyridine", 56 | "3-pentanone", 57 | "4-heptanone", 58 | "4-methyl-2-pentanone", 59 | "4-methylpyridine", 60 | "5-nonanone", 61 | "acetic acid", 62 | "acetone", 63 | 
"acetonitrile", 64 | "acetophenone", 65 | "aniline", 66 | "anisole", 67 | "benzaldehyde", 68 | "benzene", 69 | "benzonitrile", 70 | "benzyl alcohol", 71 | "bromobenzene", 72 | "bromoethane", 73 | "bromoform", 74 | "butanal", 75 | "butanoic acid", 76 | "butanone", 77 | "butanonitrile", 78 | "butyl ethanoate", 79 | "butylamine", 80 | "n-butylbenzene", 81 | "sec-butylbenzene", 82 | "tert-butylbenzene", 83 | "carbon disulfide", 84 | "carbon tetrachloride", 85 | "chlorobenzene", 86 | "chloroform", 87 | "a-chlorotoluene", 88 | "o-chlorotoluene", 89 | "m-cresol", "o-cresol", 90 | "cyclohexane", 91 | "cyclohexanone", 92 | "mecn", 93 | "ccl4", 94 | "cyclopentane", 95 | "cyclopentanol", 96 | "cyclopentanone", 97 | "decalin (cis/trans mixture)", 98 | "cis-decalin", 99 | "n-decane", 100 | "dibromomethane", 101 | "dibutylether", 102 | "o-dichlorobenzene", 103 | "e-1,2-dichloroethene", 104 | "z-1,2-dichloroethene", 105 | "dichloromethane", 106 | "diethyl ether", 107 | "diethyl sulfide", 108 | "diethylamine", 109 | "diiodomethane", 110 | "diisopropyl ether", 111 | "cis-1,2-dimethylcyclohexane", 112 | "dimethyl disulfide", 113 | "n,n-dimethylacetamide", 114 | "n,n-dimethylformamide", 115 | "dimethylsulfoxide", 116 | "diphenylether", 117 | "dipropylamine", 118 | "n-dodecane", 119 | "ethanethiol", 120 | "ethanol", 121 | "ethyl ethanoate", 122 | "ethyl methanoate", 123 | "ethyl phenyl ether", 124 | "ethylbenzene", 125 | "fluorobenzene", 126 | "formamide", 127 | "formic acid", 128 | "n-heptane", 129 | "n-hexadecane", 130 | "n-hexane", 131 | "hexanoic acid", 132 | "iodobenzene", 133 | "iodoethane", 134 | "iodomethane", 135 | "isopropylbenzene", 136 | "p-isopropyltoluene", 137 | "mesitylene", 138 | "methanol", 139 | "methyl benzoate", 140 | "methyl butanoate", 141 | "methyl ethanoate", 142 | "methyl methanoate", 143 | "methyl propanoate", 144 | "n-methylaniline", 145 | "methylcyclohexane", 146 | "n-methylformamide", 147 | "nitrobenzene", 148 | "nitroethane", 149 | "nitromethane", 150 | "o-nitrotoluene", 151 | "n-nonane", 152 | "n-octane", 153 | "n-pentadecane", 154 | "pentanal", 155 | "n-pentane", 156 | "pentanoic acid", 157 | "pentyl ethanoate", 158 | "pentylamine", 159 | "perfluorobenzene", 160 | "propanal", 161 | "propanoic acid", 162 | "propanonitrile", 163 | "propyl ethanoate", 164 | "propylamine", 165 | "pyridine", 166 | "tetrachloroethene", 167 | "tetrahydrofuran", 168 | "tetrahydrothiophene-s,s-dioxide", 169 | "tetralin", 170 | "thiophene", 171 | "thiophenol", 172 | "toluene", 173 | "trans-decalin", 174 | "tributylphosphate", 175 | "trichloroethene", 176 | "triethylamine", 177 | "n-undecane", 178 | "water", 179 | "xylene (mixture)", 180 | "m-xylene", 181 | "o-xylene", 182 | "p-xylene", 183 | "dmf", 184 | "dmso", 185 | "phno2", 186 | "meno2", 187 | "thf" 188 | ], 189 | "xtb": [ 190 | "acetone", 191 | "acetonitrile", 192 | "aniline", 193 | "benzaldehyde", 194 | "benzene", 195 | "chcl3", 196 | "ch2cl2", 197 | "ccl4", 198 | "cs2", 199 | "dioxane", 200 | "dmf", 201 | "dmso", 202 | "ether", 203 | "ethanol", 204 | "ethylacetate", 205 | "furane", 206 | "hexadecane", 207 | "hexane", 208 | "h2o", 209 | "water", 210 | "methanol", 211 | "nitromethane", 212 | "thf", 213 | "toluene", 214 | "octanol", 215 | "woctanol", 216 | "phenol" 217 | ], 218 | "cpcm": [ 219 | "water", 220 | "acetone", 221 | "acetonitrile", 222 | "ammonia", 223 | "benzene", 224 | "chloroform", 225 | "ch2cl2", 226 | "ccl4", 227 | "cyclohexane", 228 | "dmf", 229 | "dmso", 230 | "ethanol", 231 | "hexane", 232 | "methanol", 233 | "octanol", 234 | 
"pyridine", 235 | "thf", 236 | "toluene" 237 | ], 238 | "cosmors": [ 239 | "propanone_c0", 240 | "chcl3_c0", 241 | "acetonitrile_c0", 242 | "ch2cl2_c0", 243 | "dimethylsulfoxide_c0", 244 | "h2o_c0", 245 | "methanol_c0", 246 | "thf_c0", 247 | "toluene_c0", 248 | "1-octanol_c0", 249 | "woctanol", 250 | "n-hexadecane_c0", 251 | "dimethylformamide_c0", 252 | "aniline_c0", 253 | "cyclohexane_c0", 254 | "ccl4_c0", 255 | "diethylether_c0", 256 | "ethanol_c0", 257 | "hexane_c0", 258 | "nitromethane_c0", 259 | "benzaldehyde_c0", 260 | "benzene_c0", 261 | "cs2_c0", 262 | "dioxane_c0", 263 | "ethylacetate_c0", 264 | "furane_c0", 265 | "phenol_c0", 266 | "1,2-dichloroethane_c0" 267 | ], 268 | "dcosmors": [ 269 | "acetonitrile", 270 | "aniline", 271 | "benzene", 272 | "ccl4", 273 | "chcl3", 274 | "cyclohexane", 275 | "diethylether", 276 | "dimethylsulfoxide", 277 | "ethanol", 278 | "h2o", 279 | "hexadecane", 280 | "hexane", 281 | "methanol", 282 | "nitromethane", 283 | "octanol", 284 | "propanone", 285 | "thf", 286 | "toluene", 287 | "wet-octanol" 288 | ] 289 | } -------------------------------------------------------------------------------- /src/censo/assets/solvents_dc.json: -------------------------------------------------------------------------------- 1 | { 2 | "acetone": 20.7, 3 | "acetonitrile": 36.6, 4 | "aniline": 6.9, 5 | "benzaldehyde": 18.2, 6 | "benzene": 2.3, 7 | "ccl4": 2.2, 8 | "ch2cl2": 9.1, 9 | "chcl3": 4.8, 10 | "cs2": 2.6, 11 | "cyclohexane": 2.0, 12 | "dichloroethane": 10.125, 13 | "diethylether": 4.4, 14 | "dioxane": 2.2, 15 | "dmf": 38.3, 16 | "dmso": 47.2, 17 | "ethanol": 24.6, 18 | "ethylacetate": 5.9, 19 | "furan": 3.0, 20 | "h2o": 80.1, 21 | "hexadecane": 2.1, 22 | "hexane": 1.9, 23 | "methanol": 32.7, 24 | "nitromethane": 38.2, 25 | "octane": 1.94, 26 | "octanol": 9.9, 27 | "phenol": 8.0, 28 | "thf": 7.6, 29 | "toluene": 2.4, 30 | "woctanol": 8.1 31 | } -------------------------------------------------------------------------------- /src/censo/assets/supporting_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimme-lab/CENSO/3326db4579d1f630c28fce17e3b3e2d66070b8bd/src/censo/assets/supporting_info.json -------------------------------------------------------------------------------- /src/censo/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimme-lab/CENSO/3326db4579d1f630c28fce17e3b3e2d66070b8bd/src/censo/cli/__init__.py -------------------------------------------------------------------------------- /src/censo/cli/cml_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | defininition of internal defaults, checking of logic for parameter combinations, 3 | cml parsing 4 | """ 5 | 6 | import os 7 | from ..params import START_DESCR 8 | import argparse 9 | 10 | 11 | def parse(argv=None) -> argparse.Namespace: 12 | """ 13 | Process commandline arguments 14 | 15 | NOTE: on args with the action 'store_const' with const=True, this is on purpose so as long as the flag is not set, 16 | the arg Namespace evaluates to None. 17 | """ 18 | 19 | parser = argparse.ArgumentParser( 20 | description=START_DESCR, 21 | prog="censo", 22 | ) 23 | 24 | groups = [] 25 | 26 | # RUN SETTINGS 27 | groups.append(parser.add_argument_group("RUN SETTINGS")) 28 | groups[0].add_argument( 29 | "-i", 30 | "--input", 31 | dest="inp", 32 | type=str, 33 | help="Relative path to ensemble file, e.g. 
crest_conformers.xyz (default). ", 34 | default="crest_conformers.xyz", 35 | ) 36 | groups[0].add_argument( 37 | "-n", 38 | "--nconf", 39 | dest="nconf", 40 | type=int, 41 | help="The first 'nconf' conformers will be considered.", 42 | ) 43 | groups[0].add_argument( 44 | "-c", 45 | "--charge", 46 | dest="charge", 47 | default=0, 48 | type=int, 49 | help="Integer charge of the investigated molecule.", 50 | ) 51 | groups[0].add_argument( 52 | "-u", 53 | "--unpaired", 54 | dest="unpaired", 55 | default=0, 56 | type=int, 57 | help="Integer number of unpaired electrons of the investigated molecule.", 58 | ) 59 | groups[0].add_argument( 60 | "-v", 61 | "--version", 62 | dest="version", 63 | action="store_true", 64 | help="Print CENSO version and exit.", 65 | ) 66 | groups[0].add_argument( 67 | "--cleanup", 68 | dest="cleanup", 69 | action="store_true", 70 | help="Delete unneeded files from current working directory.", 71 | ) 72 | groups[0].add_argument( 73 | "--cleanup_all", 74 | dest="cleanup_all", 75 | action="store_true", 76 | help="Delete all CENSO files from previous runs from current working directory. " 77 | "Stronger than -cleanup !", 78 | ) 79 | groups[0].add_argument( 80 | "--new-config", 81 | dest="writeconfig", 82 | action="store_true", 83 | help="Write new configuration file, which is placed into the current " 84 | "directory.", 85 | ) 86 | groups[0].add_argument( 87 | "--inprc", 88 | dest="inprcpath", 89 | help="Use to provide a path to the CENSO configuration file if you want to use a different one" 90 | " than the default (~/.censo2rc).", 91 | ) 92 | groups[0].add_argument( 93 | "--maxcores", 94 | dest="maxcores", 95 | type=int, 96 | help="Number of cores that should be used for CENSO on the machine. If this is not provided CENSO will use " 97 | "the maximum number available. By default this is determined by os.cpu_count().", 98 | default=os.cpu_count(), 99 | ) 100 | groups[0].add_argument( 101 | "-O", 102 | "--omp", 103 | dest="omp", 104 | type=int, 105 | help="Number of OpenMP threads, e.g. 4. Effectively translates to the number of cores used per calculation " 106 | "if load balancing is disabled.", 107 | ) 108 | groups[0].add_argument( 109 | "--omp-min", 110 | dest="ompmin", 111 | type=int, 112 | help="Minimum number of OpenMP threads per process, default is 4. This is mostly important if load balancing is enabled.", 113 | ) 114 | groups[0].add_argument( 115 | "--loglevel", 116 | dest="loglevel", 117 | help="Set the loglevel for all modules to a specified level.", 118 | choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], 119 | ) 120 | groups[0].add_argument( 121 | "--reload", 122 | dest="reload", 123 | nargs="+", 124 | help="Reload data from json output files. List all file names separated by spaces. 
" 125 | "Note that all conformers from the current ensemble need to be included in the output data keys.", 126 | ) 127 | 128 | # GENERAL SETTINGS 129 | groups.append(parser.add_argument_group("GENERAL SETTINGS")) 130 | groups[1].add_argument( 131 | "-T", 132 | "--temperature", 133 | dest="temperature", 134 | type=float, 135 | help="Temperature in Kelvin for thermostatistical evaluation.", 136 | ) 137 | # groups[1].add_argument( 138 | # "--trange", 139 | # dest="trange", 140 | # nargs=3, 141 | # metavar=("start", "end", "step"), 142 | # type=float, 143 | # help="specify a temperature range [start, end, step] e.g.: 250.0 300.0 10.0" 144 | # " resulting in the range [250.0, 260.0, 270.0, 280.0, 290.0, 300.0].", 145 | # ) 146 | groups[1].add_argument( 147 | "--bhess", 148 | dest="bhess", 149 | action="store_const", 150 | const=True, 151 | help="Uses SPH and applies structure constraint to input/DFT geometry " 152 | "for mRRHO calcuation. ", 153 | ) 154 | groups[1].add_argument( 155 | "--consider-sym", 156 | dest="consider_sym", 157 | action="store_const", 158 | const=True, 159 | help="Consider symmetry in mRRHO calcuation (based on desy xtb threshold). ", 160 | ) 161 | groups[1].add_argument( 162 | "--rmsdbias", 163 | dest="rmsdbias", 164 | action="store_const", 165 | const=True, 166 | help="Applies constraint to rmsdpot.xyz to be consistent to CREST. ", 167 | ) 168 | groups[1].add_argument( 169 | "--sm-rrho", 170 | dest="sm_rrho", 171 | type=str, 172 | help="Solvation model used in xTB GmRRHO calculation. Applied if not in " 173 | "gas-phase. Options are 'gbsa' or 'alpb'.", 174 | ) 175 | groups[1].add_argument( 176 | "--evaluate-rrho", 177 | dest="evaluate_rrho", 178 | action="store_const", 179 | const=True, 180 | help="Evaluate mRRHO contribution.", 181 | ) 182 | groups[1].add_argument( 183 | "-s", 184 | "--solvent", 185 | dest="solvent", 186 | type=str, 187 | help="Solvent to be used for Gsolv calculation.", 188 | ) 189 | groups[1].add_argument( 190 | "--gas-phase", 191 | dest="gas-phase", 192 | action="store_const", 193 | const=True, 194 | help="Run calculation in gas-phase, overriding all solvation settings.", 195 | ) 196 | groups[1].add_argument( 197 | "--imagthr", 198 | dest="imagthr", 199 | type=float, 200 | help="threshold for inverting imaginary frequencies for thermo in cm-1," 201 | " e.g. -30.0.", 202 | ) 203 | groups[1].add_argument( 204 | "--sthr", 205 | dest="sthr", 206 | type=float, 207 | help="Rotor cut-off for thermo in cm-1, e.g. 50.0.", 208 | ) 209 | groups[1].add_argument( 210 | "--scale", 211 | dest="scale", 212 | type=float, 213 | help="Scaling factor for frequencies, e.g. 1.0.", 214 | ) 215 | """ 216 | groups[1].add_argument( 217 | "--vapor_pressure", 218 | "-vp", 219 | dest="vapor_pressure", 220 | action="store_true", 221 | help="Gsolv is evaluated for the input molecule in its solution (same). 
" 222 | "Only possible with COSMO-RS.", 223 | ) 224 | """ 225 | 226 | # PRESCREENING SETTINGS 227 | # groups.append(parser.add_argument_group("PRESCREENING SETTINGS")) 228 | 229 | # SCREENING SETTINGS 230 | # groups.append(parser.add_argument_group("SCREENING SETTINGS")) 231 | 232 | # OPTIMIZATION SETTINGS 233 | # groups.append(parser.add_argument_group("OPTIMIZATION SETTINGS")) 234 | 235 | # REFINEMENT SETTINGS 236 | # groups.append(parser.add_argument_group("REFINEMENT SETTINGS")) 237 | 238 | # NMR SETTINGS 239 | # groups.append(parser.add_argument_group("NMR SETTINGS")) 240 | 241 | # OPTROT SETTINGS 242 | # groups.append(parser.add_argument_group("OPTROT SETTINGS")) 243 | 244 | # UVVIS SETTINGS 245 | # groups.append(parser.add_argument_group("UVVIS SETTINGS")) 246 | 247 | # leave these options out for now, implementation for cml complicated 248 | """ 249 | groups[7].add_argument( 250 | "-freqOR", 251 | "--freqOR", 252 | dest="freq_or", 253 | nargs="*", 254 | required=False, 255 | type=float, 256 | help="Frequencies to evaluate specific rotation at in nm, e.g. 589 " 257 | "or 589 700 to evaluate at 598 nm and 700 nm.", 258 | ) 259 | groups[6].add_argument( 260 | "-couplings", 261 | "--couplings", 262 | dest="couplings", 263 | action="store_true", 264 | required=False, 265 | help="Option to run coupling constant calculations. Options are ???.", 266 | ) 267 | groups[6].add_argument( 268 | "-shieldings", 269 | "--shieldings", 270 | dest="shieldings", 271 | action="store_true", 272 | required=False, 273 | help="Option to run shielding constant calculations. Options are ???.", 274 | ) 275 | groups[6].add_argument( 276 | "-hactive", 277 | "--hactive", 278 | dest="h_active", 279 | action="store_true", 280 | required=False, 281 | help="Investigates hydrogen nuclei in coupling and shielding calculations." 282 | "choices=[???]", 283 | ) 284 | groups[6].add_argument( 285 | "-cactive", 286 | "--cactive", 287 | dest="c_active", 288 | action="store_true", 289 | required=False, 290 | help="Investigates carbon nuclei in coupling and shielding calculations." 291 | "choices=[???]", 292 | ) 293 | groups[6].add_argument( 294 | "-factive", 295 | "--factive", 296 | dest="f_active", 297 | action="store_true", 298 | required=False, 299 | help="Investigates fluorine nuclei in coupling and shielding calculations." 300 | "choices=[???]", 301 | ) 302 | groups[6].add_argument( 303 | "-siactive", 304 | "--siactive", 305 | dest="si_active", 306 | action="store_true", 307 | required=False, 308 | help="Investigates silicon nuclei in coupling and shielding calculations." 309 | "choices=[???]", 310 | ) 311 | groups[6].add_argument( 312 | "-pactive", 313 | "--pactive", 314 | dest="p_active", 315 | action="store_true", 316 | required=False, 317 | help="Investigates phosophorus nuclei in coupling and shielding calculations." 318 | "choices=[???]", 319 | ) 320 | groups[4].add_argument( 321 | "-crestcheck", 322 | "--crestcheck", 323 | dest="crestcheck", 324 | action="store_true", 325 | required=False, 326 | 327 | help="Option to sort out conformers after DFT ensembleopt which CREST " 328 | "identifies as identical or rotamers of each other. \nThe identification/" 329 | "analysis is always performed, but the removal of conformers has to " 330 | "be the choice of the user. Options are: [???]", # TODO 331 | ) 332 | groups[4].add_argument( 333 | "-macro", 334 | dest="macrocycles", 335 | action="store_const", 336 | const=True, 337 | required=False, 338 | help="Option to use macrocycles for geometry optimization." 
339 | ) 340 | groups[4].add_argument( 341 | "-optlevel2", 342 | "--optlevel2", 343 | dest="optlevel2", 344 | default=None, 345 | required=False, 346 | help="Option to set the optlevel in part2, only if optimizing with the xTB-optimizer!" 347 | "Allowed values are ***", # TODO 348 | ) 349 | groups[4].add_argument( 350 | "-optcycles", 351 | "--optcycles", 352 | dest="optcycles", 353 | required=False, 354 | type=int, 355 | help="number of cycles in ensemble optimizer.", 356 | ) 357 | groups[4].add_argument( 358 | "-hlow", 359 | "--hlow", 360 | dest="hlow", 361 | required=False, 362 | type=float, 363 | help="Lowest force constant in ANC generation (real), used by xTB-optimizer.", 364 | ) 365 | groups[4].add_argument( 366 | "-spearmanthr", 367 | "--spearmanthr", 368 | dest="spearmanthr", 369 | required=False, 370 | help="Value between -1 and 1 for the spearman correlation coeffient threshold, " 371 | "employed in the ensemlbe optimizer", 372 | ) 373 | groups[4].add_argument( 374 | "-radsize", 375 | "--radsize", 376 | dest="radsize", 377 | required=False, 378 | type=int, 379 | help="Radsize used in the ensembleopt and only for r2scan-3c!", 380 | ) 381 | group1.add_argument( 382 | "-func", 383 | "--functional", 384 | dest="func", 385 | choices=options.value_options["func"], 386 | action="store", 387 | required=False, 388 | 389 | help="Functional for geometry ensembleopt (used in part2) and " 390 | "single-points in part1", 391 | ) 392 | group1.add_argument( 393 | "-basis", 394 | "--basis", 395 | dest="basis", 396 | action="store", 397 | required=False, 398 | 399 | help="Basis set employed together with the functional (func) for the " 400 | "low level single point in part1 und ensembleopt in part2.", 401 | ) 402 | group1.add_argument( 403 | "-prog", 404 | "--prog", 405 | choices=options.value_options["prog"], 406 | dest="prog", 407 | required=False, 408 | 409 | help="QM-program used in part0, part1 and part2 either 'orca' or 'tm'.", 410 | ) 411 | group10.add_argument( 412 | "-part0_gfnv", 413 | "--part0_gfnv", 414 | dest="part0_gfnv", 415 | choices=options.value_options["part0_gfnv"], 416 | 417 | action="store", 418 | required=False, 419 | help="GFNn-xTB version employed for calculating the GFNn-xTB " 420 | "single point in part0. " 421 | f"Allowed values are [{', '.join(options.value_options['part0_gfnv'])}]", 422 | ) 423 | group3.add_argument( 424 | "-part1", 425 | "--part1", 426 | choices=["on", "off"], 427 | dest="part1", 428 | action="store", 429 | required=False, 430 | 431 | help="Option to turn the prescreening evaluation (part1) 'on' or 'off'.", 432 | ) 433 | group3.add_argument( 434 | "-smgsolv1", 435 | "--smgsolv1", 436 | choices=options.value_options["smgsolv1"], 437 | dest="smgsolv1", 438 | action="store", 439 | required=False, 440 | 441 | help="Solvent model for the Gsolv evaluation in part1. This can either be" 442 | " an implicit solvation or an additive solvation model. " 443 | f"Allowed values are [{', '.join(options.value_options['smgsolv1'])}]", 444 | ) 445 | group10.add_argument( 446 | "-prescreening_threshold", 447 | "-prethr", 448 | "--thresholdpre", 449 | dest="prescreening_threshold", 450 | 451 | action="store", 452 | type=float, 453 | required=False, 454 | help=( 455 | "Threshold in kcal/mol. All conformers in part0 (prescreening)" 456 | " with a relativ energy below the threshold are considered for part1." 
457 | ), 458 | ) 459 | group4.add_argument( 460 | "-sm2", 461 | "--solventmodel2", 462 | choices=options.value_options.get("sm2"), 463 | dest="sm2", 464 | action="store", 465 | required=False, 466 | 467 | help="Solvent model employed during the geometry ensembleopt in part2." 468 | "The solvent model sm2 is not used for Gsolv evaluation, but for the " 469 | "implicit effect on a property (e.g. the geometry in the ensembleopt).", 470 | ) 471 | group4.add_argument( 472 | "-smgsolv2", 473 | "--smgsolv2", 474 | choices=options.value_options["smgsolv2"], 475 | dest="smgsolv2", 476 | action="store", 477 | required=False, 478 | 479 | help="Solvent model for the Gsolv (solvation contribution to free energy) " 480 | "calculation in part2. Either the solvent" 481 | " model of the ensembleopt (sm2) or an additive solvation model. " 482 | f"Allowed values are [{', '.join(options.value_options['smgsolv2'])}]", 483 | ) """ 484 | 485 | # TODO - keep this? 486 | """ group1.add_argument( 487 | "-prog_rrho", 488 | "--prog_rrho", 489 | choices=options.value_options["prog_rrho"], 490 | dest="prog_rrho", 491 | required=False, 492 | 493 | help="QM-program for mRRHO contribution in part1 2 and 3, currently only 'xtb'.", 494 | ) """ 495 | 496 | # TODO - keep? 497 | """ group4.add_argument( 498 | "-ancopt", 499 | choices=["on"], # there is no other option right now! 500 | dest="ancopt", 501 | required=False, 502 | 503 | help="Option to use xtb as driver for the xTB-optimizer in part2. " 504 | "Which is currently not changeable!", 505 | ) """ 506 | 507 | args = parser.parse_args(argv) 508 | 509 | return args 510 | -------------------------------------------------------------------------------- /src/censo/cli/interface.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | from os import getcwd 5 | from argparse import ArgumentError 6 | from datetime import timedelta 7 | from typing import cast 8 | 9 | from .cml_parser import parse 10 | from ..configuration import configure, override_rc 11 | from ..ensembledata import EnsembleData 12 | from ..ensembleopt import Prescreening, Screening, Optimization, Refinement 13 | from ..part import CensoPart 14 | from ..properties import NMR, UVVis 15 | from ..params import __version__, Config 16 | from ..utilities import print 17 | from ..logging import setup_logger, set_loglevel 18 | 19 | logger = setup_logger(__name__) 20 | 21 | 22 | def entry_point(argv: list[str] | None = None) -> int: 23 | """ 24 | Console entry point to execute CENSO from the command line. 
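    Returns an exit code (0 on success, non-zero otherwise), so it can be used
    directly as a console-script target.

    Example (sketch; the file name and values are illustrative):

        exit_code = entry_point(["-i", "crest_conformers.xyz", "--maxcores", "8", "-s", "water"])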
25 | """ 26 | try: 27 | args = parse(argv=argv) 28 | except ArgumentError as e: 29 | print(e.message) 30 | return 1 31 | except SystemExit as e: 32 | return cast(int, e.code) 33 | 34 | if not any(vars(args).values()): 35 | print("CENSO needs at least one argument!") 36 | return 1 37 | 38 | # Print program call 39 | print("CALL: " + " ".join(arg for arg in sys.argv)) 40 | 41 | try: 42 | ensemble = startup(args) 43 | except SystemExit as e: 44 | return cast(int, e.code) 45 | 46 | # Print general settings once 47 | CensoPart(ensemble, print_info=True) 48 | 49 | run = filter( 50 | lambda x: x.get_settings()["run"], 51 | [Prescreening, Screening, Optimization, Refinement, NMR, UVVis], 52 | ) 53 | 54 | time = 0.0 55 | for part in run: 56 | res, runtime = part.run(ensemble) 57 | print(f"Ran {res.name} in {runtime:.2f} seconds!") 58 | time += runtime 59 | 60 | time = timedelta(seconds=int(time)) 61 | hours, r = divmod(time.seconds, 3600) 62 | minutes, seconds = divmod(r, 60) 63 | if time.days: 64 | hours += time.days * 24 65 | 66 | print(f"\nRan CENSO in {hours:02d}:{minutes:02d}:{seconds:02d}") 67 | 68 | print("\nCENSO all done!") 69 | return 0 70 | 71 | 72 | # sets up a ensemble object for you using the given cml arguments and censorc 73 | def startup(args) -> EnsembleData: 74 | # get most important infos for current run 75 | cwd = getcwd() 76 | 77 | # run actions for which no complete setup is needed 78 | if args.version: 79 | print(__version__) 80 | sys.exit() 81 | elif args.cleanup: 82 | cleanup_run(cwd) 83 | print("Removed files and going to exit!") 84 | sys.exit() 85 | elif args.cleanup_all: 86 | cleanup_run(cwd, complete=True) 87 | print("Removed files and going to exit!") 88 | sys.exit() 89 | elif args.writeconfig: 90 | configure(rcpath=cwd, create_new=True) 91 | sys.exit() 92 | elif args.inprcpath is not None: 93 | configure(args.inprcpath) 94 | 95 | if args.loglevel: 96 | set_loglevel(args.loglevel) 97 | 98 | # Override settings with command line arguments 99 | override_rc(args) 100 | 101 | # initialize ensemble, constructor get runinfo from args 102 | ensemble = EnsembleData() 103 | 104 | # read input and setup conformers 105 | ensemble.read_input( 106 | args.inp, charge=args.charge, unpaired=args.unpaired, nconf=args.nconf 107 | ) 108 | 109 | # if data should be reloaded, do it here 110 | if args.reload: 111 | for filename in args.reload: 112 | ensemble.read_output(os.path.join(cwd, filename)) 113 | 114 | if args.maxcores: 115 | Config.NCORES = args.maxcores 116 | 117 | if args.omp: 118 | Config.OMP = args.omp 119 | 120 | if args.ompmin: 121 | Config.OMPMIN = args.ompmin 122 | 123 | # if data should be reloaded, do it here 124 | if args.reload: 125 | for filename in args.reload: 126 | ensemble.read_output(os.path.join(cwd, filename)) 127 | 128 | # END of setup 129 | # -> ensemble.conformers contains all conformers with their info from input (sorted by CREST energy if possible) 130 | # -> output data is reloaded if wanted 131 | 132 | return ensemble 133 | 134 | 135 | def cleanup_run(cwd, complete=False): 136 | """ 137 | Delete all unneeded files. 138 | """ 139 | 140 | # files containing these patterns are deleted 141 | to_delete = [ 142 | "censo.log", 143 | "0_PRESCREENING", 144 | "1_SCREENING", 145 | "2_OPTIMIZATION", 146 | "3_REFINEMENT", 147 | "4_NMR", 148 | "6_UVVIS", 149 | ] 150 | 151 | if complete: 152 | print( 153 | "Removing ALL files generated by previous CENSO runs, including ensembles!" 
154 | ) 155 | 156 | print( 157 | f"Be aware that files in {cwd} and subdirectories with names containing the following substrings " 158 | f"will be deleted:" 159 | ) 160 | for sub in to_delete: 161 | print(sub) 162 | 163 | print("Do you wish to continue?") 164 | print("Please type 'yes' or 'no':") 165 | 166 | ui = input() 167 | if ui.strip().lower() not in ["yes", "y"]: 168 | print("Aborting cleanup!") 169 | sys.exit(0) 170 | 171 | # iterate over files in cwd and subdirs recursively and remove them if to delete 172 | for subdir, dirs, files in os.walk(cwd): 173 | if any(s in subdir for s in to_delete): 174 | print(f"Removing: {subdir}") 175 | shutil.rmtree(subdir) 176 | for file in files: 177 | if any(s in file for s in to_delete) and ( 178 | complete or "ensemble" not in file 179 | ): 180 | print(f"Removing: {file}") 181 | os.remove(os.path.join(subdir, file)) 182 | -------------------------------------------------------------------------------- /src/censo/configuration.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import configparser 4 | from argparse import Namespace 5 | 6 | from .params import Config 7 | from .qm_processor import QmProc 8 | from .utilities import DfaHelper, SolventHelper, print 9 | 10 | parts = {} 11 | 12 | 13 | def configure(rcpath: str = None, create_new: bool = False): 14 | """ 15 | Configures the application based on the provided configuration file path. 16 | If no configuration file path is provided, it searches for the default configuration file. 17 | If no configuration file is found, it raises an error. 18 | 19 | Args: 20 | rcpath (str): Path to the configuration file. 21 | create_new (bool): If True, a new configuration file will be created at rcpath. 
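            When create_new is True, rcpath is treated as a directory and a template
            file named 'censo2rc_NEW' is written into it (or into the home directory
            if rcpath is None).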
22 | 23 | Returns: 24 | None 25 | """ 26 | # Try to find the .censo2rc in the user's home directory 27 | # if no configuration file path is provided 28 | if rcpath is None: 29 | censorc_path = find_rcfile() 30 | else: 31 | if not os.path.isfile(rcpath) and not create_new: 32 | raise FileNotFoundError(f"No configuration file found at {rcpath}.") 33 | censorc_path = rcpath 34 | 35 | # Set up the DFAHelper 36 | DfaHelper.set_dfa_dict(os.path.join(Config.ASSETS_PATH, "censo_dfa_settings.json")) 37 | 38 | # Set up the SolventHelper 39 | SolventHelper.set_solvent_dict( 40 | os.path.join(Config.ASSETS_PATH, "censo_solvents_db.json") 41 | ) 42 | 43 | # map the part names to their respective classes 44 | # NOTE: the DFAHelper and the databases should be setup before the parts are imported, 45 | # otherwise there will be errors in the CensoPart._options 46 | from .part import CensoPart 47 | from .ensembleopt import Prescreening, Screening, Optimization, Refinement 48 | from .properties import NMR, UVVis 49 | 50 | global parts 51 | parts = { 52 | "prescreening": Prescreening, 53 | "screening": Screening, 54 | "optimization": Optimization, 55 | "refinement": Refinement, 56 | "nmr": NMR, 57 | "uvvis": UVVis, 58 | } 59 | 60 | # if explicitely told to create a new configuration file, do so 61 | if create_new: 62 | if rcpath is None: 63 | # If not chosen otherwise, the new rcfile is written in the home dir 64 | censorc_path = os.path.join(os.path.expanduser("~"), "censo2rc_NEW") 65 | else: 66 | censorc_path = os.path.join(rcpath, "censo2rc_NEW") 67 | write_rcfile(censorc_path) 68 | else: 69 | # Initialize default settings 70 | # Make sure that settings are initialized even if there is no section for this part in the rcfile 71 | # General settings should always be configured first 72 | CensoPart.set_general_settings({}) 73 | for part in parts.values(): 74 | part.set_settings({}, complete=True) 75 | 76 | # Read rcfile if it exists 77 | if censorc_path is not None: 78 | # Read the actual configuration file (located at rcpath if not None, otherwise rcfile in home dir) 79 | settings_dict = read_rcfile(censorc_path, silent=False) 80 | 81 | # first set general settings 82 | CensoPart.set_general_settings(settings_dict["general"]) 83 | 84 | # Then the remaining settings for each part 85 | for section, settings in settings_dict.items(): 86 | if section in parts: 87 | parts[section].set_settings(settings) 88 | # NOTE: if section is not in the parts names, it will be ignored 89 | 90 | paths = read_rcfile(censorc_path)["paths"] 91 | else: 92 | # Try to automatically determine program paths (not guaranteed to succeed) 93 | paths = find_program_paths() 94 | 95 | # Update the paths for the processors 96 | QmProc._paths.update(paths) 97 | 98 | # create user assets folder if it does not exist 99 | if not os.path.isdir(Config.USER_ASSETS_PATH): 100 | os.mkdir(Config.USER_ASSETS_PATH) 101 | 102 | 103 | def read_rcfile(path: str, silent: bool = True) -> dict[str, dict[str, any]]: 104 | """ 105 | Read the configuration file at 'path' and return the settings as a dictionary. 106 | 107 | Args: 108 | path (str): Path to the configuration file. 109 | silent (bool): If True, no messages will be printed. 110 | 111 | Returns: 112 | dict[str, dict[str, any]]: Dictionary containing the settings read from the configuration file. 
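
        The file is expected to be in INI format, e.g. (keys and values are illustrative):

            [general]
            solvent = water
            temperature = 298.15

            [prescreening]
            threshold = 4.0

            [paths]
            orcapath = /path/to/orca

        so that read_rcfile(path)["general"]["solvent"] would return the string "water".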
113 | """ 114 | # read config file 115 | if not silent: 116 | print(f"Reading configuration file from {path}.") 117 | 118 | parser: configparser.ConfigParser = configparser.ConfigParser() 119 | with open(path, "r") as file: 120 | parser.read_file(file) 121 | 122 | returndict = {section: dict(parser[section]) for section in parser.sections()} 123 | return returndict 124 | 125 | 126 | def write_rcfile(path: str) -> None: 127 | """ 128 | Write new configuration file with default settings into file at 'path'. 129 | Also reads program paths from preexisting configuration file or tries to 130 | determine the paths automatically. 131 | 132 | Args: 133 | path (str): Path to the new configuration file. 134 | 135 | Returns: 136 | None 137 | """ 138 | # what to do if there is an existing configuration file 139 | external_paths = None 140 | if os.path.isfile(path): 141 | print( 142 | f"An existing configuration file has been found at {path}.\n", 143 | f"Renaming existing file to {Config.CENSORCNAME}_OLD.\n", 144 | ) 145 | # Read program paths from the existing configuration file 146 | print("Reading program paths from existing configuration file ...") 147 | external_paths = read_program_paths(path) 148 | 149 | # Rename existing file 150 | os.rename(path, f"{path}_OLD") 151 | 152 | with open(path, "w", newline=None) as rcfile: 153 | parser = configparser.ConfigParser() 154 | 155 | # collect all default settings from parts and feed them into the parser 156 | from .part import CensoPart 157 | 158 | parts["general"] = CensoPart 159 | parser.read_dict( 160 | { 161 | partname: { 162 | settingname: setting["default"] 163 | for settingname, setting in part.get_options().items() 164 | } 165 | for partname, part in parts.items() 166 | } 167 | ) 168 | 169 | # Try to get paths from 'which' 170 | if external_paths is None: 171 | print("Trying to determine program paths automatically ...") 172 | external_paths = find_program_paths() 173 | 174 | parser["paths"] = external_paths 175 | 176 | print(f"Writing new configuration file to {path} ...") 177 | parser.write(rcfile) 178 | 179 | print( 180 | f"\nA new configuration file was written into {path}.\n" 181 | "You should adjust the settings to your needs and set the program paths.\n" 182 | "Right now the settings are at their default values.\n" 183 | ) 184 | 185 | if Config.CENSORCNAME not in path: 186 | print( 187 | f"Additionally make sure that the file name is '{Config.CENSORCNAME}'.\n" 188 | f"Currently it is '{os.path.split(path)[-1]}'.\n" 189 | ) 190 | 191 | 192 | def read_program_paths(path: str) -> dict[str, str] | None: 193 | """ 194 | Read program paths from the configuration file at 'path' 195 | """ 196 | with open(path, "r") as inp: 197 | parser = configparser.ConfigParser() 198 | parser.read_file(inp) 199 | 200 | try: 201 | return dict(parser["paths"]) 202 | except KeyError: 203 | print(f"WARNING: No paths found in {path}") 204 | return None 205 | 206 | 207 | def find_program_paths() -> dict[str, str]: 208 | """ 209 | Try to determine program paths automatically 210 | """ 211 | # TODO - for now only the most important ones are implemented 212 | mapping = { 213 | "orcapath": "orca", 214 | "xtbpath": "xtb", 215 | "mpshiftpath": "mpshift", 216 | "escfpath": "escf", 217 | # "crestpath": "crest", 218 | # "cosmorssetup": None, 219 | # "dbpath": None, 220 | # "cosmothermversion": None, 221 | } 222 | paths = {} 223 | 224 | for pathname, program in mapping.items(): 225 | if program is not None: 226 | path = shutil.which(program) 227 | else: 228 | path = None 229 | 
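        # shutil.which returns None if the executable is not found on PATH;
        # missing programs are stored as an empty string below so that the
        # rcfile still gets a placeholder entry the user can fill in manually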
230 | if path is not None: 231 | paths[pathname] = path 232 | else: 233 | paths[pathname] = "" 234 | 235 | # if orca was found try to determine orca version from the path (kinda hacky) 236 | if paths["orcapath"] != "": 237 | try: 238 | paths["orcaversion"] = ( 239 | paths["orcapath"].split(os.sep)[-2][5:10].replace("_", ".") 240 | ) 241 | except Exception: 242 | paths["orcaversion"] = "" 243 | 244 | return paths 245 | 246 | 247 | def find_rcfile() -> str | None: 248 | """ 249 | check for existing .censorc2 in $home dir 250 | """ 251 | 252 | rcpath = None 253 | # check for .censorc in $home 254 | if os.path.isfile(os.path.join(os.path.expanduser("~"), Config.CENSORCNAME)): 255 | rcpath = os.path.join(os.path.expanduser("~"), Config.CENSORCNAME) 256 | 257 | return rcpath 258 | 259 | 260 | def override_rc(args: Namespace) -> None: 261 | """ 262 | Override the settings from the rcfile (or default settings) with settings from the command line. 263 | 264 | Args: 265 | args(Namespace): Namespace generated by command line parser. 266 | 267 | Returns: 268 | None 269 | """ 270 | # Override general and part specific settings 271 | from .part import CensoPart 272 | 273 | for part in list(parts.values()) + [CensoPart]: 274 | part_settings = part.get_settings() 275 | for setting in part_settings: 276 | if getattr(args, setting, None) is not None: 277 | part.set_setting(setting, getattr(args, setting)) 278 | -------------------------------------------------------------------------------- /src/censo/datastructure.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from typing import TypedDict 3 | 4 | from .params import BOHR2ANG, Config 5 | 6 | 7 | class Atom(TypedDict): 8 | element: str 9 | xyz: list[float] 10 | 11 | 12 | class GeometryData: 13 | """ 14 | Geometry contains geometry information as well as identifier to match it to a MoleculeData object 15 | in order to keep the object small, since it has to be pickled for multiprocessing 16 | """ 17 | 18 | def __init__(self, name: str, xyz: list[str]): 19 | """ 20 | takes an identifier and the geometry lines from the xyz-file as input 21 | """ 22 | 23 | # name of the linked MoleculeData 24 | self.name: str = name 25 | 26 | # list of dicts preserving the order of the input file for easy mapping 27 | # the coordinates should be given in Angstrom 28 | # self.xyz = [{"element": "H", "xyz": [0.0, 0.0, 0.0]}, {"element": "C", "xyz": [0.0, 0.0 0.7]}, ...] 
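        # each entry of the 'xyz' argument is expected to be a plain xyz-format line,
        # e.g. "C   0.0000000   0.0000000   0.7000000" (element symbol followed by
        # three cartesian coordinates in Angstrom)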
29 | self.xyz: list[Atom] = [] 30 | 31 | # set up xyz dict from the input lines 32 | for line in xyz: 33 | spl = [s.strip() for s in line.split()] 34 | element = spl[0].capitalize() 35 | self.xyz.append({"element": element, "xyz": [float(i) for i in spl[1:]]}) 36 | 37 | # Count atoms 38 | self.nat: int = len(self.xyz) 39 | 40 | def toorca(self) -> list[str | float]: 41 | """ 42 | method to convert the internal cartesian coordinates to a data format usable by the OrcaParser 43 | """ 44 | coord = [] 45 | for atom in self.xyz: 46 | coord.append(" ".join([atom["element"]] + [str(c) for c in atom["xyz"]])) 47 | 48 | return coord 49 | 50 | def tocoord(self) -> list[str]: 51 | """ 52 | method to convert the internal cartesian coordinates (self.xyz) to coord file format (for tm or xtb) 53 | """ 54 | coord = ["$coord\n"] 55 | for atom in self.xyz: 56 | coord.append( 57 | reduce( 58 | lambda x, y: f"{x} {y}", 59 | list(map(lambda x: float(x) / BOHR2ANG, atom["xyz"])) 60 | + [f"{atom['element']}\n"], 61 | ) 62 | ) 63 | 64 | coord.append("$end\n") 65 | 66 | return coord 67 | 68 | def fromcoord(self, path: str) -> None: 69 | """ 70 | method to convert the content of a coord file to cartesian coordinates for the 'xyz' attribute 71 | """ 72 | with open(path, "r") as file: 73 | lines = file.readlines() 74 | 75 | self.xyz = [] 76 | for line in lines: 77 | if not line.startswith("$"): 78 | coords = line.split() 79 | element = coords[-1] 80 | cartesian_coords = [float(x) * BOHR2ANG for x in coords[:-1]] 81 | self.xyz.append({"element": element, "xyz": cartesian_coords}) 82 | elif line.startswith("$end"): 83 | break 84 | 85 | def fromxyz(self, path: str) -> None: 86 | """ 87 | Method to convert the content of an xyz file to cartesian coordinates for the 'xyz' attribute 88 | """ 89 | with open(path, "r") as file: 90 | lines = file.readlines() 91 | 92 | self.xyz = [] 93 | # Just skip the first two lines 94 | for line in lines[2:]: 95 | split = line.split() 96 | element = split[0] 97 | coords = [float(x) for x in split[1:]] 98 | self.xyz.append({"element": element, "xyz": coords}) 99 | 100 | def toxyz(self) -> list[str]: 101 | """ 102 | method to convert self.xyz to xyz-file format 103 | """ 104 | lines = [ 105 | f"{self.nat}\n", 106 | f"{self.name}\n", 107 | ] 108 | for atom in self.xyz: 109 | lines.append( 110 | f"{atom['element']} {atom['xyz'][0]:.10f} {atom['xyz'][1]:.10f} {atom['xyz'][2]:.10f}\n" 111 | ) 112 | 113 | return lines 114 | 115 | 116 | class MoleculeData: 117 | """ 118 | The confomers' MoleculeData are set up in censo.ensembledata.EnsembleData.setup_conformers 119 | """ 120 | 121 | def __init__(self, name: str, xyz: list[str]): 122 | """ 123 | takes geometry lines from the xyz-file as input to pass it to the GeometryData constructor 124 | """ 125 | 126 | # stores a name for printing and (limited) between-run comparisons 127 | self.name: str = name 128 | 129 | # stores the geometry info to have a small object to be used for multiprocessing 130 | self.geom: GeometryData = GeometryData(self.name, xyz) 131 | 132 | # stores the degeneration factor of the conformer 133 | self.degen: int = 1 134 | 135 | # stores the initial (biased) xtb energy from CREST (or whatever was used before) 136 | self.xtb_energy: float = None 137 | 138 | # list to store the paths to all MO-files from the jobs run for this conformer 139 | # might also include tuples if open shell and tm is used 140 | self.mo_paths: list[str, tuple] = [] 141 | 142 | 143 | class ParallelJob: 144 | 145 | def __init__(self, conf: GeometryData, 
jobtype: list[str]): 146 | # conformer for the job 147 | self.conf = conf 148 | 149 | # list of jobtypes to execute for the processor 150 | self.jobtype = jobtype 151 | 152 | # number of cores to use 153 | self.omp = Config.OMPMIN 154 | 155 | # stores path to an mo file which is supposed to be used as a guess 156 | # In case of open shell tm calculation this can be a tuple of files 157 | self.mo_guess = None 158 | 159 | # Stores all the important information for preparation of the input files for every jobtype 160 | # Always contains the 'general' key, which basically stores settings from the general section 161 | # that are supposed to be applied for every job 162 | # Also should always contain the name of the part where the job is launched from, as well as charge and 163 | # number of unpaired electrons 164 | # NOTE: prepinfo.keys() and items in jobtype are not necessarily the same! E.g. for NMR 165 | # jobtype = ["nmr"], prepinfo.keys() = ["nmr_s"], or prepinfo.keys() = ["nmr_s", "nmr_j"], ... 166 | self.prepinfo: dict[str, dict[str, any]] = { 167 | "general": {}, 168 | "partname": "", 169 | "charge": 0, 170 | "unpaired": 0, 171 | } 172 | 173 | # store metadata, is updated by the processor 174 | # structure e.g.: {"sp": {"success": True, "error": None}, "xtb_rrho": {"success": False, ...}, ...} 175 | # always contains the "mo_path" key 176 | self.meta: dict[str, any] = {"mo_path": None} 177 | 178 | # store the results of the job 179 | self.results: dict[str, any] = {} 180 | 181 | # stores all flags for the jobtypes 182 | self.flags: dict[str, any] = {} 183 | -------------------------------------------------------------------------------- /src/censo/ensembledata.py: -------------------------------------------------------------------------------- 1 | """ 2 | stores ensembledata and conformers 3 | functionality for program setup 4 | """ 5 | 6 | import os 7 | import re 8 | import json 9 | 10 | from .datastructure import MoleculeData 11 | from .logging import setup_logger 12 | from .params import DIGILEN 13 | from .utilities import check_for_float, print, t2x, Factory 14 | 15 | logger = setup_logger(__name__) 16 | 17 | 18 | class EnsembleData: 19 | """ 20 | Class to store conformer rotamer ensembles for use in CENSO. 21 | """ 22 | 23 | def __init__(self, input_file: str | None = None): 24 | """ 25 | Setup an EnsembleData object, which contains a list of conformers, read from 26 | input_file. If input_file is not passed here, conformers can be read using 27 | read_input. 28 | 29 | Args: 30 | input_file (str, optional): Path to the ensemble input file. Defaults to None. 31 | If this is provided, the charge and unpaired electron count will be assumed to be 0 and all conformers will be read from the input file. 32 | """ 33 | # contains run-specific info that may change during runtime 34 | # initialized in EnsembleData.read_input 35 | self.runinfo = { 36 | "charge": None, 37 | "unpaired": None, 38 | } 39 | 40 | # stores the conformers with all info 41 | # NOTE: this is deliberately chosen to be a list since lists are ordered 42 | self.__conformers: list[MoleculeData] = [] 43 | 44 | # stores the conformers which were sorted out 45 | self.rem: list[MoleculeData] = [] 46 | 47 | # A list containing all part references in order of execution or loading 48 | self.results = [] 49 | 50 | if input_file is not None: 51 | self.read_input(input_file, charge=0, unpaired=0) 52 | 53 | @property 54 | def conformers(self): 55 | """ 56 | Returns the conformers list. 
Includes a check wether there are any conformers left. 57 | """ 58 | # TODO - no checks for now 59 | return self.__conformers 60 | 61 | @conformers.setter 62 | def conformers(self, confs): 63 | assert all(isinstance(conf, MoleculeData) for conf in confs) 64 | self.__conformers = confs 65 | 66 | def read_output(self, outpath: str) -> None: 67 | """ 68 | Read json output file of a previous execution. Will try to load data into current conformer ensemble, matching 69 | based on names. If a conformer name does not exist in the current ensemble it will be ignored. If a conformer 70 | does not exist in the output data RuntimeError will be raised. 71 | 72 | Args: 73 | outpath (str): Path to the output file. 74 | 75 | Returns: 76 | None 77 | """ 78 | 79 | with open(outpath, "r") as file: 80 | data = json.load(file) 81 | 82 | # Check if all conformers from the current ensemble are also found in the output data 83 | if not all(conf.name in data["results"] for conf in self.conformers): 84 | raise RuntimeError( 85 | "Not all conformers from the current ensemble are found in the output data." 86 | ) 87 | 88 | # Create a part instance and load in the results 89 | part = Factory.create(data["partname"], self) 90 | part.data.update(data) 91 | 92 | logger.info(f"Reloaded results from {outpath}.") 93 | 94 | self.results.append(part) 95 | 96 | def read_input( 97 | self, 98 | input_path: str, 99 | charge: int = None, 100 | unpaired: int = None, 101 | nconf: int = None, 102 | append: bool = False, 103 | ) -> None: 104 | """ 105 | Read ensemble input file. Should be a file in xyz-file format with all the conformers in consecutive order. 106 | 107 | Args: 108 | input_path (str): Path to the ensemble input file. 109 | charge (int, optional): Charge of the system. Defaults to None. Overwrites preexisting values. 110 | unpaired (int, optional): Number of unpaired electrons. Defaults to None. Overwrites preexisting values. 111 | nconf (int, optional): Number of conformers to consider. Defaults to None, so all conformers are read. 112 | append (bool, optional): If True, the conformers will be appended to the existing ensemble. Defaults to False. 113 | 114 | Returns: 115 | None 116 | 117 | Raises: 118 | RuntimeError: If the charge or the number of unpaired electrons is not defined. 
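
        Example (sketch; the file name is illustrative):

            ensemble = EnsembleData()
            ensemble.read_input("crest_conformers.xyz", charge=0, unpaired=0, nconf=10)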
119 | """ 120 | # If $coord in file => tm format, needs to be converted to xyz 121 | with open(input_path, "r") as inp: 122 | lines = inp.readlines() 123 | if any("$coord" in line for line in lines): 124 | _, nat, input_path = t2x( 125 | input_path, writexyz=True, outfile="converted.xyz" 126 | ) 127 | else: 128 | nat = int(lines[0].split()[0]) 129 | 130 | # Set charge and unpaired via funtion args 131 | self.runinfo["charge"] = charge 132 | self.runinfo["unpaired"] = unpaired 133 | 134 | if self.runinfo["charge"] is None or self.runinfo["unpaired"] is None: 135 | raise RuntimeError("Charge or number of unpaired electrons not defined.") 136 | 137 | confs = self.__setup_conformers(input_path) 138 | if len(confs) == 0: 139 | logger.warning("Input file is empty!") 140 | 141 | if nconf is None: 142 | nconf = len(confs) 143 | 144 | if append: 145 | self.conformers.append(confs[:nconf]) 146 | else: 147 | self.conformers = confs[:nconf] 148 | 149 | try: 150 | self.conformers.sort(key=lambda x: x.xtb_energy) 151 | except TypeError: 152 | # Only sort if all conformers have a defined precalculated energy 153 | pass 154 | 155 | # Print information about read ensemble 156 | print( 157 | f"Read {len(self.conformers)} conformers.\n", 158 | "Number of atoms:".ljust(DIGILEN // 2, " ") + f"{nat}" + "\n", 159 | "Charge:".ljust(DIGILEN // 2, " ") + f"{self.runinfo['charge']}" + "\n", 160 | "Unpaired electrons:".ljust(DIGILEN // 2, " ") 161 | + f"{self.runinfo['unpaired']}" 162 | + "\n", 163 | sep="", 164 | ) 165 | 166 | def __setup_conformers(self, input_path: str) -> list[MoleculeData]: 167 | """ 168 | open ensemble input 169 | split into conformers 170 | create MoleculeData objects out of coord input 171 | read out energy from xyz file if possible 172 | In principle this can also read xyz-files with molecules of different sizes. 173 | 174 | Args: 175 | input_path (str): Path to the ensemble input file. 176 | 177 | Returns: 178 | list[MoleculeData]: A list of MoleculeData objects. 179 | """ 180 | # open ensemble input 181 | with open(input_path, "r") as file: 182 | lines = file.readlines() 183 | 184 | # Get rid of unnecessary empty lines 185 | # Basically this filters out all only-whitespace lines except the comment lines after the number of atoms is declared 186 | lines = list( 187 | filter( 188 | lambda line: not ( 189 | bool(re.match(r"^\s*$", line)) # matches only whitespace chars 190 | and len(lines[lines.index(line) - 1].split()) != 1 191 | ), 192 | lines, 193 | ) 194 | ) 195 | 196 | # assuming consecutive xyz-file format 197 | # (every conf geometry is separated by a line with split length of 4 followed by a line of split length 1) 198 | # 199 | # 14 <-- split_index refers to this line (this is line 0 for the first conf) 200 | # CONF12 -22.521386 201 | # H x.xxxxxxxx x.xxxxxxx x.xxxxxx 202 | # ... 
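        # i.e. a line containing a single token (the atom count) that directly follows
        # a four-token coordinate line marks the start of the next conformer block;
        # index 0 always starts the first conformer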
203 | split_indices = [ 204 | i 205 | for i in range(len(lines)) 206 | if i == 0 or (len(lines[i].split()) == 1 and len(lines[i - 1].split()) == 4) 207 | ] 208 | 209 | conformers = [] 210 | for i, split_index in enumerate(split_indices): 211 | # Check whether the names are stored in the ensemble file, 212 | # use those if possible because of crest rotamer files 213 | if "CONF" in lines[split_index + 1]: 214 | confname = next( 215 | s for s in lines[split_index + 1].split() if "CONF" in s 216 | ) 217 | else: 218 | # Start counting from 1 219 | confname = f"CONF{i + 1}" 220 | 221 | # Determine end of geometry definition for this conf 222 | # which is either the next conf definition or EOF 223 | conf_end_index = ( 224 | split_indices[i + 1] if i + 1 < len(split_indices) else len(lines) 225 | ) 226 | 227 | # Create a new conformer object and append it to the ensemble 228 | conformers.append( 229 | MoleculeData( 230 | confname, 231 | lines[split_index + 2 : conf_end_index], 232 | ) 233 | ) 234 | 235 | # get precalculated energies if possible 236 | # precalculated energy set to 0.0 if it cannot be found 237 | conformers[i].xtb_energy = check_for_float(lines[split_index + 1]) or 0.0 238 | 239 | return conformers 240 | 241 | def remove_conformers(self, confnames: list[str]) -> None: 242 | """ 243 | Remove the conformers with the names listed in 'confnames' from further consideration. 244 | The removed conformers will be stored in self.rem. 245 | 246 | Args: 247 | confnames (list[str]): A list of conformer names. 248 | 249 | Returns: 250 | None 251 | """ 252 | if len(confnames) > 0: 253 | for confname in confnames: 254 | remove = next(c for c in self.conformers if c.name == confname) 255 | 256 | # pop item from conformers and insert this item at index 0 in rem 257 | self.rem.insert(0, self.conformers.pop(self.conformers.index(remove))) 258 | 259 | # Log removed conformers 260 | logger.debug(f"Removed {remove.name}.") 261 | 262 | def dump(self, filename: str) -> None: 263 | """ 264 | dump the conformers to a file 265 | """ 266 | with open(os.path.join(f"{os.getcwd()}", f"{filename}.xyz"), "w") as file: 267 | for conf in self.conformers: 268 | file.writelines(conf.geom.toxyz()) 269 | -------------------------------------------------------------------------------- /src/censo/ensembleopt/__init__.py: -------------------------------------------------------------------------------- 1 | from .prescreening import Prescreening 2 | from .screening import Screening 3 | from .optimization import Optimization 4 | from .refinement import Refinement 5 | from .optimizer import EnsembleOptimizer 6 | -------------------------------------------------------------------------------- /src/censo/ensembleopt/optimizer.py: -------------------------------------------------------------------------------- 1 | from ..logging import setup_logger 2 | from ..params import AU2KCAL, DIGILEN, PLENGTH 3 | from ..part import CensoPart 4 | from ..utilities import ( 5 | DfaHelper, 6 | SolventHelper, 7 | format_data, 8 | h1, 9 | print, 10 | timeit, 11 | ) 12 | 13 | logger = setup_logger(__name__) 14 | 15 | 16 | class EnsembleOptimizer(CensoPart): 17 | """ 18 | Boilerplate class for all ensemble optimization steps. 19 | """ 20 | 21 | _grid = "" 22 | 23 | @classmethod 24 | def _validate(cls, tovalidate: dict[str, any]) -> None: 25 | """ 26 | Validates the type of each setting in the given dict. Also potentially validate if the setting is allowed by 27 | checking with cls._options. 28 | This is the part-specific version of the method. 
It will run the general validation first and then 29 | check part-specific logic. 30 | 31 | Args: 32 | tovalidate (dict[str, any]): The dict containing the settings to be validated. 33 | 34 | Returns: 35 | None 36 | 37 | Raises: 38 | ValueError: If the setting is not allowed or the value is not within the allowed options. 39 | """ 40 | # General validation 41 | super()._validate(tovalidate) 42 | 43 | # Part-specific validation 44 | # NOTE: tovalidate is always complete 45 | # Check availability of func for prog 46 | func = tovalidate["func"] 47 | if func not in cls._options["func"]["options"][tovalidate["prog"]]: 48 | raise ValueError( 49 | f"Functional {func} is not available for {tovalidate['prog']}. " 50 | "Check spelling w.r.t. CENSO functional naming convention (case insensitive)." 51 | ) 52 | 53 | # Check sm availability for prog 54 | sm = tovalidate.get("sm", None) 55 | if ( 56 | sm is not None 57 | and sm not in cls._options["sm"]["options"][tovalidate["prog"]] 58 | ): 59 | raise ValueError( 60 | f"Solvent model {sm} not available for {tovalidate['prog']}." 61 | ) 62 | 63 | # Check solvent availability for sm 64 | if ( 65 | sm is not None 66 | and cls.get_general_settings()["solvent"] 67 | not in CensoPart._options["solvent"]["options"][sm] 68 | ): 69 | raise ValueError( 70 | f"Solvent {cls.get_general_settings()['solvent']} is not available for {sm}. " 71 | ) 72 | 73 | # dummy/template functionality not implemented yet for TM 74 | if tovalidate["prog"] == "tm" and (func == "dummy"): 75 | raise NotImplementedError( 76 | "Dummy functionality is not implemented for use with TURBOMOLE." 77 | ) 78 | 79 | @timeit 80 | @CensoPart._create_dir 81 | def __call__(self, cut: bool = True) -> None: 82 | """ 83 | Boilerplate run logic for any ensemble optimization step. The 'optimize' method should be implemented for every 84 | class respectively. 85 | """ 86 | # print instructions 87 | self._print_info() 88 | 89 | # Store number of conformer put in 90 | self.data["nconf_in"] = len(self._ensemble.conformers) 91 | 92 | # Perform the actual optimization logic 93 | self._optimize(cut=cut) 94 | self.data["nconf_out"] = len(self._ensemble.conformers) 95 | 96 | # Resort the ensemble 97 | self._ensemble.conformers.sort( 98 | key=lambda conf: self.data["results"][conf.name]["gtot"], 99 | ) 100 | 101 | # DONE 102 | 103 | def _optimize(self, cut: bool = True): 104 | raise NotImplementedError 105 | 106 | def _write_results(self): 107 | raise NotImplementedError 108 | 109 | def _output(self) -> None: 110 | """ 111 | Implements printouts and writes for any output data. 112 | Necessary to implement for each part. 113 | """ 114 | # Write out results 115 | self._write_results() 116 | 117 | # Print comparison with previous parts 118 | if len(self._ensemble.results) > 1: 119 | self._print_comparison() 120 | 121 | # Print information about ensemble after optimization 122 | self._print_update() 123 | 124 | # dump ensemble 125 | self._ensemble.dump(f"{self._part_nos[self.name]}_{self.name.upper()}") 126 | 127 | def _setup_prepinfo(self, jobtype: list[str]) -> dict[str, dict]: 128 | """ 129 | Sets up lookup information to be used by the processor in parallel execution. Returns a dictionary 130 | containing all information for all jobtypes provided. 131 | 132 | Args: 133 | jobtype (list[str]): list of jobtypes to be run. 134 | 135 | Returns: 136 | dict[str, dict]: dictionary containing all information for all jobtypes provided. 
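
            Rough shape of the returned dictionary (keys shown are illustrative and
            depend on the requested jobtypes and the current settings):

                {
                    "partname": "prescreening",
                    "charge": 0,
                    "unpaired": 0,
                    "general": {...},
                    "sp": {"func_name": ..., "basis": ..., "grid": ..., "sm": ..., ...},
                }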
137 | """ 138 | prepinfo = {jt: {} for jt in jobtype} 139 | 140 | prepinfo["partname"] = self.name 141 | prepinfo["charge"] = self._ensemble.runinfo.get("charge") 142 | prepinfo["unpaired"] = self._ensemble.runinfo.get("unpaired") 143 | prepinfo["general"] = self.get_general_settings() 144 | 145 | if "sp" in jobtype or "gsolv" in jobtype: 146 | prepinfo["sp"] = { 147 | "func_name": DfaHelper.get_name( 148 | self.get_settings()["func"], self.get_settings()["prog"] 149 | ), 150 | "func_type": DfaHelper.get_type(self.get_settings()["func"]), 151 | "disp": DfaHelper.get_disp(self.get_settings()["func"]), 152 | "basis": self.get_settings()["basis"], 153 | "grid": self._grid, 154 | "template": self.get_settings()["template"], 155 | "gcp": True, 156 | } 157 | 158 | # Add the solvent key if a solvent model exists in the part settings 159 | # NOTE: 'sm' in key catches also cases like NMR (sm_s and sm_j) 160 | # Only look up solvent if solvation is used 161 | if ( 162 | "sm" in self.get_settings() 163 | and not self.get_general_settings()["gas-phase"] 164 | ): 165 | prepinfo["sp"]["sm"] = self.get_settings()["sm"] 166 | prepinfo["sp"]["solvent_key_prog"] = SolventHelper.get_solvent( 167 | self.get_settings()["sm"], self.get_general_settings()["solvent"] 168 | ) 169 | 170 | if ( 171 | self.get_settings()["prog"] == "tm" 172 | and prepinfo["sp"]["disp"] == "d4" 173 | and prepinfo["sp"]["gcp"] 174 | ): 175 | # Basis sets including the following naming patterns should definitely use GCP 176 | gcp_basis_patterns = ["sv", "dz", "tz", "mini", "6-31g(d)"] 177 | if any( 178 | pattern in prepinfo["sp"]["basis"] for pattern in gcp_basis_patterns 179 | ): 180 | logger.warning( 181 | "Due to a bug in TURBOMOLE it is currently not possible to use GCP " 182 | "together with the D4 correction. Switching to D3." 183 | ) 184 | prepinfo["sp"]["disp"] = DfaHelper.get_disp( 185 | self.get_settings()["func"].replace("d4", "d3") 186 | ) 187 | else: 188 | logger.warning( 189 | "Due to a bug in TURBOMOLE it is currently not possible to use GCP " 190 | "together with the D4 correction. Turning off GCP." 
191 | ) 192 | prepinfo["sp"]["gcp"] = False 193 | 194 | # TODO - this doesn't look very nice 195 | if "xtb_gsolv" in jobtype: 196 | prepinfo["xtb_sp"] = { 197 | "gfnv": self.get_settings()["gfnv"], 198 | "solvent_key_xtb": SolventHelper.get_solvent( 199 | self.get_general_settings()["sm_rrho"], 200 | self.get_general_settings()["solvent"], 201 | ), 202 | } 203 | # gsolv implies that solvation should be used, so no check here 204 | assert prepinfo["xtb_sp"]["solvent_key_xtb"] is not None 205 | 206 | if "xtb_rrho" in jobtype: 207 | prepinfo["xtb_rrho"] = { 208 | "gfnv": self.get_settings()["gfnv"], 209 | } 210 | # Only look up solvent if solvation is used 211 | if not self.get_general_settings()["gas-phase"]: 212 | prepinfo["xtb_rrho"]["solvent_key_xtb"] = SolventHelper.get_solvent( 213 | self.get_general_settings()["sm_rrho"], 214 | self.get_general_settings()["solvent"], 215 | ) 216 | 217 | for jt in ["xtb_opt", "opt"]: 218 | if jt in jobtype: 219 | prepinfo[jt] = { 220 | "func_name": DfaHelper.get_name( 221 | self.get_settings()["func"], self.get_settings()["prog"] 222 | ), 223 | "func_type": DfaHelper.get_type(self.get_settings()["func"]), 224 | "disp": DfaHelper.get_disp(self.get_settings()["func"]), 225 | "basis": self.get_settings()["basis"], 226 | "grid": self._grid, 227 | "template": self.get_settings()["template"], 228 | "gcp": True, 229 | "optcycles": self.get_settings()["optcycles"], 230 | "hlow": self.get_settings()["hlow"], 231 | "optlevel": self.get_settings()["optlevel"], 232 | "macrocycles": self.get_settings()["macrocycles"], 233 | # "constraints": self.constraints, 234 | # this is set to a path if constraints should be used, otherwise None 235 | } 236 | 237 | # Only look up solvent if solvation is used 238 | if not self.get_general_settings()["gas-phase"]: 239 | prepinfo[jt]["sm"] = self.get_settings()["sm"] 240 | prepinfo[jt]["solvent_key_prog"] = SolventHelper.get_solvent( 241 | self.get_settings()["sm"], 242 | self.get_general_settings()["solvent"], 243 | ) 244 | 245 | if ( 246 | self.get_settings()["prog"] == "tm" 247 | and prepinfo[jt]["disp"] == "d4" 248 | and prepinfo[jt]["gcp"] 249 | ): 250 | # Basis sets including the following naming patterns should definitely use GCP 251 | gcp_basis_patterns = ["sv", "dz", "tz", "mini", "6-31g(d)"] 252 | if any( 253 | pattern in prepinfo[jt]["basis"] 254 | for pattern in gcp_basis_patterns 255 | ): 256 | logger.warning( 257 | "Due to a bug in TURBOMOLE it is currently not possible to use GCP " 258 | "together with the D4 correction. Switching to D3." 259 | ) 260 | prepinfo[jt]["disp"] = DfaHelper.get_disp( 261 | self.get_settings()["func"].replace("d4", "d3") 262 | ) 263 | else: 264 | logger.warning( 265 | "Due to a bug in TURBOMOLE it is currently not possible to use GCP " 266 | "together with the D4 correction. Turning off GCP." 
267 | ) 268 | prepinfo[jt]["gcp"] = False 269 | 270 | break 271 | 272 | return prepinfo 273 | 274 | def _print_update(self) -> None: 275 | print("\n") 276 | print( 277 | "Number of conformers:".ljust(DIGILEN // 2, " ") 278 | + f"{len(self._ensemble.conformers)}" 279 | ) 280 | 281 | print( 282 | "Highest ranked conformer:".ljust(DIGILEN // 2, " ") 283 | + f"{self._ensemble.conformers[0].name}" 284 | ) 285 | print("\n") 286 | 287 | def _print_comparison(self) -> None: 288 | print(h1(f"{self.name.upper()} RANKING COMPARISON")) 289 | 290 | headers = ["CONF#"] 291 | 292 | parts = [ 293 | p for p in self._ensemble.results if issubclass(type(p), EnsembleOptimizer) 294 | ] 295 | 296 | headers.extend([f"ΔGtot {part.name}" for part in parts]) 297 | 298 | # column units 299 | units = [ 300 | "", 301 | ] 302 | 303 | units.extend(["[kcal/mol]" for _ in range(len(parts))]) 304 | 305 | # variables for printmap 306 | gtotmin = {part: 0.0 for part in parts} 307 | for part in parts: 308 | gtotmin[part.name] = min( 309 | part.data["results"][conf.name]["gtot"] 310 | for conf in self._ensemble.conformers 311 | ) 312 | 313 | # determines what to print for each conformer in each column 314 | printmap = { 315 | "CONF#": lambda conf: conf.name, 316 | } 317 | for header, part in zip(headers[1:], parts): 318 | # Same lambda bullshittery as in parallel.py/dqp, python needs the lambda kwargs or it will 319 | # use the same values for every lambda call 320 | printmap[header] = ( 321 | lambda conf, partl=part, headerl=header: f"{(partl.data['results'][conf.name]['gtot'] - gtotmin[partl.name]) * AU2KCAL:.2f}" 322 | ) 323 | 324 | rows = [ 325 | [printmap[header](conf) for header in headers] 326 | for conf in self._ensemble.conformers 327 | ] 328 | 329 | lines = format_data(headers, rows, units=units) 330 | 331 | # Print everything 332 | for line in lines: 333 | print(line, flush=True, end="") 334 | 335 | print("".ljust(int(PLENGTH), "-") + "\n") 336 | -------------------------------------------------------------------------------- /src/censo/ensembleopt/prescreening.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ..datastructure import MoleculeData 4 | from ..ensembledata import EnsembleData 5 | from ..logging import setup_logger 6 | from ..parallel import execute 7 | from ..params import AU2KCAL, PLENGTH, Config 8 | from ..utilities import format_data, h1, print, DfaHelper, Factory 9 | from .optimizer import EnsembleOptimizer 10 | 11 | logger = setup_logger(__name__) 12 | 13 | 14 | class Prescreening(EnsembleOptimizer): 15 | """ 16 | This is supposed to implement a cheap prescreening step using low-cost DFT and possibly 17 | solvation contributions calculated using xtb. 18 | 19 | The list of conformers is then updated using Gtot (only DFT single-point energy if in gas-phase). 
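
    In other words, the sorting key computed in _gsolv below is

        Gtot = E(DFT single-point) + ΔGsolv(xTB)

    (or simply Gtot = E(DFT) in the gas phase), and conformers lying more than
    'threshold' kcal/mol above the most stable one are removed from the ensemble.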
20 | """ 21 | 22 | _grid = "low" 23 | 24 | _options = { 25 | "threshold": {"default": 4.0}, 26 | "func": { 27 | "default": "pbe-d4", 28 | "options": {prog: DfaHelper.get_funcs(prog) for prog in Config.PROGS}, 29 | }, 30 | "basis": {"default": "def2-SV(P)"}, 31 | "prog": {"default": "tm", "options": Config.PROGS}, 32 | "gfnv": {"default": "gfn2", "options": Config.GFNOPTIONS}, 33 | "run": {"default": True}, 34 | "template": {"default": False}, 35 | } 36 | 37 | _settings = {} 38 | 39 | def _optimize(self, cut: bool = True) -> None: 40 | """ 41 | TODO 42 | """ 43 | # set jobtype to pass to handler 44 | # TODO - it is not very nice to partially handle 'Screening' settings here 45 | if self.get_general_settings()["gas-phase"]: 46 | jobtype = ["sp"] 47 | elif self.get_settings().get("implicit", False): 48 | if self.get_settings().get("sm", None) in [ 49 | "cosmors", 50 | "cosmors-fine", 51 | ]: 52 | # If cosmors is used as solvent model the gsolv calculation needs to be done explicitely 53 | logger.warning( 54 | "COSMORS detected as solvation model, this requires explicit calculation of ΔGsolv." 55 | ) 56 | jobtype = ["gsolv"] 57 | else: 58 | # 'implicit' is a special option of Screening that makes CENSO skip the explicit computation of Gsolv 59 | # Gsolv will still be included in the DFT energy though 60 | jobtype = ["sp"] 61 | elif not self.get_settings().get("implicit", False): 62 | # Only for prescreening the solvation should be calculated with xtb 63 | if self.name == "prescreening": 64 | jobtype = ["xtb_gsolv"] 65 | 66 | # Compile all information required for the preparation of input files in parallel execution step 67 | prepinfo = self._setup_prepinfo(jobtype) 68 | 69 | # compute results 70 | # for structure of results from handler.execute look there 71 | results, failed = execute( 72 | self._ensemble.conformers, 73 | self._dir, 74 | self.get_settings()["prog"], 75 | prepinfo, 76 | jobtype, 77 | copy_mo=self.get_general_settings()["copy_mo"], 78 | balance=self.get_general_settings()["balance"], 79 | retry_failed=self.get_general_settings()["retry_failed"], 80 | ) 81 | 82 | # Remove failed conformers 83 | self._ensemble.remove_conformers(failed) 84 | 85 | # Update results 86 | self._update_results(results) 87 | 88 | jobtype = ["sp"] 89 | else: 90 | jobtype = ["gsolv"] 91 | 92 | # Compile all information required for the preparation of input files in parallel execution step 93 | prepinfo = self._setup_prepinfo(jobtype) 94 | 95 | # compute results 96 | # for structure of results from handler.execute look there 97 | results, failed = execute( 98 | self._ensemble.conformers, 99 | self._dir, 100 | self.get_settings()["prog"], 101 | prepinfo, 102 | jobtype, 103 | copy_mo=self.get_general_settings()["copy_mo"], 104 | balance=self.get_general_settings()["balance"], 105 | retry_failed=self.get_general_settings()["retry_failed"], 106 | ) 107 | 108 | # Remove failed conformers 109 | self._ensemble.remove_conformers(failed) 110 | 111 | # Update results 112 | self._update_results(results) 113 | 114 | # update results for each conformer 115 | for conf in self._ensemble.conformers: 116 | # calculate free enthalpy 117 | self.data["results"][conf.name]["gtot"] = self._gsolv(conf) 118 | 119 | # calculate boltzmann weights from gtot values calculated here 120 | self._update_results(self._calc_boltzmannweights()) 121 | 122 | if cut: 123 | print("\n") 124 | # update conformers with threshold 125 | threshold = self.get_settings()["threshold"] / AU2KCAL 126 | limit = min( 127 | 
self.data["results"][conf.name]["gtot"] 128 | for conf in self._ensemble.conformers 129 | ) 130 | filtered = list( 131 | filter( 132 | lambda conf: self.data["results"][conf.name]["gtot"] - limit 133 | > threshold, 134 | self._ensemble.conformers, 135 | ) 136 | ) 137 | 138 | # update the conformer list in ensemble (remove confs if below threshold) 139 | self._ensemble.remove_conformers([conf.name for conf in filtered]) 140 | for conf in filtered: 141 | print(f"No longer considering {conf.name}.") 142 | 143 | def _gsolv(self, conf: MoleculeData) -> float: 144 | """ 145 | Prescreening key for conformer sorting 146 | Calculates Gtot = E (DFT) + Gsolv (xtb) for a given conformer 147 | """ 148 | 149 | # Gtot = E (DFT) + Gsolv (xtb) 150 | if not self.get_general_settings()["gas-phase"]: 151 | gtot = ( 152 | self.data["results"][conf.name]["sp"]["energy"] 153 | + self.data["results"][conf.name]["xtb_gsolv"]["gsolv"] 154 | ) 155 | else: 156 | gtot = self.data["results"][conf.name]["sp"]["energy"] 157 | 158 | return gtot 159 | 160 | def _write_results(self) -> None: 161 | """ 162 | writes: 163 | E (xtb), 164 | δE (xtb), 165 | G_solv (xtb), 166 | δG_solv, 167 | 168 | E(DFT), 169 | δE(DFT), 170 | 171 | E(DFT) + G_solv, 172 | δ(E(DFT) + G_solv) 173 | 174 | also writes data in easily digestible format 175 | """ 176 | print(h1(f"{self.name.upper()} SINGLE-POINT RESULTS")) 177 | 178 | # column headers 179 | headers = [ 180 | "CONF#", 181 | "E (xTB)", 182 | "ΔE (xTB)", 183 | "E (DFT)", 184 | "ΔE (DFT)", 185 | "ΔGsolv (xTB)", 186 | # "δΔGsolv", 187 | "Gtot", 188 | "ΔGtot", 189 | "Boltzmann weight", 190 | ] 191 | 192 | # column units 193 | units = [ 194 | "", 195 | "[Eh]", 196 | "[kcal/mol]", 197 | "[Eh]", 198 | "[kcal/mol]", 199 | "[kcal/mol]", 200 | # "[kcal/mol]", 201 | "[Eh]", 202 | "[kcal/mol]", 203 | f"% at {self.get_general_settings().get('temperature', 298.15)} K", 204 | ] 205 | 206 | # variables for printmap 207 | # minimal xtb single-point energy 208 | if all( 209 | "xtb_gsolv" in self.data["results"][conf.name] 210 | for conf in self._ensemble.conformers 211 | ): 212 | xtbmin = min( 213 | self.data["results"][conf.name]["xtb_gsolv"]["energy_xtb_gas"] 214 | for conf in self._ensemble.conformers 215 | ) 216 | 217 | # minimal dft single-point energy 218 | dft_energies = ( 219 | { 220 | conf.name: self.data["results"][conf.name]["sp"]["energy"] 221 | for conf in self._ensemble.conformers 222 | } 223 | if not all( 224 | "gsolv" in self.data["results"][conf.name] 225 | for conf in self._ensemble.conformers 226 | ) 227 | else { 228 | conf.name: self.data["results"][conf.name]["gsolv"]["energy_gas"] 229 | for conf in self._ensemble.conformers 230 | } 231 | ) 232 | 233 | dftmin = min(dft_energies.values()) 234 | 235 | # minimal solvation free enthalpy 236 | if self.get_general_settings()["gas-phase"]: 237 | gsolvmin = 0.0 238 | else: 239 | # NOTE: there might still be an error if a (xtb_)gsolv calculation failed for a conformer, therefore this should be handled before this step 240 | if all( 241 | "xtb_gsolv" in self.data["results"][conf.name] 242 | for conf in self._ensemble.conformers 243 | ): 244 | gsolvmin = min( 245 | self.data["results"][conf.name]["xtb_gsolv"]["gsolv"] 246 | for conf in self._ensemble.conformers 247 | ) 248 | elif all( 249 | "gsolv" in self.data["results"][conf.name] 250 | for conf in self._ensemble.conformers 251 | ): 252 | gsolvmin = min( 253 | self.data["results"][conf.name]["gsolv"]["gsolv"] 254 | for conf in self._ensemble.conformers 255 | ) 256 | else: 257 | raise 
RuntimeError( 258 | "The calculations should have used implicit or additive solvation for all conformers, " 259 | "but it is missing for at least some conformers." 260 | ) 261 | 262 | # minimal total free enthalpy 263 | gtotmin = min(self._gsolv(conf) for conf in self._ensemble.conformers) 264 | 265 | # determines what to print for each conformer in each column 266 | printmap = { 267 | "CONF#": lambda conf: conf.name, 268 | "E (xTB)": lambda conf: ( 269 | f"{self.data['results'][conf.name]['xtb_gsolv']['energy_xtb_gas']:.6f}" 270 | if "xtb_gsolv" in self.data["results"][conf.name] 271 | else "---" 272 | ), 273 | "ΔE (xTB)": lambda conf: ( 274 | f"{(self.data['results'][conf.name]['xtb_gsolv']['energy_xtb_gas'] - xtbmin) * AU2KCAL:.2f}" 275 | if "xtb_gsolv" in self.data["results"][conf.name] 276 | else "---" 277 | ), 278 | "E (DFT)": lambda conf: f"{dft_energies[conf.name]:.6f}", 279 | "ΔE (DFT)": lambda conf: f"{(dft_energies[conf.name] - dftmin) * AU2KCAL:.2f}", 280 | "ΔGsolv (xTB)": lambda conf: ( 281 | f"{self.data['results'][conf.name]['xtb_gsolv']['gsolv'] * AU2KCAL:.6f}" 282 | if "xtb_gsolv" in self.data["results"][conf.name] 283 | else "---" 284 | ), 285 | "Gtot": lambda conf: f"{self._gsolv(conf):.6f}", 286 | # "δΔGsolv": lambda conf: f"{(self.data["results"][conf.name]['xtb_gsolv']['gsolv'] - gsolvmin) * AU2KCAL:.2f}" 287 | # if "xtb_gsolv" in self.data["results"][conf.name].keys() 288 | # else "---", 289 | "ΔGtot": lambda conf: f"{(self._gsolv(conf) - gtotmin) * AU2KCAL:.2f}", 290 | "Boltzmann weight": lambda conf: f"{self.data['results'][conf.name]['bmw'] * 100:.2f}", 291 | } 292 | 293 | rows = [ 294 | [printmap[header](conf) for header in headers] 295 | for conf in self._ensemble.conformers 296 | ] 297 | 298 | lines = format_data(headers, rows, units=units) 299 | 300 | # list the averaged free enthalpy of the ensemble 301 | lines.append( 302 | "\nBoltzmann averaged free energy/enthalpy of ensemble on input geometries (not DFT optimized):\n" 303 | ) 304 | lines.append( 305 | f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} {'avG(T) /a.u.':>14}\n" 306 | ) 307 | 308 | # calculate averaged free enthalpy 309 | avG = sum( 310 | [ 311 | self.data["results"][conf.name]["bmw"] 312 | * self.data["results"][conf.name]["gtot"] 313 | for conf in self._ensemble.conformers 314 | ] 315 | ) 316 | 317 | # calculate averaged free energy 318 | avE = sum( 319 | [ 320 | self.data["results"][conf.name]["bmw"] 321 | * self.data["results"][conf.name]["sp"]["energy"] 322 | for conf in self._ensemble.conformers 323 | ] 324 | ) 325 | 326 | # append the lines for the free energy/enthalpy 327 | lines.append( 328 | f"{self.get_general_settings().get('temperature', 298.15):^15} {avE:>14.7f} {avG:>14.7f} <<==part0==\n" 329 | ) 330 | lines.append("".ljust(int(PLENGTH), "-")) 331 | 332 | # lines.append(f">>> END of {self.__class__.__name__} <<<".center(PLENGTH, " ") + "\n") 333 | 334 | # Print everything 335 | for line in lines: 336 | print(line, flush=True, end="") 337 | 338 | # write everything to a file 339 | filename = f"{self._part_nos[self.name]}_{self.name.upper()}.out" 340 | logger.debug(f"Writing to {os.path.join(os.getcwd(), filename)}.") 341 | with open(os.path.join(os.getcwd(), filename), "w", newline=None) as outfile: 342 | outfile.writelines(lines) 343 | 344 | # Additionally, write results in json format 345 | self._write_json() 346 | 347 | 348 | Factory.register_builder("prescreening", Prescreening) 349 | -------------------------------------------------------------------------------- 
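# A minimal standalone sketch of the threshold cut performed in Prescreening._optimize above,
# using hypothetical Gtot values; AU2KCAL is approximately 627.5095 kcal/mol per Hartree.
AU2KCAL = 627.5095
threshold = 4.0 / AU2KCAL                      # default threshold of 4.0 kcal/mol, converted to Hartree
gtot = {"CONF1": -36.2449, "CONF2": -36.2380}  # hypothetical Gtot = E(DFT) + Gsolv(xTB) in Hartree
limit = min(gtot.values())
dropped = [name for name, g in gtot.items() if g - limit > threshold]
# dropped == ["CONF2"], since CONF2 lies roughly 4.3 kcal/mol above the best conformer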
/src/censo/ensembleopt/refinement.py: -------------------------------------------------------------------------------- 1 | import os 2 | from ..logging import setup_logger 3 | from ..parallel import execute 4 | from ..params import AU2KCAL, PLENGTH, Config 5 | from ..utilities import format_data, h1, print, DfaHelper, Factory 6 | from .prescreening import Prescreening 7 | from .screening import Screening 8 | from .optimization import Optimization 9 | 10 | logger = setup_logger(__name__) 11 | 12 | 13 | class Refinement(Screening): 14 | """ 15 | Similar to Screening, however here we use a Boltzmann population cutoff instead of kcal cutoff. 16 | """ 17 | 18 | _grid = "high+" 19 | 20 | __solv_mods = {prog: Config.SOLV_MODS[prog] for prog in Config.PROGS} 21 | # __gsolv_mods = reduce(lambda x, y: x + y, GConfig.SOLV_MODS.values()) 22 | 23 | _options = { 24 | "threshold": {"default": 0.95}, 25 | "func": { 26 | "default": "wb97x-v", 27 | "options": {prog: DfaHelper.get_funcs(prog) for prog in Config.PROGS}, 28 | }, 29 | "basis": {"default": "def2-TZVP"}, 30 | "prog": {"default": "tm", "options": Config.PROGS}, 31 | "sm": {"default": "cosmors", "options": __solv_mods}, 32 | "gfnv": {"default": "gfn2", "options": Config.GFNOPTIONS}, 33 | "run": {"default": True}, 34 | "implicit": {"default": False}, 35 | "template": {"default": False}, 36 | } 37 | 38 | _settings = {} 39 | 40 | def _optimize(self, cut: bool = True) -> None: 41 | Prescreening._optimize(self, cut=False) 42 | 43 | if self.get_general_settings()["evaluate_rrho"]: 44 | # Check if evaluate_rrho, then check if optimization was run and use that value, otherwise do xtb_rrho 45 | if not any(type(p) is Optimization for p in self._ensemble.results): 46 | jobtype = ["xtb_rrho"] 47 | prepinfo = self._setup_prepinfo(jobtype) 48 | 49 | # append results to previous results 50 | results, failed = execute( 51 | self._ensemble.conformers, 52 | self._dir, 53 | self.get_settings()["prog"], 54 | prepinfo, 55 | jobtype, 56 | copy_mo=self.get_general_settings()["copy_mo"], 57 | balance=self.get_general_settings()["balance"], 58 | retry_failed=self.get_general_settings()["retry_failed"], 59 | ) 60 | 61 | # Remove failed conformers 62 | self._ensemble.remove_conformers(failed) 63 | 64 | # Update results 65 | self._update_results(results) 66 | 67 | for conf in self._ensemble.conformers: 68 | # calculate new gtot including RRHO contribution 69 | self.data["results"][conf.name]["gtot"] = self._grrho(conf) 70 | else: 71 | # Use values from most recent optimization rrho 72 | using_part = [ 73 | p for p in self._ensemble.results if type(p) is Optimization 74 | ][-1] 75 | 76 | for conf in self._ensemble.conformers: 77 | self.data["results"][conf.name]["xtb_rrho"] = using_part.data[ 78 | "results" 79 | ][conf.name]["xtb_rrho"] 80 | self.data["results"][conf.name]["gtot"] = self._grrho(conf) 81 | 82 | # calculate boltzmann weights from gtot values calculated here 83 | # trying to get temperature from instructions, set it to room temperature if that fails for some reason 84 | self._update_results(self._calc_boltzmannweights()) 85 | 86 | if cut: 87 | # Get Boltzmann population threshold from settings 88 | threshold = self.get_settings()["threshold"] 89 | 90 | # Update ensemble using Boltzman population threshold 91 | filtered = [ 92 | conf.name 93 | for conf in sorted( 94 | self._ensemble.conformers, 95 | key=lambda x: self.data["results"][x.name]["gtot"], 96 | ) 97 | ] 98 | total_bmw = 0 99 | 100 | for confname in filtered: 101 | if total_bmw > threshold: 102 | break 
103 | total_bmw += self.data["results"][confname]["bmw"] 104 | filtered.remove(confname) 105 | 106 | # Remove conformers 107 | self._ensemble.remove_conformers(filtered) 108 | for confname in filtered: 109 | print(f"No longer considering {confname}.") 110 | 111 | # Recalculate boltzmann weights after cutting down the ensemble 112 | self._update_results(self._calc_boltzmannweights()) 113 | 114 | def _write_results(self) -> None: 115 | """ 116 | Additional write function in case RRHO is used. 117 | Write the results to a file in formatted way. This is appended to the first file. 118 | writes (2): 119 | G (xtb), 120 | δG (xtb), 121 | E (DFT), 122 | δGsolv (DFT), 123 | Grrho, 124 | Gtot, 125 | δGtot 126 | 127 | Also writes them into an easily digestible format. 128 | """ 129 | print(h1(f"{self.name.upper()} SINGLE-POINT (+ mRRHO) RESULTS")) 130 | 131 | # column headers 132 | headers = [ 133 | "CONF#", 134 | "E (DFT)", 135 | "ΔGsolv", 136 | "GmRRHO", 137 | "Gtot", 138 | "ΔGtot", 139 | "Boltzmann weight", 140 | ] 141 | 142 | # column units 143 | units = [ 144 | "", 145 | "[Eh]", 146 | "[Eh]", 147 | "[Eh]", 148 | "[Eh]", 149 | "[kcal/mol]", 150 | f"% at {self.get_general_settings().get('temperature', 298.15)} K", 151 | ] 152 | 153 | # minimal gtot from E(DFT), Gsolv and GmRRHO 154 | gtotmin = min( 155 | self.data["results"][conf.name]["gtot"] 156 | for conf in self._ensemble.conformers 157 | ) 158 | 159 | # collect all dft single point energies 160 | dft_energies = ( 161 | { 162 | conf.name: self.data["results"][conf.name]["sp"]["energy"] 163 | for conf in self._ensemble.conformers 164 | } 165 | if not all( 166 | "gsolv" in self.data["results"][conf.name] 167 | for conf in self._ensemble.conformers 168 | ) 169 | else { 170 | conf.name: self.data["results"][conf.name]["gsolv"]["energy_gas"] 171 | for conf in self._ensemble.conformers 172 | } 173 | ) 174 | 175 | printmap = { 176 | "CONF#": lambda conf: conf.name, 177 | "E (DFT)": lambda conf: f"{dft_energies[conf.name]:.6f}", 178 | "ΔGsolv": lambda conf: ( 179 | f"{self._gsolv(conf) - dft_energies[conf.name]:.6f}" 180 | if "gsolv" in self.data["results"][conf.name] 181 | else "---" 182 | ), 183 | "GmRRHO": lambda conf: ( 184 | f"{self.data['results'][conf.name]['xtb_rrho']['gibbs'][self.get_general_settings()['temperature']]:.6f}" 185 | if self.get_general_settings()["evaluate_rrho"] 186 | else "---" 187 | ), 188 | "Gtot": lambda conf: f"{self.data['results'][conf.name]['gtot']:.6f}", 189 | "ΔGtot": lambda conf: f"{(self.data['results'][conf.name]['gtot'] - gtotmin) * AU2KCAL:.2f}", 190 | "Boltzmann weight": lambda conf: f"{self.data['results'][conf.name]['bmw'] * 100:.2f}", 191 | } 192 | 193 | rows = [ 194 | [printmap[header](conf) for header in headers] 195 | for conf in self._ensemble.conformers 196 | ] 197 | 198 | lines = format_data(headers, rows, units=units) 199 | 200 | # list the averaged free enthalpy of the ensemble 201 | lines.append( 202 | "\nBoltzmann averaged free energy/enthalpy of ensemble (high level single-points):\n" 203 | ) 204 | lines.append( 205 | f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} {'avG(T) /a.u.':>14}\n" 206 | ) 207 | 208 | # calculate averaged free enthalpy 209 | avG = sum( 210 | self.data["results"][conf.name]["bmw"] 211 | * self.data["results"][conf.name]["gtot"] 212 | for conf in self._ensemble.conformers 213 | ) 214 | 215 | # calculate averaged free energy 216 | avE = ( 217 | sum( 218 | self.data["results"][conf.name]["bmw"] 219 | * self.data["results"][conf.name]["sp"]["energy"] 220 | for conf in 
self._ensemble.conformers 221 | ) 222 | if all( 223 | "sp" in self.data["results"][conf.name] 224 | for conf in self._ensemble.conformers 225 | ) 226 | else sum( 227 | self.data["results"][conf.name]["bmw"] 228 | * self.data["results"][conf.name]["gsolv"]["energy_gas"] 229 | for conf in self._ensemble.conformers 230 | ) 231 | ) 232 | 233 | # append the lines for the free energy/enthalpy 234 | lines.append( 235 | f"{self.get_general_settings().get('temperature', 298.15):^15} {avE:>14.7f} {avG:>14.7f} <<==part3==\n" 236 | ) 237 | lines.append("".ljust(int(PLENGTH), "-") + "\n\n") 238 | 239 | # Print everything 240 | for line in lines: 241 | print(line, flush=True, end="") 242 | 243 | # append lines to already existing file 244 | filename = f"{self._part_nos[self.name]}_{self.name.upper()}.out" 245 | logger.debug(f"Writing to {os.path.join(os.getcwd(), filename)}.") 246 | with open(os.path.join(os.getcwd(), filename), "a", newline=None) as outfile: 247 | outfile.writelines(lines) 248 | 249 | # Additionally, write the results to a json file 250 | self._write_json() 251 | 252 | 253 | Factory.register_builder("refinement", Refinement) 254 | -------------------------------------------------------------------------------- /src/censo/logging.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import sys 4 | 5 | __logpath: str = os.path.join(os.getcwd(), "censo.log") 6 | __loglevel = logging.INFO 7 | 8 | __loggers = [] 9 | 10 | # _loglevel = logging.DEBUG 11 | 12 | 13 | def setup_logger(name: str, silent: bool = True) -> logging.Logger: 14 | """ 15 | Initializes and configures a logger with the specified name. 16 | 17 | Args: 18 | name (str): The name of the logger. 19 | silent (bool, optional): Whether to print logpath or not. Defaults to True. 20 | 21 | Returns: 22 | logging.Logger: The configured logger instance. 23 | """ 24 | if not silent: 25 | print(f"LOGFILE CAN BE FOUND AT: {__logpath}") 26 | 27 | # Create a logger instance with the specified name 28 | logger = logging.getLogger(name) 29 | logger.setLevel(__loglevel) 30 | 31 | # Create a FileHandler to log messages to the logpath file 32 | handler = logging.FileHandler(__logpath) 33 | stream_handler = logging.StreamHandler(sys.stdout) 34 | stream_handler.setLevel(logging.WARNING) 35 | 36 | # Define the log message format 37 | formatter = logging.Formatter( 38 | "{asctime:24s}-{name:^24s}-{levelname:^10s}- {message}", style="{" 39 | ) 40 | stream_formatter = logging.Formatter("{levelname:^10s}- {message}", style="{") 41 | handler.setFormatter(formatter) 42 | stream_handler.setFormatter(stream_formatter) 43 | 44 | # Add the FileHandler and StreamHandler to the logger 45 | logger.addHandler(handler) 46 | logger.addHandler(stream_handler) 47 | 48 | __loggers.append(logger) 49 | 50 | return logger 51 | 52 | 53 | def set_loglevel(loglevel: str | int) -> None: 54 | """ 55 | Set the log level for the logger. 56 | 57 | Args: 58 | loglevel (str | int): The log level to set. In case of a string this will get the respective attr 59 | from logging.. 
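            For example, set_loglevel("DEBUG") and set_loglevel(logging.DEBUG) are equivalent.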
60 | 61 | Returns: 62 | None 63 | """ 64 | global __loglevel 65 | 66 | if isinstance(loglevel, str): 67 | __loglevel = getattr(logging, loglevel) 68 | else: 69 | __loglevel = loglevel 70 | 71 | for logger in __loggers: 72 | logger.setLevel(__loglevel) 73 | -------------------------------------------------------------------------------- /src/censo/parallel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Performs the parallel execution of the QM calls. 3 | """ 4 | 5 | import multiprocessing 6 | import signal 7 | from concurrent.futures import ProcessPoolExecutor, as_completed 8 | 9 | from .datastructure import MoleculeData, ParallelJob 10 | from .logging import setup_logger 11 | from .params import Config 12 | from .qm_processor import QmProc 13 | from .tm_processor import TmProc 14 | from .utilities import Factory 15 | 16 | logger = setup_logger(__name__) 17 | 18 | 19 | def execute( 20 | conformers: list[MoleculeData], 21 | workdir: str, 22 | prog: str, 23 | prepinfo: dict[str, dict], 24 | jobtype: list[str], 25 | copy_mo: bool = False, 26 | retry_failed: bool = False, 27 | balance: bool = True, 28 | ) -> tuple[dict, list]: 29 | """ 30 | Manages parallel execution of external program calls. Sets cores used per job, checks requirements, 31 | can copy MO-files, and retry failed jobs. 32 | 33 | Args: 34 | conformers (list[MoleculeData]): List of conformers for which jobs will be created and executed. 35 | workdir (str): Working directory. 36 | prog (str): Name of the program to be used. 37 | copy_mo (bool, optional): Whether to copy the MO-files from the previous calculation. 38 | retry_failed (bool, optional): Whether to retry failed jobs. 39 | balance (bool, optional): Whether to balance the number of cores used per job. 40 | maxcores (int, optional): Maximum number of cores to be used. 41 | omp (int, optional): Number of cores to be used per job. 42 | update (bool, optional): Wether to update the results dict for each conformer. 43 | 44 | Returns: 45 | tuple[dict, list]: Dictionary containing the results for each conformer and a list of unrecoverable conformers. 46 | """ 47 | 48 | def prepare_jobs( 49 | conformers: list[MoleculeData], prepinfo: dict[str, dict], jobtype: list[str] 50 | ) -> list[ParallelJob]: 51 | # create jobs from conformers 52 | jobs = [ParallelJob(conf.geom, jobtype) for conf in conformers] 53 | 54 | # put settings into jobs 55 | for job in jobs: 56 | job.prepinfo.update(prepinfo) 57 | 58 | return jobs 59 | 60 | def reduce_cores( 61 | free_cores: multiprocessing.Value, 62 | omp: int, 63 | enough_cores: multiprocessing.Condition, 64 | ): 65 | # acquire lock on the condition and wait until enough cores are available 66 | with enough_cores: 67 | enough_cores.wait_for(lambda: free_cores.value >= omp) 68 | free_cores.value -= omp 69 | logger.debug( 70 | f"Free cores decreased {free_cores.value + omp} -> {free_cores.value}." 71 | ) 72 | 73 | def increase_cores( 74 | free_cores: multiprocessing.Value, 75 | omp: int, 76 | enough_cores: multiprocessing.Condition, 77 | ): 78 | # acquire lock on the condition and increase the number of cores, notifying one waiting process 79 | with enough_cores: 80 | free_cores.value += omp 81 | logger.debug( 82 | f"Free cores increased {free_cores.value - omp} -> {free_cores.value}." 83 | ) 84 | enough_cores.notify() 85 | 86 | def handle_sigterm(signum, frame, executor): 87 | logger.critical("Received SIGTERM. 
Terminating.") 88 | executor.shutdown(wait=False) 89 | 90 | def dqp(jobs: list[ParallelJob], processor: QmProc) -> list[ParallelJob]: 91 | """ 92 | D ynamic Q ueue P rocessing 93 | """ 94 | 95 | with multiprocessing.Manager() as manager: 96 | # execute calculations for given list of conformers 97 | with ProcessPoolExecutor( 98 | max_workers=Config.NCORES // min(job.omp for job in jobs) 99 | ) as executor: 100 | # make sure that the executor exits gracefully on termination 101 | # TODO - is using wait=False a good option here? 102 | # should be fine since workers will kill programs with SIGTERM 103 | # wait=True leads to the workers waiting for their current task to be finished before terminating 104 | # Register the signal handler 105 | signal.signal( 106 | signal.SIGTERM, 107 | lambda signum, frame: handle_sigterm(signum, frame, executor), 108 | ) 109 | 110 | # define shared variables that can be safely asynchronously accessed 111 | free_cores = manager.Value(int, Config.NCORES) 112 | enough_cores = manager.Condition() 113 | 114 | # sort the jobs by the number of cores used 115 | # (the first item will be the one with the lowest number of cores) 116 | jobs.sort(key=lambda x: x.omp) 117 | 118 | tasks = [] 119 | for i in range(len(jobs)): 120 | # TODO - something to readjust omp based on expected time to finish and the timings of other jobs 121 | # try to reduce the number of cores by job.omp, if there are not enough cores available we wait 122 | reduce_cores(free_cores, jobs[i].omp, enough_cores) 123 | 124 | try: 125 | # submit the job 126 | tasks.append(executor.submit(processor.run, jobs[i])) 127 | # NOTE: explanation of the lambda: the first argument passed to the done_callback is always the future 128 | # itself, it is not assigned (_), the second parameter is the number of openmp threads of the job (i.e. 129 | # job.omp) if this is not specified like this (omp=jobs[i].omp) the done_callback will instead use the 130 | # omp of the current item in the for-iterator (e.g. the submitted job has omp=4, but the current jobs[i] 131 | # has omp=7, so the callback would use 7 instead of 4) 132 | tasks[-1].add_done_callback( 133 | lambda _, omp=jobs[i].omp: increase_cores( 134 | free_cores, omp, enough_cores 135 | ) 136 | ) 137 | except RuntimeError: 138 | # Makes this exit gracefully in case that the main process is killed 139 | return None 140 | 141 | # wait for all jobs to finish and collect results 142 | try: 143 | results = [task.result() for task in as_completed(tasks)] 144 | except Exception as exc: 145 | raise exc 146 | 147 | return results 148 | 149 | def set_omp_chunking(jobs: list[ParallelJob]) -> None: 150 | """ 151 | Determines and sets the number of cores that are supposed to be used for every job. 152 | This method is efficient if it can be assumed that the jobs take roughly the same amount of time each. 153 | Each job shouldn't use less than Config.OMPMIN cores. 
154 | """ 155 | # Get the total number of jobs 156 | jobs_left, tot_jobs = len(jobs), len(jobs) 157 | 158 | # Calculate the maximum and minimum number of processes (number of jobs that can be executed simultaneously) 159 | maxprocs = ( 160 | Config.NCORES // Config.OMPMIN 161 | ) # Calculate the maximum number of processes 162 | # Calculate the minimum number of processes 163 | minprocs = max(1, Config.NCORES // Config.OMPMAX) 164 | 165 | # Loop until all jobs are distributed 166 | while jobs_left > 0: 167 | if jobs_left >= maxprocs: 168 | p = maxprocs # Set the number of processes to the maximum if there are enough jobs left 169 | elif minprocs <= jobs_left < maxprocs: 170 | # Find the largest number of processes that evenly divides the remaining jobs 171 | p = max( 172 | [ 173 | j 174 | for j in range(minprocs, maxprocs) 175 | if Config.NCORES % j == 0 and j <= jobs_left 176 | ] 177 | ) 178 | else: 179 | # There are not enough jobs left for at least minprocs processes 180 | for job in jobs[tot_jobs - jobs_left : tot_jobs]: 181 | job.omp = ( 182 | Config.NCORES // minprocs 183 | ) # Set the number of cores for each job to the maximum value 184 | jobs_left -= jobs_left 185 | continue 186 | 187 | # Set the number of cores for each job for as many jobs as possible before moving onto the next omp value 188 | while jobs_left - p >= 0: 189 | for job in jobs[tot_jobs - jobs_left : tot_jobs - jobs_left + p]: 190 | job.omp = Config.NCORES // p # Set the number of cores for each job 191 | jobs_left -= p # Decrement the number of remaining jobs 192 | 193 | def retry_failed_jobs( 194 | jobs: list[ParallelJob], processor: QmProc, balance: bool 195 | ) -> tuple[list[int], list[str]]: 196 | """ 197 | Tries to recover failed jobs. 198 | 199 | Args: 200 | jobs (list[ParallelJob]): List of jobs. 201 | processor (QmProc): Processor object. 202 | 203 | Returns: 204 | tuple[list[int], list[str]]: List of indices of jobs that should be retried, list of names of conformers 205 | that could not be recovered. 206 | """ 207 | # determine failed jobs 208 | logger.debug("Checking for failed jobs...") 209 | failed_jobs = [ 210 | i 211 | for i, job in enumerate(jobs) 212 | if any(not job.meta[jt]["success"] for jt in job.jobtype) 213 | ] 214 | 215 | if len(failed_jobs) != 0: 216 | # create a new list of failed jobs that should be restarted with special flags 217 | # contains jobs that should be retried (depends on wether the error can be handled or not) 218 | retry = [] 219 | 220 | # determine flags for jobs based on error messages 221 | for failed_job in failed_jobs: 222 | handled_errors = ["scf_not_converged", "Previous calculation failed"] 223 | 224 | # list of jobtypes that should be removed from the jobtype list 225 | jtremove = [] 226 | for jt in jobs[failed_job].jobtype: 227 | if not jobs[failed_job].meta[jt]["success"]: 228 | if jobs[failed_job].meta[jt]["error"] in handled_errors: 229 | retry.append(failed_job) 230 | jobs[failed_job].flags[jt] = jobs[failed_job].meta[jt][ 231 | "error" 232 | ] 233 | # store all successful jobtypes to be removed later 234 | elif jobs[failed_job].meta[jt]["success"]: 235 | jtremove.append(jt) 236 | 237 | # remove all successful jobs from jobtype to avoid re-execution 238 | for jt in jtremove: 239 | jobs[failed_job].jobtype.remove(jt) 240 | 241 | # execute jobs that should be retried 242 | logger.info( 243 | f"Number of failed jobs: {len(failed_jobs)}. Restarting {len(retry)} jobs." 
244 | ) 245 | 246 | if len(retry) > 0: 247 | # Rebalancing necessary 248 | if balance: 249 | set_omp_chunking([jobs[i] for i in retry]) 250 | 251 | for i, job in zip( 252 | [i for i in retry], dqp([jobs[i] for i in retry], processor) 253 | ): 254 | jobs[i] = job 255 | 256 | # any jobs that still failed will lead to the conformer being marked as unrecoverable 257 | failed_confs = [] 258 | for job in jobs: 259 | if not all(job.meta[jt]["success"] for jt in job.jobtype): 260 | logger.warning( 261 | f"{job.conf.name} job recovery failed. Error: {job.meta[jt]['error']}. Check output files." 262 | ) 263 | failed_confs.append(job.conf.name) 264 | else: 265 | logger.info(f"Successfully retried job for {job.conf.name}.") 266 | else: 267 | retry = [] 268 | failed_confs = [] 269 | logger.info("All jobs executed successfully.") 270 | 271 | return retry, failed_confs 272 | 273 | # Check first if there are any conformers at all 274 | try: 275 | assert len(conformers) > 0 276 | except AssertionError as e: 277 | raise AssertionError("No jobs to compute!") from e 278 | 279 | # Create jobs from conformers data 280 | jobs = prepare_jobs(conformers, prepinfo, jobtype) 281 | 282 | # initialize the processor for the respective program 283 | processor = Factory.create( 284 | prog, 285 | workdir, 286 | ) 287 | 288 | # processor.check_requirements(jobs) 289 | 290 | # Set processor to copy the MO-files 291 | processor.copy_mo = copy_mo 292 | 293 | # check for the most recent mo files for each conformer 294 | # TODO - how would this work when multiple different programs are supported? 295 | for job in jobs: 296 | try: 297 | job.mo_guess = next( 298 | c for c in conformers if c.name == job.conf.name 299 | ).mo_paths[-1] 300 | except IndexError: 301 | pass 302 | 303 | # set cores per process for each job 304 | # NOTE: since parallelization in tm is controlled using environment variables we cannot use automatic load balancing 305 | if balance and not isinstance(processor, TmProc): 306 | set_omp_chunking(jobs) 307 | elif balance and isinstance(processor, TmProc): 308 | logger.warning( 309 | "Load balancing 2.0 is not supported for TURBOMOLE. Falling back to old behaviour." 310 | ) 311 | 312 | # If there are not enough cores to use omp = Config.OMPMIN (to avoid unnecessary waiting) 313 | if len(jobs) < Config.NCORES // Config.OMPMIN: 314 | omp = Config.NCORES // len(jobs) 315 | # Otherwise try find the largest number of parallel processors p that 316 | # is Config.NCORES // Config.OMPMIN at most and Config.NCORES // Config.OMPMAX at least 317 | # such that at least 75% of processors still work for the remainder jobs 318 | # or the number of jobs can be evenly distributed between the processors 319 | else: 320 | for o in range(Config.OMPMIN, Config.OMPMAX + 1): 321 | p = Config.NCORES // o 322 | if p == 1: 323 | break 324 | if len(jobs) % p >= 0.75 * p or len(jobs) % p == 0: 325 | break 326 | omp = o 327 | 328 | # Configure environment variables 329 | Config.ENVIRON["PARA_ARCH"] = "SMP" 330 | Config.ENVIRON["PARNODES"] = str(omp) 331 | 332 | for job in jobs: 333 | job.omp = omp 334 | else: 335 | omp = Config.OMP 336 | if omp < Config.OMPMIN: 337 | logger.warning( 338 | f"User OMP setting is below the minimum value of {Config.OMPMIN}. Using {Config.OMPMIN} instead." 339 | ) 340 | omp = Config.OMPMIN 341 | elif omp > Config.NCORES: 342 | logger.warning( 343 | f"Value of {omp} for OMP is larger than the number of available cores {Config.NCORES}. Using OMP = {Config.NCORES}." 
344 | ) 345 | omp = Config.NCORES 346 | 347 | for job in jobs: 348 | job.omp = omp 349 | 350 | # execute the jobs 351 | jobs = dqp(jobs, processor) 352 | 353 | # Try to get the mo_path from metadata and store it in the respective conformer object 354 | mo_paths = {job.conf.name: job.meta["mo_path"] for job in jobs} 355 | for conf in conformers: 356 | if mo_paths[conf.name] is not None: 357 | conf.mo_paths.append(mo_paths[conf.name]) 358 | 359 | failed_confs = [] 360 | if retry_failed: 361 | retried, failed_confs = retry_failed_jobs(jobs, processor, balance) 362 | 363 | # Again, try to get the mo_path from metadata and store it in the respective conformer object 364 | mo_paths = { 365 | job.conf.name: job.meta["mo_path"] for job in [jobs[i] for i in retried] 366 | } 367 | for conf in conformers: 368 | if mo_paths.get(conf.name, None) is not None: 369 | conf.mo_paths.append(mo_paths[conf.name]) 370 | 371 | # RuntimeError if all jobs failed 372 | if len(jobs) == len(failed_confs): 373 | raise RuntimeError( 374 | "Parallel execution of all jobs failed and could not be recovered!" 375 | ) 376 | 377 | # e.g. {"CONF23": {"sp": {"energy": 1231.5}, ...}} 378 | return {job.conf.name: job.results for job in jobs}, failed_confs 379 | -------------------------------------------------------------------------------- /src/censo/properties/__init__.py: -------------------------------------------------------------------------------- 1 | from .nmr import NMR 2 | from .uvvis import UVVis 3 | from .property_calculator import PropertyCalculator 4 | -------------------------------------------------------------------------------- /src/censo/properties/property_calculator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains boilerplate class for calculating ensemble properties. 3 | """ 4 | 5 | from ..logging import setup_logger 6 | from ..part import CensoPart 7 | from ..utilities import timeit, SolventHelper 8 | from ..datastructure import MoleculeData 9 | from ..ensembleopt import EnsembleOptimizer 10 | 11 | logger = setup_logger(__name__) 12 | 13 | 14 | class PropertyCalculator(CensoPart): 15 | """ 16 | Boilerplate class for all property calculations. 17 | """ 18 | 19 | _grid = "" 20 | 21 | @timeit 22 | @CensoPart._create_dir 23 | def __call__(self, using_part: CensoPart = None) -> None: 24 | """ 25 | Boilerplate run logic for any ensemble property calculation. The 'property' method should be implemented for every 26 | class respectively. 27 | 28 | Running a property calculation requires some kind of ensemble energetic ranking beforehand. 29 | 30 | It is possible to pass a specific part output to determine the Boltzmann populations 31 | """ 32 | # print instructions 33 | self._print_info() 34 | 35 | # Set energy values to use later 36 | self._set_energy(using_part=using_part) 37 | for conf in self._ensemble.conformers: 38 | self.data["results"][conf.name]["gtot"] = self._gtot(conf) 39 | 40 | # Calculate Boltzmann populations 41 | self._update_results(self._calc_boltzmannweights()) 42 | 43 | # Perform the property calculations 44 | self._property() 45 | 46 | # DONE 47 | 48 | def _output(self) -> None: 49 | """ 50 | Implements printouts and writes for any output data. 51 | Necessary to implement for each part. 
52 | """ 53 | # Write out results 54 | self._write_results() 55 | 56 | def _property(self): 57 | raise NotImplementedError 58 | 59 | def _write_results(self): 60 | raise NotImplementedError 61 | 62 | def _gtot(self, conf: MoleculeData) -> float: 63 | return ( 64 | self.data["results"][conf.name]["energy"] 65 | + self.data["results"][conf.name]["gsolv"] 66 | + self.data["results"][conf.name]["grrho"] 67 | ) 68 | 69 | def _setup_prepinfo_rrho(self) -> dict[str, dict]: 70 | prepinfo = {} 71 | 72 | prepinfo["partname"] = self.name 73 | prepinfo["charge"] = self._ensemble.runinfo.get("charge") 74 | prepinfo["unpaired"] = self._ensemble.runinfo.get("unpaired") 75 | prepinfo["general"] = self.get_general_settings() 76 | 77 | prepinfo["xtb_rrho"] = { 78 | "gfnv": self.get_settings()["gfnv"], 79 | } 80 | # Only lookup solvent if solvation should be used 81 | if not self.get_general_settings()["gas-phase"]: 82 | prepinfo["xtb_rrho"]["solvent_key_xtb"] = SolventHelper.get_solvent( 83 | self.get_general_settings()["sm_rrho"], 84 | self.get_general_settings()["solvent"], 85 | ) 86 | 87 | return prepinfo 88 | 89 | def _set_energy(self, using_part: CensoPart | None = None): 90 | """ 91 | Looks through results to set energy values. 92 | Order of preference: 93 | refinement -> optimization -> screening -> prescreening 94 | 95 | If None of these are found, raise RuntimeError. 96 | """ 97 | if using_part is None: 98 | # Determine the smallest usable optimization results 99 | # First filter ensemble optimizations 100 | opts = filter( 101 | lambda part: issubclass(type(part), EnsembleOptimizer), 102 | self._ensemble.results, 103 | ) 104 | opts = sorted(opts, key=lambda part: part.data["nconf_out"]) 105 | 106 | # Get the results with the smallest outputs 107 | opts_iter = iter(opts) 108 | res = next(opts_iter, None) 109 | if res is None: 110 | raise RuntimeError( 111 | "Calculating an ensemble property requires some kind of energetic ensemble ranking performed beforehand." 
112 | ) 113 | 114 | smallest_results = [] 115 | while res.data["nconf_out"] == opts[0].data["nconf_out"]: 116 | smallest_results.append(res) 117 | try: 118 | res = next(opts_iter) 119 | except StopIteration: 120 | break 121 | 122 | # Get the highest (assumed) quality part from those 123 | if len(smallest_results) == 1: 124 | using_part = smallest_results[0] 125 | else: 126 | # This will put the highest quality part at the top (highest part number) 127 | smallest_results.sort( 128 | key=lambda part: self._part_nos[part.name], reverse=True 129 | ) 130 | using_part = smallest_results[0] 131 | 132 | # Get the index of this results from the ensemble results 133 | assert using_part is not None 134 | using_part = self._ensemble.results.index(using_part) 135 | 136 | energy_values = { 137 | "prescreening": lambda conf: { 138 | "energy": self._ensemble.results[using_part].data["results"][conf.name][ 139 | "sp" 140 | ]["energy"], 141 | "gsolv": ( 142 | self._ensemble.results[using_part].data["results"][conf.name][ 143 | "xtb_gsolv" 144 | ]["gsolv"] 145 | if "xtb_gsolv" 146 | in self._ensemble.results[using_part].data["results"][conf.name] 147 | else 0.0 148 | ), 149 | "grrho": 0.0, 150 | }, 151 | "screening": lambda conf: { 152 | "energy": ( 153 | self._ensemble.results[using_part].data["results"][conf.name][ 154 | "gsolv" 155 | ]["energy_gas"] 156 | if "gsolv" 157 | in self._ensemble.results[using_part].data["results"][conf.name] 158 | else self._ensemble.results[using_part].data["results"][conf.name][ 159 | "sp" 160 | ]["energy"] 161 | ), 162 | "gsolv": ( 163 | self._ensemble.results[using_part].data["results"][conf.name][ 164 | "gsolv" 165 | ]["gsolv"] 166 | if "gsolv" 167 | in self._ensemble.results[using_part].data["results"][conf.name] 168 | else 0.0 169 | ), 170 | "grrho": self._ensemble.results[using_part] 171 | .data["results"][conf.name] 172 | .get("xtb_rrho", {"energy": 0.0})["energy"], 173 | }, 174 | "optimization": lambda conf: { 175 | "energy": self._ensemble.results[using_part].data["results"][conf.name][ 176 | "xtb_opt" 177 | ]["energy"], 178 | "gsolv": 0.0, 179 | "grrho": self._ensemble.results[using_part] 180 | .data["results"][conf.name] 181 | .get("xtb_rrho", {"energy": 0.0})["energy"], 182 | }, 183 | "refinement": lambda conf: { 184 | "energy": ( 185 | self._ensemble.results[using_part].data["results"][conf.name][ 186 | "gsolv" 187 | ]["energy_gas"] 188 | if "gsolv" 189 | in self._ensemble.results[using_part].data["results"][conf.name] 190 | else self._ensemble.results[using_part].data["results"][conf.name][ 191 | "sp" 192 | ]["energy"] 193 | ), 194 | "gsolv": ( 195 | self._ensemble.results[using_part].data["results"][conf.name][ 196 | "gsolv" 197 | ]["gsolv"] 198 | if "gsolv" 199 | in self._ensemble.results[using_part].data["results"][conf.name] 200 | else 0.0 201 | ), 202 | "grrho": self._ensemble.results[using_part] 203 | .data["results"][conf.name] 204 | .get("xtb_rrho", {"energy": 0.0})["energy"], 205 | }, 206 | } 207 | 208 | for conf in self._ensemble.conformers: 209 | self.data["results"].setdefault( 210 | conf.name, energy_values[self._ensemble.results[using_part].name](conf) 211 | ) 212 | -------------------------------------------------------------------------------- /src/censo/properties/uvvis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Calculates the ensemble UV/Vis spectrum. 
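The excitations of all conformers are weighted by their Boltzmann populations before being written out.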
3 | """ 4 | 5 | import json 6 | import os 7 | 8 | from ..parallel import execute 9 | from ..params import Config 10 | from ..utilities import SolventHelper, DfaHelper, format_data, print, Factory 11 | from ..logging import setup_logger 12 | from .property_calculator import PropertyCalculator 13 | from ..part import CensoPart 14 | 15 | logger = setup_logger(__name__) 16 | 17 | 18 | class UVVis(PropertyCalculator): 19 | """ 20 | Calculation of the ensemble UV/Vis spectrum of a (previously) optimized ensemble. 21 | Note, that the ensemble will not be modified anymore. 22 | """ 23 | 24 | __solv_mods = { 25 | prog: tuple( 26 | t for t in Config.SOLV_MODS[prog] if t not in ("cosmors", "cosmors-fine") 27 | ) 28 | for prog in Config.PROGS 29 | } 30 | 31 | _options = { 32 | "prog": {"default": "orca", "options": ["orca"]}, # required 33 | "func": { 34 | "default": "wb97x-d4", 35 | "options": {prog: DfaHelper.get_funcs(prog) for prog in Config.PROGS}, 36 | }, 37 | "basis": {"default": "def2-TZVP"}, 38 | "sm": {"default": "smd", "options": __solv_mods}, 39 | "gfnv": {"default": "gfn2", "options": Config.GFNOPTIONS}, 40 | "nroots": {"default": 20}, 41 | "run": {"default": False}, # required 42 | "template": {"default": False}, # required 43 | } 44 | 45 | _settings = {} 46 | 47 | @classmethod 48 | def _validate(cls, tovalidate: dict[str, any]) -> None: 49 | """ 50 | Validates the type of each setting in the given dict. Also potentially validate if the setting is allowed by 51 | checking with cls._options. 52 | This is the part-specific version of the method. It will run the general validation first and then 53 | check part-specific logic. 54 | 55 | Args: 56 | tovalidate (dict[str, any]): The dict containing the settings to be validated. 57 | 58 | Returns: 59 | None 60 | 61 | Raises: 62 | ValueError: If the setting is not allowed or the value is not within the allowed options. 63 | """ 64 | # General validation 65 | super()._validate(tovalidate) 66 | 67 | # Part-specific validation 68 | # NOTE: tovalidate is always complete 69 | # Check availability of func for prog 70 | func = tovalidate["func"] 71 | if func not in cls._options["func"]["options"][tovalidate["prog"]]: 72 | raise ValueError( 73 | f"Functional {func} is not available for {tovalidate['prog']}. " 74 | "Check spelling w.r.t. CENSO functional naming convention (case insensitive)." 75 | ) 76 | 77 | # Check sm availability for prog 78 | # Remember: tovalidate is always complete so we don't need .get with default None here 79 | sm = tovalidate["sm"] 80 | if sm not in cls._options["sm"]["options"][tovalidate["prog"]]: 81 | raise ValueError( 82 | f"Solvent model {sm} not available for {tovalidate['prog']}." 83 | ) 84 | 85 | # Check solvent availability for sm 86 | if ( 87 | cls.get_general_settings()["solvent"] 88 | not in CensoPart._options["solvent"]["options"][sm] 89 | ): 90 | raise ValueError( 91 | f"Solvent {cls.get_general_settings()['solvent']} is not available for {sm}. " 92 | ) 93 | 94 | # dummy/template functionality not implemented yet for TM 95 | if tovalidate["prog"] == "tm" and (func == "dummy"): 96 | raise NotImplementedError( 97 | "Dummy functionality is not implemented for use with TURBOMOLE." 
98 | ) 99 | 100 | def _property(self) -> None: 101 | jobtype = ["uvvis"] 102 | 103 | # Compile all information required for the preparation of input files in parallel execution step 104 | prepinfo = self._setup_prepinfo() 105 | 106 | # compute results 107 | # for structure of results from handler.execute look there 108 | results, failed = execute( 109 | self._ensemble.conformers, 110 | self._dir, 111 | self.get_settings()["prog"], 112 | prepinfo, 113 | jobtype, 114 | copy_mo=self.get_general_settings()["copy_mo"], 115 | balance=self.get_general_settings()["balance"], 116 | retry_failed=self.get_general_settings()["retry_failed"], 117 | ) 118 | 119 | # Remove failed conformers 120 | self._ensemble.remove_conformers(failed) 121 | 122 | # Update results 123 | self._update_results(results) 124 | 125 | # Ensemble averaging of excitations 126 | self.__excitation_averaging() 127 | 128 | def _setup_prepinfo(self) -> dict[str, dict]: 129 | prepinfo = {} 130 | 131 | prepinfo["partname"] = self.name 132 | prepinfo["charge"] = self._ensemble.runinfo.get("charge") 133 | prepinfo["unpaired"] = self._ensemble.runinfo.get("unpaired") 134 | prepinfo["general"] = self.get_general_settings() 135 | 136 | prepinfo["uvvis"] = { 137 | "func_name": DfaHelper.get_name( 138 | self.get_settings()["func"], self.get_settings()["prog"] 139 | ), 140 | "func_type": DfaHelper.get_type(self.get_settings()["func"]), 141 | "disp": DfaHelper.get_disp(self.get_settings()["func"]), 142 | "basis": self.get_settings()["basis"], 143 | "grid": "high+", # hardcoded grid settings 144 | "template": self.get_settings()["template"], 145 | "gcp": False, # GCP is not necessary for spectra calculations 146 | "nroots": self.get_settings()["nroots"], 147 | } 148 | # Only look up solvent if solvation is used 149 | if not self.get_general_settings()["gas-phase"]: 150 | prepinfo["uvvis"]["sm"] = self.get_settings()["sm"] 151 | prepinfo["uvvis"]["solvent_key_prog"] = SolventHelper.get_solvent( 152 | self.get_settings()["sm"], self.get_general_settings()["solvent"] 153 | ) 154 | 155 | return prepinfo 156 | 157 | def __excitation_averaging(self): 158 | """ 159 | Calculates population weighted excitation parameters. 160 | """ 161 | # Calculate epsilon_max (maximum extinctions) for each excitation, weighted by population 162 | # eps is a list of tuples that contain each excitation wavelength with the respective epsilon_max 163 | eps = [] 164 | for conf in self._ensemble.conformers: 165 | for excitation in self.data["results"][conf.name]["uvvis"]["excitations"]: 166 | epsilon_max = ( 167 | self.data["results"][conf.name]["bmw"] * excitation["osc_str"] 168 | ) 169 | eps.append((excitation["wavelength"], epsilon_max, conf.name)) 170 | 171 | # Print table 172 | headers = ["λ", "ε_max", "Origin. CONF#"] 173 | 174 | units = ["[nm]", "", ""] 175 | 176 | printmap = { 177 | "λ": lambda exc: f"{exc[0]:.2f}", 178 | "ε_max": lambda exc: f"{exc[1]:.6f}", 179 | "Origin. CONF#": lambda exc: f"{exc[2]}", 180 | } 181 | 182 | rows = [[printmap[header](exc) for header in headers] for exc in eps] 183 | 184 | lines = format_data(headers, rows, units=units) 185 | 186 | # Print everything 187 | for line in lines: 188 | print(line, flush=True, end="") 189 | 190 | # write lines to file 191 | logger.debug( 192 | f"Writing to {os.path.join(os.getcwd(), f'{self._part_nos[self.name]}_{self.name.upper()}.out')}." 
193 | ) 194 | with open( 195 | os.path.join( 196 | os.getcwd(), 197 | f"{self._part_nos[self.name]}_{self.name.upper()}.out", 198 | ), 199 | "w", 200 | newline=None, 201 | ) as outfile: 202 | outfile.writelines(lines) 203 | 204 | # Dump data into json 205 | with open(os.path.join(os.getcwd(), "excitations.json"), "w") as f: 206 | json.dump(eps, f, indent=4) 207 | 208 | def _write_results(self) -> None: 209 | """ 210 | Write result excitations to files. 211 | """ 212 | # Write results to json file 213 | self._write_json() 214 | 215 | 216 | Factory.register_builder("uvvis", UVVis) 217 | -------------------------------------------------------------------------------- /src/censo/utilities.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions which are used in the CENSO modules. From creating folders to 3 | printout routines. 4 | """ 5 | 6 | import functools 7 | import hashlib 8 | import json 9 | import os 10 | import time 11 | import re 12 | from builtins import print as print_orig 13 | from collections import OrderedDict 14 | from collections.abc import Callable, Sequence 15 | import math 16 | 17 | from .params import BOHR2ANG, PLENGTH, Config 18 | from .logging import setup_logger 19 | 20 | logger = setup_logger(__name__) 21 | 22 | 23 | class Factory: 24 | """ 25 | Generic object factory class. 26 | """ 27 | 28 | __builders: dict[str, type] = {} 29 | 30 | @classmethod 31 | def register_builder(cls, name: str, builder: type) -> None: 32 | """ 33 | Registers a builder. 34 | 35 | Args: 36 | name (str): name of the builder. 37 | builder (type): type of the builder. 38 | """ 39 | cls.__builders[name] = builder 40 | 41 | @classmethod 42 | def create(cls, name: str, *args, **kwargs) -> object: 43 | """ 44 | Generic factory method 45 | """ 46 | builder = cls.__builders.get(name, None) 47 | 48 | if builder is not None: 49 | return builder(*args, **kwargs) 50 | raise TypeError(f"No type was found for '{name}' in {list(cls.__builders)}.") 51 | 52 | 53 | class DfaHelper: 54 | _dfa_dict: dict 55 | 56 | @classmethod 57 | def set_dfa_dict(cls, dfadict_path: str): 58 | with open(dfadict_path, "r") as f: 59 | cls._dfa_dict = json.load(f) 60 | 61 | @classmethod 62 | def get_funcs(cls, prog: str): 63 | """ 64 | Returns all functionals available for a given qm program. 65 | 66 | Args: 67 | prog (str): The qm program name. 68 | 69 | Returns: 70 | list[str]: The list of functionals. 71 | """ 72 | return [ 73 | func 74 | for func, v in cls._dfa_dict["functionals"].items() 75 | if v[prog.lower()] is not None 76 | ] 77 | 78 | @classmethod 79 | def get_name(cls, func: str, prog: str): 80 | """ 81 | Returns the name of a certain functional in the given qm program. If name could not 82 | be found, the string passed as func will be returned instead. 83 | 84 | Args: 85 | func (str): The functional. 86 | prog (str): The qm program. 87 | 88 | Returns: 89 | str: The name of the functional. 90 | """ 91 | func = func.lower() 92 | prog = prog.lower() 93 | if func in cls._dfa_dict["functionals"].keys(): 94 | name = cls._dfa_dict["functionals"][func][prog] 95 | else: 96 | logger.warning( 97 | f"Functional {func} not found for program {prog}. Applying name literally." 98 | ) 99 | name = func 100 | return name 101 | 102 | @classmethod 103 | def get_disp(cls, func: str): 104 | """ 105 | Returns the dispersion correction of a given functional. If dispersion correction 106 | cannot be determined, apply none. 107 | 108 | Args: 109 | func (str): The functional. 
110 | 111 | Returns: 112 | str: The dispersion correction name. 113 | """ 114 | func = func.lower() 115 | if func in cls._dfa_dict["functionals"].keys(): 116 | disp = cls._dfa_dict["functionals"][func]["disp"] 117 | else: 118 | logger.warning( 119 | f"Could not determine dispersion correction for {func}. Applying none." 120 | ) 121 | disp = "novdw" 122 | return disp 123 | 124 | @classmethod 125 | def get_type(cls, func: str): 126 | """ 127 | Returns the type of a certain functional. If the type cannot be determined, it 128 | is assumed to be a GGA. 129 | 130 | Args: 131 | func (str): The functional. 132 | 133 | Returns: 134 | str: The type of the functional. 135 | """ 136 | func = func.lower() 137 | if func in cls._dfa_dict["functionals"].keys(): 138 | rettype = cls._dfa_dict["functionals"][func]["type"] 139 | else: 140 | logger.warning( 141 | f"Could not determine functional type for {func}. Assuming GGA." 142 | ) 143 | rettype = "GGA" 144 | return rettype 145 | 146 | @classmethod 147 | def functionals(cls) -> dict[str, dict]: 148 | return cls._dfa_dict["functionals"] 149 | 150 | 151 | class SolventHelper: 152 | """ 153 | Helper class to manage solvent lookup. 154 | """ 155 | 156 | @classmethod 157 | def set_solvent_dict(cls, solvent_dict_path: str) -> None: 158 | """ 159 | Load the solvents lookup dict. 160 | 161 | Args: 162 | solvent_dict_path (str): The path to the solvents lookup dict. 163 | """ 164 | with open(solvent_dict_path, "r") as f: 165 | cls._solv_dict = json.load(f) 166 | 167 | @classmethod 168 | def get_solvent(cls, sm: str, name: str) -> str | None: 169 | """ 170 | Try to lookup the solvent model keyword for the given solvent name. If it is not found, return None. 171 | 172 | Args: 173 | sm (str): The solvent model. 174 | name (str): The solvent name. 175 | 176 | Returns: 177 | str | None: The solvent model keyword or None if not found. 178 | """ 179 | mappings = cls._solv_dict[name] 180 | if sm in mappings["sms"]: 181 | return name 182 | for alias in mappings["alias"]: 183 | if sm in cls._solv_dict[alias]["sms"]: 184 | return alias 185 | return None 186 | 187 | @classmethod 188 | def get_solvent_names(cls, sm: str) -> list[str]: 189 | """ 190 | Get all available solvent names for a specified solvent model with the respective internal keyword. 191 | 192 | Args: 193 | sm (str): The solvent model. 194 | 195 | Returns: 196 | list[str]: The available solvent names for the given solvent model. 197 | """ 198 | solvents = [] 199 | for solventname, mappings in cls._solv_dict.items(): 200 | if sm in mappings["sms"]: 201 | solvents.append(solventname) 202 | for alias in mappings["alias"]: 203 | if sm in cls._solv_dict[alias]["sms"]: 204 | solvents.append(solventname) 205 | solvents = list(set(solvents)) 206 | return solvents 207 | 208 | 209 | def print(*args, **kwargs): 210 | """ 211 | patch print to always flush 212 | """ 213 | sep = " " 214 | end = "\n" 215 | file = None 216 | flush = True 217 | for key, value in kwargs.items(): 218 | if key == "sep": 219 | sep = value 220 | elif key == "end": 221 | end = value 222 | elif key == "file": 223 | file = value 224 | elif key == "flush": 225 | flush = value 226 | print_orig(*args, sep=sep, end=end, file=file, flush=flush) 227 | 228 | 229 | def format_data( 230 | headers: list[str], 231 | rows: list[list[str]], 232 | units: list[str] = None, 233 | sortby: int = 0, 234 | padding: int = 6, 235 | ) -> list[str]: 236 | """ 237 | Generates a formatted table based on the given headers, rows, units, and sortby index. 
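    Example (hypothetical values):
    format_data(["CONF#", "Gtot"], [["CONF1", "-36.2449"]], units=["", "[Eh]"]) returns the
    header line, the unit line and one data row as ready-to-print strings.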
238 | 239 | Args: 240 | headers (list[str]): The list of column headers. 241 | rows (list[list[str]]): The list of rows, where each row is a list of values. 242 | units (list[str], optional): The list of units for each column. Defaults to None. 243 | sortby (int, optional): The index of the column to sort by. Defaults to 0. In case of a string column, 244 | use natural sorting. 245 | 246 | Returns: 247 | list[str]: The list of formatted lines representing the table. 248 | 249 | """ 250 | 251 | def natural_sort_key(s): 252 | """ 253 | Natural sorting key for strings. 254 | """ 255 | return [int(text) if text.isdigit() else text for text in re.split(r"(\d+)", s)] 256 | 257 | lines = [] 258 | 259 | # First, determine the maximium width for each column 260 | ncols = len(headers) 261 | if units is not None: 262 | maxcolw = [ 263 | max( 264 | [ 265 | len(headers[i]), 266 | max(len(rows[j][i]) for j in range(len(rows))), 267 | len(units[i]), 268 | ] 269 | ) 270 | for i in range(ncols) 271 | ] 272 | else: 273 | maxcolw = [ 274 | max(len(headers[i]), max(len(rows[j][i]) for j in range(len(rows)))) 275 | for i in range(ncols) 276 | ] 277 | 278 | # add table header 279 | lines.append( 280 | " ".join(f"{headers[i]:^{width + padding}}" for i, width in enumerate(maxcolw)) 281 | + "\n" 282 | ) 283 | 284 | # Add units 285 | if units is not None: 286 | lines.append( 287 | " ".join( 288 | f"{units[i]:^{width + padding}}" for i, width in enumerate(maxcolw) 289 | ) 290 | + "\n" 291 | ) 292 | 293 | # TODO - draw an arrow if conformer is the best in current ranking 294 | # (" <------\n" if self.key(conf) == self.key(self.core.conformers[0]) else "\n") 295 | 296 | # Sort rows lexicographically if column sorted by is a number 297 | if rows[0][sortby].replace(".", "", 1).isdigit(): 298 | rows = sorted(rows, key=lambda x: x[sortby]) 299 | # Otherwise use natural sorting 300 | else: 301 | rows = sorted(rows, key=lambda x: natural_sort_key(x[sortby])) 302 | 303 | # add a line for every row 304 | for row in rows: 305 | lines.append( 306 | " ".join(f"{row[i]:^{width + padding}}" for i, width in enumerate(maxcolw)) 307 | + "\n" 308 | ) 309 | 310 | # Remove leading whitespace 311 | start = min(len(line) - len(line.lstrip()) for line in lines) 312 | for i in range(len(lines)): 313 | lines[i] = lines[i][start:] 314 | 315 | return lines 316 | 317 | 318 | def frange(start: float, end: float, step: float = 1) -> list[float]: 319 | """ 320 | Creates a range of floats, adding 'step' to 'start' while it's less or equal than 'end'. 321 | 322 | Args: 323 | start (float): The start of the range. 324 | end (float): The end of the range. 325 | step (float, optional): The step size. Defaults to 1. 326 | 327 | Returns: 328 | list[float]: The list of floats. 329 | """ 330 | result = [] 331 | current = start 332 | while current <= end: 333 | result.append(current) 334 | current += step 335 | return result 336 | 337 | 338 | def t2x( 339 | path: str, writexyz: bool = False, outfile: str = "original.xyz" 340 | ) -> tuple[list, int, str]: 341 | """ 342 | convert TURBOMOLE coord file to xyz data and/or write *.xyz output 343 | 344 | - path [abs. 
path] either to dir or file directly 345 | - writexyz [bool] default=False, directly write to outfile 346 | - outfile [filename] default = 'original.xyz' filename of xyz file which 347 | is written into the same directory as 348 | returns: 349 | - coordxyz --> list of strings including atom x y z information 350 | - number of atoms 351 | """ 352 | # read lines from coord file 353 | with open(path, "r", encoding=Config.CODING, newline=None) as f: 354 | coord = f.readlines() 355 | 356 | # read coordinates with atom labels directly into a string 357 | # and append the string to a list to be written/returned later 358 | xyzatom = [] 359 | for line in coord: 360 | if "$end" in line: # stop at $end ... 361 | break 362 | xyzatom.append( 363 | functools.reduce( 364 | lambda x, y: x + " " + y, 365 | [ 366 | f"{float(line.split()[0]) * BOHR2ANG:.10f}", 367 | f"{float(line.split()[1]) * BOHR2ANG:.10f}", 368 | f"{float(line.split()[2]) * BOHR2ANG:.10f}", 369 | f"{str(line.split()[3].lower()).capitalize()}", 370 | ], 371 | ) 372 | ) 373 | 374 | # get path from args without the filename of the ensemble (last element of path) 375 | if os.path.isfile(path): 376 | outpath = functools.reduce( 377 | lambda x, y: os.path.join(x, y), list(os.path.split(path))[::-1][1:][::-1] 378 | ) 379 | # or just use the given path if it is not a file path 380 | else: 381 | outpath = path 382 | 383 | # write converted coordinates to xyz outfile if wanted 384 | if writexyz: 385 | with open(os.path.join(outpath, outfile), "w", encoding=Config.CODING) as out: 386 | out.write(str(len(xyzatom)) + "\n") 387 | for line in xyzatom: 388 | out.write(line) 389 | return xyzatom, len(xyzatom), os.path.join(outpath, outfile) 390 | 391 | 392 | def check_for_float(line: str) -> float | None: 393 | """Go through line and check for float, return first float""" 394 | elements = line.strip().split() 395 | value = None 396 | for element in elements: 397 | try: 398 | value = float(element) 399 | except ValueError: 400 | value = None 401 | if value is not None: 402 | break 403 | return value 404 | 405 | 406 | def average(x: list[int | float]): 407 | assert len(x) > 0 408 | return float(sum(x)) / len(x) 409 | 410 | 411 | def pearson_def(x: list[int | float], y: list[int | float]): 412 | n = min(len(x), len(y)) 413 | assert n > 0 414 | avg_x = average(x[:n]) 415 | avg_y = average(y[:n]) 416 | diffprod = 0 417 | xdiff2 = 0 418 | ydiff2 = 0 419 | for idx in range(n): 420 | xdiff = x[idx] - avg_x 421 | ydiff = y[idx] - avg_y 422 | diffprod += xdiff * ydiff 423 | xdiff2 += xdiff * xdiff 424 | ydiff2 += ydiff * ydiff 425 | 426 | try: 427 | return diffprod / math.sqrt(xdiff2 * ydiff2) 428 | except ZeroDivisionError: 429 | return 1.0 430 | 431 | 432 | def do_md5(path): 433 | """ 434 | Calculate md5 of file to identifly if restart happend on the same file! 435 | Input is buffered into smaller sizes to ease on memory consumption. 
432 | def do_md5(path): 433 | """ 434 | Calculate the md5 hash of a file to identify whether a restart happened on the same file. 435 | Input is buffered in smaller chunks to reduce memory consumption. 436 | Hashes the entire content of the ensemble input file so it can be compared later. 437 | """ 438 | BUF_SIZE = 65536 439 | md5 = hashlib.md5() 440 | if os.path.isfile(path): 441 | with open(path, "rb") as f: 442 | while True: 443 | data = f.read(BUF_SIZE) 444 | if not data: 445 | break 446 | md5.update(data) 447 | return md5.hexdigest() 448 | else: 449 | raise FileNotFoundError 450 | 451 | 452 | def timeit(f) -> Callable: 453 | """ 454 | Time function execution. 455 | The timed function should have no return value, since it is discarded in the process. 456 | Calling a decorated function returns the time spent on its execution (in seconds). 457 | """ 458 | 459 | @functools.wraps(f) 460 | def wrapper(*args, **kwargs) -> float: 461 | start = time.perf_counter() 462 | f(*args, **kwargs) 463 | end = time.perf_counter() 464 | return end - start 465 | 466 | return wrapper 467 | 468 | 469 | def od_insert( 470 | od: OrderedDict[str, any], key: str, value: any, index: int 471 | ) -> OrderedDict[str, any]: 472 | """ 473 | Insert a new key/value pair into an OrderedDict at a specific position. 474 | If it were a normal dict: 475 | od[key] = value, with insertion before the 'index'th key. 476 | 477 | Args: 478 | od: The OrderedDict to insert into. 479 | key: The key to insert. 480 | value: The value associated with the key. 481 | index: The index before which to insert the key/value pair. 482 | 483 | Returns: 484 | The updated OrderedDict. 485 | """ 486 | # FIXME - somehow this doesn't work reliably, no idea why but sometimes the value is not inserted 487 | items: list[tuple[str, any]] = list(od.items()) 488 | items.insert(index, (key, value)) 489 | return OrderedDict(items) 490 | 491 | 492 | def h1(text: str) -> str: 493 | """ 494 | Creates a formatted header of type 1: 495 | ---- text ---- 496 | 497 | Args: 498 | text: The text to be formatted. 499 | 500 | Returns: 501 | The formatted header. 502 | """ 503 | return "\n" + f" {text} ".center(PLENGTH, "-") + "\n" 504 | 505 | 506 | def h2(text: str) -> str: 507 | """ 508 | Creates a formatted header of type 2: 509 | ---------- 510 | text 511 | ---------- 512 | 513 | Args: 514 | text: The text to be formatted. 515 | 516 | Returns: 517 | The formatted header. 518 | """ 519 | return f""" 520 | {'-' * PLENGTH} 521 | {text.center(PLENGTH, " ")} 522 | {'-' * PLENGTH} 523 | """ 524 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimme-lab/CENSO/3326db4579d1f630c28fce17e3b3e2d66070b8bd/test/__init__.py -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration file for pytest.
3 | """ 4 | -------------------------------------------------------------------------------- /test/fixtures/crest_conformers.xyz: -------------------------------------------------------------------------------- 1 | 22 2 | -36.24494569 3 | Eu 0.0046135283 0.0000019397 0.0009018604 4 | O -0.7091301781 -2.1926549416 0.6522655995 5 | H -0.1535268869 -2.9824097960 0.7314104880 6 | H -1.6126014988 -2.5040861288 0.8117534927 7 | O 1.9409180491 -1.4560619164 -0.1827958483 8 | H 2.6132125060 -1.6189420204 0.4952852918 9 | H 2.2920839687 -1.8610047732 -0.9898651025 10 | O -0.1351016460 0.5087983503 2.3092454014 11 | H 0.0150560893 -0.0922690962 3.0554620867 12 | H -0.4176088948 1.3440476223 2.7129882785 13 | O -0.6004293364 2.2183222629 -0.6744062753 14 | H -0.0084392969 2.9826022184 -0.7393680822 15 | H -1.4852723177 2.5699723619 -0.8532200401 16 | O -2.4106895860 0.0546905730 -0.0105828403 17 | H -2.9985407313 0.2612603992 0.7311674308 18 | H -2.9997083631 -0.1227271263 -0.7588457491 19 | O -0.1224603279 -0.5059944695 -2.3086940938 20 | H -0.4489089440 -1.3237222813 -2.7151687992 21 | H 0.0751867130 0.0828295267 -3.0536044410 22 | O 1.9983553636 1.3728575371 0.2078343153 23 | H 2.3626293325 1.7651006468 1.0153395082 24 | H 2.6825624998 1.4997322752 -0.4660748530 25 | 22 26 | -36.24490733 27 | Eu 0.0019887047 -0.0070027676 0.0013348105 28 | O 1.7080706294 -1.4142412068 0.9816638746 29 | H 1.9177647051 -1.5651424065 1.9151325237 30 | H 2.3391796071 -1.9628177144 0.4920285899 31 | O -0.6508239399 -2.2909714424 -0.4597686028 32 | H -0.9322831632 -2.9449732718 0.1975590738 33 | H -0.6853147982 -2.7582437492 -1.3074403401 34 | O -0.4373601420 0.3614891289 2.3099909688 35 | H -0.3650771354 1.2170139351 2.7613184362 36 | H -0.7489165479 -0.2532245816 2.9914223372 37 | O 2.0661066296 1.2395055439 -0.1381085256 38 | H 2.5742615626 1.4228209808 -0.9425070717 39 | H 2.6475984769 1.5034358354 0.5901552390 40 | O -2.4131423511 -0.0243830142 -0.0316201606 41 | H -2.9999359914 0.0661084266 0.7341184169 42 | H -2.9917366248 -0.2976440415 -0.7587489233 43 | O 0.3561826905 -0.0699633168 -2.3501775298 44 | H 0.9909921709 -0.5835390208 -2.8722143139 45 | H -0.1157001473 0.4752930362 -2.9989742106 46 | O -0.6486709310 2.2666952669 -0.3243766789 47 | H -0.0703694241 3.0412186551 -0.3910030604 48 | H -1.5485930638 2.6140526031 -0.4153234533 49 | 22 50 | -36.24467174 51 | Eu 0.0029283528 -0.0023450380 0.0044432982 52 | O -1.6844788501 -1.5967934815 0.5002804002 53 | H -1.6620817548 -2.2923932443 1.1754078089 54 | H -2.5596952333 -1.6748661758 0.0899459885 55 | O 2.0991841044 -0.9358500165 0.7881761024 56 | H 2.2150973902 -1.7998801533 1.2108167641 57 | H 2.9968011333 -0.5936456292 0.6630889874 58 | O 0.0033506661 0.2303751068 2.4000656315 59 | H -0.7583354660 0.2698160911 2.9973300450 60 | H 0.7727461301 0.3878823236 2.9672468638 61 | O 1.5088968033 1.7263033759 -0.6138141926 62 | H 1.7354587376 1.9652735921 -1.5262825722 63 | H 1.9577474126 2.3875666865 -0.0655521288 64 | O 0.6239989003 -1.7523255174 -1.5484644055 65 | H 0.2160032647 -2.6267859427 -1.6354173491 66 | H 1.4290687600 -1.7964228534 -2.0853795239 67 | O -1.5070001715 1.8141911780 0.4998681703 68 | H -2.4541784400 1.7774516605 0.6982268306 69 | H -1.2551054906 2.7372572053 0.6528424686 70 | O -1.0682080844 0.5332086198 -2.0679918136 71 | H -1.6666262067 1.2733037974 -2.2491960315 72 | H -1.0233842314 0.0356843847 -2.8982289049 73 | 22 74 | -36.24419579 75 | Eu 0.0006649450 -0.0048693367 -0.0046766397 76 | O 0.6242169873 -0.8197812124 2.1558750267 77 | H 
1.4392076824 -0.7689007768 2.6755987327 78 | H 0.0008354138 -1.3038466398 2.7191928356 79 | O 1.4086421404 -1.7697184079 -0.7430827655 80 | H 1.4400487942 -2.6734948377 -0.3928548702 81 | H 2.1961760468 -1.6921850277 -1.3036291631 82 | O -1.9329935822 -1.1609896501 0.8952502336 83 | H -2.6811978331 -0.7779238666 1.3769259919 84 | H -2.2055433350 -2.0667116580 0.6854366845 85 | O -1.2843600975 -0.8048410561 -1.8982900605 86 | H -1.0718701531 -1.5304860976 -2.5040469488 87 | H -2.1324306230 -0.4544981550 -2.2102170531 88 | O -1.1130803385 1.8164948122 1.0330774823 89 | H -1.9470007336 2.2639250227 0.8246578413 90 | H -0.7676086885 2.2805933677 1.8119648116 91 | O 0.2208757376 1.6576208867 -1.7588616056 92 | H -0.1366892892 2.5579116322 -1.7718262654 93 | H 0.5224099593 1.4933901419 -2.6647873535 94 | O 2.0821197045 1.1153007230 0.3571919070 95 | H 2.3791104806 1.8366509648 -0.2187560819 96 | H 2.7782468214 1.0286863869 1.0241020801 97 | 22 98 | -36.24408866 99 | Eu -0.0020929849 0.0024535642 0.0008042944 100 | O -1.4370214132 1.0256339553 -1.6547846662 101 | H -2.4048397653 1.0617130039 -1.6561979199 102 | H -1.1685902270 1.4871282510 -2.4632246761 103 | O 2.0052994350 -1.3350751432 0.0601669581 104 | H 2.0570185422 -2.3010896348 0.0089127423 105 | H 2.9106519795 -1.0453116806 0.2459961968 106 | O -1.1642785326 1.7910491421 1.0964146788 107 | H -1.6340495969 1.7589294735 1.9429577236 108 | H -1.2687388535 2.7029428460 0.7867873563 109 | O 0.7572538345 0.0349707521 2.2975364179 110 | H 1.2383753047 0.7423251257 2.7516145410 111 | H 0.6430253416 -0.6579823352 2.9649023730 112 | O -1.5794623319 -1.5359676697 0.8849185605 113 | H -1.4276269609 -2.4752955182 1.0712057546 114 | H -2.5074038229 -1.3858698094 1.1221856367 115 | O -0.1259762170 -1.5669058531 -1.8285738588 116 | H 0.5516996618 -1.7234313865 -2.5028767065 117 | H -0.8900972968 -2.0881724529 -2.1159735099 118 | O 1.5625637182 1.5653040952 -0.8627608608 119 | H 1.8276628937 2.4242641033 -0.4996350812 120 | H 2.0967505458 1.4631110393 -1.6654931419 121 | 22 122 | -36.24374091 123 | Eu -0.0023141475 0.0025233246 -0.0003014911 124 | O -1.0309325177 -1.7401384080 -1.3518007359 125 | H -1.9707684966 -1.9448270984 -1.4624969464 126 | H -0.5633984165 -2.3915814219 -1.8958123706 127 | O 0.9000729197 -2.0893026805 0.7472748942 128 | H 1.7900104626 -2.4650201591 0.6747615397 129 | H 0.3360011548 -2.8315075200 1.0143378586 130 | O -0.3119154538 2.3296096660 -0.4329174746 131 | H -0.6531167506 3.0212940592 0.1535858264 132 | H -0.1696061264 2.7710766181 -1.2841333120 133 | O -2.3923793286 0.2961613011 0.2121267374 134 | H -2.9813866839 0.8109843908 -0.3595242196 135 | H -2.9332358193 0.0491115952 0.9775974905 136 | O -0.3113367077 0.1792863173 2.3619321693 137 | H -0.2246728213 -0.5505204129 2.9940651470 138 | H -0.3483991481 0.9776509824 2.9097658511 139 | O 1.0159849986 0.1699625353 -2.1582024829 140 | H 1.9591231251 0.1115682900 -2.3743311602 141 | H 0.5609408735 0.2014877811 -3.0132133961 142 | O 2.1470822966 0.8344329669 0.6253005990 143 | H 2.8673552988 0.4160604967 1.1197414977 144 | H 2.4169472033 1.7610526407 0.5321633192 145 | 22 146 | -36.24372527 147 | Eu -0.0001769071 0.0081524112 -0.0054024578 148 | O 1.4108283370 -1.8598675974 -0.4660464342 149 | H 1.2553507582 -2.4423590830 -1.2260256521 150 | H 2.2004739841 -2.2183243012 -0.0352314518 151 | O -0.5077581560 -1.4148359820 1.8611468082 152 | H -0.5588176615 -2.3826655114 1.8602421878 153 | H -0.5227482048 -1.1619420045 2.7969006321 154 | O 1.4812914309 1.1576630807 -1.4689822384 155 | 
H 1.5484031285 2.0830897704 -1.7463217470 156 | H 2.2884201270 0.7396305442 -1.8076967281 157 | O -0.8056587982 2.1904933815 0.5897101686 158 | H -1.3668015292 2.3259328507 1.3692873540 159 | H -0.8442108435 3.0285996160 0.1059069427 160 | O -2.4355866195 -0.1373768271 0.0884522191 161 | H -2.9609934409 -0.6877241698 0.6880944485 162 | H -3.0826646987 0.3571500393 -0.4355541029 163 | O -0.7413594316 -0.7691315754 -2.1662458301 164 | H -1.3961794473 -1.4641573327 -2.3303699406 165 | H -0.5937040767 -0.3497734958 -3.0273542089 166 | O 1.6017119559 0.7640624975 1.6052753047 167 | H 1.5575559297 1.6420612230 2.0147414696 168 | H 2.4475273993 0.3964656044 1.9004544481 169 | -------------------------------------------------------------------------------- /test/fixtures/inp: -------------------------------------------------------------------------------- 1 | ! RHF CCSD(T) def2-TZVP TightSCF # test comment 2 | 3 | %paras R= 4.0,0.5,35 end#testcomment2 4 | #end 5 | * xyz 0 1 6 | H 0 0 0 7 | F 0 0 {R} 8 | * 9 | -------------------------------------------------------------------------------- /test/fixtures/inp2: -------------------------------------------------------------------------------- 1 | ! RHF CCSD(T) def2-TZVP TightSCF 2 | %paras 3 | R= 4.0,0.5,35 4 | end 5 | * xyz 0 1 6 | H 0 0 0 7 | F 0 0 {R} 8 | * 9 | -------------------------------------------------------------------------------- /test/fixtures/test.template: -------------------------------------------------------------------------------- 1 | {main} 2 | ! OPT 3 | 4 | {postgeom} 5 | %mp2 6 | bla bla 7 | end 8 | -------------------------------------------------------------------------------- /test/fixtures/testinp: -------------------------------------------------------------------------------- 1 | ! 
RHF CCSD(T) def2-TZVP TightSCF 2 | %paras 3 | R= 4.0,0.5,35 4 | end 5 | * xyz 0 1 6 | H 0 0 0 7 | F 0 0 {R} 8 | * 9 | -------------------------------------------------------------------------------- /test/test_cli/test_interface.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import unittest 4 | 5 | os.chdir(os.path.split(__file__)[0]) 6 | 7 | from censo.cli.cml_parser import parse 8 | from censo.cli.interface import startup, entry_point 9 | from censo.params import DESCR 10 | 11 | 12 | class CensoTest(unittest.TestCase): 13 | def test_blank_startup(self): 14 | entry_point("") 15 | 16 | def test_help_startup(self): 17 | argv = "-h".split() 18 | entry_point(argv) 19 | 20 | def test_general_startup(self): 21 | argv = "-inp testfiles/crest_conformers.xyz -solvent water -chrg 0 -u 0" 22 | core = startup(parse(DESCR, argv.split())) 23 | self.assertEqual(core.workdir, os.path.split(__file__)[0]) 24 | 25 | def test_partial_req(self): 26 | argv = "-inp testfiles/crest_conformers.xyz".split() 27 | entry_point(argv) 28 | 29 | def test_writeconfig(self): 30 | argv = "-newconfig".split() 31 | entry_point(argv) 32 | 33 | self.assertTrue(os.path.isfile("censo2rc_NEW")) 34 | 35 | def test_writereadconfig(self): 36 | argv = "-newconfig".split() 37 | entry_point(argv) 38 | 39 | argv = "-inp testfiles/crest_conformers.xyz -solvent water -chrg 0 -u 0 -inprc censo2rc_NEW" 40 | startup(parse(DESCR, argv.split())) 41 | 42 | def test_rc_override(self): 43 | argv = "-newconfig".split() 44 | entry_point(argv) 45 | 46 | argv = "-inprc censo2rc_NEW -inp testfiles/crest_conformers.xyz -solvent water -chrg 0 -u 0 -gp".split() 47 | args = parse(DESCR, argv) 48 | startup(args) 49 | from censo.part import CensoPart 50 | 51 | self.assertTrue(CensoPart.get_general_settings()["gas-phase"]) 52 | 53 | def doCleanups(self): 54 | # perform cleanup 55 | delete = ["censo.log", "censo2rc_NEW_OLD", "censo2rc_NEW"] 56 | for f in delete: 57 | f = os.path.join(os.path.split(__file__)[0], f) 58 | if os.path.exists(f): 59 | if os.path.isdir(f): 60 | shutil.rmtree(f) 61 | else: 62 | os.remove(f) 63 | 64 | 65 | if __name__ == "__main__": 66 | unittest.main() 67 | -------------------------------------------------------------------------------- /test/test_ensembledata.py: -------------------------------------------------------------------------------- 1 | from censo.cli.cml_parser import parse 2 | from censo.params import DESCR 3 | from censo.ensembledata import EnsembleData 4 | import os 5 | 6 | 7 | def test_read_input(): 8 | # Read input via python instruction 9 | 10 | # Read input passed via cml args 11 | test_dir = os.path.dirname(__file__)  # assumes fixtures/ sits next to this test file 12 | test_args = parse(DESCR, argv="-i fixtures/crest_conformers.xyz".split()) 13 | ensemble = EnsembleData(test_dir, args=test_args) 14 | ensemble.read_input(test_args.inp) 15 | nconf = 7 16 | assert nconf == len(ensemble.conformers) 17 | assert 0 == ensemble.runinfo["charge"] 18 | assert 0 == ensemble.runinfo["unpaired"] 19 | --------------------------------------------------------------------------------
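The test above also doubles as a recipe for reading an ensemble programmatically rather than through the command line. The sketch below is a minimal, non-authoritative example that reuses only the call signatures visible in the tests (parse with DESCR, EnsembleData(directory, args=...), read_input, and the conformers/runinfo attributes); the working-directory handling and the final print-out are illustrative assumptions.

import os

from censo.cli.cml_parser import parse
from censo.ensembledata import EnsembleData
from censo.params import DESCR

# Parse CLI-style arguments in memory; the flags mirror those used in
# test_cli/test_interface.py.
args = parse(DESCR, "-inp crest_conformers.xyz -chrg 0 -u 0".split())

# Read the ensemble from the current working directory (assumed to contain the
# xyz file) and report what was found.
workdir = os.getcwd()
ensemble = EnsembleData(workdir, args=args)
ensemble.read_input(args.inp)
print(f"Read {len(ensemble.conformers)} conformers "
      f"(charge {ensemble.runinfo['charge']}, "
      f"unpaired electrons {ensemble.runinfo['unpaired']})")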