├── .gitattributes ├── .github └── workflows │ └── run_tests_and_deploy.yml ├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── alfabet ├── __init__.py ├── _version.py ├── drawing.py ├── fragment.py ├── model.py ├── neighbors.py ├── prediction.py └── preprocessor.py ├── docs ├── logo.svg └── logo_wide.svg ├── etc └── environment.yml ├── examples ├── run_test_predictions.ipynb └── test_data.csv.gz ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── test_model.py └── test_neighbors.py └── versioneer.py /.gitattributes: -------------------------------------------------------------------------------- 1 | alfabet/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/run_tests_and_deploy.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - "master" 7 | - "main" 8 | tags: 9 | - '*' 10 | pull_request: 11 | branches: 12 | - "master" 13 | - "main" 14 | schedule: 15 | # Nightly tests run on master by default: 16 | # Scheduled workflows run on the latest commit on the default or base branch. 17 | # (from https://help.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule) 18 | - cron: "0 0 * * 0" 19 | 20 | jobs: 21 | test: 22 | name: Test Python ${{ matrix.python-version }} 23 | runs-on: ubuntu-latest 24 | strategy: 25 | matrix: 26 | python-version: [ 3.7, 3.8, 3.9 ] 27 | 28 | steps: 29 | - uses: actions/checkout@v1 30 | - name: Additional info about the build 31 | shell: bash 32 | run: | 33 | uname -a 34 | df -h 35 | ulimit -a 36 | 37 | # More info on options: https://github.com/conda-incubator/setup-miniconda 38 | - uses: conda-incubator/setup-miniconda@v2 39 | with: 40 | python-version: ${{ matrix.python-version }} 41 | channels: conda-forge 42 | activate-environment: test 43 | auto-update-conda: false 44 | auto-activate-base: false 45 | show-channel-urls: true 46 | environment-file: etc/environment.yml 47 | 48 | - name: Install package 49 | # conda setup requires this special shell 50 | shell: bash -l {0} 51 | run: | 52 | python -m pip install . 
53 | conda list 54 | 55 | - name: Run tests 56 | shell: bash -l {0} 57 | run: pytest tests/ 58 | 59 | 60 | release: 61 | needs: test 62 | if: startsWith(github.ref, 'refs/tags') 63 | runs-on: ubuntu-latest 64 | strategy: 65 | matrix: 66 | python-version: [ 3.8 ] 67 | steps: 68 | - uses: actions/checkout@v2 69 | - name: Set up Python ${{ matrix.python-version }} 70 | uses: actions/setup-python@v2 71 | with: 72 | python-version: ${{ matrix.python-version }} 73 | - name: Get tag 74 | id: tag 75 | run: echo "::set-output name=version::${GITHUB_REF#refs/tags/}" 76 | - name: Install dependencies 77 | run: | 78 | python -m pip install --upgrade pip setuptools wheel 79 | python -m pip install twine 80 | - name: Build package 81 | run: python setup.py sdist bdist_wheel 82 | - name: Check the package 83 | run: twine check dist/* 84 | - name: Publish to PyPI 85 | env: 86 | STACKMANAGER_VERSION: ${{ github.event.release.tag_name }} 87 | TWINE_USERNAME: __token__ 88 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 89 | run: twine upload --skip-existing --non-interactive dist/* 90 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | .static_storage/ 56 | .media/ 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .DS_Store 107 | *.sw[op] 108 | Untitled* 109 | 110 | #Editors 111 | .idea/ 112 | .vscode/ 113 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: minimal 2 | env: 3 | - PYTHON_VERSION="3.6" 4 | before_install: 5 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 6 | - bash miniconda.sh -b -p $HOME/miniconda 7 | - export PATH="$HOME/miniconda/bin:$PATH" 8 | - conda config --set always_yes yes --set changeps1 no 9 | - conda update -q conda 10 | install: 11 | - conda create -q -n test -c conda-forge python=$PYTHON_VERSION rdkit pytest 12 | - source activate test 13 | - python -c "import rdkit.Chem" 14 | - pip install --progress-bar off -e . 15 | script: 16 | - pytest tests/ 17 | deploy: 18 | provider: pypi 19 | user: __token__ 20 | on: 21 | tags: true 22 | password: 23 | secure: rCAq+QG8LnX75pOsGXNr8vr9f4fWP1VO6lAlD4qM86OWERcyMvGGNmmShp1CSCWaIOZuFcX9Ql6dE8g6nt/B8xoSh8y0U172NThP/nLKp+kXOXxE44GMF9VmVzBqupTY8vm1ke1eqqQ63QAmRAsZA3AnClR8yP8KWqul2eG9w+my6FKUp0vwoAADkiOhZSirLB500jY4sWLk5q9iMGG73EssMp4OYpRj6K/bNqN8K5azvqabHuWhPeSm5RLB/WgLaGPh+OrpbUykYgQ6QxNL2qliSv3FIKj3p9zLWbGD/urGvEekPxVi5K/BpmRFG1Seup4tHzIczrR7m/IPJr44PpwlhTnRZVkI46i0jXtuZTfsrtcBXP/Zv+BoMiZclaq6Qd2k64vxPi3TxseFv42pAVWwfBfMUpEliu1BYx+722fyExNRP5tDSJFbRTk5i264kAleV1v1/2AGov/dPfq0fF7NXnkPZt7q/pw5xNIhXJxAMw6kHUjxM0U5vBR5apwmKl78OJ8NeMG1tdSn5sImDZlmQNZOO16Rl4QpyYPsiKgw993osOphk9JXMFoIxorFd0UHDWEehna0qYuIAaTY9KZeIk0DMNYODMI75kmSDCt3BGhrMz3haykyp8X61LZMfzgJT6S771/cGvYJ2BUBQrviVLjrjVkFmqA7TYkfSl8= 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Alfabet Copyright (c) 2019 Alliance for Sustainable Energy, LLC All 2 | rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. The name of the copyright holder(s), any contributors, the United States 15 | Government, the United States Department of Energy, or any of their employees 16 | may not be used to endorse or promote products derived from this software 17 | without specific prior written permission from the respective party. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND ANY CONTRIBUTORS "AS 20 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S), ANY CONTRIBUTORS, THE 23 | UNITED STATES GOVERNMENT, OR THE UNITED STATES DEPARTMENT OF ENERGY, NOR ANY OF 24 | THEIR EMPLOYEES, BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 26 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 29 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 30 | OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include alfabet/_version.py 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![ALFABET logo](/docs/logo.svg) 2 | 3 | [![PyPI version](https://badge.fury.io/py/alfabet.svg)](https://badge.fury.io/py/alfabet) 4 | [![Build Status](https://travis-ci.com/NREL/alfabet.svg?branch=master)](https://travis-ci.com/NREL/alfabet) 5 | 6 | # A machine-Learning derived, Fast, Accurate Bond dissociation Enthalpy Tool (ALFABET) 7 | 8 | This library contains the trained graph neural network model for the prediction of homolytic bond dissociation energies (BDEs) of organic molecules with C, H, N, and O atoms. This package offers a command-line interface to the web-based model predictions at [bde.ml.nrel.gov](https://bde.ml.nrel.gov/). 9 | 10 | The basic interface works as follows, where `predict` expects a list of SMILES strings of the target molecules 11 | ```python 12 | >>> from alfabet import model 13 | >>> model.predict(['CC', 'NCCO']) 14 | ``` 15 | ``` 16 | molecule bond_index bond_type fragment1 fragment2 ... bde_pred is_valid 17 | 0 CC 0 C-C [CH3] [CH3] ... 90.278282 True 18 | 1 CC 1 C-H [H] [CH2]C ... 99.346184 True 19 | 2 NCCO 0 C-N [CH2]CO [NH2] ... 89.988495 True 20 | 3 NCCO 1 C-C [CH2]O [CH2]N ... 82.122429 True 21 | 4 NCCO 2 C-O [CH2]CN [OH] ... 98.250961 True 22 | 5 NCCO 3 H-N [H] [NH]CCO ... 99.134750 True 23 | 6 NCCO 5 C-H [H] N[CH]CO ... 92.216087 True 24 | 7 NCCO 7 C-H [H] NC[CH]O ... 92.562988 True 25 | 8 NCCO 9 H-O [H] NCC[O] ... 105.120598 True 26 | ``` 27 | 28 | The model breaks all single, non-cyclic bonds in the input molecules and calculates their bond dissociation energies. Typical prediction errors are less than 1 kcal/mol. 
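As a minimal post-processing sketch (using only `model.predict` and the columns shown in the example output above, together with standard pandas operations), the returned dataframe can be restricted to the model's domain of validity and reduced to the weakest predicted bond in each molecule:

```python
from alfabet import model

predictions = model.predict(['CC', 'NCCO'])

# Keep only molecules inside the model's domain of validity
valid = predictions[predictions.is_valid]

# Sort by predicted BDE (kcal/mol) and keep the weakest bond of each molecule
weakest = (
    valid.sort_values('bde_pred')
    .drop_duplicates('molecule')[['molecule', 'bond_index', 'bond_type', 'bde_pred']]
)
print(weakest)
```

Rows with `is_valid == False` correspond to atom or bond environments the model did not see during training, so those predictions should be treated with caution.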
29 | The model is based on Tensorflow (2.x), and makes heavy use of the [neural fingerprint](https://github.com/NREL/nfp) library (0.1.x). 30 | 31 | For additional details, see the publication: 32 | St. John, P. C., Guan, Y., Kim, Y., Kim, S., & Paton, R. S. (2020). Prediction of organic homolytic bond dissociation enthalpies at near chemical accuracy with sub-second computational cost. Nature Communications, 11(1). doi:10.1038/s41467-020-16201-z 33 | 34 | *Note:* For the exact model described in the text, install `alfabet` version 0.0.x. Versions >0.1 have been updated for tensorflow 2. 35 | 36 | ## Installation 37 | Installation with `conda` is recommended, as [`rdkit`](https://github.com/rdkit/rdkit) can otherwise be difficult to install. 38 | 39 | ```bash 40 | $ conda create -n alfabet -c conda-forge python=3.7 rdkit 41 | $ source activate alfabet 42 | $ pip install alfabet 43 | ``` 44 | -------------------------------------------------------------------------------- /alfabet/__init__.py: -------------------------------------------------------------------------------- 1 | from rdkit import RDLogger 2 | 3 | from . import _version 4 | 5 | RDLogger.DisableLog("rdApp.*") 6 | 7 | 8 | __version__ = _version.get_versions()["version"] 9 | 10 | _model_tag = "v0.1.1" # Tag on https://github.com/pstjohn/alfabet-models/ 11 | _model_files_baseurl = ( 12 | f"https://github.com/pstjohn/alfabet-models/releases/download/{_model_tag}/" 13 | ) 14 | -------------------------------------------------------------------------------- /alfabet/_version.py: -------------------------------------------------------------------------------- 1 | # This file helps to compute a version number in source trees obtained from 2 | # git-archive tarball (such as those provided by githubs download-from-tag 3 | # feature). Distribution tarballs (built by setup.py sdist) and build 4 | # directories (produced by setup.py build) will contain a much shorter file 5 | # that just contains the computed version number. 6 | 7 | # This file is released into the public domain. Generated by 8 | # versioneer-0.20 (https://github.com/python-versioneer/python-versioneer) 9 | """Git implementation of _version.py.""" 10 | 11 | import errno 12 | import os 13 | import re 14 | import subprocess 15 | import sys 16 | 17 | 18 | def get_keywords(): 19 | """Get the keywords needed to look up the version information.""" 20 | # these strings will be replaced by git during git-archive. 21 | # setup.py/versioneer.py will grep for the variable names, so they must 22 | # each be defined on a line of their own. _version.py will just call 23 | # get_keywords(). 
24 | git_refnames = " (HEAD -> master)" 25 | git_full = "991aaa2b35245d2f2a648cf63e5304cc661eb395" 26 | git_date = "2024-09-13 14:03:13 -0600" 27 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 28 | return keywords 29 | 30 | 31 | class VersioneerConfig: # pylint: disable=too-few-public-methods 32 | """Container for Versioneer configuration parameters.""" 33 | 34 | 35 | def get_config(): 36 | """Create, populate and return the VersioneerConfig() object.""" 37 | # these strings are filled in when 'setup.py versioneer' creates 38 | # _version.py 39 | cfg = VersioneerConfig() 40 | cfg.VCS = "git" 41 | cfg.style = "pep440" 42 | cfg.tag_prefix = "" 43 | cfg.parentdir_prefix = "None" 44 | cfg.versionfile_source = "alfabet/_version.py" 45 | cfg.verbose = False 46 | return cfg 47 | 48 | 49 | class NotThisMethod(Exception): 50 | """Exception raised if a method is not valid for the current scenario.""" 51 | 52 | 53 | LONG_VERSION_PY = {} 54 | HANDLERS = {} 55 | 56 | 57 | def register_vcs_handler(vcs, method): # decorator 58 | """Create decorator to mark a method as the handler of a VCS.""" 59 | def decorate(f): 60 | """Store f in HANDLERS[vcs][method].""" 61 | if vcs not in HANDLERS: 62 | HANDLERS[vcs] = {} 63 | HANDLERS[vcs][method] = f 64 | return f 65 | 66 | return decorate 67 | 68 | 69 | # pylint:disable=too-many-arguments,consider-using-with # noqa 70 | def run_command(commands, 71 | args, 72 | cwd=None, 73 | verbose=False, 74 | hide_stderr=False, 75 | env=None): 76 | """Call the given command(s).""" 77 | assert isinstance(commands, list) 78 | process = None 79 | for command in commands: 80 | try: 81 | dispcmd = str([command] + args) 82 | # remember shell=False, so use git.cmd on windows, not just git 83 | process = subprocess.Popen( 84 | [command] + args, 85 | cwd=cwd, 86 | env=env, 87 | stdout=subprocess.PIPE, 88 | stderr=(subprocess.PIPE if hide_stderr else None)) 89 | break 90 | except EnvironmentError: 91 | e = sys.exc_info()[1] 92 | if e.errno == errno.ENOENT: 93 | continue 94 | if verbose: 95 | print("unable to run %s" % dispcmd) 96 | print(e) 97 | return None, None 98 | else: 99 | if verbose: 100 | print("unable to find command, tried %s" % (commands, )) 101 | return None, None 102 | stdout = process.communicate()[0].strip().decode() 103 | if process.returncode != 0: 104 | if verbose: 105 | print("unable to run %s (error)" % dispcmd) 106 | print("stdout was %s" % stdout) 107 | return None, process.returncode 108 | return stdout, process.returncode 109 | 110 | 111 | def versions_from_parentdir(parentdir_prefix, root, verbose): 112 | """Try to determine the version from the parent directory name. 113 | 114 | Source tarballs conventionally unpack into a directory that includes both 115 | the project name and a version string. 
We will also support searching up 116 | two directory levels for an appropriately named parent directory 117 | """ 118 | rootdirs = [] 119 | 120 | for _ in range(3): 121 | dirname = os.path.basename(root) 122 | if dirname.startswith(parentdir_prefix): 123 | return { 124 | "version": dirname[len(parentdir_prefix):], 125 | "full-revisionid": None, 126 | "dirty": False, 127 | "error": None, 128 | "date": None 129 | } 130 | rootdirs.append(root) 131 | root = os.path.dirname(root) # up a level 132 | 133 | if verbose: 134 | print("Tried directories %s but none started with prefix %s" % 135 | (str(rootdirs), parentdir_prefix)) 136 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 137 | 138 | 139 | @register_vcs_handler("git", "get_keywords") 140 | def git_get_keywords(versionfile_abs): 141 | """Extract version information from the given file.""" 142 | # the code embedded in _version.py can just fetch the value of these 143 | # keywords. When used from setup.py, we don't want to import _version.py, 144 | # so we do it with a regexp instead. This function is not used from 145 | # _version.py. 146 | keywords = {} 147 | try: 148 | with open(versionfile_abs, "r") as fobj: 149 | for line in fobj: 150 | if line.strip().startswith("git_refnames ="): 151 | mo = re.search(r'=\s*"(.*)"', line) 152 | if mo: 153 | keywords["refnames"] = mo.group(1) 154 | if line.strip().startswith("git_full ="): 155 | mo = re.search(r'=\s*"(.*)"', line) 156 | if mo: 157 | keywords["full"] = mo.group(1) 158 | if line.strip().startswith("git_date ="): 159 | mo = re.search(r'=\s*"(.*)"', line) 160 | if mo: 161 | keywords["date"] = mo.group(1) 162 | except EnvironmentError: 163 | pass 164 | return keywords 165 | 166 | 167 | @register_vcs_handler("git", "keywords") 168 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 169 | """Get version information from git keywords.""" 170 | if "refnames" not in keywords: 171 | raise NotThisMethod("Short version file found") 172 | date = keywords.get("date") 173 | if date is not None: 174 | # Use only the last line. Previous lines may contain GPG signature 175 | # information. 176 | date = date.splitlines()[-1] 177 | 178 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 179 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 180 | # -like" string, which we must then edit to make compliant), because 181 | # it's been around since git-1.5.3, and it's too difficult to 182 | # discover which version we're using, or to work around using an 183 | # older one. 184 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 185 | refnames = keywords["refnames"].strip() 186 | if refnames.startswith("$Format"): 187 | if verbose: 188 | print("keywords are unexpanded, not using") 189 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 190 | refs = {r.strip() for r in refnames.strip("()").split(",")} 191 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 192 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 193 | TAG = "tag: " 194 | tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} 195 | if not tags: 196 | # Either we're using git < 1.8.3, or there really are no tags. We use 197 | # a heuristic: assume all version tags have a digit. The old git %d 198 | # expansion behaves like git log --decorate=short and strips out the 199 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 200 | # between branches and tags. 
By ignoring refnames without digits, we 201 | # filter out many common branch names like "release" and 202 | # "stabilization", as well as "HEAD" and "master". 203 | tags = {r for r in refs if re.search(r'\d', r)} 204 | if verbose: 205 | print("discarding '%s', no digits" % ",".join(refs - tags)) 206 | if verbose: 207 | print("likely tags: %s" % ",".join(sorted(tags))) 208 | for ref in sorted(tags): 209 | # sorting will prefer e.g. "2.0" over "2.0rc1" 210 | if ref.startswith(tag_prefix): 211 | r = ref[len(tag_prefix):] 212 | # Filter out refs that exactly match prefix or that don't start 213 | # with a number once the prefix is stripped (mostly a concern 214 | # when prefix is '') 215 | if not re.match(r'\d', r): 216 | continue 217 | if verbose: 218 | print("picking %s" % r) 219 | return { 220 | "version": r, 221 | "full-revisionid": keywords["full"].strip(), 222 | "dirty": False, 223 | "error": None, 224 | "date": date 225 | } 226 | # no suitable tags, so version is "0+unknown", but full hex is still there 227 | if verbose: 228 | print("no suitable tags, using unknown + full revision id") 229 | return { 230 | "version": "0+unknown", 231 | "full-revisionid": keywords["full"].strip(), 232 | "dirty": False, 233 | "error": "no suitable tags", 234 | "date": None 235 | } 236 | 237 | 238 | @register_vcs_handler("git", "pieces_from_vcs") 239 | def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): 240 | """Get version from 'git describe' in the root of the source tree. 241 | 242 | This only gets called if the git-archive 'subst' keywords were *not* 243 | expanded, and _version.py hasn't already been rewritten with a short 244 | version string, meaning we're inside a checked out source tree. 245 | """ 246 | GITS = ["git"] 247 | if sys.platform == "win32": 248 | GITS = ["git.cmd", "git.exe"] 249 | 250 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], 251 | cwd=root, 252 | hide_stderr=True) 253 | if rc != 0: 254 | if verbose: 255 | print("Directory %s not under git control" % root) 256 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 257 | 258 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 259 | # if there isn't one, this yields HEX[-dirty] (no NUM) 260 | describe_out, rc = runner(GITS, [ 261 | "describe", "--tags", "--dirty", "--always", "--long", "--match", 262 | "%s*" % tag_prefix 263 | ], 264 | cwd=root) 265 | # --long was added in git-1.5.5 266 | if describe_out is None: 267 | raise NotThisMethod("'git describe' failed") 268 | describe_out = describe_out.strip() 269 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) 270 | if full_out is None: 271 | raise NotThisMethod("'git rev-parse' failed") 272 | full_out = full_out.strip() 273 | 274 | pieces = {} 275 | pieces["long"] = full_out 276 | pieces["short"] = full_out[:7] # maybe improved later 277 | pieces["error"] = None 278 | 279 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], 280 | cwd=root) 281 | # --abbrev-ref was added in git-1.6.3 282 | if rc != 0 or branch_name is None: 283 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") 284 | branch_name = branch_name.strip() 285 | 286 | if branch_name == "HEAD": 287 | # If we aren't exactly on a branch, pick a branch which represents 288 | # the current commit. If all else fails, we are on a branchless 289 | # commit. 
290 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) 291 | # --contains was added in git-1.5.4 292 | if rc != 0 or branches is None: 293 | raise NotThisMethod("'git branch --contains' returned error") 294 | branches = branches.split("\n") 295 | 296 | # Remove the first line if we're running detached 297 | if "(" in branches[0]: 298 | branches.pop(0) 299 | 300 | # Strip off the leading "* " from the list of branches. 301 | branches = [branch[2:] for branch in branches] 302 | if "master" in branches: 303 | branch_name = "master" 304 | elif not branches: 305 | branch_name = None 306 | else: 307 | # Pick the first branch that is returned. Good or bad. 308 | branch_name = branches[0] 309 | 310 | pieces["branch"] = branch_name 311 | 312 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 313 | # TAG might have hyphens. 314 | git_describe = describe_out 315 | 316 | # look for -dirty suffix 317 | dirty = git_describe.endswith("-dirty") 318 | pieces["dirty"] = dirty 319 | if dirty: 320 | git_describe = git_describe[:git_describe.rindex("-dirty")] 321 | 322 | # now we have TAG-NUM-gHEX or HEX 323 | 324 | if "-" in git_describe: 325 | # TAG-NUM-gHEX 326 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 327 | if not mo: 328 | # unparseable. Maybe git-describe is misbehaving? 329 | pieces["error"] = ("unable to parse git-describe output: '%s'" % 330 | describe_out) 331 | return pieces 332 | 333 | # tag 334 | full_tag = mo.group(1) 335 | if not full_tag.startswith(tag_prefix): 336 | if verbose: 337 | fmt = "tag '%s' doesn't start with prefix '%s'" 338 | print(fmt % (full_tag, tag_prefix)) 339 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % 340 | (full_tag, tag_prefix)) 341 | return pieces 342 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 343 | 344 | # distance: number of commits since tag 345 | pieces["distance"] = int(mo.group(2)) 346 | 347 | # commit: short hex revision ID 348 | pieces["short"] = mo.group(3) 349 | 350 | else: 351 | # HEX: no tags 352 | pieces["closest-tag"] = None 353 | count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) 354 | pieces["distance"] = int(count_out) # total number of commits 355 | 356 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 357 | date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], 358 | cwd=root)[0].strip() 359 | # Use only the last line. Previous lines may contain GPG signature 360 | # information. 361 | date = date.splitlines()[-1] 362 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 363 | 364 | return pieces 365 | 366 | 367 | def plus_or_dot(pieces): 368 | """Return a + if we don't already have one, else return a .""" 369 | if "+" in pieces.get("closest-tag", ""): 370 | return "." 371 | return "+" 372 | 373 | 374 | def render_pep440(pieces): 375 | """Build up version string, with post-release "local version identifier". 376 | 377 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 378 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 379 | 380 | Exceptions: 381 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 382 | """ 383 | if pieces["closest-tag"]: 384 | rendered = pieces["closest-tag"] 385 | if pieces["distance"] or pieces["dirty"]: 386 | rendered += plus_or_dot(pieces) 387 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 388 | if pieces["dirty"]: 389 | rendered += ".dirty" 390 | else: 391 | # exception #1 392 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 393 | if pieces["dirty"]: 394 | rendered += ".dirty" 395 | return rendered 396 | 397 | 398 | def render_pep440_branch(pieces): 399 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . 400 | 401 | The ".dev0" means not master branch. Note that .dev0 sorts backwards 402 | (a feature branch will appear "older" than the master branch). 403 | 404 | Exceptions: 405 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] 406 | """ 407 | if pieces["closest-tag"]: 408 | rendered = pieces["closest-tag"] 409 | if pieces["distance"] or pieces["dirty"]: 410 | if pieces["branch"] != "master": 411 | rendered += ".dev0" 412 | rendered += plus_or_dot(pieces) 413 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 414 | if pieces["dirty"]: 415 | rendered += ".dirty" 416 | else: 417 | # exception #1 418 | rendered = "0" 419 | if pieces["branch"] != "master": 420 | rendered += ".dev0" 421 | rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 422 | if pieces["dirty"]: 423 | rendered += ".dirty" 424 | return rendered 425 | 426 | 427 | def render_pep440_pre(pieces): 428 | """TAG[.post0.devDISTANCE] -- No -dirty. 429 | 430 | Exceptions: 431 | 1: no tags. 0.post0.devDISTANCE 432 | """ 433 | if pieces["closest-tag"]: 434 | rendered = pieces["closest-tag"] 435 | if pieces["distance"]: 436 | rendered += ".post0.dev%d" % pieces["distance"] 437 | else: 438 | # exception #1 439 | rendered = "0.post0.dev%d" % pieces["distance"] 440 | return rendered 441 | 442 | 443 | def render_pep440_post(pieces): 444 | """TAG[.postDISTANCE[.dev0]+gHEX] . 445 | 446 | The ".dev0" means dirty. Note that .dev0 sorts backwards 447 | (a dirty tree will appear "older" than the corresponding clean one), 448 | but you shouldn't be releasing software with -dirty anyways. 449 | 450 | Exceptions: 451 | 1: no tags. 0.postDISTANCE[.dev0] 452 | """ 453 | if pieces["closest-tag"]: 454 | rendered = pieces["closest-tag"] 455 | if pieces["distance"] or pieces["dirty"]: 456 | rendered += ".post%d" % pieces["distance"] 457 | if pieces["dirty"]: 458 | rendered += ".dev0" 459 | rendered += plus_or_dot(pieces) 460 | rendered += "g%s" % pieces["short"] 461 | else: 462 | # exception #1 463 | rendered = "0.post%d" % pieces["distance"] 464 | if pieces["dirty"]: 465 | rendered += ".dev0" 466 | rendered += "+g%s" % pieces["short"] 467 | return rendered 468 | 469 | 470 | def render_pep440_post_branch(pieces): 471 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . 472 | 473 | The ".dev0" means not master branch. 474 | 475 | Exceptions: 476 | 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] 477 | """ 478 | if pieces["closest-tag"]: 479 | rendered = pieces["closest-tag"] 480 | if pieces["distance"] or pieces["dirty"]: 481 | rendered += ".post%d" % pieces["distance"] 482 | if pieces["branch"] != "master": 483 | rendered += ".dev0" 484 | rendered += plus_or_dot(pieces) 485 | rendered += "g%s" % pieces["short"] 486 | if pieces["dirty"]: 487 | rendered += ".dirty" 488 | else: 489 | # exception #1 490 | rendered = "0.post%d" % pieces["distance"] 491 | if pieces["branch"] != "master": 492 | rendered += ".dev0" 493 | rendered += "+g%s" % pieces["short"] 494 | if pieces["dirty"]: 495 | rendered += ".dirty" 496 | return rendered 497 | 498 | 499 | def render_pep440_old(pieces): 500 | """TAG[.postDISTANCE[.dev0]] . 501 | 502 | The ".dev0" means dirty. 503 | 504 | Exceptions: 505 | 1: no tags. 0.postDISTANCE[.dev0] 506 | """ 507 | if pieces["closest-tag"]: 508 | rendered = pieces["closest-tag"] 509 | if pieces["distance"] or pieces["dirty"]: 510 | rendered += ".post%d" % pieces["distance"] 511 | if pieces["dirty"]: 512 | rendered += ".dev0" 513 | else: 514 | # exception #1 515 | rendered = "0.post%d" % pieces["distance"] 516 | if pieces["dirty"]: 517 | rendered += ".dev0" 518 | return rendered 519 | 520 | 521 | def render_git_describe(pieces): 522 | """TAG[-DISTANCE-gHEX][-dirty]. 523 | 524 | Like 'git describe --tags --dirty --always'. 525 | 526 | Exceptions: 527 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 528 | """ 529 | if pieces["closest-tag"]: 530 | rendered = pieces["closest-tag"] 531 | if pieces["distance"]: 532 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 533 | else: 534 | # exception #1 535 | rendered = pieces["short"] 536 | if pieces["dirty"]: 537 | rendered += "-dirty" 538 | return rendered 539 | 540 | 541 | def render_git_describe_long(pieces): 542 | """TAG-DISTANCE-gHEX[-dirty]. 543 | 544 | Like 'git describe --tags --dirty --always -long'. 545 | The distance/hash is unconditional. 546 | 547 | Exceptions: 548 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 549 | """ 550 | if pieces["closest-tag"]: 551 | rendered = pieces["closest-tag"] 552 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 553 | else: 554 | # exception #1 555 | rendered = pieces["short"] 556 | if pieces["dirty"]: 557 | rendered += "-dirty" 558 | return rendered 559 | 560 | 561 | def render(pieces, style): 562 | """Render the given version pieces into the requested style.""" 563 | if pieces["error"]: 564 | return { 565 | "version": "unknown", 566 | "full-revisionid": pieces.get("long"), 567 | "dirty": None, 568 | "error": pieces["error"], 569 | "date": None 570 | } 571 | 572 | if not style or style == "default": 573 | style = "pep440" # the default 574 | 575 | if style == "pep440": 576 | rendered = render_pep440(pieces) 577 | elif style == "pep440-branch": 578 | rendered = render_pep440_branch(pieces) 579 | elif style == "pep440-pre": 580 | rendered = render_pep440_pre(pieces) 581 | elif style == "pep440-post": 582 | rendered = render_pep440_post(pieces) 583 | elif style == "pep440-post-branch": 584 | rendered = render_pep440_post_branch(pieces) 585 | elif style == "pep440-old": 586 | rendered = render_pep440_old(pieces) 587 | elif style == "git-describe": 588 | rendered = render_git_describe(pieces) 589 | elif style == "git-describe-long": 590 | rendered = render_git_describe_long(pieces) 591 | else: 592 | raise ValueError("unknown style '%s'" % style) 593 | 594 | return { 595 | "version": rendered, 596 | "full-revisionid": pieces["long"], 597 | "dirty": pieces["dirty"], 598 | "error": None, 599 | "date": pieces.get("date") 600 | } 601 | 602 | 603 | def get_versions(): 604 | """Get version information or return default if unable to do so.""" 605 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 606 | # __file__, we can work backwards from there to the root. Some 607 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 608 | # case we can only use expanded keywords. 609 | 610 | cfg = get_config() 611 | verbose = cfg.verbose 612 | 613 | try: 614 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 615 | verbose) 616 | except NotThisMethod: 617 | pass 618 | 619 | try: 620 | root = os.path.realpath(__file__) 621 | # versionfile_source is the relative path from the top of the source 622 | # tree (where the .git directory might live) to this file. Invert 623 | # this to find the root from __file__. 
624 | for _ in cfg.versionfile_source.split('/'): 625 | root = os.path.dirname(root) 626 | except NameError: 627 | return { 628 | "version": "0+unknown", 629 | "full-revisionid": None, 630 | "dirty": None, 631 | "error": "unable to find root of source tree", 632 | "date": None 633 | } 634 | 635 | try: 636 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 637 | return render(pieces, cfg.style) 638 | except NotThisMethod: 639 | pass 640 | 641 | try: 642 | if cfg.parentdir_prefix: 643 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 644 | except NotThisMethod: 645 | pass 646 | 647 | return { 648 | "version": "0+unknown", 649 | "full-revisionid": None, 650 | "dirty": None, 651 | "error": "unable to compute version", 652 | "date": None 653 | } 654 | -------------------------------------------------------------------------------- /alfabet/drawing.py: -------------------------------------------------------------------------------- 1 | try: 2 | import flask 3 | from flask import Markup 4 | except ImportError: 5 | flask = None 6 | 7 | from rdkit import Chem 8 | from rdkit.Chem import rdDepictor 9 | from rdkit.Chem.Draw import rdMolDraw2D 10 | 11 | 12 | def draw_bde(smiles, bond_index, figwidth=200): 13 | mol = Chem.MolFromSmiles(smiles) 14 | bond_index = int(bond_index) 15 | 16 | if mol.GetNumAtoms() > 20: 17 | figwidth = 300 18 | if mol.GetNumAtoms() > 40: 19 | figwidth = 400 20 | 21 | if bond_index >= mol.GetNumBonds(): 22 | molH = Chem.AddHs(mol) 23 | if bond_index >= molH.GetNumBonds(): 24 | raise RuntimeError( 25 | f"Fewer than {bond_index} bonds in {smiles}: " 26 | f"{molH.GetNumBonds()} total bonds" 27 | ) 28 | bond = molH.GetBondWithIdx(bond_index) 29 | 30 | start_atom = mol.GetAtomWithIdx(bond.GetBeginAtomIdx()) 31 | mol = Chem.AddHs(mol, onlyOnAtoms=[start_atom.GetIdx()]) 32 | bond_index = mol.GetNumBonds() - 1 33 | 34 | if not mol.GetNumConformers(): 35 | rdDepictor.Compute2DCoords(mol) 36 | 37 | drawer = rdMolDraw2D.MolDraw2DSVG(figwidth, figwidth) 38 | drawer.drawOptions().fixedBondLength = 30 39 | drawer.drawOptions().highlightBondWidthMultiplier = 20 40 | 41 | drawer.DrawMolecule( 42 | mol, 43 | highlightAtoms=[], 44 | highlightBonds=[ 45 | bond_index, 46 | ], 47 | ) 48 | 49 | drawer.FinishDrawing() 50 | svg = drawer.GetDrawingText() 51 | 52 | if flask: 53 | return Markup(svg) 54 | else: 55 | return svg 56 | 57 | 58 | def draw_mol_outlier(smiles, missing_atoms, missing_bonds, figsize=(300, 300)): 59 | mol = Chem.MolFromSmiles(smiles) 60 | missing_bonds_adjusted = [] 61 | for bond_index in missing_bonds: 62 | 63 | if bond_index >= mol.GetNumBonds(): 64 | molH = Chem.AddHs(mol) 65 | bond = molH.GetBondWithIdx(int(bond_index)) 66 | 67 | start_atom = mol.GetAtomWithIdx(bond.GetBeginAtomIdx()) 68 | mol = Chem.AddHs(mol, onlyOnAtoms=[start_atom.GetIdx()]) 69 | bond_index = mol.GetNumBonds() - 1 70 | 71 | missing_bonds_adjusted += [int(bond_index)] 72 | 73 | if not mol.GetNumConformers(): 74 | rdDepictor.Compute2DCoords(mol) 75 | 76 | drawer = rdMolDraw2D.MolDraw2DSVG(*figsize) 77 | drawer.SetFontSize(0.6) 78 | drawer.DrawMolecule( 79 | mol, 80 | highlightAtoms=[int(index) for index in missing_atoms], 81 | highlightBonds=missing_bonds_adjusted, 82 | ) 83 | 84 | drawer.FinishDrawing() 85 | svg = drawer.GetDrawingText() 86 | 87 | if flask: 88 | return Markup(svg) 89 | else: 90 | return svg 91 | 92 | 93 | def draw_mol(smiles, figsize=(300, 300)): 94 | mol = Chem.MolFromSmiles(smiles) 95 | rdDepictor.Compute2DCoords(mol) 96 | 97 | drawer = 
rdMolDraw2D.MolDraw2DSVG(*figsize) 98 | drawer.SetFontSize(0.6) 99 | drawer.DrawMolecule(mol) 100 | 101 | drawer.FinishDrawing() 102 | svg = drawer.GetDrawingText() 103 | 104 | if flask: 105 | return Markup(svg) 106 | else: 107 | return svg 108 | -------------------------------------------------------------------------------- /alfabet/fragment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections import Counter 3 | from typing import Dict, Iterator, Type 4 | 5 | import pandas as pd 6 | import rdkit 7 | import rdkit.Chem 8 | import rdkit.Chem.AllChem 9 | from rdkit import RDLogger 10 | 11 | RDLogger.DisableLog("rdApp.*") 12 | 13 | 14 | class Molecule: 15 | def __init__(self, mol: Type[rdkit.Chem.Mol] = None, smiles: str = None) -> None: 16 | assert (mol is not None) or ( 17 | smiles is not None 18 | ), "mol or smiles must be provided" 19 | 20 | self._mol = mol 21 | self._smiles = smiles 22 | self._molH = None 23 | self._is_canon = False 24 | 25 | @property 26 | def mol(self) -> Type[rdkit.Chem.Mol]: 27 | if self._mol is None: 28 | self._mol = rdkit.Chem.MolFromSmiles(self._smiles) 29 | return self._mol 30 | 31 | @property 32 | def molH(self) -> Type[rdkit.Chem.Mol]: 33 | if self._molH is None: 34 | self._molH = rdkit.Chem.AddHs(self.mol) 35 | return self._molH 36 | 37 | @property 38 | def smiles(self) -> str: 39 | if (self._smiles is None) or not self._is_canon: 40 | self._smiles = rdkit.Chem.MolToSmiles(self.mol) 41 | return self._smiles 42 | 43 | 44 | def get_fragments( 45 | input_molecule: Molecule, drop_duplicates: bool = False 46 | ) -> pd.DataFrame: 47 | df = pd.DataFrame(fragment_iterator(input_molecule)) 48 | if drop_duplicates: 49 | df = df.drop_duplicates(["fragment1", "fragment2"]).reset_index(drop=True) 50 | return df 51 | 52 | 53 | def fragment_iterator( 54 | input_molecule: str, skip_warnings: bool = False 55 | ) -> Iterator[Dict]: 56 | 57 | mol_stereo = count_stereocenters(input_molecule) 58 | if (mol_stereo["atom_unassigned"] != 0) or (mol_stereo["bond_unassigned"] != 0): 59 | logging.warning( 60 | f"Molecule {input_molecule.smiles} has undefined stereochemistry" 61 | ) 62 | if skip_warnings: 63 | return 64 | 65 | rdkit.Chem.Kekulize(input_molecule.molH, clearAromaticFlags=True) 66 | 67 | for bond in input_molecule.molH.GetBonds(): 68 | 69 | if bond.IsInRing(): 70 | continue 71 | 72 | if bond.GetBondTypeAsDouble() > 1.9999: 73 | continue 74 | 75 | try: 76 | 77 | # Use RDkit to break the given bond 78 | mh = rdkit.Chem.RWMol(input_molecule.molH) 79 | a1 = bond.GetBeginAtomIdx() 80 | a2 = bond.GetEndAtomIdx() 81 | mh.RemoveBond(a1, a2) 82 | 83 | mh.GetAtomWithIdx(a1).SetNoImplicit(True) 84 | mh.GetAtomWithIdx(a2).SetNoImplicit(True) 85 | 86 | # Call SanitizeMol to update radicals 87 | rdkit.Chem.SanitizeMol(mh) 88 | 89 | # Convert the two molecules into a SMILES string 90 | fragmented_smiles = rdkit.Chem.MolToSmiles(mh) 91 | 92 | # Split fragment and canonicalize 93 | frag1, frag2 = sorted(fragmented_smiles.split(".")) 94 | frag1 = Molecule(smiles=frag1) 95 | frag2 = Molecule(smiles=frag2) 96 | 97 | # Stoichiometry check 98 | assert ( 99 | count_atom_types(frag1) + count_atom_types(frag2) 100 | ) == count_atom_types(input_molecule), "Error with {}; {}; {}".format( 101 | frag1.smiles, frag2.smiles, input_molecule.smiles 102 | ) 103 | 104 | # Check introduction of new stereocenters 105 | is_valid_stereo = check_stereocenters(frag1) and check_stereocenters(frag2) 106 | 107 | yield { 108 | "molecule": 
input_molecule.smiles, 109 | "bond_index": bond.GetIdx(), 110 | "bond_type": get_bond_type(bond), 111 | "fragment1": frag1.smiles, 112 | "fragment2": frag2.smiles, 113 | "is_valid_stereo": is_valid_stereo, 114 | } 115 | 116 | except ValueError: 117 | logging.error( 118 | "Fragmentation error with {}, bond {}".format( 119 | input_molecule.smiles, bond.GetIdx() 120 | ) 121 | ) 122 | continue 123 | 124 | 125 | def count_atom_types(molecule: Type[Molecule]): 126 | """Return a dictionary of each atom type in the given fragment or molecule""" 127 | return Counter([atom.GetSymbol() for atom in molecule.molH.GetAtoms()]) 128 | 129 | 130 | def count_stereocenters(molecule: Type[Molecule]) -> Dict: 131 | """Returns a count of both assigned and unassigned stereocenters in the 132 | given molecule""" 133 | 134 | rdkit.Chem.FindPotentialStereoBonds(molecule.mol) 135 | 136 | stereocenters = rdkit.Chem.FindMolChiralCenters( 137 | molecule.mol, includeUnassigned=True 138 | ) 139 | stereobonds = [ 140 | bond 141 | for bond in molecule.mol.GetBonds() 142 | if bond.GetStereo() is not rdkit.Chem.rdchem.BondStereo.STEREONONE 143 | ] 144 | 145 | atom_assigned = len([center for center in stereocenters if center[1] != "?"]) 146 | atom_unassigned = len([center for center in stereocenters if center[1] == "?"]) 147 | 148 | bond_assigned = len( 149 | [ 150 | bond 151 | for bond in stereobonds 152 | if bond.GetStereo() is not rdkit.Chem.rdchem.BondStereo.STEREOANY 153 | ] 154 | ) 155 | bond_unassigned = len( 156 | [ 157 | bond 158 | for bond in stereobonds 159 | if bond.GetStereo() is rdkit.Chem.rdchem.BondStereo.STEREOANY 160 | ] 161 | ) 162 | 163 | return { 164 | "atom_assigned": atom_assigned, 165 | "atom_unassigned": atom_unassigned, 166 | "bond_assigned": bond_assigned, 167 | "bond_unassigned": bond_unassigned, 168 | } 169 | 170 | 171 | def check_stereocenters(molecule: Type[Molecule]): 172 | """Check the given SMILES string to determine whether accurate 173 | enthalpies can be calculated with the given stereochem information 174 | """ 175 | stereocenters = count_stereocenters(molecule) 176 | if stereocenters["bond_unassigned"] > 0: 177 | return False 178 | 179 | max_unassigned = 1 if stereocenters["atom_assigned"] == 0 else 1 180 | if stereocenters["atom_unassigned"] <= max_unassigned: 181 | return True 182 | else: 183 | return False 184 | 185 | 186 | def get_bond_type(bond): 187 | return "{}-{}".format( 188 | *tuple(sorted((bond.GetBeginAtom().GetSymbol(), bond.GetEndAtom().GetSymbol()))) 189 | ) 190 | -------------------------------------------------------------------------------- /alfabet/model.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pandas as pd 4 | import rdkit.Chem 5 | from nfp.frameworks import tf 6 | from tqdm import tqdm 7 | 8 | from alfabet.fragment import Molecule, get_fragments 9 | from alfabet.prediction import bde_dft, model, validate_inputs 10 | from alfabet.preprocessor import get_features, preprocessor 11 | 12 | 13 | def get_max_bonds(molecule_list: List[Molecule]): 14 | def num_bonds(molecule): 15 | molH = rdkit.Chem.AddHs(molecule.mol) 16 | return molH.GetNumBonds() 17 | 18 | return max((num_bonds(molecule) for molecule in molecule_list)) 19 | 20 | 21 | def predict(smiles_list, drop_duplicates=True, batch_size=1, verbose=False): 22 | """Predict the BDEs of each bond in a list of molecules. 
23 | 24 | Parameters 25 | ---------- 26 | smiles_list : list 27 | List of SMILES strings for each molecule 28 | drop_duplicates : bool, optional 29 | Whether to drop duplicate bonds (those with the same resulting radicals) 30 | verbose : bool, optional 31 | Whether to show a progress bar 32 | 33 | Returns 34 | ------- 35 | pd.DataFrame 36 | dataframe of prediction results with columns: 37 | 38 | molecule - SMILES of parent 39 | bond_index - integer corresponding to given bond (of mol with explicit 40 | H's) 41 | bond_type - elements of start and end atom types 42 | fragment1 - SMILES of one radical product 43 | fragment2 - SMILES of second radical product 44 | delta_assigned_stereo - # of assigned stereocenters created or destroyed 45 | delta_unassigned_stereo - # of unassigned stereocenters changed 46 | bde_pred - predicted BDE (in kcal/mol) 47 | is_valid - whether the starting molecule is present in the model's 48 | domain of validity 49 | """ 50 | 51 | molecule_list = [Molecule(smiles=smiles) for smiles in smiles_list] 52 | smiles_list = [mol.smiles for mol in molecule_list] 53 | 54 | pred_df = pd.concat( 55 | ( 56 | get_fragments(mol, drop_duplicates=drop_duplicates) 57 | for mol in tqdm(molecule_list, disable=not verbose) 58 | ) 59 | ) 60 | 61 | max_bonds = get_max_bonds(molecule_list) 62 | input_dataset = tf.data.Dataset.from_generator( 63 | lambda: ( 64 | get_features(mol.smiles, max_num_edges=2 * max_bonds) 65 | for mol in tqdm(molecule_list, disable=not verbose) 66 | ), 67 | output_signature=preprocessor.output_signature, 68 | ).cache() 69 | 70 | batched_dataset = input_dataset.padded_batch(batch_size=batch_size).prefetch( 71 | tf.data.experimental.AUTOTUNE 72 | ) 73 | 74 | bdes, bdfes = model.predict(batched_dataset, verbose=1 if verbose else 0) 75 | 76 | bde_df = ( 77 | pd.DataFrame(bdes.squeeze(axis=-1), index=smiles_list) 78 | .T.unstack() 79 | .reindex(pred_df[["molecule", "bond_index"]]) 80 | ) 81 | bdfe_df = ( 82 | pd.DataFrame(bdfes.squeeze(axis=-1), index=smiles_list) 83 | .T.unstack() 84 | .reindex(pred_df[["molecule", "bond_index"]]) 85 | ) 86 | 87 | pred_df["bde_pred"] = bde_df.values 88 | pred_df["bdfe_pred"] = bdfe_df.values 89 | 90 | is_valid = pd.Series( 91 | { 92 | smiles: not validate_inputs(input_)[0] 93 | for smiles, input_ in zip(smiles_list, input_dataset) 94 | }, 95 | name="is_valid", 96 | ) 97 | 98 | pred_df = pred_df.merge(is_valid, left_on="molecule", right_index=True, how="left") 99 | pred_df = pred_df.merge( 100 | bde_dft[["molecule", "bond_index", "bde", "bdfe", "set"]], 101 | on=["molecule", "bond_index"], 102 | how="left", 103 | ) 104 | 105 | return pred_df 106 | -------------------------------------------------------------------------------- /alfabet/neighbors.py: -------------------------------------------------------------------------------- 1 | import joblib 2 | import numpy as np 3 | import pandas as pd 4 | import tensorflow as tf 5 | from pooch import retrieve 6 | 7 | from alfabet import _model_files_baseurl 8 | from alfabet.drawing import draw_bde 9 | from alfabet.prediction import bde_dft, model 10 | from alfabet.preprocessor import get_features 11 | 12 | embedding_model = tf.keras.Model(model.inputs, [model.layers[31].input]) 13 | 14 | nbrs_pipe = joblib.load( 15 | retrieve( 16 | _model_files_baseurl + "bond_embedding_nbrs.p.z", 17 | known_hash="sha256:187df1e88a5fafc1e83436f86ea0374df678e856f2c17506bc730de1996a47b1", 18 | ) 19 | ) 20 | 21 | 22 | def pipe_kneighbors(pipe, X): 23 | Xt = pipe.steps[0][-1].transform(X) 24 | return 
pipe.steps[-1][-1].kneighbors(Xt) 25 | 26 | 27 | def find_neighbor_bonds( 28 | smiles: str, bond_index: int, draw: bool = False 29 | ) -> pd.DataFrame: 30 | inputs = get_features(smiles) 31 | neighbor_df = get_neighbors(inputs, bond_index) 32 | 33 | if draw: 34 | neighbor_df["svg"] = neighbor_df.apply( 35 | lambda x: draw_bde(x.molecule, x.bond_index), 1 36 | ) 37 | 38 | return neighbor_df 39 | 40 | 41 | def get_neighbors(inputs: dict, bond_index: int) -> pd.DataFrame: 42 | embeddings = embedding_model( 43 | { 44 | key: tf.constant(np.expand_dims(np.asarray(val), 0), name=key) 45 | for key, val in inputs.items() 46 | } 47 | ) 48 | distances, indices = pipe_kneighbors(nbrs_pipe, embeddings[:, bond_index, :]) 49 | 50 | neighbor_df = bde_dft.dropna().iloc[indices.flatten()] 51 | neighbor_df["distance"] = distances.flatten() 52 | neighbor_df = neighbor_df.drop_duplicates( 53 | ["molecule", "fragment1", "fragment2"] 54 | ).sort_values("distance") 55 | 56 | return neighbor_df.drop(["rid", "bdscfe"], axis=1) 57 | -------------------------------------------------------------------------------- /alfabet/prediction.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, List, Tuple 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import pooch 7 | from nfp.frameworks import tf 8 | from pooch import retrieve 9 | from rdkit import RDLogger 10 | 11 | from alfabet import _model_files_baseurl 12 | 13 | RDLogger.DisableLog("rdApp.*") 14 | 15 | model_files = retrieve( 16 | _model_files_baseurl + "model.tar.gz", 17 | known_hash="sha256:f1c2b9436f2d18c76b45d95140e6" 18 | "a08c096250bd5f3e2b412492ca27ab38ad0c", 19 | processor=pooch.Untar(extract_dir="model"), 20 | ) 21 | 22 | model = tf.keras.models.load_model(os.path.dirname(model_files[0])) 23 | 24 | bde_dft = pd.read_csv( 25 | retrieve( 26 | _model_files_baseurl + "bonds_for_neighbors.csv.gz", 27 | known_hash="sha256:d4fb825c42d790d4b2b4bd5dc2d" 28 | "87c844932e2da82992a31d7521ce51395adb1", 29 | ) 30 | ) 31 | 32 | 33 | def validate_inputs(inputs: Dict) -> Tuple[bool, np.ndarray, np.ndarray]: 34 | """Check the given SMILES to ensure it's present in the model's 35 | preprocessor dictionary. 
36 | 37 | Returns: 38 | (is_outlier, missing_atom, missing_bond) 39 | 40 | """ 41 | inputs = {key: np.asarray(val) for key, val in inputs.items()} 42 | 43 | missing_bond = np.array(list(set(inputs["bond_indices"][inputs["bond"] == 1]))) 44 | missing_atom = np.arange(len(inputs["atom"]))[inputs["atom"] == 1] 45 | 46 | is_outlier = (missing_bond.size != 0) | (missing_atom.size != 0) 47 | 48 | return is_outlier, missing_atom, missing_bond 49 | 50 | 51 | def tf_model_forward(inputs: Dict) -> Tuple[List[float], List[float]]: 52 | """Mimimcs a call to tf-serving by padding the input arrays to resemble a single 53 | batch and passing them through the loaded tensorflow model 54 | 55 | Args: 56 | inputs (dict): input arrays for a single molecule 57 | 58 | Returns: 59 | Tuple[List[int], List[int]]: flattened output arrays for predicted bde and bdfe 60 | """ 61 | 62 | def expand(val): 63 | return tf.constant(np.expand_dims(val, 0)) 64 | 65 | bde_pred, bdfe_pred = model({key: expand(val) for key, val in inputs.items()}) 66 | return bde_pred.numpy().squeeze().tolist(), bdfe_pred.numpy().squeeze().tolist() 67 | 68 | 69 | def format_predictions_into_dataframe( 70 | bde_pred: List[float], 71 | bdfe_pred: List[float], 72 | frag_df: pd.DataFrame, 73 | drop_duplicates: bool = False, 74 | ) -> pd.DataFrame: 75 | 76 | # Reindex predictions to fragment dataframe 77 | frag_df["bde_pred"] = ( 78 | pd.Series(bde_pred).reindex(frag_df.bond_index).reset_index(drop=True) 79 | ) 80 | frag_df["bdfe_pred"] = ( 81 | pd.Series(bdfe_pred).reindex(frag_df.bond_index).reset_index(drop=True) 82 | ) 83 | 84 | # Add DFT calculated bdes 85 | frag_df = frag_df.merge( 86 | bde_dft[["molecule", "bond_index", "bde", "bdfe", "set"]], 87 | on=["molecule", "bond_index"], 88 | how="left", 89 | ) 90 | 91 | # Drop duplicate entries and sort from weakest to strongest 92 | frag_df = frag_df.sort_values("bde_pred").reset_index(drop=True) 93 | 94 | if drop_duplicates: 95 | frag_df = frag_df.drop_duplicates(["fragment1", "fragment2"]).reset_index( 96 | drop=True 97 | ) 98 | 99 | frag_df["has_dft_bde"] = frag_df.bde.notna() 100 | 101 | return frag_df 102 | -------------------------------------------------------------------------------- /alfabet/preprocessor.py: -------------------------------------------------------------------------------- 1 | import nfp 2 | import numpy as np 3 | from nfp.preprocessing.features import get_ring_size 4 | from pooch import retrieve 5 | 6 | from alfabet import _model_files_baseurl 7 | 8 | 9 | def atom_featurizer(atom): 10 | """Return an integer hash representing the atom type""" 11 | 12 | return str( 13 | ( 14 | atom.GetSymbol(), 15 | atom.GetNumRadicalElectrons(), 16 | atom.GetFormalCharge(), 17 | atom.GetChiralTag(), 18 | atom.GetIsAromatic(), 19 | get_ring_size(atom, max_size=6), 20 | atom.GetDegree(), 21 | atom.GetTotalNumHs(includeNeighbors=True), 22 | ) 23 | ) 24 | 25 | 26 | def bond_featurizer(bond, flipped=False): 27 | if not flipped: 28 | atoms = "{}-{}".format( 29 | *tuple((bond.GetBeginAtom().GetSymbol(), bond.GetEndAtom().GetSymbol())) 30 | ) 31 | else: 32 | atoms = "{}-{}".format( 33 | *tuple((bond.GetEndAtom().GetSymbol(), bond.GetBeginAtom().GetSymbol())) 34 | ) 35 | 36 | btype = str((bond.GetBondType(), bond.GetIsConjugated())) 37 | ring = "R{}".format(get_ring_size(bond, max_size=6)) if bond.IsInRing() else "" 38 | 39 | return " ".join([atoms, btype, ring]).strip() 40 | 41 | 42 | preprocessor = nfp.SmilesBondIndexPreprocessor( 43 | atom_features=atom_featurizer, 44 | 
bond_features=bond_featurizer, 45 | explicit_hs=True, 46 | output_dtype="int64", 47 | ) 48 | 49 | preprocessor.from_json( 50 | retrieve( 51 | _model_files_baseurl + "preprocessor.json", 52 | known_hash="412d15ca4d0e8b5030e9b497f566566922818ff355b8ee677a91dd23696878ac", 53 | ) 54 | ) 55 | 56 | 57 | def get_features(smiles: str, pad: bool = False, **kwargs) -> dict: 58 | """Run the preprocessor on the given SMILES string 59 | 60 | Args: 61 | smiles (str): the input molecule 62 | pad (bool, optional): whether to left-pad the inputs with zeros in preparation 63 | for tf-serving's padding behavior. Defaults to False. 64 | 65 | Returns: 66 | dict: numpy array inputs with atom, bond, connectivity, and bond_indicies. 67 | """ 68 | features = preprocessor(smiles, train=False, **kwargs) 69 | 70 | if not pad: 71 | return features 72 | 73 | # We have to offset the connectivity array by 1 since we're adding a phantom atom 74 | # at position 0 75 | features["connectivity"] += 1 76 | 77 | def pad_value(val): 78 | return np.pad(val, [(1, 0)] + [(0, 0)] * (val.ndim - 1)) 79 | 80 | return {key: pad_value(val) for key, val in features.items()} 81 | -------------------------------------------------------------------------------- /docs/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 17 | 19 | 39 | 41 | 42 | 44 | image/svg+xml 45 | 47 | 48 | 49 | 50 | 51 | 57 | 64 | 80 | 84 | 88 | 92 | 96 | 100 | 104 | 108 | 109 | 112 | 117 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /docs/logo_wide.svg: -------------------------------------------------------------------------------- 1 | 2 | 17 | 19 | 39 | 41 | 42 | 44 | image/svg+xml 45 | 47 | 48 | 49 | 50 | 51 | 57 | 62 | 67 | 72 | 77 | 82 | 87 | 92 | 97 | 98 | 102 | 107 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /etc/environment.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | channels: 4 | - conda-forge 5 | - defaults 6 | 7 | dependencies: 8 | - rdkit 9 | - pytest 10 | - pandas 11 | - tqdm 12 | - joblib 13 | - scikit-learn 14 | - numpy 15 | - pip 16 | - pip: 17 | - tensorflow 18 | -------------------------------------------------------------------------------- /examples/run_test_predictions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import pandas as pd\n", 18 | "from alfabet import model" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import nfp" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Load the test dataset" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "
\n", 46 | "\n", 59 | "\n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | "
moleculebond_indexfragment1fragment2bdebond_typedelta_stereo
0C#C/C(C)=C/CNCC1[C]#CC[C]=CCNCC132.918956C-C0.0
1C#C/C(C)=C/CNCC2C#C/[C]=C/CNCC[CH3]90.545156C-C0.0
2C#C/C(C)=C/CNCC4[CH2]NCC[CH]=C(C)C#C92.585188C-C0.0
3C#C/C(C)=C/CNCC5C#C/C(C)=C/[CH2]CC[NH]62.904012C-N0.0
4C#C/C(C)=C/CNCC6C#C/C(C)=C/C[NH][CH2]C83.992707C-N0.0
\n", 125 | "
" 126 | ], 127 | "text/plain": [ 128 | " molecule bond_index fragment1 fragment2 bde \\\n", 129 | "0 C#C/C(C)=C/CNCC 1 [C]#C C[C]=CCNCC 132.918956 \n", 130 | "1 C#C/C(C)=C/CNCC 2 C#C/[C]=C/CNCC [CH3] 90.545156 \n", 131 | "2 C#C/C(C)=C/CNCC 4 [CH2]NCC [CH]=C(C)C#C 92.585188 \n", 132 | "3 C#C/C(C)=C/CNCC 5 C#C/C(C)=C/[CH2] CC[NH] 62.904012 \n", 133 | "4 C#C/C(C)=C/CNCC 6 C#C/C(C)=C/C[NH] [CH2]C 83.992707 \n", 134 | "\n", 135 | " bond_type delta_stereo \n", 136 | "0 C-C 0.0 \n", 137 | "1 C-C 0.0 \n", 138 | "2 C-C 0.0 \n", 139 | "3 C-N 0.0 \n", 140 | "4 C-N 0.0 " 141 | ] 142 | }, 143 | "execution_count": 3, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "test_data = pd.read_csv('test_data.csv.gz')\n", 150 | "test_data.head()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "In this test dataset and in the alfabet predictions, `bond_index` corresponds to the bond index assinged by RDKit for a molecule with explicit hydrogens" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 13, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "C-N\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "from rdkit import Chem\n", 175 | "molH = Chem.AddHs(Chem.MolFromSmiles('C#C/C(C)=C/CNCC'))\n", 176 | "bond = molH.GetBondWithIdx(5)\n", 177 | "print(f'{bond.GetBeginAtom().GetSymbol()}-{bond.GetEndAtom().GetSymbol()}')" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Get a list of unique molecules in the test dataset" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 4, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "data": { 194 | "text/plain": [ 195 | "array(['C#C/C(C)=C/CNCC', 'C#C/C=C/C(=O)O', 'C#C/C=C/C=C/C=C/C',\n", 196 | " 'C#C/C=C/COCCCC', 'C#CC#CCCO'], dtype=object)" 197 | ] 198 | }, 199 | "execution_count": 4, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "test_molecules = test_data.molecule.unique()\n", 206 | "test_molecules[:5]" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "`model.predict(...)` expects a list-like object of SMILES strings. The longest part of the calculation is breaking the bonds in the molecule and generating radical SMILES, which uses the `joblib` library to parallelize these calculations across multiple cores" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 5, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "alfabet_predictions = model.predict(test_molecules, verbose=False)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 6, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "data": { 232 | "text/html": [ 233 | "
\n", 234 | "\n", 247 | "\n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | "
moleculebond_indexbond_typefragment1fragment2delta_assigned_stereodelta_unassigned_stereobde_predis_valid
0C#C/C(C)=C/CNCC1C-C[C]#CC[C]=CCNCC00132.957993True
1C#C/C(C)=C/CNCC2C-CC#C/[C]=C/CNCC[CH3]0090.212402True
2C#C/C(C)=C/CNCC4C-C[CH2]NCC[CH]=C(C)C#C0092.262215True
3C#C/C(C)=C/CNCC5C-NC#C/C(C)=C/[CH2]CC[NH]0062.995834True
4C#C/C(C)=C/CNCC6C-NC#C/C(C)=C/C[NH][CH2]C0084.394958True
\n", 325 | "
" 326 | ], 327 | "text/plain": [ 328 | " molecule bond_index bond_type fragment1 fragment2 \\\n", 329 | "0 C#C/C(C)=C/CNCC 1 C-C [C]#C C[C]=CCNCC \n", 330 | "1 C#C/C(C)=C/CNCC 2 C-C C#C/[C]=C/CNCC [CH3] \n", 331 | "2 C#C/C(C)=C/CNCC 4 C-C [CH2]NCC [CH]=C(C)C#C \n", 332 | "3 C#C/C(C)=C/CNCC 5 C-N C#C/C(C)=C/[CH2] CC[NH] \n", 333 | "4 C#C/C(C)=C/CNCC 6 C-N C#C/C(C)=C/C[NH] [CH2]C \n", 334 | "\n", 335 | " delta_assigned_stereo delta_unassigned_stereo bde_pred is_valid \n", 336 | "0 0 0 132.957993 True \n", 337 | "1 0 0 90.212402 True \n", 338 | "2 0 0 92.262215 True \n", 339 | "3 0 0 62.995834 True \n", 340 | "4 0 0 84.394958 True " 341 | ] 342 | }, 343 | "execution_count": 6, 344 | "metadata": {}, 345 | "output_type": "execute_result" 346 | } 347 | ], 348 | "source": [ 349 | "alfabet_predictions.head()" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": {}, 355 | "source": [ 356 | "Assert that the input molecules fall within the model's domain of validity." 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 7, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "True" 368 | ] 369 | }, 370 | "execution_count": 7, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "alfabet_predictions.is_valid.all()" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "Merge the test data and model predictions" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 8, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "merged_predictions = test_data.merge(\n", 393 | " alfabet_predictions, on=['molecule', 'fragment1', 'fragment2'], how='left')" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "Calculate the MAE in kcal/mol" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 9, 406 | "metadata": {}, 407 | "outputs": [ 408 | { 409 | "data": { 410 | "text/plain": [ 411 | "0.5975915681620032" 412 | ] 413 | }, 414 | "execution_count": 9, 415 | "metadata": {}, 416 | "output_type": "execute_result" 417 | } 418 | ], 419 | "source": [ 420 | "(merged_predictions.bde - merged_predictions.bde_pred).abs().mean()" 421 | ] 422 | } 423 | ], 424 | "metadata": { 425 | "kernelspec": { 426 | "display_name": "Python 3", 427 | "language": "python", 428 | "name": "python3" 429 | }, 430 | "language_info": { 431 | "codemirror_mode": { 432 | "name": "ipython", 433 | "version": 3 434 | }, 435 | "file_extension": ".py", 436 | "mimetype": "text/x-python", 437 | "name": "python", 438 | "nbconvert_exporter": "python", 439 | "pygments_lexer": "ipython3", 440 | "version": "3.7.4" 441 | } 442 | }, 443 | "nbformat": 4, 444 | "nbformat_minor": 2 445 | } 446 | -------------------------------------------------------------------------------- /examples/test_data.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NREL/alfabet/991aaa2b35245d2f2a648cf63e5304cc661eb395/examples/test_data.csv.gz -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440 4 | versionfile_source = alfabet/_version.py 5 | versionfile_build = alfabet/_version.py 6 | tag_prefix = '' 7 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from io import open 2 | from os import path 3 | 4 | from setuptools import find_packages, setup 5 | 6 | import versioneer 7 | 8 | here = path.abspath(path.dirname(__file__)) 9 | 10 | # Get the long description from the README file 11 | with open(path.join(here, "README.md"), encoding="utf-8") as f: 12 | long_description = f.read() 13 | 14 | # Arguments marked as "Required" below must be included for upload to PyPI. 15 | # Fields marked as "Optional" may be commented out. 16 | 17 | setup( 18 | name="alfabet", 19 | version=versioneer.get_version(), 20 | cmdclass=versioneer.get_cmdclass(), 21 | description="A library to estimate bond dissociation energies (BDEs) of organic molecules", 22 | include_package_data=True, 23 | long_description=long_description, 24 | long_description_content_type="text/markdown", 25 | url="https://github.com/NREL/alfabet", # Optional 26 | author="Peter St. John", 27 | author_email="peter.stjohn@nrel.gov", # Optional 28 | classifiers=[ 29 | "Development Status :: 3 - Alpha", 30 | # Indicate who your project is intended for 31 | "Intended Audience :: Developers", 32 | # Pick your license as you wish 33 | "License :: OSI Approved :: MIT License", 34 | # Specify the Python versions you support here. In particular, ensure 35 | # that you indicate whether you support Python 2, Python 3 or both. 36 | "Programming Language :: Python :: 3", 37 | "Programming Language :: Python :: 3.4", 38 | "Programming Language :: Python :: 3.5", 39 | "Programming Language :: Python :: 3.6", 40 | "Programming Language :: Python :: 3.7", 41 | ], 42 | packages=find_packages(exclude=["docs", "tests"]), # Required 43 | install_requires=[ 44 | "pandas", 45 | "nfp>=0.3.6", 46 | "tqdm", 47 | "pooch", 48 | "joblib", 49 | "scikit-learn==0.24.2", 50 | ], 51 | project_urls={ 52 | "Source": "https://github.com/NREL/alfabet", 53 | }, 54 | ) 55 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NREL/alfabet/991aaa2b35245d2f2a648cf63e5304cc661eb395/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from alfabet import model 3 | 4 | 5 | def test_predict(): 6 | results = model.predict(["CC", "NCCO", "CF", "B"]) 7 | 8 | assert not results[results.molecule == "B"].is_valid.any() 9 | assert results[results.molecule != "B"].is_valid.all() 10 | 11 | # Should be less than 1 kcal/mol on this easy set 12 | assert (results.bde_pred - results.bde).abs().mean() < 1.0 13 | 14 | np.testing.assert_allclose( 15 | results[results.molecule == "CC"].bde_pred, [90.7, 99.8], atol=1.0, rtol=0.05 16 | ) 17 | 18 | np.testing.assert_allclose( 19 | results[results.molecule == "NCCO"].bde_pred, 20 | [90.0, 82.1, 98.2, 99.3, 92.1, 92.5, 105.2], 21 | atol=1.0, 22 | rtol=0.05, 23 | ) 24 | 25 | 26 | def test_data_missing(): 27 | results = model.predict(["CCCCCOC"]) 28 | assert np.isfinite(results[results.bond_index == 17].bde_pred.iloc[0]) 29 | 30 | 31 | def test_duplicates(): 32 | results = model.predict(["c1ccccc1"], drop_duplicates=True) 33 | assert len(results) == 1 34 | 35 | results = model.predict(["c1ccccc1"], 
drop_duplicates=False) 36 | assert len(results) == 6 37 | 38 | 39 | def test_non_canonical_smiles(): 40 | smiles = "CC(=O)OCC1=C\CC/C(C)=C/CC[C@@]2(C)CC[C@@](C(C)C)(/C=C/1)O2" 41 | assert len(model.predict([smiles])) == 24 42 | -------------------------------------------------------------------------------- /tests/test_neighbors.py: -------------------------------------------------------------------------------- 1 | import rdkit.Chem 2 | 3 | from alfabet.neighbors import find_neighbor_bonds 4 | 5 | 6 | def test_find_neighbor_bonds(): 7 | neighbor_df = find_neighbor_bonds('CC', 0) 8 | assert neighbor_df.distance.min() < 1E-3 # bond should be in the database 9 | 10 | for _, row in neighbor_df.iterrows(): 11 | mol = rdkit.Chem.AddHs(rdkit.Chem.MolFromSmiles(row.molecule)) 12 | bond = mol.GetBondWithIdx(row.bond_index) 13 | assert bond.GetEndAtom().GetSymbol() == 'C' 14 | assert bond.GetBeginAtom().GetSymbol() == 'C' 15 | assert bond.GetBondType() == rdkit.Chem.rdchem.BondType.SINGLE -------------------------------------------------------------------------------- /versioneer.py: -------------------------------------------------------------------------------- 1 | 2 | # Version: 0.20 3 | 4 | """The Versioneer - like a rocketeer, but for versions. 5 | 6 | The Versioneer 7 | ============== 8 | 9 | * like a rocketeer, but for versions! 10 | * https://github.com/python-versioneer/python-versioneer 11 | * Brian Warner 12 | * License: Public Domain 13 | * Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 14 | * [![Latest Version][pypi-image]][pypi-url] 15 | * [![Build Status][travis-image]][travis-url] 16 | 17 | This is a tool for managing a recorded version number in distutils-based 18 | python projects. The goal is to remove the tedious and error-prone "update 19 | the embedded version string" step from your release process. Making a new 20 | release should be as easy as recording a new tag in your version-control 21 | system, and maybe making new tarballs. 22 | 23 | 24 | ## Quick Install 25 | 26 | * `pip install versioneer` to somewhere in your $PATH 27 | * add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) 28 | * run `versioneer install` in your source tree, commit the results 29 | * Verify version information with `python setup.py version` 30 | 31 | ## Version Identifiers 32 | 33 | Source trees come from a variety of places: 34 | 35 | * a version-control system checkout (mostly used by developers) 36 | * a nightly tarball, produced by build automation 37 | * a snapshot tarball, produced by a web-based VCS browser, like github's 38 | "tarball from tag" feature 39 | * a release tarball, produced by "setup.py sdist", distributed through PyPI 40 | 41 | Within each source tree, the version identifier (either a string or a number, 42 | this tool is format-agnostic) can come from a variety of places: 43 | 44 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows 45 | about recent "tags" and an absolute revision-id 46 | * the name of the directory into which the tarball was unpacked 47 | * an expanded VCS keyword ($Id$, etc) 48 | * a `_version.py` created by some earlier build step 49 | 50 | For released software, the version identifier is closely related to a VCS 51 | tag. Some projects use tag names that include more than just the version 52 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool 53 | needs to strip the tag prefix to extract the version identifier. 
For 54 | unreleased software (between tags), the version identifier should provide 55 | enough information to help developers recreate the same tree, while also 56 | giving them an idea of roughly how old the tree is (after version 1.2, before 57 | version 1.3). Many VCS systems can report a description that captures this, 58 | for example `git describe --tags --dirty --always` reports things like 59 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 60 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has 61 | uncommitted changes). 62 | 63 | The version identifier is used for multiple purposes: 64 | 65 | * to allow the module to self-identify its version: `myproject.__version__` 66 | * to choose a name and prefix for a 'setup.py sdist' tarball 67 | 68 | ## Theory of Operation 69 | 70 | Versioneer works by adding a special `_version.py` file into your source 71 | tree, where your `__init__.py` can import it. This `_version.py` knows how to 72 | dynamically ask the VCS tool for version information at import time. 73 | 74 | `_version.py` also contains `$Revision$` markers, and the installation 75 | process marks `_version.py` to have this marker rewritten with a tag name 76 | during the `git archive` command. As a result, generated tarballs will 77 | contain enough information to get the proper version. 78 | 79 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 80 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 81 | that configures it. This overrides several distutils/setuptools commands to 82 | compute the version when invoked, and changes `setup.py build` and `setup.py 83 | sdist` to replace `_version.py` with a small static file that contains just 84 | the generated version data. 85 | 86 | ## Installation 87 | 88 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 89 | 90 | ## Version-String Flavors 91 | 92 | Code which uses Versioneer can learn about its version string at runtime by 93 | importing `_version` from your main `__init__.py` file and running the 94 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 95 | import the top-level `versioneer.py` and run `get_versions()`. 96 | 97 | Both functions return a dictionary with different flavors of version 98 | information: 99 | 100 | * `['version']`: A condensed version string, rendered using the selected 101 | style. This is the most commonly used value for the project's version 102 | string. The default "pep440" style yields strings like `0.11`, 103 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 104 | below for alternative styles. 105 | 106 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 107 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 108 | 109 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the 110 | commit date in ISO 8601 format. This will be None if the date is not 111 | available. 112 | 113 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that 114 | this is only accurate if run in a VCS checkout, otherwise it is likely to 115 | be False or None 116 | 117 | * `['error']`: if the version string could not be computed, this will be set 118 | to a string describing the problem, otherwise it will be None. It may be 119 | useful to throw an exception in setup.py if this is set, to avoid e.g. 120 | creating tarballs with a version string of "unknown". 
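As a concrete illustration of these flavors, here is a minimal sketch of querying the version from a build script; the dictionary values shown are hypothetical, reusing the example identifiers from the descriptions above rather than output captured from a real checkout:

    import versioneer

    info = versioneer.get_versions()
    # For a checkout two commits past the "0.11" tag with uncommitted changes,
    # the result might look like:
    # {"version": "0.11+2.g1076c97.dirty",
    #  "full-revisionid": "1076c978a8d3cfc70f408fe5974aa6c092c949ac",
    #  "dirty": True,
    #  "error": None,
    #  "date": "2021-03-01T12:00:00-0700"}
    print(info["version"])
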
121 | 122 | Some variants are more useful than others. Including `full-revisionid` in a 123 | bug report should allow developers to reconstruct the exact code being tested 124 | (or indicate the presence of local changes that should be shared with the 125 | developers). `version` is suitable for display in an "about" box or a CLI 126 | `--version` output: it can be easily compared against release notes and lists 127 | of bugs fixed in various releases. 128 | 129 | The installer adds the following text to your `__init__.py` to place a basic 130 | version in `YOURPROJECT.__version__`: 131 | 132 | from ._version import get_versions 133 | __version__ = get_versions()['version'] 134 | del get_versions 135 | 136 | ## Styles 137 | 138 | The setup.cfg `style=` configuration controls how the VCS information is 139 | rendered into a version string. 140 | 141 | The default style, "pep440", produces a PEP440-compliant string, equal to the 142 | un-prefixed tag name for actual releases, and containing an additional "local 143 | version" section with more detail for in-between builds. For Git, this is 144 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags 145 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the 146 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and 147 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released 148 | software (exactly equal to a known tag), the identifier will only contain the 149 | stripped tag, e.g. "0.11". 150 | 151 | Other styles are available. See [details.md](details.md) in the Versioneer 152 | source tree for descriptions. 153 | 154 | ## Debugging 155 | 156 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend 157 | to return a version of "0+unknown". To investigate the problem, run `setup.py 158 | version`, which will run the version-lookup code in a verbose mode, and will 159 | display the full contents of `get_versions()` (including the `error` string, 160 | which may help identify what went wrong). 161 | 162 | ## Known Limitations 163 | 164 | Some situations are known to cause problems for Versioneer. This details the 165 | most significant ones. More can be found on Github 166 | [issues page](https://github.com/python-versioneer/python-versioneer/issues). 167 | 168 | ### Subprojects 169 | 170 | Versioneer has limited support for source trees in which `setup.py` is not in 171 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are 172 | two common reasons why `setup.py` might not be in the root: 173 | 174 | * Source trees which contain multiple subprojects, such as 175 | [Buildbot](https://github.com/buildbot/buildbot), which contains both 176 | "master" and "slave" subprojects, each with their own `setup.py`, 177 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI 178 | distributions (and upload multiple independently-installable tarballs). 179 | * Source trees whose main purpose is to contain a C library, but which also 180 | provide bindings to Python (and perhaps other languages) in subdirectories. 181 | 182 | Versioneer will look for `.git` in parent directories, and most operations 183 | should get the right version string. However `pip` and `setuptools` have bugs 184 | and implementation details which frequently cause `pip install .` from a 185 | subproject directory to fail to find a correct version string (so it usually 186 | defaults to `0+unknown`). 
187 | 188 | `pip install --editable .` should work correctly. `setup.py install` might 189 | work too. 190 | 191 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in 192 | some later version. 193 | 194 | [Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking 195 | this issue. The discussion in 196 | [PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the 197 | issue from the Versioneer side in more detail. 198 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and 199 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve 200 | pip to let Versioneer work correctly. 201 | 202 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the 203 | `setup.cfg`, so subprojects were completely unsupported with those releases. 204 | 205 | ### Editable installs with setuptools <= 18.5 206 | 207 | `setup.py develop` and `pip install --editable .` allow you to install a 208 | project into a virtualenv once, then continue editing the source code (and 209 | test) without re-installing after every change. 210 | 211 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a 212 | convenient way to specify executable scripts that should be installed along 213 | with the python package. 214 | 215 | These both work as expected when using modern setuptools. When using 216 | setuptools-18.5 or earlier, however, certain operations will cause 217 | `pkg_resources.DistributionNotFound` errors when running the entrypoint 218 | script, which must be resolved by re-installing the package. This happens 219 | when the install happens with one version, then the egg_info data is 220 | regenerated while a different version is checked out. Many setup.py commands 221 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into 222 | a different virtualenv), so this can be surprising. 223 | 224 | [Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes 225 | this one, but upgrading to a newer version of setuptools should probably 226 | resolve it. 227 | 228 | 229 | ## Updating Versioneer 230 | 231 | To upgrade your project to a new release of Versioneer, do the following: 232 | 233 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 234 | * edit `setup.cfg`, if necessary, to include any new configuration settings 235 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 236 | * re-run `versioneer install` in your source tree, to replace 237 | `SRC/_version.py` 238 | * commit any changed files 239 | 240 | ## Future Directions 241 | 242 | This tool is designed to make it easily extended to other version-control 243 | systems: all VCS-specific components are in separate directories like 244 | src/git/ . The top-level `versioneer.py` script is assembled from these 245 | components by running make-versioneer.py . In the future, make-versioneer.py 246 | will take a VCS name as an argument, and will construct a version of 247 | `versioneer.py` that is specific to the given VCS. It might also take the 248 | configuration arguments that are currently provided manually during 249 | installation by editing setup.py . Alternatively, it might go the other 250 | direction and include code from all supported VCS systems, reducing the 251 | number of intermediate scripts. 
252 | 253 | ## Similar projects 254 | 255 | * [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time 256 | dependency 257 | * [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of 258 | versioneer 259 | * [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools 260 | plugin 261 | 262 | ## License 263 | 264 | To make Versioneer easier to embed, all its code is dedicated to the public 265 | domain. The `_version.py` that it creates is also in the public domain. 266 | Specifically, both are released under the Creative Commons "Public Domain 267 | Dedication" license (CC0-1.0), as described in 268 | https://creativecommons.org/publicdomain/zero/1.0/ . 269 | 270 | [pypi-image]: https://img.shields.io/pypi/v/versioneer.svg 271 | [pypi-url]: https://pypi.python.org/pypi/versioneer/ 272 | [travis-image]: 273 | https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg 274 | [travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer 275 | 276 | """ 277 | 278 | import configparser 279 | import errno 280 | import json 281 | import os 282 | import re 283 | import subprocess 284 | import sys 285 | 286 | 287 | class VersioneerConfig: # pylint: disable=too-few-public-methods # noqa 288 | """Container for Versioneer configuration parameters.""" 289 | 290 | 291 | def get_root(): 292 | """Get the project root directory. 293 | 294 | We require that all commands are run from the project root, i.e. the 295 | directory that contains setup.py, setup.cfg, and versioneer.py . 296 | """ 297 | root = os.path.realpath(os.path.abspath(os.getcwd())) 298 | setup_py = os.path.join(root, "setup.py") 299 | versioneer_py = os.path.join(root, "versioneer.py") 300 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 301 | # allow 'python path/to/setup.py COMMAND' 302 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) 303 | setup_py = os.path.join(root, "setup.py") 304 | versioneer_py = os.path.join(root, "versioneer.py") 305 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 306 | err = ("Versioneer was unable to run the project root directory. " 307 | "Versioneer requires setup.py to be executed from " 308 | "its immediate directory (like 'python setup.py COMMAND'), " 309 | "or in a way that lets it use sys.argv[0] to find the root " 310 | "(like 'python path/to/setup.py COMMAND').") 311 | raise VersioneerBadRootError(err) 312 | try: 313 | # Certain runtime workflows (setup.py install/develop in a setuptools 314 | # tree) execute all dependencies in a single python process, so 315 | # "versioneer" may be imported multiple times, and python's shared 316 | # module-import table will cache the first one. So we can't use 317 | # os.path.dirname(__file__), as that will find whichever 318 | # versioneer.py was first imported, even in later projects. 
319 | my_path = os.path.realpath(os.path.abspath(__file__)) 320 | me_dir = os.path.normcase(os.path.splitext(my_path)[0]) 321 | vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) 322 | if me_dir != vsr_dir: 323 | print("Warning: build in %s is using versioneer.py from %s" 324 | % (os.path.dirname(my_path), versioneer_py)) 325 | except NameError: 326 | pass 327 | return root 328 | 329 | 330 | def get_config_from_root(root): 331 | """Read the project setup.cfg file to determine Versioneer config.""" 332 | # This might raise EnvironmentError (if setup.cfg is missing), or 333 | # configparser.NoSectionError (if it lacks a [versioneer] section), or 334 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at 335 | # the top of versioneer.py for instructions on writing your setup.cfg . 336 | setup_cfg = os.path.join(root, "setup.cfg") 337 | parser = configparser.ConfigParser() 338 | with open(setup_cfg, "r") as cfg_file: 339 | parser.read_file(cfg_file) 340 | VCS = parser.get("versioneer", "VCS") # mandatory 341 | 342 | # Dict-like interface for non-mandatory entries 343 | section = parser["versioneer"] 344 | 345 | # pylint:disable=attribute-defined-outside-init # noqa 346 | cfg = VersioneerConfig() 347 | cfg.VCS = VCS 348 | cfg.style = section.get("style", "") 349 | cfg.versionfile_source = section.get("versionfile_source") 350 | cfg.versionfile_build = section.get("versionfile_build") 351 | cfg.tag_prefix = section.get("tag_prefix") 352 | if cfg.tag_prefix in ("''", '""'): 353 | cfg.tag_prefix = "" 354 | cfg.parentdir_prefix = section.get("parentdir_prefix") 355 | cfg.verbose = section.get("verbose") 356 | return cfg 357 | 358 | 359 | class NotThisMethod(Exception): 360 | """Exception raised if a method is not valid for the current scenario.""" 361 | 362 | 363 | # these dictionaries contain VCS-specific tools 364 | LONG_VERSION_PY = {} 365 | HANDLERS = {} 366 | 367 | 368 | def register_vcs_handler(vcs, method): # decorator 369 | """Create decorator to mark a method as the handler of a VCS.""" 370 | def decorate(f): 371 | """Store f in HANDLERS[vcs][method].""" 372 | HANDLERS.setdefault(vcs, {})[method] = f 373 | return f 374 | return decorate 375 | 376 | 377 | # pylint:disable=too-many-arguments,consider-using-with # noqa 378 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 379 | env=None): 380 | """Call the given command(s).""" 381 | assert isinstance(commands, list) 382 | process = None 383 | for command in commands: 384 | try: 385 | dispcmd = str([command] + args) 386 | # remember shell=False, so use git.cmd on windows, not just git 387 | process = subprocess.Popen([command] + args, cwd=cwd, env=env, 388 | stdout=subprocess.PIPE, 389 | stderr=(subprocess.PIPE if hide_stderr 390 | else None)) 391 | break 392 | except EnvironmentError: 393 | e = sys.exc_info()[1] 394 | if e.errno == errno.ENOENT: 395 | continue 396 | if verbose: 397 | print("unable to run %s" % dispcmd) 398 | print(e) 399 | return None, None 400 | else: 401 | if verbose: 402 | print("unable to find command, tried %s" % (commands,)) 403 | return None, None 404 | stdout = process.communicate()[0].strip().decode() 405 | if process.returncode != 0: 406 | if verbose: 407 | print("unable to run %s (error)" % dispcmd) 408 | print("stdout was %s" % stdout) 409 | return None, process.returncode 410 | return stdout, process.returncode 411 | 412 | 413 | LONG_VERSION_PY['git'] = r''' 414 | # This file helps to compute a version number in source trees obtained from 415 | # 
git-archive tarball (such as those provided by githubs download-from-tag 416 | # feature). Distribution tarballs (built by setup.py sdist) and build 417 | # directories (produced by setup.py build) will contain a much shorter file 418 | # that just contains the computed version number. 419 | 420 | # This file is released into the public domain. Generated by 421 | # versioneer-0.20 (https://github.com/python-versioneer/python-versioneer) 422 | 423 | """Git implementation of _version.py.""" 424 | 425 | import errno 426 | import os 427 | import re 428 | import subprocess 429 | import sys 430 | 431 | 432 | def get_keywords(): 433 | """Get the keywords needed to look up the version information.""" 434 | # these strings will be replaced by git during git-archive. 435 | # setup.py/versioneer.py will grep for the variable names, so they must 436 | # each be defined on a line of their own. _version.py will just call 437 | # get_keywords(). 438 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 439 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 440 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" 441 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 442 | return keywords 443 | 444 | 445 | class VersioneerConfig: # pylint: disable=too-few-public-methods 446 | """Container for Versioneer configuration parameters.""" 447 | 448 | 449 | def get_config(): 450 | """Create, populate and return the VersioneerConfig() object.""" 451 | # these strings are filled in when 'setup.py versioneer' creates 452 | # _version.py 453 | cfg = VersioneerConfig() 454 | cfg.VCS = "git" 455 | cfg.style = "%(STYLE)s" 456 | cfg.tag_prefix = "%(TAG_PREFIX)s" 457 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 458 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 459 | cfg.verbose = False 460 | return cfg 461 | 462 | 463 | class NotThisMethod(Exception): 464 | """Exception raised if a method is not valid for the current scenario.""" 465 | 466 | 467 | LONG_VERSION_PY = {} 468 | HANDLERS = {} 469 | 470 | 471 | def register_vcs_handler(vcs, method): # decorator 472 | """Create decorator to mark a method as the handler of a VCS.""" 473 | def decorate(f): 474 | """Store f in HANDLERS[vcs][method].""" 475 | if vcs not in HANDLERS: 476 | HANDLERS[vcs] = {} 477 | HANDLERS[vcs][method] = f 478 | return f 479 | return decorate 480 | 481 | 482 | # pylint:disable=too-many-arguments,consider-using-with # noqa 483 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 484 | env=None): 485 | """Call the given command(s).""" 486 | assert isinstance(commands, list) 487 | process = None 488 | for command in commands: 489 | try: 490 | dispcmd = str([command] + args) 491 | # remember shell=False, so use git.cmd on windows, not just git 492 | process = subprocess.Popen([command] + args, cwd=cwd, env=env, 493 | stdout=subprocess.PIPE, 494 | stderr=(subprocess.PIPE if hide_stderr 495 | else None)) 496 | break 497 | except EnvironmentError: 498 | e = sys.exc_info()[1] 499 | if e.errno == errno.ENOENT: 500 | continue 501 | if verbose: 502 | print("unable to run %%s" %% dispcmd) 503 | print(e) 504 | return None, None 505 | else: 506 | if verbose: 507 | print("unable to find command, tried %%s" %% (commands,)) 508 | return None, None 509 | stdout = process.communicate()[0].strip().decode() 510 | if process.returncode != 0: 511 | if verbose: 512 | print("unable to run %%s (error)" %% dispcmd) 513 | print("stdout was %%s" %% stdout) 514 | return None, process.returncode 515 | return stdout, process.returncode 
516 | 517 | 518 | def versions_from_parentdir(parentdir_prefix, root, verbose): 519 | """Try to determine the version from the parent directory name. 520 | 521 | Source tarballs conventionally unpack into a directory that includes both 522 | the project name and a version string. We will also support searching up 523 | two directory levels for an appropriately named parent directory 524 | """ 525 | rootdirs = [] 526 | 527 | for _ in range(3): 528 | dirname = os.path.basename(root) 529 | if dirname.startswith(parentdir_prefix): 530 | return {"version": dirname[len(parentdir_prefix):], 531 | "full-revisionid": None, 532 | "dirty": False, "error": None, "date": None} 533 | rootdirs.append(root) 534 | root = os.path.dirname(root) # up a level 535 | 536 | if verbose: 537 | print("Tried directories %%s but none started with prefix %%s" %% 538 | (str(rootdirs), parentdir_prefix)) 539 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 540 | 541 | 542 | @register_vcs_handler("git", "get_keywords") 543 | def git_get_keywords(versionfile_abs): 544 | """Extract version information from the given file.""" 545 | # the code embedded in _version.py can just fetch the value of these 546 | # keywords. When used from setup.py, we don't want to import _version.py, 547 | # so we do it with a regexp instead. This function is not used from 548 | # _version.py. 549 | keywords = {} 550 | try: 551 | with open(versionfile_abs, "r") as fobj: 552 | for line in fobj: 553 | if line.strip().startswith("git_refnames ="): 554 | mo = re.search(r'=\s*"(.*)"', line) 555 | if mo: 556 | keywords["refnames"] = mo.group(1) 557 | if line.strip().startswith("git_full ="): 558 | mo = re.search(r'=\s*"(.*)"', line) 559 | if mo: 560 | keywords["full"] = mo.group(1) 561 | if line.strip().startswith("git_date ="): 562 | mo = re.search(r'=\s*"(.*)"', line) 563 | if mo: 564 | keywords["date"] = mo.group(1) 565 | except EnvironmentError: 566 | pass 567 | return keywords 568 | 569 | 570 | @register_vcs_handler("git", "keywords") 571 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 572 | """Get version information from git keywords.""" 573 | if "refnames" not in keywords: 574 | raise NotThisMethod("Short version file found") 575 | date = keywords.get("date") 576 | if date is not None: 577 | # Use only the last line. Previous lines may contain GPG signature 578 | # information. 579 | date = date.splitlines()[-1] 580 | 581 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant 582 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 583 | # -like" string, which we must then edit to make compliant), because 584 | # it's been around since git-1.5.3, and it's too difficult to 585 | # discover which version we're using, or to work around using an 586 | # older one. 587 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 588 | refnames = keywords["refnames"].strip() 589 | if refnames.startswith("$Format"): 590 | if verbose: 591 | print("keywords are unexpanded, not using") 592 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 593 | refs = {r.strip() for r in refnames.strip("()").split(",")} 594 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 595 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 596 | TAG = "tag: " 597 | tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} 598 | if not tags: 599 | # Either we're using git < 1.8.3, or there really are no tags. 
We use 600 | # a heuristic: assume all version tags have a digit. The old git %%d 601 | # expansion behaves like git log --decorate=short and strips out the 602 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 603 | # between branches and tags. By ignoring refnames without digits, we 604 | # filter out many common branch names like "release" and 605 | # "stabilization", as well as "HEAD" and "master". 606 | tags = {r for r in refs if re.search(r'\d', r)} 607 | if verbose: 608 | print("discarding '%%s', no digits" %% ",".join(refs - tags)) 609 | if verbose: 610 | print("likely tags: %%s" %% ",".join(sorted(tags))) 611 | for ref in sorted(tags): 612 | # sorting will prefer e.g. "2.0" over "2.0rc1" 613 | if ref.startswith(tag_prefix): 614 | r = ref[len(tag_prefix):] 615 | # Filter out refs that exactly match prefix or that don't start 616 | # with a number once the prefix is stripped (mostly a concern 617 | # when prefix is '') 618 | if not re.match(r'\d', r): 619 | continue 620 | if verbose: 621 | print("picking %%s" %% r) 622 | return {"version": r, 623 | "full-revisionid": keywords["full"].strip(), 624 | "dirty": False, "error": None, 625 | "date": date} 626 | # no suitable tags, so version is "0+unknown", but full hex is still there 627 | if verbose: 628 | print("no suitable tags, using unknown + full revision id") 629 | return {"version": "0+unknown", 630 | "full-revisionid": keywords["full"].strip(), 631 | "dirty": False, "error": "no suitable tags", "date": None} 632 | 633 | 634 | @register_vcs_handler("git", "pieces_from_vcs") 635 | def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): 636 | """Get version from 'git describe' in the root of the source tree. 637 | 638 | This only gets called if the git-archive 'subst' keywords were *not* 639 | expanded, and _version.py hasn't already been rewritten with a short 640 | version string, meaning we're inside a checked out source tree. 641 | """ 642 | GITS = ["git"] 643 | if sys.platform == "win32": 644 | GITS = ["git.cmd", "git.exe"] 645 | 646 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, 647 | hide_stderr=True) 648 | if rc != 0: 649 | if verbose: 650 | print("Directory %%s not under git control" %% root) 651 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 652 | 653 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 654 | # if there isn't one, this yields HEX[-dirty] (no NUM) 655 | describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", 656 | "--always", "--long", 657 | "--match", "%%s*" %% tag_prefix], 658 | cwd=root) 659 | # --long was added in git-1.5.5 660 | if describe_out is None: 661 | raise NotThisMethod("'git describe' failed") 662 | describe_out = describe_out.strip() 663 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) 664 | if full_out is None: 665 | raise NotThisMethod("'git rev-parse' failed") 666 | full_out = full_out.strip() 667 | 668 | pieces = {} 669 | pieces["long"] = full_out 670 | pieces["short"] = full_out[:7] # maybe improved later 671 | pieces["error"] = None 672 | 673 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], 674 | cwd=root) 675 | # --abbrev-ref was added in git-1.6.3 676 | if rc != 0 or branch_name is None: 677 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") 678 | branch_name = branch_name.strip() 679 | 680 | if branch_name == "HEAD": 681 | # If we aren't exactly on a branch, pick a branch which represents 682 | # the current commit. 
If all else fails, we are on a branchless 683 | # commit. 684 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) 685 | # --contains was added in git-1.5.4 686 | if rc != 0 or branches is None: 687 | raise NotThisMethod("'git branch --contains' returned error") 688 | branches = branches.split("\n") 689 | 690 | # Remove the first line if we're running detached 691 | if "(" in branches[0]: 692 | branches.pop(0) 693 | 694 | # Strip off the leading "* " from the list of branches. 695 | branches = [branch[2:] for branch in branches] 696 | if "master" in branches: 697 | branch_name = "master" 698 | elif not branches: 699 | branch_name = None 700 | else: 701 | # Pick the first branch that is returned. Good or bad. 702 | branch_name = branches[0] 703 | 704 | pieces["branch"] = branch_name 705 | 706 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 707 | # TAG might have hyphens. 708 | git_describe = describe_out 709 | 710 | # look for -dirty suffix 711 | dirty = git_describe.endswith("-dirty") 712 | pieces["dirty"] = dirty 713 | if dirty: 714 | git_describe = git_describe[:git_describe.rindex("-dirty")] 715 | 716 | # now we have TAG-NUM-gHEX or HEX 717 | 718 | if "-" in git_describe: 719 | # TAG-NUM-gHEX 720 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 721 | if not mo: 722 | # unparseable. Maybe git-describe is misbehaving? 723 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 724 | %% describe_out) 725 | return pieces 726 | 727 | # tag 728 | full_tag = mo.group(1) 729 | if not full_tag.startswith(tag_prefix): 730 | if verbose: 731 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 732 | print(fmt %% (full_tag, tag_prefix)) 733 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 734 | %% (full_tag, tag_prefix)) 735 | return pieces 736 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 737 | 738 | # distance: number of commits since tag 739 | pieces["distance"] = int(mo.group(2)) 740 | 741 | # commit: short hex revision ID 742 | pieces["short"] = mo.group(3) 743 | 744 | else: 745 | # HEX: no tags 746 | pieces["closest-tag"] = None 747 | count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) 748 | pieces["distance"] = int(count_out) # total number of commits 749 | 750 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 751 | date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() 752 | # Use only the last line. Previous lines may contain GPG signature 753 | # information. 754 | date = date.splitlines()[-1] 755 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 756 | 757 | return pieces 758 | 759 | 760 | def plus_or_dot(pieces): 761 | """Return a + if we don't already have one, else return a .""" 762 | if "+" in pieces.get("closest-tag", ""): 763 | return "." 764 | return "+" 765 | 766 | 767 | def render_pep440(pieces): 768 | """Build up version string, with post-release "local version identifier". 769 | 770 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 771 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 772 | 773 | Exceptions: 774 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 775 | """ 776 | if pieces["closest-tag"]: 777 | rendered = pieces["closest-tag"] 778 | if pieces["distance"] or pieces["dirty"]: 779 | rendered += plus_or_dot(pieces) 780 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 781 | if pieces["dirty"]: 782 | rendered += ".dirty" 783 | else: 784 | # exception #1 785 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 786 | pieces["short"]) 787 | if pieces["dirty"]: 788 | rendered += ".dirty" 789 | return rendered 790 | 791 | 792 | def render_pep440_branch(pieces): 793 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . 794 | 795 | The ".dev0" means not master branch. Note that .dev0 sorts backwards 796 | (a feature branch will appear "older" than the master branch). 797 | 798 | Exceptions: 799 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] 800 | """ 801 | if pieces["closest-tag"]: 802 | rendered = pieces["closest-tag"] 803 | if pieces["distance"] or pieces["dirty"]: 804 | if pieces["branch"] != "master": 805 | rendered += ".dev0" 806 | rendered += plus_or_dot(pieces) 807 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 808 | if pieces["dirty"]: 809 | rendered += ".dirty" 810 | else: 811 | # exception #1 812 | rendered = "0" 813 | if pieces["branch"] != "master": 814 | rendered += ".dev0" 815 | rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], 816 | pieces["short"]) 817 | if pieces["dirty"]: 818 | rendered += ".dirty" 819 | return rendered 820 | 821 | 822 | def render_pep440_pre(pieces): 823 | """TAG[.post0.devDISTANCE] -- No -dirty. 824 | 825 | Exceptions: 826 | 1: no tags. 0.post0.devDISTANCE 827 | """ 828 | if pieces["closest-tag"]: 829 | rendered = pieces["closest-tag"] 830 | if pieces["distance"]: 831 | rendered += ".post0.dev%%d" %% pieces["distance"] 832 | else: 833 | # exception #1 834 | rendered = "0.post0.dev%%d" %% pieces["distance"] 835 | return rendered 836 | 837 | 838 | def render_pep440_post(pieces): 839 | """TAG[.postDISTANCE[.dev0]+gHEX] . 840 | 841 | The ".dev0" means dirty. Note that .dev0 sorts backwards 842 | (a dirty tree will appear "older" than the corresponding clean one), 843 | but you shouldn't be releasing software with -dirty anyways. 844 | 845 | Exceptions: 846 | 1: no tags. 0.postDISTANCE[.dev0] 847 | """ 848 | if pieces["closest-tag"]: 849 | rendered = pieces["closest-tag"] 850 | if pieces["distance"] or pieces["dirty"]: 851 | rendered += ".post%%d" %% pieces["distance"] 852 | if pieces["dirty"]: 853 | rendered += ".dev0" 854 | rendered += plus_or_dot(pieces) 855 | rendered += "g%%s" %% pieces["short"] 856 | else: 857 | # exception #1 858 | rendered = "0.post%%d" %% pieces["distance"] 859 | if pieces["dirty"]: 860 | rendered += ".dev0" 861 | rendered += "+g%%s" %% pieces["short"] 862 | return rendered 863 | 864 | 865 | def render_pep440_post_branch(pieces): 866 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . 867 | 868 | The ".dev0" means not master branch. 869 | 870 | Exceptions: 871 | 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] 872 | """ 873 | if pieces["closest-tag"]: 874 | rendered = pieces["closest-tag"] 875 | if pieces["distance"] or pieces["dirty"]: 876 | rendered += ".post%%d" %% pieces["distance"] 877 | if pieces["branch"] != "master": 878 | rendered += ".dev0" 879 | rendered += plus_or_dot(pieces) 880 | rendered += "g%%s" %% pieces["short"] 881 | if pieces["dirty"]: 882 | rendered += ".dirty" 883 | else: 884 | # exception #1 885 | rendered = "0.post%%d" %% pieces["distance"] 886 | if pieces["branch"] != "master": 887 | rendered += ".dev0" 888 | rendered += "+g%%s" %% pieces["short"] 889 | if pieces["dirty"]: 890 | rendered += ".dirty" 891 | return rendered 892 | 893 | 894 | def render_pep440_old(pieces): 895 | """TAG[.postDISTANCE[.dev0]] . 896 | 897 | The ".dev0" means dirty. 898 | 899 | Exceptions: 900 | 1: no tags. 0.postDISTANCE[.dev0] 901 | """ 902 | if pieces["closest-tag"]: 903 | rendered = pieces["closest-tag"] 904 | if pieces["distance"] or pieces["dirty"]: 905 | rendered += ".post%%d" %% pieces["distance"] 906 | if pieces["dirty"]: 907 | rendered += ".dev0" 908 | else: 909 | # exception #1 910 | rendered = "0.post%%d" %% pieces["distance"] 911 | if pieces["dirty"]: 912 | rendered += ".dev0" 913 | return rendered 914 | 915 | 916 | def render_git_describe(pieces): 917 | """TAG[-DISTANCE-gHEX][-dirty]. 918 | 919 | Like 'git describe --tags --dirty --always'. 920 | 921 | Exceptions: 922 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 923 | """ 924 | if pieces["closest-tag"]: 925 | rendered = pieces["closest-tag"] 926 | if pieces["distance"]: 927 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 928 | else: 929 | # exception #1 930 | rendered = pieces["short"] 931 | if pieces["dirty"]: 932 | rendered += "-dirty" 933 | return rendered 934 | 935 | 936 | def render_git_describe_long(pieces): 937 | """TAG-DISTANCE-gHEX[-dirty]. 938 | 939 | Like 'git describe --tags --dirty --always -long'. 940 | The distance/hash is unconditional. 941 | 942 | Exceptions: 943 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 944 | """ 945 | if pieces["closest-tag"]: 946 | rendered = pieces["closest-tag"] 947 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 948 | else: 949 | # exception #1 950 | rendered = pieces["short"] 951 | if pieces["dirty"]: 952 | rendered += "-dirty" 953 | return rendered 954 | 955 | 956 | def render(pieces, style): 957 | """Render the given version pieces into the requested style.""" 958 | if pieces["error"]: 959 | return {"version": "unknown", 960 | "full-revisionid": pieces.get("long"), 961 | "dirty": None, 962 | "error": pieces["error"], 963 | "date": None} 964 | 965 | if not style or style == "default": 966 | style = "pep440" # the default 967 | 968 | if style == "pep440": 969 | rendered = render_pep440(pieces) 970 | elif style == "pep440-branch": 971 | rendered = render_pep440_branch(pieces) 972 | elif style == "pep440-pre": 973 | rendered = render_pep440_pre(pieces) 974 | elif style == "pep440-post": 975 | rendered = render_pep440_post(pieces) 976 | elif style == "pep440-post-branch": 977 | rendered = render_pep440_post_branch(pieces) 978 | elif style == "pep440-old": 979 | rendered = render_pep440_old(pieces) 980 | elif style == "git-describe": 981 | rendered = render_git_describe(pieces) 982 | elif style == "git-describe-long": 983 | rendered = render_git_describe_long(pieces) 984 | else: 985 | raise ValueError("unknown style '%%s'" %% style) 986 | 987 | return {"version": rendered, "full-revisionid": pieces["long"], 988 | "dirty": pieces["dirty"], "error": None, 989 | "date": pieces.get("date")} 990 | 991 | 992 | def get_versions(): 993 | """Get version information or return default if unable to do so.""" 994 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 995 | # __file__, we can work backwards from there to the root. Some 996 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 997 | # case we can only use expanded keywords. 998 | 999 | cfg = get_config() 1000 | verbose = cfg.verbose 1001 | 1002 | try: 1003 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 1004 | verbose) 1005 | except NotThisMethod: 1006 | pass 1007 | 1008 | try: 1009 | root = os.path.realpath(__file__) 1010 | # versionfile_source is the relative path from the top of the source 1011 | # tree (where the .git directory might live) to this file. Invert 1012 | # this to find the root from __file__. 1013 | for _ in cfg.versionfile_source.split('/'): 1014 | root = os.path.dirname(root) 1015 | except NameError: 1016 | return {"version": "0+unknown", "full-revisionid": None, 1017 | "dirty": None, 1018 | "error": "unable to find root of source tree", 1019 | "date": None} 1020 | 1021 | try: 1022 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 1023 | return render(pieces, cfg.style) 1024 | except NotThisMethod: 1025 | pass 1026 | 1027 | try: 1028 | if cfg.parentdir_prefix: 1029 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1030 | except NotThisMethod: 1031 | pass 1032 | 1033 | return {"version": "0+unknown", "full-revisionid": None, 1034 | "dirty": None, 1035 | "error": "unable to compute version", "date": None} 1036 | ''' 1037 | 1038 | 1039 | @register_vcs_handler("git", "get_keywords") 1040 | def git_get_keywords(versionfile_abs): 1041 | """Extract version information from the given file.""" 1042 | # the code embedded in _version.py can just fetch the value of these 1043 | # keywords. 
When used from setup.py, we don't want to import _version.py, 1044 | # so we do it with a regexp instead. This function is not used from 1045 | # _version.py. 1046 | keywords = {} 1047 | try: 1048 | with open(versionfile_abs, "r") as fobj: 1049 | for line in fobj: 1050 | if line.strip().startswith("git_refnames ="): 1051 | mo = re.search(r'=\s*"(.*)"', line) 1052 | if mo: 1053 | keywords["refnames"] = mo.group(1) 1054 | if line.strip().startswith("git_full ="): 1055 | mo = re.search(r'=\s*"(.*)"', line) 1056 | if mo: 1057 | keywords["full"] = mo.group(1) 1058 | if line.strip().startswith("git_date ="): 1059 | mo = re.search(r'=\s*"(.*)"', line) 1060 | if mo: 1061 | keywords["date"] = mo.group(1) 1062 | except EnvironmentError: 1063 | pass 1064 | return keywords 1065 | 1066 | 1067 | @register_vcs_handler("git", "keywords") 1068 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 1069 | """Get version information from git keywords.""" 1070 | if "refnames" not in keywords: 1071 | raise NotThisMethod("Short version file found") 1072 | date = keywords.get("date") 1073 | if date is not None: 1074 | # Use only the last line. Previous lines may contain GPG signature 1075 | # information. 1076 | date = date.splitlines()[-1] 1077 | 1078 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 1079 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 1080 | # -like" string, which we must then edit to make compliant), because 1081 | # it's been around since git-1.5.3, and it's too difficult to 1082 | # discover which version we're using, or to work around using an 1083 | # older one. 1084 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 1085 | refnames = keywords["refnames"].strip() 1086 | if refnames.startswith("$Format"): 1087 | if verbose: 1088 | print("keywords are unexpanded, not using") 1089 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 1090 | refs = {r.strip() for r in refnames.strip("()").split(",")} 1091 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 1092 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 1093 | TAG = "tag: " 1094 | tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} 1095 | if not tags: 1096 | # Either we're using git < 1.8.3, or there really are no tags. We use 1097 | # a heuristic: assume all version tags have a digit. The old git %d 1098 | # expansion behaves like git log --decorate=short and strips out the 1099 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 1100 | # between branches and tags. By ignoring refnames without digits, we 1101 | # filter out many common branch names like "release" and 1102 | # "stabilization", as well as "HEAD" and "master". 1103 | tags = {r for r in refs if re.search(r'\d', r)} 1104 | if verbose: 1105 | print("discarding '%s', no digits" % ",".join(refs - tags)) 1106 | if verbose: 1107 | print("likely tags: %s" % ",".join(sorted(tags))) 1108 | for ref in sorted(tags): 1109 | # sorting will prefer e.g. 
"2.0" over "2.0rc1" 1110 | if ref.startswith(tag_prefix): 1111 | r = ref[len(tag_prefix):] 1112 | # Filter out refs that exactly match prefix or that don't start 1113 | # with a number once the prefix is stripped (mostly a concern 1114 | # when prefix is '') 1115 | if not re.match(r'\d', r): 1116 | continue 1117 | if verbose: 1118 | print("picking %s" % r) 1119 | return {"version": r, 1120 | "full-revisionid": keywords["full"].strip(), 1121 | "dirty": False, "error": None, 1122 | "date": date} 1123 | # no suitable tags, so version is "0+unknown", but full hex is still there 1124 | if verbose: 1125 | print("no suitable tags, using unknown + full revision id") 1126 | return {"version": "0+unknown", 1127 | "full-revisionid": keywords["full"].strip(), 1128 | "dirty": False, "error": "no suitable tags", "date": None} 1129 | 1130 | 1131 | @register_vcs_handler("git", "pieces_from_vcs") 1132 | def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): 1133 | """Get version from 'git describe' in the root of the source tree. 1134 | 1135 | This only gets called if the git-archive 'subst' keywords were *not* 1136 | expanded, and _version.py hasn't already been rewritten with a short 1137 | version string, meaning we're inside a checked out source tree. 1138 | """ 1139 | GITS = ["git"] 1140 | if sys.platform == "win32": 1141 | GITS = ["git.cmd", "git.exe"] 1142 | 1143 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, 1144 | hide_stderr=True) 1145 | if rc != 0: 1146 | if verbose: 1147 | print("Directory %s not under git control" % root) 1148 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 1149 | 1150 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 1151 | # if there isn't one, this yields HEX[-dirty] (no NUM) 1152 | describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", 1153 | "--always", "--long", 1154 | "--match", "%s*" % tag_prefix], 1155 | cwd=root) 1156 | # --long was added in git-1.5.5 1157 | if describe_out is None: 1158 | raise NotThisMethod("'git describe' failed") 1159 | describe_out = describe_out.strip() 1160 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) 1161 | if full_out is None: 1162 | raise NotThisMethod("'git rev-parse' failed") 1163 | full_out = full_out.strip() 1164 | 1165 | pieces = {} 1166 | pieces["long"] = full_out 1167 | pieces["short"] = full_out[:7] # maybe improved later 1168 | pieces["error"] = None 1169 | 1170 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], 1171 | cwd=root) 1172 | # --abbrev-ref was added in git-1.6.3 1173 | if rc != 0 or branch_name is None: 1174 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") 1175 | branch_name = branch_name.strip() 1176 | 1177 | if branch_name == "HEAD": 1178 | # If we aren't exactly on a branch, pick a branch which represents 1179 | # the current commit. If all else fails, we are on a branchless 1180 | # commit. 1181 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) 1182 | # --contains was added in git-1.5.4 1183 | if rc != 0 or branches is None: 1184 | raise NotThisMethod("'git branch --contains' returned error") 1185 | branches = branches.split("\n") 1186 | 1187 | # Remove the first line if we're running detached 1188 | if "(" in branches[0]: 1189 | branches.pop(0) 1190 | 1191 | # Strip off the leading "* " from the list of branches. 
1192 | branches = [branch[2:] for branch in branches] 1193 | if "master" in branches: 1194 | branch_name = "master" 1195 | elif not branches: 1196 | branch_name = None 1197 | else: 1198 | # Pick the first branch that is returned. Good or bad. 1199 | branch_name = branches[0] 1200 | 1201 | pieces["branch"] = branch_name 1202 | 1203 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1204 | # TAG might have hyphens. 1205 | git_describe = describe_out 1206 | 1207 | # look for -dirty suffix 1208 | dirty = git_describe.endswith("-dirty") 1209 | pieces["dirty"] = dirty 1210 | if dirty: 1211 | git_describe = git_describe[:git_describe.rindex("-dirty")] 1212 | 1213 | # now we have TAG-NUM-gHEX or HEX 1214 | 1215 | if "-" in git_describe: 1216 | # TAG-NUM-gHEX 1217 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 1218 | if not mo: 1219 | # unparseable. Maybe git-describe is misbehaving? 1220 | pieces["error"] = ("unable to parse git-describe output: '%s'" 1221 | % describe_out) 1222 | return pieces 1223 | 1224 | # tag 1225 | full_tag = mo.group(1) 1226 | if not full_tag.startswith(tag_prefix): 1227 | if verbose: 1228 | fmt = "tag '%s' doesn't start with prefix '%s'" 1229 | print(fmt % (full_tag, tag_prefix)) 1230 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 1231 | % (full_tag, tag_prefix)) 1232 | return pieces 1233 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 1234 | 1235 | # distance: number of commits since tag 1236 | pieces["distance"] = int(mo.group(2)) 1237 | 1238 | # commit: short hex revision ID 1239 | pieces["short"] = mo.group(3) 1240 | 1241 | else: 1242 | # HEX: no tags 1243 | pieces["closest-tag"] = None 1244 | count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) 1245 | pieces["distance"] = int(count_out) # total number of commits 1246 | 1247 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 1248 | date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() 1249 | # Use only the last line. Previous lines may contain GPG signature 1250 | # information. 1251 | date = date.splitlines()[-1] 1252 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 1253 | 1254 | return pieces 1255 | 1256 | 1257 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1258 | """Git-specific installation logic for Versioneer. 1259 | 1260 | For Git, this means creating/changing .gitattributes to mark _version.py 1261 | for export-subst keyword substitution. 
1262 | """ 1263 | GITS = ["git"] 1264 | if sys.platform == "win32": 1265 | GITS = ["git.cmd", "git.exe"] 1266 | files = [manifest_in, versionfile_source] 1267 | if ipy: 1268 | files.append(ipy) 1269 | try: 1270 | my_path = __file__ 1271 | if my_path.endswith(".pyc") or my_path.endswith(".pyo"): 1272 | my_path = os.path.splitext(my_path)[0] + ".py" 1273 | versioneer_file = os.path.relpath(my_path) 1274 | except NameError: 1275 | versioneer_file = "versioneer.py" 1276 | files.append(versioneer_file) 1277 | present = False 1278 | try: 1279 | with open(".gitattributes", "r") as fobj: 1280 | for line in fobj: 1281 | if line.strip().startswith(versionfile_source): 1282 | if "export-subst" in line.strip().split()[1:]: 1283 | present = True 1284 | break 1285 | except EnvironmentError: 1286 | pass 1287 | if not present: 1288 | with open(".gitattributes", "a+") as fobj: 1289 | fobj.write(f"{versionfile_source} export-subst\n") 1290 | files.append(".gitattributes") 1291 | run_command(GITS, ["add", "--"] + files) 1292 | 1293 | 1294 | def versions_from_parentdir(parentdir_prefix, root, verbose): 1295 | """Try to determine the version from the parent directory name. 1296 | 1297 | Source tarballs conventionally unpack into a directory that includes both 1298 | the project name and a version string. We will also support searching up 1299 | two directory levels for an appropriately named parent directory 1300 | """ 1301 | rootdirs = [] 1302 | 1303 | for _ in range(3): 1304 | dirname = os.path.basename(root) 1305 | if dirname.startswith(parentdir_prefix): 1306 | return {"version": dirname[len(parentdir_prefix):], 1307 | "full-revisionid": None, 1308 | "dirty": False, "error": None, "date": None} 1309 | rootdirs.append(root) 1310 | root = os.path.dirname(root) # up a level 1311 | 1312 | if verbose: 1313 | print("Tried directories %s but none started with prefix %s" % 1314 | (str(rootdirs), parentdir_prefix)) 1315 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 1316 | 1317 | 1318 | SHORT_VERSION_PY = """ 1319 | # This file was generated by 'versioneer.py' (0.20) from 1320 | # revision-control system data, or from the parent directory name of an 1321 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 1322 | # of this file. 
1323 | 1324 | import json 1325 | 1326 | version_json = ''' 1327 | %s 1328 | ''' # END VERSION_JSON 1329 | 1330 | 1331 | def get_versions(): 1332 | return json.loads(version_json) 1333 | """ 1334 | 1335 | 1336 | def versions_from_file(filename): 1337 | """Try to determine the version from _version.py if present.""" 1338 | try: 1339 | with open(filename) as f: 1340 | contents = f.read() 1341 | except EnvironmentError: 1342 | raise NotThisMethod("unable to read _version.py") 1343 | mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", 1344 | contents, re.M | re.S) 1345 | if not mo: 1346 | mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", 1347 | contents, re.M | re.S) 1348 | if not mo: 1349 | raise NotThisMethod("no version_json in _version.py") 1350 | return json.loads(mo.group(1)) 1351 | 1352 | 1353 | def write_to_version_file(filename, versions): 1354 | """Write the given version number to the given _version.py file.""" 1355 | os.unlink(filename) 1356 | contents = json.dumps(versions, sort_keys=True, 1357 | indent=1, separators=(",", ": ")) 1358 | with open(filename, "w") as f: 1359 | f.write(SHORT_VERSION_PY % contents) 1360 | 1361 | print("set %s to '%s'" % (filename, versions["version"])) 1362 | 1363 | 1364 | def plus_or_dot(pieces): 1365 | """Return a + if we don't already have one, else return a .""" 1366 | if "+" in pieces.get("closest-tag", ""): 1367 | return "." 1368 | return "+" 1369 | 1370 | 1371 | def render_pep440(pieces): 1372 | """Build up version string, with post-release "local version identifier". 1373 | 1374 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 1375 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1376 | 1377 | Exceptions: 1378 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 1379 | """ 1380 | if pieces["closest-tag"]: 1381 | rendered = pieces["closest-tag"] 1382 | if pieces["distance"] or pieces["dirty"]: 1383 | rendered += plus_or_dot(pieces) 1384 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1385 | if pieces["dirty"]: 1386 | rendered += ".dirty" 1387 | else: 1388 | # exception #1 1389 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 1390 | pieces["short"]) 1391 | if pieces["dirty"]: 1392 | rendered += ".dirty" 1393 | return rendered 1394 | 1395 | 1396 | def render_pep440_branch(pieces): 1397 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . 1398 | 1399 | The ".dev0" means not master branch. Note that .dev0 sorts backwards 1400 | (a feature branch will appear "older" than the master branch). 1401 | 1402 | Exceptions: 1403 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] 1404 | """ 1405 | if pieces["closest-tag"]: 1406 | rendered = pieces["closest-tag"] 1407 | if pieces["distance"] or pieces["dirty"]: 1408 | if pieces["branch"] != "master": 1409 | rendered += ".dev0" 1410 | rendered += plus_or_dot(pieces) 1411 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1412 | if pieces["dirty"]: 1413 | rendered += ".dirty" 1414 | else: 1415 | # exception #1 1416 | rendered = "0" 1417 | if pieces["branch"] != "master": 1418 | rendered += ".dev0" 1419 | rendered += "+untagged.%d.g%s" % (pieces["distance"], 1420 | pieces["short"]) 1421 | if pieces["dirty"]: 1422 | rendered += ".dirty" 1423 | return rendered 1424 | 1425 | 1426 | def render_pep440_pre(pieces): 1427 | """TAG[.post0.devDISTANCE] -- No -dirty. 1428 | 1429 | Exceptions: 1430 | 1: no tags. 
0.post0.devDISTANCE 1431 | """ 1432 | if pieces["closest-tag"]: 1433 | rendered = pieces["closest-tag"] 1434 | if pieces["distance"]: 1435 | rendered += ".post0.dev%d" % pieces["distance"] 1436 | else: 1437 | # exception #1 1438 | rendered = "0.post0.dev%d" % pieces["distance"] 1439 | return rendered 1440 | 1441 | 1442 | def render_pep440_post(pieces): 1443 | """TAG[.postDISTANCE[.dev0]+gHEX] . 1444 | 1445 | The ".dev0" means dirty. Note that .dev0 sorts backwards 1446 | (a dirty tree will appear "older" than the corresponding clean one), 1447 | but you shouldn't be releasing software with -dirty anyways. 1448 | 1449 | Exceptions: 1450 | 1: no tags. 0.postDISTANCE[.dev0] 1451 | """ 1452 | if pieces["closest-tag"]: 1453 | rendered = pieces["closest-tag"] 1454 | if pieces["distance"] or pieces["dirty"]: 1455 | rendered += ".post%d" % pieces["distance"] 1456 | if pieces["dirty"]: 1457 | rendered += ".dev0" 1458 | rendered += plus_or_dot(pieces) 1459 | rendered += "g%s" % pieces["short"] 1460 | else: 1461 | # exception #1 1462 | rendered = "0.post%d" % pieces["distance"] 1463 | if pieces["dirty"]: 1464 | rendered += ".dev0" 1465 | rendered += "+g%s" % pieces["short"] 1466 | return rendered 1467 | 1468 | 1469 | def render_pep440_post_branch(pieces): 1470 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . 1471 | 1472 | The ".dev0" means not master branch. 1473 | 1474 | Exceptions: 1475 | 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] 1476 | """ 1477 | if pieces["closest-tag"]: 1478 | rendered = pieces["closest-tag"] 1479 | if pieces["distance"] or pieces["dirty"]: 1480 | rendered += ".post%d" % pieces["distance"] 1481 | if pieces["branch"] != "master": 1482 | rendered += ".dev0" 1483 | rendered += plus_or_dot(pieces) 1484 | rendered += "g%s" % pieces["short"] 1485 | if pieces["dirty"]: 1486 | rendered += ".dirty" 1487 | else: 1488 | # exception #1 1489 | rendered = "0.post%d" % pieces["distance"] 1490 | if pieces["branch"] != "master": 1491 | rendered += ".dev0" 1492 | rendered += "+g%s" % pieces["short"] 1493 | if pieces["dirty"]: 1494 | rendered += ".dirty" 1495 | return rendered 1496 | 1497 | 1498 | def render_pep440_old(pieces): 1499 | """TAG[.postDISTANCE[.dev0]] . 1500 | 1501 | The ".dev0" means dirty. 1502 | 1503 | Exceptions: 1504 | 1: no tags. 0.postDISTANCE[.dev0] 1505 | """ 1506 | if pieces["closest-tag"]: 1507 | rendered = pieces["closest-tag"] 1508 | if pieces["distance"] or pieces["dirty"]: 1509 | rendered += ".post%d" % pieces["distance"] 1510 | if pieces["dirty"]: 1511 | rendered += ".dev0" 1512 | else: 1513 | # exception #1 1514 | rendered = "0.post%d" % pieces["distance"] 1515 | if pieces["dirty"]: 1516 | rendered += ".dev0" 1517 | return rendered 1518 | 1519 | 1520 | def render_git_describe(pieces): 1521 | """TAG[-DISTANCE-gHEX][-dirty]. 1522 | 1523 | Like 'git describe --tags --dirty --always'. 1524 | 1525 | Exceptions: 1526 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1527 | """ 1528 | if pieces["closest-tag"]: 1529 | rendered = pieces["closest-tag"] 1530 | if pieces["distance"]: 1531 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1532 | else: 1533 | # exception #1 1534 | rendered = pieces["short"] 1535 | if pieces["dirty"]: 1536 | rendered += "-dirty" 1537 | return rendered 1538 | 1539 | 1540 | def render_git_describe_long(pieces): 1541 | """TAG-DISTANCE-gHEX[-dirty]. 1542 | 1543 | Like 'git describe --tags --dirty --always -long'. 1544 | The distance/hash is unconditional. 1545 | 1546 | Exceptions: 1547 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 1548 | """ 1549 | if pieces["closest-tag"]: 1550 | rendered = pieces["closest-tag"] 1551 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1552 | else: 1553 | # exception #1 1554 | rendered = pieces["short"] 1555 | if pieces["dirty"]: 1556 | rendered += "-dirty" 1557 | return rendered 1558 | 1559 | 1560 | def render(pieces, style): 1561 | """Render the given version pieces into the requested style.""" 1562 | if pieces["error"]: 1563 | return {"version": "unknown", 1564 | "full-revisionid": pieces.get("long"), 1565 | "dirty": None, 1566 | "error": pieces["error"], 1567 | "date": None} 1568 | 1569 | if not style or style == "default": 1570 | style = "pep440" # the default 1571 | 1572 | if style == "pep440": 1573 | rendered = render_pep440(pieces) 1574 | elif style == "pep440-branch": 1575 | rendered = render_pep440_branch(pieces) 1576 | elif style == "pep440-pre": 1577 | rendered = render_pep440_pre(pieces) 1578 | elif style == "pep440-post": 1579 | rendered = render_pep440_post(pieces) 1580 | elif style == "pep440-post-branch": 1581 | rendered = render_pep440_post_branch(pieces) 1582 | elif style == "pep440-old": 1583 | rendered = render_pep440_old(pieces) 1584 | elif style == "git-describe": 1585 | rendered = render_git_describe(pieces) 1586 | elif style == "git-describe-long": 1587 | rendered = render_git_describe_long(pieces) 1588 | else: 1589 | raise ValueError("unknown style '%s'" % style) 1590 | 1591 | return {"version": rendered, "full-revisionid": pieces["long"], 1592 | "dirty": pieces["dirty"], "error": None, 1593 | "date": pieces.get("date")} 1594 | 1595 | 1596 | class VersioneerBadRootError(Exception): 1597 | """The project root directory is unknown or missing key files.""" 1598 | 1599 | 1600 | def get_versions(verbose=False): 1601 | """Get the project version from whatever source is available. 1602 | 1603 | Returns dict with two keys: 'version' and 'full'. 1604 | """ 1605 | if "versioneer" in sys.modules: 1606 | # see the discussion in cmdclass.py:get_cmdclass() 1607 | del sys.modules["versioneer"] 1608 | 1609 | root = get_root() 1610 | cfg = get_config_from_root(root) 1611 | 1612 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1613 | handlers = HANDLERS.get(cfg.VCS) 1614 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1615 | verbose = verbose or cfg.verbose 1616 | assert cfg.versionfile_source is not None, \ 1617 | "please set versioneer.versionfile_source" 1618 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1619 | 1620 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1621 | 1622 | # extract version from first of: _version.py, VCS command (e.g. 'git 1623 | # describe'), parentdir. This is meant to work for developers using a 1624 | # source checkout, for users of a tarball created by 'setup.py sdist', 1625 | # and for users of a tarball/zipball created by 'git archive' or github's 1626 | # download-from-tag feature or the equivalent in other VCSes. 
1627 | 1628 | get_keywords_f = handlers.get("get_keywords") 1629 | from_keywords_f = handlers.get("keywords") 1630 | if get_keywords_f and from_keywords_f: 1631 | try: 1632 | keywords = get_keywords_f(versionfile_abs) 1633 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1634 | if verbose: 1635 | print("got version from expanded keyword %s" % ver) 1636 | return ver 1637 | except NotThisMethod: 1638 | pass 1639 | 1640 | try: 1641 | ver = versions_from_file(versionfile_abs) 1642 | if verbose: 1643 | print("got version from file %s %s" % (versionfile_abs, ver)) 1644 | return ver 1645 | except NotThisMethod: 1646 | pass 1647 | 1648 | from_vcs_f = handlers.get("pieces_from_vcs") 1649 | if from_vcs_f: 1650 | try: 1651 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1652 | ver = render(pieces, cfg.style) 1653 | if verbose: 1654 | print("got version from VCS %s" % ver) 1655 | return ver 1656 | except NotThisMethod: 1657 | pass 1658 | 1659 | try: 1660 | if cfg.parentdir_prefix: 1661 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1662 | if verbose: 1663 | print("got version from parentdir %s" % ver) 1664 | return ver 1665 | except NotThisMethod: 1666 | pass 1667 | 1668 | if verbose: 1669 | print("unable to compute version") 1670 | 1671 | return {"version": "0+unknown", "full-revisionid": None, 1672 | "dirty": None, "error": "unable to compute version", 1673 | "date": None} 1674 | 1675 | 1676 | def get_version(): 1677 | """Get the short version string for this project.""" 1678 | return get_versions()["version"] 1679 | 1680 | 1681 | def get_cmdclass(cmdclass=None): 1682 | """Get the custom setuptools/distutils subclasses used by Versioneer. 1683 | 1684 | If the package uses a different cmdclass (e.g. one from numpy), it 1685 | should be provide as an argument. 1686 | """ 1687 | if "versioneer" in sys.modules: 1688 | del sys.modules["versioneer"] 1689 | # this fixes the "python setup.py develop" case (also 'install' and 1690 | # 'easy_install .'), in which subdependencies of the main project are 1691 | # built (using setup.py bdist_egg) in the same python process. Assume 1692 | # a main project A and a dependency B, which use different versions 1693 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1694 | # sys.modules by the time B's setup.py is executed, causing B to run 1695 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1696 | # sandbox that restores sys.modules to it's pre-build state, so the 1697 | # parent is protected against the child's "import versioneer". By 1698 | # removing ourselves from sys.modules here, before the child build 1699 | # happens, we protect the child from the parent's versioneer too. 
1700 | # Also see https://github.com/python-versioneer/python-versioneer/issues/52 1701 | 1702 | cmds = {} if cmdclass is None else cmdclass.copy() 1703 | 1704 | # we add "version" to both distutils and setuptools 1705 | from distutils.core import Command 1706 | 1707 | class cmd_version(Command): 1708 | description = "report generated version string" 1709 | user_options = [] 1710 | boolean_options = [] 1711 | 1712 | def initialize_options(self): 1713 | pass 1714 | 1715 | def finalize_options(self): 1716 | pass 1717 | 1718 | def run(self): 1719 | vers = get_versions(verbose=True) 1720 | print("Version: %s" % vers["version"]) 1721 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1722 | print(" dirty: %s" % vers.get("dirty")) 1723 | print(" date: %s" % vers.get("date")) 1724 | if vers["error"]: 1725 | print(" error: %s" % vers["error"]) 1726 | cmds["version"] = cmd_version 1727 | 1728 | # we override "build_py" in both distutils and setuptools 1729 | # 1730 | # most invocation pathways end up running build_py: 1731 | # distutils/build -> build_py 1732 | # distutils/install -> distutils/build ->.. 1733 | # setuptools/bdist_wheel -> distutils/install ->.. 1734 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1735 | # setuptools/install -> bdist_egg ->.. 1736 | # setuptools/develop -> ? 1737 | # pip install: 1738 | # copies source tree to a tempdir before running egg_info/etc 1739 | # if .git isn't copied too, 'git describe' will fail 1740 | # then does setup.py bdist_wheel, or sometimes setup.py install 1741 | # setup.py egg_info -> ? 1742 | 1743 | # we override different "build_py" commands for both environments 1744 | if 'build_py' in cmds: 1745 | _build_py = cmds['build_py'] 1746 | elif "setuptools" in sys.modules: 1747 | from setuptools.command.build_py import build_py as _build_py 1748 | else: 1749 | from distutils.command.build_py import build_py as _build_py 1750 | 1751 | class cmd_build_py(_build_py): 1752 | def run(self): 1753 | root = get_root() 1754 | cfg = get_config_from_root(root) 1755 | versions = get_versions() 1756 | _build_py.run(self) 1757 | # now locate _version.py in the new build/ directory and replace 1758 | # it with an updated value 1759 | if cfg.versionfile_build: 1760 | target_versionfile = os.path.join(self.build_lib, 1761 | cfg.versionfile_build) 1762 | print("UPDATING %s" % target_versionfile) 1763 | write_to_version_file(target_versionfile, versions) 1764 | cmds["build_py"] = cmd_build_py 1765 | 1766 | if 'build_ext' in cmds: 1767 | _build_ext = cmds['build_ext'] 1768 | elif "setuptools" in sys.modules: 1769 | from setuptools.command.build_ext import build_ext as _build_ext 1770 | else: 1771 | from distutils.command.build_ext import build_ext as _build_ext 1772 | 1773 | class cmd_build_ext(_build_ext): 1774 | def run(self): 1775 | root = get_root() 1776 | cfg = get_config_from_root(root) 1777 | versions = get_versions() 1778 | _build_ext.run(self) 1779 | if self.inplace: 1780 | # build_ext --inplace will only build extensions in 1781 | # build/lib<..> dir with no _version.py to write to. 1782 | # As in place builds will already have a _version.py 1783 | # in the module dir, we do not need to write one. 
1784 | return 1785 | # now locate _version.py in the new build/ directory and replace 1786 | # it with an updated value 1787 | target_versionfile = os.path.join(self.build_lib, 1788 | cfg.versionfile_build) 1789 | print("UPDATING %s" % target_versionfile) 1790 | write_to_version_file(target_versionfile, versions) 1791 | cmds["build_ext"] = cmd_build_ext 1792 | 1793 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 1794 | from cx_Freeze.dist import build_exe as _build_exe 1795 | # nczeczulin reports that py2exe won't like the pep440-style string 1796 | # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 1797 | # setup(console=[{ 1798 | # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION 1799 | # "product_version": versioneer.get_version(), 1800 | # ... 1801 | 1802 | class cmd_build_exe(_build_exe): 1803 | def run(self): 1804 | root = get_root() 1805 | cfg = get_config_from_root(root) 1806 | versions = get_versions() 1807 | target_versionfile = cfg.versionfile_source 1808 | print("UPDATING %s" % target_versionfile) 1809 | write_to_version_file(target_versionfile, versions) 1810 | 1811 | _build_exe.run(self) 1812 | os.unlink(target_versionfile) 1813 | with open(cfg.versionfile_source, "w") as f: 1814 | LONG = LONG_VERSION_PY[cfg.VCS] 1815 | f.write(LONG % 1816 | {"DOLLAR": "$", 1817 | "STYLE": cfg.style, 1818 | "TAG_PREFIX": cfg.tag_prefix, 1819 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1820 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1821 | }) 1822 | cmds["build_exe"] = cmd_build_exe 1823 | del cmds["build_py"] 1824 | 1825 | if 'py2exe' in sys.modules: # py2exe enabled? 1826 | from py2exe.distutils_buildexe import py2exe as _py2exe 1827 | 1828 | class cmd_py2exe(_py2exe): 1829 | def run(self): 1830 | root = get_root() 1831 | cfg = get_config_from_root(root) 1832 | versions = get_versions() 1833 | target_versionfile = cfg.versionfile_source 1834 | print("UPDATING %s" % target_versionfile) 1835 | write_to_version_file(target_versionfile, versions) 1836 | 1837 | _py2exe.run(self) 1838 | os.unlink(target_versionfile) 1839 | with open(cfg.versionfile_source, "w") as f: 1840 | LONG = LONG_VERSION_PY[cfg.VCS] 1841 | f.write(LONG % 1842 | {"DOLLAR": "$", 1843 | "STYLE": cfg.style, 1844 | "TAG_PREFIX": cfg.tag_prefix, 1845 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1846 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1847 | }) 1848 | cmds["py2exe"] = cmd_py2exe 1849 | 1850 | # we override different "sdist" commands for both environments 1851 | if 'sdist' in cmds: 1852 | _sdist = cmds['sdist'] 1853 | elif "setuptools" in sys.modules: 1854 | from setuptools.command.sdist import sdist as _sdist 1855 | else: 1856 | from distutils.command.sdist import sdist as _sdist 1857 | 1858 | class cmd_sdist(_sdist): 1859 | def run(self): 1860 | versions = get_versions() 1861 | # pylint:disable=attribute-defined-outside-init # noqa 1862 | self._versioneer_generated_versions = versions 1863 | # unless we update this, the command will keep using the old 1864 | # version 1865 | self.distribution.metadata.version = versions["version"] 1866 | return _sdist.run(self) 1867 | 1868 | def make_release_tree(self, base_dir, files): 1869 | root = get_root() 1870 | cfg = get_config_from_root(root) 1871 | _sdist.make_release_tree(self, base_dir, files) 1872 | # now locate _version.py in the new base_dir directory 1873 | # (remembering that it may be a hardlink) and replace it with an 1874 | # updated value 1875 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1876 | 
print("UPDATING %s" % target_versionfile) 1877 | write_to_version_file(target_versionfile, 1878 | self._versioneer_generated_versions) 1879 | cmds["sdist"] = cmd_sdist 1880 | 1881 | return cmds 1882 | 1883 | 1884 | CONFIG_ERROR = """ 1885 | setup.cfg is missing the necessary Versioneer configuration. You need 1886 | a section like: 1887 | 1888 | [versioneer] 1889 | VCS = git 1890 | style = pep440 1891 | versionfile_source = src/myproject/_version.py 1892 | versionfile_build = myproject/_version.py 1893 | tag_prefix = 1894 | parentdir_prefix = myproject- 1895 | 1896 | You will also need to edit your setup.py to use the results: 1897 | 1898 | import versioneer 1899 | setup(version=versioneer.get_version(), 1900 | cmdclass=versioneer.get_cmdclass(), ...) 1901 | 1902 | Please read the docstring in ./versioneer.py for configuration instructions, 1903 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 1904 | """ 1905 | 1906 | SAMPLE_CONFIG = """ 1907 | # See the docstring in versioneer.py for instructions. Note that you must 1908 | # re-run 'versioneer.py setup' after changing this section, and commit the 1909 | # resulting files. 1910 | 1911 | [versioneer] 1912 | #VCS = git 1913 | #style = pep440 1914 | #versionfile_source = 1915 | #versionfile_build = 1916 | #tag_prefix = 1917 | #parentdir_prefix = 1918 | 1919 | """ 1920 | 1921 | OLD_SNIPPET = """ 1922 | from ._version import get_versions 1923 | __version__ = get_versions()['version'] 1924 | del get_versions 1925 | """ 1926 | 1927 | INIT_PY_SNIPPET = """ 1928 | from . import {0} 1929 | __version__ = {0}.get_versions()['version'] 1930 | """ 1931 | 1932 | 1933 | def do_setup(): 1934 | """Do main VCS-independent setup function for installing Versioneer.""" 1935 | root = get_root() 1936 | try: 1937 | cfg = get_config_from_root(root) 1938 | except (EnvironmentError, configparser.NoSectionError, 1939 | configparser.NoOptionError) as e: 1940 | if isinstance(e, (EnvironmentError, configparser.NoSectionError)): 1941 | print("Adding sample versioneer config to setup.cfg", 1942 | file=sys.stderr) 1943 | with open(os.path.join(root, "setup.cfg"), "a") as f: 1944 | f.write(SAMPLE_CONFIG) 1945 | print(CONFIG_ERROR, file=sys.stderr) 1946 | return 1 1947 | 1948 | print(" creating %s" % cfg.versionfile_source) 1949 | with open(cfg.versionfile_source, "w") as f: 1950 | LONG = LONG_VERSION_PY[cfg.VCS] 1951 | f.write(LONG % {"DOLLAR": "$", 1952 | "STYLE": cfg.style, 1953 | "TAG_PREFIX": cfg.tag_prefix, 1954 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1955 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1956 | }) 1957 | 1958 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), 1959 | "__init__.py") 1960 | if os.path.exists(ipy): 1961 | try: 1962 | with open(ipy, "r") as f: 1963 | old = f.read() 1964 | except EnvironmentError: 1965 | old = "" 1966 | module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] 1967 | snippet = INIT_PY_SNIPPET.format(module) 1968 | if OLD_SNIPPET in old: 1969 | print(" replacing boilerplate in %s" % ipy) 1970 | with open(ipy, "w") as f: 1971 | f.write(old.replace(OLD_SNIPPET, snippet)) 1972 | elif snippet not in old: 1973 | print(" appending to %s" % ipy) 1974 | with open(ipy, "a") as f: 1975 | f.write(snippet) 1976 | else: 1977 | print(" %s unmodified" % ipy) 1978 | else: 1979 | print(" %s doesn't exist, ok" % ipy) 1980 | ipy = None 1981 | 1982 | # Make sure both the top-level "versioneer.py" and versionfile_source 1983 | # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so 
1984 | # they'll be copied into source distributions. Pip won't be able to 1985 | # install the package without this. 1986 | manifest_in = os.path.join(root, "MANIFEST.in") 1987 | simple_includes = set() 1988 | try: 1989 | with open(manifest_in, "r") as f: 1990 | for line in f: 1991 | if line.startswith("include "): 1992 | for include in line.split()[1:]: 1993 | simple_includes.add(include) 1994 | except EnvironmentError: 1995 | pass 1996 | # That doesn't cover everything MANIFEST.in can do 1997 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so 1998 | # it might give some false negatives. Appending redundant 'include' 1999 | # lines is safe, though. 2000 | if "versioneer.py" not in simple_includes: 2001 | print(" appending 'versioneer.py' to MANIFEST.in") 2002 | with open(manifest_in, "a") as f: 2003 | f.write("include versioneer.py\n") 2004 | else: 2005 | print(" 'versioneer.py' already in MANIFEST.in") 2006 | if cfg.versionfile_source not in simple_includes: 2007 | print(" appending versionfile_source ('%s') to MANIFEST.in" % 2008 | cfg.versionfile_source) 2009 | with open(manifest_in, "a") as f: 2010 | f.write("include %s\n" % cfg.versionfile_source) 2011 | else: 2012 | print(" versionfile_source already in MANIFEST.in") 2013 | 2014 | # Make VCS-specific changes. For git, this means creating/changing 2015 | # .gitattributes to mark _version.py for export-subst keyword 2016 | # substitution. 2017 | do_vcs_install(manifest_in, cfg.versionfile_source, ipy) 2018 | return 0 2019 | 2020 | 2021 | def scan_setup_py(): 2022 | """Validate the contents of setup.py against Versioneer's expectations.""" 2023 | found = set() 2024 | setters = False 2025 | errors = 0 2026 | with open("setup.py", "r") as f: 2027 | for line in f.readlines(): 2028 | if "import versioneer" in line: 2029 | found.add("import") 2030 | if "versioneer.get_cmdclass()" in line: 2031 | found.add("cmdclass") 2032 | if "versioneer.get_version()" in line: 2033 | found.add("get_version") 2034 | if "versioneer.VCS" in line: 2035 | setters = True 2036 | if "versioneer.versionfile_source" in line: 2037 | setters = True 2038 | if len(found) != 3: 2039 | print("") 2040 | print("Your setup.py appears to be missing some important items") 2041 | print("(but I might be wrong). Please make sure it has something") 2042 | print("roughly like the following:") 2043 | print("") 2044 | print(" import versioneer") 2045 | print(" setup( version=versioneer.get_version(),") 2046 | print(" cmdclass=versioneer.get_cmdclass(), ...)") 2047 | print("") 2048 | errors += 1 2049 | if setters: 2050 | print("You should remove lines like 'versioneer.VCS = ' and") 2051 | print("'versioneer.versionfile_source = ' . This configuration") 2052 | print("now lives in setup.cfg, and should be removed from setup.py") 2053 | print("") 2054 | errors += 1 2055 | return errors 2056 | 2057 | 2058 | if __name__ == "__main__": 2059 | cmd = sys.argv[1] 2060 | if cmd == "setup": 2061 | errors = do_setup() 2062 | errors += scan_setup_py() 2063 | if errors: 2064 | sys.exit(1) 2065 | --------------------------------------------------------------------------------
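The render_* helpers in versioneer.py above turn the "pieces" gathered from 'git describe' into the different version-string styles (pep440, pep440-post, git-describe, and so on), and render() is the dispatcher that get_versions() calls once git_pieces_from_vcs() has filled in the real values. Below is a minimal sketch of how those styles come out; it assumes it is run from the repository root so that 'import versioneer' picks up the file above, and the values in the pieces dict are invented purely for illustration (no git repository is needed, since render() only formats the dict it is given).

# Sketch: exercise versioneer's render() with a hand-built "pieces" dict.
import versioneer

pieces = {
    "error": None,                 # render() short-circuits if this is set
    "long": "abc1234" + "0" * 33,  # fake full revision id
    "short": "abc1234",            # fake short revision id
    "closest-tag": "1.2",          # nearest tag, with tag_prefix stripped
    "distance": 3,                 # commits since that tag
    "dirty": True,                 # uncommitted changes present
    "branch": "feature-x",         # used by the *-branch styles
    "date": "2021-05-01T12:00:00-0600",
}

for style in ("pep440", "pep440-post", "git-describe"):
    print(style, "->", versioneer.render(pieces, style)["version"])

# Tracing the functions above, this should print:
# pep440 -> 1.2+3.gabc1234.dirty
# pep440-post -> 1.2.post3.dev0+gabc1234
# git-describe -> 1.2-3-gabc1234-dirty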