├── .codecov.yml ├── .coveragerc ├── .flake8 ├── .gitattributes ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── borf-runner.py ├── borf ├── __init__.py ├── __main__.py ├── _version.py ├── borf.py ├── get_orfs.py └── tests │ ├── __init__.py │ ├── test_borf.py │ ├── test_frames.fa │ ├── test_getorfs.fa │ ├── test_mutliple_frame_orfs.fa │ └── test_trans_all_frames.fa ├── docs ├── Makefile ├── make.bat └── source │ ├── _static │ └── .placeholder │ ├── conf.py │ ├── index.rst │ ├── installation.rst │ ├── min_versions.rst │ ├── release-history.rst │ └── usage.rst ├── github_deploy_key_betsig_borf.enc ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── test_data ├── test_frames.fa ├── test_getorfs.fa ├── test_mutliple_frame_orfs.fa ├── test_stopsitent.fa └── test_trans_all_frames.fa └── versioneer.py /.codecov.yml: -------------------------------------------------------------------------------- 1 | # show coverage in CI status, not as a comment. 2 | comment: off 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | patch: 9 | default: 10 | target: auto 11 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = 3 | borf 4 | [report] 5 | omit = 6 | */python?.?/* 7 | */site-packages/nose/* 8 | # ignore _version.py and versioneer.py 9 | .*version.* 10 | *_version.py 11 | 12 | exclude_lines = 13 | if __name__ == '__main__': 14 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git,__pycache__,build,dist,versioneer.py,borf/_version.py,docs/source/conf.py 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | borf/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | venv/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | 56 | # Sphinx documentation 57 | docs/build/ 58 | docs/source/generated/ 59 | 60 | # pytest 61 | .pytest_cache/ 62 | 63 | # PyBuilder 64 | target/ 65 | 66 | # Editor files 67 | #mac 68 | .DS_Store 69 | *~ 70 | 71 | #vim 72 | *.swp 73 | *.swo 74 | 75 | #pycharm 76 | .idea/* 77 | 78 | 79 | #Ipython Notebook 80 | .ipynb_checkpoints 81 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.7 4 | cache: 5 | directories: 6 | - $HOME/.cache/pip 7 | - $HOME/.ccache # https://github.com/travis-ci/travis-ci/issues/5853 8 | 9 | env: 10 | global: 11 | # Doctr deploy key for betsig/borf 12 | - secure: "rp1A///9UiizP66RSQyOUE2aR5RIrJYhs6gcpqR4TLLVWFCAC149R9tLcRjDRJ8alWkEoobgfBnTxYRfXP+LvnruxL3SYkfwTMm46hO2IouIsnLU9QQoxZGcjSHLTPnu+kSybobFPmTvdZ84YIV8awyiUg4mhqvNW+lfNC6nONisS/tSkR7jWapyKH7X+HlpYIBKADpUaE8D383uCL6bhLPkxgrmqA989BvFwxRofvVw4j4a2KXcC+vQjIYxVkSO5q27vkKI6yZNaAxlxUu2yVjSQ84MZ925Rw6QmqhSnfwxn9a22FlNl7JyWmeBjdQRv3WIjdqGGPORa5NvsdalT76GjFtTB/G9eyB2MH8oAcyBQkj421lG6ktcp4ldhSXguKc45QyfGNqc9XLbTafbs28SHbM86kIbRq1y3R2jDnYL6bh/AA0k9zjsMQ8J7taKv+EYV3Ny5wPsbubH2cLM5ioQykcRdIMDpkcpYwYC4pLErcGYb7/AFX3zRJxBxf2y4Vg0+/88ftCVrSnpmJsqrkHgu9We1Y4zmMKID6IENGyUlc5p8tbuNjSv3TRUJs8g6sNym1rqPCciDILkJUuqZ2NQ57qPCXejTPmVhtsEHKCCc7HVKibnMVU5Xj/gPJiiQTT3OGQe8YZ42Bi482f8Lb9+PknG7ivNeUMz7K9rHKE=" 13 | 14 | 15 | install: 16 | # Install this package and the packages listed in requirements.txt. 17 | - pip install . 18 | # Install extra requirements for running tests and building docs. 19 | - pip install -r requirements-dev.txt 20 | 21 | script: 22 | - coverage run -m pytest # Run the tests and check for test coverage. 23 | - coverage report -m # Generate test coverage report. 24 | - codecov # Upload the report to codecov. 25 | - flake8 --max-line-length=115 # Enforce code style (but relax line length limit a bit). 26 | - set -e 27 | - make -C docs html 28 | - pip install doctr 29 | - doctr deploy --built-docs docs/build/html . -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Maintainer 6 | ---------- 7 | 8 | * Beth Signal 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? See: CONTRIBUTING.rst 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! Every 6 | little bit helps, and credit will always be given. 7 | 8 | You can contribute in many ways: 9 | 10 | Types of Contributions 11 | ---------------------- 12 | 13 | Report Bugs 14 | ~~~~~~~~~~~ 15 | 16 | Report bugs at https://github.com/betsig/borf/issues. 17 | 18 | If you are reporting a bug, please include: 19 | 20 | * Any details about your local setup that might be helpful in troubleshooting. 
21 | * Detailed steps to reproduce the bug.
22 |
23 | Fix Bugs
24 | ~~~~~~~~
25 |
26 | Look through the GitHub issues for bugs. Anything tagged with "bug"
27 | is open to whoever wants to implement it.
28 |
29 | Implement Features
30 | ~~~~~~~~~~~~~~~~~~
31 |
32 | Look through the GitHub issues for features. Anything tagged with "feature"
33 | is open to whoever wants to implement it.
34 |
35 | Write Documentation
36 | ~~~~~~~~~~~~~~~~~~~
37 |
38 | borf could always use more documentation, whether
39 | as part of the official borf docs, in docstrings,
40 | or even on the web in blog posts, articles, and such.
41 |
42 | Submit Feedback
43 | ~~~~~~~~~~~~~~~
44 |
45 | The best way to send feedback is to file an issue at https://github.com/betsig/borf/issues.
46 |
47 | If you are proposing a feature:
48 |
49 | * Explain in detail how it would work.
50 | * Keep the scope as narrow as possible, to make it easier to implement.
51 | * Remember that this is a volunteer-driven project, and that contributions
52 |   are welcome :)
53 |
54 | Get Started!
55 | ------------
56 |
57 | Ready to contribute? Here's how to set up `borf` for local development.
58 |
59 | 1. Fork the `borf` repo on GitHub.
60 | 2. Clone your fork locally::
61 |
62 |     $ git clone git@github.com:your_name_here/borf.git
63 |
64 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
65 |
66 |     $ mkvirtualenv borf
67 |     $ cd borf/
68 |     $ python setup.py develop
69 |
70 | 4. Create a branch for local development::
71 |
72 |     $ git checkout -b name-of-your-bugfix-or-feature
73 |
74 |    Now you can make your changes locally.
75 |
76 | 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox::
77 |
78 |     $ flake8 borf tests
79 |     $ python setup.py test
80 |     $ tox
81 |
82 |    To get flake8 and tox, just pip install them into your virtualenv.
83 |
84 | 6. Commit your changes and push your branch to GitHub::
85 |
86 |     $ git add .
87 |     $ git commit -m "Your detailed description of your changes."
88 |     $ git push origin name-of-your-bugfix-or-feature
89 |
90 | 7. Submit a pull request through the GitHub website.
91 |
92 | Pull Request Guidelines
93 | -----------------------
94 |
95 | Before you submit a pull request, check that it meets these guidelines:
96 |
97 | 1. The pull request should include tests.
98 | 2. If the pull request adds functionality, the docs should be updated. Put
99 |    your new functionality into a function with a docstring, and add the
100 |    feature to the list in README.rst.
101 | 3. The pull request should work for Python 3.7 (the version tested on Travis). Check
102 |    https://travis-ci.org/betsig/borf/pull_requests
103 |    and make sure that the tests pass.
104 |
105 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Beth Signal
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include AUTHORS.rst
2 | include CONTRIBUTING.rst
3 | include LICENSE
4 | include README.rst
5 | include requirements.txt
6 |
7 | recursive-exclude * __pycache__
8 | recursive-exclude * *.py[co]
9 |
10 | recursive-include docs *.rst conf.py Makefile make.bat
11 |
12 | include versioneer.py
13 | include borf/_version.py
14 |
15 | # If including data files in the package, add them like:
16 | # include path/to/data_file
17 | include test_data/*.fa
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ===============================
2 | borf
3 | ===============================
4 |
5 | .. image:: https://img.shields.io/pypi/v/borf.svg
6 |         :target: https://pypi.python.org/pypi/borf
7 |
8 |
9 | **Better ORF predictions**
10 |
11 | Fast and flexible ORF prediction in Python, with appropriate defaults for *de novo* assembled transcripts.
12 |
13 | Installation
14 | ------------
15 | .. code-block:: console
16 |
17 |     pip install borf
18 |
19 |
20 | Usage
21 | ------------
22 | For basic usage, run borf with an input fasta-formatted file as the first argument.
23 |
24 | .. code-block:: console
25 |
26 |     borf input.fa
27 |
28 |
29 | For a more detailed guide on usage, please see the wiki: https://github.com/betsig/borf/wiki
30 |
--------------------------------------------------------------------------------
/borf-runner.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | '''Convenience wrapper for running borf directly from the source tree'''
4 |
5 | from borf.borf import main
6 |
7 | if __name__ == '__main__':
8 |     main()
9 |
--------------------------------------------------------------------------------
/borf/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from ._version import get_versions
3 | __version__ = get_versions()['version']
4 | del get_versions
5 |
--------------------------------------------------------------------------------
/borf/__main__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
4 | """borf.__main__: executed when the borf directory is called as a script."""
5 |
6 |
7 | from .borf import main
8 | main()
9 |
--------------------------------------------------------------------------------
/borf/_version.py:
--------------------------------------------------------------------------------
1 |
2 | # This file helps to compute a version number in source trees obtained from
3 | # git-archive tarball (such as those provided by GitHub's download-from-tag
4 | # feature). Distribution tarballs (built by setup.py sdist) and build
5 | # directories (produced by setup.py build) will contain a much shorter file
6 | # that just contains the computed version number.
7 |
8 | # This file is released into the public domain. Generated by
9 | # versioneer-0.18 (https://github.com/warner/python-versioneer)
10 |
11 | """Git implementation of _version.py."""
12 |
13 | import errno
14 | import os
15 | import re
16 | import subprocess
17 | import sys
18 |
19 |
20 | def get_keywords():
21 |     """Get the keywords needed to look up the version information."""
22 |     # these strings will be replaced by git during git-archive.
23 |     # setup.py/versioneer.py will grep for the variable names, so they must
24 |     # each be defined on a line of their own. _version.py will just call
25 |     # get_keywords().
26 | git_refnames = " (HEAD -> master)" 27 | git_full = "52bca757f95027388c5f8cdb8de80d88d5974b27" 28 | git_date = "2021-08-24 11:48:26 +1000" 29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 30 | return keywords 31 | 32 | 33 | class VersioneerConfig: 34 | """Container for Versioneer configuration parameters.""" 35 | 36 | 37 | def get_config(): 38 | """Create, populate and return the VersioneerConfig() object.""" 39 | # these strings are filled in when 'setup.py versioneer' creates 40 | # _version.py 41 | cfg = VersioneerConfig() 42 | cfg.VCS = "git" 43 | cfg.style = "pep440-post" 44 | cfg.tag_prefix = "v" 45 | cfg.parentdir_prefix = "None" 46 | cfg.versionfile_source = "borf/_version.py" 47 | cfg.verbose = False 48 | return cfg 49 | 50 | 51 | class NotThisMethod(Exception): 52 | """Exception raised if a method is not valid for the current scenario.""" 53 | 54 | 55 | LONG_VERSION_PY = {} 56 | HANDLERS = {} 57 | 58 | 59 | def register_vcs_handler(vcs, method): # decorator 60 | """Decorator to mark a method as the handler for a particular VCS.""" 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | return decorate 68 | 69 | 70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 71 | env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 80 | stdout=subprocess.PIPE, 81 | stderr=(subprocess.PIPE if hide_stderr 82 | else None)) 83 | break 84 | except EnvironmentError: 85 | e = sys.exc_info()[1] 86 | if e.errno == errno.ENOENT: 87 | continue 88 | if verbose: 89 | print("unable to run %s" % dispcmd) 90 | print(e) 91 | return None, None 92 | else: 93 | if verbose: 94 | print("unable to find command, tried %s" % (commands,)) 95 | return None, None 96 | stdout = p.communicate()[0].strip() 97 | if sys.version_info[0] >= 3: 98 | stdout = stdout.decode() 99 | if p.returncode != 0: 100 | if verbose: 101 | print("unable to run %s (error)" % dispcmd) 102 | print("stdout was %s" % stdout) 103 | return None, p.returncode 104 | return stdout, p.returncode 105 | 106 | 107 | def versions_from_parentdir(parentdir_prefix, root, verbose): 108 | """Try to determine the version from the parent directory name. 109 | 110 | Source tarballs conventionally unpack into a directory that includes both 111 | the project name and a version string. 
We will also support searching up 112 | two directory levels for an appropriately named parent directory 113 | """ 114 | rootdirs = [] 115 | 116 | for i in range(3): 117 | dirname = os.path.basename(root) 118 | if dirname.startswith(parentdir_prefix): 119 | return {"version": dirname[len(parentdir_prefix):], 120 | "full-revisionid": None, 121 | "dirty": False, "error": None, "date": None} 122 | else: 123 | rootdirs.append(root) 124 | root = os.path.dirname(root) # up a level 125 | 126 | if verbose: 127 | print("Tried directories %s but none started with prefix %s" % 128 | (str(rootdirs), parentdir_prefix)) 129 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 130 | 131 | 132 | @register_vcs_handler("git", "get_keywords") 133 | def git_get_keywords(versionfile_abs): 134 | """Extract version information from the given file.""" 135 | # the code embedded in _version.py can just fetch the value of these 136 | # keywords. When used from setup.py, we don't want to import _version.py, 137 | # so we do it with a regexp instead. This function is not used from 138 | # _version.py. 139 | keywords = {} 140 | try: 141 | f = open(versionfile_abs, "r") 142 | for line in f.readlines(): 143 | if line.strip().startswith("git_refnames ="): 144 | mo = re.search(r'=\s*"(.*)"', line) 145 | if mo: 146 | keywords["refnames"] = mo.group(1) 147 | if line.strip().startswith("git_full ="): 148 | mo = re.search(r'=\s*"(.*)"', line) 149 | if mo: 150 | keywords["full"] = mo.group(1) 151 | if line.strip().startswith("git_date ="): 152 | mo = re.search(r'=\s*"(.*)"', line) 153 | if mo: 154 | keywords["date"] = mo.group(1) 155 | f.close() 156 | except EnvironmentError: 157 | pass 158 | return keywords 159 | 160 | 161 | @register_vcs_handler("git", "keywords") 162 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 163 | """Get version information from git keywords.""" 164 | if not keywords: 165 | raise NotThisMethod("no keywords at all, weird") 166 | date = keywords.get("date") 167 | if date is not None: 168 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 169 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 170 | # -like" string, which we must then edit to make compliant), because 171 | # it's been around since git-1.5.3, and it's too difficult to 172 | # discover which version we're using, or to work around using an 173 | # older one. 174 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 175 | refnames = keywords["refnames"].strip() 176 | if refnames.startswith("$Format"): 177 | if verbose: 178 | print("keywords are unexpanded, not using") 179 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 180 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 181 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 182 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 183 | TAG = "tag: " 184 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 185 | if not tags: 186 | # Either we're using git < 1.8.3, or there really are no tags. We use 187 | # a heuristic: assume all version tags have a digit. The old git %d 188 | # expansion behaves like git log --decorate=short and strips out the 189 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 190 | # between branches and tags. By ignoring refnames without digits, we 191 | # filter out many common branch names like "release" and 192 | # "stabilization", as well as "HEAD" and "master". 
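        # (illustrative example, not from the source: refs {'HEAD', 'master',
        # 'v1.0.2'} would keep only 'v1.0.2', the only refname with a digit)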
193 | tags = set([r for r in refs if re.search(r'\d', r)]) 194 | if verbose: 195 | print("discarding '%s', no digits" % ",".join(refs - tags)) 196 | if verbose: 197 | print("likely tags: %s" % ",".join(sorted(tags))) 198 | for ref in sorted(tags): 199 | # sorting will prefer e.g. "2.0" over "2.0rc1" 200 | if ref.startswith(tag_prefix): 201 | r = ref[len(tag_prefix):] 202 | if verbose: 203 | print("picking %s" % r) 204 | return {"version": r, 205 | "full-revisionid": keywords["full"].strip(), 206 | "dirty": False, "error": None, 207 | "date": date} 208 | # no suitable tags, so version is "0+unknown", but full hex is still there 209 | if verbose: 210 | print("no suitable tags, using unknown + full revision id") 211 | return {"version": "0+unknown", 212 | "full-revisionid": keywords["full"].strip(), 213 | "dirty": False, "error": "no suitable tags", "date": None} 214 | 215 | 216 | @register_vcs_handler("git", "pieces_from_vcs") 217 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 218 | """Get version from 'git describe' in the root of the source tree. 219 | 220 | This only gets called if the git-archive 'subst' keywords were *not* 221 | expanded, and _version.py hasn't already been rewritten with a short 222 | version string, meaning we're inside a checked out source tree. 223 | """ 224 | GITS = ["git"] 225 | if sys.platform == "win32": 226 | GITS = ["git.cmd", "git.exe"] 227 | 228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 229 | hide_stderr=True) 230 | if rc != 0: 231 | if verbose: 232 | print("Directory %s not under git control" % root) 233 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 234 | 235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 236 | # if there isn't one, this yields HEX[-dirty] (no NUM) 237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 238 | "--always", "--long", 239 | "--match", "%s*" % tag_prefix], 240 | cwd=root) 241 | # --long was added in git-1.5.5 242 | if describe_out is None: 243 | raise NotThisMethod("'git describe' failed") 244 | describe_out = describe_out.strip() 245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 246 | if full_out is None: 247 | raise NotThisMethod("'git rev-parse' failed") 248 | full_out = full_out.strip() 249 | 250 | pieces = {} 251 | pieces["long"] = full_out 252 | pieces["short"] = full_out[:7] # maybe improved later 253 | pieces["error"] = None 254 | 255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 256 | # TAG might have hyphens. 257 | git_describe = describe_out 258 | 259 | # look for -dirty suffix 260 | dirty = git_describe.endswith("-dirty") 261 | pieces["dirty"] = dirty 262 | if dirty: 263 | git_describe = git_describe[:git_describe.rindex("-dirty")] 264 | 265 | # now we have TAG-NUM-gHEX or HEX 266 | 267 | if "-" in git_describe: 268 | # TAG-NUM-gHEX 269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 270 | if not mo: 271 | # unparseable. Maybe git-describe is misbehaving? 
272 | pieces["error"] = ("unable to parse git-describe output: '%s'" 273 | % describe_out) 274 | return pieces 275 | 276 | # tag 277 | full_tag = mo.group(1) 278 | if not full_tag.startswith(tag_prefix): 279 | if verbose: 280 | fmt = "tag '%s' doesn't start with prefix '%s'" 281 | print(fmt % (full_tag, tag_prefix)) 282 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 283 | % (full_tag, tag_prefix)) 284 | return pieces 285 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 286 | 287 | # distance: number of commits since tag 288 | pieces["distance"] = int(mo.group(2)) 289 | 290 | # commit: short hex revision ID 291 | pieces["short"] = mo.group(3) 292 | 293 | else: 294 | # HEX: no tags 295 | pieces["closest-tag"] = None 296 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 297 | cwd=root) 298 | pieces["distance"] = int(count_out) # total number of commits 299 | 300 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 301 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 302 | cwd=root)[0].strip() 303 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 304 | 305 | return pieces 306 | 307 | 308 | def plus_or_dot(pieces): 309 | """Return a + if we don't already have one, else return a .""" 310 | if "+" in pieces.get("closest-tag", ""): 311 | return "." 312 | return "+" 313 | 314 | 315 | def render_pep440(pieces): 316 | """Build up version string, with post-release "local version identifier". 317 | 318 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 319 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 320 | 321 | Exceptions: 322 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 323 | """ 324 | if pieces["closest-tag"]: 325 | rendered = pieces["closest-tag"] 326 | if pieces["distance"] or pieces["dirty"]: 327 | rendered += plus_or_dot(pieces) 328 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 329 | if pieces["dirty"]: 330 | rendered += ".dirty" 331 | else: 332 | # exception #1 333 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 334 | pieces["short"]) 335 | if pieces["dirty"]: 336 | rendered += ".dirty" 337 | return rendered 338 | 339 | 340 | def render_pep440_pre(pieces): 341 | """TAG[.post.devDISTANCE] -- No -dirty. 342 | 343 | Exceptions: 344 | 1: no tags. 0.post.devDISTANCE 345 | """ 346 | if pieces["closest-tag"]: 347 | rendered = pieces["closest-tag"] 348 | if pieces["distance"]: 349 | rendered += ".post.dev%d" % pieces["distance"] 350 | else: 351 | # exception #1 352 | rendered = "0.post.dev%d" % pieces["distance"] 353 | return rendered 354 | 355 | 356 | def render_pep440_post(pieces): 357 | """TAG[.postDISTANCE[.dev0]+gHEX] . 358 | 359 | The ".dev0" means dirty. Note that .dev0 sorts backwards 360 | (a dirty tree will appear "older" than the corresponding clean one), 361 | but you shouldn't be releasing software with -dirty anyways. 362 | 363 | Exceptions: 364 | 1: no tags. 
0.postDISTANCE[.dev0] 365 | """ 366 | if pieces["closest-tag"]: 367 | rendered = pieces["closest-tag"] 368 | if pieces["distance"] or pieces["dirty"]: 369 | rendered += ".post%d" % pieces["distance"] 370 | if pieces["dirty"]: 371 | rendered += ".dev0" 372 | rendered += plus_or_dot(pieces) 373 | rendered += "g%s" % pieces["short"] 374 | else: 375 | # exception #1 376 | rendered = "0.post%d" % pieces["distance"] 377 | if pieces["dirty"]: 378 | rendered += ".dev0" 379 | rendered += "+g%s" % pieces["short"] 380 | return rendered 381 | 382 | 383 | def render_pep440_old(pieces): 384 | """TAG[.postDISTANCE[.dev0]] . 385 | 386 | The ".dev0" means dirty. 387 | 388 | Eexceptions: 389 | 1: no tags. 0.postDISTANCE[.dev0] 390 | """ 391 | if pieces["closest-tag"]: 392 | rendered = pieces["closest-tag"] 393 | if pieces["distance"] or pieces["dirty"]: 394 | rendered += ".post%d" % pieces["distance"] 395 | if pieces["dirty"]: 396 | rendered += ".dev0" 397 | else: 398 | # exception #1 399 | rendered = "0.post%d" % pieces["distance"] 400 | if pieces["dirty"]: 401 | rendered += ".dev0" 402 | return rendered 403 | 404 | 405 | def render_git_describe(pieces): 406 | """TAG[-DISTANCE-gHEX][-dirty]. 407 | 408 | Like 'git describe --tags --dirty --always'. 409 | 410 | Exceptions: 411 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 412 | """ 413 | if pieces["closest-tag"]: 414 | rendered = pieces["closest-tag"] 415 | if pieces["distance"]: 416 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 417 | else: 418 | # exception #1 419 | rendered = pieces["short"] 420 | if pieces["dirty"]: 421 | rendered += "-dirty" 422 | return rendered 423 | 424 | 425 | def render_git_describe_long(pieces): 426 | """TAG-DISTANCE-gHEX[-dirty]. 427 | 428 | Like 'git describe --tags --dirty --always -long'. 429 | The distance/hash is unconditional. 430 | 431 | Exceptions: 432 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 433 | """ 434 | if pieces["closest-tag"]: 435 | rendered = pieces["closest-tag"] 436 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 437 | else: 438 | # exception #1 439 | rendered = pieces["short"] 440 | if pieces["dirty"]: 441 | rendered += "-dirty" 442 | return rendered 443 | 444 | 445 | def render(pieces, style): 446 | """Render the given version pieces into the requested style.""" 447 | if pieces["error"]: 448 | return {"version": "unknown", 449 | "full-revisionid": pieces.get("long"), 450 | "dirty": None, 451 | "error": pieces["error"], 452 | "date": None} 453 | 454 | if not style or style == "default": 455 | style = "pep440" # the default 456 | 457 | if style == "pep440": 458 | rendered = render_pep440(pieces) 459 | elif style == "pep440-pre": 460 | rendered = render_pep440_pre(pieces) 461 | elif style == "pep440-post": 462 | rendered = render_pep440_post(pieces) 463 | elif style == "pep440-old": 464 | rendered = render_pep440_old(pieces) 465 | elif style == "git-describe": 466 | rendered = render_git_describe(pieces) 467 | elif style == "git-describe-long": 468 | rendered = render_git_describe_long(pieces) 469 | else: 470 | raise ValueError("unknown style '%s'" % style) 471 | 472 | return {"version": rendered, "full-revisionid": pieces["long"], 473 | "dirty": pieces["dirty"], "error": None, 474 | "date": pieces.get("date")} 475 | 476 | 477 | def get_versions(): 478 | """Get version information or return default if unable to do so.""" 479 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 480 | # __file__, we can work backwards from there to the root. 
Some
481 |     # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
482 |     # case we can only use expanded keywords.
483 |
484 |     cfg = get_config()
485 |     verbose = cfg.verbose
486 |
487 |     try:
488 |         return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
489 |                                           verbose)
490 |     except NotThisMethod:
491 |         pass
492 |
493 |     try:
494 |         root = os.path.realpath(__file__)
495 |         # versionfile_source is the relative path from the top of the source
496 |         # tree (where the .git directory might live) to this file. Invert
497 |         # this to find the root from __file__.
498 |         for i in cfg.versionfile_source.split('/'):
499 |             root = os.path.dirname(root)
500 |     except NameError:
501 |         return {"version": "0+unknown", "full-revisionid": None,
502 |                 "dirty": None,
503 |                 "error": "unable to find root of source tree",
504 |                 "date": None}
505 |
506 |     try:
507 |         pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
508 |         return render(pieces, cfg.style)
509 |     except NotThisMethod:
510 |         pass
511 |
512 |     try:
513 |         if cfg.parentdir_prefix:
514 |             return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
515 |     except NotThisMethod:
516 |         pass
517 |
518 |     return {"version": "0+unknown", "full-revisionid": None,
519 |             "dirty": None,
520 |             "error": "unable to compute version", "date": None}
521 |
--------------------------------------------------------------------------------
/borf/borf.py:
--------------------------------------------------------------------------------
1 |
2 | '''borf.borf: for running borf'''
3 |
4 |
5 | import argparse
6 | import os
7 | import sys
8 | import re
9 | from Bio import SeqIO
10 | import pandas as pd
11 | from .get_orfs import get_orfs, write_orf_fasta, write_orf_data, write_orf_cds, batch_iterator
12 |
13 |
14 | def main():
15 |
16 |     parser = argparse.ArgumentParser(description='Get ORF predictions from a nucleotide fasta file')
17 |
18 |     parser.add_argument('Fasta', metavar='fasta_file', type=str, help='fasta file to predict ORFs')
19 |     parser.add_argument('-o', '--output_path', type=str, help='path to write output files. [OUTPUT_PATH].pep, [OUTPUT_PATH].txt and [OUTPUT_PATH].cds (default: input .fa file name)')
20 |     parser.add_argument('-s', '--strand', action='store_true', help='Predict ORFs for both strands')
21 |     parser.add_argument('-a', '--all_orfs', action='store_true', help='Return all ORFs for each sequence longer than the cutoff')
22 |     parser.add_argument('-l', '--orf_length', type=int, default=100, help='Minimum ORF length (AA) (default: %(default)d)')
23 |     parser.add_argument('-u', '--upstream_incomplete_length', type=int, default=50, help='Minimum length (AA) of uninterrupted sequence upstream of an ORF for it to be included for incomplete_5prime transcripts (default: %(default)d)')
24 |     parser.add_argument('-c', '--genetic_code', type=int, default=1, help='Genetic code (int: 1-14) to use for translation (default: %(default)d). See https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for list')
25 |     parser.add_argument('-b', '--batch_size', type=int, default=10000, help='Number of fasta records to read in each batch')
26 |     parser.add_argument('-f', '--force_overwrite', action='store_true', help='Force overwriting of output files')
27 |
28 |     args = parser.parse_args()
29 |
30 |     input_file = args.Fasta
31 |
32 |     if args.output_path is None:
33 |         output_path = os.path.splitext(input_file)[0]
34 |     else:
35 |         output_path = args.output_path
36 |
37 |     output_path_pep = output_path + '.pep'
38 |     output_path_txt = output_path + '.txt'
39 |     output_path_cds = output_path + '.cds'
40 |
41 |     # check if files exist already
42 |     if os.path.isfile(output_path_pep) or os.path.isfile(output_path_txt) or os.path.isfile(output_path_cds):
43 |
44 |         if os.path.isfile(output_path_pep) and os.path.isfile(output_path_txt) and os.path.isfile(output_path_cds):
45 |             print(output_path_pep + ", " + output_path_txt + " and " + output_path_cds + " already exist")
46 |         elif os.path.isfile(output_path_pep) and os.path.isfile(output_path_txt):
47 |             print(output_path_pep + " and " + output_path_txt + " already exist")
48 |         elif os.path.isfile(output_path_pep) and os.path.isfile(output_path_cds):
49 |             print(output_path_pep + " and " + output_path_cds + " already exist")
50 |         elif os.path.isfile(output_path_cds) and os.path.isfile(output_path_txt):
51 |             print(output_path_txt + " and " + output_path_cds + " already exist")
52 |         elif os.path.isfile(output_path_pep):
53 |             print(output_path_pep + " already exists")
54 |         elif os.path.isfile(output_path_cds):
55 |             print(output_path_cds + " already exists")
56 |         else:
57 |             print(output_path_txt + " already exists")
58 |
59 |         if not args.force_overwrite:
60 |             overwrite = input("Do you want to overwrite these files? 
([Y]/n): ").lower().strip()[:1] 61 | if not (overwrite == "y" or overwrite == ""): 62 | sys.exit(1) 63 | else: 64 | # remove old files so you don't append new data to old files 65 | if os.path.isfile(output_path_pep): 66 | os.remove(output_path_pep) 67 | if os.path.isfile(output_path_txt): 68 | os.remove(output_path_txt) 69 | if os.path.isfile(output_path_cds): 70 | os.remove(output_path_cds) 71 | else: 72 | print('Overwriting files') 73 | if os.path.isfile(output_path_pep): 74 | os.remove(output_path_pep) 75 | if os.path.isfile(output_path_txt): 76 | os.remove(output_path_txt) 77 | if os.path.isfile(output_path_cds): 78 | os.remove(output_path_cds) 79 | 80 | # number of sequences 81 | n_seqs = 0 82 | for record in SeqIO.parse(input_file, 'fasta'): 83 | n_seqs += 1 84 | 85 | batch_size = args.batch_size 86 | 87 | record_iter = SeqIO.parse(open(input_file), 'fasta') 88 | 89 | strand_warning = False 90 | for i, batch in enumerate(batch_iterator(record_iter, batch_size)): 91 | all_sequences = [] 92 | for record in batch: 93 | all_sequences.append(record.upper()) 94 | 95 | 96 | if i == 0: 97 | # check strandedness 98 | 99 | orf_data = get_orfs(all_sequences, both_strands=True, 100 | min_orf_length=args.orf_length, all_orfs=True, 101 | min_upstream_length=args.upstream_incomplete_length, 102 | genetic_code=args.genetic_code) 103 | 104 | orf_data_strand_bias = orf_data.sort_values(by='orf_length', ascending = False) 105 | orf_data_strand_bias = orf_data_strand_bias.drop_duplicates('id', keep='first') 106 | 107 | if len(orf_data_strand_bias) >= 10: 108 | 109 | pos_bias = (orf_data_strand_bias['strand'][orf_data_strand_bias['orf_class'] == "complete"] == "+").sum() 110 | neg_bias = (orf_data_strand_bias['strand'][orf_data_strand_bias['orf_class'] == "complete"] == "-").sum() 111 | positive_strand_bias = pos_bias / (pos_bias+neg_bias) 112 | 113 | if positive_strand_bias > 0.7 and args.strand == True: 114 | #data is likely from a stranded assembly. 
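                    # e.g. (illustrative figures, not from the source) 95 of 100
                    # transcripts with their longest complete ORF on the '+' strand
                    # gives positive_strand_bias = 0.95 > 0.7, so predicting on
                    # both strands (-s/--strand) is probably not wanted here.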
115 |                     print("Are you sure your input .fasta file isn't stranded?")
116 |                     print(str(positive_strand_bias*100) + "% of transcripts have the longest ORF on the + strand")
117 |                     strand_warning = True
118 |
119 |                 if positive_strand_bias <= 0.7 and args.strand == False:
120 |                     print("Are you sure your input .fasta file is stranded?")
121 |                     print(str(positive_strand_bias*100) + "% of transcripts have the longest ORF on the + strand")
122 |                     strand_warning = True
123 |
124 |             if args.strand == False:
125 |                 orf_data = orf_data[orf_data['strand'] == '+']
126 |             if args.all_orfs == False:
127 |                 idx = orf_data.groupby(['id'])['orf_length'].transform(max) == orf_data['orf_length']
128 |                 orf_data = orf_data[idx]
129 |                 orf_data['isoform_number'] = 1
130 |                 orf_data['fasta_id'] = [re.sub("[.]orf[0-9]*", ".orf1", x) for x in orf_data['fasta_id']]
131 |
132 |         else:
133 |             orf_data = get_orfs(all_sequences, both_strands=args.strand,
134 |                                 min_orf_length=args.orf_length, all_orfs=args.all_orfs,
135 |                                 min_upstream_length=args.upstream_incomplete_length,
136 |                                 genetic_code=args.genetic_code)
137 |
138 |         # extract nt seqs at CDS
139 |         nucleotide_seq = []
140 |         nucleotide_id = []
141 |         for seq_string in all_sequences:
142 |             nucleotide_seq.append(str(seq_string.seq))
143 |             nucleotide_id.append(str(seq_string.id))
144 |         seq_df = pd.DataFrame(list(zip(nucleotide_id, nucleotide_seq)), columns=['id', 'nt_seq'])
145 |
146 |         # merge orfs with all_sequences
147 |         orf_data = pd.merge(seq_df, orf_data, on='id', how='right')
148 |         orf_data['cds_seq'] = orf_data.apply(lambda x: x['nt_seq'][(x['start_site_nt']-1):x['stop_site_nt']], axis=1)
149 |
150 |
151 |         write_orf_data(orf_data, output_path_txt)
152 |         write_orf_fasta(orf_data, output_path_pep)
153 |         write_orf_cds(orf_data, output_path_cds)
154 |
155 |         start_seq_n = (i*batch_size) + 1
156 |         end_seq_n = min(start_seq_n + (batch_size - 1), n_seqs)
157 |         print("Processed sequences " + str(start_seq_n) + " to " + str(end_seq_n) + " of " + str(n_seqs))
158 |
159 |     print("Done with borf.")
160 |     print("Results in " + output_path_pep + ", " + output_path_txt + " and " + output_path_cds)
161 |
162 |     if strand_warning == True:
163 |         print("This data caused a warning based on strandedness. Please check the top of the log for details and rerun with appropriate flags if necessary.")
164 |
--------------------------------------------------------------------------------
/borf/get_orfs.py:
--------------------------------------------------------------------------------
1 | # get_orfs.py
2 |
3 | import numpy as np
4 | import re as re
5 | import pandas as pd
6 | import skbio as skbio
7 | from Bio import SeqIO
8 | import os
9 |
10 |
11 | def get_orfs(all_sequences, both_strands=False, min_orf_length=100,
12 |              all_orfs=False, min_upstream_length=50, genetic_code=1):
13 |     """
14 |     Produce a pandas DataFrame of predicted ORFs from a set of sequences.
15 |
16 |     Parameters
17 |     ----------
18 |     all_sequences : list
19 |         list of SeqIO sequence records to predict ORFs for
20 |         (e.g. the records of a fasta file read in with read_fasta
21 |         or Bio.SeqIO.parse)
22 |     both_strands : bool
23 |         Provide predictions for both strands? (i.e. reverse complement).
24 |     min_orf_length : int
25 |         minimum length for a predicted ORF to be reported
26 |     all_orfs : bool
27 |         Return all ORFs longer than min_orf_length?
28 |         Set to False (default) to only return the longest ORF for each sequence.
29 |     min_upstream_length : int
30 |         Minimum length of AA sequence upstream of a canonical start site (e.g. MET) to be used when reporting incomplete_5prime ORFs.
31 | Upstream sequence starts from the start of the translated sequence, and contains no STOP codons. 32 | 33 | Returns 34 | ------- 35 | orf_df : DataFrame 36 | DataFrame containing predicted ORF data and sequences 37 | 38 | """ 39 | # all_sequences = read_fasta(fasta_file) 40 | # create all frame translations of nt sequence 41 | ids, aa_frames, frame, strand, seq_length_nt, seq_length = translate_all_frames(all_sequences, both_strands=both_strands, genetic_code=genetic_code) 42 | 43 | if all_orfs is False: 44 | 45 | # find the longest ORF in each frame 46 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs(aa_frames, min_upstream_length = min_upstream_length) 47 | 48 | # check for upstream ORF? 49 | # get all sequence upstream of the start (M), and reverse it to find 50 | # the distance to the nearest upstream stop codon 51 | orf_sequence, start_sites, orf_length = add_upstream_aas(aa_frames, stop_sites, start_sites, orf_sequence, orf_length, min_upstream_length=min_upstream_length) 52 | 53 | # filter data by minimum orf length 54 | keep = orf_length >= min_orf_length 55 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects(keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 56 | 57 | # only run next steps if there are ORFs 58 | if np.any(keep): 59 | # convert aa indices to nt-based indices 60 | start_site_nt, stop_site_nt, utr3_length = convert_start_stop_to_nt(start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 61 | 62 | # check first and last AA 63 | first_MET = check_first_aa(orf_sequence) 64 | final_stop = np.where(last_aa_is_stop, 'STOP', 'ALT') 65 | else: 66 | start_site_nt = [] 67 | stop_site_nt = [] 68 | utr3_length = [] 69 | first_MET = [] 70 | final_stop = [] 71 | 72 | # collect all and format as pandas DataFrame 73 | orf_df = pd.DataFrame(index=range(len(start_sites))) 74 | orf_df['id'] = ids 75 | orf_df['aa_sequence'] = aa_frames 76 | orf_df['frame'] = frame 77 | orf_df['strand'] = strand 78 | orf_df['seq_length'] = seq_length 79 | orf_df['seq_length_nt'] = seq_length_nt 80 | orf_df['orf_sequence'] = orf_sequence 81 | orf_df['start_site'] = start_sites 82 | orf_df['stop_site'] = stop_sites 83 | orf_df['orf_length'] = orf_length 84 | orf_df['start_site_nt'] = start_site_nt 85 | orf_df['stop_site_nt'] = stop_site_nt 86 | orf_df['utr3_length'] = utr3_length 87 | orf_df['first_MET'] = first_MET 88 | orf_df['final_stop'] = final_stop 89 | 90 | # filter by orf with the max length for each sequence 91 | idx = orf_df.groupby(['id'])['orf_length'].transform(max) == orf_df['orf_length'] 92 | orf_df = orf_df[idx] 93 | # isoform_number so output format is the same as if all_orfs == True 94 | orf_df['isoform_number'] = int(1) 95 | 96 | # if finding all orf > cutoff 97 | else: 98 | 99 | # make DataFrame for each AA frame - joined later with ORF data 100 | # to prevent increasing the size of this too early 101 | sequence_df = pd.DataFrame(index=range(len(aa_frames))) 102 | sequence_df['id'] = ids 103 | sequence_df['aa_sequence'] = aa_frames 104 | sequence_df['frame'] = frame 105 | sequence_df['strand'] = strand 106 | sequence_df['seq_length'] = seq_length 107 | sequence_df['seq_length_nt'] = seq_length_nt 108 | # index so we can match back data later 109 | sequence_df['seq_index'] = range(len(aa_frames)) 110 | 111 | # find all ORFs longer than min_orf_length 112 
| orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs(aa_frames, min_orf_length=min_orf_length, min_upstream_length = min_upstream_length) 113 | 114 | # check for upstream ORF? 115 | # get all sequence upstream of the start (M), and reverse it to 116 | # find the distance to the nearest upstream stop codon 117 | full_seq_matched = np.array(sequence_df['aa_sequence'][matched_index], dtype='str') 118 | orf_sequence, start_sites, orf_length = add_upstream_aas(full_seq_matched, stop_sites, start_sites, orf_sequence, orf_length, min_upstream_length=min_upstream_length) 119 | 120 | # filter data by minimum orf length 121 | keep = orf_length >= min_orf_length 122 | start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length, matched_index = filter_objects(keep, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length, matched_index) 123 | 124 | # make DataFrame of ORF data 125 | orf_df = pd.DataFrame(index=range(len(orf_sequence))) 126 | orf_df['seq_index'] = matched_index 127 | orf_df['orf_sequence'] = orf_sequence 128 | orf_df['start_site'] = start_sites 129 | orf_df['stop_site'] = stop_sites 130 | orf_df['orf_length'] = orf_length 131 | # combine with sequence data from above 132 | orf_df = pd.merge(sequence_df, orf_df, on='seq_index', how='right') 133 | orf_df.drop('seq_index', axis=1, inplace=True) 134 | 135 | if np.any(keep): 136 | # convert aa indices to nt-based indices 137 | orf_df['start_site_nt'], orf_df['stop_site_nt'], orf_df['utr3_length'] = convert_start_stop_to_nt(start_sites, stop_sites, orf_df['seq_length_nt'], orf_length, orf_df['frame'], last_aa_is_stop) 138 | # check first and last AA 139 | orf_df['first_MET'] = check_first_aa(orf_df['orf_sequence']) 140 | orf_df['final_stop'] = np.where(last_aa_is_stop, 'STOP', 'ALT') 141 | else: 142 | # convert aa indices to nt-based indices 143 | orf_df['start_site_nt'] = [] 144 | orf_df['stop_site_nt'] = [] 145 | orf_df['utr3_length'] = [] 146 | # check first and last AA 147 | orf_df['first_MET'] = [] 148 | orf_df['final_stop'] = [] 149 | 150 | orf_df['isoform_number'] = unique_number_from_list(orf_df.id) 151 | 152 | # add ORF classification 153 | orf_df['orf_class'] = add_orf_classification(orf_df) 154 | # Generate ids for writing to fasta 155 | orf_df['fasta_id'] = (orf_df.id + '.orf' + orf_df.isoform_number.map(str) + ' ' + orf_df.orf_class + ':' + orf_df.start_site_nt.map(str) + '-' + orf_df.stop_site_nt.map(str) + ' strand:' + orf_df.strand.map(str)) 156 | 157 | return orf_df 158 | 159 | 160 | def translate_all_frames(sequences, both_strands=False, genetic_code=1): 161 | 162 | """ 163 | translate nt sequences into all 3 frames 164 | 165 | Parameters 166 | ---------- 167 | sequences : list 168 | list of nucleotide sequences 169 | both_strands : bool 170 | translate both strands? 
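    genetic_code : int
        NCBI genetic code (translation table) used for translation; passed
        through to scikit-bio's translate (default 1, the standard code)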
171 |
172 |     Returns
173 |     -------
174 |     ids, aa_seq_by_frame, frame, strand, seq_length_nt, seq_length :
175 |         numpy arrays (one entry per translated frame) of sequence ids, AA
176 |         translations, frame numbers, strands, nt lengths and AA lengths
177 |
178 |     """
179 |     # create all frame translations of nt sequence
180 |     aa_seq_by_frame = []
181 |     frame = []
182 |     seq_length_nt = []
183 |     ids = []
184 |     skipped_counter = 0
185 |     for seq_string in sequences:
186 |
187 |         nucleotide_seq = str(seq_string.seq)
188 |         non_ATGC = len(nucleotide_seq) - (nucleotide_seq.count('A') + nucleotide_seq.count('T') + nucleotide_seq.count('G') + nucleotide_seq.count('C'))
189 |         skip = non_ATGC > 0
190 |
191 |         if skip is False:
192 |
193 |             for reading_frame in range(3):
194 |
195 |                 aa_seq_by_frame.append(str(skbio.DNA(str(seq_string.seq[reading_frame:])).translate(genetic_code)))
196 |                 frame.append(reading_frame)
197 |                 seq_length_nt.append(len(str(seq_string.seq)))
198 |                 ids.append(seq_string.id)
199 |
200 |                 if both_strands is True:
201 |                     # translate reverse complement
202 |                     aa_seq_by_frame.append(str(skbio.DNA(str(skbio.DNA(str(seq_string.seq)).complement(reverse=True))[reading_frame:]).translate(genetic_code)))
203 |                     frame.append(reading_frame)
204 |                     seq_length_nt.append(len(str(seq_string.seq)))
205 |                     ids.append(seq_string.id)
206 |
207 |         else:
208 |             print("Skipping " + str(seq_string.id) + ". Found " + str(non_ATGC) + " non-ACGT characters.")
209 |             skipped_counter = skipped_counter + 1
210 |
211 |     seq_length_nt = np.array(seq_length_nt)
212 |     aa_seq_by_frame = np.array(aa_seq_by_frame)
213 |     frame = np.array(frame) + 1
214 |     if both_strands is False:
215 |         strand = np.array([s for s in '+' for i in range(len(aa_seq_by_frame))])
216 |     else:
217 |         strand = np.tile(np.array(['+', '-']), (len(sequences)-skipped_counter)*3)
218 |
219 |     seq_length = np.array([len(o) for o in aa_seq_by_frame])
220 |
221 |     ids = np.array(ids)
222 |     return ids, aa_seq_by_frame, frame, strand, seq_length_nt, seq_length
223 |
224 |
225 | def find_longest_orfs(aa_frames, min_upstream_length=50):
226 |     start_sites = []
227 |     stop_sites = []
228 |     orf_sequence = []
229 |
230 |     for aa_seq in aa_frames:
231 |
232 |         max_start, max_end = orf_start_stop_from_aa(aa_seq, min_upstream_length=min_upstream_length)
233 |         # if returning all > 100AA
234 |
235 |         start_sites.append(max_start)
236 |         stop_sites.append(max_end)
237 |
238 |         # extract orf sequence
239 |         orf_sequence.append(aa_seq[max_start:max_end])
240 |
241 |     orf_sequence = np.array(orf_sequence)
242 |
243 |     # check if the last AA is a stop (*) and trim it if necessary
244 |     last_aa_is_stop = [o[-1] == '*' for o in orf_sequence]
245 |     orf_sequence[last_aa_is_stop] = [o[0:-1] for o in orf_sequence[last_aa_is_stop]]
246 |
247 |     orf_length = np.array([len(o) for o in orf_sequence])
248 |
249 |     # add 1 to convert pythonic index to normal-person index...
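    # e.g. the frame 'META*MEATBORF*' yields the python slice [5:14] ('MEATBORF*'),
    # which is reported below as start_site 6 and stop_site 14 (1-based, stop at '*')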
250 | start_sites = np.array(start_sites) + 1 251 | stop_sites = np.array(stop_sites) 252 | last_aa_is_stop = np.array(last_aa_is_stop) 253 | 254 | return orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop 255 | 256 | 257 | def orf_start_stop_from_aa(aa_seq, *, max_only=True, min_upstream_length=50): 258 | """ 259 | Find locations of the start (M) and stop (*) codons that produce the 260 | longest ORF 261 | 262 | Parameters 263 | ---------- 264 | aa_seq : str 265 | amino acid sequence 266 | max_only : bool 267 | Only return that start and stop locations of the longest ORF 268 | 269 | Returns 270 | ------- 271 | start_loc : int 272 | start location 273 | end_loc : int 274 | end location 275 | 276 | Examples 277 | -------- 278 | 279 | orf_start_stop_from_aa("META*") 280 | orf_start_stop_from_aa("META*MEATBORF*") 281 | orf_start_stop_from_aa("META*MEATBORF") 282 | orf_start_stop_from_aa("MEATBORF") 283 | 284 | """ 285 | 286 | # find all M 287 | if aa_seq.count("M") > 0: 288 | start_locs = [] 289 | end_locs = [] 290 | 291 | M_locations = [m.span()[0] for m in re.finditer('M', aa_seq)] 292 | if min(M_locations) > min_upstream_length: 293 | M_locations.insert(0,0) # add 0 to be the first location (i.e. upstream incomplete transcripts) 294 | last_end = -1 295 | for m in M_locations: 296 | if m > last_end-1: 297 | stop_location = find_next_stop(aa_seq, m) 298 | start_locs.append(m) 299 | end_locs.append(stop_location) 300 | last_end = stop_location 301 | # if returning all > 100AA 302 | # find the start/end of the longest ORF 303 | if max_only is True: 304 | max_start, max_end = find_max_orf_index(start_locs, end_locs) 305 | else: 306 | max_start, max_end = start_locs, end_locs 307 | 308 | else: 309 | max_start = 0 310 | max_end = find_next_stop(aa_seq, max_start) 311 | 312 | return max_start, max_end 313 | 314 | 315 | def find_next_stop(aa_seq, start_loc): 316 | """ 317 | Find location of the next stop codon (*) after the start location. 318 | Return string length if no stop codon is found. 
319 | 320 | Parameters 321 | ---------- 322 | aa_seq : str 323 | amino acid sequence 324 | start_loc : int 325 | start location 326 | 327 | Returns 328 | ------- 329 | end_loc : int 330 | location of the next stop codon, or length of string if none is found 331 | 332 | Examples 333 | -------- 334 | 335 | find_next_stop("AAAMBBB*CCC*", 4) 336 | find_next_stop("AAAMBBBCCC", 4) 337 | 338 | """ 339 | stop_codon = np.char.find(aa_seq[start_loc:], '*') 340 | 341 | if stop_codon == -1: 342 | stop_codon = len(aa_seq) 343 | return stop_codon 344 | else: 345 | end_loc = stop_codon + start_loc + 1 346 | return end_loc 347 | 348 | 349 | def find_max_orf_index(start_locs, end_locs): 350 | """ 351 | Given sets of start and end locations, return the set with the largest 352 | difference 353 | 354 | Parameters 355 | ---------- 356 | start_locs : np.array 357 | start locations 358 | end_locs : np.array 359 | end locations 360 | 361 | Returns 362 | ------- 363 | start_loc : int 364 | start location 365 | end_loc : int 366 | end location 367 | 368 | Examples 369 | -------- 370 | 371 | find_max_orf_index(start_locs = [0,100], end_locs = [1000, 200]) 372 | 373 | """ 374 | orf_lengths = np.array(end_locs) - np.array(start_locs) 375 | if orf_lengths.size > 1: 376 | max_index = np.where(orf_lengths == np.amax(orf_lengths))[0] 377 | return np.array(start_locs)[max_index][0], np.array(end_locs)[max_index][0] 378 | else: 379 | return np.array(start_locs)[0], np.array(end_locs)[0] 380 | 381 | 382 | def add_upstream_aas(aa_frames, stop_sites, start_sites, orf_sequence, 383 | orf_length, min_upstream_length=50): 384 | """ 385 | Add the upstream AAs onto orf sequences 386 | 387 | Parameters 388 | ---------- 389 | aa_frames : list 390 | list of translated AA sequences (full length) 391 | start_sites : list 392 | list of start sites 393 | stop_sites : list 394 | list of stop sites 395 | orf_sequence : list 396 | list of ORF sequences (i.e. 
from start to stop codon)
397 |     orf_length : list
398 |         list of orf lengths
399 |     min_upstream_length : int
400 |         minimum length of upstream sequence for it to be added
401 |
402 |     Returns
403 |     -------
404 |     orf_sequence : list
405 |         list of ORF sequences including upstream AA where appropriate
406 |     start_sites : list
407 |         list of start sites
408 |     orf_length : list
409 |         list of orf lengths
410 |     """
411 |     first_stop = np.char.find(np.array(aa_frames), "*")
412 |     add_upstream = np.logical_and(np.logical_or(first_stop == -1, first_stop == (stop_sites-1)), start_sites > min_upstream_length)
413 |
414 |     if np.any(add_upstream):
415 |         # object so no sequence truncation
416 |         orf_sequence_withup = orf_sequence.copy().astype('object')
417 |         orf_length_withup = orf_length.copy()
418 |         start_sites_withup = start_sites.copy()
419 |
420 |         orf_with_upstream = [o[0:s] for o, s in zip(aa_frames[add_upstream], stop_sites[add_upstream])]
421 |         # check if the last AA is a stop (*) and trim it if necessary
422 |         orf_with_upstream = [replace_last_stop(o) for o in orf_with_upstream]
423 |         orf_sequence_withup[add_upstream] = orf_with_upstream
424 |         start_sites_withup[add_upstream] = 1  # set to 1 for upstream ORFs
425 |         orf_length_withup[add_upstream] = np.array([len(o) for o in orf_sequence_withup[add_upstream]])
426 |
427 |         orf_sequence_withup = orf_sequence_withup.astype(str)
428 |
429 |         return orf_sequence_withup, start_sites_withup, orf_length_withup
430 |     else:
431 |         return orf_sequence, start_sites, orf_length
432 |
433 |
434 | def replace_last_stop(orf_seq):
435 |
436 |     """
437 |     remove a final stop character (*) from the end of a string
438 |
439 |     Parameters
440 |     ----------
441 |     orf_seq : str
442 |         orf_sequence
443 |
444 |     Returns
445 |     -------
446 |     orf_seq : str
447 |         orf_sequence
448 |
449 |     Examples
450 |     --------
451 |
452 |     replace_last_stop("META*")
453 |     replace_last_stop("METAL")
454 |
455 |     """
456 |
457 |     if orf_seq[-1] == '*':
458 |         replaced_orf_seq = orf_seq[0:-1]
459 |         return replaced_orf_seq
460 |     else:
461 |         return orf_seq
462 |
463 |
464 | def filter_objects(filter, *objects):
465 |
466 |     """
467 |     filter multiple objects
468 |
469 |     Parameters
470 |     ----------
471 |     filter : list
472 |         boolean list
473 |     objects :
474 |         objects to filter
475 |
476 |     Returns
477 |     -------
478 |     objects :
479 |         filtered objects
480 |     """
481 |
482 |     new_objects = []
483 |     for o in objects:
484 |         new_objects.append(o[filter])
485 |
486 |     return new_objects
487 |
488 |
489 | def convert_start_stop_to_nt(start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop):
490 |     """
491 |     Convert AA locations to nt locations
492 |
493 |     Parameters
494 |     ----------
495 |     start_sites : list
496 |         list of start sites
497 |     stop_sites : list
498 |         list of stop sites
499 |     seq_length_nt : list
500 |         list of sequence lengths (in nt)
501 |     orf_length : list
502 |         list of orf lengths
503 |     frame : list
504 |         list of frames
505 |     last_aa_is_stop : list
506 |         booleans indicating whether the stop site refers to a stop codon (*)
507 |         or not.
508 | 509 | Returns 510 | ------- 511 | start_site_nt : list 512 | list of start sites (in nt) 513 | stop_site_nt : list 514 | list of stop sites (in nt) 515 | utr3_length : list 516 | list of 3' utr lengths (in nt) 517 | """ 518 | 519 | start_site_nt = (start_sites*3) - 3 + frame 520 | # only give a stop_site_nt location if the last AA is * //// NOT ANYMORE 521 | # using NAN values gives issues when trying to convert to int 522 | stop_site_nt = orf_length*3 + start_site_nt + 3 - 1 523 | stop_site_nt[np.logical_not(last_aa_is_stop)] = seq_length_nt[np.logical_not(last_aa_is_stop)] 524 | 525 | utr3_length = np.zeros(len(start_site_nt)) 526 | utr3_length[last_aa_is_stop] = seq_length_nt[last_aa_is_stop] - stop_site_nt[last_aa_is_stop] 527 | utr3_length = utr3_length.astype(int) 528 | return start_site_nt, stop_site_nt, utr3_length 529 | 530 | 531 | def check_first_aa(orf_sequence, start_codon='M'): 532 | """ 533 | Check that the first AA in a list of ORF sequences is M. 534 | 535 | Parameters 536 | ---------- 537 | orf_sequence : 538 | list of orf sequences 539 | start_codon : 540 | character representing the start codon 541 | 542 | Returns 543 | ------- 544 | first_MET : numpy array 545 | array matching orf_sequence with either the start codon or 'ALT' 546 | 547 | Examples 548 | -------- 549 | check_first_aa(['META','ETAM']) 550 | """ 551 | 552 | first_aa = [o[0] for o in orf_sequence] 553 | first_MET = np.where(np.array(first_aa) == start_codon, start_codon, 'ALT') 554 | return first_MET 555 | 556 | 557 | def find_all_orfs(aa_frames, min_orf_length, min_upstream_length=50): 558 | matched_index = [] 559 | start_sites = [] 560 | stop_sites = [] 561 | orf_sequence = [] 562 | 563 | for i in range(len(aa_frames)): 564 | 565 | aa_seq = aa_frames[i] 566 | start_locs, end_locs = orf_start_stop_from_aa(aa_seq, max_only=False, min_upstream_length=min_upstream_length) 567 | first_stop = np.char.find(aa_seq, '*') 568 | # if returning all > 100AA 569 | # OR if potential upstream incomplete 570 | orf_lengths = (np.array(end_locs) - np.array(start_locs)) 571 | above_min_length = np.logical_or(np.logical_or(orf_lengths >= min_orf_length, start_locs < first_stop), first_stop == -1) 572 | 573 | orf_lengths = orf_lengths[above_min_length] 574 | max_start = np.array(start_locs)[above_min_length] 575 | max_end = np.array(end_locs)[above_min_length] 576 | rep_index = np.repeat(i, len(orf_lengths)) 577 | 578 | start_sites.append(max_start) 579 | stop_sites.append(max_end) 580 | matched_index.append(rep_index) 581 | 582 | # extract orf sequence 583 | if np.array(max_start).size == 1: 584 | orf_sequence.append(aa_seq[int(max_start):int(max_end)]) 585 | elif np.array(max_start).size > 1: 586 | orf_sequence.append([aa_seq[sta:end] for sta, end in zip(max_start, max_end)]) 587 | 588 | start_sites = np.hstack(start_sites) 589 | stop_sites = np.hstack(stop_sites) 590 | matched_index = np.hstack(matched_index) 591 | orf_sequence = np.hstack(orf_sequence) 592 | 593 | # check if the last AA is a stop (*) and trim it if neccessary 594 | last_aa_is_stop = [o[-1] == '*' for o in orf_sequence] 595 | orf_sequence = np.array([replace_last_stop(o) for o in orf_sequence]) 596 | 597 | orf_length = np.array([len(o) for o in orf_sequence]) 598 | 599 | # add 1 to convert pythonic index to normal-person index... 
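    # (matched_index maps each ORF back to the row index of its source frame,
    # which is later used to join the ORF-level arrays onto sequence_df)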
600 | start_sites = np.array(start_sites) + 1 601 | stop_sites = np.array(stop_sites) 602 | last_aa_is_stop = np.array(last_aa_is_stop) 603 | 604 | return orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index 605 | 606 | 607 | def unique_number_from_list(input_list): 608 | """ 609 | Produce a list of integers corresponding to the number of times an 610 | element in the input list has been observed. 611 | 612 | Parameters 613 | ---------- 614 | input_list : list 615 | list of values 616 | 617 | Returns 618 | ------- 619 | occurrence : list 620 | integer list of occurrence counts 621 | 622 | Examples 623 | -------- 624 | 625 | unique_number_from_list(['a','a','b','c','c','c']) 626 | unique_number_from_list(['a','b','c']) 627 | 628 | """ 629 | dups = {} 630 | occurrence = [] 631 | for i, val in enumerate(input_list): 632 | if val not in dups: 633 | # Store index of first occurrence and occurrence value 634 | dups[val] = [i, 1] 635 | 636 | 637 | else: 638 | dups[val][1] += 1 # Increment occurrence value 639 | # Use stored occurrence value 640 | occurrence.append(dups[val][1]) 641 | return occurrence 642 | 643 | 644 | def add_orf_classification(orf_df): 645 | """ 646 | Generate ORF type classification from an orf_df. 647 | complete: Complete CDS - contains start codon and stop codon 648 | incomplete_5prime: Incomplete CDS - has stop codon, but start of sequence 649 | indicates that an upstream start codon may be missing. 650 | incomplete_3prime: Incomplete CDS - has start codon, but no stop codon. 651 | incomplete: Incomplete CDS - neither start codon nor stop codon found. 652 | 653 | Parameters 654 | ---------- 655 | orf_df : DataFrame 656 | orf_df DataFrame 657 | 658 | Returns 659 | ------- 660 | orf_class : np.array 661 | numpy array of orf classifications 662 | 663 | """ 664 | orf_class = np.empty(len(orf_df['first_MET']), dtype='object') 665 | 666 | orf_class[np.logical_and(orf_df['first_MET'] == "M", orf_df['final_stop'] == "STOP")] = 'complete' 667 | orf_class[np.logical_and(orf_df['first_MET'] != "M", orf_df['final_stop'] == "STOP")] = 'incomplete_5prime' 668 | orf_class[np.logical_and(orf_df['first_MET'] == "M", orf_df['final_stop'] != "STOP")] = 'incomplete_3prime' 669 | orf_class[np.logical_and(orf_df['first_MET'] != "M", orf_df['final_stop'] != "STOP")] = 'incomplete' 670 | 671 | return orf_class 672 | 673 | 674 | def read_fasta(fasta_file): 675 | """ 676 | read in a fasta file 677 | 678 | Parameters 679 | ---------- 680 | fasta_file : str 681 | path to fasta file 682 | 683 | Returns 684 | ------- 685 | all_sequences : list 686 | SeqIO records of each sequence, converted to upper case 687 | """ 688 | all_sequences = [] 689 | 690 | # read in fasta file 691 | for record in SeqIO.parse(fasta_file, 'fasta'): 692 | all_sequences.append(record.upper()) 693 | 694 | return all_sequences 695 | 696 | 697 | def write_orf_data(orf_df, file_out): 698 | """ 699 | Write ORF sequence metadata to a tab-separated txt file.
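If file_out already exists, rows are appended after checking that the existing column layout matches the ORF data.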
700 | 701 | Parameters 702 | ---------- 703 | orf_df : DataFrame 704 | orf_df DataFrame 705 | file_out : str 706 | path to output txt file 707 | 708 | """ 709 | 710 | orf_df = orf_df[['fasta_id', 'id', 'frame', 'strand', 'seq_length_nt', 'start_site_nt', 'stop_site_nt', 'utr3_length', 'start_site', 'stop_site', 'orf_length', 'first_MET', 'final_stop', 'orf_class']] 711 | 712 | orf_df.columns = ['orf_id', 'transcript_id', 'frame', 'strand', 'seq_length_nt', 'start_site_nt', 'stop_site_nt', 'utr3_length_nt', 'start_site_aa', 'stop_site_aa', 'orf_length_aa', 'first_aa_MET', 'final_aa_stop', 'orf_class'] 713 | 714 | 715 | 716 | if not os.path.isfile(file_out): 717 | orf_df.to_csv(file_out, mode='a', index=False, sep='\t') 718 | elif len(orf_df.columns) != len(pd.read_csv(file_out, nrows=1, sep='\t').columns): 719 | raise Exception("Columns do not match: ORF data has " + str(len(orf_df.columns)) + " columns. Output txt file has " + str(len(pd.read_csv(file_out, nrows=1, sep='\t').columns)) + " columns.") 720 | elif not (orf_df.columns == pd.read_csv(file_out, nrows=1, sep='\t').columns).all(): 721 | raise Exception("Columns and column order of ORF data and txt file do not match.") 722 | else: 723 | orf_df.to_csv(file_out, mode='a', index=False, sep='\t', header=False) 724 | 725 | 726 | def write_orf_fasta(orf_df, file_out): 727 | """ 728 | Write ORF sequences to a fasta file. 729 | 730 | Parameters 731 | ---------- 732 | orf_df : DataFrame 733 | orf_df DataFrame 734 | file_out : str 735 | path to file to write fasta sequences 736 | 737 | """ 738 | orf_df['fasta_id'] = '>' + orf_df.fasta_id 739 | orf_df.to_csv(file_out, mode='a', index=False, sep='\n', header=False, columns=['fasta_id', 'orf_sequence']) 740 | 741 | def write_orf_cds(orf_df, file_out): 742 | """ 743 | Write ORF CDS sequences to a fasta file. 744 | 745 | Parameters 746 | ---------- 747 | orf_df : DataFrame 748 | orf_df DataFrame 749 | file_out : str 750 | path to file to write fasta sequences 751 | 752 | """ 753 | orf_df['fasta_id'] = '>' + orf_df.fasta_id 754 | orf_df.to_csv(file_out, mode='a', index=False, sep='\n', header=False, columns=['fasta_id', 'cds_seq']) 755 | 756 | def batch_iterator(iterator, batch_size): 757 | """Returns lists of length batch_size. 758 | 759 | This can be used on any iterator, for example to batch up 760 | SeqRecord objects from Bio.SeqIO.parse(...), or to batch 761 | Alignment objects from Bio.AlignIO.parse(...), or simply 762 | lines from a file handle. 763 | 764 | This is a generator function, and it returns lists of the 765 | entries from the supplied iterator. Each list will have 766 | batch_size entries, although the final list may be shorter.
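Example (a sketch, assuming a fasta file named 'example.fa' exists): each list yielded by batch_iterator(SeqIO.parse('example.fa', 'fasta'), 100) contains at most 100 SeqRecord objects.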
767 | """ 768 | entry = True # Make sure we loop once 769 | while entry: 770 | batch = [] 771 | while len(batch) < batch_size: 772 | try: 773 | entry = next(iterator) 774 | except StopIteration: 775 | entry = None 776 | if entry is None: 777 | # End of file 778 | break 779 | batch.append(entry) 780 | if batch: 781 | yield batch 782 | -------------------------------------------------------------------------------- /borf/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/signalbash/borf/52bca757f95027388c5f8cdb8de80d88d5974b27/borf/tests/__init__.py -------------------------------------------------------------------------------- /borf/tests/test_borf.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | 5 | from borf.get_orfs import read_fasta 6 | from borf.get_orfs import find_next_stop 7 | from borf.get_orfs import find_max_orf_index 8 | from borf.get_orfs import orf_start_stop_from_aa 9 | from borf.get_orfs import find_longest_orfs 10 | from borf.get_orfs import replace_last_stop 11 | from borf.get_orfs import add_upstream_aas 12 | from borf.get_orfs import filter_objects 13 | from borf.get_orfs import translate_all_frames 14 | from borf.get_orfs import convert_start_stop_to_nt 15 | from borf.get_orfs import check_first_aa 16 | from borf.get_orfs import unique_number_from_list 17 | from borf.get_orfs import find_all_orfs 18 | from borf.get_orfs import add_orf_classification 19 | from borf.get_orfs import get_orfs 20 | 21 | 22 | class TestReadFasta(unittest.TestCase): 23 | def test_read_fasta(self): 24 | 25 | # check that files are read into the correct format 26 | read_sequence = read_fasta('test_data/test_mutliple_frame_orfs.fa') 27 | seq_array = [str(x.seq) for x in read_sequence] 28 | # check sequence matches 29 | # (only check first/last few nts, and total length) 30 | t_start = seq_array[0][0:20] == 'GCTTCGGGTTGGTGTCATGG' 31 | t_end = seq_array[0][-1:-20:-1] == 'AGTTGTGTTACCGGGACGG' 32 | t_len = len(seq_array[0]) == 2757 33 | 34 | self.assertTrue(t_start and t_end and t_len) 35 | 36 | 37 | class TestFindNextStop(unittest.TestCase): 38 | 39 | def test_next_stop_not_longest(self): 40 | # check this finds the NEXT stop codon 41 | # assert find_next_stop("AAAMBBB*CCC*", 4) == 8 42 | next_stop = find_next_stop("AMEATBALL*", 0) 43 | self.assertEqual(next_stop, 10) 44 | 45 | def test_next_stop_from_within(self): 46 | # check this finds the NEXT stop codon when given a start position 47 | # greater than 0/1 48 | orf = "AMEATY*METABALL*" 49 | next_stop = find_next_stop(orf, 7) 50 | self.assertEqual(next_stop, len(orf)) 51 | 52 | def test_next_stop_final(self): 53 | # check that this returns the length of the given string when no stop 54 | # codon is found 55 | orf = "AMEATBALL" 56 | next_stop = find_next_stop(orf, 0) 57 | self.assertEqual(next_stop, len(orf)) 58 | 59 | 60 | class TestFindMaxOrfIndex(unittest.TestCase): 61 | 62 | def test_find_max_orf_index(self): 63 | # test basic usage of finding the two maximum values 64 | self.assertEqual(find_max_orf_index(start_locs=[0, 100], 65 | end_locs=[1000, 200]), (0, 1000)) 66 | 67 | def test_find_max_orf_index_offby1(self): 68 | # test when second index is greater by one 69 | self.assertEqual(find_max_orf_index(start_locs=[0, 100], 70 | end_locs=[999, 1100]), (100, 1100)) 71 | 72 | def test_find_max_orf_index_equal(self): 73 | # test that first instance of the
max is returned 74 | self.assertEqual(find_max_orf_index(start_locs=[0, 100], 75 | end_locs=[1000, 1100]), (0, 1000)) 76 | 77 | 78 | class TestOrfStartStopFromAA(unittest.TestCase): 79 | 80 | def test_correct_start_stop(self): 81 | # tests that the correct start/stop locations are given 82 | # in non-pythonic (1-indexed) manner 83 | self.assertEqual(orf_start_stop_from_aa('AMEATBALL*'), (1, 10)) 84 | 85 | def test_start_stop_no_stop_codon(self): 86 | # tests that stop location is the final aa when no stop codon is found 87 | self.assertEqual(orf_start_stop_from_aa('AMEATBALL'), (1, 9)) 88 | 89 | def test_start_stop_longest(self): 90 | # tests that the start/stop locations are given for the LONGEST orf 91 | self.assertEqual(orf_start_stop_from_aa('MAUL*AMEATBALL'), (6, 14)) 92 | 93 | 94 | class TestFindLongestORF(unittest.TestCase): 95 | 96 | def test_find_longest_orf_output_format(self): 97 | # tests that a length 5 tupple output, and each is the correct numpy 98 | # array type 99 | long_orf = find_longest_orfs(['AMEATBALL']) 100 | 101 | t_len = len(long_orf) == 5 102 | # test numpy types of all outputs 103 | t0 = long_orf[0].dtype == '= 6 326 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 327 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 328 | 329 | output = convert_start_stop_to_nt( 330 | start_sites, 331 | stop_sites, 332 | seq_length_nt, 333 | orf_length, 334 | frame, 335 | last_aa_is_stop) 336 | 337 | t_len = len(output) == 3 338 | # test numpy types of all outputs 339 | t0 = output[0].dtype == 'int64' 340 | t1 = output[1].dtype == 'int64' 341 | t2 = output[2].dtype == 'int64' 342 | 343 | all_right_types = t0 and t1 and t2 and t_len 344 | self.assertTrue(all_right_types) 345 | 346 | def test_convert_start_nt(self): 347 | sequences = read_fasta('test_data/test_frames.fa') 348 | 349 | ids, aa_frames, frame, strand, seq_length_nt, seq_length = translate_all_frames( 350 | sequences, both_strands=False) 351 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs( 352 | aa_frames) 353 | # filter data by minimum orf length 354 | keep = orf_length >= 6 355 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 356 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 357 | 358 | start_site_nt, stop_site_nt, utr3_length = convert_start_stop_to_nt( 359 | start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 360 | 361 | self.assertTrue(np.all(start_site_nt == np.array([1, 2, 3]))) 362 | 363 | def test_convert_stop_nt(self): 364 | sequences = read_fasta('test_data/test_frames.fa') 365 | 366 | ids, aa_frames, frame, strand,seq_length_nt, seq_length = translate_all_frames(sequences, both_strands=False) 367 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs( 368 | aa_frames) 369 | # filter data by minimum orf length 370 | keep = orf_length >= 6 371 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 372 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 373 | 374 | start_site_nt, 
stop_site_nt, utr3_length = convert_start_stop_to_nt( 375 | start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 376 | self.assertTrue(np.all(stop_site_nt == np.array([21, 22, 23]))) 377 | 378 | def test_convert_stop_nt_3incomplete(self): 379 | sequences = read_fasta('test_data/test_stopsitent.fa') 380 | 381 | ids, aa_frames, frame, strand,seq_length_nt, seq_length = translate_all_frames(sequences, both_strands=False) 382 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs(aa_frames) 383 | # filter data by minimum orf length 384 | keep = orf_length >= 6 385 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 386 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 387 | 388 | start_site_nt, stop_site_nt, utr3_length = convert_start_stop_to_nt( 389 | start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 390 | self.assertTrue(np.all(stop_site_nt == seq_length_nt)) 391 | 392 | 393 | def test_convert_utr_nt(self): 394 | sequences = read_fasta('test_data/test_frames.fa') 395 | 396 | ids, aa_frames, frame, strand, seq_length_nt, seq_length = translate_all_frames( 397 | sequences, both_strands=False) 398 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs( 399 | aa_frames) 400 | # filter data by minimum orf length 401 | keep = orf_length >= 6 402 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 403 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 404 | 405 | start_site_nt, stop_site_nt, utr3_length = convert_start_stop_to_nt( 406 | start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 407 | self.assertTrue(np.all(utr3_length == np.array([5, 4, 3]))) 408 | 409 | 410 | class TestCheckFirstAA(unittest.TestCase): 411 | 412 | def test_check_first_aa_pos(self): 413 | # tests that a length 3 tupple output, and each is the correct numpy 414 | # array type 415 | aa_sequence = np.array(['MEATBALL']) 416 | self.assertEqual(check_first_aa(aa_sequence), 'M') 417 | 418 | def test_check_first_aa_neg(self): 419 | # tests that a length 3 tupple output, and each is the correct numpy 420 | # array type 421 | aa_sequence = np.array(['NOTAMEATBALL']) 422 | self.assertEqual(check_first_aa(aa_sequence), 'ALT') 423 | 424 | def test_check_first_aa_multi(self): 425 | # tests that a length 3 tupple output, and each is the correct numpy 426 | # array type 427 | aa_sequence = np.array(['MEATBALL', 'NOTAMEATBALL']) 428 | self.assertTrue(np.all(check_first_aa( 429 | aa_sequence) == np.array(['M', 'ALT']))) 430 | 431 | 432 | class TestCheckUniqueN(unittest.TestCase): 433 | 434 | def test_check_unique_n(self): 435 | # tests that a length 3 tupple output, and each is the correct numpy 436 | # array type 437 | values = np.array( 438 | ['MEATBALL', 'MEATBALL', 'BEAR', 'MEATBALL', 'MEATBALLS']) 439 | self.assertEqual(unique_number_from_list(values), [1, 2, 1, 3, 1]) 440 | 441 | 442 | class TestFindAllORFs(unittest.TestCase): 443 | 444 | def test_find_all_orfs_output_format(self): 445 | 446 | aa_seqs = np.array(['MEATBALL*MEATBALLBEAR*']) 447 | output = find_all_orfs(aa_seqs, min_orf_length=5) 448 | 449 | t_len = len(output) == 6 450 | # test 
numpy types of all outputs 451 | t0 = output[0].dtype.type == np.str_ 452 | t1 = output[1].dtype == 'int64' 453 | t2 = output[2].dtype == 'int64' 454 | t3 = output[3].dtype == 'int64' 455 | t4 = output[4].dtype == 'bool' 456 | t5 = output[5].dtype == 'int64' 457 | 458 | all_right_types = t0 and t1 and t2 and t3 and t4 and t5 and t_len 459 | self.assertTrue(all_right_types) 460 | 461 | def test_find_two_orfs(self): 462 | # tests that a length 3 tupple output, and each is the correct numpy 463 | # array type 464 | aa_seqs = np.array(['MEATBALL*MEATBALLBEAR*']) 465 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs( 466 | aa_seqs, min_orf_length=5) 467 | 468 | orf_correct = np.all(orf_sequence == np.array( 469 | ['MEATBALL', 'MEATBALLBEAR'])) 470 | start_correct = np.all(start_sites == np.array([1, 10])) 471 | stop_correct = np.all(stop_sites == np.array([9, 22])) 472 | orf_length_correct = np.all(orf_length == np.array([8, 12])) 473 | last_aa_is_stop_correct = np.all( 474 | last_aa_is_stop == np.array([True, True])) 475 | matched_index_correct = np.all(matched_index == np.array([0, 0])) 476 | 477 | self.assertTrue( 478 | orf_correct and start_correct and stop_correct and orf_length_correct and last_aa_is_stop_correct and last_aa_is_stop_correct and matched_index_correct) 479 | 480 | def test_find_multi_orfs(self): 481 | # tests that a length 3 tupple output, and each is the correct numpy 482 | # array type 483 | aa_seqs = np.array(['MEATBALL*MEATBALLBEAR*', '*NOPE', 'MELMCAT']) 484 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs(aa_seqs, min_orf_length=5) 485 | 486 | self.assertTrue(np.all(orf_sequence == np.array(['MEATBALL', 'MEATBALLBEAR', 'MELMCAT']))) 487 | 488 | def test_find_multi_orfs_index(self): 489 | # tests that a length 3 tupple output, and each is the correct numpy 490 | # array type 491 | aa_seqs = np.array(['MEATBALL*MEATBALLBEAR*', '*NOPE', 'MELMCAT']) 492 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs(aa_seqs, min_orf_length=5) 493 | 494 | self.assertTrue(np.all(matched_index == np.array([0, 0, 2]))) 495 | 496 | def test_find_all_orfs_upstream_ic(self): 497 | # tests that a length 3 tupple output, and each is the correct numpy 498 | # array type 499 | aa_seqs = np.array(['*NOPE', 'YES']) 500 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs(aa_seqs, min_orf_length=5) 501 | 502 | self.assertTrue(np.all(orf_sequence == np.array(['YES']))) 503 | 504 | class TestAddOrfClass(unittest.TestCase): 505 | 506 | def test_add_orf_classification_complete(self): 507 | orf_df = pd.DataFrame(index=range(1)) 508 | orf_df['first_MET'] = 'M' 509 | orf_df['final_stop'] = 'STOP' 510 | 511 | self.assertTrue(np.all(add_orf_classification(orf_df) == 512 | np.array(['complete']))) 513 | 514 | def test_add_orf_classification_incomplete_5prime(self): 515 | orf_df = pd.DataFrame(index=range(1)) 516 | orf_df['first_MET'] = 'ALT' 517 | orf_df['final_stop'] = 'STOP' 518 | 519 | self.assertTrue(np.all(add_orf_classification(orf_df) == 520 | np.array(['incomplete_5prime']))) 521 | 522 | def test_add_orf_classification_incomplete_3prime(self): 523 | orf_df = pd.DataFrame(index=range(1)) 524 | orf_df['first_MET'] = 'M' 525 | orf_df['final_stop'] = 'ALT' 526 | 527 | self.assertTrue(np.all(add_orf_classification(orf_df) == 528 | np.array(['incomplete_3prime']))) 529 | 530 | def 
test_add_orf_classification_incomplete(self): 531 | orf_df = pd.DataFrame(index=range(1)) 532 | orf_df['first_MET'] = 'ALT' 533 | orf_df['final_stop'] = 'ALT' 534 | 535 | self.assertTrue(np.all(add_orf_classification(orf_df) == 536 | np.array(['incomplete']))) 537 | 538 | def test_add_orf_classification_multi(self): 539 | orf_df = pd.DataFrame(index=range(4)) 540 | orf_df['first_MET'] = ['M', 'ALT', 'M', 'ALT'] 541 | orf_df['final_stop'] = ['STOP', 'STOP', 'ALT', 'ALT'] 542 | 543 | self.assertTrue(np.all(add_orf_classification(orf_df) == 544 | np.array(['complete', 'incomplete_5prime', 545 | 'incomplete_3prime', 'incomplete']))) 546 | 547 | 548 | class TestGetORFs(unittest.TestCase): 549 | 550 | def test_get_orf_base(self): 551 | 552 | expected = pd.DataFrame(index=range(1)) 553 | expected['id'] = 'Single_FA' 554 | expected['aa_sequence'] = 'MIMIKL*P' 555 | expected['frame'] = 1 556 | expected['strand'] = '+' 557 | expected['seq_length'] = 8 558 | expected['seq_length_nt'] = 26 559 | expected['orf_sequence'] = 'MIMIKL' 560 | expected['start_site'] = 1 561 | expected['stop_site'] = 7 562 | expected['orf_length'] = 6 563 | expected['start_site_nt'] = 1 564 | expected['stop_site_nt'] = 21 565 | expected['utr3_length'] = 5 566 | expected['first_MET'] = 'M' 567 | expected['final_stop'] = 'STOP' 568 | expected['isoform_number'] = 1 569 | expected['orf_class'] = 'complete' 570 | expected['fasta_id'] = '>Single_FA.orf1 complete:1-21 strand:+' 571 | 572 | all_sequences = read_fasta('test_data/test_getorfs.fa') 573 | orf_df = get_orfs(all_sequences, min_orf_length=5) 574 | 575 | self.assertTrue(orf_df.equals(expected)) 576 | 577 | def test_get_orf_all(self): 578 | 579 | expected = pd.DataFrame(index=range(2)) 580 | expected['id'] = ['Single_FA', 'Single_FA'] 581 | expected['aa_sequence'] = ['MIMIKL*P', 'GLQLNHDH'] 582 | expected['frame'] = [1, 3] 583 | expected['strand'] = ['+', '-'] 584 | expected['seq_length'] = [8, 8] 585 | expected['seq_length_nt'] = [26, 26] 586 | expected['orf_sequence'] = ['MIMIKL', 'GLQLNHDH'] 587 | expected['start_site'] = [1, 1] 588 | expected['stop_site'] = [7, 8] 589 | expected['orf_length'] = [6, 8] 590 | expected['start_site_nt'] = [1, 3] 591 | expected['stop_site_nt'] = [21, 26] 592 | expected['utr3_length'] = [5, 0] 593 | expected['first_MET'] = ['M', 'ALT'] 594 | expected['final_stop'] = ['STOP', 'ALT'] 595 | expected['isoform_number'] = [1, 2] 596 | expected['orf_class'] = ['complete', 'incomplete'] 597 | expected['fasta_id'] = ['>Single_FA.orf1 complete:1-21 strand:+', 598 | '>Single_FA.orf2 incomplete:3-26 strand:-'] 599 | 600 | all_sequences = read_fasta('test_data/test_getorfs.fa') 601 | orf_df = get_orfs(all_sequences, min_orf_length=5, both_strands=True, all_orfs=True) 602 | 603 | self.assertTrue(orf_df.equals(expected)) 604 | 605 | 606 | if __name__ == '__main__': 607 | unittest.main() 608 | -------------------------------------------------------------------------------- /borf/tests/test_frames.fa: -------------------------------------------------------------------------------- 1 | >Frame_1 2 | atgatcatgattaagctgtaaccccc 3 | >Frame_2 4 | aatgatcatgattaagctgtaacccc 5 | >Frame_3 6 | aaatgatcatgattaagctgtaaccc 7 | -------------------------------------------------------------------------------- /borf/tests/test_getorfs.fa: -------------------------------------------------------------------------------- 1 | >Single_FA 2 | ATGATCATGATTAAGCTGTAACCCCC 3 | 4 | -------------------------------------------------------------------------------- 
/borf/tests/test_mutliple_frame_orfs.fa: -------------------------------------------------------------------------------- 1 | >ENST00000327044.7(-) 2 | GCTTCGGGTTGGTGTCATGGCAGCTGCGGGGAGCCGCAAGAGGCGCCTGGCGGAGCTGACGGTGGACGAGTTCCTAGCTTCGGGCTTTGACTCCGAGTCCGAATCCGAGTCCGAAAATTCTCCACAAGCGGAGACACGGGAAGCACGCGAGGCTGCCCGGAGTCCGGATAAGCCGGGCGGGAGCCCCTCGGCCAGCCGGCGTAAAGGCCGTGCCTCTGAGCACAAAGACCAGCTCTCTCGGCTGAAGGACAGAGACCCCGAGTTCTACAAGTTCCTGCAGGAGAATGACCAGAGCCTGCTAAACTTCAGCGACTCGGACAGCTCTGAGGAGGAAGAGGGGCCGTTCCACTCCCTGCCAGATGTGCTGGAGGAAGCCAGTGAGGAGGAGGATGGAGCGGAGGAAGGAGAAGATGGGGACAGAGTCCCCAGAGGGCTGAAGGGGAAGAAGAATTCTGTTCCTGTGACCGTCGCCATGGTTGAGAGATGGAAGCAGGCAGCAAAGCAACGCCTCACTCCAAAGCTGTTCCATGAAGTGGTACAGGCGTTCCGAGCAGCTGTGGCCACCACCCGAGGGGACCAGGAAAGTGCTGAGGCCAACAAATTCCAGGTCACGGACAGTGCTGCATTCAATGCTCTGGTTACCTTCTGCATCAGAGACCTCATTGGCTGTCTCCAGAAGCTGCTGTTTGGAAAGGTGGCAAAGGATAGCAGCAGGATGCTGCAGCCGTCCAGCAGCCCGCTCTGGGGGAAGCTTCGTGTGGACATCAAGGCTTACCTGGGCTCGGCCATACAGCTGGTGTCCTGTCTGTCGGAGACGACGGTGTTGGCGGCCGTGCTGCGGCACATCAGCGTGCTGGTGCCCTGCTTCCTGACCTTCCCCAAGCAGTGCCGCATGCTGCTCAAGAGAATGGTGATCGTATGGAGCACTGGGGAAGAGTCTCTGCGGGTGCTGGCTTTCCTGGTCCTCAGCAGAGTCTGCCGGCACAAGAAGGACACTTTCCTTGGCCCCGTCCTCAAGCAAATGTACATCACGTATGTGAGGAACTGCAAGTTCACCTCGCCTGGTGCCCTCCCCTTCATCAGTTTCATGCAGTGGACCTTGACGGAGCTGCTGGCCCTGGAGCCGGGTGTGGCCTACCAGCACGCCTTCCTCTACATCCGCCAGCTCGCCATACACCTGCGCAACGCCATGACCACTCGCAAGAAGGAAACATACCAGTCTGTGTACAACTGGCAGTATGTGCACTGCCTCTTCCTGTGGTGCCGGGTCCTGAGCACTGCGGGCCCCAGCGAAGCCCTCCAGCCCTTGGTCTACCCCCTTGCCCAAGTCATCATTGGCTGTATCAAGCTCATCCCCACTGCCCGCTTCTACCCGCTGCGAATGCACTGCATCCGTGCCCTGACGCTGCTCTCGGGGAGCTCGGGGGCCTTCATCCCGGTGCTGCCTTTCATCCTGGAGATGTTCCAGCAGGTCGACTTCAACAGGAAGCCAGGGCGCATGAGCTCCAAGCCCATCAACTTCTCCGTGATCCTGAAGCTGTCCAATGTCAACCTGCAGGAGAAGGCGTACCGGGACGGCCTGGTGGAGCAGCTGTACGACCTCACCCTGGAGTACCTGCACAGCCAGGCACACTGCATCGGCTTCCCGGAGCTGGTGCTGCCTGTGGTCCTGCAGCTGAAGTCGTTCCTCCGGGAGTGCAAGGTGGCCAACTACTGCCGGCAGGTGCAGCAGCTGCTTGGGAAGGTTCAGGAGAACTCGGCATACATCTGCAGCCGCCGCCAGAGGGTTTCCTTCGGCGTCTCTGAGCAGCAGGCAGTGGAAGCCTGGGAGAAGCTGACCCGGGAAGAGGGGACACCCCTGACCTTGTACTACAGCCACTGGCGCAAGCTGCGTGACCGGGAGATCCAGCTGGAGATCAGTGGCAAAGAGCGGCTGGAAGACCTGAACTTCCCTGAGATCAAACGAAGGAAGATGGCTGACAGGAAGGATGAGGACAGGAAGCAATTTAAAGACCTCTTTGACCTGAACAGCTCTGAAGAGGACGACACCGAGGGATTCTCGGAGAGAGGGATACTGAGGCCCCTGAGCACTCGGCATGGGGTGGAAGACGATGAAGAGGACGAGGAGGAGGGCGAGGAGGACAGCAGCAACTCGGAGGATGGAGACCCAGACGCAGAGGCGGGGCTGGCCCCTGGGGAGCTGCAGCAGCTGGCCCAGGGGCCGGAGGACGAGCTGGAGGATCTGCAGCTCTCAGAGGACGACTGAGGCAGCCCATCTGGGGGGCCTGTAGGGGCTGCCGGGCTGGTGGCCAGTGTTTCCACCTCCCTGGCAGTCAGGCCTAGAGGCTGGCGTCTGTGCAGTTGGGGGAGGCAGTAGACACGGGACAGGCTTTATTATTTATTTTTCAGCATGAAAGACCAAACGTATCGAGAGCTGGGCTGGGCTGGGCTGGTGTGGCTGCTGAAGCCCCACAGCTGTGGGCTGCTGAAGTCAGCTCCGCGGGGGAGCTGACCCTGACGTCAGCAGACCGAGACCAGTCCCAGTTCCAGGGGGAGGCCTGCAGGCCCCTGGCCCCTTCCACCACCTCTGCCCTCCGTCTGCAGACCTCGTCCATCTGCACCAGGCTCTGCCTTCACTCCCCCAAGTCTTTGAAAATTTGTTCCTTTCCTTTGAAGTCACATTTTCTTTTAAAATTTTTTGTTTTGCATCCGAAACCGAAAGAAATAAAGCGGTGGGAGGCAGGGCCATTGTGTTGA 3 | -------------------------------------------------------------------------------- /borf/tests/test_trans_all_frames.fa: -------------------------------------------------------------------------------- 1 | >MANATEE_seq 2 | atggcgaacgcgaccgaagaataa 3 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = "-W" # This flag turns warnings into errors. 
6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = PackagingScientificPython 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=PackagingScientificPython 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/source/_static/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/signalbash/borf/52bca757f95027388c5f8cdb8de80d88d5974b27/docs/source/_static/.placeholder -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # borf documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Jun 28 12:35:56 2018. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | # import os 21 | # import sys 22 | # sys.path.insert(0, os.path.abspath('.')) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 
34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.autosummary', 37 | 'sphinx.ext.githubpages', 38 | 'sphinx.ext.intersphinx', 39 | 'sphinx.ext.mathjax', 40 | 'sphinx.ext.viewcode', 41 | 'IPython.sphinxext.ipython_directive', 42 | 'IPython.sphinxext.ipython_console_highlighting', 43 | 'matplotlib.sphinxext.plot_directive', 44 | 'numpydoc', 45 | 'sphinx_copybutton', 46 | ] 47 | 48 | # Configuration options for plot_directive. See: 49 | # https://github.com/matplotlib/matplotlib/blob/f3ed922d935751e08494e5fb5311d3050a3b637b/lib/matplotlib/sphinxext/plot_directive.py#L81 50 | plot_html_show_source_link = False 51 | plot_html_show_formats = False 52 | 53 | # Generate the API documentation when building 54 | autosummary_generate = True 55 | numpydoc_show_class_members = False 56 | 57 | # Add any paths that contain templates here, relative to this directory. 58 | templates_path = ['_templates'] 59 | 60 | # The suffix(es) of source filenames. 61 | # You can specify multiple suffix as a list of string: 62 | # 63 | # source_suffix = ['.rst', '.md'] 64 | source_suffix = '.rst' 65 | 66 | # The master toctree document. 67 | master_doc = 'index' 68 | 69 | # General information about the project. 70 | project = 'borf' 71 | copyright = '2019, Beth Signal' 72 | author = 'Beth Signal' 73 | 74 | # The version info for the project you're documenting, acts as replacement for 75 | # |version| and |release|, also used in various other places throughout the 76 | # built documents. 77 | # 78 | import borf 79 | # The short X.Y version. 80 | version = borf.__version__ 81 | # The full version, including alpha/beta/rc tags. 82 | release = borf.__version__ 83 | 84 | # The language for content autogenerated by Sphinx. Refer to documentation 85 | # for a list of supported languages. 86 | # 87 | # This is also used if you do content translation via gettext catalogs. 88 | # Usually you set "language" from the command line for these cases. 89 | language = None 90 | 91 | # List of patterns, relative to source directory, that match files and 92 | # directories to ignore when looking for source files. 93 | # This patterns also effect to html_static_path and html_extra_path 94 | exclude_patterns = [] 95 | 96 | # The name of the Pygments (syntax highlighting) style to use. 97 | pygments_style = 'sphinx' 98 | 99 | # If true, `todo` and `todoList` produce output, else they produce nothing. 100 | todo_include_todos = False 101 | 102 | 103 | # -- Options for HTML output ---------------------------------------------- 104 | 105 | # The theme to use for HTML and HTML Help pages. See the documentation for 106 | # a list of builtin themes. 107 | # 108 | html_theme = 'sphinx_rtd_theme' 109 | import sphinx_rtd_theme 110 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 111 | 112 | # Theme options are theme-specific and customize the look and feel of a theme 113 | # further. For a list of options available for each theme, see the 114 | # documentation. 115 | # 116 | # html_theme_options = {} 117 | 118 | # Add any paths that contain custom static files (such as style sheets) here, 119 | # relative to this directory. They are copied after the builtin static files, 120 | # so a file named "default.css" will overwrite the builtin "default.css". 121 | html_static_path = ['_static'] 122 | 123 | # Custom sidebar templates, must be a dictionary that maps document names 124 | # to template names. 
125 | # 126 | # This is required for the alabaster theme 127 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 128 | html_sidebars = { 129 | '**': [ 130 | 'relations.html', # needs 'show_related': True theme option to display 131 | 'searchbox.html', 132 | ] 133 | } 134 | 135 | 136 | # -- Options for HTMLHelp output ------------------------------------------ 137 | 138 | # Output file base name for HTML help builder. 139 | htmlhelp_basename = 'borf' 140 | 141 | 142 | # -- Options for LaTeX output --------------------------------------------- 143 | 144 | latex_elements = { 145 | # The paper size ('letterpaper' or 'a4paper'). 146 | # 147 | # 'papersize': 'letterpaper', 148 | 149 | # The font size ('10pt', '11pt' or '12pt'). 150 | # 151 | # 'pointsize': '10pt', 152 | 153 | # Additional stuff for the LaTeX preamble. 154 | # 155 | # 'preamble': '', 156 | 157 | # Latex figure (float) alignment 158 | # 159 | # 'figure_align': 'htbp', 160 | } 161 | 162 | # Grouping the document tree into LaTeX files. List of tuples 163 | # (source start file, target name, title, 164 | # author, documentclass [howto, manual, or own class]). 165 | latex_documents = [ 166 | (master_doc, 'borf.tex', 'borf Documentation', 167 | 'Contributors', 'manual'), 168 | ] 169 | 170 | 171 | # -- Options for manual page output --------------------------------------- 172 | 173 | # One entry per manual page. List of tuples 174 | # (source start file, name, description, authors, manual section). 175 | man_pages = [ 176 | (master_doc, 'borf', 'borf Documentation', 177 | [author], 1) 178 | ] 179 | 180 | 181 | # -- Options for Texinfo output ------------------------------------------- 182 | 183 | # Grouping the document tree into Texinfo files. List of tuples 184 | # (source start file, target name, title, author, 185 | # dir menu entry, description, category) 186 | texinfo_documents = [ 187 | (master_doc, 'borf', 'borf Documentation', 188 | author, 'borf', 'Better ORF predictions', 189 | 'Miscellaneous'), 190 | ] 191 | 192 | 193 | 194 | 195 | # Example configuration for intersphinx: refer to the Python standard library. 196 | intersphinx_mapping = { 197 | 'python': ('https://docs.python.org/3/', None), 198 | 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 199 | 'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None), 200 | 'pandas': ('https://pandas.pydata.org/pandas-docs/stable', None), 201 | 'matplotlib': ('https://matplotlib.org', None), 202 | } 203 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Packaging Scientific Python documentation master file, created by 2 | sphinx-quickstart on Thu Jun 28 12:35:56 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | borf Documentation 7 | ================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | 12 | installation 13 | usage 14 | release-history 15 | min_versions 16 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | At the command line:: 6 | 7 | $ pip install borf 8 | -------------------------------------------------------------------------------- /docs/source/min_versions.rst: -------------------------------------------------------------------------------- 1 | =================================== 2 | Minimum Version of Python and NumPy 3 | =================================== 4 | 5 | 6 | - This project supports at least the minor versions of Python 7 | initially released 42 months prior to a planned project release 8 | date. 9 | - The project will always support at least the 2 latest minor 10 | versions of Python. 11 | - The project will support minor versions of ``numpy`` initially 12 | released in the 24 months prior to a planned project release date or 13 | the oldest version that supports the minimum Python version 14 | (whichever is higher). 15 | - The project will always support at least the 3 latest minor 16 | versions of NumPy. 17 | 18 | The minimum supported version of Python will be set to 19 | ``python_requires`` in ``setup.py``. All supported minor versions of 20 | Python will be in the test matrix and have binary artifacts built 21 | for releases. 22 | 23 | The project should adjust upward the minimum Python and NumPy 24 | version support on every minor and major release, but never on a 25 | patch release. 26 | 27 | This is consistent with NumPy `NEP 29 28 | <https://numpy.org/neps/nep-0029-deprecation_policy.html>`__. 29 | -------------------------------------------------------------------------------- /docs/source/release-history.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Release History 3 | =============== 4 | 5 | Initial Release (YYYY-MM-DD) 6 | ---------------------------- 7 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | Start by importing borf. 6 | 7 | .. code-block:: python 8 | 9 | import borf 10 | 11 | 12 | ..
autofunction:: borf.get_orfs.get_orfs -------------------------------------------------------------------------------- /github_deploy_key_betsig_borf.enc: -------------------------------------------------------------------------------- 1 | gAAAAABdjXdAeBTX9p_4dnxRTUqYHrUz14eaRTmHWQ5uZwXZvrRaf9c0UqrXkIIsIY0uURcEDiCBD7sPjXXEU9XBZ8yg9RMeRP5GH2hgvHHI5qOw3znERoOGnnzbAKIzIs8fz8zmI7JPCQ8Ni7AaaxRo4gk8vy8AousTK8hzLxa4wT8iifmEQ3YxLVDeiXs5GLzOxus3DaxB8Ho__nqoB4WXwoV6UNfgbbfIMwVIgajgSaOXt_dCPQVFun5Wd9lN4mTWBr7lBlMotrLY91sr1K621MM_2QMtsuNrhX4jZdNe-6huE5HyTwxk6uLQh1xSSnNMBhrtgfEZMEgL-SW2Ux9_z-Niw-gKDSnhnu7SktQQokJeZn9oTkTjgzs4cO-bfdKt4YZ9EXvSvSVHUgEtfmktiZXr-slvuTCbKdXEZDq8RHA0JeK7qkUQ5T8WhPyVxEN9MRs5dIkdHoM7lkVsyEYpOvj0zNNAnw6uVUIEwz_bpKRIbbXxAH_N2r7AxeITw9gS8WTWfUKtISxAAFInOY0yDVK8Coz2xKMbqSZBEXcUGlJLs3SPPKPJvISCLm8HAnC-qQXrUzQdDEm1wZLj5IUXHoajZu1VcDGsX128-Rqq5RhCoBFvxK9Oj5getYAIBUhx0-OZdYkid9g0AgzBylY9Vkh1935j2k3XM6VtWHLq5WiEoKGNvXxHJAyvFQVxS5vfLA7vUocXXlip7z5q85PgYlwq5RtvowskLA902GcaZveLwv96MS0Bw9bUBsAUSp_6bNUXz6XDkXUMdvxcP7E6F7-k379hJv5-mX-bYKtToKQpNlHVn52G_ZHKIxJ5orKmsG0xseVsZ76uQu4jFENAAGsUpMiurBQT3gpjuonCyKMwNyjC1DJL6rmvH3GnpOAq3OobCCC0HodInZpJyuejOQTjDyxwjmaOhqSpbkLKIRkJOKUGCg0ieP0owf1ap32-J_XL3K5lXeg1MuL3-AQz8rIYzAK2GJgbL8paMbGKEch6BbNRacpeYCzKSWQ9X3B753jeBL5u-3NlTgHDHDZavK8HOoBvGjLb7Xcn6FuiW2e_5q_d3V4_tR_JNB38tt-mgj-8i7voqK2u1ygllKPp3eAI77pI-0_xHNw6RoHhEsHBINsopDyL79b_EtBQgKLLXjiuOJ8jiYT1UZQrICu5wU3JnbJchE4zykeMbJzoxIvSWUa0mt8iD2XGfPj7fFVKyykchQQbmE2HYAYhs3loKao8aHZWz4NKjHf_sH3266tnzH99LV6Y6E_l6CEAobyjyGU6evgZKMwz38-DBzmlsZSzzCaBl5CCu_lDVnyyTnY79Qnup4t_Vc0CO4IwpftPfL803QTN4doEAAzn52xXrUs-G8nxlxIQCBcddWKf4YO6WerK65aRDLNAgw1yPYwlvQaqfdNmHkbbeeWPtZcHDFI6qkf6oL3rjkk7FaWSZ2C0BHk43KsMsPOPvNzxmTQF2_VhMZgJKxckTFQNVRMgWyeuk8hRwJZnwjH5PwloPBQS7q2sd1kzH7d8642slzIHMYl_nz_d0wdRlNU_cpcJYsZlyfi1C8ikoBjLhqXpPVYyWTx-b1MiKLKN6AyPtmqZuotvhiRvbh3OqKyFvkkZRk5yZmrBD4dF3TM0feMlZWtpNpCBBaxREW_D29YG4C2zvvkVsjFs2zCVqe1Hz3Qwf3EmihQ5Rt7XjUJHzeOH1ce1c3SRlLxbbpyreo9mNf9r0mEaxAghZYmbodcNDygUQ6MarYSfwLE42U-ADLodT8mNbwOnPONkARc73pVsDEQWpokX9mRHholWteAG4kw7c3t3l2o1iPbYvoNz6XhtEqDp3M3oEgvoooco70rpB3vMwqFbuzp3xGDFtqqVH4nmSBbevzdDjVX4cgc53qqqMemdR2DY1krXZQVUcfT8j3yEPNdZ1a9nXjW8osO4GajJ0bEhenAKlpjBi7KkVJuMp3EVjwAZo3R23Vg5xs8Plglc7W--G_ZsIV1YH-iLzFTiRPLc5acPz7gfmvJOYPWcBtvear4ng7iQbiSR1sRsSbsNIc94CozLRm7Zumv8RjBcMQd1PvT3H_mABBSs09USoqsdr1SnfGeC82T1WgUStTj83hJXOEFCdEZDNRCEI3Jf_AAoqbkxJWU0s_ZcD_zEYauXC4HpnLCNQvAYd6gYEkUFVtkxDCrRixVbKng3AS8Tc-uB_G8o9k97aGHJ-cvgosooR_Y6ZhLNSVPjBdWnmaCLxXL3YNhEy7ugvs_h0PwqJZ2nwNhv2taljqqDML6BNet26ZU6wogeSTEXq1L51xJa1n4TDEvPXtInQECQQJmIZ7CC3nypNsWMDxeQQ6Xrjn5ntYPsoL0xsLaYxsX3qcPRLZ2hHDnoTDnYTPnyqLSJ_omEgpr6nXupwh6dmicFUt2hSJhGU610c-0OyIQqO1lTNd6fSdwwVxMsfDzGXK68Rv9lMH_H0-jymAY0HcQGv5JRjpL71Ub2dFJOtOD-jH-neS2iF5LDdFLwtruBc0-nwuKWBF-6ixuuOvrSsfGi0preV1NAjjQZzZu65SItIWubfmS3w8x4sgn9dXTjjX0aH9dg8V7TBlVI5qTTABkQgEmNFBGMjdTm90lLClzxZSva7Dl9dYBD1m02m4gvcZHJ2AZ_1nPkaiI7s_OjQImsPazROwGUREQtOsSjpC8DRnFFXZWn_cYTje7zShflqM0h-qoQLGx0Nhu_bBqXm9tPkJQ6i_eM87UL3_SIIVAevhbL_SJCoPAPEa7tQaqcKx3coGrVGuxQaHQxQgQ7LDjc_p9U8_mvgMhsZrFaCH0xTqh6UEuIjFHoxnAqLIoJ8_YzPQqvHz7ovpXudLGoVPzTRinq1wsHRWe3sniD0F6CtWaDHCdPJdr6WIXP2BU0MaTosVTXO7S1b8_1LnKco44LBDPUbNh8MqGK2_z31FJluka5kYwox2WYv3jR5eTJDA1G_XzbCWJHuVRa0-j6_gradw7cW3i8b5N_qlZrwnPWjPT2qhwhybBRa74tXWApyrP8elLy6Pe_1RHtU9Ffkurj2RJjNJ59y0QdsuutzsN7KqPInAEnmv9cHe9ACDnMaud7fQTpV_hTt-nahVcmuepu0a-s9-HNkd9OKkWVVMNZh05ZESHhHvZyUZQ9f88Pa5Tgb8vUGM9aC8SQ7H0aHje-csa1svZJlXbdtK350UmtE10Kh5bIksyn6lCDzibi9O2sKjE0860vW2PV5P6qbAKw4w1fxID2nnJU4ob7Tf8XLK09mmv_6_f5xhfDRqAgybjRo0aF1B9Tx8jdeO_jx4HyUbk6QX9
9fH3B9fpfAAuhMUiv77gFeI_m5rumBfF9rYpgczAeOjQhKzSjTCDS91HB97rBiK9KM0J8lKtod2kO3RFusssYI_5cN8DqIz5CST-12l9JsZcX1dN4gaW3cIMZgNrNJFfkOSj2cq4C5S6tQrRY85pauPp4tgZU0veb7T_ZJOKEZNxX2KnalVdcB43WXBIJHe5AWeahvD5_CNGk2C1xx1iisskPxEZTNcJzD0ksQoZDmNvFZEcHHbazh0eDeg8OtfhR-GgCH8jz2b_BC0s571vrwGL584hSr3ViEmOg15qHAQNYrAjc9JtCufwl9Wr5WnTm8sM6gSkjhVFajl7L4cmXhB4d7KYFTFjw5cXZVSychc_4XuLOFwdjfT7H3RtYVuZNDId_7wwEES6547UjzBOhqRqYmpPb2fvCZVp8fu2qJ9oaeI3ApqvievBJL2crFJrddrthgbRlvbFVmiJw5CLDwDppu2rclzPNB5yB_1dH5So6zr2xbj3ScSFPAtEmlc-f6EGlFbjjPTbc3rxgYjeVKjB53kQQtSc8vZUpegXmppTKcJDMBPnocyDYmjN5ROE0ETvD5euteiCjfiSYOikSAd--Gzu3fYmvENeJhd6wWcpYTycGAfaLcaawAlnrNttG3HCg-AFf2OoQIrK3LyKnK6Y7OSR6WKGX_Z9-K2JF-NNNnc_dLd73dmgI_9KpFQ-19TXyTSAOCf8b_xK1W7XygLWGPEyLKVWG-vpT9CB_ZIdE2a0PLNV2kBydQsbLT-9l-GRNz-GrmrZuvwjv77QbLxNw9GvXtCpTgPVflETtWEUkYV7_iuY7g5_jcKHfSFmuH9Hq2NUKf50k0cDLUoGFYTKghWW3wZQ6BgkRbCW6Ct6FC9xaIvwWQO2IdRj304nfSEiVAAUXwBg4tTIlayOP9efjx9nYsyWJcSiu1KvPCaIcOU23BGQ_-gLM_1Ox2vXE0yzA_ahz6zZLxzvJ099JYSyhocqGzYV2EdaONGb1LlNutNsLAJ6LokGDqd8DwxEfrqzzPCr2ys6UhSAu0sEfUgjEK7PCpOfpBs0i7JCHttzvQuw0uSlcWNweoOCyLItPFdOq66NErjr-Lmm9-zmEYUhgx7NAQyvm1-XYyDjTdEgv8w== -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # These are required for developing the package (running the tests, building 2 | # the documentation) but not necessarily required for _using_ it. 3 | codecov 4 | coverage 5 | flake8 6 | pytest 7 | sphinx 8 | # These are dependencies of various sphinx extensions for documentation. 9 | ipython 10 | matplotlib 11 | numpydoc 12 | sphinx-copybutton 13 | sphinx_rtd_theme 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # List required packages in this file, one per line. 2 | numpy 3 | pandas 4 | biopython 5 | scikit-bio 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440-post 4 | versionfile_source = borf/_version.py 5 | versionfile_build = borf/_version.py 6 | tag_prefix = v 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | from setuptools import setup, find_packages 3 | import sys 4 | import versioneer 5 | 6 | 7 | # NOTE: This file must remain Python 2 compatible for the foreseeable future, 8 | # to ensure that we error out properly for people with outdated setuptools 9 | # and/or pip. 10 | min_version = (3, 5) 11 | if sys.version_info < min_version: 12 | error = """ 13 | borf does not support Python {0}.{1}. 14 | Python {2}.{3} and above is required. Check your Python version like so: 15 | 16 | python3 --version 17 | 18 | This may be due to an out-of-date pip. Make sure you have pip >= 9.0.1. 
19 | Upgrade pip like so: 20 | 21 | pip install --upgrade pip 22 | """.format(*(sys.version_info[:2] + min_version)) 23 | sys.exit(error) 24 | 25 | here = path.abspath(path.dirname(__file__)) 26 | 27 | with open(path.join(here, 'README.rst'), encoding='utf-8') as readme_file: 28 | readme = readme_file.read() 29 | 30 | with open(path.join(here, 'requirements.txt')) as requirements_file: 31 | # Parse requirements.txt, ignoring any commented-out lines. 32 | requirements = [line for line in requirements_file.read().splitlines() 33 | if not line.startswith('#')] 34 | 35 | 36 | setup( 37 | name='borf', 38 | version=versioneer.get_version(), 39 | cmdclass=versioneer.get_cmdclass(), 40 | description="ORF predictions from .fa files", 41 | long_description=readme, 42 | author="Beth Signal", 43 | author_email='bethany.signal@uts.edu.au', 44 | url='https://github.com/betsig/borf', 45 | python_requires='>={}'.format('.'.join(str(n) for n in min_version)), 46 | packages=find_packages(exclude=['docs', 'tests']), 47 | entry_points={ 48 | 'console_scripts': [ 49 | 'borf = borf.borf:main', 50 | ], 51 | }, 52 | include_package_data=True, 53 | package_data={ 54 | 'borf': [ 55 | # When adding files here, remember to update MANIFEST.in as well, 56 | # or else they will not be included in the distribution on PyPI! 57 | # 'path/to/data_file', 58 | 'test_data/*.fa' 59 | ] 60 | }, 61 | install_requires=requirements, 62 | license="MIT", 63 | long_description_content_type="text/x-rst", 64 | classifiers=[ 65 | 'Development Status :: 2 - Pre-Alpha', 66 | 'Natural Language :: English', 67 | 'Programming Language :: Python :: 3', 68 | ], 69 | ) 70 | -------------------------------------------------------------------------------- /test_data/test_frames.fa: -------------------------------------------------------------------------------- 1 | >Frame_1 2 | atgatcatgattaagctgtaaccccc 3 | >Frame_2 4 | aatgatcatgattaagctgtaacccc 5 | >Frame_3 6 | aaatgatcatgattaagctgtaaccc 7 | -------------------------------------------------------------------------------- /test_data/test_getorfs.fa: -------------------------------------------------------------------------------- 1 | >Single_FA 2 | ATGATCATGATTAAGCTGTAACCCCC 3 | 4 | -------------------------------------------------------------------------------- /test_data/test_mutliple_frame_orfs.fa: -------------------------------------------------------------------------------- 1 | >ENST00000327044.7(-) 2 | 
GCTTCGGGTTGGTGTCATGGCAGCTGCGGGGAGCCGCAAGAGGCGCCTGGCGGAGCTGACGGTGGACGAGTTCCTAGCTTCGGGCTTTGACTCCGAGTCCGAATCCGAGTCCGAAAATTCTCCACAAGCGGAGACACGGGAAGCACGCGAGGCTGCCCGGAGTCCGGATAAGCCGGGCGGGAGCCCCTCGGCCAGCCGGCGTAAAGGCCGTGCCTCTGAGCACAAAGACCAGCTCTCTCGGCTGAAGGACAGAGACCCCGAGTTCTACAAGTTCCTGCAGGAGAATGACCAGAGCCTGCTAAACTTCAGCGACTCGGACAGCTCTGAGGAGGAAGAGGGGCCGTTCCACTCCCTGCCAGATGTGCTGGAGGAAGCCAGTGAGGAGGAGGATGGAGCGGAGGAAGGAGAAGATGGGGACAGAGTCCCCAGAGGGCTGAAGGGGAAGAAGAATTCTGTTCCTGTGACCGTCGCCATGGTTGAGAGATGGAAGCAGGCAGCAAAGCAACGCCTCACTCCAAAGCTGTTCCATGAAGTGGTACAGGCGTTCCGAGCAGCTGTGGCCACCACCCGAGGGGACCAGGAAAGTGCTGAGGCCAACAAATTCCAGGTCACGGACAGTGCTGCATTCAATGCTCTGGTTACCTTCTGCATCAGAGACCTCATTGGCTGTCTCCAGAAGCTGCTGTTTGGAAAGGTGGCAAAGGATAGCAGCAGGATGCTGCAGCCGTCCAGCAGCCCGCTCTGGGGGAAGCTTCGTGTGGACATCAAGGCTTACCTGGGCTCGGCCATACAGCTGGTGTCCTGTCTGTCGGAGACGACGGTGTTGGCGGCCGTGCTGCGGCACATCAGCGTGCTGGTGCCCTGCTTCCTGACCTTCCCCAAGCAGTGCCGCATGCTGCTCAAGAGAATGGTGATCGTATGGAGCACTGGGGAAGAGTCTCTGCGGGTGCTGGCTTTCCTGGTCCTCAGCAGAGTCTGCCGGCACAAGAAGGACACTTTCCTTGGCCCCGTCCTCAAGCAAATGTACATCACGTATGTGAGGAACTGCAAGTTCACCTCGCCTGGTGCCCTCCCCTTCATCAGTTTCATGCAGTGGACCTTGACGGAGCTGCTGGCCCTGGAGCCGGGTGTGGCCTACCAGCACGCCTTCCTCTACATCCGCCAGCTCGCCATACACCTGCGCAACGCCATGACCACTCGCAAGAAGGAAACATACCAGTCTGTGTACAACTGGCAGTATGTGCACTGCCTCTTCCTGTGGTGCCGGGTCCTGAGCACTGCGGGCCCCAGCGAAGCCCTCCAGCCCTTGGTCTACCCCCTTGCCCAAGTCATCATTGGCTGTATCAAGCTCATCCCCACTGCCCGCTTCTACCCGCTGCGAATGCACTGCATCCGTGCCCTGACGCTGCTCTCGGGGAGCTCGGGGGCCTTCATCCCGGTGCTGCCTTTCATCCTGGAGATGTTCCAGCAGGTCGACTTCAACAGGAAGCCAGGGCGCATGAGCTCCAAGCCCATCAACTTCTCCGTGATCCTGAAGCTGTCCAATGTCAACCTGCAGGAGAAGGCGTACCGGGACGGCCTGGTGGAGCAGCTGTACGACCTCACCCTGGAGTACCTGCACAGCCAGGCACACTGCATCGGCTTCCCGGAGCTGGTGCTGCCTGTGGTCCTGCAGCTGAAGTCGTTCCTCCGGGAGTGCAAGGTGGCCAACTACTGCCGGCAGGTGCAGCAGCTGCTTGGGAAGGTTCAGGAGAACTCGGCATACATCTGCAGCCGCCGCCAGAGGGTTTCCTTCGGCGTCTCTGAGCAGCAGGCAGTGGAAGCCTGGGAGAAGCTGACCCGGGAAGAGGGGACACCCCTGACCTTGTACTACAGCCACTGGCGCAAGCTGCGTGACCGGGAGATCCAGCTGGAGATCAGTGGCAAAGAGCGGCTGGAAGACCTGAACTTCCCTGAGATCAAACGAAGGAAGATGGCTGACAGGAAGGATGAGGACAGGAAGCAATTTAAAGACCTCTTTGACCTGAACAGCTCTGAAGAGGACGACACCGAGGGATTCTCGGAGAGAGGGATACTGAGGCCCCTGAGCACTCGGCATGGGGTGGAAGACGATGAAGAGGACGAGGAGGAGGGCGAGGAGGACAGCAGCAACTCGGAGGATGGAGACCCAGACGCAGAGGCGGGGCTGGCCCCTGGGGAGCTGCAGCAGCTGGCCCAGGGGCCGGAGGACGAGCTGGAGGATCTGCAGCTCTCAGAGGACGACTGAGGCAGCCCATCTGGGGGGCCTGTAGGGGCTGCCGGGCTGGTGGCCAGTGTTTCCACCTCCCTGGCAGTCAGGCCTAGAGGCTGGCGTCTGTGCAGTTGGGGGAGGCAGTAGACACGGGACAGGCTTTATTATTTATTTTTCAGCATGAAAGACCAAACGTATCGAGAGCTGGGCTGGGCTGGGCTGGTGTGGCTGCTGAAGCCCCACAGCTGTGGGCTGCTGAAGTCAGCTCCGCGGGGGAGCTGACCCTGACGTCAGCAGACCGAGACCAGTCCCAGTTCCAGGGGGAGGCCTGCAGGCCCCTGGCCCCTTCCACCACCTCTGCCCTCCGTCTGCAGACCTCGTCCATCTGCACCAGGCTCTGCCTTCACTCCCCCAAGTCTTTGAAAATTTGTTCCTTTCCTTTGAAGTCACATTTTCTTTTAAAATTTTTTGTTTTGCATCCGAAACCGAAAGAAATAAAGCGGTGGGAGGCAGGGCCATTGTGTTGA 3 | -------------------------------------------------------------------------------- /test_data/test_stopsitent.fa: -------------------------------------------------------------------------------- 1 | >test_final_len_f1 2 | atgatcatgattaagctgttttttttt 3 | >test_final_len_f2 4 | atgatcatgattaagctgtttttttttt 5 | >test_final_len_f3 6 | atgatcatgattaagctgttttttttttt 7 | -------------------------------------------------------------------------------- /test_data/test_trans_all_frames.fa: -------------------------------------------------------------------------------- 1 | >MANATEE_seq 2 | atggcgaacgcgaccgaagaataa 3 | -------------------------------------------------------------------------------- /versioneer.py: -------------------------------------------------------------------------------- 
1 | 2 | # Version: 0.18 3 | 4 | """The Versioneer - like a rocketeer, but for versions. 5 | 6 | The Versioneer 7 | ============== 8 | 9 | * like a rocketeer, but for versions! 10 | * https://github.com/warner/python-versioneer 11 | * Brian Warner 12 | * License: Public Domain 13 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy 14 | * [![Latest Version] 15 | (https://pypip.in/version/versioneer/badge.svg?style=flat) 16 | ](https://pypi.python.org/pypi/versioneer/) 17 | * [![Build Status] 18 | (https://travis-ci.org/warner/python-versioneer.png?branch=master) 19 | ](https://travis-ci.org/warner/python-versioneer) 20 | 21 | This is a tool for managing a recorded version number in distutils-based 22 | python projects. The goal is to remove the tedious and error-prone "update 23 | the embedded version string" step from your release process. Making a new 24 | release should be as easy as recording a new tag in your version-control 25 | system, and maybe making new tarballs. 26 | 27 | 28 | ## Quick Install 29 | 30 | * `pip install versioneer` to somewhere to your $PATH 31 | * add a `[versioneer]` section to your setup.cfg (see below) 32 | * run `versioneer install` in your source tree, commit the results 33 | 34 | ## Version Identifiers 35 | 36 | Source trees come from a variety of places: 37 | 38 | * a version-control system checkout (mostly used by developers) 39 | * a nightly tarball, produced by build automation 40 | * a snapshot tarball, produced by a web-based VCS browser, like github's 41 | "tarball from tag" feature 42 | * a release tarball, produced by "setup.py sdist", distributed through PyPI 43 | 44 | Within each source tree, the version identifier (either a string or a number, 45 | this tool is format-agnostic) can come from a variety of places: 46 | 47 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows 48 | about recent "tags" and an absolute revision-id 49 | * the name of the directory into which the tarball was unpacked 50 | * an expanded VCS keyword ($Id$, etc) 51 | * a `_version.py` created by some earlier build step 52 | 53 | For released software, the version identifier is closely related to a VCS 54 | tag. Some projects use tag names that include more than just the version 55 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool 56 | needs to strip the tag prefix to extract the version identifier. For 57 | unreleased software (between tags), the version identifier should provide 58 | enough information to help developers recreate the same tree, while also 59 | giving them an idea of roughly how old the tree is (after version 1.2, before 60 | version 1.3). Many VCS systems can report a description that captures this, 61 | for example `git describe --tags --dirty --always` reports things like 62 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 63 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has 64 | uncommitted changes. 65 | 66 | The version identifier is used for multiple purposes: 67 | 68 | * to allow the module to self-identify its version: `myproject.__version__` 69 | * to choose a name and prefix for a 'setup.py sdist' tarball 70 | 71 | ## Theory of Operation 72 | 73 | Versioneer works by adding a special `_version.py` file into your source 74 | tree, where your `__init__.py` can import it. This `_version.py` knows how to 75 | dynamically ask the VCS tool for version information at import time. 
76 | 77 | `_version.py` also contains `$Revision$` markers, and the installation 78 | process marks `_version.py` to have this marker rewritten with a tag name 79 | during the `git archive` command. As a result, generated tarballs will 80 | contain enough information to get the proper version. 81 | 82 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 83 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 84 | that configures it. This overrides several distutils/setuptools commands to 85 | compute the version when invoked, and changes `setup.py build` and `setup.py 86 | sdist` to replace `_version.py` with a small static file that contains just 87 | the generated version data. 88 | 89 | ## Installation 90 | 91 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 92 | 93 | ## Version-String Flavors 94 | 95 | Code which uses Versioneer can learn about its version string at runtime by 96 | importing `_version` from your main `__init__.py` file and running the 97 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 98 | import the top-level `versioneer.py` and run `get_versions()`. 99 | 100 | Both functions return a dictionary with different flavors of version 101 | information: 102 | 103 | * `['version']`: A condensed version string, rendered using the selected 104 | style. This is the most commonly used value for the project's version 105 | string. The default "pep440" style yields strings like `0.11`, 106 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 107 | below for alternative styles. 108 | 109 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 110 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 111 | 112 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the 113 | commit date in ISO 8601 format. This will be None if the date is not 114 | available. 115 | 116 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that 117 | this is only accurate if run in a VCS checkout, otherwise it is likely to 118 | be False or None 119 | 120 | * `['error']`: if the version string could not be computed, this will be set 121 | to a string describing the problem, otherwise it will be None. It may be 122 | useful to throw an exception in setup.py if this is set, to avoid e.g. 123 | creating tarballs with a version string of "unknown". 124 | 125 | Some variants are more useful than others. Including `full-revisionid` in a 126 | bug report should allow developers to reconstruct the exact code being tested 127 | (or indicate the presence of local changes that should be shared with the 128 | developers). `version` is suitable for display in an "about" box or a CLI 129 | `--version` output: it can be easily compared against release notes and lists 130 | of bugs fixed in various releases. 131 | 132 | The installer adds the following text to your `__init__.py` to place a basic 133 | version in `YOURPROJECT.__version__`: 134 | 135 | from ._version import get_versions 136 | __version__ = get_versions()['version'] 137 | del get_versions 138 | 139 | ## Styles 140 | 141 | The setup.cfg `style=` configuration controls how the VCS information is 142 | rendered into a version string. 
143 | 144 | The default style, "pep440", produces a PEP440-compliant string, equal to the 145 | un-prefixed tag name for actual releases, and containing an additional "local 146 | version" section with more detail for in-between builds. For Git, this is 147 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags 148 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the 149 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and 150 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released 151 | software (exactly equal to a known tag), the identifier will only contain the 152 | stripped tag, e.g. "0.11". 153 | 154 | Other styles are available. See [details.md](details.md) in the Versioneer 155 | source tree for descriptions. 156 | 157 | ## Debugging 158 | 159 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend 160 | to return a version of "0+unknown". To investigate the problem, run `setup.py 161 | version`, which will run the version-lookup code in a verbose mode, and will 162 | display the full contents of `get_versions()` (including the `error` string, 163 | which may help identify what went wrong). 164 | 165 | ## Known Limitations 166 | 167 | Some situations are known to cause problems for Versioneer. This section 168 | details the most significant ones. More can be found on the GitHub 169 | [issues page](https://github.com/warner/python-versioneer/issues). 170 | 171 | ### Subprojects 172 | 173 | Versioneer has limited support for source trees in which `setup.py` is not in 174 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are 175 | two common reasons why `setup.py` might not be in the root: 176 | 177 | * Source trees which contain multiple subprojects, such as 178 | [Buildbot](https://github.com/buildbot/buildbot), which contains both 179 | "master" and "slave" subprojects, each with their own `setup.py`, 180 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI 181 | distributions (and upload multiple independently-installable tarballs). 182 | * Source trees whose main purpose is to contain a C library, but which also 183 | provide bindings to Python (and perhaps other languages) in subdirectories. 184 | 185 | Versioneer will look for `.git` in parent directories, and most operations 186 | should get the right version string. However `pip` and `setuptools` have bugs 187 | and implementation details which frequently cause `pip install .` from a 188 | subproject directory to fail to find a correct version string (so it usually 189 | defaults to `0+unknown`). 190 | 191 | `pip install --editable .` should work correctly. `setup.py install` might 192 | work too. 193 | 194 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in 195 | some later version. 196 | 197 | [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking 198 | this issue. The discussion in 199 | [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the 200 | issue from the Versioneer side in more detail. 201 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and 202 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve 203 | pip to let Versioneer work correctly. 204 | 205 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the 206 | `setup.cfg`, so subprojects were completely unsupported with those releases.
207 | 208 | ### Editable installs with setuptools <= 18.5 209 | 210 | `setup.py develop` and `pip install --editable .` allow you to install a 211 | project into a virtualenv once, then continue editing the source code (and 212 | test) without re-installing after every change. 213 | 214 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a 215 | convenient way to specify executable scripts that should be installed along 216 | with the python package. 217 | 218 | These both work as expected when using modern setuptools. When using 219 | setuptools-18.5 or earlier, however, certain operations will cause 220 | `pkg_resources.DistributionNotFound` errors when running the entrypoint 221 | script, which must be resolved by re-installing the package. This happens 222 | when the install happens with one version, then the egg_info data is 223 | regenerated while a different version is checked out. Many setup.py commands 224 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into 225 | a different virtualenv), so this can be surprising. 226 | 227 | [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes 228 | this one, but upgrading to a newer version of setuptools should probably 229 | resolve it. 230 | 231 | ### Unicode version strings 232 | 233 | While Versioneer works (and is continually tested) with both Python 2 and 234 | Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. 235 | Newer releases probably generate unicode version strings on py2. It's not 236 | clear that this is wrong, but it may be surprising for applications when they 237 | write these strings to a network connection or include them in bytes-oriented 238 | APIs like cryptographic checksums. 239 | 240 | [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates 241 | this question. 242 | 243 | 244 | ## Updating Versioneer 245 | 246 | To upgrade your project to a new release of Versioneer, do the following: 247 | 248 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 249 | * edit `setup.cfg`, if necessary, to include any new configuration settings 250 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 251 | * re-run `versioneer install` in your source tree, to replace 252 | `SRC/_version.py` 253 | * commit any changed files 254 | 255 | ## Future Directions 256 | 257 | This tool is designed to be easily extended to other version-control 258 | systems: all VCS-specific components are in separate directories like 259 | src/git/ . The top-level `versioneer.py` script is assembled from these 260 | components by running make-versioneer.py . In the future, make-versioneer.py 261 | will take a VCS name as an argument, and will construct a version of 262 | `versioneer.py` that is specific to the given VCS. It might also take the 263 | configuration arguments that are currently provided manually during 264 | installation by editing setup.py . Alternatively, it might go the other 265 | direction and include code from all supported VCS systems, reducing the 266 | number of intermediate scripts. 267 | 268 | 269 | ## License 270 | 271 | To make Versioneer easier to embed, all its code is dedicated to the public 272 | domain. The `_version.py` that it creates is also in the public domain. 273 | Specifically, both are released under the Creative Commons "Public Domain 274 | Dedication" license (CC0-1.0), as described in 275 | https://creativecommons.org/publicdomain/zero/1.0/ .
276 | 277 | """ 278 | 279 | from __future__ import print_function 280 | try: 281 | import configparser 282 | except ImportError: 283 | import ConfigParser as configparser 284 | import errno 285 | import json 286 | import os 287 | import re 288 | import subprocess 289 | import sys 290 | 291 | 292 | class VersioneerConfig: 293 | """Container for Versioneer configuration parameters.""" 294 | 295 | 296 | def get_root(): 297 | """Get the project root directory. 298 | 299 | We require that all commands are run from the project root, i.e. the 300 | directory that contains setup.py, setup.cfg, and versioneer.py . 301 | """ 302 | root = os.path.realpath(os.path.abspath(os.getcwd())) 303 | setup_py = os.path.join(root, "setup.py") 304 | versioneer_py = os.path.join(root, "versioneer.py") 305 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 306 | # allow 'python path/to/setup.py COMMAND' 307 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) 308 | setup_py = os.path.join(root, "setup.py") 309 | versioneer_py = os.path.join(root, "versioneer.py") 310 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 311 | err = ("Versioneer was unable to find the project root directory. " 312 | "Versioneer requires setup.py to be executed from " 313 | "its immediate directory (like 'python setup.py COMMAND'), " 314 | "or in a way that lets it use sys.argv[0] to find the root " 315 | "(like 'python path/to/setup.py COMMAND').") 316 | raise VersioneerBadRootError(err) 317 | try: 318 | # Certain runtime workflows (setup.py install/develop in a setuptools 319 | # tree) execute all dependencies in a single python process, so 320 | # "versioneer" may be imported multiple times, and python's shared 321 | # module-import table will cache the first one. So we can't use 322 | # os.path.dirname(__file__), as that will find whichever 323 | # versioneer.py was first imported, even in later projects. 324 | me = os.path.realpath(os.path.abspath(__file__)) 325 | me_dir = os.path.normcase(os.path.splitext(me)[0]) 326 | vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) 327 | if me_dir != vsr_dir: 328 | print("Warning: build in %s is using versioneer.py from %s" 329 | % (os.path.dirname(me), versioneer_py)) 330 | except NameError: 331 | pass 332 | return root 333 | 334 | 335 | def get_config_from_root(root): 336 | """Read the project setup.cfg file to determine Versioneer config.""" 337 | # This might raise EnvironmentError (if setup.cfg is missing), or 338 | # configparser.NoSectionError (if it lacks a [versioneer] section), or 339 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at 340 | # the top of versioneer.py for instructions on writing your setup.cfg .
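    # As a sketch, the section parsed below looks something like the
    # following (all values here are illustrative; tag_prefix may
    # legitimately be empty):
    #
    #   [versioneer]
    #   VCS = git
    #   style = pep440
    #   versionfile_source = myproject/_version.py
    #   versionfile_build = myproject/_version.py
    #   tag_prefix =
    #   parentdir_prefix = myproject-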
341 | setup_cfg = os.path.join(root, "setup.cfg") 342 | parser = configparser.SafeConfigParser() 343 | with open(setup_cfg, "r") as f: 344 | parser.readfp(f) 345 | VCS = parser.get("versioneer", "VCS") # mandatory 346 | 347 | def get(parser, name): 348 | if parser.has_option("versioneer", name): 349 | return parser.get("versioneer", name) 350 | return None 351 | cfg = VersioneerConfig() 352 | cfg.VCS = VCS 353 | cfg.style = get(parser, "style") or "" 354 | cfg.versionfile_source = get(parser, "versionfile_source") 355 | cfg.versionfile_build = get(parser, "versionfile_build") 356 | cfg.tag_prefix = get(parser, "tag_prefix") 357 | if cfg.tag_prefix in ("''", '""'): 358 | cfg.tag_prefix = "" 359 | cfg.parentdir_prefix = get(parser, "parentdir_prefix") 360 | cfg.verbose = get(parser, "verbose") 361 | return cfg 362 | 363 | 364 | class NotThisMethod(Exception): 365 | """Exception raised if a method is not valid for the current scenario.""" 366 | 367 | 368 | # these dictionaries contain VCS-specific tools 369 | LONG_VERSION_PY = {} 370 | HANDLERS = {} 371 | 372 | 373 | def register_vcs_handler(vcs, method): # decorator 374 | """Decorator to mark a method as the handler for a particular VCS.""" 375 | def decorate(f): 376 | """Store f in HANDLERS[vcs][method].""" 377 | if vcs not in HANDLERS: 378 | HANDLERS[vcs] = {} 379 | HANDLERS[vcs][method] = f 380 | return f 381 | return decorate 382 | 383 | 384 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 385 | env=None): 386 | """Call the given command(s).""" 387 | assert isinstance(commands, list) 388 | p = None 389 | for c in commands: 390 | try: 391 | dispcmd = str([c] + args) 392 | # remember shell=False, so use git.cmd on windows, not just git 393 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 394 | stdout=subprocess.PIPE, 395 | stderr=(subprocess.PIPE if hide_stderr 396 | else None)) 397 | break 398 | except EnvironmentError: 399 | e = sys.exc_info()[1] 400 | if e.errno == errno.ENOENT: 401 | continue 402 | if verbose: 403 | print("unable to run %s" % dispcmd) 404 | print(e) 405 | return None, None 406 | else: 407 | if verbose: 408 | print("unable to find command, tried %s" % (commands,)) 409 | return None, None 410 | stdout = p.communicate()[0].strip() 411 | if sys.version_info[0] >= 3: 412 | stdout = stdout.decode() 413 | if p.returncode != 0: 414 | if verbose: 415 | print("unable to run %s (error)" % dispcmd) 416 | print("stdout was %s" % stdout) 417 | return None, p.returncode 418 | return stdout, p.returncode 419 | 420 | 421 | LONG_VERSION_PY['git'] = ''' 422 | # This file helps to compute a version number in source trees obtained from 423 | # git-archive tarball (such as those provided by github's download-from-tag 424 | # feature). Distribution tarballs (built by setup.py sdist) and build 425 | # directories (produced by setup.py build) will contain a much shorter file 426 | # that just contains the computed version number. 427 | 428 | # This file is released into the public domain. Generated by 429 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 430 | 431 | """Git implementation of _version.py.""" 432 | 433 | import errno 434 | import os 435 | import re 436 | import subprocess 437 | import sys 438 | 439 | 440 | def get_keywords(): 441 | """Get the keywords needed to look up the version information.""" 442 | # these strings will be replaced by git during git-archive.
443 | # setup.py/versioneer.py will grep for the variable names, so they must 444 | # each be defined on a line of their own. _version.py will just call 445 | # get_keywords(). 446 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 447 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 448 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" 449 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 450 | return keywords 451 | 452 | 453 | class VersioneerConfig: 454 | """Container for Versioneer configuration parameters.""" 455 | 456 | 457 | def get_config(): 458 | """Create, populate and return the VersioneerConfig() object.""" 459 | # these strings are filled in when 'setup.py versioneer' creates 460 | # _version.py 461 | cfg = VersioneerConfig() 462 | cfg.VCS = "git" 463 | cfg.style = "%(STYLE)s" 464 | cfg.tag_prefix = "%(TAG_PREFIX)s" 465 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 466 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 467 | cfg.verbose = False 468 | return cfg 469 | 470 | 471 | class NotThisMethod(Exception): 472 | """Exception raised if a method is not valid for the current scenario.""" 473 | 474 | 475 | LONG_VERSION_PY = {} 476 | HANDLERS = {} 477 | 478 | 479 | def register_vcs_handler(vcs, method): # decorator 480 | """Decorator to mark a method as the handler for a particular VCS.""" 481 | def decorate(f): 482 | """Store f in HANDLERS[vcs][method].""" 483 | if vcs not in HANDLERS: 484 | HANDLERS[vcs] = {} 485 | HANDLERS[vcs][method] = f 486 | return f 487 | return decorate 488 | 489 | 490 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 491 | env=None): 492 | """Call the given command(s).""" 493 | assert isinstance(commands, list) 494 | p = None 495 | for c in commands: 496 | try: 497 | dispcmd = str([c] + args) 498 | # remember shell=False, so use git.cmd on windows, not just git 499 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 500 | stdout=subprocess.PIPE, 501 | stderr=(subprocess.PIPE if hide_stderr 502 | else None)) 503 | break 504 | except EnvironmentError: 505 | e = sys.exc_info()[1] 506 | if e.errno == errno.ENOENT: 507 | continue 508 | if verbose: 509 | print("unable to run %%s" %% dispcmd) 510 | print(e) 511 | return None, None 512 | else: 513 | if verbose: 514 | print("unable to find command, tried %%s" %% (commands,)) 515 | return None, None 516 | stdout = p.communicate()[0].strip() 517 | if sys.version_info[0] >= 3: 518 | stdout = stdout.decode() 519 | if p.returncode != 0: 520 | if verbose: 521 | print("unable to run %%s (error)" %% dispcmd) 522 | print("stdout was %%s" %% stdout) 523 | return None, p.returncode 524 | return stdout, p.returncode 525 | 526 | 527 | def versions_from_parentdir(parentdir_prefix, root, verbose): 528 | """Try to determine the version from the parent directory name. 529 | 530 | Source tarballs conventionally unpack into a directory that includes both 531 | the project name and a version string. 
We will also support searching up 532 | two directory levels for an appropriately named parent directory 533 | """ 534 | rootdirs = [] 535 | 536 | for i in range(3): 537 | dirname = os.path.basename(root) 538 | if dirname.startswith(parentdir_prefix): 539 | return {"version": dirname[len(parentdir_prefix):], 540 | "full-revisionid": None, 541 | "dirty": False, "error": None, "date": None} 542 | else: 543 | rootdirs.append(root) 544 | root = os.path.dirname(root) # up a level 545 | 546 | if verbose: 547 | print("Tried directories %%s but none started with prefix %%s" %% 548 | (str(rootdirs), parentdir_prefix)) 549 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 550 | 551 | 552 | @register_vcs_handler("git", "get_keywords") 553 | def git_get_keywords(versionfile_abs): 554 | """Extract version information from the given file.""" 555 | # the code embedded in _version.py can just fetch the value of these 556 | # keywords. When used from setup.py, we don't want to import _version.py, 557 | # so we do it with a regexp instead. This function is not used from 558 | # _version.py. 559 | keywords = {} 560 | try: 561 | f = open(versionfile_abs, "r") 562 | for line in f.readlines(): 563 | if line.strip().startswith("git_refnames ="): 564 | mo = re.search(r'=\s*"(.*)"', line) 565 | if mo: 566 | keywords["refnames"] = mo.group(1) 567 | if line.strip().startswith("git_full ="): 568 | mo = re.search(r'=\s*"(.*)"', line) 569 | if mo: 570 | keywords["full"] = mo.group(1) 571 | if line.strip().startswith("git_date ="): 572 | mo = re.search(r'=\s*"(.*)"', line) 573 | if mo: 574 | keywords["date"] = mo.group(1) 575 | f.close() 576 | except EnvironmentError: 577 | pass 578 | return keywords 579 | 580 | 581 | @register_vcs_handler("git", "keywords") 582 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 583 | """Get version information from git keywords.""" 584 | if not keywords: 585 | raise NotThisMethod("no keywords at all, weird") 586 | date = keywords.get("date") 587 | if date is not None: 588 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant 589 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 590 | # -like" string, which we must then edit to make compliant), because 591 | # it's been around since git-1.5.3, and it's too difficult to 592 | # discover which version we're using, or to work around using an 593 | # older one. 594 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 595 | refnames = keywords["refnames"].strip() 596 | if refnames.startswith("$Format"): 597 | if verbose: 598 | print("keywords are unexpanded, not using") 599 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 600 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 601 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 602 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 603 | TAG = "tag: " 604 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 605 | if not tags: 606 | # Either we're using git < 1.8.3, or there really are no tags. We use 607 | # a heuristic: assume all version tags have a digit. The old git %%d 608 | # expansion behaves like git log --decorate=short and strips out the 609 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 610 | # between branches and tags. By ignoring refnames without digits, we 611 | # filter out many common branch names like "release" and 612 | # "stabilization", as well as "HEAD" and "master". 
613 | tags = set([r for r in refs if re.search(r'\d', r)]) 614 | if verbose: 615 | print("discarding '%%s', no digits" %% ",".join(refs - tags)) 616 | if verbose: 617 | print("likely tags: %%s" %% ",".join(sorted(tags))) 618 | for ref in sorted(tags): 619 | # sorting will prefer e.g. "2.0" over "2.0rc1" 620 | if ref.startswith(tag_prefix): 621 | r = ref[len(tag_prefix):] 622 | if verbose: 623 | print("picking %%s" %% r) 624 | return {"version": r, 625 | "full-revisionid": keywords["full"].strip(), 626 | "dirty": False, "error": None, 627 | "date": date} 628 | # no suitable tags, so version is "0+unknown", but full hex is still there 629 | if verbose: 630 | print("no suitable tags, using unknown + full revision id") 631 | return {"version": "0+unknown", 632 | "full-revisionid": keywords["full"].strip(), 633 | "dirty": False, "error": "no suitable tags", "date": None} 634 | 635 | 636 | @register_vcs_handler("git", "pieces_from_vcs") 637 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 638 | """Get version from 'git describe' in the root of the source tree. 639 | 640 | This only gets called if the git-archive 'subst' keywords were *not* 641 | expanded, and _version.py hasn't already been rewritten with a short 642 | version string, meaning we're inside a checked out source tree. 643 | """ 644 | GITS = ["git"] 645 | if sys.platform == "win32": 646 | GITS = ["git.cmd", "git.exe"] 647 | 648 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 649 | hide_stderr=True) 650 | if rc != 0: 651 | if verbose: 652 | print("Directory %%s not under git control" %% root) 653 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 654 | 655 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 656 | # if there isn't one, this yields HEX[-dirty] (no NUM) 657 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 658 | "--always", "--long", 659 | "--match", "%%s*" %% tag_prefix], 660 | cwd=root) 661 | # --long was added in git-1.5.5 662 | if describe_out is None: 663 | raise NotThisMethod("'git describe' failed") 664 | describe_out = describe_out.strip() 665 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 666 | if full_out is None: 667 | raise NotThisMethod("'git rev-parse' failed") 668 | full_out = full_out.strip() 669 | 670 | pieces = {} 671 | pieces["long"] = full_out 672 | pieces["short"] = full_out[:7] # maybe improved later 673 | pieces["error"] = None 674 | 675 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 676 | # TAG might have hyphens. 677 | git_describe = describe_out 678 | 679 | # look for -dirty suffix 680 | dirty = git_describe.endswith("-dirty") 681 | pieces["dirty"] = dirty 682 | if dirty: 683 | git_describe = git_describe[:git_describe.rindex("-dirty")] 684 | 685 | # now we have TAG-NUM-gHEX or HEX 686 | 687 | if "-" in git_describe: 688 | # TAG-NUM-gHEX 689 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 690 | if not mo: 691 | # unparseable. Maybe git-describe is misbehaving? 
692 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 693 | %% describe_out) 694 | return pieces 695 | 696 | # tag 697 | full_tag = mo.group(1) 698 | if not full_tag.startswith(tag_prefix): 699 | if verbose: 700 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 701 | print(fmt %% (full_tag, tag_prefix)) 702 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 703 | %% (full_tag, tag_prefix)) 704 | return pieces 705 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 706 | 707 | # distance: number of commits since tag 708 | pieces["distance"] = int(mo.group(2)) 709 | 710 | # commit: short hex revision ID 711 | pieces["short"] = mo.group(3) 712 | 713 | else: 714 | # HEX: no tags 715 | pieces["closest-tag"] = None 716 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 717 | cwd=root) 718 | pieces["distance"] = int(count_out) # total number of commits 719 | 720 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 721 | date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], 722 | cwd=root)[0].strip() 723 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 724 | 725 | return pieces 726 | 727 | 728 | def plus_or_dot(pieces): 729 | """Return a + if we don't already have one, else return a .""" 730 | if "+" in pieces.get("closest-tag", ""): 731 | return "." 732 | return "+" 733 | 734 | 735 | def render_pep440(pieces): 736 | """Build up version string, with post-release "local version identifier". 737 | 738 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 739 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 740 | 741 | Exceptions: 742 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 743 | """ 744 | if pieces["closest-tag"]: 745 | rendered = pieces["closest-tag"] 746 | if pieces["distance"] or pieces["dirty"]: 747 | rendered += plus_or_dot(pieces) 748 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 749 | if pieces["dirty"]: 750 | rendered += ".dirty" 751 | else: 752 | # exception #1 753 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 754 | pieces["short"]) 755 | if pieces["dirty"]: 756 | rendered += ".dirty" 757 | return rendered 758 | 759 | 760 | def render_pep440_pre(pieces): 761 | """TAG[.post.devDISTANCE] -- No -dirty. 762 | 763 | Exceptions: 764 | 1: no tags. 0.post.devDISTANCE 765 | """ 766 | if pieces["closest-tag"]: 767 | rendered = pieces["closest-tag"] 768 | if pieces["distance"]: 769 | rendered += ".post.dev%%d" %% pieces["distance"] 770 | else: 771 | # exception #1 772 | rendered = "0.post.dev%%d" %% pieces["distance"] 773 | return rendered 774 | 775 | 776 | def render_pep440_post(pieces): 777 | """TAG[.postDISTANCE[.dev0]+gHEX] . 778 | 779 | The ".dev0" means dirty. Note that .dev0 sorts backwards 780 | (a dirty tree will appear "older" than the corresponding clean one), 781 | but you shouldn't be releasing software with -dirty anyways. 782 | 783 | Exceptions: 784 | 1: no tags. 
0.postDISTANCE[.dev0] 785 | """ 786 | if pieces["closest-tag"]: 787 | rendered = pieces["closest-tag"] 788 | if pieces["distance"] or pieces["dirty"]: 789 | rendered += ".post%%d" %% pieces["distance"] 790 | if pieces["dirty"]: 791 | rendered += ".dev0" 792 | rendered += plus_or_dot(pieces) 793 | rendered += "g%%s" %% pieces["short"] 794 | else: 795 | # exception #1 796 | rendered = "0.post%%d" %% pieces["distance"] 797 | if pieces["dirty"]: 798 | rendered += ".dev0" 799 | rendered += "+g%%s" %% pieces["short"] 800 | return rendered 801 | 802 | 803 | def render_pep440_old(pieces): 804 | """TAG[.postDISTANCE[.dev0]] . 805 | 806 | The ".dev0" means dirty. 807 | 808 | Exceptions: 809 | 1: no tags. 0.postDISTANCE[.dev0] 810 | """ 811 | if pieces["closest-tag"]: 812 | rendered = pieces["closest-tag"] 813 | if pieces["distance"] or pieces["dirty"]: 814 | rendered += ".post%%d" %% pieces["distance"] 815 | if pieces["dirty"]: 816 | rendered += ".dev0" 817 | else: 818 | # exception #1 819 | rendered = "0.post%%d" %% pieces["distance"] 820 | if pieces["dirty"]: 821 | rendered += ".dev0" 822 | return rendered 823 | 824 | 825 | def render_git_describe(pieces): 826 | """TAG[-DISTANCE-gHEX][-dirty]. 827 | 828 | Like 'git describe --tags --dirty --always'. 829 | 830 | Exceptions: 831 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 832 | """ 833 | if pieces["closest-tag"]: 834 | rendered = pieces["closest-tag"] 835 | if pieces["distance"]: 836 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 837 | else: 838 | # exception #1 839 | rendered = pieces["short"] 840 | if pieces["dirty"]: 841 | rendered += "-dirty" 842 | return rendered 843 | 844 | 845 | def render_git_describe_long(pieces): 846 | """TAG-DISTANCE-gHEX[-dirty]. 847 | 848 | Like 'git describe --tags --dirty --always --long'. 849 | The distance/hash is unconditional. 850 | 851 | Exceptions: 852 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 853 | """ 854 | if pieces["closest-tag"]: 855 | rendered = pieces["closest-tag"] 856 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 857 | else: 858 | # exception #1 859 | rendered = pieces["short"] 860 | if pieces["dirty"]: 861 | rendered += "-dirty" 862 | return rendered 863 | 864 | 865 | def render(pieces, style): 866 | """Render the given version pieces into the requested style.""" 867 | if pieces["error"]: 868 | return {"version": "unknown", 869 | "full-revisionid": pieces.get("long"), 870 | "dirty": None, 871 | "error": pieces["error"], 872 | "date": None} 873 | 874 | if not style or style == "default": 875 | style = "pep440" # the default 876 | 877 | if style == "pep440": 878 | rendered = render_pep440(pieces) 879 | elif style == "pep440-pre": 880 | rendered = render_pep440_pre(pieces) 881 | elif style == "pep440-post": 882 | rendered = render_pep440_post(pieces) 883 | elif style == "pep440-old": 884 | rendered = render_pep440_old(pieces) 885 | elif style == "git-describe": 886 | rendered = render_git_describe(pieces) 887 | elif style == "git-describe-long": 888 | rendered = render_git_describe_long(pieces) 889 | else: 890 | raise ValueError("unknown style '%%s'" %% style) 891 | 892 | return {"version": rendered, "full-revisionid": pieces["long"], 893 | "dirty": pieces["dirty"], "error": None, 894 | "date": pieces.get("date")} 895 | 896 | 897 | def get_versions(): 898 | """Get version information or return default if unable to do so.""" 899 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE.
If we have 900 | # __file__, we can work backwards from there to the root. Some 901 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 902 | # case we can only use expanded keywords. 903 | 904 | cfg = get_config() 905 | verbose = cfg.verbose 906 | 907 | try: 908 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 909 | verbose) 910 | except NotThisMethod: 911 | pass 912 | 913 | try: 914 | root = os.path.realpath(__file__) 915 | # versionfile_source is the relative path from the top of the source 916 | # tree (where the .git directory might live) to this file. Invert 917 | # this to find the root from __file__. 918 | for i in cfg.versionfile_source.split('/'): 919 | root = os.path.dirname(root) 920 | except NameError: 921 | return {"version": "0+unknown", "full-revisionid": None, 922 | "dirty": None, 923 | "error": "unable to find root of source tree", 924 | "date": None} 925 | 926 | try: 927 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 928 | return render(pieces, cfg.style) 929 | except NotThisMethod: 930 | pass 931 | 932 | try: 933 | if cfg.parentdir_prefix: 934 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 935 | except NotThisMethod: 936 | pass 937 | 938 | return {"version": "0+unknown", "full-revisionid": None, 939 | "dirty": None, 940 | "error": "unable to compute version", "date": None} 941 | ''' 942 | 943 | 944 | @register_vcs_handler("git", "get_keywords") 945 | def git_get_keywords(versionfile_abs): 946 | """Extract version information from the given file.""" 947 | # the code embedded in _version.py can just fetch the value of these 948 | # keywords. When used from setup.py, we don't want to import _version.py, 949 | # so we do it with a regexp instead. This function is not used from 950 | # _version.py. 951 | keywords = {} 952 | try: 953 | f = open(versionfile_abs, "r") 954 | for line in f.readlines(): 955 | if line.strip().startswith("git_refnames ="): 956 | mo = re.search(r'=\s*"(.*)"', line) 957 | if mo: 958 | keywords["refnames"] = mo.group(1) 959 | if line.strip().startswith("git_full ="): 960 | mo = re.search(r'=\s*"(.*)"', line) 961 | if mo: 962 | keywords["full"] = mo.group(1) 963 | if line.strip().startswith("git_date ="): 964 | mo = re.search(r'=\s*"(.*)"', line) 965 | if mo: 966 | keywords["date"] = mo.group(1) 967 | f.close() 968 | except EnvironmentError: 969 | pass 970 | return keywords 971 | 972 | 973 | @register_vcs_handler("git", "keywords") 974 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 975 | """Get version information from git keywords.""" 976 | if not keywords: 977 | raise NotThisMethod("no keywords at all, weird") 978 | date = keywords.get("date") 979 | if date is not None: 980 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 981 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 982 | # -like" string, which we must then edit to make compliant), because 983 | # it's been around since git-1.5.3, and it's too difficult to 984 | # discover which version we're using, or to work around using an 985 | # older one. 
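        # Illustrative example (not from the source): a "%ci" value such as
        # "2019-05-01 12:34:56 +1000" becomes "2019-05-01T12:34:56+1000"
        # after the two replace() edits below.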
986 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 987 | refnames = keywords["refnames"].strip() 988 | if refnames.startswith("$Format"): 989 | if verbose: 990 | print("keywords are unexpanded, not using") 991 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 992 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 993 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 994 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 995 | TAG = "tag: " 996 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 997 | if not tags: 998 | # Either we're using git < 1.8.3, or there really are no tags. We use 999 | # a heuristic: assume all version tags have a digit. The old git %d 1000 | # expansion behaves like git log --decorate=short and strips out the 1001 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 1002 | # between branches and tags. By ignoring refnames without digits, we 1003 | # filter out many common branch names like "release" and 1004 | # "stabilization", as well as "HEAD" and "master". 1005 | tags = set([r for r in refs if re.search(r'\d', r)]) 1006 | if verbose: 1007 | print("discarding '%s', no digits" % ",".join(refs - tags)) 1008 | if verbose: 1009 | print("likely tags: %s" % ",".join(sorted(tags))) 1010 | for ref in sorted(tags): 1011 | # sorting will prefer e.g. "2.0" over "2.0rc1" 1012 | if ref.startswith(tag_prefix): 1013 | r = ref[len(tag_prefix):] 1014 | if verbose: 1015 | print("picking %s" % r) 1016 | return {"version": r, 1017 | "full-revisionid": keywords["full"].strip(), 1018 | "dirty": False, "error": None, 1019 | "date": date} 1020 | # no suitable tags, so version is "0+unknown", but full hex is still there 1021 | if verbose: 1022 | print("no suitable tags, using unknown + full revision id") 1023 | return {"version": "0+unknown", 1024 | "full-revisionid": keywords["full"].strip(), 1025 | "dirty": False, "error": "no suitable tags", "date": None} 1026 | 1027 | 1028 | @register_vcs_handler("git", "pieces_from_vcs") 1029 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 1030 | """Get version from 'git describe' in the root of the source tree. 1031 | 1032 | This only gets called if the git-archive 'subst' keywords were *not* 1033 | expanded, and _version.py hasn't already been rewritten with a short 1034 | version string, meaning we're inside a checked out source tree. 
1035 | """ 1036 | GITS = ["git"] 1037 | if sys.platform == "win32": 1038 | GITS = ["git.cmd", "git.exe"] 1039 | 1040 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 1041 | hide_stderr=True) 1042 | if rc != 0: 1043 | if verbose: 1044 | print("Directory %s not under git control" % root) 1045 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 1046 | 1047 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 1048 | # if there isn't one, this yields HEX[-dirty] (no NUM) 1049 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 1050 | "--always", "--long", 1051 | "--match", "%s*" % tag_prefix], 1052 | cwd=root) 1053 | # --long was added in git-1.5.5 1054 | if describe_out is None: 1055 | raise NotThisMethod("'git describe' failed") 1056 | describe_out = describe_out.strip() 1057 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 1058 | if full_out is None: 1059 | raise NotThisMethod("'git rev-parse' failed") 1060 | full_out = full_out.strip() 1061 | 1062 | pieces = {} 1063 | pieces["long"] = full_out 1064 | pieces["short"] = full_out[:7] # maybe improved later 1065 | pieces["error"] = None 1066 | 1067 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1068 | # TAG might have hyphens. 1069 | git_describe = describe_out 1070 | 1071 | # look for -dirty suffix 1072 | dirty = git_describe.endswith("-dirty") 1073 | pieces["dirty"] = dirty 1074 | if dirty: 1075 | git_describe = git_describe[:git_describe.rindex("-dirty")] 1076 | 1077 | # now we have TAG-NUM-gHEX or HEX 1078 | 1079 | if "-" in git_describe: 1080 | # TAG-NUM-gHEX 1081 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 1082 | if not mo: 1083 | # unparseable. Maybe git-describe is misbehaving? 1084 | pieces["error"] = ("unable to parse git-describe output: '%s'" 1085 | % describe_out) 1086 | return pieces 1087 | 1088 | # tag 1089 | full_tag = mo.group(1) 1090 | if not full_tag.startswith(tag_prefix): 1091 | if verbose: 1092 | fmt = "tag '%s' doesn't start with prefix '%s'" 1093 | print(fmt % (full_tag, tag_prefix)) 1094 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 1095 | % (full_tag, tag_prefix)) 1096 | return pieces 1097 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 1098 | 1099 | # distance: number of commits since tag 1100 | pieces["distance"] = int(mo.group(2)) 1101 | 1102 | # commit: short hex revision ID 1103 | pieces["short"] = mo.group(3) 1104 | 1105 | else: 1106 | # HEX: no tags 1107 | pieces["closest-tag"] = None 1108 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 1109 | cwd=root) 1110 | pieces["distance"] = int(count_out) # total number of commits 1111 | 1112 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 1113 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 1114 | cwd=root)[0].strip() 1115 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 1116 | 1117 | return pieces 1118 | 1119 | 1120 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1121 | """Git-specific installation logic for Versioneer. 1122 | 1123 | For Git, this means creating/changing .gitattributes to mark _version.py 1124 | for export-subst keyword substitution. 
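    As a sketch, the appended .gitattributes line has this shape (the path
    comes from the configured versionfile_source; the one shown here is
    illustrative):

        myproject/_version.py export-subst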
1125 | """ 1126 | GITS = ["git"] 1127 | if sys.platform == "win32": 1128 | GITS = ["git.cmd", "git.exe"] 1129 | files = [manifest_in, versionfile_source] 1130 | if ipy: 1131 | files.append(ipy) 1132 | try: 1133 | me = __file__ 1134 | if me.endswith(".pyc") or me.endswith(".pyo"): 1135 | me = os.path.splitext(me)[0] + ".py" 1136 | versioneer_file = os.path.relpath(me) 1137 | except NameError: 1138 | versioneer_file = "versioneer.py" 1139 | files.append(versioneer_file) 1140 | present = False 1141 | try: 1142 | f = open(".gitattributes", "r") 1143 | for line in f.readlines(): 1144 | if line.strip().startswith(versionfile_source): 1145 | if "export-subst" in line.strip().split()[1:]: 1146 | present = True 1147 | f.close() 1148 | except EnvironmentError: 1149 | pass 1150 | if not present: 1151 | f = open(".gitattributes", "a+") 1152 | f.write("%s export-subst\n" % versionfile_source) 1153 | f.close() 1154 | files.append(".gitattributes") 1155 | run_command(GITS, ["add", "--"] + files) 1156 | 1157 | 1158 | def versions_from_parentdir(parentdir_prefix, root, verbose): 1159 | """Try to determine the version from the parent directory name. 1160 | 1161 | Source tarballs conventionally unpack into a directory that includes both 1162 | the project name and a version string. We will also support searching up 1163 | two directory levels for an appropriately named parent directory 1164 | """ 1165 | rootdirs = [] 1166 | 1167 | for i in range(3): 1168 | dirname = os.path.basename(root) 1169 | if dirname.startswith(parentdir_prefix): 1170 | return {"version": dirname[len(parentdir_prefix):], 1171 | "full-revisionid": None, 1172 | "dirty": False, "error": None, "date": None} 1173 | else: 1174 | rootdirs.append(root) 1175 | root = os.path.dirname(root) # up a level 1176 | 1177 | if verbose: 1178 | print("Tried directories %s but none started with prefix %s" % 1179 | (str(rootdirs), parentdir_prefix)) 1180 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 1181 | 1182 | 1183 | SHORT_VERSION_PY = """ 1184 | # This file was generated by 'versioneer.py' (0.18) from 1185 | # revision-control system data, or from the parent directory name of an 1186 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 1187 | # of this file. 
1188 | 1189 | import json 1190 | 1191 | version_json = ''' 1192 | %s 1193 | ''' # END VERSION_JSON 1194 | 1195 | 1196 | def get_versions(): 1197 | return json.loads(version_json) 1198 | """ 1199 | 1200 | 1201 | def versions_from_file(filename): 1202 | """Try to determine the version from _version.py if present.""" 1203 | try: 1204 | with open(filename) as f: 1205 | contents = f.read() 1206 | except EnvironmentError: 1207 | raise NotThisMethod("unable to read _version.py") 1208 | mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", 1209 | contents, re.M | re.S) 1210 | if not mo: 1211 | mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", 1212 | contents, re.M | re.S) 1213 | if not mo: 1214 | raise NotThisMethod("no version_json in _version.py") 1215 | return json.loads(mo.group(1)) 1216 | 1217 | 1218 | def write_to_version_file(filename, versions): 1219 | """Write the given version number to the given _version.py file.""" 1220 | os.unlink(filename) 1221 | contents = json.dumps(versions, sort_keys=True, 1222 | indent=1, separators=(",", ": ")) 1223 | with open(filename, "w") as f: 1224 | f.write(SHORT_VERSION_PY % contents) 1225 | 1226 | print("set %s to '%s'" % (filename, versions["version"])) 1227 | 1228 | 1229 | def plus_or_dot(pieces): 1230 | """Return a + if we don't already have one, else return a .""" 1231 | if "+" in pieces.get("closest-tag", ""): 1232 | return "." 1233 | return "+" 1234 | 1235 | 1236 | def render_pep440(pieces): 1237 | """Build up version string, with post-release "local version identifier". 1238 | 1239 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 1240 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1241 | 1242 | Exceptions: 1243 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 1244 | """ 1245 | if pieces["closest-tag"]: 1246 | rendered = pieces["closest-tag"] 1247 | if pieces["distance"] or pieces["dirty"]: 1248 | rendered += plus_or_dot(pieces) 1249 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1250 | if pieces["dirty"]: 1251 | rendered += ".dirty" 1252 | else: 1253 | # exception #1 1254 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 1255 | pieces["short"]) 1256 | if pieces["dirty"]: 1257 | rendered += ".dirty" 1258 | return rendered 1259 | 1260 | 1261 | def render_pep440_pre(pieces): 1262 | """TAG[.post.devDISTANCE] -- No -dirty. 1263 | 1264 | Exceptions: 1265 | 1: no tags. 0.post.devDISTANCE 1266 | """ 1267 | if pieces["closest-tag"]: 1268 | rendered = pieces["closest-tag"] 1269 | if pieces["distance"]: 1270 | rendered += ".post.dev%d" % pieces["distance"] 1271 | else: 1272 | # exception #1 1273 | rendered = "0.post.dev%d" % pieces["distance"] 1274 | return rendered 1275 | 1276 | 1277 | def render_pep440_post(pieces): 1278 | """TAG[.postDISTANCE[.dev0]+gHEX] . 1279 | 1280 | The ".dev0" means dirty. Note that .dev0 sorts backwards 1281 | (a dirty tree will appear "older" than the corresponding clean one), 1282 | but you shouldn't be releasing software with -dirty anyways. 1283 | 1284 | Exceptions: 1285 | 1: no tags. 
0.postDISTANCE[.dev0] 1286 | """ 1287 | if pieces["closest-tag"]: 1288 | rendered = pieces["closest-tag"] 1289 | if pieces["distance"] or pieces["dirty"]: 1290 | rendered += ".post%d" % pieces["distance"] 1291 | if pieces["dirty"]: 1292 | rendered += ".dev0" 1293 | rendered += plus_or_dot(pieces) 1294 | rendered += "g%s" % pieces["short"] 1295 | else: 1296 | # exception #1 1297 | rendered = "0.post%d" % pieces["distance"] 1298 | if pieces["dirty"]: 1299 | rendered += ".dev0" 1300 | rendered += "+g%s" % pieces["short"] 1301 | return rendered 1302 | 1303 | 1304 | def render_pep440_old(pieces): 1305 | """TAG[.postDISTANCE[.dev0]] . 1306 | 1307 | The ".dev0" means dirty. 1308 | 1309 | Exceptions: 1310 | 1: no tags. 0.postDISTANCE[.dev0] 1311 | """ 1312 | if pieces["closest-tag"]: 1313 | rendered = pieces["closest-tag"] 1314 | if pieces["distance"] or pieces["dirty"]: 1315 | rendered += ".post%d" % pieces["distance"] 1316 | if pieces["dirty"]: 1317 | rendered += ".dev0" 1318 | else: 1319 | # exception #1 1320 | rendered = "0.post%d" % pieces["distance"] 1321 | if pieces["dirty"]: 1322 | rendered += ".dev0" 1323 | return rendered 1324 | 1325 | 1326 | def render_git_describe(pieces): 1327 | """TAG[-DISTANCE-gHEX][-dirty]. 1328 | 1329 | Like 'git describe --tags --dirty --always'. 1330 | 1331 | Exceptions: 1332 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1333 | """ 1334 | if pieces["closest-tag"]: 1335 | rendered = pieces["closest-tag"] 1336 | if pieces["distance"]: 1337 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1338 | else: 1339 | # exception #1 1340 | rendered = pieces["short"] 1341 | if pieces["dirty"]: 1342 | rendered += "-dirty" 1343 | return rendered 1344 | 1345 | 1346 | def render_git_describe_long(pieces): 1347 | """TAG-DISTANCE-gHEX[-dirty]. 1348 | 1349 | Like 'git describe --tags --dirty --always --long'. 1350 | The distance/hash is unconditional. 1351 | 1352 | Exceptions: 1353 | 1: no tags.
HEX[-dirty] (note: no 'g' prefix) 1354 | """ 1355 | if pieces["closest-tag"]: 1356 | rendered = pieces["closest-tag"] 1357 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1358 | else: 1359 | # exception #1 1360 | rendered = pieces["short"] 1361 | if pieces["dirty"]: 1362 | rendered += "-dirty" 1363 | return rendered 1364 | 1365 | 1366 | def render(pieces, style): 1367 | """Render the given version pieces into the requested style.""" 1368 | if pieces["error"]: 1369 | return {"version": "unknown", 1370 | "full-revisionid": pieces.get("long"), 1371 | "dirty": None, 1372 | "error": pieces["error"], 1373 | "date": None} 1374 | 1375 | if not style or style == "default": 1376 | style = "pep440" # the default 1377 | 1378 | if style == "pep440": 1379 | rendered = render_pep440(pieces) 1380 | elif style == "pep440-pre": 1381 | rendered = render_pep440_pre(pieces) 1382 | elif style == "pep440-post": 1383 | rendered = render_pep440_post(pieces) 1384 | elif style == "pep440-old": 1385 | rendered = render_pep440_old(pieces) 1386 | elif style == "git-describe": 1387 | rendered = render_git_describe(pieces) 1388 | elif style == "git-describe-long": 1389 | rendered = render_git_describe_long(pieces) 1390 | else: 1391 | raise ValueError("unknown style '%s'" % style) 1392 | 1393 | return {"version": rendered, "full-revisionid": pieces["long"], 1394 | "dirty": pieces["dirty"], "error": None, 1395 | "date": pieces.get("date")} 1396 | 1397 | 1398 | class VersioneerBadRootError(Exception): 1399 | """The project root directory is unknown or missing key files.""" 1400 | 1401 | 1402 | def get_versions(verbose=False): 1403 | """Get the project version from whatever source is available. 1404 | 1405 | Returns a dict with 'version', 'full-revisionid', 'dirty', 'error' and 'date' keys. 1406 | """ 1407 | if "versioneer" in sys.modules: 1408 | # see the discussion in cmdclass.py:get_cmdclass() 1409 | del sys.modules["versioneer"] 1410 | 1411 | root = get_root() 1412 | cfg = get_config_from_root(root) 1413 | 1414 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1415 | handlers = HANDLERS.get(cfg.VCS) 1416 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1417 | verbose = verbose or cfg.verbose 1418 | assert cfg.versionfile_source is not None, \ 1419 | "please set versioneer.versionfile_source" 1420 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1421 | 1422 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1423 | 1424 | # extract version from first of: _version.py, VCS command (e.g. 'git 1425 | # describe'), parentdir. This is meant to work for developers using a 1426 | # source checkout, for users of a tarball created by 'setup.py sdist', 1427 | # and for users of a tarball/zipball created by 'git archive' or github's 1428 | # download-from-tag feature or the equivalent in other VCSes.
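    # (Clarifying note: the expanded git-archive keywords are actually
    # consulted first in the code below, before _version.py, the VCS
    # command, and the parent-directory name.)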
1429 | 1430 | get_keywords_f = handlers.get("get_keywords") 1431 | from_keywords_f = handlers.get("keywords") 1432 | if get_keywords_f and from_keywords_f: 1433 | try: 1434 | keywords = get_keywords_f(versionfile_abs) 1435 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1436 | if verbose: 1437 | print("got version from expanded keyword %s" % ver) 1438 | return ver 1439 | except NotThisMethod: 1440 | pass 1441 | 1442 | try: 1443 | ver = versions_from_file(versionfile_abs) 1444 | if verbose: 1445 | print("got version from file %s %s" % (versionfile_abs, ver)) 1446 | return ver 1447 | except NotThisMethod: 1448 | pass 1449 | 1450 | from_vcs_f = handlers.get("pieces_from_vcs") 1451 | if from_vcs_f: 1452 | try: 1453 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1454 | ver = render(pieces, cfg.style) 1455 | if verbose: 1456 | print("got version from VCS %s" % ver) 1457 | return ver 1458 | except NotThisMethod: 1459 | pass 1460 | 1461 | try: 1462 | if cfg.parentdir_prefix: 1463 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1464 | if verbose: 1465 | print("got version from parentdir %s" % ver) 1466 | return ver 1467 | except NotThisMethod: 1468 | pass 1469 | 1470 | if verbose: 1471 | print("unable to compute version") 1472 | 1473 | return {"version": "0+unknown", "full-revisionid": None, 1474 | "dirty": None, "error": "unable to compute version", 1475 | "date": None} 1476 | 1477 | 1478 | def get_version(): 1479 | """Get the short version string for this project.""" 1480 | return get_versions()["version"] 1481 | 1482 | 1483 | def get_cmdclass(): 1484 | """Get the custom setuptools/distutils subclasses used by Versioneer.""" 1485 | if "versioneer" in sys.modules: 1486 | del sys.modules["versioneer"] 1487 | # this fixes the "python setup.py develop" case (also 'install' and 1488 | # 'easy_install .'), in which subdependencies of the main project are 1489 | # built (using setup.py bdist_egg) in the same python process. Assume 1490 | # a main project A and a dependency B, which use different versions 1491 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1492 | # sys.modules by the time B's setup.py is executed, causing B to run 1493 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1494 | # sandbox that restores sys.modules to its pre-build state, so the 1495 | # parent is protected against the child's "import versioneer". By 1496 | # removing ourselves from sys.modules here, before the child build 1497 | # happens, we protect the child from the parent's versioneer too.
1498 | # Also see https://github.com/warner/python-versioneer/issues/52 1499 | 1500 | cmds = {} 1501 | 1502 | # we add "version" to both distutils and setuptools 1503 | from distutils.core import Command 1504 | 1505 | class cmd_version(Command): 1506 | description = "report generated version string" 1507 | user_options = [] 1508 | boolean_options = [] 1509 | 1510 | def initialize_options(self): 1511 | pass 1512 | 1513 | def finalize_options(self): 1514 | pass 1515 | 1516 | def run(self): 1517 | vers = get_versions(verbose=True) 1518 | print("Version: %s" % vers["version"]) 1519 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1520 | print(" dirty: %s" % vers.get("dirty")) 1521 | print(" date: %s" % vers.get("date")) 1522 | if vers["error"]: 1523 | print(" error: %s" % vers["error"]) 1524 | cmds["version"] = cmd_version 1525 | 1526 | # we override "build_py" in both distutils and setuptools 1527 | # 1528 | # most invocation pathways end up running build_py: 1529 | # distutils/build -> build_py 1530 | # distutils/install -> distutils/build ->.. 1531 | # setuptools/bdist_wheel -> distutils/install ->.. 1532 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1533 | # setuptools/install -> bdist_egg ->.. 1534 | # setuptools/develop -> ? 1535 | # pip install: 1536 | # copies source tree to a tempdir before running egg_info/etc 1537 | # if .git isn't copied too, 'git describe' will fail 1538 | # then does setup.py bdist_wheel, or sometimes setup.py install 1539 | # setup.py egg_info -> ? 1540 | 1541 | # we override different "build_py" commands for both environments 1542 | if "setuptools" in sys.modules: 1543 | from setuptools.command.build_py import build_py as _build_py 1544 | else: 1545 | from distutils.command.build_py import build_py as _build_py 1546 | 1547 | class cmd_build_py(_build_py): 1548 | def run(self): 1549 | root = get_root() 1550 | cfg = get_config_from_root(root) 1551 | versions = get_versions() 1552 | _build_py.run(self) 1553 | # now locate _version.py in the new build/ directory and replace 1554 | # it with an updated value 1555 | if cfg.versionfile_build: 1556 | target_versionfile = os.path.join(self.build_lib, 1557 | cfg.versionfile_build) 1558 | print("UPDATING %s" % target_versionfile) 1559 | write_to_version_file(target_versionfile, versions) 1560 | cmds["build_py"] = cmd_build_py 1561 | 1562 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 1563 | from cx_Freeze.dist import build_exe as _build_exe 1564 | # nczeczulin reports that py2exe won't like the pep440-style string 1565 | # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 1566 | # setup(console=[{ 1567 | # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION 1568 | # "product_version": versioneer.get_version(), 1569 | # ... 
1570 | 1571 | class cmd_build_exe(_build_exe): 1572 | def run(self): 1573 | root = get_root() 1574 | cfg = get_config_from_root(root) 1575 | versions = get_versions() 1576 | target_versionfile = cfg.versionfile_source 1577 | print("UPDATING %s" % target_versionfile) 1578 | write_to_version_file(target_versionfile, versions) 1579 | 1580 | _build_exe.run(self) 1581 | os.unlink(target_versionfile) 1582 | with open(cfg.versionfile_source, "w") as f: 1583 | LONG = LONG_VERSION_PY[cfg.VCS] 1584 | f.write(LONG % 1585 | {"DOLLAR": "$", 1586 | "STYLE": cfg.style, 1587 | "TAG_PREFIX": cfg.tag_prefix, 1588 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1589 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1590 | }) 1591 | cmds["build_exe"] = cmd_build_exe 1592 | del cmds["build_py"] 1593 | 1594 | if 'py2exe' in sys.modules: # py2exe enabled? 1595 | try: 1596 | from py2exe.distutils_buildexe import py2exe as _py2exe # py3 1597 | except ImportError: 1598 | from py2exe.build_exe import py2exe as _py2exe # py2 1599 | 1600 | class cmd_py2exe(_py2exe): 1601 | def run(self): 1602 | root = get_root() 1603 | cfg = get_config_from_root(root) 1604 | versions = get_versions() 1605 | target_versionfile = cfg.versionfile_source 1606 | print("UPDATING %s" % target_versionfile) 1607 | write_to_version_file(target_versionfile, versions) 1608 | 1609 | _py2exe.run(self) 1610 | os.unlink(target_versionfile) 1611 | with open(cfg.versionfile_source, "w") as f: 1612 | LONG = LONG_VERSION_PY[cfg.VCS] 1613 | f.write(LONG % 1614 | {"DOLLAR": "$", 1615 | "STYLE": cfg.style, 1616 | "TAG_PREFIX": cfg.tag_prefix, 1617 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1618 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1619 | }) 1620 | cmds["py2exe"] = cmd_py2exe 1621 | 1622 | # we override different "sdist" commands for both environments 1623 | if "setuptools" in sys.modules: 1624 | from setuptools.command.sdist import sdist as _sdist 1625 | else: 1626 | from distutils.command.sdist import sdist as _sdist 1627 | 1628 | class cmd_sdist(_sdist): 1629 | def run(self): 1630 | versions = get_versions() 1631 | self._versioneer_generated_versions = versions 1632 | # unless we update this, the command will keep using the old 1633 | # version 1634 | self.distribution.metadata.version = versions["version"] 1635 | return _sdist.run(self) 1636 | 1637 | def make_release_tree(self, base_dir, files): 1638 | root = get_root() 1639 | cfg = get_config_from_root(root) 1640 | _sdist.make_release_tree(self, base_dir, files) 1641 | # now locate _version.py in the new base_dir directory 1642 | # (remembering that it may be a hardlink) and replace it with an 1643 | # updated value 1644 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1645 | print("UPDATING %s" % target_versionfile) 1646 | write_to_version_file(target_versionfile, 1647 | self._versioneer_generated_versions) 1648 | cmds["sdist"] = cmd_sdist 1649 | 1650 | return cmds 1651 | 1652 | 1653 | CONFIG_ERROR = """ 1654 | setup.cfg is missing the necessary Versioneer configuration. You need 1655 | a section like: 1656 | 1657 | [versioneer] 1658 | VCS = git 1659 | style = pep440 1660 | versionfile_source = src/myproject/_version.py 1661 | versionfile_build = myproject/_version.py 1662 | tag_prefix = 1663 | parentdir_prefix = myproject- 1664 | 1665 | You will also need to edit your setup.py to use the results: 1666 | 1667 | import versioneer 1668 | setup(version=versioneer.get_version(), 1669 | cmdclass=versioneer.get_cmdclass(), ...) 
1670 | 
1671 | Please read the docstring in ./versioneer.py for configuration instructions,
1672 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
1673 | """
1674 | 
1675 | SAMPLE_CONFIG = """
1676 | # See the docstring in versioneer.py for instructions. Note that you must
1677 | # re-run 'versioneer.py setup' after changing this section, and commit the
1678 | # resulting files.
1679 | 
1680 | [versioneer]
1681 | #VCS = git
1682 | #style = pep440
1683 | #versionfile_source =
1684 | #versionfile_build =
1685 | #tag_prefix =
1686 | #parentdir_prefix =
1687 | 
1688 | """
1689 | 
1690 | INIT_PY_SNIPPET = """
1691 | from ._version import get_versions
1692 | __version__ = get_versions()['version']
1693 | del get_versions
1694 | """
1695 | 
1696 | 
1697 | def do_setup():
1698 |     """Main VCS-independent setup function for installing Versioneer."""
1699 |     root = get_root()
1700 |     try:
1701 |         cfg = get_config_from_root(root)
1702 |     except (EnvironmentError, configparser.NoSectionError,
1703 |             configparser.NoOptionError) as e:
1704 |         if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
1705 |             print("Adding sample versioneer config to setup.cfg",
1706 |                   file=sys.stderr)
1707 |             with open(os.path.join(root, "setup.cfg"), "a") as f:
1708 |                 f.write(SAMPLE_CONFIG)
1709 |         print(CONFIG_ERROR, file=sys.stderr)
1710 |         return 1
1711 | 
1712 |     print(" creating %s" % cfg.versionfile_source)
1713 |     with open(cfg.versionfile_source, "w") as f:
1714 |         LONG = LONG_VERSION_PY[cfg.VCS]
1715 |         f.write(LONG % {"DOLLAR": "$",
1716 |                         "STYLE": cfg.style,
1717 |                         "TAG_PREFIX": cfg.tag_prefix,
1718 |                         "PARENTDIR_PREFIX": cfg.parentdir_prefix,
1719 |                         "VERSIONFILE_SOURCE": cfg.versionfile_source,
1720 |                         })
1721 | 
1722 |     ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
1723 |                        "__init__.py")
1724 |     if os.path.exists(ipy):
1725 |         try:
1726 |             with open(ipy, "r") as f:
1727 |                 old = f.read()
1728 |         except EnvironmentError:
1729 |             old = ""
1730 |         if INIT_PY_SNIPPET not in old:
1731 |             print(" appending to %s" % ipy)
1732 |             with open(ipy, "a") as f:
1733 |                 f.write(INIT_PY_SNIPPET)
1734 |         else:
1735 |             print(" %s unmodified" % ipy)
1736 |     else:
1737 |         print(" %s doesn't exist, ok" % ipy)
1738 |         ipy = None
1739 | 
1740 |     # Make sure both the top-level "versioneer.py" and versionfile_source
1741 |     # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
1742 |     # they'll be copied into source distributions. Pip won't be able to
1743 |     # install the package without this.
1744 |     manifest_in = os.path.join(root, "MANIFEST.in")
1745 |     simple_includes = set()
1746 |     try:
1747 |         with open(manifest_in, "r") as f:
1748 |             for line in f:
1749 |                 if line.startswith("include "):
1750 |                     for include in line.split()[1:]:
1751 |                         simple_includes.add(include)
1752 |     except EnvironmentError:
1753 |         pass
1754 |     # That doesn't cover everything MANIFEST.in can do
1755 |     # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
1756 |     # it might give some false negatives. Appending redundant 'include'
1757 |     # lines is safe, though.
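    # For example (hypothetical MANIFEST.in contents, shown only to
    # illustrate the false-negative case described above):
    #
    #     include versioneer.py              <- detected by the simple scan
    #     recursive-include myproject *.py   <- may already cover _version.py,
    #                                           but is not detected, so a
    #                                           redundant 'include' line would
    #                                           still be appended below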
1758 |     if "versioneer.py" not in simple_includes:
1759 |         print(" appending 'versioneer.py' to MANIFEST.in")
1760 |         with open(manifest_in, "a") as f:
1761 |             f.write("include versioneer.py\n")
1762 |     else:
1763 |         print(" 'versioneer.py' already in MANIFEST.in")
1764 |     if cfg.versionfile_source not in simple_includes:
1765 |         print(" appending versionfile_source ('%s') to MANIFEST.in" %
1766 |               cfg.versionfile_source)
1767 |         with open(manifest_in, "a") as f:
1768 |             f.write("include %s\n" % cfg.versionfile_source)
1769 |     else:
1770 |         print(" versionfile_source already in MANIFEST.in")
1771 | 
1772 |     # Make VCS-specific changes. For git, this means creating/changing
1773 |     # .gitattributes to mark _version.py for export-subst keyword
1774 |     # substitution.
1775 |     do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
1776 |     return 0
1777 | 
1778 | 
1779 | def scan_setup_py():
1780 |     """Validate the contents of setup.py against Versioneer's expectations."""
1781 |     found = set()
1782 |     setters = False
1783 |     errors = 0
1784 |     with open("setup.py", "r") as f:
1785 |         for line in f.readlines():
1786 |             if "import versioneer" in line:
1787 |                 found.add("import")
1788 |             if "versioneer.get_cmdclass()" in line:
1789 |                 found.add("cmdclass")
1790 |             if "versioneer.get_version()" in line:
1791 |                 found.add("get_version")
1792 |             if "versioneer.VCS" in line:
1793 |                 setters = True
1794 |             if "versioneer.versionfile_source" in line:
1795 |                 setters = True
1796 |     if len(found) != 3:
1797 |         print("")
1798 |         print("Your setup.py appears to be missing some important items")
1799 |         print("(but I might be wrong). Please make sure it has something")
1800 |         print("roughly like the following:")
1801 |         print("")
1802 |         print(" import versioneer")
1803 |         print(" setup( version=versioneer.get_version(),")
1804 |         print("        cmdclass=versioneer.get_cmdclass(), ...)")
1805 |         print("")
1806 |         errors += 1
1807 |     if setters:
1808 |         print("You should remove lines like 'versioneer.VCS = ' and")
1809 |         print("'versioneer.versionfile_source = ' . This configuration")
1810 |         print("now lives in setup.cfg, and should be removed from setup.py")
1811 |         print("")
1812 |         errors += 1
1813 |     return errors
1814 | 
1815 | 
1816 | if __name__ == "__main__":
1817 |     cmd = sys.argv[1]
1818 |     if cmd == "setup":
1819 |         errors = do_setup()
1820 |         errors += scan_setup_py()
1821 |         if errors:
1822 |             sys.exit(1)
1823 | 
--------------------------------------------------------------------------------