├── .codecov.yml ├── .coveragerc ├── .flake8 ├── .gitattributes ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── borf-runner.py ├── borf ├── __init__.py ├── __main__.py ├── _version.py ├── borf.py ├── get_orfs.py └── tests │ ├── __init__.py │ ├── test_borf.py │ ├── test_frames.fa │ ├── test_getorfs.fa │ ├── test_mutliple_frame_orfs.fa │ └── test_trans_all_frames.fa ├── docs ├── Makefile ├── make.bat └── source │ ├── _static │ └── .placeholder │ ├── conf.py │ ├── index.rst │ ├── installation.rst │ ├── min_versions.rst │ ├── release-history.rst │ └── usage.rst ├── github_deploy_key_betsig_borf.enc ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── test_data ├── test_frames.fa ├── test_getorfs.fa ├── test_mutliple_frame_orfs.fa ├── test_stopsitent.fa └── test_trans_all_frames.fa └── versioneer.py /.codecov.yml: -------------------------------------------------------------------------------- 1 | # show coverage in CI status, not as a comment. 2 | comment: off 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | patch: 9 | default: 10 | target: auto 11 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = 3 | borf 4 | [report] 5 | omit = 6 | */python?.?/* 7 | */site-packages/nose/* 8 | # ignore _version.py and versioneer.py 9 | .*version.* 10 | *_version.py 11 | 12 | exclude_lines = 13 | if __name__ == '__main__': 14 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git,__pycache__,build,dist,versioneer.py,borf/_version.py,docs/source/conf.py 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | borf/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | venv/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | 56 | # Sphinx documentation 57 | docs/build/ 58 | docs/source/generated/ 59 | 60 | # pytest 61 | .pytest_cache/ 62 | 63 | # PyBuilder 64 | target/ 65 | 66 | # Editor files 67 | #mac 68 | .DS_Store 69 | *~ 70 | 71 | #vim 72 | *.swp 73 | *.swo 74 | 75 | #pycharm 76 | .idea/* 77 | 78 | 79 | #Ipython Notebook 80 | .ipynb_checkpoints 81 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.7 4 | cache: 5 | directories: 6 | - $HOME/.cache/pip 7 | - $HOME/.ccache # https://github.com/travis-ci/travis-ci/issues/5853 8 | 9 | env: 10 | global: 11 | # Doctr deploy key for betsig/borf 12 | - secure: "rp1A///9UiizP66RSQyOUE2aR5RIrJYhs6gcpqR4TLLVWFCAC149R9tLcRjDRJ8alWkEoobgfBnTxYRfXP+LvnruxL3SYkfwTMm46hO2IouIsnLU9QQoxZGcjSHLTPnu+kSybobFPmTvdZ84YIV8awyiUg4mhqvNW+lfNC6nONisS/tSkR7jWapyKH7X+HlpYIBKADpUaE8D383uCL6bhLPkxgrmqA989BvFwxRofvVw4j4a2KXcC+vQjIYxVkSO5q27vkKI6yZNaAxlxUu2yVjSQ84MZ925Rw6QmqhSnfwxn9a22FlNl7JyWmeBjdQRv3WIjdqGGPORa5NvsdalT76GjFtTB/G9eyB2MH8oAcyBQkj421lG6ktcp4ldhSXguKc45QyfGNqc9XLbTafbs28SHbM86kIbRq1y3R2jDnYL6bh/AA0k9zjsMQ8J7taKv+EYV3Ny5wPsbubH2cLM5ioQykcRdIMDpkcpYwYC4pLErcGYb7/AFX3zRJxBxf2y4Vg0+/88ftCVrSnpmJsqrkHgu9We1Y4zmMKID6IENGyUlc5p8tbuNjSv3TRUJs8g6sNym1rqPCciDILkJUuqZ2NQ57qPCXejTPmVhtsEHKCCc7HVKibnMVU5Xj/gPJiiQTT3OGQe8YZ42Bi482f8Lb9+PknG7ivNeUMz7K9rHKE=" 13 | 14 | 15 | install: 16 | # Install this package and the packages listed in requirements.txt. 17 | - pip install . 18 | # Install extra requirements for running tests and building docs. 19 | - pip install -r requirements-dev.txt 20 | 21 | script: 22 | - coverage run -m pytest # Run the tests and check for test coverage. 23 | - coverage report -m # Generate test coverage report. 24 | - codecov # Upload the report to codecov. 25 | - flake8 --max-line-length=115 # Enforce code style (but relax line length limit a bit). 26 | - set -e 27 | - make -C docs html 28 | - pip install doctr 29 | - doctr deploy --built-docs docs/build/html . -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Maintainer 6 | ---------- 7 | 8 | * Beth Signal 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? See: CONTRIBUTING.rst 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! Every 6 | little bit helps, and credit will always be given. 7 | 8 | You can contribute in many ways: 9 | 10 | Types of Contributions 11 | ---------------------- 12 | 13 | Report Bugs 14 | ~~~~~~~~~~~ 15 | 16 | Report bugs at https://github.com/betsig/borf/issues. 17 | 18 | If you are reporting a bug, please include: 19 | 20 | * Any details about your local setup that might be helpful in troubleshooting. 
21 | * Detailed steps to reproduce the bug.
22 |
23 | Fix Bugs
24 | ~~~~~~~~
25 |
26 | Look through the GitHub issues for bugs. Anything tagged with "bug"
27 | is open to whoever wants to implement it.
28 |
29 | Implement Features
30 | ~~~~~~~~~~~~~~~~~~
31 |
32 | Look through the GitHub issues for features. Anything tagged with "feature"
33 | is open to whoever wants to implement it.
34 |
35 | Write Documentation
36 | ~~~~~~~~~~~~~~~~~~~
37 |
38 | borf could always use more documentation, whether
39 | as part of the official borf docs, in docstrings,
40 | or even on the web in blog posts, articles, and such.
41 |
42 | Submit Feedback
43 | ~~~~~~~~~~~~~~~
44 |
45 | The best way to send feedback is to file an issue at https://github.com/betsig/borf/issues.
46 |
47 | If you are proposing a feature:
48 |
49 | * Explain in detail how it would work.
50 | * Keep the scope as narrow as possible, to make it easier to implement.
51 | * Remember that this is a volunteer-driven project, and that contributions
52 |   are welcome :)
53 |
54 | Get Started!
55 | ------------
56 |
57 | Ready to contribute? Here's how to set up `borf` for local development.
58 |
59 | 1. Fork the `borf` repo on GitHub.
60 | 2. Clone your fork locally::
61 |
62 |     $ git clone git@github.com:your_name_here/borf.git
63 |
64 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
65 |
66 |     $ mkvirtualenv borf
67 |     $ cd borf/
68 |     $ python setup.py develop
69 |
70 | 4. Create a branch for local development::
71 |
72 |     $ git checkout -b name-of-your-bugfix-or-feature
73 |
74 |    Now you can make your changes locally.
75 |
76 | 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox::
77 |
78 |     $ flake8 borf tests
79 |     $ python setup.py test
80 |     $ tox
81 |
82 |    To get flake8 and tox, just pip install them into your virtualenv.
83 |
84 | 6. Commit your changes and push your branch to GitHub::
85 |
86 |     $ git add .
87 |     $ git commit -m "Your detailed description of your changes."
88 |     $ git push origin name-of-your-bugfix-or-feature
89 |
90 | 7. Submit a pull request through the GitHub website.
91 |
92 | Pull Request Guidelines
93 | -----------------------
94 |
95 | Before you submit a pull request, check that it meets these guidelines:
96 |
97 | 1. The pull request should include tests.
98 | 2. If the pull request adds functionality, the docs should be updated. Put
99 |    your new functionality into a function with a docstring, and add the
100 |    feature to the list in README.rst.
101 | 3. The pull request should work for Python 3.7 (the version tested on Travis). Check
102 |    https://travis-ci.org/betsig/borf/pull_requests
103 |    and make sure that the tests pass.
104 |
105 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Beth Signal
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include AUTHORS.rst
2 | include CONTRIBUTING.rst
3 | include LICENSE
4 | include README.rst
5 | include requirements.txt
6 |
7 | recursive-exclude * __pycache__
8 | recursive-exclude * *.py[co]
9 |
10 | recursive-include docs *.rst conf.py Makefile make.bat
11 |
12 | include versioneer.py
13 | include borf/_version.py
14 |
15 | # If including data files in the package, add them like:
16 | # include path/to/data_file
17 | include test_data/*.fa
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ===============================
2 | borf
3 | ===============================
4 |
5 | .. image:: https://img.shields.io/pypi/v/borf.svg
6 |         :target: https://pypi.python.org/pypi/borf
7 |
8 |
9 | **Better ORF predictions**
10 |
11 | Fast and flexible ORF prediction in Python, with appropriate defaults for *de novo* assembled transcripts.
12 |
13 | Installation
14 | ------------
15 | .. code-block:: console
16 |
17 |     pip install borf
18 |
19 |
20 | Usage
21 | ------------
22 | For basic usage, run borf with an input fasta-formatted file as the first argument.
23 |
24 | .. code-block:: console
25 |
26 |     borf input.fa
27 |
28 |
29 | For a more detailed guide on usage, please see the wiki: https://github.com/betsig/borf/wiki
30 |
--------------------------------------------------------------------------------
/borf-runner.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | '''Convenience wrapper for running borf directly from the source tree'''
4 |
5 | from borf.borf import main
6 |
7 | if __name__ == '__main__':
8 |     main()
9 |
--------------------------------------------------------------------------------
/borf/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from ._version import get_versions
3 | __version__ = get_versions()['version']
4 | del get_versions
5 |
--------------------------------------------------------------------------------
/borf/__main__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
4 | """borf.__main__: executed when the borf directory is called as a script."""
5 |
6 |
7 | from .borf import main
8 | main()
9 |
--------------------------------------------------------------------------------
/borf/_version.py:
--------------------------------------------------------------------------------
1 |
2 | # This file helps to compute a version number in source trees obtained from
3 | # git-archive tarball (such as those provided by GitHub's download-from-tag
4 | # feature). Distribution tarballs (built by setup.py sdist) and build
5 | # directories (produced by setup.py build) will contain a much shorter file
6 | # that just contains the computed version number.
7 |
8 | # This file is released into the public domain. Generated by
9 | # versioneer-0.18 (https://github.com/warner/python-versioneer)
10 |
11 | """Git implementation of _version.py."""
12 |
13 | import errno
14 | import os
15 | import re
16 | import subprocess
17 | import sys
18 |
19 |
20 | def get_keywords():
21 |     """Get the keywords needed to look up the version information."""
22 |     # these strings will be replaced by git during git-archive.
23 |     # setup.py/versioneer.py will grep for the variable names, so they must
24 |     # each be defined on a line of their own. _version.py will just call
25 |     # get_keywords().
26 | git_refnames = " (HEAD -> master)" 27 | git_full = "52bca757f95027388c5f8cdb8de80d88d5974b27" 28 | git_date = "2021-08-24 11:48:26 +1000" 29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 30 | return keywords 31 | 32 | 33 | class VersioneerConfig: 34 | """Container for Versioneer configuration parameters.""" 35 | 36 | 37 | def get_config(): 38 | """Create, populate and return the VersioneerConfig() object.""" 39 | # these strings are filled in when 'setup.py versioneer' creates 40 | # _version.py 41 | cfg = VersioneerConfig() 42 | cfg.VCS = "git" 43 | cfg.style = "pep440-post" 44 | cfg.tag_prefix = "v" 45 | cfg.parentdir_prefix = "None" 46 | cfg.versionfile_source = "borf/_version.py" 47 | cfg.verbose = False 48 | return cfg 49 | 50 | 51 | class NotThisMethod(Exception): 52 | """Exception raised if a method is not valid for the current scenario.""" 53 | 54 | 55 | LONG_VERSION_PY = {} 56 | HANDLERS = {} 57 | 58 | 59 | def register_vcs_handler(vcs, method): # decorator 60 | """Decorator to mark a method as the handler for a particular VCS.""" 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | return decorate 68 | 69 | 70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 71 | env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 80 | stdout=subprocess.PIPE, 81 | stderr=(subprocess.PIPE if hide_stderr 82 | else None)) 83 | break 84 | except EnvironmentError: 85 | e = sys.exc_info()[1] 86 | if e.errno == errno.ENOENT: 87 | continue 88 | if verbose: 89 | print("unable to run %s" % dispcmd) 90 | print(e) 91 | return None, None 92 | else: 93 | if verbose: 94 | print("unable to find command, tried %s" % (commands,)) 95 | return None, None 96 | stdout = p.communicate()[0].strip() 97 | if sys.version_info[0] >= 3: 98 | stdout = stdout.decode() 99 | if p.returncode != 0: 100 | if verbose: 101 | print("unable to run %s (error)" % dispcmd) 102 | print("stdout was %s" % stdout) 103 | return None, p.returncode 104 | return stdout, p.returncode 105 | 106 | 107 | def versions_from_parentdir(parentdir_prefix, root, verbose): 108 | """Try to determine the version from the parent directory name. 109 | 110 | Source tarballs conventionally unpack into a directory that includes both 111 | the project name and a version string. 
We will also support searching up 112 | two directory levels for an appropriately named parent directory 113 | """ 114 | rootdirs = [] 115 | 116 | for i in range(3): 117 | dirname = os.path.basename(root) 118 | if dirname.startswith(parentdir_prefix): 119 | return {"version": dirname[len(parentdir_prefix):], 120 | "full-revisionid": None, 121 | "dirty": False, "error": None, "date": None} 122 | else: 123 | rootdirs.append(root) 124 | root = os.path.dirname(root) # up a level 125 | 126 | if verbose: 127 | print("Tried directories %s but none started with prefix %s" % 128 | (str(rootdirs), parentdir_prefix)) 129 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 130 | 131 | 132 | @register_vcs_handler("git", "get_keywords") 133 | def git_get_keywords(versionfile_abs): 134 | """Extract version information from the given file.""" 135 | # the code embedded in _version.py can just fetch the value of these 136 | # keywords. When used from setup.py, we don't want to import _version.py, 137 | # so we do it with a regexp instead. This function is not used from 138 | # _version.py. 139 | keywords = {} 140 | try: 141 | f = open(versionfile_abs, "r") 142 | for line in f.readlines(): 143 | if line.strip().startswith("git_refnames ="): 144 | mo = re.search(r'=\s*"(.*)"', line) 145 | if mo: 146 | keywords["refnames"] = mo.group(1) 147 | if line.strip().startswith("git_full ="): 148 | mo = re.search(r'=\s*"(.*)"', line) 149 | if mo: 150 | keywords["full"] = mo.group(1) 151 | if line.strip().startswith("git_date ="): 152 | mo = re.search(r'=\s*"(.*)"', line) 153 | if mo: 154 | keywords["date"] = mo.group(1) 155 | f.close() 156 | except EnvironmentError: 157 | pass 158 | return keywords 159 | 160 | 161 | @register_vcs_handler("git", "keywords") 162 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 163 | """Get version information from git keywords.""" 164 | if not keywords: 165 | raise NotThisMethod("no keywords at all, weird") 166 | date = keywords.get("date") 167 | if date is not None: 168 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 169 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 170 | # -like" string, which we must then edit to make compliant), because 171 | # it's been around since git-1.5.3, and it's too difficult to 172 | # discover which version we're using, or to work around using an 173 | # older one. 174 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 175 | refnames = keywords["refnames"].strip() 176 | if refnames.startswith("$Format"): 177 | if verbose: 178 | print("keywords are unexpanded, not using") 179 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 180 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 181 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 182 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 183 | TAG = "tag: " 184 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 185 | if not tags: 186 | # Either we're using git < 1.8.3, or there really are no tags. We use 187 | # a heuristic: assume all version tags have a digit. The old git %d 188 | # expansion behaves like git log --decorate=short and strips out the 189 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 190 | # between branches and tags. By ignoring refnames without digits, we 191 | # filter out many common branch names like "release" and 192 | # "stabilization", as well as "HEAD" and "master". 
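        # (illustrative example, not from the source: refs {'HEAD', 'master',
        # 'v1.0.2'} would keep only 'v1.0.2', the only refname with a digit)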
193 | tags = set([r for r in refs if re.search(r'\d', r)]) 194 | if verbose: 195 | print("discarding '%s', no digits" % ",".join(refs - tags)) 196 | if verbose: 197 | print("likely tags: %s" % ",".join(sorted(tags))) 198 | for ref in sorted(tags): 199 | # sorting will prefer e.g. "2.0" over "2.0rc1" 200 | if ref.startswith(tag_prefix): 201 | r = ref[len(tag_prefix):] 202 | if verbose: 203 | print("picking %s" % r) 204 | return {"version": r, 205 | "full-revisionid": keywords["full"].strip(), 206 | "dirty": False, "error": None, 207 | "date": date} 208 | # no suitable tags, so version is "0+unknown", but full hex is still there 209 | if verbose: 210 | print("no suitable tags, using unknown + full revision id") 211 | return {"version": "0+unknown", 212 | "full-revisionid": keywords["full"].strip(), 213 | "dirty": False, "error": "no suitable tags", "date": None} 214 | 215 | 216 | @register_vcs_handler("git", "pieces_from_vcs") 217 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 218 | """Get version from 'git describe' in the root of the source tree. 219 | 220 | This only gets called if the git-archive 'subst' keywords were *not* 221 | expanded, and _version.py hasn't already been rewritten with a short 222 | version string, meaning we're inside a checked out source tree. 223 | """ 224 | GITS = ["git"] 225 | if sys.platform == "win32": 226 | GITS = ["git.cmd", "git.exe"] 227 | 228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 229 | hide_stderr=True) 230 | if rc != 0: 231 | if verbose: 232 | print("Directory %s not under git control" % root) 233 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 234 | 235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 236 | # if there isn't one, this yields HEX[-dirty] (no NUM) 237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 238 | "--always", "--long", 239 | "--match", "%s*" % tag_prefix], 240 | cwd=root) 241 | # --long was added in git-1.5.5 242 | if describe_out is None: 243 | raise NotThisMethod("'git describe' failed") 244 | describe_out = describe_out.strip() 245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 246 | if full_out is None: 247 | raise NotThisMethod("'git rev-parse' failed") 248 | full_out = full_out.strip() 249 | 250 | pieces = {} 251 | pieces["long"] = full_out 252 | pieces["short"] = full_out[:7] # maybe improved later 253 | pieces["error"] = None 254 | 255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 256 | # TAG might have hyphens. 257 | git_describe = describe_out 258 | 259 | # look for -dirty suffix 260 | dirty = git_describe.endswith("-dirty") 261 | pieces["dirty"] = dirty 262 | if dirty: 263 | git_describe = git_describe[:git_describe.rindex("-dirty")] 264 | 265 | # now we have TAG-NUM-gHEX or HEX 266 | 267 | if "-" in git_describe: 268 | # TAG-NUM-gHEX 269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 270 | if not mo: 271 | # unparseable. Maybe git-describe is misbehaving? 
272 | pieces["error"] = ("unable to parse git-describe output: '%s'" 273 | % describe_out) 274 | return pieces 275 | 276 | # tag 277 | full_tag = mo.group(1) 278 | if not full_tag.startswith(tag_prefix): 279 | if verbose: 280 | fmt = "tag '%s' doesn't start with prefix '%s'" 281 | print(fmt % (full_tag, tag_prefix)) 282 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 283 | % (full_tag, tag_prefix)) 284 | return pieces 285 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 286 | 287 | # distance: number of commits since tag 288 | pieces["distance"] = int(mo.group(2)) 289 | 290 | # commit: short hex revision ID 291 | pieces["short"] = mo.group(3) 292 | 293 | else: 294 | # HEX: no tags 295 | pieces["closest-tag"] = None 296 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 297 | cwd=root) 298 | pieces["distance"] = int(count_out) # total number of commits 299 | 300 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 301 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 302 | cwd=root)[0].strip() 303 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 304 | 305 | return pieces 306 | 307 | 308 | def plus_or_dot(pieces): 309 | """Return a + if we don't already have one, else return a .""" 310 | if "+" in pieces.get("closest-tag", ""): 311 | return "." 312 | return "+" 313 | 314 | 315 | def render_pep440(pieces): 316 | """Build up version string, with post-release "local version identifier". 317 | 318 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 319 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 320 | 321 | Exceptions: 322 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 323 | """ 324 | if pieces["closest-tag"]: 325 | rendered = pieces["closest-tag"] 326 | if pieces["distance"] or pieces["dirty"]: 327 | rendered += plus_or_dot(pieces) 328 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 329 | if pieces["dirty"]: 330 | rendered += ".dirty" 331 | else: 332 | # exception #1 333 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 334 | pieces["short"]) 335 | if pieces["dirty"]: 336 | rendered += ".dirty" 337 | return rendered 338 | 339 | 340 | def render_pep440_pre(pieces): 341 | """TAG[.post.devDISTANCE] -- No -dirty. 342 | 343 | Exceptions: 344 | 1: no tags. 0.post.devDISTANCE 345 | """ 346 | if pieces["closest-tag"]: 347 | rendered = pieces["closest-tag"] 348 | if pieces["distance"]: 349 | rendered += ".post.dev%d" % pieces["distance"] 350 | else: 351 | # exception #1 352 | rendered = "0.post.dev%d" % pieces["distance"] 353 | return rendered 354 | 355 | 356 | def render_pep440_post(pieces): 357 | """TAG[.postDISTANCE[.dev0]+gHEX] . 358 | 359 | The ".dev0" means dirty. Note that .dev0 sorts backwards 360 | (a dirty tree will appear "older" than the corresponding clean one), 361 | but you shouldn't be releasing software with -dirty anyways. 362 | 363 | Exceptions: 364 | 1: no tags. 
0.postDISTANCE[.dev0] 365 | """ 366 | if pieces["closest-tag"]: 367 | rendered = pieces["closest-tag"] 368 | if pieces["distance"] or pieces["dirty"]: 369 | rendered += ".post%d" % pieces["distance"] 370 | if pieces["dirty"]: 371 | rendered += ".dev0" 372 | rendered += plus_or_dot(pieces) 373 | rendered += "g%s" % pieces["short"] 374 | else: 375 | # exception #1 376 | rendered = "0.post%d" % pieces["distance"] 377 | if pieces["dirty"]: 378 | rendered += ".dev0" 379 | rendered += "+g%s" % pieces["short"] 380 | return rendered 381 | 382 | 383 | def render_pep440_old(pieces): 384 | """TAG[.postDISTANCE[.dev0]] . 385 | 386 | The ".dev0" means dirty. 387 | 388 | Eexceptions: 389 | 1: no tags. 0.postDISTANCE[.dev0] 390 | """ 391 | if pieces["closest-tag"]: 392 | rendered = pieces["closest-tag"] 393 | if pieces["distance"] or pieces["dirty"]: 394 | rendered += ".post%d" % pieces["distance"] 395 | if pieces["dirty"]: 396 | rendered += ".dev0" 397 | else: 398 | # exception #1 399 | rendered = "0.post%d" % pieces["distance"] 400 | if pieces["dirty"]: 401 | rendered += ".dev0" 402 | return rendered 403 | 404 | 405 | def render_git_describe(pieces): 406 | """TAG[-DISTANCE-gHEX][-dirty]. 407 | 408 | Like 'git describe --tags --dirty --always'. 409 | 410 | Exceptions: 411 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 412 | """ 413 | if pieces["closest-tag"]: 414 | rendered = pieces["closest-tag"] 415 | if pieces["distance"]: 416 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 417 | else: 418 | # exception #1 419 | rendered = pieces["short"] 420 | if pieces["dirty"]: 421 | rendered += "-dirty" 422 | return rendered 423 | 424 | 425 | def render_git_describe_long(pieces): 426 | """TAG-DISTANCE-gHEX[-dirty]. 427 | 428 | Like 'git describe --tags --dirty --always -long'. 429 | The distance/hash is unconditional. 430 | 431 | Exceptions: 432 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 433 | """ 434 | if pieces["closest-tag"]: 435 | rendered = pieces["closest-tag"] 436 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 437 | else: 438 | # exception #1 439 | rendered = pieces["short"] 440 | if pieces["dirty"]: 441 | rendered += "-dirty" 442 | return rendered 443 | 444 | 445 | def render(pieces, style): 446 | """Render the given version pieces into the requested style.""" 447 | if pieces["error"]: 448 | return {"version": "unknown", 449 | "full-revisionid": pieces.get("long"), 450 | "dirty": None, 451 | "error": pieces["error"], 452 | "date": None} 453 | 454 | if not style or style == "default": 455 | style = "pep440" # the default 456 | 457 | if style == "pep440": 458 | rendered = render_pep440(pieces) 459 | elif style == "pep440-pre": 460 | rendered = render_pep440_pre(pieces) 461 | elif style == "pep440-post": 462 | rendered = render_pep440_post(pieces) 463 | elif style == "pep440-old": 464 | rendered = render_pep440_old(pieces) 465 | elif style == "git-describe": 466 | rendered = render_git_describe(pieces) 467 | elif style == "git-describe-long": 468 | rendered = render_git_describe_long(pieces) 469 | else: 470 | raise ValueError("unknown style '%s'" % style) 471 | 472 | return {"version": rendered, "full-revisionid": pieces["long"], 473 | "dirty": pieces["dirty"], "error": None, 474 | "date": pieces.get("date")} 475 | 476 | 477 | def get_versions(): 478 | """Get version information or return default if unable to do so.""" 479 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 480 | # __file__, we can work backwards from there to the root. 
Some
481 |     # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
482 |     # case we can only use expanded keywords.
483 |
484 |     cfg = get_config()
485 |     verbose = cfg.verbose
486 |
487 |     try:
488 |         return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
489 |                                           verbose)
490 |     except NotThisMethod:
491 |         pass
492 |
493 |     try:
494 |         root = os.path.realpath(__file__)
495 |         # versionfile_source is the relative path from the top of the source
496 |         # tree (where the .git directory might live) to this file. Invert
497 |         # this to find the root from __file__.
498 |         for i in cfg.versionfile_source.split('/'):
499 |             root = os.path.dirname(root)
500 |     except NameError:
501 |         return {"version": "0+unknown", "full-revisionid": None,
502 |                 "dirty": None,
503 |                 "error": "unable to find root of source tree",
504 |                 "date": None}
505 |
506 |     try:
507 |         pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
508 |         return render(pieces, cfg.style)
509 |     except NotThisMethod:
510 |         pass
511 |
512 |     try:
513 |         if cfg.parentdir_prefix:
514 |             return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
515 |     except NotThisMethod:
516 |         pass
517 |
518 |     return {"version": "0+unknown", "full-revisionid": None,
519 |             "dirty": None,
520 |             "error": "unable to compute version", "date": None}
521 |
--------------------------------------------------------------------------------
/borf/borf.py:
--------------------------------------------------------------------------------
1 |
2 | '''borf.borf: for running borf'''
3 |
4 |
5 | import argparse
6 | import os
7 | import sys
8 | import re
9 | from Bio import SeqIO
10 | import pandas as pd
11 | from .get_orfs import get_orfs, write_orf_fasta, write_orf_data, write_orf_cds, batch_iterator
12 |
13 |
14 | def main():
15 |
16 |     parser = argparse.ArgumentParser(description='Get ORF predictions from a nucleotide fasta file')
17 |
18 |     parser.add_argument('Fasta', metavar='fasta_file', type=str, help='fasta file to predict ORFs')
19 |     parser.add_argument('-o', '--output_path', type=str, help='path to write output files. [OUTPUT_PATH].pep, [OUTPUT_PATH].txt and [OUTPUT_PATH].cds (default: input .fa file name)')
20 |     parser.add_argument('-s', '--strand', action='store_true', help='Predict ORFs for both strands')
21 |     parser.add_argument('-a', '--all_orfs', action='store_true', help='Return all ORFs for each sequence longer than the cutoff')
22 |     parser.add_argument('-l', '--orf_length', type=int, default=100, help='Minimum ORF length (AA) (default: %(default)d)')
23 |     parser.add_argument('-u', '--upstream_incomplete_length', type=int, default=50, help='Minimum length (AA) of uninterrupted sequence upstream of an ORF for it to be included for incomplete_5prime transcripts (default: %(default)d)')
24 |     parser.add_argument('-c', '--genetic_code', type=int, default=1, help='Genetic code (int: 1-14) to use for translation (default: %(default)d). See https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for list')
25 |     parser.add_argument('-b', '--batch_size', type=int, default=10000, help='Number of fasta records to read in each batch')
26 |     parser.add_argument('-f', '--force_overwrite', action='store_true', help='Force overwriting of output files')
27 |
28 |     args = parser.parse_args()
29 |
30 |     input_file = args.Fasta
31 |
32 |     if args.output_path is None:
33 |         output_path = os.path.splitext(input_file)[0]
34 |     else:
35 |         output_path = args.output_path
36 |
37 |     output_path_pep = output_path + '.pep'
38 |     output_path_txt = output_path + '.txt'
39 |     output_path_cds = output_path + '.cds'
40 |
41 |     # check if files exist already
42 |     if os.path.isfile(output_path_pep) or os.path.isfile(output_path_txt) or os.path.isfile(output_path_cds):
43 |
44 |         if os.path.isfile(output_path_pep) and os.path.isfile(output_path_txt) and os.path.isfile(output_path_cds):
45 |             print(output_path_pep + ", " + output_path_txt + " and " + output_path_cds + " already exist")
46 |         elif os.path.isfile(output_path_pep) and os.path.isfile(output_path_txt):
47 |             print(output_path_pep + " and " + output_path_txt + " already exist")
48 |         elif os.path.isfile(output_path_pep) and os.path.isfile(output_path_cds):
49 |             print(output_path_pep + " and " + output_path_cds + " already exist")
50 |         elif os.path.isfile(output_path_cds) and os.path.isfile(output_path_txt):
51 |             print(output_path_txt + " and " + output_path_cds + " already exist")
52 |         elif os.path.isfile(output_path_pep):
53 |             print(output_path_pep + " already exists")
54 |         elif os.path.isfile(output_path_cds):
55 |             print(output_path_cds + " already exists")
56 |         else:
57 |             print(output_path_txt + " already exists")
58 |
59 |         if not args.force_overwrite:
60 |             overwrite = input("Do you want to overwrite these files? 
([Y]/n): ").lower().strip()[:1] 61 | if not (overwrite == "y" or overwrite == ""): 62 | sys.exit(1) 63 | else: 64 | # remove old files so you don't append new data to old files 65 | if os.path.isfile(output_path_pep): 66 | os.remove(output_path_pep) 67 | if os.path.isfile(output_path_txt): 68 | os.remove(output_path_txt) 69 | if os.path.isfile(output_path_cds): 70 | os.remove(output_path_cds) 71 | else: 72 | print('Overwriting files') 73 | if os.path.isfile(output_path_pep): 74 | os.remove(output_path_pep) 75 | if os.path.isfile(output_path_txt): 76 | os.remove(output_path_txt) 77 | if os.path.isfile(output_path_cds): 78 | os.remove(output_path_cds) 79 | 80 | # number of sequences 81 | n_seqs = 0 82 | for record in SeqIO.parse(input_file, 'fasta'): 83 | n_seqs += 1 84 | 85 | batch_size = args.batch_size 86 | 87 | record_iter = SeqIO.parse(open(input_file), 'fasta') 88 | 89 | strand_warning = False 90 | for i, batch in enumerate(batch_iterator(record_iter, batch_size)): 91 | all_sequences = [] 92 | for record in batch: 93 | all_sequences.append(record.upper()) 94 | 95 | 96 | if i == 0: 97 | # check strandedness 98 | 99 | orf_data = get_orfs(all_sequences, both_strands=True, 100 | min_orf_length=args.orf_length, all_orfs=True, 101 | min_upstream_length=args.upstream_incomplete_length, 102 | genetic_code=args.genetic_code) 103 | 104 | orf_data_strand_bias = orf_data.sort_values(by='orf_length', ascending = False) 105 | orf_data_strand_bias = orf_data_strand_bias.drop_duplicates('id', keep='first') 106 | 107 | if len(orf_data_strand_bias) >= 10: 108 | 109 | pos_bias = (orf_data_strand_bias['strand'][orf_data_strand_bias['orf_class'] == "complete"] == "+").sum() 110 | neg_bias = (orf_data_strand_bias['strand'][orf_data_strand_bias['orf_class'] == "complete"] == "-").sum() 111 | positive_strand_bias = pos_bias / (pos_bias+neg_bias) 112 | 113 | if positive_strand_bias > 0.7 and args.strand == True: 114 | #data is likely from a stranded assembly. 
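                    # e.g. (illustrative figures, not from the source) 95 of 100
                    # transcripts with their longest complete ORF on the '+' strand
                    # gives positive_strand_bias = 0.95 > 0.7, so predicting on
                    # both strands (-s/--strand) is probably not wanted here.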
115 |                     print("Are you sure your input .fasta file isn't stranded?")
116 |                     print(str(positive_strand_bias*100) + "% of transcripts have the longest ORF on the + strand")
117 |                     strand_warning = True
118 |
119 |                 if positive_strand_bias <= 0.7 and args.strand == False:
120 |                     print("Are you sure your input .fasta file is stranded?")
121 |                     print(str(positive_strand_bias*100) + "% of transcripts have the longest ORF on the + strand")
122 |                     strand_warning = True
123 |
124 |             if args.strand == False:
125 |                 orf_data = orf_data[orf_data['strand'] == '+']
126 |             if args.all_orfs == False:
127 |                 idx = orf_data.groupby(['id'])['orf_length'].transform(max) == orf_data['orf_length']
128 |                 orf_data = orf_data[idx]
129 |                 orf_data['isoform_number'] = 1
130 |                 orf_data['fasta_id'] = [re.sub("[.]orf[0-9]*", ".orf1", x) for x in orf_data['fasta_id']]
131 |
132 |         else:
133 |             orf_data = get_orfs(all_sequences, both_strands=args.strand,
134 |                                 min_orf_length=args.orf_length, all_orfs=args.all_orfs,
135 |                                 min_upstream_length=args.upstream_incomplete_length,
136 |                                 genetic_code=args.genetic_code)
137 |
138 |         # extract nt seqs at CDS
139 |         nucleotide_seq = []
140 |         nucleotide_id = []
141 |         for seq_string in all_sequences:
142 |             nucleotide_seq.append(str(seq_string.seq))
143 |             nucleotide_id.append(str(seq_string.id))
144 |         seq_df = pd.DataFrame(list(zip(nucleotide_id, nucleotide_seq)), columns=['id', 'nt_seq'])
145 |
146 |         # merge orfs with all_sequences
147 |         orf_data = pd.merge(seq_df, orf_data, on='id', how='right')
148 |         orf_data['cds_seq'] = orf_data.apply(lambda x: x['nt_seq'][(x['start_site_nt']-1):x['stop_site_nt']], axis=1)
149 |
150 |
151 |         write_orf_data(orf_data, output_path_txt)
152 |         write_orf_fasta(orf_data, output_path_pep)
153 |         write_orf_cds(orf_data, output_path_cds)
154 |
155 |         start_seq_n = (i*batch_size) + 1
156 |         end_seq_n = min(start_seq_n + (batch_size - 1), n_seqs)
157 |         print("Processed sequences " + str(start_seq_n) + " to " + str(end_seq_n) + " of " + str(n_seqs))
158 |
159 |     print("Done with borf.")
160 |     print("Results in " + output_path_pep + ", " + output_path_txt + " and " + output_path_cds)
161 |
162 |     if strand_warning == True:
163 |         print("This data caused a warning based on strandedness. Please check the top of the log for details and rerun with appropriate flags if necessary.")
164 |
--------------------------------------------------------------------------------
/borf/get_orfs.py:
--------------------------------------------------------------------------------
1 | # get_orfs.py
2 |
3 | import numpy as np
4 | import re as re
5 | import pandas as pd
6 | import skbio as skbio
7 | from Bio import SeqIO
8 | import os
9 |
10 |
11 | def get_orfs(all_sequences, both_strands=False, min_orf_length=100,
12 |              all_orfs=False, min_upstream_length=50, genetic_code=1):
13 |     """
14 |     Produce a pandas DataFrame of predicted ORFs from a set of sequences.
15 |
16 |     Parameters
17 |     ----------
18 |     all_sequences : list
19 |         list of SeqIO sequence records to predict ORFs for
20 |         (e.g. the records of a fasta file read in with read_fasta
21 |         or Bio.SeqIO.parse)
22 |     both_strands : bool
23 |         Provide predictions for both strands? (i.e. reverse complement).
24 |     min_orf_length : int
25 |         minimum length for a predicted ORF to be reported
26 |     all_orfs : bool
27 |         Return all ORFs longer than min_orf_length?
28 |         Set to False (default) to only return the longest ORF for each sequence.
29 |     min_upstream_length : int
30 |         Minimum length of AA sequence upstream of a canonical start site (e.g. MET) to be used when reporting incomplete_5prime ORFs.
31 | Upstream sequence starts from the start of the translated sequence, and contains no STOP codons. 32 | 33 | Returns 34 | ------- 35 | orf_df : DataFrame 36 | DataFrame containing predicted ORF data and sequences 37 | 38 | """ 39 | # all_sequences = read_fasta(fasta_file) 40 | # create all frame translations of nt sequence 41 | ids, aa_frames, frame, strand, seq_length_nt, seq_length = translate_all_frames(all_sequences, both_strands=both_strands, genetic_code=genetic_code) 42 | 43 | if all_orfs is False: 44 | 45 | # find the longest ORF in each frame 46 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs(aa_frames, min_upstream_length = min_upstream_length) 47 | 48 | # check for upstream ORF? 49 | # get all sequence upstream of the start (M), and reverse it to find 50 | # the distance to the nearest upstream stop codon 51 | orf_sequence, start_sites, orf_length = add_upstream_aas(aa_frames, stop_sites, start_sites, orf_sequence, orf_length, min_upstream_length=min_upstream_length) 52 | 53 | # filter data by minimum orf length 54 | keep = orf_length >= min_orf_length 55 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects(keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 56 | 57 | # only run next steps if there are ORFs 58 | if np.any(keep): 59 | # convert aa indices to nt-based indices 60 | start_site_nt, stop_site_nt, utr3_length = convert_start_stop_to_nt(start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 61 | 62 | # check first and last AA 63 | first_MET = check_first_aa(orf_sequence) 64 | final_stop = np.where(last_aa_is_stop, 'STOP', 'ALT') 65 | else: 66 | start_site_nt = [] 67 | stop_site_nt = [] 68 | utr3_length = [] 69 | first_MET = [] 70 | final_stop = [] 71 | 72 | # collect all and format as pandas DataFrame 73 | orf_df = pd.DataFrame(index=range(len(start_sites))) 74 | orf_df['id'] = ids 75 | orf_df['aa_sequence'] = aa_frames 76 | orf_df['frame'] = frame 77 | orf_df['strand'] = strand 78 | orf_df['seq_length'] = seq_length 79 | orf_df['seq_length_nt'] = seq_length_nt 80 | orf_df['orf_sequence'] = orf_sequence 81 | orf_df['start_site'] = start_sites 82 | orf_df['stop_site'] = stop_sites 83 | orf_df['orf_length'] = orf_length 84 | orf_df['start_site_nt'] = start_site_nt 85 | orf_df['stop_site_nt'] = stop_site_nt 86 | orf_df['utr3_length'] = utr3_length 87 | orf_df['first_MET'] = first_MET 88 | orf_df['final_stop'] = final_stop 89 | 90 | # filter by orf with the max length for each sequence 91 | idx = orf_df.groupby(['id'])['orf_length'].transform(max) == orf_df['orf_length'] 92 | orf_df = orf_df[idx] 93 | # isoform_number so output format is the same as if all_orfs == True 94 | orf_df['isoform_number'] = int(1) 95 | 96 | # if finding all orf > cutoff 97 | else: 98 | 99 | # make DataFrame for each AA frame - joined later with ORF data 100 | # to prevent increasing the size of this too early 101 | sequence_df = pd.DataFrame(index=range(len(aa_frames))) 102 | sequence_df['id'] = ids 103 | sequence_df['aa_sequence'] = aa_frames 104 | sequence_df['frame'] = frame 105 | sequence_df['strand'] = strand 106 | sequence_df['seq_length'] = seq_length 107 | sequence_df['seq_length_nt'] = seq_length_nt 108 | # index so we can match back data later 109 | sequence_df['seq_index'] = range(len(aa_frames)) 110 | 111 | # find all ORFs longer than min_orf_length 112 
| orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs(aa_frames, min_orf_length=min_orf_length, min_upstream_length = min_upstream_length) 113 | 114 | # check for upstream ORF? 115 | # get all sequence upstream of the start (M), and reverse it to 116 | # find the distance to the nearest upstream stop codon 117 | full_seq_matched = np.array(sequence_df['aa_sequence'][matched_index], dtype='str') 118 | orf_sequence, start_sites, orf_length = add_upstream_aas(full_seq_matched, stop_sites, start_sites, orf_sequence, orf_length, min_upstream_length=min_upstream_length) 119 | 120 | # filter data by minimum orf length 121 | keep = orf_length >= min_orf_length 122 | start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length, matched_index = filter_objects(keep, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length, matched_index) 123 | 124 | # make DataFrame of ORF data 125 | orf_df = pd.DataFrame(index=range(len(orf_sequence))) 126 | orf_df['seq_index'] = matched_index 127 | orf_df['orf_sequence'] = orf_sequence 128 | orf_df['start_site'] = start_sites 129 | orf_df['stop_site'] = stop_sites 130 | orf_df['orf_length'] = orf_length 131 | # combine with sequence data from above 132 | orf_df = pd.merge(sequence_df, orf_df, on='seq_index', how='right') 133 | orf_df.drop('seq_index', axis=1, inplace=True) 134 | 135 | if np.any(keep): 136 | # convert aa indices to nt-based indices 137 | orf_df['start_site_nt'], orf_df['stop_site_nt'], orf_df['utr3_length'] = convert_start_stop_to_nt(start_sites, stop_sites, orf_df['seq_length_nt'], orf_length, orf_df['frame'], last_aa_is_stop) 138 | # check first and last AA 139 | orf_df['first_MET'] = check_first_aa(orf_df['orf_sequence']) 140 | orf_df['final_stop'] = np.where(last_aa_is_stop, 'STOP', 'ALT') 141 | else: 142 | # convert aa indices to nt-based indices 143 | orf_df['start_site_nt'] = [] 144 | orf_df['stop_site_nt'] = [] 145 | orf_df['utr3_length'] = [] 146 | # check first and last AA 147 | orf_df['first_MET'] = [] 148 | orf_df['final_stop'] = [] 149 | 150 | orf_df['isoform_number'] = unique_number_from_list(orf_df.id) 151 | 152 | # add ORF classification 153 | orf_df['orf_class'] = add_orf_classification(orf_df) 154 | # Generate ids for writing to fasta 155 | orf_df['fasta_id'] = (orf_df.id + '.orf' + orf_df.isoform_number.map(str) + ' ' + orf_df.orf_class + ':' + orf_df.start_site_nt.map(str) + '-' + orf_df.stop_site_nt.map(str) + ' strand:' + orf_df.strand.map(str)) 156 | 157 | return orf_df 158 | 159 | 160 | def translate_all_frames(sequences, both_strands=False, genetic_code=1): 161 | 162 | """ 163 | translate nt sequences into all 3 frames 164 | 165 | Parameters 166 | ---------- 167 | sequences : list 168 | list of nucleotide sequences 169 | both_strands : bool 170 | translate both strands? 
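    genetic_code : int
        NCBI genetic code (translation table) used for translation; passed
        through to scikit-bio's translate (default 1, the standard code)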
171 |
172 |     Returns
173 |     -------
174 |     ids, aa_seq_by_frame, frame, strand, seq_length_nt, seq_length :
175 |         numpy arrays (one entry per translated frame) of sequence ids, AA
176 |         translations, frame numbers, strands, nt lengths and AA lengths
177 |
178 |     """
179 |     # create all frame translations of nt sequence
180 |     aa_seq_by_frame = []
181 |     frame = []
182 |     seq_length_nt = []
183 |     ids = []
184 |     skipped_counter = 0
185 |     for seq_string in sequences:
186 |
187 |         nucleotide_seq = str(seq_string.seq)
188 |         non_ATGC = len(nucleotide_seq) - (nucleotide_seq.count('A') + nucleotide_seq.count('T') + nucleotide_seq.count('G') + nucleotide_seq.count('C'))
189 |         skip = non_ATGC > 0
190 |
191 |         if skip is False:
192 |
193 |             for reading_frame in range(3):
194 |
195 |                 aa_seq_by_frame.append(str(skbio.DNA(str(seq_string.seq[reading_frame:])).translate(genetic_code)))
196 |                 frame.append(reading_frame)
197 |                 seq_length_nt.append(len(str(seq_string.seq)))
198 |                 ids.append(seq_string.id)
199 |
200 |                 if both_strands is True:
201 |                     # translate reverse complement
202 |                     aa_seq_by_frame.append(str(skbio.DNA(str(skbio.DNA(str(seq_string.seq)).complement(reverse=True))[reading_frame:]).translate(genetic_code)))
203 |                     frame.append(reading_frame)
204 |                     seq_length_nt.append(len(str(seq_string.seq)))
205 |                     ids.append(seq_string.id)
206 |
207 |         else:
208 |             print("Skipping " + str(seq_string.id) + ". Found " + str(non_ATGC) + " non-ACGT characters.")
209 |             skipped_counter = skipped_counter + 1
210 |
211 |     seq_length_nt = np.array(seq_length_nt)
212 |     aa_seq_by_frame = np.array(aa_seq_by_frame)
213 |     frame = np.array(frame) + 1
214 |     if both_strands is False:
215 |         strand = np.array([s for s in '+' for i in range(len(aa_seq_by_frame))])
216 |     else:
217 |         strand = np.tile(np.array(['+', '-']), (len(sequences)-skipped_counter)*3)
218 |
219 |     seq_length = np.array([len(o) for o in aa_seq_by_frame])
220 |
221 |     ids = np.array(ids)
222 |     return ids, aa_seq_by_frame, frame, strand, seq_length_nt, seq_length
223 |
224 |
225 | def find_longest_orfs(aa_frames, min_upstream_length=50):
226 |     start_sites = []
227 |     stop_sites = []
228 |     orf_sequence = []
229 |
230 |     for aa_seq in aa_frames:
231 |
232 |         max_start, max_end = orf_start_stop_from_aa(aa_seq, min_upstream_length=min_upstream_length)
233 |         # if returning all > 100AA
234 |
235 |         start_sites.append(max_start)
236 |         stop_sites.append(max_end)
237 |
238 |         # extract orf sequence
239 |         orf_sequence.append(aa_seq[max_start:max_end])
240 |
241 |     orf_sequence = np.array(orf_sequence)
242 |
243 |     # check if the last AA is a stop (*) and trim it if necessary
244 |     last_aa_is_stop = [o[-1] == '*' for o in orf_sequence]
245 |     orf_sequence[last_aa_is_stop] = [o[0:-1] for o in orf_sequence[last_aa_is_stop]]
246 |
247 |     orf_length = np.array([len(o) for o in orf_sequence])
248 |
249 |     # add 1 to convert pythonic index to normal-person index...
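    # e.g. the frame 'META*MEATBORF*' yields the python slice [5:14] ('MEATBORF*'),
    # which is reported below as start_site 6 and stop_site 14 (1-based, stop at '*')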
250 | start_sites = np.array(start_sites) + 1 251 | stop_sites = np.array(stop_sites) 252 | last_aa_is_stop = np.array(last_aa_is_stop) 253 | 254 | return orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop 255 | 256 | 257 | def orf_start_stop_from_aa(aa_seq, *, max_only=True, min_upstream_length=50): 258 | """ 259 | Find locations of the start (M) and stop (*) codons that produce the 260 | longest ORF 261 | 262 | Parameters 263 | ---------- 264 | aa_seq : str 265 | amino acid sequence 266 | max_only : bool 267 | Only return that start and stop locations of the longest ORF 268 | 269 | Returns 270 | ------- 271 | start_loc : int 272 | start location 273 | end_loc : int 274 | end location 275 | 276 | Examples 277 | -------- 278 | 279 | orf_start_stop_from_aa("META*") 280 | orf_start_stop_from_aa("META*MEATBORF*") 281 | orf_start_stop_from_aa("META*MEATBORF") 282 | orf_start_stop_from_aa("MEATBORF") 283 | 284 | """ 285 | 286 | # find all M 287 | if aa_seq.count("M") > 0: 288 | start_locs = [] 289 | end_locs = [] 290 | 291 | M_locations = [m.span()[0] for m in re.finditer('M', aa_seq)] 292 | if min(M_locations) > min_upstream_length: 293 | M_locations.insert(0,0) # add 0 to be the first location (i.e. upstream incomplete transcripts) 294 | last_end = -1 295 | for m in M_locations: 296 | if m > last_end-1: 297 | stop_location = find_next_stop(aa_seq, m) 298 | start_locs.append(m) 299 | end_locs.append(stop_location) 300 | last_end = stop_location 301 | # if returning all > 100AA 302 | # find the start/end of the longest ORF 303 | if max_only is True: 304 | max_start, max_end = find_max_orf_index(start_locs, end_locs) 305 | else: 306 | max_start, max_end = start_locs, end_locs 307 | 308 | else: 309 | max_start = 0 310 | max_end = find_next_stop(aa_seq, max_start) 311 | 312 | return max_start, max_end 313 | 314 | 315 | def find_next_stop(aa_seq, start_loc): 316 | """ 317 | Find location of the next stop codon (*) after the start location. 318 | Return string length if no stop codon is found. 
319 | 320 | Parameters 321 | ---------- 322 | aa_seq : str 323 | amino acid sequence 324 | start_loc : int 325 | start location 326 | 327 | Returns 328 | ------- 329 | end_loc : int 330 | location of the next stop codon, or length of string if none is found 331 | 332 | Examples 333 | -------- 334 | 335 | find_next_stop("AAAMBBB*CCC*", 4) 336 | find_next_stop("AAAMBBBCCC", 4) 337 | 338 | """ 339 | stop_codon = np.char.find(aa_seq[start_loc:], '*') 340 | 341 | if stop_codon == -1: 342 | stop_codon = len(aa_seq) 343 | return stop_codon 344 | else: 345 | end_loc = stop_codon + start_loc + 1 346 | return end_loc 347 | 348 | 349 | def find_max_orf_index(start_locs, end_locs): 350 | """ 351 | Given sets of start and end locations, return the set with the largest 352 | difference 353 | 354 | Parameters 355 | ---------- 356 | start_locs : np.array 357 | start locations 358 | end_locs : np.array 359 | end locations 360 | 361 | Returns 362 | ------- 363 | start_loc : int 364 | start location 365 | end_loc : int 366 | end location 367 | 368 | Examples 369 | -------- 370 | 371 | find_max_orf_index(start_locs = [0,100], end_locs = [1000, 200]) 372 | 373 | """ 374 | orf_lengths = np.array(end_locs) - np.array(start_locs) 375 | if orf_lengths.size > 1: 376 | max_index = np.where(orf_lengths == np.amax(orf_lengths))[0] 377 | return np.array(start_locs)[max_index][0], np.array(end_locs)[max_index][0] 378 | else: 379 | return np.array(start_locs)[0], np.array(end_locs)[0] 380 | 381 | 382 | def add_upstream_aas(aa_frames, stop_sites, start_sites, orf_sequence, 383 | orf_length, min_upstream_length=50): 384 | """ 385 | Add the upstream AAs onto orf sequences 386 | 387 | Parameters 388 | ---------- 389 | aa_frames : list 390 | list of translated AA sequences (full length) 391 | start_sites : list 392 | list of start sites 393 | stop_sites : list 394 | list of stop sites 395 | orf_sequence : list 396 | list of ORF sequences (i.e. 
from start to stop codon)
397 |     orf_length : list
398 |         list of orf lengths
399 |     min_upstream_length : int
400 |         minimum length of upstream sequence for it to be added
401 |
402 |     Returns
403 |     -------
404 |     orf_sequence : list
405 |         list of ORF sequences including upstream AA where appropriate
406 |     start_sites : list
407 |         list of start sites
408 |     orf_length : list
409 |         list of orf lengths
410 |     """
411 |     first_stop = np.char.find(np.array(aa_frames), "*")
412 |     add_upstream = np.logical_and(np.logical_or(first_stop == -1, first_stop == (stop_sites-1)), start_sites > min_upstream_length)
413 |
414 |     if np.any(add_upstream):
415 |         # object so no sequence truncation
416 |         orf_sequence_withup = orf_sequence.copy().astype('object')
417 |         orf_length_withup = orf_length.copy()
418 |         start_sites_withup = start_sites.copy()
419 |
420 |         orf_with_upstream = [o[0:s] for o, s in zip(aa_frames[add_upstream], stop_sites[add_upstream])]
421 |         # check if the last AA is a stop (*) and trim it if necessary
422 |         orf_with_upstream = [replace_last_stop(o) for o in orf_with_upstream]
423 |         orf_sequence_withup[add_upstream] = orf_with_upstream
424 |         start_sites_withup[add_upstream] = 1  # set to 1 for upstream ORFs
425 |         orf_length_withup[add_upstream] = np.array([len(o) for o in orf_sequence_withup[add_upstream]])
426 |
427 |         orf_sequence_withup = orf_sequence_withup.astype(str)
428 |
429 |         return orf_sequence_withup, start_sites_withup, orf_length_withup
430 |     else:
431 |         return orf_sequence, start_sites, orf_length
432 |
433 |
434 | def replace_last_stop(orf_seq):
435 |
436 |     """
437 |     remove a final stop character (*) from the end of a string
438 |
439 |     Parameters
440 |     ----------
441 |     orf_seq : str
442 |         orf_sequence
443 |
444 |     Returns
445 |     -------
446 |     orf_seq : str
447 |         orf_sequence
448 |
449 |     Examples
450 |     --------
451 |
452 |     replace_last_stop("META*")
453 |     replace_last_stop("METAL")
454 |
455 |     """
456 |
457 |     if orf_seq[-1] == '*':
458 |         replaced_orf_seq = orf_seq[0:-1]
459 |         return replaced_orf_seq
460 |     else:
461 |         return orf_seq
462 |
463 |
464 | def filter_objects(filter, *objects):
465 |
466 |     """
467 |     filter multiple objects
468 |
469 |     Parameters
470 |     ----------
471 |     filter : list
472 |         boolean list
473 |     objects :
474 |         objects to filter
475 |
476 |     Returns
477 |     -------
478 |     objects :
479 |         filtered objects
480 |     """
481 |
482 |     new_objects = []
483 |     for o in objects:
484 |         new_objects.append(o[filter])
485 |
486 |     return new_objects
487 |
488 |
489 | def convert_start_stop_to_nt(start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop):
490 |     """
491 |     Convert AA locations to nt locations
492 |
493 |     Parameters
494 |     ----------
495 |     start_sites : list
496 |         list of start sites
497 |     stop_sites : list
498 |         list of stop sites
499 |     seq_length_nt : list
500 |         list of sequence lengths (in nt)
501 |     orf_length : list
502 |         list of orf lengths
503 |     frame : list
504 |         list of frames
505 |     last_aa_is_stop : list
506 |         booleans indicating whether the stop site refers to a stop codon (*)
507 |         or not.
508 | 509 | Returns 510 | ------- 511 | start_site_nt : list 512 | list of start sites (in nt) 513 | stop_site_nt : list 514 | list of stop sites (in nt) 515 | utr3_length : list 516 | list of 3' utr lengths (in nt) 517 | """ 518 | 519 | start_site_nt = (start_sites*3) - 3 + frame 520 | # only give a stop_site_nt location if the last AA is * //// NOT ANYMORE 521 | # using NAN values gives issues when trying to convert to int 522 | stop_site_nt = orf_length*3 + start_site_nt + 3 - 1 523 | stop_site_nt[np.logical_not(last_aa_is_stop)] = seq_length_nt[np.logical_not(last_aa_is_stop)] 524 | 525 | utr3_length = np.zeros(len(start_site_nt)) 526 | utr3_length[last_aa_is_stop] = seq_length_nt[last_aa_is_stop] - stop_site_nt[last_aa_is_stop] 527 | utr3_length = utr3_length.astype(int) 528 | return start_site_nt, stop_site_nt, utr3_length 529 | 530 | 531 | def check_first_aa(orf_sequence, start_codon='M'): 532 | """ 533 | Check that the first AA in a list of ORF sequences is M. 534 | 535 | Parameters 536 | ---------- 537 | orf_sequence : 538 | list of orf sequences 539 | start_codon : 540 | character representing the start codon 541 | 542 | Returns 543 | ------- 544 | first_MET : numpy array 545 | array matching orf_sequence with either the start codon or 'ALT' 546 | 547 | Examples 548 | -------- 549 | check_first_aa(['META','ETAM']) 550 | """ 551 | 552 | first_aa = [o[0] for o in orf_sequence] 553 | first_MET = np.where(np.array(first_aa) == start_codon, start_codon, 'ALT') 554 | return first_MET 555 | 556 | 557 | def find_all_orfs(aa_frames, min_orf_length, min_upstream_length=50): 558 | matched_index = [] 559 | start_sites = [] 560 | stop_sites = [] 561 | orf_sequence = [] 562 | 563 | for i in range(len(aa_frames)): 564 | 565 | aa_seq = aa_frames[i] 566 | start_locs, end_locs = orf_start_stop_from_aa(aa_seq, max_only=False, min_upstream_length=min_upstream_length) 567 | first_stop = np.char.find(aa_seq, '*') 568 | # if returning all > 100AA 569 | # OR if potential upstream incomplete 570 | orf_lengths = (np.array(end_locs) - np.array(start_locs)) 571 | above_min_length = np.logical_or(np.logical_or(orf_lengths >= min_orf_length, start_locs < first_stop), first_stop == -1) 572 | 573 | orf_lengths = orf_lengths[above_min_length] 574 | max_start = np.array(start_locs)[above_min_length] 575 | max_end = np.array(end_locs)[above_min_length] 576 | rep_index = np.repeat(i, len(orf_lengths)) 577 | 578 | start_sites.append(max_start) 579 | stop_sites.append(max_end) 580 | matched_index.append(rep_index) 581 | 582 | # extract orf sequence 583 | if np.array(max_start).size == 1: 584 | orf_sequence.append(aa_seq[int(max_start):int(max_end)]) 585 | elif np.array(max_start).size > 1: 586 | orf_sequence.append([aa_seq[sta:end] for sta, end in zip(max_start, max_end)]) 587 | 588 | start_sites = np.hstack(start_sites) 589 | stop_sites = np.hstack(stop_sites) 590 | matched_index = np.hstack(matched_index) 591 | orf_sequence = np.hstack(orf_sequence) 592 | 593 | # check if the last AA is a stop (*) and trim it if neccessary 594 | last_aa_is_stop = [o[-1] == '*' for o in orf_sequence] 595 | orf_sequence = np.array([replace_last_stop(o) for o in orf_sequence]) 596 | 597 | orf_length = np.array([len(o) for o in orf_sequence]) 598 | 599 | # add 1 to convert pythonic index to normal-person index... 
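    # (matched_index maps each ORF back to the row index of its source frame,
    # which is later used to join the ORF-level arrays onto sequence_df)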
600 | start_sites = np.array(start_sites) + 1 601 | stop_sites = np.array(stop_sites) 602 | last_aa_is_stop = np.array(last_aa_is_stop) 603 | 604 | return orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index 605 | 606 | 607 | def unique_number_from_list(input_list): 608 | """ 609 | Produce a list of integers corresponding to the number of times an 610 | element in the input list has been observed. 611 | 612 | Parameters 613 | ---------- 614 | input_list : list 615 | list of values 616 | 617 | Returns 618 | ------- 619 | occurrence : list 620 | integer list of occurrence counts 621 | 622 | Examples 623 | -------- 624 | 625 | unique_number_from_list(['a','a','b','c','c','c']) 626 | unique_number_from_list(['a','b','c']) 627 | 628 | """ 629 | dups = {} 630 | occurrence = [] 631 | for i, val in enumerate(input_list): 632 | if val not in dups: 633 | # Store index of first occurrence and occurrence value 634 | dups[val] = [i, 1] 635 | 636 | 637 | else: 638 | dups[val][1] += 1 # Increment occurrence value 639 | # Use stored occurrence value 640 | occurrence.append(dups[val][1]) 641 | return occurrence 642 | 643 | 644 | def add_orf_classification(orf_df): 645 | """ 646 | Generate ORF type classification from an orf_df. 647 | complete: Complete CDS - contains start codon and stop codon 648 | incomplete_5prime: Incomplete CDS - has stop codon, but start of sequence 649 | indicates that an upstream start codon may be missing. 650 | incomplete_3prime: Incomplete CDS - has start codon, but no stop codon. 651 | incomplete: Incomplete CDS - neither start codon nor stop codon found. 652 | 653 | Parameters 654 | ---------- 655 | orf_df : DataFrame 656 | orf_df DataFrame 657 | 658 | Returns 659 | ------- 660 | orf_class : np.array 661 | numpy array of orf classifications 662 | 663 | """ 664 | orf_class = np.empty(len(orf_df['first_MET']), dtype='object') 665 | 666 | orf_class[np.logical_and(orf_df['first_MET'] == "M", orf_df['final_stop'] == "STOP")] = 'complete' 667 | orf_class[np.logical_and(orf_df['first_MET'] != "M", orf_df['final_stop'] == "STOP")] = 'incomplete_5prime' 668 | orf_class[np.logical_and(orf_df['first_MET'] == "M", orf_df['final_stop'] != "STOP")] = 'incomplete_3prime' 669 | orf_class[np.logical_and(orf_df['first_MET'] != "M", orf_df['final_stop'] != "STOP")] = 'incomplete' 670 | 671 | return orf_class 672 | 673 | 674 | def read_fasta(fasta_file): 675 | """ 676 | read in a fasta file 677 | 678 | Parameters 679 | ---------- 680 | fasta_file : str 681 | path to fasta file 682 | 683 | Returns 684 | ------- 685 | all_sequences : list 686 | SeqIO records of each sequence, converted to upper case 687 | """ 688 | all_sequences = [] 689 | 690 | # read in fasta file 691 | for record in SeqIO.parse(fasta_file, 'fasta'): 692 | all_sequences.append(record.upper()) 693 | 694 | return all_sequences 695 | 696 | 697 | def write_orf_data(orf_df, file_out): 698 | """ 699 | Write ORF sequence metadata to a tab-separated txt file.
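If file_out already exists, rows are appended after checking that the existing column layout matches the ORF data.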
700 | 701 | Parameters 702 | ---------- 703 | orf_df : DataFrame 704 | orf_df DataFrame 705 | file_out : str 706 | path to output txt file 707 | 708 | """ 709 | 710 | orf_df = orf_df[['fasta_id', 'id', 'frame', 'strand', 'seq_length_nt', 'start_site_nt', 'stop_site_nt', 'utr3_length', 'start_site', 'stop_site', 'orf_length', 'first_MET', 'final_stop', 'orf_class']] 711 | 712 | orf_df.columns = ['orf_id', 'transcript_id', 'frame', 'strand', 'seq_length_nt', 'start_site_nt', 'stop_site_nt', 'utr3_length_nt', 'start_site_aa', 'stop_site_aa', 'orf_length_aa', 'first_aa_MET', 'final_aa_stop', 'orf_class'] 713 | 714 | 715 | 716 | if not os.path.isfile(file_out): 717 | orf_df.to_csv(file_out, mode='a', index=False, sep='\t') 718 | elif len(orf_df.columns) != len(pd.read_csv(file_out, nrows=1, sep='\t').columns): 719 | raise Exception("Columns do not match: ORF data has " + str(len(orf_df.columns)) + " columns. Output txt file has " + str(len(pd.read_csv(file_out, nrows=1, sep='\t').columns)) + " columns.") 720 | elif not (orf_df.columns == pd.read_csv(file_out, nrows=1, sep='\t').columns).all(): 721 | raise Exception("Columns and column order of ORF data and txt file do not match.") 722 | else: 723 | orf_df.to_csv(file_out, mode='a', index=False, sep='\t', header=False) 724 | 725 | 726 | def write_orf_fasta(orf_df, file_out): 727 | """ 728 | Write ORF sequences to a fasta file. 729 | 730 | Parameters 731 | ---------- 732 | orf_df : DataFrame 733 | orf_df DataFrame 734 | file_out : str 735 | path to file to write fasta sequences 736 | 737 | """ 738 | orf_df['fasta_id'] = '>' + orf_df.fasta_id 739 | orf_df.to_csv(file_out, mode='a', index=False, sep='\n', header=False, columns=['fasta_id', 'orf_sequence']) 740 | 741 | def write_orf_cds(orf_df, file_out): 742 | """ 743 | Write ORF CDS sequences to a fasta file. 744 | 745 | Parameters 746 | ---------- 747 | orf_df : DataFrame 748 | orf_df DataFrame 749 | file_out : str 750 | path to file to write fasta sequences 751 | 752 | """ 753 | orf_df['fasta_id'] = '>' + orf_df.fasta_id 754 | orf_df.to_csv(file_out, mode='a', index=False, sep='\n', header=False, columns=['fasta_id', 'cds_seq']) 755 | 756 | def batch_iterator(iterator, batch_size): 757 | """Returns lists of length batch_size. 758 | 759 | This can be used on any iterator, for example to batch up 760 | SeqRecord objects from Bio.SeqIO.parse(...), or to batch 761 | Alignment objects from Bio.AlignIO.parse(...), or simply 762 | lines from a file handle. 763 | 764 | This is a generator function, and it returns lists of the 765 | entries from the supplied iterator. Each list will have 766 | batch_size entries, although the final list may be shorter.
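Example (a sketch, assuming a fasta file named 'example.fa' exists): each list yielded by batch_iterator(SeqIO.parse('example.fa', 'fasta'), 100) contains at most 100 SeqRecord objects.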
767 | """ 768 | entry = True # Make sure we loop once 769 | while entry: 770 | batch = [] 771 | while len(batch) < batch_size: 772 | try: 773 | entry = next(iterator) 774 | except StopIteration: 775 | entry = None 776 | if entry is None: 777 | # End of file 778 | break 779 | batch.append(entry) 780 | if batch: 781 | yield batch 782 | -------------------------------------------------------------------------------- /borf/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/signalbash/borf/52bca757f95027388c5f8cdb8de80d88d5974b27/borf/tests/__init__.py -------------------------------------------------------------------------------- /borf/tests/test_borf.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas as pd 3 | import numpy as np 4 | 5 | from borf.get_orfs import read_fasta 6 | from borf.get_orfs import find_next_stop 7 | from borf.get_orfs import find_max_orf_index 8 | from borf.get_orfs import orf_start_stop_from_aa 9 | from borf.get_orfs import find_longest_orfs 10 | from borf.get_orfs import replace_last_stop 11 | from borf.get_orfs import add_upstream_aas 12 | from borf.get_orfs import filter_objects 13 | from borf.get_orfs import translate_all_frames 14 | from borf.get_orfs import convert_start_stop_to_nt 15 | from borf.get_orfs import check_first_aa 16 | from borf.get_orfs import unique_number_from_list 17 | from borf.get_orfs import find_all_orfs 18 | from borf.get_orfs import add_orf_classification 19 | from borf.get_orfs import get_orfs 20 | 21 | 22 | class TestReadFasta(unittest.TestCase): 23 | def test_read_fasta(self): 24 | 25 | # check that files are read into the correct format 26 | read_sequence = read_fasta('test_data/test_mutliple_frame_orfs.fa') 27 | seq_array = [str(x.seq) for x in read_sequence] 28 | # check sequence matches 29 | # (only check first/last few nts, and total length) 30 | t_start = seq_array[0][0:20] == 'GCTTCGGGTTGGTGTCATGG' 31 | t_end = seq_array[0][-1:-20:-1] == 'AGTTGTGTTACCGGGACGG' 32 | t_len = len(seq_array[0]) == 2757 33 | 34 | self.assertTrue(t_start and t_end and t_len) 35 | 36 | 37 | class TestFindNextStop(unittest.TestCase): 38 | 39 | def test_next_stop_not_longest(self): 40 | # check this finds the NEXT stop codon 41 | # assert find_next_stop("AAAMBBB*CCC*", 4) == 8 42 | next_stop = find_next_stop("AMEATBALL*", 0) 43 | self.assertEqual(next_stop, 10) 44 | 45 | def test_next_stop_from_within(self): 46 | # check this finds the NEXT stop codon when given a start position 47 | # greater than 0/1 48 | orf = "AMEATY*METABALL*" 49 | next_stop = find_next_stop(orf, 7) 50 | self.assertEqual(next_stop, len(orf)) 51 | 52 | def test_next_stop_final(self): 53 | # check that this returns the length of the given string when no stop 54 | # codon is found 55 | orf = "AMEATBALL" 56 | next_stop = find_next_stop(orf, 0) 57 | self.assertEqual(next_stop, len(orf)) 58 | 59 | 60 | class TestFindMaxOrfIndex(unittest.TestCase): 61 | 62 | def test_find_max_orf_index(self): 63 | # test basic usage of finding the two maximum values 64 | self.assertEqual(find_max_orf_index(start_locs=[0, 100], 65 | end_locs=[1000, 200]), (0, 1000)) 66 | 67 | def test_find_max_orf_index_offby1(self): 68 | # test when second index is greater by one 69 | self.assertEqual(find_max_orf_index(start_locs=[0, 100], 70 | end_locs=[999, 1100]), (100, 1100)) 71 | 72 | def test_find_max_orf_index_equal(self): 73 | # test that first instance of the
max is returned 74 | self.assertEqual(find_max_orf_index(start_locs=[0, 100], 75 | end_locs=[1000, 1100]), (0, 1000)) 76 | 77 | 78 | class TestOrfStartStopFromAA(unittest.TestCase): 79 | 80 | def test_correct_start_stop(self): 81 | # tests that the correct start/stop locations are given 82 | # in non-pythonic (1-indexed) manner 83 | self.assertEqual(orf_start_stop_from_aa('AMEATBALL*'), (1, 10)) 84 | 85 | def test_start_stop_no_stop_codon(self): 86 | # tests that stop location is the final aa when no stop codon is found 87 | self.assertEqual(orf_start_stop_from_aa('AMEATBALL'), (1, 9)) 88 | 89 | def test_start_stop_longest(self): 90 | # tests that the start/stop locations are given for the LONGEST orf 91 | self.assertEqual(orf_start_stop_from_aa('MAUL*AMEATBALL'), (6, 14)) 92 | 93 | 94 | class TestFindLongestORF(unittest.TestCase): 95 | 96 | def test_find_longest_orf_output_format(self): 97 | # tests that a length 5 tupple output, and each is the correct numpy 98 | # array type 99 | long_orf = find_longest_orfs(['AMEATBALL']) 100 | 101 | t_len = len(long_orf) == 5 102 | # test numpy types of all outputs 103 | t0 = long_orf[0].dtype == '= 6 326 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 327 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 328 | 329 | output = convert_start_stop_to_nt( 330 | start_sites, 331 | stop_sites, 332 | seq_length_nt, 333 | orf_length, 334 | frame, 335 | last_aa_is_stop) 336 | 337 | t_len = len(output) == 3 338 | # test numpy types of all outputs 339 | t0 = output[0].dtype == 'int64' 340 | t1 = output[1].dtype == 'int64' 341 | t2 = output[2].dtype == 'int64' 342 | 343 | all_right_types = t0 and t1 and t2 and t_len 344 | self.assertTrue(all_right_types) 345 | 346 | def test_convert_start_nt(self): 347 | sequences = read_fasta('test_data/test_frames.fa') 348 | 349 | ids, aa_frames, frame, strand, seq_length_nt, seq_length = translate_all_frames( 350 | sequences, both_strands=False) 351 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs( 352 | aa_frames) 353 | # filter data by minimum orf length 354 | keep = orf_length >= 6 355 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 356 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 357 | 358 | start_site_nt, stop_site_nt, utr3_length = convert_start_stop_to_nt( 359 | start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 360 | 361 | self.assertTrue(np.all(start_site_nt == np.array([1, 2, 3]))) 362 | 363 | def test_convert_stop_nt(self): 364 | sequences = read_fasta('test_data/test_frames.fa') 365 | 366 | ids, aa_frames, frame, strand,seq_length_nt, seq_length = translate_all_frames(sequences, both_strands=False) 367 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs( 368 | aa_frames) 369 | # filter data by minimum orf length 370 | keep = orf_length >= 6 371 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 372 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 373 | 374 | start_site_nt, 
stop_site_nt, utr3_length = convert_start_stop_to_nt( 375 | start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 376 | self.assertTrue(np.all(stop_site_nt == np.array([21, 22, 23]))) 377 | 378 | def test_convert_stop_nt_3incomplete(self): 379 | sequences = read_fasta('test_data/test_stopsitent.fa') 380 | 381 | ids, aa_frames, frame, strand,seq_length_nt, seq_length = translate_all_frames(sequences, both_strands=False) 382 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs(aa_frames) 383 | # filter data by minimum orf length 384 | keep = orf_length >= 6 385 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 386 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 387 | 388 | start_site_nt, stop_site_nt, utr3_length = convert_start_stop_to_nt( 389 | start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 390 | self.assertTrue(np.all(stop_site_nt == seq_length_nt)) 391 | 392 | 393 | def test_convert_utr_nt(self): 394 | sequences = read_fasta('test_data/test_frames.fa') 395 | 396 | ids, aa_frames, frame, strand, seq_length_nt, seq_length = translate_all_frames( 397 | sequences, both_strands=False) 398 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop = find_longest_orfs( 399 | aa_frames) 400 | # filter data by minimum orf length 401 | keep = orf_length >= 6 402 | aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length = filter_objects( 403 | keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length) 404 | 405 | start_site_nt, stop_site_nt, utr3_length = convert_start_stop_to_nt( 406 | start_sites, stop_sites, seq_length_nt, orf_length, frame, last_aa_is_stop) 407 | self.assertTrue(np.all(utr3_length == np.array([5, 4, 3]))) 408 | 409 | 410 | class TestCheckFirstAA(unittest.TestCase): 411 | 412 | def test_check_first_aa_pos(self): 413 | # tests that a length 3 tupple output, and each is the correct numpy 414 | # array type 415 | aa_sequence = np.array(['MEATBALL']) 416 | self.assertEqual(check_first_aa(aa_sequence), 'M') 417 | 418 | def test_check_first_aa_neg(self): 419 | # tests that a length 3 tupple output, and each is the correct numpy 420 | # array type 421 | aa_sequence = np.array(['NOTAMEATBALL']) 422 | self.assertEqual(check_first_aa(aa_sequence), 'ALT') 423 | 424 | def test_check_first_aa_multi(self): 425 | # tests that a length 3 tupple output, and each is the correct numpy 426 | # array type 427 | aa_sequence = np.array(['MEATBALL', 'NOTAMEATBALL']) 428 | self.assertTrue(np.all(check_first_aa( 429 | aa_sequence) == np.array(['M', 'ALT']))) 430 | 431 | 432 | class TestCheckUniqueN(unittest.TestCase): 433 | 434 | def test_check_unique_n(self): 435 | # tests that a length 3 tupple output, and each is the correct numpy 436 | # array type 437 | values = np.array( 438 | ['MEATBALL', 'MEATBALL', 'BEAR', 'MEATBALL', 'MEATBALLS']) 439 | self.assertEqual(unique_number_from_list(values), [1, 2, 1, 3, 1]) 440 | 441 | 442 | class TestFindAllORFs(unittest.TestCase): 443 | 444 | def test_find_all_orfs_output_format(self): 445 | 446 | aa_seqs = np.array(['MEATBALL*MEATBALLBEAR*']) 447 | output = find_all_orfs(aa_seqs, min_orf_length=5) 448 | 449 | t_len = len(output) == 6 450 | # test 
numpy types of all outputs 451 | t0 = output[0].dtype.type == np.str_ 452 | t1 = output[1].dtype == 'int64' 453 | t2 = output[2].dtype == 'int64' 454 | t3 = output[3].dtype == 'int64' 455 | t4 = output[4].dtype == 'bool' 456 | t5 = output[5].dtype == 'int64' 457 | 458 | all_right_types = t0 and t1 and t2 and t3 and t4 and t5 and t_len 459 | self.assertTrue(all_right_types) 460 | 461 | def test_find_two_orfs(self): 462 | # tests that a length 3 tupple output, and each is the correct numpy 463 | # array type 464 | aa_seqs = np.array(['MEATBALL*MEATBALLBEAR*']) 465 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs( 466 | aa_seqs, min_orf_length=5) 467 | 468 | orf_correct = np.all(orf_sequence == np.array( 469 | ['MEATBALL', 'MEATBALLBEAR'])) 470 | start_correct = np.all(start_sites == np.array([1, 10])) 471 | stop_correct = np.all(stop_sites == np.array([9, 22])) 472 | orf_length_correct = np.all(orf_length == np.array([8, 12])) 473 | last_aa_is_stop_correct = np.all( 474 | last_aa_is_stop == np.array([True, True])) 475 | matched_index_correct = np.all(matched_index == np.array([0, 0])) 476 | 477 | self.assertTrue( 478 | orf_correct and start_correct and stop_correct and orf_length_correct and last_aa_is_stop_correct and last_aa_is_stop_correct and matched_index_correct) 479 | 480 | def test_find_multi_orfs(self): 481 | # tests that a length 3 tupple output, and each is the correct numpy 482 | # array type 483 | aa_seqs = np.array(['MEATBALL*MEATBALLBEAR*', '*NOPE', 'MELMCAT']) 484 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs(aa_seqs, min_orf_length=5) 485 | 486 | self.assertTrue(np.all(orf_sequence == np.array(['MEATBALL', 'MEATBALLBEAR', 'MELMCAT']))) 487 | 488 | def test_find_multi_orfs_index(self): 489 | # tests that a length 3 tupple output, and each is the correct numpy 490 | # array type 491 | aa_seqs = np.array(['MEATBALL*MEATBALLBEAR*', '*NOPE', 'MELMCAT']) 492 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs(aa_seqs, min_orf_length=5) 493 | 494 | self.assertTrue(np.all(matched_index == np.array([0, 0, 2]))) 495 | 496 | def test_find_all_orfs_upstream_ic(self): 497 | # tests that a length 3 tupple output, and each is the correct numpy 498 | # array type 499 | aa_seqs = np.array(['*NOPE', 'YES']) 500 | orf_sequence, start_sites, stop_sites, orf_length, last_aa_is_stop, matched_index = find_all_orfs(aa_seqs, min_orf_length=5) 501 | 502 | self.assertTrue(np.all(orf_sequence == np.array(['YES']))) 503 | 504 | class TestAddOrfClass(unittest.TestCase): 505 | 506 | def test_add_orf_classification_complete(self): 507 | orf_df = pd.DataFrame(index=range(1)) 508 | orf_df['first_MET'] = 'M' 509 | orf_df['final_stop'] = 'STOP' 510 | 511 | self.assertTrue(np.all(add_orf_classification(orf_df) == 512 | np.array(['complete']))) 513 | 514 | def test_add_orf_classification_incomplete_5prime(self): 515 | orf_df = pd.DataFrame(index=range(1)) 516 | orf_df['first_MET'] = 'ALT' 517 | orf_df['final_stop'] = 'STOP' 518 | 519 | self.assertTrue(np.all(add_orf_classification(orf_df) == 520 | np.array(['incomplete_5prime']))) 521 | 522 | def test_add_orf_classification_incomplete_3prime(self): 523 | orf_df = pd.DataFrame(index=range(1)) 524 | orf_df['first_MET'] = 'M' 525 | orf_df['final_stop'] = 'ALT' 526 | 527 | self.assertTrue(np.all(add_orf_classification(orf_df) == 528 | np.array(['incomplete_3prime']))) 529 | 530 | def 
test_add_orf_classification_incomplete(self): 531 | orf_df = pd.DataFrame(index=range(1)) 532 | orf_df['first_MET'] = 'ALT' 533 | orf_df['final_stop'] = 'ALT' 534 | 535 | self.assertTrue(np.all(add_orf_classification(orf_df) == 536 | np.array(['incomplete']))) 537 | 538 | def test_add_orf_classification_multi(self): 539 | orf_df = pd.DataFrame(index=range(4)) 540 | orf_df['first_MET'] = ['M', 'ALT', 'M', 'ALT'] 541 | orf_df['final_stop'] = ['STOP', 'STOP', 'ALT', 'ALT'] 542 | 543 | self.assertTrue(np.all(add_orf_classification(orf_df) == 544 | np.array(['complete', 'incomplete_5prime', 545 | 'incomplete_3prime', 'incomplete']))) 546 | 547 | 548 | class TestGetORFs(unittest.TestCase): 549 | 550 | def test_get_orf_base(self): 551 | 552 | expected = pd.DataFrame(index=range(1)) 553 | expected['id'] = 'Single_FA' 554 | expected['aa_sequence'] = 'MIMIKL*P' 555 | expected['frame'] = 1 556 | expected['strand'] = '+' 557 | expected['seq_length'] = 8 558 | expected['seq_length_nt'] = 26 559 | expected['orf_sequence'] = 'MIMIKL' 560 | expected['start_site'] = 1 561 | expected['stop_site'] = 7 562 | expected['orf_length'] = 6 563 | expected['start_site_nt'] = 1 564 | expected['stop_site_nt'] = 21 565 | expected['utr3_length'] = 5 566 | expected['first_MET'] = 'M' 567 | expected['final_stop'] = 'STOP' 568 | expected['isoform_number'] = 1 569 | expected['orf_class'] = 'complete' 570 | expected['fasta_id'] = '>Single_FA.orf1 complete:1-21 strand:+' 571 | 572 | all_sequences = read_fasta('test_data/test_getorfs.fa') 573 | orf_df = get_orfs(all_sequences, min_orf_length=5) 574 | 575 | self.assertTrue(orf_df.equals(expected)) 576 | 577 | def test_get_orf_all(self): 578 | 579 | expected = pd.DataFrame(index=range(2)) 580 | expected['id'] = ['Single_FA', 'Single_FA'] 581 | expected['aa_sequence'] = ['MIMIKL*P', 'GLQLNHDH'] 582 | expected['frame'] = [1, 3] 583 | expected['strand'] = ['+', '-'] 584 | expected['seq_length'] = [8, 8] 585 | expected['seq_length_nt'] = [26, 26] 586 | expected['orf_sequence'] = ['MIMIKL', 'GLQLNHDH'] 587 | expected['start_site'] = [1, 1] 588 | expected['stop_site'] = [7, 8] 589 | expected['orf_length'] = [6, 8] 590 | expected['start_site_nt'] = [1, 3] 591 | expected['stop_site_nt'] = [21, 26] 592 | expected['utr3_length'] = [5, 0] 593 | expected['first_MET'] = ['M', 'ALT'] 594 | expected['final_stop'] = ['STOP', 'ALT'] 595 | expected['isoform_number'] = [1, 2] 596 | expected['orf_class'] = ['complete', 'incomplete'] 597 | expected['fasta_id'] = ['>Single_FA.orf1 complete:1-21 strand:+', 598 | '>Single_FA.orf2 incomplete:3-26 strand:-'] 599 | 600 | all_sequences = read_fasta('test_data/test_getorfs.fa') 601 | orf_df = get_orfs(all_sequences, min_orf_length=5, both_strands=True, all_orfs=True) 602 | 603 | self.assertTrue(orf_df.equals(expected)) 604 | 605 | 606 | if __name__ == '__main__': 607 | unittest.main() 608 | -------------------------------------------------------------------------------- /borf/tests/test_frames.fa: -------------------------------------------------------------------------------- 1 | >Frame_1 2 | atgatcatgattaagctgtaaccccc 3 | >Frame_2 4 | aatgatcatgattaagctgtaacccc 5 | >Frame_3 6 | aaatgatcatgattaagctgtaaccc 7 | -------------------------------------------------------------------------------- /borf/tests/test_getorfs.fa: -------------------------------------------------------------------------------- 1 | >Single_FA 2 | ATGATCATGATTAAGCTGTAACCCCC 3 | 4 | -------------------------------------------------------------------------------- 
/borf/tests/test_mutliple_frame_orfs.fa: -------------------------------------------------------------------------------- 1 | >ENST00000327044.7(-) 2 | GCTTCGGGTTGGTGTCATGGCAGCTGCGGGGAGCCGCAAGAGGCGCCTGGCGGAGCTGACGGTGGACGAGTTCCTAGCTTCGGGCTTTGACTCCGAGTCCGAATCCGAGTCCGAAAATTCTCCACAAGCGGAGACACGGGAAGCACGCGAGGCTGCCCGGAGTCCGGATAAGCCGGGCGGGAGCCCCTCGGCCAGCCGGCGTAAAGGCCGTGCCTCTGAGCACAAAGACCAGCTCTCTCGGCTGAAGGACAGAGACCCCGAGTTCTACAAGTTCCTGCAGGAGAATGACCAGAGCCTGCTAAACTTCAGCGACTCGGACAGCTCTGAGGAGGAAGAGGGGCCGTTCCACTCCCTGCCAGATGTGCTGGAGGAAGCCAGTGAGGAGGAGGATGGAGCGGAGGAAGGAGAAGATGGGGACAGAGTCCCCAGAGGGCTGAAGGGGAAGAAGAATTCTGTTCCTGTGACCGTCGCCATGGTTGAGAGATGGAAGCAGGCAGCAAAGCAACGCCTCACTCCAAAGCTGTTCCATGAAGTGGTACAGGCGTTCCGAGCAGCTGTGGCCACCACCCGAGGGGACCAGGAAAGTGCTGAGGCCAACAAATTCCAGGTCACGGACAGTGCTGCATTCAATGCTCTGGTTACCTTCTGCATCAGAGACCTCATTGGCTGTCTCCAGAAGCTGCTGTTTGGAAAGGTGGCAAAGGATAGCAGCAGGATGCTGCAGCCGTCCAGCAGCCCGCTCTGGGGGAAGCTTCGTGTGGACATCAAGGCTTACCTGGGCTCGGCCATACAGCTGGTGTCCTGTCTGTCGGAGACGACGGTGTTGGCGGCCGTGCTGCGGCACATCAGCGTGCTGGTGCCCTGCTTCCTGACCTTCCCCAAGCAGTGCCGCATGCTGCTCAAGAGAATGGTGATCGTATGGAGCACTGGGGAAGAGTCTCTGCGGGTGCTGGCTTTCCTGGTCCTCAGCAGAGTCTGCCGGCACAAGAAGGACACTTTCCTTGGCCCCGTCCTCAAGCAAATGTACATCACGTATGTGAGGAACTGCAAGTTCACCTCGCCTGGTGCCCTCCCCTTCATCAGTTTCATGCAGTGGACCTTGACGGAGCTGCTGGCCCTGGAGCCGGGTGTGGCCTACCAGCACGCCTTCCTCTACATCCGCCAGCTCGCCATACACCTGCGCAACGCCATGACCACTCGCAAGAAGGAAACATACCAGTCTGTGTACAACTGGCAGTATGTGCACTGCCTCTTCCTGTGGTGCCGGGTCCTGAGCACTGCGGGCCCCAGCGAAGCCCTCCAGCCCTTGGTCTACCCCCTTGCCCAAGTCATCATTGGCTGTATCAAGCTCATCCCCACTGCCCGCTTCTACCCGCTGCGAATGCACTGCATCCGTGCCCTGACGCTGCTCTCGGGGAGCTCGGGGGCCTTCATCCCGGTGCTGCCTTTCATCCTGGAGATGTTCCAGCAGGTCGACTTCAACAGGAAGCCAGGGCGCATGAGCTCCAAGCCCATCAACTTCTCCGTGATCCTGAAGCTGTCCAATGTCAACCTGCAGGAGAAGGCGTACCGGGACGGCCTGGTGGAGCAGCTGTACGACCTCACCCTGGAGTACCTGCACAGCCAGGCACACTGCATCGGCTTCCCGGAGCTGGTGCTGCCTGTGGTCCTGCAGCTGAAGTCGTTCCTCCGGGAGTGCAAGGTGGCCAACTACTGCCGGCAGGTGCAGCAGCTGCTTGGGAAGGTTCAGGAGAACTCGGCATACATCTGCAGCCGCCGCCAGAGGGTTTCCTTCGGCGTCTCTGAGCAGCAGGCAGTGGAAGCCTGGGAGAAGCTGACCCGGGAAGAGGGGACACCCCTGACCTTGTACTACAGCCACTGGCGCAAGCTGCGTGACCGGGAGATCCAGCTGGAGATCAGTGGCAAAGAGCGGCTGGAAGACCTGAACTTCCCTGAGATCAAACGAAGGAAGATGGCTGACAGGAAGGATGAGGACAGGAAGCAATTTAAAGACCTCTTTGACCTGAACAGCTCTGAAGAGGACGACACCGAGGGATTCTCGGAGAGAGGGATACTGAGGCCCCTGAGCACTCGGCATGGGGTGGAAGACGATGAAGAGGACGAGGAGGAGGGCGAGGAGGACAGCAGCAACTCGGAGGATGGAGACCCAGACGCAGAGGCGGGGCTGGCCCCTGGGGAGCTGCAGCAGCTGGCCCAGGGGCCGGAGGACGAGCTGGAGGATCTGCAGCTCTCAGAGGACGACTGAGGCAGCCCATCTGGGGGGCCTGTAGGGGCTGCCGGGCTGGTGGCCAGTGTTTCCACCTCCCTGGCAGTCAGGCCTAGAGGCTGGCGTCTGTGCAGTTGGGGGAGGCAGTAGACACGGGACAGGCTTTATTATTTATTTTTCAGCATGAAAGACCAAACGTATCGAGAGCTGGGCTGGGCTGGGCTGGTGTGGCTGCTGAAGCCCCACAGCTGTGGGCTGCTGAAGTCAGCTCCGCGGGGGAGCTGACCCTGACGTCAGCAGACCGAGACCAGTCCCAGTTCCAGGGGGAGGCCTGCAGGCCCCTGGCCCCTTCCACCACCTCTGCCCTCCGTCTGCAGACCTCGTCCATCTGCACCAGGCTCTGCCTTCACTCCCCCAAGTCTTTGAAAATTTGTTCCTTTCCTTTGAAGTCACATTTTCTTTTAAAATTTTTTGTTTTGCATCCGAAACCGAAAGAAATAAAGCGGTGGGAGGCAGGGCCATTGTGTTGA 3 | -------------------------------------------------------------------------------- /borf/tests/test_trans_all_frames.fa: -------------------------------------------------------------------------------- 1 | >MANATEE_seq 2 | atggcgaacgcgaccgaagaataa 3 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = "-W" # This flag turns warnings into errors. 
6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = PackagingScientificPython 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=PackagingScientificPython 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/source/_static/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/signalbash/borf/52bca757f95027388c5f8cdb8de80d88d5974b27/docs/source/_static/.placeholder -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # borf documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Jun 28 12:35:56 2018. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | # import os 21 | # import sys 22 | # sys.path.insert(0, os.path.abspath('.')) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 
34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.autosummary', 37 | 'sphinx.ext.githubpages', 38 | 'sphinx.ext.intersphinx', 39 | 'sphinx.ext.mathjax', 40 | 'sphinx.ext.viewcode', 41 | 'IPython.sphinxext.ipython_directive', 42 | 'IPython.sphinxext.ipython_console_highlighting', 43 | 'matplotlib.sphinxext.plot_directive', 44 | 'numpydoc', 45 | 'sphinx_copybutton', 46 | ] 47 | 48 | # Configuration options for plot_directive. See: 49 | # https://github.com/matplotlib/matplotlib/blob/f3ed922d935751e08494e5fb5311d3050a3b637b/lib/matplotlib/sphinxext/plot_directive.py#L81 50 | plot_html_show_source_link = False 51 | plot_html_show_formats = False 52 | 53 | # Generate the API documentation when building 54 | autosummary_generate = True 55 | numpydoc_show_class_members = False 56 | 57 | # Add any paths that contain templates here, relative to this directory. 58 | templates_path = ['_templates'] 59 | 60 | # The suffix(es) of source filenames. 61 | # You can specify multiple suffix as a list of string: 62 | # 63 | # source_suffix = ['.rst', '.md'] 64 | source_suffix = '.rst' 65 | 66 | # The master toctree document. 67 | master_doc = 'index' 68 | 69 | # General information about the project. 70 | project = 'borf' 71 | copyright = '2019, Beth Signal' 72 | author = 'Beth Signal' 73 | 74 | # The version info for the project you're documenting, acts as replacement for 75 | # |version| and |release|, also used in various other places throughout the 76 | # built documents. 77 | # 78 | import borf 79 | # The short X.Y version. 80 | version = borf.__version__ 81 | # The full version, including alpha/beta/rc tags. 82 | release = borf.__version__ 83 | 84 | # The language for content autogenerated by Sphinx. Refer to documentation 85 | # for a list of supported languages. 86 | # 87 | # This is also used if you do content translation via gettext catalogs. 88 | # Usually you set "language" from the command line for these cases. 89 | language = None 90 | 91 | # List of patterns, relative to source directory, that match files and 92 | # directories to ignore when looking for source files. 93 | # This patterns also effect to html_static_path and html_extra_path 94 | exclude_patterns = [] 95 | 96 | # The name of the Pygments (syntax highlighting) style to use. 97 | pygments_style = 'sphinx' 98 | 99 | # If true, `todo` and `todoList` produce output, else they produce nothing. 100 | todo_include_todos = False 101 | 102 | 103 | # -- Options for HTML output ---------------------------------------------- 104 | 105 | # The theme to use for HTML and HTML Help pages. See the documentation for 106 | # a list of builtin themes. 107 | # 108 | html_theme = 'sphinx_rtd_theme' 109 | import sphinx_rtd_theme 110 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 111 | 112 | # Theme options are theme-specific and customize the look and feel of a theme 113 | # further. For a list of options available for each theme, see the 114 | # documentation. 115 | # 116 | # html_theme_options = {} 117 | 118 | # Add any paths that contain custom static files (such as style sheets) here, 119 | # relative to this directory. They are copied after the builtin static files, 120 | # so a file named "default.css" will overwrite the builtin "default.css". 121 | html_static_path = ['_static'] 122 | 123 | # Custom sidebar templates, must be a dictionary that maps document names 124 | # to template names. 
125 | # 126 | # This is required for the alabaster theme 127 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 128 | html_sidebars = { 129 | '**': [ 130 | 'relations.html', # needs 'show_related': True theme option to display 131 | 'searchbox.html', 132 | ] 133 | } 134 | 135 | 136 | # -- Options for HTMLHelp output ------------------------------------------ 137 | 138 | # Output file base name for HTML help builder. 139 | htmlhelp_basename = 'borf' 140 | 141 | 142 | # -- Options for LaTeX output --------------------------------------------- 143 | 144 | latex_elements = { 145 | # The paper size ('letterpaper' or 'a4paper'). 146 | # 147 | # 'papersize': 'letterpaper', 148 | 149 | # The font size ('10pt', '11pt' or '12pt'). 150 | # 151 | # 'pointsize': '10pt', 152 | 153 | # Additional stuff for the LaTeX preamble. 154 | # 155 | # 'preamble': '', 156 | 157 | # Latex figure (float) alignment 158 | # 159 | # 'figure_align': 'htbp', 160 | } 161 | 162 | # Grouping the document tree into LaTeX files. List of tuples 163 | # (source start file, target name, title, 164 | # author, documentclass [howto, manual, or own class]). 165 | latex_documents = [ 166 | (master_doc, 'borf.tex', 'borf Documentation', 167 | 'Contributors', 'manual'), 168 | ] 169 | 170 | 171 | # -- Options for manual page output --------------------------------------- 172 | 173 | # One entry per manual page. List of tuples 174 | # (source start file, name, description, authors, manual section). 175 | man_pages = [ 176 | (master_doc, 'borf', 'borf Documentation', 177 | [author], 1) 178 | ] 179 | 180 | 181 | # -- Options for Texinfo output ------------------------------------------- 182 | 183 | # Grouping the document tree into Texinfo files. List of tuples 184 | # (source start file, target name, title, author, 185 | # dir menu entry, description, category) 186 | texinfo_documents = [ 187 | (master_doc, 'borf', 'borf Documentation', 188 | author, 'borf', 'Better ORF predictions', 189 | 'Miscellaneous'), 190 | ] 191 | 192 | 193 | 194 | 195 | # Example configuration for intersphinx: refer to the Python standard library. 196 | intersphinx_mapping = { 197 | 'python': ('https://docs.python.org/3/', None), 198 | 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 199 | 'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None), 200 | 'pandas': ('https://pandas.pydata.org/pandas-docs/stable', None), 201 | 'matplotlib': ('https://matplotlib.org', None), 202 | } 203 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Packaging Scientific Python documentation master file, created by 2 | sphinx-quickstart on Thu Jun 28 12:35:56 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | borf Documentation 7 | ================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | 12 | installation 13 | usage 14 | release-history 15 | min_versions 16 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | At the command line:: 6 | 7 | $ pip install borf 8 | -------------------------------------------------------------------------------- /docs/source/min_versions.rst: -------------------------------------------------------------------------------- 1 | =================================== 2 | Minimum Version of Python and NumPy 3 | =================================== 4 | 5 | 6 | - This project supports at least the minor versions of Python 7 | initially released 42 months prior to a planned project release 8 | date. 9 | - The project will always support at least the 2 latest minor 10 | versions of Python. 11 | - The project will support minor versions of ``numpy`` initially 12 | released in the 24 months prior to a planned project release date or 13 | the oldest version that supports the minimum Python version 14 | (whichever is higher). 15 | - The project will always support at least the 3 latest minor 16 | versions of NumPy. 17 | 18 | The minimum supported version of Python will be set to 19 | ``python_requires`` in ``setup.py``. All supported minor versions of 20 | Python will be in the test matrix and have binary artifacts built 21 | for releases. 22 | 23 | The project should adjust upward the minimum Python and NumPy 24 | version support on every minor and major release, but never on a 25 | patch release. 26 | 27 | This is consistent with NumPy `NEP 29 28 | <https://numpy.org/neps/nep-0029-deprecation_policy.html>`__. 29 | -------------------------------------------------------------------------------- /docs/source/release-history.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Release History 3 | =============== 4 | 5 | Initial Release (YYYY-MM-DD) 6 | ---------------------------- 7 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | Start by importing borf. 6 | 7 | .. code-block:: python 8 | 9 | import borf 10 | 11 | 12 | ..
autofunction:: borf.get_orfs.get_orfs -------------------------------------------------------------------------------- /github_deploy_key_betsig_borf.enc: -------------------------------------------------------------------------------- 1 | gAAAAABdjXdAeBTX9p_4dnxRTUqYHrUz14eaRTmHWQ5uZwXZvrRaf9c0UqrXkIIsIY0uURcEDiCBD7sPjXXEU9XBZ8yg9RMeRP5GH2hgvHHI5qOw3znERoOGnnzbAKIzIs8fz8zmI7JPCQ8Ni7AaaxRo4gk8vy8AousTK8hzLxa4wT8iifmEQ3YxLVDeiXs5GLzOxus3DaxB8Ho__nqoB4WXwoV6UNfgbbfIMwVIgajgSaOXt_dCPQVFun5Wd9lN4mTWBr7lBlMotrLY91sr1K621MM_2QMtsuNrhX4jZdNe-6huE5HyTwxk6uLQh1xSSnNMBhrtgfEZMEgL-SW2Ux9_z-Niw-gKDSnhnu7SktQQokJeZn9oTkTjgzs4cO-bfdKt4YZ9EXvSvSVHUgEtfmktiZXr-slvuTCbKdXEZDq8RHA0JeK7qkUQ5T8WhPyVxEN9MRs5dIkdHoM7lkVsyEYpOvj0zNNAnw6uVUIEwz_bpKRIbbXxAH_N2r7AxeITw9gS8WTWfUKtISxAAFInOY0yDVK8Coz2xKMbqSZBEXcUGlJLs3SPPKPJvISCLm8HAnC-qQXrUzQdDEm1wZLj5IUXHoajZu1VcDGsX128-Rqq5RhCoBFvxK9Oj5getYAIBUhx0-OZdYkid9g0AgzBylY9Vkh1935j2k3XM6VtWHLq5WiEoKGNvXxHJAyvFQVxS5vfLA7vUocXXlip7z5q85PgYlwq5RtvowskLA902GcaZveLwv96MS0Bw9bUBsAUSp_6bNUXz6XDkXUMdvxcP7E6F7-k379hJv5-mX-bYKtToKQpNlHVn52G_ZHKIxJ5orKmsG0xseVsZ76uQu4jFENAAGsUpMiurBQT3gpjuonCyKMwNyjC1DJL6rmvH3GnpOAq3OobCCC0HodInZpJyuejOQTjDyxwjmaOhqSpbkLKIRkJOKUGCg0ieP0owf1ap32-J_XL3K5lXeg1MuL3-AQz8rIYzAK2GJgbL8paMbGKEch6BbNRacpeYCzKSWQ9X3B753jeBL5u-3NlTgHDHDZavK8HOoBvGjLb7Xcn6FuiW2e_5q_d3V4_tR_JNB38tt-mgj-8i7voqK2u1ygllKPp3eAI77pI-0_xHNw6RoHhEsHBINsopDyL79b_EtBQgKLLXjiuOJ8jiYT1UZQrICu5wU3JnbJchE4zykeMbJzoxIvSWUa0mt8iD2XGfPj7fFVKyykchQQbmE2HYAYhs3loKao8aHZWz4NKjHf_sH3266tnzH99LV6Y6E_l6CEAobyjyGU6evgZKMwz38-DBzmlsZSzzCaBl5CCu_lDVnyyTnY79Qnup4t_Vc0CO4IwpftPfL803QTN4doEAAzn52xXrUs-G8nxlxIQCBcddWKf4YO6WerK65aRDLNAgw1yPYwlvQaqfdNmHkbbeeWPtZcHDFI6qkf6oL3rjkk7FaWSZ2C0BHk43KsMsPOPvNzxmTQF2_VhMZgJKxckTFQNVRMgWyeuk8hRwJZnwjH5PwloPBQS7q2sd1kzH7d8642slzIHMYl_nz_d0wdRlNU_cpcJYsZlyfi1C8ikoBjLhqXpPVYyWTx-b1MiKLKN6AyPtmqZuotvhiRvbh3OqKyFvkkZRk5yZmrBD4dF3TM0feMlZWtpNpCBBaxREW_D29YG4C2zvvkVsjFs2zCVqe1Hz3Qwf3EmihQ5Rt7XjUJHzeOH1ce1c3SRlLxbbpyreo9mNf9r0mEaxAghZYmbodcNDygUQ6MarYSfwLE42U-ADLodT8mNbwOnPONkARc73pVsDEQWpokX9mRHholWteAG4kw7c3t3l2o1iPbYvoNz6XhtEqDp3M3oEgvoooco70rpB3vMwqFbuzp3xGDFtqqVH4nmSBbevzdDjVX4cgc53qqqMemdR2DY1krXZQVUcfT8j3yEPNdZ1a9nXjW8osO4GajJ0bEhenAKlpjBi7KkVJuMp3EVjwAZo3R23Vg5xs8Plglc7W--G_ZsIV1YH-iLzFTiRPLc5acPz7gfmvJOYPWcBtvear4ng7iQbiSR1sRsSbsNIc94CozLRm7Zumv8RjBcMQd1PvT3H_mABBSs09USoqsdr1SnfGeC82T1WgUStTj83hJXOEFCdEZDNRCEI3Jf_AAoqbkxJWU0s_ZcD_zEYauXC4HpnLCNQvAYd6gYEkUFVtkxDCrRixVbKng3AS8Tc-uB_G8o9k97aGHJ-cvgosooR_Y6ZhLNSVPjBdWnmaCLxXL3YNhEy7ugvs_h0PwqJZ2nwNhv2taljqqDML6BNet26ZU6wogeSTEXq1L51xJa1n4TDEvPXtInQECQQJmIZ7CC3nypNsWMDxeQQ6Xrjn5ntYPsoL0xsLaYxsX3qcPRLZ2hHDnoTDnYTPnyqLSJ_omEgpr6nXupwh6dmicFUt2hSJhGU610c-0OyIQqO1lTNd6fSdwwVxMsfDzGXK68Rv9lMH_H0-jymAY0HcQGv5JRjpL71Ub2dFJOtOD-jH-neS2iF5LDdFLwtruBc0-nwuKWBF-6ixuuOvrSsfGi0preV1NAjjQZzZu65SItIWubfmS3w8x4sgn9dXTjjX0aH9dg8V7TBlVI5qTTABkQgEmNFBGMjdTm90lLClzxZSva7Dl9dYBD1m02m4gvcZHJ2AZ_1nPkaiI7s_OjQImsPazROwGUREQtOsSjpC8DRnFFXZWn_cYTje7zShflqM0h-qoQLGx0Nhu_bBqXm9tPkJQ6i_eM87UL3_SIIVAevhbL_SJCoPAPEa7tQaqcKx3coGrVGuxQaHQxQgQ7LDjc_p9U8_mvgMhsZrFaCH0xTqh6UEuIjFHoxnAqLIoJ8_YzPQqvHz7ovpXudLGoVPzTRinq1wsHRWe3sniD0F6CtWaDHCdPJdr6WIXP2BU0MaTosVTXO7S1b8_1LnKco44LBDPUbNh8MqGK2_z31FJluka5kYwox2WYv3jR5eTJDA1G_XzbCWJHuVRa0-j6_gradw7cW3i8b5N_qlZrwnPWjPT2qhwhybBRa74tXWApyrP8elLy6Pe_1RHtU9Ffkurj2RJjNJ59y0QdsuutzsN7KqPInAEnmv9cHe9ACDnMaud7fQTpV_hTt-nahVcmuepu0a-s9-HNkd9OKkWVVMNZh05ZESHhHvZyUZQ9f88Pa5Tgb8vUGM9aC8SQ7H0aHje-csa1svZJlXbdtK350UmtE10Kh5bIksyn6lCDzibi9O2sKjE0860vW2PV5P6qbAKw4w1fxID2nnJU4ob7Tf8XLK09mmv_6_f5xhfDRqAgybjRo0aF1B9Tx8jdeO_jx4HyUbk6QX9
9fH3B9fpfAAuhMUiv77gFeI_m5rumBfF9rYpgczAeOjQhKzSjTCDS91HB97rBiK9KM0J8lKtod2kO3RFusssYI_5cN8DqIz5CST-12l9JsZcX1dN4gaW3cIMZgNrNJFfkOSj2cq4C5S6tQrRY85pauPp4tgZU0veb7T_ZJOKEZNxX2KnalVdcB43WXBIJHe5AWeahvD5_CNGk2C1xx1iisskPxEZTNcJzD0ksQoZDmNvFZEcHHbazh0eDeg8OtfhR-GgCH8jz2b_BC0s571vrwGL584hSr3ViEmOg15qHAQNYrAjc9JtCufwl9Wr5WnTm8sM6gSkjhVFajl7L4cmXhB4d7KYFTFjw5cXZVSychc_4XuLOFwdjfT7H3RtYVuZNDId_7wwEES6547UjzBOhqRqYmpPb2fvCZVp8fu2qJ9oaeI3ApqvievBJL2crFJrddrthgbRlvbFVmiJw5CLDwDppu2rclzPNB5yB_1dH5So6zr2xbj3ScSFPAtEmlc-f6EGlFbjjPTbc3rxgYjeVKjB53kQQtSc8vZUpegXmppTKcJDMBPnocyDYmjN5ROE0ETvD5euteiCjfiSYOikSAd--Gzu3fYmvENeJhd6wWcpYTycGAfaLcaawAlnrNttG3HCg-AFf2OoQIrK3LyKnK6Y7OSR6WKGX_Z9-K2JF-NNNnc_dLd73dmgI_9KpFQ-19TXyTSAOCf8b_xK1W7XygLWGPEyLKVWG-vpT9CB_ZIdE2a0PLNV2kBydQsbLT-9l-GRNz-GrmrZuvwjv77QbLxNw9GvXtCpTgPVflETtWEUkYV7_iuY7g5_jcKHfSFmuH9Hq2NUKf50k0cDLUoGFYTKghWW3wZQ6BgkRbCW6Ct6FC9xaIvwWQO2IdRj304nfSEiVAAUXwBg4tTIlayOP9efjx9nYsyWJcSiu1KvPCaIcOU23BGQ_-gLM_1Ox2vXE0yzA_ahz6zZLxzvJ099JYSyhocqGzYV2EdaONGb1LlNutNsLAJ6LokGDqd8DwxEfrqzzPCr2ys6UhSAu0sEfUgjEK7PCpOfpBs0i7JCHttzvQuw0uSlcWNweoOCyLItPFdOq66NErjr-Lmm9-zmEYUhgx7NAQyvm1-XYyDjTdEgv8w== -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # These are required for developing the package (running the tests, building 2 | # the documentation) but not necessarily required for _using_ it. 3 | codecov 4 | coverage 5 | flake8 6 | pytest 7 | sphinx 8 | # These are dependencies of various sphinx extensions for documentation. 9 | ipython 10 | matplotlib 11 | numpydoc 12 | sphinx-copybutton 13 | sphinx_rtd_theme 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # List required packages in this file, one per line. 2 | numpy 3 | pandas 4 | biopython 5 | scikit-bio 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440-post 4 | versionfile_source = borf/_version.py 5 | versionfile_build = borf/_version.py 6 | tag_prefix = v 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | from setuptools import setup, find_packages 3 | import sys 4 | import versioneer 5 | 6 | 7 | # NOTE: This file must remain Python 2 compatible for the foreseeable future, 8 | # to ensure that we error out properly for people with outdated setuptools 9 | # and/or pip. 10 | min_version = (3, 5) 11 | if sys.version_info < min_version: 12 | error = """ 13 | borf does not support Python {0}.{1}. 14 | Python {2}.{3} and above is required. Check your Python version like so: 15 | 16 | python3 --version 17 | 18 | This may be due to an out-of-date pip. Make sure you have pip >= 9.0.1. 
19 | Upgrade pip like so: 20 | 21 | pip install --upgrade pip 22 | """.format(*(sys.version_info[:2] + min_version)) 23 | sys.exit(error) 24 | 25 | here = path.abspath(path.dirname(__file__)) 26 | 27 | with open(path.join(here, 'README.rst'), encoding='utf-8') as readme_file: 28 | readme = readme_file.read() 29 | 30 | with open(path.join(here, 'requirements.txt')) as requirements_file: 31 | # Parse requirements.txt, ignoring any commented-out lines. 32 | requirements = [line for line in requirements_file.read().splitlines() 33 | if not line.startswith('#')] 34 | 35 | 36 | setup( 37 | name='borf', 38 | version=versioneer.get_version(), 39 | cmdclass=versioneer.get_cmdclass(), 40 | description="ORF predictions from .fa files", 41 | long_description=readme, 42 | author="Beth Signal", 43 | author_email='bethany.signal@uts.edu.au', 44 | url='https://github.com/betsig/borf', 45 | python_requires='>={}'.format('.'.join(str(n) for n in min_version)), 46 | packages=find_packages(exclude=['docs', 'tests']), 47 | entry_points={ 48 | 'console_scripts': [ 49 | 'borf = borf.borf:main', 50 | ], 51 | }, 52 | include_package_data=True, 53 | package_data={ 54 | 'borf': [ 55 | # When adding files here, remember to update MANIFEST.in as well, 56 | # or else they will not be included in the distribution on PyPI! 57 | # 'path/to/data_file', 58 | 'test_data/*.fa' 59 | ] 60 | }, 61 | install_requires=requirements, 62 | license="MIT", 63 | long_description_content_type="text/x-rst", 64 | classifiers=[ 65 | 'Development Status :: 2 - Pre-Alpha', 66 | 'Natural Language :: English', 67 | 'Programming Language :: Python :: 3', 68 | ], 69 | ) 70 | -------------------------------------------------------------------------------- /test_data/test_frames.fa: -------------------------------------------------------------------------------- 1 | >Frame_1 2 | atgatcatgattaagctgtaaccccc 3 | >Frame_2 4 | aatgatcatgattaagctgtaacccc 5 | >Frame_3 6 | aaatgatcatgattaagctgtaaccc 7 | -------------------------------------------------------------------------------- /test_data/test_getorfs.fa: -------------------------------------------------------------------------------- 1 | >Single_FA 2 | ATGATCATGATTAAGCTGTAACCCCC 3 | 4 | -------------------------------------------------------------------------------- /test_data/test_mutliple_frame_orfs.fa: -------------------------------------------------------------------------------- 1 | >ENST00000327044.7(-) 2 | 
GCTTCGGGTTGGTGTCATGGCAGCTGCGGGGAGCCGCAAGAGGCGCCTGGCGGAGCTGACGGTGGACGAGTTCCTAGCTTCGGGCTTTGACTCCGAGTCCGAATCCGAGTCCGAAAATTCTCCACAAGCGGAGACACGGGAAGCACGCGAGGCTGCCCGGAGTCCGGATAAGCCGGGCGGGAGCCCCTCGGCCAGCCGGCGTAAAGGCCGTGCCTCTGAGCACAAAGACCAGCTCTCTCGGCTGAAGGACAGAGACCCCGAGTTCTACAAGTTCCTGCAGGAGAATGACCAGAGCCTGCTAAACTTCAGCGACTCGGACAGCTCTGAGGAGGAAGAGGGGCCGTTCCACTCCCTGCCAGATGTGCTGGAGGAAGCCAGTGAGGAGGAGGATGGAGCGGAGGAAGGAGAAGATGGGGACAGAGTCCCCAGAGGGCTGAAGGGGAAGAAGAATTCTGTTCCTGTGACCGTCGCCATGGTTGAGAGATGGAAGCAGGCAGCAAAGCAACGCCTCACTCCAAAGCTGTTCCATGAAGTGGTACAGGCGTTCCGAGCAGCTGTGGCCACCACCCGAGGGGACCAGGAAAGTGCTGAGGCCAACAAATTCCAGGTCACGGACAGTGCTGCATTCAATGCTCTGGTTACCTTCTGCATCAGAGACCTCATTGGCTGTCTCCAGAAGCTGCTGTTTGGAAAGGTGGCAAAGGATAGCAGCAGGATGCTGCAGCCGTCCAGCAGCCCGCTCTGGGGGAAGCTTCGTGTGGACATCAAGGCTTACCTGGGCTCGGCCATACAGCTGGTGTCCTGTCTGTCGGAGACGACGGTGTTGGCGGCCGTGCTGCGGCACATCAGCGTGCTGGTGCCCTGCTTCCTGACCTTCCCCAAGCAGTGCCGCATGCTGCTCAAGAGAATGGTGATCGTATGGAGCACTGGGGAAGAGTCTCTGCGGGTGCTGGCTTTCCTGGTCCTCAGCAGAGTCTGCCGGCACAAGAAGGACACTTTCCTTGGCCCCGTCCTCAAGCAAATGTACATCACGTATGTGAGGAACTGCAAGTTCACCTCGCCTGGTGCCCTCCCCTTCATCAGTTTCATGCAGTGGACCTTGACGGAGCTGCTGGCCCTGGAGCCGGGTGTGGCCTACCAGCACGCCTTCCTCTACATCCGCCAGCTCGCCATACACCTGCGCAACGCCATGACCACTCGCAAGAAGGAAACATACCAGTCTGTGTACAACTGGCAGTATGTGCACTGCCTCTTCCTGTGGTGCCGGGTCCTGAGCACTGCGGGCCCCAGCGAAGCCCTCCAGCCCTTGGTCTACCCCCTTGCCCAAGTCATCATTGGCTGTATCAAGCTCATCCCCACTGCCCGCTTCTACCCGCTGCGAATGCACTGCATCCGTGCCCTGACGCTGCTCTCGGGGAGCTCGGGGGCCTTCATCCCGGTGCTGCCTTTCATCCTGGAGATGTTCCAGCAGGTCGACTTCAACAGGAAGCCAGGGCGCATGAGCTCCAAGCCCATCAACTTCTCCGTGATCCTGAAGCTGTCCAATGTCAACCTGCAGGAGAAGGCGTACCGGGACGGCCTGGTGGAGCAGCTGTACGACCTCACCCTGGAGTACCTGCACAGCCAGGCACACTGCATCGGCTTCCCGGAGCTGGTGCTGCCTGTGGTCCTGCAGCTGAAGTCGTTCCTCCGGGAGTGCAAGGTGGCCAACTACTGCCGGCAGGTGCAGCAGCTGCTTGGGAAGGTTCAGGAGAACTCGGCATACATCTGCAGCCGCCGCCAGAGGGTTTCCTTCGGCGTCTCTGAGCAGCAGGCAGTGGAAGCCTGGGAGAAGCTGACCCGGGAAGAGGGGACACCCCTGACCTTGTACTACAGCCACTGGCGCAAGCTGCGTGACCGGGAGATCCAGCTGGAGATCAGTGGCAAAGAGCGGCTGGAAGACCTGAACTTCCCTGAGATCAAACGAAGGAAGATGGCTGACAGGAAGGATGAGGACAGGAAGCAATTTAAAGACCTCTTTGACCTGAACAGCTCTGAAGAGGACGACACCGAGGGATTCTCGGAGAGAGGGATACTGAGGCCCCTGAGCACTCGGCATGGGGTGGAAGACGATGAAGAGGACGAGGAGGAGGGCGAGGAGGACAGCAGCAACTCGGAGGATGGAGACCCAGACGCAGAGGCGGGGCTGGCCCCTGGGGAGCTGCAGCAGCTGGCCCAGGGGCCGGAGGACGAGCTGGAGGATCTGCAGCTCTCAGAGGACGACTGAGGCAGCCCATCTGGGGGGCCTGTAGGGGCTGCCGGGCTGGTGGCCAGTGTTTCCACCTCCCTGGCAGTCAGGCCTAGAGGCTGGCGTCTGTGCAGTTGGGGGAGGCAGTAGACACGGGACAGGCTTTATTATTTATTTTTCAGCATGAAAGACCAAACGTATCGAGAGCTGGGCTGGGCTGGGCTGGTGTGGCTGCTGAAGCCCCACAGCTGTGGGCTGCTGAAGTCAGCTCCGCGGGGGAGCTGACCCTGACGTCAGCAGACCGAGACCAGTCCCAGTTCCAGGGGGAGGCCTGCAGGCCCCTGGCCCCTTCCACCACCTCTGCCCTCCGTCTGCAGACCTCGTCCATCTGCACCAGGCTCTGCCTTCACTCCCCCAAGTCTTTGAAAATTTGTTCCTTTCCTTTGAAGTCACATTTTCTTTTAAAATTTTTTGTTTTGCATCCGAAACCGAAAGAAATAAAGCGGTGGGAGGCAGGGCCATTGTGTTGA 3 | -------------------------------------------------------------------------------- /test_data/test_stopsitent.fa: -------------------------------------------------------------------------------- 1 | >test_final_len_f1 2 | atgatcatgattaagctgttttttttt 3 | >test_final_len_f2 4 | atgatcatgattaagctgtttttttttt 5 | >test_final_len_f3 6 | atgatcatgattaagctgttttttttttt 7 | -------------------------------------------------------------------------------- /test_data/test_trans_all_frames.fa: -------------------------------------------------------------------------------- 1 | >MANATEE_seq 2 | atggcgaacgcgaccgaagaataa 3 | -------------------------------------------------------------------------------- /versioneer.py: -------------------------------------------------------------------------------- 
1 | 2 | # Version: 0.18 3 | 4 | """The Versioneer - like a rocketeer, but for versions. 5 | 6 | The Versioneer 7 | ============== 8 | 9 | * like a rocketeer, but for versions! 10 | * https://github.com/warner/python-versioneer 11 | * Brian Warner 12 | * License: Public Domain 13 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy 14 | * [![Latest Version] 15 | (https://pypip.in/version/versioneer/badge.svg?style=flat) 16 | ](https://pypi.python.org/pypi/versioneer/) 17 | * [![Build Status] 18 | (https://travis-ci.org/warner/python-versioneer.png?branch=master) 19 | ](https://travis-ci.org/warner/python-versioneer) 20 | 21 | This is a tool for managing a recorded version number in distutils-based 22 | python projects. The goal is to remove the tedious and error-prone "update 23 | the embedded version string" step from your release process. Making a new 24 | release should be as easy as recording a new tag in your version-control 25 | system, and maybe making new tarballs. 26 | 27 | 28 | ## Quick Install 29 | 30 | * `pip install versioneer` to somewhere to your $PATH 31 | * add a `[versioneer]` section to your setup.cfg (see below) 32 | * run `versioneer install` in your source tree, commit the results 33 | 34 | ## Version Identifiers 35 | 36 | Source trees come from a variety of places: 37 | 38 | * a version-control system checkout (mostly used by developers) 39 | * a nightly tarball, produced by build automation 40 | * a snapshot tarball, produced by a web-based VCS browser, like github's 41 | "tarball from tag" feature 42 | * a release tarball, produced by "setup.py sdist", distributed through PyPI 43 | 44 | Within each source tree, the version identifier (either a string or a number, 45 | this tool is format-agnostic) can come from a variety of places: 46 | 47 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows 48 | about recent "tags" and an absolute revision-id 49 | * the name of the directory into which the tarball was unpacked 50 | * an expanded VCS keyword ($Id$, etc) 51 | * a `_version.py` created by some earlier build step 52 | 53 | For released software, the version identifier is closely related to a VCS 54 | tag. Some projects use tag names that include more than just the version 55 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool 56 | needs to strip the tag prefix to extract the version identifier. For 57 | unreleased software (between tags), the version identifier should provide 58 | enough information to help developers recreate the same tree, while also 59 | giving them an idea of roughly how old the tree is (after version 1.2, before 60 | version 1.3). Many VCS systems can report a description that captures this, 61 | for example `git describe --tags --dirty --always` reports things like 62 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 63 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has 64 | uncommitted changes. 65 | 66 | The version identifier is used for multiple purposes: 67 | 68 | * to allow the module to self-identify its version: `myproject.__version__` 69 | * to choose a name and prefix for a 'setup.py sdist' tarball 70 | 71 | ## Theory of Operation 72 | 73 | Versioneer works by adding a special `_version.py` file into your source 74 | tree, where your `__init__.py` can import it. This `_version.py` knows how to 75 | dynamically ask the VCS tool for version information at import time. 
76 | 77 | `_version.py` also contains `$Revision$` markers, and the installation 78 | process marks `_version.py` to have this marker rewritten with a tag name 79 | during the `git archive` command. As a result, generated tarballs will 80 | contain enough information to get the proper version. 81 | 82 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 83 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 84 | that configures it. This overrides several distutils/setuptools commands to 85 | compute the version when invoked, and changes `setup.py build` and `setup.py 86 | sdist` to replace `_version.py` with a small static file that contains just 87 | the generated version data. 88 | 89 | ## Installation 90 | 91 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 92 | 93 | ## Version-String Flavors 94 | 95 | Code which uses Versioneer can learn about its version string at runtime by 96 | importing `_version` from your main `__init__.py` file and running the 97 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 98 | import the top-level `versioneer.py` and run `get_versions()`. 99 | 100 | Both functions return a dictionary with different flavors of version 101 | information: 102 | 103 | * `['version']`: A condensed version string, rendered using the selected 104 | style. This is the most commonly used value for the project's version 105 | string. The default "pep440" style yields strings like `0.11`, 106 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 107 | below for alternative styles. 108 | 109 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 110 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 111 | 112 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the 113 | commit date in ISO 8601 format. This will be None if the date is not 114 | available. 115 | 116 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that 117 | this is only accurate if run in a VCS checkout, otherwise it is likely to 118 | be False or None 119 | 120 | * `['error']`: if the version string could not be computed, this will be set 121 | to a string describing the problem, otherwise it will be None. It may be 122 | useful to throw an exception in setup.py if this is set, to avoid e.g. 123 | creating tarballs with a version string of "unknown". 124 | 125 | Some variants are more useful than others. Including `full-revisionid` in a 126 | bug report should allow developers to reconstruct the exact code being tested 127 | (or indicate the presence of local changes that should be shared with the 128 | developers). `version` is suitable for display in an "about" box or a CLI 129 | `--version` output: it can be easily compared against release notes and lists 130 | of bugs fixed in various releases. 131 | 132 | The installer adds the following text to your `__init__.py` to place a basic 133 | version in `YOURPROJECT.__version__`: 134 | 135 | from ._version import get_versions 136 | __version__ = get_versions()['version'] 137 | del get_versions 138 | 139 | ## Styles 140 | 141 | The setup.cfg `style=` configuration controls how the VCS information is 142 | rendered into a version string. 
143 | 144 | The default style, "pep440", produces a PEP440-compliant string, equal to the 145 | un-prefixed tag name for actual releases, and containing an additional "local 146 | version" section with more detail for in-between builds. For Git, this is 147 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags 148 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the 149 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and 150 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released 151 | software (exactly equal to a known tag), the identifier will only contain the 152 | stripped tag, e.g. "0.11". 153 | 154 | Other styles are available. See [details.md](details.md) in the Versioneer 155 | source tree for descriptions. 156 | 157 | ## Debugging 158 | 159 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend 160 | to return a version of "0+unknown". To investigate the problem, run `setup.py 161 | version`, which will run the version-lookup code in a verbose mode, and will 162 | display the full contents of `get_versions()` (including the `error` string, 163 | which may help identify what went wrong). 164 | 165 | ## Known Limitations 166 | 167 | Some situations are known to cause problems for Versioneer. This section 168 | details the most significant ones. More can be found on the GitHub 169 | [issues page](https://github.com/warner/python-versioneer/issues). 170 | 171 | ### Subprojects 172 | 173 | Versioneer has limited support for source trees in which `setup.py` is not in 174 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are 175 | two common reasons why `setup.py` might not be in the root: 176 | 177 | * Source trees which contain multiple subprojects, such as 178 | [Buildbot](https://github.com/buildbot/buildbot), which contains both 179 | "master" and "slave" subprojects, each with their own `setup.py`, 180 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI 181 | distributions (and upload multiple independently-installable tarballs). 182 | * Source trees whose main purpose is to contain a C library, but which also 183 | provide bindings to Python (and perhaps other languages) in subdirectories. 184 | 185 | Versioneer will look for `.git` in parent directories, and most operations 186 | should get the right version string. However `pip` and `setuptools` have bugs 187 | and implementation details which frequently cause `pip install .` from a 188 | subproject directory to fail to find a correct version string (so it usually 189 | defaults to `0+unknown`). 190 | 191 | `pip install --editable .` should work correctly. `setup.py install` might 192 | work too. 193 | 194 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in 195 | some later version. 196 | 197 | [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking 198 | this issue. The discussion in 199 | [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the 200 | issue from the Versioneer side in more detail. 201 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and 202 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve 203 | pip to let Versioneer work correctly. 204 | 205 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the 206 | `setup.cfg`, so subprojects were completely unsupported with those releases.
207 | 208 | ### Editable installs with setuptools <= 18.5 209 | 210 | `setup.py develop` and `pip install --editable .` allow you to install a 211 | project into a virtualenv once, then continue editing the source code (and 212 | test) without re-installing after every change. 213 | 214 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a 215 | convenient way to specify executable scripts that should be installed along 216 | with the python package. 217 | 218 | These both work as expected when using modern setuptools. When using 219 | setuptools-18.5 or earlier, however, certain operations will cause 220 | `pkg_resources.DistributionNotFound` errors when running the entrypoint 221 | script, which must be resolved by re-installing the package. This happens 222 | when the install happens with one version, then the egg_info data is 223 | regenerated while a different version is checked out. Many setup.py commands 224 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into 225 | a different virtualenv), so this can be surprising. 226 | 227 | [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes 228 | this one, but upgrading to a newer version of setuptools should probably 229 | resolve it. 230 | 231 | ### Unicode version strings 232 | 233 | While Versioneer works (and is continually tested) with both Python 2 and 234 | Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. 235 | Newer releases probably generate unicode version strings on py2. It's not 236 | clear that this is wrong, but it may be surprising for applications when they 237 | write these strings to a network connection or include them in bytes-oriented 238 | APIs like cryptographic checksums. 239 | 240 | [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates 241 | this question. 242 | 243 | 244 | ## Updating Versioneer 245 | 246 | To upgrade your project to a new release of Versioneer, do the following: 247 | 248 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 249 | * edit `setup.cfg`, if necessary, to include any new configuration settings 250 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 251 | * re-run `versioneer install` in your source tree, to replace 252 | `SRC/_version.py` 253 | * commit any changed files 254 | 255 | ## Future Directions 256 | 257 | This tool is designed to be easily extended to other version-control 258 | systems: all VCS-specific components are in separate directories like 259 | src/git/ . The top-level `versioneer.py` script is assembled from these 260 | components by running make-versioneer.py . In the future, make-versioneer.py 261 | will take a VCS name as an argument, and will construct a version of 262 | `versioneer.py` that is specific to the given VCS. It might also take the 263 | configuration arguments that are currently provided manually during 264 | installation by editing setup.py . Alternatively, it might go the other 265 | direction and include code from all supported VCS systems, reducing the 266 | number of intermediate scripts. 267 | 268 | 269 | ## License 270 | 271 | To make Versioneer easier to embed, all its code is dedicated to the public 272 | domain. The `_version.py` that it creates is also in the public domain. 273 | Specifically, both are released under the Creative Commons "Public Domain 274 | Dedication" license (CC0-1.0), as described in 275 | https://creativecommons.org/publicdomain/zero/1.0/ .
276 | 277 | """ 278 | 279 | from __future__ import print_function 280 | try: 281 | import configparser 282 | except ImportError: 283 | import ConfigParser as configparser 284 | import errno 285 | import json 286 | import os 287 | import re 288 | import subprocess 289 | import sys 290 | 291 | 292 | class VersioneerConfig: 293 | """Container for Versioneer configuration parameters.""" 294 | 295 | 296 | def get_root(): 297 | """Get the project root directory. 298 | 299 | We require that all commands are run from the project root, i.e. the 300 | directory that contains setup.py, setup.cfg, and versioneer.py . 301 | """ 302 | root = os.path.realpath(os.path.abspath(os.getcwd())) 303 | setup_py = os.path.join(root, "setup.py") 304 | versioneer_py = os.path.join(root, "versioneer.py") 305 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 306 | # allow 'python path/to/setup.py COMMAND' 307 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) 308 | setup_py = os.path.join(root, "setup.py") 309 | versioneer_py = os.path.join(root, "versioneer.py") 310 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 311 | err = ("Versioneer was unable to find the project root directory. " 312 | "Versioneer requires setup.py to be executed from " 313 | "its immediate directory (like 'python setup.py COMMAND'), " 314 | "or in a way that lets it use sys.argv[0] to find the root " 315 | "(like 'python path/to/setup.py COMMAND').") 316 | raise VersioneerBadRootError(err) 317 | try: 318 | # Certain runtime workflows (setup.py install/develop in a setuptools 319 | # tree) execute all dependencies in a single python process, so 320 | # "versioneer" may be imported multiple times, and python's shared 321 | # module-import table will cache the first one. So we can't use 322 | # os.path.dirname(__file__), as that will find whichever 323 | # versioneer.py was first imported, even in later projects. 324 | me = os.path.realpath(os.path.abspath(__file__)) 325 | me_dir = os.path.normcase(os.path.splitext(me)[0]) 326 | vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) 327 | if me_dir != vsr_dir: 328 | print("Warning: build in %s is using versioneer.py from %s" 329 | % (os.path.dirname(me), versioneer_py)) 330 | except NameError: 331 | pass 332 | return root 333 | 334 | 335 | def get_config_from_root(root): 336 | """Read the project setup.cfg file to determine Versioneer config.""" 337 | # This might raise EnvironmentError (if setup.cfg is missing), or 338 | # configparser.NoSectionError (if it lacks a [versioneer] section), or 339 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at 340 | # the top of versioneer.py for instructions on writing your setup.cfg .
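    # As a sketch, the section parsed below looks something like the
    # following (all values here are illustrative; tag_prefix may
    # legitimately be empty):
    #
    #   [versioneer]
    #   VCS = git
    #   style = pep440
    #   versionfile_source = myproject/_version.py
    #   versionfile_build = myproject/_version.py
    #   tag_prefix =
    #   parentdir_prefix = myproject-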
341 | setup_cfg = os.path.join(root, "setup.cfg") 342 | parser = configparser.SafeConfigParser() 343 | with open(setup_cfg, "r") as f: 344 | parser.readfp(f) 345 | VCS = parser.get("versioneer", "VCS") # mandatory 346 | 347 | def get(parser, name): 348 | if parser.has_option("versioneer", name): 349 | return parser.get("versioneer", name) 350 | return None 351 | cfg = VersioneerConfig() 352 | cfg.VCS = VCS 353 | cfg.style = get(parser, "style") or "" 354 | cfg.versionfile_source = get(parser, "versionfile_source") 355 | cfg.versionfile_build = get(parser, "versionfile_build") 356 | cfg.tag_prefix = get(parser, "tag_prefix") 357 | if cfg.tag_prefix in ("''", '""'): 358 | cfg.tag_prefix = "" 359 | cfg.parentdir_prefix = get(parser, "parentdir_prefix") 360 | cfg.verbose = get(parser, "verbose") 361 | return cfg 362 | 363 | 364 | class NotThisMethod(Exception): 365 | """Exception raised if a method is not valid for the current scenario.""" 366 | 367 | 368 | # these dictionaries contain VCS-specific tools 369 | LONG_VERSION_PY = {} 370 | HANDLERS = {} 371 | 372 | 373 | def register_vcs_handler(vcs, method): # decorator 374 | """Decorator to mark a method as the handler for a particular VCS.""" 375 | def decorate(f): 376 | """Store f in HANDLERS[vcs][method].""" 377 | if vcs not in HANDLERS: 378 | HANDLERS[vcs] = {} 379 | HANDLERS[vcs][method] = f 380 | return f 381 | return decorate 382 | 383 | 384 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 385 | env=None): 386 | """Call the given command(s).""" 387 | assert isinstance(commands, list) 388 | p = None 389 | for c in commands: 390 | try: 391 | dispcmd = str([c] + args) 392 | # remember shell=False, so use git.cmd on windows, not just git 393 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 394 | stdout=subprocess.PIPE, 395 | stderr=(subprocess.PIPE if hide_stderr 396 | else None)) 397 | break 398 | except EnvironmentError: 399 | e = sys.exc_info()[1] 400 | if e.errno == errno.ENOENT: 401 | continue 402 | if verbose: 403 | print("unable to run %s" % dispcmd) 404 | print(e) 405 | return None, None 406 | else: 407 | if verbose: 408 | print("unable to find command, tried %s" % (commands,)) 409 | return None, None 410 | stdout = p.communicate()[0].strip() 411 | if sys.version_info[0] >= 3: 412 | stdout = stdout.decode() 413 | if p.returncode != 0: 414 | if verbose: 415 | print("unable to run %s (error)" % dispcmd) 416 | print("stdout was %s" % stdout) 417 | return None, p.returncode 418 | return stdout, p.returncode 419 | 420 | 421 | LONG_VERSION_PY['git'] = ''' 422 | # This file helps to compute a version number in source trees obtained from 423 | # git-archive tarball (such as those provided by github's download-from-tag 424 | # feature). Distribution tarballs (built by setup.py sdist) and build 425 | # directories (produced by setup.py build) will contain a much shorter file 426 | # that just contains the computed version number. 427 | 428 | # This file is released into the public domain. Generated by 429 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 430 | 431 | """Git implementation of _version.py.""" 432 | 433 | import errno 434 | import os 435 | import re 436 | import subprocess 437 | import sys 438 | 439 | 440 | def get_keywords(): 441 | """Get the keywords needed to look up the version information.""" 442 | # these strings will be replaced by git during git-archive.
443 | # setup.py/versioneer.py will grep for the variable names, so they must 444 | # each be defined on a line of their own. _version.py will just call 445 | # get_keywords(). 446 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 447 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 448 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" 449 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 450 | return keywords 451 | 452 | 453 | class VersioneerConfig: 454 | """Container for Versioneer configuration parameters.""" 455 | 456 | 457 | def get_config(): 458 | """Create, populate and return the VersioneerConfig() object.""" 459 | # these strings are filled in when 'setup.py versioneer' creates 460 | # _version.py 461 | cfg = VersioneerConfig() 462 | cfg.VCS = "git" 463 | cfg.style = "%(STYLE)s" 464 | cfg.tag_prefix = "%(TAG_PREFIX)s" 465 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 466 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 467 | cfg.verbose = False 468 | return cfg 469 | 470 | 471 | class NotThisMethod(Exception): 472 | """Exception raised if a method is not valid for the current scenario.""" 473 | 474 | 475 | LONG_VERSION_PY = {} 476 | HANDLERS = {} 477 | 478 | 479 | def register_vcs_handler(vcs, method): # decorator 480 | """Decorator to mark a method as the handler for a particular VCS.""" 481 | def decorate(f): 482 | """Store f in HANDLERS[vcs][method].""" 483 | if vcs not in HANDLERS: 484 | HANDLERS[vcs] = {} 485 | HANDLERS[vcs][method] = f 486 | return f 487 | return decorate 488 | 489 | 490 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 491 | env=None): 492 | """Call the given command(s).""" 493 | assert isinstance(commands, list) 494 | p = None 495 | for c in commands: 496 | try: 497 | dispcmd = str([c] + args) 498 | # remember shell=False, so use git.cmd on windows, not just git 499 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 500 | stdout=subprocess.PIPE, 501 | stderr=(subprocess.PIPE if hide_stderr 502 | else None)) 503 | break 504 | except EnvironmentError: 505 | e = sys.exc_info()[1] 506 | if e.errno == errno.ENOENT: 507 | continue 508 | if verbose: 509 | print("unable to run %%s" %% dispcmd) 510 | print(e) 511 | return None, None 512 | else: 513 | if verbose: 514 | print("unable to find command, tried %%s" %% (commands,)) 515 | return None, None 516 | stdout = p.communicate()[0].strip() 517 | if sys.version_info[0] >= 3: 518 | stdout = stdout.decode() 519 | if p.returncode != 0: 520 | if verbose: 521 | print("unable to run %%s (error)" %% dispcmd) 522 | print("stdout was %%s" %% stdout) 523 | return None, p.returncode 524 | return stdout, p.returncode 525 | 526 | 527 | def versions_from_parentdir(parentdir_prefix, root, verbose): 528 | """Try to determine the version from the parent directory name. 529 | 530 | Source tarballs conventionally unpack into a directory that includes both 531 | the project name and a version string. 
We will also support searching up 532 | two directory levels for an appropriately named parent directory 533 | """ 534 | rootdirs = [] 535 | 536 | for i in range(3): 537 | dirname = os.path.basename(root) 538 | if dirname.startswith(parentdir_prefix): 539 | return {"version": dirname[len(parentdir_prefix):], 540 | "full-revisionid": None, 541 | "dirty": False, "error": None, "date": None} 542 | else: 543 | rootdirs.append(root) 544 | root = os.path.dirname(root) # up a level 545 | 546 | if verbose: 547 | print("Tried directories %%s but none started with prefix %%s" %% 548 | (str(rootdirs), parentdir_prefix)) 549 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 550 | 551 | 552 | @register_vcs_handler("git", "get_keywords") 553 | def git_get_keywords(versionfile_abs): 554 | """Extract version information from the given file.""" 555 | # the code embedded in _version.py can just fetch the value of these 556 | # keywords. When used from setup.py, we don't want to import _version.py, 557 | # so we do it with a regexp instead. This function is not used from 558 | # _version.py. 559 | keywords = {} 560 | try: 561 | f = open(versionfile_abs, "r") 562 | for line in f.readlines(): 563 | if line.strip().startswith("git_refnames ="): 564 | mo = re.search(r'=\s*"(.*)"', line) 565 | if mo: 566 | keywords["refnames"] = mo.group(1) 567 | if line.strip().startswith("git_full ="): 568 | mo = re.search(r'=\s*"(.*)"', line) 569 | if mo: 570 | keywords["full"] = mo.group(1) 571 | if line.strip().startswith("git_date ="): 572 | mo = re.search(r'=\s*"(.*)"', line) 573 | if mo: 574 | keywords["date"] = mo.group(1) 575 | f.close() 576 | except EnvironmentError: 577 | pass 578 | return keywords 579 | 580 | 581 | @register_vcs_handler("git", "keywords") 582 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 583 | """Get version information from git keywords.""" 584 | if not keywords: 585 | raise NotThisMethod("no keywords at all, weird") 586 | date = keywords.get("date") 587 | if date is not None: 588 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant 589 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 590 | # -like" string, which we must then edit to make compliant), because 591 | # it's been around since git-1.5.3, and it's too difficult to 592 | # discover which version we're using, or to work around using an 593 | # older one. 594 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 595 | refnames = keywords["refnames"].strip() 596 | if refnames.startswith("$Format"): 597 | if verbose: 598 | print("keywords are unexpanded, not using") 599 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 600 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 601 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 602 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 603 | TAG = "tag: " 604 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 605 | if not tags: 606 | # Either we're using git < 1.8.3, or there really are no tags. We use 607 | # a heuristic: assume all version tags have a digit. The old git %%d 608 | # expansion behaves like git log --decorate=short and strips out the 609 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 610 | # between branches and tags. By ignoring refnames without digits, we 611 | # filter out many common branch names like "release" and 612 | # "stabilization", as well as "HEAD" and "master". 
613 | tags = set([r for r in refs if re.search(r'\d', r)]) 614 | if verbose: 615 | print("discarding '%%s', no digits" %% ",".join(refs - tags)) 616 | if verbose: 617 | print("likely tags: %%s" %% ",".join(sorted(tags))) 618 | for ref in sorted(tags): 619 | # sorting will prefer e.g. "2.0" over "2.0rc1" 620 | if ref.startswith(tag_prefix): 621 | r = ref[len(tag_prefix):] 622 | if verbose: 623 | print("picking %%s" %% r) 624 | return {"version": r, 625 | "full-revisionid": keywords["full"].strip(), 626 | "dirty": False, "error": None, 627 | "date": date} 628 | # no suitable tags, so version is "0+unknown", but full hex is still there 629 | if verbose: 630 | print("no suitable tags, using unknown + full revision id") 631 | return {"version": "0+unknown", 632 | "full-revisionid": keywords["full"].strip(), 633 | "dirty": False, "error": "no suitable tags", "date": None} 634 | 635 | 636 | @register_vcs_handler("git", "pieces_from_vcs") 637 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 638 | """Get version from 'git describe' in the root of the source tree. 639 | 640 | This only gets called if the git-archive 'subst' keywords were *not* 641 | expanded, and _version.py hasn't already been rewritten with a short 642 | version string, meaning we're inside a checked out source tree. 643 | """ 644 | GITS = ["git"] 645 | if sys.platform == "win32": 646 | GITS = ["git.cmd", "git.exe"] 647 | 648 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 649 | hide_stderr=True) 650 | if rc != 0: 651 | if verbose: 652 | print("Directory %%s not under git control" %% root) 653 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 654 | 655 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 656 | # if there isn't one, this yields HEX[-dirty] (no NUM) 657 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 658 | "--always", "--long", 659 | "--match", "%%s*" %% tag_prefix], 660 | cwd=root) 661 | # --long was added in git-1.5.5 662 | if describe_out is None: 663 | raise NotThisMethod("'git describe' failed") 664 | describe_out = describe_out.strip() 665 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 666 | if full_out is None: 667 | raise NotThisMethod("'git rev-parse' failed") 668 | full_out = full_out.strip() 669 | 670 | pieces = {} 671 | pieces["long"] = full_out 672 | pieces["short"] = full_out[:7] # maybe improved later 673 | pieces["error"] = None 674 | 675 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 676 | # TAG might have hyphens. 677 | git_describe = describe_out 678 | 679 | # look for -dirty suffix 680 | dirty = git_describe.endswith("-dirty") 681 | pieces["dirty"] = dirty 682 | if dirty: 683 | git_describe = git_describe[:git_describe.rindex("-dirty")] 684 | 685 | # now we have TAG-NUM-gHEX or HEX 686 | 687 | if "-" in git_describe: 688 | # TAG-NUM-gHEX 689 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 690 | if not mo: 691 | # unparseable. Maybe git-describe is misbehaving? 
692 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 693 | %% describe_out) 694 | return pieces 695 | 696 | # tag 697 | full_tag = mo.group(1) 698 | if not full_tag.startswith(tag_prefix): 699 | if verbose: 700 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 701 | print(fmt %% (full_tag, tag_prefix)) 702 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 703 | %% (full_tag, tag_prefix)) 704 | return pieces 705 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 706 | 707 | # distance: number of commits since tag 708 | pieces["distance"] = int(mo.group(2)) 709 | 710 | # commit: short hex revision ID 711 | pieces["short"] = mo.group(3) 712 | 713 | else: 714 | # HEX: no tags 715 | pieces["closest-tag"] = None 716 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 717 | cwd=root) 718 | pieces["distance"] = int(count_out) # total number of commits 719 | 720 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 721 | date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], 722 | cwd=root)[0].strip() 723 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 724 | 725 | return pieces 726 | 727 | 728 | def plus_or_dot(pieces): 729 | """Return a + if we don't already have one, else return a .""" 730 | if "+" in pieces.get("closest-tag", ""): 731 | return "." 732 | return "+" 733 | 734 | 735 | def render_pep440(pieces): 736 | """Build up version string, with post-release "local version identifier". 737 | 738 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 739 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 740 | 741 | Exceptions: 742 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 743 | """ 744 | if pieces["closest-tag"]: 745 | rendered = pieces["closest-tag"] 746 | if pieces["distance"] or pieces["dirty"]: 747 | rendered += plus_or_dot(pieces) 748 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 749 | if pieces["dirty"]: 750 | rendered += ".dirty" 751 | else: 752 | # exception #1 753 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 754 | pieces["short"]) 755 | if pieces["dirty"]: 756 | rendered += ".dirty" 757 | return rendered 758 | 759 | 760 | def render_pep440_pre(pieces): 761 | """TAG[.post.devDISTANCE] -- No -dirty. 762 | 763 | Exceptions: 764 | 1: no tags. 0.post.devDISTANCE 765 | """ 766 | if pieces["closest-tag"]: 767 | rendered = pieces["closest-tag"] 768 | if pieces["distance"]: 769 | rendered += ".post.dev%%d" %% pieces["distance"] 770 | else: 771 | # exception #1 772 | rendered = "0.post.dev%%d" %% pieces["distance"] 773 | return rendered 774 | 775 | 776 | def render_pep440_post(pieces): 777 | """TAG[.postDISTANCE[.dev0]+gHEX] . 778 | 779 | The ".dev0" means dirty. Note that .dev0 sorts backwards 780 | (a dirty tree will appear "older" than the corresponding clean one), 781 | but you shouldn't be releasing software with -dirty anyways. 782 | 783 | Exceptions: 784 | 1: no tags. 
0.postDISTANCE[.dev0] 785 | """ 786 | if pieces["closest-tag"]: 787 | rendered = pieces["closest-tag"] 788 | if pieces["distance"] or pieces["dirty"]: 789 | rendered += ".post%%d" %% pieces["distance"] 790 | if pieces["dirty"]: 791 | rendered += ".dev0" 792 | rendered += plus_or_dot(pieces) 793 | rendered += "g%%s" %% pieces["short"] 794 | else: 795 | # exception #1 796 | rendered = "0.post%%d" %% pieces["distance"] 797 | if pieces["dirty"]: 798 | rendered += ".dev0" 799 | rendered += "+g%%s" %% pieces["short"] 800 | return rendered 801 | 802 | 803 | def render_pep440_old(pieces): 804 | """TAG[.postDISTANCE[.dev0]] . 805 | 806 | The ".dev0" means dirty. 807 | 808 | Exceptions: 809 | 1: no tags. 0.postDISTANCE[.dev0] 810 | """ 811 | if pieces["closest-tag"]: 812 | rendered = pieces["closest-tag"] 813 | if pieces["distance"] or pieces["dirty"]: 814 | rendered += ".post%%d" %% pieces["distance"] 815 | if pieces["dirty"]: 816 | rendered += ".dev0" 817 | else: 818 | # exception #1 819 | rendered = "0.post%%d" %% pieces["distance"] 820 | if pieces["dirty"]: 821 | rendered += ".dev0" 822 | return rendered 823 | 824 | 825 | def render_git_describe(pieces): 826 | """TAG[-DISTANCE-gHEX][-dirty]. 827 | 828 | Like 'git describe --tags --dirty --always'. 829 | 830 | Exceptions: 831 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 832 | """ 833 | if pieces["closest-tag"]: 834 | rendered = pieces["closest-tag"] 835 | if pieces["distance"]: 836 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 837 | else: 838 | # exception #1 839 | rendered = pieces["short"] 840 | if pieces["dirty"]: 841 | rendered += "-dirty" 842 | return rendered 843 | 844 | 845 | def render_git_describe_long(pieces): 846 | """TAG-DISTANCE-gHEX[-dirty]. 847 | 848 | Like 'git describe --tags --dirty --always --long'. 849 | The distance/hash is unconditional. 850 | 851 | Exceptions: 852 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 853 | """ 854 | if pieces["closest-tag"]: 855 | rendered = pieces["closest-tag"] 856 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 857 | else: 858 | # exception #1 859 | rendered = pieces["short"] 860 | if pieces["dirty"]: 861 | rendered += "-dirty" 862 | return rendered 863 | 864 | 865 | def render(pieces, style): 866 | """Render the given version pieces into the requested style.""" 867 | if pieces["error"]: 868 | return {"version": "unknown", 869 | "full-revisionid": pieces.get("long"), 870 | "dirty": None, 871 | "error": pieces["error"], 872 | "date": None} 873 | 874 | if not style or style == "default": 875 | style = "pep440" # the default 876 | 877 | if style == "pep440": 878 | rendered = render_pep440(pieces) 879 | elif style == "pep440-pre": 880 | rendered = render_pep440_pre(pieces) 881 | elif style == "pep440-post": 882 | rendered = render_pep440_post(pieces) 883 | elif style == "pep440-old": 884 | rendered = render_pep440_old(pieces) 885 | elif style == "git-describe": 886 | rendered = render_git_describe(pieces) 887 | elif style == "git-describe-long": 888 | rendered = render_git_describe_long(pieces) 889 | else: 890 | raise ValueError("unknown style '%%s'" %% style) 891 | 892 | return {"version": rendered, "full-revisionid": pieces["long"], 893 | "dirty": pieces["dirty"], "error": None, 894 | "date": pieces.get("date")} 895 | 896 | 897 | def get_versions(): 898 | """Get version information or return default if unable to do so.""" 899 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE.
If we have 900 | # __file__, we can work backwards from there to the root. Some 901 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 902 | # case we can only use expanded keywords. 903 | 904 | cfg = get_config() 905 | verbose = cfg.verbose 906 | 907 | try: 908 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 909 | verbose) 910 | except NotThisMethod: 911 | pass 912 | 913 | try: 914 | root = os.path.realpath(__file__) 915 | # versionfile_source is the relative path from the top of the source 916 | # tree (where the .git directory might live) to this file. Invert 917 | # this to find the root from __file__. 918 | for i in cfg.versionfile_source.split('/'): 919 | root = os.path.dirname(root) 920 | except NameError: 921 | return {"version": "0+unknown", "full-revisionid": None, 922 | "dirty": None, 923 | "error": "unable to find root of source tree", 924 | "date": None} 925 | 926 | try: 927 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 928 | return render(pieces, cfg.style) 929 | except NotThisMethod: 930 | pass 931 | 932 | try: 933 | if cfg.parentdir_prefix: 934 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 935 | except NotThisMethod: 936 | pass 937 | 938 | return {"version": "0+unknown", "full-revisionid": None, 939 | "dirty": None, 940 | "error": "unable to compute version", "date": None} 941 | ''' 942 | 943 | 944 | @register_vcs_handler("git", "get_keywords") 945 | def git_get_keywords(versionfile_abs): 946 | """Extract version information from the given file.""" 947 | # the code embedded in _version.py can just fetch the value of these 948 | # keywords. When used from setup.py, we don't want to import _version.py, 949 | # so we do it with a regexp instead. This function is not used from 950 | # _version.py. 951 | keywords = {} 952 | try: 953 | f = open(versionfile_abs, "r") 954 | for line in f.readlines(): 955 | if line.strip().startswith("git_refnames ="): 956 | mo = re.search(r'=\s*"(.*)"', line) 957 | if mo: 958 | keywords["refnames"] = mo.group(1) 959 | if line.strip().startswith("git_full ="): 960 | mo = re.search(r'=\s*"(.*)"', line) 961 | if mo: 962 | keywords["full"] = mo.group(1) 963 | if line.strip().startswith("git_date ="): 964 | mo = re.search(r'=\s*"(.*)"', line) 965 | if mo: 966 | keywords["date"] = mo.group(1) 967 | f.close() 968 | except EnvironmentError: 969 | pass 970 | return keywords 971 | 972 | 973 | @register_vcs_handler("git", "keywords") 974 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 975 | """Get version information from git keywords.""" 976 | if not keywords: 977 | raise NotThisMethod("no keywords at all, weird") 978 | date = keywords.get("date") 979 | if date is not None: 980 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 981 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 982 | # -like" string, which we must then edit to make compliant), because 983 | # it's been around since git-1.5.3, and it's too difficult to 984 | # discover which version we're using, or to work around using an 985 | # older one. 
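        # Illustrative example (not from the source): a "%ci" value such as
        # "2019-05-01 12:34:56 +1000" becomes "2019-05-01T12:34:56+1000"
        # after the two replace() edits below.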
986 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 987 | refnames = keywords["refnames"].strip() 988 | if refnames.startswith("$Format"): 989 | if verbose: 990 | print("keywords are unexpanded, not using") 991 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 992 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 993 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 994 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 995 | TAG = "tag: " 996 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 997 | if not tags: 998 | # Either we're using git < 1.8.3, or there really are no tags. We use 999 | # a heuristic: assume all version tags have a digit. The old git %d 1000 | # expansion behaves like git log --decorate=short and strips out the 1001 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 1002 | # between branches and tags. By ignoring refnames without digits, we 1003 | # filter out many common branch names like "release" and 1004 | # "stabilization", as well as "HEAD" and "master". 1005 | tags = set([r for r in refs if re.search(r'\d', r)]) 1006 | if verbose: 1007 | print("discarding '%s', no digits" % ",".join(refs - tags)) 1008 | if verbose: 1009 | print("likely tags: %s" % ",".join(sorted(tags))) 1010 | for ref in sorted(tags): 1011 | # sorting will prefer e.g. "2.0" over "2.0rc1" 1012 | if ref.startswith(tag_prefix): 1013 | r = ref[len(tag_prefix):] 1014 | if verbose: 1015 | print("picking %s" % r) 1016 | return {"version": r, 1017 | "full-revisionid": keywords["full"].strip(), 1018 | "dirty": False, "error": None, 1019 | "date": date} 1020 | # no suitable tags, so version is "0+unknown", but full hex is still there 1021 | if verbose: 1022 | print("no suitable tags, using unknown + full revision id") 1023 | return {"version": "0+unknown", 1024 | "full-revisionid": keywords["full"].strip(), 1025 | "dirty": False, "error": "no suitable tags", "date": None} 1026 | 1027 | 1028 | @register_vcs_handler("git", "pieces_from_vcs") 1029 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 1030 | """Get version from 'git describe' in the root of the source tree. 1031 | 1032 | This only gets called if the git-archive 'subst' keywords were *not* 1033 | expanded, and _version.py hasn't already been rewritten with a short 1034 | version string, meaning we're inside a checked out source tree. 
1035 | """ 1036 | GITS = ["git"] 1037 | if sys.platform == "win32": 1038 | GITS = ["git.cmd", "git.exe"] 1039 | 1040 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 1041 | hide_stderr=True) 1042 | if rc != 0: 1043 | if verbose: 1044 | print("Directory %s not under git control" % root) 1045 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 1046 | 1047 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 1048 | # if there isn't one, this yields HEX[-dirty] (no NUM) 1049 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 1050 | "--always", "--long", 1051 | "--match", "%s*" % tag_prefix], 1052 | cwd=root) 1053 | # --long was added in git-1.5.5 1054 | if describe_out is None: 1055 | raise NotThisMethod("'git describe' failed") 1056 | describe_out = describe_out.strip() 1057 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 1058 | if full_out is None: 1059 | raise NotThisMethod("'git rev-parse' failed") 1060 | full_out = full_out.strip() 1061 | 1062 | pieces = {} 1063 | pieces["long"] = full_out 1064 | pieces["short"] = full_out[:7] # maybe improved later 1065 | pieces["error"] = None 1066 | 1067 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1068 | # TAG might have hyphens. 1069 | git_describe = describe_out 1070 | 1071 | # look for -dirty suffix 1072 | dirty = git_describe.endswith("-dirty") 1073 | pieces["dirty"] = dirty 1074 | if dirty: 1075 | git_describe = git_describe[:git_describe.rindex("-dirty")] 1076 | 1077 | # now we have TAG-NUM-gHEX or HEX 1078 | 1079 | if "-" in git_describe: 1080 | # TAG-NUM-gHEX 1081 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 1082 | if not mo: 1083 | # unparseable. Maybe git-describe is misbehaving? 1084 | pieces["error"] = ("unable to parse git-describe output: '%s'" 1085 | % describe_out) 1086 | return pieces 1087 | 1088 | # tag 1089 | full_tag = mo.group(1) 1090 | if not full_tag.startswith(tag_prefix): 1091 | if verbose: 1092 | fmt = "tag '%s' doesn't start with prefix '%s'" 1093 | print(fmt % (full_tag, tag_prefix)) 1094 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 1095 | % (full_tag, tag_prefix)) 1096 | return pieces 1097 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 1098 | 1099 | # distance: number of commits since tag 1100 | pieces["distance"] = int(mo.group(2)) 1101 | 1102 | # commit: short hex revision ID 1103 | pieces["short"] = mo.group(3) 1104 | 1105 | else: 1106 | # HEX: no tags 1107 | pieces["closest-tag"] = None 1108 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 1109 | cwd=root) 1110 | pieces["distance"] = int(count_out) # total number of commits 1111 | 1112 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 1113 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 1114 | cwd=root)[0].strip() 1115 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 1116 | 1117 | return pieces 1118 | 1119 | 1120 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1121 | """Git-specific installation logic for Versioneer. 1122 | 1123 | For Git, this means creating/changing .gitattributes to mark _version.py 1124 | for export-subst keyword substitution. 
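    As a sketch, the appended .gitattributes line has this shape (the path
    comes from the configured versionfile_source; the one shown here is
    illustrative):

        myproject/_version.py export-subst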
1125 | """ 1126 | GITS = ["git"] 1127 | if sys.platform == "win32": 1128 | GITS = ["git.cmd", "git.exe"] 1129 | files = [manifest_in, versionfile_source] 1130 | if ipy: 1131 | files.append(ipy) 1132 | try: 1133 | me = __file__ 1134 | if me.endswith(".pyc") or me.endswith(".pyo"): 1135 | me = os.path.splitext(me)[0] + ".py" 1136 | versioneer_file = os.path.relpath(me) 1137 | except NameError: 1138 | versioneer_file = "versioneer.py" 1139 | files.append(versioneer_file) 1140 | present = False 1141 | try: 1142 | f = open(".gitattributes", "r") 1143 | for line in f.readlines(): 1144 | if line.strip().startswith(versionfile_source): 1145 | if "export-subst" in line.strip().split()[1:]: 1146 | present = True 1147 | f.close() 1148 | except EnvironmentError: 1149 | pass 1150 | if not present: 1151 | f = open(".gitattributes", "a+") 1152 | f.write("%s export-subst\n" % versionfile_source) 1153 | f.close() 1154 | files.append(".gitattributes") 1155 | run_command(GITS, ["add", "--"] + files) 1156 | 1157 | 1158 | def versions_from_parentdir(parentdir_prefix, root, verbose): 1159 | """Try to determine the version from the parent directory name. 1160 | 1161 | Source tarballs conventionally unpack into a directory that includes both 1162 | the project name and a version string. We will also support searching up 1163 | two directory levels for an appropriately named parent directory 1164 | """ 1165 | rootdirs = [] 1166 | 1167 | for i in range(3): 1168 | dirname = os.path.basename(root) 1169 | if dirname.startswith(parentdir_prefix): 1170 | return {"version": dirname[len(parentdir_prefix):], 1171 | "full-revisionid": None, 1172 | "dirty": False, "error": None, "date": None} 1173 | else: 1174 | rootdirs.append(root) 1175 | root = os.path.dirname(root) # up a level 1176 | 1177 | if verbose: 1178 | print("Tried directories %s but none started with prefix %s" % 1179 | (str(rootdirs), parentdir_prefix)) 1180 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 1181 | 1182 | 1183 | SHORT_VERSION_PY = """ 1184 | # This file was generated by 'versioneer.py' (0.18) from 1185 | # revision-control system data, or from the parent directory name of an 1186 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 1187 | # of this file. 
1188 | 1189 | import json 1190 | 1191 | version_json = ''' 1192 | %s 1193 | ''' # END VERSION_JSON 1194 | 1195 | 1196 | def get_versions(): 1197 | return json.loads(version_json) 1198 | """ 1199 | 1200 | 1201 | def versions_from_file(filename): 1202 | """Try to determine the version from _version.py if present.""" 1203 | try: 1204 | with open(filename) as f: 1205 | contents = f.read() 1206 | except EnvironmentError: 1207 | raise NotThisMethod("unable to read _version.py") 1208 | mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", 1209 | contents, re.M | re.S) 1210 | if not mo: 1211 | mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", 1212 | contents, re.M | re.S) 1213 | if not mo: 1214 | raise NotThisMethod("no version_json in _version.py") 1215 | return json.loads(mo.group(1)) 1216 | 1217 | 1218 | def write_to_version_file(filename, versions): 1219 | """Write the given version number to the given _version.py file.""" 1220 | os.unlink(filename) 1221 | contents = json.dumps(versions, sort_keys=True, 1222 | indent=1, separators=(",", ": ")) 1223 | with open(filename, "w") as f: 1224 | f.write(SHORT_VERSION_PY % contents) 1225 | 1226 | print("set %s to '%s'" % (filename, versions["version"])) 1227 | 1228 | 1229 | def plus_or_dot(pieces): 1230 | """Return a + if we don't already have one, else return a .""" 1231 | if "+" in pieces.get("closest-tag", ""): 1232 | return "." 1233 | return "+" 1234 | 1235 | 1236 | def render_pep440(pieces): 1237 | """Build up version string, with post-release "local version identifier". 1238 | 1239 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 1240 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1241 | 1242 | Exceptions: 1243 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 1244 | """ 1245 | if pieces["closest-tag"]: 1246 | rendered = pieces["closest-tag"] 1247 | if pieces["distance"] or pieces["dirty"]: 1248 | rendered += plus_or_dot(pieces) 1249 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1250 | if pieces["dirty"]: 1251 | rendered += ".dirty" 1252 | else: 1253 | # exception #1 1254 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 1255 | pieces["short"]) 1256 | if pieces["dirty"]: 1257 | rendered += ".dirty" 1258 | return rendered 1259 | 1260 | 1261 | def render_pep440_pre(pieces): 1262 | """TAG[.post.devDISTANCE] -- No -dirty. 1263 | 1264 | Exceptions: 1265 | 1: no tags. 0.post.devDISTANCE 1266 | """ 1267 | if pieces["closest-tag"]: 1268 | rendered = pieces["closest-tag"] 1269 | if pieces["distance"]: 1270 | rendered += ".post.dev%d" % pieces["distance"] 1271 | else: 1272 | # exception #1 1273 | rendered = "0.post.dev%d" % pieces["distance"] 1274 | return rendered 1275 | 1276 | 1277 | def render_pep440_post(pieces): 1278 | """TAG[.postDISTANCE[.dev0]+gHEX] . 1279 | 1280 | The ".dev0" means dirty. Note that .dev0 sorts backwards 1281 | (a dirty tree will appear "older" than the corresponding clean one), 1282 | but you shouldn't be releasing software with -dirty anyways. 1283 | 1284 | Exceptions: 1285 | 1: no tags. 
0.postDISTANCE[.dev0] 1286 | """ 1287 | if pieces["closest-tag"]: 1288 | rendered = pieces["closest-tag"] 1289 | if pieces["distance"] or pieces["dirty"]: 1290 | rendered += ".post%d" % pieces["distance"] 1291 | if pieces["dirty"]: 1292 | rendered += ".dev0" 1293 | rendered += plus_or_dot(pieces) 1294 | rendered += "g%s" % pieces["short"] 1295 | else: 1296 | # exception #1 1297 | rendered = "0.post%d" % pieces["distance"] 1298 | if pieces["dirty"]: 1299 | rendered += ".dev0" 1300 | rendered += "+g%s" % pieces["short"] 1301 | return rendered 1302 | 1303 | 1304 | def render_pep440_old(pieces): 1305 | """TAG[.postDISTANCE[.dev0]] . 1306 | 1307 | The ".dev0" means dirty. 1308 | 1309 | Exceptions: 1310 | 1: no tags. 0.postDISTANCE[.dev0] 1311 | """ 1312 | if pieces["closest-tag"]: 1313 | rendered = pieces["closest-tag"] 1314 | if pieces["distance"] or pieces["dirty"]: 1315 | rendered += ".post%d" % pieces["distance"] 1316 | if pieces["dirty"]: 1317 | rendered += ".dev0" 1318 | else: 1319 | # exception #1 1320 | rendered = "0.post%d" % pieces["distance"] 1321 | if pieces["dirty"]: 1322 | rendered += ".dev0" 1323 | return rendered 1324 | 1325 | 1326 | def render_git_describe(pieces): 1327 | """TAG[-DISTANCE-gHEX][-dirty]. 1328 | 1329 | Like 'git describe --tags --dirty --always'. 1330 | 1331 | Exceptions: 1332 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1333 | """ 1334 | if pieces["closest-tag"]: 1335 | rendered = pieces["closest-tag"] 1336 | if pieces["distance"]: 1337 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1338 | else: 1339 | # exception #1 1340 | rendered = pieces["short"] 1341 | if pieces["dirty"]: 1342 | rendered += "-dirty" 1343 | return rendered 1344 | 1345 | 1346 | def render_git_describe_long(pieces): 1347 | """TAG-DISTANCE-gHEX[-dirty]. 1348 | 1349 | Like 'git describe --tags --dirty --always --long'. 1350 | The distance/hash is unconditional. 1351 | 1352 | Exceptions: 1353 | 1: no tags.
HEX[-dirty] (note: no 'g' prefix) 1354 | """ 1355 | if pieces["closest-tag"]: 1356 | rendered = pieces["closest-tag"] 1357 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1358 | else: 1359 | # exception #1 1360 | rendered = pieces["short"] 1361 | if pieces["dirty"]: 1362 | rendered += "-dirty" 1363 | return rendered 1364 | 1365 | 1366 | def render(pieces, style): 1367 | """Render the given version pieces into the requested style.""" 1368 | if pieces["error"]: 1369 | return {"version": "unknown", 1370 | "full-revisionid": pieces.get("long"), 1371 | "dirty": None, 1372 | "error": pieces["error"], 1373 | "date": None} 1374 | 1375 | if not style or style == "default": 1376 | style = "pep440" # the default 1377 | 1378 | if style == "pep440": 1379 | rendered = render_pep440(pieces) 1380 | elif style == "pep440-pre": 1381 | rendered = render_pep440_pre(pieces) 1382 | elif style == "pep440-post": 1383 | rendered = render_pep440_post(pieces) 1384 | elif style == "pep440-old": 1385 | rendered = render_pep440_old(pieces) 1386 | elif style == "git-describe": 1387 | rendered = render_git_describe(pieces) 1388 | elif style == "git-describe-long": 1389 | rendered = render_git_describe_long(pieces) 1390 | else: 1391 | raise ValueError("unknown style '%s'" % style) 1392 | 1393 | return {"version": rendered, "full-revisionid": pieces["long"], 1394 | "dirty": pieces["dirty"], "error": None, 1395 | "date": pieces.get("date")} 1396 | 1397 | 1398 | class VersioneerBadRootError(Exception): 1399 | """The project root directory is unknown or missing key files.""" 1400 | 1401 | 1402 | def get_versions(verbose=False): 1403 | """Get the project version from whatever source is available. 1404 | 1405 | Returns a dict with 'version', 'full-revisionid', 'dirty', 'error' and 'date' keys. 1406 | """ 1407 | if "versioneer" in sys.modules: 1408 | # see the discussion in cmdclass.py:get_cmdclass() 1409 | del sys.modules["versioneer"] 1410 | 1411 | root = get_root() 1412 | cfg = get_config_from_root(root) 1413 | 1414 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1415 | handlers = HANDLERS.get(cfg.VCS) 1416 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1417 | verbose = verbose or cfg.verbose 1418 | assert cfg.versionfile_source is not None, \ 1419 | "please set versioneer.versionfile_source" 1420 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1421 | 1422 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1423 | 1424 | # extract version from first of: _version.py, VCS command (e.g. 'git 1425 | # describe'), parentdir. This is meant to work for developers using a 1426 | # source checkout, for users of a tarball created by 'setup.py sdist', 1427 | # and for users of a tarball/zipball created by 'git archive' or github's 1428 | # download-from-tag feature or the equivalent in other VCSes.
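    # (Clarifying note: the expanded git-archive keywords are actually
    # consulted first in the code below, before _version.py, the VCS
    # command, and the parent-directory name.)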
1429 | 1430 | get_keywords_f = handlers.get("get_keywords") 1431 | from_keywords_f = handlers.get("keywords") 1432 | if get_keywords_f and from_keywords_f: 1433 | try: 1434 | keywords = get_keywords_f(versionfile_abs) 1435 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1436 | if verbose: 1437 | print("got version from expanded keyword %s" % ver) 1438 | return ver 1439 | except NotThisMethod: 1440 | pass 1441 | 1442 | try: 1443 | ver = versions_from_file(versionfile_abs) 1444 | if verbose: 1445 | print("got version from file %s %s" % (versionfile_abs, ver)) 1446 | return ver 1447 | except NotThisMethod: 1448 | pass 1449 | 1450 | from_vcs_f = handlers.get("pieces_from_vcs") 1451 | if from_vcs_f: 1452 | try: 1453 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1454 | ver = render(pieces, cfg.style) 1455 | if verbose: 1456 | print("got version from VCS %s" % ver) 1457 | return ver 1458 | except NotThisMethod: 1459 | pass 1460 | 1461 | try: 1462 | if cfg.parentdir_prefix: 1463 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1464 | if verbose: 1465 | print("got version from parentdir %s" % ver) 1466 | return ver 1467 | except NotThisMethod: 1468 | pass 1469 | 1470 | if verbose: 1471 | print("unable to compute version") 1472 | 1473 | return {"version": "0+unknown", "full-revisionid": None, 1474 | "dirty": None, "error": "unable to compute version", 1475 | "date": None} 1476 | 1477 | 1478 | def get_version(): 1479 | """Get the short version string for this project.""" 1480 | return get_versions()["version"] 1481 | 1482 | 1483 | def get_cmdclass(): 1484 | """Get the custom setuptools/distutils subclasses used by Versioneer.""" 1485 | if "versioneer" in sys.modules: 1486 | del sys.modules["versioneer"] 1487 | # this fixes the "python setup.py develop" case (also 'install' and 1488 | # 'easy_install .'), in which subdependencies of the main project are 1489 | # built (using setup.py bdist_egg) in the same python process. Assume 1490 | # a main project A and a dependency B, which use different versions 1491 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1492 | # sys.modules by the time B's setup.py is executed, causing B to run 1493 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1494 | # sandbox that restores sys.modules to its pre-build state, so the 1495 | # parent is protected against the child's "import versioneer". By 1496 | # removing ourselves from sys.modules here, before the child build 1497 | # happens, we protect the child from the parent's versioneer too.
1498 | # Also see https://github.com/warner/python-versioneer/issues/52 1499 | 1500 | cmds = {} 1501 | 1502 | # we add "version" to both distutils and setuptools 1503 | from distutils.core import Command 1504 | 1505 | class cmd_version(Command): 1506 | description = "report generated version string" 1507 | user_options = [] 1508 | boolean_options = [] 1509 | 1510 | def initialize_options(self): 1511 | pass 1512 | 1513 | def finalize_options(self): 1514 | pass 1515 | 1516 | def run(self): 1517 | vers = get_versions(verbose=True) 1518 | print("Version: %s" % vers["version"]) 1519 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1520 | print(" dirty: %s" % vers.get("dirty")) 1521 | print(" date: %s" % vers.get("date")) 1522 | if vers["error"]: 1523 | print(" error: %s" % vers["error"]) 1524 | cmds["version"] = cmd_version 1525 | 1526 | # we override "build_py" in both distutils and setuptools 1527 | # 1528 | # most invocation pathways end up running build_py: 1529 | # distutils/build -> build_py 1530 | # distutils/install -> distutils/build ->.. 1531 | # setuptools/bdist_wheel -> distutils/install ->.. 1532 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1533 | # setuptools/install -> bdist_egg ->.. 1534 | # setuptools/develop -> ? 1535 | # pip install: 1536 | # copies source tree to a tempdir before running egg_info/etc 1537 | # if .git isn't copied too, 'git describe' will fail 1538 | # then does setup.py bdist_wheel, or sometimes setup.py install 1539 | # setup.py egg_info -> ? 1540 | 1541 | # we override different "build_py" commands for both environments 1542 | if "setuptools" in sys.modules: 1543 | from setuptools.command.build_py import build_py as _build_py 1544 | else: 1545 | from distutils.command.build_py import build_py as _build_py 1546 | 1547 | class cmd_build_py(_build_py): 1548 | def run(self): 1549 | root = get_root() 1550 | cfg = get_config_from_root(root) 1551 | versions = get_versions() 1552 | _build_py.run(self) 1553 | # now locate _version.py in the new build/ directory and replace 1554 | # it with an updated value 1555 | if cfg.versionfile_build: 1556 | target_versionfile = os.path.join(self.build_lib, 1557 | cfg.versionfile_build) 1558 | print("UPDATING %s" % target_versionfile) 1559 | write_to_version_file(target_versionfile, versions) 1560 | cmds["build_py"] = cmd_build_py 1561 | 1562 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 1563 | from cx_Freeze.dist import build_exe as _build_exe 1564 | # nczeczulin reports that py2exe won't like the pep440-style string 1565 | # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 1566 | # setup(console=[{ 1567 | # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION 1568 | # "product_version": versioneer.get_version(), 1569 | # ... 
1570 | 1571 | class cmd_build_exe(_build_exe): 1572 | def run(self): 1573 | root = get_root() 1574 | cfg = get_config_from_root(root) 1575 | versions = get_versions() 1576 | target_versionfile = cfg.versionfile_source 1577 | print("UPDATING %s" % target_versionfile) 1578 | write_to_version_file(target_versionfile, versions) 1579 | 1580 | _build_exe.run(self) 1581 | os.unlink(target_versionfile) 1582 | with open(cfg.versionfile_source, "w") as f: 1583 | LONG = LONG_VERSION_PY[cfg.VCS] 1584 | f.write(LONG % 1585 | {"DOLLAR": "$", 1586 | "STYLE": cfg.style, 1587 | "TAG_PREFIX": cfg.tag_prefix, 1588 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1589 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1590 | }) 1591 | cmds["build_exe"] = cmd_build_exe 1592 | del cmds["build_py"] 1593 | 1594 | if 'py2exe' in sys.modules: # py2exe enabled? 1595 | try: 1596 | from py2exe.distutils_buildexe import py2exe as _py2exe # py3 1597 | except ImportError: 1598 | from py2exe.build_exe import py2exe as _py2exe # py2 1599 | 1600 | class cmd_py2exe(_py2exe): 1601 | def run(self): 1602 | root = get_root() 1603 | cfg = get_config_from_root(root) 1604 | versions = get_versions() 1605 | target_versionfile = cfg.versionfile_source 1606 | print("UPDATING %s" % target_versionfile) 1607 | write_to_version_file(target_versionfile, versions) 1608 | 1609 | _py2exe.run(self) 1610 | os.unlink(target_versionfile) 1611 | with open(cfg.versionfile_source, "w") as f: 1612 | LONG = LONG_VERSION_PY[cfg.VCS] 1613 | f.write(LONG % 1614 | {"DOLLAR": "$", 1615 | "STYLE": cfg.style, 1616 | "TAG_PREFIX": cfg.tag_prefix, 1617 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1618 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1619 | }) 1620 | cmds["py2exe"] = cmd_py2exe 1621 | 1622 | # we override different "sdist" commands for both environments 1623 | if "setuptools" in sys.modules: 1624 | from setuptools.command.sdist import sdist as _sdist 1625 | else: 1626 | from distutils.command.sdist import sdist as _sdist 1627 | 1628 | class cmd_sdist(_sdist): 1629 | def run(self): 1630 | versions = get_versions() 1631 | self._versioneer_generated_versions = versions 1632 | # unless we update this, the command will keep using the old 1633 | # version 1634 | self.distribution.metadata.version = versions["version"] 1635 | return _sdist.run(self) 1636 | 1637 | def make_release_tree(self, base_dir, files): 1638 | root = get_root() 1639 | cfg = get_config_from_root(root) 1640 | _sdist.make_release_tree(self, base_dir, files) 1641 | # now locate _version.py in the new base_dir directory 1642 | # (remembering that it may be a hardlink) and replace it with an 1643 | # updated value 1644 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1645 | print("UPDATING %s" % target_versionfile) 1646 | write_to_version_file(target_versionfile, 1647 | self._versioneer_generated_versions) 1648 | cmds["sdist"] = cmd_sdist 1649 | 1650 | return cmds 1651 | 1652 | 1653 | CONFIG_ERROR = """ 1654 | setup.cfg is missing the necessary Versioneer configuration. You need 1655 | a section like: 1656 | 1657 | [versioneer] 1658 | VCS = git 1659 | style = pep440 1660 | versionfile_source = src/myproject/_version.py 1661 | versionfile_build = myproject/_version.py 1662 | tag_prefix = 1663 | parentdir_prefix = myproject- 1664 | 1665 | You will also need to edit your setup.py to use the results: 1666 | 1667 | import versioneer 1668 | setup(version=versioneer.get_version(), 1669 | cmdclass=versioneer.get_cmdclass(), ...) 
1670 | 
1671 | Please read the docstring in ./versioneer.py for configuration instructions,
1672 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
1673 | """
1674 | 
1675 | SAMPLE_CONFIG = """
1676 | # See the docstring in versioneer.py for instructions. Note that you must
1677 | # re-run 'versioneer.py setup' after changing this section, and commit the
1678 | # resulting files.
1679 | 
1680 | [versioneer]
1681 | #VCS = git
1682 | #style = pep440
1683 | #versionfile_source =
1684 | #versionfile_build =
1685 | #tag_prefix =
1686 | #parentdir_prefix =
1687 | 
1688 | """
1689 | 
1690 | INIT_PY_SNIPPET = """
1691 | from ._version import get_versions
1692 | __version__ = get_versions()['version']
1693 | del get_versions
1694 | """
1695 | 
1696 | 
1697 | def do_setup():
1698 |     """Main VCS-independent setup function for installing Versioneer."""
1699 |     root = get_root()
1700 |     try:
1701 |         cfg = get_config_from_root(root)
1702 |     except (EnvironmentError, configparser.NoSectionError,
1703 |             configparser.NoOptionError) as e:
1704 |         if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
1705 |             print("Adding sample versioneer config to setup.cfg",
1706 |                   file=sys.stderr)
1707 |             with open(os.path.join(root, "setup.cfg"), "a") as f:
1708 |                 f.write(SAMPLE_CONFIG)
1709 |         print(CONFIG_ERROR, file=sys.stderr)
1710 |         return 1
1711 | 
1712 |     print(" creating %s" % cfg.versionfile_source)
1713 |     with open(cfg.versionfile_source, "w") as f:
1714 |         LONG = LONG_VERSION_PY[cfg.VCS]
1715 |         f.write(LONG % {"DOLLAR": "$",
1716 |                         "STYLE": cfg.style,
1717 |                         "TAG_PREFIX": cfg.tag_prefix,
1718 |                         "PARENTDIR_PREFIX": cfg.parentdir_prefix,
1719 |                         "VERSIONFILE_SOURCE": cfg.versionfile_source,
1720 |                         })
1721 | 
1722 |     ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
1723 |                        "__init__.py")
1724 |     if os.path.exists(ipy):
1725 |         try:
1726 |             with open(ipy, "r") as f:
1727 |                 old = f.read()
1728 |         except EnvironmentError:
1729 |             old = ""
1730 |         if INIT_PY_SNIPPET not in old:
1731 |             print(" appending to %s" % ipy)
1732 |             with open(ipy, "a") as f:
1733 |                 f.write(INIT_PY_SNIPPET)
1734 |         else:
1735 |             print(" %s unmodified" % ipy)
1736 |     else:
1737 |         print(" %s doesn't exist, ok" % ipy)
1738 |         ipy = None
1739 | 
1740 |     # Make sure both the top-level "versioneer.py" and versionfile_source
1741 |     # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
1742 |     # they'll be copied into source distributions. Pip won't be able to
1743 |     # install the package without this.
1744 |     manifest_in = os.path.join(root, "MANIFEST.in")
1745 |     simple_includes = set()
1746 |     try:
1747 |         with open(manifest_in, "r") as f:
1748 |             for line in f:
1749 |                 if line.startswith("include "):
1750 |                     for include in line.split()[1:]:
1751 |                         simple_includes.add(include)
1752 |     except EnvironmentError:
1753 |         pass
1754 |     # That doesn't cover everything MANIFEST.in can do
1755 |     # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
1756 |     # it might give some false negatives. Appending redundant 'include'
1757 |     # lines is safe, though.
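    # For example (hypothetical MANIFEST.in contents, shown only to
    # illustrate the false-negative case described above):
    #
    #     include versioneer.py              <- detected by the simple scan
    #     recursive-include myproject *.py   <- may already cover _version.py,
    #                                           but is not detected, so a
    #                                           redundant 'include' line would
    #                                           still be appended below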
1758 |     if "versioneer.py" not in simple_includes:
1759 |         print(" appending 'versioneer.py' to MANIFEST.in")
1760 |         with open(manifest_in, "a") as f:
1761 |             f.write("include versioneer.py\n")
1762 |     else:
1763 |         print(" 'versioneer.py' already in MANIFEST.in")
1764 |     if cfg.versionfile_source not in simple_includes:
1765 |         print(" appending versionfile_source ('%s') to MANIFEST.in" %
1766 |               cfg.versionfile_source)
1767 |         with open(manifest_in, "a") as f:
1768 |             f.write("include %s\n" % cfg.versionfile_source)
1769 |     else:
1770 |         print(" versionfile_source already in MANIFEST.in")
1771 | 
1772 |     # Make VCS-specific changes. For git, this means creating/changing
1773 |     # .gitattributes to mark _version.py for export-subst keyword
1774 |     # substitution.
1775 |     do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
1776 |     return 0
1777 | 
1778 | 
1779 | def scan_setup_py():
1780 |     """Validate the contents of setup.py against Versioneer's expectations."""
1781 |     found = set()
1782 |     setters = False
1783 |     errors = 0
1784 |     with open("setup.py", "r") as f:
1785 |         for line in f.readlines():
1786 |             if "import versioneer" in line:
1787 |                 found.add("import")
1788 |             if "versioneer.get_cmdclass()" in line:
1789 |                 found.add("cmdclass")
1790 |             if "versioneer.get_version()" in line:
1791 |                 found.add("get_version")
1792 |             if "versioneer.VCS" in line:
1793 |                 setters = True
1794 |             if "versioneer.versionfile_source" in line:
1795 |                 setters = True
1796 |     if len(found) != 3:
1797 |         print("")
1798 |         print("Your setup.py appears to be missing some important items")
1799 |         print("(but I might be wrong). Please make sure it has something")
1800 |         print("roughly like the following:")
1801 |         print("")
1802 |         print(" import versioneer")
1803 |         print(" setup( version=versioneer.get_version(),")
1804 |         print("        cmdclass=versioneer.get_cmdclass(), ...)")
1805 |         print("")
1806 |         errors += 1
1807 |     if setters:
1808 |         print("You should remove lines like 'versioneer.VCS = ' and")
1809 |         print("'versioneer.versionfile_source = ' . This configuration")
1810 |         print("now lives in setup.cfg, and should be removed from setup.py")
1811 |         print("")
1812 |         errors += 1
1813 |     return errors
1814 | 
1815 | 
1816 | if __name__ == "__main__":
1817 |     cmd = sys.argv[1]
1818 |     if cmd == "setup":
1819 |         errors = do_setup()
1820 |         errors += scan_setup_py()
1821 |         if errors:
1822 |             sys.exit(1)
1823 | 
--------------------------------------------------------------------------------