├── requirements.txt ├── .gitattributes ├── MANIFEST.in ├── gglsbl ├── __init__.py ├── utils.py ├── tests.py ├── client.py ├── protocol.py ├── storage.py └── _version.py ├── setup.cfg ├── misc └── protobuf.proto ├── setup.py ├── .gitignore ├── tox.ini ├── README.md ├── bin └── gglsbl_client.py ├── LICENSE └── versioneer.py /requirements.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | gglsbl/_version.py export-subst 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.md 2 | include versioneer.py 3 | include gglsbl/_version.py 4 | -------------------------------------------------------------------------------- /gglsbl/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = [ 4 | 'SafeBrowsingList' 5 | ] 6 | 7 | from gglsbl.client import SafeBrowsingList 8 | 9 | from gglsbl._version import get_versions 10 | __version__ = get_versions()['version'] 11 | del get_versions 12 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [metadata] 5 | license_file = LICENSE 6 | 7 | [versioneer] 8 | VCS = git 9 | style = pep440 10 | versionfile_source = gglsbl/_version.py 11 | versionfile_build = gglsbl/_version.py 12 | tag_prefix = 13 | parentdir_prefix = gglsbl- 14 | 15 | -------------------------------------------------------------------------------- /gglsbl/utils.py: 
"""Hex-encoding helper that works on both Python 2 and Python 3."""

import binascii
import sys


def to_hex_2(v):
    """Hex-encode a byte string on Python 2 via the 'hex' codec."""
    return v.encode("hex")


def to_hex_3(v):
    """Hex-encode a bytes value on Python 3; returns bytes (e.g. b'dead')."""
    return binascii.hexlify(v)


# Pick the implementation once at import time.  The original module-level
# ``global to_hex`` statement was a no-op (``global`` only has meaning inside
# a function body) and has been removed.
if sys.version_info >= (3, 0):
    to_hex = to_hex_3
else:
    to_hex = to_hex_2
#!/usr/bin/env python

"""Packaging script for the gglsbl library (versioneer-managed version)."""

from setuptools import setup

import versioneer

setup(
    name='gglsbl',
    # Version string is derived from git tags by versioneer, not hard-coded.
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
    description="Client library for Google Safe Browsing Update API v4",
    classifiers=[
        "Operating System :: POSIX",
        "Environment :: Console",
        "Programming Language :: Python",
        "Programming Language :: Python :: 2",
        "Programming Language :: Python :: 3",
        "Topic :: Internet",
        "Topic :: Security",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    keywords='google safe browsing api client',
    author='Aleh Filipovich',
    author_email='aleh@vaolix.com',
    url='https://github.com/afilipovich/gglsbl',
    license='Apache2',
    packages=['gglsbl'],
    install_requires=['google-api-python-client>=1.4.2,<2'],
    scripts=['bin/gglsbl_client.py'],
)
28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | flake8.txt 43 | pytest.xml 44 | 45 | # Translations 46 | *.mo 47 | *.pot 48 | 49 | # Django stuff: 50 | *.log 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # PyBuilder 56 | target/ 57 | 58 | # SQLite cache DB 59 | *.sqlite 60 | 61 | # Pycharm project files and venv 62 | .idea 63 | venv 64 | 65 | # ctags 66 | tags 67 | 68 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist = True 3 | envlist = py37,py311,lint,coverage-report 4 | 5 | [tox:jenkins] 6 | toxworkdir = /tmp/{env:BUILD_TAG}/.tox 7 | 8 | [testenv] 9 | setenv = 10 | PYTHONWARNINGS=ignore 11 | deps = 12 | -r{toxinidir}/requirements.txt 13 | coverage 14 | mock 15 | pytest 16 | pytest-flask 17 | pytest-factoryboy 18 | httpretty<0.6.5 19 | commands = 20 | {envbindir}/coverage run \ 21 | --parallel \ 22 | -m pytest \ 23 | --junitxml={toxinidir}/pytest.xml \ 24 | {posargs:-vv} 25 | 26 | [testenv:coverage-report] 27 | deps = coverage 28 | skip_install = True 29 | commands = 30 | {envbindir}/coverage combine 31 | - {envbindir}/coverage xml -i 32 | {envbindir}/coverage report 33 | 34 | [testenv:lint] 35 | basepython=python3.11 36 | deps = 37 | flake8 38 | flake8-builtins 39 | flake8-docstrings 40 | flake8-import-order 41 | flake8-mutable 42 | flake8-pep3101 43 | flake8-string-format 44 | skip_install = True 45 | commands = 46 | {envbindir}/flake8 \ 47 | --tee \ 48 | --output={toxinidir}/flake8.txt \ 49 | --exclude=gglsbl/_version.py,gglsbl/tests.py \ 50 | --ignore=D102,D103,D400,D401,D413,P101,D100,D101,D104,I100,I101,I201,A003,F821,I202,E741,P103,W503 \ 51 | {posargs} \ 52 | gglsbl bin 53 | 54 | [flake8] 55 | 
max-line-length = 120 56 | import-order-style = google 57 | 58 | [pytest] 59 | testpaths = gglsbl/tests.py 60 | addopts = --doctest-modules 61 | 62 | [coverage:run] 63 | branch = True 64 | omit = *lib/python3.7/*,*lib/python2.7/*,*lib/python3.11/* 65 | 66 | [coverage:report] 67 | show_missing = True 68 | exclude_lines = 69 | noqa: nocover 70 | if __name__ == '__main__': 71 | ; fail_under = 100 72 | 73 | [coverage:xml] 74 | output = coverage.xml 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | gglsbl 2 | ====== 3 | 4 | Python client library for Google Safe Browsing Update API v4. 5 | 6 | The code was developed according to official 7 | [Developers Guide](https://developers.google.com/safe-browsing/v4/update-api), however this is not a reference implementation. 8 | 9 | Quick start 10 | ----------- 11 | 12 | ###### Get Google API key 13 | Instructions to procure API key can be found [here](https://developers.google.com/safe-browsing/v4/get-started). 14 | Please note that v3/v4 key is different from v2.2 API. API v3 key may work with current API v4. 15 | 16 | ###### Install the library 17 | 18 | ``` 19 | python setup.py install 20 | ``` 21 | 22 | ###### To sync local hash prefix cache 23 | 24 | ```python 25 | from gglsbl import SafeBrowsingList 26 | sbl = SafeBrowsingList('API KEY GOES HERE') 27 | sbl.update_hash_prefix_cache() 28 | ``` 29 | 30 | ###### URL lookup 31 | 32 | ```python 33 | from gglsbl import SafeBrowsingList 34 | sbl = SafeBrowsingList('API KEY GOES HERE') 35 | threat_list = sbl.lookup_url('http://github.com/') 36 | if threat_list == None: 37 | print("no threat') 38 | else: 39 | print('threats: ' + str(threat_list)) 40 | ``` 41 | 42 | CLI Tool 43 | -------- 44 | *bin/gglsbl_client.py* can be used for a quick check or as a code example. 45 | 46 | ###### To immediately sync local cache with Safe Browsing API. 
47 | ``` 48 | gglsbl_client.py --api-key 'API KEY GOES HERE' --onetime 49 | ``` 50 | _Please mind [Request Frequency policy](https://developers.google.com/safe-browsing/v4/request-frequency) if you are going to use this command for more than a one-time test._ 51 | 52 | ###### To look up URL 53 | ``` 54 | gglsbl_client.py --api-key 'API KEY GOES HERE' --check-url http://github.com/ 55 | ``` 56 | 57 | ###### Fore more options please see 58 | ``` 59 | gglsbl_client.py --help 60 | ``` 61 | 62 | Running in Distributed Environment 63 | ------- 64 | For cases when multiple apps and/or servers would benifit from sharing same GSB cache please see [gglsbl-rest](https://github.com/mlsecproject/gglsbl-rest) project maintained by [Alexandre Sieira](https://github.com/asieira). 65 | 66 | Running on Python3 67 | ------------ 68 | Current version of library is fully compatible with both **python2.7** and **python3**. 69 | 70 | _If you prefer to use older v3 version of Safe Browsing API there is a [python3 port](https://github.com/Stefan-Code/gglsbl3) of the legacy version made by [Stefan](https://github.com/Stefan-Code)._ 71 | -------------------------------------------------------------------------------- /bin/gglsbl_client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Keeps local Google Safe Browsing cache in sync. 
4 | 5 | Accessing Google Safe Browsing API requires API key, you can find 6 | more info on getting it here: 7 | https://developers.google.com/safe-browsing/lookup_guide#GettingStarted 8 | 9 | """ 10 | 11 | import argparse 12 | import sys 13 | import time 14 | 15 | from gglsbl import SafeBrowsingList 16 | 17 | import logging 18 | log = logging.getLogger('gglsbl') 19 | log.setLevel(logging.DEBUG) 20 | 21 | 22 | def setupArgsParser(): 23 | parser = argparse.ArgumentParser(description=__doc__) 24 | parser.add_argument('--api-key', 25 | default=None, 26 | required=True, 27 | help='Safe Browsing API key [REQUIRED]') 28 | parser.add_argument('--db-path', 29 | default='/tmp/gsb_v4.db', 30 | help='Path to SQLite DB') 31 | parser.add_argument('--log', 32 | default=None, 33 | help='Path to log file, by default log to STDERR') 34 | parser.add_argument('--check-url', 35 | default=None, 36 | help='Check if URL is in black list and exit') 37 | parser.add_argument('--debug', 38 | default=False, 39 | action='store_true', 40 | help='Show debug output') 41 | parser.add_argument('--onetime', 42 | default=False, 43 | action='store_true', 44 | help='Run blacklists sync only once with reduced delays') 45 | parser.add_argument('--timeout', 46 | default=10, 47 | type=int, 48 | help=('SQLite connection timeout. Default is 10 seconds. 
Increase if you get' 49 | ' occasional "database is locked" errors')) 50 | parser.add_argument('--blacklisted-return-code', 51 | default=0, 52 | type=int, 53 | help='Return this code from process when URL is blacklisted') 54 | return parser 55 | 56 | 57 | def setupLogger(log_file, debug): 58 | formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') 59 | lh = log_file is None and logging.StreamHandler() or logging.FileHandler(log_file) 60 | lh.setLevel(debug and logging.DEBUG or logging.INFO) 61 | lh.setFormatter(formatter) 62 | log = logging.getLogger('gglsbl') 63 | log.addHandler(lh) 64 | 65 | 66 | def run_sync(sbl): 67 | try: 68 | sbl.update_hash_prefix_cache() 69 | except (KeyboardInterrupt, SystemExit): 70 | log.info('Shutting down') 71 | sys.exit(0) 72 | except Exception: 73 | log.exception('Failed to synchronize with GSB service') 74 | time.sleep(3) 75 | 76 | 77 | def main(): 78 | args_parser = setupArgsParser() 79 | args = args_parser.parse_args() 80 | setupLogger(args.log, args.debug) 81 | if args.check_url: 82 | sbl = SafeBrowsingList(args.api_key, db_path=args.db_path, timeout=args.timeout) 83 | bl = sbl.lookup_url(args.check_url) 84 | if bl is None: 85 | print('{} is not blacklisted'.format(args.check_url)) 86 | else: 87 | print('{} is blacklisted in {}'.format(args.check_url, bl)) 88 | sys.exit(args.blacklisted_return_code) 89 | sys.exit(0) 90 | if args.onetime: 91 | sbl = SafeBrowsingList(args.api_key, db_path=args.db_path, discard_fair_use_policy=True, timeout=args.timeout) 92 | run_sync(sbl) 93 | else: 94 | sbl = SafeBrowsingList(args.api_key, db_path=args.db_path, timeout=args.timeout) 95 | while True: 96 | run_sync(sbl) 97 | 98 | 99 | if __name__ == '__main__': 100 | main() 101 | -------------------------------------------------------------------------------- /gglsbl/tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from gglsbl.protocol import URL 4 | 5 | class 
SafeBrowsingListTestCase(unittest.TestCase): 6 | def setUp(self): 7 | self.canonical_urls = { 8 | "http://host/%25%32%35": "http://host/%25", 9 | "http://host/%25%32%35%25%32%35": "http://host/%25%25", 10 | "http://host/%2525252525252525": "http://host/%25", 11 | "http://host/asdf%25%32%35asd": "http://host/asdf%25asd", 12 | "http://host/%%%25%32%35asd%%": "http://host/%25%25%25asd%25%25", 13 | "http://www.google.com/": "http://www.google.com/", 14 | "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/": "http://168.188.99.26/.secure/www.ebay.com/", 15 | "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/": "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/", 16 | "http://host%23.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B": "http://host%23.com/~a!b@c%23d$e%25f^00&11*22(33)44_55+", 17 | "http://3279880203/blah": "http://195.127.0.11/blah", 18 | "http://0xc37f000b/blah": "http://195.127.0.11/blah", 19 | "http://www.google.com/blah/..": "http://www.google.com/", 20 | "www.google.com/": "http://www.google.com/", 21 | "www.google.com": "http://www.google.com/", 22 | "http://www.evil.com/blah#frag": "http://www.evil.com/blah", 23 | "http://www.GOOgle.com/": "http://www.google.com/", 24 | "google.com": "http://google.com/", 25 | "google.com:443/abc": "http://google.com:443/abc", 26 | "//google.com:443/abc": "http://google.com:443/abc", 27 | "ftp://google.com:443/abc": "ftp://google.com:443/abc", 28 | "http://www.google.com.../": "http://www.google.com/", 29 | "http://www.google.com/foo\tbar\rbaz\n2": "http://www.google.com/foobarbaz2", 30 | "http://www.google.com/q?": "http://www.google.com/q?", 31 | "http://www.google.com/q?r?": "http://www.google.com/q?r?", 32 | "http://www.google.com/q?r?s": "http://www.google.com/q?r?s", 33 | 
"http://evil.com/foo#bar#baz": "http://evil.com/foo", 34 | "http://evil.com/foo;": "http://evil.com/foo;", 35 | "http://evil.com/foo?bar;": "http://evil.com/foo?bar;", 36 | b"http://\x01\x80.com/": "http://%01%80.com/", 37 | b"http://\x01\xf0.com/": "http://%01%F0.com/", 38 | "http://notrailingslash.com": "http://notrailingslash.com/", 39 | "http://www.gotaport.com:1234/": "http://www.gotaport.com:1234/", 40 | " http://www.google.com/ ": "http://www.google.com/", 41 | "http:// leadingspace.com/": "http://%20leadingspace.com/", 42 | "http://%20leadingspace.com/": "http://%20leadingspace.com/", 43 | "%20leadingspace.com/": "http://%20leadingspace.com/", 44 | "https://www.securesite.com/": "https://www.securesite.com/", 45 | "http://host.com/ab%23cd": "http://host.com/ab%23cd", 46 | "http://host.com//twoslashes?more//slashes": "http://host.com/twoslashes?more//slashes", 47 | "http://www.wtp101.com/bk?redir=http%3A%2F%2Ftags.bluekai.com%2Fsite%2F2750%3Fid%3D%3CPARTNER_UUID%3E%0D%0A%26redir%3Dhttp%3A%2F%2Fwww.wtp101.com%2Fpush%2Fbluekai%3Fxid%3D%24BK_UUID": "http://www.wtp101.com/bk?redir=http://tags.bluekai.com/site/2750?id=%0D%0A&redir=http://www.wtp101.com/push/bluekai?xid=$BK_UUID", 48 | } 49 | 50 | self.url_permutations = { 51 | 'http://a.b.c/1/2.html?param=1': [ 52 | 'a.b.c/1/2.html?param=1', 53 | 'a.b.c/1/2.html', 54 | 'a.b.c/', 55 | 'a.b.c/1/', 56 | 'b.c/1/2.html?param=1', 57 | 'b.c/1/2.html', 58 | 'b.c/', 59 | 'b.c/1/', 60 | ], 61 | 'http://a.b.c/1/2/?param=1': [ 62 | 'a.b.c/1/2/?param=1', 63 | 'a.b.c/1/2/', 64 | 'a.b.c/', 65 | 'a.b.c/1/', 66 | 'b.c/1/2/?param=1', 67 | 'b.c/1/2/', 68 | 'b.c/', 69 | 'b.c/1/', 70 | ], 71 | 'http://1.2.3.4/1/2.html?param=1': [ 72 | '1.2.3.4/1/2.html?param=1', 73 | '1.2.3.4/1/2.html', 74 | '1.2.3.4/', 75 | '1.2.3.4/1/', 76 | ], 77 | 'http://a.b.c/1/2/3/4/5/6/7.html?param=1': [ 78 | 'a.b.c/1/2/3/4/5/6/7.html?param=1', 79 | 'a.b.c/1/2/3/4/5/6/7.html', 80 | 'a.b.c/', 81 | 'a.b.c/1/', 82 | 'a.b.c/1/2/', 83 | 'a.b.c/1/2/3/', 84 | 
#!/usr/bin/env python

"""Client interface for Google Safe Browsing Update API v4."""

from base64 import b64decode

import logging

from gglsbl.utils import to_hex
from gglsbl.protocol import SafeBrowsingApiClient, URL
from gglsbl.storage import SqliteStorage, ThreatList, HashPrefixList


log = logging.getLogger('gglsbl')
log.addHandler(logging.NullHandler())


class SafeBrowsingList(object):
    """Interface for Google Safe Browsing API

    supporting partial update of the local cache.
    https://developers.google.com/safe-browsing/v4/
    """

    def __init__(self, api_key, db_path='/tmp/gsb_v4.db',
                 discard_fair_use_policy=False, platforms=None, timeout=10):
        """Constructor.

        Args:
            api_key: string, a key for API authentication.
            db_path: string, path to SQLite DB file to store cached data.
            discard_fair_use_policy: boolean, disable request frequency throttling (only for testing).
            platforms: list, threat lists to look up, default includes all platforms.
            timeout: seconds to wait for Sqlite DB to become unlocked from concurrent WRITE transaction.
        """
        self.api_client = SafeBrowsingApiClient(api_key, discard_fair_use_policy=discard_fair_use_policy)
        self.storage = SqliteStorage(db_path, timeout=timeout)
        self.platforms = platforms

    def _verify_threat_list_checksum(self, threat_list, remote_checksum):
        """Return True when the local prefix list checksum matches the server's."""
        local_checksum = self.storage.hash_prefix_list_checksum(threat_list)
        return remote_checksum == local_checksum

    def update_hash_prefix_cache(self):
        """Update locally cached threat lists.

        Expired full hashes are purged first, then the set of tracked threat
        lists is reconciled with the server, then hash prefixes are synced.
        Any failure rolls the storage back before re-raising.
        """
        try:
            self.storage.cleanup_full_hashes()
            self.storage.commit()
            self._sync_threat_lists()
            self.storage.commit()
            self._sync_hash_prefix_cache()
        except Exception:
            self.storage.rollback()
            raise

    def _sync_threat_lists(self):
        """Reconcile locally tracked threat lists with those the server offers.

        Lists no longer advertised by the server (or filtered out by
        ``self.platforms``) are dropped together with their hash prefixes.
        """
        threat_lists_to_remove = dict()
        for ts in self.storage.get_threat_lists():
            threat_lists_to_remove[repr(ts)] = ts
        threat_lists = self.api_client.get_threats_lists()
        for entry in threat_lists:
            threat_list = ThreatList.from_api_entry(entry)
            if self.platforms is None or threat_list.platform_type in self.platforms:
                self.storage.add_threat_list(threat_list)
                try:
                    del threat_lists_to_remove[repr(threat_list)]
                except KeyError:
                    pass
        for ts in threat_lists_to_remove.values():
            self.storage.delete_hash_prefix_list(ts)
            self.storage.delete_threat_list(ts)
        del threat_lists_to_remove

    def _sync_hash_prefix_cache(self):
        """Apply per-list hash prefix updates (removals then additions).

        After applying an update the local checksum must match the server's;
        a mismatch means the local cache is corrupt and an exception is raised.
        """
        client_state = self.storage.get_client_state()
        for response in self.api_client.get_threats_update(client_state):
            response_threat_list = ThreatList(response['threatType'], response['platformType'],
                                              response['threatEntryType'])
            if response['responseType'] == 'FULL_UPDATE':
                # Full update replaces the entire list, so wipe it first.
                self.storage.delete_hash_prefix_list(response_threat_list)
            for r in response.get('removals', []):
                self.storage.remove_hash_prefix_indices(response_threat_list, r['rawIndices']['indices'])
            for a in response.get('additions', []):
                hash_prefix_list = HashPrefixList(a['rawHashes']['prefixSize'], b64decode(a['rawHashes']['rawHashes']))
                self.storage.populate_hash_prefix_list(response_threat_list, hash_prefix_list)
            expected_checksum = b64decode(response['checksum']['sha256'])
            log.info('Verifying threat hash prefix list checksum')
            if self._verify_threat_list_checksum(response_threat_list, expected_checksum):
                log.info('Local cache checksum matches the server: {}'.format(to_hex(expected_checksum)))
                self.storage.update_threat_list_client_state(response_threat_list, response['newClientState'])
                self.storage.commit()
            else:
                raise Exception('Local cache checksum does not match the server: '
                                '"{}". Consider removing {}'.format(to_hex(expected_checksum), self.storage.db_path))

    def _sync_full_hashes(self, hash_prefixes):
        """Download full hashes matching hash_prefixes.

        Also update cache expiration timestamps.
        """
        client_state = self.storage.get_client_state()
        fh_response = self.api_client.get_full_hashes(hash_prefixes, client_state)

        # update negative cache for each hash prefix
        # store full hash (insert or update) with positive cache bumped up
        for m in fh_response.get('matches', []):
            threat_list = ThreatList(m['threatType'], m['platformType'], m['threatEntryType'])
            hash_value = b64decode(m['threat']['hash'])
            cache_duration = int(m['cacheDuration'].rstrip('s'))
            malware_threat_type = None
            for metadata in m['threatEntryMetadata'].get('entries', []):
                k = b64decode(metadata['key'])
                v = b64decode(metadata['value'])
                # BUGFIX: b64decode returns bytes on Python 3, so the key must
                # be compared against a bytes literal; comparing to the text
                # literal 'malware_threat_type' never matched there and the
                # metadata was silently dropped.  On Python 2 b'...' is str,
                # so behavior is unchanged.
                if k == b'malware_threat_type':
                    malware_threat_type = v
            self.storage.store_full_hash(threat_list, hash_value, cache_duration, malware_threat_type)

        negative_cache_duration = int(fh_response['negativeCacheDuration'].rstrip('s'))
        for prefix_value in hash_prefixes:
            self.storage.update_hash_prefix_expiration(prefix_value, negative_cache_duration)

    def lookup_url(self, url):
        """Look up specified URL in Safe Browsing threat lists.

        Args:
            url: URL to look up.

        Returns:
            List of threat list names the URL is blacklisted in,
            or None when it is not blacklisted.

        Raises:
            ValueError: if the input string is empty.
        """
        if not isinstance(url, str):
            # Python 2 compatibility: coerce unicode input to a UTF-8 str.
            url = url.encode('utf8')
        if not url.strip():
            raise ValueError("Empty input string.")
        url_hashes = URL(url).hashes
        try:
            list_names = self._lookup_hashes(url_hashes)
            self.storage.commit()
        except Exception:
            self.storage.rollback()
            raise
        if list_names:
            return list_names
        return None

    def _lookup_hashes(self, full_hashes):
        """Lookup URL hash in blacklists

        Returns names of lists it was found in.
        """
        full_hashes = list(full_hashes)
        cues = [fh[0:4] for fh in full_hashes]
        result = []
        matching_prefixes = {}
        matching_full_hashes = set()
        is_potential_threat = False
        # First lookup hash prefixes which match full URL hash
        for (hash_prefix, negative_cache_expired) in self.storage.lookup_hash_prefix(cues):
            for full_hash in full_hashes:
                if full_hash.startswith(hash_prefix):
                    is_potential_threat = True
                    # consider hash prefix negative cache as expired if it is expired in at least one threat list
                    matching_prefixes[hash_prefix] = matching_prefixes.get(hash_prefix, False) or negative_cache_expired
                    matching_full_hashes.add(full_hash)
        # if none matches, URL hash is clear
        if not is_potential_threat:
            return []
        # if there is non-expired full hash, URL is blacklisted
        matching_expired_threat_lists = set()
        for threat_list, has_expired in self.storage.lookup_full_hashes(matching_full_hashes):
            if has_expired:
                matching_expired_threat_lists.add(threat_list)
            else:
                result.append(threat_list)
        if result:
            return result

        # If there are no matching expired full hash entries
        # and negative cache is still current for all prefixes, consider it safe
        if len(matching_expired_threat_lists) == 0 and sum(map(int, matching_prefixes.values())) == 0:
            log.info('Negative cache hit.')
            return []

        # Now we can assume that there are expired matching full hash entries and/or
        # cache prefix entries with expired negative cache. Both require full hash sync.
        self._sync_full_hashes(matching_prefixes.keys())
        # Now repeat full hash lookup
        for threat_list, has_expired in self.storage.lookup_full_hashes(matching_full_hashes):
            if not has_expired:
                result.append(threat_list)
        return result
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /gglsbl/protocol.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | from functools import wraps 5 | 6 | try: 7 | import urllib 8 | import urlparse 9 | except ImportError: 10 | import urllib.parse as urllib 11 | from urllib import parse as urlparse 12 | 13 | import struct 14 | import time 15 | import posixpath 16 | import re 17 | import hashlib 18 | import socket 19 | import random 20 | from base64 import b64encode 21 | 22 | try: 23 | from googleapiclient.discovery import build 24 | from googleapiclient.errors import HttpError 25 | except ImportError: 26 | from apiclient.discovery import build 27 | from apiclient.errors import HttpError 28 | 29 | import logging 30 | from ._version import get_versions 31 | 32 | 33 | __version__ = get_versions()['version'] 34 | del get_versions 35 | 36 | log = logging.getLogger('gglsbl') 37 | log.addHandler(logging.NullHandler()) 38 | 39 | 40 | _fail_count = 0 41 | 42 | 43 | def autoretry(func): 44 | @wraps(func) 45 | def 
wrapper(*args, **kwargs): 46 | global _fail_count 47 | while True: 48 | try: 49 | r = func(*args, **kwargs) 50 | _fail_count = 0 51 | return r 52 | except HttpError as e: 53 | if not (hasattr(e, 'resp') and 'status' in e.resp 54 | and e.resp['status'].isdigit and int(e.resp['status']) >= 500): 55 | raise # we do not want to retry auth errors etc. 56 | _fail_count += 1 57 | wait_for = min(2 ** (_fail_count - 1) * 15 * 60 * (1 + random.random()), 24 * 60 * 60) 58 | log.exception('Call Failed for %s time(s). Retrying in %s seconds: %s', 59 | _fail_count, wait_for, str(e)) 60 | time.sleep(wait_for) 61 | except socket.error: 62 | transient_error_wait = 2 63 | log.exception('Socket error, retrying in {} seconds.'.format(transient_error_wait)) 64 | time.sleep(transient_error_wait) 65 | return wrapper 66 | 67 | 68 | class SafeBrowsingApiClient(object): 69 | def __init__(self, developer_key, client_id='python-gglsbl', 70 | client_version=__version__, discard_fair_use_policy=True): 71 | """Constructor. 
72 | 73 | :param developer_key: Google API key 74 | :param discard_fair_use_policy: do not wait between individual API calls as requested by the spec 75 | """ 76 | self.client_id = client_id 77 | self.client_version = client_version 78 | self.discard_fair_use_policy = discard_fair_use_policy 79 | if self.discard_fair_use_policy: 80 | log.warn('Circumventing request frequency throttling is against Safe Browsing API policy.') 81 | self.service = build('safebrowsing', 'v4', developerKey=developer_key, cache_discovery=False) 82 | self.next_threats_update_req_no_sooner_than = None 83 | self.next_full_hashes_req_no_sooner_than = None 84 | 85 | def get_wait_duration(self, response): 86 | if self.discard_fair_use_policy: 87 | return None 88 | minimum_wait_duration = response.get('minimumWaitDuration') 89 | if minimum_wait_duration is None: 90 | return None 91 | return time.time() + float(minimum_wait_duration.rstrip('s')) 92 | 93 | @staticmethod 94 | def fair_use_delay(next_request_no_sooner_than): 95 | if next_request_no_sooner_than is not None: 96 | sleep_for = max(0, next_request_no_sooner_than - time.time()) 97 | log.info('Sleeping for {} seconds until next request.'.format(sleep_for)) 98 | time.sleep(sleep_for) 99 | 100 | @autoretry 101 | def get_threats_lists(self): 102 | """Retrieve all available threat lists""" 103 | response = self.service.threatLists().list().execute() 104 | return response['threatLists'] 105 | 106 | def get_threats_update(self, client_state): 107 | """Fetch hash prefixes update for given threat list. 
108 | 109 | client_state is a dict which looks like {(threatType, platformType, threatEntryType): clientState} 110 | """ 111 | request_body = { 112 | "client": { 113 | "clientId": self.client_id, 114 | "clientVersion": self.client_version, 115 | }, 116 | "listUpdateRequests": [], 117 | } 118 | for (threat_type, platform_type, threat_entry_type), current_state in client_state.items(): 119 | request_body['listUpdateRequests'].append( 120 | { 121 | "threatType": threat_type, 122 | "platformType": platform_type, 123 | "threatEntryType": threat_entry_type, 124 | "state": current_state, 125 | "constraints": { 126 | "supportedCompressions": ["RAW"] 127 | } 128 | } 129 | ) 130 | self.fair_use_delay(self.next_threats_update_req_no_sooner_than) 131 | 132 | @autoretry 133 | def _get_threats_update(): 134 | nonlocal self, request_body 135 | res = self.service.threatListUpdates().fetch(body=request_body).execute() 136 | self.next_threats_update_req_no_sooner_than = self.get_wait_duration(res) 137 | return res['listUpdateResponses'] 138 | 139 | return _get_threats_update() 140 | 141 | def get_full_hashes(self, prefixes, client_state): 142 | """Find full hashes matching hash prefixes. 
143 | 144 | client_state is a dict which looks like {(threatType, platformType, threatEntryType): clientState} 145 | """ 146 | request_body = { 147 | "client": { 148 | "clientId": self.client_id, 149 | "clientVersion": self.client_version, 150 | }, 151 | "clientStates": [], 152 | "threatInfo": { 153 | "threatTypes": [], 154 | "platformTypes": [], 155 | "threatEntryTypes": [], 156 | "threatEntries": [], 157 | } 158 | } 159 | for prefix in prefixes: 160 | request_body['threatInfo']['threatEntries'].append({"hash": b64encode(prefix).decode()}) 161 | for ((threatType, platformType, threatEntryType), clientState) in client_state.items(): 162 | request_body['clientStates'].append(clientState) 163 | if threatType not in request_body['threatInfo']['threatTypes']: 164 | request_body['threatInfo']['threatTypes'].append(threatType) 165 | if platformType not in request_body['threatInfo']['platformTypes']: 166 | request_body['threatInfo']['platformTypes'].append(platformType) 167 | if threatEntryType not in request_body['threatInfo']['threatEntryTypes']: 168 | request_body['threatInfo']['threatEntryTypes'].append(threatEntryType) 169 | self.fair_use_delay(self.next_full_hashes_req_no_sooner_than) 170 | 171 | @autoretry 172 | def _get_full_hashes(): 173 | nonlocal self, request_body 174 | res = self.service.fullHashes().find(body=request_body).execute() 175 | self.next_full_hashes_req_no_sooner_than = self.get_wait_duration(res) 176 | return res 177 | 178 | return _get_full_hashes() 179 | 180 | 181 | class URL(object): 182 | """URL representation suitable for lookup""" 183 | 184 | __py3 = (sys.version_info > (3, 0)) 185 | 186 | def __init__(self, url): 187 | """Constructor. 188 | 189 | :param url: can be either of str or bytes type. 
190 | """ 191 | if self.__py3: 192 | if type(url) is bytes: 193 | self.url = bytes(url) 194 | else: 195 | self.url = url.encode() 196 | else: 197 | self.url = str(url) 198 | 199 | @property 200 | def hashes(self): 201 | """Hashes of all possible permutations of the URL in canonical form""" 202 | for url_variant in self.url_permutations(self.canonical): 203 | url_hash = self.digest(url_variant) 204 | yield url_hash 205 | 206 | @property 207 | def canonical(self): 208 | """Convert URL to its canonical form.""" 209 | def full_unescape(u): 210 | uu = urllib.unquote(u) 211 | if uu == u: 212 | return uu 213 | else: 214 | return full_unescape(uu) 215 | 216 | def full_unescape_to_bytes(u): 217 | uu = urlparse.unquote_to_bytes(u) 218 | if uu == u: 219 | return uu 220 | else: 221 | return full_unescape_to_bytes(uu) 222 | 223 | def quote(s): 224 | safe_chars = '!"$&\'()*+,-./:;<=>?@[\\]^_`{|}~' 225 | return urllib.quote(s, safe=safe_chars) 226 | 227 | url = self.url.strip() 228 | url = url.replace(b'\n', b'').replace(b'\r', b'').replace(b'\t', b'') 229 | url = url.split(b'#', 1)[0] 230 | if url.startswith(b'//'): 231 | url = b'http:' + url 232 | if len(url.split(b'://')) <= 1: 233 | url = b'http://' + url 234 | # at python3 work with bytes instead of string 235 | # as URL may contain invalid unicode characters 236 | if self.__py3 and type(url) is bytes: 237 | url = quote(full_unescape_to_bytes(url)) 238 | else: 239 | url = quote(full_unescape(url)) 240 | url_parts = urlparse.urlsplit(url) 241 | if not url_parts[0]: 242 | url = 'http://{}'.format(url) 243 | url_parts = urlparse.urlsplit(url) 244 | protocol = url_parts.scheme 245 | if self.__py3: 246 | host = full_unescape_to_bytes(url_parts.hostname) 247 | path = full_unescape_to_bytes(url_parts.path) 248 | else: 249 | host = full_unescape(url_parts.hostname) 250 | path = full_unescape(url_parts.path) 251 | query = url_parts.query 252 | if not query and '?' 
not in url: 253 | query = None 254 | if not path: 255 | path = b'/' 256 | has_trailing_slash = (path[-1:] == b'/') 257 | path = posixpath.normpath(path).replace(b'//', b'/') 258 | if has_trailing_slash and path[-1:] != b'/': 259 | path = path + b'/' 260 | port = url_parts.port 261 | host = host.strip(b'.') 262 | host = re.sub(br'\.+', b'.', host).lower() 263 | if host.isdigit(): 264 | try: 265 | host = socket.inet_ntoa(struct.pack("!I", int(host))) 266 | except Exception: 267 | pass 268 | elif host.startswith(b'0x') and b'.' not in host: 269 | try: 270 | host = socket.inet_ntoa(struct.pack("!I", int(host, 16))) 271 | except Exception: 272 | pass 273 | quoted_path = quote(path) 274 | quoted_host = quote(host) 275 | if port is not None: 276 | quoted_host = '{}:{}'.format(quoted_host, port) 277 | canonical_url = '{}://{}{}'.format(protocol, quoted_host, quoted_path) 278 | if query is not None: 279 | canonical_url = '{}?{}'.format(canonical_url, query) 280 | return canonical_url 281 | 282 | @staticmethod 283 | def url_permutations(url): 284 | """Try all permutations of hostname and path which can be applied 285 | 286 | to blacklisted URLs 287 | """ 288 | def url_host_permutations(host): 289 | if re.match(r'\d+\.\d+\.\d+\.\d+', host): 290 | yield host 291 | return 292 | parts = host.split('.') 293 | l = min(len(parts), 5) 294 | if l > 4: 295 | yield host 296 | for i in range(l - 1): 297 | yield '.'.join(parts[i - l:]) 298 | 299 | def url_path_permutations(path): 300 | yield path 301 | query = None 302 | if '?' 
in path: 303 | path, query = path.split('?', 1) 304 | if query is not None: 305 | yield path 306 | path_parts = path.split('/')[0:-1] 307 | curr_path = '' 308 | for i in range(min(4, len(path_parts))): 309 | curr_path = curr_path + path_parts[i] + '/' 310 | yield curr_path 311 | 312 | protocol, address_str = urllib.splittype(url) 313 | host, path = urllib.splithost(address_str) 314 | user, host = urllib.splituser(host) 315 | host, port = urllib.splitport(host) 316 | host = host.strip('/') 317 | seen_permutations = set() 318 | for h in url_host_permutations(host): 319 | for p in url_path_permutations(path): 320 | u = '{}{}'.format(h, p) 321 | if u not in seen_permutations: 322 | yield u 323 | seen_permutations.add(u) 324 | 325 | @staticmethod 326 | def digest(url): 327 | """Hash the URL""" 328 | return hashlib.sha256(url.encode('utf-8')).digest() 329 | 330 | 331 | if __name__ == '__main__': 332 | from pprint import pprint 333 | c = SafeBrowsingApiClient('AIzaSyATpqLltciaMve61Wywb5yNDA8D8BvXEn4') 334 | r = c.get_threats_lists() 335 | pprint(r) 336 | -------------------------------------------------------------------------------- /gglsbl/storage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import hashlib 5 | import contextlib 6 | import sqlite3 7 | import logging 8 | 9 | from gglsbl.utils import to_hex 10 | 11 | 12 | log = logging.getLogger('gglsbl') 13 | log.addHandler(logging.NullHandler()) 14 | 15 | 16 | class ThreatList(object): 17 | """Represents threat list name.""" 18 | 19 | def __init__(self, threat_type, platform_type, threat_entry_type): 20 | """Constructor.""" 21 | self.threat_type = threat_type 22 | self.platform_type = platform_type 23 | self.threat_entry_type = threat_entry_type 24 | 25 | @classmethod 26 | def from_api_entry(cls, entry): 27 | return cls(entry['threatType'], entry['platformType'], entry['threatEntryType']) 28 | 29 | def as_tuple(self): 30 | return 
(self.threat_type, self.platform_type, self.threat_entry_type) 31 | 32 | def __repr__(self): 33 | """String representation of object""" 34 | return '/'.join(self.as_tuple()) 35 | 36 | 37 | class HashPrefixList(object): 38 | """Wrapper object for threat list data.""" 39 | 40 | def __init__(self, prefix_size, raw_hashes): 41 | """Constructor. 42 | 43 | :param prefix_size: size of hash prefix in bytes (typically 4, sometimes 6) 44 | :param raw_hashes: string consisting of concatenated hash prefixes. 45 | """ 46 | self.prefix_size = prefix_size 47 | self.raw_hashes = raw_hashes 48 | 49 | def __len__(self): 50 | """Number of individual hash prefixes in the list.""" 51 | return int(len(self.raw_hashes) / self.prefix_size) 52 | 53 | def __iter__(self): 54 | """Iterate through concatenated raw hashes.""" 55 | n = self.prefix_size 56 | return (self.raw_hashes[i:i + n] for i in range(0, len(self.raw_hashes), n)) 57 | 58 | 59 | class SqliteStorage(object): 60 | """Storage abstraction for local GSB cache.""" 61 | 62 | schema_version = '1.1' 63 | 64 | def __init__(self, db_path, timeout=10): 65 | """Constructor. 66 | 67 | :param db_path: path to Sqlite DB file 68 | :timeout: Sqlite lock wait timeout in seconds 69 | """ 70 | self.db_path = db_path 71 | do_init_db = not os.path.isfile(db_path) 72 | log.info('Opening SQLite DB {}'.format(db_path)) 73 | self.db = sqlite3.connect(db_path, timeout) 74 | if do_init_db: 75 | log.info('SQLite DB does not exist, initializing') 76 | self.init_db() 77 | if not self.check_schema_version(): 78 | log.warning("Cache schema is not compatible with this library version. 
Re-creating sqlite DB %s", db_path) 79 | self.db.close() 80 | os.unlink(db_path) 81 | self.db = sqlite3.connect(db_path, timeout) 82 | self.init_db() 83 | self.db.cursor().execute('PRAGMA synchronous = 0') 84 | self.db.cursor().execute('PRAGMA journal_mode = WAL') 85 | 86 | def check_schema_version(self): 87 | q = "SELECT value FROM metadata WHERE name='schema_version'" 88 | v = None 89 | with self.get_cursor() as dbc: 90 | try: 91 | dbc.execute(q) 92 | v = dbc.fetchall()[0][0] 93 | except sqlite3.OperationalError: 94 | log.error('Can not get schema version, it is probably outdated.') 95 | return False 96 | self.db.rollback() # prevent dangling transaction while instance is idle after init 97 | return v == self.schema_version 98 | 99 | @contextlib.contextmanager 100 | def get_cursor(self): 101 | dbc = self.db.cursor() 102 | try: 103 | yield dbc 104 | finally: 105 | dbc.close() 106 | 107 | def init_db(self): 108 | self.db.cursor().execute('PRAGMA synchronous = 0') 109 | self.db.cursor().execute('PRAGMA journal_mode = WAL') 110 | with self.get_cursor() as dbc: 111 | dbc.execute( 112 | """CREATE TABLE metadata ( 113 | name character varying(128) NOT NULL PRIMARY KEY, 114 | value character varying(128) NOT NULL 115 | )""" 116 | ) 117 | dbc.execute( 118 | """INSERT INTO metadata (name, value) VALUES ('schema_version', '{}')""".format(self.schema_version) 119 | ) 120 | dbc.execute( 121 | """CREATE TABLE threat_list ( 122 | threat_type character varying(128) NOT NULL, 123 | platform_type character varying(128) NOT NULL, 124 | threat_entry_type character varying(128) NOT NULL, 125 | client_state character varying(42), 126 | timestamp timestamp without time zone DEFAULT current_timestamp, 127 | PRIMARY KEY (threat_type, platform_type, threat_entry_type) 128 | )""" 129 | ) 130 | dbc.execute( 131 | """CREATE TABLE full_hash ( 132 | value BLOB NOT NULL, 133 | threat_type character varying(128) NOT NULL, 134 | platform_type character varying(128) NOT NULL, 135 | 
threat_entry_type character varying(128) NOT NULL, 136 | downloaded_at timestamp without time zone DEFAULT current_timestamp, 137 | expires_at timestamp without time zone NOT NULL DEFAULT current_timestamp, 138 | malware_threat_type varchar(32), 139 | PRIMARY KEY (value, threat_type, platform_type, threat_entry_type) 140 | )""" 141 | ) 142 | dbc.execute( 143 | """CREATE TABLE hash_prefix ( 144 | value BLOB NOT NULL, 145 | cue BLOB NOT NULL, 146 | threat_type character varying(128) NOT NULL, 147 | platform_type character varying(128) NOT NULL, 148 | threat_entry_type character varying(128) NOT NULL, 149 | timestamp timestamp without time zone DEFAULT current_timestamp, 150 | negative_expires_at timestamp without time zone NOT NULL DEFAULT current_timestamp, 151 | PRIMARY KEY (value, threat_type, platform_type, threat_entry_type), 152 | FOREIGN KEY(threat_type, platform_type, threat_entry_type) 153 | REFERENCES threat_list(threat_type, platform_type, threat_entry_type) 154 | ON DELETE CASCADE 155 | ) 156 | """ 157 | ) 158 | dbc.execute( 159 | """CREATE INDEX idx_hash_prefix_cue ON hash_prefix (cue)""" 160 | ) 161 | dbc.execute( 162 | """CREATE INDEX idx_hash_prefix_list ON hash_prefix (threat_type, platform_type, threat_entry_type)""" 163 | ) 164 | dbc.execute( 165 | """CREATE INDEX idx_full_hash_expires_at ON full_hash (expires_at)""" 166 | ) 167 | dbc.execute( 168 | """CREATE INDEX idx_full_hash_value ON full_hash (value)""" 169 | ) 170 | self.db.commit() 171 | 172 | def lookup_full_hashes(self, hash_values): 173 | """Query DB to see if hash is blacklisted""" 174 | q = '''SELECT threat_type,platform_type,threat_entry_type, expires_at < current_timestamp AS has_expired 175 | FROM full_hash WHERE value IN ({}) 176 | ''' 177 | output = [] 178 | with self.get_cursor() as dbc: 179 | placeholders = ','.join(['?'] * len(hash_values)) 180 | dbc.execute(q.format(placeholders), [sqlite3.Binary(hv) for hv in hash_values]) 181 | for h in dbc.fetchall(): 182 | threat_type, 
platform_type, threat_entry_type, has_expired = h 183 | threat_list = ThreatList(threat_type, platform_type, threat_entry_type) 184 | output.append((threat_list, has_expired)) 185 | return output 186 | 187 | def lookup_hash_prefix(self, cues): 188 | """Lookup hash prefixes by cue (first 4 bytes of hash) 189 | 190 | Returns a tuple of (value, negative_cache_expired). 191 | """ 192 | q = '''SELECT value, MAX(negative_expires_at < current_timestamp) AS negative_cache_expired 193 | FROM hash_prefix WHERE cue IN ({}) GROUP BY 1 194 | ''' 195 | output = [] 196 | with self.get_cursor() as dbc: 197 | dbc.execute(q.format(','.join(['?'] * len(cues))), [sqlite3.Binary(cue) for cue in cues]) 198 | for h in dbc.fetchall(): 199 | value, negative_cache_expired = h 200 | output.append((bytes(value), negative_cache_expired)) 201 | return output 202 | 203 | def store_full_hash(self, threat_list, hash_value, cache_duration, malware_threat_type): 204 | """Store full hash found for the given hash prefix""" 205 | log.info('Storing full hash %s to list %s with cache duration %s', 206 | to_hex(hash_value), str(threat_list), cache_duration) 207 | qi = '''INSERT OR IGNORE INTO full_hash 208 | (value, threat_type, platform_type, threat_entry_type, malware_threat_type, downloaded_at) 209 | VALUES 210 | (?, ?, ?, ?, ?, current_timestamp) 211 | ''' 212 | qu = "UPDATE full_hash SET expires_at=datetime(current_timestamp, '+{} SECONDS') \ 213 | WHERE value=? AND threat_type=? AND platform_type=? AND threat_entry_type=?" 
214 | 215 | i_parameters = [sqlite3.Binary(hash_value), threat_list.threat_type, 216 | threat_list.platform_type, threat_list.threat_entry_type, malware_threat_type] 217 | u_parameters = [sqlite3.Binary(hash_value), threat_list.threat_type, 218 | threat_list.platform_type, threat_list.threat_entry_type] 219 | 220 | with self.get_cursor() as dbc: 221 | dbc.execute(qi, i_parameters) 222 | dbc.execute(qu.format(int(cache_duration)), u_parameters) 223 | 224 | def delete_hash_prefix_list(self, threat_list): 225 | q = '''DELETE FROM hash_prefix 226 | WHERE threat_type=? AND platform_type=? AND threat_entry_type=? 227 | ''' 228 | parameters = [threat_list.threat_type, threat_list.platform_type, threat_list.threat_entry_type] 229 | with self.get_cursor() as dbc: 230 | dbc.execute(q, parameters) 231 | 232 | def cleanup_full_hashes(self, keep_expired_for=(60 * 60 * 12)): 233 | """Remove long expired full_hash entries.""" 234 | q = '''DELETE FROM full_hash WHERE expires_at < datetime(current_timestamp, '-{} SECONDS') 235 | ''' 236 | log.info('Cleaning up full_hash entries expired more than {} seconds ago.'.format(keep_expired_for)) 237 | with self.get_cursor() as dbc: 238 | dbc.execute(q.format(int(keep_expired_for))) 239 | 240 | def update_hash_prefix_expiration(self, prefix_value, negative_cache_duration): 241 | q = """UPDATE hash_prefix SET negative_expires_at=datetime(current_timestamp, '+{} SECONDS') 242 | WHERE value=?""" 243 | parameters = [sqlite3.Binary(prefix_value)] 244 | with self.get_cursor() as dbc: 245 | dbc.execute(q.format(int(negative_cache_duration)), parameters) 246 | 247 | def get_threat_lists(self): 248 | """Get a list of known threat lists.""" 249 | q = '''SELECT threat_type,platform_type,threat_entry_type FROM threat_list''' 250 | output = [] 251 | with self.get_cursor() as dbc: 252 | dbc.execute(q) 253 | for h in dbc.fetchall(): 254 | threat_type, platform_type, threat_entry_type = h 255 | threat_list = ThreatList(threat_type, platform_type, 
threat_entry_type) 256 | output.append(threat_list) 257 | return output 258 | 259 | def get_client_state(self): 260 | """Get a dict of known threat lists including clientState values.""" 261 | q = '''SELECT threat_type,platform_type,threat_entry_type,client_state FROM threat_list''' 262 | output = {} 263 | with self.get_cursor() as dbc: 264 | dbc.execute(q) 265 | for h in dbc.fetchall(): 266 | threat_type, platform_type, threat_entry_type, client_state = h 267 | threat_list_tuple = (threat_type, platform_type, threat_entry_type) 268 | output[threat_list_tuple] = client_state 269 | return output 270 | 271 | def add_threat_list(self, threat_list): 272 | """Add threat list entry if it does not exist.""" 273 | q = '''INSERT OR IGNORE INTO threat_list 274 | (threat_type, platform_type, threat_entry_type, timestamp) 275 | VALUES 276 | (?, ?, ?, current_timestamp) 277 | ''' 278 | params = [threat_list.threat_type, threat_list.platform_type, threat_list.threat_entry_type] 279 | with self.get_cursor() as dbc: 280 | dbc.execute(q, params) 281 | 282 | def delete_threat_list(self, threat_list): 283 | """Delete threat list entry.""" 284 | log.info('Deleting cached threat list "{}"'.format(repr(threat_list))) 285 | q = '''DELETE FROM threat_list 286 | WHERE threat_type=? AND platform_type=? AND threat_entry_type=? 287 | ''' 288 | params = [threat_list.threat_type, threat_list.platform_type, threat_list.threat_entry_type] 289 | with self.get_cursor() as dbc: 290 | dbc.execute(q, params) 291 | 292 | def update_threat_list_client_state(self, threat_list, client_state): 293 | log.info('Setting client_state in Sqlite') 294 | q = '''UPDATE threat_list SET timestamp=current_timestamp, client_state=? 295 | WHERE threat_type=? AND platform_type=? 
AND threat_entry_type=?''' 296 | with self.get_cursor() as dbc: 297 | params = [client_state, threat_list.threat_type, threat_list.platform_type, threat_list.threat_entry_type] 298 | dbc.execute(q, params) 299 | 300 | def hash_prefix_list_checksum(self, threat_list): 301 | """Returns SHA256 checksum for alphabetically-sorted concatenated list of hash prefixes""" 302 | q = '''SELECT value FROM hash_prefix 303 | WHERE threat_type=? AND platform_type=? AND threat_entry_type=? 304 | ORDER BY value 305 | ''' 306 | params = [threat_list.threat_type, threat_list.platform_type, threat_list.threat_entry_type] 307 | with self.get_cursor() as dbc: 308 | dbc.execute(q, params) 309 | all_hashes = b''.join(bytes(h[0]) for h in dbc.fetchall()) 310 | checksum = hashlib.sha256(all_hashes).digest() 311 | return checksum 312 | 313 | def populate_hash_prefix_list(self, threat_list, hash_prefix_list): 314 | log.info('Storing {} entries of hash prefix list {}'.format(len(hash_prefix_list), str(threat_list))) 315 | q = '''INSERT INTO hash_prefix 316 | (value, cue, threat_type, platform_type, threat_entry_type, timestamp) 317 | VALUES 318 | (?, ?, ?, ?, ?, current_timestamp) 319 | ''' 320 | with self.get_cursor() as dbc: 321 | records = [[sqlite3.Binary(prefix_value), sqlite3.Binary(prefix_value[0:4]), threat_list.threat_type, 322 | threat_list.platform_type, threat_list.threat_entry_type] for prefix_value in hash_prefix_list] 323 | dbc.executemany(q, records) 324 | 325 | def get_hash_prefix_values_to_remove(self, threat_list, indices): 326 | log.info('Removing {} records from threat list "{}"'.format(len(indices), str(threat_list))) 327 | indices = set(indices) 328 | q = '''SELECT value FROM hash_prefix 329 | WHERE threat_type=? AND platform_type=? AND threat_entry_type=? 
330 | ORDER BY value 331 | ''' 332 | params = [threat_list.threat_type, threat_list.platform_type, threat_list.threat_entry_type] 333 | values_to_remove = [] 334 | with self.get_cursor() as dbc: 335 | dbc.execute(q, params) 336 | i = 0 337 | for h in dbc.fetchall(): 338 | v = bytes(h[0]) 339 | if i in indices: 340 | values_to_remove.append(v) 341 | i += 1 342 | return values_to_remove 343 | 344 | def remove_hash_prefix_indices(self, threat_list, indices): 345 | """Remove records matching idices from a lexicographically-sorted local threat list.""" 346 | batch_size = 40 347 | q = '''DELETE FROM hash_prefix 348 | WHERE threat_type=? AND platform_type=? AND threat_entry_type=? AND value IN ({}) 349 | ''' 350 | prefixes_to_remove = self.get_hash_prefix_values_to_remove(threat_list, indices) 351 | with self.get_cursor() as dbc: 352 | for i in range(0, len(prefixes_to_remove), batch_size): 353 | remove_batch = prefixes_to_remove[i:(i + batch_size)] 354 | params = [ 355 | threat_list.threat_type, 356 | threat_list.platform_type, 357 | threat_list.threat_entry_type 358 | ] + [sqlite3.Binary(b) for b in remove_batch] 359 | dbc.execute(q.format(','.join(['?'] * len(remove_batch))), params) 360 | 361 | def dump_hash_prefix_values(self): 362 | """Export all hash prefix values. 
363 | 364 | Returns a list of known hash prefix values 365 | """ 366 | q = '''SELECT distinct value from hash_prefix''' 367 | output = [] 368 | with self.get_cursor() as dbc: 369 | dbc.execute(q) 370 | output = [bytes(r[0]) for r in dbc.fetchall()] 371 | return output 372 | 373 | def rollback(self): 374 | log.info('Rolling back DB transaction.') 375 | self.db.rollback() 376 | 377 | def commit(self): 378 | self.db.commit() 379 | -------------------------------------------------------------------------------- /gglsbl/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.17 (https://github.com/warner/python-versioneer) 10 | 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import os 15 | import re 16 | import subprocess 17 | import sys 18 | 19 | 20 | def get_keywords(): 21 | """Get the keywords needed to look up the version information.""" 22 | # these strings will be replaced by git during git-archive. 23 | # setup.py/versioneer.py will grep for the variable names, so they must 24 | # each be defined on a line of their own. _version.py will just call 25 | # get_keywords(). 
def get_keywords():
    """Get the keywords needed to look up the version information."""
    # These strings are substituted by git during 'git archive'.
    # setup.py/versioneer.py grep for the variable names, so each
    # assignment must stay on a line of its own.
    git_refnames = " (HEAD -> master)"
    git_full = "b988ef687dce589be081b3d0e0e7de2000c31aa8"
    git_date = "2023-07-25 14:04:08 +0200"
    return {"refnames": git_refnames, "full": git_full, "date": git_date}


class VersioneerConfig:
    """Container for Versioneer configuration parameters."""


def get_config():
    """Create, populate and return the VersioneerConfig() object."""
    # These values mirror the [versioneer] section of setup.cfg; they are
    # baked in when 'setup.py versioneer' generates _version.py.
    cfg = VersioneerConfig()
    cfg.VCS = "git"
    cfg.style = "pep440"
    cfg.tag_prefix = ""
    cfg.parentdir_prefix = "gglsbl-"
    cfg.versionfile_source = "gglsbl/_version.py"
    cfg.verbose = False
    return cfg
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. We will also support searching up
    two directory levels for an appropriately named parent directory
    """
    tried = []
    for _ in range(3):
        leaf = os.path.basename(root)
        if leaf.startswith(parentdir_prefix):
            # e.g. "gglsbl-1.2.3" -> version "1.2.3"
            return {"version": leaf[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        tried.append(root)
        root = os.path.dirname(root)  # climb one level and retry

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file.

    Args:
        versionfile_abs: absolute path to a _version.py-style file.

    Returns:
        Dict containing whichever of "refnames", "full" and "date" could be
        parsed; empty if the file is unreadable.
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    keywords = {}
    value_re = re.compile(r'=\s*"(.*)"')  # hoisted: same pattern for all keys
    try:
        # Fix: use a context manager so the file handle is closed even if
        # parsing raises; the original open()/close() pair leaked the
        # descriptor on any exception raised mid-loop. Iterating the file
        # directly also avoids materializing readlines().
        with open(versionfile_abs, "r") as f:
            for line in f:
                stripped = line.strip()
                if stripped.startswith("git_refnames ="):
                    mo = value_re.search(line)
                    if mo:
                        keywords["refnames"] = mo.group(1)
                if stripped.startswith("git_full ="):
                    mo = value_re.search(line)
                    if mo:
                        keywords["full"] = mo.group(1)
                if stripped.startswith("git_date ="):
                    mo = value_re.search(line)
                    if mo:
                        keywords["date"] = mo.group(1)
    except EnvironmentError:
        # Missing/unreadable file is an expected case: return what we have.
        pass
    return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    Raises NotThisMethod when keywords are missing or unexpanded (i.e. we
    are not inside a git-archive tarball).
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        # keyword was never substituted by 'git archive'
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = set([r.strip() for r in refnames.strip("()").split(",")])
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set([r for r in refs if re.search(r'\d', r)])
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
            return {"version": r,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        # run_command uses shell=False, so the .cmd/.exe wrappers are needed
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                          hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%s*" % tag_prefix],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                              % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                              % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    # NOTE(review): run_command() returns (None, None) when git cannot be
    # run, which would make the .strip() below raise AttributeError rather
    # than NotThisMethod — confirm this failure mode is acceptable here.
    date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                       cwd=root)[0].strip()
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces


def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a ."""
    # '+' starts a PEP 440 local-version segment; '.' continues one.
    if "+" in pieces.get("closest-tag", ""):
        return "."
    return "+"
def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1: no tag to anchor on, fall back to the raw commit
        version = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
        return version + ".dirty" if pieces["dirty"] else version
    version = tag
    if pieces["distance"] or pieces["dirty"]:
        # '.' continues an existing local segment, '+' starts a new one
        sep = "." if "+" in tag else "+"
        version += "%s%d.g%s" % (sep, pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            version += ".dirty"
    return version


def render_pep440_pre(pieces):
    """TAG[.post.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % pieces["distance"]
    if pieces["distance"]:
        return tag + ".post.dev%d" % pieces["distance"]
    return tag


def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        version = tag
        if pieces["distance"] or pieces["dirty"]:
            version += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                version += ".dev0"
            sep = "." if "+" in tag else "+"
            version += "%sg%s" % (sep, pieces["short"])
        return version
    # exception #1
    version = "0.post%d" % pieces["distance"]
    if pieces["dirty"]:
        version += ".dev0"
    return version + "+g%s" % pieces["short"]
def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        out = tag
        if pieces["distance"] or pieces["dirty"]:
            out += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                out += ".dev0"
        return out
    # exception #1
    out = "0.post%d" % pieces["distance"]
    if pieces["dirty"]:
        out += ".dev0"
    return out


def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        out = tag
        if pieces["distance"]:
            out += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        out = pieces["short"]
    return out + "-dirty" if pieces["dirty"] else out
def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        out = "%s-%d-g%s" % (tag, pieces["distance"], pieces["short"])
    else:
        # exception #1
        out = pieces["short"]
    return out + "-dirty" if pieces["dirty"] else out


def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        # an upstream step already recorded a failure; surface it verbatim
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    chosen = style or "default"
    if chosen == "default":
        chosen = "pep440"  # the default

    if chosen == "pep440":
        rendered = render_pep440(pieces)
    elif chosen == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif chosen == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif chosen == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif chosen == "git-describe":
        rendered = render_git_describe(pieces)
    elif chosen == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}
def get_versions():
    """Get version information or return default if unable to do so."""
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.
    cfg = get_config()
    verbose = cfg.verbose

    # Strategy 1: git-archive keywords baked into this file.
    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    # Locate the source-tree root by walking up from __file__.
    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": "unable to find root of source tree",
                "date": None}

    # Strategy 2: ask git directly.
    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    # Strategy 3: fall back to the parent directory name.
    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None,
            "error": "unable to compute version", "date": None}
10 | * https://github.com/warner/python-versioneer 11 | * Brian Warner 12 | * License: Public Domain 13 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, and pypy 14 | * [![Latest Version] 15 | (https://pypip.in/version/versioneer/badge.svg?style=flat) 16 | ](https://pypi.python.org/pypi/versioneer/) 17 | * [![Build Status] 18 | (https://travis-ci.org/warner/python-versioneer.png?branch=master) 19 | ](https://travis-ci.org/warner/python-versioneer) 20 | 21 | This is a tool for managing a recorded version number in distutils-based 22 | python projects. The goal is to remove the tedious and error-prone "update 23 | the embedded version string" step from your release process. Making a new 24 | release should be as easy as recording a new tag in your version-control 25 | system, and maybe making new tarballs. 26 | 27 | 28 | ## Quick Install 29 | 30 | * `pip install versioneer` to somewhere to your $PATH 31 | * add a `[versioneer]` section to your setup.cfg (see below) 32 | * run `versioneer install` in your source tree, commit the results 33 | 34 | ## Version Identifiers 35 | 36 | Source trees come from a variety of places: 37 | 38 | * a version-control system checkout (mostly used by developers) 39 | * a nightly tarball, produced by build automation 40 | * a snapshot tarball, produced by a web-based VCS browser, like github's 41 | "tarball from tag" feature 42 | * a release tarball, produced by "setup.py sdist", distributed through PyPI 43 | 44 | Within each source tree, the version identifier (either a string or a number, 45 | this tool is format-agnostic) can come from a variety of places: 46 | 47 | * ask the VCS tool itself, e.g. 
"git describe" (for checkouts), which knows 48 | about recent "tags" and an absolute revision-id 49 | * the name of the directory into which the tarball was unpacked 50 | * an expanded VCS keyword ($Id$, etc) 51 | * a `_version.py` created by some earlier build step 52 | 53 | For released software, the version identifier is closely related to a VCS 54 | tag. Some projects use tag names that include more than just the version 55 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool 56 | needs to strip the tag prefix to extract the version identifier. For 57 | unreleased software (between tags), the version identifier should provide 58 | enough information to help developers recreate the same tree, while also 59 | giving them an idea of roughly how old the tree is (after version 1.2, before 60 | version 1.3). Many VCS systems can report a description that captures this, 61 | for example `git describe --tags --dirty --always` reports things like 62 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 63 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has 64 | uncommitted changes. 65 | 66 | The version identifier is used for multiple purposes: 67 | 68 | * to allow the module to self-identify its version: `myproject.__version__` 69 | * to choose a name and prefix for a 'setup.py sdist' tarball 70 | 71 | ## Theory of Operation 72 | 73 | Versioneer works by adding a special `_version.py` file into your source 74 | tree, where your `__init__.py` can import it. This `_version.py` knows how to 75 | dynamically ask the VCS tool for version information at import time. 76 | 77 | `_version.py` also contains `$Revision$` markers, and the installation 78 | process marks `_version.py` to have this marker rewritten with a tag name 79 | during the `git archive` command. As a result, generated tarballs will 80 | contain enough information to get the proper version. 
81 | 82 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 83 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 84 | that configures it. This overrides several distutils/setuptools commands to 85 | compute the version when invoked, and changes `setup.py build` and `setup.py 86 | sdist` to replace `_version.py` with a small static file that contains just 87 | the generated version data. 88 | 89 | ## Installation 90 | 91 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 92 | 93 | ## Version-String Flavors 94 | 95 | Code which uses Versioneer can learn about its version string at runtime by 96 | importing `_version` from your main `__init__.py` file and running the 97 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 98 | import the top-level `versioneer.py` and run `get_versions()`. 99 | 100 | Both functions return a dictionary with different flavors of version 101 | information: 102 | 103 | * `['version']`: A condensed version string, rendered using the selected 104 | style. This is the most commonly used value for the project's version 105 | string. The default "pep440" style yields strings like `0.11`, 106 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 107 | below for alternative styles. 108 | 109 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 110 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 111 | 112 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the 113 | commit date in ISO 8601 format. This will be None if the date is not 114 | available. 115 | 116 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. 
Note that 117 | this is only accurate if run in a VCS checkout, otherwise it is likely to 118 | be False or None 119 | 120 | * `['error']`: if the version string could not be computed, this will be set 121 | to a string describing the problem, otherwise it will be None. It may be 122 | useful to throw an exception in setup.py if this is set, to avoid e.g. 123 | creating tarballs with a version string of "unknown". 124 | 125 | Some variants are more useful than others. Including `full-revisionid` in a 126 | bug report should allow developers to reconstruct the exact code being tested 127 | (or indicate the presence of local changes that should be shared with the 128 | developers). `version` is suitable for display in an "about" box or a CLI 129 | `--version` output: it can be easily compared against release notes and lists 130 | of bugs fixed in various releases. 131 | 132 | The installer adds the following text to your `__init__.py` to place a basic 133 | version in `YOURPROJECT.__version__`: 134 | 135 | from ._version import get_versions 136 | __version__ = get_versions()['version'] 137 | del get_versions 138 | 139 | ## Styles 140 | 141 | The setup.cfg `style=` configuration controls how the VCS information is 142 | rendered into a version string. 143 | 144 | The default style, "pep440", produces a PEP440-compliant string, equal to the 145 | un-prefixed tag name for actual releases, and containing an additional "local 146 | version" section with more detail for in-between builds. For Git, this is 147 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags 148 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the 149 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and 150 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released 151 | software (exactly equal to a known tag), the identifier will only contain the 152 | stripped tag, e.g. "0.11". 
153 | 154 | Other styles are available. See details.md in the Versioneer source tree for 155 | descriptions. 156 | 157 | ## Debugging 158 | 159 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend 160 | to return a version of "0+unknown". To investigate the problem, run `setup.py 161 | version`, which will run the version-lookup code in a verbose mode, and will 162 | display the full contents of `get_versions()` (including the `error` string, 163 | which may help identify what went wrong). 164 | 165 | ## Known Limitations 166 | 167 | Some situations are known to cause problems for Versioneer. This details the 168 | most significant ones. More can be found on Github 169 | [issues page](https://github.com/warner/python-versioneer/issues). 170 | 171 | ### Subprojects 172 | 173 | Versioneer has limited support for source trees in which `setup.py` is not in 174 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are 175 | two common reasons why `setup.py` might not be in the root: 176 | 177 | * Source trees which contain multiple subprojects, such as 178 | [Buildbot](https://github.com/buildbot/buildbot), which contains both 179 | "master" and "slave" subprojects, each with their own `setup.py`, 180 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI 181 | distributions (and upload multiple independently-installable tarballs). 182 | * Source trees whose main purpose is to contain a C library, but which also 183 | provide bindings to Python (and perhaps other languages) in subdirectories. 184 | 185 | Versioneer will look for `.git` in parent directories, and most operations 186 | should get the right version string. However `pip` and `setuptools` have bugs 187 | and implementation details which frequently cause `pip install .` from a 188 | subproject directory to fail to find a correct version string (so it usually 189 | defaults to `0+unknown`). 
190 | 191 | `pip install --editable .` should work correctly. `setup.py install` might 192 | work too. 193 | 194 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in 195 | some later version. 196 | 197 | [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking 198 | this issue. The discussion in 199 | [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the 200 | issue from the Versioneer side in more detail. 201 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and 202 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve 203 | pip to let Versioneer work correctly. 204 | 205 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the 206 | `setup.cfg`, so subprojects were completely unsupported with those releases. 207 | 208 | ### Editable installs with setuptools <= 18.5 209 | 210 | `setup.py develop` and `pip install --editable .` allow you to install a 211 | project into a virtualenv once, then continue editing the source code (and 212 | test) without re-installing after every change. 213 | 214 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a 215 | convenient way to specify executable scripts that should be installed along 216 | with the python package. 217 | 218 | These both work as expected when using modern setuptools. When using 219 | setuptools-18.5 or earlier, however, certain operations will cause 220 | `pkg_resources.DistributionNotFound` errors when running the entrypoint 221 | script, which must be resolved by re-installing the package. This happens 222 | when the install happens with one version, then the egg_info data is 223 | regenerated while a different version is checked out. Many setup.py commands 224 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into 225 | a different virtualenv), so this can be surprising. 
226 | 227 | [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes 228 | this one, but upgrading to a newer version of setuptools should probably 229 | resolve it. 230 | 231 | ### Unicode version strings 232 | 233 | While Versioneer works (and is continually tested) with both Python 2 and 234 | Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. 235 | Newer releases probably generate unicode version strings on py2. It's not 236 | clear that this is wrong, but it may be surprising for applications when then 237 | write these strings to a network connection or include them in bytes-oriented 238 | APIs like cryptographic checksums. 239 | 240 | [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates 241 | this question. 242 | 243 | 244 | ## Updating Versioneer 245 | 246 | To upgrade your project to a new release of Versioneer, do the following: 247 | 248 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 249 | * edit `setup.cfg`, if necessary, to include any new configuration settings 250 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 251 | * re-run `versioneer install` in your source tree, to replace 252 | `SRC/_version.py` 253 | * commit any changed files 254 | 255 | ## Future Directions 256 | 257 | This tool is designed to make it easily extended to other version-control 258 | systems: all VCS-specific components are in separate directories like 259 | src/git/ . The top-level `versioneer.py` script is assembled from these 260 | components by running make-versioneer.py . In the future, make-versioneer.py 261 | will take a VCS name as an argument, and will construct a version of 262 | `versioneer.py` that is specific to the given VCS. It might also take the 263 | configuration arguments that are currently provided manually during 264 | installation by editing setup.py . 
def get_root():
    """Get the project root directory.

    We require that all commands are run from the project root, i.e. the
    directory that contains setup.py, setup.cfg, and versioneer.py .

    Raises VersioneerBadRootError when neither the current directory nor
    the directory of sys.argv[0] looks like a project root.
    """
    root = os.path.realpath(os.path.abspath(os.getcwd()))
    setup_py = os.path.join(root, "setup.py")
    versioneer_py = os.path.join(root, "versioneer.py")
    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
        # allow 'python path/to/setup.py COMMAND'
        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
        setup_py = os.path.join(root, "setup.py")
        versioneer_py = os.path.join(root, "versioneer.py")
    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
        # Fix: original message read "unable to run the project root
        # directory", which is not a sentence; it means "find".
        err = ("Versioneer was unable to find the project root directory. "
               "Versioneer requires setup.py to be executed from "
               "its immediate directory (like 'python setup.py COMMAND'), "
               "or in a way that lets it use sys.argv[0] to find the root "
               "(like 'python path/to/setup.py COMMAND').")
        raise VersioneerBadRootError(err)
    try:
        # Certain runtime workflows (setup.py install/develop in a setuptools
        # tree) execute all dependencies in a single python process, so
        # "versioneer" may be imported multiple times, and python's shared
        # module-import table will cache the first one. So we can't use
        # os.path.dirname(__file__), as that will find whichever
        # versioneer.py was first imported, even in later projects.
        me = os.path.realpath(os.path.abspath(__file__))
        me_dir = os.path.normcase(os.path.splitext(me)[0])
        vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
        if me_dir != vsr_dir:
            print("Warning: build in %s is using versioneer.py from %s"
                  % (os.path.dirname(me), versioneer_py))
    except NameError:
        pass
    return root
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Call the given command(s)."""
    assert isinstance(commands, list)
    proc = None
    display = None
    for candidate in commands:
        display = str([candidate] + args)
        try:
            # shell=False, so on windows callers pass git.cmd, not just git
            proc = subprocess.Popen([candidate] + args, cwd=cwd, env=env,
                                    stdout=subprocess.PIPE,
                                    stderr=(subprocess.PIPE if hide_stderr
                                            else None))
            break
        except EnvironmentError:
            err = sys.exc_info()[1]
            if err.errno == errno.ENOENT:
                # This executable name is absent; try the next candidate.
                continue
            if verbose:
                print("unable to run %s" % display)
                print(err)
            return None, None
    if proc is None:
        # Every candidate was missing (ENOENT on each one).
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = proc.communicate()[0].strip()
    if sys.version_info[0] >= 3:
        stdout = stdout.decode()
    if proc.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % display)
            print("stdout was %s" % stdout)
        return None, proc.returncode
    return stdout, proc.returncode
443 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 444 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 445 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" 446 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 447 | return keywords 448 | 449 | 450 | class VersioneerConfig: 451 | """Container for Versioneer configuration parameters.""" 452 | 453 | 454 | def get_config(): 455 | """Create, populate and return the VersioneerConfig() object.""" 456 | # these strings are filled in when 'setup.py versioneer' creates 457 | # _version.py 458 | cfg = VersioneerConfig() 459 | cfg.VCS = "git" 460 | cfg.style = "%(STYLE)s" 461 | cfg.tag_prefix = "%(TAG_PREFIX)s" 462 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 463 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 464 | cfg.verbose = False 465 | return cfg 466 | 467 | 468 | class NotThisMethod(Exception): 469 | """Exception raised if a method is not valid for the current scenario.""" 470 | 471 | 472 | LONG_VERSION_PY = {} 473 | HANDLERS = {} 474 | 475 | 476 | def register_vcs_handler(vcs, method): # decorator 477 | """Decorator to mark a method as the handler for a particular VCS.""" 478 | def decorate(f): 479 | """Store f in HANDLERS[vcs][method].""" 480 | if vcs not in HANDLERS: 481 | HANDLERS[vcs] = {} 482 | HANDLERS[vcs][method] = f 483 | return f 484 | return decorate 485 | 486 | 487 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 488 | env=None): 489 | """Call the given command(s).""" 490 | assert isinstance(commands, list) 491 | p = None 492 | for c in commands: 493 | try: 494 | dispcmd = str([c] + args) 495 | # remember shell=False, so use git.cmd on windows, not just git 496 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 497 | stdout=subprocess.PIPE, 498 | stderr=(subprocess.PIPE if hide_stderr 499 | else None)) 500 | break 501 | except EnvironmentError: 502 | e = sys.exc_info()[1] 503 | if e.errno == errno.ENOENT: 504 | continue 505 | if verbose: 506 | 
print("unable to run %%s" %% dispcmd) 507 | print(e) 508 | return None, None 509 | else: 510 | if verbose: 511 | print("unable to find command, tried %%s" %% (commands,)) 512 | return None, None 513 | stdout = p.communicate()[0].strip() 514 | if sys.version_info[0] >= 3: 515 | stdout = stdout.decode() 516 | if p.returncode != 0: 517 | if verbose: 518 | print("unable to run %%s (error)" %% dispcmd) 519 | print("stdout was %%s" %% stdout) 520 | return None, p.returncode 521 | return stdout, p.returncode 522 | 523 | 524 | def versions_from_parentdir(parentdir_prefix, root, verbose): 525 | """Try to determine the version from the parent directory name. 526 | 527 | Source tarballs conventionally unpack into a directory that includes both 528 | the project name and a version string. We will also support searching up 529 | two directory levels for an appropriately named parent directory 530 | """ 531 | rootdirs = [] 532 | 533 | for i in range(3): 534 | dirname = os.path.basename(root) 535 | if dirname.startswith(parentdir_prefix): 536 | return {"version": dirname[len(parentdir_prefix):], 537 | "full-revisionid": None, 538 | "dirty": False, "error": None, "date": None} 539 | else: 540 | rootdirs.append(root) 541 | root = os.path.dirname(root) # up a level 542 | 543 | if verbose: 544 | print("Tried directories %%s but none started with prefix %%s" %% 545 | (str(rootdirs), parentdir_prefix)) 546 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 547 | 548 | 549 | @register_vcs_handler("git", "get_keywords") 550 | def git_get_keywords(versionfile_abs): 551 | """Extract version information from the given file.""" 552 | # the code embedded in _version.py can just fetch the value of these 553 | # keywords. When used from setup.py, we don't want to import _version.py, 554 | # so we do it with a regexp instead. This function is not used from 555 | # _version.py. 
556 | keywords = {} 557 | try: 558 | f = open(versionfile_abs, "r") 559 | for line in f.readlines(): 560 | if line.strip().startswith("git_refnames ="): 561 | mo = re.search(r'=\s*"(.*)"', line) 562 | if mo: 563 | keywords["refnames"] = mo.group(1) 564 | if line.strip().startswith("git_full ="): 565 | mo = re.search(r'=\s*"(.*)"', line) 566 | if mo: 567 | keywords["full"] = mo.group(1) 568 | if line.strip().startswith("git_date ="): 569 | mo = re.search(r'=\s*"(.*)"', line) 570 | if mo: 571 | keywords["date"] = mo.group(1) 572 | f.close() 573 | except EnvironmentError: 574 | pass 575 | return keywords 576 | 577 | 578 | @register_vcs_handler("git", "keywords") 579 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 580 | """Get version information from git keywords.""" 581 | if not keywords: 582 | raise NotThisMethod("no keywords at all, weird") 583 | date = keywords.get("date") 584 | if date is not None: 585 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant 586 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 587 | # -like" string, which we must then edit to make compliant), because 588 | # it's been around since git-1.5.3, and it's too difficult to 589 | # discover which version we're using, or to work around using an 590 | # older one. 591 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 592 | refnames = keywords["refnames"].strip() 593 | if refnames.startswith("$Format"): 594 | if verbose: 595 | print("keywords are unexpanded, not using") 596 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 597 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 598 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 599 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 600 | TAG = "tag: " 601 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 602 | if not tags: 603 | # Either we're using git < 1.8.3, or there really are no tags. 
We use 604 | # a heuristic: assume all version tags have a digit. The old git %%d 605 | # expansion behaves like git log --decorate=short and strips out the 606 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 607 | # between branches and tags. By ignoring refnames without digits, we 608 | # filter out many common branch names like "release" and 609 | # "stabilization", as well as "HEAD" and "master". 610 | tags = set([r for r in refs if re.search(r'\d', r)]) 611 | if verbose: 612 | print("discarding '%%s', no digits" %% ",".join(refs - tags)) 613 | if verbose: 614 | print("likely tags: %%s" %% ",".join(sorted(tags))) 615 | for ref in sorted(tags): 616 | # sorting will prefer e.g. "2.0" over "2.0rc1" 617 | if ref.startswith(tag_prefix): 618 | r = ref[len(tag_prefix):] 619 | if verbose: 620 | print("picking %%s" %% r) 621 | return {"version": r, 622 | "full-revisionid": keywords["full"].strip(), 623 | "dirty": False, "error": None, 624 | "date": date} 625 | # no suitable tags, so version is "0+unknown", but full hex is still there 626 | if verbose: 627 | print("no suitable tags, using unknown + full revision id") 628 | return {"version": "0+unknown", 629 | "full-revisionid": keywords["full"].strip(), 630 | "dirty": False, "error": "no suitable tags", "date": None} 631 | 632 | 633 | @register_vcs_handler("git", "pieces_from_vcs") 634 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 635 | """Get version from 'git describe' in the root of the source tree. 636 | 637 | This only gets called if the git-archive 'subst' keywords were *not* 638 | expanded, and _version.py hasn't already been rewritten with a short 639 | version string, meaning we're inside a checked out source tree. 
640 | """ 641 | GITS = ["git"] 642 | if sys.platform == "win32": 643 | GITS = ["git.cmd", "git.exe"] 644 | 645 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 646 | hide_stderr=True) 647 | if rc != 0: 648 | if verbose: 649 | print("Directory %%s not under git control" %% root) 650 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 651 | 652 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 653 | # if there isn't one, this yields HEX[-dirty] (no NUM) 654 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 655 | "--always", "--long", 656 | "--match", "%%s*" %% tag_prefix], 657 | cwd=root) 658 | # --long was added in git-1.5.5 659 | if describe_out is None: 660 | raise NotThisMethod("'git describe' failed") 661 | describe_out = describe_out.strip() 662 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 663 | if full_out is None: 664 | raise NotThisMethod("'git rev-parse' failed") 665 | full_out = full_out.strip() 666 | 667 | pieces = {} 668 | pieces["long"] = full_out 669 | pieces["short"] = full_out[:7] # maybe improved later 670 | pieces["error"] = None 671 | 672 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 673 | # TAG might have hyphens. 674 | git_describe = describe_out 675 | 676 | # look for -dirty suffix 677 | dirty = git_describe.endswith("-dirty") 678 | pieces["dirty"] = dirty 679 | if dirty: 680 | git_describe = git_describe[:git_describe.rindex("-dirty")] 681 | 682 | # now we have TAG-NUM-gHEX or HEX 683 | 684 | if "-" in git_describe: 685 | # TAG-NUM-gHEX 686 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 687 | if not mo: 688 | # unparseable. Maybe git-describe is misbehaving? 
689 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 690 | %% describe_out) 691 | return pieces 692 | 693 | # tag 694 | full_tag = mo.group(1) 695 | if not full_tag.startswith(tag_prefix): 696 | if verbose: 697 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 698 | print(fmt %% (full_tag, tag_prefix)) 699 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 700 | %% (full_tag, tag_prefix)) 701 | return pieces 702 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 703 | 704 | # distance: number of commits since tag 705 | pieces["distance"] = int(mo.group(2)) 706 | 707 | # commit: short hex revision ID 708 | pieces["short"] = mo.group(3) 709 | 710 | else: 711 | # HEX: no tags 712 | pieces["closest-tag"] = None 713 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 714 | cwd=root) 715 | pieces["distance"] = int(count_out) # total number of commits 716 | 717 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 718 | date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], 719 | cwd=root)[0].strip() 720 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 721 | 722 | return pieces 723 | 724 | 725 | def plus_or_dot(pieces): 726 | """Return a + if we don't already have one, else return a .""" 727 | if "+" in pieces.get("closest-tag", ""): 728 | return "." 729 | return "+" 730 | 731 | 732 | def render_pep440(pieces): 733 | """Build up version string, with post-release "local version identifier". 734 | 735 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 736 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 737 | 738 | Exceptions: 739 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 740 | """ 741 | if pieces["closest-tag"]: 742 | rendered = pieces["closest-tag"] 743 | if pieces["distance"] or pieces["dirty"]: 744 | rendered += plus_or_dot(pieces) 745 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 746 | if pieces["dirty"]: 747 | rendered += ".dirty" 748 | else: 749 | # exception #1 750 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 751 | pieces["short"]) 752 | if pieces["dirty"]: 753 | rendered += ".dirty" 754 | return rendered 755 | 756 | 757 | def render_pep440_pre(pieces): 758 | """TAG[.post.devDISTANCE] -- No -dirty. 759 | 760 | Exceptions: 761 | 1: no tags. 0.post.devDISTANCE 762 | """ 763 | if pieces["closest-tag"]: 764 | rendered = pieces["closest-tag"] 765 | if pieces["distance"]: 766 | rendered += ".post.dev%%d" %% pieces["distance"] 767 | else: 768 | # exception #1 769 | rendered = "0.post.dev%%d" %% pieces["distance"] 770 | return rendered 771 | 772 | 773 | def render_pep440_post(pieces): 774 | """TAG[.postDISTANCE[.dev0]+gHEX] . 775 | 776 | The ".dev0" means dirty. Note that .dev0 sorts backwards 777 | (a dirty tree will appear "older" than the corresponding clean one), 778 | but you shouldn't be releasing software with -dirty anyways. 779 | 780 | Exceptions: 781 | 1: no tags. 0.postDISTANCE[.dev0] 782 | """ 783 | if pieces["closest-tag"]: 784 | rendered = pieces["closest-tag"] 785 | if pieces["distance"] or pieces["dirty"]: 786 | rendered += ".post%%d" %% pieces["distance"] 787 | if pieces["dirty"]: 788 | rendered += ".dev0" 789 | rendered += plus_or_dot(pieces) 790 | rendered += "g%%s" %% pieces["short"] 791 | else: 792 | # exception #1 793 | rendered = "0.post%%d" %% pieces["distance"] 794 | if pieces["dirty"]: 795 | rendered += ".dev0" 796 | rendered += "+g%%s" %% pieces["short"] 797 | return rendered 798 | 799 | 800 | def render_pep440_old(pieces): 801 | """TAG[.postDISTANCE[.dev0]] . 802 | 803 | The ".dev0" means dirty. 804 | 805 | Eexceptions: 806 | 1: no tags. 
0.postDISTANCE[.dev0] 807 | """ 808 | if pieces["closest-tag"]: 809 | rendered = pieces["closest-tag"] 810 | if pieces["distance"] or pieces["dirty"]: 811 | rendered += ".post%%d" %% pieces["distance"] 812 | if pieces["dirty"]: 813 | rendered += ".dev0" 814 | else: 815 | # exception #1 816 | rendered = "0.post%%d" %% pieces["distance"] 817 | if pieces["dirty"]: 818 | rendered += ".dev0" 819 | return rendered 820 | 821 | 822 | def render_git_describe(pieces): 823 | """TAG[-DISTANCE-gHEX][-dirty]. 824 | 825 | Like 'git describe --tags --dirty --always'. 826 | 827 | Exceptions: 828 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 829 | """ 830 | if pieces["closest-tag"]: 831 | rendered = pieces["closest-tag"] 832 | if pieces["distance"]: 833 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 834 | else: 835 | # exception #1 836 | rendered = pieces["short"] 837 | if pieces["dirty"]: 838 | rendered += "-dirty" 839 | return rendered 840 | 841 | 842 | def render_git_describe_long(pieces): 843 | """TAG-DISTANCE-gHEX[-dirty]. 844 | 845 | Like 'git describe --tags --dirty --always -long'. 846 | The distance/hash is unconditional. 847 | 848 | Exceptions: 849 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 850 | """ 851 | if pieces["closest-tag"]: 852 | rendered = pieces["closest-tag"] 853 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 854 | else: 855 | # exception #1 856 | rendered = pieces["short"] 857 | if pieces["dirty"]: 858 | rendered += "-dirty" 859 | return rendered 860 | 861 | 862 | def render(pieces, style): 863 | """Render the given version pieces into the requested style.""" 864 | if pieces["error"]: 865 | return {"version": "unknown", 866 | "full-revisionid": pieces.get("long"), 867 | "dirty": None, 868 | "error": pieces["error"], 869 | "date": None} 870 | 871 | if not style or style == "default": 872 | style = "pep440" # the default 873 | 874 | if style == "pep440": 875 | rendered = render_pep440(pieces) 876 | elif style == "pep440-pre": 877 | rendered = render_pep440_pre(pieces) 878 | elif style == "pep440-post": 879 | rendered = render_pep440_post(pieces) 880 | elif style == "pep440-old": 881 | rendered = render_pep440_old(pieces) 882 | elif style == "git-describe": 883 | rendered = render_git_describe(pieces) 884 | elif style == "git-describe-long": 885 | rendered = render_git_describe_long(pieces) 886 | else: 887 | raise ValueError("unknown style '%%s'" %% style) 888 | 889 | return {"version": rendered, "full-revisionid": pieces["long"], 890 | "dirty": pieces["dirty"], "error": None, 891 | "date": pieces.get("date")} 892 | 893 | 894 | def get_versions(): 895 | """Get version information or return default if unable to do so.""" 896 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 897 | # __file__, we can work backwards from there to the root. Some 898 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 899 | # case we can only use expanded keywords. 
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file.

    Reads the expanded git-archive keyword assignments (git_refnames,
    git_full, git_date) out of *versionfile_abs* with a regexp, so that
    setup.py never has to import _version.py.  Returns a dict containing
    whichever of the keys "refnames", "full", and "date" were found; a
    missing or unreadable file yields an empty dict.  This function is
    not used from _version.py itself.
    """
    keywords = {}
    try:
        # 'with' guarantees the file handle is closed on every exit path,
        # unlike the previous explicit open()/close() pair.
        with open(versionfile_abs, "r") as fobj:
            for line in fobj:
                if line.strip().startswith("git_refnames ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["refnames"] = mo.group(1)
                if line.strip().startswith("git_full ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["full"] = mo.group(1)
                if line.strip().startswith("git_date ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["date"] = mo.group(1)
    except EnvironmentError:
        # File missing or unreadable: return whatever was collected (none).
        pass
    return keywords
We use 996 | # a heuristic: assume all version tags have a digit. The old git %d 997 | # expansion behaves like git log --decorate=short and strips out the 998 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 999 | # between branches and tags. By ignoring refnames without digits, we 1000 | # filter out many common branch names like "release" and 1001 | # "stabilization", as well as "HEAD" and "master". 1002 | tags = set([r for r in refs if re.search(r'\d', r)]) 1003 | if verbose: 1004 | print("discarding '%s', no digits" % ",".join(refs - tags)) 1005 | if verbose: 1006 | print("likely tags: %s" % ",".join(sorted(tags))) 1007 | for ref in sorted(tags): 1008 | # sorting will prefer e.g. "2.0" over "2.0rc1" 1009 | if ref.startswith(tag_prefix): 1010 | r = ref[len(tag_prefix):] 1011 | if verbose: 1012 | print("picking %s" % r) 1013 | return {"version": r, 1014 | "full-revisionid": keywords["full"].strip(), 1015 | "dirty": False, "error": None, 1016 | "date": date} 1017 | # no suitable tags, so version is "0+unknown", but full hex is still there 1018 | if verbose: 1019 | print("no suitable tags, using unknown + full revision id") 1020 | return {"version": "0+unknown", 1021 | "full-revisionid": keywords["full"].strip(), 1022 | "dirty": False, "error": "no suitable tags", "date": None} 1023 | 1024 | 1025 | @register_vcs_handler("git", "pieces_from_vcs") 1026 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 1027 | """Get version from 'git describe' in the root of the source tree. 1028 | 1029 | This only gets called if the git-archive 'subst' keywords were *not* 1030 | expanded, and _version.py hasn't already been rewritten with a short 1031 | version string, meaning we're inside a checked out source tree. 
1032 | """ 1033 | GITS = ["git"] 1034 | if sys.platform == "win32": 1035 | GITS = ["git.cmd", "git.exe"] 1036 | 1037 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 1038 | hide_stderr=True) 1039 | if rc != 0: 1040 | if verbose: 1041 | print("Directory %s not under git control" % root) 1042 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 1043 | 1044 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 1045 | # if there isn't one, this yields HEX[-dirty] (no NUM) 1046 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 1047 | "--always", "--long", 1048 | "--match", "%s*" % tag_prefix], 1049 | cwd=root) 1050 | # --long was added in git-1.5.5 1051 | if describe_out is None: 1052 | raise NotThisMethod("'git describe' failed") 1053 | describe_out = describe_out.strip() 1054 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 1055 | if full_out is None: 1056 | raise NotThisMethod("'git rev-parse' failed") 1057 | full_out = full_out.strip() 1058 | 1059 | pieces = {} 1060 | pieces["long"] = full_out 1061 | pieces["short"] = full_out[:7] # maybe improved later 1062 | pieces["error"] = None 1063 | 1064 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1065 | # TAG might have hyphens. 1066 | git_describe = describe_out 1067 | 1068 | # look for -dirty suffix 1069 | dirty = git_describe.endswith("-dirty") 1070 | pieces["dirty"] = dirty 1071 | if dirty: 1072 | git_describe = git_describe[:git_describe.rindex("-dirty")] 1073 | 1074 | # now we have TAG-NUM-gHEX or HEX 1075 | 1076 | if "-" in git_describe: 1077 | # TAG-NUM-gHEX 1078 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 1079 | if not mo: 1080 | # unparseable. Maybe git-describe is misbehaving? 
1081 | pieces["error"] = ("unable to parse git-describe output: '%s'" 1082 | % describe_out) 1083 | return pieces 1084 | 1085 | # tag 1086 | full_tag = mo.group(1) 1087 | if not full_tag.startswith(tag_prefix): 1088 | if verbose: 1089 | fmt = "tag '%s' doesn't start with prefix '%s'" 1090 | print(fmt % (full_tag, tag_prefix)) 1091 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 1092 | % (full_tag, tag_prefix)) 1093 | return pieces 1094 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 1095 | 1096 | # distance: number of commits since tag 1097 | pieces["distance"] = int(mo.group(2)) 1098 | 1099 | # commit: short hex revision ID 1100 | pieces["short"] = mo.group(3) 1101 | 1102 | else: 1103 | # HEX: no tags 1104 | pieces["closest-tag"] = None 1105 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 1106 | cwd=root) 1107 | pieces["distance"] = int(count_out) # total number of commits 1108 | 1109 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 1110 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 1111 | cwd=root)[0].strip() 1112 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 1113 | 1114 | return pieces 1115 | 1116 | 1117 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1118 | """Git-specific installation logic for Versioneer. 1119 | 1120 | For Git, this means creating/changing .gitattributes to mark _version.py 1121 | for export-subst keyword substitution. 
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory named
    '<project>-<version>'.  Starting at *root*, climb up to two extra
    directory levels looking for a name beginning with
    *parentdir_prefix*; the remainder of that name is the version.
    Raises NotThisMethod when no matching directory is found.
    """
    tried = []
    level = 0
    while level < 3:
        basename = os.path.basename(root)
        if basename.startswith(parentdir_prefix):
            return {"version": basename[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        tried.append(root)
        root = os.path.dirname(root)  # climb one level
        level += 1

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
= json.dumps(versions, sort_keys=True, 1218 | indent=1, separators=(",", ": ")) 1219 | with open(filename, "w") as f: 1220 | f.write(SHORT_VERSION_PY % contents) 1221 | 1222 | print("set %s to '%s'" % (filename, versions["version"])) 1223 | 1224 | 1225 | def plus_or_dot(pieces): 1226 | """Return a + if we don't already have one, else return a .""" 1227 | if "+" in pieces.get("closest-tag", ""): 1228 | return "." 1229 | return "+" 1230 | 1231 | 1232 | def render_pep440(pieces): 1233 | """Build up version string, with post-release "local version identifier". 1234 | 1235 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 1236 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1237 | 1238 | Exceptions: 1239 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 1240 | """ 1241 | if pieces["closest-tag"]: 1242 | rendered = pieces["closest-tag"] 1243 | if pieces["distance"] or pieces["dirty"]: 1244 | rendered += plus_or_dot(pieces) 1245 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1246 | if pieces["dirty"]: 1247 | rendered += ".dirty" 1248 | else: 1249 | # exception #1 1250 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 1251 | pieces["short"]) 1252 | if pieces["dirty"]: 1253 | rendered += ".dirty" 1254 | return rendered 1255 | 1256 | 1257 | def render_pep440_pre(pieces): 1258 | """TAG[.post.devDISTANCE] -- No -dirty. 1259 | 1260 | Exceptions: 1261 | 1: no tags. 0.post.devDISTANCE 1262 | """ 1263 | if pieces["closest-tag"]: 1264 | rendered = pieces["closest-tag"] 1265 | if pieces["distance"]: 1266 | rendered += ".post.dev%d" % pieces["distance"] 1267 | else: 1268 | # exception #1 1269 | rendered = "0.post.dev%d" % pieces["distance"] 1270 | return rendered 1271 | 1272 | 1273 | def render_pep440_post(pieces): 1274 | """TAG[.postDISTANCE[.dev0]+gHEX] . 1275 | 1276 | The ".dev0" means dirty. 
Note that .dev0 sorts backwards 1277 | (a dirty tree will appear "older" than the corresponding clean one), 1278 | but you shouldn't be releasing software with -dirty anyways. 1279 | 1280 | Exceptions: 1281 | 1: no tags. 0.postDISTANCE[.dev0] 1282 | """ 1283 | if pieces["closest-tag"]: 1284 | rendered = pieces["closest-tag"] 1285 | if pieces["distance"] or pieces["dirty"]: 1286 | rendered += ".post%d" % pieces["distance"] 1287 | if pieces["dirty"]: 1288 | rendered += ".dev0" 1289 | rendered += plus_or_dot(pieces) 1290 | rendered += "g%s" % pieces["short"] 1291 | else: 1292 | # exception #1 1293 | rendered = "0.post%d" % pieces["distance"] 1294 | if pieces["dirty"]: 1295 | rendered += ".dev0" 1296 | rendered += "+g%s" % pieces["short"] 1297 | return rendered 1298 | 1299 | 1300 | def render_pep440_old(pieces): 1301 | """TAG[.postDISTANCE[.dev0]] . 1302 | 1303 | The ".dev0" means dirty. 1304 | 1305 | Eexceptions: 1306 | 1: no tags. 0.postDISTANCE[.dev0] 1307 | """ 1308 | if pieces["closest-tag"]: 1309 | rendered = pieces["closest-tag"] 1310 | if pieces["distance"] or pieces["dirty"]: 1311 | rendered += ".post%d" % pieces["distance"] 1312 | if pieces["dirty"]: 1313 | rendered += ".dev0" 1314 | else: 1315 | # exception #1 1316 | rendered = "0.post%d" % pieces["distance"] 1317 | if pieces["dirty"]: 1318 | rendered += ".dev0" 1319 | return rendered 1320 | 1321 | 1322 | def render_git_describe(pieces): 1323 | """TAG[-DISTANCE-gHEX][-dirty]. 1324 | 1325 | Like 'git describe --tags --dirty --always'. 1326 | 1327 | Exceptions: 1328 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 1329 | """ 1330 | if pieces["closest-tag"]: 1331 | rendered = pieces["closest-tag"] 1332 | if pieces["distance"]: 1333 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1334 | else: 1335 | # exception #1 1336 | rendered = pieces["short"] 1337 | if pieces["dirty"]: 1338 | rendered += "-dirty" 1339 | return rendered 1340 | 1341 | 1342 | def render_git_describe_long(pieces): 1343 | """TAG-DISTANCE-gHEX[-dirty]. 1344 | 1345 | Like 'git describe --tags --dirty --always -long'. 1346 | The distance/hash is unconditional. 1347 | 1348 | Exceptions: 1349 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1350 | """ 1351 | if pieces["closest-tag"]: 1352 | rendered = pieces["closest-tag"] 1353 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1354 | else: 1355 | # exception #1 1356 | rendered = pieces["short"] 1357 | if pieces["dirty"]: 1358 | rendered += "-dirty" 1359 | return rendered 1360 | 1361 | 1362 | def render(pieces, style): 1363 | """Render the given version pieces into the requested style.""" 1364 | if pieces["error"]: 1365 | return {"version": "unknown", 1366 | "full-revisionid": pieces.get("long"), 1367 | "dirty": None, 1368 | "error": pieces["error"], 1369 | "date": None} 1370 | 1371 | if not style or style == "default": 1372 | style = "pep440" # the default 1373 | 1374 | if style == "pep440": 1375 | rendered = render_pep440(pieces) 1376 | elif style == "pep440-pre": 1377 | rendered = render_pep440_pre(pieces) 1378 | elif style == "pep440-post": 1379 | rendered = render_pep440_post(pieces) 1380 | elif style == "pep440-old": 1381 | rendered = render_pep440_old(pieces) 1382 | elif style == "git-describe": 1383 | rendered = render_git_describe(pieces) 1384 | elif style == "git-describe-long": 1385 | rendered = render_git_describe_long(pieces) 1386 | else: 1387 | raise ValueError("unknown style '%s'" % style) 1388 | 1389 | return {"version": rendered, "full-revisionid": pieces["long"], 1390 | "dirty": 
pieces["dirty"], "error": None, 1391 | "date": pieces.get("date")} 1392 | 1393 | 1394 | class VersioneerBadRootError(Exception): 1395 | """The project root directory is unknown or missing key files.""" 1396 | 1397 | 1398 | def get_versions(verbose=False): 1399 | """Get the project version from whatever source is available. 1400 | 1401 | Returns dict with two keys: 'version' and 'full'. 1402 | """ 1403 | if "versioneer" in sys.modules: 1404 | # see the discussion in cmdclass.py:get_cmdclass() 1405 | del sys.modules["versioneer"] 1406 | 1407 | root = get_root() 1408 | cfg = get_config_from_root(root) 1409 | 1410 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1411 | handlers = HANDLERS.get(cfg.VCS) 1412 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1413 | verbose = verbose or cfg.verbose 1414 | assert cfg.versionfile_source is not None, \ 1415 | "please set versioneer.versionfile_source" 1416 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1417 | 1418 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1419 | 1420 | # extract version from first of: _version.py, VCS command (e.g. 'git 1421 | # describe'), parentdir. This is meant to work for developers using a 1422 | # source checkout, for users of a tarball created by 'setup.py sdist', 1423 | # and for users of a tarball/zipball created by 'git archive' or github's 1424 | # download-from-tag feature or the equivalent in other VCSes. 
1425 | 1426 | get_keywords_f = handlers.get("get_keywords") 1427 | from_keywords_f = handlers.get("keywords") 1428 | if get_keywords_f and from_keywords_f: 1429 | try: 1430 | keywords = get_keywords_f(versionfile_abs) 1431 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1432 | if verbose: 1433 | print("got version from expanded keyword %s" % ver) 1434 | return ver 1435 | except NotThisMethod: 1436 | pass 1437 | 1438 | try: 1439 | ver = versions_from_file(versionfile_abs) 1440 | if verbose: 1441 | print("got version from file %s %s" % (versionfile_abs, ver)) 1442 | return ver 1443 | except NotThisMethod: 1444 | pass 1445 | 1446 | from_vcs_f = handlers.get("pieces_from_vcs") 1447 | if from_vcs_f: 1448 | try: 1449 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1450 | ver = render(pieces, cfg.style) 1451 | if verbose: 1452 | print("got version from VCS %s" % ver) 1453 | return ver 1454 | except NotThisMethod: 1455 | pass 1456 | 1457 | try: 1458 | if cfg.parentdir_prefix: 1459 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1460 | if verbose: 1461 | print("got version from parentdir %s" % ver) 1462 | return ver 1463 | except NotThisMethod: 1464 | pass 1465 | 1466 | if verbose: 1467 | print("unable to compute version") 1468 | 1469 | return {"version": "0+unknown", "full-revisionid": None, 1470 | "dirty": None, "error": "unable to compute version", 1471 | "date": None} 1472 | 1473 | 1474 | def get_version(): 1475 | """Get the short version string for this project.""" 1476 | return get_versions()["version"] 1477 | 1478 | 1479 | def get_cmdclass(): 1480 | """Get the custom setuptools/distutils subclasses used by Versioneer.""" 1481 | if "versioneer" in sys.modules: 1482 | del sys.modules["versioneer"] 1483 | # this fixes the "python setup.py develop" case (also 'install' and 1484 | # 'easy_install .'), in which subdependencies of the main project are 1485 | # built (using setup.py bdist_egg) in the same python process. 
Assume 1486 | # a main project A and a dependency B, which use different versions 1487 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1488 | # sys.modules by the time B's setup.py is executed, causing B to run 1489 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1490 | # sandbox that restores sys.modules to it's pre-build state, so the 1491 | # parent is protected against the child's "import versioneer". By 1492 | # removing ourselves from sys.modules here, before the child build 1493 | # happens, we protect the child from the parent's versioneer too. 1494 | # Also see https://github.com/warner/python-versioneer/issues/52 1495 | 1496 | cmds = {} 1497 | 1498 | # we add "version" to both distutils and setuptools 1499 | from distutils.core import Command 1500 | 1501 | class cmd_version(Command): 1502 | description = "report generated version string" 1503 | user_options = [] 1504 | boolean_options = [] 1505 | 1506 | def initialize_options(self): 1507 | pass 1508 | 1509 | def finalize_options(self): 1510 | pass 1511 | 1512 | def run(self): 1513 | vers = get_versions(verbose=True) 1514 | print("Version: %s" % vers["version"]) 1515 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1516 | print(" dirty: %s" % vers.get("dirty")) 1517 | print(" date: %s" % vers.get("date")) 1518 | if vers["error"]: 1519 | print(" error: %s" % vers["error"]) 1520 | cmds["version"] = cmd_version 1521 | 1522 | # we override "build_py" in both distutils and setuptools 1523 | # 1524 | # most invocation pathways end up running build_py: 1525 | # distutils/build -> build_py 1526 | # distutils/install -> distutils/build ->.. 1527 | # setuptools/bdist_wheel -> distutils/install ->.. 1528 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1529 | # setuptools/install -> bdist_egg ->.. 1530 | # setuptools/develop -> ? 
1531 | # pip install: 1532 | # copies source tree to a tempdir before running egg_info/etc 1533 | # if .git isn't copied too, 'git describe' will fail 1534 | # then does setup.py bdist_wheel, or sometimes setup.py install 1535 | # setup.py egg_info -> ? 1536 | 1537 | # we override different "build_py" commands for both environments 1538 | if "setuptools" in sys.modules: 1539 | from setuptools.command.build_py import build_py as _build_py 1540 | else: 1541 | from distutils.command.build_py import build_py as _build_py 1542 | 1543 | class cmd_build_py(_build_py): 1544 | def run(self): 1545 | root = get_root() 1546 | cfg = get_config_from_root(root) 1547 | versions = get_versions() 1548 | _build_py.run(self) 1549 | # now locate _version.py in the new build/ directory and replace 1550 | # it with an updated value 1551 | if cfg.versionfile_build: 1552 | target_versionfile = os.path.join(self.build_lib, 1553 | cfg.versionfile_build) 1554 | print("UPDATING %s" % target_versionfile) 1555 | write_to_version_file(target_versionfile, versions) 1556 | cmds["build_py"] = cmd_build_py 1557 | 1558 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 1559 | from cx_Freeze.dist import build_exe as _build_exe 1560 | # nczeczulin reports that py2exe won't like the pep440-style string 1561 | # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 1562 | # setup(console=[{ 1563 | # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION 1564 | # "product_version": versioneer.get_version(), 1565 | # ... 
1566 | 1567 | class cmd_build_exe(_build_exe): 1568 | def run(self): 1569 | root = get_root() 1570 | cfg = get_config_from_root(root) 1571 | versions = get_versions() 1572 | target_versionfile = cfg.versionfile_source 1573 | print("UPDATING %s" % target_versionfile) 1574 | write_to_version_file(target_versionfile, versions) 1575 | 1576 | _build_exe.run(self) 1577 | os.unlink(target_versionfile) 1578 | with open(cfg.versionfile_source, "w") as f: 1579 | LONG = LONG_VERSION_PY[cfg.VCS] 1580 | f.write(LONG % 1581 | {"DOLLAR": "$", 1582 | "STYLE": cfg.style, 1583 | "TAG_PREFIX": cfg.tag_prefix, 1584 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1585 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1586 | }) 1587 | cmds["build_exe"] = cmd_build_exe 1588 | del cmds["build_py"] 1589 | 1590 | if 'py2exe' in sys.modules: # py2exe enabled? 1591 | try: 1592 | from py2exe.distutils_buildexe import py2exe as _py2exe # py3 1593 | except ImportError: 1594 | from py2exe.build_exe import py2exe as _py2exe # py2 1595 | 1596 | class cmd_py2exe(_py2exe): 1597 | def run(self): 1598 | root = get_root() 1599 | cfg = get_config_from_root(root) 1600 | versions = get_versions() 1601 | target_versionfile = cfg.versionfile_source 1602 | print("UPDATING %s" % target_versionfile) 1603 | write_to_version_file(target_versionfile, versions) 1604 | 1605 | _py2exe.run(self) 1606 | os.unlink(target_versionfile) 1607 | with open(cfg.versionfile_source, "w") as f: 1608 | LONG = LONG_VERSION_PY[cfg.VCS] 1609 | f.write(LONG % 1610 | {"DOLLAR": "$", 1611 | "STYLE": cfg.style, 1612 | "TAG_PREFIX": cfg.tag_prefix, 1613 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1614 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1615 | }) 1616 | cmds["py2exe"] = cmd_py2exe 1617 | 1618 | # we override different "sdist" commands for both environments 1619 | if "setuptools" in sys.modules: 1620 | from setuptools.command.sdist import sdist as _sdist 1621 | else: 1622 | from distutils.command.sdist import sdist as _sdist 1623 | 
1624 | class cmd_sdist(_sdist): 1625 | def run(self): 1626 | versions = get_versions() 1627 | self._versioneer_generated_versions = versions 1628 | # unless we update this, the command will keep using the old 1629 | # version 1630 | self.distribution.metadata.version = versions["version"] 1631 | return _sdist.run(self) 1632 | 1633 | def make_release_tree(self, base_dir, files): 1634 | root = get_root() 1635 | cfg = get_config_from_root(root) 1636 | _sdist.make_release_tree(self, base_dir, files) 1637 | # now locate _version.py in the new base_dir directory 1638 | # (remembering that it may be a hardlink) and replace it with an 1639 | # updated value 1640 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1641 | print("UPDATING %s" % target_versionfile) 1642 | write_to_version_file(target_versionfile, 1643 | self._versioneer_generated_versions) 1644 | cmds["sdist"] = cmd_sdist 1645 | 1646 | return cmds 1647 | 1648 | 1649 | CONFIG_ERROR = """ 1650 | setup.cfg is missing the necessary Versioneer configuration. You need 1651 | a section like: 1652 | 1653 | [versioneer] 1654 | VCS = git 1655 | style = pep440 1656 | versionfile_source = src/myproject/_version.py 1657 | versionfile_build = myproject/_version.py 1658 | tag_prefix = 1659 | parentdir_prefix = myproject- 1660 | 1661 | You will also need to edit your setup.py to use the results: 1662 | 1663 | import versioneer 1664 | setup(version=versioneer.get_version(), 1665 | cmdclass=versioneer.get_cmdclass(), ...) 1666 | 1667 | Please read the docstring in ./versioneer.py for configuration instructions, 1668 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 1669 | """ 1670 | 1671 | SAMPLE_CONFIG = """ 1672 | # See the docstring in versioneer.py for instructions. Note that you must 1673 | # re-run 'versioneer.py setup' after changing this section, and commit the 1674 | # resulting files. 
1675 | 1676 | [versioneer] 1677 | #VCS = git 1678 | #style = pep440 1679 | #versionfile_source = 1680 | #versionfile_build = 1681 | #tag_prefix = 1682 | #parentdir_prefix = 1683 | 1684 | """ 1685 | 1686 | INIT_PY_SNIPPET = """ 1687 | from ._version import get_versions 1688 | __version__ = get_versions()['version'] 1689 | del get_versions 1690 | """ 1691 | 1692 | 1693 | def do_setup(): 1694 | """Main VCS-independent setup function for installing Versioneer.""" 1695 | root = get_root() 1696 | try: 1697 | cfg = get_config_from_root(root) 1698 | except (EnvironmentError, configparser.NoSectionError, 1699 | configparser.NoOptionError) as e: 1700 | if isinstance(e, (EnvironmentError, configparser.NoSectionError)): 1701 | print("Adding sample versioneer config to setup.cfg", 1702 | file=sys.stderr) 1703 | with open(os.path.join(root, "setup.cfg"), "a") as f: 1704 | f.write(SAMPLE_CONFIG) 1705 | print(CONFIG_ERROR, file=sys.stderr) 1706 | return 1 1707 | 1708 | print(" creating %s" % cfg.versionfile_source) 1709 | with open(cfg.versionfile_source, "w") as f: 1710 | LONG = LONG_VERSION_PY[cfg.VCS] 1711 | f.write(LONG % {"DOLLAR": "$", 1712 | "STYLE": cfg.style, 1713 | "TAG_PREFIX": cfg.tag_prefix, 1714 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1715 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1716 | }) 1717 | 1718 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), 1719 | "__init__.py") 1720 | if os.path.exists(ipy): 1721 | try: 1722 | with open(ipy, "r") as f: 1723 | old = f.read() 1724 | except EnvironmentError: 1725 | old = "" 1726 | if INIT_PY_SNIPPET not in old: 1727 | print(" appending to %s" % ipy) 1728 | with open(ipy, "a") as f: 1729 | f.write(INIT_PY_SNIPPET) 1730 | else: 1731 | print(" %s unmodified" % ipy) 1732 | else: 1733 | print(" %s doesn't exist, ok" % ipy) 1734 | ipy = None 1735 | 1736 | # Make sure both the top-level "versioneer.py" and versionfile_source 1737 | # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so 1738 | # 
they'll be copied into source distributions. Pip won't be able to 1739 | # install the package without this. 1740 | manifest_in = os.path.join(root, "MANIFEST.in") 1741 | simple_includes = set() 1742 | try: 1743 | with open(manifest_in, "r") as f: 1744 | for line in f: 1745 | if line.startswith("include "): 1746 | for include in line.split()[1:]: 1747 | simple_includes.add(include) 1748 | except EnvironmentError: 1749 | pass 1750 | # That doesn't cover everything MANIFEST.in can do 1751 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so 1752 | # it might give some false negatives. Appending redundant 'include' 1753 | # lines is safe, though. 1754 | if "versioneer.py" not in simple_includes: 1755 | print(" appending 'versioneer.py' to MANIFEST.in") 1756 | with open(manifest_in, "a") as f: 1757 | f.write("include versioneer.py\n") 1758 | else: 1759 | print(" 'versioneer.py' already in MANIFEST.in") 1760 | if cfg.versionfile_source not in simple_includes: 1761 | print(" appending versionfile_source ('%s') to MANIFEST.in" % 1762 | cfg.versionfile_source) 1763 | with open(manifest_in, "a") as f: 1764 | f.write("include %s\n" % cfg.versionfile_source) 1765 | else: 1766 | print(" versionfile_source already in MANIFEST.in") 1767 | 1768 | # Make VCS-specific changes. For git, this means creating/changing 1769 | # .gitattributes to mark _version.py for export-subst keyword 1770 | # substitution. 
1771 | do_vcs_install(manifest_in, cfg.versionfile_source, ipy) 1772 | return 0 1773 | 1774 | 1775 | def scan_setup_py(): 1776 | """Validate the contents of setup.py against Versioneer's expectations.""" 1777 | found = set() 1778 | setters = False 1779 | errors = 0 1780 | with open("setup.py", "r") as f: 1781 | for line in f.readlines(): 1782 | if "import versioneer" in line: 1783 | found.add("import") 1784 | if "versioneer.get_cmdclass()" in line: 1785 | found.add("cmdclass") 1786 | if "versioneer.get_version()" in line: 1787 | found.add("get_version") 1788 | if "versioneer.VCS" in line: 1789 | setters = True 1790 | if "versioneer.versionfile_source" in line: 1791 | setters = True 1792 | if len(found) != 3: 1793 | print("") 1794 | print("Your setup.py appears to be missing some important items") 1795 | print("(but I might be wrong). Please make sure it has something") 1796 | print("roughly like the following:") 1797 | print("") 1798 | print(" import versioneer") 1799 | print(" setup( version=versioneer.get_version(),") 1800 | print(" cmdclass=versioneer.get_cmdclass(), ...)") 1801 | print("") 1802 | errors += 1 1803 | if setters: 1804 | print("You should remove lines like 'versioneer.VCS = ' and") 1805 | print("'versioneer.versionfile_source = ' . This configuration") 1806 | print("now lives in setup.cfg, and should be removed from setup.py") 1807 | print("") 1808 | errors += 1 1809 | return errors 1810 | 1811 | if __name__ == "__main__": 1812 | cmd = sys.argv[1] 1813 | if cmd == "setup": 1814 | errors = do_setup() 1815 | errors += scan_setup_py() 1816 | if errors: 1817 | sys.exit(1) 1818 | --------------------------------------------------------------------------------