├── .gitattributes
├── MANIFEST.in
├── bin
│   └── shotgunCache
├── setup.cfg
├── shotgunCache
│   ├── __init__.py
│   ├── resources
│   │   └── config.yaml.template
│   ├── validateCounts.py
│   ├── _version.py
│   ├── entityConfig.py
│   ├── monitor.py
│   ├── validateFields.py
│   ├── utils.py
│   ├── entityImporter.py
│   ├── controller.py
│   └── main.py
├── .gitignore
├── LICENSE
├── setup.py
├── README.md
└── versioneer.py
/.gitattributes: -------------------------------------------------------------------------------- 1 | shotgunCache/_version.py export-subst 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include README.md 3 | recursive-include shotgunCache/resources * 4 | -------------------------------------------------------------------------------- /bin/shotgunCache: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from shotgunCache.main import main 4 | 5 | sys.exit(main(sys.argv)) 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | universal = 1 3 | 4 | [flake8] 5 | max-line-length = 160 6 | exclude = _version.py 7 | ignore = E302,E301,F403 8 | -------------------------------------------------------------------------------- /shotgunCache/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | shotgunCache 5 | 6 | Copyright (c) 2015 Moonbot Studios. All rights reserved. 7 | """ 8 | 9 | from controller import * 10 | from monitor import * 11 | from entityConfig import * 12 | from entityImporter import * 13 | from validateCounts import * 14 | from validateFields import * 15 | from utils import * 16 | 17 | from ._version import get_versions 18 | __version__ = get_versions()['version'] 19 | del get_versions 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | config.yaml 2 | 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Packages 9 | *.egg 10 | *.egg-info 11 | dist 12 | build 13 | eggs 14 | parts 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | htmlcov 30 | 31 | # Translations 32 | *.mo 33 | 34 | # Complexity 35 | output/*.html 36 | output/*/index.html 37 | 38 | # Sphinx 39 | docs/_build 40 | 41 | .vagrant 42 | *.DS_Store 43 | 44 | MANIFEST 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Moonbot Studios 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | try: 5 | from setuptools import setup 6 | except ImportError: 7 | from distutils.core import setup 8 | 9 | import versioneer 10 | 11 | versioneer.VCS = 'git' 12 | versioneer.versionfile_source = 'shotgunCache/_version.py' 13 | versioneer.versionfile_build = 'shotgunCache/_version.py' 14 | versioneer.tag_prefix = '' # tags are like 1.2.0 15 | versioneer.parentdir_prefix = 'shotgunCache' # dirname like 'myproject-1.2.0' 16 | 17 | readme = open('README.md').read().strip() 18 | license = open('LICENSE').read().strip() 19 | 20 | setup( 21 | name='shotgunCache', 22 | version=versioneer.get_version(), 23 | license=license, 24 | cmdclass=versioneer.get_cmdclass(), 25 | description='Shotgun Cache Server', 26 | long_description=readme, 27 | author='Moonbot Studios', 28 | author_email='brennan@moonbotstudios.com', 29 | url='https://github.com/moonbot/shotgun-cache-server', 30 | packages=[ 31 | 'shotgunCache', 32 | ], 33 | scripts=[ 34 | 'bin/shotgunCache' 35 | ], 36 | package_dir={'shotgunCache': 37 | 'shotgunCache'}, 38 | include_package_data=True, 39 | install_requires=[ 40 | 'rethinkdb>=2.0.0.post1', 41 | 'pyyaml>=3.11', 42 | 'ruamel.yaml>=0.8', 43 | 'pyzmq>=13.1.0', 44 | 'shotgun_api3>=3.0.19', 45 | ], 46 | zip_safe=False, 47 | keywords='shotgunCache', 48 | ) 49 | -------------------------------------------------------------------------------- /shotgunCache/resources/config.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | # Shotgun configuration for your site 3 | shotgun: 4 | base_url: https://yoursite.shotgunstudio.com 5 | script_name: shotgunEventDaemon 6 | api_key: 841c9a51bf45e1872c3e5f2435dade67458bc858 7 | ensure_ascii: false # Avoids unicode errors 8 | 9 | # Connection information for rethinkdb 10 | # This is provided directly to the rethinkdb connect command 11 | # http://rethinkdb.com/api/python/connect/ 12 | rethink: 13 | host: 'localhost' 14 | port: 28015 15 | timeout: 20 16 | db: 'shotguncache' 17 | 18 | # Where the history state is tracked 19 | # This includes: 20 | # - Latest event log entry processed 21 | # - Hashes for each of the entity config files 22 | # - List of which entities have been imported 23 | history_filename: history 24 | 25 | # Location of the cache configuration folder 26 | entity_config_foldername: entity-configs 27 | 28 | # Template for naming the rethinkdb tables 29 | rethink_schema_table: schema 30 | rethink_entity_table_template: entity_{type} 31 | rethink_stat_table_template: stat_{type} 32 | 33 | # Main URL for the cache server to listen for input from the monitor 34 | # or to interact with the command line while active 35 | 
zmq_controller_work_url: tcp://0.0.0.0:5557 36 | 37 | # Whether to track stats on the performance of the 38 | # cache server 39 | enable_stats: true 40 | 41 | # List of fields with default values 42 | # If not specified, then None/null will be used 43 | shotgun_field_type_defaults: 44 | multi_entity: [] 45 | tag_list: [] 46 | 47 | # If an entity type is no longer configured for caching, delete its data from the cache server 48 | delete_cache_for_untracked_entities: true 49 | 50 | # How entities are initially imported or triggered for reimport 51 | import: 52 | 53 | # No. of processes to use when retrieving information from shotgun 54 | # All calls to shotgun will be distributed among each of these 55 | processes: 4 56 | 57 | # Limit for each of the shotgun find requests 58 | batch_size: 250 59 | 60 | zmq_pull_url: tcp://0.0.0.0:5558 61 | zmq_post_url: tcp://0.0.0.0:5559 62 | 63 | 64 | # Settings for the shotgun Monitor 65 | monitor: 66 | # Number of seconds to wait before requesting new events after each batch of events 67 | # is done processing 68 | fetch_interval: 1 69 | 70 | # If the connection to shotgun fails, number of seconds to wait until we retry. 71 | # This allows for occasional network hiccups, server restarts, application maintenance, 72 | # etc. 73 | conn_retry_sleep: 60 74 | 75 | # Number of times to retry connection before logging an error level message (which 76 | # sends an email in the default configuration) 77 | max_conn_retries: 5 78 | 79 | # Maximum number of events to fetch at once. 80 | max_event_batch_size: 500 81 | 82 | # Time between posting stat about monitor being active 83 | heartbeat_interval: 60 84 | 85 | validate_counts: 86 | # No. of processes to use when validating information from shotgun 87 | # All calls to shotgun will be distributed among each of these 88 | processes: 4 89 | 90 | create_entity_config: 91 | # List of field name patterns to ignore 92 | # when creating entity config files 93 | field_patterns_to_ignore: 94 | - cached_display_name 95 | - pinned 96 | - visible 97 | - billboard 98 | - sg_uploaded_movie 99 | - sg_uploaded_movie_mp4 100 | - sg_uploaded_movie_webm 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Shotgun Cache 3 | 4 | This python module provides the tools required to maintain a local caching layer for Shotgun. 5 | This aims to reduce the delay when accessing the data stored in Shotgun. 6 | 7 | On average, queries to Shotgun take between 100-500 ms; with the caching layer this can be reduced to 10-20 ms. 8 | 9 | We've utilized [RethinkDB](http://rethinkdb.com/) as the database backend due to its speed, scalability, and query capabilities. 10 | It's a schema-less database, but still supports joining and merging via queries. 11 | 12 | ## How it works 13 | 14 | This caching layer is aimed at providing a partial replica of your Shotgun database. 15 | This allows you to limit the caching to only the entity types and fields you need. 16 | 17 | On first load for each entity type, a batch import is performed loading all existing entities. 18 | Then, the database is kept in sync through a process similar to Shotgun's Event Log Daemon. 19 | Shotgun is polled for changes at a periodic interval, set by `fetch_interval` (1 second in the template config), using Event Log Entries. 20 | The changes reported in these entries are then applied to the cached data. 
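Since the cache is just a RethinkDB database, any script with the driver installed can read from it directly. Below is a minimal sketch (not part of this repo) of what such a read could look like; the `entity_Shot` table name assumes the `entity_{type}` template and the `shotguncache` database from the template config, and the project id is hypothetical:

```python
import rethinkdb as r

# Connection settings mirror the `rethink` block of config.yaml
conn = r.connect(host='localhost', port=28015, db='shotguncache')

# Fetch all cached Shots for one project. Linked entities are cached
# as plain {'type': ..., 'id': ...} dicts, so they can be matched directly.
shots = list(r.table('entity_Shot')
             .filter({'project': {'type': 'Project', 'id': 65}})
             .run(conn))
```

A query like this is served entirely from the local database, which is where the 10-20 ms response times mentioned above come from.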
21 | 22 | ## Limitations 23 | 24 | - The cache layer is not set up for posting changes from the cache to Shotgun. 25 | It only receives changes from Shotgun and applies them to the cache. 26 | - When the cache configuration changes, the current items for changed entity types are deleted and reloaded. 27 | - There is a delay associated with the cache based on the `fetch_interval` setting. 28 | In the template config this is 1 second. 29 | So the data stored in the cache can be up to one polling interval behind any changes made in Shotgun. 30 | If you're using this cached data in your scripts, you should consider whether this delay is acceptable. 31 | 32 | ## How to use 33 | 34 | This module only provides the tools to maintain the local database. 35 | It doesn't include tools for your scripts to access the database. 36 | For this, I recommend using our `ShotgunCacheProxy` (Coming Soon) which allows you to use Shotgun's existing API to communicate with the caching layer. 37 | 38 | You could also use RethinkDB directly to query data (see the example above). 39 | 40 | ## System Requirements 41 | The cache server can be run on any machine that has Python 2.7 installed and has network access to your Shotgun server. 42 | 43 | You'll need to install RethinkDB on your server. 44 | More information about the requirements and installation can be found here: 45 | http://rethinkdb.com/docs/install/ 46 | 47 | ### Required Python Modules 48 | - [Shotgun Python API](https://github.com/shotgunsoftware/python-api) v3.0+ 49 | - [ZeroMQ](http://zeromq.org/bindings:python) 50 | - [yaml](http://pyyaml.org/) 51 | - [ruamel.yaml](https://pypi.python.org/pypi/ruamel.yaml/0.6) 52 | - [rethinkdb](http://rethinkdb.com/docs/install-drivers/python/) 53 | 54 | 55 | ## Setup 56 | 57 | The first thing you need to do is download this repo. 58 | Once downloaded, you can install the script using setuptools. 59 | This should automatically install all python dependencies except the Shotgun Python API, which must be installed manually. 60 | Navigate inside the git repo and run: 61 | ``` 62 | $ python setup.py install 63 | ``` 64 | 65 | Once installed, you can begin the cache setup process by running: 66 | ``` 67 | $ shotgunCache setup 68 | ``` 69 | 70 | Follow the prompts, providing the required information. 71 | You will need to provide an API key for the script to access Shotgun. 72 | More details about this can be found here: 73 | https://support.shotgunsoftware.com/entries/21193476-How-to-create-and-manage-API-Scripts 74 | 75 | After you've run the setup, make any changes required to the generated entity config files. 76 | The entity configs are stored as json files, and you can adjust them by: 77 | - Removing any fields you don't want to cache from the `fields` key. 78 | 79 | 80 | ## Starting the server 81 | Once you've completed the setup process, you're ready to start the server. 82 | 83 | If you installed the config to any location other than the default (~/shotguncache), you'll need to 84 | set the `SHOTGUN_CACHE_CONFIG` environment variable to point to this new path. 85 | 86 | ``` 87 | $ shotgunCache run 88 | ``` 89 | 90 | Once started, the server will import all new entities from Shotgun in one batch, then apply the changes received for each entity type from the Event Log Entries. 91 | 92 | 93 | ---------- 94 | 95 | 96 | ## TODO 97 | 98 | - Project specific schema for entity config manager? 99 | - Binary support for images, thumbnails, etc... 100 | - Implement support for filtering entities that are cached. 
101 | - This is easy to do on initial import 102 | - However, we need some way to maintain the filters through Event Log updates 103 | - Figure out a way to handle storing of event log entries 104 | - No easy way to load event log entries from before the cache started due to lack of support 105 | for filtering based on the `meta` field. 106 | - We could filter by the `entity` field, but this would only allow for handling entities that aren't currently deleted. 107 | - It would be great to fit this in the same system as the current entity config for caches, but it would take a lot of workarounds to reproduce the filters and such. 108 | -------------------------------------------------------------------------------- /shotgunCache/validateCounts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import logging 4 | import datetime 5 | import multiprocessing 6 | import Queue 7 | 8 | import rethinkdb 9 | 10 | __all__ = [ 11 | 'CountValidator' 12 | ] 13 | 14 | LOG = logging.getLogger(__name__) 15 | 16 | 17 | class CountValidator(object): 18 | def __init__(self, config, entityConfigs): 19 | super(CountValidator, self).__init__() 20 | self.config = config 21 | self.entityConfigs = entityConfigs 22 | 23 | self.workQueue = multiprocessing.JoinableQueue() 24 | self.resultQueue = multiprocessing.Queue() 25 | self.processes = [] 26 | self.results = [] 27 | 28 | def start(self, raiseExc=True): 29 | LOG.info("Starting Validate Counts") 30 | self.launchWorkers() 31 | self.run() 32 | self.terminateWorkers() 33 | 34 | if raiseExc: 35 | failed = [] 36 | for result in self.results: 37 | if result['failed']: 38 | failed.append(result) 39 | 40 | if len(failed): 41 | raise RuntimeError("Validation Failed, {0} cached entity type(s) do not match".format(len(failed))) 42 | 43 | return self.results 44 | 45 | def launchWorkers(self): 46 | processCount = min(len(self.entityConfigs), self.config['validate_counts.processes']) 47 | LOG.debug("Launching {0} validate workers".format(processCount)) 48 | for n in range(processCount): 49 | worker = CountValidateWorker(self.workQueue, self.resultQueue, self.config, self.entityConfigs) 50 | proc = multiprocessing.Process(target=worker.start) 51 | proc.start() 52 | self.processes.append(proc) 53 | 54 | def run(self): 55 | LOG.debug("Adding items to validate queue") 56 | for config in self.entityConfigs: 57 | data = {'configType': config.type} 58 | self.workQueue.put(data) 59 | self.workQueue.join() 60 | 61 | results = [] 62 | while True: 63 | try: 64 | result = self.resultQueue.get(False) 65 | except Queue.Empty: 66 | break 67 | else: 68 | if result: 69 | results.append(result) 70 | self.results = results 71 | 72 | def terminateWorkers(self): 73 | LOG.debug("Terminating validate workers") 74 | for proc in self.processes: 75 | proc.terminate() 76 | self.processes = [] 77 | 78 | 79 | class CountValidateWorker(object): 80 | def __init__(self, workQueue, resultQueue, config, entityConfigs): 81 | super(CountValidateWorker, self).__init__() 82 | self.workQueue = workQueue 83 | self.resultQueue = resultQueue 84 | self.config = config 85 | self.entityConfigs = dict([(c.type, c) for c in entityConfigs]) 86 | 87 | self.sg = None 88 | self.rethink = None 89 | 90 | def start(self): 91 | self.sg = self.config.createShotgunConnection(convert_datetimes_to_utc=False) 92 | self.rethink = self.config.createRethinkConnection() 93 | self.run() 94 | 95 | def run(self): 96 | workerPID = os.getpid() 97 | LOG.debug("Validate Worker Running: {0}".format(workerPID))
Running: {0}".format(workerPID)) 98 | while True: 99 | try: 100 | work = self.workQueue.get() 101 | except Queue.Emtpy: 102 | continue 103 | time.sleep(0.1) 104 | 105 | entityConfig = self.entityConfigs[work['configType']] 106 | 107 | LOG.debug("Getting Shotgun counts for type: '{0}'".format(work['configType'])) 108 | sgResult = self.sg.summarize(entityConfig.type, [], summary_fields=[{'field': 'id', 'type': 'count'}]) 109 | 110 | sgCount = sgResult['summaries']['id'] 111 | 112 | cacheCount = 0 113 | try: 114 | LOG.debug("Getting cache counts for type: '{0}'".format(work['configType'])) 115 | cacheSearchTime = datetime.datetime.utcnow() 116 | cacheCount = rethinkdb.table(entityConfig['table']).count().run(self.rethink) 117 | except rethinkdb.errors.RqlRuntimeError: 118 | cacheCount = 0 119 | 120 | # Find the diff of events that have happened in Shotgun, but not been saved to the cache yet 121 | # Searches all event log entries for this entity type that are New, Retired, or Revive occurring in the past fetch_interval 122 | # including a small amount of processing padding for the cache 123 | self.config.history.load() 124 | latestCachedEventID = self.config.history['latest_event_log_entry']['id'] 125 | minTime = cacheSearchTime - datetime.timedelta(seconds=self.config['monitor.fetch_interval'] + 0.05) 126 | maxTime = cacheSearchTime 127 | eventTypes = ['Shotgun_{entityType}_{changeType}'.format(entityType=entityConfig.type, changeType=t) for t in ['New', 'Retirement', 'Revival']] 128 | eventLogFilters = [ 129 | ['event_type', 'in', eventTypes], 130 | ['created_at', 'between', [minTime, maxTime]], 131 | ['id', 'greater_than', latestCachedEventID] 132 | ] 133 | 134 | LOG.debug("Getting Pending Event Log Entries for type: '{0}'".format(work['configType'])) 135 | eventLogEntries = self.sg.find('EventLogEntry', eventLogFilters, ['event_type', 'id']) 136 | 137 | additions = len([e for e in eventLogEntries if 'New' in e['event_type'] or 'Revival' in e['event_type']]) 138 | removals = len([e for e in eventLogEntries if 'Retirement' in e['event_type']]) 139 | pendingDiff = additions - removals 140 | 141 | failed = sgCount - pendingDiff != cacheCount 142 | 143 | if failed: 144 | LOG.debug("'{0}' counts don't match, SG: {1} Cache: {2}".format(entityConfig.type, sgCount, cacheCount)) 145 | else: 146 | LOG.debug("'{0}' counts match, SG: {1} Cache: {2}".format(entityConfig.type, sgCount, cacheCount)) 147 | 148 | result = { 149 | 'work': work, 150 | 'entityType': work['configType'], 151 | 'failed': failed, 152 | 'sgCount': sgCount, 153 | 'pendingEvents': len(eventLogEntries), 154 | 'pendingDiff': pendingDiff, 155 | 'cacheCount': cacheCount, 156 | } 157 | self.resultQueue.put(result) 158 | 159 | self.workQueue.task_done() 160 | -------------------------------------------------------------------------------- /shotgunCache/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. 
Generated by 9 | # versioneer-0.12 (https://github.com/warner/python-versioneer) 10 | 11 | # these strings will be replaced by git during git-archive 12 | git_refnames = " (HEAD -> master, tag: v0.2.0)" 13 | git_full = "1a4d287e89cf6422b564accc5db4e7879aaad51d" 14 | 15 | # these strings are filled in when 'setup.py versioneer' creates _version.py 16 | tag_prefix = "" 17 | parentdir_prefix = "shotgunCache" 18 | versionfile_source = "shotgunCache/_version.py" 19 | 20 | import os, sys, re, subprocess, errno 21 | 22 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 23 | assert isinstance(commands, list) 24 | p = None 25 | for c in commands: 26 | try: 27 | # remember shell=False, so use git.cmd on windows, not just git 28 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 29 | stderr=(subprocess.PIPE if hide_stderr 30 | else None)) 31 | break 32 | except EnvironmentError: 33 | e = sys.exc_info()[1] 34 | if e.errno == errno.ENOENT: 35 | continue 36 | if verbose: 37 | print("unable to run %s" % args[0]) 38 | print(e) 39 | return None 40 | else: 41 | if verbose: 42 | print("unable to find command, tried %s" % (commands,)) 43 | return None 44 | stdout = p.communicate()[0].strip() 45 | if sys.version >= '3': 46 | stdout = stdout.decode() 47 | if p.returncode != 0: 48 | if verbose: 49 | print("unable to run %s (error)" % args[0]) 50 | return None 51 | return stdout 52 | 53 | 54 | def versions_from_parentdir(parentdir_prefix, root, verbose=False): 55 | # Source tarballs conventionally unpack into a directory that includes 56 | # both the project name and a version string. 57 | dirname = os.path.basename(root) 58 | if not dirname.startswith(parentdir_prefix): 59 | if verbose: 60 | print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" % 61 | (root, dirname, parentdir_prefix)) 62 | return None 63 | return {"version": dirname[len(parentdir_prefix):], "full": ""} 64 | 65 | def git_get_keywords(versionfile_abs): 66 | # the code embedded in _version.py can just fetch the value of these 67 | # keywords. When used from setup.py, we don't want to import _version.py, 68 | # so we do it with a regexp instead. This function is not used from 69 | # _version.py. 70 | keywords = {} 71 | try: 72 | f = open(versionfile_abs,"r") 73 | for line in f.readlines(): 74 | if line.strip().startswith("git_refnames ="): 75 | mo = re.search(r'=\s*"(.*)"', line) 76 | if mo: 77 | keywords["refnames"] = mo.group(1) 78 | if line.strip().startswith("git_full ="): 79 | mo = re.search(r'=\s*"(.*)"', line) 80 | if mo: 81 | keywords["full"] = mo.group(1) 82 | f.close() 83 | except EnvironmentError: 84 | pass 85 | return keywords 86 | 87 | def git_versions_from_keywords(keywords, tag_prefix, verbose=False): 88 | if not keywords: 89 | return {} # keyword-finding function failed to find keywords 90 | refnames = keywords["refnames"].strip() 91 | if refnames.startswith("$Format"): 92 | if verbose: 93 | print("keywords are unexpanded, not using") 94 | return {} # unexpanded, so not in an unpacked git-archive tarball 95 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 96 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 97 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 98 | TAG = "tag: " 99 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 100 | if not tags: 101 | # Either we're using git < 1.8.3, or there really are no tags. We use 102 | # a heuristic: assume all version tags have a digit. 
The old git %d 103 | # expansion behaves like git log --decorate=short and strips out the 104 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 105 | # between branches and tags. By ignoring refnames without digits, we 106 | # filter out many common branch names like "release" and 107 | # "stabilization", as well as "HEAD" and "master". 108 | tags = set([r for r in refs if re.search(r'\d', r)]) 109 | if verbose: 110 | print("discarding '%s', no digits" % ",".join(refs-tags)) 111 | if verbose: 112 | print("likely tags: %s" % ",".join(sorted(tags))) 113 | for ref in sorted(tags): 114 | # sorting will prefer e.g. "2.0" over "2.0rc1" 115 | if ref.startswith(tag_prefix): 116 | r = ref[len(tag_prefix):] 117 | if verbose: 118 | print("picking %s" % r) 119 | return { "version": r, 120 | "full": keywords["full"].strip() } 121 | # no suitable tags, so we use the full revision id 122 | if verbose: 123 | print("no suitable tags, using full revision id") 124 | return { "version": keywords["full"].strip(), 125 | "full": keywords["full"].strip() } 126 | 127 | 128 | def git_versions_from_vcs(tag_prefix, root, verbose=False): 129 | # this runs 'git' from the root of the source tree. This only gets called 130 | # if the git-archive 'subst' keywords were *not* expanded, and 131 | # _version.py hasn't already been rewritten with a short version string, 132 | # meaning we're inside a checked out source tree. 133 | 134 | if not os.path.exists(os.path.join(root, ".git")): 135 | if verbose: 136 | print("no .git in %s" % root) 137 | return {} 138 | 139 | GITS = ["git"] 140 | if sys.platform == "win32": 141 | GITS = ["git.cmd", "git.exe"] 142 | stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always"], 143 | cwd=root) 144 | if stdout is None: 145 | return {} 146 | if not stdout.startswith(tag_prefix): 147 | if verbose: 148 | print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)) 149 | return {} 150 | tag = stdout[len(tag_prefix):] 151 | stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 152 | if stdout is None: 153 | return {} 154 | full = stdout.strip() 155 | if tag.endswith("-dirty"): 156 | full += "-dirty" 157 | return {"version": tag, "full": full} 158 | 159 | 160 | def get_versions(default={"version": "unknown", "full": ""}, verbose=False): 161 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 162 | # __file__, we can work backwards from there to the root. Some 163 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 164 | # case we can only use expanded keywords. 165 | 166 | keywords = { "refnames": git_refnames, "full": git_full } 167 | ver = git_versions_from_keywords(keywords, tag_prefix, verbose) 168 | if ver: 169 | return ver 170 | 171 | try: 172 | root = os.path.abspath(__file__) 173 | # versionfile_source is the relative path from the top of the source 174 | # tree (where the .git directory might live) to this file. Invert 175 | # this to find the root from __file__. 
176 | for i in range(len(versionfile_source.split(os.sep))): 177 | root = os.path.dirname(root) 178 | except NameError: 179 | return default 180 | 181 | return (git_versions_from_vcs(tag_prefix, root, verbose) 182 | or versions_from_parentdir(parentdir_prefix, root, verbose) 183 | or default) 184 | -------------------------------------------------------------------------------- /shotgunCache/entityConfig.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | import hashlib 5 | import fnmatch 6 | import json 7 | from collections import Mapping, OrderedDict 8 | 9 | __all__ = [ 10 | 'EntityConfig', 11 | 'EntityConfigManager', 12 | ] 13 | 14 | LOG = logging.getLogger(__name__) 15 | 16 | 17 | class EntityConfig(Mapping): 18 | """ 19 | Config dictionary for a single entity type 20 | """ 21 | def __init__(self, type, configPath): 22 | self.type = type 23 | self.configPath = configPath 24 | self.hash = None 25 | 26 | self.config = None 27 | self.loadConfig() 28 | 29 | def __getitem__(self, key): 30 | return self.config.__getitem__(key) 31 | 32 | def __iter__(self): 33 | return self.config.__iter__() 34 | 35 | def __len__(self): 36 | return len(self.config) 37 | 38 | def loadConfig(self): 39 | """ 40 | Read the config from the json file 41 | """ 42 | LOG.debug("Loading Entity Config for type: {0}".format(self.type)) 43 | 44 | with open(self.configPath, 'r') as f: 45 | data = f.read() 46 | self.hash = str(hashlib.md5(data).hexdigest()) 47 | 48 | try: 49 | config = json.loads(data) 50 | except ValueError, e: 51 | raise type(e), type(e)(e.message + ' in config file {0}'.format(self.configPath)), sys.exc_info()[2] 52 | 53 | self.config = config 54 | 55 | 56 | class EntityConfigManager(object): 57 | """ 58 | Manages the entity config files storing the details of how 59 | Shotgun Entities are stored in the database 60 | """ 61 | def __init__(self, config): 62 | super(EntityConfigManager, self).__init__() 63 | self.config = config 64 | 65 | self.sg = self.config.createShotgunConnection() 66 | 67 | self.configs = {} 68 | self.schema = None 69 | 70 | def __contains__(self, key): 71 | return key in self.configs 72 | 73 | def load(self): 74 | LOG.debug("Retrieving schema from shotgun") 75 | self.schema = self.sg.schema_read() 76 | self.loadConfigFromFiles() 77 | 78 | def loadConfigFromFiles(self): 79 | """ 80 | Read the config files and create EntityConfig instances 81 | """ 82 | for path in self.getConfigFilePaths(): 83 | typ = os.path.basename(os.path.splitext(path)[0]) 84 | if typ == 'EventLogEntry': 85 | raise NotImplementedError("Can't cache EventLogEntries") 86 | 87 | config = EntityConfig( 88 | typ, 89 | path, 90 | ) 91 | self.validateConfig(config) 92 | self.configs[config.type] = config 93 | 94 | def validateConfig(self, config): 95 | """ 96 | Validate an entity config dictionary 97 | 98 | Args: 99 | config (dict): entity config dictionary 100 | Raises: 101 | ValueError 102 | """ 103 | if 'fields' not in config or not len(config['fields']): 104 | raise ValueError("No fields defined for '{0}' in {1}".format(config.type, config.configPath)) 105 | 106 | if config.type not in self.schema: 107 | raise ValueError("Type '{0}' missing from Shotgun schema, defined in {1}".format(config.type, config.configPath)) 108 | 109 | typeSchema = self.schema[config.type] 110 | for field in config.config.get('fields', {}): 111 | if field not in typeSchema: 112 | raise ValueError("Field '{0}' for Type '{1}' missing from Shotgun schema, defined in {2}".format(field, config.type, config.configPath))
{2}".format(field, config.type, config.configPath)) 113 | 114 | def getConfigFilePaths(self): 115 | """ 116 | Get a list of all config file paths containing entity configs 117 | Returns: 118 | list of str: file paths 119 | """ 120 | path = os.path.abspath(self.config.entityConfigFolderPath) 121 | result = [] 122 | if not os.path.exists(path): 123 | LOG.debug("Creating entity config folder: {0}".format(path)) 124 | os.mkdir(path) 125 | 126 | for f in os.listdir(path): 127 | if not f.endswith('.json'): 128 | continue 129 | result.append(os.path.join(path, f)) 130 | LOG.debug("Found {0} Entity Config Files".format(len(result))) 131 | return result 132 | 133 | def allConfigs(self): 134 | """ 135 | Get a list of all EntityConfig instances 136 | 137 | Returns: 138 | list of EntityConfig 139 | """ 140 | return self.configs.values() 141 | 142 | def getEntityTypes(self): 143 | """ 144 | Get a list of all entity types we have configs for 145 | 146 | Returns: 147 | list of str: entity types 148 | """ 149 | return self.configs.keys() 150 | 151 | def getConfigForType(self, type): 152 | """ 153 | Get the entity config instance for the supplied type 154 | 155 | Args: 156 | type (str): Shotgun Entity Type 157 | """ 158 | return self.configs.__getitem__(type) 159 | 160 | def createEntityConfigFiles(self, types, tableTemplate=None, ignoreFields=[]): 161 | """ 162 | Create the entity config json files for the supplied shotgun entity types 163 | 164 | Args: 165 | types (list of str): List of Shotgun Entity Types 166 | tableTemplate (str): Template for the rethinkdb table name 167 | supplied format keywords: 168 | type - Shotgun type 169 | Ex: 170 | entity-{type} 171 | ignoreFields (list of str): global list of field names to exclude. 172 | These can use wildcards using fnmatch 173 | Ex: 174 | created_* 175 | cached_display_name 176 | 177 | Raises: 178 | ValueError 179 | 180 | """ 181 | LOG.debug("Reading Shotgun schema") 182 | schema = self.sg.schema_read() 183 | 184 | LOG.debug("Creating config files") 185 | result = [] 186 | for sgType in types: 187 | if sgType not in schema: 188 | raise ValueError("Missing shotgun entity type: {0}".format(sgType)) 189 | 190 | if sgType == 'EventLogEntry': 191 | raise NotImplemented("Can't cache EventLogEntry entities") 192 | 193 | destFolderPath = os.path.abspath(self.config.entityConfigFolderPath) 194 | destPath = os.path.join(destFolderPath, '{type}.json'.format(type=sgType)) 195 | 196 | entityConfig = OrderedDict() 197 | 198 | table = tableTemplate.format(type=sgType) 199 | 200 | entityConfig['table'] = table 201 | 202 | typeSchema = schema[sgType] 203 | fields = typeSchema.keys() 204 | 205 | if ignoreFields: 206 | def excludeIgnoredFields(field): 207 | for pat in ignoreFields: 208 | result = fnmatch.fnmatch(field, pat) 209 | if result: 210 | return False 211 | return True 212 | fields = filter(excludeIgnoredFields, fields) 213 | 214 | fieldsConfig = OrderedDict() 215 | for field in sorted(fields): 216 | fieldConfig = {} 217 | fieldSchema = typeSchema[field] 218 | 219 | fieldDataType = fieldSchema.get('data_type', {}).get('value', None) 220 | if fieldDataType == 'multi_entity': 221 | fieldConfig['mapping'] = {'type': 'nested', 'include_in_parent': True} 222 | elif fieldDataType == 'image': 223 | # Not supported yet 224 | continue 225 | 226 | fieldsConfig[field] = fieldConfig 227 | entityConfig['fields'] = fieldsConfig 228 | 229 | if not os.path.exists(destFolderPath): 230 | os.makedirs(destFolderPath) 231 | with open(destPath, 'w') as f: 232 | 
233 | result.append((sgType, destPath)) 234 | 235 | return result 236 | -------------------------------------------------------------------------------- /shotgunCache/monitor.py: -------------------------------------------------------------------------------- 1 | import time 2 | import datetime 3 | import socket 4 | import logging 5 | 6 | import zmq 7 | import shotgun_api3 as sg 8 | 9 | __all__ = [ 10 | 'ShotgunEventMonitor', 11 | ] 12 | 13 | LOG = logging.getLogger(__name__) 14 | 15 | processStartTime = time.time() 16 | 17 | 18 | class ShotgunEventMonitor(object): 19 | """ 20 | Stripped down version of the Shotgun Event Daemon 21 | Records when EventLogEntries have changed in Shotgun 22 | and sends the changes to the DatabaseController through zmq 23 | """ 24 | eventSubTypes = ['New', 'Change', 'Retirement', 'Revival'] 25 | 26 | def __init__(self, config): 27 | super(ShotgunEventMonitor, self).__init__() 28 | self.config = config 29 | 30 | self.latestEventLogEntry = self.config.history.get('latest_event_log_entry', None) 31 | 32 | self.entityTypes = [] 33 | 34 | self.sg = None 35 | self.context = None 36 | self.socket = None 37 | 38 | self._latestEventID = None 39 | self._latestEventIDPath = None 40 | self._loopStartTime = None 41 | 42 | def start(self): 43 | self.context = zmq.Context() 44 | self.socket = self.context.socket(zmq.PUSH) 45 | self.socket.connect(self.config['zmq_controller_work_url']) 46 | self.loadInitialEventID() 47 | self.buildBaseFilters() 48 | self.run() 49 | 50 | def run(self): 51 | LOG.info("Monitoring Shotgun") 52 | totalEventsInLoop = 0 53 | reset = False 54 | heartbeatTime = None 55 | self.socket.send_pyobj({'type': 'monitorStarted'}) 56 | while True: 57 | if reset: 58 | totalEventsInLoop = 0 59 | timeToPost = 0 60 | self._loopStartTime = time.time() 61 | 62 | events = self.getNewEvents() 63 | 64 | if len(events): 65 | LOG.debug("Received {0} new events".format(len(events))) 66 | 67 | postStartTime = time.time() 68 | body = { 69 | 'type': 'eventLogEntries', 70 | 'data': { 71 | 'entities': events, 72 | }, 73 | } 74 | 75 | self.socket.send_pyobj(body) 76 | timeToPost = time.time() - postStartTime 77 | 78 | self.setLatestEventLogEntry(events[-1]) 79 | totalEventsInLoop += len(events) 80 | reset = False 81 | else: 82 | self.fetchDelay() 83 | reset = True 84 | 85 | # Only report status for loops with events 86 | if totalEventsInLoop: 87 | statData = { 88 | 'type': 'stat', 89 | 'data': { 90 | 'type': 'shotgun_event_update', 91 | 'total_events': totalEventsInLoop, 92 | # might not need this, most of the time below 1 ms 93 | 'duration': round(timeToPost * 1000, 3), # ms 94 | 'created_at': datetime.datetime.utcnow().isoformat(), 95 | }, 96 | } 97 | self.socket.send_pyobj(statData) 98 | 99 | # Track monitor status so we can graph when it goes down 100 | currTime = time.time() 101 | if heartbeatTime is None or currTime - heartbeatTime > self.config['monitor']['heartbeat_interval']: 102 | statData = { 103 | 'type': 'stat', 104 | 'data': { 105 | 'type': 'monitor_status', 106 | 'created_at': datetime.datetime.utcnow().isoformat(), 107 | 'uptime': time.time() - processStartTime, # seconds 108 | }, 109 | } 110 | self.socket.send_pyobj(statData) 111 | heartbeatTime = currTime 112 | 113 | def getNewEvents(self): 114 | """ 115 | Fetch the new EventLogEntry entities from Shotgun 116 | Loops until successful 117 | """ 118 | filters = [ 119 | ['id', 'greater_than', self.latestEventLogEntry['id']] 120 | ] 121 | filters.extend(self.baseFilters)
122 | fields = [ 123 | 'id', 124 | 'event_type', 125 | 'attribute_name', 126 | 'meta', 127 | 'entity', 128 | 'user', 129 | 'project', 130 | 'session_uuid', 131 | 'created_at' 132 | ] 133 | order = [ 134 | {'column': 'id', 'direction': 'asc'} 135 | ] 136 | 137 | conn_attempts = 0 138 | while True: 139 | try: 140 | _sg = self.connect() 141 | result = _sg.find("EventLogEntry", filters, fields, order, limit=self.config['monitor.max_event_batch_size']) 142 | break 143 | except (sg.ProtocolError, sg.ResponseError, socket.error): 144 | # self.connect(force=True) 145 | LOG.warning("Unable to connect to Shotgun (attempt {0} of {1})".format(conn_attempts + 1, self.config['monitor.max_conn_retries'])) 146 | conn_attempts += 1 147 | except Exception: 148 | # self.connect(force=True) 149 | LOG.warning("Unable to connect to Shotgun (attempt {0} of {1})".format(conn_attempts + 1, self.config['monitor.max_conn_retries'])) 150 | conn_attempts += 1 151 | 152 | if conn_attempts >= self.config['monitor.max_conn_retries']: 153 | LOG.warning("Unable to connect to Shotgun after max attempts, retrying in {0} seconds".format(self.config['monitor.conn_retry_sleep'])) 154 | time.sleep(self.config['monitor.conn_retry_sleep']) 155 | 156 | return result 157 | 158 | def prepareEntityEvents(self, events): 159 | result = [] 160 | for event in events: 161 | result.append({ 162 | 'type': 'entityUpdate', 163 | 'data': event 164 | }) 165 | return result 166 | 167 | def loadInitialEventID(self): 168 | if self.latestEventLogEntry is None: 169 | LOG.debug("Loading initial EventLogEntry id") 170 | 171 | conn_attempts = 0 172 | while True: 173 | result = None 174 | order = [{'column': 'id', 'direction': 'desc'}] 175 | try: 176 | _sg = self.connect() 177 | result = _sg.find_one("EventLogEntry", filters=[], fields=['id', 'created_at'], order=order) 178 | break 179 | except (sg.ProtocolError, sg.ResponseError, socket.error): 180 | self.connect(force=True) 181 | LOG.warning("Unable to connect to Shotgun (attempt {0} of {1})".format(conn_attempts + 1, self.config['monitor.max_conn_retries'])) 182 | conn_attempts += 1 183 | 184 | if conn_attempts >= self.config['monitor.max_conn_retries']: 185 | LOG.warning("Unable to connect to Shotgun after max attempts, retrying in {0} seconds".format(self.config['monitor.conn_retry_sleep'])) 186 | time.sleep(self.config['monitor.conn_retry_sleep']) 187 | 188 | self.setLatestEventLogEntry(result) 189 | 190 | def setLatestEventLogEntry(self, entity): 191 | _entity = dict([(k, v) for k, v in entity.items() if k in ['id', 'created_at']]) 192 | self.socket.send_pyobj({ 193 | 'type': 'latestEventLogEntry', 194 | 'data': { 195 | 'entity': _entity 196 | } 197 | }) 198 | self.latestEventLogEntry = _entity 199 | 200 | def connect(self, force=False): 201 | if force or self.sg is None: 202 | LOG.debug("Connecting to Shotgun") 203 | self.sg = self.config.createShotgunConnection() 204 | return self.sg 205 | 206 | def fetchDelay(self): 207 | diff = 0 208 | if self._loopStartTime is not None: 209 | diff = time.time() - self._loopStartTime 210 | sleepTime = max(self.config['monitor.fetch_interval'] - diff, 0) 211 | if sleepTime: 212 | time.sleep(sleepTime) 213 | 214 | def setEntityTypes(self, entityTypes): 215 | self.entityTypes = entityTypes 216 | 217 | def buildBaseFilters(self): 218 | filters = [] 219 | filters.extend(self.buildEntityTypeFilters()) 220 | self.baseFilters = filters 221 | 222 | def buildEntityTypeFilters(self): 223 | result = { 224 | "filter_operator": "any", 225 | "filters": [] 226 | } 227 | for entityType in self.entityTypes:
228 | for subType in self.eventSubTypes: 229 | eventType = 'Shotgun_{entityType}_{subType}'.format( 230 | entityType=entityType, 231 | subType=subType 232 | ) 233 | result['filters'].append(['event_type', 'is', eventType]) 234 | return [result] 235 | -------------------------------------------------------------------------------- /shotgunCache/validateFields.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import logging 4 | import multiprocessing 5 | import difflib 6 | import Queue 7 | 8 | import rethinkdb 9 | 10 | import utils 11 | 12 | __all__ = [ 13 | 'FieldValidator' 14 | ] 15 | 16 | LOG = logging.getLogger(__name__) 17 | 18 | 19 | class FieldValidator(object): 20 | def __init__(self, config, entityConfigManager, entityConfigs, filters, filterOperator, fields, allCachedFields=False): 21 | super(FieldValidator, self).__init__() 22 | self.config = config 23 | self.entityConfigManager = entityConfigManager 24 | self.entityConfigs = entityConfigs 25 | self.filters = filters 26 | self.fields = fields 27 | self.filterOperator = filterOperator 28 | self.allCachedFields = allCachedFields 29 | 30 | self.workQueue = multiprocessing.JoinableQueue() 31 | self.resultQueue = multiprocessing.Queue() 32 | self.processes = [] 33 | self.results = [] 34 | 35 | def start(self, raiseExc=True): 36 | LOG.info("Starting Validate Fields") 37 | self.launchWorkers() 38 | self.run() 39 | self.terminateWorkers() 40 | 41 | if raiseExc: 42 | failed = [] 43 | for result in self.results: 44 | if result['failed']: 45 | failed.append(result) 46 | 47 | if len(failed): 48 | raise RuntimeError("Validation Failed, {0} cached entity type(s) do not match".format(len(failed))) 49 | 50 | return self.results 51 | 52 | def launchWorkers(self): 53 | processCount = min(len(self.entityConfigs), self.config['validate_counts.processes']) 54 | LOG.debug("Launching {0} validate workers".format(processCount)) 55 | for n in range(processCount): 56 | worker = FieldValidateWorker( 57 | self.workQueue, 58 | self.resultQueue, 59 | self.config, 60 | self.entityConfigManager, 61 | self.entityConfigs, 62 | filters=self.filters, 63 | filterOperator=self.filterOperator, 64 | fields=self.fields, 65 | allCachedFields=self.allCachedFields, 66 | ) 67 | proc = multiprocessing.Process(target=worker.start) 68 | proc.start() 69 | self.processes.append(proc) 70 | 71 | def run(self): 72 | LOG.debug("Adding items to validate queue") 73 | for config in self.entityConfigs: 74 | data = {'configType': config.type} 75 | self.workQueue.put(data) 76 | self.workQueue.join() 77 | 78 | results = [] 79 | while True: 80 | try: 81 | result = self.resultQueue.get(False) 82 | except Queue.Empty: 83 | break 84 | else: 85 | if result: 86 | results.append(result) 87 | self.results = results 88 | 89 | def terminateWorkers(self): 90 | LOG.debug("Terminating validate workers") 91 | for proc in self.processes: 92 | proc.terminate() 93 | self.processes = [] 94 | 95 | 96 | class ValidateWorker(object): 97 | def __init__(self, workQueue, resultQueue, config, entityConfigManager, entityConfigs, **kwargs): 98 | super(ValidateWorker, self).__init__() 99 | self.workQueue = workQueue 100 | self.resultQueue = resultQueue 101 | self.config = config 102 | self.entityConfigManager = entityConfigManager 103 | self.entityConfigs = dict([(c.type, c) for c in entityConfigs]) 104 | 105 | for k, v in kwargs.items(): 106 | setattr(self, k, v) 107 | 108 | self.sg = None 109 | self.rethink = None 
110 | 111 | def start(self): 112 | self.sg = self.config.createShotgunConnection(convert_datetimes_to_utc=False) 113 | self.rethink = self.config.createRethinkConnection() 114 | self.run() 115 | 116 | def run(self): 117 | raise NotImplementedError() 118 | 119 | 120 | class FieldValidateWorker(ValidateWorker): 121 | def stripNestedEntities(self, entityConfig, entities): 122 | # Strip extra data from nested entities so 123 | # only type and id remains 124 | for entity in entities: 125 | entitySchema = self.entityConfigManager.schema[entityConfig.type] 126 | for field, val in entity.items(): 127 | if field not in entitySchema: 128 | continue 129 | if field in ['type', 'id']: 130 | continue 131 | fieldDataType = entitySchema[field].get('data_type', {}).get('value', None) 132 | if fieldDataType == 'multi_entity': 133 | val = [utils.getBaseEntity(e) for e in val] 134 | entity[field] = val 135 | elif fieldDataType == 'entity': 136 | val = utils.getBaseEntity(val) 137 | entity[field] = val 138 | 139 | def run(self): 140 | workerPID = os.getpid() 141 | LOG.debug("Field Validate Worker Running: {0}".format(workerPID)) 142 | while True: 143 | try: 144 | work = self.workQueue.get() 145 | except Queue.Empty: 146 | continue 147 | time.sleep(0.1) 148 | 149 | entityConfig = self.entityConfigs[work['configType']] 150 | 151 | if self.allCachedFields: 152 | fields = entityConfig['fields'].keys() 153 | else: 154 | fields = self.fields[:] 155 | fields.append('id') 156 | fields.append('type') 157 | fields = list(set(fields)) 158 | 159 | LOG.debug("Getting fields from Shotgun for type: {0}".format(work['configType'])) 160 | shotgunResult = self.sg.find( 161 | entityConfig.type, 162 | filter_operator=self.filterOperator, 163 | filters=self.filters, 164 | fields=fields, 165 | order=[{'field_name': 'id', 'direction': 'asc'}] 166 | ) 167 | 168 | # Convert any nested entities to base entities (type and id only) 169 | self.stripNestedEntities(entityConfig, shotgunResult) 170 | 171 | # Group by ids to match with cache 172 | # Group into a dictionary with the id as key 173 | shotgunMap = dict([(e['id'], e) for e in shotgunResult]) 174 | 175 | LOG.debug("Getting fields from cache for type: {0}".format(work['configType'])) 176 | 177 | # Query the cache for only the ids that were returned from Shotgun 178 | cacheMatches = [] 179 | LOG.debug("Getting total match count from cache for type: {0}".format(work['configType'])) 180 | cacheMatches = list(rethinkdb.table(entityConfig['table']) 181 | .filter(lambda e: rethinkdb.expr(shotgunMap.keys()).contains(e['id'])) 182 | .pluck(fields) 183 | .run(self.rethink)) 184 | 185 | # Check for missing ids 186 | missingFromCache = {} 187 | missingFromShotgun = {} 188 | cacheMap = dict([(e['id'], e) for e in cacheMatches]) 189 | if len(cacheMap) != len(shotgunMap): 190 | cacheIDSet = set(cacheMap) 191 | shotgunIDSet = set(shotgunMap.keys()) 192 | 193 | # Entities found in Shotgun but absent from the cache 194 | missingIDsFromCache = shotgunIDSet.difference(cacheIDSet) 195 | missingFromCache = dict([(_id, shotgunMap[_id]) for _id in missingIDsFromCache]) 196 | 197 | # Entities found in the cache but absent from Shotgun 198 | missingIDsFromShotgun = cacheIDSet.difference(shotgunIDSet) 199 | missingFromShotgun = dict([(_id, cacheMap[_id]) for _id in missingIDsFromShotgun]) 200 | 201 | # Compare the data for each entity found on both sides 202 | diffs = [] 203 | for _id, shotgunData in shotgunMap.items(): 204 | if _id not in cacheMap: 205 | continue 206 | cacheData = cacheMap[_id] 207 | 208 | # Sort the nested entities by ID 209 | # Their sort order is not enforced by shotgun 210 | # So we can't count on it staying consistent
211 | shotgunData = utils.sortMultiEntityFieldsByID(self.entityConfigManager.schema, shotgunData) 212 | cacheData = utils.sortMultiEntityFieldsByID(self.entityConfigManager.schema, cacheData) 213 | 214 | shotgunJson = utils.prettyJson(shotgunData) 215 | cacheJson = utils.prettyJson(cacheData) 216 | if shotgunJson != cacheJson: 217 | diff = difflib.unified_diff( 218 | str(shotgunJson).split('\n'), 219 | str(cacheJson).split('\n'), 220 | lineterm="", 221 | n=5, 222 | ) 223 | header = '{type}:{id}\n'.format(type=work['configType'], id=_id) 224 | # Skip the first 3 lines (the unified diff header) 225 | [diff.next() for x in range(3)] 226 | diff = header + '\n'.join(diff) 227 | diffs.append(diff) 228 | failed = bool(diffs) or bool(missingFromCache) or bool(missingFromShotgun) 229 | result = { 230 | 'work': work, 231 | 'entityType': work['configType'], 232 | 'failed': failed, 233 | 'shotgunMatchCount': len(shotgunMap), 234 | 'cacheMatchCount': len(cacheMap), 235 | 'missingFromCache': missingFromCache, 236 | 'missingFromShotgun': missingFromShotgun, 237 | 'diffs': diffs, 238 | } 239 | self.resultQueue.put(result) 240 | 241 | self.workQueue.task_done() 242 | -------------------------------------------------------------------------------- /shotgunCache/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import datetime 4 | import logging 5 | from copy import deepcopy 6 | from collections import Mapping, MutableMapping 7 | import json 8 | 9 | import yaml 10 | import rethinkdb 11 | import shotgun_api3 as sg 12 | 13 | __all__ = [ 14 | 'ShotgunAPIWrapper', 15 | 'convertStrToDatetime', 16 | 'addNumberSign', 17 | 'getBaseEntity', 18 | 'prettyJson', 19 | 'chunks', 20 | 'combine_dict', 21 | 'update_dict', 22 | 'EncodedDict', 23 | 'DeepDict', 24 | 'sortMultiEntityFieldsByID', 25 | 'get_dict_diff', 26 | 'get_deep_keys', 27 | 'has_deep_key', 28 | 'get_deep_item', 29 | 'set_deep_item', 30 | 'del_deep_item', 31 | 'Config', 32 | 'History', 33 | ] 34 | 35 | 36 | LOG = logging.getLogger(__name__) 37 | 38 | 39 | class ShotgunAPIWrapper(sg.Shotgun): 40 | """ 41 | Wrapper for shotgun that disables the date time instance creation 42 | Returning the raw data from json 43 | """ 44 | def _transform_inbound(self, data): 45 | # Skip transforming inbound data so it correctly matches for our proxy 46 | return data 47 | 48 | 49 | def convertStrToDatetime(dateStr): 50 | return datetime.datetime(*map(int, re.split('[^\d]', dateStr)[:-1])) 51 | 52 | 53 | def sortMultiEntityFieldsByID(schema, entity): 54 | """ 55 | Sort all multi-entity fields in an entity by their ID. 
56 | 57 | Args: 58 | schema (dict): Shotgun Schema 59 | entity (dict): Entity dictionary 60 | 61 | Returns: 62 | dict: entity with fields sorted 63 | """ 64 | result = {} 65 | entitySchema = schema[entity['type']] 66 | for field, val in entity.items(): 67 | if field in ['id', 'type']: 68 | result[field] = val 69 | continue 70 | 71 | fieldSchema = entitySchema[field] 72 | dataType = fieldSchema['data_type']['value'] 73 | if dataType == 'multi_entity': 74 | val = sorted(val, key=lambda e: e['id']) 75 | result[field] = val 76 | return result 77 | 78 | 79 | def addNumberSign(num): 80 | if num > 0: 81 | num = '+' + str(num) 82 | elif num < 0: 83 | num = '-' + str(abs(num)) 84 | return num 85 | 86 | 87 | def getBaseEntity(entity): 88 | """ 89 | Remove extra information from an entity dict 90 | keeping only type and id 91 | """ 92 | if entity is None: 93 | return entity 94 | return dict([(k, v) for k, v in entity.items() if k in ['id', 'type']]) 95 | 96 | 97 | def get_dict_diff(a, b): 98 | """ 99 | Get the differences between a, and b 100 | Supports nested dictionaries as well. 101 | 102 | >>> a = dict( 103 | ... myBool = True, 104 | ... myDict = {1:'a', 2:'b'}, 105 | ... ) 106 | >>> b = dict( 107 | ... myBool = False, 108 | ... myDict = {3:'c'}, 109 | ... myString = 'hi' 110 | ... ) 111 | >>> get_dict_diff(b, a) 112 | {'myString': 'hi', 'myDict': {3: 'c'}, 'myBool': False} 113 | >>> a['myBool'] = False 114 | >>> get_dict_diff(b, a) 115 | {'myString': 'hi', 'myDict': {3: 'c'}} 116 | """ 117 | diff = {} 118 | for k, a_value in a.items(): 119 | if k in b.keys(): 120 | b_value = b[k] 121 | if a_value == b_value: 122 | continue 123 | else: 124 | # Check for a nested dict 125 | # If so, compare values inside it 126 | if isinstance(a_value, MutableMapping): 127 | # set any nested differences 128 | nested_diff = get_dict_diff(a_value, b_value) 129 | if not nested_diff: 130 | continue 131 | diff[k] = nested_diff 132 | 133 | # If it hasn't been added to the diff as a nested diff 134 | # add it now 135 | if k not in diff: 136 | diff[k] = a_value 137 | 138 | return diff 139 | 140 | 141 | def chunks(l, n): 142 | """ 143 | Yield successive n-sized chunks from l. 144 | """ 145 | for i in xrange(0, len(l), n): 146 | yield l[i:i+n] 147 | 148 | 149 | def combine_dict(a, b, copy=True): 150 | """ 151 | Return a dict that is the result of recursively 152 | updating dict `a` with dict `b`. Performs a deep 153 | copy to avoid altering the given objects. 154 | """ 155 | result = deepcopy(a) 156 | update_dict(result, b, copy=copy) 157 | return result 158 | 159 | 160 | def prettyJson(obj): 161 | return json.dumps(obj, sort_keys=True, indent=4, separators=(',', ': ')) 162 | 163 | 164 | def update_dict(a, b, copy=True): 165 | """ 166 | Update dictionary A with B recursively. 167 | This means that dictionary values are not 168 | simply replaced, but updated as well. 169 | 170 | `copy` - if True, uses a copy of B before updating 171 | so that if A is changed it will not affect any 172 | elements of B 173 | 174 | >>> a = dict( 175 | ... myBool = True, 176 | ... myDict = {1:'a', 2:'b'}, 177 | ... ) 178 | >>> b = dict( 179 | ... myBool = False, 180 | ... myDict = {3:'c'}, 181 | ... myString = 'hi' 182 | ... 
) 183 | >>> update_dict(a, b) 184 | >>> a 185 | {'myDict': {1: 'a', 2: 'b', 3: 'c'}, 'myBool': False, 'myString': 'hi'} 186 | """ 187 | if copy: 188 | b = deepcopy(b) 189 | for k in b.keys(): 190 | if isinstance(b[k], Mapping) and k in a and isinstance(a[k], MutableMapping): 191 | # update existing key 192 | update_dict(a[k], b[k]) 193 | else: 194 | # assign new key 195 | a[k] = b[k] 196 | return a 197 | 198 | 199 | class EncodedDict(MutableMapping): 200 | """ 201 | This is an abstract class for any dict-like classes 202 | that support holding data that is encoded and 203 | decoded on get and set. This means that data 204 | is changed at access time, so the EncodedDict provides 205 | a standardized way to get at the raw data, if needed. 206 | 207 | Subclasses must store the core data in `_data`, 208 | or override the `data` property to return 209 | the raw data, which must not be a copy 210 | """ 211 | def __init__(self, *args, **kwargs): 212 | self._data = dict(*args, **kwargs) 213 | 214 | def __getitem__(self, key): 215 | return self.encode(self._data.__getitem__(key)) 216 | 217 | def __setitem__(self, key, value): 218 | return self._data.__setitem__(key, value) 219 | 220 | def __delitem__(self, key): 221 | return self._data.__delitem__(key) 222 | 223 | def __contains__(self, key): 224 | return self._data.__contains__(key) 225 | 226 | def __iter__(self): 227 | return self._data.__iter__() 228 | 229 | def __len__(self): 230 | return self._data.__len__() 231 | 232 | def encode(self, value): 233 | """ 234 | Encode and return the given value. 235 | Override this method to implement custom encoding 236 | """ 237 | return value 238 | 239 | def __repr__(self): 240 | """ 241 | Return the repr of the encoded dictionary 242 | """ 243 | return dict(self).__repr__() 244 | 245 | @property 246 | def data(self): 247 | return self._data 248 | 249 | 250 | class DeepDict(EncodedDict): 251 | """ 252 | Basic dictionary that allows you to get child items 253 | at any depth using a dot syntax, eg. 'my.deep.key'. 254 | 255 | All keys must be non-empty strings that do not contain '.' 256 | 257 | When setting values, the child-most dictionary must 258 | already exist 259 | 260 | >>> d = DeepDict({1:2}) 261 | >>> d = DeepDict( 262 | ... a=4, 263 | ... b=5, 264 | ... c=dict( 265 | ... d=dict( 266 | ... e=10 267 | ... ) 268 | ... ) 269 | ... 
270 |     >>> d['c.d']
271 |     {'e': 10}
272 |     >>> d['c.d.e']
273 |     10
274 |     >>> d['c.d.e'] = 3
275 |     >>> d
276 |     {'a': 4, 'c': {'d': {'e': 3}}, 'b': 5}
277 |     >>> d.has_key('c.d.e')
278 |     True
279 | 
280 |     >>> # nesting under an existing non-dict value is silently ignored
281 |     >>> d['c.d.e.f'] = 7
282 |     >>> d
283 |     {'a': 4, 'c': {'d': {'e': 3}}, 'b': 5}
284 | 
285 |     >>> # add new nested item directly
286 |     >>> d['1.2.3'] = 5
287 |     >>> d
288 |     {'a': 4, '1': {'2': {'3': 5}}, 'c': {'d': {'e': 3}}, 'b': 5}
289 | 
290 |     >>> d.keys()
291 |     ['a', '1', 'c', 'b']
292 |     >>> d.deep_keys()
293 |     ['a', '1.2.3', 'c.d.e', 'b']
294 | 
295 |     >>> del d['c.d']
296 |     >>> d
297 |     {'a': 4, '1': {'2': {'3': 5}}, 'c': {}, 'b': 5}
298 | 
299 |     >>> # access raw data using deep key
300 |     >>> d.get_raw('1.2.3')
301 |     5
302 |     """
303 |     def __getitem__(self, k):
304 |         return self.encode(get_deep_item(self.data, k))
305 | 
306 |     def __contains__(self, key):
307 |         return has_deep_key(self._data, key)
308 | 
309 |     def __setitem__(self, k, v):
310 |         set_deep_item(self.data, k, v)
311 | 
312 |     def __delitem__(self, k):
313 |         del_deep_item(self.data, k)
314 | 
315 |     def has_key(self, key):
316 |         return self.__contains__(key)
317 | 
318 |     def deep_keys(self):
319 |         return get_deep_keys(self)
320 | 
321 |     def get_raw(self, key):
322 |         return get_deep_item(self.data, key)
323 | 
324 | 
325 | def get_deep_keys(d):
326 |     keys = []
327 |     for k, v in d.items():
328 |         if isinstance(v, Mapping):
329 |             deepKeys = get_deep_keys(v)
330 |             keys.extend(['{0}.{1}'.format(k, deepK) for deepK in deepKeys])
331 |         else:
332 |             keys.append(k)
333 |     return keys
334 | 
335 | 
336 | def has_deep_key(d, key):
337 |     keys = key.split('.', 1)
338 |     if keys[0] in d:
339 |         if len(keys) == 1:
340 |             return True
341 |         v = d[keys[0]]
342 |         if isinstance(v, Mapping):
343 |             return has_deep_key(v, keys[1])
344 |     return False
345 | 
346 | 
347 | def get_deep_item(d, k, sep='.'):
348 |     """
349 |     Return the value for `k` from the dictionary `d`,
350 |     by splitting the key and searching recursively
351 |     """
352 |     if not isinstance(k, basestring):
353 |         raise KeyError('expected string, got {0}: {1}'.format(type(k).__name__, k))
354 |     val = d
355 |     # recursively look for dictionary values, then
356 |     # return the last value
357 |     for key in k.split(sep):
358 |         if key and isinstance(val, Mapping) and key in val:
359 |             val = val.__getitem__(key)
360 |         else:
361 |             raise KeyError(k)
362 |     return val
363 | 
364 | 
365 | def set_deep_item(d, k, v, sep='.'):
366 |     """
367 |     Recurse into the dictionary `d` by splitting
368 |     key `k` by `sep` and setting dictionary values appropriately.
369 |     Missing intermediate dictionaries are created; existing non-dict
370 |     intermediates are not overridden and the assignment is discarded
371 |     """
372 |     if not isinstance(k, basestring):
373 |         raise KeyError('expected string, got {0}: {1}'.format(type(k).__name__, k))
374 |     # split and validate key
375 |     keys = k.split(sep)
376 |     for key in keys:
377 |         if not key:
378 |             raise KeyError(k)
379 |     # loop through and get/create dictionary
380 |     # items for all but the last key
381 |     val = d
382 |     for key in keys[:-1]:
383 |         if key not in val:
384 |             # create new dictionary item for key
385 |             val[key] = {}
386 |         val = dict.__getitem__(val, key)
387 |         # fall back to a detached dict; the assignment will not stick
388 |         if not isinstance(val, MutableMapping):
389 |             val = {}
390 |     val.__setitem__(keys[-1], v)
391 | 
392 | 
393 | def del_deep_item(d, k, sep='.'):
394 |     """
395 |     Recurse into the dictionary `d` by splitting
396 |     key `k` by `sep` and deleting the value at the last key
397 |     """
398 |     if not isinstance(k, basestring):
399 |         raise KeyError('expected string, got {0}: {1}'.format(type(k).__name__, k))
400 |     keys = k.split(sep)
401 |     val = d
402 |     for key in keys[:-1]:
403 |         if isinstance(val, MutableMapping) and key in val:
404 |             val = dict.__getitem__(val, key)
405 |         else:
406 |             raise KeyError(k)
407 |     val.__delitem__(keys[-1])
408 | 
409 | 
410 | class Config(DeepDict):
411 |     """
412 |     Main configuration dictionary for the shotgunCache
413 |     """
414 |     _history = None
415 | 
416 |     @classmethod
417 |     def loadFromYaml(cls, yamlPath):
418 |         result = yaml.load(open(yamlPath, 'r').read())
419 |         return cls(result)
420 | 
421 |     def createShotgunConnection(self, raw=True, **kwargs):
422 |         cls = ShotgunAPIWrapper if raw else sg.Shotgun
423 |         kw = self['shotgun'].copy()
424 |         kw.update(kwargs)
425 |         sgConn = cls(
426 |             **kw
427 |         )
428 |         return sgConn
429 | 
430 |     def createRethinkConnection(self, **kwargs):
431 |         kw = self['rethink'].copy()
432 |         kw.update(kwargs)
433 |         conn = rethinkdb.connect(**kw)
434 |         return conn
435 | 
436 |     @property
437 |     def history(self):
438 |         if self._history is None:
439 |             self._history = History(self.historyPath)
440 |         return self._history
441 | 
442 |     @history.setter
443 |     def history(self, value):
444 |         self._history = value
445 | 
446 |     @property
447 |     def historyPath(self):
448 |         import main
449 |         configPath = os.environ.get(main.CONFIG_PATH_ENV_KEY)
450 |         historyPath = os.path.join(configPath, self['history_filename'])
451 |         return historyPath
452 | 
453 |     @property
454 |     def entityConfigFolderPath(self):
455 |         import main
456 |         configPath = os.environ.get(main.CONFIG_PATH_ENV_KEY)
457 |         folderPath = os.path.join(configPath, self['entity_config_foldername'])
458 |         return folderPath
459 | 
460 | 
461 | class History(DeepDict):
462 |     """
463 |     Used to track the history state of the cache.
464 |     Loads and saves to a yaml file
465 |     """
466 |     def __init__(self, historyFilePath):
467 |         path = os.path.expanduser(historyFilePath)
468 |         path = os.path.abspath(path)
469 |         self.historyFilePath = path
470 |         super(History, self).__init__({})
471 |         self.load()
472 | 
473 |     def load(self):
474 |         if os.path.exists(self.historyFilePath):
475 |             result = yaml.load(open(self.historyFilePath, 'r').read())
476 |         else:
477 |             LOG.info("No existing history file at {0}".format(self.historyFilePath))
478 |             result = {}
479 |         self._data = result
480 |         return result
481 | 
482 |     def save(self):
483 |         with open(self.historyFilePath, 'w') as f:
484 |             yaml.dump(dict(self), f, default_flow_style=False, indent=4)
485 | 
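# Illustrative usage sketch (editorial, not part of the original module):
# History behaves like a DeepDict persisted to YAML, so nested state can be
# addressed with dot keys directly; the file path below is hypothetical.
#
#     h = History('~/shotguncache/history')
#     h['cached_entity_types.Asset'] = {'startImportTimestamp': '2015-06-01T00:00:00'}
#     h['latest_event_log_entry'] = {'type': 'EventLogEntry', 'id': 12345}
#     h.save()   # writes the nested dict out as YAML
#     h.load()   # reloads it from disk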
--------------------------------------------------------------------------------
/shotgunCache/entityImporter.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import zmq
4 | import math
5 | import traceback
6 | import datetime
7 | import time
8 | import multiprocessing
9 | 
10 | import rethinkdb
11 | 
12 | import utils
13 | 
14 | __all__ = [
15 |     'ImportManager',
16 |     'ImportWorker',
17 | ]
18 | 
19 | LOG = logging.getLogger(__name__)
20 | 
21 | 
22 | class ImportManager(object):
23 |     def __init__(self, controller, config):
24 |         super(ImportManager, self).__init__()
25 |         self.controller = controller
26 |         self.config = config
27 | 
28 |         self.workID = 0
29 |         self.activeWorkItemsPerType = {}
30 |         self.countsPerType = {}
31 |         self.idsPerType = {}
32 |         self.workPullSocket = None
33 |         self.workPostSocket = None
34 |         self.totalEntitiesImported = 0
35 | 
36 |     def importEntities(self, entityConfigs):
37 |         """
38 |         Batch import entities from shotgun into the local shotgun cache.
39 |         Uses multiple processes to speed up retrieval
40 |         """
41 |         LOG.debug("Importing {0} entity types".format(len(entityConfigs)))
42 |         importStartTime = time.time()
43 | 
44 |         # Reset
45 |         self.workID = 0
46 |         self.activeWorkItemsPerType = {}
47 |         self.countsPerType = {}
48 |         self.idsPerType = {}
49 |         self.importFailed = False
50 |         self.totalEntitiesImported = 0
51 |         self.importTimestampsPerType = {}
52 | 
53 |         self.createPostSocket()
54 |         self.createPullSocket()
55 | 
56 |         processes = self.launchImportProcesses(entityConfigs)
57 | 
58 |         self.post_countWork(entityConfigs, self.workPostSocket)
59 | 
60 |         while True:
61 |             work = self.workPullSocket.recv_pyobj()
62 | 
63 |             if not isinstance(work, dict):
64 |                 raise TypeError("Invalid work item, expected dict: {0}".format(work))
65 | 
66 |             # Update the count of active work items
67 |             configType = work['work']['configType']
68 |             activeWorkItems = self.activeWorkItemsPerType[configType]
69 |             workID = work['work']['id']
70 |             activeWorkItems.remove(workID)
71 | 
72 |             meth_name = 'handle_{0}'.format(work['type'])
73 |             if hasattr(self, meth_name):
74 |                 getattr(self, meth_name)(work)
75 |             else:
76 |                 raise ValueError("Unhandled work type: {0}".format(work['type']))
77 | 
78 |             if not len(self.activeWorkItemsPerType):
79 |                 break
80 | 
81 |         for proc in processes:
82 |             LOG.debug("Terminating import worker process: {0}".format(proc.pid))
83 |             proc.terminate()
84 | 
85 |         timeToImport = (time.time() - importStartTime) * 1000  # ms
86 |         self.totalEntitiesImported = sum([c['importCount'] for c in self.countsPerType.values()])
87 |         self.post_stat(timeToImport, entityConfigs)
88 | 
89 |         if self.importFailed:
90 |             raise IOError("Import Process failed for one or more entities, check log for details")
91 | 
92 |         LOG.debug("Imported {0} entities".format(self.totalEntitiesImported))
93 | 
94 |     def createPostSocket(self):
95 |         workPostContext = zmq.Context()
96 |         self.workPostSocket = workPostContext.socket(zmq.PUSH)
97 |         self.workPostSocket.bind(self.config['import.zmq_pull_url'])
98 | 
99 |     def createPullSocket(self):
100 |         workPullContext = zmq.Context()
101 |         self.workPullSocket = workPullContext.socket(zmq.PULL)
102 |         self.workPullSocket.bind(self.config['import.zmq_post_url'])
103 | 
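    # Illustrative sketch (editorial, not part of the original file): the
    # manager binds both sockets and the workers connect, forming a ZMQ
    # fan-out/fan-in pipeline. The URLs are hypothetical examples of the
    # 'import.zmq_pull_url' and 'import.zmq_post_url' config values.
    #
    #     import zmq
    #     ctx = zmq.Context()
    #     outbound = ctx.socket(zmq.PUSH)        # manager -> workers
    #     outbound.bind('tcp://127.0.0.1:5557')  # workers PULL work from here
    #     inbound = ctx.socket(zmq.PULL)         # workers -> manager
    #     inbound.bind('tcp://127.0.0.1:5558')   # workers PUSH results here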
104 |     def launchImportProcesses(self, entityConfigs):
105 |         """
106 |         Use multiprocessing to start a pool of entity import processes.
107 |         Each of these uses zmq as a message queue for work items which retrieve
108 |         information from shotgun.
109 |         """
110 |         # Tried using multiprocessing.Pool
111 |         # but had better luck with Processes directly
112 |         # due to using the importer class and instance methods
113 |         processes = []
114 |         numProcesses = self.config['import.processes']
115 |         for n in range(numProcesses):
116 |             importer = ImportWorker(
117 |                 config=self.config,
118 |                 entityConfigs=entityConfigs,
119 |             )
120 |             proc = multiprocessing.Process(target=importer.start)
121 |             proc.start()
122 |             processes.append(proc)
123 |             LOG.debug("Launched process {0}/{1}: {2}".format(n + 1, numProcesses, proc.pid))
124 | 
125 |         # Give time for all the workers to connect
126 |         time.sleep(1)
127 |         return processes
128 | 
129 |     def handle_exception(self, work):
130 |         entityType = work['work']['configType']
131 |         LOG.error("Import Failed for type '{type}'.\n{tb}".format(
132 |             type=entityType,
133 |             tb=work['data']['traceback']
134 |         ))
135 |         self.importFailed = True
136 | 
137 |     def handle_counts(self, work):
138 |         counts = work['data']
139 |         entityType = work['work']['configType']
140 |         self.countsPerType[entityType] = counts
141 | 
142 |         for page in range(counts['pageCount']):
143 |             getEntitiesWork = {
144 |                 'type': 'getEntities',
145 |                 'id': self.workID,
146 |                 'page': page + 1,
147 |                 'configType': entityType
148 |             }
149 |             self.workPostSocket.send_pyobj(getEntitiesWork)
150 |             self.activeWorkItemsPerType[entityType].append(self.workID)
151 |             self.workID += 1
152 | 
153 |     def handle_entitiesImported(self, work):
154 |         entities = work['data']['entities']
155 |         entityType = work['work']['configType']
156 |         pageCount = self.countsPerType[entityType]['pageCount']
157 | 
158 |         self.countsPerType[entityType].setdefault('importCount', 0)
159 |         self.countsPerType[entityType]['importCount'] += len(entities)
160 |         self.idsPerType.setdefault(entityType, []).extend([e['id'] for e in entities])
161 |         LOG.info("Imported {currCount}/{totalCount} entities for type '{typ}' on page {page}/{pageCount}".format(
162 |             currCount=self.countsPerType[entityType]['importCount'],
163 |             totalCount=self.countsPerType[entityType]['entityCount'],
164 |             typ=entityType,
165 |             page=work['work']['page'],
166 |             pageCount=pageCount,
167 |         ))
168 | 
169 |         entityConfig = self.controller.entityConfigManager.getConfigForType(entityType)
170 |         self.controller.post_entities(entityConfig, entities)
171 | 
172 |         # Store the timestamp for the import
173 |         # We'll use this to discard old EventLogEntries that happened before the import
174 |         # However, EventLogEntries created while importing will still be applied
175 |         timestamps = self.importTimestampsPerType.setdefault(entityType, {})
176 |         timestamps.setdefault('startImportTimestamp', work['data']['startImportTimestamp'])
177 | 
178 |         if not len(self.activeWorkItemsPerType[entityType]):
179 |             LOG.info("Imported all entities for type '{0}'".format(entityType))
180 | 
181 |             # Get a list of the entity IDs currently stored in the cache
182 |             cachedEntityIDs = set(rethinkdb
183 |                 .table(entityConfig['table'])
184 |                 .map(lambda asset: asset['id'])
185 |                 .coerce_to('array')
186 |                 .run(self.controller.rethink)
187 |             )
188 |             importedEntityIDs = set(self.idsPerType[entityType])
189 |             diffIDs = cachedEntityIDs.difference(importedEntityIDs)
190 | 
191 |             if len(diffIDs):
192 |                 # Delete these extra entities
193 |                 # This allows us to update the cache in place without
194 |                 # having to drop the table before the import, allowing for
195 |                 # a more seamless import / update process
196 |                 LOG.info("Deleting extra entities found in cache with IDs: {0}".format(diffIDs))
197 |                 rethinkdb.table(entityConfig['table']).get_all(rethinkdb.args(list(diffIDs))).delete().run(self.controller.rethink)
198 | 
199 |             self.config.history.setdefault('config_hashes', {})[entityType] = entityConfig.hash
200 |             self.config.history.setdefault('cached_entity_types', {})[entityType] = self.importTimestampsPerType[entityType]
201 |             self.config.history.save()
202 | 
203 |             self.activeWorkItemsPerType.pop(entityType)
204 | 
205 |     def post_countWork(self, entityConfigs, workSocket):
206 |         """
207 |         Send work items to the import processes to load information
208 |         about the counts of the entities
209 |         """
210 |         for config in entityConfigs:
211 |             work = {'type': 'getCount', 'id': self.workID, 'configType': config.type}
212 |             self.activeWorkItemsPerType.setdefault(config.type, []).append(self.workID)
213 |             workSocket.send_pyobj(work)
214 |             self.workID += 1
215 | 
216 |             self.post_entityConfig(config)
217 | 
218 |     def post_entityConfig(self, entityConfig):
219 |         LOG.debug("Posting entity config for type '{0}'".format(entityConfig.type))
220 | 
221 |         schemaTable = self.config['rethink_schema_table']
222 | 
223 |         if schemaTable not in rethinkdb.table_list().run(self.controller.rethink):
224 |             LOG.debug("Creating schema table: {0}".format(schemaTable))
225 |             rethinkdb.table_create(schemaTable, primary_key='type').run(self.controller.rethink)
226 | 
227 |         entitySchema = self.controller.entityConfigManager.schema[entityConfig.type]
228 |         cacheSchema = dict([(field, s) for field, s in entitySchema.items() if field in entityConfig['fields']])
229 | 
230 |         if LOG.getEffectiveLevel() < 10:
231 |             LOG.debug("Cache Schema:\n{0}".format(utils.prettyJson(cacheSchema)))
232 | 
233 |         config = {}
234 |         config['type'] = entityConfig.type
235 |         config['schema'] = cacheSchema
236 |         config['created_at'] = datetime.datetime.utcnow().isoformat()
237 | 
238 |         result = rethinkdb.table(schemaTable).insert(config, conflict="replace").run(self.controller.rethink)
239 |         if result['errors']:
240 |             raise IOError(result['first_error'])
241 | 
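    # Illustrative sketch (editorial, not part of the original file):
    # conflict="replace" turns the insert above into an upsert keyed on the
    # table's primary key, so re-posting a schema for the same entity type
    # overwrites the old document. Connection values are hypothetical.
    #
    #     import rethinkdb
    #     conn = rethinkdb.connect(host='localhost', port=28015, db='shotguncache')
    #     rethinkdb.table('schema').insert(
    #         {'type': 'Asset', 'schema': {}}, conflict='replace').run(conn)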
242 |     def post_stat(self, totalImportTime, entityConfigs):
243 |         """
244 |         Post related stats about the import process to the db to provide analytics.
245 |         These are posted based on the overall importEntities process, not individual imports.
246 |         """
247 |         stat = {
248 |             'type': 'import_entities',
249 |             'types_imported_count': len(entityConfigs),
250 |             'entity_types': [c.type for c in entityConfigs],
251 |             'total_entities_imported': self.totalEntitiesImported,
252 |             'duration': round(totalImportTime, 3),
253 |             'created_at': datetime.datetime.utcnow().isoformat(),
254 |             'processes': self.config['import.processes'],
255 |             'batch_size': self.config['import.batch_size'],
256 |             'import_failed': self.importFailed,
257 |             # Total Shotgun calls: one find per page plus one count query per entity type
258 |             'total_shotgun_calls': sum([c['pageCount'] for c in self.countsPerType.values()]) + len(entityConfigs),
259 |         }
260 |         self.controller.post_stat(stat)
261 | 
262 | 
263 | class ImportWorker(object):
264 |     def __init__(self, config, entityConfigs):
265 |         super(ImportWorker, self).__init__()
266 |         self.config = config
267 |         self.entityConfigs = dict([(c.type, c) for c in entityConfigs])
268 | 
269 |         self.sg = None
270 |         self.workPullSocket = None
271 |         self.workPullContext = None
272 |         self.workPostContext = None
273 |         self.workPostSocket = None
274 | 
275 |     def start(self):
276 |         self.sg = self.config.createShotgunConnection()
277 |         self.createPullSocket()
278 |         self.createPostSocket()
279 |         self.run()
280 | 
281 |     def createPostSocket(self):
282 |         self.workPostContext = zmq.Context()
283 |         self.workPostSocket = self.workPostContext.socket(zmq.PUSH)
284 |         self.workPostSocket.connect(self.config['import.zmq_post_url'])
285 | 
286 |     def createPullSocket(self):
287 |         self.workPullContext = zmq.Context()
288 |         self.workPullSocket = self.workPullContext.socket(zmq.PULL)
289 |         self.workPullSocket.connect(self.config['import.zmq_pull_url'])
290 | 
291 |     def run(self):
292 |         LOG.debug("Running Entity Import Loop")
293 | 
294 |         while True:
295 |             work = self.workPullSocket.recv_pyobj()
296 | 
297 |             if not isinstance(work, dict):
298 |                 raise TypeError("Invalid work item, expected dict: {0}".format(work))
299 | 
300 |             meth_name = 'handle_{0}'.format(work['type'])
301 |             if hasattr(self, meth_name):
302 |                 try:
303 |                     getattr(self, meth_name)(work)
304 |                 except Exception, e:
305 |                     result = {
306 |                         'type': 'exception',
307 |                         'data': {
308 |                             'exc': e,
309 |                             'traceback': traceback.format_exc()
310 |                         },
311 |                         'work': work
312 |                     }
313 |                     self.workPostSocket.send_pyobj(result)
314 |             else:
315 |                 raise ValueError("Unhandled work type: {0}".format(work['type']))
316 | 
317 |     def handle_getCount(self, work):
318 |         LOG.debug("Getting counts for type '{0}' on process {1}".format(work['configType'], os.getpid()))
319 |         entityConfig = self.entityConfigs[work['configType']]
320 |         entityCount = self.getEntityCount(entityConfig)
321 |         pageCount = int(math.ceil(entityCount / float(self.config['import.batch_size'])))
322 |         result = {
323 |             'type': 'counts',
324 |             'data': {
325 |                 'entityCount': entityCount,
326 |                 'pageCount': pageCount,
327 |             },
328 |             'work': work,
329 |         }
330 |         self.workPostSocket.send_pyobj(result)
331 | 
332 |     def handle_getEntities(self, work):
333 |         LOG.debug("Importing Entities for type '{0}' on page {1} on process {2}".format(work['configType'], work['page'], os.getpid()))
334 |         startImportTimestamp = datetime.datetime.utcnow().isoformat()
335 |         entityConfig = self.entityConfigs[work['configType']]
336 |         entities = self.getEntities(entityConfig, work['page'])
337 |         result = {
338 |             'type': 'entitiesImported',
339 |             'data': {
340 |                 'entities': entities,
341 |                 'startImportTimestamp': startImportTimestamp,
342 |             },
343 |             'work': work,
344 |         }
345 |         self.workPostSocket.send_pyobj(result)
346 | 
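    # Worked example (editorial, not part of the original file): the page math
    # used by handle_getCount above, assuming a hypothetical batch size of 500.
    #
    #     import math
    #     entityCount, batchSize = 1234, 500
    #     pageCount = int(math.ceil(entityCount / float(batchSize)))  # -> 3
    #     # pages 1-3 then fetch 500, 500, and 234 entities respectively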
347 |     def getEntities(self, entityConfig, page):
348 |         try:
349 |             kwargs = dict(
350 |                 entity_type=entityConfig.type,
351 |                 fields=entityConfig.get('fields', {}).keys(),
352 |                 filters=[],
353 |                 order=[{'column': 'id', 'direction': 'asc'}],
354 |                 limit=self.config['import.batch_size'],
355 |                 page=page
356 |             )
357 |             result = self.sg.find(**kwargs)
358 |         except Exception:
359 |             LOG.exception("Type: {entity_type}, filters: {filters}, fields: {fields}".format(**kwargs))
360 |             raise
361 | 
362 |         return result
363 | 
364 |     def getEntityCount(self, entityConfig):
365 |         result = self.sg.summarize(
366 |             entity_type=entityConfig.type,
367 |             filters=[],
368 |             summary_fields=[{'field': 'id', 'type': 'count'}],
369 |         )
370 |         return result['summaries']['id']
371 | 
--------------------------------------------------------------------------------
/shotgunCache/controller.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import datetime
3 | import multiprocessing
4 | import fnmatch
5 | 
6 | import zmq
7 | import rethinkdb
8 | 
9 | import entityConfig
10 | import entityImporter
11 | import monitor
12 | import utils
13 | 
14 | __all__ = [
15 |     'DatabaseController',
16 | ]
17 | 
18 | LOG = logging.getLogger(__name__)
19 | 
20 | 
21 | class DatabaseController(object):
22 |     """
23 |     Main Database controller.
24 |     Launches all other processes and coordinates communication
25 |     """
26 |     def __init__(self, config):
27 |         super(DatabaseController, self).__init__()
28 |         self.config = config
29 | 
30 |         self.rethink = self.config.createRethinkConnection()
31 |         self.sg = self.config.createShotgunConnection()
32 |         self.firstEventPostSinceImport = False
33 | 
34 |         self.entityConfigManager = entityConfig.EntityConfigManager(config=self.config)
35 |         self._init_monitor()
36 |         self._init_entityImportManager()
37 | 
38 |     def _init_monitor(self):
39 |         self.monitorContext = zmq.Context()
40 |         self.monitorSocket = self.monitorContext.socket(zmq.PULL)
41 | 
42 |         self.monitor = monitor.ShotgunEventMonitor(config=self.config)
43 |         self.monitorProcess = multiprocessing.Process(target=self.monitor.start)
44 |         self.monitorProcess.daemon = True
45 | 
46 |     def _init_entityImportManager(self):
47 |         self.entityImportManager = entityImporter.ImportManager(
48 |             config=self.config,
49 |             controller=self,
50 |         )
51 | 
52 |     def start(self):
53 |         LOG.info("Starting Up Cache")
54 |         self.entityConfigManager.load()
55 |         if not len(self.entityConfigManager.configs):
56 |             raise IOError("No entity configs found to cache in {0}".format(self.config.entityConfigFolderPath))
57 | 
58 |         self.monitor.setEntityTypes(self.entityConfigManager.getEntityTypes())
59 |         self.monitorSocket.bind(self.config['zmq_controller_work_url'])
60 |         self.monitorProcess.start()
61 | 
62 |         # Create the database
63 |         dbName = self.rethink.db
64 |         if self.rethink.db not in rethinkdb.db_list().run(self.rethink):
65 |             LOG.info("Creating rethink database: {0}".format(dbName))
66 |             rethinkdb.db_create(dbName).run(self.rethink)
67 | 
68 |         self.run()
69 | 
70 |     def run(self):
71 |         LOG.info("Starting Main Event Loop")
72 |         while True:
73 |             work = self.monitorSocket.recv_pyobj()
74 | 
75 |             if not isinstance(work, dict):
76 |                 raise TypeError("Invalid work item, expected dict: {0}".format(work))
77 | 
78 |             if LOG.getEffectiveLevel() < 10:
79 |                 # Only if we really want it
80 |                 LOG.debug("Work: \n" + utils.prettyJson(work))
81 | 
82 |             meth_name = 'handle_{0}'.format(work['type'])
83 |             if hasattr(self, meth_name):
84 |                 getattr(self, meth_name)(work)
85 |             else:
86 |                 raise ValueError("Unhandled work type: {0}".format(work['type']))
87 | 
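    # Editorial note (not part of the original file): work items are plain
    # dicts pickled over ZMQ. The shapes the handlers below expect, with
    # illustrative values:
    #
    #     {'type': 'monitorStarted'}
    #     {'type': 'stat', 'data': {'type': 'post_to_cache'}}
    #     {'type': 'eventLogEntries', 'data': {'entities': [...]}}
    #     {'type': 'latestEventLogEntry', 'data': {'entity': {'type': 'EventLogEntry', 'id': 1}}}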
88 |     def handle_monitorStarted(self, work):
89 |         # We wait until the monitor signals that it's running
90 |         # this way we don't miss any events that occur while importing
91 |         self.importEntities()
92 | 
93 |     def handle_stat(self, work):
94 |         self.post_stat(work['data'])
95 | 
96 |     def handle_eventLogEntries(self, work):
97 |         eventLogEntries = work['data']['entities']
98 |         LOG.info("Posting {0} Events".format(len(eventLogEntries)))
99 |         self.post_eventLogEntryChanges(eventLogEntries)
100 | 
101 |     def handle_reloadChangedConfigs(self, work):
102 |         LOG.info("Reloading changed configs")
103 |         self.config.history.load()
104 |         self.importEntities()
105 | 
106 |     def handle_latestEventLogEntry(self, work):
107 |         LOG.debug("Setting Latest Event Log Entry: {0}".format(work['data']['entity']))
108 |         self.config.history['latest_event_log_entry'] = work['data']['entity']
109 |         self.config.history.save()
110 | 
111 |     def importEntities(self):
112 |         configs = self.entityConfigManager.allConfigs()
113 | 
114 |         def checkConfigNeedsUpdate(cacheConfig):
115 |             if cacheConfig.hash is None:
116 |                 return True
117 |             elif cacheConfig.hash != self.config.history.get('config_hashes', {}).get(cacheConfig.type, None):
118 |                 return True
119 |             return False
120 | 
121 |         configsToLoad = []
122 |         configsToLoad.extend(filter(checkConfigNeedsUpdate, configs))
123 |         configsToLoad.extend(filter(lambda c: c.type not in self.config.history.get('cached_entity_types', {}), configs))
124 | 
125 |         # Deduplicate
126 |         configsToLoad = dict([(c.type, c) for c in configsToLoad]).values()
127 | 
128 |         self.deleteUntrackedFromCache(configs)
129 | 
130 |         if not len(configsToLoad):
131 |             LOG.info("All entity types have been imported.")
132 |             return
133 | 
134 |         LOG.info("Importing {0} entity types into the cache".format(len(configsToLoad)))
135 |         self.entityImportManager.importEntities(configsToLoad)
136 |         LOG.info("Import complete!")
137 |         self.firstEventPostSinceImport = True
138 | 
139 |     def deleteUntrackedFromCache(self, configs):
140 |         """
141 |         Delete data from cache for entities that are no longer cached
142 |         """
143 |         if not self.config['delete_cache_for_untracked_entities']:
144 |             return
145 | 
146 |         # Get the list of cached entity types
147 |         tableTemplate = self.config['rethink_entity_table_template']
148 |         existingTables = rethinkdb.table_list().run(self.rethink)
149 | 
150 |         existingCacheTables = []
151 |         tablePattern = tableTemplate.format(type="*")
152 |         for table in existingTables:
153 |             if fnmatch.fnmatch(table, tablePattern):
154 |                 existingCacheTables.append(table)
155 | 
156 |         usedCacheTables = [c['table'] for c in configs]
157 |         unusedCacheTables = [t for t in existingCacheTables if t not in usedCacheTables]
158 |         LOG.debug("Unused cache tables: {0}".format(unusedCacheTables))
159 | 
160 |         LOG.info("Deleting {0} cache tables".format(len(unusedCacheTables)))
161 |         for table in unusedCacheTables:
162 |             LOG.info("Deleting table: {0}".format(table))
163 |             rethinkdb.table_drop(table).run(self.rethink)
164 | 
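    # Illustrative sketch (editorial, not part of the original file): cache
    # tables are matched by globbing the table template; e.g. assuming the
    # template is 'entity_{type}':
    #
    #     import fnmatch
    #     fnmatch.fnmatch('entity_Asset', 'entity_{type}'.format(type='*'))  # True
    #     fnmatch.fnmatch('stat_import_entities', 'entity_*')                # False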
165 |     def post_entities(self, entityConfig, entities):
166 |         LOG.debug("Posting entities")
167 | 
168 |         tableName = entityConfig['table']
169 | 
170 |         if tableName not in rethinkdb.table_list().run(self.rethink):
171 |             LOG.debug("Creating table for type: {0}".format(entityConfig.type))
172 |             rethinkdb.table_create(tableName).run(self.rethink)
173 | 
174 |         for entity in entities:
175 |             # Get rid of extra data found in sub-entities
176 |             # We don't have a way to reliably keep these up to date except
177 |             # for the type and id
178 |             entitySchema = self.entityConfigManager.schema[entityConfig.type]
179 |             for field, val in entity.items():
180 |                 if field not in entitySchema:
181 |                     continue
182 |                 if field in ['type', 'id']:
183 |                     continue
184 |                 fieldDataType = entitySchema[field].get('data_type', {}).get('value', None)
185 |                 if fieldDataType == 'multi_entity':
186 |                     val = [utils.getBaseEntity(e) for e in val]
187 |                     entity[field] = val
188 |                 elif fieldDataType == 'entity':
189 |                     val = utils.getBaseEntity(val)
190 |                     entity[field] = val
191 | 
192 |         # TODO
193 |         # Since we aren't deleting the existing table
194 |         # Need to delete any extra entities at the end
195 |         result = rethinkdb.table(tableName).insert(entities, conflict="replace").run(self.rethink)
196 |         if result['errors']:
197 |             raise IOError(result['first_error'])
198 | 
199 |         if result['errors']:
200 |             # TODO
201 |             # Better error descriptions?
202 |             raise IOError("Errors occurred creating entities: {0}".format(result))
203 | 
204 |     def post_eventLogEntryChanges(self, eventLogEntries):
205 |         if LOG.getEffectiveLevel() < 10:
206 |             LOG.debug("--- Event Log Entries ---\n{0}\n".format(utils.prettyJson(eventLogEntries)))
207 | 
208 |         eventLogEntries = self.filterEventsBeforeImport(eventLogEntries)
209 | 
210 |         if not len(eventLogEntries):
211 |             return
212 | 
213 |         for entry in eventLogEntries:
214 |             meta = entry['meta']
215 |             changeType = entry['event_type'].rsplit('_', 1)[-1]
216 | 
217 |             entityType = meta['entity_type']
218 |             # entityID = meta['entity_id']
219 | 
220 |             if entityType not in self.entityConfigManager:
221 |                 # This also excludes special entities such as Version_sg_linked_versions_Connection
222 |                 LOG.debug("Ignoring uncached entity type: {0}".format(entityType))
223 |                 continue
224 | 
225 |             entityConfig = self.entityConfigManager.getConfigForType(entityType)
226 | 
227 |             meth_name = 'post_eventlog_{0}'.format(changeType)
228 |             if hasattr(self, meth_name):
229 |                 getattr(self, meth_name)(entry, entityConfig)
230 |             else:
231 |                 raise ValueError("Unhandled change type: {0}".format(changeType))
232 | 
233 |         submitFinishTime = datetime.datetime.utcnow()
234 | 
235 |         # if LOG.getEffectiveLevel() < 10:
236 |         #     LOG.debug("--- Responses ---\n{0}\n".format(utils.prettyJson(responses)))
237 | 
238 |         # # It's possible to run into a few errors here when shotgun events occurred while importing
239 |         # # Someone could modify and then delete an entity before the import is finished
240 |         # # while importing, we capture this change
241 |         # # then after importing, we apply the modification from the events that occurred while importing
242 |         # # however, we can't apply the modification because the entity no longer exists
243 |         # # So on the first event post after importing we ignore DocumentMissingException's
244 |         # if responses['errors']:
245 |         #     ignoreError = False
246 |         #     for request, response in zip(requests, responses['items']):
247 |         #         for responseEntry in response.values():
248 |         #             if responseEntry.get('error', None):
249 |         #                 errStr = responseEntry['error']
250 |         #                 if errStr.startswith('DocumentMissingException') and self.firstEventPostSinceImport:
251 |         #                     LOG.warning("Got DocumentMissingException error, but ignoring because it was during import")
252 |         #                     ignoreError = True
253 |         #                 else:
254 |         #                     LOG.error(errStr)
255 |         #                     LOG.debug("Request:\n{0}".format(utils.prettyJson(request)))
256 |         #                     LOG.debug("Response:\n{0}".format(utils.prettyJson(response)))
257 |         #                 if not ignoreError:
258 |         #                     raise IOError("Errors occurred creating entities")
259 | 
260 |         if self.firstEventPostSinceImport:
261 |             self.firstEventPostSinceImport = False
262 | 
263 |         if self.config['enable_stats']:
264 |             # Post the min/max/avg delay in milliseconds
265 |             delays = []
266 |             for entry in eventLogEntries:
267 |                 createdTime = entry['created_at']
268 |                 createdTime = utils.convertStrToDatetime(createdTime)
269 |                 diff = submitFinishTime - createdTime
270 |                 diff_ms = diff.total_seconds() * 1000  # use the full delta, not just the microseconds component
271 |                 delays.append(diff_ms)
272 | 
273 |             avg = sum(delays) / len(delays)
274 |             stat = {
275 |                 'type': 'post_to_cache',
276 |                 'min_shotgun_to_cache_delay': min(delays),
277 |                 'max_shotgun_to_cache_delay': max(delays),
278 |                 'avg_shotgun_to_cache_delay': avg,
279 |                 'created_at': submitFinishTime.isoformat(),
280 |             }
281 |             self.post_stat(stat)
282 | 
283 |     def post_stat(self, statDict):
284 |         if not self.config['enable_stats']:
285 |             return
286 | 
287 |         LOG.debug("Posting stat: {0}".format(statDict['type']))
288 |         statTable = self.config['rethink_stat_table_template'].format(type=statDict['type'])
289 |         if statTable not in rethinkdb.table_list().run(self.rethink):
290 |             rethinkdb.table_create(statTable).run(self.rethink)
291 | 
292 |         rethinkdb.table(statTable).insert(statDict).run(self.rethink)
293 | 
294 |     def filterEventsBeforeImport(self, eventLogEntries):
295 |         result = []
296 |         for entry in eventLogEntries:
297 |             entityType = entry['event_type'].split('_', 3)[1]
298 |             importTimestamps = self.config.history['cached_entity_types'][entityType]
299 |             importTimestamp = utils.convertStrToDatetime(importTimestamps['startImportTimestamp'])
300 | 
301 |             # Ignore old event log entries
302 |             entryTimestamp = utils.convertStrToDatetime(entry['created_at'])
303 |             if entryTimestamp < importTimestamp:
304 |                 LOG.debug("Ignoring EventLogEntry occurring before import of '{0}': {1}".format(entityType, entry['id']))
305 |                 if LOG.getEffectiveLevel() < 10:
306 |                     LOG.debug("Old Entry: \n" + utils.prettyJson(entry))
307 |                 continue
308 | 
309 |             result.append(entry)
310 |         return result
311 | 
312 |     def post_eventlog_Change(self, entry, entityConfig):
313 |         meta = entry['meta']
314 | 
315 |         LOG.debug("Posting change to entity: {meta[entity_type]}:{meta[entity_id]}".format(meta=meta))
316 | 
317 |         attrName = entry['attribute_name']
318 |         if attrName not in entityConfig['fields']:
319 |             LOG.debug("Untracked field updated: {0}".format(attrName))
320 |             return
321 | 
322 |         table = rethinkdb.table(entityConfig['table'])
323 |         entity = table.get(meta['entity_id'])
324 | 
325 |         if meta.get('field_data_type', '') in ['multi_entity', 'entity']:
326 |             if 'added' in meta and meta['added']:
327 |                 val = [utils.getBaseEntity(e) for e in meta['added']]
328 |                 result = entity.update(lambda e: {attrName: e[attrName].splice_at(-1, val)}).run(self.rethink)
329 |                 if result['errors']:
330 |                     raise IOError(result['first_error'])
331 |             elif 'removed' in meta and meta['removed']:
332 |                 val = [utils.getBaseEntity(e) for e in meta['removed']]
333 |                 result = entity.update(lambda e: {attrName: e[attrName].difference(val)}).run(self.rethink)
334 |                 if result['errors']:
335 |                     raise IOError(result['first_error'])
336 |             elif 'new_value' in meta:
337 |                 val = meta['new_value']
338 |                 if val is not None and isinstance(val, dict):
339 |                     val = utils.getBaseEntity(val)
340 |                 result = entity.update({attrName: val}).run(self.rethink)
341 |                 if result['errors']:
342 |                     raise IOError(result['first_error'])
343 | 
344 |         else:
345 |             if meta['entity_type'] not in self.entityConfigManager.configs:
346 |                 LOG.debug("Ignoring entry for non-cached entity type: {0}".format(meta['entity_type']))
347 |                 return
348 | 
349 |             val = meta['new_value']
350 |             result = entity.update({attrName: val}).run(self.rethink)
351 |             if result['errors']:
352 |                 raise IOError(result['first_error'])
353 | 
354 |         if 'updated_at' in entityConfig['fields']:
355 |             result = entity.update({'updated_at': entry['created_at']}).run(self.rethink)
356 |             if result['errors']:
357 |                 raise IOError(result['first_error'])
358 | 
359 |         if 'updated_by' in entityConfig['fields']:
360 |             result = entity.update({'updated_by': utils.getBaseEntity(entry['user'])}).run(self.rethink)
361 |             if result['errors']:
362 |                 raise IOError(result['first_error'])
363 | 
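    # Editorial note (not part of the original file): illustrative 'meta'
    # payloads for the branches handled in post_eventlog_Change above (keys
    # mirror the ones the method reads; values are made up):
    #
    #     {'entity_type': 'Shot', 'entity_id': 10, 'field_data_type': 'multi_entity',
    #      'added': [{'type': 'Asset', 'id': 2}]}
    #     {'entity_type': 'Shot', 'entity_id': 10, 'field_data_type': 'multi_entity',
    #      'removed': [{'type': 'Asset', 'id': 2}]}
    #     {'entity_type': 'Shot', 'entity_id': 10, 'new_value': 'ip'}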
364 |     def post_eventlog_New(self, entry, entityConfig):
365 |         meta = entry['meta']
366 |         entityType = meta['entity_type']
367 | 
368 |         LOG.debug("Posting new entity: {meta[entity_type]}:{meta[entity_id]}".format(meta=meta))
369 | 
370 |         body = {'type': meta['entity_type'], 'id': meta['entity_id']}
371 | 
372 |         # Load the default values for each field
373 |         for field in entityConfig['fields']:
374 |             fieldSchema = self.entityConfigManager.schema[entityType][field]
375 | 
376 |             if 'data_type' not in fieldSchema:
377 |                 # No data_type for visible field
378 |                 fieldDefault = None
379 |             else:
380 |                 fieldType = fieldSchema['data_type']['value']
381 |                 fieldDefault = self.config['shotgun_field_type_defaults'].get(fieldType, None)
382 | 
383 |             body.setdefault(field, fieldDefault)
384 | 
385 |         # We don't get an update for the project field, so
386 |         # we need to add that here from the event log entry
387 |         body['project'] = entry['project']
388 | 
389 |         if 'created_at' in entityConfig['fields']:
390 |             body['created_at'] = entry['created_at']
391 |         if 'created_by' in entityConfig['fields']:
392 |             body['created_by'] = utils.getBaseEntity(entry['user'])
393 | 
394 |         if 'updated_at' in entityConfig['fields']:
395 |             body['updated_at'] = entry['created_at']
396 |         if 'updated_by' in entityConfig['fields']:
397 |             body['updated_by'] = utils.getBaseEntity(entry['user'])
398 | 
399 |         result = rethinkdb.table(entityConfig['table']).insert(body).run(self.rethink)
400 |         if result['errors']:
401 |             raise IOError(result['first_error'])
402 | 
403 |     def post_eventlog_Retirement(self, entry, entityConfig):
404 |         meta = entry['meta']
405 |         LOG.debug("Deleting entity: {meta[entity_type]}:{meta[entity_id]}".format(meta=meta))
406 | 
407 |         result = rethinkdb.table(entityConfig['table']).get(meta['entity_id']).delete().run(self.rethink)
408 |         if result['errors']:
409 |             raise IOError(result['first_error'])
410 | 
411 |     def post_eventlog_Revival(self, entry, entityConfig):
412 |         meta = entry['meta']
413 |         entityType = meta['entity_type']
414 | 
415 |         LOG.debug("Reviving entity: {meta[entity_type]}:{meta[entity_id]}".format(meta=meta))
416 | 
417 |         # This is one of the few times we have to go and retrieve the information from shotgun
418 |         filters = [['id', 'is', entry['entity']['id']]]
419 |         body = self.sg.find_one(entityType, filters, entityConfig['fields'].keys())
420 | 
421 |         # Trim to base entities for nested entities
422 |         for field in body:
423 |             fieldSchema = self.entityConfigManager.schema[entityType][field]
424 | 
425 |             if fieldSchema['data_type']['value'] in ['multi_entity', 'entity']:
426 |                 val = body[field]
427 |                 if isinstance(val, (list, tuple)):
428 |                     val = [utils.getBaseEntity(e) for e in val]
429 |                 else:
430 |                     val = utils.getBaseEntity(val)
431 |                 body[field] = val
432 | 
433 |         if 'updated_at' in entityConfig['fields']:
434 |             body['updated_at'] = entry['created_at']
435 |         if 'updated_by' in entityConfig['fields']:
436 |             body['updated_by'] = utils.getBaseEntity(entry['user'])
437 | 
438 |         result = rethinkdb.table(entityConfig['table']).insert(body).run(self.rethink)
439 |         if result['errors']:
440 |             raise IOError(result['first_error'])
441 | 
--------------------------------------------------------------------------------
/shotgunCache/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | from __future__ import print_function
4 | 
5 | import os
6 | import sys
7 | import json
8 | import argparse
9 | import logging
10 | import fnmatch
11 | import rethinkdb
12 | 
13 | LOG = logging.getLogger('shotgunCache')
14 | SCRIPT_DIR = os.path.dirname(__file__)
15 | DEFAULT_CONFIG_PATH = '~/shotguncache'
16 | CONFIG_PATH_ENV_KEY = 'SHOTGUN_CACHE_CONFIG'
17 | 
18 | 
19 | class Parser(object):
20 |     bannerWidth = 84
21 | 
22 |     def __init__(self):
23 |         self.configPath = DEFAULT_CONFIG_PATH
24 | 
25 |         self.parser = argparse.ArgumentParser()
26 |         self.parser.add_argument(
27 |             '-c', '--config',
28 |             default=None,
29 |             help='Config folder path. '
30 |                  'If not specified, uses the default ({0}) or the value of env var {1}'.format(
31 |                      DEFAULT_CONFIG_PATH, CONFIG_PATH_ENV_KEY
32 |                  )
33 |         )
34 |         self.parser.add_argument(
35 |             '-v', '--verbosity',
36 |             default=3,
37 |             help='Verbosity level between 1 and 5. 1 - errors only, 4 - debug, 5 - everything'
38 |         )
39 | 
40 |         self.subparsers = self.parser.add_subparsers()
41 |         self.setup_subparser_setup()
42 |         self.setup_subparser_run()
43 |         self.setup_subparser_createEntityConfigs()
44 |         self.setup_subparser_validateCounts()
45 |         self.setup_subparser_validateFields()
46 |         self.setup_subparser_rebuild()
47 |         self.setup_subparser_resetStats()
48 | 
49 |     def parse(self, args):
50 |         if not len(args):
51 |             args = ['-h']
52 | 
53 |         parsed = self.parser.parse_args(args)
54 | 
55 |         # Verbosity
56 |         verbosity = int(parsed.verbosity)
57 |         if verbosity == 1:
58 |             LOG.setLevel(logging.ERROR)
59 |         elif verbosity == 2:
60 |             LOG.setLevel(logging.WARNING)
61 |         elif verbosity == 3:
62 |             LOG.setLevel(logging.INFO)
63 |         elif verbosity == 4:
64 |             LOG.setLevel(logging.DEBUG)
65 |         elif verbosity == 5:
66 |             LOG.setLevel(1)
67 | 
68 |         # Config Path
69 |         configPath = self.configPath
70 |         if parsed.config is not None:
71 |             configPath = parsed.config
72 |         else:
73 |             if CONFIG_PATH_ENV_KEY in os.environ:
74 |                 configPath = os.environ[CONFIG_PATH_ENV_KEY]
75 |                 LOG.debug("Using config path from env var {0}".format(CONFIG_PATH_ENV_KEY))
76 |         self.configPath = resolveConfigPath(configPath)
77 |         os.environ[CONFIG_PATH_ENV_KEY] = self.configPath
78 |         LOG.debug("Config Path: {0}".format(self.configPath))
79 | 
80 |         return parsed.func(vars(parsed))
81 | 
82 |     @property
83 |     def configFilePath(self):
84 |         return os.path.join(self.configPath, 'config.yaml')
85 | 
86 |     def setup_subparser_setup(self):
87 |         parser = self.subparsers.add_parser(
88 |             'setup',
89 |             help='Guided setup for configuring your cache server',
90 |         )
91 |         parser.set_defaults(func=self.handle_setup)
92 | 
93 |     def setup_subparser_run(self):
94 |         parser = self.subparsers.add_parser(
95 |             'run',
96 |             help='Run the cache server',
97 |         )
98 |         parser.set_defaults(func=self.handle_run)
99 | 
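    # Usage sketch (editorial, not part of the original file): typical
    # invocations of the subcommands defined in this class; the entity types
    # are examples.
    #
    #     shotgunCache setup
    #     shotgunCache -v 4 run
    #     shotgunCache create-entity-configs Asset Shot
    #     shotgunCache validate-counts --missing Asset
    #     shotgunCache rebuild --live Asset
    #     shotgunCache reset-stats --all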
100 |     def setup_subparser_createEntityConfigs(self):
101 |         parser = self.subparsers.add_parser(
102 |             'create-entity-configs',
103 |             help='Generate entity config files from Shotgun schema',
104 |         )
105 |         parser.add_argument(
106 |             'entityTypes',
107 |             nargs="+",
108 |             help='Entity Types to create configs for'
109 |         )
110 |         parser.set_defaults(func=self.handle_createEntityConfigs)
111 | 
112 |     def setup_subparser_validateCounts(self):
113 |         parser = self.subparsers.add_parser(
114 |             'validate-counts',
115 |             help='Fast validation. Validate current entity counts between Shotgun and cache.'
116 |         )
117 |         parser.add_argument(
118 |             '-p', '--processes',
119 |             default=8,
120 |             help='Number of processes to use for retrieving data from the databases'
121 |         )
122 |         parser.add_argument(
123 |             '--missing',
124 |             action="store_true",
125 |             default=False,
126 |             help='List the entity IDs for missing entities'
127 |         )
128 |         parser.add_argument(
129 |             'entityTypes',
130 |             nargs='*',
131 |             default=[],
132 |             help='List of entity types to check. By default all are checked'
133 |         )
134 |         parser.set_defaults(func=self.handle_validateCounts)
135 | 
136 |     def setup_subparser_validateFields(self):
137 |         parser = self.subparsers.add_parser(
138 |             'validate-fields',
139 |             help='Slow validation. Validates entities based on data from fields between Shotgun and cache. '
140 |                  'By default this validates the entity type and id fields.'
141 |         )
142 |         parser.add_argument(
143 |             '-p', '--processes',
144 |             default=8,
145 |             help='Number of processes to use for retrieving data from the databases'
146 |         )
147 |         parser.add_argument(
148 |             '--filters',
149 |             default=[],
150 |             type=json.loads,
151 |             help='JSON formatted list of filters. Ex: \'[["id","is",100]]\'',
152 |         )
153 |         parser.add_argument(
154 |             '--filterop',
155 |             default='all',
156 |             help='Filter operator'
157 |         )
158 |         parser.add_argument(
159 |             '--fields',
160 |             default="type,id",
161 |             help='Fields to check, comma separated with no spaces',
162 |         )
163 |         parser.add_argument(
164 |             '--allCachedFields',
165 |             default=False,
166 |             action="store_true",
167 |             help='Use all cached fields',
168 |         )
169 |         parser.add_argument(
170 |             '--all',
171 |             default=False,
172 |             action="store_true",
173 |             help='Check all cached entity types. '
174 |                  'WARNING: This can take a really long time and be taxing on Shotgun servers.'
175 |         )
176 |         parser.add_argument(
177 |             'entityTypes',
178 |             nargs='*',
179 |             default=[],
180 |             help='List of entity types to check. By default all are checked'
181 |         )
182 |         parser.set_defaults(func=self.handle_validateFields)
183 | 
184 |     def setup_subparser_rebuild(self):
185 |         parser = self.subparsers.add_parser(
186 |             'rebuild',
187 |             help='Rebuild a list of entity types; rebuilds live if the server is currently running.',
188 |         )
189 |         parser.add_argument(
190 |             '--live',
191 |             default=False,
192 |             action="store_true",
193 |             help='Send a signal to the active cache server to reload entities',
194 |         )
195 |         parser.add_argument(
196 |             '--url',
197 |             default=None,
198 |             help="URL to the shotgunCache server. "
199 |                  "Defaults to 'zmq_controller_work_url' in config if not set"
200 |         )
201 |         parser.add_argument(
202 |             '--all',
203 |             default=False,
204 |             action="store_true",
205 |             help='Rebuild all cached entity types'
206 |         )
207 |         parser.add_argument(
208 |             'entityTypes',
209 |             nargs="*",
210 |             help="Entity types to re-download from Shotgun. "
211 |                  "WARNING: this will delete and rebuild the entire local cache of this entity type."
212 |         )
213 |         parser.set_defaults(func=self.handle_rebuild)
214 | 
215 |     def setup_subparser_resetStats(self):
216 |         parser = self.subparsers.add_parser(
217 |             'reset-stats',
218 |             help='Delete saved cache stats',
219 |         )
220 |         parser.add_argument(
221 |             '-a', '--all',
222 |             help='Reset all stat types',
223 |             default=False,
224 |             action='store_true'
225 |         )
226 |         parser.add_argument(
227 |             'statTypes',
228 |             nargs="*",
229 |             help='List of statistic types to delete'
230 |         )
231 |         parser.set_defaults(func=self.handle_resetStats)
232 | 
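    # Illustrative sketch (editorial, not part of the original file):
    # handle_setup below round-trips the YAML template through ruamel.yaml so
    # the template's comments survive the rewrite; the same pattern in
    # isolation:
    #
    #     import ruamel.yaml
    #     data = ruamel.yaml.load('a: 1  # keep me', ruamel.yaml.RoundTripLoader)
    #     text = ruamel.yaml.dump(data, Dumper=ruamel.yaml.RoundTripDumper)
    #     # 'text' still contains the '# keep me' comment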
233 |     def handle_setup(self, parseResults):
234 |         import shotgunCache
235 |         # Use ruamel to support preserving comments
236 |         import ruamel.yaml
237 | 
238 |         def askUser(msg, error_msg="", required=False, retryCount=2):
239 |             tryNum = 1
240 |             while True:
241 |                 result = raw_input(msg)
242 |                 if result == "" and error_msg:
243 |                     print(error_msg)
244 |                 if not required or result != "" or tryNum >= retryCount:
245 |                     break
246 |                 tryNum += 1
247 |             return result
248 | 
249 |         configTemplatePath = os.path.join(SCRIPT_DIR, 'resources', 'config.yaml.template')
250 |         configTemplate = ruamel.yaml.load(open(configTemplatePath, 'r').read(), ruamel.yaml.RoundTripLoader)
251 | 
252 |         # Create the config folder
253 |         configPath = askUser("Config Folder ({0}): ".format(self.configPath))
254 |         if configPath == "":
255 |             configPath = self.configPath
256 | 
257 |         defaultConfigPath = resolveConfigPath(DEFAULT_CONFIG_PATH)
258 |         if configPath != defaultConfigPath:
259 |             print('\nYou\'ve set the config folder to a location other than \'{0}\'\n'.format(DEFAULT_CONFIG_PATH) +
260 |                   'In order for shotgunCache to find your config folder\n'
261 |                   'you\'ll need to either set the env var \'{0}\' to the path,\n'.format(CONFIG_PATH_ENV_KEY) +
262 |                   'or supply it via the --config flag each time you run a command\n')
263 |             askUser("Press ENTER to continue")
264 | 
265 |         # Create the config folder
266 |         configPath = resolveConfigPath(configPath)
267 |         configPathDir = os.path.dirname(configPath)
268 |         if not os.path.exists(configPathDir):
269 |             print("ERROR: Parent folder for config doesn't exist, please create it: {0}".format(configPathDir))
270 |         if not os.path.exists(configPath):
271 |             LOG.debug("Creating config folder: {0}".format(configPath))
272 |             os.mkdir(configPath)
273 | 
274 |         # Load shotgun settings
275 |         base_url = askUser(
276 |             "Shotgun Server URL (Ex: {0}): ".format(configTemplate['shotgun']['base_url']),
277 |             'ERROR: You must supply a base url',
278 |             required=True,
279 |         )
280 |         if not base_url:
281 |             return
282 |         configTemplate['shotgun']['base_url'] = base_url
283 | 
284 |         helpURL = "https://support.shotgunsoftware.com/entries/21193476-How-to-create-and-manage-API-Scripts"
285 |         print("\nTo connect to Shotgun, we need a script name and an API key, for more information visit:\n{0}\n".format(helpURL))
286 | 
287 |         script_name = askUser(
288 |             "Script Name (Ex: {0}): ".format(configTemplate['shotgun']['script_name']),
289 |             'ERROR: You must supply a script name',
290 |             required=True,
291 |         )
292 |         if not script_name:
293 |             return
294 |         configTemplate['shotgun']['script_name'] = script_name
295 | 
296 |         api_key = askUser(
297 |             "API Key (Ex: {0}): ".format(configTemplate['shotgun']['api_key']),
298 |             'ERROR: You must supply an API key',
299 |             required=True,
300 |         )
301 |         if not api_key:
302 |             return
303 |         configTemplate['shotgun']['api_key'] = api_key
304 | 
305 |         rethink_host = askUser(
306 |             "RethinkDB host ({0}): ".format(configTemplate['rethink']['host']),
307 |         )
308 |         if rethink_host:
309 |             configTemplate['rethink']['host'] = rethink_host
310 | 
311 |         rethink_port = askUser(
312 |             "RethinkDB port ({0}): ".format(configTemplate['rethink']['port']),
313 |         )
314 |         if rethink_port:
315 |             configTemplate['rethink']['port'] = int(rethink_port)
316 | 
317 |         rethink_db = askUser(
318 |             "RethinkDB Database name ({0}): ".format(configTemplate['rethink']['db']),
319 |         )
320 |         if rethink_db:
321 |             configTemplate['rethink']['db'] = rethink_db
322 | 
323 |         configFilePath = os.path.join(configPath, 'config.yaml')
324 |         with open(configFilePath, 'w') as f:
325 |             data = ruamel.yaml.dump(configTemplate, Dumper=ruamel.yaml.RoundTripDumper, indent=4)
326 |             f.write(data)
327 | 
328 |         entityConfigFolder = os.path.join(configPath, configTemplate['entity_config_foldername'])
329 |         if not os.path.exists(entityConfigFolder):
330 |             LOG.debug("Creating entity config folder: {0}".format(entityConfigFolder))
331 |             os.mkdir(entityConfigFolder)
332 | 
333 |         print("\nWhich entities would you like to cache?\nThis should be a space separated list of Shotgun entity types.\nYou can also leave this blank and specify these later using 'shotgunCache create-entity-configs'\n")
334 |         entityTypes = askUser("Entity Types (Ex: Asset Shot): ", required=False)
335 |         if entityTypes:
336 |             entityTypes = entityTypes.split(' ')
337 | 
338 |         config = shotgunCache.Config.loadFromYaml(configFilePath)
339 |         controller = shotgunCache.DatabaseController(config)
340 |         newConfigs = controller.entityConfigManager.createEntityConfigFiles(
341 |             entityTypes,
342 |             tableTemplate=controller.config['rethink_entity_table_template'],
343 |             ignoreFields=controller.config['create_entity_config']['field_patterns_to_ignore'],
344 |         )
345 |         for entityType, configPath in newConfigs:
346 |             print("'{0}' Entity Config Template: {1}".format(entityType, configPath))
347 | 
348 |         print(
349 |             '\nSetup is now complete!\n\n'
350 |             'Before starting the server, you can make additional configuration changes\n'
351 |             'See https://github.com/moonbot/shotgun-cache-server/wiki for more details\n\n'
352 |             'Once you\'ve finished your changes you can start the server by running\n'
353 |             '> shotgunCache run'
354 |         )
355 | 
356 |     def handle_createEntityConfigs(self, parseResults):
357 |         import shotgunCache
358 | 
359 |         config = shotgunCache.Config.loadFromYaml(self.configFilePath)
360 | 
361 |         controller = shotgunCache.DatabaseController(config)
362 |         newConfigs = controller.entityConfigManager.createEntityConfigFiles(
363 |             parseResults['entityTypes'],
364 |             tableTemplate=controller.config['rethink_entity_table_template'],
365 |             ignoreFields=controller.config['create_entity_config']['field_patterns_to_ignore'],
366 |         )
367 |         for entityType, configPath in newConfigs:
368 |             print("'{0}' Entity Config Template: {1}".format(entityType, configPath))
369 | 
370 |     def handle_run(self, parseResults):
371 |         import shotgunCache
372 | 
373 |         config = shotgunCache.Config.loadFromYaml(self.configFilePath)
374 | 
375 |         controller = shotgunCache.DatabaseController(config)
376 |         controller.start()
377 | 
378 |     def handle_validateCounts(self, parseResults):
379 |         import shotgunCache
380 |         print('Validating Counts...')
381 | 
382 |         config = shotgunCache.Config.loadFromYaml(self.configFilePath)
383 | 
384 |         entityConfigManager = shotgunCache.EntityConfigManager(config=config)
385 |         entityConfigManager.load()
386 | 
387 |         if parseResults['entityTypes']:
388 |             entityConfigs = [entityConfigManager.getConfigForType(t) for t in parseResults['entityTypes']]
389 |         else:
390 |             entityConfigs = entityConfigManager.allConfigs()
391 | 
392 |         if not len(entityConfigs):
393 |             print('No entities are configured to be cached')
394 |             return
395 | 
396 |         validator = shotgunCache.CountValidator(config, entityConfigs)
397 |         results = validator.start(raiseExc=False)
398 | 
399 |         failed = False
400 |         totalCounts = {'sgCount': 0, 'cacheCount': 0, 'pendingDiff': 0}
401 |         lineFmt = "{entityType: <26} {status: <10} {sgCount: <10} {cacheCount: <10} {pendingDiff: <12} {shotgunDiff: <12}"
402 | 
403 |         # Title Line
404 |         titleLine = lineFmt.format(
405 |             status="Status",
406 |             entityType="Entity Type",
407 |             sgCount="Shotgun",
408 |             pendingDiff="Pending",
409 |             cacheCount="Cache",
410 |             shotgunDiff="Shotgun Diff"
411 |         )
412 |         print(titleLine)
413 |         print('-' * self.bannerWidth)
414 | 
415 |         # Entity Totals
416 |         for result in sorted(results, key=lambda r: r['entityType']):
417 |             status = 'FAIL' if result['failed'] else 'OK'
418 |             shotgunDiff = result['sgCount'] - result['cacheCount']
419 |             print(lineFmt.format(
420 |                 entityType=result['entityType'],
421 |                 status=status,
422 |                 sgCount=result['sgCount'],
423 |                 cacheCount=result['cacheCount'],
424 |                 pendingDiff=shotgunCache.addNumberSign(result['pendingDiff']),
425 |                 shotgunDiff=shotgunCache.addNumberSign(shotgunDiff),
426 |             ))
427 |             totalCounts['sgCount'] += result['sgCount']
428 |             totalCounts['cacheCount'] += result['cacheCount']
429 |             totalCounts['pendingDiff'] += result['pendingDiff']
430 |             if result['failed']:
431 |                 failed = True
432 | 
433 |         # Total
434 |         print('-' * self.bannerWidth)
435 |         status = 'ERRORS' if failed else 'OK'
436 |         shotgunDiff = totalCounts['sgCount'] - totalCounts['cacheCount']
437 |         print(lineFmt.format(
438 |             entityType="Total",
439 |             status=status,
440 |             sgCount=totalCounts['sgCount'],
441 |             cacheCount=totalCounts['cacheCount'],
442 |             shotgunDiff=shotgunCache.addNumberSign(shotgunDiff),
443 |             pendingDiff=shotgunCache.addNumberSign(totalCounts['pendingDiff']),
444 |         ))
445 | 
446 |     def handle_validateFields(self, parseResults):
447 |         import shotgunCache
448 |         print('Validating Data...')
449 | 
450 |         config = shotgunCache.Config.loadFromYaml(self.configFilePath)
451 | 
452 |         entityConfigManager = shotgunCache.EntityConfigManager(config=config)
453 |         entityConfigManager.load()
454 | 
455 |         if parseResults['all']:
456 |             entityConfigs = entityConfigManager.allConfigs()
457 |         elif parseResults['entityTypes']:
458 |             entityConfigs = [entityConfigManager.getConfigForType(t) for t in parseResults['entityTypes']]
459 |         else:
460 |             print('ERROR: No entity types specified')
461 |             return
462 | 
463 |         if not len(entityConfigs):
464 |             print('No entities are configured to be cached')
465 |             return
466 | 
467 |         filters = parseResults['filters']
468 |         if not isinstance(filters, (list, tuple)):
469 |             raise TypeError("Filters must be a list or a tuple")
470 | 
471 |         filterOperator = parseResults['filterop']
472 |         if filterOperator not in ['all', 'any']:
473 |             raise ValueError("Filter operator must be either 'all' or 'any'. Got {0}".format(filterOperator))
474 | 
475 |         fields = parseResults['fields'].split(',')
476 | 
477 |         validator = shotgunCache.FieldValidator(
478 |             config,
479 |             entityConfigManager,
480 |             entityConfigs,
481 |             filters=filters,
482 |             filterOperator=filterOperator,
483 |             fields=fields,
484 |             allCachedFields=parseResults['allCachedFields'],
485 |         )
486 |         results = validator.start(raiseExc=False)
487 |         results = sorted(results, key=lambda r: r['entityType'])
488 | 
489 |         errors = 0
490 |         totalShotgunEntityCount = sum((r['shotgunMatchCount'] for r in results))
491 | 
492 |         # Missing Items
493 |         for result in results:
494 |             for msg, key in [('Shotgun', 'missingFromShotgun'), ('Cache', 'missingFromCache')]:
495 |                 entityType = result['entityType']
496 |                 if not result[key]:
497 |                     continue
498 | 
499 |                 print("MISSING: '{entityType}' Entity has {numMissing} missing from {source}".format(
500 |                     entityType=entityType,
501 |                     numMissing=len(result[key]),
502 |                     source=msg)
503 |                 )
504 |                 print('-' * self.bannerWidth)
505 |                 for missingEntity in result[key]:
506 |                     errors += 1
507 |                     print(shotgunCache.prettyJson(missingEntity))
508 |                 print()
509 | 
510 |         # Different Items
511 |         for result in results:
512 |             if not result['diffs']:
513 |                 continue
514 | 
515 |             print("DIFFERENCE: '{entityType}' has {diffCount}/{totalCount} matches that are different".format(
516 |                 entityType=result['entityType'],
517 |                 diffCount=len(result['diffs']),
518 |                 totalCount=result['shotgunMatchCount'])
519 |             )
520 |             print('-' * self.bannerWidth)
521 |             print('--- Shotgun\n+++ Cache')
522 |             for diff in result['diffs']:
523 |                 errors += 1
524 |                 print(diff)
525 |             print()
526 | 
527 |         if not errors:
528 |             print('SUCCESS: All {0} entities valid'.format(totalShotgunEntityCount))
529 |         else:
530 |             print('ERROR: {0} errors found in {1} entities'.format(errors, totalShotgunEntityCount))
531 | 
532 |     def handle_rebuild(self, parseResults):
533 |         import shotgunCache
534 |         import zmq
535 | 
536 |         if not parseResults['entityTypes'] and not parseResults['all']:
537 |             print('ERROR: No config types specified')
538 |             return 1
539 | 
540 |         config = shotgunCache.Config.loadFromYaml(self.configFilePath)
541 | 
542 |         entityConfigManager = shotgunCache.EntityConfigManager(config=config)
543 |         LOG.info("Loading entity configs")
544 |         entityConfigManager.load()
545 | 
546 |         availableTypes = entityConfigManager.getEntityTypes()
547 |         configTypes = parseResults['entityTypes']
548 |         if parseResults['all']:
549 |             configTypes = availableTypes
550 |         else:
551 |             for configType in configTypes:
552 |                 if configType not in availableTypes:
553 |                     print("WARNING: No cache configured for entity type '{0}'".format(configType))
554 | 
555 |         for configType in configTypes:
556 |             if configType in config.history.get('config_hashes', {}):
557 |                 print("Clearing saved hash for '{0}'".format(configType))
558 |                 config.history['config_hashes'][configType] = None
559 | 
560 |         config.history.save()
561 | 
562 |         if parseResults['live']:
563 |             url = parseResults['url']
564 |             if url is None:
565 |                 url = config['zmq_controller_work_url']
566 | 
567 |             # Check if the server is running, if so, send a rebuild signal
568 |             context = zmq.Context()
569 |             socket = context.socket(zmq.PUSH)
570 |             socket.connect(url)
571 | 
572 |             work = {
573 |                 'type': 'reloadChangedConfigs',
574 |                 'data': {}
575 |             }
576 |             print('Sending reload signal to cache server')
577 |             socket.send_pyobj(work)
578 |             print('Reload signal sent to {0}'.format(url))
579 |         else:
580 |             print('Entities will be reloaded the next time the server is launched')
581 | 
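    # Illustrative sketch (editorial, not part of the original file): the
    # --live path in handle_rebuild above just PUSHes a pickled dict at the
    # running controller's PULL socket; any process could send the same
    # signal. The URL is a hypothetical 'zmq_controller_work_url' value.
    #
    #     import zmq
    #     sock = zmq.Context().socket(zmq.PUSH)
    #     sock.connect('tcp://127.0.0.1:5555')
    #     sock.send_pyobj({'type': 'reloadChangedConfigs', 'data': {}})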
handle_resetStats(self, parseResults): 583 | import shotgunCache 584 | 585 | config = shotgunCache.Config.loadFromYaml(self.configFilePath) 586 | 587 | statTableTemplate = config['rethink_stat_table_template'] 588 | 589 | rethink = rethinkdb.connect(**config['rethink']) 590 | existingTables = rethink.table_list() 591 | 592 | pattern = statTableTemplate.format(type='*') 593 | 594 | tablesToDrop = [] 595 | if parseResults['all']: 596 | tablesToDrop = [t for t in existingTables if fnmatch.fnmatch(t, pattern)] 597 | else: 598 | if not len(parseResults['statTypes']): 599 | raise ValueError("No stat types supplied, and '--all' was not supplied") 600 | for statType in parseResults['statTypes']: 601 | table = statTableTemplate.format(type=statType) 602 | if table not in existingTables: 603 | LOG.warning("No table exists for type '{0}': {1}".format(statType, table)) 604 | else: 605 | tablesToDrop.append(table) 606 | 607 | if not len(tablesToDrop): 608 | LOG.info("No stats to delete") 609 | return 610 | 611 | LOG.info("Deleting {0} stat tables".format(len(tablesToDrop))) 612 | for table in tablesToDrop: 613 | LOG.info("Deleting table: {0}".format(table)) 614 | rethink.table_drop(table) 615 | 616 | def resolveConfigPath(path): 617 | path = os.path.expanduser(path) 618 | path = os.path.abspath(path) 619 | return path 620 | 621 | 622 | def main(args, exit=False): 623 | global CONFIG_PATH 624 | 625 | logging.basicConfig( 626 | format="%(asctime)s %(levelname)-8s %(message)s", 627 | datefmt="%m/%d/%Y %I:%M:%S %p", # ISO8601 628 | ) 629 | 630 | parser = Parser() 631 | exitCode = parser.parse(args[1:]) 632 | if exit: 633 | LOG.debug("Exit Code: {0}".format(exitCode)) 634 | sys.exit(exitCode) 635 | return exitCode 636 | 637 | 638 | if __name__ == '__main__': 639 | main(sys.argv) 640 | -------------------------------------------------------------------------------- /versioneer.py: -------------------------------------------------------------------------------- 1 | 2 | # Version: 0.12 3 | 4 | """ 5 | The Versioneer 6 | ============== 7 | 8 | * like a rocketeer, but for versions! 9 | * https://github.com/warner/python-versioneer 10 | * Brian Warner 11 | * License: Public Domain 12 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy 13 | 14 | [![Build Status](https://travis-ci.org/warner/python-versioneer.png?branch=master)](https://travis-ci.org/warner/python-versioneer) 15 | 16 | This is a tool for managing a recorded version number in distutils-based 17 | python projects. The goal is to remove the tedious and error-prone "update 18 | the embedded version string" step from your release process. Making a new 19 | release should be as easy as recording a new tag in your version-control 20 | system, and maybe making new tarballs. 
21 | 
22 | 
23 | ## Quick Install
24 | 
25 | * `pip install versioneer` to somewhere on your $PATH
26 | * run `versioneer-installer` in your source tree: this installs `versioneer.py`
27 | * follow the instructions below (also in the `versioneer.py` docstring)
28 | 
29 | ## Version Identifiers
30 | 
31 | Source trees come from a variety of places:
32 | 
33 | * a version-control system checkout (mostly used by developers)
34 | * a nightly tarball, produced by build automation
35 | * a snapshot tarball, produced by a web-based VCS browser, like github's
36 |   "tarball from tag" feature
37 | * a release tarball, produced by "setup.py sdist", distributed through PyPI
38 | 
39 | Within each source tree, the version identifier (either a string or a number,
40 | this tool is format-agnostic) can come from a variety of places:
41 | 
42 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
43 |   about recent "tags" and an absolute revision-id
44 | * the name of the directory into which the tarball was unpacked
45 | * an expanded VCS keyword ($Id$, etc)
46 | * a `_version.py` created by some earlier build step
47 | 
48 | For released software, the version identifier is closely related to a VCS
49 | tag. Some projects use tag names that include more than just the version
50 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
51 | needs to strip the tag prefix to extract the version identifier. For
52 | unreleased software (between tags), the version identifier should provide
53 | enough information to help developers recreate the same tree, while also
54 | giving them an idea of roughly how old the tree is (after version 1.2, before
55 | version 1.3). Many VCS systems can report a description that captures this,
56 | for example 'git describe --tags --dirty --always' reports things like
57 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
58 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
59 | uncommitted changes).
60 | 
61 | The version identifier is used for multiple purposes:
62 | 
63 | * to allow the module to self-identify its version: `myproject.__version__`
64 | * to choose a name and prefix for a 'setup.py sdist' tarball
65 | 
66 | ## Theory of Operation
67 | 
68 | Versioneer works by adding a special `_version.py` file into your source
69 | tree, where your `__init__.py` can import it. This `_version.py` knows how to
70 | dynamically ask the VCS tool for version information at import time. However,
71 | when you use "setup.py build" or "setup.py sdist", `_version.py` in the new
72 | copy is replaced by a small static file that contains just the generated
73 | version data.
74 | 
75 | `_version.py` also contains `$Revision$` markers, and the installation
76 | process marks `_version.py` to have this marker rewritten with a tag name
77 | during the "git archive" command. As a result, generated tarballs will
78 | contain enough information to get the proper version.
79 | 
80 | 
81 | ## Installation
82 | 
83 | First, decide on values for the following configuration variables:
84 | 
85 | * `VCS`: the version control system you use. Currently accepts "git".
86 | 
87 | * `versionfile_source`:
88 | 
89 |   A project-relative pathname into which the generated version strings should
90 |   be written. This is usually a `_version.py` next to your project's main
91 |   `__init__.py` file, so it can be imported at runtime. If your project uses
92 |   `src/myproject/__init__.py`, this should be `src/myproject/_version.py`.
93 | This file should be checked in to your VCS as usual: the copy created below 94 | by `setup.py versioneer` will include code that parses expanded VCS 95 | keywords in generated tarballs. The 'build' and 'sdist' commands will 96 | replace it with a copy that has just the calculated version string. 97 | 98 | This must be set even if your project does not have any modules (and will 99 | therefore never import `_version.py`), since "setup.py sdist" -based trees 100 | still need somewhere to record the pre-calculated version strings. Anywhere 101 | in the source tree should do. If there is a `__init__.py` next to your 102 | `_version.py`, the `setup.py versioneer` command (described below) will 103 | append some `__version__`-setting assignments, if they aren't already 104 | present. 105 | 106 | * `versionfile_build`: 107 | 108 | Like `versionfile_source`, but relative to the build directory instead of 109 | the source directory. These will differ when your setup.py uses 110 | 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, 111 | then you will probably have `versionfile_build='myproject/_version.py'` and 112 | `versionfile_source='src/myproject/_version.py'`. 113 | 114 | If this is set to None, then `setup.py build` will not attempt to rewrite 115 | any `_version.py` in the built tree. If your project does not have any 116 | libraries (e.g. if it only builds a script), then you should use 117 | `versionfile_build = None` and override `distutils.command.build_scripts` 118 | to explicitly insert a copy of `versioneer.get_version()` into your 119 | generated script. 120 | 121 | * `tag_prefix`: 122 | 123 | a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. 124 | If your tags look like 'myproject-1.2.0', then you should use 125 | tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this 126 | should be an empty string. 127 | 128 | * `parentdir_prefix`: 129 | 130 | a string, frequently the same as tag_prefix, which appears at the start of 131 | all unpacked tarball filenames. If your tarball unpacks into 132 | 'myproject-1.2.0', this should be 'myproject-'. 133 | 134 | This tool provides one script, named `versioneer-installer`. That script does 135 | one thing: write a copy of `versioneer.py` into the current directory. 136 | 137 | To versioneer-enable your project: 138 | 139 | * 1: Run `versioneer-installer` to copy `versioneer.py` into the top of your 140 | source tree. 141 | 142 | * 2: add the following lines to the top of your `setup.py`, with the 143 | configuration values you decided earlier: 144 | 145 | import versioneer 146 | versioneer.VCS = 'git' 147 | versioneer.versionfile_source = 'src/myproject/_version.py' 148 | versioneer.versionfile_build = 'myproject/_version.py' 149 | versioneer.tag_prefix = '' # tags are like 1.2.0 150 | versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0' 151 | 152 | * 3: add the following arguments to the setup() call in your setup.py: 153 | 154 | version=versioneer.get_version(), 155 | cmdclass=versioneer.get_cmdclass(), 156 | 157 | * 4: now run `setup.py versioneer`, which will create `_version.py`, and will 158 | modify your `__init__.py` (if one exists next to `_version.py`) to define 159 | `__version__` (by calling a function from `_version.py`). It will also 160 | modify your `MANIFEST.in` to include both `versioneer.py` and the generated 161 | `_version.py` in sdist tarballs. 162 | 163 | * 5: commit these changes to your VCS. 
To make sure you won't forget, 164 | `setup.py versioneer` will mark everything it touched for addition. 165 | 166 | ## Post-Installation Usage 167 | 168 | Once established, all uses of your tree from a VCS checkout should get the 169 | current version string. All generated tarballs should include an embedded 170 | version string (so users who unpack them will not need a VCS tool installed). 171 | 172 | If you distribute your project through PyPI, then the release process should 173 | boil down to two steps: 174 | 175 | * 1: git tag 1.0 176 | * 2: python setup.py register sdist upload 177 | 178 | If you distribute it through github (i.e. users use github to generate 179 | tarballs with `git archive`), the process is: 180 | 181 | * 1: git tag 1.0 182 | * 2: git push; git push --tags 183 | 184 | Currently, all version strings must be based upon a tag. Versioneer will 185 | report "unknown" until your tree has at least one tag in its history. This 186 | restriction will be fixed eventually (see issue #12). 187 | 188 | ## Version-String Flavors 189 | 190 | Code which uses Versioneer can learn about its version string at runtime by 191 | importing `_version` from your main `__init__.py` file and running the 192 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 193 | import the top-level `versioneer.py` and run `get_versions()`. 194 | 195 | Both functions return a dictionary with different keys for different flavors 196 | of the version string: 197 | 198 | * `['version']`: condensed tag+distance+shortid+dirty identifier. For git, 199 | this uses the output of `git describe --tags --dirty --always` but strips 200 | the tag_prefix. For example "0.11-2-g1076c97-dirty" indicates that the tree 201 | is like the "1076c97" commit but has uncommitted changes ("-dirty"), and 202 | that this commit is two revisions ("-2-") beyond the "0.11" tag. For 203 | released software (exactly equal to a known tag), the identifier will only 204 | contain the stripped tag, e.g. "0.11". 205 | 206 | * `['full']`: detailed revision identifier. For Git, this is the full SHA1 207 | commit id, followed by "-dirty" if the tree contains uncommitted changes, 208 | e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac-dirty". 209 | 210 | Some variants are more useful than others. Including `full` in a bug report 211 | should allow developers to reconstruct the exact code being tested (or 212 | indicate the presence of local changes that should be shared with the 213 | developers). `version` is suitable for display in an "about" box or a CLI 214 | `--version` output: it can be easily compared against release notes and lists 215 | of bugs fixed in various releases. 216 | 217 | In the future, this will also include a 218 | [PEP-0440](http://legacy.python.org/dev/peps/pep-0440/) -compatible flavor 219 | (e.g. `1.2.post0.dev123`). This loses a lot of information (and has no room 220 | for a hash-based revision id), but is safe to use in a `setup.py` 221 | "`version=`" argument. It also enables tools like *pip* to compare version 222 | strings and evaluate compatibility constraint declarations. 
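
For example, a sketch of reading both flavors at runtime (assuming a
versioneer-enabled package; `myproject` is a placeholder name):

    # 'myproject' is hypothetical; any package set up as described above works.
    from myproject._version import get_versions

    info = get_versions()
    print(info['version'])  # e.g. "0.11-2-g1076c97-dirty"
    print(info['full'])     # e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac-dirty"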
223 | 224 | The `setup.py versioneer` command adds the following text to your 225 | `__init__.py` to place a basic version in `YOURPROJECT.__version__`: 226 | 227 | from ._version import get_versions 228 | __version__ = get_versions()['version'] 229 | del get_versions 230 | 231 | ## Updating Versioneer 232 | 233 | To upgrade your project to a new release of Versioneer, do the following: 234 | 235 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 236 | * re-run `versioneer-installer` in your source tree to replace your copy of 237 | `versioneer.py` 238 | * edit `setup.py`, if necessary, to include any new configuration settings 239 | indicated by the release notes 240 | * re-run `setup.py versioneer` to replace `SRC/_version.py` 241 | * commit any changed files 242 | 243 | ### Upgrading from 0.10 to 0.11 244 | 245 | You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running 246 | `setup.py versioneer`. This will enable the use of additional version-control 247 | systems (SVN, etc) in the future. 248 | 249 | ### Upgrading from 0.11 to 0.12 250 | 251 | Nothing special. 252 | 253 | ## Future Directions 254 | 255 | This tool is designed to make it easily extended to other version-control 256 | systems: all VCS-specific components are in separate directories like 257 | src/git/ . The top-level `versioneer.py` script is assembled from these 258 | components by running make-versioneer.py . In the future, make-versioneer.py 259 | will take a VCS name as an argument, and will construct a version of 260 | `versioneer.py` that is specific to the given VCS. It might also take the 261 | configuration arguments that are currently provided manually during 262 | installation by editing setup.py . Alternatively, it might go the other 263 | direction and include code from all supported VCS systems, reducing the 264 | number of intermediate scripts. 265 | 266 | 267 | ## License 268 | 269 | To make Versioneer easier to embed, all its code is hereby released into the 270 | public domain. The `_version.py` that it creates is also in the public 271 | domain. 
272 | 273 | """ 274 | 275 | import os, sys, re, subprocess, errno 276 | from distutils.core import Command 277 | from distutils.command.sdist import sdist as _sdist 278 | from distutils.command.build import build as _build 279 | 280 | # these configuration settings will be overridden by setup.py after it 281 | # imports us 282 | versionfile_source = None 283 | versionfile_build = None 284 | tag_prefix = None 285 | parentdir_prefix = None 286 | VCS = None 287 | 288 | # these dictionaries contain VCS-specific tools 289 | LONG_VERSION_PY = {} 290 | 291 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 292 | assert isinstance(commands, list) 293 | p = None 294 | for c in commands: 295 | try: 296 | # remember shell=False, so use git.cmd on windows, not just git 297 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 298 | stderr=(subprocess.PIPE if hide_stderr 299 | else None)) 300 | break 301 | except EnvironmentError: 302 | e = sys.exc_info()[1] 303 | if e.errno == errno.ENOENT: 304 | continue 305 | if verbose: 306 | print("unable to run %s" % args[0]) 307 | print(e) 308 | return None 309 | else: 310 | if verbose: 311 | print("unable to find command, tried %s" % (commands,)) 312 | return None 313 | stdout = p.communicate()[0].strip() 314 | if sys.version >= '3': 315 | stdout = stdout.decode() 316 | if p.returncode != 0: 317 | if verbose: 318 | print("unable to run %s (error)" % args[0]) 319 | return None 320 | return stdout 321 | 322 | LONG_VERSION_PY['git'] = ''' 323 | # This file helps to compute a version number in source trees obtained from 324 | # git-archive tarball (such as those provided by githubs download-from-tag 325 | # feature). Distribution tarballs (built by setup.py sdist) and build 326 | # directories (produced by setup.py build) will contain a much shorter file 327 | # that just contains the computed version number. 328 | 329 | # This file is released into the public domain. 
Generated by 330 | # versioneer-0.12 (https://github.com/warner/python-versioneer) 331 | 332 | # these strings will be replaced by git during git-archive 333 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 334 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 335 | 336 | # these strings are filled in when 'setup.py versioneer' creates _version.py 337 | tag_prefix = "%(TAG_PREFIX)s" 338 | parentdir_prefix = "%(PARENTDIR_PREFIX)s" 339 | versionfile_source = "%(VERSIONFILE_SOURCE)s" 340 | 341 | import os, sys, re, subprocess, errno 342 | 343 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 344 | assert isinstance(commands, list) 345 | p = None 346 | for c in commands: 347 | try: 348 | # remember shell=False, so use git.cmd on windows, not just git 349 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 350 | stderr=(subprocess.PIPE if hide_stderr 351 | else None)) 352 | break 353 | except EnvironmentError: 354 | e = sys.exc_info()[1] 355 | if e.errno == errno.ENOENT: 356 | continue 357 | if verbose: 358 | print("unable to run %%s" %% args[0]) 359 | print(e) 360 | return None 361 | else: 362 | if verbose: 363 | print("unable to find command, tried %%s" %% (commands,)) 364 | return None 365 | stdout = p.communicate()[0].strip() 366 | if sys.version >= '3': 367 | stdout = stdout.decode() 368 | if p.returncode != 0: 369 | if verbose: 370 | print("unable to run %%s (error)" %% args[0]) 371 | return None 372 | return stdout 373 | 374 | 375 | def versions_from_parentdir(parentdir_prefix, root, verbose=False): 376 | # Source tarballs conventionally unpack into a directory that includes 377 | # both the project name and a version string. 378 | dirname = os.path.basename(root) 379 | if not dirname.startswith(parentdir_prefix): 380 | if verbose: 381 | print("guessing rootdir is '%%s', but '%%s' doesn't start with prefix '%%s'" %% 382 | (root, dirname, parentdir_prefix)) 383 | return None 384 | return {"version": dirname[len(parentdir_prefix):], "full": ""} 385 | 386 | def git_get_keywords(versionfile_abs): 387 | # the code embedded in _version.py can just fetch the value of these 388 | # keywords. When used from setup.py, we don't want to import _version.py, 389 | # so we do it with a regexp instead. This function is not used from 390 | # _version.py. 391 | keywords = {} 392 | try: 393 | f = open(versionfile_abs,"r") 394 | for line in f.readlines(): 395 | if line.strip().startswith("git_refnames ="): 396 | mo = re.search(r'=\s*"(.*)"', line) 397 | if mo: 398 | keywords["refnames"] = mo.group(1) 399 | if line.strip().startswith("git_full ="): 400 | mo = re.search(r'=\s*"(.*)"', line) 401 | if mo: 402 | keywords["full"] = mo.group(1) 403 | f.close() 404 | except EnvironmentError: 405 | pass 406 | return keywords 407 | 408 | def git_versions_from_keywords(keywords, tag_prefix, verbose=False): 409 | if not keywords: 410 | return {} # keyword-finding function failed to find keywords 411 | refnames = keywords["refnames"].strip() 412 | if refnames.startswith("$Format"): 413 | if verbose: 414 | print("keywords are unexpanded, not using") 415 | return {} # unexpanded, so not in an unpacked git-archive tarball 416 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 417 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 418 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
419 | TAG = "tag: " 420 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 421 | if not tags: 422 | # Either we're using git < 1.8.3, or there really are no tags. We use 423 | # a heuristic: assume all version tags have a digit. The old git %%d 424 | # expansion behaves like git log --decorate=short and strips out the 425 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 426 | # between branches and tags. By ignoring refnames without digits, we 427 | # filter out many common branch names like "release" and 428 | # "stabilization", as well as "HEAD" and "master". 429 | tags = set([r for r in refs if re.search(r'\d', r)]) 430 | if verbose: 431 | print("discarding '%%s', no digits" %% ",".join(refs-tags)) 432 | if verbose: 433 | print("likely tags: %%s" %% ",".join(sorted(tags))) 434 | for ref in sorted(tags): 435 | # sorting will prefer e.g. "2.0" over "2.0rc1" 436 | if ref.startswith(tag_prefix): 437 | r = ref[len(tag_prefix):] 438 | if verbose: 439 | print("picking %%s" %% r) 440 | return { "version": r, 441 | "full": keywords["full"].strip() } 442 | # no suitable tags, so we use the full revision id 443 | if verbose: 444 | print("no suitable tags, using full revision id") 445 | return { "version": keywords["full"].strip(), 446 | "full": keywords["full"].strip() } 447 | 448 | 449 | def git_versions_from_vcs(tag_prefix, root, verbose=False): 450 | # this runs 'git' from the root of the source tree. This only gets called 451 | # if the git-archive 'subst' keywords were *not* expanded, and 452 | # _version.py hasn't already been rewritten with a short version string, 453 | # meaning we're inside a checked out source tree. 454 | 455 | if not os.path.exists(os.path.join(root, ".git")): 456 | if verbose: 457 | print("no .git in %%s" %% root) 458 | return {} 459 | 460 | GITS = ["git"] 461 | if sys.platform == "win32": 462 | GITS = ["git.cmd", "git.exe"] 463 | stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always"], 464 | cwd=root) 465 | if stdout is None: 466 | return {} 467 | if not stdout.startswith(tag_prefix): 468 | if verbose: 469 | print("tag '%%s' doesn't start with prefix '%%s'" %% (stdout, tag_prefix)) 470 | return {} 471 | tag = stdout[len(tag_prefix):] 472 | stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 473 | if stdout is None: 474 | return {} 475 | full = stdout.strip() 476 | if tag.endswith("-dirty"): 477 | full += "-dirty" 478 | return {"version": tag, "full": full} 479 | 480 | 481 | def get_versions(default={"version": "unknown", "full": ""}, verbose=False): 482 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 483 | # __file__, we can work backwards from there to the root. Some 484 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 485 | # case we can only use expanded keywords. 486 | 487 | keywords = { "refnames": git_refnames, "full": git_full } 488 | ver = git_versions_from_keywords(keywords, tag_prefix, verbose) 489 | if ver: 490 | return ver 491 | 492 | try: 493 | root = os.path.abspath(__file__) 494 | # versionfile_source is the relative path from the top of the source 495 | # tree (where the .git directory might live) to this file. Invert 496 | # this to find the root from __file__. 
497 | for i in range(len(versionfile_source.split(os.sep))): 498 | root = os.path.dirname(root) 499 | except NameError: 500 | return default 501 | 502 | return (git_versions_from_vcs(tag_prefix, root, verbose) 503 | or versions_from_parentdir(parentdir_prefix, root, verbose) 504 | or default) 505 | ''' 506 | 507 | def git_get_keywords(versionfile_abs): 508 | # the code embedded in _version.py can just fetch the value of these 509 | # keywords. When used from setup.py, we don't want to import _version.py, 510 | # so we do it with a regexp instead. This function is not used from 511 | # _version.py. 512 | keywords = {} 513 | try: 514 | f = open(versionfile_abs,"r") 515 | for line in f.readlines(): 516 | if line.strip().startswith("git_refnames ="): 517 | mo = re.search(r'=\s*"(.*)"', line) 518 | if mo: 519 | keywords["refnames"] = mo.group(1) 520 | if line.strip().startswith("git_full ="): 521 | mo = re.search(r'=\s*"(.*)"', line) 522 | if mo: 523 | keywords["full"] = mo.group(1) 524 | f.close() 525 | except EnvironmentError: 526 | pass 527 | return keywords 528 | 529 | def git_versions_from_keywords(keywords, tag_prefix, verbose=False): 530 | if not keywords: 531 | return {} # keyword-finding function failed to find keywords 532 | refnames = keywords["refnames"].strip() 533 | if refnames.startswith("$Format"): 534 | if verbose: 535 | print("keywords are unexpanded, not using") 536 | return {} # unexpanded, so not in an unpacked git-archive tarball 537 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 538 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 539 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 540 | TAG = "tag: " 541 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 542 | if not tags: 543 | # Either we're using git < 1.8.3, or there really are no tags. We use 544 | # a heuristic: assume all version tags have a digit. The old git %d 545 | # expansion behaves like git log --decorate=short and strips out the 546 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 547 | # between branches and tags. By ignoring refnames without digits, we 548 | # filter out many common branch names like "release" and 549 | # "stabilization", as well as "HEAD" and "master". 550 | tags = set([r for r in refs if re.search(r'\d', r)]) 551 | if verbose: 552 | print("discarding '%s', no digits" % ",".join(refs-tags)) 553 | if verbose: 554 | print("likely tags: %s" % ",".join(sorted(tags))) 555 | for ref in sorted(tags): 556 | # sorting will prefer e.g. "2.0" over "2.0rc1" 557 | if ref.startswith(tag_prefix): 558 | r = ref[len(tag_prefix):] 559 | if verbose: 560 | print("picking %s" % r) 561 | return { "version": r, 562 | "full": keywords["full"].strip() } 563 | # no suitable tags, so we use the full revision id 564 | if verbose: 565 | print("no suitable tags, using full revision id") 566 | return { "version": keywords["full"].strip(), 567 | "full": keywords["full"].strip() } 568 | 569 | 570 | def git_versions_from_vcs(tag_prefix, root, verbose=False): 571 | # this runs 'git' from the root of the source tree. This only gets called 572 | # if the git-archive 'subst' keywords were *not* expanded, and 573 | # _version.py hasn't already been rewritten with a short version string, 574 | # meaning we're inside a checked out source tree. 
575 | 576 | if not os.path.exists(os.path.join(root, ".git")): 577 | if verbose: 578 | print("no .git in %s" % root) 579 | return {} 580 | 581 | GITS = ["git"] 582 | if sys.platform == "win32": 583 | GITS = ["git.cmd", "git.exe"] 584 | stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always"], 585 | cwd=root) 586 | if stdout is None: 587 | return {} 588 | if not stdout.startswith(tag_prefix): 589 | if verbose: 590 | print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)) 591 | return {} 592 | tag = stdout[len(tag_prefix):] 593 | stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 594 | if stdout is None: 595 | return {} 596 | full = stdout.strip() 597 | if tag.endswith("-dirty"): 598 | full += "-dirty" 599 | return {"version": tag, "full": full} 600 | 601 | 602 | def do_vcs_install(manifest_in, versionfile_source, ipy): 603 | GITS = ["git"] 604 | if sys.platform == "win32": 605 | GITS = ["git.cmd", "git.exe"] 606 | files = [manifest_in, versionfile_source] 607 | if ipy: 608 | files.append(ipy) 609 | try: 610 | me = __file__ 611 | if me.endswith(".pyc") or me.endswith(".pyo"): 612 | me = os.path.splitext(me)[0] + ".py" 613 | versioneer_file = os.path.relpath(me) 614 | except NameError: 615 | versioneer_file = "versioneer.py" 616 | files.append(versioneer_file) 617 | present = False 618 | try: 619 | f = open(".gitattributes", "r") 620 | for line in f.readlines(): 621 | if line.strip().startswith(versionfile_source): 622 | if "export-subst" in line.strip().split()[1:]: 623 | present = True 624 | f.close() 625 | except EnvironmentError: 626 | pass 627 | if not present: 628 | f = open(".gitattributes", "a+") 629 | f.write("%s export-subst\n" % versionfile_source) 630 | f.close() 631 | files.append(".gitattributes") 632 | run_command(GITS, ["add", "--"] + files) 633 | 634 | def versions_from_parentdir(parentdir_prefix, root, verbose=False): 635 | # Source tarballs conventionally unpack into a directory that includes 636 | # both the project name and a version string. 637 | dirname = os.path.basename(root) 638 | if not dirname.startswith(parentdir_prefix): 639 | if verbose: 640 | print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" % 641 | (root, dirname, parentdir_prefix)) 642 | return None 643 | return {"version": dirname[len(parentdir_prefix):], "full": ""} 644 | 645 | SHORT_VERSION_PY = """ 646 | # This file was generated by 'versioneer.py' (0.12) from 647 | # revision-control system data, or from the parent directory name of an 648 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 649 | # of this file. 
650 | 651 | version_version = '%(version)s' 652 | version_full = '%(full)s' 653 | def get_versions(default={}, verbose=False): 654 | return {'version': version_version, 'full': version_full} 655 | 656 | """ 657 | 658 | DEFAULT = {"version": "unknown", "full": "unknown"} 659 | 660 | def versions_from_file(filename): 661 | versions = {} 662 | try: 663 | with open(filename) as f: 664 | for line in f.readlines(): 665 | mo = re.match("version_version = '([^']+)'", line) 666 | if mo: 667 | versions["version"] = mo.group(1) 668 | mo = re.match("version_full = '([^']+)'", line) 669 | if mo: 670 | versions["full"] = mo.group(1) 671 | except EnvironmentError: 672 | return {} 673 | 674 | return versions 675 | 676 | def write_to_version_file(filename, versions): 677 | with open(filename, "w") as f: 678 | f.write(SHORT_VERSION_PY % versions) 679 | 680 | print("set %s to '%s'" % (filename, versions["version"])) 681 | 682 | 683 | def get_root(): 684 | try: 685 | return os.path.dirname(os.path.abspath(__file__)) 686 | except NameError: 687 | return os.path.dirname(os.path.abspath(sys.argv[0])) 688 | 689 | def vcs_function(vcs, suffix): 690 | return getattr(sys.modules[__name__], '%s_%s' % (vcs, suffix), None) 691 | 692 | def get_versions(default=DEFAULT, verbose=False): 693 | # returns dict with two keys: 'version' and 'full' 694 | assert versionfile_source is not None, "please set versioneer.versionfile_source" 695 | assert tag_prefix is not None, "please set versioneer.tag_prefix" 696 | assert parentdir_prefix is not None, "please set versioneer.parentdir_prefix" 697 | assert VCS is not None, "please set versioneer.VCS" 698 | 699 | # I am in versioneer.py, which must live at the top of the source tree, 700 | # which we use to compute the root directory. py2exe/bbfreeze/non-CPython 701 | # don't have __file__, in which case we fall back to sys.argv[0] (which 702 | # ought to be the setup.py script). We prefer __file__ since that's more 703 | # robust in cases where setup.py was invoked in some weird way (e.g. pip) 704 | root = get_root() 705 | versionfile_abs = os.path.join(root, versionfile_source) 706 | 707 | # extract version from first of _version.py, VCS command (e.g. 'git 708 | # describe'), parentdir. This is meant to work for developers using a 709 | # source checkout, for users of a tarball created by 'setup.py sdist', 710 | # and for users of a tarball/zipball created by 'git archive' or github's 711 | # download-from-tag feature or the equivalent in other VCSes. 
712 | 713 | get_keywords_f = vcs_function(VCS, "get_keywords") 714 | versions_from_keywords_f = vcs_function(VCS, "versions_from_keywords") 715 | if get_keywords_f and versions_from_keywords_f: 716 | vcs_keywords = get_keywords_f(versionfile_abs) 717 | ver = versions_from_keywords_f(vcs_keywords, tag_prefix) 718 | if ver: 719 | if verbose: print("got version from expanded keyword %s" % ver) 720 | return ver 721 | 722 | ver = versions_from_file(versionfile_abs) 723 | if ver: 724 | if verbose: print("got version from file %s %s" % (versionfile_abs,ver)) 725 | return ver 726 | 727 | versions_from_vcs_f = vcs_function(VCS, "versions_from_vcs") 728 | if versions_from_vcs_f: 729 | ver = versions_from_vcs_f(tag_prefix, root, verbose) 730 | if ver: 731 | if verbose: print("got version from VCS %s" % ver) 732 | return ver 733 | 734 | ver = versions_from_parentdir(parentdir_prefix, root, verbose) 735 | if ver: 736 | if verbose: print("got version from parentdir %s" % ver) 737 | return ver 738 | 739 | if verbose: print("got version from default %s" % default) 740 | return default 741 | 742 | def get_version(verbose=False): 743 | return get_versions(verbose=verbose)["version"] 744 | 745 | class cmd_version(Command): 746 | description = "report generated version string" 747 | user_options = [] 748 | boolean_options = [] 749 | def initialize_options(self): 750 | pass 751 | def finalize_options(self): 752 | pass 753 | def run(self): 754 | ver = get_version(verbose=True) 755 | print("Version is currently: %s" % ver) 756 | 757 | 758 | class cmd_build(_build): 759 | def run(self): 760 | versions = get_versions(verbose=True) 761 | _build.run(self) 762 | # now locate _version.py in the new build/ directory and replace it 763 | # with an updated value 764 | if versionfile_build: 765 | target_versionfile = os.path.join(self.build_lib, versionfile_build) 766 | print("UPDATING %s" % target_versionfile) 767 | os.unlink(target_versionfile) 768 | with open(target_versionfile, "w") as f: 769 | f.write(SHORT_VERSION_PY % versions) 770 | 771 | if 'cx_Freeze' in sys.modules: # cx_freeze enabled? 
772 | from cx_Freeze.dist import build_exe as _build_exe 773 | 774 | class cmd_build_exe(_build_exe): 775 | def run(self): 776 | versions = get_versions(verbose=True) 777 | target_versionfile = versionfile_source 778 | print("UPDATING %s" % target_versionfile) 779 | os.unlink(target_versionfile) 780 | with open(target_versionfile, "w") as f: 781 | f.write(SHORT_VERSION_PY % versions) 782 | 783 | _build_exe.run(self) 784 | os.unlink(target_versionfile) 785 | with open(versionfile_source, "w") as f: 786 | assert VCS is not None, "please set versioneer.VCS" 787 | LONG = LONG_VERSION_PY[VCS] 788 | f.write(LONG % {"DOLLAR": "$", 789 | "TAG_PREFIX": tag_prefix, 790 | "PARENTDIR_PREFIX": parentdir_prefix, 791 | "VERSIONFILE_SOURCE": versionfile_source, 792 | }) 793 | 794 | class cmd_sdist(_sdist): 795 | def run(self): 796 | versions = get_versions(verbose=True) 797 | self._versioneer_generated_versions = versions 798 | # unless we update this, the command will keep using the old version 799 | self.distribution.metadata.version = versions["version"] 800 | return _sdist.run(self) 801 | 802 | def make_release_tree(self, base_dir, files): 803 | _sdist.make_release_tree(self, base_dir, files) 804 | # now locate _version.py in the new base_dir directory (remembering 805 | # that it may be a hardlink) and replace it with an updated value 806 | target_versionfile = os.path.join(base_dir, versionfile_source) 807 | print("UPDATING %s" % target_versionfile) 808 | os.unlink(target_versionfile) 809 | with open(target_versionfile, "w") as f: 810 | f.write(SHORT_VERSION_PY % self._versioneer_generated_versions) 811 | 812 | INIT_PY_SNIPPET = """ 813 | from ._version import get_versions 814 | __version__ = get_versions()['version'] 815 | del get_versions 816 | """ 817 | 818 | class cmd_update_files(Command): 819 | description = "install/upgrade Versioneer files: __init__.py SRC/_version.py" 820 | user_options = [] 821 | boolean_options = [] 822 | def initialize_options(self): 823 | pass 824 | def finalize_options(self): 825 | pass 826 | def run(self): 827 | print(" creating %s" % versionfile_source) 828 | with open(versionfile_source, "w") as f: 829 | assert VCS is not None, "please set versioneer.VCS" 830 | LONG = LONG_VERSION_PY[VCS] 831 | f.write(LONG % {"DOLLAR": "$", 832 | "TAG_PREFIX": tag_prefix, 833 | "PARENTDIR_PREFIX": parentdir_prefix, 834 | "VERSIONFILE_SOURCE": versionfile_source, 835 | }) 836 | 837 | ipy = os.path.join(os.path.dirname(versionfile_source), "__init__.py") 838 | if os.path.exists(ipy): 839 | try: 840 | with open(ipy, "r") as f: 841 | old = f.read() 842 | except EnvironmentError: 843 | old = "" 844 | if INIT_PY_SNIPPET not in old: 845 | print(" appending to %s" % ipy) 846 | with open(ipy, "a") as f: 847 | f.write(INIT_PY_SNIPPET) 848 | else: 849 | print(" %s unmodified" % ipy) 850 | else: 851 | print(" %s doesn't exist, ok" % ipy) 852 | ipy = None 853 | 854 | # Make sure both the top-level "versioneer.py" and versionfile_source 855 | # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so 856 | # they'll be copied into source distributions. Pip won't be able to 857 | # install the package without this. 
858 | manifest_in = os.path.join(get_root(), "MANIFEST.in") 859 | simple_includes = set() 860 | try: 861 | with open(manifest_in, "r") as f: 862 | for line in f: 863 | if line.startswith("include "): 864 | for include in line.split()[1:]: 865 | simple_includes.add(include) 866 | except EnvironmentError: 867 | pass 868 | # That doesn't cover everything MANIFEST.in can do 869 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so 870 | # it might give some false negatives. Appending redundant 'include' 871 | # lines is safe, though. 872 | if "versioneer.py" not in simple_includes: 873 | print(" appending 'versioneer.py' to MANIFEST.in") 874 | with open(manifest_in, "a") as f: 875 | f.write("include versioneer.py\n") 876 | else: 877 | print(" 'versioneer.py' already in MANIFEST.in") 878 | if versionfile_source not in simple_includes: 879 | print(" appending versionfile_source ('%s') to MANIFEST.in" % 880 | versionfile_source) 881 | with open(manifest_in, "a") as f: 882 | f.write("include %s\n" % versionfile_source) 883 | else: 884 | print(" versionfile_source already in MANIFEST.in") 885 | 886 | # Make VCS-specific changes. For git, this means creating/changing 887 | # .gitattributes to mark _version.py for export-time keyword 888 | # substitution. 889 | do_vcs_install(manifest_in, versionfile_source, ipy) 890 | 891 | def get_cmdclass(): 892 | cmds = {'version': cmd_version, 893 | 'versioneer': cmd_update_files, 894 | 'build': cmd_build, 895 | 'sdist': cmd_sdist, 896 | } 897 | if 'cx_Freeze' in sys.modules: # cx_freeze enabled? 898 | cmds['build_exe'] = cmd_build_exe 899 | del cmds['build'] 900 | 901 | return cmds 902 | --------------------------------------------------------------------------------