├── .env.example ├── .gitignore ├── .travis.yml ├── LICENSE ├── Procfile ├── README.rst ├── app.json ├── artwork ├── webhookdb-square.png └── webhookdb.png ├── dev-requirements.txt ├── docs ├── Makefile ├── add-new-repo.rst ├── architecture.rst ├── conf.py ├── http-api.rst ├── index.rst └── make.bat ├── manage.py ├── requirements.txt ├── setup.py ├── tests ├── cassettes │ └── test_pull_request.test_happy_path.json ├── conftest.py ├── factories.py └── test_pull_request.py └── webhookdb ├── __init__.py ├── config.py ├── exceptions.py ├── load ├── __init__.py ├── issue.py ├── label.py ├── milestone.py ├── pull_request.py ├── pull_request_file.py ├── ratelimit.py ├── repository.py ├── repository_hook.py └── user.py ├── models ├── __init__.py └── github.py ├── oauth.py ├── process ├── __init__.py ├── issue.py ├── label.py ├── milestone.py ├── pull_request.py ├── pull_request_file.py ├── repository.py ├── repository_hook.py └── user.py ├── replication ├── __init__.py ├── issue.py ├── pull_request.py └── repository.py ├── tasks ├── __init__.py ├── fetch.py ├── issue.py ├── label.py ├── milestone.py ├── pull_request.py ├── pull_request_file.py ├── repository.py ├── repository_hook.py └── user.py ├── templates ├── base.html ├── home-anonymous.html ├── home.html ├── install.html └── uninstall.html ├── ui └── __init__.py └── worker.py /.env.example: -------------------------------------------------------------------------------- 1 | WEBHOOKDB_CONFIG=development 2 | OAUTHLIB_INSECURE_TRANSPORT=1 3 | GITHUB_OAUTH_CLIENT_ID= 4 | GITHUB_OAUTH_CLIENT_SECRET= 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | github.db 4 | docs/_build 5 | .env 6 | .cache 7 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "2.7" 5 | cache: pip 6 | install: 7 | - travis_retry pip install -r requirements.txt 8 | - travis_retry pip install -r dev-requirements.txt 9 | - travis_retry pip install codecov 10 | - pip install -e . 11 | script: 12 | - py.test --cov=webhookdb 13 | - cd docs && make html 14 | - cd .. 15 | after_success: 16 | - codecov 17 | branches: 18 | only: 19 | - master 20 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn webhookdb:create_app\(\) --log-file=- 2 | worker: celery worker --app=webhookdb.worker 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | WebhookDB 2 | ========= 3 | 4 | This project allows you to replicate Github's database over HTTP using webhooks. 5 | It's useful if you want to treat Github's APIs as a database, querying over 6 | pull requests and issues. Github doesn't like that, and you'll quickly hit the 7 | API's rate limits -- but if you use WebhookDB, you don't have to worry about it! 8 | Just populate the initial data into the database, set up the webhook replication 9 | to keep it in sync, and query your local database however you'd like! 10 | 11 | |build-status| |coverage-status| |docs| 12 | 13 | |heroku-deploy| 14 | 15 | .. |heroku-deploy| image:: https://www.herokucdn.com/deploy/button.png 16 | :target: https://heroku.com/deploy 17 | :alt: Deploy to Heroku 18 | .. |build-status| image:: https://travis-ci.org/singingwolfboy/webhookdb.svg?branch=master 19 | :target: https://travis-ci.org/singingwolfboy/webhookdb 20 | .. 
|coverage-status| image:: http://codecov.io/github/singingwolfboy/webhookdb/coverage.svg?branch=master 21 | :target: http://codecov.io/github/singingwolfboy/webhookdb?branch=master 22 | .. |docs| image:: https://readthedocs.org/projects/webhookdb/badge/?version=latest 23 | :target: http://webhookdb.readthedocs.org/en/latest/ 24 | :alt: Documentation badge 25 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "WebhookDB", 3 | "description": "Replicates Github's database via HTTP webhooks.", 4 | "keywords": [ 5 | "databases", 6 | "webhooks" 7 | ], 8 | "website": "https://github.com/singingwolfboy/webhookdb", 9 | "repository": "https://github.com/singingwolfboy/webhookdb", 10 | "logo": "https://raw.githubusercontent.com/singingwolfboy/webhookdb/master/artwork/webhookdb-square.png", 11 | "success_url": "/", 12 | "scripts": { 13 | "postdeploy": "python manage.py dbcreate" 14 | }, 15 | "env": { 16 | "FLASK_SECRET_KEY": { 17 | "description": "A secret key for verifying the integrity of signed cookies.", 18 | "generator": "secret" 19 | }, 20 | "GITHUB_CLIENT_ID": { 21 | "description": "The OAuth client ID for your application, assigned by Github" 22 | }, 23 | "GITHUB_CLIENT_SECRET": { 24 | "description": "The OAuth client secret for your application, assigned by Github" 25 | } 26 | }, 27 | "addons": [ 28 | "heroku-postgresql", 29 | "bugsnag", 30 | "rediscloud", 31 | "rabbitmq-bigwig" 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /artwork/webhookdb-square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/singingwolfboy/webhookdb/d4324e122cee5fd44bc68eae245756677b3e187b/artwork/webhookdb-square.png -------------------------------------------------------------------------------- /artwork/webhookdb.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/singingwolfboy/webhookdb/d4324e122cee5fd44bc68eae245756677b3e187b/artwork/webhookdb.png -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 3 | # docs 4 | sphinx 5 | sphinxcontrib-httpdomain 6 | 7 | # tests 8 | pytest 9 | pytest-cover 10 | pytest-factoryboy 11 | factory_boy 12 | betamax 13 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/WebhookDB.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/WebhookDB.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/WebhookDB" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/WebhookDB" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 
108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /docs/add-new-repo.rst: -------------------------------------------------------------------------------- 1 | Adding a New Repo 2 | ================= 3 | 4 | Adding a new repo to WebhookDB is a two-step process. 5 | 6 | Install the Github Webhooks 7 | --------------------------- 8 | You can do this by going to the admin page for the Github repo, and setting up 9 | the webhooks manually. Alternatively, you can visit WebhookDB's 10 | :http:get:`/install` page and put in the info for the repo you want to install 11 | the hooks into. Note that WebhookDB will only be able to install these hooks 12 | for you if the Github user associated with WebhookDB has admin permissions 13 | on the repo you request -- otherwise, Github will return a "404 Not Found" 14 | response to WebhookDB's attempts to install the webhooks. 15 | 16 | The webhooks that should exist are: 17 | 18 | * :http:post:`/replication/pull_request` (for the ``pull_request`` event) 19 | * :http:post:`/replication/issue` (for the ``issues`` event) 20 | 21 | Load past history 22 | ----------------- 23 | The webhooks will ensure that any new events that occur in your repository are 24 | captured by WebhookDB, but if you want to load in the past history of your 25 | repo, you'll need to do that separately. Right now, the simplest way to do this 26 | is using ``curl`` or a similar tool to make HTTP requests to the WebhookDB server. 27 | 28 | To load pull requests for a repo, use :http:post:`/load/repos/(owner)/(repo)/pulls`. 29 | If you want to load *all* pull requests, including closed pull requests, you'll 30 | need to use the ``?state=all`` query parameter. This API endpoint will create 31 | tasks on the task queue, but those tasks won't get run unless there is a worker 32 | process, so check to be sure that a worker is running. 
(If you're using Heroku, 33 | log in to the website and drag the worker slider on the project dashboard, or 34 | `use the command line tool <https://devcenter.heroku.com/articles/scaling>`_.) 35 | -------------------------------------------------------------------------------- /docs/architecture.rst: -------------------------------------------------------------------------------- 1 | Architecture 2 | ============ 3 | 4 | WebhookDB's codebase is separated into a multilayered architecture. This 5 | document will describe the layers from the ground up. 6 | 7 | Database Models 8 | --------------- 9 | Like all web applications, WebhookDB stores information in a database. The 10 | information is organized into conceptual models, most of which are directly 11 | pulled from GitHub. These models are defined using the `SQLAlchemy`_ ORM, and 12 | they are located in the ``models`` directory of the project. 13 | 14 | Many of these models inherit from the 15 | :class:`~webhookdb.models.github.ReplicationTimestampMixin`, which automatically 16 | adds two database columns: ``last_replicated_via_webhook_at`` and 17 | ``last_replicated_via_api_at``. This allows future database queries to determine 18 | how stale the data is. There is also a virtual property simply called 19 | ``last_replicated_at`` -- this returns the more recent of these two columns. 20 | 21 | Data Processing 22 | --------------- 23 | The next layer is the data processing layer, which is stored in the ``process`` 24 | directory of the project. This layer consists of functions which accept 25 | the parsed JSON output of the GitHub API responses, and update the database 26 | to reflect the information provided in that parsed JSON.
Each data model has its 27 | own data processing function: the :class:`~webhookdb.models.github.User` model 28 | has a corresponding :func:`~webhookdb.process.user.process_user` function, for 29 | example, and the :class:`~webhookdb.models.github.PullRequest` model has a 30 | corresponding :func:`~webhookdb.process.pull_request.process_pull_request` 31 | function. 32 | 33 | API responses often include nested data: for example, if you request information 34 | about a pull request from GitHub's `pull request API`_, it will include detailed 35 | user information about the author of the pull request, even though that is 36 | information that should be stored in the :class:`~webhookdb.models.github.User` 37 | model, not the :class:`~webhookdb.models.github.PullRequest` model. Each 38 | data processing function will *only* process the data for the model that it 39 | is named for, but it will delegate nested data to the data processing function 40 | for that nested data type. This means that 41 | :func:`~webhookdb.process.pull_request.process_pull_request` calls 42 | :func:`~webhookdb.process.user.process_user`, for example. 43 | 44 | It's important to note that functions in the data processing layer do not know 45 | where the data came from, and for the most part, they don't care. The data might 46 | come from an API response, or from a webhook notification. It might be top-level, 47 | or it might be some nested data that a different data processing function passed 48 | to it. These functions never seek out data on their own, but instead they are 49 | called by functions that retrieve the data. This means that functions in the 50 | data processing layer never make HTTP requests, although they can and do make 51 | database queries. 52 | 53 | Celery Tasks 54 | ------------ 55 | The next layer is the `Celery`_ tasks, which are stored in the ``tasks`` 56 | directory. 
This layer makes HTTP requests to GitHub's API, and passes the 57 | results of those requests on to the data processing layer. HTTP requests can be 58 | slow, and they can fail for any number of reasons (networking problems, problems 59 | on GitHub's end, rate limiting issues, etc), so we use the `Celery`_ task queue 60 | to make these tasks more robust against failure. 61 | 62 | Fetching data for an individual model, such as a single user or a single pull 63 | request, is relatively straightforward, and is handled by the "sync" task 64 | for the data model. For example, :func:`webhookdb.tasks.user.sync_user` 65 | will fetch data for an individual user, and 66 | :func:`webhookdb.tasks.pull_request.sync_pull_request` will fetch data for an 67 | individual pull request. 68 | 69 | Fetching data for a group of models, such as *all* pull requests 70 | in a repository, is much more complicated. GitHub's API responses are paginated, 71 | so it's natural to work on a per-page basis. For each data model, there is a 72 | "spawn page tasks" task, which makes a single API call to determine 73 | how many pages there are in the response. Based on that information, it calls 74 | the "sync page" task as many times as necessary: that task will make 75 | a single HTTP request to retrieve the indicated page of the API response, 76 | and will call the data processing functions for each item in the page. (Note 77 | that all of the "sync page" functions can be processed in parallel with each 78 | other.) Once all of the "sync page" tasks have completed, there is a "scanned" 79 | task that gets called, which handles any cleanup work necessary to indicate 80 | that the group of models is done being scanned. 
For example, to fetch data 81 | for all pull requests in a repository, the relevant tasks are 82 | :func:`webhookdb.tasks.pull_request.spawn_page_tasks_for_pull_requests`, 83 | :func:`webhookdb.tasks.pull_request.sync_page_of_pull_requests`, 84 | and :func:`webhookdb.tasks.pull_request.pull_requests_scanned`. 85 | 86 | Note that this uses Celery's :ref:`chord workflow <canvas-chord>`, 87 | and it is subject to all of the performance issues of that workflow. 88 | 89 | Replication HTTP endpoints 90 | -------------------------- 91 | The replication layer is stored in the ``replication`` directory, and it 92 | consists of a :ref:`Flask blueprint <blueprints>` designed to be used 93 | by the webhook system on GitHub. Once your repository on GitHub has its 94 | replication webhooks set up properly, GitHub will make an HTTP request to 95 | this endpoint every time an event happens on GitHub. The replication endpoint 96 | will pass the data in that request to the data processing layer, and will 97 | queue celery tasks to update other information if necessary. (For example, 98 | when a pull request is updated, the pull request files must be rescanned, 99 | so the replication endpoint will queue the 100 | :func:`webhookdb.tasks.pull_request_file.spawn_page_tasks_for_pull_request_files` 101 | task.) This layer also handles the ``ping`` event that GitHub sends to all 102 | webhook endpoints as a test. 103 | 104 | Load HTTP endpoints 105 | ------------------- 106 | Sometimes, users want to tell WebhookDB that it should load data from GitHub 107 | directly, rather than waiting for that data to replicate to WebhookDB via 108 | webhooks. The load layer is stored in the ``load`` directory, and it consists 109 | of a :ref:`Flask blueprint <blueprints>` that is designed to mirror the 110 | GitHub API fairly closely. When a user sends a POST request to one of these 111 | endpoints, WebhookDB will queue a Celery task to load the requested data from 112 | the GitHub API.
113 | 114 | User Interface 115 | -------------- 116 | The user interface is stored in the ``ui`` directory, and it consists of a 117 | :ref:`Flask blueprint <blueprints>` of views that return HTML web pages, 118 | rather than a JSON API. 119 | 120 | 121 | .. _SQLAlchemy: http://www.sqlalchemy.org/ 122 | .. _Celery: http://www.celeryproject.org/ 123 | .. _pull request API: https://developer.github.com/v3/pulls/#get-a-single-pull-request 124 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # WebhookDB documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Feb 25 10:08:26 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | sys.path.insert(0, os.path.abspath('..')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = [ 29 | 'sphinx.ext.autodoc', 30 | 'sphinx.ext.intersphinx', 31 | 'sphinxcontrib.autohttp.flask', 32 | ] 33 | 34 | # Add any paths that contain templates here, relative to this directory.
35 | templates_path = ['_templates'] 36 | 37 | # The suffix of source filenames. 38 | source_suffix = '.rst' 39 | 40 | # The encoding of source files. 41 | #source_encoding = 'utf-8-sig' 42 | 43 | # The master toctree document. 44 | master_doc = 'index' 45 | 46 | # General information about the project. 47 | project = u'WebhookDB' 48 | copyright = u'2015, David Baumgold' 49 | 50 | # The version info for the project you're documenting, acts as replacement for 51 | # |version| and |release|, also used in various other places throughout the 52 | # built documents. 53 | # 54 | # The short X.Y version. 55 | version = '0.0.1' 56 | # The full version, including alpha/beta/rc tags. 57 | release = '0.0.1' 58 | 59 | # The language for content autogenerated by Sphinx. Refer to documentation 60 | # for a list of supported languages. 61 | #language = None 62 | 63 | # There are two options for replacing |today|: either, you set today to some 64 | # non-false value, then it is used: 65 | #today = '' 66 | # Else, today_fmt is used as the format for a strftime call. 67 | #today_fmt = '%B %d, %Y' 68 | 69 | # List of patterns, relative to source directory, that match files and 70 | # directories to ignore when looking for source files. 71 | exclude_patterns = ['_build'] 72 | 73 | # The reST default role (used for this markup: `text`) to use for all documents. 74 | #default_role = None 75 | 76 | # If true, '()' will be appended to :func: etc. cross-reference text. 77 | #add_function_parentheses = True 78 | 79 | # If true, the current module name will be prepended to all description 80 | # unit titles (such as .. function::). 81 | #add_module_names = True 82 | 83 | # If true, sectionauthor and moduleauthor directives will be shown in the 84 | # output. They are ignored by default. 85 | #show_authors = False 86 | 87 | # The name of the Pygments (syntax highlighting) style to use. 88 | pygments_style = 'sphinx' 89 | 90 | # A list of ignored prefixes for module index sorting. 
91 | #modindex_common_prefix = [] 92 | 93 | 94 | # -- Options for HTML output --------------------------------------------------- 95 | 96 | # The theme to use for HTML and HTML Help pages. See the documentation for 97 | # a list of builtin themes. 98 | html_theme = 'alabaster' 99 | 100 | # Theme options are theme-specific and customize the look and feel of a theme 101 | # further. For a list of options available for each theme, see the 102 | # documentation. 103 | #html_theme_options = {} 104 | 105 | # Add any paths that contain custom themes here, relative to this directory. 106 | #html_theme_path = [] 107 | 108 | # The name for this set of Sphinx documents. If None, it defaults to 109 | # " v documentation". 110 | #html_title = None 111 | 112 | # A shorter title for the navigation bar. Default is the same as html_title. 113 | #html_short_title = None 114 | 115 | # The name of an image file (relative to this directory) to place at the top 116 | # of the sidebar. 117 | #html_logo = None 118 | 119 | # The name of an image file (within the static path) to use as favicon of the 120 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 121 | # pixels large. 122 | #html_favicon = None 123 | 124 | # Add any paths that contain custom static files (such as style sheets) here, 125 | # relative to this directory. They are copied after the builtin static files, 126 | # so a file named "default.css" will overwrite the builtin "default.css". 127 | html_static_path = ['_static'] 128 | 129 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 130 | # using the given strftime format. 131 | #html_last_updated_fmt = '%b %d, %Y' 132 | 133 | # If true, SmartyPants will be used to convert quotes and dashes to 134 | # typographically correct entities. 135 | #html_use_smartypants = True 136 | 137 | # Custom sidebar templates, maps document names to template names. 
138 | #html_sidebars = {} 139 | 140 | # Additional templates that should be rendered to pages, maps page names to 141 | # template names. 142 | #html_additional_pages = {} 143 | 144 | # If false, no module index is generated. 145 | #html_domain_indices = True 146 | 147 | # If false, no index is generated. 148 | #html_use_index = True 149 | 150 | # If true, the index is split into individual pages for each letter. 151 | #html_split_index = False 152 | 153 | # If true, links to the reST sources are added to the pages. 154 | #html_show_sourcelink = True 155 | 156 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 157 | #html_show_sphinx = True 158 | 159 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 160 | #html_show_copyright = True 161 | 162 | # If true, an OpenSearch description file will be output, and all pages will 163 | # contain a tag referring to it. The value of this option must be the 164 | # base URL from which the finished HTML is served. 165 | #html_use_opensearch = '' 166 | 167 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 168 | #html_file_suffix = None 169 | 170 | # Output file base name for HTML help builder. 171 | htmlhelp_basename = 'WebhookDBdoc' 172 | 173 | 174 | # -- Options for LaTeX output -------------------------------------------------- 175 | 176 | latex_elements = { 177 | # The paper size ('letterpaper' or 'a4paper'). 178 | #'papersize': 'letterpaper', 179 | 180 | # The font size ('10pt', '11pt' or '12pt'). 181 | #'pointsize': '10pt', 182 | 183 | # Additional stuff for the LaTeX preamble. 184 | #'preamble': '', 185 | } 186 | 187 | # Grouping the document tree into LaTeX files. List of tuples 188 | # (source start file, target name, title, author, documentclass [howto/manual]). 
189 | latex_documents = [ 190 | ('index', 'WebhookDB.tex', u'WebhookDB Documentation', 191 | u'David Baumgold', 'manual'), 192 | ] 193 | 194 | # The name of an image file (relative to this directory) to place at the top of 195 | # the title page. 196 | #latex_logo = None 197 | 198 | # For "manual" documents, if this is true, then toplevel headings are parts, 199 | # not chapters. 200 | #latex_use_parts = False 201 | 202 | # If true, show page references after internal links. 203 | #latex_show_pagerefs = False 204 | 205 | # If true, show URL addresses after external links. 206 | #latex_show_urls = False 207 | 208 | # Documents to append as an appendix to all manuals. 209 | #latex_appendices = [] 210 | 211 | # If false, no module index is generated. 212 | #latex_domain_indices = True 213 | 214 | 215 | # -- Options for manual page output -------------------------------------------- 216 | 217 | # One entry per manual page. List of tuples 218 | # (source start file, name, description, authors, manual section). 219 | man_pages = [ 220 | ('index', 'webhookdb', u'WebhookDB Documentation', 221 | [u'David Baumgold'], 1) 222 | ] 223 | 224 | # If true, show URL addresses after external links. 225 | #man_show_urls = False 226 | 227 | 228 | # -- Options for Texinfo output ------------------------------------------------ 229 | 230 | # Grouping the document tree into Texinfo files. List of tuples 231 | # (source start file, target name, title, author, 232 | # dir menu entry, description, category) 233 | texinfo_documents = [ 234 | ('index', 'WebhookDB', u'WebhookDB Documentation', 235 | u'David Baumgold', 'WebhookDB', 'One line description of project.', 236 | 'Miscellaneous'), 237 | ] 238 | 239 | # Documents to append as an appendix to all manuals. 240 | #texinfo_appendices = [] 241 | 242 | # If false, no module index is generated. 243 | #texinfo_domain_indices = True 244 | 245 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
246 | #texinfo_show_urls = 'footnote' 247 | 248 | intersphinx_mapping = { 249 | 'celery': ('http://docs.celeryproject.org/en/latest/', None), 250 | 'flask': ('http://flask.pocoo.org/docs/', None), 251 | } 252 | -------------------------------------------------------------------------------- /docs/http-api.rst: -------------------------------------------------------------------------------- 1 | HTTP API 2 | ======== 3 | 4 | UI 5 | -- 6 | .. autoflask:: webhookdb:create_app() 7 | :blueprints: ui 8 | :include-empty-docstring: 9 | 10 | Load Data 11 | --------- 12 | .. autoflask:: webhookdb:create_app() 13 | :blueprints: load 14 | :include-empty-docstring: 15 | 16 | Replication 17 | ----------- 18 | .. autoflask:: webhookdb:create_app() 19 | :blueprints: replication 20 | :include-empty-docstring: 21 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. WebhookDB documentation master file, created by 2 | sphinx-quickstart on Wed Feb 25 10:08:26 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to WebhookDB's documentation! 7 | ===================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | architecture 15 | add-new-repo 16 | http-api 17 | 18 | 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | 27 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 
11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. 
The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\WebhookDB.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\WebhookDB.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 
187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals, print_function 3 | import flask 4 | from flask.ext.script import Manager, prompt_bool 5 | import sqlalchemy 6 | from webhookdb import create_app, db, celery 7 | from webhookdb.models import ( 8 | OAuth, User, Repository, UserRepoAssociation, RepositoryHook, Milestone, 9 | PullRequest, PullRequestFile, IssueLabel, Issue, Mutex 10 | ) 11 | 12 | manager = Manager(create_app) 13 | manager.add_option('-c', '--config', dest='config', required=False) 14 | 15 | 16 | @manager.command 17 | def dbcreate(): 18 | "Creates database tables from SQLAlchemy models" 19 | db.create_all() 20 | db.session.commit() 21 | 22 | 23 | @manager.command 24 | def dbdrop(): 25 | "Drops database tables" 26 | if prompt_bool("Are you sure you want to lose all your data"): 27 | db.drop_all() 28 | db.session.commit() 29 | 30 | 31 | @manager.command 32 | def sql(): 33 | "Dumps SQL for creating database tables" 34 | def dump(sql, *multiparams, **params): 35 | print(sql.compile(dialect=engine.dialect)) 36 | engine = sqlalchemy.create_engine('postgresql://', strategy='mock', executor=dump) 37 | db.metadata.create_all(engine, checkfirst=False) 38 | 39 | 40 | @manager.command 41 | def worker(): 42 | "Start a Celery worker" 43 | worker = celery.Worker() 44 | worker.start() 45 | 46 | 47 | @manager.shell 48 | def make_shell_context(): 49 | return dict( 50 | app=flask.current_app, celery=celery, 51 | db=db, OAuth=OAuth, 52 | User=User, Repository=Repository, UserRepoAssociation=UserRepoAssociation, 53 | RepositoryHook=RepositoryHook, Milestone=Milestone, 54 | PullRequest=PullRequest, PullRequestFile=PullRequestFile, 55 | IssueLabel=IssueLabel, Issue=Issue, 56 | Mutex=Mutex, 57 | ) 58 | 59 | 60 | if __name__ == "__main__": 61 
def is_requirement(line):
    """
    Return True if ``line`` from a pip requirements file names a package.

    Rejected lines are: blank lines, ``#`` comment lines, ``git+`` VCS URLs,
    and pip option lines. Every line that starts with ``-`` is treated as an
    option (``-r``, ``-e``, ``-f``, ``-c``, ``--index-url``, ...): a
    requirement specifier never starts with a dash, and handing an option
    line to ``install_requires`` makes setuptools fail.

    :param line: one raw line of a requirements file (may include the
        trailing newline and surrounding whitespace)
    :returns: ``True`` if the line should be passed on as a requirement
    """
    line = line.strip()
    if not line:
        return False
    # A single startswith() call with a tuple checks every excluded prefix.
    return not line.startswith(('#', '-', 'git+'))


def get_requirements(path):
    """
    Read the requirements file at ``path`` and return its requirements.

    :param path: filesystem path of a pip requirements file
    :returns: list of stripped requirement strings, in file order, with
        every line rejected by :func:`is_requirement` left out
    """
    with open(path) as f:
        return [line.strip() for line in f if is_requirement(line)]
| 'Programming Language :: Python :: 2.7', 42 | ), 43 | zip_safe=False, 44 | ) 45 | -------------------------------------------------------------------------------- /tests/cassettes/test_pull_request.test_happy_path.json: -------------------------------------------------------------------------------- 1 | {"http_interactions": [{"request": {"body": {"string": "", "encoding": "utf-8"}, "headers": {"Connection": ["keep-alive"], "Accept-Encoding": ["gzip, deflate"], "Accept": ["*/*"], "User-Agent": ["python-requests/2.5.0 CPython/2.7.10 Darwin/15.0.0"]}, "method": "GET", "uri": "https://api.github.com/repos/octocat/Hello-World/pulls/1/files?per_page=100&page=1"}, "response": {"body": {"base64_string": "H4sIAAAAAAAAA51SS4/TMBD+K4O1t7abJm3TdCVEEayAAxxYrTgQVE3sSWPWtSPbUVUQ/HYm6bJ0K4SAo+f1vfzxqwgNiiuxTFcV5RmtZkVGSCqvsgXJVaGKdDWldJ5KmmUrqsRY1NqQxR3x1vvr5y/fXnMtRIxd4MrOKV1rUlxDpXTUznI5HwtFhu5f6VjIBu2WuLMci8q4atN5w9tNjG24SpKtjk1XXUq3S5yMTmJMXpMxbvLBeaOSfiNZTudzLHA6y6hYVDlWeZpOZ7MqQ1lgvZRcLaaLKkseSHrc/xsOL/wHjHQ2ko3hDAtbfXmiy1Prwm/V/dy/J/7MU/30b8Wy7S1G2bCX6zVMUhil4xzW69JOBgNhMPBJacsS3jmwtDfaEmAEsgpcDX24pR09Hh5dwO5OaQ/fT2N44QkjBUDgFnFM/gC183BwnYfWu89cA4nGkIJSnORXCtD2ONYF8r/WGfgCpDqDOX4ViA2B7Lxna2Hv/J222xPg6I4He0UHkAM1dXaZ3WdgHW8oBuja4aIlSSEgU3/F3V59YBZveEqj0V/4Bu3aeOwOkelBJ/NPbpl7SHq1m17G5sGFR3+1zzwZhEXXyQaOv/HPAYhvn34Ag/9PJJkDAAA=", "encoding": "utf-8"}, "headers": {"vary": ["Accept", "Accept-Encoding"], "x-github-media-type": ["github.v3"], "x-xss-protection": ["1; mode=block"], "x-content-type-options": ["nosniff"], "etag": ["W/\"b0657030f089b5b8d07e9704ea888ee7\""], "cache-control": ["public", "max-age=60", "s-maxage=60"], "status": ["200 OK"], "x-ratelimit-remaining": ["48"], "x-served-by": ["bae57931a6fe678a3dffe9be8e7819c8"], "access-control-expose-headers": ["ETag", "Link", "X-GitHub-OTP", "X-RateLimit-Limit", "X-RateLimit-Remaining", "X-RateLimit-Reset", "X-OAuth-Scopes", "X-Accepted-OAuth-Scopes", "X-Poll-Interval"], "transfer-encoding": ["chunked"], 
"x-github-request-id": ["452EEC24:151EA:50E8E9D:561FDA52"], "access-control-allow-credentials": ["true"], "last-modified": ["Tue", "07 Apr 2015 14:46:46 GMT"], "date": ["Thu", "15 Oct 2015 16:54:42 GMT"], "access-control-allow-origin": ["*"], "content-security-policy": ["default-src 'none'"], "content-encoding": ["gzip"], "strict-transport-security": ["max-age=31536000; includeSubdomains; preload"], "server": ["GitHub.com"], "x-ratelimit-limit": ["60"], "x-frame-options": ["deny"], "content-type": ["application/json; charset=utf-8"], "x-ratelimit-reset": ["1444928491"]}, "status": {"message": "OK", "code": 200}, "url": "https://api.github.com/repos/octocat/Hello-World/pulls/1/files?per_page=100&page=1"}, "recorded_at": "2015-10-15T16:54:42"}], "recorded_with": "betamax/0.5.0"} -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import betamax 3 | import os 4 | import json 5 | from datetime import datetime 6 | from webhookdb import create_app, db 7 | from webhookdb.oauth import GithubSession 8 | from webhookdb.tasks.fetch import github 9 | from flask.testing import FlaskClient 10 | from factories import ( 11 | UserFactory, RepoFactory, MilestoneFactory, PullRequestFactory 12 | ) 13 | from pytest_factoryboy import register 14 | 15 | 16 | register(UserFactory) 17 | register(RepoFactory) 18 | register(MilestoneFactory) 19 | register(PullRequestFactory) 20 | 21 | 22 | record_mode = 'none' if os.environ.get("CI") else 'once' 23 | 24 | with betamax.Betamax.configure() as config: 25 | config.cassette_library_dir = 'tests/cassettes' 26 | config.default_cassette_options['record_mode'] = record_mode 27 | 28 | 29 | class GitHubJSONEncoder(json.JSONEncoder): 30 | def default(self, o): 31 | if isinstance(o, datetime): 32 | stripped = o.replace(microsecond=0) 33 | return stripped.isoformat() + "Z" 34 | if hasattr(o, 
"github_json"): 35 | return o.github_json 36 | return json.JSONEncoder.default(self, o) 37 | 38 | 39 | class WebhookExtendedClient(FlaskClient): 40 | def pull_request_webhook( 41 | self, path="/replication", base_url="https://webhookdb.herokuapp.com/", 42 | pull_request=None, action="opened", sender=None, 43 | *args, **kwargs 44 | ): 45 | if not pull_request: 46 | raise ValueError("pull_request required") 47 | if not sender: 48 | raise ValueError("sender required") 49 | data = { 50 | "action": action, 51 | "number": pull_request.number, 52 | "pull_request": pull_request.github_json, 53 | "organization": pull_request.user.github_json, 54 | "sender": sender.github_json, 55 | } 56 | headers = { 57 | "User-Agent": "GitHub-Hookshot/044aadd", 58 | "Content-Type": "application/json", 59 | "X-Github-Event": "pull_request", 60 | } 61 | return self.post( 62 | base_url=base_url, 63 | path=path, 64 | headers=headers, 65 | data=json.dumps(data, cls=GitHubJSONEncoder), 66 | ) 67 | 68 | 69 | @pytest.fixture 70 | def app(request): 71 | """ 72 | Return a WebhookDB Flask app, set up in testing mode. 73 | """ 74 | _app = create_app(config="test") 75 | _app.test_client_class = WebhookExtendedClient 76 | db.create_all(app=_app) 77 | def teardown(): 78 | db.drop_all(app=_app) 79 | request.addfinalizer(teardown) 80 | return _app 81 | 82 | 83 | @pytest.fixture 84 | def github_betamax(request): 85 | """ 86 | Copied from Betamax's `betamax_session` fixture, but using the Flask-Dance 87 | `github` session that is used in the Celery tasks. 88 | """ 89 | cassette_name = '' 90 | 91 | if request.module is not None: 92 | cassette_name += request.module.__name__ + '.' 93 | 94 | if request.cls is not None: 95 | cassette_name += request.cls.__name__ + '.' 
96 | 97 | cassette_name += request.function.__name__ 98 | 99 | recorder = betamax.Betamax(github) 100 | recorder.use_cassette(cassette_name) 101 | recorder.start() 102 | request.addfinalizer(recorder.stop) 103 | 104 | return github 105 | -------------------------------------------------------------------------------- /tests/factories.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | import factory 3 | from factory.fuzzy import FuzzyText, FuzzyInteger, FuzzyChoice, FuzzyNaiveDateTime 4 | from factory.alchemy import SQLAlchemyModelFactory 5 | from webhookdb import db 6 | from webhookdb.models import ( 7 | User, Repository, UserRepoAssociation, RepositoryHook, Milestone, 8 | PullRequest, PullRequestFile, IssueLabel, Issue 9 | ) 10 | 11 | class BaseFactory(SQLAlchemyModelFactory): 12 | class Meta(object): 13 | abstract = True 14 | sqlalchemy_session = db.session 15 | 16 | now = datetime.utcnow() 17 | yesterday = datetime.utcnow() - timedelta(days=1) 18 | week_ago = datetime.utcnow() - timedelta(days=7) 19 | 20 | 21 | class UserFactory(BaseFactory): 22 | id = FuzzyInteger(999999) 23 | login = FuzzyText(length=8) 24 | site_admin = False 25 | name = FuzzyText(length=16) 26 | company = "GitHub" 27 | email = "fake@example.com" 28 | hireable = False 29 | bio = FuzzyText(prefix="autogenerated: ", length=50) 30 | public_repos_count = FuzzyInteger(50) 31 | public_gists_count = FuzzyInteger(50) 32 | followers_count = FuzzyInteger(50) 33 | following_count = FuzzyInteger(50) 34 | created_at = FuzzyNaiveDateTime(start_dt=week_ago, end_dt=yesterday) 35 | updated_at = FuzzyNaiveDateTime(start_dt=yesterday, end_dt=now) 36 | 37 | class Meta(object): 38 | model = User 39 | 40 | 41 | class RepoFactory(BaseFactory): 42 | id = FuzzyInteger(999999) 43 | name = FuzzyText(length=8) 44 | owner = factory.SubFactory(UserFactory) 45 | owner_login = factory.LazyAttribute(lambda repo: repo.owner.login) 46 | 
organization = factory.LazyAttribute(lambda repo: repo.owner) 47 | organization_login = factory.LazyAttribute(lambda repo: repo.organization.login) 48 | private = False 49 | description = FuzzyText(prefix="autogenerated: ", length=50) 50 | fork = False 51 | created_at = FuzzyNaiveDateTime(start_dt=week_ago, end_dt=yesterday) 52 | updated_at = FuzzyNaiveDateTime(start_dt=yesterday, end_dt=now) 53 | pushed_at = FuzzyNaiveDateTime(start_dt=yesterday, end_dt=now) 54 | size = FuzzyInteger(100, 1000000) 55 | stargazers_count = FuzzyInteger(50) 56 | watchers_count = FuzzyInteger(50) 57 | language = FuzzyChoice(["Python", "JavaScript", "Ruby", "Markdown"]) 58 | has_issues = True 59 | has_downloads = True 60 | has_wiki = True 61 | has_pages = True 62 | forks_count = FuzzyInteger(50) 63 | open_issues_count = FuzzyInteger(100) 64 | default_branch = "master" 65 | 66 | class Meta(object): 67 | model = Repository 68 | 69 | 70 | class MilestoneFactory(BaseFactory): 71 | repo = factory.SubFactory(RepoFactory) 72 | number = factory.Sequence(int) 73 | state = "open" 74 | title = factory.Sequence(lambda n: "v{}.0".format(n)) 75 | description = FuzzyText(prefix="autogenerated: ", length=50) 76 | creator = factory.LazyAttribute(lambda m: m.repo.owner) 77 | creator_login = factory.LazyAttribute(lambda m: m.repo.owner_login) 78 | open_issues_count = FuzzyInteger(20) 79 | closed_issues_count = FuzzyInteger(20) 80 | created_at = FuzzyNaiveDateTime(start_dt=week_ago, end_dt=yesterday) 81 | updated_at = FuzzyNaiveDateTime(start_dt=yesterday, end_dt=now) 82 | closed_at = None 83 | due_at = None 84 | 85 | class Meta(object): 86 | model = Milestone 87 | 88 | 89 | class PullRequestFactory(BaseFactory): 90 | id = FuzzyInteger(999999) 91 | number = factory.Sequence(int) 92 | state = "open" 93 | locked = False 94 | user = factory.SubFactory(UserFactory) 95 | user_login = factory.LazyAttribute(lambda pr: pr.user.login) 96 | title = factory.Sequence(lambda n: "PR #{}".format(n)) 97 | body = 
FuzzyText(prefix="autogenerated: ", length=50) 98 | created_at = FuzzyNaiveDateTime(start_dt=week_ago, end_dt=yesterday) 99 | updated_at = FuzzyNaiveDateTime(start_dt=yesterday, end_dt=now) 100 | base_repo = factory.SubFactory(RepoFactory) 101 | base_ref = "master" 102 | head_repo = factory.SubFactory(RepoFactory) 103 | head_ref = FuzzyText(length=10) 104 | merged = False 105 | comments_count = FuzzyInteger(10) 106 | review_comments_count = FuzzyInteger(15) 107 | commits_count = FuzzyInteger(5) 108 | additions = FuzzyInteger(50) 109 | deletions = FuzzyInteger(50) 110 | changed_files = FuzzyInteger(20) 111 | 112 | class Meta(object): 113 | model = PullRequest 114 | -------------------------------------------------------------------------------- /tests/test_pull_request.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from datetime import datetime 3 | from webhookdb.models import ( 4 | User, Repository, UserRepoAssociation, RepositoryHook, Milestone, 5 | PullRequest, PullRequestFile, IssueLabel, Issue 6 | ) 7 | 8 | pytestmark = pytest.mark.usefixtures("github_betamax") 9 | 10 | 11 | def test_happy_path(app, user_factory, repo_factory, pull_request_factory): 12 | # make some models to use for generating test data, but don't save them 13 | octocat = user_factory.build(login="octocat") 14 | repo = repo_factory.build(name="Hello-World", owner=octocat, fork=False) 15 | unoju = user_factory.build(login="unoju") 16 | repo2 = repo_factory.build(name="Hello-World", owner=unoju, fork=True) 17 | pr = pull_request_factory.build( 18 | base_repo=repo, head_repo=repo2, number=1, user=unoju, 19 | title="Edited README via GitHub", 20 | body="Please pull these awesome changes", 21 | ) 22 | 23 | # double-check that database is empty 24 | with app.test_request_context('/'): 25 | assert User.query.count() == 0 26 | assert Repository.query.count() == 0 27 | assert PullRequest.query.count() == 0 28 | 29 | # make a client and 
simulate a webhook notification from GitHub 30 | client = app.test_client() 31 | response = client.pull_request_webhook(sender=unoju, pull_request=pr) 32 | assert response.status_code == 200 33 | 34 | # check that the database is populated 35 | with app.test_request_context('/'): 36 | assert User.query.count() == 2 37 | assert User.query.filter_by(login="octocat").one() 38 | assert User.query.filter_by(login="unoju").one() 39 | 40 | assert Repository.query.count() == 2 41 | repo1 = Repository.query.filter_by(owner_login="octocat").one() 42 | assert repo1.name == "Hello-World" 43 | assert not repo1.fork 44 | repo2 = Repository.query.filter_by(owner_login="unoju").one() 45 | assert repo2.name == "Hello-World" 46 | assert repo2.fork 47 | 48 | assert PullRequest.query.count() == 1 49 | pr = PullRequest.query.first() 50 | assert pr.title == "Edited README via GitHub" 51 | assert pr.body == "Please pull these awesome changes" 52 | assert pr.user.login == "unoju" 53 | 54 | -------------------------------------------------------------------------------- /webhookdb/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import print_function, unicode_literals 3 | 4 | # UTF-8 stderr: http://stackoverflow.com/a/2001767/141395 5 | import codecs 6 | import sys 7 | reload(sys) 8 | sys.setdefaultencoding('utf-8') 9 | sys.stdout = codecs.getwriter('utf-8')(sys.stdout) 10 | sys.stderr = codecs.getwriter('utf-8')(sys.stderr) 11 | 12 | import os 13 | 14 | from flask import Flask 15 | from werkzeug.contrib.fixers import ProxyFix 16 | import bugsnag 17 | from bugsnag.flask import handle_exceptions 18 | from bugsnag.celery import connect_failure_handler 19 | from flask.ext.sqlalchemy import SQLAlchemy 20 | from flask_sslify import SSLify 21 | from flask_bootstrap import Bootstrap 22 | from flask_login import LoginManager 23 | from celery import Celery 24 | 25 | db = SQLAlchemy() 26 | bootstrap = Bootstrap() 27 
def expand_config(name):
    """
    Build the dotted import path of the config class selected by ``name``.

    A falsy ``name`` (``None`` or an empty string) selects ``"default"``.
    The name goes through ``str.capitalize``, so ``"test"`` becomes
    ``webhookdb.config.TestConfig``; note that ``capitalize`` also
    lower-cases every character after the first.
    """
    class_prefix = (name or "default").capitalize()
    return "webhookdb.config.{classname}Config".format(classname=class_prefix)
| bugsnag.configure(ignore_classes=[ 92 | "webhookdb.exceptions.StaleData", 93 | "webhookdb.exceptions.NothingToDo", 94 | "webhookdb.exceptions.RateLimited", 95 | ]) 96 | return celery 97 | -------------------------------------------------------------------------------- /webhookdb/config.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals 3 | import os 4 | 5 | RABBITMQ_PROVIDER = "bigwig" 6 | REDIS_PROVIDER = "rediscloud" 7 | 8 | 9 | class DefaultConfig(object): 10 | SECRET_KEY = os.environ.get("FLASK_SECRET_KEY", "secrettoeveryone") 11 | GITHUB_OAUTH_CLIENT_ID = os.environ.get("GITHUB_OAUTH_CLIENT_ID") 12 | GITHUB_OAUTH_CLIENT_SECRET = os.environ.get("GITHUB_OAUTH_CLIENT_SECRET") 13 | SQLALCHEMY_DATABASE_URI = os.environ.get("DATABASE_URL", "sqlite:///github.db") 14 | CELERY_ACCEPT_CONTENT = ["json"] 15 | CELERY_TASK_SERIALIZER = "json" 16 | CELERY_RESULT_SERIALIZER = 'json' 17 | CELERY_EAGER_PROPAGATES_EXCEPTIONS = True 18 | if RABBITMQ_PROVIDER == "bigwig": 19 | # TX_URL for producers 20 | CELERY_BROKER_URL = os.environ.get("RABBITMQ_BIGWIG_TX_URL", "amqp://") 21 | elif RABBITMQ_PROVIDER == "cloudamqp": 22 | CELERY_BROKER_URL = os.environ.get("CLOUDAMQP_URL", "amqp://") 23 | # recommended by CloudAMQP for their free plan 24 | BROKER_POOL_LIMIT = 1 25 | if REDIS_PROVIDER == "rediscloud": 26 | CELERY_RESULT_BACKEND = os.environ.get("REDISCLOUD_URL", "redis://") 27 | 28 | 29 | class WorkerConfig(DefaultConfig): 30 | if RABBITMQ_PROVIDER == "bigwig": 31 | # RX_URL for consumers 32 | CELERY_BROKER_URL = os.environ.get("RABBITMQ_BIGWIG_RX_URL", "amqp://") 33 | 34 | 35 | class DevelopmentConfig(DefaultConfig): 36 | DEBUG = True 37 | 38 | 39 | class TestConfig(DefaultConfig): 40 | TESTING = True 41 | SQLALCHEMY_DATABASE_URI = "sqlite://" # in-memory database 42 | CELERY_ALWAYS_EAGER = True 43 | -------------------------------------------------------------------------------- 
class WebhookDBException(Exception):
    "Base exception class that all others in this package inherit from"
    pass


class MissingData(WebhookDBException):
    """
    Raised with a message and the object (``obj``) the message refers to.
    """
    def __init__(self, message, obj):
        self.message = message
        self.obj = obj
        WebhookDBException.__init__(self, message)


class StaleData(WebhookDBException):
    pass


class NothingToDo(WebhookDBException):
    pass


class RateLimited(WebhookDBException):
    """
    Raised for a rate-limited response.

    ``response`` is kept on the exception as ``self.response``. The
    exception message is taken from the ``"message"`` key of the response's
    JSON body when that can be read; otherwise it falls back to the
    response's ``content`` attribute, or to the ``response`` object itself.
    """
    def __init__(self, response):
        self.response = response
        try:
            message = response.json()["message"]
        except Exception:
            # Deliberately broad, best-effort: ``response`` may be None, may
            # lack ``json()``, may hold invalid JSON, or JSON without a
            # "message" key. None of that should mask the rate limit itself.
            message = getattr(response, "content", response)
        WebhookDBException.__init__(self, message)

    @property
    def reset(self):
        """
        If set, a datetime that indicates when the rate limit will
        be reset. If not set, the reset time is unknown.

        Returns ``None`` when the response is missing, has no headers, has
        no ``X-RateLimit-Reset`` header, or that header is not an integer.
        """
        # __init__ accepts objects that are not real HTTP responses, so do
        # not assume a ``headers`` attribute exists.
        headers = getattr(self.response, "headers", None)
        if not headers:
            return None
        reset_epoch_str = headers.get("X-RateLimit-Reset")
        if not reset_epoch_str:
            return None
        try:
            reset_epoch = int(reset_epoch_str)
        except (TypeError, ValueError, OverflowError):
            return None
        # NOTE(review): fromtimestamp() returns a naive datetime in local
        # time -- confirm callers compare it against local time, not UTC.
        return datetime.fromtimestamp(reset_epoch)


class NotFound(WebhookDBException):
    """
    Raised with a message and an optional ``info`` dict (defaults to ``{}``).
    """
    def __init__(self, message, info=None):
        self.message = message
        self.info = info or {}
        WebhookDBException.__init__(self, message)


class DatabaseError(WebhookDBException):
    """
    Raised with a message and an optional ``info`` dict (defaults to ``{}``).
    """
    def __init__(self, message, info=None):
        self.message = message
        self.info = info or {}
        WebhookDBException.__init__(self, message)
@load.route('/repos/<owner>/<repo>/issues/<int:number>', methods=["POST"])
def issue(owner, repo, number):
    """
    Load a single issue from Github into WebhookDB.

    :query children: scan all children objects. Defaults to ``false``
    :query inline: process the request inline instead of creating a task
      on the task queue. Defaults to ``false``. Ignored if ``children``
      is true.
    :statuscode 200: issue successfully loaded inline
    :statuscode 202: task successfully queued
    :statuscode 404: specified issue was not found on Github
    """
    # NOTE: any non-empty query value counts as true here, including the
    # literal strings "false" and "0"; only an absent or empty value is false.
    children = bool(request.args.get("children", False))
    inline = bool(request.args.get("inline", False))
    bugsnag.configure_request(meta_data={
        "owner": owner, "repo": repo, "number": number,
        "inline": inline, "children": children,
    })

    if inline and not children:
        # Run the sync in this request so the caller gets the final outcome.
        try:
            sync_issue(
                owner, repo, number, children=False,
                requestor_id=current_user.get_id(),
            )
        except NotFound as exc:
            return jsonify({"message": exc.message}), 404
        return jsonify({"message": "success"})

    # Otherwise hand off to the task queue and point the caller at the
    # task-status endpoint.
    result = sync_issue.delay(
        owner, repo, number, children=children,
        requestor_id=current_user.get_id(),
    )
    resp = jsonify({"message": "queued"})
    resp.status_code = 202
    resp.headers["Location"] = url_for("tasks.status", task_id=result.id)
    return resp
@load.route('/repos/<owner>/<repo>/labels/<name>', methods=["POST"])
def label(owner, repo, name):
    """
    Load a single label from Github into WebhookDB.

    :query children: scan all children objects. Defaults to ``false``
    :query inline: process the request inline instead of creating a task
      on the task queue. Defaults to ``false``. Ignored if ``children``
      is true.
    :statuscode 200: label successfully loaded inline
    :statuscode 202: task successfully queued
    :statuscode 404: specified label was not found on Github
    """
    # NOTE: any non-empty query value counts as true here, including the
    # literal strings "false" and "0"; only an absent or empty value is false.
    inline = bool(request.args.get("inline", False))
    children = bool(request.args.get("children", False))
    bugsnag.configure_request(meta_data={
        "owner": owner, "repo": repo, "name": name,
        "inline": inline, "children": children,
    })

    if inline and not children:
        try:
            sync_label(
                owner, repo, name, children=False,
                requestor_id=current_user.get_id(),
            )
        except NotFound as exc:
            return jsonify({"message": exc.message}), 404
        return jsonify({"message": "success"})

    # Pass the requestor to the queued task exactly as the inline call does;
    # previously the queued path silently dropped it.
    result = sync_label.delay(
        owner, repo, name, children=children,
        requestor_id=current_user.get_id(),
    )
    resp = jsonify({"message": "queued"})
    resp.status_code = 202
    resp.headers["Location"] = url_for("tasks.status", task_id=result.id)
    return resp
@load.route('/repos/<owner>/<repo>/milestones/<int:number>', methods=["POST"])
def milestone(owner, repo, number):
    """
    Load a single milestone from Github into WebhookDB.

    :query children: scan all children objects. Defaults to ``false``
    :query inline: process the request inline instead of creating a task
      on the task queue. Defaults to ``false``. Ignored if ``children``
      is true.
    :statuscode 200: milestone successfully loaded inline
    :statuscode 202: task successfully queued
    :statuscode 404: specified milestone was not found on Github
    """
    # NOTE: any non-empty query value counts as true here, including the
    # literal strings "false" and "0"; only an absent or empty value is false.
    inline = bool(request.args.get("inline", False))
    children = bool(request.args.get("children", False))
    bugsnag.configure_request(meta_data={
        "owner": owner, "repo": repo, "number": number,
        "inline": inline, "children": children,
    })

    if inline and not children:
        try:
            # ``children`` is necessarily False on this branch.
            sync_milestone(
                owner, repo, number, children=False,
                requestor_id=current_user.get_id(),
            )
        except NotFound as exc:
            return jsonify({"message": exc.message}), 404
        return jsonify({"message": "success"})

    result = sync_milestone.delay(
        owner, repo, number, children=children,
        requestor_id=current_user.get_id(),
    )
    resp = jsonify({"message": "queued"})
    resp.status_code = 202
    resp.headers["Location"] = url_for("tasks.status", task_id=result.id)
    return resp
@load.route('/repos/<owner>/<repo>/pulls/<int:number>', methods=["POST"])
def pull_request(owner, repo, number):
    """
    Load a single pull request from Github into WebhookDB.

    :query children: scan all children objects. Defaults to ``false``
    :query inline: process the request inline instead of creating a task
      on the task queue. Defaults to ``false``. Ignored if ``children``
      is true.
    :statuscode 200: pull request successfully loaded inline
    :statuscode 202: task successfully queued
    :statuscode 404: specified pull request was not found on Github
    """
    # NOTE: any non-empty query value counts as true here, including the
    # literal strings "false" and "0"; only an absent or empty value is false.
    inline = bool(request.args.get("inline", False))
    children = bool(request.args.get("children", False))
    bugsnag.configure_request(meta_data={
        "owner": owner, "repo": repo, "number": number,
        "inline": inline, "children": children,
    })

    if inline and not children:
        try:
            sync_pull_request(
                owner, repo, number, children=False,
                requestor_id=current_user.get_id(),
            )
        except NotFound as exc:
            return jsonify({"message": exc.message}), 404
        return jsonify({"message": "success"})

    result = sync_pull_request.delay(
        owner, repo, number, children=children,
        requestor_id=current_user.get_id(),
    )
    resp = jsonify({"message": "queued"})
    resp.status_code = 202
    resp.headers["Location"] = url_for("tasks.status", task_id=result.id)
    return resp
_Github API for listing pull requests: https://developer.github.com/v3/pulls/#list-pull-requests 61 | """ 62 | bugsnag_ctx = {"owner": owner, "repo": repo} 63 | bugsnag.configure_request(meta_data=bugsnag_ctx) 64 | state = request.args.get("state", "open") 65 | children = bool(request.args.get("children", False)) 66 | 67 | result = spawn_page_tasks_for_pull_requests.delay( 68 | owner, repo, state, children=children, 69 | requestor_id=current_user.get_id(), 70 | ) 71 | resp = jsonify({"message": "queued"}) 72 | resp.status_code = 202 73 | resp.headers["Location"] = url_for("tasks.status", task_id=result.id) 74 | return resp 75 | -------------------------------------------------------------------------------- /webhookdb/load/pull_request_file.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals, print_function 3 | 4 | from flask import request, jsonify, url_for 5 | from flask_login import current_user 6 | import bugsnag 7 | from . import load 8 | from webhookdb.tasks.pull_request_file import spawn_page_tasks_for_pull_request_files 9 | 10 | @load.route('/repos///pulls//files', methods=["POST"]) 11 | def pull_request_files(owner, repo, number): 12 | """ 13 | Queue tasks to load the pull request files (diffs) for a single pull request 14 | into WebhookDB. 
@load.after_request
def attach_ratelimit_headers(response, gh_response=None):
    """
    Copy Github's rate-limit headers onto an outgoing response.

    :param response: the response about to be returned by this blueprint
    :param gh_response: the Github response to copy headers from; when
        omitted, ``github.last_response`` is used if it exists
    :returns: ``response``, with any of ``X-RateLimit-Limit``,
        ``X-RateLimit-Remaining`` and ``X-RateLimit-Reset`` that were present
        upstream copied over
    """
    # A response with a non-OK response code is falsy, so can't just do:
    #   gh_response = gh_response or getattr(github, "last_response", None)
    # Instead, we have to actually check for None
    if gh_response is None:
        gh_response = getattr(github, "last_response", None)
    if gh_response is None:
        return response

    # attach ratelimit headers
    headers = ("X-RateLimit-Limit", "X-RateLimit-Remaining", "X-RateLimit-Reset")
    for h in headers:
        if h in gh_response.headers:
            response.headers[h] = gh_response.headers[h]
    return response


@load.errorhandler(RateLimited)
def request_rate_limited(error):
    """
    Turn a :class:`RateLimited` error into a JSON 503 response.

    The body is ``{"error": "<upstream message> <wait hint>"}``. The wait hint
    states the number of seconds until ``error.reset`` when that is known,
    and a generic "try again later" otherwise.
    """
    gh_resp = error.response
    try:
        upstream_msg = gh_resp.json()["message"]
    except Exception:
        # Best-effort: fall back to a generic message for any unreadable body.
        upstream_msg = "Rate limited."

    reset = error.reset
    if reset is None:
        # ``RateLimited.reset`` is None when the reset time is unknown;
        # subtracting from it would raise inside the error handler.
        wait_msg = "Try again later."
    else:
        # Clamp at zero so a reset time already in the past never produces
        # a negative wait.
        sec = max(int((reset - datetime.now()).total_seconds()), 0)
        wait_msg = "Try again in {sec} {unit}.".format(
            sec=sec, unit="second" if sec == 1 else "seconds",
        )

    msg = "{upstream} {wait}".format(
        upstream=upstream_msg,
        wait=wait_msg,
    )
    resp = jsonify({"error": msg})
    resp.status_code = 503
    return resp
@load.route('/repos/<owner>/<repo>/hooks/<int:hook_id>', methods=["POST"])
def repository_hook(owner, repo, hook_id):
    """
    Load a single repository hook from Github into WebhookDB.

    :query children: scan all children objects. Defaults to ``false``
    :query inline: process the request inline instead of creating a task
      on the task queue. Defaults to ``false``. Ignored if ``children``
      is true.
    :statuscode 200: hook successfully loaded inline
    :statuscode 202: task successfully queued
    :statuscode 404: specified hook was not found on Github
    """
    # NOTE: any non-empty query value counts as true here, including the
    # literal strings "false" and "0"; only an absent or empty value is false.
    inline = bool(request.args.get("inline", False))
    children = bool(request.args.get("children", False))
    bugsnag.configure_request(meta_data={
        "owner": owner, "repo": repo, "hook_id": hook_id,
        "inline": inline, "children": children,
    })

    if inline and not children:
        try:
            # ``children`` is necessarily False on this branch.
            sync_repository_hook(
                owner, repo, hook_id, children=False,
                requestor_id=current_user.get_id(),
            )
        except NotFound as exc:
            return jsonify({"message": exc.message}), 404
        return jsonify({"message": "success"})

    # The queued call must use ``hook_id``; this view has no ``number``
    # argument, so the previous code raised NameError on every queued request.
    result = sync_repository_hook.delay(
        owner, repo, hook_id, children=children,
        requestor_id=current_user.get_id(),
    )
    resp = jsonify({"message": "queued"})
    resp.status_code = 202
    resp.headers["Location"] = url_for("tasks.status", task_id=result.id)
    return resp
@load.route('/user/<username>/repos', methods=["POST"])
def user_repositories(username):
    """
    Queue tasks to load all of the given user's repositories into WebhookDB.

    :query children: scan all children objects. Defaults to false
    :query type: one of ``all``, ``owner``, ``member``. Default: ``owner``.
      This parameter is proxied to the `Github API for listing user repositories`_.
    :statuscode 202: task successfully queued

    .. _Github API for listing user repositories: https://developer.github.com/v3/repos/#list-user-repositories
    """
    # ``User`` was referenced here without ever being imported by this module,
    # which raised NameError on every request. Import it locally so the fix is
    # self-contained.
    from webhookdb.models import User

    bugsnag.configure_request(meta_data={"username": username})
    # NOTE: any non-empty query value counts as true here, including the
    # literal strings "false" and "0"; only an absent or empty value is false.
    children = bool(request.args.get("children", False))
    # Named ``repo_type`` locally to avoid shadowing the ``type`` builtin; the
    # task still receives it under the ``type`` keyword.
    repo_type = request.args.get("type", "owner")

    if not User.get(username):
        # queue a task to load the user
        sync_user.delay(username)

    result = spawn_page_tasks_for_user_repositories.delay(
        username, type=repo_type, children=children,
        requestor_id=current_user.get_id(),
    )
    resp = jsonify({"message": "queued"})
    resp.status_code = 202
    resp.headers["Location"] = url_for("tasks.status", task_id=result.id)
    return resp
class OAuth(db.Model, OAuthConsumerMixin):
    "Used by Flask-Dance"
    user_id = db.Column(db.Integer, db.ForeignKey(User.id))
    user = db.relationship(User)


class Mutex(db.Model):
    "A named row in ``webhookdb_mutex``, optionally linked to a user."
    __tablename__ = "webhookdb_mutex"

    name = db.Column(db.String(256), primary_key=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    # Plain integer column (no ForeignKey); the join to User is spelled out
    # explicitly in the relationship below.
    user_id = db.Column(db.Integer, index=True)
    user = db.relationship(
        User,
        primaryjoin=(user_id == User.id),
        foreign_keys=user_id,
        remote_side=User.id,
        backref="held_locks",
    )


@login_manager.user_loader
def load_user(user_id):
    """
    Used by Flask-Login

    :param user_id: the user id to look up
    :returns: the matching ``User``, or ``None`` when the id is not a valid
        integer or no such user exists
    """
    try:
        pk = int(user_id)
    except (TypeError, ValueError):
        # A user loader must return None for an invalid id rather than raise.
        return None
    return User.query.get(pk)
-------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals 3 | from datetime import datetime 4 | from sqlalchemy import func, and_ 5 | from sqlalchemy.orm import backref 6 | from sqlalchemy.ext.hybrid import hybrid_property, hybrid_method 7 | from sqlalchemy.ext.mutable import MutableDict 8 | from sqlalchemy.ext.associationproxy import association_proxy 9 | from sqlalchemy.orm.exc import NoResultFound 10 | from sqlalchemy_utils import JSONType, ColorType, ScalarListType 11 | from flask_login import UserMixin 12 | from webhookdb import db 13 | 14 | 15 | class ReplicationTimestampMixin(object): 16 | """ 17 | This allows us to keep track of how stale our local data is. 18 | """ 19 | last_replicated_via_webhook_at = db.Column(db.DateTime) 20 | last_replicated_via_api_at = db.Column(db.DateTime) 21 | 22 | @hybrid_property 23 | def last_replicated_at(self): 24 | """ 25 | Return whichever value is greater. If neither is set, 26 | return min date (for type consistency). 
27 | """ 28 | options = [ 29 | self.last_replicated_via_webhook_at, 30 | self.last_replicated_via_api_at, 31 | datetime.min, 32 | ] 33 | return max(dt for dt in options if dt) 34 | 35 | @last_replicated_at.expression 36 | def last_replicated_at(cls): 37 | webhook = cls.last_replicated_via_webhook_at 38 | api = cls.last_replicated_via_api_at 39 | return func.greatest(webhook, api, datetime.min) 40 | 41 | 42 | class User(db.Model, ReplicationTimestampMixin, UserMixin): 43 | __tablename__ = "github_user" 44 | 45 | id = db.Column(db.Integer, primary_key=True) 46 | login = db.Column(db.String(256)) 47 | site_admin = db.Column(db.Boolean) 48 | name = db.Column(db.String(256)) 49 | company = db.Column(db.String(256)) 50 | blog = db.Column(db.String(256)) 51 | location = db.Column(db.String(256)) 52 | email = db.Column(db.String(256)) 53 | hireable = db.Column(db.Boolean) 54 | bio = db.Column(db.Text) 55 | public_repos_count = db.Column(db.Integer) 56 | public_gists_count = db.Column(db.Integer) 57 | followers_count = db.Column(db.Integer) 58 | following_count = db.Column(db.Integer) 59 | created_at = db.Column(db.DateTime) 60 | updated_at = db.Column(db.DateTime) 61 | 62 | # not on github -- used for keeping track of scanning children 63 | repos_last_scanned_at = db.Column(db.DateTime) 64 | 65 | @classmethod 66 | def get(cls, username): 67 | """ 68 | Fetch a user object by username. 69 | 70 | If the user doesn't exist in the webhookdb database, return None. 71 | This can still raise a MultipleResultsFound exception. 
class Repository(db.Model, ReplicationTimestampMixin):
    "A Github repository, stored in the ``github_repository`` table."
    __tablename__ = "github_repository"

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(256))
    owner_id = db.Column(db.Integer, index=True)
    owner_login = db.Column(db.String(256))
    owner = db.relationship(
        User,
        primaryjoin=(owner_id == User.id),
        foreign_keys=owner_id,
        remote_side=User.id,
        backref="owned_repos",
    )
    organization_id = db.Column(db.Integer, index=True)
    organization_login = db.Column(db.String(256))
    organization = db.relationship(
        User,
        primaryjoin=(organization_id == User.id),
        foreign_keys=organization_id,
        remote_side=User.id,
    )
    private = db.Column(db.Boolean)
    description = db.Column(db.String(1024))
    fork = db.Column(db.Boolean)
    created_at = db.Column(db.DateTime)
    updated_at = db.Column(db.DateTime)
    pushed_at = db.Column(db.DateTime)
    homepage = db.Column(db.String(256))
    size = db.Column(db.Integer)
    stargazers_count = db.Column(db.Integer)
    watchers_count = db.Column(db.Integer)
    language = db.Column(db.String(256))
    has_issues = db.Column(db.Boolean)
    has_downloads = db.Column(db.Boolean)
    has_wiki = db.Column(db.Boolean)
    has_pages = db.Column(db.Boolean)
    forks_count = db.Column(db.Integer)
    open_issues_count = db.Column(db.Integer)
    default_branch = db.Column(db.String(256), default="master")

    # not on github -- used for keeping track of scanning children
    hooks_last_scanned_at = db.Column(db.DateTime)
    issues_last_scanned_at = db.Column(db.DateTime)
    pull_requests_last_scanned_at = db.Column(db.DateTime)
    labels_last_scanned_at = db.Column(db.DateTime)
    milestones_last_scanned_at = db.Column(db.DateTime)

    # just for finding all the admins on a repo
    admin_assocs = db.relationship(
        lambda: UserRepoAssociation,
        primaryjoin=lambda: and_(
            Repository.id == UserRepoAssociation.repo_id,
            UserRepoAssociation.can_admin == True,
        ),
        foreign_keys=id,
        uselist=True,
    )
    admins = association_proxy("admin_assocs", "user")

    @hybrid_property
    def full_name(self):
        "The ``owner_login/name`` string; missing parts become empty strings."
        return "{owner_login}/{name}".format(
            owner_login=self.owner_login or "",
            name=self.name or "",
        )

    @full_name.expression
    def full_name(cls):
        # SQL-side counterpart of the property above. The argument order must
        # match it (owner first, then name); it was previously reversed, so
        # filtering on ``full_name`` compared against "name/owner".
        name = func.coalesce(cls.name, "")
        owner_login = func.coalesce(cls.owner_login, "")
        return func.concat(owner_login, '/', name)

    @classmethod
    def get(cls, owner, name):
        """
        Fetch a single repository given two things:
        * the username of the repo's owner, as a string
        * the name of the repo, as a string

        If the repository doesn't exist in the webhookdb database, return None.
        This can still raise a MultipleResultsFound exception.
        """
        query = cls.query.filter_by(owner_login=owner, name=name)
        try:
            return query.one()
        except NoResultFound:
            return None

    def __unicode__(self):
        return self.full_name

    def __str__(self):
        return unicode(self).encode('utf-8')

    @property
    def github_json(self):
        """
        A dict shaped like Github's repository JSON, built from this row.

        Timestamps are included as stored on the model, and ``owner`` is
        rendered via the related user's ``github_json``.
        """
        slug = {"owner": self.owner_login, "repo": self.name}
        url = "https://api.github.com/repos/{owner}/{repo}".format(**slug)
        html_url = "https://github.com/{owner}/{repo}".format(**slug)
        git_url = "git://github.com/{owner}/{repo}.git".format(**slug)
        ssh_url = "git@github.com:{owner}/{repo}.git".format(**slug)
        svn_url = "https://github.com/{owner}/{repo}".format(**slug)
        clone_url = svn_url + ".git"
        serialized = {
            "id": self.id,
            "name": self.name,
            "full_name": self.full_name,
            "owner": self.owner.github_json,
            "private": self.private,
            "html_url": html_url,
            "description": self.description,
            "fork": self.fork,
            "url": url,
            "forks_url": url + "/forks",
            "keys_url": url + "/keys{/key_id}",
            "collaborators_url": url + "/collaborators{/collaborator}",
            "teams_url": url + "/teams",
            "hooks_url": url + "/hooks",
            "issue_events_url": url + "/issues/events{/number}",
            "events_url": url + "/events",
            "assignees_url": url + "/assignees{/user}",
            "branches_url": url + "/branches{/branch}",
            "tags_url": url + "/tags",
            "blobs_url": url + "/git/blobs{/sha}",
            "git_tags_url": url + "/git/tags{/sha}",
            "git_refs_url": url + "/git/refs{/sha}",
            "trees_url": url + "/git/trees{/sha}",
            "statuses_url": url + "/statuses/{sha}",
            "languages_url": url + "/languages",
            "stargazers_url": url + "/stargazers",
            "contributors_url": url + "/contributors",
            "subscribers_url": url + "/subscribers",
            "subscription_url": url + "/subscription",
            "commits_url": url + "/commits{/sha}",
            "git_commits_url": url + "/git/commits{/sha}",
            "comments_url": url + "/comments{/number}",
            "issue_comment_url": url + "/issues/comments{/number}",
            "contents_url": url + "/contents/{+path}",
            "compare_url": url + "/compare/{base}...{head}",
            "merges_url": url + "/merges",
            "archive_url": url + "/{archive_format}{/ref}",
            "downloads_url": url + "/downloads",
            "issues_url": url + "/issues{/number}",
            "pulls_url": url + "/pulls{/number}",
            "milestones_url": url + "/milestones{/number}",
            "notifications_url": url + "/notifications{?since,all,participating}",
            "labels_url": url + "/labels{/name}",
            "releases_url": url + "/releases{/id}",
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "pushed_at": self.pushed_at,
            "git_url": git_url,
            "ssh_url": ssh_url,
            "clone_url": clone_url,
            "svn_url": svn_url,
            "homepage": self.homepage,
            "size": self.size,
            "stargazers_count": self.stargazers_count,
            "watchers_count": self.watchers_count,
            "language": self.language,
            "has_issues": self.has_issues,
            "has_downloads": self.has_downloads,
            "has_wiki": self.has_wiki,
            "has_pages": self.has_pages,
            "forks_count": self.forks_count,
            "mirror_url": None,  # FIXME
            "open_issues_count": self.open_issues_count,
            "forks": self.forks_count,
            "open_issues": self.open_issues_count,
            "watchers": self.watchers_count,
            "default_branch": self.default_branch,
        }
        return serialized
backref="milestones", 358 | ) 359 | number = db.Column(db.Integer, primary_key=True) 360 | state = db.Column(db.String(64)) 361 | title = db.Column(db.String(256)) 362 | description = db.Column(db.Text) 363 | creator_id = db.Column(db.Integer, index=True) 364 | creator_login = db.Column(db.String(256)) 365 | creator = db.relationship( 366 | User, 367 | primaryjoin=(creator_id == User.id), 368 | foreign_keys=creator_id, 369 | remote_side=User.id, 370 | backref="created_milestones", 371 | ) 372 | open_issues_count = db.Column(db.Integer) 373 | closed_issues_count = db.Column(db.Integer) 374 | created_at = db.Column(db.DateTime) 375 | updated_at = db.Column(db.DateTime) 376 | closed_at = db.Column(db.DateTime) 377 | due_at = db.Column(db.DateTime) 378 | 379 | @classmethod 380 | def get(cls, repo_owner, repo_name, number): 381 | """ 382 | Fetch a single milestone given three things: 383 | * the username of the repo's owner, as a string 384 | * the name of the repo, as a string 385 | * the number of the milestone, as an integer 386 | 387 | If the milestone doesn't exist in the webhookdb database, return None. 388 | This can still raise a MultipleResultsFound exception. 
@property
def github_json(self):
    """
    Serialize this milestone to a dict shaped like GitHub's milestone JSON.

    URLs are built from the owning repository's ``owner_login``/``name``
    and this milestone's ``number`` (API URL) or ``title`` (HTML URL).
    """
    url = "https://api.github.com/repos/{owner}/{repo}/milestones/{number}".format(
        owner=self.repo.owner_login,
        repo=self.repo.name,
        number=self.number,
    )
    html_url = "https://github.com/{owner}/{repo}/milestones/{title}".format(
        owner=self.repo.owner_login,
        repo=self.repo.name,
        title=self.title,
    )
    serialized = {
        "url": url,
        "html_url": html_url,
        "labels_url": url + "/labels",
        # NOTE(review): the model's columns show a composite primary key
        # (repo_id, number) and no ``id``; fall back to None rather than
        # raising AttributeError until GitHub's milestone ID is stored.
        "id": getattr(self, "id", None),
        "number": self.number,
        "state": self.state,
        "title": self.title,
        "description": self.description,
        # creator_id may be unset, in which case the relationship is None;
        # mirror the getattr pattern used for optional users elsewhere.
        "creator": getattr(self.creator, "github_json", None),
        "open_issues": self.open_issues_count,
        "closed_issues": self.closed_issues_count,
        "created_at": self.created_at,
        "updated_at": self.updated_at,
        "closed_at": self.closed_at,
        "due_on": self.due_at,
    }
    return serialized
db.Column(db.DateTime) 453 | closed_at = db.Column(db.DateTime) 454 | merged_at = db.Column(db.DateTime) 455 | assignee_id = db.Column(db.Integer, index=True) 456 | assignee_login = db.Column(db.String(256)) 457 | assignee = db.relationship( 458 | User, 459 | primaryjoin=(assignee_id == User.id), 460 | foreign_keys=assignee_id, 461 | remote_side=User.id, 462 | backref=backref("assigned_pull_requests", order_by=number), 463 | ) 464 | base_repo_id = db.Column(db.Integer, index=True) 465 | base_repo = db.relationship( 466 | Repository, 467 | primaryjoin=(base_repo_id == Repository.id), 468 | foreign_keys=base_repo_id, 469 | remote_side=Repository.id, 470 | backref=backref("pull_requests", order_by=number), 471 | ) 472 | base_ref = db.Column(db.String(256)) 473 | head_repo_id = db.Column(db.Integer, index=True) 474 | head_repo = db.relationship( 475 | Repository, 476 | primaryjoin=(head_repo_id == Repository.id), 477 | foreign_keys=head_repo_id, 478 | remote_side=Repository.id, 479 | ) 480 | head_ref = db.Column(db.String(256)) 481 | milestone_number = db.Column(db.Integer) 482 | milestone = db.relationship( 483 | Milestone, 484 | primaryjoin=and_( 485 | milestone_number == Milestone.number, 486 | head_repo_id == Milestone.repo_id 487 | ), 488 | foreign_keys=[milestone_number, head_repo_id], 489 | backref=backref("pull_requests", order_by=number), 490 | ) 491 | merged = db.Column(db.Boolean) 492 | mergeable = db.Column(db.Boolean) 493 | mergeable_state = db.Column(db.String(64)) 494 | merged_by_id = db.Column(db.Integer, index=True) 495 | merged_by_login = db.Column(db.String(256)) 496 | merged_by = db.relationship( 497 | User, 498 | primaryjoin=(merged_by_id == User.id), 499 | foreign_keys=merged_by_id, 500 | remote_side=User.id, 501 | backref=backref("merged_pull_requests", order_by=number), 502 | ) 503 | comments_count = db.Column(db.Integer) 504 | review_comments_count = db.Column(db.Integer) 505 | commits_count = db.Column(db.Integer) 506 | additions = 
@property
def github_json(self):
    """
    Serialize to a JSON-serializable dict that matches GitHub's
    JSON serialization.

    URLs are derived from the base repository and the pull request number.
    Commit SHAs are not stored on the model, so the ``sha`` and
    ``statuses`` entries carry a fixed placeholder value.
    """
    owner = self.base_repo.owner_login
    repo = self.base_repo.name
    url = "https://api.github.com/repos/{owner}/{repo}/pulls/{number}".format(
        owner=owner, repo=repo, number=self.number,
    )
    issue_url = "https://api.github.com/repos/{owner}/{repo}/issues/{number}".format(
        owner=owner, repo=repo, number=self.number,
    )
    html_url = "https://github.com/{owner}/{repo}/pull/{number}".format(
        owner=owner, repo=repo, number=self.number,
    )
    serialized = {
        "id": self.id,
        "url": url,
        "issue_url": issue_url,
        "html_url": html_url,
        "diff_url": html_url + ".diff",
        "patch_url": html_url + ".patch",
        "commits_url": url + "/commits",
        "comments_url": issue_url + "/comments",
        "review_comments_url": url + "/comments",
        "review_comment_url": url + "/comment{/number}",
        "statuses_url": url + "/statuses/1234567890abcdef",
        "_links": {
            "self": {
                "href": url,
            },
            "html": {
                "href": html_url,
            },
            "issue": {
                "href": issue_url,
            },
            "comments": {
                "href": issue_url + "/comments",
            },
            "review_comments": {
                "href": url + "/comments",
            },
            "review_comment": {
                "href": url + "/comment{/number}",
            },
            "commits": {
                "href": url + "/commits",
            },
            "statuses": {
                "href": url + "/statuses/1234567890abcdef",
            }
        },
        "number": self.number,
        "state": self.state,
        "locked": self.locked,
        "title": self.title,
        "user": self.user.github_json,
        "body": self.body,
        "created_at": self.created_at,
        "updated_at": self.updated_at,
        "closed_at": self.closed_at,
        "merged_at": self.merged_at,
        # Optional relations: getattr yields None when the relation is unset.
        "assignee": getattr(self.assignee, "github_json", None),
        "milestone": getattr(self.milestone, "github_json", None),
        "head": {
            "label": "{owner}:{ref}".format(
                owner=self.head_repo.owner_login, ref=self.head_ref
            ),
            "ref": self.head_ref,
            "sha": "1234567890abcdef",
            "user": self.head_repo.owner.github_json,
            "repo": self.head_repo.github_json,
        },
        "base": {
            "label": "{owner}:{ref}".format(
                owner=owner, ref=self.base_ref
            ),
            "ref": self.base_ref,
            "sha": "1234567890abcdef",
            "user": self.base_repo.owner.github_json,
            "repo": self.base_repo.github_json,
        },
        "merged": self.merged,
        "mergeable": self.mergeable,
        "mergeable_state": self.mergeable_state,
        # Fixed: this previously serialized self.assignee, so the reported
        # merger was wrong whenever assignee and merger differed.
        "merged_by": getattr(self.merged_by, "github_json", None),
        "comments": self.comments_count,
        "review_comments": self.review_comments_count,
        "commits": self.commits_count,
        "additions": self.additions,
        "deletions": self.deletions,
        "changed_files": self.changed_files,
        "repository": self.base_repo.github_json,
        "organization": getattr(self.base_repo.organization, "github_json", None),
    }
    return serialized
@property
def github_json(self):
    """
    Serialize this label to a dict shaped like GitHub's label JSON:
    the label's API URL, its name, and its color without a leading "#".
    """
    # Fixed: a trailing comma used to turn ``url`` into a 1-tuple, and the
    # template placeholders were never filled in.
    url = "https://api.github.com/repos/{owner}/{repo}/labels/{name}".format(
        owner=self.repo.owner_login,
        repo=self.repo.name,
        name=self.name,
    )
    serialized = {
        "url": url,
        "name": self.name,
        "color": str(self.color).replace("#", ""),
    }
    return serialized
__tablename__ = "github_issue" 743 | 744 | id = db.Column(db.Integer, primary_key=True) 745 | repo_id = db.Column(db.Integer, index=True) 746 | repo = db.relationship( 747 | Repository, 748 | primaryjoin=(repo_id == Repository.id), 749 | foreign_keys=repo_id, 750 | backref=backref("issues", order_by=lambda: Issue.number), 751 | ) 752 | number = db.Column(db.Integer) 753 | state = db.Column(db.String(64)) 754 | title = db.Column(db.String(256)) 755 | body = db.Column(db.Text) 756 | user_id = db.Column(db.Integer, index=True) 757 | user_login = db.Column(db.String(256)) 758 | user = db.relationship( 759 | User, 760 | primaryjoin=(user_id == User.id), 761 | foreign_keys=user_id, 762 | remote_side=User.id, 763 | backref=backref("created_issues", order_by=lambda: Issue.number), 764 | ) 765 | labels = db.relationship( 766 | IssueLabel, 767 | secondary=label_association_table, 768 | primaryjoin=and_( 769 | label_association_table.c.label_name == IssueLabel.name, 770 | repo_id == IssueLabel.repo_id 771 | ), 772 | secondaryjoin=(id == label_association_table.c.issue_id), 773 | foreign_keys=[id, repo_id], 774 | backref=backref("issues", order_by=lambda: Issue.number), 775 | ) 776 | assignee_id = db.Column(db.Integer, index=True) 777 | assignee_login = db.Column(db.String(256)) 778 | assignee = db.relationship( 779 | User, 780 | primaryjoin=(assignee_id == User.id), 781 | foreign_keys=assignee_id, 782 | remote_side=User.id, 783 | backref=backref("assigned_issues", order_by=lambda: Issue.number), 784 | ) 785 | milestone_number = db.Column(db.Integer) 786 | milestone = db.relationship( 787 | Milestone, 788 | primaryjoin=and_( 789 | milestone_number == Milestone.number, 790 | repo_id == Milestone.repo_id 791 | ), 792 | foreign_keys=[milestone_number, repo_id], 793 | backref=backref("issues", order_by=lambda: Issue.number), 794 | ) 795 | comments_count = db.Column(db.Integer) 796 | created_at = db.Column(db.DateTime) 797 | updated_at = db.Column(db.DateTime) 798 | closed_at = 
@property
def github_json(self):
    """
    Serialize this issue to a dict shaped like GitHub's issue JSON.

    URLs are derived from the owning repository and the issue number.
    A ``pull_request`` section is always emitted, pointing at the pull
    request with the same number.
    """
    owner = self.repo.owner_login
    repo = self.repo.name
    url = "https://api.github.com/repos/{owner}/{repo}/issues/{number}".format(
        owner=owner, repo=repo, number=self.number,
    )
    html_url = "https://github.com/{owner}/{repo}/issues/{number}".format(
        owner=owner, repo=repo, number=self.number,
    )
    pr_url = "https://api.github.com/repos/{owner}/{repo}/pulls/{number}".format(
        owner=owner, repo=repo, number=self.number,
    )
    # Fixed: the web URL of a pull request uses the singular "/pull/",
    # matching what PullRequest.github_json produces; only the API URL
    # uses "/pulls/".
    html_pr_url = "https://github.com/{owner}/{repo}/pull/{number}".format(
        owner=owner, repo=repo, number=self.number,
    )
    serialized = {
        "id": self.id,
        "url": url,
        "labels_url": url + "/labels{/name}",
        "comments_url": url + "/comments",
        "events_url": url + "/events",
        "html_url": html_url,
        "number": self.number,
        "state": self.state,
        "title": self.title,
        "body": self.body,
        "user": self.user.github_json,
        "labels": [label.github_json for label in self.labels],
        "assignee": getattr(self.assignee, "github_json", None),
        "milestone": getattr(self.milestone, "github_json", None),
        # NOTE(review): the Issue model's columns do not include ``locked``,
        # so a plain attribute read raised AttributeError; report None
        # until the column is added and replicated.
        "locked": getattr(self, "locked", None),
        "comments": self.comments_count,
        "pull_request": {
            "url": pr_url,
            "html_url": html_pr_url,
            "diff_url": html_pr_url + ".diff",
            "patch_url": html_pr_url + ".patch",
        },
        "closed_at": self.closed_at,
        "created_at": self.created_at,
        "updated_at": self.updated_at,
        "closed_by": getattr(self.closed_by, "github_json", None),
    }
    return serialized
@oauth_authorized.connect_via(github_bp)
def github_logged_in(blueprint, token):
    """
    Signal handler run when the Github OAuth dance completes.

    With a usable token, asks Github for the current user via
    ``blueprint.session``, replicates that user with ``process_user`` and
    logs them in. Every failure path flashes a message and returns without
    logging anyone in.
    """
    if not token:
        flash("Failed to log in with Github")
        # Stop here: the checks below index into the token, and a None
        # token would raise TypeError on the membership test.
        return
    if "error_reason" in token:
        msg = "Access denied. Reason={reason} error={error}".format(
            reason=request.args["error_reason"],
            error=request.args["error_description"],
        )
        flash(msg)
        return

    # figure out who the user is
    try:
        resp = blueprint.session.get("/user")
    except RateLimited:
        flash(
            "Sorry, Github has rate-limited your access to their API, so "
            "we can't determine who you are and log you in. Please try "
            "again in an hour or so.",
            category="error"
        )
        return

    if resp.ok:
        # Imported here rather than at module level, as in the original.
        from webhookdb.tasks.user import process_user
        user = process_user(resp.json(), via="api", fetched_at=datetime.now())
        login_user(user)
        flash("Successfully signed in with Github")
        return

    # might be rate limited or something...
    msg = (
        "Sorry, Github is having some trouble, and we couldn't access "
        "your information. Please try again later."
    )
    try:
        # Fixed: this used the undefined name ``response``; the NameError
        # was swallowed, so Github's message was never shown.
        message = resp.json()["message"]
    except (ValueError, KeyError, TypeError):
        # Best effort only: the body may not be JSON or may lack "message".
        pass
    else:
        msg += " The message from Github was: {msg}".format(msg=message)
    flash(msg, category="error")
database, 18 | # or create it if it doesn't exist in the DB 19 | issue = Issue.query.get(issue_id) 20 | if not issue: 21 | issue = Issue(id=issue_id) 22 | 23 | # should we update the object? 24 | fetched_at = fetched_at or datetime.now() 25 | if issue.last_replicated_at > fetched_at: 26 | raise StaleData() 27 | 28 | # Most fields have the same name in our model as they do in Github's API. 29 | # However, some are different. This mapping contains just the differences. 30 | field_to_model = { 31 | "comments": "comments_count", 32 | } 33 | 34 | # update the object 35 | fields = ( 36 | "number", "state", "title", "body", "comments", 37 | ) 38 | for field in fields: 39 | if field in issue_data: 40 | mfield = field_to_model.get(field, field) 41 | setattr(issue, mfield, issue_data[field]) 42 | dt_fields = ("created_at", "updated_at", "closed_at") 43 | for field in dt_fields: 44 | if issue_data.get(field): 45 | dt = parse_date(issue_data[field]).replace(tzinfo=None) 46 | mfield = field_to_model.get(field, field) 47 | setattr(issue, mfield, dt) 48 | 49 | # user references 50 | user_fields = ("user", "assignee", "closed_by") 51 | for user_field in user_fields: 52 | if user_field not in issue_data: 53 | continue 54 | user_data = issue_data[user_field] 55 | id_field = "{}_id".format(user_field) 56 | login_field = "{}_login".format(user_field) 57 | if user_data: 58 | setattr(issue, id_field, user_data["id"]) 59 | if hasattr(issue, login_field): 60 | setattr(issue, login_field, user_data["login"]) 61 | try: 62 | process_user(user_data, via=via, fetched_at=fetched_at) 63 | except StaleData: 64 | pass 65 | else: 66 | setattr(issue, id_field, None) 67 | if hasattr(issue, login_field): 68 | setattr(issue, login_field, None) 69 | 70 | # used for labels and milestone 71 | repo_id = None 72 | 73 | # label reference 74 | if "labels" in issue_data: 75 | label_data_list = issue_data["labels"] 76 | if label_data_list: 77 | labels = [] 78 | for label_data in label_data_list: 79 | label = 
def process_label(label_data, via="webhook", fetched_at=None, commit=True,
                  repo_id=None):
    """
    Create or update an IssueLabel row from a Github label payload.

    :param label_data: dict from Github; must contain "name", and must
        contain "url" when ``repo_id`` is not given.
    :param via: replication channel name; selects which
        ``last_replicated_via_<via>_at`` attribute is stamped, if present.
    :param fetched_at: when the payload was fetched; defaults to now.
    :param commit: commit the DB session after adding the label.
    :param repo_id: ID of the owning repository; when omitted it is looked
        up from the owner/name segments of the label's API URL.
    :returns: the IssueLabel instance.
    :raises MissingData: the payload has no name, or no url when needed.
    :raises NotFound: the repository is not loaded in webhookdb.
    :raises DatabaseError: the repository lookup matched several rows.
    :raises StaleData: the stored label is newer than ``fetched_at``.
    """
    name = label_data.get("name")
    if not name:
        raise MissingData("no label name")

    if not repo_id:
        url = label_data.get("url")
        if not url:
            raise MissingData("no label url")

        # parse repo info from url: /repos/<owner>/<repo>/labels/<name>
        path = URLObject(url).path
        assert path.segments[0] == "repos"
        repo_owner = path.segments[1]
        repo_name = path.segments[2]

        # fetch repo from database
        try:
            repo = Repository.get(repo_owner, repo_name)
        except MultipleResultsFound:
            # Fixed: DatabaseError was raised without being imported, so
            # this path ended in NameError. Imported here so the rest of
            # the function is unaffected.
            # NOTE(review): assumes webhookdb.exceptions defines
            # DatabaseError -- confirm.
            from webhookdb.exceptions import DatabaseError
            msg = "Repo {owner}/{repo} found multiple times!".format(
                owner=repo_owner, repo=repo_name,
            )
            raise DatabaseError(msg, {
                "type": "label",
                "owner": repo_owner,
                "repo": repo_name,
            })
        if not repo:
            msg = "Repo {owner}/{repo} not loaded in webhookdb".format(
                owner=repo_owner, repo=repo_name,
            )
            raise NotFound(msg, {
                "type": "label",
                "owner": repo_owner,
                "repo": repo_name,
            })
        repo_id = repo.id

    # fetch the object from the database,
    # or create it if it doesn't exist in the DB
    label = IssueLabel.query.get((repo_id, name))
    if not label:
        label = IssueLabel(repo_id=repo_id, name=name)

    # should we update the object?
    fetched_at = fetched_at or datetime.now()
    if label.last_replicated_at > fetched_at:
        raise StaleData()

    # color reference: Github sends the hex value without a leading "#"
    if "color" in label_data:
        color_hex = label_data["color"]
        if color_hex:
            label.color = Color("#{hex}".format(hex=color_hex))
        else:
            label.color = None

    # update replication timestamp
    replicated_dt_field = "last_replicated_via_{}_at".format(via)
    if hasattr(label, replicated_dt_field):
        setattr(label, replicated_dt_field, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(label)
    if commit:
        db.session.commit()

    return label
owner=repo_owner, repo=repo_name, 36 | ) 37 | raise DatabaseError(msg, { 38 | "type": "milestone", 39 | "owner": repo_owner, 40 | "repo": repo_name, 41 | }) 42 | if not repo: 43 | msg = "Repo {owner}/{repo} not loaded in webhookdb".format( 44 | owner=repo_owner, repo=repo_name, 45 | ) 46 | raise NotFound(msg, { 47 | "type": "milestone", 48 | "owner": repo_owner, 49 | "repo": repo_name, 50 | }) 51 | repo_id = repo.id 52 | 53 | # fetch the object from the database, 54 | # or create it if it doesn't exist in the DB 55 | milestone = Milestone.query.get((repo_id, number)) 56 | if not milestone: 57 | milestone = Milestone(repo_id=repo_id, number=number) 58 | 59 | # should we update the object? 60 | fetched_at = fetched_at or datetime.now() 61 | if milestone.last_replicated_at > fetched_at: 62 | raise StaleData() 63 | 64 | # Most fields have the same name in our model as they do in Github's API. 65 | # However, some are different. This mapping contains just the differences. 66 | field_to_model = { 67 | "open_issues": "open_issues_count", 68 | "closed_issues": "closed_issues_count", 69 | "due_on": "due_at", 70 | } 71 | 72 | # update the object 73 | fields = ( 74 | "state", "title", "description", "open_issues", "closed_issues", 75 | ) 76 | for field in fields: 77 | if field in milestone_data: 78 | mfield = field_to_model.get(field, field) 79 | setattr(milestone, mfield, milestone_data[field]) 80 | dt_fields = ("created_at", "updated_at", "closed_at", "due_on") 81 | for field in dt_fields: 82 | if milestone_data.get(field): 83 | dt = parse_date(milestone_data[field]).replace(tzinfo=None) 84 | mfield = field_to_model.get(field, field) 85 | setattr(milestone, mfield, dt) 86 | 87 | # user references 88 | user_fields = ("creator",) 89 | for user_field in user_fields: 90 | if user_field not in milestone_data: 91 | continue 92 | user_data = milestone_data[user_field] 93 | id_field = "{}_id".format(user_field) 94 | login_field = "{}_login".format(user_field) 95 | if user_data: 96 
def process_pull_request(pr_data, via="webhook", fetched_at=None, commit=True):
    """
    Create or update the ``PullRequest`` row described by a Github payload.

    ``pr_data`` is the pull request object as a dict. ``via`` names the
    replication channel: if the model has a ``last_replicated_via_<via>_at``
    attribute, it is stamped with ``fetched_at`` (which defaults to the
    current time). Users and base/head repositories referenced by the
    payload are handed to ``process_user`` / ``process_repository``.
    The session is committed only when ``commit`` is true.

    Returns the ``PullRequest`` object. Raises ``MissingData`` when the
    payload has no ID, and ``StaleData`` when the stored object reports a
    replication time later than ``fetched_at``.
    """
    pull_id = pr_data.get("id")
    if not pull_id:
        raise MissingData("no pull_request ID", obj=pr_data)

    # load the existing row, falling back to a fresh object
    pr = PullRequest.query.get(pull_id)
    if not pr:
        pr = PullRequest(id=pull_id)

    # never overwrite newer data with older data
    if not fetched_at:
        fetched_at = datetime.now()
    if pr.last_replicated_at > fetched_at:
        raise StaleData()

    # Github key -> model attribute, listed only where the names differ
    renamed = {
        "comments": "comments_count",
        "review_comments": "review_comments_count",
        "commits": "commits_count",
    }

    # plain values are copied over as-is
    plain_keys = (
        "number", "state", "locked", "title", "body", "merged", "mergeable",
        "comments", "review_comments", "commits", "additions", "deletions",
        "changed_files",
    )
    for key in plain_keys:
        if key in pr_data:
            setattr(pr, renamed.get(key, key), pr_data[key])

    # timestamps are parsed and stored without tzinfo
    for key in ("created_at", "updated_at", "closed_at", "merged_at"):
        raw = pr_data.get(key)
        if raw:
            parsed = parse_date(raw).replace(tzinfo=None)
            setattr(pr, renamed.get(key, key), parsed)

    # user references
    for role in ("user", "assignee", "merged_by"):
        if role not in pr_data:
            continue
        person = pr_data[role]
        id_attr = "{}_id".format(role)
        login_attr = "{}_login".format(role)
        if not person:
            # the payload explicitly says there is no such user
            setattr(pr, id_attr, None)
            if hasattr(pr, login_attr):
                setattr(pr, login_attr, None)
            continue
        setattr(pr, id_attr, person["id"])
        if hasattr(pr, login_attr):
            setattr(pr, login_attr, person["login"])
        try:
            process_user(person, via=via, fetched_at=fetched_at)
        except StaleData:
            # the stored user is newer than this payload; leave it alone
            pass

    # repository references
    for side in ("base", "head"):
        if side not in pr_data:
            continue
        side_data = pr_data[side]
        setattr(pr, "{}_ref".format(side), side_data["ref"])
        repo_data = side_data["repo"]
        repo_id_attr = "{}_repo_id".format(side)
        if not repo_data:
            setattr(pr, repo_id_attr, None)
            continue
        setattr(pr, repo_id_attr, repo_data["id"])
        try:
            process_repository(repo_data, via=via, fetched_at=fetched_at)
        except StaleData:
            # the stored repository is newer than this payload
            pass

    # record when this object was last replicated through this channel
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(pr, stamp_attr):
        setattr(pr, stamp_attr, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(pr)
    if commit:
        db.session.commit()

    return pr
32 | fetched_at = fetched_at or datetime.now() 33 | if prf.last_replicated_at > fetched_at: 34 | raise StaleData() 35 | 36 | # update the object 37 | fields = ( 38 | "filename", "status", "additions", "deletions", "changes", "patch", 39 | ) 40 | for field in fields: 41 | if field in prf_data: 42 | setattr(prf, field, prf_data[field]) 43 | 44 | # update replication timestamp 45 | replicated_dt_field = "last_replicated_via_{}_at".format(via) 46 | if hasattr(prf, replicated_dt_field): 47 | setattr(prf, replicated_dt_field, fetched_at) 48 | 49 | # add to DB session, so that it will be committed 50 | db.session.add(prf) 51 | if commit: 52 | db.session.commit() 53 | 54 | return prf 55 | -------------------------------------------------------------------------------- /webhookdb/process/repository.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals, print_function 3 | 4 | from datetime import datetime 5 | from iso8601 import parse_date 6 | from webhookdb import db 7 | from webhookdb.models import Repository, UserRepoAssociation 8 | from webhookdb.process import process_user 9 | from webhookdb.exceptions import MissingData, StaleData 10 | 11 | 12 | def process_repository(repo_data, via="webhook", fetched_at=None, commit=True, 13 | requestor_id=None): 14 | repo_id = repo_data.get("id") 15 | if not repo_id: 16 | raise MissingData("no repo ID") 17 | 18 | # fetch the object from the database, 19 | # or create it if it doesn't exist in the DB 20 | repo = Repository.query.get(repo_id) 21 | if not repo: 22 | repo = Repository(id=repo_id) 23 | 24 | # should we update the object? 
25 | fetched_at = fetched_at or datetime.now() 26 | if repo.last_replicated_at > fetched_at: 27 | raise StaleData() 28 | 29 | # update the object 30 | fields = ( 31 | "name", "private", "description", "fork", "homepage", "size", 32 | "stargazers_count", "watchers_count", "language", "has_issues", 33 | "has_downloads", "has_wiki", "has_pages", "forks_count", 34 | "open_issues_count", "default_branch", 35 | ) 36 | for field in fields: 37 | if field in repo_data: 38 | setattr(repo, field, repo_data[field]) 39 | dt_fields = ("created_at", "updated_at", "pushed_at") 40 | for field in dt_fields: 41 | if repo_data.get(field): 42 | dt = parse_date(repo_data[field]).replace(tzinfo=None) 43 | setattr(repo, field, dt) 44 | 45 | # user references 46 | user_fields = ("owner", "organization") 47 | for user_field in user_fields: 48 | if user_field not in repo_data: 49 | continue 50 | user_data = repo_data[user_field] 51 | id_field = "{}_id".format(user_field) 52 | login_field = "{}_login".format(user_field) 53 | if user_data: 54 | setattr(repo, id_field, user_data["id"]) 55 | if hasattr(repo, login_field): 56 | setattr(repo, login_field, user_data["login"]) 57 | try: 58 | process_user(user_data, via=via, fetched_at=fetched_at) 59 | except StaleData: 60 | pass 61 | else: 62 | setattr(repo, id_field, None) 63 | if hasattr(repo, login_field): 64 | setattr(repo, login_field, None) 65 | 66 | # update replication timestamp 67 | replicated_dt_field = "last_replicated_via_{}_at".format(via) 68 | if hasattr(repo, replicated_dt_field): 69 | setattr(repo, replicated_dt_field, fetched_at) 70 | 71 | # add to DB session, so that it will be committed 72 | db.session.add(repo) 73 | 74 | # if we have requestor_id and permissions, update the permissions object 75 | if requestor_id and repo_data.get("permissions"): 76 | permissions_data = repo_data["permissions"] 77 | assoc = UserRepoAssociation.query.get((requestor_id, repo_id)) 78 | if not assoc: 79 | assoc = 
def process_repository_hook(hook_data, via="webhook", fetched_at=None, commit=True,
                            requestor_id=None, repo_id=None):
    """
    Create or update the ``RepositoryHook`` row described by a Github payload.

    ``hook_data`` is the hook object as a dict. When ``repo_id`` is not
    given, the owning repository is looked up from the owner and name found
    in the hook's top-level ``url`` (``/repos/<owner>/<name>/...``).
    ``via`` names the replication channel: if the model has a
    ``last_replicated_via_<via>_at`` attribute, it is stamped with
    ``fetched_at`` (which defaults to the current time). The session is
    committed only when ``commit`` is true. ``requestor_id`` is accepted
    but not used here.

    Returns the ``RepositoryHook`` object.

    Raises:
        MissingData: no hook ID, no hook url, or a url that does not point
            into ``/repos/<owner>/<name>``.
        NotFound: the repository is not in the database.
        DatabaseError: more than one repository matches owner and name.
        StaleData: the stored hook reports a replication time later than
            ``fetched_at``.
    """
    # Both exceptions are raised below but are missing from this module's
    # top-level ``webhookdb.exceptions`` import; without this the error
    # paths die with NameError instead of the intended exception.
    from webhookdb.exceptions import NotFound, DatabaseError

    hook_id = hook_data.get("id")
    if not hook_id:
        raise MissingData("no hook ID")

    if not repo_id:
        url = hook_data.get("url")
        if not url:
            raise MissingData("no hook url")

        # parse repo info from url; validate explicitly rather than with
        # ``assert`` (stripped under -O) or an IndexError on short paths
        segments = URLObject(url).path.segments
        if len(segments) < 3 or segments[0] != "repos":
            raise MissingData("hook url is not a repository url", obj=hook_data)
        repo_owner = segments[1]
        repo_name = segments[2]

        # fetch repo from database
        repo_query = (
            Repository.query
            .filter(Repository.owner_login == repo_owner)
            .filter(Repository.name == repo_name)
        )
        try:
            repo = repo_query.one()
        except NoResultFound:
            msg = "Repo {owner}/{repo} not loaded in webhookdb".format(
                owner=repo_owner, repo=repo_name,
            )
            raise NotFound(msg, {
                "type": "repo_hook",
                "owner": repo_owner,
                "repo": repo_name,
            })
        except MultipleResultsFound:
            msg = "Repo {owner}/{repo} found multiple times!".format(
                owner=repo_owner, repo=repo_name,
            )
            raise DatabaseError(msg, {
                "type": "repo_hook",
                "owner": repo_owner,
                "repo": repo_name,
            })
        repo_id = repo.id

    # fetch the object from the database,
    # or create it if it doesn't exist in the DB
    hook = RepositoryHook.query.get(hook_id)
    if not hook:
        hook = RepositoryHook(id=hook_id, repo_id=repo_id)

    # should we update the object?
    fetched_at = fetched_at or datetime.now()
    if hook.last_replicated_at > fetched_at:
        raise StaleData()

    # update the object
    fields = (
        "name", "config", "events", "active", "last_response",
    )
    for field in fields:
        if field in hook_data:
            setattr(hook, field, hook_data[field])
    dt_fields = ("created_at", "updated_at")
    for field in dt_fields:
        if hook_data.get(field):
            dt = parse_date(hook_data[field]).replace(tzinfo=None)
            setattr(hook, field, dt)

    # `url` is special -- it's the value in the `config` object,
    # NOT the top-level `url` property. `config` may be present but null,
    # so fall back to an empty dict before reading from it.
    hook.url = (hook_data.get("config") or {}).get("url")

    # update replication timestamp
    replicated_dt_field = "last_replicated_via_{}_at".format(via)
    if hasattr(hook, replicated_dt_field):
        setattr(hook, replicated_dt_field, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(hook)

    if commit:
        db.session.commit()

    return hook
def process_user(user_data, via="webhook", fetched_at=None, commit=True):
    """
    Create or update the ``User`` row described by a Github payload.

    ``user_data`` is the user object as a dict. ``via`` names the
    replication channel: if the model has a ``last_replicated_via_<via>_at``
    attribute, it is stamped with ``fetched_at`` (which defaults to the
    current time). The session is committed only when ``commit`` is true.

    Returns the ``User`` object. Raises ``MissingData`` when the payload
    has no ID, and ``StaleData`` when the stored object reports a
    replication time later than ``fetched_at``.
    """
    github_id = user_data.get("id")
    if not github_id:
        raise MissingData("no user ID")

    # reuse the stored row when there is one, otherwise start a new object
    user = User.query.get(github_id) or User(id=github_id)

    # never overwrite newer data with older data
    if not fetched_at:
        fetched_at = datetime.now()
    if user.last_replicated_at > fetched_at:
        raise StaleData()

    # (Github key, model attribute) pairs; most names are identical,
    # the counters carry a ``_count`` suffix in our model
    copied = (
        ("login", "login"),
        ("site_admin", "site_admin"),
        ("name", "name"),
        ("company", "company"),
        ("blog", "blog"),
        ("location", "location"),
        ("email", "email"),
        ("hireable", "hireable"),
        ("bio", "bio"),
        ("public_repos", "public_repos_count"),
        ("public_gists", "public_gists_count"),
        ("followers", "followers_count"),
        ("following", "following_count"),
    )
    for api_key, attr in copied:
        if api_key in user_data:
            setattr(user, attr, user_data[api_key])

    # timestamps are parsed and stored without tzinfo
    for key in ("created_at", "updated_at"):
        raw = user_data.get(key)
        if raw:
            setattr(user, key, parse_date(raw).replace(tzinfo=None))

    # record when this object was last replicated through this channel
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(user, stamp_attr):
        setattr(user, stamp_attr, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(user)
    if commit:
        db.session.commit()

    return user
@replication.route('/issue', methods=["POST"])
def issue():
    """
    Webhook endpoint for ``issues`` events on Github.

    Reads the JSON payload, attaches it to the bugsnag request metadata,
    and passes its ``issue`` object to ``process_issue``. Responds with
    HTTP 400 when the payload has no issue or ``process_issue`` raises
    ``MissingData``; stale data and success both get a plain JSON message.
    """
    body = request.get_json()
    bugsnag.configure_request(meta_data={"payload": body})

    issue_data = body.get("issue")
    if not issue_data:
        rejection = jsonify({"error": "no issue in payload"})
        rejection.status_code = 400
        return rejection

    try:
        process_issue(issue_data)
    except MissingData as err:
        return jsonify({"error": err.message, "obj": err.obj}), 400
    except StaleData:
        return jsonify({"message": "stale data"})
    else:
        return jsonify({"message": "success"})
@replication.route('/repository', methods=["POST"])
def repository():
    """
    Webhook endpoint for ``repository`` events on Github.

    Reads the JSON payload, attaches it to the bugsnag request metadata,
    and passes its ``repository`` object to ``process_repository``.
    Responds with HTTP 400 when the payload has no repository or
    ``process_repository`` raises ``MissingData``; stale data and success
    both get a plain JSON message.
    """
    body = request.get_json()
    bugsnag.configure_request(meta_data={"payload": body})

    repo_data = body.get("repository")
    if not repo_data:
        rejection = jsonify({"error": "no repository in payload"})
        rejection.status_code = 400
        return rejection

    try:
        process_repository(repo_data)
    except MissingData as err:
        return jsonify({"error": err.message, "obj": err.obj}), 400
    except StaleData:
        return jsonify({"message": "stale data"})

    return jsonify({"message": "success"})
# The route must declare the ``task_id`` URL variable: the view takes it as
# a required argument, so a rule without it fails on every request.
@tasks.route('/status/<task_id>')
def status(task_id):
    """
    Report the state of a Celery task as JSON.

    ``task_id`` comes from the URL; the response is ``{"status": <state>}``
    where the state is whatever ``celery.AsyncResult(task_id).state`` holds.
    """
    result = celery.AsyncResult(task_id)
    return jsonify({"status": result.state})
@celery.task(bind=True)
def sync_issue(self, owner, repo, number, children=False, requestor_id=None):
    """
    Fetch one issue from the Github API and store it in the database.

    Requests ``/repos/<owner>/<repo>/issues/<number>`` on behalf of
    ``requestor_id`` and hands the JSON body to ``process_issue``.
    Returns the ID of the stored issue. A ``NotFound`` from the fetch is
    re-raised with a message and details naming the issue; an
    ``IntegrityError`` while storing makes the task retry.
    ``children`` is accepted but ignored for now.
    """
    url = "/repos/{owner}/{repo}/issues/{number}".format(
        owner=owner, repo=repo, number=number,
    )
    try:
        resp = fetch_url_from_github(url, requestor_id=requestor_id)
    except NotFound:
        # re-raise with enough context to identify the issue
        details = {
            "type": "issue",
            "owner": owner,
            "repo": repo,
            "number": number,
        }
        raise NotFound(
            "Issue {owner}/{repo}#{number} not found".format(
                owner=owner, repo=repo, number=number,
            ),
            details,
        )

    payload = resp.json()
    try:
        issue = process_issue(
            payload, via="api", fetched_at=datetime.now(), commit=True,
        )
    except IntegrityError as exc:
        self.retry(exc=exc)
    # ignore `children` attribute for now
    return issue.id
@celery.task()
def spawn_page_tasks_for_issues(owner, repo, state="all", children=False,
                                requestor_id=None, per_page=100):
    """
    Start a full scan of a repository's issues, one task per result page.

    A ``Mutex`` row named after the repository serves as a lock so that
    only one scan runs at a time. If the lock already exists, or cannot be
    inserted, the function returns ``False`` without doing anything.
    Otherwise it sends a HEAD request for the issue list, reads the number
    of pages from the response's ``last`` link, and dispatches a group of
    ``sync_page_of_issues`` tasks followed by ``issues_scanned``.

    Returns the result of dispatching that chain. Any exception raised
    after the lock was taken is re-raised once the lock has been removed.
    """
    # acquire lock or fail (we're already in a transaction)
    lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo)
    existing = Mutex.query.get(lock_name)
    if existing:
        return False
    lock = Mutex(name=lock_name, user_id=requestor_id)
    db.session.add(lock)
    try:
        db.session.commit()
    except IntegrityError:
        # someone else inserted the lock first; roll back so the session
        # is usable again after the failed commit
        db.session.rollback()
        return False
    else:
        logger.info("Lock {name} set by {requestor_id}".format(
            name=lock_name, requestor_id=requestor_id,
        ))

    try:
        issue_list_url = (
            "/repos/{owner}/{repo}/issues?"
            "state={state}&per_page={per_page}"
        ).format(
            owner=owner, repo=repo,
            state=state, per_page=per_page,
        )
        resp = fetch_url_from_github(
            issue_list_url, method="HEAD", requestor_id=requestor_id,
        )
        # a missing "last" link means there is only a single page
        last_page_url = URLObject(resp.links.get('last', {}).get('url', ""))
        last_page_num = int(last_page_url.query.dict.get('page', 1))
        g = group(
            sync_page_of_issues.s(
                owner=owner, repo=repo, state=state, children=children,
                requestor_id=requestor_id,
                per_page=per_page, page=page,
            ) for page in range(1, last_page_num + 1)
        )
        finisher = issues_scanned.si(
            owner=owner, repo=repo, requestor_id=requestor_id,
        )
        return (g | finisher).delay()
    except Exception:
        # The lock is normally removed by `issues_scanned`. If we never got
        # as far as dispatching it, nothing else will remove the lock and
        # every later scan of this repository would be refused -- so
        # release it here before propagating the error.
        db.session.rollback()
        Mutex.query.filter_by(name=lock_name).delete()
        db.session.commit()
        logger.info("Lock {name} deleted".format(name=lock_name))
        raise
@celery.task()
def labels_scanned(owner, repo, requestor_id=None):
    """
    Update the timestamp on the repository object,
    and delete old labels that weren't updated.

    Runs as the last step of a label scan: it records the scan time in
    ``labels_last_scanned_at``, removes labels of this repository whose
    ``last_replicated_at`` is older than the previous scan, deletes the
    scan's ``Mutex`` row, and commits. ``requestor_id`` is accepted but
    not used here.
    """
    repo_name = repo
    repo = Repository.get(owner, repo_name)
    prev_scan_at = repo.labels_last_scanned_at
    repo.labels_last_scanned_at = datetime.now()
    db.session.add(repo)

    if prev_scan_at:
        # delete any labels that were not updated since the previous scan --
        # they have been removed from Github.
        # NOTE(review): the model is referenced as `IssueLabel` because that
        # is the label model this module imports; the previous name `Label`
        # was not defined here. Confirm against webhookdb.models.
        query = (
            IssueLabel.query.filter_by(repo_id=repo.id)
            .filter(IssueLabel.last_replicated_at < prev_scan_at)
        )
        query.delete()

    # delete the mutex
    lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo_name)
    Mutex.query.filter_by(name=lock_name).delete()
    logger.info("Lock {name} deleted".format(name=lock_name))

    db.session.commit()
labels_scanned.si( 140 | owner=owner, repo=repo, requestor_id=requestor_id, 141 | ) 142 | return (g | finisher).delay() 143 | -------------------------------------------------------------------------------- /webhookdb/tasks/milestone.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals, print_function 3 | 4 | from datetime import datetime 5 | from iso8601 import parse_date 6 | from celery import group 7 | from urlobject import URLObject 8 | from webhookdb import db, celery 9 | from webhookdb.process import process_milestone 10 | from webhookdb.models import Milestone, Repository, Mutex 11 | from webhookdb.exceptions import NotFound, StaleData, MissingData, DatabaseError 12 | from sqlalchemy.exc import IntegrityError 13 | from webhookdb.tasks import logger 14 | from webhookdb.tasks.fetch import fetch_url_from_github 15 | 16 | LOCK_TEMPLATE = "Repository|{owner}/{repo}|milestones" 17 | 18 | 19 | @celery.task(bind=True) 20 | def sync_milestone(self, owner, repo, number, children=False, requestor_id=None): 21 | milestone_url = "/repos/{owner}/{repo}/milestones/{number}".format( 22 | owner=owner, repo=repo, number=number, 23 | ) 24 | try: 25 | resp = fetch_url_from_github(milestone_url, requestor_id=requestor_id) 26 | except NotFound: 27 | # add more context 28 | msg = "Milestone #{number} on {owner}/{repo} not found".format( 29 | number=number, owner=owner, repo=repo, 30 | ) 31 | raise NotFound(msg, { 32 | "type": "milestone", 33 | "number": number, 34 | "owner": owner, 35 | "repo": repo, 36 | }) 37 | milestone_data = resp.json() 38 | try: 39 | milestone = process_milestone( 40 | milestone_data, via="api", fetched_at=datetime.now(), commit=True, 41 | ) 42 | except IntegrityError as exc: 43 | # multiple workers tried to insert the same milestone simultaneously. Retry!
44 | self.retry(exc=exc) 45 | return milestone.number 46 | 47 | 48 | @celery.task(bind=True) 49 | def sync_page_of_milestones(self, owner, repo, state="all", 50 | children=False, requestor_id=None, 51 | per_page=100, page=1): 52 | milestone_page_url = ( 53 | "/repos/{owner}/{repo}/milestones?" 54 | "state={state}&per_page={per_page}&page={page}" 55 | ).format( 56 | owner=owner, repo=repo, 57 | state=state, per_page=per_page, page=page 58 | ) 59 | resp = fetch_url_from_github(milestone_page_url, requestor_id=requestor_id) 60 | fetched_at = datetime.now() 61 | milestone_data_list = resp.json() 62 | results = [] 63 | repo_id = None 64 | for milestone_data in milestone_data_list: 65 | try: 66 | milestone = process_milestone( 67 | milestone_data, via="api", fetched_at=fetched_at, commit=True, 68 | repo_id=repo_id, 69 | ) 70 | repo_id = repo_id or milestone.repo_id 71 | results.append(milestone.number) 72 | except IntegrityError as exc: 73 | self.retry(exc=exc) 74 | return results 75 | 76 | 77 | @celery.task() 78 | def milestones_scanned(owner, repo, requestor_id=None): 79 | """ 80 | Update the timestamp on the repository object, 81 | and delete old milestones that weren't updated. 
82 | """ 83 | repo_name = repo 84 | repo = Repository.get(owner, repo_name) 85 | prev_scan_at = repo.milestones_last_scanned_at 86 | repo.milestones_last_scanned_at = datetime.now() 87 | db.session.add(repo) 88 | 89 | if prev_scan_at: 90 | # delete any milestones that were not updated since the previous scan -- 91 | # they have been removed from Github 92 | query = ( 93 | Milestone.query.filter_by(repo_id=repo.id) 94 | .filter(Milestone.last_replicated_at < prev_scan_at) 95 | ) 96 | query.delete() 97 | 98 | # delete the mutex 99 | lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo_name) 100 | Mutex.query.filter_by(name=lock_name).delete() 101 | logger.info("Lock {name} deleted".format(name=lock_name)) 102 | 103 | db.session.commit() 104 | 105 | 106 | @celery.task() 107 | def spawn_page_tasks_for_milestones(owner, repo, state="all", children=False, 108 | requestor_id=None, per_page=100): 109 | # acquire lock or fail (we're already in a transaction) 110 | lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo) 111 | existing = Mutex.query.get(lock_name) 112 | if existing: 113 | return False 114 | lock = Mutex(name=lock_name, user_id=requestor_id) 115 | db.session.add(lock) 116 | try: 117 | db.session.commit() 118 | except IntegrityError: 119 | return False 120 | else: 121 | logger.info("Lock {name} set by {requestor_id}".format( 122 | name=lock_name, requestor_id=requestor_id, 123 | )) 124 | 125 | milestone_list_url = ( 126 | "/repos/{owner}/{repo}/pulls?" 
127 | "state={state}&per_page={per_page}" 128 | ).format( 129 | owner=owner, repo=repo, 130 | state=state, per_page=per_page, 131 | ) 132 | resp = fetch_url_from_github( 133 | milestone_list_url, method="HEAD", requestor_id=requestor_id, 134 | ) 135 | last_page_url = URLObject(resp.links.get('last', {}).get('url', "")) 136 | last_page_num = int(last_page_url.query.dict.get('page', 1)) 137 | g = group( 138 | sync_page_of_milestones.s( 139 | owner=owner, repo=repo, state=state, requestor_id=requestor_id, 140 | per_page=per_page, page=page, 141 | ) for page in xrange(1, last_page_num+1) 142 | ) 143 | finisher = milestones_scanned.si( 144 | owner=owner, repo=repo, requestor_id=requestor_id, 145 | ) 146 | return (g | finisher).delay() 147 | -------------------------------------------------------------------------------- /webhookdb/tasks/pull_request.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals, print_function 3 | 4 | from datetime import datetime 5 | from iso8601 import parse_date 6 | from celery import group 7 | from webhookdb import db 8 | from webhookdb.process import process_pull_request 9 | from webhookdb.models import PullRequest, Repository, Mutex 10 | from webhookdb.exceptions import NotFound 11 | from sqlalchemy.exc import IntegrityError 12 | from webhookdb.tasks import celery, logger 13 | from webhookdb.tasks.fetch import fetch_url_from_github 14 | from webhookdb.tasks.pull_request_file import spawn_page_tasks_for_pull_request_files 15 | from urlobject import URLObject 16 | 17 | LOCK_TEMPLATE = "Repository|{owner}/{repo}|pulls" 18 | 19 | 20 | @celery.task(bind=True) 21 | def sync_pull_request(self, owner, repo, number, 22 | children=False, requestor_id=None): 23 | pr_url = "/repos/{owner}/{repo}/pulls/{number}".format( 24 | owner=owner, repo=repo, number=number, 25 | ) 26 | try: 27 | resp = fetch_url_from_github(pr_url, requestor_id=requestor_id) 28 | except 
NotFound: 29 | # add more context 30 | msg = "PR {owner}/{repo}#{number} not found".format( 31 | owner=owner, repo=repo, number=number, 32 | ) 33 | raise NotFound(msg, { 34 | "type": "pull_request", 35 | "owner": owner, 36 | "repo": repo, 37 | "number": number, 38 | }) 39 | pr_data = resp.json() 40 | try: 41 | pr = process_pull_request( 42 | pr_data, via="api", fetched_at=datetime.now(), commit=True, 43 | ) 44 | except IntegrityError as exc: 45 | self.retry(exc=exc) 46 | 47 | if children: 48 | spawn_page_tasks_for_pull_request_files.delay( 49 | owner, repo, number, children=children, requestor_id=requestor_id, 50 | ) 51 | 52 | return pr.id 53 | 54 | 55 | @celery.task(bind=True) 56 | def sync_page_of_pull_requests(self, owner, repo, state="all", children=False, 57 | requestor_id=None, per_page=100, page=1): 58 | pr_page_url = ( 59 | "/repos/{owner}/{repo}/pulls?" 60 | "state={state}&per_page={per_page}&page={page}" 61 | ).format( 62 | owner=owner, repo=repo, 63 | state=state, per_page=per_page, page=page 64 | ) 65 | resp = fetch_url_from_github(pr_page_url, requestor_id=requestor_id) 66 | fetched_at = datetime.now() 67 | pr_data_list = resp.json() 68 | results = [] 69 | for pr_data in pr_data_list: 70 | try: 71 | pr = process_pull_request( 72 | pr_data, via="api", fetched_at=fetched_at, commit=True, 73 | ) 74 | results.append(pr.id) 75 | except IntegrityError as exc: 76 | self.retry(exc=exc) 77 | 78 | if children: 79 | spawn_page_tasks_for_pull_request_files.delay( 80 | owner, repo, pr.number, children=children, 81 | requestor_id=requestor_id, 82 | ) 83 | return results 84 | 85 | 86 | @celery.task() 87 | def pull_requests_scanned(owner, repo, requestor_id=None): 88 | """ 89 | Update the timestamp on the repository object, 90 | and delete old pull request that weren't updated. 
91 | """ 92 | repo_name = repo 93 | repo = Repository.get(owner, repo_name) 94 | prev_scan_at = repo.pull_requests_last_scanned_at 95 | repo.pull_requests_last_scanned_at = datetime.now() 96 | db.session.add(repo) 97 | 98 | if prev_scan_at: 99 | # delete any PRs that were not updated since the previous scan -- 100 | # they have been removed from Github 101 | query = ( 102 | PullRequest.query.filter_by(base_repo_id=repo.id) 103 | .filter(PullRequest.last_replicated_at < prev_scan_at) 104 | ) 105 | query.delete() 106 | 107 | # delete the mutex 108 | lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo_name) 109 | Mutex.query.filter_by(name=lock_name).delete() 110 | logger.info("Lock {name} deleted".format(name=lock_name)) 111 | 112 | db.session.commit() 113 | 114 | 115 | @celery.task() 116 | def spawn_page_tasks_for_pull_requests(owner, repo, state="all", children=False, 117 | requestor_id=None, per_page=100): 118 | # acquire lock or fail (we're already in a transaction) 119 | lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo) 120 | existing = Mutex.query.get(lock_name) 121 | if existing: 122 | return False 123 | lock = Mutex(name=lock_name, user_id=requestor_id) 124 | db.session.add(lock) 125 | try: 126 | db.session.commit() 127 | except IntegrityError: 128 | return False 129 | else: 130 | logger.info("Lock {name} set by {requestor_id}".format( 131 | name=lock_name, requestor_id=requestor_id, 132 | )) 133 | 134 | pr_list_url = ( 135 | "/repos/{owner}/{repo}/pulls?" 
136 | "state={state}&per_page={per_page}" 137 | ).format( 138 | owner=owner, repo=repo, 139 | state=state, per_page=per_page, 140 | ) 141 | resp = fetch_url_from_github( 142 | pr_list_url, method="HEAD", requestor_id=requestor_id, 143 | ) 144 | last_page_url = URLObject(resp.links.get('last', {}).get('url', "")) 145 | last_page_num = int(last_page_url.query.dict.get('page', 1)) 146 | g = group( 147 | sync_page_of_pull_requests.s( 148 | owner=owner, repo=repo, state=state, 149 | children=children, requestor_id=requestor_id, 150 | per_page=per_page, page=page 151 | ) for page in xrange(1, last_page_num+1) 152 | ) 153 | finisher = pull_requests_scanned.si( 154 | owner=owner, repo=repo, requestor_id=requestor_id, 155 | ) 156 | return (g | finisher).delay() 157 | -------------------------------------------------------------------------------- /webhookdb/tasks/pull_request_file.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals, print_function 3 | 4 | from datetime import datetime 5 | from celery import group 6 | from webhookdb import db 7 | from webhookdb.process import process_pull_request_file 8 | from webhookdb.models import PullRequestFile, PullRequest, Mutex 9 | from webhookdb.exceptions import ( 10 | NotFound, NothingToDo, DatabaseError 11 | ) 12 | from sqlalchemy.exc import IntegrityError 13 | from webhookdb.tasks import celery 14 | from webhookdb.tasks.fetch import fetch_url_from_github 15 | from urlobject import URLObject 16 | 17 | LOCK_TEMPLATE = "PullRequest|{owner}/{repo}#{number}|files" 18 | 19 | 20 | @celery.task(bind=True) 21 | def sync_page_of_pull_request_files(self, owner, repo, number, pull_request_id=None, 22 | children=False, requestor_id=None, 23 | per_page=100, page=1): 24 | if not pull_request_id: 25 | pull_request_id = PullRequest.get(owner, repo, number).id 26 | 27 | prf_page_url = ( 28 | "/repos/{owner}/{repo}/pulls/{number}/files?" 
29 | "per_page={per_page}&page={page}" 30 | ).format( 31 | owner=owner, repo=repo, number=number, 32 | per_page=per_page, page=page, 33 | ) 34 | resp = fetch_url_from_github(prf_page_url, requestor_id=requestor_id) 35 | fetched_at = datetime.now() 36 | prf_data_list = resp.json() 37 | results = [] 38 | for prf_data in prf_data_list: 39 | try: 40 | prf = process_pull_request_file( 41 | prf_data, via="api", fetched_at=fetched_at, commit=True, 42 | pull_request_id=pull_request_id, 43 | ) 44 | results.append(prf.sha) 45 | except IntegrityError as exc: 46 | self.retry(exc=exc) 47 | except NothingToDo: 48 | pass 49 | return results 50 | 51 | 52 | @celery.task() 53 | def pull_request_files_scanned(owner, repo, number, requestor_id=None): 54 | """ 55 | Update the timestamp on the pull request object, 56 | and delete old pull request files that weren't updated. 57 | """ 58 | pr = PullRequest.get(owner, repo, number) 59 | prev_scan_at = pr.files_last_scanned_at 60 | pr.files_last_scanned_at = datetime.now() 61 | db.session.add(pr) 62 | 63 | if prev_scan_at: 64 | # delete any files that were not updated since the previous scan -- 65 | # they have been removed from Github 66 | query = ( 67 | PullRequestFile.query.filter_by(pull_request_id=pr.id) 68 | .filter(PullRequestFile.last_replicated_at < prev_scan_at) 69 | ) 70 | query.delete() 71 | 72 | # delete the mutex 73 | lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo, number=number) 74 | Mutex.query.filter_by(name=lock_name).delete() 75 | 76 | db.session.commit() 77 | 78 | 79 | @celery.task() 80 | def spawn_page_tasks_for_pull_request_files(owner, repo, number, children=False, 81 | requestor_id=None, per_page=100): 82 | # acquire lock or fail (we're already in a transaction) 83 | lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo, number=number) 84 | existing = Mutex.query.get(lock_name) 85 | if existing: 86 | return False 87 | lock = Mutex(name=lock_name, user_id=requestor_id) 88 | db.session.add(lock) 89 | try: 90 
| db.session.commit() 91 | except IntegrityError: 92 | return False 93 | 94 | pr = PullRequest.get(owner, repo, number) 95 | 96 | prf_list_url = ( 97 | "/repos/{owner}/{repo}/pulls/{number}/files?" 98 | "per_page={per_page}" 99 | ).format( 100 | owner=owner, repo=repo, number=number, 101 | per_page=per_page, 102 | ) 103 | resp = fetch_url_from_github( 104 | prf_list_url, method="HEAD", requestor_id=requestor_id, 105 | ) 106 | last_page_url = URLObject(resp.links.get('last', {}).get('url', "")) 107 | last_page_num = int(last_page_url.query.dict.get('page', 1)) 108 | 109 | g = group( 110 | sync_page_of_pull_request_files.s( 111 | owner=owner, repo=repo, number=number, pull_request_id=pr.id, 112 | children=children, requestor_id=requestor_id, 113 | per_page=per_page, page=page, 114 | ) for page in xrange(1, last_page_num+1) 115 | ) 116 | finisher = pull_request_files_scanned.si( 117 | owner=owner, repo=repo, number=number, requestor_id=requestor_id, 118 | ) 119 | return (g | finisher).delay() 120 | -------------------------------------------------------------------------------- /webhookdb/tasks/repository.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals, print_function 3 | 4 | from datetime import datetime 5 | from iso8601 import parse_date 6 | from celery import group 7 | from webhookdb import db 8 | from webhookdb.process import process_repository 9 | from webhookdb.models import Repository, User, UserRepoAssociation, Mutex 10 | from webhookdb.exceptions import NotFound, StaleData, MissingData 11 | from sqlalchemy.exc import IntegrityError, SQLAlchemyError 12 | from webhookdb.tasks import celery, logger 13 | from webhookdb.tasks.fetch import fetch_url_from_github 14 | from webhookdb.tasks.issue import spawn_page_tasks_for_issues 15 | from webhookdb.tasks.label import spawn_page_tasks_for_labels 16 | from webhookdb.tasks.milestone import spawn_page_tasks_for_milestones 
17 | from webhookdb.tasks.pull_request import spawn_page_tasks_for_pull_requests 18 | from webhookdb.tasks.repository_hook import spawn_page_tasks_for_repository_hooks 19 | from urlobject import URLObject 20 | 21 | LOCK_TEMPLATE = "User|{username}|repos" 22 | 23 | 24 | 25 | @celery.task(bind=True) 26 | def sync_repository(self, owner, repo, children=False, requestor_id=None): 27 | repo_url = "/repos/{owner}/{repo}".format(owner=owner, repo=repo) 28 | try: 29 | resp = fetch_url_from_github(repo_url, requestor_id=requestor_id) 30 | except NotFound: 31 | # add more context 32 | msg = "Repo {owner}/{repo} not found".format(owner=owner, repo=repo) 33 | raise NotFound(msg, { 34 | "type": "repository", 35 | "owner": owner, 36 | "repo": repo, 37 | }) 38 | repo_data = resp.json() 39 | try: 40 | repo = process_repository( 41 | repo_data, via="api", fetched_at=datetime.now(), commit=True, 42 | requestor_id=requestor_id, 43 | ) 44 | except IntegrityError as exc: 45 | self.retry(exc=exc) 46 | 47 | if children: 48 | spawn_page_tasks_for_issues.delay( 49 | owner, repo, children=children, requestor_id=requestor_id, 50 | ) 51 | spawn_page_tasks_for_labels.delay( 52 | owner, repo, children=children, requestor_id=requestor_id, 53 | ) 54 | spawn_page_tasks_for_milestones.delay( 55 | owner, repo, children=children, requestor_id=requestor_id, 56 | ) 57 | spawn_page_tasks_for_pull_requests.delay( 58 | owner, repo, children=children, requestor_id=requestor_id, 59 | ) 60 | spawn_page_tasks_for_repository_hooks.delay( 61 | owner, repo, children=children, requestor_id=requestor_id, 62 | ) 63 | 64 | return repo.id 65 | 66 | 67 | @celery.task(bind=True) 68 | def sync_page_of_repositories_for_user(self, username, type="all", 69 | children=False, requestor_id=None, 70 | per_page=100, page=1): 71 | repo_page_url = ( 72 | "/users/{username}/repos?type={type}&per_page={per_page}&page={page}" 73 | ).format( 74 | username=username, type=type, per_page=per_page, page=page, 75 | ) 76 | 77 | if 
requestor_id: 78 | requestor = User.query.get(int(requestor_id)) 79 | assert requestor 80 | if requestor.login == username: 81 | # we can use the API for getting your *own* repos 82 | repo_page_url = ( 83 | "/user/repos?type={type}&per_page={per_page}&page={page}" 84 | ).format( 85 | type=type, per_page=per_page, page=page 86 | ) 87 | 88 | resp = fetch_url_from_github( 89 | repo_page_url, requestor_id=requestor_id, 90 | headers={"Accept": "application/vnd.github.moondragon+json"}, 91 | ) 92 | fetched_at = datetime.now() 93 | repo_data_list = resp.json() 94 | results = [] 95 | for repo_data in repo_data_list: 96 | try: 97 | repo = process_repository( 98 | repo_data, via="api", fetched_at=fetched_at, commit=True, 99 | requestor_id=requestor_id, 100 | ) 101 | results.append(repo.id) 102 | except IntegrityError as exc: 103 | self.retry(exc=exc) 104 | 105 | if children: 106 | owner = repo.owner_login 107 | spawn_page_tasks_for_issues.delay( 108 | owner, repo.name, children=children, requestor_id=requestor_id, 109 | ) 110 | spawn_page_tasks_for_labels.delay( 111 | owner, repo.name, children=children, requestor_id=requestor_id, 112 | ) 113 | spawn_page_tasks_for_milestones.delay( 114 | owner, repo.name, children=children, requestor_id=requestor_id, 115 | ) 116 | spawn_page_tasks_for_pull_requests.delay( 117 | owner, repo.name, children=children, requestor_id=requestor_id, 118 | ) 119 | # only try to get repo hooks if the requestor is an admin on this repo 120 | assoc = UserRepoAssociation.query.get((requestor_id, repo.id)) 121 | if assoc and assoc.can_admin: 122 | spawn_page_tasks_for_repository_hooks.delay( 123 | owner, repo.name, children=children, requestor_id=requestor_id, 124 | ) 125 | 126 | return results 127 | 128 | 129 | @celery.task() 130 | def user_repositories_scanned(username, requestor_id=None): 131 | """ 132 | Update the repos-scanned timestamp on the user object, 133 | and delete the user's old repositories that weren't updated.
134 | """ 135 | user = User.get(username) 136 | prev_scan_at = user.repos_last_scanned_at 137 | user.repos_last_scanned_at = datetime.now() 138 | db.session.add(user) 139 | 140 | if prev_scan_at: 141 | # delete any repos that the user owns that were not updated 142 | # since the previous scan -- the user must have deleted those 143 | # repos from Github 144 | query = ( 145 | Repository.query.filter_by(owner_id=user.id) 146 | .filter(Repository.last_replicated_at < prev_scan_at) 147 | ) 148 | query.delete(synchronize_session=False) 149 | 150 | # delete the mutex 151 | lock_name = LOCK_TEMPLATE.format(username=username) 152 | Mutex.query.filter_by(name=lock_name).delete() 153 | logger.info("Lock {name} deleted".format(name=lock_name)) 154 | 155 | db.session.commit() 156 | 157 | 158 | @celery.task() 159 | def spawn_page_tasks_for_user_repositories( 160 | username, type="all", children=False, requestor_id=None, per_page=100, 161 | ): 162 | # acquire lock or fail (we're already in a transaction) 163 | lock_name = LOCK_TEMPLATE.format(username=username) 164 | existing = Mutex.query.get(lock_name) 165 | if existing: 166 | return False 167 | lock = Mutex(name=lock_name, user_id=requestor_id) 168 | db.session.add(lock) 169 | try: 170 | db.session.commit() 171 | except IntegrityError: 172 | return False 173 | else: 174 | logger.info("Lock {name} set by {requestor_id}".format( 175 | name=lock_name, requestor_id=requestor_id, 176 | )) 177 | 178 | repo_page_url = ( 179 | "/users/{username}/repos?type={type}&per_page={per_page}" 180 | ).format( 181 | username=username, type=type, per_page=per_page, 182 | ) 183 | 184 | if requestor_id: 185 | requestor = User.query.get(int(requestor_id)) 186 | assert requestor 187 | if requestor.login == username: 188 | # we can use the API for getting your *own* repos 189 | repo_page_url = ( 190 | "/user/repos?type={type}&per_page={per_page}" 191 | ).format( 192 | type=type, per_page=per_page, 193 | ) 194 | 195 | resp = fetch_url_from_github( 196 
| repo_page_url, method="HEAD", requestor_id=requestor_id, 197 | headers={"Accept": "application/vnd.github.moondragon+json"}, 198 | ) 199 | last_page_url = URLObject(resp.links.get('last', {}).get('url', "")) 200 | last_page_num = int(last_page_url.query.dict.get('page', 1)) 201 | g = group( 202 | sync_page_of_repositories_for_user.s( 203 | username=username, type=type, 204 | children=children, requestor_id=requestor_id, 205 | per_page=per_page, page=page, 206 | ) for page in xrange(1, last_page_num+1) 207 | ) 208 | finisher = user_repositories_scanned.si( 209 | username=username, requestor_id=requestor_id, 210 | ) 211 | return (g | finisher).delay() 212 | -------------------------------------------------------------------------------- /webhookdb/tasks/repository_hook.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals, print_function 3 | 4 | from datetime import datetime 5 | from iso8601 import parse_date 6 | from celery import group 7 | from webhookdb import db 8 | from webhookdb.process import process_repository_hook 9 | from webhookdb.models import RepositoryHook, Repository, Mutex 10 | from webhookdb.exceptions import NotFound 11 | from sqlalchemy.exc import IntegrityError, SQLAlchemyError 12 | from webhookdb.tasks import celery, logger 13 | from webhookdb.tasks.fetch import fetch_url_from_github 14 | from urlobject import URLObject 15 | 16 | LOCK_TEMPLATE = "Repository|{owner}/{repo}|hooks" 17 | 18 | 19 | @celery.task(bind=True) 20 | def sync_repository_hook(self, owner, repo, hook_id, 21 | children=False, requestor_id=None): 22 | hook_url = "/repos/{owner}/{repo}/hooks/{hook_id}".format( 23 | owner=owner, repo=repo, hook_id=hook_id, 24 | ) 25 | try: 26 | resp = fetch_url_from_github(hook_url, requestor_id=requestor_id) 27 | except NotFound: 28 | # add more context 29 | msg = "Hook #{hook_id} for {owner}/{repo} not found".format( 30 | hook_id=hook_id, owner=owner, 
repo=repo, 31 | ) 32 | raise NotFound(msg, { 33 | "type": "repo_hook", 34 | "owner": owner, 35 | "repo": repo, 36 | "hook_id": hook_id, 37 | }) 38 | hook_data = resp.json() 39 | try: 40 | hook = process_repository_hook( 41 | hook_data, via="api", fetched_at=datetime.now(), commit=True, 42 | requestor_id=requestor_id, 43 | ) 44 | except IntegrityError as exc: 45 | self.retry(exc=exc) 46 | return hook.id 47 | 48 | 49 | @celery.task(bind=True) 50 | def sync_page_of_repository_hooks(self, owner, repo, children=False, 51 | requestor_id=None, per_page=100, page=1): 52 | hook_page_url = ( 53 | "/repos/{owner}/{repo}/hooks?per_page={per_page}&page={page}" 54 | ).format( 55 | owner=owner, repo=repo, per_page=per_page, page=page, 56 | ) 57 | resp = fetch_url_from_github(hook_page_url, requestor_id=requestor_id) 58 | fetched_at = datetime.now() 59 | hook_data_list = resp.json() 60 | results = [] 61 | for hook_data in hook_data_list: 62 | try: 63 | hook = process_repository_hook( 64 | hook_data, via="api", fetched_at=fetched_at, commit=True, 65 | requestor_id=requestor_id, 66 | ) 67 | results.append(hook.id) 68 | except IntegrityError as exc: 69 | self.retry(exc=exc) 70 | return results 71 | 72 | 73 | @celery.task() 74 | def hooks_scanned(owner, repo, requestor_id=None): 75 | """ 76 | Update the timestamp on the repository object, 77 | and delete old hooks that weren't updated. 
78 | """ 79 | repo_name = repo 80 | repo = Repository.get(owner, repo_name) 81 | prev_scan_at = repo.hooks_last_scanned_at 82 | repo.hooks_last_scanned_at = datetime.now() 83 | db.session.add(repo) 84 | 85 | if prev_scan_at: 86 | # delete any hooks that were not updated since the previous scan -- 87 | # they have been removed from Github 88 | query = ( 89 | RepositoryHook.query.filter_by(repo_id=repo.id) 90 | .filter(RepositoryHook.last_replicated_at < prev_scan_at) 91 | ) 92 | query.delete() 93 | 94 | # delete the mutex 95 | lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo_name) 96 | Mutex.query.filter_by(name=lock_name).delete() 97 | logger.info("Lock {name} deleted".format(name=lock_name)) 98 | 99 | db.session.commit() 100 | 101 | 102 | @celery.task() 103 | def spawn_page_tasks_for_repository_hooks( 104 | owner, repo, children=False, requestor_id=None, per_page=100, 105 | ): 106 | # acquire lock or fail (we're already in a transaction) 107 | lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo) 108 | existing = Mutex.query.get(lock_name) 109 | if existing: 110 | return False 111 | lock = Mutex(name=lock_name, user_id=requestor_id) 112 | db.session.add(lock) 113 | try: 114 | db.session.commit() 115 | except IntegrityError: 116 | return False 117 | else: 118 | logger.info("Lock {name} set by {requestor_id}".format( 119 | name=lock_name, requestor_id=requestor_id, 120 | )) 121 | 122 | hook_page_url = ( 123 | "/repos/{owner}/{repo}/hooks?per_page={per_page}" 124 | ).format( 125 | owner=owner, repo=repo, type=type, per_page=per_page, 126 | ) 127 | resp = fetch_url_from_github( 128 | hook_page_url, method="HEAD", requestor_id=requestor_id, 129 | ) 130 | last_page_url = URLObject(resp.links.get('last', {}).get('url', "")) 131 | last_page_num = int(last_page_url.query.dict.get('page', 1)) 132 | g = group( 133 | sync_page_of_repository_hooks.s( 134 | owner=owner, repo=repo, 135 | children=children, requestor_id=requestor_id, 136 | per_page=per_page, page=page, 
137 | ) for page in xrange(1, last_page_num+1) 138 | ) 139 | finisher = hooks_scanned.si( 140 | owner=owner, repo=repo, requestor_id=requestor_id, 141 | ) 142 | return (g | finisher).delay() 143 | -------------------------------------------------------------------------------- /webhookdb/tasks/user.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals, print_function 3 | 4 | from datetime import datetime 5 | from iso8601 import parse_date 6 | from webhookdb import db, celery 7 | from webhookdb.process import process_user 8 | from webhookdb.models import User 9 | from webhookdb.exceptions import NotFound, StaleData, MissingData 10 | from sqlalchemy.exc import IntegrityError 11 | from webhookdb.tasks.fetch import fetch_url_from_github 12 | from webhookdb.tasks.repository import spawn_page_tasks_for_user_repositories 13 | 14 | 15 | @celery.task(bind=True) 16 | def sync_user(self, username, children=False, requestor_id=None): 17 | user_url = "/users/{username}".format(username=username) 18 | 19 | if requestor_id: 20 | requestor = User.query.get(int(requestor_id)) 21 | assert requestor 22 | if requestor.login == username: 23 | # we can use the API for getting the authenticated user 24 | user_url = "/user" 25 | 26 | try: 27 | resp = fetch_url_from_github(user_url, requestor_id=requestor_id) 28 | except NotFound: 29 | # add more context 30 | msg = "User @{username} not found".format(username=username) 31 | raise NotFound(msg, { 32 | "type": "user", 33 | "username": username, 34 | }) 35 | user_data = resp.json() 36 | try: 37 | user = process_user( 38 | user_data, via="api", fetched_at=datetime.now(), commit=True, 39 | ) 40 | except IntegrityError as exc: 41 | # multiple workers tried to insert the same user simultaneously. Retry!
42 | self.retry(exc=exc) 43 | 44 | if children: 45 | spawn_page_tasks_for_user_repositories.delay( 46 | username, children=children, requestor_id=requestor_id, 47 | ) 48 | 49 | return user.id 50 | -------------------------------------------------------------------------------- /webhookdb/templates/base.html: -------------------------------------------------------------------------------- 1 | {% extends "bootstrap/base.html" %} 2 | 3 | {% block title %}WebhookDB{% endblock %} 4 | 5 | {% block head %} 6 | {{ super() }} 7 | 8 | 9 | {% endblock %} 10 | 11 | {# 12 | {% block styles %} 13 | {{ super() }} 14 | 15 | {% endblock %} 16 | #} 17 | 18 | {% block navbar %} 19 | 51 | {% endblock %} 52 | 53 | {% block content %} 54 |
55 | {% block content_header %}{% endblock %} 56 | 57 | {% for message in get_flashed_messages() %} 58 |
59 | 60 | {{ message }} 61 |
62 | {% endfor %} 63 | 64 | {% block page_content %}{% endblock %} 65 |
66 | {% endblock %} 67 | -------------------------------------------------------------------------------- /webhookdb/templates/home-anonymous.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content_header %} 4 |

Welcome

5 | {% endblock %} 6 | 7 | {% block page_content %} 8 | Please log in with Github 9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /webhookdb/templates/home.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content_header %} 4 |

Your Repos

5 | {% endblock %} 6 | 7 | {% block page_content %} 8 |
9 | 10 |
11 |
    12 | {% for repo, has_self_hook in repos %} 13 |
  • 15 | {{repo.full_name}} 16 | {% if not repo.hooks_last_scanned_at %} 17 | (hooks not yet loaded) 18 | {% elif has_self_hook %} 19 | (replication on) 20 | {% else %} 21 | (replication off) 22 | {% endif %} 23 | 25 | [load hooks] 26 | 27 | {% if has_self_hook %} 28 | 30 | [disable replication] 31 | 32 | {% else %} 33 | 35 | [enable replication] 36 | 37 | {% endif %} 38 |
  • 39 | {% else %} 40 |
  • No repos loaded! Click the button above to sync your repos from Github.
  • 41 | {% endfor %} 42 |
43 | {% endblock %} 44 | 45 | {% block scripts %} 46 | 47 | 48 | 66 | {% endblock %} 67 | -------------------------------------------------------------------------------- /webhookdb/templates/install.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content_header %} 4 |

Install Webhooks

5 | {% endblock %} 6 | 7 | {% block page_content %} 8 |
9 |

10 | Clicking this button will install the Github replication webhooks for the repo 11 | you specify. 12 |

13 | 14 | 15 | 16 | 17 | 18 |
19 | {% endblock %} 20 | -------------------------------------------------------------------------------- /webhookdb/templates/uninstall.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content_header %} 4 |

Uninstall Webhooks

5 | {% endblock %} 6 | 7 | {% block page_content %} 8 |
9 |

10 | Clicking this button will uninstall the Github replication webhooks for the repo 11 | you specify. 12 |

13 | 14 | 15 | 16 | 17 | 18 |
19 | {% endblock %} 20 | -------------------------------------------------------------------------------- /webhookdb/ui/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import unicode_literals, print_function 3 | 4 | import logging 5 | from datetime import datetime 6 | from flask import Blueprint, request, render_template, jsonify, url_for 7 | from flask_login import current_user 8 | from flask_dance.contrib.github import github 9 | from sqlalchemy.sql import func, cast 10 | from webhookdb import db 11 | from webhookdb.models import Repository, RepositoryHook, UserRepoAssociation 12 | from webhookdb.tasks.repository_hook import process_repository_hook 13 | import bugsnag 14 | 15 | ui = Blueprint('ui', __name__) 16 | 17 | 18 | @ui.route("/") 19 | def index(): 20 | """ 21 | Home page. 22 | 23 | If the user is not currently logged in with Github, explain what WebhookDB 24 | is, and ask them to log in. 25 | 26 | If the user *is* logged in with Github, show them their Github repos, 27 | and allow them to re-sync repos from Github. 
28 | """ 29 | if current_user.is_anonymous(): 30 | return render_template("home-anonymous.html") 31 | else: 32 | replication_url = url_for( 33 | "replication.pull_request", 34 | _external=True, 35 | ) 36 | is_self_hook = (RepositoryHook.url == replication_url) 37 | repos = ( 38 | db.session.query(Repository, func.sum(cast(is_self_hook, db.Integer))) 39 | .outerjoin(RepositoryHook, RepositoryHook.repo_id == Repository.id) 40 | .join(UserRepoAssociation, UserRepoAssociation.repo_id == Repository.id) 41 | .filter(UserRepoAssociation.user_id == current_user.id) 42 | .filter(UserRepoAssociation.can_admin == True) 43 | .group_by(Repository) 44 | .order_by( 45 | (Repository.owner_id == current_user.id).desc(), 46 | func.lower(Repository.owner_login), 47 | func.lower(Repository.name), 48 | ) 49 | ) 50 | return render_template("home.html", repos=repos) 51 | 52 | 53 | @ui.route("/install", methods=("GET", "POST")) 54 | def install(): 55 | if request.method == "GET": 56 | return render_template("install.html") 57 | owner_login = request.values.get("owner", "") 58 | if not owner_login: 59 | return jsonify({"error": "missing owner param"}), 400 60 | repo_name = request.values.get("repo", "") 61 | if not repo_name: 62 | return jsonify({"error": "missing repo param"}), 400 63 | 64 | hook_url = "/repos/{owner}/{repo}/hooks".format( 65 | owner=owner_login, repo=repo_name, 66 | ) 67 | body = { 68 | "name": "web", 69 | "events": ["pull_request", "issue"], 70 | "config": { 71 | "url": url_for("replication.main", _external=True), 72 | "content_type": "json", 73 | } 74 | } 75 | bugsnag_context = {"owner": owner_login, "repo": repo_name, "body": body} 76 | bugsnag.configure_request(meta_data=bugsnag_context) 77 | 78 | logging.info("POST {}".format(hook_url)) 79 | hook_resp = github.post(hook_url, json=body) 80 | if not hook_resp.ok: 81 | error_obj = hook_resp.json() 82 | resp = jsonify({"error": error_obj["message"]}) 83 | resp.status_code = 503 84 | return resp 85 | else: 86 | hook_data 
= hook_resp.json() 87 | process_repository_hook( 88 | hook_data, via="api", fetched_at=datetime.now(), commit=True, 89 | requestor_id=current_user.get_id(), 90 | ) 91 | 92 | return jsonify({"message": "success"}) 93 | 94 | 95 | @ui.route("/uninstall", methods=("GET", "POST")) 96 | def uninstall(): 97 | if request.method == "GET": 98 | return render_template("uninstall.html") 99 | owner_login = request.values.get("owner", "") 100 | if not owner_login: 101 | return jsonify({"error": "missing owner param"}), 400 102 | repo_name = request.values.get("repo", "") 103 | if not repo_name: 104 | return jsonify({"error": "missing repo param"}), 400 105 | 106 | replication_urls = [ 107 | url_for( 108 | "replication.{endpoint}".format(endpoint=endpoint), 109 | _external=True, 110 | ) 111 | for endpoint in ("main", "pull_request", "issue") 112 | ] 113 | 114 | repo_hooks = ( 115 | RepositoryHook.query 116 | .join(Repository, Repository.id == RepositoryHook.repo_id) 117 | .filter(Repository.owner_login == owner_login) 118 | .filter(Repository.name == repo_name) 119 | .filter(RepositoryHook.url.in_(replication_urls)) 120 | ) 121 | 122 | deleted_ids = [] 123 | errored_ids = [] 124 | for repo_hook in repo_hooks: 125 | api_url = "/repos/{owner}/{repo}/hooks/{hook_id}".format( 126 | owner=owner_login, repo=repo_name, hook_id=repo_hook.id, 127 | ) 128 | logging.info("DELETE {}".format(api_url)) 129 | hook_resp = github.delete(api_url) 130 | if hook_resp.ok: 131 | deleted_ids.append(repo_hook.id) 132 | else: 133 | errored_ids.append(repo_hook.id) 134 | 135 | # delete from local database 136 | if deleted_ids: 137 | query = RepositoryHook.query.filter(RepositoryHook.id.in_(deleted_ids)) 138 | query.delete(synchronize_session=False) 139 | db.session.commit() 140 | return jsonify({"message": "deleted", "ids": deleted_ids}) 141 | else: 142 | return jsonify({"message": "no hooks deleted", "ids": []}) 143 | -------------------------------------------------------------------------------- 
"""
This file only exists because celery can't handle a factory function
being passed as the application instance, like this:

    $ celery worker --app=webhookdb.create_celery_app()

If celery ever gets this capability, this file can be deleted.

Until then, the worker process is pointed at this module instead
(the Procfile runs ``celery worker --app=webhookdb.worker``), and the
application instance is built once, at import time.
"""

from webhookdb import create_celery_app

# Celery application instance, created by the factory when this module is
# imported. NOTE(review): "worker" is presumably the name of a configuration
# profile understood by create_celery_app -- confirm in webhookdb/__init__.py.
application = create_celery_app(config="worker")