├── .cookiecutter └── includes │ └── .github │ └── workflows │ └── environments.json ├── .coveragerc ├── .eslintrc ├── .gitattributes ├── .github ├── dependabot.yml ├── stale.yml └── workflows │ ├── ci.yml │ ├── codeql.yml │ ├── deploy.yml │ ├── redeploy.yml │ └── slack.yml ├── .gitignore ├── .isort.cfg ├── .python-version ├── Dockerfile ├── HACKING.md ├── LICENSE ├── Makefile ├── README.md ├── bin ├── init-env ├── install-python └── logger ├── bouncer ├── __init__.py ├── _version.py ├── app.py ├── embed_detector.py ├── scripts │ ├── redirect.js │ └── test │ │ └── redirect-test.js ├── search.py ├── static │ ├── images │ │ ├── facebook.png │ │ ├── hypothesis-icon.svg │ │ ├── sad-annotation.svg │ │ └── twitter.png │ └── styles │ │ └── bouncer.css ├── templates │ ├── annotation.html.jinja2 │ ├── base.html.jinja2 │ └── error.html.jinja2 ├── util.py └── views.py ├── conf ├── development.ini ├── gunicorn-dev.conf.py ├── gunicorn.conf.py ├── production.ini ├── supervisord-dev.conf └── supervisord.conf ├── karma.config.js ├── package-lock.json ├── package.json ├── pyproject.toml ├── requirements ├── checkformatting.in ├── checkformatting.txt ├── coverage.in ├── coverage.txt ├── dev.in ├── dev.txt ├── format.in ├── format.txt ├── functests.in ├── functests.txt ├── lint.in ├── lint.txt ├── requirements.in ├── requirements.txt ├── tests.in └── tests.txt ├── setup.cfg ├── tests ├── __init__.py ├── conftest.py ├── functional │ ├── __init__.py │ ├── conftest.py │ └── views │ │ ├── __init__.py │ │ └── healthcheck_test.py └── unit │ ├── __init__.py │ └── bouncer │ ├── __init__.py │ ├── app_test.py │ ├── embed_detector_test.py │ ├── search_test.py │ ├── util_test.py │ └── views_test.py └── tox.ini /.cookiecutter/includes/.github/workflows/environments.json: -------------------------------------------------------------------------------- 1 | { 2 | "staging": { 3 | "github_environment_name": "Staging", 4 | "github_environment_url": 
"https://staging.hyp.is/FiqzonGfEe2o-AfEssZXnw/en.wikipedia.org/wiki/Wikipedia:Terminal_Event_Management_Policy", 5 | "aws_region": "us-west-1", 6 | "elasticbeanstalk_application": "bouncer", 7 | "elasticbeanstalk_environment": "staging" 8 | }, 9 | "production": { 10 | "needs": ["staging"], 11 | "github_environment_name": "Production", 12 | "github_environment_url": "https://hyp.is/FiqzonGfEe2o-AfEssZXnw/en.wikipedia.org/wiki/Wikipedia:Terminal_Event_Management_Policy", 13 | "aws_region": "us-west-1", 14 | "elasticbeanstalk_application": "bouncer", 15 | "elasticbeanstalk_environment": "prod" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | parallel = True 4 | source = 5 | bouncer 6 | tests/unit 7 | 8 | [report] 9 | show_missing = True 10 | precision = 2 11 | fail_under = 100 12 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "hypothesis", 3 | "parserOptions": { 4 | "sourceType": "module" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | bouncer/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | 4 | - package-ecosystem: pip 5 | directory: "/" 6 | schedule: 7 | interval: monthly 8 | open-pull-requests-limit: 10 9 | ignore: 10 | - dependency-name: elasticsearch 11 | versions: 12 | - "> 6.3.1" 13 | 14 | - package-ecosystem: docker 15 | directory: "/" 16 | schedule: 17 | interval: monthly 18 | 
open-pull-requests-limit: 10 19 | ignore: 20 | # Only send PRs for patch versions of Python. 21 | - dependency-name: "python" 22 | update-types: [ "version-update:semver-major", "version-update:semver-minor" ] 23 | 24 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Label to use when marking as stale 2 | staleLabel: stale 3 | # Limit to only `issues` or `pulls` 4 | only: pulls 5 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '.cookiecutter/*' 7 | - 'docs/*' 8 | - 'requirements/*.in' 9 | - 'requirements/dev.txt' 10 | - '**/.gitignore' 11 | - '*.md' 12 | - 'LICENSE' 13 | workflow_call: 14 | 15 | jobs: 16 | backend: 17 | name: Backend 18 | runs-on: ubuntu-latest 19 | env: 20 | TOX_PARALLEL_NO_SPINNER: 1 21 | 22 | steps: 23 | - name: Checkout git repo 24 | uses: actions/checkout@v3 25 | 26 | - name: Setup python 27 | uses: actions/setup-python@v4 28 | with: 29 | python-version-file: '.python-version' 30 | 31 | - name: Update pip 32 | run: python -m pip install --upgrade pip 33 | 34 | - name: Install tox 35 | run: python -m pip install 'tox<4' 36 | 37 | - name: Cache the .tox dir 38 | uses: actions/cache@v3 39 | with: 40 | path: .tox 41 | key: ${{ runner.os }}-tox-${{ hashFiles('tox.ini', 'requirements*', 'setup.py', 'setup.cfg') }} 42 | restore-keys: | 43 | ${{ runner.os }}-tox- 44 | 45 | - name: Run tox 46 | run: tox --parallel auto -e checkformatting,lint,tests,coverage,functests 47 | 48 | frontend: 49 | name: Frontend 50 | runs-on: ubuntu-latest 51 | 52 | steps: 53 | - name: Checkout 54 | uses: actions/checkout@v3 55 | 56 | - name: Cache the node_modules dir 57 | uses: actions/cache@v3 58 | with: 59 | path: node_modules 60 | 
key: ${{ runner.os }}-node_modules-${{ hashFiles('package-lock.json') }} 61 | 62 | - name: Lint 63 | run: make frontend-lint 64 | 65 | - name: Test 66 | run: make frontend-test 67 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: CodeQL 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | schedule: 9 | # ┌───────────── minute (0 - 59) 10 | # │ ┌───────────── hour (0 - 23) 11 | # │ │ ┌───────────── day of the month (1 - 31) 12 | # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) 13 | # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) 14 | # │ │ │ │ │ 15 | # │ │ │ │ │ 16 | # │ │ │ │ │ 17 | # * * * * * 18 | - cron: '30 1 * * 0' 19 | 20 | jobs: 21 | CodeQL-Build: 22 | # CodeQL runs on ubuntu-latest, windows-latest, and macos-latest 23 | runs-on: ubuntu-latest 24 | 25 | permissions: 26 | # required for all workflows 27 | security-events: write 28 | 29 | # only required for workflows in private repositories 30 | actions: read 31 | contents: read 32 | 33 | steps: 34 | - name: Checkout repository 35 | uses: actions/checkout@v3 36 | 37 | # Initializes the CodeQL tools for scanning. 38 | - name: Initialize CodeQL 39 | uses: github/codeql-action/init@v2 40 | # Override language selection by uncommenting this and choosing your languages 41 | # with: 42 | # languages: go, javascript, csharp, python, cpp, java 43 | 44 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 45 | # If this step fails, then you should remove it and run the build manually (see below). 46 | - name: Autobuild 47 | uses: github/codeql-action/autobuild@v2 48 | 49 | # ℹ️ Command-line programs to run using the OS shell. 
50 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 51 | 52 | # ✏️ If the Autobuild fails above, remove it and uncomment the following 53 | # three lines and modify them (or add more) to build your code if your 54 | # project uses a compiled language 55 | 56 | #- run: | 57 | # make bootstrap 58 | # make release 59 | 60 | - name: Perform CodeQL Analysis 61 | uses: github/codeql-action/analyze@v2 62 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy 2 | concurrency: 3 | group: deploy 4 | cancel-in-progress: true 5 | on: 6 | workflow_dispatch: 7 | push: 8 | branches: 9 | - main 10 | paths-ignore: 11 | - '.cookiecutter/*' 12 | - '.github/*' 13 | - 'docs/*' 14 | - 'requirements/*' 15 | - '!requirements/requirements.txt' 16 | - 'tests/*' 17 | - '**/.gitignore' 18 | - '*.md' 19 | - 'tox.ini' 20 | jobs: 21 | docker_hub: 22 | name: Docker Hub 23 | uses: hypothesis/workflows/.github/workflows/dockerhub.yml@main 24 | with: 25 | Application: ${{ github.event.repository.name }} 26 | secrets: inherit 27 | staging: 28 | name: Staging 29 | needs: [docker_hub] 30 | uses: hypothesis/workflows/.github/workflows/deploy.yml@main 31 | with: 32 | operation: deploy 33 | github_environment_name: Staging 34 | github_environment_url: https://staging.hyp.is/b01DwMzhEe6X2ssmLT58kw/en.wikipedia.org/wiki/Wikipedia:Terminal_Event_Management_Policy 35 | aws_region: us-west-1 36 | elasticbeanstalk_application: bouncer 37 | elasticbeanstalk_environment: staging 38 | docker_tag: ${{ needs.Docker_Hub.outputs.docker_tag }} 39 | secrets: inherit 40 | production: 41 | name: Production 42 | needs: [docker_hub, staging] 43 | uses: hypothesis/workflows/.github/workflows/deploy.yml@main 44 | with: 45 | operation: deploy 46 | github_environment_name: Production 47 | 
github_environment_url: https://hyp.is/FiqzonGfEe2o-AfEssZXnw/en.wikipedia.org/wiki/Wikipedia:Terminal_Event_Management_Policy 48 | aws_region: us-west-1 49 | elasticbeanstalk_application: bouncer 50 | elasticbeanstalk_environment: prod 51 | docker_tag: ${{ needs.Docker_Hub.outputs.docker_tag }} 52 | secrets: inherit 53 | -------------------------------------------------------------------------------- /.github/workflows/redeploy.yml: -------------------------------------------------------------------------------- 1 | name: Redeploy 2 | concurrency: 3 | group: deploy 4 | cancel-in-progress: true 5 | on: 6 | workflow_dispatch: 7 | inputs: 8 | staging: 9 | type: boolean 10 | description: Redeploy Staging 11 | production: 12 | type: boolean 13 | description: Redeploy Production 14 | jobs: 15 | staging: 16 | name: Staging 17 | if: inputs.staging 18 | uses: hypothesis/workflows/.github/workflows/deploy.yml@main 19 | with: 20 | operation: redeploy 21 | github_environment_name: Staging 22 | github_environment_url: https://staging.hyp.is/FiqzonGfEe2o-AfEssZXnw/en.wikipedia.org/wiki/Wikipedia:Terminal_Event_Management_Policy 23 | aws_region: us-west-1 24 | elasticbeanstalk_application: bouncer 25 | elasticbeanstalk_environment: staging 26 | secrets: inherit 27 | production: 28 | name: Production 29 | if: inputs.production 30 | uses: hypothesis/workflows/.github/workflows/deploy.yml@main 31 | with: 32 | operation: redeploy 33 | github_environment_name: Production 34 | github_environment_url: https://hyp.is/FiqzonGfEe2o-AfEssZXnw/en.wikipedia.org/wiki/Wikipedia:Terminal_Event_Management_Policy 35 | aws_region: us-west-1 36 | elasticbeanstalk_application: bouncer 37 | elasticbeanstalk_environment: prod 38 | secrets: inherit 39 | -------------------------------------------------------------------------------- /.github/workflows/slack.yml: -------------------------------------------------------------------------------- 1 | name: Slack 2 | on: 3 | workflow_run: 4 | workflows: [CI] 
5 | types: [completed] 6 | branches: [main] 7 | jobs: 8 | on-failure: 9 | runs-on: ubuntu-latest 10 | if: ${{ github.event.workflow_run.conclusion == 'failure' }} 11 | steps: 12 | - name: Post to Slack 13 | uses: slackapi/slack-github-action@v1.24.0 14 | with: 15 | channel-id: 'C4K6M7P5E' 16 | slack-message: "A workflow run failed\n*Repo:* `${{ github.event.repository.full_name }}` (${{ github.event.repository.html_url }})\n*Workflow:* ${{ github.event.workflow.name }} (${{ github.event.workflow.html_url }})\n*Branch:* `${{ github.event.workflow_run.head_branch }}`\n*Commit:* `${{ github.event.workflow_run.head_commit.id }}`\n*Run:* ${{ github.event.workflow_run.html_url }}\n*Conclusion:* ${{ github.event.workflow_run.conclusion }}" 17 | env: 18 | SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | .coverage 4 | .coverage.* 5 | .cache 6 | .tox 7 | 8 | node_modules 9 | bouncer/static/scripts/bundle.js 10 | supervisord.log 11 | supervisord.pid 12 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | # A Black (https://github.com/psf/black)-compatible isort 2 | # (https://timothycrosley.github.io/isort/) config. Copy-pasted from Black's 3 | # README. 
4 | [settings] 5 | multi_line_output=3 6 | include_trailing_comma=True 7 | force_grid_wrap=0 8 | use_parentheses=True 9 | line_length=88 10 | default_section=THIRDPARTY 11 | known_first_party=bouncer,tests 12 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11.7 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.11-alpine3.19 2 | MAINTAINER Hypothes.is Project and contributors 3 | 4 | # Install system build and runtime dependencies. 5 | RUN apk add --no-cache \ 6 | curl \ 7 | nodejs \ 8 | npm \ 9 | supervisor 10 | 11 | # Create the bouncer user, group, home directory and package directory. 12 | RUN addgroup -S bouncer \ 13 | && adduser -S -G bouncer -h /var/lib/bouncer bouncer 14 | WORKDIR /var/lib/bouncer 15 | 16 | # Copy packaging 17 | COPY README.md package.json requirements/requirements.txt ./ 18 | 19 | RUN npm install --production 20 | 21 | RUN pip3 install --no-cache-dir -U pip \ 22 | && pip3 install --no-cache-dir -r requirements.txt 23 | 24 | COPY . . 
25 | 26 | # Start the web server by default 27 | EXPOSE 8000 28 | USER bouncer 29 | CMD ["bin/init-env", "supervisord", "-c" , "conf/supervisord.conf"] 30 | -------------------------------------------------------------------------------- /HACKING.md: -------------------------------------------------------------------------------- 1 | Changing the Project's Python Dependencies 2 | ------------------------------------------ 3 | 4 | ### To Add a New Dependency 5 | 6 | Add the dependency to the appropriate [`requirements/*.in`](requirements/) 7 | file(s) and then run: 8 | 9 | ```terminal 10 | make requirements 11 | ``` 12 | 13 | ### To Remove a Dependency 14 | 15 | Remove the dependency from the appropriate [`requirements/*.in`](requirements) 16 | file(s) and then run: 17 | 18 | ```terminal 19 | make requirements 20 | ``` 21 | 22 | ### To Upgrade or Downgrade a Dependency 23 | 24 | We rely on [Dependabot](https://github.com/dependabot) to keep all our 25 | dependencies up to date by sending automated pull requests to all our repos. 26 | But if you need to upgrade or downgrade a dependency manually you can do that 27 | locally. 
28 | 29 | To upgrade a package to the latest version in all `requirements/*.txt` files: 30 | 31 | ```terminal 32 | make requirements --always-make args='--upgrade-package <package>' 33 | ``` 34 | 35 | To upgrade or downgrade a package to a specific version: 36 | 37 | ```terminal 38 | make requirements --always-make args='--upgrade-package <package>==<version>' 39 | ``` 40 | 41 | To upgrade **all** dependencies to their latest versions: 42 | 43 | ```terminal 44 | make requirements --always-make args=--upgrade 45 | ``` 46 | 47 | 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Hypothes.is Project and contributors 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 13 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 | DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 16 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help 2 | help: 3 | @echo "make help Show this help message" 4 | @echo "make dev Run the app in the development server" 5 | @echo "make lint Run the code linter(s) and print any warnings" 6 | @echo "make format Correctly format the code" 7 | @echo "make checkformatting Crash if the code isn't correctly formatted" 8 | @echo "make test Run the unit tests" 9 | @echo "make functests Run the functional tests" 10 | @echo "make coverage Print the unit test coverage report" 11 | @echo "make sure Make sure that the formatter, linter, tests, etc all pass" 12 | @echo "make docker Make the app's Docker image" 13 | @echo "make run-docker Run the app's Docker image locally. " 14 | @echo " This command exists for conveniently testing the Docker image " 15 | @echo " locally in production mode. It assumes that h's Elasticsearch " 16 | @echo " service is being run using docker-compose in the 'h_default' " 17 | @echo " network." 
18 | 19 | .PHONY: dev 20 | dev: node_modules/.uptodate python 21 | @tox -qe dev 22 | 23 | .PHONY: lint 24 | lint: backend-lint frontend-lint 25 | 26 | .PHONY: backend-lint 27 | backend-lint: python 28 | @tox -qe lint 29 | 30 | .PHONY: frontend-lint 31 | frontend-lint: node_modules/.uptodate 32 | @./node_modules/.bin/eslint bouncer/scripts 33 | 34 | .PHONY: format 35 | format: python 36 | @tox -qe format 37 | 38 | .PHONY: checkformatting 39 | checkformatting: python 40 | @tox -qe checkformatting 41 | 42 | .PHONY: test 43 | test: backend-test frontend-test 44 | 45 | .PHONY: functests 46 | functests: python 47 | @tox -qe functests 48 | 49 | .PHONY: backend-test 50 | backend-test: python 51 | @tox -q 52 | 53 | .PHONY: frontend-test 54 | frontend-test: node_modules/.uptodate 55 | @./node_modules/karma/bin/karma start karma.config.js 56 | 57 | .PHONY: coverage 58 | coverage: python 59 | @tox -qe coverage 60 | 61 | # Tell make how to compile requirements/*.txt files. 62 | # 63 | # `touch` is used to pre-create an empty requirements/%.txt file if none 64 | # exists, otherwise tox crashes. 65 | # 66 | # $(subst) is used because in the special case of making requirements.txt we 67 | # actually need to touch dev.txt not requirements.txt and we need to run 68 | # `tox -e dev ...` not `tox -e requirements ...` 69 | # 70 | # $(basename $(notdir $@))) gets just the environment name from the 71 | # requirements/%.txt filename, for example requirements/foo.txt -> foo. 
72 | requirements/%.txt: requirements/%.in 73 | @touch -a $(subst requirements.txt,dev.txt,$@) 74 | @tox -qe $(subst requirements,dev,$(basename $(notdir $@))) --run-command 'pip --quiet --disable-pip-version-check install pip-tools' 75 | @tox -qe $(subst requirements,dev,$(basename $(notdir $@))) --run-command 'pip-compile --allow-unsafe --quiet $(args) $<' 76 | 77 | # Inform make of the dependencies between our requirements files so that it 78 | # knows what order to re-compile them in and knows to re-compile a file if a 79 | # file that it depends on has been changed. 80 | requirements/dev.txt: requirements/requirements.txt 81 | requirements/tests.txt: requirements/requirements.txt 82 | requirements/functests.txt: requirements/requirements.txt 83 | requirements/lint.txt: requirements/tests.txt requirements/functests.txt 84 | 85 | # Add a requirements target so you can just run `make requirements` to 86 | # re-compile *all* the requirements files at once. 87 | # 88 | # This needs to be able to re-create requirements/*.txt files that don't exist 89 | # yet or that have been deleted so it can't just depend on all the 90 | # requirements/*.txt files that exist on disk $(wildcard requirements/*.txt). 91 | # 92 | # Instead we generate the list of requirements/*.txt files by getting all the 93 | # requirements/*.in files from disk ($(wildcard requirements/*.in)) and replace 94 | # the .in's with .txt's. 
95 | .PHONY: requirements requirements/ 96 | requirements requirements/: $(foreach file,$(wildcard requirements/*.in),$(basename $(file)).txt) 97 | 98 | .PHONY: sure 99 | sure: checkformatting lint test coverage functests 100 | 101 | .PHONY: docker 102 | docker: 103 | @git archive --format=tar.gz HEAD | docker build -t hypothesis/bouncer:$(DOCKER_TAG) - 104 | 105 | .PHONY: run-docker 106 | run-docker: 107 | @docker run \ 108 | --net h_default \ 109 | -e "ELASTICSEARCH_URL=http://elasticsearch:9200" \ 110 | -p 8000:8000 \ 111 | hypothesis/bouncer:$(DOCKER_TAG) 112 | 113 | .PHONY: python 114 | python: 115 | @./bin/install-python 116 | 117 | DOCKER_TAG = dev 118 | 119 | node_modules/.uptodate: package.json 120 | @echo installing javascript dependencies 121 | @node_modules/.bin/check-dependencies 2>/dev/null || npm install 122 | @touch $@ 123 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/hypothesis/bouncer/workflows/Continuous%20integration/badge.svg?branch=main)](https://github.com/hypothesis/bouncer/actions?query=branch%3Amain) 2 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black) 3 | 4 | Hypothesis Direct-Link Bouncer Service 5 | ====================================== 6 | 7 | Installing bouncer in a development environment 8 | ----------------------------------------------- 9 | 10 | ### You will need 11 | 12 | * [Git](https://git-scm.com/) 13 | 14 | * [Node](https://nodejs.org/) and npm. 15 | On Linux you should follow 16 | [nodejs.org's instructions for installing node](https://nodejs.org/en/download/package-manager/) 17 | because the version of node in the standard Ubuntu package repositories is 18 | too old. 19 | On macOS you should use [Homebrew](https://brew.sh/) to install node. 
20 | 21 | * [pyenv](https://github.com/pyenv/pyenv) 22 | Follow the instructions in the pyenv README to install it. 23 | The Homebrew method works best on macOS. 24 | 25 | ### Clone the Git repo 26 | 27 | git clone https://github.com/hypothesis/bouncer.git 28 | 29 | This will download the code into an `bouncer` directory in your current working 30 | directory. You need to be in the `bouncer` directory from the remainder of the 31 | installation process: 32 | 33 | cd bouncer 34 | 35 | ### Start the development server 36 | 37 | make dev 38 | 39 | The first time you run `make dev` it might take a while to start because it'll 40 | need to install the application dependencies and build the assets. 41 | 42 | This will start the server on port 8000 (http://localhost:8000), reload the 43 | application whenever changes are made to the source code, and restart it should 44 | it crash for some reason. 45 | 46 | **That's it!** You’ve finished setting up your bouncer development environment. Run 47 | `make help` to see all the commands that're available for running the tests, 48 | linting, code formatting, etc. 49 | 50 | Configuration 51 | ------------- 52 | 53 | You can set various environment variables to configure bouncer: 54 | 55 |
56 | <dt>CHROME_EXTENSION_ID</dt> 57 | <dd>The ID of the Hypothesis Chrome extension that bouncer will communicate with 58 | (default: the ID of the official Hypothesis Chrome extension)</dd>
59 | 60 | <dt>DEBUG</dt> 61 | <dd>If DEBUG is set (to any value) then tracebacks will be printed to the 62 | terminal for any unexpected Python exceptions. If there is no DEBUG 63 | variable set in the environment then unexpected Python exceptions will be 64 | reported to Sentry and a generic error page shown to the user.</dd>
65 | 66 | <dt>ELASTICSEARCH_URL</dt> 67 | <dd>The url (host and port) of the Elasticsearch server that bouncer will read 68 | annotations from (default: http://localhost:9200)</dd>
69 | 70 | <dt>ELASTICSEARCH_INDEX</dt> 71 | <dd>The name of the Elasticsearch index that bouncer will read annotations 72 | from (default: hypothesis)</dd>
73 | 74 | <dt>HYPOTHESIS_AUTHORITY</dt> 75 | <dd>The domain name of the Hypothesis service's first party authority. 76 | This is usually the same as the domain name of the Hypothesis service 77 | (default: localhost).</dd>
78 | 79 | <dt>HYPOTHESIS_URL</dt> 80 | <dd>The URL of the Hypothesis front page that requests to bouncer's front page 81 | will be redirected to (default: https://hypothes.is)</dd>
82 | 83 | <dt>SENTRY_DSN</dt> 84 | <dd>The DSN (Data Source Name) that bouncer will use to report crashes to 85 | Sentry</dd>
86 | 87 | <dt>VIA_BASE_URL</dt> 88 | <dd>The base URL of the Via service that bouncer will redirect users to if they 89 | don't have the Hypothesis Chrome extension installed 90 | (default: https://via.hypothes.is)</dd>
91 |
92 | 93 | Route Syntax/API 94 | ---------------- 95 | 96 | ### Share Annotations on Page/URL (`/go`) 97 | 98 | Go to a specified URL and display annotations there. Optionally filter which 99 | annotations are displayed. 100 | 101 | Querystring parameters: 102 | 103 | * `url` (required): URL of target page/document 104 | * `group` (optional): group ID. Show annotations within a specified group. 105 | * `q` (optional): Search query. Filter annotations at URL to those that match 106 | this search query. 107 | 108 | ### Share an Annotation (`/{id}` or `/{id}/{url}`) 109 | 110 | Go to an individual annotation, where `id` is the annotation's unique ID. 111 | 112 | Optional `url` path parameter: URL of the annotation's target document. 113 | This is intended to enhance the readability of shared annotation URLs and 114 | is functionally identical to the `/{id}` route. 115 | 116 | -------------------------------------------------------------------------------- /bin/init-env: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | export INSTANCE_ID=$(wget -O - -T 1 http://169.254.169.254/1.0/meta-data/instance-id 2>/dev/null || echo '') 6 | 7 | exec "$@" 8 | -------------------------------------------------------------------------------- /bin/install-python: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # 3 | # Install each required version of Python (from the .python-version file), 4 | # if it's not installed already. 5 | # 6 | # Also install tox in each pyenv copy of Python, if not installed already. 7 | # 8 | # Requirements 9 | # ============ 10 | # 11 | # * pyenv (https://github.com/pyenv/pyenv) to install versions of Python. 12 | # 13 | # Usage 14 | # ===== 15 | # 16 | # $ ./bin/install-python 17 | 18 | # Exit if we're running on GitHub Actions. 
19 | # On GitHub Actions we just want to use the versions of Python provided in the 20 | # GitHub Actions VM. 21 | if [ "$GITHUB_ACTIONS" = "true" ] 22 | then 23 | exit 24 | fi 25 | 26 | # Exit if we're running on Jenkins. 27 | # On Jenkins we run the tests in Docker and we just want to use the versions of 28 | # Python provided in the Docker container. 29 | if [ -n "${JENKINS_URL+set}" ]; then 30 | exit 31 | fi 32 | 33 | # Loop over every $python_version in the .python-version file. 34 | while IFS= read -r python_version 35 | do 36 | # Install this version of Python in pyenv if it's not installed already. 37 | pyenv install --skip-existing "$python_version" 38 | 39 | # Install tox in this version of Python if it's not already installed. 40 | if ! "$(pyenv root)/versions/$python_version/bin/tox" --version > /dev/null 2>&1 41 | then 42 | "$(pyenv root)/versions/$python_version/bin/pip" install --quiet --disable-pip-version-check 'tox<4' > /dev/null 43 | pyenv rehash 44 | fi 45 | done < .python-version 46 | -------------------------------------------------------------------------------- /bin/logger: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | logger is a supervisord event listener program. 5 | 6 | It aggregates the output of multiple programs running in supervisor and 7 | reprints their output with the addition of a honcho-like prefix. This prefix 8 | helps to distinguish the output of different programs. 
9 | 10 | Here's an example supervisor configuration file that uses logger: 11 | 12 | [supervisord] 13 | nodaemon=true 14 | environment=PYTHONUNBUFFERED="1" 15 | logfile=/dev/null 16 | logfile_maxbytes=0 17 | 18 | [program:web] 19 | command=gunicorn myproject:app 20 | stdout_logfile=NONE 21 | stderr_logfile=NONE 22 | stdout_events_enabled=true 23 | stderr_events_enabled=true 24 | 25 | [program:worker] 26 | command=celery -A myproject worker -l info 27 | stdout_logfile=NONE 28 | stderr_logfile=NONE 29 | stdout_events_enabled=true 30 | stderr_events_enabled=true 31 | 32 | [eventlistener:logger] 33 | command=logger 34 | buffer_size=100 35 | events=PROCESS_LOG 36 | stderr_logfile=/dev/fd/1 37 | stderr_logfile_maxbytes=0 38 | 39 | And here's an example of the output you might see from supervisord: 40 | 41 | 2017-01-24 17:25:02,903 INFO supervisord started with pid 15433 42 | 2017-01-24 17:25:03,907 INFO spawned: 'logger' with pid 15439 43 | 2017-01-24 17:25:03,910 INFO spawned: 'web' with pid 15440 44 | 2017-01-24 17:25:03,913 INFO spawned: 'worker' with pid 15441 45 | 2017-01-24 17:25:05,216 INFO success: logger entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 46 | 2017-01-24 17:25:05,217 INFO success: web entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 47 | 2017-01-24 17:25:05,217 INFO success: worker entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 48 | web (stderr) | 2017-01-24 17:25:04,203 [15440] [gunicorn.error:INFO] Starting gunicorn 19.6.0 49 | web (stderr) | 2017-01-24 17:25:04,205 [15440] [gunicorn.error:INFO] Listening at: http://127.0.0.1:5000 (15440) 50 | web (stderr) | 2017-01-24 17:25:04,206 [15440] [gunicorn.error:INFO] Using worker: sync 51 | web (stderr) | 2017-01-24 17:25:04,211 [15449] [gunicorn.error:INFO] Booting worker with pid: 15449 52 | worker | 53 | worker | -------------- celery@mahler.local v3.1.25 (Cipater) 54 | worker | ---- **** ----- 55 | worker | --- 
* *** * -- Darwin-16.3.0-x86_64-i386-64bit 56 | ... 57 | 58 | Note that in the configuration above we disable the logfiles for the 59 | individual programs and for the supervisor daemon itself. This isn't required 60 | but may be useful in containerised environments. 61 | 62 | By setting "stderr_logfile=/dev/fd/1" in the [eventlistener:logger] section, 63 | we redirect the aggregated output back to STDOUT (FD 1). You can also log the 64 | aggregated output to a single file. 65 | """ 66 | 67 | import sys 68 | 69 | WIDTH = 20 70 | 71 | 72 | def main(): 73 | while True: 74 | _write('READY\n') 75 | header = _parse_header(sys.stdin.readline()) 76 | payload = sys.stdin.read(int(header['len'])) 77 | 78 | # Only handle PROCESS_LOG_* events and just ACK anything else. 79 | if header['eventname'] == 'PROCESS_LOG_STDOUT': 80 | _log_payload(payload) 81 | elif header['eventname'] == 'PROCESS_LOG_STDERR': 82 | _log_payload(payload, err=True) 83 | 84 | _write('RESULT 2\nOK') 85 | 86 | 87 | def _write(s): 88 | sys.stdout.write(s) 89 | sys.stdout.flush() 90 | 91 | 92 | def _parse_header(data): 93 | return dict([x.split(':') for x in data.split()]) 94 | 95 | 96 | def _log_payload(payload, err=False): 97 | headerdata, data = payload.split('\n', 1) 98 | header = _parse_header(headerdata) 99 | name = header['processname'] 100 | if err: 101 | name += ' (stderr)' 102 | prefix = '{name:{width}} | '.format(name=name, width=WIDTH) 103 | for line in data.splitlines(): 104 | sys.stderr.write(prefix + line + '\n') 105 | sys.stderr.flush() 106 | 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /bouncer/__init__.py: -------------------------------------------------------------------------------- 1 | from bouncer._version import get_version 2 | 3 | __all__ = ("__version__",) 4 | __version__ = get_version() 5 | -------------------------------------------------------------------------------- 
/bouncer/_version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import datetime 4 | import subprocess 5 | from subprocess import DEVNULL # Python 3 6 | 7 | __all__ = ("get_version",) 8 | 9 | # git-archive substitution markers. When this file is written out by a `git 10 | # archive` command, these will be replaced by the short commit hash and the 11 | # commit date, respectively. 12 | VERSION_GIT_REF = "db2237bc" 13 | VERSION_GIT_DATE = "1748267343" 14 | 15 | # Fallback version in case we cannot derive the version. 16 | VERSION_UNKNOWN = "0+unknown" 17 | 18 | 19 | def fetch_git_ref(): 20 | ref = subprocess.check_output( 21 | ["git", "rev-parse", "--short", "HEAD"], stderr=DEVNULL 22 | ).strip() 23 | return ref.decode("utf-8") 24 | 25 | 26 | def fetch_git_date(ref): 27 | ts = subprocess.check_output(["git", "show", "-s", "--format=%ct", ref]) 28 | return datetime.datetime.fromtimestamp(int(ts)) 29 | 30 | 31 | def fetch_git_dirty(): 32 | dirty_tree = subprocess.call(["git", "diff-files", "--quiet"]) != 0 33 | dirty_index = ( 34 | subprocess.call(["git", "diff-index", "--quiet", "--cached", "HEAD"]) != 0 35 | ) 36 | return dirty_tree or dirty_index 37 | 38 | 39 | def git_version(): 40 | ref = fetch_git_ref() 41 | date = fetch_git_date(ref) 42 | dirty = fetch_git_dirty() 43 | return pep440_version(date, ref, dirty) 44 | 45 | 46 | def git_archive_version(): # pragma: nocover 47 | ref = VERSION_GIT_REF 48 | date = datetime.datetime.fromtimestamp(int(VERSION_GIT_DATE)) 49 | return pep440_version(date, ref) 50 | 51 | 52 | def pep440_version(date, ref, dirty=False): 53 | """Build a PEP440-compliant version number from the passed information.""" 54 | return "{date}+g{ref}{dirty}".format( 55 | date=date.strftime("%Y%m%d"), ref=ref, dirty=".dirty" if dirty else "" 56 | ) 57 | 58 | 59 | def get_version(): # pragma: nocover 60 | """Fetch the current application version.""" 61 | # First we try to retrieve 
the current application version from git. 62 | try: 63 | return git_version() 64 | except (subprocess.CalledProcessError, FileNotFoundError): 65 | pass 66 | 67 | # We are not in a git checkout or extracting the version from git failed, 68 | # so we attempt to read a version written into the header of this file by 69 | # `git archive`. 70 | if not VERSION_GIT_REF.startswith("$"): 71 | return git_archive_version() 72 | 73 | # If neither of these strategies work, we fall back to VERSION_UNKNOWN. 74 | return VERSION_UNKNOWN 75 | -------------------------------------------------------------------------------- /bouncer/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pyramid.config 5 | 6 | from bouncer._version import get_version 7 | 8 | 9 | def settings(): # pragma: nocover 10 | """ 11 | Return the app's configuration settings as a dict. 12 | 13 | Settings are read from environment variables and fall back to hardcoded 14 | defaults if those variables aren't defined. 
15 | 16 | """ 17 | via_base_url = os.environ.get("VIA_BASE_URL", "https://via.hypothes.is") 18 | if via_base_url.endswith("/"): 19 | via_base_url = via_base_url[:-1] 20 | 21 | debug = "DEBUG" in os.environ 22 | 23 | extension_ids = os.environ.get( 24 | "CHROME_EXTENSION_ID", "bjfhmglciegochdpefhhlphglcehbmek" 25 | ) 26 | if extension_ids.strip().startswith("{"): 27 | extension_ids = json.loads(extension_ids) 28 | if not extension_ids.get("default"): 29 | raise Exception('CHROME_EXTENSION_ID map must have a "default" key') 30 | else: 31 | extension_ids = {"default": extension_ids} 32 | 33 | result = { 34 | "chrome_extension_id": extension_ids, 35 | "debug": debug, 36 | "elasticsearch_index": os.environ.get("ELASTICSEARCH_INDEX", "hypothesis"), 37 | "hypothesis_authority": os.environ.get("HYPOTHESIS_AUTHORITY", "localhost"), 38 | "hypothesis_url": os.environ.get("HYPOTHESIS_URL", "https://hypothes.is"), 39 | "via_base_url": via_base_url, 40 | } 41 | 42 | if "ELASTICSEARCH_URL" in os.environ: 43 | result["elasticsearch_url"] = os.environ["ELASTICSEARCH_URL"] 44 | return result 45 | 46 | 47 | def create_app(_=None, **_settings): # pragma: nocover 48 | """Configure and return the WSGI app.""" 49 | config = pyramid.config.Configurator(settings=settings()) 50 | config.add_static_view(name="static", path="static") 51 | config.include("pyramid_jinja2") 52 | config.registry.settings["jinja2.filters"] = { 53 | "static_path": "pyramid_jinja2.filters:static_path_filter", 54 | "static_url": "pyramid_jinja2.filters:static_url_filter", 55 | } 56 | config.include("bouncer.search") 57 | config.include("bouncer.views") 58 | 59 | # Enable Sentry's "Releases" feature, see: 60 | # https://docs.sentry.io/platforms/python/configuration/options/#release 61 | # 62 | # h_pyramid_sentry passes any h_pyramid_sentry.init.* Pyramid settings 63 | # through to sentry_sdk.init(), see: 64 | # https://github.com/hypothesis/h-pyramid-sentry?tab=readme-ov-file#settings 65 | # 66 | # For the full list 
of options that sentry_sdk.init() supports see: 67 | # https://docs.sentry.io/platforms/python/configuration/options/ 68 | config.add_settings( 69 | { 70 | "h_pyramid_sentry.init.release": get_version(), 71 | } 72 | ) 73 | config.include("h_pyramid_sentry") 74 | 75 | return config.make_wsgi_app() 76 | -------------------------------------------------------------------------------- /bouncer/embed_detector.py: -------------------------------------------------------------------------------- 1 | import fnmatch 2 | import re 3 | from urllib.parse import urlparse 4 | 5 | # Hardcoded URL patterns where client is assumed to be embedded. 6 | # 7 | # Only the hostname and path are included in the pattern. The path must be 8 | # specified; use "example.com/*" to match all URLs on a particular domain. 9 | # 10 | # Patterns are shell-style wildcards ('*' matches any number of chars, '?' 11 | # matches a single char). 12 | PATTERNS = [ 13 | # Official Hypothesis websites 14 | "h.readthedocs.io/*", 15 | "web.hypothes.is/blog/*", 16 | # Unofficial Hypothesis-affiliated websites 17 | "docdrop.org/*", # See https://github.com/hypothesis/bouncer/issues/389 18 | # Publisher partners 19 | "psycnet.apa.org/fulltext/*", 20 | "awspntest.apa.org/fulltext/*", 21 | "*.semanticscholar.org/reader/*", # See https://hypothes-is.slack.com/archives/C04F8GLTT7U/p1674065065018549 22 | ] 23 | 24 | COMPILED_PATTERNS = [re.compile(fnmatch.translate(pat)) for pat in PATTERNS] 25 | 26 | 27 | def url_embeds_client(url): # pragma: nocover 28 | """ 29 | Test whether ``url`` is known to embed the client. 30 | 31 | This currently just tests the URL against the pattern list ``PATTERNS``. 32 | 33 | Only the hostname and path of the URL are tested. Returns false for non-HTTP 34 | URLs. 35 | 36 | :return: True if the URL matches a pattern. 
    """
    parsed_url = urlparse(url)
    # Non-HTTP(S) schemes (e.g. "urn:", "file:") can never embed the client.
    if not parsed_url.scheme.startswith("http"):
        return False

    # Normalize an empty path to "/" so patterns like "example.com/*" also
    # match bare-domain URLs.
    path = parsed_url.path
    if not path:
        path = "/"
    netloc_and_path = parsed_url.netloc + path

    # Patterns are matched against "host/path" only; query strings and
    # fragments are deliberately ignored (see module docstring above).
    for pat in COMPILED_PATTERNS:
        if pat.fullmatch(netloc_and_path):
            return True
    return False
--------------------------------------------------------------------------------
/bouncer/scripts/redirect.js:
--------------------------------------------------------------------------------
/**
 * Configuration information for the client-side code that detects the best way
 * to route the user to a URL with Hypothesis activated and specified
 * annotations selected.
 *
 * This is rendered into Bouncer's interstitial page by the backend service.
 *
 * @typedef {Object} Settings
 * @prop {string} chromeExtensionId - ID of the Chrome extension that Bouncer
 *   should check for in the user's browser.
 * @prop {string} extensionUrl - Original URL of the page plus a fragment that
 *   triggers the extension to activate when the user visits the page. This is
 *   also used in cases where the original URL embeds the client.
 * @prop {string|null} viaUrl - Proxy URL or `null` if the proxy cannot be used
 *   to display this annotation in context.
 */

/**
 * Return the settings object that the server injected into the page.
 *
 * @return {Settings}
 */
function getSettings(document) {
  return JSON.parse(
    document.querySelector('script.js-bouncer-settings').textContent
  );
}

/** Navigate the browser to the given URL. */
function defaultNavigateTo(url) {
  window.location.replace(url);
}

/**
 * Wrapper around `chrome.runtime.sendMessage` [1] which returns a Promise.
36 | * 37 | * [1] https://developer.chrome.com/docs/extensions/mv3/messaging/#external-webpage 38 | * 39 | * @param {string} extensionId 40 | * @param {object} data 41 | * @return {Promise} Promise that resolves with the result of the call returned 42 | * by the extension or rejects if an error was reported via `chrome.runtime.lastError`. 43 | */ 44 | function sendMessage(extensionId, data) { 45 | const chrome = window.chrome; 46 | return new Promise((resolve, reject) => { 47 | chrome.runtime.sendMessage(extensionId, data, result => { 48 | if (chrome.runtime.lastError) { 49 | reject(chrome.runtime.lastError); 50 | } else { 51 | resolve(result); 52 | } 53 | }); 54 | }); 55 | } 56 | 57 | /** 58 | * Navigate the browser to the requested annotation. 59 | * 60 | * If the browser is Chrome and our Chrome extension is installed then 61 | * navigate to the annotation's direct link for the Chrome extension. 62 | * If the Chrome extension isn't installed or the browser isn't Chrome then 63 | * navigate to the annotation's Via direct link. 64 | * 65 | * Returns a Promise which resolves after the navigation to the annotation's 66 | * URL has been initiated. 67 | * 68 | * @param {(url: string) => void} [navigateTo] - Test seam. Function that 69 | * performs a navigation by modifying `location.href`. 70 | * @param {Settings} [settings] - Test seam. Configuration for the extension 71 | * and redirect. 72 | */ 73 | export async function redirect( 74 | navigateTo = defaultNavigateTo, 75 | settings = getSettings(document) 76 | ) { 77 | // If the proxy cannot be used with this URL, send the user directly to the 78 | // original page. 
79 | if (!settings.viaUrl) { 80 | navigateTo(settings.extensionUrl); 81 | return; 82 | } 83 | 84 | if (settings.alwaysUseVia) { 85 | navigateTo(settings.viaUrl); 86 | return; 87 | } 88 | 89 | const chrome = window.chrome; 90 | if (chrome && chrome.runtime && chrome.runtime.sendMessage) { 91 | // The user is using Chrome, redirect them to our Chrome extension if they 92 | // have it installed, via otherwise. 93 | try { 94 | const response = await sendMessage(settings.chromeExtensionId, { 95 | type: 'ping', 96 | queryFeatures: ['activate'], 97 | }); 98 | // The user has our Chrome extension installed :) 99 | if (response.features && response.features.includes('activate')) { 100 | // Extension supports "activate" API that will let it handle 101 | // redirection and activation. 102 | const parsedURL = new URL(settings.extensionUrl); 103 | const query = parsedURL.hash; 104 | parsedURL.hash = ''; 105 | const urlWithoutFragment = parsedURL.toString(); 106 | 107 | try { 108 | await sendMessage(settings.chromeExtensionId, { 109 | type: 'activate', 110 | url: urlWithoutFragment, 111 | query, 112 | }); 113 | } catch (err) { 114 | console.error('Failed to activate extension', err); 115 | } 116 | } else { 117 | // For older extensions, fall back to a normal client-side redirect. 118 | // The installed extension(s) will notice the URL fragment and 119 | // activate. The downside is that if the user has multiple builds 120 | // of the Hypothesis extension installed, it is unpredictable as 121 | // to which will activate first and "win" the race to inject. 122 | navigateTo(settings.extensionUrl); 123 | } 124 | } catch (err) { 125 | // The user doesn't have our Chrome extension installed, or we couldn't 126 | // connect to it. 127 | console.error(err); 128 | navigateTo(settings.viaUrl); 129 | } 130 | } else { 131 | // The user isn't using Chrome, just redirect them to Via. 
132 | navigateTo(settings.viaUrl); 133 | } 134 | } 135 | 136 | if (!('__karma__' in window)) { 137 | // Check if in test environment 138 | redirect(); 139 | } 140 | -------------------------------------------------------------------------------- /bouncer/scripts/test/redirect-test.js: -------------------------------------------------------------------------------- 1 | import { redirect } from '../redirect.js'; 2 | 3 | describe('#redirect', () => { 4 | /** 5 | * Error message which a `chrome.runtime.sendMessage` request fails with 6 | * if the extension does not exist. This is reported via `chrome.runtime.lastError` 7 | * inside the `sendMessage` callback. 8 | */ 9 | const extensionConnectError = { 10 | message: 'Could not establish connection. Receiving end does not exist.', 11 | }; 12 | 13 | let settings; 14 | beforeEach(() => { 15 | window.chrome = undefined; 16 | settings = { 17 | chromeExtensionId: 'test-extension-id', 18 | extensionUrl: 19 | 'http://www.example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q', 20 | viaUrl: 21 | 'https://via.hypothes.is/http://www.example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q', 22 | }; 23 | sinon.stub(window.console, 'error'); 24 | }); 25 | 26 | afterEach(() => { 27 | window.console.error.restore(); 28 | }); 29 | 30 | it('reads settings from the page', () => { 31 | const settings = { 32 | chromeExtensionId: 'a-b-c', 33 | extensionUrl: 'https://example.org/#annotations:123', 34 | viaUrl: 'https://proxy.it/#annotations:123', 35 | }; 36 | const settingsEl = document.createElement('script'); 37 | settingsEl.type = 'application/json'; 38 | settingsEl.className = 'js-bouncer-settings'; 39 | settingsEl.textContent = JSON.stringify(settings); 40 | document.body.appendChild(settingsEl); 41 | const navigateTo = sinon.stub(); 42 | 43 | redirect(navigateTo); 44 | 45 | assert.isTrue(navigateTo.calledWith(settings.viaUrl)); 46 | }); 47 | 48 | [ 49 | // Browser is not Chrome 50 | undefined, 51 | 52 | // `chrome` global exists, 
but `runtime` property missing 53 | {}, 54 | 55 | // `chrome.runtime` exists, but `sendMessage` function is missing 56 | { runtime: {} }, 57 | ].forEach(chrome => { 58 | it('redirects to Via if `chrome.runtime.sendMessage` API not available', () => { 59 | // Some browsers define window.chrome but not chrome.runtime. 60 | window.chrome = chrome; 61 | const navigateTo = sinon.stub(); 62 | 63 | redirect(navigateTo, settings); 64 | 65 | assert.equal(navigateTo.calledOnce, true); 66 | assert.equal( 67 | navigateTo.calledWithExactly( 68 | 'https://via.hypothes.is/http://www.example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q' 69 | ), 70 | true 71 | ); 72 | }); 73 | }); 74 | 75 | it('sends "ping" request to extension', () => { 76 | window.chrome = { 77 | runtime: { 78 | sendMessage: sinon.stub(), 79 | }, 80 | }; 81 | 82 | redirect(() => {}, settings); 83 | 84 | sinon.assert.calledWith( 85 | window.chrome.runtime.sendMessage, 86 | 'test-extension-id', 87 | { 88 | type: 'ping', 89 | queryFeatures: ['activate'], 90 | }, 91 | sinon.match.func 92 | ); 93 | }); 94 | 95 | it('redirects to Via if "ping" request to extension fails', async () => { 96 | window.chrome = { 97 | runtime: { 98 | sendMessage: (id, message, callbackFunction) => { 99 | callbackFunction(); 100 | }, 101 | lastError: extensionConnectError, 102 | }, 103 | }; 104 | const navigateTo = sinon.stub(); 105 | 106 | await redirect(navigateTo, settings); 107 | 108 | sinon.assert.calledWith(console.error, window.chrome.runtime.lastError); 109 | assert.equal(navigateTo.calledOnce, true); 110 | assert.equal( 111 | navigateTo.calledWithExactly( 112 | 'https://via.hypothes.is/http://www.example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q' 113 | ), 114 | true 115 | ); 116 | }); 117 | 118 | it('redirects to extension if "ping" request succeeds and "activate" is not supported', async () => { 119 | window.chrome = { 120 | runtime: { 121 | sendMessage: (id, message, callbackFunction) => { 122 | callbackFunction({ 
type: 'pong' }); 123 | }, 124 | }, 125 | }; 126 | const navigateTo = sinon.stub(); 127 | 128 | await redirect(navigateTo, settings); 129 | 130 | assert.equal(navigateTo.calledOnce, true); 131 | assert.equal( 132 | navigateTo.calledWithExactly( 133 | 'http://www.example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q' 134 | ), 135 | true 136 | ); 137 | }); 138 | 139 | it('navigates to annotation URL using "activate" message to extension', async () => { 140 | window.chrome = { 141 | runtime: { 142 | sendMessage: sinon.spy((id, message, callbackFunction) => { 143 | callbackFunction({ type: 'pong', features: ['activate'] }); 144 | }), 145 | }, 146 | }; 147 | const navigateTo = sinon.stub(); 148 | 149 | await redirect(navigateTo, settings); 150 | 151 | sinon.assert.calledWith( 152 | window.chrome.runtime.sendMessage, 153 | 'test-extension-id', 154 | { 155 | type: 'activate', 156 | url: settings.extensionUrl.replace(/#.*$/, ''), 157 | query: new URL(settings.extensionUrl).hash, 158 | }, 159 | sinon.match.func 160 | ); 161 | }); 162 | 163 | it('redirects to original URL if no Via URL provided', () => { 164 | settings.viaUrl = null; 165 | const navigateTo = sinon.stub(); 166 | 167 | redirect(navigateTo, settings); 168 | 169 | assert.isTrue(navigateTo.calledOnce); 170 | assert.isTrue(navigateTo.calledWithExactly(settings.extensionUrl)); 171 | }); 172 | 173 | it('redirects to Via if `alwaysUseVia` is true', () => { 174 | settings.alwaysUseVia = true; 175 | const navigateTo = sinon.stub(); 176 | 177 | redirect(navigateTo, settings); 178 | 179 | assert.isTrue(navigateTo.calledOnce); 180 | assert.isTrue(navigateTo.calledWithExactly(settings.viaUrl)); 181 | }); 182 | }); 183 | -------------------------------------------------------------------------------- /bouncer/search.py: -------------------------------------------------------------------------------- 1 | from elasticsearch import Elasticsearch 2 | 3 | 4 | def get_client(settings): 5 | """Return a client for the 
Elasticsearch index.""" 6 | host = settings["elasticsearch_url"] 7 | kwargs = {} 8 | 9 | # nb. No AWS credentials here because we assume that if using AWS-managed 10 | # ES, the cluster lives inside a VPC. 11 | return Elasticsearch([host], **kwargs) 12 | 13 | 14 | def includeme(config): # pragma: nocover 15 | settings = config.registry.settings 16 | settings.setdefault("elasticsearch_url", "http://localhost:9200") 17 | 18 | config.registry["es.client"] = get_client(settings) 19 | config.add_request_method(lambda r: r.registry["es.client"], name="es", reify=True) 20 | -------------------------------------------------------------------------------- /bouncer/static/images/facebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypothesis/bouncer/db2237bced039415dc41af4bc9f297d94cf4e449/bouncer/static/images/facebook.png -------------------------------------------------------------------------------- /bouncer/static/images/hypothesis-icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Rectangle 2 Copy 18 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /bouncer/static/images/sad-annotation.svg: -------------------------------------------------------------------------------- 1 | 2 | 17 | 19 | 20 | 22 | image/svg+xml 23 | 25 | Slice 1 26 | 27 | 28 | 29 | 49 | 50 | Slice 1 52 | Created with Sketch. 
54 | 56 | 59 | 64 | 71 | 76 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /bouncer/static/images/twitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypothesis/bouncer/db2237bced039415dc41af4bc9f297d94cf4e449/bouncer/static/images/twitter.png -------------------------------------------------------------------------------- /bouncer/static/styles/bouncer.css: -------------------------------------------------------------------------------- 1 | body { 2 | color: #7a7a7a; 3 | font-family: "Helvetica Neue", Helvetica, Arial, "Lucida Grande", sans-serif; 4 | font-size: 14px; 5 | } 6 | 7 | p { 8 | margin: 0; 9 | } 10 | 11 | /* Vertically and horizontally center a block such as a
. */ 12 | .center { 13 | display: block; 14 | left: 50%; 15 | position: absolute; 16 | text-align: center; 17 | top: 50%; 18 | transform: translate(-50%, -50%); 19 | } 20 | 21 | .spinner__icon { 22 | height: 28px; 23 | padding-top: 2px; 24 | width: 24px; 25 | } 26 | .spinner__text { 27 | margin-top: 116px; 28 | } 29 | .spinner__stationary-ring { 30 | border: 3px solid #dbdbdb; 31 | border-radius: 50%; 32 | height: 74px; 33 | width: 74px; 34 | } 35 | .spinner__moving-ring { 36 | animation-duration: 1s; 37 | animation-fill-mode: forwards; 38 | animation-iteration-count: infinite; 39 | animation-name: moving-ring; 40 | animation-timing-function: linear; 41 | border-left: 3px solid #a6a6a6; 42 | border-radius: 100% 0 0 0; 43 | border-top: 3px solid #a6a6a6; 44 | height: 37px; 45 | transform: translate(-3px, -3px); 46 | transform-origin: bottom right; 47 | width: 37px; 48 | } 49 | @keyframes moving-ring { 50 | from { 51 | transform: translate(-3px, -3px) rotate(0deg); 52 | } 53 | to { 54 | transform: translate(-3px, -3px) rotate(359deg); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /bouncer/templates/annotation.html.jinja2: -------------------------------------------------------------------------------- 1 | {% extends "templates/base.html.jinja2" %} 2 | 3 | {% block content %} 4 |
5 | {% trans %}Loading annotation for {{ pretty_url }}{% endtrans %} 10 |
11 |
12 |

{% trans %}Loading annotation for {{ pretty_url }}{% endtrans %}

13 | {% endblock %} 14 | 15 | {% block scripts %} 16 | 17 | 20 | 23 | {% endblock %} 24 | -------------------------------------------------------------------------------- /bouncer/templates/base.html.jinja2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | {% if show_metadata == True %} 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | {% endif %} 21 | 22 | {{ title|safe }} 23 | 26 | 27 | 28 |
29 | {% block content %}{% endblock %} 30 |
31 | 32 | {% block scripts %}{% endblock %} 33 | 34 | 35 | -------------------------------------------------------------------------------- /bouncer/templates/error.html.jinja2: -------------------------------------------------------------------------------- 1 | {% extends "templates/base.html.jinja2" %} 2 | 3 | {% set title = message %} 4 | 5 | {% block content %} 6 | 7 |

8 | {{ message|safe }} 9 |

10 | {% endblock %} 11 | -------------------------------------------------------------------------------- /bouncer/util.py: -------------------------------------------------------------------------------- 1 | from urllib import parse 2 | 3 | from markupsafe import Markup 4 | from pyramid import i18n 5 | 6 | _ = i18n.TranslationStringFactory(__package__) 7 | 8 | 9 | #: The maximum length that the "netloc" (the www.example.com part in 10 | #: http://www.example.com/example) can be in the pretty URL that is displayed 11 | #: to the user before it gets truncated. 12 | NETLOC_MAX_LENGTH = 30 13 | 14 | #: The metadata we are populating has fields that play the roles of Title and 15 | #: Description on Twitter and Facebook share cards. We map the annotation's `quote` 16 | #: field to Title. If we lack a quote we try to form one from `document_uri` 17 | #: If that fails, we fall back to this minimal version. 18 | ANNOTATION_BOILERPLATE_QUOTE = _("Hypothesis annotation") 19 | 20 | #: We map the annotation's `text` field to Description. If it's empty, we fall back 21 | #: to this minimal version. 22 | ANNOTATION_BOILERPLATE_TEXT = _("Follow this link to see the annotation in context") 23 | 24 | 25 | class DeletedAnnotationError(Exception): 26 | """Raised if an annotation has been marked as deleted in Elasticsearch.""" 27 | 28 | 29 | class InvalidAnnotationError(Exception): 30 | """Raised if an annotation from Elasticsearch can't be parsed.""" 31 | 32 | def __init__(self, message, reason): 33 | """ 34 | Return a new InvalidAnnotationError instance. 
35 | 36 | :param message: a user-friendly error message 37 | :type message: string 38 | 39 | :param reason: a computer-friendly unique string identifying the reason 40 | the exception was raised 41 | :type reason: string 42 | 43 | """ 44 | self.message = message 45 | self.reason = reason 46 | 47 | def __str__(self): 48 | return self.message 49 | 50 | 51 | def parse_document(document): 52 | """ 53 | Return the annotation ID, annotated document's URI, and shared 54 | status from the given Elasticsearch annotation document. 55 | 56 | Also return annotation quote (if available, else empty) and text 57 | to enhance the share card. 58 | 59 | Tools for checking how FB and Twitter display share metadata: 60 | https://developers.facebook.com/tools/debug/sharing/ 61 | https://cards-dev.twitter.com/validator 62 | 63 | :param document: the Elasticsearch annotation document to parse 64 | :type document: dict 65 | 66 | :returns: A dict with extracted metadata properties 67 | 68 | """ 69 | # We assume that Elasticsearch documents always have "_id" and "_source". 70 | annotation_id = document["_id"] 71 | annotation = document["_source"] 72 | 73 | if document["_source"].get("deleted", False) is True: 74 | raise DeletedAnnotationError() 75 | 76 | authority = annotation["authority"] 77 | 78 | # If an annotation isn't deleted then we assume that it always has "group" 79 | # and "shared". 
    group = annotation["group"]
    is_shared = annotation["shared"] is True

    # Only expose share-card metadata for annotations that are both shared
    # and in the "__world__" group (presumably the public group — confirm
    # against h's group semantics).
    show_metadata = is_shared and group == "__world__"

    document_uri = None

    # This will fill the Title slot in Twitter/OG metadata
    quote = None

    # This will fill the Description slot in Twitter/OG metadata
    text = annotation.get("text")
    if not text:
        text = ANNOTATION_BOILERPLATE_TEXT

    # Set below when a MediaTimeSelector is present on the first target.
    has_media_time = False

    try:
        targets = annotation["target"]
        if targets:  # pragma: nocover
            # Only the first target is consulted; any additional targets are
            # ignored.
            document_uri = targets[0]["source"]
            selectors = targets[0].get("selector", [])
            for selector in selectors:
                # NOTE: structural pattern matching requires Python >= 3.10.
                match selector.get("type"):
                    case "TextQuoteSelector":
                        quote = selector.get("exact")
                    case "MediaTimeSelector":
                        has_media_time = True
    except KeyError:
        pass

    # If the annotation has no selectors, quote is still None so apply boilerplate
    if quote is None:
        quote = get_boilerplate_quote(document_uri)

    # "urn:x-pdf:" fingerprint URIs are not browsable; prefer the document's
    # recorded web_uri (a real http(s) URL) when one exists.
    if isinstance(document_uri, str) and document_uri.startswith("urn:x-pdf:"):
        try:
            web_uri = annotation["document"]["web_uri"]
            if web_uri:  # pragma: nocover
                document_uri = web_uri
        except KeyError:  # pragma: nocover
            pass

    if document_uri is None:
        raise InvalidAnnotationError(
            _("The annotation has no URI"), "annotation_has_no_uri"
        )

    if not isinstance(document_uri, str):
        raise InvalidAnnotationError(
            _("The annotation has an invalid document URI"), "uri_not_a_string"
        )

    return {
        "authority": authority,
        "annotation_id": annotation_id,
        "document_uri": document_uri,
        "show_metadata": show_metadata,
        # Quote/text are escaped before being embedded in the page.
        "quote": _escape_quotes(quote),
        "text": _escape_quotes(text),
        "has_media_time": has_media_time,
    }


def get_pretty_url(url):
    """
    Return the domain name from `url` for display.
147 | """ 148 | try: 149 | parsed_url = parse.urlparse(url) 150 | except (AttributeError, ValueError): 151 | return None 152 | 153 | pretty_url = parsed_url.netloc[:NETLOC_MAX_LENGTH] 154 | if len(parsed_url.netloc) > NETLOC_MAX_LENGTH: 155 | pretty_url += Markup("…") 156 | return pretty_url 157 | 158 | 159 | def get_boilerplate_quote(document_uri): 160 | pretty_url = get_pretty_url(document_uri) 161 | if pretty_url: 162 | return _("Hypothesis annotation for {site}".format(site=pretty_url)) 163 | else: 164 | return ANNOTATION_BOILERPLATE_QUOTE 165 | 166 | 167 | def _escape_quotes(string): 168 | return string.replace('"', "\u0022").replace("'", "\u0027") 169 | -------------------------------------------------------------------------------- /bouncer/views.py: -------------------------------------------------------------------------------- 1 | import json 2 | from urllib import parse 3 | 4 | import h_pyramid_sentry 5 | from elasticsearch import Elasticsearch, exceptions 6 | from pyramid import httpexceptions, i18n, view 7 | from pyramid.httpexceptions import HTTPNoContent 8 | from sentry_sdk import capture_message 9 | 10 | from bouncer import util 11 | from bouncer.embed_detector import url_embeds_client 12 | 13 | _ = i18n.TranslationStringFactory(__package__) 14 | 15 | 16 | class FailedHealthcheck(Exception): 17 | """An exception raised when the healthcheck fails.""" 18 | 19 | 20 | @view.view_defaults(renderer="bouncer:templates/annotation.html.jinja2") 21 | class AnnotationController(object): 22 | def __init__(self, request): 23 | self.request = request 24 | 25 | @view.view_config(route_name="annotation_with_url") 26 | @view.view_config(route_name="annotation_without_url") 27 | def annotation(self): 28 | settings = self.request.registry.settings 29 | 30 | try: 31 | es7_or_later = _es_server_version(self.request.es) >= 7 32 | document = self.request.es.get( 33 | index=settings["elasticsearch_index"], 34 | # Set `doc_type` to the name of the mapping type used by h 35 
| # when talking to an ES 6 server, or the endpoint name "_doc" 36 | # in ES 7+. 37 | # 38 | # See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/removal-of-types.html 39 | doc_type="_doc" if es7_or_later else "annotation", 40 | id=self.request.matchdict["id"], 41 | ) 42 | except exceptions.NotFoundError: 43 | raise httpexceptions.HTTPNotFound(_("Annotation not found")) 44 | 45 | try: 46 | parsed_document = util.parse_document(document) 47 | authority = parsed_document["authority"] 48 | annotation_id = parsed_document["annotation_id"] 49 | document_uri = parsed_document["document_uri"] 50 | show_metadata = parsed_document["show_metadata"] 51 | quote = parsed_document["quote"] 52 | text = parsed_document["text"] 53 | has_media_time = parsed_document["has_media_time"] 54 | 55 | except util.DeletedAnnotationError: 56 | raise httpexceptions.HTTPNotFound(_("Annotation not found")) 57 | 58 | except util.InvalidAnnotationError as exc: 59 | raise httpexceptions.HTTPUnprocessableEntity(str(exc)) 60 | 61 | # Remove any existing #fragment identifier from the URI before we 62 | # append our own. 63 | document_uri = parse.urldefrag(document_uri)[0] 64 | 65 | if not _is_valid_http_url(document_uri): 66 | raise httpexceptions.HTTPUnprocessableEntity( 67 | _( 68 | "Sorry, but it looks like this annotation was made on a " 69 | "document that is not publicly available." 
70 | ) 71 | ) 72 | 73 | via_url = None 74 | if _can_use_proxy(settings, authority=authority) and not url_embeds_client( 75 | document_uri 76 | ): 77 | via_url = "{via_base_url}/{uri}#annotations:{id}".format( 78 | via_base_url=settings["via_base_url"], 79 | uri=document_uri, 80 | id=annotation_id, 81 | ) 82 | 83 | extension_url = "{uri}#annotations:{id}".format( 84 | uri=document_uri, id=annotation_id 85 | ) 86 | 87 | pretty_url = util.get_pretty_url(document_uri) 88 | 89 | title = util.get_boilerplate_quote(document_uri) 90 | 91 | default_extension = settings["chrome_extension_id"]["default"] 92 | extension_id = settings["chrome_extension_id"].get(authority, default_extension) 93 | 94 | # If a YouTube annotation has a media time associated, this means it 95 | # was made using Via's transcript annotation tool. 96 | # 97 | # This means we force the use of Via, even if the extension is 98 | # installed. 99 | always_use_via = False 100 | if document_uri.startswith("https://www.youtube.com") and has_media_time: 101 | always_use_via = True 102 | 103 | return { 104 | "data": json.dumps( 105 | { 106 | # Warning: variable names change from python_style to 107 | # javaScriptStyle here! 
108 | "alwaysUseVia": always_use_via, 109 | "chromeExtensionId": extension_id, 110 | "extensionUrl": extension_url, 111 | "viaUrl": via_url, 112 | } 113 | ), 114 | "show_metadata": show_metadata, 115 | "pretty_url": pretty_url, 116 | "quote": quote, 117 | "text": text, 118 | "title": title, 119 | } 120 | 121 | 122 | def _es_server_version(es: Elasticsearch) -> int: 123 | """Return the major version of the Elasticsearch server.""" 124 | server_version = es.info()["version"]["number"] 125 | major, *other = server_version.split(".") 126 | return int(major) 127 | 128 | 129 | @view.view_config(renderer="bouncer:templates/index.html.jinja2", route_name="index") 130 | def index(request): # pragma: nocover 131 | raise httpexceptions.HTTPFound(location=request.registry.settings["hypothesis_url"]) 132 | 133 | 134 | @view.view_config( 135 | renderer="bouncer:templates/annotation.html.jinja2", route_name="goto_url" 136 | ) 137 | def goto_url(request): 138 | """ 139 | Redirect the user to a specified URL with the annotation client layer 140 | activated. This provides a URL-sharing mechanism. 141 | 142 | Optional querystring parameters can refine the behavior of the annotation 143 | client at the target url by identifying: 144 | 145 | * "group" - a group to focus; OR 146 | * "q" a query to populate the client search with 147 | """ 148 | settings = request.registry.settings 149 | url = request.params.get("url") 150 | 151 | if url is None: 152 | raise httpexceptions.HTTPBadRequest('"url" parameter is missing') 153 | 154 | if not _is_valid_http_url(url): 155 | raise httpexceptions.HTTPBadRequest( 156 | _( 157 | "Sorry, but this service can only show annotations on " 158 | "valid HTTP or HTTPs URLs." 159 | ) 160 | ) 161 | 162 | # Remove any existing #fragment identifier from the URI before we 163 | # append our own. 
164 | url = parse.urldefrag(url)[0] 165 | 166 | group = request.params.get("group", "") 167 | query = parse.quote(request.params.get("q", "")) 168 | 169 | # Translate any refining querystring parameters into a URL fragment 170 | # syntax understood by the client 171 | fragment = "annotations:" 172 | 173 | # group will supersede query (q) if both are present 174 | if group: 175 | # focus a specific group in the client 176 | fragment = fragment + "group:{group}".format(group=group) 177 | else: 178 | # populate the client search with a query 179 | fragment = fragment + "query:{query}".format(query=query) 180 | 181 | if not url_embeds_client(url): 182 | via_url = "{via_base_url}/{url}#{fragment}".format( 183 | via_base_url=settings["via_base_url"], url=url, fragment=fragment 184 | ) 185 | else: 186 | via_url = None 187 | 188 | extension_url = "{url}#{fragment}".format(url=url, fragment=fragment) 189 | 190 | pretty_url = util.get_pretty_url(url) 191 | 192 | return { 193 | "data": json.dumps( 194 | { 195 | # nb. We always use the default extension ID here, because we 196 | # don't have an annotation to determine the authority. 
197 |                 "chromeExtensionId": settings["chrome_extension_id"]["default"],
198 |                 "viaUrl": via_url,
199 |                 "extensionUrl": extension_url,
200 |             }
201 |         ),
202 |         "pretty_url": pretty_url,
203 |     }
204 | 
205 | 
206 | @view.view_config(route_name="crash")
207 | def crash(request):  # pragma: nocover
208 |     """Crash if requested to for testing purposes."""
209 | 
    # Deliberately raising here lets ops exercise the error-reporting
    # pipeline against a running instance on demand (routed at /_crash).
210 |     # Ensure that no conceivable accident could cause this to be triggered
211 |     if request.params.get("cid", "") == "a751bb01":
212 |         raise ValueError("Something has gone wrong")
213 | 
214 |     return HTTPNoContent()
215 | 
216 | 
217 | @view.view_defaults(renderer="bouncer:templates/error.html.jinja2")
218 | class ErrorController(object):
    """Render a user-facing error page for exceptions raised by views."""

219 |     def __init__(self, exc, request):
220 |         self.exc = exc
221 |         self.request = request
222 | 
    # NOTE(review): in Pyramid, HTTPServerError is a subclass of HTTPError,
    # so the second view_config below looks redundant -- confirm before
    # removing.
223 |     @view.view_config(context=httpexceptions.HTTPError)
224 |     @view.view_config(context=httpexceptions.HTTPServerError)
225 |     def httperror(self):
226 |         self.request.response.status_int = self.exc.status_int
227 |         # If code raises an HTTPError or HTTPServerError we assume this was
228 |         # deliberately raised and:
229 |         # 1. Show the user an error page including specific error message
230 |         # 2. _Do not_ report the error to Sentry.
231 |         return {"message": str(self.exc)}
232 | 
233 |     @view.view_config(context=Exception)
234 |     def error(self):
235 |         # If code raises a non-HTTPException exception we assume it was a bug
236 |         # and:
237 |         # 1. Show the user a generic error page
238 |         # 2. Report the details of the error to Sentry.
239 |         self.request.response.status_int = 500
240 | 
241 |         h_pyramid_sentry.report_exception()
242 | 
243 |         # In debug mode re-raise exceptions so that they get printed in the
244 |         # terminal.
245 |         if self.request.registry.settings["debug"]:
246 |             raise
247 | 
248 |         return {
249 |             "message": _(
250 |                 "Sorry, but something went wrong with the link. "
251 |                 "The issue has been reported and we'll try to "
252 |                 "fix it."
253 |             )
254 |         }
255 | 
256 | 
257 | @view.view_config(route_name="healthcheck", renderer="json", http_cache=0)
258 | def healthcheck(request):
    # Health endpoint (routed at /_status; see includeme below).
    #
    # Checks Elasticsearch cluster health for the configured index and
    # returns {"status": "okay"} on success. Raises FailedHealthcheck
    # (defined elsewhere in the package -- not visible in this chunk)
    # when ES is unreachable or the cluster status is unacceptable.
259 |     index = request.registry.settings["elasticsearch_index"]
260 |     try:
261 |         status = request.es.cluster.health(index=index)["status"]
262 |     except exceptions.ElasticsearchException as exc:
263 |         raise FailedHealthcheck("elasticsearch exception") from exc
264 | 
    # Any status other than yellow/green (i.e. "red") means the index is
    # not reliably queryable, so fail the check.
265 |     if status not in ("yellow", "green"):
266 |         raise FailedHealthcheck("cluster status was {!r}".format(status))
267 | 
    # If a "sentry" query param is present, emit a test event via
    # capture_message (presumably Sentry's -- import not visible here) so
    # the error-reporting integration can be verified end to end.
268 |     if "sentry" in request.params:
269 |         capture_message("Test message from the healthcheck() view")
270 | 
271 |     return {"status": "okay"}
272 | 
273 | 
274 | def _is_valid_http_url(url):
275 |     """
276 |     Return `True` if `url` is a valid HTTP or HTTPS URL.
277 | 
278 |     Parsing is currently very lenient as the URL only has to be accepted by
279 |     `urlparse()`.
280 |     """
281 |     try:
282 |         parsed_url = parse.urlparse(url)
283 |         return parsed_url.scheme == "http" or parsed_url.scheme == "https"
284 |     except Exception:
285 |         return False
286 | 
287 | 
288 | def _can_use_proxy(settings, authority):
289 |     """
290 |     Return `True` if an annotation can be shown via the proxy service.
291 | 
292 |     This currently only considers the authority but in future it could also
293 |     incorporate checks for whether the target page embeds Hypothesis.
294 | 
295 |     :param settings: App settings dict
296 |     :param authority: Authority of annotation's user
297 |     """
298 | 
299 |     # The proxy service can only be used with pages that use first party
300 |     # accounts, because third-party accounts require the host page to supply
301 |     # login information to the client, which in turn relies on the user's cookie
302 |     # session and therefore does not work properly through the proxy.
303 | return settings["hypothesis_authority"] == authority 304 | 305 | 306 | def includeme(config): # pragma: nocover 307 | config.add_route("index", "/") 308 | config.add_route("healthcheck", "/_status") 309 | config.add_route("crash", "/_crash") 310 | config.add_route("goto_url", "/go") 311 | config.add_route("annotation_with_url", "/{id}/*url") 312 | config.add_route("annotation_without_url", "/{id}") 313 | config.scan(__name__) 314 | -------------------------------------------------------------------------------- /conf/development.ini: -------------------------------------------------------------------------------- 1 | [app:main] 2 | use = call:bouncer.app:create_app 3 | debug = true 4 | 5 | [loggers] 6 | keys = root, bouncer 7 | 8 | [handlers] 9 | keys = console 10 | 11 | [formatters] 12 | keys = generic 13 | 14 | [logger_root] 15 | level = INFO 16 | handlers = console 17 | 18 | [logger_bouncer] 19 | level = DEBUG 20 | handlers = 21 | qualname = bouncer 22 | 23 | [handler_console] 24 | class = StreamHandler 25 | args = (sys.stderr,) 26 | level = NOTSET 27 | formatter = generic 28 | 29 | [formatter_generic] 30 | format = %(asctime)s %(levelname)-5.5s [%(name)s:%(lineno)s][%(threadName)s] %(message)s 31 | -------------------------------------------------------------------------------- /conf/gunicorn-dev.conf.py: -------------------------------------------------------------------------------- 1 | bind = "0.0.0.0:8000" 2 | reload = True 3 | reload_extra_files = "bouncer/templates" 4 | timeout = 0 5 | -------------------------------------------------------------------------------- /conf/gunicorn.conf.py: -------------------------------------------------------------------------------- 1 | bind = "0.0.0.0:8000" 2 | worker_tmp_dir = "/dev/shm" 3 | -------------------------------------------------------------------------------- /conf/production.ini: -------------------------------------------------------------------------------- 1 | [pipeline:main] 2 | pipeline: 3 | 
proxy-prefix 4 | bouncer 5 | 6 | [app:bouncer] 7 | use = call:bouncer.app:create_app 8 | 9 | [filter:proxy-prefix] 10 | use: egg:PasteDeploy#prefix 11 | 12 | [loggers] 13 | keys = root, bouncer, alembic 14 | 15 | [handlers] 16 | keys = console 17 | 18 | [formatters] 19 | keys = generic 20 | 21 | [logger_root] 22 | level = INFO 23 | handlers = console 24 | 25 | [logger_bouncer] 26 | level = DEBUG 27 | handlers = 28 | qualname = bouncer 29 | 30 | [logger_alembic] 31 | level = INFO 32 | handlers = 33 | qualname = alembic 34 | 35 | [handler_console] 36 | class = StreamHandler 37 | args = (sys.stderr,) 38 | level = NOTSET 39 | formatter = generic 40 | 41 | [formatter_generic] 42 | format = %(asctime)s %(levelname)-5.5s [%(name)s:%(lineno)s][%(threadName)s] %(message)s 43 | -------------------------------------------------------------------------------- /conf/supervisord-dev.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | silent=true 4 | 5 | [program:web] 6 | command=newrelic-admin run-program gunicorn --paste conf/development.ini --config conf/gunicorn-dev.conf.py 7 | stdout_events_enabled=true 8 | stderr_events_enabled=true 9 | stopsignal=KILL 10 | stopasgroup=true 11 | 12 | [eventlistener:logger] 13 | command=bin/logger --dev 14 | buffer_size=100 15 | events=PROCESS_LOG 16 | stderr_logfile=/dev/fd/1 17 | stderr_logfile_maxbytes=0 18 | stdout_logfile=/dev/null 19 | 20 | [unix_http_server] 21 | file = .supervisor.sock 22 | 23 | [rpcinterface:supervisor] 24 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 25 | 26 | [supervisorctl] 27 | serverurl = unix://.supervisor.sock 28 | prompt = bouncer 29 | -------------------------------------------------------------------------------- /conf/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | environment=PYTHONUNBUFFERED="1" 4 | 
logfile=/dev/null 5 | logfile_maxbytes=0 6 | 7 | [program:web] 8 | command=newrelic-admin run-program gunicorn --paste conf/production.ini --config conf/gunicorn.conf.py 9 | stdout_logfile=NONE 10 | stderr_logfile=NONE 11 | stdout_events_enabled=true 12 | stderr_events_enabled=true 13 | 14 | [eventlistener:logger] 15 | command=bin/logger 16 | buffer_size=100 17 | events=PROCESS_LOG 18 | stderr_logfile=/dev/fd/1 19 | stderr_logfile_maxbytes=0 20 | -------------------------------------------------------------------------------- /karma.config.js: -------------------------------------------------------------------------------- 1 | module.exports = function (config) { 2 | config.set({ 3 | basePath: '', 4 | frameworks: ['chai', 'mocha', 'sinon'], 5 | files: [ 6 | { pattern: 'bouncer/scripts/*.js', type: 'module', included: false }, 7 | { pattern: 'bouncer/**/*-test.js', type: 'module' }, 8 | ], 9 | reporters: ['progress'], 10 | port: 9876, 11 | colors: true, 12 | logLevel: config.LOG_INFO, 13 | autoWatch: false, 14 | browsers: ['ChromeHeadless'], 15 | singleRun: true, 16 | concurrency: Infinity 17 | }); 18 | }; 19 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "devDependencies": { 3 | "chai": "^4.2.0", 4 | "eslint": "^8.38.0", 5 | "eslint-config-hypothesis": "^2.6.0", 6 | "eslint-plugin-mocha": "^10.5.0", 7 | "karma": "^6.3.16", 8 | "karma-chai": "^0.1.0", 9 | "karma-chrome-launcher": "^3.1.0", 10 | "karma-mocha": "^2.0.1", 11 | "karma-sinon": "^1.0.4", 12 | "mocha": "^11.1.0", 13 | "sinon": "^7.5.0" 14 | }, 15 | "browserslist": "chrome 70, firefox 70, safari 11", 16 | "license": "BSD-2-Clause", 17 | "repository": { 18 | "type": "git", 19 | "url": "https://github.com/hypothesis/bouncer.git" 20 | }, 21 | "prettier": { 22 | "arrowParens": "avoid", 23 | "singleQuote": true, 24 | "importOrder": [ 25 | "^[./]" 26 | ], 27 | 
"importOrderSeparation": true 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pytest.ini_options] 2 | addopts = "-q" 3 | filterwarnings = [ 4 | "error", # Fail the tests if there are any warnings. 5 | "ignore:^find_module\\(\\) is deprecated and slated for removal in Python 3.12; use find_spec\\(\\) instead$:DeprecationWarning:importlib", 6 | "ignore:^FileFinder.find_loader\\(\\) is deprecated and slated for removal in Python 3.12; use find_spec\\(\\) instead$:DeprecationWarning:importlib", 7 | "ignore:^pkg_resources is deprecated as an API:DeprecationWarning:pkg_resources", 8 | "ignore:^Deprecated call to .pkg_resources\\.declare_namespace\\('.*'\\).\\.:DeprecationWarning:pkg_resources", 9 | "ignore:^pkg_resources is deprecated as an API.:DeprecationWarning:pyramid", 10 | # https://github.com/webpy/webpy/issues/732 11 | "ignore:^\\'cgi\\' is deprecated and slated for removal in Python 3\\.13:DeprecationWarning:webob", 12 | 13 | ] 14 | -------------------------------------------------------------------------------- /requirements/checkformatting.in: -------------------------------------------------------------------------------- 1 | pip-tools 2 | pip-sync-faster 3 | black 4 | isort 5 | -------------------------------------------------------------------------------- /requirements/checkformatting.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --allow-unsafe requirements/checkformatting.in 6 | # 7 | black==25.1.0 8 | # via -r checkformatting.in 9 | build==1.0.3 10 | # via pip-tools 11 | click==8.1.7 12 | # via 13 | # black 14 | # pip-tools 15 | importlib-metadata==7.0.1 16 | # via pip-sync-faster 17 | isort==6.0.1 18 | # via -r checkformatting.in 19 
| mypy-extensions==1.0.0 20 | # via black 21 | packaging==23.2 22 | # via 23 | # black 24 | # build 25 | pathspec==0.12.1 26 | # via black 27 | pip-sync-faster==0.0.5 28 | # via -r checkformatting.in 29 | pip-tools==7.4.1 30 | # via 31 | # -r checkformatting.in 32 | # pip-sync-faster 33 | platformdirs==4.1.0 34 | # via black 35 | pyproject-hooks==1.0.0 36 | # via 37 | # build 38 | # pip-tools 39 | wheel==0.42.0 40 | # via pip-tools 41 | zipp==3.19.1 42 | # via importlib-metadata 43 | 44 | # The following packages are considered to be unsafe in a requirements file: 45 | pip==23.3.2 46 | # via pip-tools 47 | setuptools==78.1.1 48 | # via pip-tools 49 | -------------------------------------------------------------------------------- /requirements/coverage.in: -------------------------------------------------------------------------------- 1 | pip-tools 2 | pip-sync-faster 3 | coverage 4 | -------------------------------------------------------------------------------- /requirements/coverage.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --allow-unsafe requirements/coverage.in 6 | # 7 | build==1.0.3 8 | # via pip-tools 9 | click==8.1.7 10 | # via pip-tools 11 | coverage==7.8.0 12 | # via -r coverage.in 13 | importlib-metadata==7.0.1 14 | # via pip-sync-faster 15 | packaging==23.2 16 | # via build 17 | pip-sync-faster==0.0.5 18 | # via -r coverage.in 19 | pip-tools==7.4.1 20 | # via 21 | # -r coverage.in 22 | # pip-sync-faster 23 | pyproject-hooks==1.0.0 24 | # via 25 | # build 26 | # pip-tools 27 | wheel==0.42.0 28 | # via pip-tools 29 | zipp==3.19.1 30 | # via importlib-metadata 31 | 32 | # The following packages are considered to be unsafe in a requirements file: 33 | pip==23.3.2 34 | # via pip-tools 35 | setuptools==78.1.1 36 | # via pip-tools 37 | 
-------------------------------------------------------------------------------- /requirements/dev.in: -------------------------------------------------------------------------------- 1 | pip-tools 2 | pip-sync-faster 3 | ipython 4 | ipdb 5 | supervisor 6 | -r requirements.txt 7 | -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --allow-unsafe requirements/dev.in 6 | # 7 | asttokens==2.4.1 8 | # via stack-data 9 | build==1.0.3 10 | # via pip-tools 11 | certifi==2025.4.26 12 | # via 13 | # -r requirements.txt 14 | # requests 15 | # sentry-sdk 16 | charset-normalizer==3.3.2 17 | # via 18 | # -r requirements.txt 19 | # requests 20 | click==8.1.7 21 | # via pip-tools 22 | decorator==5.1.1 23 | # via 24 | # ipdb 25 | # ipython 26 | elasticsearch==6.3.1 27 | # via -r requirements.txt 28 | executing==2.0.1 29 | # via stack-data 30 | gunicorn==23.0.0 31 | # via -r requirements.txt 32 | h-pyramid-sentry==1.2.4 33 | # via -r requirements.txt 34 | hupper==1.12 35 | # via 36 | # -r requirements.txt 37 | # pyramid 38 | idna==3.7 39 | # via 40 | # -r requirements.txt 41 | # requests 42 | importlib-metadata==7.0.1 43 | # via pip-sync-faster 44 | ipdb==0.13.13 45 | # via -r dev.in 46 | ipython==9.2.0 47 | # via 48 | # -r dev.in 49 | # ipdb 50 | ipython-pygments-lexers==1.1.1 51 | # via ipython 52 | jedi==0.19.1 53 | # via ipython 54 | jinja2==3.1.6 55 | # via 56 | # -r requirements.txt 57 | # pyramid-jinja2 58 | markupsafe==2.1.3 59 | # via 60 | # -r requirements.txt 61 | # jinja2 62 | # pyramid-jinja2 63 | matplotlib-inline==0.1.6 64 | # via ipython 65 | newrelic==10.10.0 66 | # via -r requirements.txt 67 | packaging==23.2 68 | # via 69 | # -r requirements.txt 70 | # build 71 | # gunicorn 72 | parso==0.8.3 73 | # via jedi 
74 | pastedeploy==3.1.0 75 | # via 76 | # -r requirements.txt 77 | # plaster-pastedeploy 78 | pexpect==4.9.0 79 | # via ipython 80 | pip-sync-faster==0.0.5 81 | # via -r dev.in 82 | pip-tools==7.4.1 83 | # via 84 | # -r dev.in 85 | # pip-sync-faster 86 | plaster==1.1.2 87 | # via 88 | # -r requirements.txt 89 | # plaster-pastedeploy 90 | # pyramid 91 | plaster-pastedeploy==1.0.1 92 | # via 93 | # -r requirements.txt 94 | # pyramid 95 | prompt-toolkit==3.0.43 96 | # via ipython 97 | ptyprocess==0.7.0 98 | # via pexpect 99 | pure-eval==0.2.2 100 | # via stack-data 101 | pygments==2.17.2 102 | # via 103 | # ipython 104 | # ipython-pygments-lexers 105 | pyproject-hooks==1.0.0 106 | # via 107 | # build 108 | # pip-tools 109 | pyramid==2.0.2 110 | # via 111 | # -r requirements.txt 112 | # h-pyramid-sentry 113 | # pyramid-jinja2 114 | pyramid-jinja2==2.10.1 115 | # via -r requirements.txt 116 | requests==2.32.3 117 | # via -r requirements.txt 118 | sentry-sdk==2.27.0 119 | # via 120 | # -r requirements.txt 121 | # h-pyramid-sentry 122 | six==1.16.0 123 | # via asttokens 124 | stack-data==0.6.3 125 | # via ipython 126 | supervisor==4.2.5 127 | # via -r dev.in 128 | traitlets==5.14.1 129 | # via 130 | # ipython 131 | # matplotlib-inline 132 | translationstring==1.4 133 | # via 134 | # -r requirements.txt 135 | # pyramid 136 | typing-extensions==4.10.0 137 | # via ipython 138 | urllib3==2.2.2 139 | # via 140 | # -r requirements.txt 141 | # elasticsearch 142 | # requests 143 | # sentry-sdk 144 | venusian==3.1.0 145 | # via 146 | # -r requirements.txt 147 | # pyramid 148 | wcwidth==0.2.13 149 | # via prompt-toolkit 150 | webob==1.8.8 151 | # via 152 | # -r requirements.txt 153 | # pyramid 154 | wheel==0.42.0 155 | # via pip-tools 156 | zipp==3.19.1 157 | # via importlib-metadata 158 | zope-deprecation==5.0 159 | # via 160 | # -r requirements.txt 161 | # pyramid 162 | # pyramid-jinja2 163 | zope-interface==6.1 164 | # via 165 | # -r requirements.txt 166 | # pyramid 167 | 168 | 
# The following packages are considered to be unsafe in a requirements file: 169 | pip==23.3.2 170 | # via pip-tools 171 | setuptools==78.1.1 172 | # via 173 | # -r requirements.txt 174 | # pip-tools 175 | # pyramid 176 | # supervisor 177 | # zope-deprecation 178 | # zope-interface 179 | -------------------------------------------------------------------------------- /requirements/format.in: -------------------------------------------------------------------------------- 1 | pip-tools 2 | pip-sync-faster 3 | black 4 | isort 5 | -------------------------------------------------------------------------------- /requirements/format.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --allow-unsafe requirements/format.in 6 | # 7 | black==25.1.0 8 | # via -r format.in 9 | build==1.0.3 10 | # via pip-tools 11 | click==8.1.7 12 | # via 13 | # black 14 | # pip-tools 15 | importlib-metadata==7.0.1 16 | # via pip-sync-faster 17 | isort==6.0.1 18 | # via -r format.in 19 | mypy-extensions==1.0.0 20 | # via black 21 | packaging==23.2 22 | # via 23 | # black 24 | # build 25 | pathspec==0.12.1 26 | # via black 27 | pip-sync-faster==0.0.5 28 | # via -r format.in 29 | pip-tools==7.4.1 30 | # via 31 | # -r format.in 32 | # pip-sync-faster 33 | platformdirs==4.1.0 34 | # via black 35 | pyproject-hooks==1.0.0 36 | # via 37 | # build 38 | # pip-tools 39 | wheel==0.42.0 40 | # via pip-tools 41 | zipp==3.19.1 42 | # via importlib-metadata 43 | 44 | # The following packages are considered to be unsafe in a requirements file: 45 | pip==23.3.2 46 | # via pip-tools 47 | setuptools==78.1.1 48 | # via pip-tools 49 | -------------------------------------------------------------------------------- /requirements/functests.in: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 3 | 
pip-tools 4 | pip-sync-faster 5 | pytest 6 | webtest 7 | h_matchers 8 | -------------------------------------------------------------------------------- /requirements/functests.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --allow-unsafe requirements/functests.in 6 | # 7 | beautifulsoup4==4.12.2 8 | # via webtest 9 | build==1.0.3 10 | # via pip-tools 11 | certifi==2025.4.26 12 | # via 13 | # -r requirements.txt 14 | # requests 15 | # sentry-sdk 16 | charset-normalizer==3.3.2 17 | # via 18 | # -r requirements.txt 19 | # requests 20 | click==8.1.7 21 | # via pip-tools 22 | elasticsearch==6.3.1 23 | # via -r requirements.txt 24 | gunicorn==23.0.0 25 | # via -r requirements.txt 26 | h-matchers==1.2.17 27 | # via -r functests.in 28 | h-pyramid-sentry==1.2.4 29 | # via -r requirements.txt 30 | hupper==1.12 31 | # via 32 | # -r requirements.txt 33 | # pyramid 34 | idna==3.7 35 | # via 36 | # -r requirements.txt 37 | # requests 38 | importlib-metadata==7.0.1 39 | # via pip-sync-faster 40 | iniconfig==2.0.0 41 | # via pytest 42 | jinja2==3.1.6 43 | # via 44 | # -r requirements.txt 45 | # pyramid-jinja2 46 | markupsafe==2.1.3 47 | # via 48 | # -r requirements.txt 49 | # jinja2 50 | # pyramid-jinja2 51 | newrelic==10.10.0 52 | # via -r requirements.txt 53 | packaging==23.2 54 | # via 55 | # -r requirements.txt 56 | # build 57 | # gunicorn 58 | # pytest 59 | pastedeploy==3.1.0 60 | # via 61 | # -r requirements.txt 62 | # plaster-pastedeploy 63 | pip-sync-faster==0.0.5 64 | # via -r functests.in 65 | pip-tools==7.4.1 66 | # via 67 | # -r functests.in 68 | # pip-sync-faster 69 | plaster==1.1.2 70 | # via 71 | # -r requirements.txt 72 | # plaster-pastedeploy 73 | # pyramid 74 | plaster-pastedeploy==1.0.1 75 | # via 76 | # -r requirements.txt 77 | # pyramid 78 | pluggy==1.5.0 79 | # via pytest 80 | 
pyproject-hooks==1.0.0 81 | # via 82 | # build 83 | # pip-tools 84 | pyramid==2.0.2 85 | # via 86 | # -r requirements.txt 87 | # h-pyramid-sentry 88 | # pyramid-jinja2 89 | pyramid-jinja2==2.10.1 90 | # via -r requirements.txt 91 | pytest==8.3.5 92 | # via -r functests.in 93 | requests==2.32.3 94 | # via -r requirements.txt 95 | sentry-sdk==2.27.0 96 | # via 97 | # -r requirements.txt 98 | # h-pyramid-sentry 99 | soupsieve==2.5 100 | # via beautifulsoup4 101 | translationstring==1.4 102 | # via 103 | # -r requirements.txt 104 | # pyramid 105 | urllib3==2.2.2 106 | # via 107 | # -r requirements.txt 108 | # elasticsearch 109 | # requests 110 | # sentry-sdk 111 | venusian==3.1.0 112 | # via 113 | # -r requirements.txt 114 | # pyramid 115 | waitress==3.0.2 116 | # via webtest 117 | webob==1.8.8 118 | # via 119 | # -r requirements.txt 120 | # pyramid 121 | # webtest 122 | webtest==3.0.4 123 | # via -r functests.in 124 | wheel==0.42.0 125 | # via pip-tools 126 | zipp==3.19.1 127 | # via importlib-metadata 128 | zope-deprecation==5.0 129 | # via 130 | # -r requirements.txt 131 | # pyramid 132 | # pyramid-jinja2 133 | zope-interface==6.1 134 | # via 135 | # -r requirements.txt 136 | # pyramid 137 | 138 | # The following packages are considered to be unsafe in a requirements file: 139 | pip==23.3.2 140 | # via pip-tools 141 | setuptools==78.1.1 142 | # via 143 | # -r requirements.txt 144 | # pip-tools 145 | # pyramid 146 | # zope-deprecation 147 | # zope-interface 148 | -------------------------------------------------------------------------------- /requirements/lint.in: -------------------------------------------------------------------------------- 1 | pip-tools 2 | pip-sync-faster 3 | flake8 4 | -------------------------------------------------------------------------------- /requirements/lint.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following 
command: 4 | # 5 | # pip-compile --allow-unsafe requirements/lint.in 6 | # 7 | build==1.0.3 8 | # via pip-tools 9 | click==8.1.7 10 | # via pip-tools 11 | flake8==7.2.0 12 | # via -r lint.in 13 | importlib-metadata==7.0.1 14 | # via pip-sync-faster 15 | mccabe==0.7.0 16 | # via flake8 17 | packaging==23.2 18 | # via build 19 | pip-sync-faster==0.0.5 20 | # via -r lint.in 21 | pip-tools==7.4.1 22 | # via 23 | # -r lint.in 24 | # pip-sync-faster 25 | pycodestyle==2.13.0 26 | # via flake8 27 | pyflakes==3.3.2 28 | # via flake8 29 | pyproject-hooks==1.0.0 30 | # via 31 | # build 32 | # pip-tools 33 | wheel==0.42.0 34 | # via pip-tools 35 | zipp==3.19.1 36 | # via importlib-metadata 37 | 38 | # The following packages are considered to be unsafe in a requirements file: 39 | pip==23.3.2 40 | # via pip-tools 41 | setuptools==78.1.1 42 | # via pip-tools 43 | -------------------------------------------------------------------------------- /requirements/requirements.in: -------------------------------------------------------------------------------- 1 | certifi 2 | elasticsearch==6.3.1 3 | gunicorn 4 | sentry-sdk 5 | h-pyramid-sentry 6 | pyramid 7 | pyramid-jinja2 8 | requests 9 | newrelic 10 | -------------------------------------------------------------------------------- /requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --allow-unsafe requirements/requirements.in 6 | # 7 | certifi==2025.4.26 8 | # via 9 | # -r requirements.in 10 | # requests 11 | # sentry-sdk 12 | charset-normalizer==3.3.2 13 | # via requests 14 | elasticsearch==6.3.1 15 | # via -r requirements.in 16 | gunicorn==23.0.0 17 | # via -r requirements.in 18 | h-pyramid-sentry==1.2.4 19 | # via -r requirements.in 20 | hupper==1.12 21 | # via pyramid 22 | idna==3.7 23 | # via requests 24 | jinja2==3.1.6 25 | # via 
pyramid-jinja2 26 | markupsafe==2.1.3 27 | # via 28 | # jinja2 29 | # pyramid-jinja2 30 | newrelic==10.10.0 31 | # via -r requirements.in 32 | packaging==23.2 33 | # via gunicorn 34 | pastedeploy==3.1.0 35 | # via plaster-pastedeploy 36 | plaster==1.1.2 37 | # via 38 | # plaster-pastedeploy 39 | # pyramid 40 | plaster-pastedeploy==1.0.1 41 | # via pyramid 42 | pyramid==2.0.2 43 | # via 44 | # -r requirements.in 45 | # h-pyramid-sentry 46 | # pyramid-jinja2 47 | pyramid-jinja2==2.10.1 48 | # via -r requirements.in 49 | requests==2.32.3 50 | # via -r requirements.in 51 | sentry-sdk==2.27.0 52 | # via 53 | # -r requirements.in 54 | # h-pyramid-sentry 55 | translationstring==1.4 56 | # via pyramid 57 | urllib3==2.2.2 58 | # via 59 | # elasticsearch 60 | # requests 61 | # sentry-sdk 62 | venusian==3.1.0 63 | # via pyramid 64 | webob==1.8.8 65 | # via pyramid 66 | zope-deprecation==5.0 67 | # via 68 | # pyramid 69 | # pyramid-jinja2 70 | zope-interface==6.1 71 | # via pyramid 72 | 73 | # The following packages are considered to be unsafe in a requirements file: 74 | setuptools==78.1.1 75 | # via 76 | # pyramid 77 | # zope-deprecation 78 | # zope-interface 79 | -------------------------------------------------------------------------------- /requirements/tests.in: -------------------------------------------------------------------------------- 1 | pip-tools 2 | pip-sync-faster 3 | coverage 4 | pytest 5 | factory-boy 6 | mock 7 | -r requirements.txt 8 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --allow-unsafe requirements/tests.in 6 | # 7 | build==1.0.3 8 | # via pip-tools 9 | certifi==2025.4.26 10 | # via 11 | # -r requirements.txt 12 | # requests 13 | # sentry-sdk 14 | charset-normalizer==3.3.2 15 | # 
via 16 | # -r requirements.txt 17 | # requests 18 | click==8.1.7 19 | # via pip-tools 20 | coverage==7.8.0 21 | # via -r tests.in 22 | elasticsearch==6.3.1 23 | # via -r requirements.txt 24 | factory-boy==3.3.3 25 | # via -r tests.in 26 | faker==22.2.0 27 | # via factory-boy 28 | gunicorn==23.0.0 29 | # via -r requirements.txt 30 | h-pyramid-sentry==1.2.4 31 | # via -r requirements.txt 32 | hupper==1.12 33 | # via 34 | # -r requirements.txt 35 | # pyramid 36 | idna==3.7 37 | # via 38 | # -r requirements.txt 39 | # requests 40 | importlib-metadata==7.0.1 41 | # via pip-sync-faster 42 | iniconfig==2.0.0 43 | # via pytest 44 | jinja2==3.1.6 45 | # via 46 | # -r requirements.txt 47 | # pyramid-jinja2 48 | markupsafe==2.1.3 49 | # via 50 | # -r requirements.txt 51 | # jinja2 52 | # pyramid-jinja2 53 | mock==5.2.0 54 | # via -r tests.in 55 | newrelic==10.10.0 56 | # via -r requirements.txt 57 | packaging==23.2 58 | # via 59 | # -r requirements.txt 60 | # build 61 | # gunicorn 62 | # pytest 63 | pastedeploy==3.1.0 64 | # via 65 | # -r requirements.txt 66 | # plaster-pastedeploy 67 | pip-sync-faster==0.0.5 68 | # via -r tests.in 69 | pip-tools==7.4.1 70 | # via 71 | # -r tests.in 72 | # pip-sync-faster 73 | plaster==1.1.2 74 | # via 75 | # -r requirements.txt 76 | # plaster-pastedeploy 77 | # pyramid 78 | plaster-pastedeploy==1.0.1 79 | # via 80 | # -r requirements.txt 81 | # pyramid 82 | pluggy==1.5.0 83 | # via pytest 84 | pyproject-hooks==1.0.0 85 | # via 86 | # build 87 | # pip-tools 88 | pyramid==2.0.2 89 | # via 90 | # -r requirements.txt 91 | # h-pyramid-sentry 92 | # pyramid-jinja2 93 | pyramid-jinja2==2.10.1 94 | # via -r requirements.txt 95 | pytest==8.3.5 96 | # via -r tests.in 97 | python-dateutil==2.8.2 98 | # via faker 99 | requests==2.32.3 100 | # via -r requirements.txt 101 | sentry-sdk==2.27.0 102 | # via 103 | # -r requirements.txt 104 | # h-pyramid-sentry 105 | six==1.16.0 106 | # via python-dateutil 107 | translationstring==1.4 108 | # via 109 | # -r 
requirements.txt 110 | # pyramid 111 | urllib3==2.2.2 112 | # via 113 | # -r requirements.txt 114 | # elasticsearch 115 | # requests 116 | # sentry-sdk 117 | venusian==3.1.0 118 | # via 119 | # -r requirements.txt 120 | # pyramid 121 | webob==1.8.8 122 | # via 123 | # -r requirements.txt 124 | # pyramid 125 | wheel==0.42.0 126 | # via pip-tools 127 | zipp==3.19.1 128 | # via importlib-metadata 129 | zope-deprecation==5.0 130 | # via 131 | # -r requirements.txt 132 | # pyramid 133 | # pyramid-jinja2 134 | zope-interface==6.1 135 | # via 136 | # -r requirements.txt 137 | # pyramid 138 | 139 | # The following packages are considered to be unsafe in a requirements file: 140 | pip==23.3.2 141 | # via pip-tools 142 | setuptools==78.1.1 143 | # via 144 | # -r requirements.txt 145 | # pip-tools 146 | # pyramid 147 | # zope-deprecation 148 | # zope-interface 149 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pep257] 2 | ignore = D202 3 | explain = true 4 | 5 | [yapf] 6 | based_on_style = pep8 7 | 8 | [flake8] 9 | max-line-length = 160 10 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypothesis/bouncer/db2237bced039415dc41af4bc9f297d94cf4e449/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import functools 2 | from unittest import mock 3 | 4 | import pytest 5 | 6 | 7 | def autopatcher(request, target, **kwargs): 8 | """Patch and cleanup automatically. 
Wraps :py:func:`mock.patch`.""" 9 | options = {"autospec": True} 10 | options.update(kwargs) 11 | patcher = mock.patch(target, **options) 12 | obj = patcher.start() 13 | request.addfinalizer(patcher.stop) 14 | return obj 15 | 16 | 17 | @pytest.fixture 18 | def patch(request): 19 | return functools.partial(autopatcher, request) 20 | -------------------------------------------------------------------------------- /tests/functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypothesis/bouncer/db2237bced039415dc41af4bc9f297d94cf4e449/tests/functional/__init__.py -------------------------------------------------------------------------------- /tests/functional/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from webtest import TestApp 3 | 4 | from bouncer.app import create_app 5 | 6 | 7 | @pytest.fixture 8 | def app(): 9 | return TestApp(create_app()) 10 | -------------------------------------------------------------------------------- /tests/functional/views/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypothesis/bouncer/db2237bced039415dc41af4bc9f297d94cf4e449/tests/functional/views/__init__.py -------------------------------------------------------------------------------- /tests/functional/views/healthcheck_test.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | import pytest 4 | 5 | 6 | class TestHealthcheck: 7 | def test_it(self, app): 8 | response = app.get("/_status", status=200) 9 | 10 | assert response.content_type == "application/json" 11 | assert response.json == {"status": "okay"} 12 | assert ( 13 | response.headers["Cache-Control"] 14 | == "max-age=0, must-revalidate, no-cache, no-store" 15 | ) 16 | 17 | @pytest.fixture(autouse=True) 18 | def 
Elasticsearch(self, patch): 19 | Elasticsearch = patch("bouncer.search.Elasticsearch") 20 | Elasticsearch.return_value.cluster = Mock() 21 | Elasticsearch.return_value.cluster.health.return_value = {"status": "green"} 22 | 23 | return Elasticsearch 24 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypothesis/bouncer/db2237bced039415dc41af4bc9f297d94cf4e449/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/bouncer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypothesis/bouncer/db2237bced039415dc41af4bc9f297d94cf4e449/tests/unit/bouncer/__init__.py -------------------------------------------------------------------------------- /tests/unit/bouncer/app_test.py: -------------------------------------------------------------------------------- 1 | import json 2 | from unittest import mock 3 | 4 | import pytest 5 | from pyramid.config import Configurator 6 | 7 | from bouncer.app import create_app 8 | 9 | 10 | def test_the_default_settings(config, pyramid): 11 | create_app() 12 | 13 | pyramid.config.Configurator.assert_called_once_with( 14 | settings={ 15 | "chrome_extension_id": {"default": "bjfhmglciegochdpefhhlphglcehbmek"}, 16 | "debug": False, 17 | "elasticsearch_index": "hypothesis", 18 | "hypothesis_authority": "localhost", 19 | "hypothesis_url": "https://hypothes.is", 20 | "via_base_url": "https://via.hypothes.is", 21 | } 22 | ) 23 | 24 | 25 | @pytest.mark.parametrize( 26 | "envvar,extension_id", 27 | [ 28 | ("abc", {"default": "abc"}), 29 | ( 30 | json.dumps({"default": "abc", "bar.com": "def"}), 31 | {"default": "abc", "bar.com": "def"}, 32 | ), 33 | ], 34 | ) 35 | def test_chrome_extension_id(config, os, envvar, extension_id, pyramid): 36 | 
os.environ["CHROME_EXTENSION_ID"] = envvar 37 | 38 | create_app() 39 | 40 | settings = pyramid.config.Configurator.call_args_list[0][1]["settings"] 41 | assert settings["chrome_extension_id"] == extension_id 42 | 43 | 44 | def test_raises_if_chrome_extension_id_invalid(config, os, pyramid): 45 | os.environ["CHROME_EXTENSION_ID"] = "{}" 46 | 47 | with pytest.raises( 48 | Exception, match='CHROME_EXTENSION_ID map must have a "default" key' 49 | ): 50 | create_app() 51 | 52 | 53 | @pytest.mark.parametrize( 54 | "envvar,base_url", 55 | [ 56 | # Trailing slashes should be stripped. 57 | ("https://via.example.com/", "https://via.example.com"), 58 | # A URL without a trailing slash should go through unmodified. 59 | ("http://via.example.com", "http://via.example.com"), 60 | ], 61 | ) 62 | def test_via_base_url(config, os, envvar, base_url, pyramid): 63 | os.environ["VIA_BASE_URL"] = envvar 64 | 65 | create_app() 66 | 67 | settings = pyramid.config.Configurator.call_args_list[0][1]["settings"] 68 | assert settings["via_base_url"] == base_url 69 | 70 | 71 | @pytest.fixture 72 | def config(): 73 | config = mock.create_autospec(Configurator, instance=True) 74 | config.registry = mock.Mock(spec_set=["settings"], settings={}) 75 | return config 76 | 77 | 78 | @pytest.fixture(autouse=True) 79 | def os(patch): 80 | os = patch("bouncer.app.os") 81 | os.environ = {} 82 | return os 83 | 84 | 85 | @pytest.fixture(autouse=True) 86 | def pyramid(config, patch): 87 | pyramid = patch("bouncer.app.pyramid") 88 | pyramid.config.Configurator.return_value = config 89 | return pyramid 90 | -------------------------------------------------------------------------------- /tests/unit/bouncer/embed_detector_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bouncer.embed_detector import url_embeds_client 4 | 5 | 6 | class TestUrlEmbedsClient: 7 | @pytest.mark.parametrize( 8 | "url", 9 | [ 10 | # Matching HTTPS URL. 
11 | "https://web.hypothes.is/blog/article.foo", 12 | # Matching HTTP URL. 13 | "http://web.hypothes.is/blog/article.foo", 14 | # Path omitted. 15 | "http://h.readthedocs.io", 16 | # Matching URLs with ignored query string / fragment. 17 | "http://web.hypothes.is/blog/article.foo?ignore_me=1", 18 | "http://web.hypothes.is/blog/article.foo#ignoreme", 19 | # Example matching URLs for various sites on the list. 20 | "https://docdrop.org/pdf/1Vsd26C0KuMw4Mj1WEBjBz1T8G75vIhWx-gaQEE.pdf/", 21 | "https://docdrop.org/video/AJXGJYl0wJc/", 22 | "https://www.semanticscholar.org/reader/5e331bf7887e2e634bf5b12788849d2d2b74bc7f", 23 | "https://development.semanticscholar.org/reader/5e331bf7887e2e634bf5b12788849d2d2b74bc7f", 24 | "https://staging.semanticscholar.org/reader/5e331bf7887e2e634bf5b12788849d2d2b74bc7f", 25 | ], 26 | ) 27 | def test_returns_true_for_matching_url(self, url): 28 | assert url_embeds_client(url) is True 29 | 30 | @pytest.mark.parametrize( 31 | "url", 32 | [ 33 | # Non-matching domain. 34 | "http://example.com", 35 | # Non-matching path. 36 | "http://web.hypothes.is/help/article.foo", 37 | # Only HTTP* URLs can match. 
38 | "nothttp://test-domain.com/fulltext", 39 | ], 40 | ) 41 | def test_returns_false_for_non_matching_url(self, url): 42 | assert url_embeds_client(url) is False 43 | -------------------------------------------------------------------------------- /tests/unit/bouncer/search_test.py: -------------------------------------------------------------------------------- 1 | from elasticsearch import Elasticsearch 2 | from mock import ANY, MagicMock, patch 3 | 4 | from bouncer.search import get_client, includeme 5 | 6 | 7 | class TestGetClient(object): 8 | def test_returns_client(self): 9 | client = get_client({"elasticsearch_url": "foo:9200"}) 10 | 11 | assert isinstance(client, Elasticsearch) 12 | 13 | @patch("bouncer.search.Elasticsearch") 14 | def test_configures_client(self, es_mock): 15 | get_client({"elasticsearch_url": "foo:9200"}) 16 | 17 | es_mock.assert_called_once_with(["foo:9200"]) 18 | 19 | 20 | def test_includeme(): 21 | configurator = MagicMock() 22 | configurator.registry.settings = {"elasticsearch_url": "foo:9200"} 23 | 24 | includeme(configurator) 25 | 26 | configurator.add_request_method.assert_called_once_with(ANY, name="es", reify=True) 27 | -------------------------------------------------------------------------------- /tests/unit/bouncer/util_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bouncer import util 4 | 5 | 6 | def test_parse_document_raises_if_annotated_deleted(): 7 | # When an annotation is deleted in h it isn't immediately removed from the 8 | # search index. Its Elasticsearch document is temporarily updated to just 9 | # {'deleted': True}. 
10 | with pytest.raises(util.DeletedAnnotationError): 11 | util.parse_document({"_id": "annotation_id", "_source": {"deleted": True}}) 12 | 13 | 14 | def test_parse_document_raises_if_no_uri(es_annotation_doc): 15 | del es_annotation_doc["_source"]["target"][0]["source"] 16 | 17 | with pytest.raises(util.InvalidAnnotationError) as exc: 18 | util.parse_document(es_annotation_doc) 19 | 20 | assert exc.value.reason == "annotation_has_no_uri" 21 | 22 | 23 | def test_parse_document_raises_if_uri_not_a_string(es_annotation_doc): 24 | es_annotation_doc["_source"]["target"][0]["source"] = 52 25 | 26 | with pytest.raises(util.InvalidAnnotationError) as exc: 27 | util.parse_document(es_annotation_doc) 28 | 29 | assert exc.value.reason == "uri_not_a_string" 30 | 31 | 32 | def test_parse_document_returns_annotation_id(es_annotation_doc): 33 | annotation_id = util.parse_document(es_annotation_doc)["annotation_id"] 34 | assert annotation_id == "annotation_id" 35 | 36 | 37 | def test_parse_document_returns_document_uri(es_annotation_doc): 38 | document_uri = util.parse_document(es_annotation_doc)["document_uri"] 39 | assert document_uri == "http://example.com/example.html" 40 | 41 | 42 | def test_parse_document_returns_quote(es_annotation_doc): 43 | es_annotation_doc["_source"]["target"][0]["selector"] = [ 44 | {"type": "TextQuoteSelector", "exact": "test_quote"} 45 | ] 46 | quote = util.parse_document(es_annotation_doc)["quote"] 47 | assert quote == "test_quote" 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "selector", 52 | [ 53 | # No selector (ie. a page note). 54 | None, 55 | # No quote selector. Allowed by the service even though a quote is required 56 | # to anchor the annotation. 
57 | [{"type": "TextPositionSelector"}], 58 | ], 59 | ) 60 | def test_parse_document_returns_boilerplate_quote_when_no_quote( 61 | es_annotation_doc, selector 62 | ): 63 | if selector: 64 | es_annotation_doc["_source"]["target"][0]["selector"] = selector 65 | quote = util.parse_document(es_annotation_doc)["quote"] 66 | assert quote == "Hypothesis annotation for example.com" 67 | 68 | 69 | def test_parse_document_returns_text(es_annotation_doc): 70 | es_annotation_doc["_source"]["text"] = "test_text" 71 | text = util.parse_document(es_annotation_doc)["text"] 72 | assert text == "test_text" 73 | 74 | 75 | def test_parse_document_returns_boilerplate_when_no_text(es_annotation_doc): 76 | text = util.parse_document(es_annotation_doc)["text"] 77 | assert text == util.ANNOTATION_BOILERPLATE_TEXT 78 | 79 | 80 | def test_parse_document_returns_show_metadata_true_when_shared_and_world( 81 | es_annotation_doc, 82 | ): 83 | show_metadata = util.parse_document(es_annotation_doc)["show_metadata"] 84 | assert show_metadata is True 85 | 86 | 87 | def test_parse_document_returns_document_uri_from_web_uri_when_pdf(es_annotation_doc): 88 | es_annotation_doc["_source"]["target"][0]["source"] = "urn:x-pdf:the-fingerprint" 89 | es_annotation_doc["_source"]["document"] = {"web_uri": "http://example.com/foo.pdf"} 90 | 91 | document_uri = util.parse_document(es_annotation_doc)["document_uri"] 92 | 93 | assert document_uri == "http://example.com/foo.pdf" 94 | 95 | 96 | def test_parse_document_raises_when_uri_from_web_uri_not_string_for_pdfs( 97 | es_annotation_doc, 98 | ): 99 | es_annotation_doc["_source"]["target"][0]["source"] = "urn:x-pdf:the-fingerprint" 100 | es_annotation_doc["_source"]["document"] = {"web_uri": 52} 101 | 102 | with pytest.raises(util.InvalidAnnotationError) as exc: 103 | util.parse_document(es_annotation_doc) 104 | 105 | assert exc.value.reason == "uri_not_a_string" 106 | 107 | 108 | def test_parse_document_returns_authority(es_annotation_doc): 109 | authority = 
util.parse_document(es_annotation_doc)["authority"] 110 | assert authority == "hypothes.is" 111 | 112 | 113 | def test_get_pretty_url_for_long_url(): 114 | long_netloc = "https://www.verylongdomainthatkeepsgoingandgoing.com" 115 | 116 | assert "www.verylongdomainthatkeepsgoi…" == util.get_pretty_url(long_netloc) 117 | 118 | 119 | @pytest.mark.parametrize( 120 | "selectors,has_media_time", 121 | [ 122 | ([{"type": "MediaTimeSelector", "start": 10, "end": 20}], True), 123 | ([{}], False), 124 | ], 125 | ) 126 | def test_parse_document_returns_has_media_time( 127 | es_annotation_doc, selectors, has_media_time 128 | ): 129 | es_annotation_doc["_source"]["target"][0]["selector"] = selectors 130 | parsed = util.parse_document(es_annotation_doc) 131 | assert parsed["has_media_time"] == has_media_time 132 | 133 | 134 | @pytest.fixture 135 | def es_annotation_doc(): 136 | """ 137 | Minimal JSON document for an annotation as returned from Elasticsearch. 138 | 139 | This contains only fields which can be assumed to exist on all annotations. 
140 | """ 141 | return { 142 | "_id": "annotation_id", 143 | "_source": { 144 | "authority": "hypothes.is", 145 | "target": [{"source": "http://example.com/example.html"}], 146 | "group": "__world__", 147 | "shared": True, 148 | }, 149 | } 150 | -------------------------------------------------------------------------------- /tests/unit/bouncer/views_test.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import mock 4 | import pytest 5 | from elasticsearch import exceptions as es_exceptions 6 | from pyramid import httpexceptions, testing 7 | 8 | from bouncer import util, views 9 | 10 | 11 | @pytest.mark.usefixtures("parse_document") 12 | class TestAnnotationController(object): 13 | @pytest.mark.parametrize( 14 | "es_version,doc_type", 15 | [ 16 | ("6.2.0", "annotation"), 17 | ("7.10.0", "_doc"), 18 | ], 19 | ) 20 | def test_annotation_calls_get(self, es_version, doc_type): 21 | request = mock_request() 22 | request.es.info.return_value["version"]["number"] = es_version 23 | views.AnnotationController(request).annotation() 24 | 25 | request.es.get.assert_called_once_with( 26 | index="hypothesis", doc_type=doc_type, id="AVLlVTs1f9G3pW-EYc6q" 27 | ) 28 | 29 | def test_annotation_raises_http_not_found_if_annotation_deleted( 30 | self, parse_document 31 | ): 32 | parse_document.side_effect = util.DeletedAnnotationError() 33 | 34 | with pytest.raises(httpexceptions.HTTPNotFound): 35 | views.AnnotationController(mock_request()).annotation() 36 | 37 | def test_annotation_raises_http_not_found_if_get_raises_not_found(self): 38 | request = mock_request() 39 | request.es.get.side_effect = es_exceptions.NotFoundError 40 | 41 | with pytest.raises(httpexceptions.HTTPNotFound): 42 | views.AnnotationController(request).annotation() 43 | 44 | def test_annotation_calls_parse_document(self, parse_document): 45 | request = mock_request() 46 | 47 | views.AnnotationController(request).annotation() 48 | 49 | 
parse_document.assert_called_once_with(request.es.get.return_value) 50 | 51 | def test_annotation_raises_if_parse_document_raises(self, parse_document): 52 | parse_document.side_effect = util.InvalidAnnotationError( 53 | "error message", "the_reason" 54 | ) 55 | 56 | with pytest.raises(httpexceptions.HTTPUnprocessableEntity) as exc: 57 | views.AnnotationController(mock_request()).annotation() 58 | assert str(exc.value) == "error message" 59 | 60 | def test_annotation_raises_http_unprocessable_entity_for_file_urls( 61 | self, parse_document 62 | ): 63 | parse_document.return_value["document_uri"] = "file:///home/seanh/Foo.pdf" 64 | 65 | with pytest.raises(httpexceptions.HTTPUnprocessableEntity): 66 | views.AnnotationController(mock_request()).annotation() 67 | 68 | def test_annotation_returns_chrome_extension_id(self): 69 | template_data = views.AnnotationController(mock_request()).annotation() 70 | data = json.loads(template_data["data"]) 71 | assert data["chromeExtensionId"] == "test-extension-id" 72 | 73 | def test_annotation_returns_chrome_extension_id_for_authority(self, parse_document): 74 | parse_document.return_value["authority"] = "alt.authority" 75 | template_data = views.AnnotationController(mock_request()).annotation() 76 | data = json.loads(template_data["data"]) 77 | assert data["chromeExtensionId"] == "alt-extension-id" 78 | 79 | def test_annotation_returns_quote(self): 80 | template_data = views.AnnotationController(mock_request()).annotation() 81 | quote = template_data["quote"] 82 | assert quote == "Hypothesis annotation for www.example.com" 83 | 84 | def test_annotation_returns_via_url(self): 85 | template_data = views.AnnotationController(mock_request()).annotation() 86 | data = json.loads(template_data["data"]) 87 | assert data["viaUrl"] == ( 88 | "https://via.hypothes.is/http://www.example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q" 89 | ) 90 | 91 | def test_annotation_returns_extension_url(self): 92 | template_data = 
views.AnnotationController(mock_request()).annotation() 93 | data = json.loads(template_data["data"]) 94 | assert data["extensionUrl"] == ( 95 | "http://www.example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q" 96 | ) 97 | 98 | def test_annotation_strips_fragment_identifiers(self, parse_document): 99 | parse_document.return_value["document_uri"] = ( 100 | "http://example.com/example.html#foobar" 101 | ) 102 | template_data = views.AnnotationController(mock_request()).annotation() 103 | 104 | data = json.loads(template_data["data"]) 105 | 106 | assert data["extensionUrl"] == ( 107 | "http://example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q" 108 | ) 109 | assert data["viaUrl"] == ( 110 | "https://via.hypothes.is/http://example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q" 111 | ) 112 | 113 | def test_annotation_strips_bare_fragment_identifiers(self, parse_document): 114 | parse_document.return_value["document_uri"] = "http://example.com/example.html#" 115 | template_data = views.AnnotationController(mock_request()).annotation() 116 | 117 | data = json.loads(template_data["data"]) 118 | 119 | assert data["extensionUrl"] == ( 120 | "http://example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q" 121 | ) 122 | assert data["viaUrl"] == ( 123 | "https://via.hypothes.is/http://example.com/example.html#annotations:AVLlVTs1f9G3pW-EYc6q" 124 | ) 125 | 126 | def test_annotation_omits_via_url_for_third_party_annotations(self, parse_document): 127 | parse_document.return_value["authority"] = "partner.org" 128 | template_data = views.AnnotationController(mock_request()).annotation() 129 | 130 | data = json.loads(template_data["data"]) 131 | 132 | assert data["viaUrl"] is None 133 | 134 | def test_omits_via_url_if_url_embeds_client(self, url_embeds_client): 135 | url_embeds_client.return_value = True 136 | 137 | template_data = views.AnnotationController(mock_request()).annotation() 138 | data = json.loads(template_data["data"]) 139 | 140 | 
url_embeds_client.assert_called_with("http://www.example.com/example.html") 141 | assert data["viaUrl"] is None 142 | 143 | @pytest.mark.parametrize( 144 | "document_uri,has_media_time,use_via", 145 | [ 146 | # Transcript annotation made using Via 147 | ("https://www.youtube.com/watch?v=mBtsNNXjBPw", True, True), 148 | # Regular annotation made on youtube.com via extension 149 | ("https://www.youtube.com/watch?v=mBtsNNXjBPw", False, False), 150 | # Media time annotation made on a site not supported by Via's video 151 | # transcript feature. 152 | ("https://example.com", True, False), 153 | ], 154 | ) 155 | def test_always_uses_via_for_transcript_annotations( 156 | self, parse_document, document_uri, has_media_time, use_via 157 | ): 158 | parse_document.return_value["document_uri"] = document_uri 159 | parse_document.return_value["has_media_time"] = has_media_time 160 | template_data = views.AnnotationController(mock_request()).annotation() 161 | 162 | data = json.loads(template_data["data"]) 163 | 164 | assert data["alwaysUseVia"] == use_via 165 | 166 | 167 | class TestGotoUrlController(object): 168 | def test_it_shows_redirect_page(self): 169 | request = mock_request() 170 | request.GET["url"] = "https://example.com/" 171 | 172 | ctx = views.goto_url(request) 173 | 174 | assert ctx == { 175 | "data": json.dumps( 176 | { 177 | "chromeExtensionId": "test-extension-id", 178 | "viaUrl": "https://via.hypothes.is/https://example.com/#annotations:query:", 179 | "extensionUrl": "https://example.com/#annotations:query:", 180 | } 181 | ), 182 | "pretty_url": "example.com", 183 | } 184 | 185 | def test_it_sets_query_in_fragment(self): 186 | request = mock_request() 187 | request.GET["url"] = "https://example.com/article.html" 188 | request.GET["q"] = "user:jsmith" 189 | 190 | ctx = views.goto_url(request) 191 | 192 | data = json.loads(ctx["data"]) 193 | expected_frag = "#annotations:query:user%3Ajsmith" 194 | assert data["viaUrl"].endswith(expected_frag) 195 | assert 
data["extensionUrl"].endswith(expected_frag) 196 | 197 | def test_it_sets_group_in_fragment(self): 198 | request = mock_request() 199 | request.GET["url"] = "https://example.com/article.html" 200 | request.GET["group"] = "jj333e" 201 | 202 | ctx = views.goto_url(request) 203 | 204 | data = json.loads(ctx["data"]) 205 | expected_frag = "#annotations:group:jj333e" 206 | assert data["viaUrl"].endswith(expected_frag) 207 | assert data["extensionUrl"].endswith(expected_frag) 208 | 209 | def test_it_sets_group_in_fragment_if_both_group_and_query_present(self): 210 | request = mock_request() 211 | request.GET["url"] = "https://example.com/article.html" 212 | request.GET["q"] = "findme" 213 | request.GET["group"] = "jj333e" 214 | 215 | ctx = views.goto_url(request) 216 | 217 | data = json.loads(ctx["data"]) 218 | expected_frag = "#annotations:group:jj333e" 219 | assert data["viaUrl"].endswith(expected_frag) 220 | assert data["extensionUrl"].endswith(expected_frag) 221 | 222 | def test_it_rejects_invalid_or_missing_urls(self): 223 | invalid_urls = [ 224 | None, 225 | # Unsupported protocols. 226 | "ftp://foo.bar", 227 | "doi:10.1.2/345", 228 | "file://foo.bar", 229 | # Malformed URLs. 
230 | r"http://goo\[g", 231 | ] 232 | 233 | for url in invalid_urls: 234 | request = mock_request() 235 | request.GET["url"] = url 236 | 237 | with pytest.raises(httpexceptions.HTTPBadRequest): 238 | views.goto_url(request) 239 | 240 | def test_it_allows_valid_http_urls(self): 241 | valid_urls = [ 242 | "http://publisher.org", 243 | "https://publisher.org", 244 | "HTTP://PUBLISHER.ORG", 245 | "HTTPS://example.com", 246 | ] 247 | 248 | for url in valid_urls: 249 | request = mock_request() 250 | request.GET["url"] = url 251 | 252 | views.goto_url(request) 253 | 254 | def test_it_strips_existing_fragment(self): 255 | request = mock_request() 256 | request.GET["url"] = "https://example.com/#foobar" 257 | 258 | ctx = views.goto_url(request) 259 | 260 | data = json.loads(ctx["data"]) 261 | assert ( 262 | data["viaUrl"] 263 | == "https://via.hypothes.is/https://example.com/#annotations:query:" 264 | ) 265 | assert data["extensionUrl"] == "https://example.com/#annotations:query:" 266 | 267 | def test_it_does_not_use_via_if_url_embeds_client(self, url_embeds_client): 268 | request = mock_request() 269 | request.GET["url"] = "https://example.com/#foobar" 270 | url_embeds_client.return_value = True 271 | 272 | ctx = views.goto_url(request) 273 | 274 | data = json.loads(ctx["data"]) 275 | url_embeds_client.assert_called_with("https://example.com/") 276 | assert data["viaUrl"] is None 277 | 278 | 279 | class TestErrorController(object): 280 | def test_httperror_sets_status_code(self): 281 | request = mock_request() 282 | 283 | views.ErrorController(httpexceptions.HTTPNotFound(), request).httperror() 284 | 285 | assert request.response.status_int == 404 286 | 287 | def test_httperror_returns_error_message(self): 288 | exc = httpexceptions.HTTPNotFound("Annotation not found") 289 | controller = views.ErrorController(exc, mock_request()) 290 | 291 | template_data = controller.httperror() 292 | 293 | assert template_data["message"] == "Annotation not found" 294 | 295 | def 
test_error_sets_status_code(self): 296 | request = mock_request() 297 | 298 | views.ErrorController(Exception(), request).error() 299 | 300 | assert request.response.status_int == 500 301 | 302 | def test_error_raises_in_debug_mode(self): 303 | request = mock_request() 304 | request.registry.settings["debug"] = True 305 | 306 | with pytest.raises(Exception): 307 | views.ErrorController(Exception(), request).error() 308 | 309 | def test_error_reports_to_sentry(self, h_pyramid_sentry): 310 | request = mock_request() 311 | 312 | views.ErrorController(Exception(), request).error() 313 | 314 | h_pyramid_sentry.report_exception.assert_called_once_with() 315 | 316 | def test_error_returns_error_message(self): 317 | controller = views.ErrorController(Exception(), mock_request()) 318 | 319 | template_data = controller.error() 320 | 321 | assert template_data["message"].startswith("Sorry, but") 322 | 323 | @pytest.fixture(autouse=True) 324 | def h_pyramid_sentry(self, patch): 325 | return patch("bouncer.views.h_pyramid_sentry") 326 | 327 | 328 | class TestHealthcheck(object): 329 | def test_ok(self, capture_message): 330 | request = mock_request() 331 | request.es.cluster.health.return_value = {"status": "green"} 332 | 333 | result = views.healthcheck(request) 334 | 335 | assert result == {"status": "okay"} 336 | capture_message.assert_not_called() 337 | 338 | def test_failed_es_request(self): 339 | request = mock_request() 340 | exc = es_exceptions.ConnectionTimeout() 341 | request.es.cluster.health.side_effect = exc 342 | 343 | with pytest.raises(views.FailedHealthcheck) as e: 344 | views.healthcheck(request) 345 | 346 | assert e.value.__cause__ == exc 347 | 348 | def test_wrong_cluster_status(self): 349 | request = mock_request() 350 | request.es.cluster.health.return_value = {"status": "red"} 351 | 352 | with pytest.raises(views.FailedHealthcheck) as e: 353 | views.healthcheck(request) 354 | 355 | assert "cluster status" in str(e.value) 356 | 357 | def test_sentry(self, 
capture_message): 358 | request = mock_request() 359 | request.params["sentry"] = "" 360 | request.es.cluster.health.return_value = {"status": "green"} 361 | 362 | views.healthcheck(request) 363 | 364 | capture_message.assert_called_once_with( 365 | "Test message from the healthcheck() view" 366 | ) 367 | 368 | 369 | @pytest.fixture 370 | def parse_document(request): 371 | patcher = mock.patch("bouncer.views.util.parse_document") 372 | parse_document = patcher.start() 373 | request.addfinalizer(patcher.stop) 374 | parse_document.return_value = { 375 | "annotation_id": "AVLlVTs1f9G3pW-EYc6q", 376 | "authority": "localhost", 377 | "document_uri": "http://www.example.com/example.html", 378 | "has_media_time": False, 379 | "show_metadata": True, 380 | "quote": "Hypothesis annotation for www.example.com", 381 | "text": "test_text", 382 | } 383 | return parse_document 384 | 385 | 386 | def mock_request(): 387 | request = testing.DummyRequest() 388 | request.registry.settings = { 389 | "chrome_extension_id": { 390 | "default": "test-extension-id", 391 | "alt.authority": "alt-extension-id", 392 | }, 393 | "debug": False, 394 | "elasticsearch_url": "http://localhost:9200", 395 | "elasticsearch_index": "hypothesis", 396 | "hypothesis_authority": "localhost", 397 | "hypothesis_url": "https://hypothes.is", 398 | "via_base_url": "https://via.hypothes.is", 399 | } 400 | request.matchdict = {"id": "AVLlVTs1f9G3pW-EYc6q"} 401 | request.es = mock.Mock() 402 | 403 | request.es.get.return_value = { 404 | "_id": "AVLlVTs1f9G3pW-EYc6q", 405 | "_source": { 406 | "target": [{"source": "http://example.com/example.html", "selector": []}], 407 | "uri": "http://www.example.com/example.html", 408 | "group": "__world__", 409 | }, 410 | } 411 | request.es.info.return_value = { 412 | "version": { 413 | "number": "6.2.0", 414 | } 415 | } 416 | request.raven = mock.Mock() 417 | return request 418 | 419 | 420 | @pytest.fixture(autouse=True) 421 | def url_embeds_client(): 422 | patcher = 
mock.patch("bouncer.views.url_embeds_client") 423 | url_embeds_client = patcher.start() 424 | url_embeds_client.return_value = False 425 | 426 | yield url_embeds_client 427 | 428 | patcher.stop() 429 | 430 | 431 | @pytest.fixture(autouse=True) 432 | def capture_message(patch): 433 | return patch("bouncer.views.capture_message") 434 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = tests 3 | skipsdist = true 4 | requires = 5 | tox>=3.8.0,<4 6 | tox-faster 7 | tox-pyenv 8 | tox-run-command 9 | tox_pyenv_fallback = false 10 | 11 | [testenv] 12 | skip_install = true 13 | setenv = 14 | dev: DEBUG = {env:DEBUG:yes} 15 | dev: HYPOTHESIS_AUTHORITY = {env:HYPOTHESIS_AUTHORITY:localhost} 16 | dev: HYPOTHESIS_URL = {env:HYPOTHESIS_URL:http://localhost:5000} 17 | dev: VIA_BASE_URL = {env:VIA_BASE_URL:http://localhost:9083} 18 | dev: WEB_CONCURRENCY = {env:WEB_CONCURRENCY:2} 19 | dev: SENTRY_ENVIRONMENT = {env:SENTRY_ENVIRONMENT:dev} 20 | passenv = 21 | HOME 22 | dev: CHROME_EXTENSION_ID 23 | dev: SENTRY_DSN 24 | deps = 25 | -r requirements/{env:TOX_ENV_NAME}.txt 26 | whitelist_externals = 27 | dev: gunicorn 28 | depends = 29 | coverage: tests 30 | commands = 31 | pip-sync-faster requirements/{env:TOX_ENV_NAME}.txt --pip-args '--disable-pip-version-check' 32 | dev: {posargs:supervisord -c conf/supervisord-dev.conf} 33 | lint: flake8 . 34 | format: black bouncer tests 35 | format: isort --quiet --atomic bouncer tests 36 | checkformatting: black --check bouncer tests 37 | checkformatting: isort --quiet --check-only bouncer tests 38 | tests: coverage run -m pytest {posargs:tests/unit/} 39 | functests: pytest {posargs:tests/functional/} 40 | coverage: -coverage combine 41 | coverage: coverage report 42 | --------------------------------------------------------------------------------