├── .babelrc ├── .coveragerc ├── .dockerignore ├── .editorconfig ├── .github └── workflows │ ├── mypy.yml │ ├── pkg_test.yml │ └── test_new_api.yml ├── .gitignore ├── .python-version ├── .vscode └── settings.json ├── Dockerfile ├── LICENSE.md ├── Makefile ├── README.md ├── docker-compose.yml ├── newapi ├── api.conf.example ├── build_runner.sh ├── database_upgrade_schema.py ├── debian │ ├── changelog │ ├── control │ ├── copyright │ ├── etc │ │ ├── nginx │ │ │ └── sites-available │ │ │ │ └── ooni-api.conf │ │ └── ooni │ │ │ └── api.conf │ ├── install │ ├── ooni-api-uploader.service │ ├── ooni-api-uploader.timer │ ├── ooni-api.service │ ├── ooni-download-geoip.service │ ├── ooni-download-geoip.timer │ ├── ooni_download_geoip.py │ ├── postinst │ ├── rules │ └── source │ │ └── format ├── mypy.ini ├── ooni_api_uploader.py ├── ooniapi │ ├── README.adoc │ ├── __init__.py │ ├── __main__.py │ ├── app.py │ ├── auth.py │ ├── citizenlab.py │ ├── cli │ │ └── __init__.py │ ├── config.py │ ├── countries │ │ ├── __init__.py │ │ └── country-list.json │ ├── database.py │ ├── markdown │ │ └── api_docs.md │ ├── measurements.py │ ├── models.py │ ├── pages │ │ ├── __init__.py │ │ └── docs.py │ ├── prio.py │ ├── private.py │ ├── probe_services.py │ ├── rate_limit_quotas.py │ ├── static │ │ ├── css │ │ │ └── master.css │ │ └── images │ │ │ ├── API-Horizontal-MonochromeInverted.png │ │ │ ├── API-Horizontal-MonochromeInverted@2x.png │ │ │ ├── API-Horizontal-MonochromeInvertedSmall.png │ │ │ └── API-Horizontal-MonochromeInvertedSmall@2x.png │ ├── templates │ │ ├── 400.html │ │ ├── 404.html │ │ ├── api.html │ │ ├── base.html │ │ ├── footer.html │ │ └── index.html │ ├── utils.py │ ├── views.py │ └── wsgi.py ├── setup.py ├── spawnrunner ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── functional │ │ ├── __init__.py │ │ ├── test_private_explorer.py │ │ └── test_probe_services.py │ ├── integ │ │ ├── README.adoc │ │ ├── __init__.py │ │ ├── bug355.sql │ │ ├── clickhouse_1_schema.sql │ │ ├── 
clickhouse_2_fixtures.sql │ │ ├── data │ │ │ ├── citizenlab.json │ │ │ ├── citizenlab_counters_us.json │ │ │ ├── psiphon_config.json │ │ │ ├── tor_targets.json │ │ │ └── url_priorities_us.json │ │ ├── test_aggregation.py │ │ ├── test_citizenlab.py │ │ ├── test_integration.py │ │ ├── test_integration_auth.py │ │ ├── test_params_validation.py │ │ ├── test_prioritization.py │ │ ├── test_prioritization_nodb.py │ │ ├── test_private_api.py │ │ ├── test_probe_services.py │ │ ├── test_probe_services_nodb.py │ │ ├── test_rate_limiter.py │ │ └── test_torsf_stats.py │ ├── unit │ │ ├── __init__.py │ │ ├── test_auth.py │ │ ├── test_countries.py │ │ ├── test_prio.py │ │ └── test_unit.py │ └── utils.py └── tools │ └── monitor_test_list.py ├── rate_limit_quotas.py ├── refresh_deps ├── scripts ├── init_db.sh └── restore-dump.sh └── setup.cfg /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["es2015"] 3 | } 4 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | source = measurements 5 | 6 | [report] 7 | exclude_lines = 8 | # Have to re-enable the standard pragma 9 | pragma: no cover 10 | 11 | # Don't complain if non-runnable code isn't run: 12 | if 0: 13 | if False: 14 | 15 | ignore_errors = True 16 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/* 2 | .idea 3 | node_modules 4 | __pycache__ 5 | *.pyc 6 | secrets/ 7 | config/secrets.env 8 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [{Makefile, makefile, GNUmakefile}] 2 | indent_style = tab 3 | indent_size = 4 4 | 
-------------------------------------------------------------------------------- /.github/workflows/mypy.yml: -------------------------------------------------------------------------------- 1 | name: Mypy test 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | test: 6 | runs-on: ubuntu-latest 7 | container: debian:10 8 | 9 | steps: 10 | - name: Check out repository code 11 | uses: actions/checkout@v2 12 | 13 | - name: Setup APT 14 | run: | 15 | apt-get update 16 | apt-get install --no-install-recommends -y ca-certificates gnupg 17 | rm -f /etc/apt/sources.list.d/* 18 | cat < /etc/apt/sources.list.d/backports.list 26 | apt-get update -q 27 | apt-get install -y --no-install-recommends git python3 python3-requests python3-gnupg s3cmd wget 28 | 29 | - name: Fetch debops-ci 30 | run: | 31 | wget https://raw.githubusercontent.com/ooni/sysadmin/master/tools/debops-ci 32 | chmod +x debops-ci 33 | 34 | - name: Build the package 35 | run: ./debops-ci --show-commands ci --bucket-name ooni-internal-deb 36 | env: 37 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 38 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 39 | DEB_GPG_KEY: ${{ secrets.DEB_GPG_KEY }} 40 | 41 | # end_to_end_test: 42 | # # Deploy API + fastpath + database + ooniprobe 43 | # needs: create_deb_package 44 | # runs-on: "ubuntu-20.04" 45 | # # Primary container image where all commands run 46 | # container: debian:10 47 | # 48 | # services: 49 | # postgres: 50 | # image: postgres 51 | # ports: 52 | # - 5432:5432 53 | # env: 54 | # POSTGRES_PASSWORD: postgres 55 | # POSTGRES_HOST_AUTH_METHOD: trust 56 | # options: >- 57 | # --health-cmd pg_isready 58 | # --health-interval 10s 59 | # --health-timeout 5s 60 | # --health-retries 5 61 | # 62 | # steps: 63 | # - name: Install git 64 | # run: | 65 | # apt-get update -q 66 | # apt-get install -y git 67 | # 68 | # - name: Check out repository code 69 | # uses: actions/checkout@v2 70 | # with: 71 | # fetch-depth: 0 72 | # 73 | # - name: Install 
depedencies 74 | # run: | 75 | # whoami 76 | # DEBIAN_FRONTEND=noninteractive apt-get update 77 | # DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates gnupg 78 | # echo "deb http://deb-ci.ooni.org unstable main" > /etc/apt/sources.list.d/deb-ci.list 79 | # echo "deb http://deb.debian.org/debian buster-backports main" > /etc/apt/sources.list.d/backports.list 80 | # apt-key adv --verbose --keyserver hkp://keyserver.ubuntu.com --recv-keys "B5A08F01796E7F521861B449372D1FF271F2DD50" 81 | # DEBIAN_FRONTEND=noninteractive apt-get update 82 | # DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y openssh-client ssl-cert 83 | # mkdir -p /run/nodeexp/ 84 | # 85 | # - name: Install fastpath analysis and ooni-api 86 | # run: DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y fastpath analysis ooni-api 87 | -------------------------------------------------------------------------------- /.github/workflows/test_new_api.yml: -------------------------------------------------------------------------------- 1 | name: 'local test' 2 | on: 3 | pull_request: 4 | workflow_dispatch: 5 | inputs: 6 | debug_enabled: 7 | description: 'Run the build with tmate debugging enabled' 8 | required: false 9 | default: false 10 | 11 | jobs: 12 | integration_test: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Check out repository code 16 | uses: actions/checkout@v2 17 | 18 | - name: Setup tmate session 19 | uses: mxschmitt/action-tmate@v3 20 | if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled }} 21 | with: 22 | limit-access-to-actor: true 23 | 24 | - name: Build docker image 25 | run: make build 26 | 27 | - name: Setup database fixtures and run fastpath to populate the DB 28 | run: make initdb 29 | 30 | - name: Run all tests 31 | run: T="--show-capture=no -s -vv" make tests 32 | #run: T="--show-capture=no -s -vv --junitxml=pytest.xml" make tests 33 | 34 | #- name: debug docker 35 | # if: always() 36 | 
# run: docker ps -a 37 | 38 | # - run: find / -name pytest.xml 2> /dev/null 39 | # if: success() || failure() # run even if previous step failed 40 | 41 | # - name: Test report 42 | # uses: dorny/test-reporter@v1 43 | # if: success() || failure() # run even if previous step failed 44 | # with: 45 | # name: Test report 46 | # path: '/home/runner/work/api/api/newapi/pytest.xml' 47 | # reporter: java-junit # compatible with pytest --junitxml 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.py[co] 3 | __pycache__ 4 | 5 | .coverage 6 | .cache/ 7 | .idea/ 8 | .state/ 9 | .vscode/ 10 | 11 | ooni_measurements.db 12 | /data/ 13 | 14 | node_modules/ 15 | package-lock.json 16 | 17 | # Travis secrets 18 | secrets/id_rsa_travis 19 | secrets/secrets.env 20 | secrets/secrets.tar 21 | config/secrets.env 22 | 23 | make.conf 24 | venv/ 25 | .tox 26 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.7.9 2 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "/Users/art/.virtualenvs/ooni-api/bin/python" 3 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:bullseye 2 | ENV PYTHONUNBUFFERED 1 3 | ENV PYTHONPATH /app/ 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | RUN mkdir /scripts 7 | COPY newapi/build_runner.sh /scripts 8 | COPY newapi/debian/ooni_download_geoip.py /scripts 9 | COPY newapi/api.conf.example /scripts 10 | WORKDIR /scripts 11 | 12 | # Run runner setup 13 | RUN ./build_runner.sh 14 | 
15 | # Download geoip files 16 | RUN ./ooni_download_geoip.py 17 | 18 | RUN rm -rf /scripts 19 | 20 | # Copy code and conf into the container 21 | COPY newapi /app/ 22 | # Set our work directory to our app directory 23 | WORKDIR /app/ 24 | 25 | EXPOSE 8000 26 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2019 Open Observatory of Network Interference (OONI), The Tor Project 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # This file is used to run tests manually and from .github/workflows/test_new_api.yml 3 | # 4 | TESTARGS ?= tests/functional/test_private_explorer.py tests/integ/test_aggregation.py tests/integ/test_citizenlab.py tests/integ/test_integration.py tests/integ/test_integration_auth.py tests/integ/test_prioritization.py tests/integ/test_private_api.py tests/integ/test_probe_services.py tests/unit/test_prio.py tests/integ/test_params_validation.py tests/functional/test_probe_services.py 5 | 6 | #tests/integ/test_prioritization_nodb.py 7 | #tests/integ/test_probe_services_nodb.py 8 | #tests/integ/test_integration_auth.py 9 | 10 | .state/docker-build: Dockerfile 11 | docker-compose build --force-rm api 12 | mkdir -p .state 13 | touch .state/docker-build 14 | 15 | serve: .state/docker-build 16 | docker-compose up --remove-orphans 17 | 18 | build: 19 | @$(MAKE) .state/docker-build 20 | 21 | initdb: 22 | # Setup database fixtures 23 | # Fetch fingerprints from github 24 | # run fastpath to populate the DB 25 | docker-compose run --rm api python3 -m pytest --setup-only --create-db -s 26 | 27 | tests: .state/docker-build 28 | docker-compose run --rm api python3 -m pytest $(T) $(TESTARGS) 29 | 30 | .PHONY: build initdb tests serve 31 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OONI API 2 | 3 | Source for https://api.ooni.io/ 4 | 5 | File bugs with the API inside of: https://github.com/ooni/backend/issues/new 6 | 7 | ## Local development 8 | 9 | You can run the OONI API locally in a development environment using `docker` 10 | and `docker-compose`. Follow the instructions below to set it up. 11 | 12 | ### Quickstart 13 | 14 | First you should build the docker image for the API: 15 | ``` 16 | make build 17 | ``` 18 | 19 | This only needs to be run once, or any time you make changes to the 20 | dependencies in the `newapi/build_runnner.sh` script. 21 | 22 | To populate the database with some sample data (this is needed for running many 23 | of the tests), you should run: 24 | ``` 25 | make initdb 26 | ``` 27 | 28 | This also needs to only be run once. 29 | 30 | At this point you have a fully setup development environment. 
31 | 32 | You can run the full test suite via: 33 | ``` 34 | make tests 35 | ``` 36 | 37 | If you care to only run a specific test, that can be done using the `pytest` 38 | `-k` option, passed in as a T env var to `make`: 39 | ``` 40 | T="-k test_my_test_name" make tests 41 | ``` 42 | 43 | If you want to run a local instance of the OONI API, this can be done via: 44 | ``` 45 | make serve 46 | ``` 47 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | clickhouse: 5 | image: clickhouse/clickhouse-server:22.8.13.20 6 | hostname: clickhouse 7 | container_name: clickhouse 8 | ports: 9 | - "8123:8123" 10 | - "9000:9000" 11 | volumes: 12 | - ./data/clickhouse/data:/var/lib/clickhouse 13 | healthcheck: 14 | test: ["CMD", "wget", "-q", "http://127.0.0.1:8123/ping"] 15 | 16 | api: 17 | restart: always 18 | build: 19 | context: . 20 | command: gunicorn3 --reuse-port ooniapi.wsgi -b 0.0.0.0:8000 21 | volumes: 22 | - ./newapi/:/app 23 | ports: 24 | - "8000:8000" 25 | depends_on: 26 | clickhouse: 27 | condition: service_healthy 28 | -------------------------------------------------------------------------------- /newapi/api.conf.example: -------------------------------------------------------------------------------- 1 | # Configuration for OONI API. 
This is a sample file specific to the OONI setup 2 | # Syntax: treat it as a Python file, but only uppercase variables are used 3 | COLLECTORS = [] 4 | COLLECTOR_ID = "X" # usually an integer 5 | 6 | DATABASE_STATEMENT_TIMEOUT = 30 7 | 8 | CLICKHOUSE_URL = "clickhouse://clickhouse:9000/default" 9 | 10 | BASE_URL = "https://api.ooni.io/" 11 | # list of URLs: strings starting with "^" will be converted to regexps 12 | CORS_URLS = [r"^https://[-A-Za-z0-9]+\.ooni\.org$", r"^https://[-A-Za-z0-9]+\.ooni\.io$"] 13 | AUTOCLAVED_BASE_URL = "http://datacollector.infra.ooni.io/ooni-public/autoclaved/" 14 | 15 | # S3 endpoint 16 | S3_ACCESS_KEY_ID = "CHANGEME" 17 | S3_SECRET_ACCESS_KEY = "CHANGEME" 18 | S3_BUCKET_NAME = "ooni-data-eu-fra" 19 | S3_SESSION_TOKEN = "CHANGEME" 20 | S3_ENDPOINT_URL = "CHANGEME" 21 | 22 | PSIPHON_CONFFILE = "/etc/ooni/psiphon_config.json" 23 | TOR_TARGETS_CONFFILE = "/etc/ooni/tor_targets.json" 24 | 25 | # Registration email delivery 26 | MAIL_SERVER = "CHANGEME" 27 | MAIL_PORT = 465 28 | MAIL_USE_SSL = False 29 | MAIL_USERNAME = "CHANGEME" 30 | MAIL_PASSWORD = "CHANGEME" 31 | MAIL_SOURCE_ADDRESS = "contact@ooni.org" 32 | 33 | JWT_ENCRYPTION_KEY = "CHANGEME" 34 | ACCOUNT_ID_HASHING_KEY = "CHANGEME" 35 | 36 | SESSION_EXPIRY_DAYS = 2 37 | LOGIN_EXPIRY_DAYS = 7 38 | 39 | GITHUB_ORIGIN_REPO = "citizenlab/test-lists" 40 | GITHUB_PUSH_REPO = "ooni-bot/test-lists" 41 | GITHUB_TOKEN = "CHANGEME" 42 | GITHUB_USER = "CHANGEME" 43 | GITHUB_WORKDIR = "/var/lib/ooniapi/citizenlab" 44 | 45 | MSMT_SPOOL_DIR = "/tmp/oonispool" 46 | GEOIP_ASN_DB = "/var/lib/ooniapi/asn.mmdb" 47 | GEOIP_CC_DB = "/var/lib/ooniapi/cc.mmdb" 48 | -------------------------------------------------------------------------------- /newapi/build_runner.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # WARNING: run only in a dedicated container 4 | # Prepares a container to run the API 5 | # Called from spawnrunner or docker 6 | # 7 
| set -eu 8 | export DEBIAN_FRONTEND=noninteractive 9 | 10 | echo 'deb http://deb.debian.org/debian bullseye-backports main' \ 11 | > /etc/apt/sources.list.d/backports.list 12 | 13 | # Install ca-certificates and gnupg first 14 | apt-get update 15 | apt-get install --no-install-recommends -y ca-certificates gnupg locales apt-transport-https dirmngr 16 | locale-gen en_US.UTF-8 17 | 18 | # Set up OONI archive 19 | echo 'deb http://deb-ci.ooni.org unstable main' \ 20 | > /etc/apt/sources.list.d/ooni.list 21 | apt-key adv --keyserver hkp://keyserver.ubuntu.com \ 22 | --recv-keys "B5A08F01796E7F521861B449372D1FF271F2DD50" 23 | 24 | apt-get update 25 | # Keep this in sync with debian/control 26 | # hint: grep debdeps **/*.py 27 | apt-get install --no-install-recommends -qy \ 28 | curl \ 29 | git \ 30 | gunicorn3 \ 31 | mypy \ 32 | python3-boto3 \ 33 | python3-clickhouse-driver \ 34 | python3-filelock \ 35 | python3-flasgger \ 36 | python3-flask \ 37 | python3-flask-cors \ 38 | python3-flask-restful \ 39 | python3-freezegun \ 40 | python3-geoip2 \ 41 | python3-git \ 42 | python3-jwt \ 43 | python3-lmdb \ 44 | python3-lz4 \ 45 | python3-mock \ 46 | python3-psycopg2 \ 47 | python3-pytest \ 48 | python3-pytest-benchmark\ 49 | python3-pytest-cov \ 50 | python3-pytest-mock \ 51 | python3-setuptools \ 52 | python3-sqlalchemy \ 53 | python3-sqlalchemy-utils \ 54 | python3-statsd \ 55 | python3-systemd \ 56 | python3-ujson 57 | apt-get autoremove -y 58 | apt-get clean 59 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 60 | 61 | mkdir -p /etc/ooni/ 62 | cp api.conf.example /etc/ooni/api.conf 63 | -------------------------------------------------------------------------------- /newapi/database_upgrade_schema.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Roll-forward / rollback database schemas 4 | 5 | Can be used from CLI and as a Python module 6 | """ 7 | 8 | from argparse import ArgumentParser 9 | 
import logging 10 | 11 | from clickhouse_driver import Client as Clickhouse 12 | 13 | log = logging.getLogger("database_upgrade_schema") 14 | 15 | 16 | def run(sql): 17 | title = sql.split("(")[0].strip() 18 | log.info(f"Running query {title}") 19 | click = Clickhouse(host="localhost") 20 | click.execute(sql) 21 | 22 | 23 | def setup_db(): 24 | """Setup database from scratch""" 25 | # Main tables 26 | run( 27 | """ 28 | CREATE TABLE IF NOT EXISTS default.fastpath 29 | ( 30 | `measurement_uid` String, 31 | `report_id` String, 32 | `input` String, 33 | `probe_cc` String, 34 | `probe_asn` Int32, 35 | `test_name` String, 36 | `test_start_time` DateTime, 37 | `measurement_start_time` DateTime, 38 | `filename` String, 39 | `scores` String, 40 | `platform` String, 41 | `anomaly` String, 42 | `confirmed` String, 43 | `msm_failure` String, 44 | `domain` String, 45 | `software_name` String, 46 | `software_version` String, 47 | `control_failure` String, 48 | `blocking_general` Float32, 49 | `is_ssl_expected` Int8, 50 | `page_len` Int32, 51 | `page_len_ratio` Float32, 52 | `server_cc` String, 53 | `server_asn` Int8, 54 | `server_as_name` String 55 | ) 56 | ENGINE = ReplacingMergeTree 57 | ORDER BY (measurement_start_time, report_id, input) 58 | SETTINGS index_granularity = 8192""" 59 | ) 60 | run( 61 | """ 62 | CREATE TABLE IF NOT EXISTS default.jsonl 63 | ( 64 | `report_id` String, 65 | `input` String, 66 | `s3path` String, 67 | `linenum` Int32, 68 | `measurement_uid` String 69 | ) 70 | ENGINE = MergeTree 71 | ORDER BY (report_id, input) 72 | SETTINGS index_granularity = 8192""" 73 | ) 74 | run( 75 | """ 76 | CREATE TABLE IF NOT EXISTS default.url_priorities ( 77 | `category_code` String, 78 | `cc` String, 79 | `domain` String, 80 | `url` String, 81 | `priority` Int32 82 | ) 83 | ENGINE = ReplacingMergeTree 84 | ORDER BY (category_code, cc, domain, url) 85 | SETTINGS index_granularity = 8192""" 86 | ) 87 | run( 88 | """ 89 | CREATE TABLE IF NOT EXISTS default.citizenlab 90 | 
( 91 | `domain` String, 92 | `url` String, 93 | `cc` FixedString(32), 94 | `category_code` String 95 | ) 96 | ENGINE = ReplacingMergeTree 97 | ORDER BY (domain, url, cc, category_code) 98 | SETTINGS index_granularity = 4""" 99 | ) 100 | run( 101 | """ 102 | CREATE TABLE IF NOT EXISTS default.citizenlab_flip AS default.citizenlab""" 103 | ) 104 | run( 105 | """ 106 | CREATE TABLE IF NOT EXISTS test_groups ( 107 | `test_name` String, 108 | `test_group` String 109 | ) 110 | ENGINE = Join(ANY, LEFT, test_name)""" 111 | ) 112 | run( 113 | """ 114 | -- Auth 115 | 116 | CREATE TABLE IF NOT EXISTS accounts 117 | ( 118 | `account_id` FixedString(32), 119 | `role` String 120 | ) 121 | ENGINE = EmbeddedRocksDB 122 | PRIMARY KEY account_id""" 123 | ) 124 | run( 125 | """ 126 | CREATE TABLE IF NOT EXISTS session_expunge 127 | ( 128 | `account_id` FixedString(32), 129 | `threshold` DateTime DEFAULT now() 130 | ) 131 | ENGINE = EmbeddedRocksDB 132 | PRIMARY KEY account_id""" 133 | ) 134 | 135 | # Materialized views 136 | run( 137 | """ 138 | CREATE MATERIALIZED VIEW IF NOT EXISTS default.counters_test_list 139 | ( 140 | `day` DateTime, 141 | `probe_cc` String, 142 | `input` String, 143 | `msmt_cnt` UInt64 144 | ) 145 | ENGINE = SummingMergeTree 146 | PARTITION BY day 147 | ORDER BY (probe_cc, input) 148 | SETTINGS index_granularity = 8192 AS 149 | SELECT 150 | toDate(measurement_start_time) AS day, 151 | probe_cc, 152 | input, 153 | count() AS msmt_cnt 154 | FROM default.fastpath 155 | INNER JOIN default.citizenlab ON fastpath.input = citizenlab.url 156 | WHERE (measurement_start_time < now()) AND (measurement_start_time > (now() - toIntervalDay(8))) AND (test_name = 'web_connectivity') 157 | GROUP BY 158 | day, 159 | probe_cc, 160 | input""" 161 | ) 162 | 163 | run( 164 | """ 165 | CREATE MATERIALIZED VIEW IF NOT EXISTS default.counters_asn_test_list 166 | ( 167 | `week` DateTime, 168 | `probe_cc` String, 169 | `probe_asn` UInt64, 170 | `input` String, 171 | `msmt_cnt` UInt64 
172 | ) 173 | ENGINE = SummingMergeTree 174 | ORDER BY (probe_cc, probe_asn, input) 175 | SETTINGS index_granularity = 8192 AS 176 | SELECT 177 | toStartOfWeek(measurement_start_time) AS week, 178 | probe_cc, 179 | probe_asn, 180 | input, 181 | count() AS msmt_cnt 182 | FROM default.fastpath 183 | INNER JOIN default.citizenlab ON fastpath.input = citizenlab.url 184 | WHERE (measurement_start_time < now()) AND (measurement_start_time > (now() - toIntervalDay(8))) AND (test_name = 'web_connectivity') 185 | GROUP BY 186 | week, 187 | probe_cc, 188 | probe_asn, 189 | input""" 190 | ) 191 | 192 | 193 | if __name__ == "__main__": 194 | ap = ArgumentParser() 195 | ap.add_argument("--upgrade", action="store_true") 196 | conf = ap.parse_args() 197 | if conf.upgrade: 198 | setup_db() 199 | -------------------------------------------------------------------------------- /newapi/debian/control: -------------------------------------------------------------------------------- 1 | Source: ooni-api 2 | Section: python 3 | Priority: optional 4 | Maintainer: Federico Ceratto 5 | Build-Depends: debhelper-compat (= 12), 6 | dh-python, 7 | dh-systemd (>= 1.5), 8 | python3, 9 | python3-boto3, 10 | python3-lz4, 11 | python3-psycopg2, 12 | python3-setuptools, 13 | python3-setproctitle, 14 | python3-statsd, 15 | python3-systemd, 16 | python3-ujson 17 | Standards-Version: 4.5.1 18 | 19 | Package: ooni-api 20 | Architecture: all 21 | Depends: ${misc:Depends}, 22 | ${python3:Depends}, 23 | curl, 24 | gunicorn3, 25 | libjs-bootstrap4, 26 | nginx | nginx-light, 27 | python3-boto3, 28 | python3-clickhouse-driver, 29 | python3-filelock, 30 | python3-flasgger, 31 | python3-flask, 32 | python3-flask-cors, 33 | python3-flask-restful, 34 | python3-geoip2, 35 | python3-git, 36 | python3-jwt, 37 | python3-lmdb, 38 | python3-lz4, 39 | python3-psycopg2, 40 | python3-setuptools, 41 | python3-sqlalchemy, 42 | python3-sqlalchemy-ext, 43 | python3-sqlalchemy-utils, 44 | python3-statsd, 45 | python3-systemd, 
46 | python3-ujson, 47 | Suggests: 48 | python3-freezegun, 49 | python3-pytest 50 | Description: OONI API 51 | OONI API 52 | -------------------------------------------------------------------------------- /newapi/debian/copyright: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: ooni-api 3 | -------------------------------------------------------------------------------- /newapi/debian/etc/nginx/sites-available/ooni-api.conf: -------------------------------------------------------------------------------- 1 | # deployed by ooni-api .deb package 2 | 3 | # Use 2-level cache, 20MB of RAM + 5GB on disk, 4 | proxy_cache_path /var/cache/nginx/ooni-api levels=1:2 keys_zone=apicache:100M 5 | max_size=5g inactive=24h use_temp_path=off; 6 | 7 | # anonymize ipaddr 8 | map $remote_addr $remote_addr_anon { 9 | ~(?P\d+\.\d+\.\d+)\. $ip.0; 10 | ~(?P[^:]+:[^:]+): $ip::; 11 | default 0.0.0.0; 12 | } 13 | 14 | # log anonymized ipaddr and caching status 15 | log_format ooni_api_fmt '$remote_addr_anon $upstream_cache_status [$time_local] ' 16 | '"$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"'; 17 | 18 | server { 19 | listen 80; 20 | listen [::]:80; 21 | server_name _; 22 | access_log syslog:server=unix:/dev/log,tag=ooniapi,severity=info ooni_api_fmt; 23 | location / { 24 | proxy_pass http://127.0.0.1:8000; 25 | proxy_set_header Host $host; 26 | proxy_set_header X-Real-IP $remote_addr; 27 | } 28 | } 29 | 30 | #FIXME 31 | server { 32 | listen 443 ssl http2; 33 | listen [::]:443 ssl http2; 34 | server_name _; 35 | access_log syslog:server=unix:/dev/log,tag=ooniapi,severity=info ooni_api_fmt; 36 | 37 | ssl_certificate /etc/ssl/certs/ssl-cert-snakeoil.pem; 38 | ssl_certificate_key /etc/ssl/private/ssl-cert-snakeoil.key; 39 | ssl_session_timeout 1d; 40 | ssl_session_cache shared:MozSSL:10m; # about 40000 sessions 41 | ssl_session_tickets 
off; 42 | ssl_protocols TLSv1.3; 43 | ssl_prefer_server_ciphers off; 44 | add_header Strict-Transport-Security "max-age=63072000" always; 45 | ssl_stapling on; 46 | ssl_stapling_verify on; 47 | #ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates; 48 | 49 | resolver 127.0.0.1; 50 | 51 | location / { 52 | proxy_pass http://127.0.0.1:8000; 53 | proxy_set_header Host $host; 54 | proxy_set_header X-Real-IP $remote_addr; 55 | 56 | proxy_cache apicache; 57 | proxy_cache_min_uses 1; 58 | proxy_cache_lock on; 59 | proxy_cache_lock_timeout 30; 60 | proxy_cache_lock_age 30; 61 | proxy_cache_use_stale error timeout invalid_header updating; 62 | proxy_cache_methods HEAD GET; 63 | # Cache only 200, 301, and 302 by default and for very short. 64 | # Overridden by the API using the Expires header 65 | proxy_cache_valid 200 301 302 10s; 66 | proxy_cache_valid any 0; 67 | add_header X-Cache-Status $upstream_cache_status; 68 | } 69 | } 70 | 71 | -------------------------------------------------------------------------------- /newapi/debian/etc/ooni/api.conf: -------------------------------------------------------------------------------- 1 | # Configuration for OONI API 2 | # Syntax: treat it as a Python file, but only uppercase variables are used 3 | COLLECTORS = [] 4 | 5 | # Read-only database access 6 | DATABASE_URI_RO = "postgresql://api:CHANGEME@localhost/metadb" 7 | 8 | DATABASE_STATEMENT_TIMEOUT = 30 9 | 10 | USE_CLICKHOUSE = True 11 | CLICKHOUSE_URL = "clickhouse://localhost:9000/default" 12 | 13 | BASE_URL = "https://api.ooni.io/" 14 | # list of URLs: strings starting with "^" will be converted to regexps 15 | CORS_URLS = [r"^https://[-A-Za-z0-9]+\.ooni\.org$", r"^https://[-A-Za-z0-9]+\.ooni\.io$"] 16 | AUTOCLAVED_BASE_URL = "http://datacollector.infra.ooni.io/ooni-public/autoclaved/" 17 | 18 | # S3 endpoint 19 | S3_ACCESS_KEY_ID = "CHANGEME" 20 | S3_SECRET_ACCESS_KEY = "CHANGEME" 21 | S3_SESSION_TOKEN = "CHANGEME" 22 | S3_ENDPOINT_URL = "CHANGEME" 23 | 24 | 
PSIPHON_CONFFILE = "/etc/ooni/psiphon_config.json" 25 | TOR_TARGETS_CONFFILE = "/etc/ooni/tor_targets.json" 26 | 27 | # Registration email delivery 28 | MAIL_SERVER = "CHANGEME" 29 | MAIL_PORT = 465 30 | MAIL_USE_SSL = True 31 | MAIL_USERNAME = "CHANGEME" 32 | MAIL_PASSWORD = "CHANGEME" 33 | MAIL_SOURCE_ADDRESS = "contact@ooni.org" 34 | LOGIN_BASE_URL = "https://CHANGEME/login" 35 | 36 | JWT_ENCRYPTION_KEY = "CHANGEME" 37 | 38 | SESSION_EXPIRY_DAYS = 2 39 | LOGIN_EXPIRY_DAYS = 7 40 | 41 | GITHUB_ORIGIN_REPO = "citizenlab/test-lists" 42 | GITHUB_PUSH_REPO = "ooni-bot/test-lists" 43 | GITHUB_TOKEN = "CHANGEME" 44 | GITHUB_USER = "CHANGEME" 45 | GITHUB_WORKDIR = "/var/lib/ooniapi/citizenlab" 46 | 47 | LOGIN_BASE_URL = "https://test-lists.test.ooni.org/login" 48 | 49 | GITHUB_WORKDIR = "/var/lib/ooniapi/citizenlab" 50 | GITHUB_TOKEN = "DISABLED" 51 | GITHUB_USER = "ooni-bot" 52 | GITHUB_ORIGIN_REPO = "ooni/test-lists" 53 | GITHUB_PUSH_REPO = "ooni-bot/test-lists" 54 | 55 | #SECRET_KEY = "CHANGEME" 56 | 57 | # Measurement spool directory 58 | MSMT_SPOOL_DIR = "/var/lib/ooniapi/measurements" 59 | GEOIP_ASN_DB = "/var/lib/ooniapi/asn.mmdb" 60 | GEOIP_CC_DB = "/var/lib/ooniapi/cc.mmdb" 61 | -------------------------------------------------------------------------------- /newapi/debian/install: -------------------------------------------------------------------------------- 1 | debian/etc/ . 
2 | debian/ooni-api-uploader.service /lib/systemd/system/ 3 | debian/ooni-api-uploader.timer /lib/systemd/system/ 4 | debian/ooni-download-geoip.service /lib/systemd/system/ 5 | debian/ooni-download-geoip.timer /lib/systemd/system/ 6 | debian/ooni_download_geoip.py /usr/bin/ 7 | -------------------------------------------------------------------------------- /newapi/debian/ooni-api-uploader.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Upload raw OONI measurements to S3 3 | Wants=ooni-api-uploader.timer 4 | 5 | [Service] 6 | Type=oneshot 7 | ExecStart=/usr/bin/ooni_api_uploader.py 8 | 9 | [Install] 10 | WantedBy=multi-user.target 11 | 12 | -------------------------------------------------------------------------------- /newapi/debian/ooni-api-uploader.timer: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Upload raw OONI measurements to S3 3 | Requires=ooni-api-uploader.service 4 | 5 | [Timer] 6 | Unit=ooni-api-uploader.service 7 | # run every hour at 10 minutes past the hour 8 | OnCalendar=*-*-* *:10 9 | #OnCalendar=*-*-* *:00/5 10 | 11 | [Install] 12 | WantedBy=timers.target 13 | -------------------------------------------------------------------------------- /newapi/debian/ooni-api.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=OONI API 3 | Wants=network-online.target 4 | After=network-online.target 5 | 6 | [Service] 7 | ExecStart=/usr/bin/gunicorn3 \ 8 | --config file:/etc/ooni/api.gunicorn.py \ 9 | ooniapi.wsgi 10 | 11 | Restart=on-abort 12 | Type=simple 13 | RestartSec=2s 14 | 15 | User=ooniapi 16 | Group=ooniapi 17 | ReadOnlyDirectories=/ 18 | ReadWriteDirectories=/proc/self 19 | ReadWriteDirectories=/var/lib/ooniapi/ 20 | 21 | PermissionsStartOnly=true 22 | LimitNOFILE=65536 23 | 24 | # Sandboxing 25 | CapabilityBoundingSet=CAP_SETUID CAP_SETGID 26 | 
#!/usr/bin/env python3
"""
Updates asn.mmdb and cc.mmdb in /var/lib/ooniapi/
Runs as a systemd timer.

Monitor logs using: sudo journalctl --identifier ooni_download_geoip
"""

import sys
import gzip
import shutil
import logging

# debdeps: python3-geoip2
import geoip2.database  # type: ignore
from pathlib import Path
from datetime import datetime
from urllib.error import HTTPError
from urllib.request import urlopen, Request

import statsd  # debdeps: python3-statsd

# db-ip publishes one free database per month; the month is part of the URL
TS = datetime.utcnow().strftime("%Y-%m")
ASN_URL = f"https://download.db-ip.com/free/dbip-asn-lite-{TS}.mmdb.gz"
CC_URL = f"https://download.db-ip.com/free/dbip-country-lite-{TS}.mmdb.gz"

OONI_API_DIR = Path("/var/lib/ooniapi/")

metrics = statsd.StatsClient("127.0.0.1", 8125, prefix="ooni_download_geoip")
log = logging.getLogger("ooni_download_geoip")

try:
    from systemd.journal import JournalHandler  # debdeps: python3-systemd

    log.addHandler(JournalHandler(SYSLOG_IDENTIFIER="ooni_download_geoip"))
except ImportError:
    pass

log.addHandler(logging.StreamHandler(sys.stdout))
log.setLevel(logging.DEBUG)


def get_request(url):
    """Open `url` and return the HTTP response object."""
    req = Request(url)
    # We need to set the user-agent otherwise db-ip gives us a 403
    req.add_header("User-Agent", "ooni-downloader")
    return urlopen(req)


def is_already_updated() -> bool:
    """Return True when the timestamp file shows this month's DBs are installed."""
    try:
        with (OONI_API_DIR / "geoipdbts").open() as in_file:
            current_ts = in_file.read()
    except FileNotFoundError:
        return False

    return current_ts == TS


def is_latest_available(url: str) -> bool:
    """Return True if `url` is downloadable (HTTP 200)."""
    log.info(f"fetching {url}")
    try:
        resp = get_request(url)
        return resp.status == 200
    except HTTPError as err:
        # BUG FIX: the old code read `resp.status` here, but `resp` is never
        # bound when urlopen raises - use the error's own code instead.
        if err.code == 404:
            log.info(f"{url} hasn't been updated yet")
            return False
        log.info(f"unexpected status code '{err.code}' in {url}")
        return False


def check_geoip_db(path: Path) -> None:
    """Sanity-check a downloaded mmdb file and report its metadata as metrics.

    Raises (AssertionError or a geoip2 error) if the database is unusable.
    """
    assert "cc" in path.name or "asn" in path.name, "invalid path"

    with geoip2.database.Reader(str(path)) as reader:
        if "asn" in path.name:
            r1 = reader.asn("8.8.8.8")
            assert r1 is not None, "database file is invalid"
            m = reader.metadata()
            metrics.gauge("geoip_asn_node_cnt", m.node_count)
            metrics.gauge("geoip_asn_epoch", m.build_epoch)

        elif "cc" in path.name:
            r2 = reader.country("8.8.8.8")
            assert r2 is not None, "database file is invalid"
            m = reader.metadata()
            metrics.gauge("geoip_cc_node_cnt", m.node_count)
            metrics.gauge("geoip_cc_epoch", m.build_epoch)


@metrics.timer("download_geoip")
def download_geoip(url: str, filename: str) -> None:
    """Download, decompress, verify and atomically install one geoip DB.

    On a failed consistency check the temporary file is left in place and the
    currently installed database is not touched.
    """
    log.info(f"Updating geoip database for {url} ({filename})")

    tmp_gz_out = OONI_API_DIR / f"{filename}.gz.tmp"
    tmp_out = OONI_API_DIR / f"{filename}.tmp"

    with get_request(url) as resp:
        with tmp_gz_out.open("wb") as out_file:
            shutil.copyfileobj(resp, out_file)
        with gzip.open(str(tmp_gz_out)) as in_file:
            with tmp_out.open("wb") as out_file:
                shutil.copyfileobj(in_file, out_file)
        tmp_gz_out.unlink()

    try:
        check_geoip_db(tmp_out)
    except Exception as exc:
        log.error(f"consistency check on the geoip DB failed: {exc}")
        metrics.incr("ooni_geoip_checkfail")
        return

    # rename on the same filesystem is atomic: readers never see a partial DB
    tmp_out.rename(OONI_API_DIR / filename)


def update_geoip() -> None:
    """Refresh both databases and record the month they were updated."""
    OONI_API_DIR.mkdir(parents=True, exist_ok=True)
    download_geoip(ASN_URL, "asn.mmdb")
    download_geoip(CC_URL, "cc.mmdb")

    with (OONI_API_DIR / "geoipdbts").open("w") as out_file:
        out_file.write(TS)

    log.info("Updated GeoIP databases")
    metrics.incr("ooni_geoip_updated")


def main():
    if is_already_updated():
        log.debug("Database already updated. Exiting.")
        sys.exit(0)

    if not is_latest_available(ASN_URL) or not is_latest_available(CC_URL):
        log.debug("Update not available yet. Exiting.")
        sys.exit(0)

    update_geoip()


if __name__ == "__main__":
    main()
32 | 33 | #DEBHELPER# 34 | 35 | exit 0 36 | -------------------------------------------------------------------------------- /newapi/debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | export DH_VERBOSE = 1 3 | #export PYBUILD_NAME=ooniapi 4 | export PYBUILD_DISABLE=test 5 | 6 | %: 7 | dh $@ --with python3 --buildsystem=pybuild 8 | 9 | override_dh_installsystemd: 10 | dh_installsystemd --restart-after-upgrade 11 | dh_installsystemd --restart-after-upgrade ooni-api-uploader.service 12 | dh_installsystemd --restart-after-upgrade ooni-api-uploader.timer 13 | dh_installsystemd --restart-after-upgrade ooni-download-geoip.service 14 | dh_installsystemd --restart-after-upgrade ooni-download-geoip.timer 15 | -------------------------------------------------------------------------------- /newapi/debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /newapi/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.9 3 | ignore_missing_imports = True 4 | warn_unused_configs = True 5 | 6 | # warn_return_any = True 7 | -------------------------------------------------------------------------------- /newapi/ooni_api_uploader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Uploads OONI API measurements to S3 4 | Reads /etc/ooni/api.conf 5 | """ 6 | 7 | from configparser import ConfigParser 8 | from pathlib import Path 9 | from pathlib import PosixPath as PP 10 | from datetime import datetime, timedelta 11 | from typing import List, Dict 12 | import gzip 13 | import logging 14 | import tarfile 15 | import yaml 16 | 17 | import ujson 18 | import boto3 19 | import statsd # debdeps: python3-statsd 20 | 21 | # debdeps: 
@metrics.timer("update_db_table")
def update_db_table(conn, lookup_list, jsonl_s3path):
    """Record the S3 location of each measurement in the `jsonl` DB table.

    Mutates every dict in `lookup_list` in place, adding the "s3path" key.
    """
    # Stamp each lookup row with the path of the JSONL file it was written to
    for row in lookup_list:
        row["s3path"] = jsonl_s3path

    log.info(f"Writing {len(lookup_list)} rows to DB")
    query = "INSERT INTO jsonl (report_id, input, s3path, linenum, measurement_uid) VALUES"
    conn.execute(query, lookup_list)
@metrics.timer("fill_jsonl")
def fill_jsonl(measurements: List[PP], jsonlf: PP) -> List[Dict]:
    """Serialize measurement POST files into a gzipped JSONL file.

    Returns a lookup list of dicts (report_id, input, measurement_uid,
    linenum) used later to populate the jsonl DB table. Unparsable
    measurements are written out as "{}" placeholders so that line numbers
    stay aligned with the postcan contents.
    """
    log.info(f"Filling {jsonlf.name}")
    # report_id, input, 2020092119_IT_tor.n0.0.jsonl.gz
    lookup_list = []
    with gzip.open(jsonlf, "w") as jf:
        for linenum, msmt_f in enumerate(measurements):
            try:
                post = ujson.load(msmt_f.open())
            except Exception:
                log.error("Unable to parse measurement")
                jf.write(b"{}\n")
                continue

            fmt = post.get("format", "").lower()
            msm = None
            if fmt == "json":
                msm = post.get("content", {})
            elif fmt == "yaml":
                try:
                    # BUG FIX: this previously parsed `msm` (always None at
                    # this point) instead of the POST body, so every YAML
                    # measurement was dropped as unparsable.
                    # NOTE(review): yaml.CLoader can construct arbitrary
                    # objects from untrusted input - consider CSafeLoader.
                    msm = yaml.load(post.get("content", ""), Loader=yaml.CLoader)
                except Exception:
                    pass

            if msm is None:
                log.error("Unable to parse measurement")
                jf.write(b"{}\n")
                continue

            jf.write(ujson.dumps(msm).encode())
            jf.write(b"\n")

            rid = msm.get("report_id") or ""
            inp = msm.get("input") or ""
            # measurement UID is the POST filename without the ".post" suffix
            msmt_uid = msmt_f.name[:-5]
            d = dict(
                report_id=rid, input=inp, measurement_uid=msmt_uid, linenum=linenum
            )
            lookup_list.append(d)

    return lookup_list
read_conf() 155 | bucket_name = conf.get("bucket_name") 156 | spooldir = Path(conf.get("msmt_spool_dir")) 157 | format_char = "n" 158 | collector_id = conf.get("collector_id") 159 | identity = f"{format_char}{collector_id}" 160 | log.info(f"Uploader {collector_id} starting") 161 | assert collector_id, "collector_id is not set" 162 | log.info(f"Using bucket {bucket_name} and spool {spooldir}") 163 | 164 | s3 = create_s3_client(conf) 165 | 166 | db_conn = connect_to_db(conf) 167 | 168 | # Scan spool directories, by age 169 | idir = spooldir / "incoming" 170 | threshold = datetime.utcnow() - timedelta(hours=1) 171 | for hourdir in sorted(idir.iterdir()): 172 | if not hourdir.is_dir() or hourdir.suffix == ".tmp": 173 | continue 174 | try: 175 | tstamp, cc, testname = hourdir.name.split("_") 176 | except Exception: 177 | continue 178 | if len(tstamp) != 10: 179 | continue 180 | hourdir_time = datetime.strptime(tstamp, "%Y%m%d%H") 181 | if hourdir_time > threshold: 182 | log.info(f"Stopping before {hourdir_time}") 183 | break 184 | 185 | log.info(f"Processing {hourdir}") 186 | # Split msmts across multiple postcans and jsonl files 187 | can_cnt = 0 188 | while True: 189 | # Compress raw POSTs into a tar.gz postcan 190 | postcanf = hourdir.with_suffix(f".{identity}.{can_cnt}.tar.gz") 191 | jsonlf = hourdir.with_suffix(f".{identity}.{can_cnt}.jsonl.gz") 192 | msmfiles = fill_postcan(hourdir, postcanf) 193 | if len(msmfiles) == 0: 194 | break 195 | # Also create jsonl file and delete msmt POSTs 196 | lookup_list = fill_jsonl(msmfiles, jsonlf) 197 | delete_msmt_posts(msmfiles) 198 | 199 | # Upload current postcan to S3 200 | postcan_s3path = ( 201 | f"raw/{tstamp[:8]}/{tstamp[8:10]}/{cc}/{testname}/{postcanf.name}" 202 | ) 203 | jsonl_s3path = ( 204 | f"raw/{tstamp[:8]}/{tstamp[8:10]}/{cc}/{testname}/{jsonlf.name}" 205 | ) 206 | if conf.get("run_mode", "") == "DESTROY_DATA": 207 | log.info("Testbed mode: Destroying postcans!") 208 | else: 209 | upload_to_s3(s3, bucket_name, 
postcanf, postcan_s3path) 210 | upload_to_s3(s3, bucket_name, jsonlf, jsonl_s3path) 211 | update_db_table(db_conn, lookup_list, jsonl_s3path) 212 | 213 | postcanf.unlink() 214 | jsonlf.unlink() 215 | 216 | can_cnt += 1 217 | metrics.incr("postcan_count") 218 | 219 | # Delete whole hourly directory 220 | for f in sorted(hourdir.iterdir()): 221 | f.unlink() 222 | hourdir.rmdir() 223 | 224 | log.info("Exiting") 225 | 226 | 227 | if __name__ == "__main__": 228 | main() 229 | -------------------------------------------------------------------------------- /newapi/ooniapi/README.adoc: -------------------------------------------------------------------------------- 1 | 2 | === New OONI API === 3 | 4 | Haproxy (local or remote) 5 | -> Nginx (provides TLS termination and proxy forwarding) 6 | -> Gunicorn 7 | -> API 8 | 9 | Deployed as ooni-api package 10 | Configured from /etc/ooni/api.conf 11 | -------------------------------------------------------------------------------- /newapi/ooniapi/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "Open Observatory of Network Interference" 2 | __email__ = "contact@openobservatory.org" 3 | 4 | __license__ = "BSD 3 Clause" 5 | __version__ = "1.2.2" 6 | -------------------------------------------------------------------------------- /newapi/ooniapi/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from ooniapi.cli import cli 4 | 5 | if __name__ == "__main__": 6 | sys.exit(cli()) 7 | -------------------------------------------------------------------------------- /newapi/ooniapi/app.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import datetime 4 | import logging 5 | import os 6 | import re 7 | import sys 8 | from collections import deque 9 | 10 | from flask import Flask, json 11 | 12 | from flask_cors import CORS # 
class FlaskJSONEncoder(json.JSONEncoder):
    """JSON encoder that also serializes datetimes, dates, Decimals and sets."""

    def default(self, o):
        if isinstance(o, datetime.datetime):
            # Aware: '2015-09-25T23:14:42.588601+00:00'
            # Naive: assume UTC, e.g. '2015-09-25T23:14:42.588601Z'
            suffix = "" if o.tzinfo else "Z"
            return o.isoformat("T") + suffix

        if isinstance(o, datetime.date):
            return o.isoformat()

        if isinstance(o, Decimal):
            return float(o)

        if isinstance(o, set):
            return list(o)

        return json.JSONEncoder.default(self, o)
"S3_SESSION_TOKEN", 88 | "TOR_TARGETS_CONFFILE", 89 | ) 90 | for k in conf_keys: 91 | if k not in app.config: 92 | app.logger.error(f"Missing configuration key {k} in {conffile}") 93 | # exit with 4 to terminate gunicorn 94 | sys.exit(4) 95 | 96 | 97 | def parse_cors_origins(app): 98 | out = [] 99 | for i in app.config["CORS_URLS"]: 100 | if i.startswith("^"): 101 | i = re.compile(i) 102 | out.append(i) 103 | app.config["CORS_URLS"] = out 104 | 105 | 106 | def setup_collectors_ring(config): 107 | """Create round-robin ring of collectors excluding localhost""" 108 | lh = config.get("HOSTNAME") 109 | if not lh: 110 | import socket 111 | 112 | lh = socket.getfqdn() 113 | 114 | colls = config["COLLECTORS"] 115 | c = deque(sorted(set(colls))) 116 | if lh in c: 117 | # rotated this way to distribute load evenly when n > 2 collectors 118 | # are in use 119 | while c[0] != lh: 120 | c.rotate() 121 | c.popleft() 122 | config["OTHER_COLLECTORS"] = c 123 | 124 | else: 125 | print(f"{lh} not found in collectors {colls}") 126 | config["OTHER_COLLECTORS"] = c 127 | 128 | print(f"Other collectors: {c}") 129 | 130 | 131 | def setup_logging(log): 132 | if enable_journal: 133 | root_logger = log.root 134 | h = JournalHandler(SYSLOG_IDENTIFIER="ooni-api") 135 | formatter = logging.Formatter("%(levelname)s %(message)s") 136 | h.setFormatter(formatter) 137 | root_logger.addHandler(h) 138 | root_logger.setLevel(logging.DEBUG) 139 | else: 140 | log.setLevel(logging.DEBUG) 141 | logging.basicConfig(format="%(message)s") 142 | 143 | 144 | def load_geoip_db(log, app): 145 | log.debug("Loading GeoIP DBs") 146 | ccfn = app.config.get("GEOIP_CC_DB") 147 | asnfn = app.config.get("GEOIP_ASN_DB") 148 | try: 149 | app.geoip_cc_reader = geoip2.database.Reader(ccfn) 150 | app.geoip_asn_reader = geoip2.database.Reader(asnfn) 151 | except Exception: 152 | log.error("Failed to load geoip DBs at", ccfn, asnfn, exc_info=True) 153 | 154 | 155 | def init_app(app, testmode=False): 156 | # Load 
def setup_collectors_ring(config):
    """Create round-robin ring of collectors excluding localhost"""
    localhost = config.get("HOSTNAME")
    if not localhost:
        import socket

        localhost = socket.getfqdn()

    collectors = config["COLLECTORS"]
    ring = deque(sorted(set(collectors)))
    if localhost not in ring:
        print(f"{localhost} not found in collectors {collectors}")
    else:
        # rotated this way to distribute load evenly when n > 2 collectors
        # are in use
        while ring[0] != localhost:
            ring.rotate()
        ring.popleft()

    config["OTHER_COLLECTORS"] = ring
    print(f"Other collectors: {ring}")
222 | # option httpchk GET /check 223 | # http-check expect string success 224 | 225 | if False: 226 | log.debug("Routes:") 227 | for r in app.url_map.iter_rules(): 228 | log.debug(f" {r.match} ") 229 | log.debug("----") 230 | 231 | return app 232 | -------------------------------------------------------------------------------- /newapi/ooniapi/cli/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from flask import current_app 3 | from flask.cli import FlaskGroup, with_appcontext 4 | 5 | from sqlalchemy import exists, select 6 | 7 | from ooniapi.app import create_app 8 | 9 | cli = FlaskGroup(create_app=create_app) 10 | 11 | 12 | @cli.command() 13 | @with_appcontext 14 | def shell(): 15 | """Run a Python shell in the app context.""" 16 | 17 | try: 18 | import IPython 19 | except ImportError: 20 | IPython = None 21 | 22 | if IPython is not None: 23 | IPython.embed(banner1="", user_ns=current_app.make_shell_context()) 24 | else: 25 | import code 26 | 27 | code.interact(banner="", local=current_app.make_shell_context()) 28 | 29 | 30 | @cli.command() 31 | @with_appcontext 32 | def create_tables(): 33 | from ooniapi.database import Base 34 | from ooniapi import models 35 | 36 | Base.metadata.create_all(bind=current_app.db_engine) 37 | 38 | 39 | if __name__ == "__main__": 40 | cli() 41 | -------------------------------------------------------------------------------- /newapi/ooniapi/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | 8 | from flask import request 9 | 10 | import statsd # debdeps: python3-statsd 11 | 12 | BASE_DIR = os.path.abspath(os.path.dirname(__file__)) 13 | 14 | CACHE_DEFAULT_TIMEOUT = None 15 | CACHE_CONFIG = {"CACHE_TYPE": "simple"} 16 | 17 | APP_ENV = 
os.environ.get("APP_ENV", "development") 18 | 19 | # As of 2017-07-18 635830 is the latest index in the database 20 | REPORT_INDEX_OFFSET = int(os.environ.get("REPORT_INDEX_OFFSET", "635830")) 21 | 22 | REQID_HDR = "X-Request-ID" 23 | 24 | metrics = statsd.StatsClient("localhost", 8125, prefix="ooni-api") 25 | 26 | 27 | def request_id(): 28 | if request: 29 | return request.headers.get(REQID_HDR) 30 | return None 31 | -------------------------------------------------------------------------------- /newapi/ooniapi/countries/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | CC to country name lookup table 3 | 4 | Regenerate the dict with: 5 | 6 | python3 ooniapi/countries/__init__.py 7 | 8 | """ 9 | 10 | _countries = { 11 | "AD": "Andorra", 12 | "AE": "United Arab Emirates", 13 | "AF": "Afghanistan", 14 | "AG": "Antigua & Barbuda", 15 | "AI": "Anguilla", 16 | "AL": "Albania", 17 | "AM": "Armenia", 18 | "AO": "Angola", 19 | "AQ": "Antarctica", 20 | "AR": "Argentina", 21 | "AS": "American Samoa", 22 | "AT": "Austria", 23 | "AU": "Australia", 24 | "AW": "Aruba", 25 | "AX": "Åland Islands", 26 | "AZ": "Azerbaijan", 27 | "BA": "Bosnia", 28 | "BB": "Barbados", 29 | "BD": "Bangladesh", 30 | "BE": "Belgium", 31 | "BF": "Burkina Faso", 32 | "BG": "Bulgaria", 33 | "BH": "Bahrain", 34 | "BI": "Burundi", 35 | "BJ": "Benin", 36 | "BL": "St. 
Barthélemy", 37 | "BM": "Bermuda", 38 | "BN": "Brunei", 39 | "BO": "Bolivia", 40 | "BQ": "Caribbean Netherlands", 41 | "BR": "Brazil", 42 | "BS": "Bahamas", 43 | "BT": "Bhutan", 44 | "BV": "Bouvet Island", 45 | "BW": "Botswana", 46 | "BY": "Belarus", 47 | "BZ": "Belize", 48 | "CA": "Canada", 49 | "CC": "Cocos (Keeling) Islands", 50 | "CD": "Congo - Kinshasa", 51 | "CF": "Central African Republic", 52 | "CG": "Congo - Brazzaville", 53 | "CH": "Switzerland", 54 | "CI": "Côte d’Ivoire", 55 | "CK": "Cook Islands", 56 | "CL": "Chile", 57 | "CM": "Cameroon", 58 | "CN": "China", 59 | "CO": "Colombia", 60 | "CR": "Costa Rica", 61 | "CU": "Cuba", 62 | "CV": "Cape Verde", 63 | "CW": "Curaçao", 64 | "CX": "Christmas Island", 65 | "CY": "Cyprus", 66 | "CZ": "Czechia", 67 | "DE": "Germany", 68 | "DJ": "Djibouti", 69 | "DK": "Denmark", 70 | "DM": "Dominica", 71 | "DO": "Dominican Republic", 72 | "DZ": "Algeria", 73 | "EC": "Ecuador", 74 | "EE": "Estonia", 75 | "EG": "Egypt", 76 | "EH": "Western Sahara", 77 | "ER": "Eritrea", 78 | "ES": "Spain", 79 | "ET": "Ethiopia", 80 | "FI": "Finland", 81 | "FJ": "Fiji", 82 | "FK": "Falkland Islands", 83 | "FM": "Micronesia", 84 | "FO": "Faroe Islands", 85 | "FR": "France", 86 | "GA": "Gabon", 87 | "GB": "United Kingdom", 88 | "GD": "Grenada", 89 | "GE": "Georgia", 90 | "GF": "French Guiana", 91 | "GG": "Guernsey", 92 | "GH": "Ghana", 93 | "GI": "Gibraltar", 94 | "GL": "Greenland", 95 | "GM": "Gambia", 96 | "GN": "Guinea", 97 | "GP": "Guadeloupe", 98 | "GQ": "Equatorial Guinea", 99 | "GR": "Greece", 100 | "GS": "South Georgia & South Sandwich Islands", 101 | "GT": "Guatemala", 102 | "GU": "Guam", 103 | "GW": "Guinea-Bissau", 104 | "GY": "Guyana", 105 | "HK": "Hong Kong", 106 | "HM": "Heard & McDonald Islands", 107 | "HN": "Honduras", 108 | "HR": "Croatia", 109 | "HT": "Haiti", 110 | "HU": "Hungary", 111 | "ID": "Indonesia", 112 | "IE": "Ireland", 113 | "IL": "Israel", 114 | "IM": "Isle of Man", 115 | "IN": "India", 116 | "IO": "British Indian 
Ocean Territory", 117 | "IQ": "Iraq", 118 | "IR": "Iran", 119 | "IS": "Iceland", 120 | "IT": "Italy", 121 | "JE": "Jersey", 122 | "JM": "Jamaica", 123 | "JO": "Jordan", 124 | "JP": "Japan", 125 | "KE": "Kenya", 126 | "KG": "Kyrgyzstan", 127 | "KH": "Cambodia", 128 | "KI": "Kiribati", 129 | "KM": "Comoros", 130 | "KN": "St. Kitts & Nevis", 131 | "KP": "North Korea", 132 | "KR": "South Korea", 133 | "KW": "Kuwait", 134 | "KY": "Cayman Islands", 135 | "KZ": "Kazakhstan", 136 | "LA": "Laos", 137 | "LB": "Lebanon", 138 | "LC": "St. Lucia", 139 | "LI": "Liechtenstein", 140 | "LK": "Sri Lanka", 141 | "LR": "Liberia", 142 | "LS": "Lesotho", 143 | "LT": "Lithuania", 144 | "LU": "Luxembourg", 145 | "LV": "Latvia", 146 | "LY": "Libya", 147 | "MA": "Morocco", 148 | "MC": "Monaco", 149 | "MD": "Moldova", 150 | "ME": "Montenegro", 151 | "MF": "St. Martin", 152 | "MG": "Madagascar", 153 | "MH": "Marshall Islands", 154 | "MK": "North Macedonia", 155 | "ML": "Mali", 156 | "MM": "Myanmar", 157 | "MN": "Mongolia", 158 | "MO": "Macao", 159 | "MP": "Northern Mariana Islands", 160 | "MQ": "Martinique", 161 | "MR": "Mauritania", 162 | "MS": "Montserrat", 163 | "MT": "Malta", 164 | "MU": "Mauritius", 165 | "MV": "Maldives", 166 | "MW": "Malawi", 167 | "MX": "Mexico", 168 | "MY": "Malaysia", 169 | "MZ": "Mozambique", 170 | "NA": "Namibia", 171 | "NC": "New Caledonia", 172 | "NE": "Niger", 173 | "NF": "Norfolk Island", 174 | "NG": "Nigeria", 175 | "NI": "Nicaragua", 176 | "NL": "Netherlands", 177 | "NO": "Norway", 178 | "NP": "Nepal", 179 | "NR": "Nauru", 180 | "NU": "Niue", 181 | "NZ": "New Zealand", 182 | "OM": "Oman", 183 | "PA": "Panama", 184 | "PE": "Peru", 185 | "PF": "French Polynesia", 186 | "PG": "Papua New Guinea", 187 | "PH": "Philippines", 188 | "PK": "Pakistan", 189 | "PL": "Poland", 190 | "PM": "St. 
Pierre & Miquelon", 191 | "PN": "Pitcairn Islands", 192 | "PR": "Puerto Rico", 193 | "PS": "Palestine", 194 | "PT": "Portugal", 195 | "PW": "Palau", 196 | "PY": "Paraguay", 197 | "QA": "Qatar", 198 | "RE": "Réunion", 199 | "RO": "Romania", 200 | "RS": "Serbia", 201 | "RU": "Russia", 202 | "RW": "Rwanda", 203 | "SA": "Saudi Arabia", 204 | "SB": "Solomon Islands", 205 | "SC": "Seychelles", 206 | "SD": "Sudan", 207 | "SE": "Sweden", 208 | "SG": "Singapore", 209 | "SH": "St. Helena", 210 | "SI": "Slovenia", 211 | "SJ": "Svalbard & Jan Mayen", 212 | "SK": "Slovakia", 213 | "SL": "Sierra Leone", 214 | "SM": "San Marino", 215 | "SN": "Senegal", 216 | "SO": "Somalia", 217 | "SR": "Suriname", 218 | "SS": "South Sudan", 219 | "ST": "São Tomé & Príncipe", 220 | "SV": "El Salvador", 221 | "SX": "Sint Maarten", 222 | "SY": "Syria", 223 | "SZ": "Eswatini", 224 | "TC": "Turks & Caicos Islands", 225 | "TD": "Chad", 226 | "TF": "French Southern Territories", 227 | "TG": "Togo", 228 | "TH": "Thailand", 229 | "TJ": "Tajikistan", 230 | "TK": "Tokelau", 231 | "TL": "Timor-Leste", 232 | "TM": "Turkmenistan", 233 | "TN": "Tunisia", 234 | "TO": "Tonga", 235 | "TR": "Turkey", 236 | "TT": "Trinidad & Tobago", 237 | "TV": "Tuvalu", 238 | "TW": "Taiwan", 239 | "TZ": "Tanzania", 240 | "UA": "Ukraine", 241 | "UG": "Uganda", 242 | "UM": "U.S. Outlying Islands", 243 | "US": "United States", 244 | "UY": "Uruguay", 245 | "UZ": "Uzbekistan", 246 | "VA": "Vatican City", 247 | "VC": "St. Vincent & Grenadines", 248 | "VE": "Venezuela", 249 | "VG": "British Virgin Islands", 250 | "VI": "U.S. 
def lookup_country(probe_cc: str) -> str:
    """Return the country name for a 2-letter country code.

    Lookup is case-insensitive; raises KeyError for unknown codes.
    """
    return _countries[probe_cc.upper()]
38 | Allows correlating query statements between API logs and metrics 39 | """ 40 | return shake_128(q.encode()).hexdigest(4) 41 | 42 | 43 | # # Clickhouse 44 | 45 | 46 | def init_clickhouse_db(app) -> None: 47 | """Initializes Clickhouse session""" 48 | url = app.config["CLICKHOUSE_URL"] 49 | app.logger.info("Connecting to Clickhouse") 50 | app.click = Clickhouse.from_url(url) 51 | 52 | 53 | Query = Union[str, TextClause, Select] 54 | 55 | 56 | def _run_query(query: Query, query_params: dict, query_prio=3): 57 | settings = {"priority": query_prio, "max_execution_time": 28} 58 | if isinstance(query, (Select, TextClause)): 59 | query = str(query.compile(dialect=postgresql.dialect())) 60 | try: 61 | q = current_app.click.execute( 62 | query, query_params, with_column_types=True, settings=settings 63 | ) 64 | except clickhouse_driver.errors.ServerException as e: 65 | log.info(e.message) 66 | raise Exception("Database query error") 67 | 68 | rows, coldata = q 69 | colnames, coltypes = tuple(zip(*coldata)) 70 | return colnames, rows 71 | 72 | 73 | def query_click(query: Query, query_params: dict, query_prio=3) -> List[Dict]: 74 | colnames, rows = _run_query(query, query_params, query_prio=query_prio) 75 | return [dict(zip(colnames, row)) for row in rows] 76 | 77 | 78 | def query_click_one_row( 79 | query: Query, query_params: dict, query_prio=3 80 | ) -> Optional[dict]: 81 | colnames, rows = _run_query(query, query_params, query_prio=query_prio) 82 | for row in rows: 83 | return dict(zip(colnames, row)) 84 | 85 | return None 86 | 87 | 88 | def insert_click(query, rows: list) -> int: 89 | assert isinstance(rows, list) 90 | settings = {"priority": 1, "max_execution_time": 300} # query_prio 91 | return current_app.click.execute(query, rows, types_check=True, settings=settings) 92 | -------------------------------------------------------------------------------- /newapi/ooniapi/markdown/api_docs.md: 
-------------------------------------------------------------------------------- 1 | # OONI Measurements API 2 | 3 | This is the documentation for version 1 of the OONI measurements API. 4 | 5 | All the API endpoints start with the URL `/api/v1/`. 6 | 7 | # Pagination 8 | 9 | Some API endpoints support pagination. In these cases the response will have 10 | the following structure: 11 | 12 | ``` 13 | { 14 | "metadata": { 15 | "offset": "an integer specifying the current offset into the data", 16 | "limit": "an integer specifying how many results should be presented", 17 | "count": "an integer expressing the total number of items", 18 | "pages": "the number of pages, or the number of requests you will" 19 | "have to do with the current value of limit to obtain the" 20 | "full set of records", 21 | "next_url": "the url to be used to fetch the next set of items" 22 | }, 23 | "results": [ 24 | "a list containing generally dictionaries of the result in question" 25 | ] 26 | } 27 | ``` 28 | 29 | ## Search files 30 | 31 | Returns a listing of the files matching the given search criteria. 32 | 33 | This API endpoints supports pagination and will by default return 100 34 | results per response. 35 | 36 | ### Request 37 | 38 | **URL** 39 | 40 | /api/v1/files 41 | 42 | **Method** 43 | 44 | `GET` 45 | 46 | **URL Params** 47 | 48 | `probe_cc=[string]` - the two letter country code. 49 | 50 | `probe_asn=[string]` - the 51 | [Autonomous system](https://en.wikipedia.org/wiki/Autonomous_system_(Internet)) 52 | number in the format "ASXXX" 53 | 54 | `test_name=[string]` - the name of the test 55 | 56 | `since=[string]` - the start date of when measurements were run (ex. 57 | "2016-10-20T10:30:00") 58 | 59 | `until=[string]` - the end date of when measurement were run (ex. 60 | "2016-10-20T10:30:00") 61 | 62 | `since_index=[integer]` - return results only strictly greater than the 63 | provided index. 
64 | 65 | `order_by=[string]` - the key by which the results should be ordered (default: test_start_time) 66 | 67 | `order=[string] ("desc", "asc")` - if the order should be ascending or descending. 68 | 69 | `offset=[integer]` - offset into the result set (default: 0) 70 | 71 | `limit=[integer]` - number of records to return (default: 100) 72 | 73 | **Data Params** 74 | 75 | None 76 | 77 | ### Response 78 | 79 | #### Success 80 | 81 | **Code:** 200
82 | **Content:** 83 | 84 | ``` 85 | { 86 | "metadata": { 87 | "count": "[integer] total number of rows", 88 | "limit": "[integer] current limit to returned results", 89 | "next_url": "[string] URL pointing to next page of results or none if no more pages are available", 90 | "offset": "[integer] the current offset into the result set", 91 | "pages": "[integer] total number of pages", 92 | "current_page": "[integer] current page" 93 | }, 94 | "results": [ 95 | { 96 | "probe_asn": "[string] the Autonomous system number of the result", 97 | "probe_cc": "[string] the country code of the result", 98 | "test_name": "[string] the name of the test that was run", 99 | "index": "[integer] the index of this result (useful when using since_index)", 100 | "test_start_time": "[string] start time for the measurement in ISO 8601 format", 101 | "download_url": "[string] url to the download. Note: if the download URL ends with '.gz' it should be considered compressed with gzip." 102 | } 103 | ] 104 | } 105 | ``` 106 | 107 | #### Error 108 | 109 | **Code:** 400 BAD REQUEST
110 | **Content:** 111 | 112 | ``` 113 | { 114 | "error_code": 400, 115 | "error_message": "Some error message" 116 | } 117 | ``` 118 | 119 | ## Search measurements 120 | 121 | Returns the IDs for the measurements that match the specified search 122 | criteria. 123 | 124 | ### Request 125 | 126 | **URL** 127 | 128 | /api/v1/measurements 129 | 130 | **Method** 131 | 132 | `GET` 133 | 134 | **URL Params** 135 | 136 | `report_id=[string]` - the report ID of the requested measurement 137 | 138 | `input=[string]` - the input for the requested measurement 139 | 140 | `probe_cc=[string]` - the two letter country code. 141 | 142 | `probe_asn=[string]` - the 143 | [Autonomous system](https://en.wikipedia.org/wiki/Autonomous_system_(Internet)) 144 | number in the format "ASXXX" 145 | 146 | `test_name=[string]` - the name of the test 147 | 148 | `since=[string]` - the start date of when measurements were run (ex. 149 | "2016-10-20T10:30:00") 150 | 151 | `until=[string]` - the end date of when measurements were run (ex. 152 | "2016-10-20T10:30:00") 153 | 154 | `order_by=[string]` - the key by which the results should be ordered (default: test_start_time) 155 | 156 | `order=[string] ("desc", "asc")` - if the order should be ascending or descending. 157 | 158 | `offset=[integer]` - offset into the result set (default: 0) 159 | 160 | `limit=[integer]` - number of records to return (default: 100) 161 | 162 | **Data Params** 163 | 164 | None 165 | 166 | ### Response 167 | 168 | #### Success 169 | 170 | **Code:** 200
171 | **Content:** 172 | 173 | ``` 174 | { 175 | "metadata": { 176 | "count": "[integer] total number of rows", 177 | "limit": "[integer] current limit to returned results", 178 | "next_url": "[string] URL pointing to next page of results or none if no more pages are available", 179 | "offset": "[integer] the current offset into the result set", 180 | "pages": "[integer] total number of pages", 181 | "current_page": "[integer] current page" 182 | }, 183 | "results": [ 184 | { 185 | "measurement_id": "[string] the ID of the measurement returned", 186 | "measurement_url": "[string] link to fetch the measurement (probably in the form of $BASEURL/api/v1/measurement/)" 187 | } 188 | ] 189 | } 190 | ``` 191 | 192 | #### Error 193 | 194 | **Code:** 400 BAD REQUEST
195 | **Content:** 196 | 197 | ``` 198 | { 199 | "error_code": 400, 200 | "error_message": "Some error message" 201 | } 202 | ``` 203 | 204 | 205 | ## Fetch measurement 206 | 207 | Returns the specified measurement. 208 | 209 | ### Request 210 | 211 | **URL** 212 | 213 | `/api/v1/measurement/` 214 | 215 | **Method** 216 | 217 | `GET` 218 | 219 | ### Response 220 | 221 | #### Success 222 | 223 | **Code:** 200
224 | **Content:** 225 | 226 | ``` 227 | { 228 | "id": "XXXX", 229 | "data": { 230 | "probe_cc": "XX", 231 | "probe_asn": "XX", 232 | ... 233 | "test_keys": {}, 234 | } 235 | } 236 | ``` 237 | 238 | #### Error 239 | 240 | **Code:** 400 BAD REQUEST
241 | **Content:** 242 | 243 | ``` 244 | { 245 | "error_code": 400, 246 | "error_message": "Some error message" 247 | } 248 | ``` 249 | -------------------------------------------------------------------------------- /newapi/ooniapi/models.py: -------------------------------------------------------------------------------- 1 | # Publised to Explorer in private.py 2 | # Also used in measurement.py 3 | 4 | TEST_GROUPS = { 5 | "websites": ["web_connectivity"], 6 | "im": ["facebook_messenger", "signal", "telegram", "whatsapp"], 7 | "middlebox": ["http_invalid_request_line", "http_header_field_manipulation"], 8 | "performance": ["ndt", "dash"], 9 | "circumvention": [ 10 | "bridge_reachability", 11 | "meek_fronted_requests_test", 12 | "vanilla_tor", 13 | "tcp_connect", 14 | "psiphon", 15 | "tor", 16 | "torsf", 17 | "riseupvpn", 18 | ], 19 | "legacy": [ 20 | "http_requests", 21 | "dns_consistency", 22 | "http_host", 23 | "multi_protocol_traceroute", 24 | ], 25 | "experimental": [ 26 | "urlgetter", 27 | "dnscheck", 28 | "stunreachability", 29 | ], 30 | } 31 | 32 | # Used in ooniapi/measurements.py for validation 33 | TEST_NAMES = [] 34 | for v in TEST_GROUPS.values(): 35 | assert isinstance(v, list) 36 | TEST_NAMES.extend(v) 37 | -------------------------------------------------------------------------------- /newapi/ooniapi/pages/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | OONI API - various pages e.g. 3 | / 4 | /files 5 | 6 | Redirects: 7 | /stats 8 | /files 9 | /files/by_date 10 | """ 11 | 12 | 13 | import re 14 | from datetime import timedelta, datetime 15 | 16 | from flask import Blueprint, render_template, redirect, send_file, make_response 17 | 18 | from werkzeug.exceptions import BadRequest, NotFound 19 | from werkzeug.wrappers import Response # why not flask.Response? 
20 | 21 | # Exporting it 22 | from .docs import api_docs_blueprint 23 | 24 | pages_blueprint = Blueprint( 25 | "pages", "measurements", static_folder="static", static_url_path="/static/" 26 | ) 27 | 28 | 29 | DAY_REGEXP = re.compile(r"^\d{4}\-[0-1]\d\-[0-3]\d$") 30 | 31 | 32 | @pages_blueprint.route("/") 33 | def index(): 34 | """Landing page 35 | --- 36 | responses: 37 | '200': 38 | description: TODO 39 | """ 40 | return render_template("index.html") 41 | 42 | 43 | @pages_blueprint.route("/stats") 44 | def stats() -> Response: 45 | """TODO 46 | --- 47 | responses: 48 | '200': 49 | description: TODO 50 | """ 51 | return redirect("https://explorer.ooni.org", 301) 52 | 53 | 54 | @pages_blueprint.route("/files") 55 | def files_index() -> Response: 56 | """TODO 57 | --- 58 | responses: 59 | '200': 60 | description: TODO 61 | """ 62 | return redirect("https://explorer.ooni.org/search", 301) 63 | 64 | 65 | @pages_blueprint.route("/files/by_date") 66 | def files_by_date() -> Response: 67 | """TODO 68 | --- 69 | responses: 70 | '200': 71 | description: TODO 72 | """ 73 | return redirect("https://explorer.ooni.org/search", 301) 74 | 75 | 76 | @pages_blueprint.route("/files/by_date/") 77 | def files_on_date(date) -> Response: 78 | """TODO 79 | --- 80 | responses: 81 | '200': 82 | description: TODO 83 | """ 84 | if not DAY_REGEXP.match(date): 85 | raise BadRequest("Invalid date format") 86 | 87 | since = date 88 | until = (datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)).strftime( 89 | "%Y-%m-%d" 90 | ) 91 | return redirect( 92 | "https://explorer.ooni.org/search?until={}&since={}".format(until, since), 301 93 | ) 94 | 95 | 96 | @pages_blueprint.route("/files/by_country") 97 | def files_by_country() -> Response: 98 | """TODO 99 | --- 100 | responses: 101 | '200': 102 | description: TODO 103 | """ 104 | return redirect("https://explorer.ooni.org/search", 301) 105 | 106 | 107 | @pages_blueprint.route("/files/by_country/") 108 | def files_in_country(country_code) -> 
Response: 109 | """TODO 110 | --- 111 | responses: 112 | '200': 113 | description: TODO 114 | """ 115 | if len(country_code) != 2: 116 | raise BadRequest("Country code must be two characters") 117 | country_code = country_code.upper() 118 | return redirect( 119 | "https://explorer.ooni.org/search?probe_cc={}".format(country_code), 301 120 | ) 121 | 122 | 123 | @pages_blueprint.route("/robots.txt") 124 | def robots_txt() -> Response: 125 | """Robots.txt 126 | --- 127 | responses: 128 | '200': 129 | description: robots.txt content 130 | """ 131 | txt = """ 132 | User-agent: * 133 | Disallow: /api/_ 134 | Disallow: /api/v1/aggregation 135 | Disallow: /api/v1/measurement_meta 136 | Disallow: /api/v1/raw_measurement 137 | Disallow: /api/v1/test-list/urls 138 | Disallow: /api/v1/torsf_stats 139 | Disallow: /files 140 | Disallow: /stats 141 | Disallow: /201 142 | Disallow: /202 143 | Crawl-delay: 300 144 | """ 145 | resp = make_response(txt) 146 | resp.headers["Content-type"] = "text/plain" 147 | resp.cache_control.max_age = 86400 148 | return resp 149 | 150 | 151 | # These two are needed to avoid breaking older URLs 152 | @pages_blueprint.route("//") 153 | def backward_compatible_download(date, report_file) -> Response: 154 | """Legacy entry point 155 | --- 156 | responses: 157 | '200': 158 | description: TODO 159 | """ 160 | if DAY_REGEXP.match(date) and report_file.endswith(".json"): 161 | # XXX maybe do some extra validation on report_file 162 | return redirect("/files/download/%s" % report_file) 163 | raise NotFound 164 | 165 | 166 | @pages_blueprint.route("/") 167 | def backward_compatible_by_date(date) -> Response: 168 | """TODO 169 | --- 170 | responses: 171 | '200': 172 | description: TODO 173 | """ 174 | if DAY_REGEXP.match(date): 175 | since = date 176 | until = (datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)).strftime( 177 | "%Y-%m-%d" 178 | ) 179 | return redirect( 180 | "https://explorer.ooni.org/search?until={}&since={}".format(until, since), 181 | 
301, 182 | ) 183 | raise NotFound 184 | -------------------------------------------------------------------------------- /newapi/ooniapi/pages/docs.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, render_template 2 | 3 | # prefix: /api 4 | api_docs_blueprint = Blueprint("api_docs", "measurements") 5 | 6 | 7 | @api_docs_blueprint.route("/", methods=["GET"]) 8 | def api_docs(): 9 | return render_template("api.html") 10 | -------------------------------------------------------------------------------- /newapi/ooniapi/rate_limit_quotas.py: -------------------------------------------------------------------------------- 1 | """ 2 | Rate limiter and quota system. 3 | 4 | Framework-independent rate limiting mechanism that provides: 5 | * IP address and token-based accounting 6 | * customizable quotas based on IP address and token 7 | * late limiting based on resource usage (time spent on API calls) 8 | * bucketing based on day, week, month 9 | * statistics 10 | * metrics 11 | * fast in-memory storage 12 | 13 | Also provides a connector for Flask 14 | 15 | """ 16 | 17 | # TODO: add token-based limiting 18 | 19 | import ipaddress 20 | import struct 21 | import time 22 | from typing import Dict, List, Optional, Tuple, Union, Set 23 | from sys import maxsize 24 | 25 | import lmdb # debdeps: python3-lmdb 26 | 27 | from ooniapi.config import metrics 28 | 29 | LMDB_DIR = "/var/lib/ooniapi/lmdb" 30 | 31 | IpAddress = Union[ipaddress.IPv4Address, ipaddress.IPv6Address] 32 | IpAddrBucket = Dict[IpAddress, float] 33 | IpAddrBuckets = Tuple[IpAddrBucket, IpAddrBucket, IpAddrBucket] 34 | TokenBucket = Dict[str, float] 35 | TokenBuckets = Tuple[TokenBucket, TokenBucket, TokenBucket] 36 | StrBytes = Union[bytes, str] 37 | 38 | 39 | def lm_ipa_to_b(ipaddr: IpAddress) -> bytes: 40 | return ipaddr.packed 41 | 42 | def lm_sec_to_b(v: Union[float, int]) -> bytes: 43 | return struct.pack("I", int(v * 1000)) 44 | 45 | def 
lm_b_to_sec(raw: bytes) -> float: 46 | return struct.unpack("I", raw)[0] / 1000.0 47 | 48 | def lm_b_to_str_ipa(raw_ipa: bytes) -> str: 49 | if len(raw_ipa) == 4: 50 | return str(ipaddress.IPv4Address(raw_ipa)) 51 | return str(ipaddress.IPv6Address(raw_ipa)) 52 | 53 | 54 | class LMDB: 55 | def __init__(self, dbnames: tuple): 56 | self._env = lmdb.open(LMDB_DIR, metasync=False, max_dbs=10) 57 | dbnames2 = list(dbnames) 58 | dbnames2.append("meta") 59 | self._dbnames = dbnames2 60 | self._dbs: Dict[str, lmdb._Database] = {} 61 | for dbname in dbnames2: 62 | self._dbs[dbname] = self._env.open_db(dbname.encode()) 63 | 64 | def purge_databases(self): 65 | """Used for testing""" 66 | for dbname in self._dbnames: 67 | self._dbs[dbname] = self._env.open_db(dbname.encode()) 68 | with self._env.begin(db=self._dbs[dbname], write=True) as txn: 69 | txn.drop(self._dbs[dbname], delete=False) 70 | 71 | def consume_quota(self, dbname: str, ipa: bytes, used_s: float, limit_s: int): 72 | db = self._dbs[dbname] 73 | with self._env.begin(db=db, write=True) as txn: 74 | raw_val = txn.get(ipa) 75 | if raw_val is not None: 76 | v = lm_b_to_sec(raw_val) 77 | else: 78 | v = float(limit_s) 79 | 80 | v -= used_s 81 | if v < 0.0: 82 | v = 0.0 83 | txn.put(ipa, lm_sec_to_b(v)) 84 | 85 | return v 86 | 87 | def write_tnx(self, dbname="", db=None): 88 | if dbname: 89 | db = self._dbs[dbname] 90 | return self._env.begin(db=db, write=True) 91 | 92 | 93 | class Limiter: 94 | def __init__( 95 | self, 96 | limits: dict, 97 | token_check_callback=None, 98 | ipaddr_methods=["X-Real-Ip", "socket"], 99 | whitelisted_ipaddrs=Optional[List[str]], 100 | unmetered_pages=Optional[List[str]], 101 | ): 102 | # Bucket sequence: month, week, day 103 | labels = ("ipaddr_per_month", "ipaddr_per_week", "ipaddr_per_day") 104 | self._hours = [30 * 24, 7 * 24, 1 * 24] 105 | self._labels = labels 106 | self._ipaddr_limits = [limits.get(x, None) for x in labels] 107 | self._token_limits = [limits.get(x, None) for x in 
labels] 108 | self._lmdb = LMDB(dbnames=labels) 109 | self._token_buckets = ({}, {}, {}) # type: TokenBuckets 110 | self._token_check_callback = token_check_callback 111 | self._ipaddr_extraction_methods = ipaddr_methods 112 | self._last_quota_update_time = time.monotonic() 113 | self._whitelisted_ipaddrs: Set[IpAddress] = set() 114 | self._unmetered_pages_globs: Set[IpAddress] = set() 115 | self._unmetered_pages: Set[IpAddress] = set() 116 | for p in unmetered_pages: 117 | if p.endswith("*"): 118 | self._unmetered_pages_globs.add(p.rstrip("*")) 119 | else: 120 | self._unmetered_pages.add(p) 121 | for ipa in whitelisted_ipaddrs or []: 122 | self._whitelisted_ipaddrs.add(ipaddress.ip_address(ipa)) 123 | 124 | self.increment_quota_counters(1.0) 125 | self.increment_quota_counters_if_needed() 126 | 127 | def increment_quota_counters(self, tdelta_s: float): 128 | """Increment quota counters for every tracked ipaddr. When they exceed 129 | the default value simply delete the key""" 130 | if tdelta_s <= 0: 131 | return 132 | 133 | iterable = zip(self._hours, self._ipaddr_limits, self.ipaddr_buckets) 134 | for hours, limit, db in iterable: 135 | # limit, vdelta are in seconds 136 | vdelta_s = tdelta_s * limit / hours / 3600 137 | with self._lmdb._env.begin(db=db, write=True) as txn: 138 | i = txn.cursor().iternext() 139 | for raw_ipa, raw_val in i: 140 | v = lm_b_to_sec(raw_val) + vdelta_s 141 | if v >= limit: 142 | txn.pop(raw_ipa) # drop from DB: go back to default 143 | else: 144 | txn.put(raw_ipa, lm_sec_to_b(v)) 145 | 146 | def increment_quota_counters_if_needed(self): 147 | t = time.monotonic() 148 | tdelta_s = t - self._last_quota_update_time 149 | if tdelta_s > 3600: 150 | self.increment_quota_counters(tdelta_s) 151 | self._last_quota_update_time = t 152 | 153 | def consume_quota( 154 | self, elapsed_s: float, ipaddr: Optional[IpAddress] = None, token=None 155 | ) -> float: 156 | """Consume quota in seconds. 
Return the lowest remaining value in 157 | seconds""" 158 | assert ipaddr or token 159 | if not ipaddr: 160 | raise NotImplementedError() 161 | 162 | remaining: float = maxsize 163 | z = zip(self._ipaddr_limits, self._labels) 164 | for limit_s, dbname in z: 165 | ipa = lm_ipa_to_b(ipaddr) 166 | v = self._lmdb.consume_quota(dbname, ipa, elapsed_s, limit_s) 167 | if v < remaining: 168 | remaining = v 169 | 170 | return remaining 171 | 172 | def is_quota_available(self, ipaddr=None, token=None) -> bool: 173 | """Checks if all quota buckets for an ipaddr/token are > 0""" 174 | for db in self.ipaddr_buckets: 175 | with self._lmdb._env.begin(db=db, write=False) as txn: 176 | raw_val = txn.get(lm_ipa_to_b(ipaddr)) 177 | if raw_val is None: 178 | continue 179 | if lm_b_to_sec(raw_val) <= 0: 180 | return False 181 | 182 | return True 183 | 184 | def is_ipaddr_whitelisted(self, ipaddr: IpAddress) -> bool: 185 | return ipaddr in self._whitelisted_ipaddrs 186 | 187 | def is_page_unmetered(self, path) -> bool: 188 | if path in self._unmetered_pages: 189 | return True 190 | for u in self._unmetered_pages_globs: 191 | if path.startswith(u): 192 | return True 193 | 194 | return False 195 | 196 | def get_lowest_daily_quotas_summary(self, n=20) -> List[Tuple[str, float]]: 197 | """Returns a summary of daily quotas with the lowest values""" 198 | db = self._lmdb._dbs["ipaddr_per_day"] 199 | tmp = [] 200 | with self._lmdb._env.begin(db=db, write=False) as txn: 201 | i = txn.cursor().iternext() 202 | for raw_ipa, raw_val in i: 203 | val = lm_b_to_sec(raw_val) 204 | ipa = lm_b_to_str_ipa(raw_ipa) 205 | tmp.append((val, ipa)) 206 | 207 | tmp.sort() 208 | tmp = tmp[:n] 209 | return [(ipa, val) for val, ipa in tmp] 210 | 211 | @property 212 | def ipaddr_buckets(self): 213 | return [self._lmdb._dbs[lab] for lab in self._labels] 214 | 215 | 216 | # # Flask-specific code # # 217 | 218 | from flask import request, current_app 219 | import flask 220 | 221 | 222 | class FlaskLimiter: 223 | def 
_get_client_ipaddr(self) -> IpAddress: 224 | # https://github.com/alisaifee/flask-limiter/issues/41 225 | for m in self._limiter._ipaddr_extraction_methods: 226 | if m == "X-Forwarded-For": 227 | raise NotImplementedError("X-Forwarded-For ") 228 | 229 | elif m == "X-Real-Ip": 230 | ipaddr = request.headers.get("X-Real-Ip", None) 231 | if ipaddr: 232 | return ipaddress.ip_address(ipaddr) 233 | 234 | elif m == "socket": 235 | ipaddr = request.remote_addr 236 | if ipaddr: 237 | return ipaddress.ip_address(ipaddr) 238 | 239 | else: 240 | raise NotImplementedError(f"IP address method {m} is unknown") 241 | 242 | methods = ",".join(self._limiter._ipaddr_extraction_methods) 243 | raise Exception(f"Unable to detect IP address using {methods}") 244 | 245 | def _check_limits_callback(self): 246 | """Check rate limits before processing a request 247 | Refresh quota counters when needed 248 | """ 249 | if self._disabled: # used in integration tests 250 | return 251 | 252 | metrics.incr("busy_workers_count") 253 | self._request_start_time = time.monotonic() 254 | 255 | ipaddr = self._get_client_ipaddr() 256 | if self._limiter.is_ipaddr_whitelisted(ipaddr): 257 | return 258 | 259 | if self._limiter.is_page_unmetered(request.path): 260 | return 261 | 262 | self._limiter.increment_quota_counters_if_needed() 263 | # token = request.headers.get("Token", None) 264 | # if token: 265 | # check token validity 266 | if not self._limiter.is_quota_available(ipaddr=ipaddr): 267 | return "429 error", 429 268 | 269 | def _after_request_callback(self, response): 270 | """Consumes quota and injects HTTP headers when responding to a request 271 | """ 272 | if self._disabled: # used in integration tests 273 | return response 274 | 275 | log = current_app.logger 276 | try: 277 | tdelta = time.monotonic() - self._request_start_time 278 | # TODO: implement API call timing metrics 279 | # timer_path = request.path.split("?", 1)[0] 280 | # timer_path = "apicall_" + timer_path.replace("/", "__") 281 | 
# metrics.timing(timer_path, int(tdelta * 1000)) # ms 282 | 283 | ipaddr = self._get_client_ipaddr() 284 | if self._limiter.is_ipaddr_whitelisted(ipaddr): 285 | return response 286 | 287 | if self._limiter.is_page_unmetered(request.path): 288 | return 289 | 290 | remaining = self._limiter.consume_quota(tdelta, ipaddr=ipaddr) 291 | response.headers.add("X-RateLimit-Remaining", int(remaining)) 292 | metrics.decr("busy_workers_count") 293 | 294 | except Exception as e: 295 | log.error(str(e), exc_info=True) 296 | 297 | finally: 298 | return response 299 | 300 | def __init__( 301 | self, 302 | app, 303 | limits: dict, 304 | token_check_callback=None, 305 | ipaddr_methods=["X-Real-Ip", "socket"], 306 | whitelisted_ipaddrs=None, 307 | unmetered_pages=None, 308 | ): 309 | """""" 310 | self._limiter = Limiter( 311 | limits, 312 | token_check_callback=token_check_callback, 313 | ipaddr_methods=ipaddr_methods, 314 | whitelisted_ipaddrs=whitelisted_ipaddrs, 315 | unmetered_pages=unmetered_pages, 316 | ) 317 | if app.extensions.get("limiter"): 318 | raise Exception("The Flask app already has an extension named 'limiter'") 319 | 320 | app.before_request(self._check_limits_callback) 321 | app.after_request(self._after_request_callback) 322 | app.extensions["limiter"] = self 323 | self._disabled = False 324 | 325 | def get_lowest_daily_quotas_summary(self, n=20) -> List[Tuple[str, float]]: 326 | return self._limiter.get_lowest_daily_quotas_summary(n) 327 | -------------------------------------------------------------------------------- /newapi/ooniapi/static/images/API-Horizontal-MonochromeInverted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ooni/api/0901aeac10caedf4dbbc6ab9bee4cc456ff16e79/newapi/ooniapi/static/images/API-Horizontal-MonochromeInverted.png -------------------------------------------------------------------------------- 
/newapi/ooniapi/static/images/API-Horizontal-MonochromeInverted@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ooni/api/0901aeac10caedf4dbbc6ab9bee4cc456ff16e79/newapi/ooniapi/static/images/API-Horizontal-MonochromeInverted@2x.png -------------------------------------------------------------------------------- /newapi/ooniapi/static/images/API-Horizontal-MonochromeInvertedSmall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ooni/api/0901aeac10caedf4dbbc6ab9bee4cc456ff16e79/newapi/ooniapi/static/images/API-Horizontal-MonochromeInvertedSmall.png -------------------------------------------------------------------------------- /newapi/ooniapi/static/images/API-Horizontal-MonochromeInvertedSmall@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ooni/api/0901aeac10caedf4dbbc6ab9bee4cc456ff16e79/newapi/ooniapi/static/images/API-Horizontal-MonochromeInvertedSmall@2x.png -------------------------------------------------------------------------------- /newapi/ooniapi/templates/400.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block uncontained %} 4 | 5 |
6 | 9 |
10 |

11 | Bad Request 12 |

13 |

14 | {{exception.description}} 15 |

16 |
17 | 18 |
19 | {% endblock %} 20 | -------------------------------------------------------------------------------- /newapi/ooniapi/templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block uncontained %} 4 | 5 |
6 | 9 |
10 |

11 | We sailed the seven seas looking for this page, but we didn't find it. 12 |

13 |

14 | Can we interest you with some internet censorship 15 | measurements instead? 16 |

17 |
18 | 19 |
20 | {% endblock %} 21 | -------------------------------------------------------------------------------- /newapi/ooniapi/templates/api.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block title %}Documentation{% endblock%} 4 | 5 | {% block head_css %} 6 | {% endblock %} 7 | 8 | {% block body %} 9 | 10 | 11 |

Loading...

12 |
13 | 14 | 15 | {% endblock %} 16 | -------------------------------------------------------------------------------- /newapi/ooniapi/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | OONI API{% if self.title() %} - {% endif %}{% block title %}{% endblock %} 5 | 6 | {% block head_meta %} 7 | 8 | {% endblock %} 9 | 10 | {% block head_css %} 11 | 12 | {% endblock %} 13 | 14 | {% block head %} 15 | {% endblock %} 16 | 17 | {% block head_js %} 18 | {% endblock %} 19 | 20 | 21 | 22 | {% block body %} 23 | 24 | {% block navbar %} 25 | {% endblock %} 26 | 27 | {% block uncontained %} 28 | {% endblock %} 29 | 30 |
31 | 32 | {% block content_fluid %} 33 | {% endblock %} 34 | 35 |
36 | 37 | {% block footer %} 38 | {% include 'footer.html' %} 39 | {% endblock %} 40 | 41 | {% block tail_js %} 42 | {% endblock %} 43 | 44 | {% endblock %} 45 | 46 | 47 | -------------------------------------------------------------------------------- /newapi/ooniapi/templates/footer.html: -------------------------------------------------------------------------------- 1 | 44 | -------------------------------------------------------------------------------- /newapi/ooniapi/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block navbar %} 4 | {% endblock %} 5 | 6 | {% block uncontained %} 7 |
8 |
9 | 14 |
15 |
16 |
17 |
18 |

Welcome to the OONI API

19 |

Dynamically updated every day, the OONI API enables researchers and 20 | data scientists to analyze all data collected from OONI Probes around the 21 | world.

22 |

Download and search OONI Probe network measurements.

23 |

As the OONI API is not designed for batch consumption of 24 | OONI data, please use it for a modest request rate. If you are 25 | interested in analyzing OONI data in batch, please read 26 | mining OONI data. 27 | If you need help, contact the OONI team. 28 |

29 | READ THE API DOCS 30 |
31 |
32 | {% endblock %} 33 | -------------------------------------------------------------------------------- /newapi/ooniapi/utils.py: -------------------------------------------------------------------------------- 1 | 2 | from datetime import datetime 3 | from flask import make_response, Response 4 | from flask.json import jsonify 5 | 6 | ISO_TIMESTAMP_SHORT = "%Y%m%dT%H%M%SZ" 7 | OONI_EPOCH = datetime(2012, 12, 5) 8 | 9 | INTERVAL_UNITS = dict(s=1, m=60, h=3600, d=86400) 10 | 11 | 12 | def cachedjson(interval: str, *a, **kw) -> Response: 13 | """Jsonify and add cache expiration""" 14 | resp = jsonify(*a, **kw) 15 | unit = interval[-1] 16 | value = int(interval[:-1]) 17 | resp.cache_control.max_age = value * INTERVAL_UNITS[unit] 18 | return resp 19 | 20 | 21 | def nocachejson(*a, **kw) -> Response: 22 | """Jsonify and explicitely prevent caching""" 23 | resp = jsonify(*a, **kw) 24 | resp.cache_control.max_age = 0 25 | resp.cache_control.no_cache = True 26 | return resp 27 | 28 | 29 | def jerror(msg, code=400, **kw) -> Response: 30 | resp = make_response(jsonify(error=msg, **kw), code) 31 | resp.cache_control.no_cache = True 32 | return resp 33 | -------------------------------------------------------------------------------- /newapi/ooniapi/views.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | import traceback 8 | 9 | from flask import current_app, render_template 10 | from flask import make_response 11 | from flask.json import jsonify 12 | 13 | from ooniapi.auth import auth_blueprint 14 | from ooniapi.citizenlab import cz_blueprint 15 | from ooniapi.private import api_private_blueprint 16 | from ooniapi.measurements import api_msm_blueprint 17 | from ooniapi.pages import pages_blueprint 18 | from ooniapi.probe_services import 
def render_generic_exception(exception):
    """Log a traceback and return code 500 with a simple JSON body.
    The CORS header is set as usual. Without this, an error could lead to
    browsers caching a response without the correct CORS header.
    """
    # TODO: render_template 500.html instead?
    current_app.logger.error(f"Exception: {exception}")
    current_app.logger.error(
        "".join(traceback.format_tb(exception.__traceback__))
    )
    try:
        return make_response(jsonify(error=str(exception)), 500)
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed SystemExit
        # and KeyboardInterrupt. jsonify itself can fail (e.g. during
        # teardown); fall back to a plain-text 500.
        return make_response("unhandled error", 500)


def page_not_found(e):
    """Render the HTML 404 page."""
    return render_template("404.html"), 404


def bad_request(e):
    """Render the HTML 400 page, showing the offending exception."""
    return render_template("400.html", exception=e), 400


def register(app):
    """Register all blueprints and error handlers on the Flask app."""
    # Measurements API:
    app.register_blueprint(api_msm_blueprint, url_prefix="/api")
    app.register_blueprint(auth_blueprint, url_prefix="")
    app.register_blueprint(cz_blueprint, url_prefix="")

    # Private API
    app.register_blueprint(api_private_blueprint, url_prefix="/api/_")

    # The index is here:
    app.register_blueprint(pages_blueprint, url_prefix="")

    # Probe services
    app.register_blueprint(probe_services_blueprint, url_prefix="")
    app.register_blueprint(prio_bp, url_prefix="")

    # Custom error handlers are disabled under pytest so that tracebacks
    # reach the test runner unmodified.
    if "PYTEST_CURRENT_TEST" not in os.environ:
        app.register_error_handler(Exception, render_generic_exception)
        app.errorhandler(404)(page_not_found)
        app.errorhandler(400)(bad_request)
#!/usr/bin/python

from setuptools import setup, find_packages

setup(
    name="ooniapi",
    packages=find_packages(),
    include_package_data=True,
    zip_safe=False,
    # BUG FIX: the console script pointed at "measurements.cli:cli",
    # the pre-rename package name; the CLI lives in ooniapi/cli/__init__.py
    entry_points={"console_scripts": ["ooniapi = ooniapi.cli:cli"]},
    scripts=["ooni_api_uploader.py", "database_upgrade_schema.py"],
    package_data={"ooniapi": ("*.adoc", "templates/*", "static/*/*")},
)
@pytest.fixture
def client(app):
    """
    Overriding the `client` fixture from pytest_flask to fix this bug:
    https://github.com/pytest-dev/pytest-flask/issues/42

    BUG FIX: `@pytest.yield_fixture` is deprecated and removed in pytest 7;
    plain `@pytest.fixture` has supported yield fixtures since pytest 3.0.
    """
    with app.test_client() as client:
        yield client

    # Pop any request contexts preserved by debug mode so they do not
    # leak into the next test.
    while True:
        top = flask._request_ctx_stack.top
        if top is not None and top.preserved:
            top.pop()
        else:
            break
False 58 | 59 | 60 | def pytest_addoption(parser): 61 | parser.addoption("--ghpr", action="store_true", help="enable GitHub integ tests") 62 | parser.addoption("--proddb", action="store_true", help="uses data from prod DB") 63 | parser.addoption("--create-db", action="store_true", help="populate the DB") 64 | parser.addoption( 65 | "--inject-msmts", action="store_true", help="populate the DB with fresh data" 66 | ) 67 | 68 | 69 | def pytest_configure(config): 70 | pytest.run_ghpr = config.getoption("--ghpr") 71 | pytest.proddb = config.getoption("--proddb") 72 | assert pytest.proddb is False, "--proddb is disabled" 73 | pytest.create_db = config.getoption("--create-db") 74 | pytest.inject_msmts = config.getoption("--inject-msmts") 75 | 76 | 77 | @pytest.fixture(scope="session") 78 | def setup_database_part_1(): 79 | # Create database and users. 80 | # Executed as a dependency of setup_database_part_2 81 | # Drop and recreate database if exists. 82 | if not pytest.create_db: 83 | return 84 | 85 | 86 | @pytest.fixture(scope="session") 87 | def checkout_pipeline(tmpdir_factory): 88 | """Clone pipeline repo to then run fastpath from S3 and citizenlab importer""" 89 | if not pytest.create_db and not pytest.inject_msmts: 90 | return 91 | d = tmpdir_factory.mktemp("pipeline") 92 | if d.isdir(): 93 | shutil.rmtree(d) 94 | cmd = f"git clone --depth 1 https://github.com/ooni/pipeline -q {d}" 95 | # cmd = f"git clone --depth 1 https://github.com/ooni/pipeline --branch reprocessor-ch -q {d}" 96 | cmd = cmd.split() 97 | runcmd(cmd, Path('.')) 98 | return Path(d) 99 | 100 | 101 | def run_clickhouse_sql_scripts(app): 102 | log = app.logger 103 | clickhouse_url = app.config["CLICKHOUSE_URL"] 104 | click = Clickhouse.from_url(clickhouse_url) 105 | tables = click.execute("SHOW TABLES") 106 | for row in tables: 107 | if row[0] == "fastpath": 108 | return 109 | 110 | for fn in ["1_schema", "2_fixtures"]: 111 | sql_f = Path(f"tests/integ/clickhouse_{fn}.sql") 112 | print(f"Running 
def runcmd(cmd: list, wd: Path) -> None:
    """Run an external command in directory *wd*; exit the process on failure.

    :param cmd: command as an argv list (BUG FIX: was annotated `str`,
        but every caller passes a list)
    :param wd: working directory for the child process
    """
    print("Running " + " ".join(cmd))
    p = subprocess.run(cmd, cwd=wd)
    if p.returncode != 0:
        # BUG FIX: output is not captured, so p.stdout / p.stderr are
        # always None — the child already wrote to our stdout/stderr.
        # Printing them only printed the string "None".
        print("=" * 60)
        print(f"Command failed with exit code {p.returncode}")
        print("=" * 60)
        sys.exit(1)
2021-07-9") 178 | _run_fastpath(fpdir, "2021-07-09", "2021-07-10", 10000) 179 | 180 | 181 | @pytest.fixture(autouse=True, scope="session") 182 | def setup_database_part_2(setup_database_part_1, app, checkout_pipeline): 183 | # Create tables, indexes and so on 184 | # This part needs the "app" object 185 | if not pytest.create_db: 186 | return 187 | 188 | clickhouse_url = app.config["CLICKHOUSE_URL"] 189 | assert any([x in clickhouse_url for x in ("localhost", "clickhouse")]) 190 | log = app.logger 191 | run_clickhouse_sql_scripts(app) 192 | run_fingerprint_update(log, checkout_pipeline, clickhouse_url) 193 | run_fastpath(log, checkout_pipeline, clickhouse_url) 194 | 195 | 196 | @pytest.fixture(autouse=True, scope="session") 197 | def connect_to_clickhouse(app): 198 | clickhouse_url = app.config["CLICKHOUSE_URL"] 199 | if clickhouse_url: 200 | app.click = Clickhouse.from_url(clickhouse_url) 201 | 202 | 203 | @pytest.fixture(autouse=True, scope="session") 204 | def inject_msmts(app, checkout_pipeline): 205 | if not pytest.inject_msmts: 206 | return 207 | 208 | 209 | # # Fixtures used by test files # # 210 | 211 | 212 | @pytest.fixture() 213 | def log(app): 214 | return app.logger 215 | 216 | 217 | @pytest.fixture() 218 | def citizenlab_tblready(client, app): 219 | # Ensure the citizenlab table is populated 220 | r = app.click.execute("SELECT count() FROM citizenlab")[0][0] 221 | assert r > 2 222 | 223 | 224 | @pytest.fixture 225 | def url_prio_tblready(app): 226 | log = app.logger 227 | # Ensure the url_priorities table is populated 228 | r = app.click.execute("SELECT count() FROM url_priorities")[0][0] 229 | if r > 5: 230 | return 231 | 232 | rules = [ 233 | ("NEWS", 100), 234 | ("POLR", 100), 235 | ("HUMR", 100), 236 | ("LGBT", 100), 237 | ("ANON", 100), 238 | ("MMED", 80), 239 | ("SRCH", 80), 240 | ("PUBH", 80), 241 | ("REL", 60), 242 | ("XED", 60), 243 | ("HOST", 60), 244 | ("ENV", 60), 245 | ("FILE", 40), 246 | ("CULTR", 40), 247 | ("IGO", 40), 248 | ("GOVT", 
40), 249 | ("DATE", 30), 250 | ("HATE", 30), 251 | ("MILX", 30), 252 | ("PROV", 30), 253 | ("PORN", 30), 254 | ("GMB", 30), 255 | ("ALDR", 30), 256 | ("GAME", 20), 257 | ("MISC", 20), 258 | ("HACK", 20), 259 | ("ECON", 20), 260 | ("COMM", 20), 261 | ("CTRL", 20), 262 | ("COMT", 100), 263 | ("GRP", 100), 264 | ] 265 | rows = [ 266 | { 267 | "sign": 1, 268 | "category_code": ccode, 269 | "cc": "*", 270 | "domain": "*", 271 | "url": "*", 272 | "priority": prio, 273 | } 274 | for ccode, prio in rules 275 | ] 276 | # The url_priorities table is CollapsingMergeTree 277 | query = """INSERT INTO url_priorities 278 | (sign, category_code, cc, domain, url, priority) VALUES 279 | """ 280 | log.info("Populating url_priorities") 281 | app.click.execute(query, rows) 282 | app.click.execute("OPTIMIZE TABLE url_priorities FINAL") 283 | -------------------------------------------------------------------------------- /newapi/tests/functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ooni/api/0901aeac10caedf4dbbc6ab9bee4cc456ff16e79/newapi/tests/functional/__init__.py -------------------------------------------------------------------------------- /newapi/tests/functional/test_private_explorer.py: -------------------------------------------------------------------------------- 1 | """ 2 | These are tests for the private API endpoints that are relevant to OONI 3 | Explorer. 
4 | """ 5 | 6 | country_query = {"probe_cc": "IT"} 7 | 8 | country_network_query = {"probe_cc": "IT", "probe_asn": 12874} 9 | 10 | 11 | def test_api_private_countries(client): 12 | response = client.get("/api/_/countries") 13 | assert response.status_code == 200 14 | 15 | 16 | def test_api_private_global_overview(client): 17 | response = client.get("/api/_/global_overview") 18 | assert response.status_code == 200 19 | 20 | 21 | def test_api_private_test_coverage(client): 22 | response = client.get("/api/_/test_coverage", query_string={"probe_cc": "IT"}) 23 | assert response.status_code == 200 24 | 25 | 26 | def test_api_private_test_coverage_test_groups(client): 27 | response = client.get( 28 | "/api/_/test_coverage", 29 | query_string={"probe_cc": "CA", "test_groups": "websites,performance"}, 30 | ) 31 | assert response.status_code == 200 32 | #assert len(response.json["test_coverage"]) > 10 33 | #assert len(response.json["network_coverage"]) > 10 34 | 35 | 36 | def test_api_private_country_overview(client): 37 | response = client.get("/api/_/country_overview", query_string={"probe_cc": "US"}) 38 | assert response.status_code == 200 39 | assert "first_bucket_date" in response.json 40 | 41 | 42 | def test_api_private_website_networks(client): 43 | response = client.get("/api/_/website_networks", query_string={"probe_cc": "US"}) 44 | assert response.status_code == 200 45 | #assert len(response.json["results"]) > 10, response 46 | 47 | 48 | def test_api_private_website_urls(client): 49 | qs = {"probe_cc": "IT", "probe_asn": 12874} 50 | response = client.get( "/api/_/website_urls", query_string=qs) 51 | assert response.status_code == 200 52 | assert "metadata" in response.json 53 | #assert len(response.json["results"]) > 5, response 54 | 55 | 56 | def test_api_private_website_stats(client): 57 | response = client.get( 58 | "/api/_/website_stats", 59 | query_string={ 60 | "probe_cc": "IT", 61 | "probe_asn": 12874, 62 | "input": "http://demonoid.ph", 63 | }, 64 | ) 65 | 
@patch("ooniapi.probe_services.extract_probe_ipaddr")
def test_round_robin_web_test_helpers(mock):
    # With no probe IP address available the helper falls back to a
    # fixed ordering starting at 1
    expected_fallback = [
        {"address": f"https://{n}.th.ooni.org", "type": "https"}
        for n in (1, 2, 3, 0)
    ]
    assert ooniapi.probe_services.round_robin_web_test_helpers() == expected_fallback

    # Tally which TH comes first across 200 different probe addresses
    tally = Counter()
    for octet in range(200):
        mock.return_value = f"1.2.3.{octet}"
        helpers = ooniapi.probe_services.round_robin_web_test_helpers()
        assert len(helpers) == 4
        tally[helpers[0]["address"]] += 1

    assert tally == Counter(
        {
            "https://0.th.ooni.org": 20,
            "https://1.th.ooni.org": 60,
            "https://2.th.ooni.org": 60,
            "https://3.th.ooni.org": 60,
        }
    )
24 | 25 | 26 | Use --show-capture=no to avoid multiple log chunks 27 | Use -s to show logs in real time 28 | -------------------------------------------------------------------------------- /newapi/tests/integ/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /newapi/tests/integ/bug355.sql: -------------------------------------------------------------------------------- 1 | ( 2 | SELECT 3 | measurement.input_no, 4 | measurement.measurement_start_time, 5 | report.test_start_time, 6 | concat('temp-id', '-', measurement.msm_no) AS measurement_id, 7 | measurement.report_no, 8 | coalesce(measurement.anomaly, FALSE) AS anomaly, 9 | coalesce(measurement.confirmed, FALSE) AS confirmed, 10 | coalesce(measurement.msm_failure, FALSE) AS msm_failure, 11 | coalesce('{}') AS scores, 12 | measurement.exc AS exc, 13 | measurement.residual_no, 14 | report.report_id, 15 | report.probe_cc, 16 | report.probe_asn, 17 | report.test_name, 18 | report.report_no, 19 | coalesce(input.input, NULL) AS input 20 | FROM 21 | measurement 22 | LEFT OUTER JOIN input ON measurement.input_no = input.input_no 23 | JOIN report ON report.report_no = measurement.report_no 24 | WHERE 25 | report.probe_cc = 'IQ' 26 | AND measurement.measurement_start_time <= '2019-12-06T00:00:00'::timestamp 27 | AND coalesce(measurement.confirmed, FALSE) = TRUE 28 | ORDER BY 29 | test_start_time DESC 30 | LIMIT 50 OFFSET 0) 31 | UNION ( 32 | SELECT 33 | coalesce(0) AS m_input_no, 34 | fastpath.measurement_start_time AS test_start_time, 35 | fastpath.measurement_start_time, 36 | concat('temp-fid-', fastpath.tid) AS measurement_id, 37 | coalesce(0) AS m_report_no, 38 | coalesce(FALSE) AS anomaly, 39 | coalesce(FALSE) AS confirmed, 40 | coalesce(FALSE) AS msm_failure, 41 | CAST(fastpath.scores AS VARCHAR) AS anon_2, 42 | coalesce(ARRAY[0]) AS exc, 43 | coalesce(0) AS residual_no, 44 | 
fastpath.report_id, 45 | fastpath.probe_cc, 46 | fastpath.probe_asn, 47 | fastpath.test_name, 48 | coalesce(0) AS report_no, 49 | fastpath.input AS input 50 | FROM 51 | fastpath 52 | WHERE 53 | fastpath.measurement_start_time <= '2019-12-06T00:00:00'::timestamp 54 | AND fastpath.probe_cc = 'IQ' 55 | ORDER BY 56 | test_start_time DESC 57 | LIMIT 50 OFFSET 0); 58 | 59 | -------------------------------------------------------------------------------- /newapi/tests/integ/clickhouse_1_schema.sql: -------------------------------------------------------------------------------- 1 | -- Create tables for Clickhouse integ tests 2 | 3 | -- Main tables 4 | 5 | CREATE TABLE default.fastpath 6 | ( 7 | `measurement_uid` String, 8 | `report_id` String, 9 | `input` String, 10 | `probe_cc` String, 11 | `probe_asn` Int32, 12 | `test_name` String, 13 | `test_start_time` DateTime, 14 | `measurement_start_time` DateTime, 15 | `filename` String, 16 | `scores` String, 17 | `platform` String, 18 | `anomaly` String, 19 | `confirmed` String, 20 | `msm_failure` String, 21 | `domain` String, 22 | `software_name` String, 23 | `software_version` String, 24 | `control_failure` String, 25 | `blocking_general` Float32, 26 | `is_ssl_expected` Int8, 27 | `page_len` Int32, 28 | `page_len_ratio` Float32, 29 | `server_cc` String, 30 | `server_asn` Int8, 31 | `server_as_name` String, 32 | `update_time` DateTime64(3) MATERIALIZED now64(), 33 | `test_version` String, 34 | `test_runtime` Float32, 35 | `architecture` String, 36 | `engine_name` String, 37 | `engine_version` String, 38 | `blocking_type` String 39 | ) 40 | ENGINE = ReplacingMergeTree 41 | ORDER BY (measurement_start_time, report_id, input) 42 | SETTINGS index_granularity = 8192; 43 | 44 | CREATE TABLE default.jsonl 45 | ( 46 | `report_id` String, 47 | `input` String, 48 | `s3path` String, 49 | `linenum` Int32, 50 | `measurement_uid` String 51 | ) 52 | ENGINE = MergeTree 53 | ORDER BY (report_id, input) 54 | SETTINGS index_granularity = 8192; 55 
| 56 | CREATE TABLE default.url_priorities ( 57 | `sign` Int8, 58 | `category_code` String, 59 | `cc` String, 60 | `domain` String, 61 | `url` String, 62 | `priority` Int32 63 | ) 64 | ENGINE = CollapsingMergeTree(sign) 65 | ORDER BY (category_code, cc, domain, url, priority) 66 | SETTINGS index_granularity = 1024; 67 | 68 | CREATE TABLE default.citizenlab 69 | ( 70 | `domain` String, 71 | `url` String, 72 | `cc` FixedString(32), 73 | `category_code` String 74 | ) 75 | ENGINE = ReplacingMergeTree 76 | ORDER BY (domain, url, cc, category_code) 77 | SETTINGS index_granularity = 4; 78 | 79 | CREATE TABLE default.citizenlab_flip AS default.citizenlab; 80 | 81 | CREATE TABLE test_groups ( 82 | `test_name` String, 83 | `test_group` String 84 | ) 85 | ENGINE = Join(ANY, LEFT, test_name); 86 | 87 | 88 | -- Auth 89 | 90 | CREATE TABLE accounts 91 | ( 92 | `account_id` FixedString(32), 93 | `role` String 94 | ) 95 | ENGINE = EmbeddedRocksDB 96 | PRIMARY KEY account_id; 97 | 98 | CREATE TABLE session_expunge 99 | ( 100 | `account_id` FixedString(32), 101 | `threshold` DateTime DEFAULT now() 102 | ) 103 | ENGINE = EmbeddedRocksDB 104 | PRIMARY KEY account_id; 105 | 106 | -- Materialized views 107 | 108 | CREATE MATERIALIZED VIEW default.counters_test_list 109 | ( 110 | `day` DateTime, 111 | `probe_cc` String, 112 | `input` String, 113 | `msmt_cnt` UInt64 114 | ) 115 | ENGINE = SummingMergeTree 116 | PARTITION BY day 117 | ORDER BY (probe_cc, input) 118 | SETTINGS index_granularity = 8192 AS 119 | SELECT 120 | toDate(measurement_start_time) AS day, 121 | probe_cc, 122 | input, 123 | count() AS msmt_cnt 124 | FROM default.fastpath 125 | INNER JOIN default.citizenlab ON fastpath.input = citizenlab.url 126 | WHERE (measurement_start_time < now()) AND (measurement_start_time > (now() - toIntervalDay(8))) AND (test_name = 'web_connectivity') 127 | GROUP BY 128 | day, 129 | probe_cc, 130 | input; 131 | 132 | CREATE MATERIALIZED VIEW default.counters_asn_test_list 133 | ( 134 | `week` 
DateTime, 135 | `probe_cc` String, 136 | `probe_asn` UInt64, 137 | `input` String, 138 | `msmt_cnt` UInt64 139 | ) 140 | ENGINE = SummingMergeTree 141 | ORDER BY (probe_cc, probe_asn, input) 142 | SETTINGS index_granularity = 8192 AS 143 | SELECT 144 | toStartOfWeek(measurement_start_time) AS week, 145 | probe_cc, 146 | probe_asn, 147 | input, 148 | count() AS msmt_cnt 149 | FROM default.fastpath 150 | INNER JOIN default.citizenlab ON fastpath.input = citizenlab.url 151 | WHERE (measurement_start_time < now()) AND (measurement_start_time > (now() - toIntervalDay(8))) AND (test_name = 'web_connectivity') 152 | GROUP BY 153 | week, 154 | probe_cc, 155 | probe_asn, 156 | input; 157 | 158 | CREATE TABLE msmt_feedback 159 | ( 160 | `measurement_uid` String, 161 | `account_id` String, 162 | `status` String, 163 | `update_time` DateTime64(3) MATERIALIZED now64() 164 | ) 165 | ENGINE = ReplacingMergeTree 166 | ORDER BY (measurement_uid, account_id) 167 | SETTINGS index_granularity = 4; 168 | 169 | CREATE TABLE default.fingerprints_dns 170 | ( 171 | `name` String, 172 | `scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6), 173 | `other_names` String, 174 | `location_found` String, 175 | `pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4), 176 | `pattern` String, 177 | `confidence_no_fp` UInt8, 178 | `expected_countries` String, 179 | `source` String, 180 | `exp_url` String, 181 | `notes` String 182 | ) 183 | ENGINE = EmbeddedRocksDB 184 | PRIMARY KEY name; 185 | 186 | CREATE TABLE default.fingerprints_http 187 | ( 188 | `name` String, 189 | `scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6, 'injb' = 7, 'prov' = 8), 190 | `other_names` String, 191 | `location_found` String, 192 | `pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4), 193 | `pattern` String, 194 | `confidence_no_fp` UInt8, 195 | `expected_countries` String, 196 | `source` String, 197 | `exp_url` String, 
-- Populate lookup tables

-- BUG FIX: a comma was missing between ('ndt', 'performance') and
-- ('web_connectivity', 'websites'), making the statement invalid SQL.
INSERT INTO test_groups (test_name, test_group) VALUES
('bridge_reachability', 'circumvention'),
('meek_fronted_requests_test', 'circumvention'),
('psiphon', 'circumvention'),
('riseupvpn', 'circumvention'),
('tcp_connect', 'circumvention'),
('tor', 'circumvention'),
('torsf', 'circumvention'),
('vanilla_tor', 'circumvention'),
('dnscheck', 'experimental'),
('urlgetter', 'experimental'),
('facebook_messenger', 'im'),
('signal', 'im'),
('telegram', 'im'),
('whatsapp', 'im'),
('dns_consistency', 'legacy'),
('http_host', 'legacy'),
('http_requests', 'legacy'),
('multi_protocol_traceroute', 'legacy'),
('http_header_field_manipulation', 'middlebox'),
('http_invalid_request_line', 'middlebox'),
('dash', 'performance'),
('ndt', 'performance'),
('web_connectivity', 'websites');
('www.facebook.com','http://www.facebook.com/somsakjeam/videos/1283095981743678/','th','POLR'); 13 | INSERT INTO citizenlab VALUES ('www.facebook.com','https://www.facebook.com/','ZZ','GRP'); 14 | INSERT INTO citizenlab VALUES ('facebook.com','http://facebook.com/','ua','GRP'); 15 | INSERT INTO citizenlab VALUES ('facebook.com','https://facebook.com/watch','jo','GRP'); 16 | INSERT INTO citizenlab VALUES ('twitter.com','http://twitter.com/ghonim','kw','POLR'); 17 | INSERT INTO citizenlab VALUES ('twitter.com','http://twitter.com/ghonim','so','POLR'); 18 | INSERT INTO citizenlab VALUES ('twitter.com','https://twitter.com/','ZZ','GRP'); 19 | 20 | -- get_measurement_meta integ tests 21 | INSERT INTO jsonl (report_id, input, s3path, linenum) VALUES ('20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds2K3', 'https://www.backtrack-linux.org/', 'raw/20210709/00/MY/webconnectivity/2021070900_MY_webconnectivity.n0.2.jsonl.gz', 35) 22 | 23 | 24 | -------------------------------------------------------------------------------- /newapi/tests/integ/data/psiphon_config.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /newapi/tests/integ/data/tor_targets.json: -------------------------------------------------------------------------------- 1 | { 2 | "128.31.0.39:9101": { 3 | "address": "128.31.0.39:9101", 4 | "name": "moria1", 5 | "fingerprint": "9695DFC35FFEB861329B9F1AB04C46397020CE31", 6 | "protocol": "or_port_dirauth" 7 | }, 8 | "128.31.0.39:9131": { 9 | "address": "128.31.0.39:9131", 10 | "name": "moria1", 11 | "fingerprint": "9695DFC35FFEB861329B9F1AB04C46397020CE31", 12 | "protocol": "dir_port" 13 | }, 14 | "86.59.21.38:443": { 15 | "address": "86.59.21.38:443", 16 | "name": "tor26", 17 | "fingerprint": "847B1F850344D7876491A54892F904934E4EB85D", 18 | "protocol": "or_port_dirauth" 19 | }, 20 | "86.59.21.38:80": { 21 | "address": 
"86.59.21.38:80", 22 | "name": "tor26", 23 | "fingerprint": "847B1F850344D7876491A54892F904934E4EB85D", 24 | "protocol": "dir_port" 25 | }, 26 | "45.66.33.45:443": { 27 | "address": "45.66.33.45:443", 28 | "name": "dizum", 29 | "fingerprint": "7EA6EAD6FD83083C538F44038BBFA077587DD755", 30 | "protocol": "or_port_dirauth" 31 | }, 32 | "45.66.33.45:80": { 33 | "address": "45.66.33.45:80", 34 | "name": "dizum", 35 | "fingerprint": "7EA6EAD6FD83083C538F44038BBFA077587DD755", 36 | "protocol": "dir_port" 37 | }, 38 | "66.111.2.131:9001": { 39 | "address": "66.111.2.131:9001", 40 | "name": "Serge", 41 | "fingerprint": "BA44A889E64B93FAA2B114E02C2A279A8555C533", 42 | "protocol": "or_port_dirauth" 43 | }, 44 | "66.111.2.131:9030": { 45 | "address": "66.111.2.131:9030", 46 | "name": "Serge", 47 | "fingerprint": "BA44A889E64B93FAA2B114E02C2A279A8555C533", 48 | "protocol": "dir_port" 49 | }, 50 | "131.188.40.189:443": { 51 | "address": "131.188.40.189:443", 52 | "name": "gabelmoo", 53 | "fingerprint": "F2044413DAC2E02E3D6BCF4735A19BCA1DE97281", 54 | "protocol": "or_port_dirauth" 55 | }, 56 | "131.188.40.189:80": { 57 | "address": "131.188.40.189:80", 58 | "name": "gabelmoo", 59 | "fingerprint": "F2044413DAC2E02E3D6BCF4735A19BCA1DE97281", 60 | "protocol": "dir_port" 61 | }, 62 | "193.23.244.244:443": { 63 | "address": "193.23.244.244:443", 64 | "name": "dannenberg", 65 | "fingerprint": "7BE683E65D48141321C5ED92F075C55364AC7123", 66 | "protocol": "or_port_dirauth" 67 | }, 68 | "193.23.244.244:80": { 69 | "address": "193.23.244.244:80", 70 | "name": "dannenberg", 71 | "fingerprint": "7BE683E65D48141321C5ED92F075C55364AC7123", 72 | "protocol": "dir_port" 73 | }, 74 | "171.25.193.9:80": { 75 | "address": "171.25.193.9:80", 76 | "name": "maatuska", 77 | "fingerprint": "BD6A829255CB08E66FBE7D3748363586E46B3810", 78 | "protocol": "or_port_dirauth" 79 | }, 80 | "171.25.193.9:443": { 81 | "address": "171.25.193.9:443", 82 | "name": "maatuska", 83 | "fingerprint": 
"BD6A829255CB08E66FBE7D3748363586E46B3810", 84 | "protocol": "dir_port" 85 | }, 86 | "154.35.175.225:443": { 87 | "address": "154.35.175.225:443", 88 | "name": "Faravahar", 89 | "fingerprint": "CF6D0AAFB385BE71B8E111FC5CFF4B47923733BC", 90 | "protocol": "or_port_dirauth" 91 | }, 92 | "154.35.175.225:80": { 93 | "address": "154.35.175.225:80", 94 | "name": "Faravahar", 95 | "fingerprint": "CF6D0AAFB385BE71B8E111FC5CFF4B47923733BC", 96 | "protocol": "dir_port" 97 | }, 98 | "199.58.81.140:443": { 99 | "address": "199.58.81.140:443", 100 | "name": "longclaw", 101 | "fingerprint": "74A910646BCEEFBCD2E874FC1DC997430F968145", 102 | "protocol": "or_port_dirauth" 103 | }, 104 | "199.58.81.140:80": { 105 | "address": "199.58.81.140:80", 106 | "name": "longclaw", 107 | "fingerprint": "74A910646BCEEFBCD2E874FC1DC997430F968145", 108 | "protocol": "dir_port" 109 | }, 110 | "204.13.164.118:443": { 111 | "address": "204.13.164.118:443", 112 | "name": "bastet", 113 | "fingerprint": "24E2F139121D4394C54B5BCC368B3B411857C413", 114 | "protocol": "or_port_dirauth" 115 | }, 116 | "204.13.164.118:80": { 117 | "address": "204.13.164.118:80", 118 | "name": "bastet", 119 | "fingerprint": "24E2F139121D4394C54B5BCC368B3B411857C413", 120 | "protocol": "dir_port" 121 | }, 122 | "3fa772a44e07856b4c70e958b2f6dc8a29450a823509d5dbbf8b884e7fb5bb9d": { 123 | "address": "192.95.36.142:443", 124 | "fingerprint": "CDF2E852BF539B82BD10E27E9115A31734E378C2", 125 | "params": { 126 | "cert": [ 127 | "qUVQ0srL1JI/vO6V6m/24anYXiJD3QP2HgzUKQtQ7GRqqUvs7P+tG43RtAqdhLOALP7DJQ" 128 | ], 129 | "iat-mode": [ 130 | "1" 131 | ] 132 | }, 133 | "protocol": "obfs4" 134 | }, 135 | "99e9adc8bba0d60982dbc655b5e8735d88ad788905c3713a39eff3224b617eeb": { 136 | "address": "38.229.1.78:80", 137 | "fingerprint": "C8CBDB2464FC9804A69531437BCF2BE31FDD2EE4", 138 | "params": { 139 | "cert": [ 140 | "Hmyfd2ev46gGY7NoVxA9ngrPF2zCZtzskRTzoWXbxNkzeVnGFPWmrTtILRyqCTjHR+s9dg" 141 | ], 142 | "iat-mode": [ 143 | "1" 144 | ] 145 | }, 146 | 
"protocol": "obfs4" 147 | }, 148 | "9d735c6e70512123ab2c2fe966446b2345b352c512e9fb359f4b1673236e4d4a": { 149 | "address": "38.229.33.83:80", 150 | "fingerprint": "0BAC39417268B96B9F514E7F63FA6FBA1A788955", 151 | "params": { 152 | "cert": [ 153 | "VwEFpk9F/UN9JED7XpG1XOjm/O8ZCXK80oPecgWnNDZDv5pdkhq1OpbAH0wNqOT6H6BmRQ" 154 | ], 155 | "iat-mode": [ 156 | "1" 157 | ] 158 | }, 159 | "protocol": "obfs4" 160 | }, 161 | "548eebff71da6128321c3bc1c3ec12b5bfff277ef5cde32709a33e207b57f3e2": { 162 | "address": "37.218.245.14:38224", 163 | "fingerprint": "D9A82D2F9C2F65A18407B1D2B764F130847F8B5D", 164 | "params": { 165 | "cert": [ 166 | "bjRaMrr1BRiAW8IE9U5z27fQaYgOhX1UCmOpg2pFpoMvo6ZgQMzLsaTzzQNTlm7hNcb+Sg" 167 | ], 168 | "iat-mode": [ 169 | "0" 170 | ] 171 | }, 172 | "protocol": "obfs4" 173 | }, 174 | "d2d6e34abeda851f7cd37138ffafcce992b2ccdb0f263eb90ab75d7adbd5eeba": { 175 | "address": "85.31.186.98:443", 176 | "fingerprint": "011F2599C0E9B27EE74B353155E244813763C3E5", 177 | "params": { 178 | "cert": [ 179 | "ayq0XzCwhpdysn5o0EyDUbmSOx3X/oTEbzDMvczHOdBJKlvIdHHLJGkZARtT4dcBFArPPg" 180 | ], 181 | "iat-mode": [ 182 | "0" 183 | ] 184 | }, 185 | "protocol": "obfs4" 186 | }, 187 | "b8de51da541ced804840b1d8fd24d5ff1cfdf07eae673dae38c2bc2cce594ddd": { 188 | "address": "85.31.186.26:443", 189 | "fingerprint": "91A6354697E6B02A386312F68D82CF86824D3606", 190 | "params": { 191 | "cert": [ 192 | "PBwr+S8JTVZo6MPdHnkTwXJPILWADLqfMGoVvhZClMq/Urndyd42BwX9YFJHZnBB3H0XCw" 193 | ], 194 | "iat-mode": [ 195 | "0" 196 | ] 197 | }, 198 | "protocol": "obfs4" 199 | }, 200 | "bc7bc5fb57052ee5252e807443b9ab67307dbdb5ce79187c4360182a300dd0f8": { 201 | "address": "144.217.20.138:80", 202 | "fingerprint": "FB70B257C162BF1038CA669D568D76F5B7F0BABB", 203 | "params": { 204 | "cert": [ 205 | "vYIV5MgrghGQvZPIi1tJwnzorMgqgmlKaB77Y3Z9Q/v94wZBOAXkW+fdx4aSxLVnKO+xNw" 206 | ], 207 | "iat-mode": [ 208 | "0" 209 | ] 210 | }, 211 | "protocol": "obfs4" 212 | }, 213 | 
"5aeb9e43b43fc8a809b8d25aae968395a5ceea0e677caaf56e1c0a2ba002f5b5": { 214 | "address": "193.11.166.194:27015", 215 | "fingerprint": "2D82C2E354D531A68469ADF7F878FA6060C6BACA", 216 | "params": { 217 | "cert": [ 218 | "4TLQPJrTSaDffMK7Nbao6LC7G9OW/NHkUwIdjLSS3KYf0Nv4/nQiiI8dY2TcsQx01NniOg" 219 | ], 220 | "iat-mode": [ 221 | "0" 222 | ] 223 | }, 224 | "protocol": "obfs4" 225 | }, 226 | "2d7292b5163fb7de5b24cd04032c93a2d4c454431de3a00b5a6d4a3309529e49": { 227 | "address": "193.11.166.194:27020", 228 | "fingerprint": "86AC7B8D430DAC4117E9F42C9EAED18133863AAF", 229 | "params": { 230 | "cert": [ 231 | "0LDeJH4JzMDtkJJrFphJCiPqKx7loozKN7VNfuukMGfHO0Z8OGdzHVkhVAOfo1mUdv9cMg" 232 | ], 233 | "iat-mode": [ 234 | "0" 235 | ] 236 | }, 237 | "protocol": "obfs4" 238 | }, 239 | "b7c0e3f183ad85a6686ec68344765cec57906b215e7b82a98a9ca013cb980efa": { 240 | "address": "193.11.166.194:27025", 241 | "fingerprint": "1AE2C08904527FEA90C4C4F8C1083EA59FBC6FAF", 242 | "params": { 243 | "cert": [ 244 | "ItvYZzW5tn6v3G4UnQa6Qz04Npro6e81AP70YujmK/KXwDFPTs3aHXcHp4n8Vt6w/bv8cA" 245 | ], 246 | "iat-mode": [ 247 | "0" 248 | ] 249 | }, 250 | "protocol": "obfs4" 251 | }, 252 | "4a330634c5d678887f0f7c299490af43a6ac9fa944a6cc2140ab264c9ec124a0": { 253 | "address": "209.148.46.65:443", 254 | "fingerprint": "74FAD13168806246602538555B5521A0383A1875", 255 | "params": { 256 | "cert": [ 257 | "ssH+9rP8dG2NLDN2XuFw63hIO/9MNNinLmxQDpVa+7kTOa9/m+tGWT1SmSYpQ9uTBGa6Hw" 258 | ], 259 | "iat-mode": [ 260 | "0" 261 | ] 262 | }, 263 | "protocol": "obfs4" 264 | }, 265 | "49116bf72d336bb8724fd3a06a5afa7bbd4e7baef35fbcdb9a98d13e702270ad": { 266 | "address": "146.57.248.225:22", 267 | "fingerprint": "10A6CD36A537FCE513A322361547444B393989F0", 268 | "params": { 269 | "cert": [ 270 | "K1gDtDAIcUfeLqbstggjIw2rtgIKqdIhUlHp82XRqNSq/mtAjp1BIC9vHKJ2FAEpGssTPw" 271 | ], 272 | "iat-mode": [ 273 | "0" 274 | ] 275 | }, 276 | "protocol": "obfs4" 277 | }, 278 | "75fe96d641a078fee06529af376d7f8c92757596e48558d5d02baa1e10321d10": { 279 | 
"address": "45.145.95.6:27015", 280 | "fingerprint": "C5B7CD6946FF10C5B3E89691A7D3F2C122D2117C", 281 | "params": { 282 | "cert": [ 283 | "TD7PbUO0/0k6xYHMPW3vJxICfkMZNdkRrb63Zhl5j9dW3iRGiCx0A7mPhe5T2EDzQ35+Zw" 284 | ], 285 | "iat-mode": [ 286 | "0" 287 | ] 288 | }, 289 | "protocol": "obfs4" 290 | }, 291 | "f855ba38d517d8589c16e1333ac23c6e516532cf036ab6f47b15030b40a3b6a6": { 292 | "address": "[2a0c:4d80:42:702::1]:27015", 293 | "fingerprint": "C5B7CD6946FF10C5B3E89691A7D3F2C122D2117C", 294 | "params": { 295 | "cert": [ 296 | "TD7PbUO0/0k6xYHMPW3vJxICfkMZNdkRrb63Zhl5j9dW3iRGiCx0A7mPhe5T2EDzQ35+Zw" 297 | ], 298 | "iat-mode": [ 299 | "0" 300 | ] 301 | }, 302 | "protocol": "obfs4" 303 | }, 304 | "fa69b2ee7a7c8975af2452ecd566f67a6459d397a4cefc30be86a670675cdc23": { 305 | "address": "51.222.13.177:80", 306 | "fingerprint": "5EDAC3B810E12B01F6FD8050D2FD3E277B289A08", 307 | "params": { 308 | "cert": [ 309 | "2uplIpLQ0q9+0qMFrK5pkaYRDOe460LL9WHBvatgkuRr/SL31wBOEupaMMJ6koRE6Ld0ew" 310 | ], 311 | "iat-mode": [ 312 | "0" 313 | ] 314 | }, 315 | "protocol": "obfs4" 316 | } 317 | } 318 | 319 | -------------------------------------------------------------------------------- /newapi/tests/integ/data/url_priorities_us.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "category_code": "NEWS", 4 | "cc": "*", 5 | "domain": "*", 6 | "priority": 100, 7 | "url": "*" 8 | }, 9 | { 10 | "category_code": "POLR", 11 | "cc": "*", 12 | "domain": "*", 13 | "priority": 100, 14 | "url": "*" 15 | }, 16 | { 17 | "category_code": "HUMR", 18 | "cc": "*", 19 | "domain": "*", 20 | "priority": 100, 21 | "url": "*" 22 | }, 23 | { 24 | "category_code": "LGBT", 25 | "cc": "*", 26 | "domain": "*", 27 | "priority": 100, 28 | "url": "*" 29 | }, 30 | { 31 | "category_code": "ANON", 32 | "cc": "*", 33 | "domain": "*", 34 | "priority": 100, 35 | "url": "*" 36 | }, 37 | { 38 | "category_code": "GRP", 39 | "cc": "*", 40 | "domain": "*", 41 | "priority": 80, 42 | 
"url": "*" 43 | }, 44 | { 45 | "category_code": "COMT", 46 | "cc": "*", 47 | "domain": "*", 48 | "priority": 80, 49 | "url": "*" 50 | }, 51 | { 52 | "category_code": "MMED", 53 | "cc": "*", 54 | "domain": "*", 55 | "priority": 80, 56 | "url": "*" 57 | }, 58 | { 59 | "category_code": "SRCH", 60 | "cc": "*", 61 | "domain": "*", 62 | "priority": 80, 63 | "url": "*" 64 | }, 65 | { 66 | "category_code": "PUBH", 67 | "cc": "*", 68 | "domain": "*", 69 | "priority": 80, 70 | "url": "*" 71 | }, 72 | { 73 | "category_code": "REL", 74 | "cc": "*", 75 | "domain": "*", 76 | "priority": 60, 77 | "url": "*" 78 | }, 79 | { 80 | "category_code": "XED", 81 | "cc": "*", 82 | "domain": "*", 83 | "priority": 60, 84 | "url": "*" 85 | }, 86 | { 87 | "category_code": "HOST", 88 | "cc": "*", 89 | "domain": "*", 90 | "priority": 60, 91 | "url": "*" 92 | }, 93 | { 94 | "category_code": "ENV", 95 | "cc": "*", 96 | "domain": "*", 97 | "priority": 60, 98 | "url": "*" 99 | }, 100 | { 101 | "category_code": "FILE", 102 | "cc": "*", 103 | "domain": "*", 104 | "priority": 40, 105 | "url": "*" 106 | }, 107 | { 108 | "category_code": "CULTR", 109 | "cc": "*", 110 | "domain": "*", 111 | "priority": 40, 112 | "url": "*" 113 | }, 114 | { 115 | "category_code": "IGO", 116 | "cc": "*", 117 | "domain": "*", 118 | "priority": 40, 119 | "url": "*" 120 | }, 121 | { 122 | "category_code": "GOVT", 123 | "cc": "*", 124 | "domain": "*", 125 | "priority": 40, 126 | "url": "*" 127 | }, 128 | { 129 | "category_code": "DATE", 130 | "cc": "*", 131 | "domain": "*", 132 | "priority": 30, 133 | "url": "*" 134 | }, 135 | { 136 | "category_code": "HATE", 137 | "cc": "*", 138 | "domain": "*", 139 | "priority": 30, 140 | "url": "*" 141 | }, 142 | { 143 | "category_code": "MILX", 144 | "cc": "*", 145 | "domain": "*", 146 | "priority": 30, 147 | "url": "*" 148 | }, 149 | { 150 | "category_code": "PROV", 151 | "cc": "*", 152 | "domain": "*", 153 | "priority": 30, 154 | "url": "*" 155 | }, 156 | { 157 | "category_code": "PORN", 
158 | "cc": "*", 159 | "domain": "*", 160 | "priority": 30, 161 | "url": "*" 162 | }, 163 | { 164 | "category_code": "GMB", 165 | "cc": "*", 166 | "domain": "*", 167 | "priority": 30, 168 | "url": "*" 169 | }, 170 | { 171 | "category_code": "ALDR", 172 | "cc": "*", 173 | "domain": "*", 174 | "priority": 30, 175 | "url": "*" 176 | }, 177 | { 178 | "category_code": "GAME", 179 | "cc": "*", 180 | "domain": "*", 181 | "priority": 20, 182 | "url": "*" 183 | }, 184 | { 185 | "category_code": "MISC", 186 | "cc": "*", 187 | "domain": "*", 188 | "priority": 20, 189 | "url": "*" 190 | }, 191 | { 192 | "category_code": "HACK", 193 | "cc": "*", 194 | "domain": "*", 195 | "priority": 20, 196 | "url": "*" 197 | }, 198 | { 199 | "category_code": "ECON", 200 | "cc": "*", 201 | "domain": "*", 202 | "priority": 20, 203 | "url": "*" 204 | }, 205 | { 206 | "category_code": "COMM", 207 | "cc": "*", 208 | "domain": "*", 209 | "priority": 20, 210 | "url": "*" 211 | }, 212 | { 213 | "category_code": "CTRL", 214 | "cc": "*", 215 | "domain": "*", 216 | "priority": 20, 217 | "url": "*" 218 | }, 219 | { 220 | "category_code": "*", 221 | "cc": "*", 222 | "domain": "www.facebook.com", 223 | "priority": 200, 224 | "url": "*" 225 | }, 226 | { 227 | "category_code": "*", 228 | "cc": "*", 229 | "domain": "twitter.com", 230 | "priority": 200, 231 | "url": "*" 232 | }, 233 | { 234 | "category_code": "*", 235 | "cc": "*", 236 | "domain": "www.instagram.com", 237 | "priority": 200, 238 | "url": "*" 239 | }, 240 | { 241 | "category_code": "*", 242 | "cc": "*", 243 | "domain": "www.whatsapp.com", 244 | "priority": 200, 245 | "url": "*" 246 | }, 247 | { 248 | "category_code": "*", 249 | "cc": "*", 250 | "domain": "web.whatsapp.com", 251 | "priority": 200, 252 | "url": "*" 253 | }, 254 | { 255 | "category_code": "*", 256 | "cc": "*", 257 | "domain": "telegram.org", 258 | "priority": 200, 259 | "url": "*" 260 | }, 261 | { 262 | "category_code": "*", 263 | "cc": "*", 264 | "domain": "web.telegram.org", 265 | 
"priority": 200, 266 | "url": "*" 267 | }, 268 | { 269 | "category_code": "*", 270 | "cc": "*", 271 | "domain": "www.youtube.com", 272 | "priority": 200, 273 | "url": "*" 274 | }, 275 | { 276 | "category_code": "*", 277 | "cc": "*", 278 | "domain": "www.tiktok.com", 279 | "priority": 200, 280 | "url": "*" 281 | }, 282 | { 283 | "category_code": "*", 284 | "cc": "*", 285 | "domain": "www.viber.com", 286 | "priority": 200, 287 | "url": "*" 288 | }, 289 | { 290 | "category_code": "*", 291 | "cc": "*", 292 | "domain": "www.snapchat.com", 293 | "priority": 200, 294 | "url": "*" 295 | }, 296 | { 297 | "category_code": "*", 298 | "cc": "*", 299 | "domain": "www.reddit.com", 300 | "priority": 200, 301 | "url": "*" 302 | }, 303 | { 304 | "category_code": "*", 305 | "cc": "*", 306 | "domain": "vimeo.com", 307 | "priority": 200, 308 | "url": "*" 309 | }, 310 | { 311 | "category_code": "*", 312 | "cc": "*", 313 | "domain": "www.wechat.com", 314 | "priority": 200, 315 | "url": "*" 316 | }, 317 | { 318 | "category_code": "*", 319 | "cc": "*", 320 | "domain": "international.qq.com", 321 | "priority": 200, 322 | "url": "*" 323 | }, 324 | { 325 | "category_code": "*", 326 | "cc": "*", 327 | "domain": "signal.org", 328 | "priority": 200, 329 | "url": "*" 330 | }, 331 | { 332 | "category_code": "MISC", 333 | "cc": "US", 334 | "domain": "*", 335 | "priority": -200, 336 | "url": "*" 337 | } 338 | ] 339 | -------------------------------------------------------------------------------- /newapi/tests/integ/test_integration_auth.py: -------------------------------------------------------------------------------- 1 | """ 2 | Integration test for Auth API 3 | 4 | Warning: this test runs against a real database and SMTP 5 | 6 | Lint using: 7 | black -t py37 -l 100 --fast ooniapi/tests/integ/test_probe_services.py 8 | 9 | Test using: 10 | pytest-3 -s --show-capture=no ooniapi/tests/integ/test_integration_auth.py 11 | """ 12 | 13 | from unittest.mock import MagicMock, Mock 14 | from 
urllib.parse import urlparse 15 | import quopri 16 | 17 | import pytest 18 | from freezegun import freeze_time # debdeps: python3-freezegun 19 | 20 | import ooniapi.auth 21 | import ooniapi.database 22 | 23 | 24 | @pytest.fixture() 25 | def log(app): 26 | return app.logger 27 | 28 | 29 | @pytest.fixture(autouse=True, scope="session") 30 | def setup_test_session(): 31 | # mock smtplib 32 | m = Mock(name="MockSMTPInstance") 33 | s = Mock(name="SMTP session") 34 | x = Mock(name="mock enter", return_value=s) 35 | m.__enter__ = x 36 | m.__exit__ = Mock(name="mock exit") 37 | setup_test_session.mocked_s = s 38 | ooniapi.auth.smtplib.SMTP = Mock(name="MockSMTP", return_value=m) 39 | ooniapi.auth.smtplib.SMTP_SSL = MagicMock() 40 | 41 | 42 | admin_e = "integtest@openobservatory.org" 43 | user_e = "nick@localhost.local" 44 | 45 | 46 | @pytest.fixture 47 | def integtest_admin(app): 48 | # Access DB directly 49 | with app.app_context(): 50 | ooniapi.auth._set_account_role(admin_e, "admin") 51 | yield 52 | ooniapi.auth._delete_account_data(admin_e) 53 | 54 | 55 | class AuthClient: 56 | # wrap Flask http client to provide Authorization=Bearer header 57 | def __init__(self, http_client, headers): 58 | self._headers = headers 59 | self._client = http_client 60 | 61 | def get(self, *a, **kw): 62 | return self._client.get(*a, **kw, headers=self._headers) 63 | 64 | def post(self, *a, **kw): 65 | return self._client.post(*a, **kw, headers=self._headers) 66 | 67 | def discard_authorization_header(self): 68 | self._headers.pop("Authorization") 69 | 70 | 71 | # setup_test_session mocks SMTP when the test session starts 72 | 73 | 74 | @pytest.fixture 75 | def usersession(client, app): 76 | # Mock out SMTP, register a user and log in 77 | user_e = "nick@localhost.local" 78 | reset_smtp_mock() 79 | with app.app_context(): 80 | headers = _register_and_login(client, user_e) 81 | reset_smtp_mock() 82 | yield AuthClient(client, headers) 83 | reset_smtp_mock() 84 | 85 | 86 | @pytest.fixture 87 
| def adminsession(client, app): 88 | # Access DB directly 89 | # Mock out SMTP, register a user and log in 90 | with app.app_context(): 91 | ooniapi.auth._set_account_role(admin_e, "admin") 92 | headers= _register_and_login(client, admin_e) 93 | reset_smtp_mock() 94 | yield AuthClient(client, headers) 95 | ooniapi.auth._delete_account_data(admin_e) 96 | reset_smtp_mock() 97 | 98 | 99 | def reset_smtp_mock(): 100 | ooniapi.auth.smtplib.SMTP.reset_mock() 101 | ooniapi.auth.smtplib.SMTP_SSL.reset_mock() 102 | 103 | 104 | @pytest.fixture() 105 | def mocksmtp(): 106 | reset_smtp_mock() 107 | 108 | 109 | def postj(client, url, **kw): 110 | response = client.post(url, json=kw) 111 | assert response.status_code == 200 112 | return response 113 | 114 | 115 | # # Tests 116 | 117 | 118 | def test_login_user_bogus_token(client, mocksmtp): 119 | r = client.get("/api/v1/user_login?k=BOGUS") 120 | assert r.status_code == 401 121 | assert r.json == {"error": "Invalid credentials"} 122 | 123 | 124 | def test_user_register_non_valid_email(client, mocksmtp): 125 | d = dict( 126 | email_address="nick@localhost", redirect_to="https://explorer.ooni.org" 127 | ) # no FQDN 128 | r = client.post("/api/v1/user_register", json=d) 129 | assert r.status_code == 400 130 | assert r.json == {"error": "Invalid email address"} 131 | 132 | 133 | def test_user_register_non_valid_redirect(client, mocksmtp): 134 | d = dict( 135 | email_address="nick@a.org", redirect_to="https://BOGUS.ooni.org" 136 | ) # bogus fqdn 137 | r = client.post("/api/v1/user_register", json=d) 138 | assert r.status_code == 400 139 | assert r.json == {"error": "Invalid request"} 140 | 141 | 142 | def _register_and_login(client, email_address): 143 | ooniapi.auth._remove_from_session_expunge(email_address) 144 | # # return cookie header for further use 145 | d = dict(email_address=email_address, redirect_to="https://explorer.ooni.org") 146 | r = client.post("/api/v1/user_register", json=d) 147 | assert r.status_code == 200 148 | 
assert r.json == {"msg": "ok"} 149 | 150 | ooniapi.auth.smtplib.SMTP.assert_called_once() 151 | ooniapi.auth.smtplib.SMTP_SSL.assert_not_called() 152 | setup_test_session.mocked_s.send_message.assert_called_once() 153 | msg = setup_test_session.mocked_s.send_message.call_args[0][0] 154 | msg = str(msg) 155 | url = None 156 | assert "Subject: OONI Account activation" in msg 157 | # Decode MIME-quoted email 158 | msg = quopri.decodestring(msg) 159 | # Look for login URL in HTML 160 | for line in msg.splitlines(): 161 | if b' 10 67 | assert len(c) < 60000 68 | assert sorted(c[0].keys()) == [ 69 | "anomaly_perc", 70 | "category_code", 71 | "cc", 72 | "domain", 73 | "msmt_cnt", 74 | "priority", 75 | "url", 76 | ] 77 | 78 | 79 | def test_show_countries_prioritization_csv(client): 80 | resp = client.get("/api/_/show_countries_prioritization?format=CSV") 81 | assert not resp.is_json 82 | -------------------------------------------------------------------------------- /newapi/tests/integ/test_prioritization_nodb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Integration test for URL prioritization with mocked database 3 | Uses data from tests/integ/data/ 4 | Lint using Black. 5 | Test using: 6 | pytest-3 -s --show-capture=no ooniapi/tests/integ/test_prioritization_nodb.py 7 | """ 8 | 9 | import json 10 | from pathlib import Path 11 | 12 | import pytest 13 | from mock import MagicMock 14 | 15 | # Extract database responses with: 16 | # COPY (SELECT json_agg(t) FROM (...) 
t) TO '/tmp/.json'; 17 | 18 | queries = { 19 | ( 20 | "SELECT category_code, priority, url FROM citizenlab WHERE cc = 'ZZ' ORDER BY priority DESC" 21 | ): Path("citizenlab.json"), 22 | ( 23 | "SELECT category_code, domain, url, cc, COALESCE(msmt_cnt, 0)::float AS msmt_cnt FROM ( SELECT domain, url, cc, category_code FROM citizenlab WHERE citizenlab.cc = :cc_low OR citizenlab.cc = :cc OR citizenlab.cc = 'ZZ' ) AS citiz LEFT OUTER JOIN ( SELECT input, msmt_cnt FROM counters_test_list WHERE probe_cc = :cc ) AS cnt ON (citiz.url = cnt.input)" 24 | ): Path("citizenlab_counters_us.json"), 25 | ( 26 | "SELECT category_code, cc, domain, url, priority FROM url_priorities WHERE cc = :cc OR cc = '*'" 27 | ): Path("url_priorities_us.json"), 28 | } 29 | 30 | 31 | def mockdb(query, *query_kw): 32 | """Mocked app.db_session.execute""" 33 | query = " ".join(x.strip() for x in query.splitlines()).strip() 34 | print(f" {query} --") 35 | if query in queries: 36 | resp = queries[query] 37 | if isinstance(resp, Path): 38 | fn = Path("tests/integ/data") / resp 39 | print(f" loading {fn}") 40 | with fn.open() as f: 41 | j = json.load(f) 42 | 43 | r = MagicMock(side_effect=iter(j)) 44 | r.fetchall.return_value = j 45 | return r 46 | 47 | assert query == "", "Unexpected query to be mocked out: " + repr(query) 48 | 49 | 50 | @pytest.fixture 51 | def nodb(app): 52 | if hasattr(app, "db_session"): 53 | app.db_session.execute = mockdb 54 | 55 | 56 | def getjson(client, url): 57 | response = client.get(url) 58 | assert response.status_code == 200 59 | assert response.is_json 60 | return response.json 61 | 62 | 63 | def test_url_prioritization(client, nodb): 64 | c = getjson(client, "/api/v1/test-list/urls?limit=100") 65 | assert "metadata" in c 66 | assert c["metadata"] == { 67 | "count": 100, 68 | "current_page": -1, 69 | "limit": -1, 70 | "next_url": "", 71 | "pages": 1, 72 | } 73 | 74 | assert len(set(r["url"] for r in c["results"])) == 100 75 | 76 | 77 | def 
test_url_prioritization_category_code(client, nodb): 78 | c = getjson(client, "/api/v1/test-list/urls?category_codes=NEWS&limit=100") 79 | assert "metadata" in c 80 | assert c["metadata"] == { 81 | "count": 100, 82 | "current_page": -1, 83 | "limit": -1, 84 | "next_url": "", 85 | "pages": 1, 86 | } 87 | for r in c["results"]: 88 | assert r["category_code"] == "NEWS" 89 | 90 | assert len(set(r["url"] for r in c["results"])) == 100 91 | 92 | 93 | def test_url_prioritization_category_codes(client, nodb): 94 | url = "/api/v1/test-list/urls?category_codes=NEWS,HUMR&country_code=US&limit=100" 95 | c = getjson(client, url) 96 | assert "metadata" in c 97 | assert c["metadata"] == { 98 | "count": 100, 99 | "current_page": -1, 100 | "limit": -1, 101 | "next_url": "", 102 | "pages": 1, 103 | } 104 | for r in c["results"]: 105 | assert r["category_code"] in ("NEWS", "HUMR") 106 | 107 | assert len(set(r["url"] for r in c["results"])) == 100 108 | 109 | 110 | def test_url_prioritization_country_code_limit_debug(client, nodb): 111 | c = getjson(client, "/api/v1/test-list/urls?country_code=US&limit=9999&debug=true") 112 | assert "metadata" in c 113 | assert c["metadata"] == { 114 | "count": 1513, 115 | "current_page": -1, 116 | "limit": -1, 117 | "next_url": "", 118 | "pages": 1, 119 | } 120 | for r in c["results"]: 121 | assert r["country_code"] in ("XX", "US") 122 | 123 | assert len(c["results"]) == 1513 124 | 125 | 126 | def test_url_prioritization_country_code_nolimit(client, nodb): 127 | c = getjson(client, "/api/v1/test-list/urls?country_code=US") 128 | assert "metadata" in c 129 | xx_cnt = 0 130 | for r in c["results"]: 131 | assert r["country_code"] in ("XX", "US") 132 | if r["country_code"] == "XX": 133 | xx_cnt += 1 134 | 135 | assert xx_cnt > 1200 136 | us_cnt = c["metadata"]["count"] - xx_cnt 137 | assert us_cnt > 40 138 | -------------------------------------------------------------------------------- /newapi/tests/integ/test_private_api.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Most of the private API runs statistics on the last N days. As such, tests 3 | # are not deterministic. 4 | # 5 | 6 | import pytest 7 | 8 | 9 | def privapi(client, subpath): 10 | response = client.get(f"/api/_/{subpath}") 11 | assert response.status_code == 200 12 | assert response.is_json 13 | return response.json 14 | 15 | 16 | # TODO: improve tests 17 | 18 | 19 | def test_private_api_asn_by_month(client): 20 | url = "asn_by_month" 21 | response = privapi(client, url) 22 | assert len(response) > 1 23 | r = response[0] 24 | assert sorted(r.keys()) == ["date", "value"] 25 | assert r["value"] > 10 26 | assert r["value"] < 10**6 27 | assert r["date"].endswith("T00:00:00+00:00") 28 | 29 | 30 | def test_private_api_countries_by_month(client): 31 | url = "countries_by_month" 32 | response = privapi(client, url) 33 | assert len(response) > 1 34 | r = response[0] 35 | assert sorted(r.keys()) == ["date", "value"] 36 | assert r["value"] > 10 37 | assert r["value"] < 1000 38 | assert r["date"].endswith("T00:00:00+00:00") 39 | 40 | 41 | def test_private_api_test_names(client, log): 42 | url = "test_names" 43 | response = privapi(client, url) 44 | assert response == { 45 | "test_names": [ 46 | {"id": "bridge_reachability", "name": "Bridge Reachability"}, 47 | {"id": "dash", "name": "DASH"}, 48 | {"id": "dns_consistency", "name": "DNS Consistency"}, 49 | {"id": "dnscheck", "name": "DNS Check"}, 50 | {"id": "facebook_messenger", "name": "Facebook Messenger"}, 51 | { 52 | "id": "http_header_field_manipulation", 53 | "name": "HTTP Header Field Manipulation", 54 | }, 55 | {"id": "http_host", "name": "HTTP Host"}, 56 | {"id": "http_invalid_request_line", "name": "HTTP Invalid Request Line"}, 57 | {"id": "http_requests", "name": "HTTP Requests"}, 58 | {"id": "meek_fronted_requests_test", "name": "Meek Fronted Requests"}, 59 | {"id": "multi_protocol_traceroute", "name": "Multi Protocol 
Traceroute"}, 60 | {"id": "ndt", "name": "NDT"}, 61 | {"id": "psiphon", "name": "Psiphon"}, 62 | {"id": "riseupvpn", "name": "RiseupVPN"}, 63 | {"id": "signal", "name": "Signal"}, 64 | {"id": "stunreachability", "name": "STUN Reachability"}, 65 | {"id": "tcp_connect", "name": "TCP Connect"}, 66 | {"id": "telegram", "name": "Telegram"}, 67 | {"id": "tor", "name": "Tor"}, 68 | {"id": "torsf", "name": "Tor Snowflake"}, 69 | {"id": "urlgetter", "name": "URL Getter"}, 70 | {"id": "vanilla_tor", "name": "Vanilla Tor"}, 71 | {"id": "web_connectivity", "name": "Web Connectivity"}, 72 | {"id": "whatsapp", "name": "WhatsApp"}, 73 | ] 74 | } 75 | 76 | 77 | def test_private_api_countries_total(client, log): 78 | url = "countries" 79 | response = privapi(client, url) 80 | assert "countries" in response 81 | assert len(response["countries"]) >= 20 82 | for a in response["countries"]: 83 | if a["alpha_2"] == "CA": 84 | assert a["count"] > 100 85 | assert a["name"] == "Canada" 86 | return 87 | 88 | assert 0, "CA not found" 89 | 90 | 91 | def test_private_api_test_coverage(client, log): 92 | url = "test_coverage?probe_cc=BR" 93 | resp = privapi(client, url) 94 | assert 190 < len(resp["test_coverage"]) < 220 95 | assert 27 < len(resp["network_coverage"]) < 32 96 | assert sorted(resp["test_coverage"][0]) == ["count", "test_day", "test_group"] 97 | assert sorted(resp["network_coverage"][0]) == ["count", "test_day"] 98 | 99 | 100 | def test_private_api_test_coverage_with_groups(client, log): 101 | url = "test_coverage?probe_cc=BR&test_groups=websites" 102 | resp = privapi(client, url) 103 | assert len(resp["test_coverage"]) > 10 104 | assert sorted(resp["test_coverage"][0]) == ["count", "test_day", "test_group"] 105 | assert 27 < len(resp["network_coverage"]) < 32 106 | 107 | 108 | def test_private_api_domain_metadata1(client): 109 | url = "domain_metadata?domain=facebook.com" 110 | resp = privapi(client, url) 111 | assert resp["category_code"] == "GRP" 112 | assert 
resp["canonical_domain"] == "www.facebook.com" 113 | 114 | 115 | def test_private_api_domain_metadata2(client): 116 | url = "domain_metadata?domain=www.facebook.com" 117 | resp = privapi(client, url) 118 | assert resp["category_code"] == "GRP" 119 | assert resp["canonical_domain"] == "www.facebook.com" 120 | 121 | 122 | def test_private_api_domain_metadata3(client): 123 | url = "domain_metadata?domain=www.twitter.com" 124 | resp = privapi(client, url) 125 | assert resp["category_code"] == "GRP" 126 | assert resp["canonical_domain"] == "twitter.com" 127 | 128 | 129 | def test_private_api_domain_metadata4(client): 130 | url = "domain_metadata?domain=www.this-domain-is-not-in-the-test-lists-for-sure.com" 131 | resp = privapi(client, url) 132 | assert resp["category_code"] == "MISC" 133 | exp = "this-domain-is-not-in-the-test-lists-for-sure.com" 134 | assert resp["canonical_domain"] == exp 135 | 136 | 137 | @pytest.mark.skip("FIXME not deterministic") 138 | def test_private_api_website_networks(client, log): 139 | url = "website_networks?probe_cc=US" 140 | resp = privapi(client, url) 141 | assert len(resp["results"]) > 100 142 | 143 | 144 | @pytest.mark.skip("FIXME not deterministic") 145 | def test_private_api_website_stats(client, log): 146 | url = "website_stats?probe_cc=DE&probe_asn=3320&input=http:%2F%2Fwww.backtrack-linux.org%2F" 147 | resp = privapi(client, url) 148 | assert len(resp["results"]) > 2 149 | assert sorted(resp["results"][0].keys()) == [ 150 | "anomaly_count", 151 | "confirmed_count", 152 | "failure_count", 153 | "test_day", 154 | "total_count", 155 | ] 156 | 157 | 158 | @pytest.mark.skip("FIXME not deterministic") 159 | def test_private_api_website_urls(client, log): 160 | url = "website_urls?probe_cc=US&probe_asn=209" 161 | response = privapi(client, url) 162 | r = response["metadata"] 163 | assert r["total_count"] > 0 164 | del r["total_count"] 165 | assert r == { 166 | "current_page": 1, 167 | "limit": 10, 168 | "next_url": 
"https://api.ooni.io/api/_/website_urls?limit=10&offset=10&probe_asn=209&probe_cc=US", 169 | "offset": 0, 170 | } 171 | assert len(response["results"]) == 10 172 | 173 | 174 | def test_private_api_vanilla_tor_stats(client): 175 | url = "vanilla_tor_stats?probe_cc=BR" 176 | resp = privapi(client, url) 177 | assert "notok_networks" in resp 178 | return # FIXME: implement tests with mocked db 179 | assert resp["notok_networks"] >= 0 180 | assert len(resp["networks"]) > 10 181 | assert sorted(resp["networks"][0].keys()) == [ 182 | "failure_count", 183 | "last_tested", 184 | "probe_asn", 185 | "success_count", 186 | "test_runtime_avg", 187 | "test_runtime_max", 188 | "test_runtime_min", 189 | "total_count", 190 | ] 191 | assert resp["last_tested"].startswith("20") 192 | 193 | 194 | def test_private_api_vanilla_tor_stats_empty(client): 195 | url = "vanilla_tor_stats?probe_cc=XY" 196 | resp = privapi(client, url) 197 | assert resp["notok_networks"] == 0 198 | assert len(resp["networks"]) == 0 199 | assert resp["last_tested"] is None 200 | 201 | 202 | def test_private_api_im_networks(client): 203 | url = "im_networks?probe_cc=BR" 204 | resp = privapi(client, url) 205 | return # FIXME: implement tests with mocked db 206 | assert len(resp) > 1 207 | assert len(resp["facebook_messenger"]["ok_networks"]) > 5 208 | if "telegram" in resp: 209 | assert len(resp["telegram"]["ok_networks"]) > 5 210 | assert len(resp["signal"]["ok_networks"]) > 5 211 | # assert len(resp["whatsapp"]["ok_networks"]) > 5 212 | 213 | 214 | def test_private_api_im_stats_basic(client): 215 | url = "im_stats?probe_cc=CH&probe_asn=3303&test_name=facebook_messenger" 216 | resp = privapi(client, url) 217 | assert 20 < len(resp["results"]) < 34 218 | assert resp["results"][0]["total_count"] > -1 219 | assert resp["results"][0]["anomaly_count"] is None 220 | assert len(resp["results"][0]["test_day"]) == 25 221 | 222 | 223 | @pytest.mark.skip("FIXME not deterministic") 224 | def 
test_private_api_im_stats(client): 225 | url = "im_stats?probe_cc=CH&probe_asn=3303&test_name=facebook_messenger" 226 | resp = privapi(client, url) 227 | assert len(resp["results"]) > 10 228 | assert resp["results"][0]["total_count"] > -1 229 | assert resp["results"][0]["anomaly_count"] is None 230 | assert len(resp["results"][0]["test_day"]) == 25 231 | assert sum(e["total_count"] for e in resp["results"]) > 0, resp 232 | 233 | 234 | def test_private_api_network_stats(client): 235 | # TODO: the stats are not implemented 236 | url = "network_stats?probe_cc=GB" 237 | response = privapi(client, url) 238 | assert response == { 239 | "metadata": { 240 | "current_page": 1, 241 | "limit": 10, 242 | "next_url": None, 243 | "offset": 0, 244 | "total_count": 0, 245 | }, 246 | "results": [], 247 | } 248 | 249 | 250 | def test_private_api_country_overview(client): 251 | url = "country_overview?probe_cc=BR" 252 | resp = privapi(client, url) 253 | assert resp["first_bucket_date"].startswith("20"), resp 254 | assert resp["measurement_count"] > 1000 255 | assert resp["network_count"] > 10 256 | 257 | 258 | def test_private_api_global_overview(client): 259 | url = "global_overview" 260 | response = privapi(client, url) 261 | assert "country_count" in response 262 | assert "measurement_count" in response 263 | assert "network_count" in response 264 | 265 | 266 | def test_private_api_global_overview_by_month(client): 267 | url = "global_overview_by_month" 268 | resp = privapi(client, url) 269 | assert sorted(resp["networks_by_month"][0].keys()) == ["date", "value"] 270 | assert sorted(resp["countries_by_month"][0].keys()) == ["date", "value"] 271 | assert sorted(resp["measurements_by_month"][0].keys()) == ["date", "value"] 272 | assert resp["networks_by_month"][0]["date"].endswith("T00:00:00+00:00") 273 | 274 | 275 | @pytest.mark.skip(reason="cannot be tested") 276 | def test_private_api_quotas_summary(client): 277 | resp = privapi(client, "quotas_summary") 278 | 279 | 280 | def 
test_private_api_check_report_id(client, log): 281 | rid = "20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds2K3" 282 | url = f"check_report_id?report_id={rid}" 283 | response = privapi(client, url) 284 | assert response == {"v": 0, "found": True} 285 | 286 | 287 | def test_private_api_check_bogus_report_id_is_found(client, log): 288 | # The API always returns True 289 | url = f"check_report_id?report_id=BOGUS_REPORT_ID" 290 | response = privapi(client, url) 291 | assert response == {"v": 0, "found": True} 292 | 293 | 294 | # # /circumvention_stats_by_country 295 | 296 | 297 | @pytest.mark.skip(reason="depends on fresh data") 298 | def test_private_api_circumvention_stats_by_country(client, log): 299 | url = "circumvention_stats_by_country" 300 | resp = privapi(client, url) 301 | assert resp["v"] == 0 302 | assert len(resp["results"]) > 3 303 | 304 | 305 | # # /circumvention_runtime_stats 306 | 307 | 308 | @pytest.mark.skip(reason="depends on fresh data") 309 | def test_private_api_circumvention_runtime_stats(client, log): 310 | url = "circumvention_runtime_stats" 311 | resp = privapi(client, url) 312 | assert resp["v"] == 0 313 | assert "error" not in resp, resp 314 | assert len(resp["results"]) > 3, resp 315 | 316 | 317 | # # /asnmeta 318 | 319 | 320 | def test_private_api_ansmeta(client, log): 321 | resp = privapi(client, "asnmeta?asn=0") 322 | assert resp == {'org_name': 'Unknown'} 323 | -------------------------------------------------------------------------------- /newapi/tests/integ/test_probe_services.py: -------------------------------------------------------------------------------- 1 | """ 2 | Integration test for Probe Services API 3 | 4 | Warning: this test runs against a real database 5 | See README.adoc 6 | 7 | Lint using: 8 | black -t py37 -l 100 --fast ooniapi/tests/integ/test_probe_services.py 9 | 10 | Test using: 11 | pytest-3 -s --show-capture=no ooniapi/tests/integ/test_probe_services.py 12 | """ 13 | 14 | # TODO: mock out 
# /etc/ooni/api.conf during testing

from mock import patch
from pathlib import Path
import json

import pytest

import ooniapi.probe_services


@pytest.fixture()
def log(app):
    # Convenience fixture exposing the Flask app logger to tests
    return app.logger


def getjson(client, url):
    """GET `url` and return the decoded JSON body, asserting a 200 reply."""
    response = client.get(url)
    assert response.status_code == 200
    assert response.is_json
    return response.json


def getjsonh(client, url, headers=None):
    """Like getjson() but forwards custom request headers."""
    response = client.get(url, headers=headers)
    assert response.status_code == 200
    assert response.is_json
    return response.json


def post(client, url, data):
    """POST raw body data and return the decoded JSON reply."""
    response = client.post(url, data=data)
    assert response.status_code == 200
    assert response.is_json
    return response.json


def postj(client, url, **kw):
    """POST the keyword arguments as a JSON body and return the JSON reply."""
    response = client.post(url, json=kw)
    assert response.status_code == 200
    assert response.is_json
    return response.json


def test_index(client):
    resp = client.get("/")
    assert resp.status_code == 200
    assert not resp.is_json
    assert "Welcome to" in resp.data.decode()


# # Follow the order in ooniapi/probe_services.py


## Functional tests


def test_extract_probe_ipaddr_octect(app):
    with app.test_request_context("/", headers={"X-Forwarded-For": "1.2.3.4"}):
        assert ooniapi.probe_services.extract_probe_ipaddr() == "1.2.3.4"
        assert ooniapi.probe_services.extract_probe_ipaddr_octect(0, 0) == 1
        assert ooniapi.probe_services.extract_probe_ipaddr_octect(3, 0) == 4
        # Out-of-range position falls back to the provided default
        assert ooniapi.probe_services.extract_probe_ipaddr_octect(77, -1) == -1

    with app.test_request_context("/", headers={"X-Real-IP": "1.2.3.4"}):
        assert ooniapi.probe_services.extract_probe_ipaddr() == "1.2.3.4"


## Test /api/v1/check-in


def mock_load_json():
    """Factory for a _load_json replacement serving fixture files from
    tests/integ/data instead of /etc/ooni."""
    known = ("/etc/ooni/tor_targets.json", "/etc/ooni/psiphon_config.json")

    def load_json(fn):
        if fn in known:
            f = Path("tests/integ/data") / Path(fn).name
            print(f" Mocking probe_services._load_json {fn} -> {f}")
            return json.loads(f.read_text())
        raise NotImplementedError(f"Unexpected fname to be mocked out: {fn}")

    return load_json


def test_check_in_basic(client):
    j = dict(
        probe_cc="US",
        probe_asn="AS1234",
        on_wifi=True,
        charging=False,
    )
    with patch("ooniapi.probe_services._load_json", new_callable=mock_load_json):
        c = postj(client, "/api/v1/check-in", **j)

    assert c["v"] == 1
    urls = c["tests"]["web_connectivity"]["urls"]
    assert len(urls) > 1, urls

    # Report IDs encode timestamp, test name, cc, asn, collector and a nonce
    webc_rid = c["tests"]["web_connectivity"]["report_id"]
    ts, stn, cc, asn_i, _coll, _rand = webc_rid.split("_")
    assert int(asn_i) == 1234
    assert stn == "webconnectivity"
    assert cc == "US"

    assert sorted(c["conf"]) == ["features", "test_helpers"]


def test_check_in_url_category_news(client):
    j = dict(
        on_wifi=True,
        charging=True,
        web_connectivity=dict(category_codes=["NEWS"]),
    )
    with patch("ooniapi.probe_services._load_json", new_callable=mock_load_json):
        c = postj(client, "/api/v1/check-in", **j)
    assert c["v"] == 1
    urls = c["tests"]["web_connectivity"]["urls"]
    assert len(urls), urls
    for ui in urls:
        assert ui["category_code"] == "NEWS"

    # Without probe_cc/probe_asn the report id defaults to ZZ / AS0
    webc_rid = c["tests"]["web_connectivity"]["report_id"]
    ts, stn, cc, asn_i, _coll, _rand = webc_rid.split("_")
    assert int(asn_i) == 0
    assert stn == "webconnectivity"
    assert cc == "ZZ"


@pytest.mark.skip("Not supported. TODO: remove?")
def test_check_in_url_category_code_passed_as_string(client, citizenlab_tblready):
    # category_codes should be sent as an array, but comma-separated string
    # is handled anyways
    j = dict(
        web_connectivity=dict(category_codes="NEWS,HUMR"),
    )
    c = postj(client, "/api/v1/check-in", **j)
    assert c["v"] == 1
    urls = c["tests"]["web_connectivity"]["urls"]
    assert len(urls), urls
    for ui in urls:
        assert ui["category_code"] in ("NEWS", "HUMR")


def test_check_in_url_prioritization_category_codes(client, citizenlab_tblready):
    c = getjson(
        client,
        "/api/v1/test-list/urls?category_codes=NEWS,HUMR&country_code=US&limit=100",
    )
    assert "metadata" in c
    assert c["metadata"]["count"]
    # The count depends on DB contents; neutralize it before comparing
    c["metadata"]["count"] = "ignored"
    assert c["metadata"] == {
        "count": "ignored",
        "current_page": -1,
        "limit": -1,
        "next_url": "",
        "pages": 1,
    }
    for r in c["results"]:
        assert r["category_code"] in ("NEWS", "HUMR")

    assert set(r["url"] for r in c["results"])


def test_check_in_geoip(client):
    j = dict(
        on_wifi=True,
        charging=False,
    )
    headers = [
        ("X-Forwarded-For", "192.33.4.12")  # The IP address of c.root-servers.net
    ]
    c = client.post("/api/v1/check-in", json=j, headers=headers).json
    assert c["probe_cc"] == "US"
    assert c["probe_asn"] == "AS2149"
    assert c["probe_network_name"] is not None


# # Test /api/v1/collectors


def test_list_collectors(client):
    c = getjson(client, "/api/v1/collectors")
    assert len(c) == 6


# # Probe authentication


def _register(client):
    """Register a fake miniooni probe; returns the registration reply."""
    pwd = "HLdywVhzVCNqLvHCfmnMhIXqGmUFMTuYjmuGZhNlRTeIyvxeQTnjVJsiRkutHCSw"
    j = {
        "password": pwd,
        "platform": "miniooni",
        "probe_asn": "AS0",
        "probe_cc": "ZZ",
        "software_name": "miniooni",
        "software_version": "0.1.0-dev",
        "supported_tests": ["web_connectivity"],
    }
    return postj(client, "/api/v1/register", **j)


import ooniapi.auth


def decode_token(client):
    # Decode JWT token in the cookie jar
    assert len(client.cookie_jar) == 1
    cookie = list(client.cookie_jar)[0].value
    tok = ooniapi.auth.decode_jwt(cookie, audience="user_auth")
    return tok


def test_register(client):
    c = _register(client)
    assert "client_id" in c
    assert len(c["client_id"]) == 132


def test_register_then_login(client):
    pwd = "HLdywVhzVCNqLvHCfmnMhIXqGmUFMTuYjmuGZhNlRTeIyvxeQTnjVJsiRkutHCSw"
    c = _register(client)
    assert "client_id" in c
    assert len(c["client_id"]) == 132
    tok = ooniapi.auth.decode_jwt(c["client_id"], audience="probe_login")

    client_id = c["client_id"]
    c = postj(client, "/api/v1/login", username=client_id, password=pwd)
    tok = ooniapi.auth.decode_jwt(c["token"], audience="probe_token")
    assert tok["registration_time"] is not None

    # Login with a bogus client id emulating probes before 2022
    client_id = "BOGUSBOGUS"
    j = dict(username=client_id, password=pwd)
    r = client.post("/api/v1/login", json=j)
    assert r.status_code == 200
    token = r.json["token"]
    tok = ooniapi.auth.decode_jwt(token, audience="probe_token")
    assert tok["registration_time"] is None  # we don't know the reg. time

    # Expect failed login
    resp = client.post("/api/v1/login", json=dict())
    # FIXME assert resp.status_code == 401


def test_test_helpers(client):
    c = getjson(client, "/api/v1/test-helpers")
    assert len(c) == 6


@patch("ooniapi.probe_services._load_json")
def test_psiphon(mock_load_json, client):

    # register and login
    pwd = "HLdywVhzVCNqLvHCfmnMhIXqGmUFMTuYjmuGZhNlRTeIyvxeQTnjVJsiRkutHCSw"
    client_id = _register(client)["client_id"]

    c = postj(client, "/api/v1/login", username=client_id, password=pwd)
    tok = ooniapi.auth.decode_jwt(c["token"], audience="probe_token")
    assert tok["registration_time"] is not None

    url = "/api/v1/test-list/psiphon-config"
    # broken token
    headers = {"Authorization": "Bearer " + c["token"][2:]}
    r = client.get(url, headers=headers)
    assert r.status_code == 401

    mock_load_json.return_value = {"a": "b"}

    # valid token
    headers = {"Authorization": "Bearer " + c["token"]}
    r = client.get(url, headers=headers)
    assert r.status_code == 200, r.json
    assert r.json == {"a": "b"}


@patch("ooniapi.probe_services._load_json")
def test_tor_targets(mock_load_json, client):
    # register and login
    pwd = "HLdywVhzVCNqLvHCfmnMhIXqGmUFMTuYjmuGZhNlRTeIyvxeQTnjVJsiRkutHCSw"
    client_id = _register(client)["client_id"]

    c = postj(client, "/api/v1/login", username=client_id, password=pwd)
    tok = ooniapi.auth.decode_jwt(c["token"], audience="probe_token")
    assert tok["registration_time"] is not None

    url = "/api/v1/test-list/tor-targets"
    # broken token
    headers = {"Authorization": "Bearer " + c["token"][2:]}
    r = client.get(url, headers=headers)
    assert r.status_code == 401

    mock_load_json.return_value = {"a": "b"}

    # valid token
    headers = {"Authorization": "Bearer " + c["token"]}
    r = client.get(url, headers=headers)
    assert r.status_code == 200, r.json
    assert r.json == {"a": "b"}


def test_bouncer_net_tests(client):
    j = {
        "net-tests": [
            {
                "input-hashes": None,
                "name": "web_connectivity",
                "test-helpers": ["web-connectivity"],
                "version": "0.0.1",
            }
        ]
    }
    c = postj(client, "/bouncer/net-tests", **j)
    expected = {
        "net-tests": [
            {
                "collector": "httpo://guegdifjy7bjpequ.onion",
                "collector-alternate": [
                    {"type": "https", "address": "https://ams-pg.ooni.org"},
                    {
                        "front": "dkyhjv0wpi2dk.cloudfront.net",
                        "type": "cloudfront",
                        "address": "https://dkyhjv0wpi2dk.cloudfront.net",
                    },
                ],
                "name": "web_connectivity",
                "test-helpers": {
                    "tcp-echo": "37.218.241.93",
                    "http-return-json-headers": "http://37.218.241.94:80",
                    "web-connectivity": "httpo://y3zq5fwelrzkkv3s.onion",
                },
                "test-helpers-alternate": {
                    "web-connectivity": [
                        {"type": "https", "address": "https://wcth.ooni.io"},
                        {
                            "front": "d33d1gs9kpq1c5.cloudfront.net",
                            "type": "cloudfront",
                            "address": "https://d33d1gs9kpq1c5.cloudfront.net",
                        },
                    ]
                },
                "version": "0.0.1",
                "input-hashes": None,
            }
        ]
    }
    assert c == expected


def test_bouncer_net_tests_bad_request1(client):
    resp = client.post("/bouncer/net-tests")
    assert resp.status_code == 400


def test_bouncer_net_tests_bad_request2(client):
    j = {"net-tests": []}
    resp = client.post("/bouncer/net-tests", json=j)
    assert resp.status_code == 400


# # test collector


def test_collector_open_report(client):
    j = {
        "data_format_version": "0.2.0",
        "format": "json",
        "probe_asn": "AS65550",  # reserved for examples
        "probe_cc": "IE",
        "software_name": "ooni-integ-test",
        "software_version": "0.0.0",
        "test_name": "web_connectivity",
        "test_start_time": "2020-09-09 14:11:11",
        "test_version": "0.1.0",
    }
    c = postj(client, "/report", **j)
    rid = c.pop("report_id")
    assert "_webconnectivity_IE_65550_" in rid
    assert c == {
        "backend_version": "1.3.5",
        "supported_formats": ["yaml", "json"],
    }
    assert len(rid) == 61, rid


def test_collector_upload_msmt_bogus(client):
    j = dict(format="json", content=dict(test_keys={}))
    resp = client.post("/report/bogus", json=j)
    assert resp.status_code == 400, resp


def test_collector_close_report(client):
    c = postj(client, "/report/TestReportID/close")
    assert c == {}


# Test-list related tests are in test_prioritization.py


# --- newapi/tests/integ/test_torsf_stats.py ---

from urllib.parse import urlencode

import pytest


def api(client, subpath, **kw):
    """GET /api/v1/<subpath>, encoding kwargs as the query string."""
    url = f"/api/v1/{subpath}"
    if kw:
        assert "?" not in url
        url += "?" + urlencode(kw)
    response = client.get(url)
    assert response.status_code == 200
    assert response.is_json
    return response.json


@pytest.mark.skipif(not pytest.proddb, reason="use --proddb to run")
def test_torsf_stats_cc(client, log):
    url = "torsf_stats?probe_cc=IT&since=2021-07-28&until=2021-07-29"
    r = api(client, url)
    assert r["result"] == [
        {
            "anomaly_count": 0,
            "anomaly_rate": 0.0,
            "failure_count": 0,
            "measurement_count": 2,
            "measurement_start_day": "2021-07-29",
            "probe_cc": "IT",
        }
    ]


@pytest.mark.skipif(not pytest.proddb, reason="use --proddb to run")
def test_torsf_stats_nocc(client, log):
    # Same window without a probe_cc filter; expects identical data
    url = "torsf_stats?&since=2021-07-28&until=2021-07-29"
    r = api(client, url)
    assert r["result"] == [
        {
            "anomaly_count": 0,
            "anomaly_rate": 0.0,
            "failure_count": 0,
            "measurement_count": 2,
            "measurement_start_day": "2021-07-29",
            "probe_cc": "IT",
        }
    ]


# --- newapi/tests/unit/test_auth.py ---

import pytest

from ooniapi.auth import validate_redirect_url


def test_auth_explorer():
    # Known-good OONI hostname: URL is returned unchanged
    redirect_to = (
        "https://explorer.ooni.org/country/IT?since=2021-01-01&until=2022-01-01"
    )
    redirect_to2, login_fqdn = validate_redirect_url(redirect_to)
    assert redirect_to2 == redirect_to
    assert login_fqdn == "explorer.ooni.org"


def test_auth_bogus():
    # Unlisted hostname must be rejected
    redirect_to = "https://invalid.ooni.org/country/IT"
    with pytest.raises(ValueError):
        validate_redirect_url(redirect_to)


# --- newapi/tests/unit/test_countries.py ---

from ooniapi.countries import lookup_country


def test_lookup():
    assert lookup_country("IT") == "Italy"


# --- newapi/tests/unit/test_prio.py ---

from ooniapi import prio


def test_prio():
    # citizenlab entry vs. priority rules: category/domain/url must all match
    cz = {
        "category_code": "MISC",
        "domain": "thehiddenwiki.org",
        "url": "https://thehiddenwiki.org/",
        "cc": "ZZ",
        "msmt_cnt": 38,
    }
    pr = {
        "category_code": "MISC",
        "cc": "US",
        "domain": "*",
        "priority": -200,
        "url": "*",
    }
    assert prio.match_prio_rule(cz, pr)
    pr = {
        "category_code": "BOGUS",
        "cc": "US",
        "domain": "*",
        "priority": -200,
        "url": "*",
    }
    assert not prio.match_prio_rule(cz, pr)
    pr = {
        "category_code": "MISC",
        "cc": "US",
        "domain": "BOGUS",
        "priority": -200,
        "url": "*",
    }
    assert not prio.match_prio_rule(cz, pr)
    pr = {
        "category_code": "MISC",
        "cc": "US",
        "domain": "*",
        "priority": -200,
        "url": "BOGUS",
    }
    assert not prio.match_prio_rule(cz, pr)


def test_prio_cc_1():
    # Global citizenlab entry (ZZ) matches a country-specific rule
    cz = {"cc": "ZZ"}
    pr = {"cc": "US"}
    for k in ["category_code", "domain", "url"]:
        cz[k] = pr[k] = ""
    assert prio.match_prio_rule(cz, pr)


def test_prio_cc_2():
    cz = {"cc": "US"}
    pr = {"cc": "US"}
    for k in ["category_code", "domain", "url"]:
        cz[k] = pr[k] = ""
    assert prio.match_prio_rule(cz, pr)


def test_prio_cc_3():
    # Wildcard rule matches any country
    cz = {"cc": "US"}
    pr = {"cc": "*"}
    for k in ["category_code", "domain", "url"]:
        cz[k] = pr[k] = ""
    assert prio.match_prio_rule(cz, pr)


def test_prio_cc_4():
    # Mismatching countries do not match
    cz = {"cc": "US"}
    pr = {"cc": "IE"}
    for k in ["category_code", "domain", "url"]:
        cz[k] = pr[k] = ""
    assert not prio.match_prio_rule(cz, pr)


def test_compute_priorities():
    entries = [
        {
            "category_code": "MISC",
            "domain": "thehiddenwiki.org",
            "url": "https://thehiddenwiki.org/",
            "cc": "ZZ",
            "msmt_cnt": 38,
        }
    ]
    prio_rules = [
        {"category_code": "MISC", "cc": "*", "domain": "*", "priority": 20, "url": "*"},
        {
            "category_code": "MISC",
            "cc": "US",
            "domain": "*",
            "priority": -200,
            "url": "*",
        },
    ]
    out = prio.compute_priorities(entries, prio_rules)
    # Priorities of all matching rules sum up: 20 + (-200) = -180
    assert out == [
        {
            "category_code": "MISC",
            "cc": "ZZ",
            "domain": "thehiddenwiki.org",
            "msmt_cnt": 38,
            "priority": -180,
            "url": "https://thehiddenwiki.org/",
            "weight": -4.7368421052631575,
        }
    ]


# --- newapi/tests/unit/test_unit.py ---

from flask import url_for


def test_api_list_reports_index(client):
    resp = client.get(url_for("/api/v1.measurements_api_list_files"))
    assert resp.status_code == 200
    assert isinstance(resp.json["results"], list)
    assert isinstance(resp.json["metadata"], dict)

    assert isinstance(resp.json["metadata"]["limit"], int)
    assert isinstance(resp.json["metadata"]["count"], int)
    assert isinstance(resp.json["metadata"]["pages"], int)
    assert isinstance(resp.json["metadata"]["offset"], int)
    assert isinstance(resp.json["metadata"]["current_page"], int)

    # next_url is either a string or None
    assert any(
        [
            isinstance(resp.json["metadata"]["next_url"], str),
            resp.json["metadata"]["next_url"] is None,
        ]
    )


def test_api_list_reports_error(client):
    resp = client.get(url_for("/api/v1.measurements_api_list_files", order="INVALID"))
    assert resp.status_code == 400
    assert resp.json["title"] == "Bad Request"
    assert resp.json["detail"].startswith("'INVALID' is not one of")

    resp = client.get(
        url_for("/api/v1.measurements_api_list_files", order_by="INVALID")
    )
    assert resp.status_code == 400
    assert resp.json["title"] == "Bad Request"
    assert resp.json["detail"].startswith("'INVALID' is not one of")


def test_api_docs(client):
    resp = client.get(url_for("api_docs.api_docs"))
    assert resp.status_code == 200


def test_pages_index(client):
    resp = client.get(url_for("pages.index"))
    assert resp.status_code == 200


def test_pages_download_file_404(client):
    resp = client.get(url_for("pages.files_download", textname="/2019-01-01/DOES_NOT_EXIST"))
    assert resp.status_code == 404


# --- newapi/tests/utils.py ---

import json


def jd(o):
    # Indented, key-sorted JSON dump (for readable diffs)
    return json.dumps(o, indent=2, sort_keys=True)


def fjd(o):
    # non-indented JSON dump
    return json.dumps(o, sort_keys=True)


def privapi(client, subpath):
    """GET a private API endpoint under /api/_/ and return its JSON body."""
    response = client.get(f"/api/_/{subpath}")
    assert response.status_code == 200
    assert response.is_json
    return response.json


def getjson(client, url):
    """GET `url` and return the decoded JSON body, asserting a 200 reply."""
    response = client.get(url)
    assert response.status_code == 200
    assert response.is_json
    return response.json
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Monitor test-list/urls 5 | """ 6 | 7 | from urllib.request import urlopen 8 | import json 9 | import time 10 | from argparse import ArgumentParser 11 | 12 | 13 | def stats(cat, data): 14 | avg = sum(data) / len(data) 15 | s = f"{cat} count: {len(data)} min: {min(data)} max: {max(data)} avg: {avg}" 16 | print(s) 17 | 18 | 19 | def main(): 20 | 21 | ap = ArgumentParser() 22 | ap.add_argument("--prod", action="store_true") 23 | ap.add_argument("--interval", default=60 * 5, type=int) 24 | ap.add_argument("--cc", default="GB") 25 | ap.add_argument("--probestop", default=50, type=int) 26 | args = ap.parse_args() 27 | 28 | if args.prod: 29 | hn = "api.ooni.io" 30 | else: 31 | hn = "ams-pg-test.ooni.org" 32 | 33 | listurl = f"https://{hn}/api/v1/test-list/urls?country_code={args.cc}" 34 | 35 | cnt = {} 36 | while True: 37 | print(f"--- {hn} {args.cc} ---") 38 | with urlopen(listurl) as p: 39 | data = p.read().decode() 40 | try: 41 | data = json.loads(data) 42 | except json.decoder.JSONDecodeError: 43 | print("--error--") 44 | print(data) 45 | print("----") 46 | time.sleep(args.interval) 47 | continue 48 | 49 | li = data["results"][:args.probestop] # simulate a probe 50 | for r in li: 51 | c = r["category_code"] 52 | url = r["url"] 53 | cnt.setdefault(c, {}).setdefault(url, 0) 54 | cnt[c][url] += 1 55 | 56 | for k in sorted(cnt): 57 | stats(k, cnt[k].values()) 58 | 59 | time.sleep(args.interval) 60 | 61 | 62 | main() 63 | -------------------------------------------------------------------------------- /rate_limit_quotas.py: -------------------------------------------------------------------------------- 1 | """ 2 | Rate limiter and quota system. 
3 | 4 | Framework-independent rate limiting mechanism that provides: 5 | * IP address and token-based accounting 6 | * customizable quotas based on IP address and token 7 | * late limiting based on resource usage (time spent on API calls) 8 | * bucketing based on day, week, month 9 | * statistics 10 | * metrics 11 | * fast in-memory storage 12 | 13 | Also provides a connector for Flask 14 | 15 | """ 16 | 17 | import time 18 | import ipaddress 19 | from typing import Dict, List, Optional, Tuple, Union 20 | 21 | IpAddress = Union[ipaddress.IPv4Address, ipaddress.IPv6Address] 22 | IpAddrBucket = Dict[IpAddress, float] 23 | IpAddrBuckets = Tuple[IpAddrBucket, IpAddrBucket, IpAddrBucket] 24 | TokenBucket = Dict[str, float] 25 | TokenBuckets = Tuple[TokenBucket, TokenBucket, TokenBucket] 26 | 27 | 28 | class Limiter: 29 | def __init__( 30 | self, 31 | limits: dict, 32 | token_check_callback=None, 33 | ipaddr_methods=["X-Real-Ip", "socket"], 34 | whitelisted_ipaddrs=Optional[List[str]], 35 | ): 36 | # Bucket sequence: month, week, day 37 | self._ipaddr_limits = [ 38 | limits.get(l, None) 39 | for l in ("ipaddr_per_month", "ipaddr_per_week", "ipaddr_per_day") 40 | ] 41 | self._token_limits = [ 42 | limits.get(l, None) 43 | for l in ("token_per_month", "token_per_week", "token_per_day") 44 | ] 45 | self._ipaddr_buckets = ({}, {}, {}) # type: IpAddrBuckets 46 | self._token_buckets = ({}, {}, {}) # type: TokenBuckets 47 | self._token_check_callback = token_check_callback 48 | self._ipaddr_extraction_methods = ipaddr_methods 49 | self._last_quota_update_time = time.monotonic() 50 | self._whitelisted_ipaddrs = set() 51 | for ipa in whitelisted_ipaddrs or []: 52 | self._whitelisted_ipaddrs.add(ipaddress.ip_address(ipa)) 53 | 54 | self.increment_quota_counters(1) 55 | self.refresh_quota_counters_if_needed() 56 | 57 | def increment_quota_counters(self, tdelta: int): 58 | """Delta: time from previous run in seconds""" 59 | if tdelta <= 0: 60 | return 61 | 62 | iterable = ( 63 | (30 
* 24, self._ipaddr_limits[0], self._ipaddr_buckets[0]), 64 | (7 * 24, self._ipaddr_limits[0], self._ipaddr_buckets[0]), 65 | (1 * 24, self._ipaddr_limits[0], self._ipaddr_buckets[0]), 66 | (30 * 24, self._ipaddr_limits[0], self._ipaddr_buckets[0]), 67 | (7 * 24, self._ipaddr_limits[0], self._ipaddr_buckets[0]), 68 | (1 * 24, self._ipaddr_limits[0], self._ipaddr_buckets[0]), 69 | ) 70 | for hours, limit, bucket in iterable: 71 | vdelta = limit / hours / 3600 * tdelta 72 | to_delete = [] 73 | for k, v in bucket.items(): 74 | v += vdelta 75 | if v >= limit: 76 | to_delete.append(k) 77 | else: 78 | bucket[k] = v 79 | 80 | for k in to_delete: 81 | del bucket[k] 82 | 83 | def refresh_quota_counters_if_needed(self): 84 | t = time.monotonic() 85 | delta = t - self._last_quota_update_time 86 | if delta > 3600: 87 | self.increment_quota_counters(delta) 88 | 89 | self._last_quota_update_time = t 90 | 91 | def consume_quota(self, elapsed: float, ipaddr: Optional[IpAddress]=None, token=None) -> None: 92 | """Consume quota in seconds 93 | """ 94 | assert ipaddr or token 95 | if ipaddr: 96 | assert isinstance(ipaddr, ipaddress.IPv4Address) 97 | for n, limit in enumerate(self._ipaddr_limits): 98 | b = self._ipaddr_buckets[n] 99 | b[ipaddr] = b.get(ipaddr, limit) - elapsed 100 | 101 | else: 102 | raise NotImplementedError() 103 | 104 | def get_minimum_across_quotas(self, ipaddr=None, token=None) -> float: 105 | assert ipaddr or token 106 | if ipaddr: 107 | iterable = zip(self._ipaddr_limits, self._ipaddr_buckets) 108 | return min(bucket.get(ipaddr, limit) for limit, bucket in iterable) 109 | 110 | else: 111 | raise NotImplementedError() 112 | 113 | def is_quota_available(self, ipaddr=None, token=None) -> bool: 114 | """Check if all quota buckets for an ipaddr/token are > 0 115 | """ 116 | # return False if any bucket reached 0 117 | for bucket in self._ipaddr_buckets: 118 | if ipaddr in bucket: 119 | if bucket[ipaddr] <= 0: 120 | return False 121 | 122 | return True 123 | 124 | def 
is_ipaddr_whitelisted(self, ipaddr: IpAddress) -> bool: 125 | return ipaddr in self._whitelisted_ipaddrs 126 | 127 | def get_lowest_daily_quotas_summary(self, n=20) -> List[Tuple[int, float]]: 128 | """Returns a summary of daily quotas with the lowest values 129 | """ 130 | li = sorted((val, ipa) for ipa, val in self._ipaddr_buckets[2].items()) 131 | li = li[:n] 132 | return [(int(ipa.packed[0]), val) for val, ipa in li] 133 | 134 | 135 | # # Flask-specific code # # 136 | 137 | from flask import request, current_app 138 | import flask 139 | 140 | 141 | class FlaskLimiter: 142 | def _get_client_ipaddr(self) -> IpAddress: 143 | # https://github.com/alisaifee/flask-limiter/issues/41 144 | for m in self._limiter._ipaddr_extraction_methods: 145 | if m == "X-Forwarded-For": 146 | raise NotImplementedError("X-Forwarded-For ") 147 | 148 | elif m == "X-Real-Ip": 149 | ipaddr = request.headers.get("X-Real-Ip", None) 150 | if ipaddr: 151 | return ipaddress.ip_address(ipaddr) 152 | 153 | elif m == "socket": 154 | return ipaddress.ip_address(request.remote_addr) 155 | 156 | else: 157 | raise NotImplementedError(f"IP address method {m} is unknown") 158 | 159 | methods = ",".join(self._limiter._ipaddr_extraction_methods) 160 | raise Exception(f"Unable to detect IP address using {methods}") 161 | 162 | def _check_limits_callback(self): 163 | """Check rate limits before processing a request 164 | Refresh quota counters when needed 165 | """ 166 | self._limiter.refresh_quota_counters_if_needed() 167 | ipaddr = self._get_client_ipaddr() 168 | # token = request.headers.get("Token", None) 169 | # if token: 170 | # check token validity 171 | if not self._limiter.is_quota_available(ipaddr=ipaddr): 172 | flask.abort(429) 173 | self._request_start_time = time.monotonic() 174 | log = current_app.logger 175 | log.error("_check_limits_callback called") 176 | 177 | def _after_request_callback(self, response): 178 | """Consume quota and injects HTTP headers when responding to a request 179 | 
""" 180 | log = current_app.logger 181 | try: 182 | assert response 183 | tdelta = time.monotonic() - self._request_start_time 184 | ipaddr = self._get_client_ipaddr() 185 | if not self._limiter.is_ipaddr_whitelisted(ipaddr): 186 | self._limiter.consume_quota(tdelta, ipaddr=ipaddr) 187 | q = self._limiter.get_minimum_across_quotas(ipaddr=ipaddr) 188 | response.headers.add("X-RateLimit-Remaining", q) 189 | 190 | except Exception as e: 191 | log.error(str(e), exc_info=True) 192 | 193 | finally: 194 | return response 195 | 196 | def __init__( 197 | self, 198 | app, 199 | limits: dict, 200 | token_check_callback=None, 201 | ipaddr_methods=["X-Real-Ip", "socket"], 202 | whitelisted_ipaddrs=None, 203 | ): 204 | """ 205 | """ 206 | self._limiter = Limiter( 207 | limits, 208 | token_check_callback=token_check_callback, 209 | ipaddr_methods=ipaddr_methods, 210 | whitelisted_ipaddrs=whitelisted_ipaddrs, 211 | ) 212 | if app.extensions.get("limiter"): 213 | raise Exception("The Flask app already has an extension named 'limiter'") 214 | 215 | app.before_request(self._check_limits_callback) 216 | app.after_request(self._after_request_callback) 217 | app.extensions["limiter"] = self 218 | 219 | def get_lowest_daily_quotas_summary(self, n=20) -> List[Tuple[int, float]]: 220 | return self._limiter.get_lowest_daily_quotas_summary(n) 221 | -------------------------------------------------------------------------------- /refresh_deps: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -exu 3 | rm .tox/integ -rf 4 | tox -e integ -v --notest 5 | ./.tox/integ/bin/pip3 freeze | sort > /tmp/freeze 6 | sort requirements/main.txt | grep -v '^#.*$' | grep -v '^$' | sort > /tmp/main 7 | meld /tmp/freeze /tmp/main 8 | -------------------------------------------------------------------------------- /scripts/init_db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -exuo pipefail 3 | 
# Point psql at the docker-compose "db" service
export POSTGRES_HOST=db
export PGPASSWORD=$POSTGRES_PASSWORD

# Fetch the pipeline repo to reuse its schema files
tmpdir=$(mktemp -d)
cd $tmpdir
git clone --depth 1 https://github.com/ooni/pipeline.git

echo "Create amsapi and readonly roles"
psql -U $POSTGRES_USER -h $POSTGRES_HOST $POSTGRES_USER -c "CREATE ROLE amsapi;"
psql -U $POSTGRES_USER -h $POSTGRES_HOST $POSTGRES_USER -c "CREATE ROLE readonly;"

echo "Creating database tables using SQL files:"
ls pipeline/af/oometa/*.install.sql
cat pipeline/af/oometa/*.install.sql | psql -U $POSTGRES_USER -h $POSTGRES_HOST $POSTGRES_USER -v ON_ERROR_STOP=1

# --- scripts/restore-dump.sh ---
# Restore a compressed SQL dump into the local measurements database
lz4cat meta-closure.sql.lz4 | psql -U postgres -h localhost -p 5433 measurements -f sample-dump.sql

# --- setup.cfg ---
[tool:pytest]
norecursedirs = build dist node_modules *.egg-info .state requirements
markers =
    unit: Fast tests to be run regularly while developing
    functional: Slow tests that test the software end to end