├── .circleci └── config.yml ├── .dockerignore ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── openapi.yml ├── .gitignore ├── .proxy_coveragerc ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── Pipfile ├── Pipfile.lock ├── README.md ├── app ├── __init__.py ├── adzerk │ ├── __init__.py │ ├── api.py │ ├── transform.py │ └── validation.py ├── client.py ├── conf │ ├── __init__.py │ ├── adzerk_cfg.py │ ├── domain_affinities_cfg.py │ ├── geolocation_cfg.py │ ├── s3_cfg.py │ ├── sentry_cfg.py │ └── spocs_cfg.py ├── config.py ├── exceptions │ ├── __init__.py │ ├── base_exception.py │ ├── invalid_content_type.py │ ├── invalid_param.py │ └── missing_param.py ├── geolocation │ ├── __init__.py │ └── factory.py ├── main.py ├── middleware │ ├── __init__.py │ └── proxy_headers.py ├── provider │ ├── __init__.py │ ├── geo_provider.py │ ├── sentry_provider.py │ └── session_provider.py ├── telemetry │ ├── __init__.py │ ├── handler.py │ └── requirements.txt └── validation.py ├── cloudformation ├── README.md ├── proxy-service.yaml └── vpc │ ├── README.md │ ├── nat.yaml │ ├── private-subnet.yaml │ ├── public-subnet.yaml │ ├── vpc.yaml │ └── vpc_parameters.json ├── conftest.py ├── docker-compose.yml ├── gunicorn.py ├── images ├── app │ └── Dockerfile ├── nginx │ ├── Dockerfile │ └── nginx.conf └── s3 │ └── download.sh ├── openapi └── openapi.yml └── tests ├── README.md ├── __init__.py ├── api ├── __init__.py └── test_api.py ├── fixtures ├── GeoIP2-City-Test.mmdb ├── __init__.py ├── mock_decision.py ├── mock_factory.py ├── mock_placements.py └── mock_spoc.py ├── load ├── script.yml └── serverless.yml ├── scripts ├── timer.sh └── wsgi_profiler_conf.py └── unit ├── __init__.py ├── test_adzerk_api.py ├── test_adzerk_transform.py ├── test_adzerk_validation.py ├── test_app.py ├── test_app_validation.py ├── test_geolocation_factory.py ├── test_geolocation_provider.py └── test_telemetry_handler.py /.circleci/config.yml: 
-------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | # 6 | version: 2.1 7 | 8 | jobs: 9 | tests_unit: 10 | docker: 11 | - image: cimg/python:3.11.8 12 | auth: 13 | username: $DOCKERHUB_USERNAME 14 | password: $DOCKERHUB_PASSWORD 15 | working_directory: ~/proxy-server 16 | parameters: 17 | pipenv_path: 18 | default: "./" 19 | type: string 20 | environment: 21 | WORKON_HOME: '~/.venv' 22 | steps: 23 | - checkout 24 | 25 | - restore_cache: 26 | key: deps-test-{{ checksum "<>Pipfile.lock" }}-v2 27 | - run: 28 | name: Build environment and install requirements 29 | command: | 30 | pip install pipenv 31 | pipenv install --dev 32 | - save_cache: 33 | key: deps-test-{{ checksum "<>Pipfile.lock" }}-v2 34 | paths: 35 | - "~/.venv" 36 | 37 | - run: 38 | name: Run tests 39 | environment: 40 | APP_ENV: test 41 | command: | 42 | mkdir test-reports/ 43 | pipenv run pytest tests/unit/ --cov=. 
--cov-config=.proxy_coveragerc --junitxml=test-reports/junit.xml 44 | pipenv run coverage html --rcfile=.proxy_coveragerc 45 | 46 | # test reporting and pylint are still WIP 47 | - store_test_results: 48 | path: test-reports 49 | 50 | - store_artifacts: 51 | path: proxy-server-coverage 52 | 53 | 54 | # Workflow shortcuts 55 | workflows: 56 | version: 2 57 | build_and_test: 58 | jobs: 59 | - tests_unit: 60 | context: pocket-proxy 61 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Exclude all files starting with a dot 2 | **.* 3 | 4 | aws-cloudformation* 5 | deploy.sh 6 | docker-compose* 7 | *.md 8 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | #All Files 2 | * @pocket/Ads 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Expected Behavior 2 | 3 | ## Actual Behavior 4 | 5 | ## Steps to Reproduce the Problem 6 | 7 | 1. 8 | 1. 9 | 1. 10 | 11 | ## Specifications 12 | 13 | * Version: 14 | * Browser: 15 | * Operating System: 16 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Goal 2 | 3 | Insert purpose of pull request 4 | 5 | ## Todos: 6 | - [ ] Outstanding todo 7 | - [x] Completed todo 8 | 9 | ## Implementation Decisions 10 | 11 | 12 | ## All Submissions: 13 | 14 | - [ ] Have you followed the guidelines in our Contributing document? 15 | - [ ] Have you checked to ensure there aren't other open [Pull Requests](../pulls) for the same update/change? 
16 | -------------------------------------------------------------------------------- /.github/workflows/openapi.yml: -------------------------------------------------------------------------------- 1 | name: OpenAPI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | 15 | - uses: actions/setup-node@v4 16 | with: 17 | node-version: latest 18 | 19 | - name: Lint 20 | run: npx @redocly/cli lint openapi/openapi.yml 21 | 22 | test: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v3 26 | 27 | - uses: actions/setup-python@v4 28 | with: 29 | python-version: "3.11" 30 | 31 | - name: Install dependencies 32 | run: | 33 | pip install pipenv 34 | pipenv install --dev 35 | 36 | - name: Test 37 | run: pipenv run pytest tests/api 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # pycharm 107 | .idea/ 108 | 109 | # Build 110 | tmp/ 111 | 112 | # Serverless Artillery 113 | /tests/load/.serverless/ 114 | /tests/load/.slsart 115 | /tests/load/*.js 116 | /tests/load/*.json 117 | /tests/load/node_modules 118 | 119 | # Environment variables 120 | .env 121 | -------------------------------------------------------------------------------- /.proxy_coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | 3 | [run] 4 | branch = True 5 | omit = 6 | # omit anything in base virtual env 7 | venv/* 8 | # leave out scripts not run as part of PFM service 9 | gunicorn.py 10 | telemetry/* 11 | tests/* 12 | [html] 13 | directory = proxy-server-coverage 14 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | # Community Participation Guidelines 2 | 3 | This repository is governed by Mozilla's code of conduct and etiquette guidelines. 4 | For more details, please read the 5 | [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). 6 | 7 | ## How to Report 8 | For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page. 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | Thank you for checking out our Pocket Proxy Server work. We welcome contributions from everyone! By participating in this project, you agree to abide by the Mozilla [Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). 4 | 5 | ### Asking questions / receiving updates 6 | 7 | * Slack channel (Mozilla staff only): #pkt-newtab 8 | 9 | * File issues/questions on Github: [https://github.com/Pocket/proxy-server/issues](https://github.com/Pocket/proxy-server/issues). We typically triage new issues every Monday. 10 | 11 | ### Finding Bugs & Filing Tickets 12 | 13 | If you've found a bug, or have a feature idea that you you'd like to see, follow these simple guidelines: 14 | 15 | * Pick a thoughtful and concise title for the issue (ie. *not* Thing Doesn't Work!) 16 | 17 | * Make sure to mention your browser version, OS and basic system parameters (eg. Chrome 62, Windows XP, 512KB RAM) 18 | 19 | * If you can reproduce the bug, give a step-by-step recipe 20 | 21 | * Include stack traces from the console(s) where appropriate 22 | 23 | * Screenshots and screen recordings welcome! 
24 | 25 | * When in doubt, take a look at some existing issues and emulate 26 | 27 | ### Contributing Code 28 | 29 | If you are new to the repo, you might want to pay close attention to these tags, as they are typically a great way to get started: Good First Bug, Bug, Chore, and Polish. If you see a bug that is not yet assigned to anyone, start a conversation with an engineer in the ticket itself, expressing your interest in taking the bug. If you take the bug, someone will set the ticket to Assigned to Contributor, so we can be proactive about helping you succeed in fixing the bug. 30 | 31 | When you have some code written, you can open up a Pull Request, get your code reviewed, and see your code merged into the codebase. 32 | 33 | ### Setting up your development environment 34 | 35 | Please review the [README](https://github.com/Pocket/proxy-server/blob/main/README.md) for instructions on setting up your development environment, installing dependencies and building the extensions. 36 | 37 | ### Creating Pull Requests 38 | 39 | You have identified the bug, written code and now want to get it into the main repo using a [Pull Request](https://help.github.com/articles/about-pull-requests/). 40 | 41 | All code is added using a pull request against the main branch of our repo. Before submitting a PR, please go through this checklist: 42 | 43 | * All unit tests must pass (and if you haven't written a unit test, please do!) 44 | 45 | * Fill out the pull request template as outlined 46 | 47 | * Please add a PR / Needs Review tag to your PR (if you have permission). This starts the code review process. If you cannot add a tag, don't worry, we will add it during triage. 48 | 49 | * Make sure your PR will merge gracefully with main at the time you create the PR, and that your commit history is 'clean' 50 | 51 | ### Understanding Code Reviews 52 | 53 | You have created a PR and submitted it to the repo, and now are waiting patiently for you code review feedback. 
One of the projects module owners will be along and will either: 54 | 55 | * Make suggestions for some improvements 56 | 57 | * Give you a 👍 in the comments section, indicating the review is done and the code can be merged 58 | 59 | Typically, you will iterate on the PR, making changes and pushing your changes to new commits on the PR. When the reviewer is satisfied that your code is good-to-go, you will get the coveted R+ comment, and your code can be merged. If you have commit permission, you can go ahead and merge the code to main, otherwise, it will be done for you. 60 | 61 | Our project prides itself on it's respectful, patient and positive attitude when it comes to reviewing contributor's code, and as such, we expect contributors to be respectful, patient and positive in their communications as well. 62 | 63 | ### Writing Good Git Commit Messages 64 | 65 | We like this overview by Chris Beams on "[How to Write a Git Commit Message](https://chris.beams.io/posts/git-commit/)". 66 | 67 | The tl;dr is: 68 | 69 | 1. [Separate subject from body with a blank line](https://chris.beams.io/posts/git-commit/#separate) 70 | 71 | 2. [Limit the subject line to 50 characters](https://chris.beams.io/posts/git-commit/#limit-50) 72 | 73 | 3. [Capitalize the subject line](https://chris.beams.io/posts/git-commit/#capitalize) 74 | 75 | 4. [Do not end the subject line with a period](https://chris.beams.io/posts/git-commit/#end) 76 | 77 | 5. [Use a verb to start your subject line (Add, Remove, Fix, Update, Rework, Polish, etc.)](https://chris.beams.io/posts/git-commit/#imperative) 78 | 79 | 6. [Wrap the body at 72 characters](https://chris.beams.io/posts/git-commit/#wrap-72) 80 | 81 | 7. [Use the body to explain *what* and *why* vs. 
*how*](https://chris.beams.io/posts/git-commit/#why-not-how) 82 | 83 | ### Understanding How Pocket Triages 84 | 85 | The project team meets weekly (in a closed meeting, for the time being), to discuss project priorities, to triage new tickets, and to redistribute the work amongst team members. Any contributors tickets or PRs are carefully considered, prioritized, and if needed, assigned a reviewer. 86 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build 2 | build: 3 | docker compose build 4 | 5 | .PHONY: start 6 | start: 7 | docker compose up 8 | 9 | .PHONY: destroy 10 | destroy: 11 | docker compose down --rmi all --remove-orphans -v 12 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | certifi = ">=2023.7.22" 8 | boto3 = "~=1.28.85" 9 | requests = "~=2.31.0" 10 | geoip2 = "~=4.7.0" 11 | # we need to install gunicorn, uvicorn, uvloop, httptools, and fastapi 12 | # so it starts in the virtualenv instead of the one in the docker image 13 | gunicorn = "*" 14 | uvicorn = "*" 15 | fastapi = {extras = ["all"], version = "*"} 16 | uvloop = "*" 17 | httptools = "*" 18 | aiohttp = {extras = ["speedups"], version = "*"} 19 | schemathesis = "*" 20 | 
google-cloud-storage = "*" 21 | sentry-sdk = {extras = ["fastapi"], version = "*"} 22 | google-cloud-logging = "*" 23 | 24 | [dev-packages] 25 | responses = "*" 26 | pytest = "*" 27 | pytest-cov = "*" 28 | aioresponses = "*" 29 | 30 | [requires] 31 | python_version = "3.11" 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spocs Proxy Server 2 | This service sits between Firefox and [AdZerk](https://adzerk.com/). 3 | Its purpose is to preserve the privacy of Firefox clients when they request sponsored content (spocs) for the Firefox New Tab. 4 | See [Sponsored Stories FAQ](https://help.getpocket.com/article/1142-firefox-new-tab-recommendations#sponsoredstories) 5 | for more information. 6 | 7 | ## API 8 | 9 | See [OpenAPI documentation](https://app.swaggerhub.com/apis-docs/PocketNewTab/PocketProxyServer). 10 | 11 | ## Development environment 12 | The following steps create a Docker development environment to run this service locally. 13 | 14 | 1. Install [Docker](https://docs.docker.com/install/) and [Docker Compose](https://docs.docker.com/compose/install/). 15 | 2. In the project root run: `docker compose build`. 16 | 3. Start a mock s3 service: `docker compose up s3`. 17 | 4. Sign up for an account on the [MaxMind](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data?lang=en#accessing-geolite2-free-geolocation-data) website. 18 | 5. Navigate to the "Download Files" page in your account, download the GeoLite2 City database and copy it to `pocket-geoip/GeoLite2-City.mmdb` on the mock s3 container. 19 | 1. If the database is stored on s3: 20 | ``` 21 | images/s3/download.sh -b 22 | ``` 23 | 2. If it's stored on disk in a file called `GeoLite2-City.mmdb`: 24 | ``` 25 | aws --endpoint-url http://localhost:4569 s3 cp GeoLite2-City.mmdb s3://pocket-geoip/GeoLite2-City.mmdb 26 | ``` 27 | 5. 
Verify that GeoIP2 is available at [localhost:4569/pocket-geoip/GeoLite2-City.mmdb](http://localhost:4569/pocket-geoip/GeoLite2-City.mmdb). 28 | 6. Create a `.env` file in the project root directory with the following content, replacing `` with the respective secret values: 29 | ``` 30 | ADZERK_API_KEY= 31 | ``` 32 | 7. Start the application containers: `docker compose up`. 33 | 8. Test that the application is running: http://localhost/pulse. It should return `{"pulse":"ok"}`. 34 | 35 | ## Tests 36 | See the [Test README](tests/README.md). 37 | 38 | ## Deployment 39 | 40 | The first time the service is deployed, follow the steps in the [CloudFormation README](cloudformation/README.md). 41 | 42 | For subsequent deployments: 43 | 1. Merge a PR into the main branch. 44 | 2. Wait for the new Docker image to be built and uploaded to ECR. 45 | 3. Open Fargate in the AWS console and update the service, forcing a new deployment 46 | Or, you can increase the number of tasks. Since the task is using the `latest` tag 47 | they should pull in your changes without forcing an update to the task. 48 | 49 | # Telemetry Function 50 | The [Telemetry Handler](app/telemetry/handler.py) is triggered by telemetry from the Firefox discovery stream. It anonymously pings AdZerk to keep track of events related to sponsored content, such as clicks and impressions, in a privacy-preserving way. The event code (or "shim") does not contain any personally identifiable data; we never share personal data with AdZerk. 51 | 52 | ## Deployment 53 | 54 | 1. Open telemetry-proxy in the [Google Cloud Console](https://console.cloud.google.com) 55 | 2. Click Edit and paste the new code into the `main.py` file (*do not* be confused by the fact the contents of the function are stored in a file named `handler.py` in this repository and *do not* rename the file name on Google Cloud). 56 | 3. 
Click Deploy 57 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pocket/proxy-server/306202e77a700c4e9ee676875f67a73595493045/app/__init__.py -------------------------------------------------------------------------------- /app/adzerk/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['api', 'transform'] 2 | -------------------------------------------------------------------------------- /app/adzerk/api.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import requests 4 | from aiohttp import ClientSession 5 | from copy import deepcopy 6 | 7 | from app.adzerk import validation 8 | from app import conf 9 | 10 | 11 | class AdZerkException(Exception): 12 | pass 13 | 14 | 15 | class Api: 16 | 17 | def __init__(self, pocket_id, country=None, region=None, site=None, placements=None, api_key: str = None): 18 | self.pocket_id = pocket_id 19 | self.country = country 20 | self.region = region 21 | self.site = site 22 | self.placements = placements 23 | self.api_key = api_key 24 | 25 | async def get_decisions(self, session: ClientSession): 26 | """ 27 | Calls Adzerk API with request body 28 | :return: A map of decisions, previously 29 | a list of decisions for one div/placement. 30 | """ 31 | async with session.post(conf.adzerk['decision']['url'], json=self.get_decision_body()) as r: 32 | if r.status == 400: 33 | text = await r.text() 34 | # This occurs when there is no site with the requested id from adzerk. 
35 | # So instead we send back no results but log an error 36 | logging.error(text) 37 | return dict() 38 | response = await r.json() 39 | 40 | decisions = response['decisions'] 41 | if not decisions or len(decisions) == 0: 42 | return dict() 43 | for _, dec in decisions.items(): 44 | if dec: 45 | map(validation.validate_decision, dec) 46 | return decisions 47 | 48 | def get_decision_body(self): 49 | body = deepcopy(conf.adzerk['decision']['body']) 50 | self.__add_targeting(body) 51 | self.__add_placements(body) 52 | self.__add_site(body) 53 | logging.debug(body) 54 | return body 55 | 56 | def __add_targeting(self, body): 57 | if self.pocket_id is not None: 58 | body['user'] = {'key': self.pocket_id} 59 | keywords = [] 60 | if self.country: 61 | keywords.append(self.country) 62 | if self.region: 63 | keywords.append(self.country + '-' + self.region) 64 | if keywords: 65 | body['keywords'] = keywords 66 | 67 | def __add_site(self, body): 68 | if self.site is not None: 69 | for placement in body['placements']: 70 | placement['siteId'] = self.site 71 | 72 | def __add_placements(self, body): 73 | # if placement exists, we need to replace default values with placements from client 74 | if self.placements and len(self.placements) > 0: 75 | default_placement = body['placements'].pop(0) # remove default 76 | for place in self.placements: 77 | copy_place = deepcopy(default_placement) 78 | if 'ad_types' in place: 79 | copy_place['adTypes'] = place['ad_types'] 80 | if 'zone_ids' in place: 81 | copy_place['zoneIds'] = place['zone_ids'] 82 | copy_place['divName'] = place['name'] 83 | body['placements'].append(copy_place) 84 | 85 | def delete_user(self): 86 | response = requests.delete( 87 | url=conf.adzerk['forget_endpoint'], 88 | params={'userKey': self.pocket_id}, 89 | headers={'X-Adzerk-ApiKey': self.api_key}, 90 | timeout=30 91 | ) 92 | response.raise_for_status() 93 | return response 94 | -------------------------------------------------------------------------------- 
# --- app/adzerk/transform.py ---
from urllib import parse
import json
import re
import logging
from app import conf

DEFAULT_PRIORITY = 100

# BUG FIX (forward compatibility): distutils was deprecated by PEP 632 and is
# removed in Python 3.12, so `distutils.util.strtobool` is replaced by a local
# helper that accepts exactly the same strings.
_TRUE_STRINGS = ('y', 'yes', 't', 'true', 'on', '1')
_FALSE_STRINGS = ('n', 'no', 'f', 'false', 'off', '0')


def _strtobool(value):
    """Replicate distutils.util.strtobool: True/False for known strings, ValueError otherwise."""
    normalized = value.lower()
    if normalized in _TRUE_STRINGS:
        return True
    if normalized in _FALSE_STRINGS:
        return False
    raise ValueError('invalid truth value {0!r}'.format(value))


def to_spoc(decision):
    """Transform a single AdZerk decision into the spoc dictionary consumed by Firefox."""
    if not decision:
        return {}
    custom_data = decision['contents'][0]['data']
    body = decision['contents'][0].get('body')
    if body:
        body = json.loads(body)

    events_map = {e["id"]: tracking_url_to_shim(e["url"]) for e in decision["events"]}
    priority_map = conf.adzerk['priority_id_to_weight']

    spoc = {
        'id': decision['adId'],
        'flight_id': decision['flightId'],
        'campaign_id': decision['campaignId'],
        'title': custom_data['ctTitle'],
        'url': custom_data['ctUrl'],
        'domain': custom_data['ctDomain'],
        'excerpt': custom_data['ctExcerpt'],
        'priority': priority_map.get(decision.get('priorityId'), DEFAULT_PRIORITY),
        'context': __get_context(custom_data.get('ctSponsor')),
        'raw_image_src': custom_data['ctFullimagepath'],
        'image_src': __get_cdn_image(custom_data['ctFullimagepath']),
        'shim': {
            'click': tracking_url_to_shim(decision['clickUrl']),
            'impression': tracking_url_to_shim(decision['impressionUrl']),
            # Event ids 17 (delete) and 20 (save) are requested via eventIds in adzerk_cfg.
            'delete': events_map[17],
            'save': events_map[20],
        },
        'parameter_set': 'default',
        'caps': conf.spocs['caps'],
        'domain_affinities': __get_domain_affinities(custom_data.get('ctDomain_affinities')),
        'personalization_models': get_personalization_models(body),
    }

    optional_fields = {
        'ctCta': 'cta',
        'ctCollectionTitle': 'collection_title',
        'ctSponsor': 'sponsor',
        'ctSponsoredByOverride': 'sponsored_by_override',
    }
    for adzerk_key, spoc_key in optional_fields.items():
        if adzerk_key in custom_data and custom_data[adzerk_key]:
            spoc[spoc_key] = custom_data[adzerk_key]

    if 'sponsored_by_override' in spoc:
        spoc['sponsored_by_override'] = __clean_sponsored_by_override(spoc['sponsored_by_override'])

    spoc['min_score'] = float(custom_data.get('ctMin_score', 0.1))
    spoc['item_score'] = float(custom_data.get('ctItem_score', 0.2))

    try:
        spoc['is_video'] = _strtobool(custom_data['ctIsVideo'].strip())
    except (KeyError, ValueError):
        # Don't set is_video if ctIsVideo is not present or not a boolean (e.g. an empty string)
        pass

    return spoc


def tracking_url_to_shim(url):
    """Compress an AdZerk tracking URL into a compact 'path_id,e,s' shim string."""
    components = parse.urlsplit(url)

    path_id = conf.adzerk['telemetry_endpoint_ids'].get(components.path)
    if path_id is None:
        raise Exception('Not a known telemetry path: {0}'.format(components.path))

    params = parse.parse_qs(components.query)
    e = params['e'][0]
    s = params['s'][0]
    return ','.join([path_id, e, s])


def is_collection(spocs):
    """
    :param spocs: A list of spocs
    :return: True if the list of spocs is a sponsored collection; spocs that should be featured together.
    """
    return all(spoc.get('collection_title') for spoc in spocs)


def to_collection(spocs):
    """
    Transforms a list of spocs to a sponsored collection dictionary.
    AdZerk does not support fields for a collection. We set them on all creatives and get them from an arbitrary one.
    :param spocs: A list of spocs
    :return: A dictionary with collection fields (title, flight_id, and sponsor) and a list of spocs.
    """
    collection = {
        'title': spocs[0]['collection_title'],
        'flight_id': spocs[0]['flight_id'],
        'sponsor': spocs[0]['sponsor'],
        'context': __get_context(spocs[0]['sponsor']),
    }

    for spoc in spocs:
        del spoc['collection_title']

    collection['items'] = spocs
    return collection


def get_personalization_models(body):
    """Map AdZerk topic_* flags to Firefox model names (nb_model_* without prefix), weight 1."""
    if body is None:
        return {}
    else:
        # Topics in AdZerk prefixed with topic_ correspond with models in Firefox prefixed with nb_model_.
        p = re.compile('^topic_')
        return {k: 1 for k in [p.sub('', t) for t, v in body.items() if p.match(t) and v in ('true', True)]}


def __get_cdn_image(raw_image_url):
    """Wrap the raw creative image URL in the Pocket image CDN resizing endpoint."""
    escaped = parse.quote(raw_image_url)
    return 'https://img-getpocket.cdn.mozilla.net/direct?url={0}&resize=w618-h310'.format(escaped)


def __get_context(sponsor):
    """Return the 'Sponsored by X' context line, or '' when there is no sponsor."""
    return 'Sponsored by {0}'.format(sponsor) if sponsor else ''


def __get_domain_affinities(name):
    """Look up the named domain-affinity set from config; empty dict when absent."""
    if name is None:
        return {}
    else:
        return conf.domain_affinities.get(str(name).lower(), dict())


def __clean_sponsored_by_override(sponsored_by_override):
    """
    Return an empty string for 'sponsored_by_override' if the value in AdZerk is set to "blank" or "empty".
    @type sponsored_by_override: str
    """
    return re.sub(r'^(blank|empty)$', '', sponsored_by_override.strip(), flags=re.IGNORECASE)


# --- app/adzerk/validation.py ---
import re


def validate_decision(decision):
    """Validate a single AdZerk decision; currently only checks the creative image URL."""
    validate_image_url(decision['contents'][0]['data']['ctFullimagepath'])
    return True


def validate_image_url(url):
    """Allow only images hosted on the AdZerk CDN (zkcdn.net); raise otherwise."""
    if re.match(r'https://(\w+\.)?zkcdn\.net/', url):
        return True
    else:
        raise Exception("Invalid AdZerk image url: {0}".format(url))


# --- app/client.py ---
from app import conf
from app.adzerk.api import Api as AdZerk
import app.adzerk.transform
import logging
from aiohttp import ClientSession


class Client:

    def __init__(self, version, consumer_key, pocket_id, ip, geolocation_provider,
                 site=None, placements=None, country=None, region=None):
        """
        :param version: API version to provide backwards-compatibility to older clients
        :param consumer_key: Identifies the consumer (e.g. Firefox)
        :param pocket_id: Anonymous Pocket user id
        :param ip: Client IP-address
        :param geolocation_provider: GeoIP2 database that converts an IP to a geolocation.
        :param site: Override the site. Leave None to use the default Firefox Production site.
        :param placements: Override the default placements. Leave None to get spocs.
        :param country: Set the country for debugging purposes. Leave None for IP-based geolocation.
        :param region: Set the region for debugging purposes. Leave None for IP-based geolocation.
        """
        self.version = int(version)
        self.consumer_key = consumer_key
        self.pocket_id = pocket_id
        self.ip = ip
        self.country = country
        self.region = region
        if not geolocation_provider:
            logging.error('Need geolocation object')
        else:
            self.geolocation = geolocation_provider
        self.site = site
        self.placements = placements

    async def get_spocs(self, session: ClientSession):
        """Fetch decisions from AdZerk and transform them into the client-facing spocs response."""
        targeting = {"site": self.site}  # setting site here by default so it's picked up by API

        try:
            geo = self.geolocation.get_city(self.ip)
            targeting['country'] = self.geolocation.get_country(geo)
            targeting['region'] = self.geolocation.get_region(geo)
        except Exception as e:
            # Geolocation is best-effort; FIX: previously the failure was swallowed
            # silently, now it is at least visible at debug level.
            logging.debug('Geolocation lookup failed: %s', e)

        if self.country:
            targeting['country'] = self.country
        if self.region:
            targeting['region'] = self.region

        if self.pocket_id:
            targeting['pocket_id'] = self.pocket_id
        else:
            logging.warning("Could not target based on pocket_id because it's missing.")

        targeting['placements'] = self.placements

        adzerk_api = AdZerk(**targeting)
        decisions = await adzerk_api.get_decisions(session)

        response = {
            # CONSISTENCY FIX: use the module-level `conf` alias like the rest of
            # this file, instead of reaching through `app.conf`.
            'settings': conf.spocs['settings'],
        }
        self.__transform_spocs(response, decisions)

        if conf.env in ('development', 'staging'):
            response['__debug__'] = adzerk_api.get_decision_body()

        return response

    def __transform_spocs(self, response, spocs_raw):
        # spocs is a dict from multiple decisions
        # so we add its elements to final response directly
        for div, spocs in spocs_raw.items():
            if spocs:
                transformed_spocs = [app.adzerk.transform.to_spoc(s) for s in spocs]
                if self.version >= 2 and app.adzerk.transform.is_collection(transformed_spocs):
                    response[div] = app.adzerk.transform.to_collection(transformed_spocs)
                else:
                    response[div] = transformed_spocs
            else:
                response[div] = []
# --- app/conf/__init__.py ---
import os
import logging

from app.conf import (
    s3_cfg,
    geolocation_cfg,
    sentry_cfg,
    spocs_cfg,
    adzerk_cfg,
    domain_affinities_cfg,
)

# Each *_cfg module exposes one dict per environment name; APP_ENV selects it.
env = os.environ.get('APP_ENV') or 'development'
release = os.environ.get('GIT_SHA') or 'local'
logging.info("APP_ENV = " + env)

s3 = getattr(s3_cfg, env)
geolocation = getattr(geolocation_cfg, env)
sentry = getattr(sentry_cfg, env)
spocs = getattr(spocs_cfg, env)
adzerk = getattr(adzerk_cfg, env)
domain_affinities = getattr(domain_affinities_cfg, env)


# --- app/conf/adzerk_cfg.py ---
import os

from app.telemetry.handler import TELEMETRY_PATH_IDS

# BUG FIX: os.environ.get returns a str when the variable is set, while the
# default was an int; cast so the decision body always carries a numeric
# networkId regardless of how the value was provided.
network_id = int(os.environ.get("ADZERK_NETWORK_ID", 10250))
div_name = "spocs"
domain = "https://e-{0}.adzerk.net".format(str(network_id))

production = staging = development = test = {
    "network_id": network_id,
    "div_name": div_name,
    "telemetry_endpoint_ids": TELEMETRY_PATH_IDS,
    "forget_endpoint": "{0}/udb/{1}/".format(domain, str(network_id)),
    "decision": {
        "url": "{0}/api/v2".format(domain),
        "body": {
            "placements": [
                {
                    "divName": div_name,
                    "networkId": network_id,
                    "siteId": 1070098,
                    "adTypes": [2401, 3617],
                    "zoneIds": [217995],
                    "count": 10,
                    "eventIds": [17, 20],
                }
            ]
        },
    },
    # Default priority_id to weight mapping, used during task startup before they are fetched from AdZerk.
    "priority_id_to_weight": {
        147517: 1,
        180843: 2,
        147518: 3,
        160722: 9,
        147520: 10,
    },
}


# --- app/conf/geolocation_cfg.py ---
import os

production = staging = development = test = {
    's3_bucket': os.environ.get('GEOIP_S3_BUCKET', 'GEOIP_S3_BUCKET'),
    's3_key': 'GeoLite2-City.mmdb',
}


# --- app/conf/s3_cfg.py ---
# Local development/test talk to the fake S3 container; staging/production use
# the default boto3 credential chain.
development = test = {
    'session': {},
    'client': {
        'endpoint_url': 'http://s3:4569',
        'region_name': '',
        'aws_access_key_id': '',
        'aws_secret_access_key': '',
    },
}

staging = production = {
    'session': {},
    'client': {},
}


# --- app/conf/sentry_cfg.py ---
import os

dsn = os.environ.get('SENTRY_DSN', '')
# BUG FIX: when these variables are set in the environment they arrive as
# strings, but sentry_sdk.init expects float sample rates; cast explicitly.
sample_rate_traces = float(os.environ.get('SENTRY_TRACES_SAMPLE_RATE', 0.0))
sample_rate_profiles = float(os.environ.get('SENTRY_PROFILE_SAMPLE_RATE', 0.0))

production = staging = development = test = {
    'dsn': dsn,
    'traces_sample_rate': sample_rate_traces,
    'profiles_sample_rate': sample_rate_profiles,
}


# --- app/conf/spocs_cfg.py ---
production = staging = development = test = {
    'caps': {
        'lifetime': 50,
        'campaign': {
            'count': 10,
            'period': 86400,
        },
        'flight': {
            'count': 10,
            'period': 86400,
        },
    },
    'settings': {
        "feature_flags": {
            "spoc_v2": True,
            # 'Collections' are stories that are run together, currently only used occasionally in Firefox.
            # If collections is True, the client can include a collection placement in the request.
            # If collections is False, the client will not include a collection placement, to reduce ad decisions.
            "collections": False,
        },
        "spocsPerNewTabs": 1,
        "domainAffinityParameterSets": {
            "default": {
                "recencyFactor": 0.5,
                "frequencyFactor": 0.5,
                "combinedDomainFactor": 0.5,
                "perfectFrequencyVisits": 10,
                "perfectCombinedDomainScore": 2,
                "multiDomainBoost": 0,
                "itemScoreFactor": 1
            },
            "fully-personalized": {
                "recencyFactor": 0.5,
                "frequencyFactor": 0.5,
                "combinedDomainFactor": 0.5,
                "perfectFrequencyVisits": 10,
                "perfectCombinedDomainScore": 2,
                "itemScoreFactor": 0.01,
                "multiDomainBoost": 0
            },
            "fully-personalized-domains": {
                "recencyFactor": 0.5,
                "frequencyFactor": 0.5,
                "combinedDomainFactor": 0.5,
                "perfectFrequencyVisits": 1,
                "perfectCombinedDomainScore": 10,
                "itemScoreFactor": 0.01,
                "multiDomainBoost": 0
            }
        },
        "timeSegments": [
            {
                "id": "week-1",
                "startTime": 432000,
                "endTime": 0,
                "weightPosition": 1
            },
            {
                "id": "week-2",
                "startTime": 864000,
                "endTime": 432000,
                "weightPosition": 1
            },
            {
                "id": "week-3",
                "startTime": 1296000,
                "endTime": 864000,
                "weightPosition": 1
            },
            {
                "id": "week-4",
                "startTime": 1728000,
                "endTime": 1296000,
                "weightPosition": 1
            },
            {
                "id": "week-5",
                "startTime": 2160000,
                "endTime": 1728000,
                "weightPosition": 1
            },
            {
                "id": "week-6",
                "startTime": 2592000,
                "endTime": 2160000,
                "weightPosition": 1
            }
        ]
    }
}
# --- app/config.py ---
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Application settings, read from the environment by pydantic-settings."""

    # gunicorn
    gunicorn_bind: str = "0.0.0.0:8000"
    gunicorn_worker_class: str = "uvicorn.workers.UvicornWorker"
    gunicorn_workers: int = 4


# --- app/exceptions/__init__.py (empty) ---

# --- app/exceptions/base_exception.py ---
# NOTE(review): this class shadows the builtin BaseException. Renaming it would
# change the public interface (main.py imports it by this name), so it is kept.
class BaseException(Exception):
    """Base class for HTTP-facing errors carrying a message, status code and optional payload."""
    status_code = 400

    def __init__(self, message, status_code=None, payload=None):
        """
        :param message: Human-readable error message returned to the client.
        :param status_code: Optional HTTP status override of the class default (400).
        :param payload: Optional mapping merged into the error response body.
        """
        Exception.__init__(self)
        self.message = message
        if status_code is not None:
            self.status_code = status_code
        self.payload = payload

    def to_dict(self):
        """Return the payload (if any) merged with 'error_message', for JSON error responses."""
        rv = dict(self.payload or ())
        rv['error_message'] = self.message
        return rv


# --- app/exceptions/invalid_content_type.py ---
from app.exceptions.base_exception import BaseException


class InvalidContentType(BaseException):
    """Raised when the request body is not valid application/json."""
    status_code = 400


# --- app/exceptions/invalid_param.py ---
from app.exceptions.base_exception import BaseException


class InvalidParam(BaseException):
    """Raised when a request parameter has an unexpected name or value."""
    status_code = 400


# --- app/exceptions/missing_param.py ---
from app.exceptions.base_exception import BaseException


class MissingParam(BaseException):
    """Raised when a required request parameter is absent."""
    status_code = 400


# --- app/geolocation/__init__.py (empty) ---

# --- app/geolocation/factory.py ---
import os
import tempfile

import boto3
import geoip2.database
from google.cloud import storage

from app import conf
from app.conf import geolocation


class Factory:
    """Builds a geoip2 database Reader from a GeoIP file stored in GCS or S3."""

    def __init__(self):
        # Presence of GEOIP_GCS_BUCKET selects Google Cloud Storage; otherwise S3.
        if os.environ.get("GEOIP_GCS_BUCKET", None):
            self.storage_provider = "GCS"
            self.storage_client = storage.Client()

        else:
            self.storage_provider = "S3"
            self.storage_client = boto3.session.Session(**conf.s3["session"]).client(
                "s3", **conf.s3["client"]
            )

    def get_instance(self):
        """
        :return: geoip2.database.Reader
        """
        with tempfile.TemporaryFile() as fp:
            if self.storage_provider == "GCS":
                bucket = self.storage_client.bucket(os.environ.get("GEOIP_GCS_BUCKET"))
                blob = bucket.blob("GeoIP2-City.mmdb")

                blob.download_to_file(fp)

            else:
                self.storage_client.download_fileobj(
                    geolocation["s3_bucket"], geolocation["s3_key"], fp
                )

            fp.seek(0)

            # NOTE(review): the Reader is built inside the TemporaryFile context,
            # so fp closes right after this returns; presumably MODE_FD loads the
            # database fully at construction — confirm against geoip2 docs.
            return geoip2.database.Reader(fp, mode=geoip2.database.MODE_FD)
-------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | from contextlib import asynccontextmanager 2 | from json.decoder import JSONDecodeError 3 | from os import environ 4 | from typing import Dict 5 | 6 | import uvicorn 7 | from starlette.responses import JSONResponse 8 | from starlette.requests import ClientDisconnect 9 | from starlette.status import HTTP_204_NO_CONTENT 10 | from fastapi import FastAPI, Request, Response 11 | 12 | from app.adzerk.api import Api as AdZerk 13 | from app.client import Client 14 | from app.exceptions.base_exception import BaseException 15 | from app.exceptions.missing_param import MissingParam 16 | from app.exceptions.invalid_content_type import InvalidContentType 17 | from app.exceptions.invalid_param import InvalidParam 18 | from app.validation import is_valid_pocket_id 19 | from app.provider.geo_provider import GeolocationProvider 20 | from app.provider.session_provider import SessionProvider 21 | from app.provider.sentry_provider import sentry_init 22 | from app.middleware.proxy_headers import ProxyHeadersMiddleware 23 | 24 | 25 | @asynccontextmanager 26 | async def lifespan(_: FastAPI) -> None: 27 | # Initialize logging to sentry 28 | sentry_init() 29 | 30 | # Initialize singleton aiohttp client session (called for side effect) 31 | SessionProvider.session() 32 | 33 | yield 34 | 35 | await SessionProvider.shutdown() 36 | 37 | 38 | app = FastAPI(lifespan=lifespan) 39 | 40 | # Trust the X-Forwarded-For using a middleware. See the middle ware for more info. 
41 | app.add_middleware(ProxyHeadersMiddleware, trusted_hosts="*") 42 | 43 | provider = GeolocationProvider() 44 | 45 | 46 | @app.post('/spocs') 47 | async def get_spocs(request: Request): 48 | try: 49 | required_params = set(['version', 'consumer_key', 'pocket_id']) 50 | optional_params = set(['site', 'placements', 'country', 'region']) 51 | req_params = await __get_request_params(request) 52 | if request.client is not None: 53 | client_host = request.client.host 54 | else: 55 | client_host = "" 56 | return await call(client_host, req_params, required_params, optional_params=optional_params) 57 | except ClientDisconnect: 58 | pass 59 | 60 | 61 | @app.delete('/user') 62 | async def delete_user(json_data: Dict): 63 | pocket_id = json_data['pocket_id'] 64 | adzerk_api = AdZerk(pocket_id=pocket_id, api_key=environ.get('ADZERK_API_KEY')) 65 | response = adzerk_api.delete_user() 66 | 67 | return {'status': int(response.status_code == 200)}, response.status_code 68 | 69 | 70 | @app.get('/pulse') 71 | async def pulse(): 72 | return {'pulse': 'ok'} 73 | 74 | 75 | @app.get('/health') 76 | async def health(): 77 | return {'health': 'ok'} 78 | 79 | 80 | @app.exception_handler(MissingParam) 81 | @app.exception_handler(InvalidParam) 82 | @app.exception_handler(InvalidContentType) 83 | def handle_invalid_usage(request: Request, error: BaseException): 84 | response = error.to_dict() 85 | response['status_code'] = error.status_code 86 | return JSONResponse(status_code=error.status_code, content=response) 87 | 88 | 89 | async def call(client_ip, req_params, required_params, optional_params=None): 90 | # first validate required parameters 91 | params = {k: v for k, v in req_params.items() if k in required_params} 92 | __validate_required_params(required_params, params) 93 | 94 | # then identify unknown parameters 95 | all_params = set([k for k in req_params.keys()]) 96 | unknown_params = all_params - (optional_params | required_params) # given params minus union of allowed 97 | if 
len(unknown_params) > 0: 98 | raise InvalidParam('Unrecognized parameters: {0}'.format(unknown_params)) 99 | 100 | # finally add optional parameters to required parameters 101 | other_params = {k: v for k, v in req_params.items() if k in optional_params} 102 | 103 | # do some additional checking for placements 104 | if 'placements' in other_params: 105 | __validate_placements(other_params['placements']) 106 | params.update(other_params) 107 | 108 | # validate that the version param is a valid int 109 | if 'version' in params: 110 | try: 111 | int(params['version']) 112 | except ValueError: 113 | raise InvalidParam('Invalid version') 114 | 115 | client = Client(ip=client_ip, geolocation_provider=provider, **params) 116 | session = SessionProvider.session() 117 | 118 | return await client.get_spocs(session) 119 | 120 | 121 | def __validate_required_params(required, params): 122 | missing = required - params.keys() 123 | if missing: 124 | raise MissingParam('Missing required argument(s): {0}'.format(', '.join(missing))) 125 | 126 | if not is_valid_pocket_id(params['pocket_id']): 127 | raise InvalidParam('Invalid pocket_id') 128 | 129 | 130 | def __validate_placements(placements): 131 | if not placements or len(placements) == 0: 132 | return 133 | required_params = ['name'] 134 | optional_params = ['zone_ids', 'ad_types', 'count'] 135 | list_params = ['ad_types', 'zone_ids'] 136 | for p in placements: 137 | __validate_single_placement(p, required_params, optional_params, list_params) 138 | 139 | 140 | def __validate_single_placement(placement, required, optional, list_params): 141 | try: 142 | for r in required: 143 | if r not in placement: 144 | raise MissingParam('Missing required parameter {0} in placement field'.format(r)) 145 | for f in placement.keys(): 146 | if f not in required and f not in optional: 147 | raise InvalidParam('{0} is an unknown placement parameter'.format(f)) 148 | for l in list_params: 149 | if l in placement and type(placement[l]) is not list: 
150 | raise InvalidParam('{0} must be a list of values in placement field'.format(l)) 151 | except AttributeError: 152 | raise InvalidParam('Invalid placements') 153 | 154 | async def __get_request_params(request: Request): 155 | """ 156 | Copies request params into a mutable dictionary 157 | so that we can put in a default value for site if not present. 158 | Default value is None so that we can grab it from 159 | hardcoded conf. 160 | :return: 161 | """ 162 | try: 163 | json = await request.json() 164 | except JSONDecodeError: 165 | raise InvalidContentType('Expecting application/json body') 166 | except UnicodeDecodeError: 167 | raise InvalidContentType('Expecting application/json body') 168 | 169 | req_params = dict() 170 | 171 | try: 172 | for k, v in json.items(): 173 | req_params.update({k: v}) 174 | except AttributeError: 175 | raise InvalidContentType('Expcting application/json body') 176 | 177 | for k, v in request.query_params.items(): 178 | if k in ('site', 'country', 'region'): 179 | req_params.update({k: v}) 180 | if 'site' not in req_params: 181 | req_params.update({'site': None}) 182 | if 'placements' not in req_params: 183 | req_params.update({'placements': None}) 184 | 185 | return req_params 186 | 187 | 188 | if __name__ == "__main__": 189 | # This runs uvicorn in a local development environment. 
190 | uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True) 191 | -------------------------------------------------------------------------------- /app/middleware/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pocket/proxy-server/306202e77a700c4e9ee676875f67a73595493045/app/middleware/__init__.py -------------------------------------------------------------------------------- /app/middleware/proxy_headers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copy and paste from https://github.com/encode/uvicorn/blob/master/uvicorn/middleware/proxy_headers.py 3 | 4 | There is a weird implementation of this in the main library. 5 | We really want the first ip in the forwarded for, but the Proxy Header middleware will only give us 6 | the last client in our chain. There is a PR on Uvicorn to support trusting all the way to the ClientIP 7 | 8 | https://github.com/encode/uvicorn/pull/591 9 | 10 | Our Chain is currently: 11 | 12 | Client IP, Load Balancer, Nginx. Original code returns load balancer ip, when we want Client ip 13 | 14 | 15 | /// Original Code Start 16 | 17 | This middleware can be used when a known proxy is fronting the application, 18 | and is trusted to be properly setting the `X-Forwarded-Proto` and 19 | `X-Forwarded-For` headers with the connecting client information. 20 | 21 | Modifies the `client` and `scheme` information so that they reference 22 | the connecting client, rather that the connecting proxy. 
23 | 24 | https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers#Proxies 25 | """ 26 | 27 | 28 | class ProxyHeadersMiddleware: 29 | def __init__(self, app, trusted_hosts="127.0.0.1"): 30 | self.app = app 31 | if isinstance(trusted_hosts, str): 32 | self.trusted_hosts = [item.strip() for item in trusted_hosts.split(",")] 33 | else: 34 | self.trusted_hosts = trusted_hosts 35 | self.always_trust = "*" in self.trusted_hosts 36 | 37 | async def __call__(self, scope, receive, send): 38 | if scope["type"] in ("http", "websocket"): 39 | client_addr = scope.get("client") 40 | client_host = client_addr[0] if client_addr else None 41 | 42 | if self.always_trust or client_host in self.trusted_hosts: 43 | headers = dict(scope["headers"]) 44 | 45 | if b"x-forwarded-proto" in headers: 46 | # Determine if the incoming request was http or https based on 47 | # the X-Forwarded-Proto header. 48 | x_forwarded_proto = headers[b"x-forwarded-proto"].decode("latin1") 49 | scope["scheme"] = x_forwarded_proto.strip() 50 | 51 | if b"x-forwarded-for" in headers: 52 | # Determine the client address from the last trusted IP in the 53 | # X-Forwarded-For header. We've lost the connecting client's port 54 | # information by now, so only include the host. 55 | x_forwarded_for = headers[b"x-forwarded-for"].decode("latin1") 56 | # Pocket Change - This is the only changed line. 
57 | host = x_forwarded_for.split(",")[0].strip() 58 | port = 0 59 | scope["client"] = (host, port) 60 | 61 | return await self.app(scope, receive, send) 62 | -------------------------------------------------------------------------------- /app/provider/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pocket/proxy-server/306202e77a700c4e9ee676875f67a73595493045/app/provider/__init__.py -------------------------------------------------------------------------------- /app/provider/geo_provider.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from app import conf 4 | from app.geolocation.factory import Factory as GeolocationFactory 5 | 6 | 7 | class GeolocationProvider: 8 | __PROVIDER_INSTANCE = None 9 | 10 | def __init__(self): 11 | if not GeolocationProvider.__PROVIDER_INSTANCE: 12 | geolocation = GeolocationFactory().get_instance() 13 | GeolocationProvider.__PROVIDER_INSTANCE = geolocation 14 | 15 | def __setattr__(self, key, value): 16 | if GeolocationProvider.__PROVIDER_INSTANCE: 17 | raise AttributeError("Already instantiated") 18 | 19 | @classmethod 20 | def get_city(cls, ip): 21 | return cls.__PROVIDER_INSTANCE.city(ip) 22 | 23 | @classmethod 24 | def get_country(cls, city): 25 | if city: 26 | return city.country.iso_code 27 | else: 28 | logging.warning("No city returned") 29 | 30 | @classmethod 31 | def get_region(cls, city): 32 | if city: 33 | return city.subdivisions.most_specific.iso_code 34 | else: 35 | logging.warning("No city returned") 36 | -------------------------------------------------------------------------------- /app/provider/sentry_provider.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import sentry_sdk 4 | 5 | from app import conf 6 | from starlette.requests import ClientDisconnect 7 | 8 | 9 | SENTRY_IGNORE_ERRORS = ( 10 | 
class SessionProvider:
    """Singleton wrapper around a shared aiohttp ClientSession."""

    __session: Optional[aiohttp.ClientSession] = None

    @classmethod
    def session(cls) -> aiohttp.ClientSession:
        """Return the shared session, lazily creating it on first use."""

        if cls.__session is None:
            cls.__session = aiohttp.ClientSession(
                # Unlimited connection pool size.
                connector=aiohttp.TCPConnector(limit=None),
                # Avoid persisting cookies across requests.
                cookie_jar=aiohttp.DummyCookieJar(),
                # Overall 30-second timeout per request.
                timeout=aiohttp.ClientTimeout(total=30),
            )

        return cls.__session

    @classmethod
    async def shutdown(cls) -> None:
        """Close the shared session, if any, and reset the singleton."""

        if cls.__session is not None:
            await cls.__session.close()
            cls.__session = None
def handle_message(event, context):
    """Triggered from a message on a Cloud Pub/Sub topic.

    Decodes the gzipped, base64-encoded telemetry payload and, for each
    supported namespace/doctype combination, extracts the ad "shim" and
    forwards it to the ad server (plus optional, sampled latency metrics).

    :param event: Event payload. ``event["data"]`` is base64-encoded gzipped
        JSON; ``event["attributes"]`` carries the Pub/Sub message attributes.
    :param context: Google Cloud Function metadata (unused).
    """

    decompressed = gzip.decompress(base64.b64decode(event["data"])).decode("utf-8")
    telemetry = json.loads(decompressed)
    attributes = event["attributes"]

    namespace = attributes.get("document_namespace")
    doctype = attributes.get("document_type")
    user_agent_version = attributes.get("user_agent_version")
    submission_timestamp = attributes.get("submission_timestamp")

    def forward(shim):
        # Shared tail for every branch: ping the ad server, then record
        # (sampled) latency metrics for the same shim.
        ping_adzerk(shim)
        record_metrics(shim, submission_timestamp, namespace, user_agent_version)

    if namespace in ("org-mozilla-firefox", "org-mozilla-firefox-beta",
                     "org-mozilla-fenix") and doctype == "spoc":
        # Android/Glean ping.
        if "metrics" in telemetry:
            text_metrics = telemetry["metrics"].get("text", {})
            if "pocket.spoc_shim" in text_metrics:
                forward(text_metrics["pocket.spoc_shim"])
    elif namespace == "firefox-desktop" and doctype == "spoc":
        # Desktop/Glean ping (Firefox 122 and later).
        if user_agent_version is not None and int(user_agent_version) >= 122:
            if "metrics" in telemetry:
                text_metrics = telemetry["metrics"].get("text", {})
                if "pocket.shim" in text_metrics:
                    forward(text_metrics["pocket.shim"])
    elif namespace == "activity-stream" and doctype == "impression-stats":
        # Desktop/Legacy ping (before Firefox 122); one shim per tile.
        if user_agent_version is not None and int(user_agent_version) < 122:
            for tile in telemetry.get("tiles", []):
                if "shim" in tile:
                    forward(tile["shim"])
def get_path(path_id):
    """Reverse-map a telemetry path id back to its URL path.

    Returns the path (e.g. ``"/r"``) whose id in ``TELEMETRY_PATH_IDS``
    equals *path_id*, or ``None`` when the id is unknown.
    """
    return next(
        (path for path, known_id in TELEMETRY_PATH_IDS.items() if known_id == path_id),
        None,
    )
def is_valid_pocket_id(u):
    """Return True when *u* is a canonical, dash-separated UUID string.

    Leading ``{`` and trailing ``}`` characters are tolerated and the check
    is case-insensitive. Non-canonical UUID spellings that the uuid module
    would otherwise accept (e.g. 32 hex digits without dashes, urn: prefix)
    are rejected by the final string comparison.

    :param u: Candidate identifier string.
    :return: True for a canonical (optionally brace-wrapped) UUID, else False.
    """
    candidate = u.lstrip('{').rstrip('}').lower()
    try:
        parsed = UUID(candidate)
    except ValueError:
        return False

    # Only the canonical textual form round-trips unchanged.
    return candidate == str(parsed)
3 | Parameters: 4 | EcsNginxRepositoryName: 5 | Type: String 6 | Default: nginx 7 | 8 | EcsNginxGitSHA: 9 | Type: String 10 | Description: The sha to use 11 | Default: latest 12 | 13 | EcsProxyRepositoryName: 14 | Type: String 15 | Default: proxy 16 | 17 | EcsProxyGitSHA: 18 | Type: String 19 | Description: The sha to use 20 | Default: latest 21 | 22 | EcsTaskMemory: 23 | Type: String 24 | Default: 2GB 25 | 26 | EcsTaskCpu: 27 | Type: String 28 | Default: 1024 29 | 30 | EcsServiceTaskDesiredCount: 31 | Type: String 32 | Default: 115 33 | 34 | EcsServiceTaskMaxCount: 35 | Type: String 36 | Default: 250 37 | 38 | EcsServiceTaskMinCount: 39 | Type: String 40 | Default: 50 41 | 42 | EcsScaleTaskUpCount: 43 | Type: String 44 | Default: 9 45 | 46 | EcsScaleTaskDownCount: 47 | Type: String 48 | Default: -6 49 | 50 | EcsScaleTaskUpCooldown: 51 | Type: String 52 | Default: 60 53 | 54 | EcsScaleTaskDownCooldown: 55 | Type: String 56 | Default: 300 57 | 58 | EcsAppEnv: 59 | Type: String 60 | Default: production 61 | 62 | EcsAppGeoipS3Bucket: 63 | Type: String 64 | Default: pocket-proxy-geoip 65 | 66 | EcsAppAdzerkSecretName: 67 | Type: String 68 | Default: prod/adzerk 69 | 70 | ServiceScaleEvaluationPeriods: 71 | Description: "The number of periods over which data is compared to the specified threshold" 72 | Type: Number 73 | Default: 2 74 | MinValue: 2 75 | 76 | ServiceCpuScaleUpThreshold: 77 | Type: Number 78 | Description: "Average CPU value to trigger auto scaling up" 79 | Default: 55 80 | MinValue: 0 81 | MaxValue: 100 82 | ConstraintDescription: Value must be between 0 and 100 83 | 84 | ServiceCpuScaleDownThreshold: 85 | Type: Number 86 | Description: "Average CPU value to trigger auto scaling down" 87 | Default: 44 88 | MinValue: 0 89 | MaxValue: 100 90 | ConstraintDescription: Value must be between 0 and 100 91 | 92 | SSLCertificateArn: 93 | Type: String 94 | Description: Certificate ARN for ALB listener 95 | 96 | VPCStackName: 97 | Type: String 98 | Description: 
Stack name of VPC 99 | Default: VPC 100 | 101 | Resources: 102 | Alb: 103 | Type: 'AWS::ElasticLoadBalancingV2::LoadBalancer' 104 | Properties: 105 | Name: !Ref "AWS::StackName" 106 | Subnets: !Split [",", 'Fn::ImportValue': !Sub "${VPCStackName}-PublicSubnetIds"] 107 | 108 | SecurityGroups: 109 | - !Ref AlbSecurityGroup 110 | Tags: 111 | - Key: "Name" 112 | Value: !Sub "${AWS::StackName}" 113 | 114 | AlbListener: 115 | Type: 'AWS::ElasticLoadBalancingV2::Listener' 116 | Properties: 117 | DefaultActions: 118 | - Type: forward 119 | TargetGroupArn: 120 | Ref: AlbTargetGroup 121 | LoadBalancerArn: !Ref Alb 122 | Port: 443 123 | Protocol: HTTPS 124 | Certificates: 125 | - CertificateArn: !Ref SSLCertificateArn 126 | 127 | AlbTargetGroup: 128 | Type: 'AWS::ElasticLoadBalancingV2::TargetGroup' 129 | Properties: 130 | HealthCheckIntervalSeconds: 30 131 | HealthCheckPath: /pulse 132 | HealthCheckProtocol: HTTP 133 | HealthyThresholdCount: 5 134 | UnhealthyThresholdCount: 2 135 | HealthCheckTimeoutSeconds: 3 136 | TargetType: ip 137 | Protocol: HTTP 138 | Port: 80 139 | VpcId: { 'Fn::ImportValue': !Sub "${VPCStackName}-VpcId" } 140 | 141 | Tags: 142 | - Key: "Name" 143 | Value: !Sub "${AWS::StackName}" 144 | 145 | AlbSecurityGroup: 146 | Type: AWS::EC2::SecurityGroup 147 | Properties: 148 | GroupDescription: !Sub "${AWS::StackName} alb security group" 149 | GroupName: !Sub "${AWS::StackName}-alb" 150 | SecurityGroupEgress: 151 | - IpProtocol: tcp 152 | FromPort: 80 153 | ToPort: 80 154 | DestinationSecurityGroupId: !Ref "EcsSecurityGroup" 155 | SecurityGroupIngress: 156 | - IpProtocol: tcp 157 | FromPort: 443 158 | ToPort: 443 159 | CidrIp: 0.0.0.0/0 160 | Tags: 161 | - Key: "Name" 162 | Value: !Sub "${AWS::StackName}-alb" 163 | VpcId: { 'Fn::ImportValue': !Sub "${VPCStackName}-VpcId" } 164 | 165 | EcsTaskDefinition: 166 | Type: 'AWS::ECS::TaskDefinition' 167 | Properties: 168 | ContainerDefinitions: 169 | - Name: nginx 170 | Image: !Sub 
"${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${EcsNginxRepositoryName}:${EcsNginxGitSHA}" 171 | Essential: true 172 | HealthCheck: 173 | Command: 174 | - CMD-SHELL 175 | - curl -f http://127.0.0.1/pulse || exit 1 176 | Interval: 30 177 | Retries: 3 178 | Timeout: 5 179 | LogConfiguration: 180 | LogDriver: awslogs 181 | Options: 182 | awslogs-group: !Sub '/ecs/${AWS::StackName}/nginx' 183 | awslogs-region: us-east-1 184 | awslogs-stream-prefix: ecs 185 | PortMappings: 186 | - Protocol: tcp 187 | ContainerPort: 80 188 | Ulimits: 189 | - Name: nofile 190 | SoftLimit: 16384 191 | HardLimit: 16384 192 | 193 | - Name: proxy 194 | Image: !Sub "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${EcsProxyRepositoryName}:${EcsProxyGitSHA}" 195 | Essential: true 196 | HealthCheck: 197 | Command: 198 | - CMD-SHELL 199 | - curl -f http://127.0.0.1:8000/pulse || exit 1 200 | Interval: 30 201 | Retries: 3 202 | Timeout: 5 203 | LogConfiguration: 204 | LogDriver: awslogs 205 | Options: 206 | awslogs-group: !Sub '/ecs/${AWS::StackName}/proxy' 207 | awslogs-region: us-east-1 208 | awslogs-stream-prefix: ecs 209 | awslogs-datetime-format: '\[%Y-%m-%d %H:%M:%S %z\]' 210 | Environment: 211 | - Name: APP_ENV 212 | Value: !Ref EcsAppEnv 213 | - Name: GEOIP_S3_BUCKET 214 | Value: !Ref EcsAppGeoipS3Bucket 215 | - Name: ADZERK_SECRET_NAME 216 | Value: !Ref EcsAppAdzerkSecretName 217 | PortMappings: 218 | - Protocol: tcp 219 | ContainerPort: 8000 220 | Ulimits: 221 | - Name: nofile 222 | SoftLimit: 16384 223 | HardLimit: 16384 224 | 225 | Secrets: 226 | - Name: ADZERK_API_KEY 227 | ValueFrom: !Sub "arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${EcsAppAdzerkSecretName}:ADZERK_API_KEY::" 228 | 229 | Cpu: !Ref EcsTaskCpu 230 | Memory: !Ref EcsTaskMemory 231 | NetworkMode: awsvpc 232 | RequiresCompatibilities: 233 | - FARGATE 234 | Family: !Sub "${AWS::StackName}" 235 | ExecutionRoleArn: !GetAtt EcsTaskExecutionRole.Arn 236 | TaskRoleArn: !GetAtt EcsTaskRole.Arn 
237 | 238 | EcsService: 239 | Type: 'AWS::ECS::Service' 240 | DependsOn: AlbListener 241 | Properties: 242 | Cluster: !Ref "EcsCluster" 243 | LaunchType: FARGATE 244 | DesiredCount: !Ref "EcsServiceTaskDesiredCount" 245 | LoadBalancers: 246 | - ContainerPort: 80 247 | ContainerName: nginx 248 | TargetGroupArn: !Ref "AlbTargetGroup" 249 | TaskDefinition: !Ref "EcsTaskDefinition" 250 | NetworkConfiguration: 251 | AwsvpcConfiguration: 252 | AssignPublicIp: DISABLED 253 | SecurityGroups: 254 | - !Ref "EcsSecurityGroup" 255 | Subnets: !Split [",", 'Fn::ImportValue': !Join ["-", [ !Ref VPCStackName, 'PrivateSubnetIds' ]]] 256 | 257 | EcsCluster: 258 | Type: 'AWS::ECS::Cluster' 259 | Properties: 260 | ClusterName: !Ref AWS::StackName 261 | Tags: 262 | - Key: "Name" 263 | Value: !Sub "${AWS::StackName}" 264 | 265 | EcsTaskExecutionRole: 266 | Type: AWS::IAM::Role 267 | Properties: 268 | RoleName: !Sub "${AWS::StackName}ExecutionRole" 269 | Path: / 270 | AssumeRolePolicyDocument: 271 | Version: 2012-10-17 272 | Statement: 273 | - Effect: Allow 274 | Principal: 275 | Service: ecs-tasks.amazonaws.com 276 | Action: sts:AssumeRole 277 | ManagedPolicyArns: 278 | - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy 279 | Policies: 280 | - PolicyName: !Sub "${AWS::StackName}ExecutionPolicy" 281 | PolicyDocument: 282 | Version: 2012-10-17 283 | Statement: 284 | - Effect: Allow 285 | Action: 286 | - secretsmanager:GetSecretValue 287 | Resource: 288 | - !Sub "arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${EcsAppAdzerkSecretName}-??????" 
289 | 290 | EcsTaskRole: 291 | Type: AWS::IAM::Role 292 | Properties: 293 | RoleName: !Sub "${AWS::StackName}TaskRole" 294 | Path: / 295 | AssumeRolePolicyDocument: 296 | Version: 2012-10-17 297 | Statement: 298 | - Effect: Allow 299 | Principal: 300 | Service: ecs-tasks.amazonaws.com 301 | Action: sts:AssumeRole 302 | Policies: 303 | - PolicyName: !Sub "${AWS::StackName}TaskPolicy" 304 | PolicyDocument: 305 | Version: 2012-10-17 306 | Statement: 307 | - Effect: Allow 308 | Action: 309 | - s3:GetObject 310 | Resource: 311 | - !Sub "arn:aws:s3:::${EcsAppGeoipS3Bucket}" 312 | - !Sub "arn:aws:s3:::${EcsAppGeoipS3Bucket}/*" 313 | - Effect: Allow 314 | Action: 315 | - secretsmanager:GetSecretValue 316 | Resource: 317 | - !Sub "arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${EcsAppAdzerkSecretName}-??????" 318 | 319 | EcsSecurityGroup: 320 | Type: AWS::EC2::SecurityGroup 321 | Properties: 322 | GroupDescription: !Sub "${AWS::StackName} ecs security group" 323 | GroupName: !Sub "${AWS::StackName}-ecs" 324 | SecurityGroupEgress: 325 | - IpProtocol: tcp 326 | FromPort: 443 327 | ToPort: 443 328 | CidrIp: 0.0.0.0/0 329 | Tags: 330 | - Key: "Name" 331 | Value: !Sub "${AWS::StackName}-ecs" 332 | VpcId: { 'Fn::ImportValue': !Sub "${VPCStackName}-VpcId" } 333 | 334 | 335 | EcsSecurityGroupIngress: 336 | Type: AWS::EC2::SecurityGroupIngress 337 | Properties: 338 | GroupId: !Ref EcsSecurityGroup 339 | IpProtocol: tcp 340 | FromPort: 80 341 | ToPort: 80 342 | SourceSecurityGroupId: !Ref "AlbSecurityGroup" 343 | 344 | EcsAutoScalingRole: 345 | Type: AWS::IAM::Role 346 | Properties: 347 | RoleName: !Sub "${AWS::StackName}-AutoScalingRole" 348 | AssumeRolePolicyDocument: 349 | Statement: 350 | - Effect: Allow 351 | Principal: 352 | Service: application-autoscaling.amazonaws.com 353 | Action: sts:AssumeRole 354 | Path: / 355 | Policies: 356 | - PolicyName: !Sub "${AWS::StackName}-AutoScalingPolicy" 357 | PolicyDocument: 358 | Statement: 359 | - Effect: Allow 360 | 
Action: 361 | - application-autoscaling:* 362 | - cloudwatch:DescribeAlarms 363 | - cloudwatch:PutMetricAlarm 364 | - ecs:DescribeServices 365 | - ecs:UpdateService 366 | Resource: 367 | - !Sub "arn:aws:ecs:${AWS::Region}:${AWS::AccountId}:service/${AWS::StackName}-*" 368 | - !Sub "arn:aws:ecs:${AWS::Region}:${AWS::AccountId}:task-definition/${AWS::StackName}:*" 369 | - !Sub "arn:aws:cloudwatch:${AWS::Region}:${AWS::AccountId}:alarm:${AWS::StackName}-*" 370 | 371 | ServiceScalingTarget: 372 | Type: "AWS::ApplicationAutoScaling::ScalableTarget" 373 | Properties: 374 | MinCapacity: !Ref "EcsServiceTaskMinCount" 375 | MaxCapacity: !Ref "EcsServiceTaskMaxCount" 376 | ResourceId: !Sub 377 | - "service/${EcsClusterName}/${EcsServiceName}" 378 | - EcsClusterName: !Ref "EcsCluster" 379 | EcsServiceName: !GetAtt EcsService.Name 380 | RoleARN: !GetAtt EcsAutoScalingRole.Arn 381 | ScalableDimension: ecs:service:DesiredCount 382 | ServiceNamespace: ecs 383 | 384 | ServiceScaleUpPolicy: 385 | Type: AWS::ApplicationAutoScaling::ScalingPolicy 386 | Properties: 387 | PolicyName: ScaleOutPolicy 388 | PolicyType: StepScaling 389 | ScalingTargetId: !Ref ServiceScalingTarget 390 | StepScalingPolicyConfiguration: 391 | AdjustmentType: ChangeInCapacity 392 | Cooldown: !Ref EcsScaleTaskUpCooldown 393 | MetricAggregationType: Average 394 | StepAdjustments: 395 | - ScalingAdjustment: !Ref EcsScaleTaskUpCount 396 | MetricIntervalLowerBound: 0 397 | 398 | ServiceScaleDownPolicy: 399 | Type: AWS::ApplicationAutoScaling::ScalingPolicy 400 | Properties: 401 | PolicyName: ScaleInPolicy 402 | PolicyType: StepScaling 403 | ScalingTargetId: !Ref ServiceScalingTarget 404 | StepScalingPolicyConfiguration: 405 | AdjustmentType: ChangeInCapacity 406 | Cooldown: !Ref EcsScaleTaskDownCooldown 407 | MetricAggregationType: Average 408 | StepAdjustments: 409 | - ScalingAdjustment: !Ref EcsScaleTaskDownCount 410 | MetricIntervalUpperBound: 0 411 | 412 | ServiceCPUScaleUpAlarm: 413 | Type: 
AWS::CloudWatch::Alarm 414 | Properties: 415 | EvaluationPeriods: !Ref ServiceScaleEvaluationPeriods 416 | Statistic: Average 417 | TreatMissingData: breaching 418 | Threshold: !Ref ServiceCpuScaleUpThreshold 419 | AlarmDescription: Alarm to add capacity if CPU is high 420 | Period: 60 421 | AlarmActions: 422 | - !Ref ServiceScaleUpPolicy 423 | Namespace: AWS/ECS 424 | Dimensions: 425 | - Name: ClusterName 426 | Value: !Ref "EcsCluster" 427 | - Name: ServiceName 428 | Value: !GetAtt EcsService.Name 429 | ComparisonOperator: GreaterThanThreshold 430 | MetricName: CPUUtilization 431 | 432 | ServiceCPUScaleDownAlarm: 433 | Type: AWS::CloudWatch::Alarm 434 | Properties: 435 | EvaluationPeriods: !Ref ServiceScaleEvaluationPeriods 436 | Statistic: Average 437 | TreatMissingData: breaching 438 | Threshold: !Ref ServiceCpuScaleDownThreshold 439 | AlarmDescription: Alarm to reduce capacity if container CPU is low 440 | Period: 300 441 | AlarmActions: 442 | - !Ref ServiceScaleDownPolicy 443 | Namespace: AWS/ECS 444 | Dimensions: 445 | - Name: ClusterName 446 | Value: !Ref "EcsCluster" 447 | - Name: ServiceName 448 | Value: !GetAtt EcsService.Name 449 | ComparisonOperator: LessThanThreshold 450 | MetricName: CPUUtilization 451 | 452 | -------------------------------------------------------------------------------- /cloudformation/vpc/README.md: -------------------------------------------------------------------------------- 1 | Real documentation pending. We're using nested stacks and therefore must sync the local templates to s3 before updating the main VPC stack. 2 | 3 | Quick commands to update VPC: 4 | 5 | `aws-vault exec pocket-proxy-rw -- aws s3 sync . 
s3://pocket-proxy-cloudformation/` 6 | `aws-vault exec pocket-proxy-rw -- aws cloudformation update-stack --stack-name VPC --template-body file://vpc.yaml --parameters file://vpc_parameters.json` 7 | 8 | 9 | -------------------------------------------------------------------------------- /cloudformation/vpc/nat.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: NAT Gateways 3 | Parameters: 4 | VPCName: 5 | Description: VPC Name 6 | Type: String 7 | 8 | PublicSubnet1Id: 9 | Description: Public subnet id 1 10 | Type: String 11 | 12 | PublicSubnet2Id: 13 | Description: Public subnet id 2 14 | Type: String 15 | Default: '' 16 | 17 | PublicSubnet3Id: 18 | Description: Public subnet id 3 19 | Type: String 20 | Default: '' 21 | 22 | PublicSubnet4Id: 23 | Description: Public subnet id 4 24 | Type: String 25 | Default: '' 26 | 27 | PrivateSubnet1Id: 28 | Description: Private subnet id 1 29 | Type: String 30 | 31 | PrivateSubnet2Id: 32 | Description: Private subnet id 2 33 | Type: String 34 | Default: '' 35 | 36 | PrivateSubnet3Id: 37 | Description: Private subnet id 3 38 | Type: String 39 | Default: '' 40 | 41 | PrivateSubnet4Id: 42 | Description: Private subnet id 4 43 | Type: String 44 | Default: '' 45 | 46 | PrivateRouteTable1: 47 | Description: Private route table 1 48 | Type: String 49 | 50 | PrivateRouteTable2: 51 | Description: Private route table 2 52 | Type: String 53 | Default: '' 54 | 55 | PrivateRouteTable3: 56 | Description: Private route table 3 57 | Type: String 58 | Default: '' 59 | 60 | PrivateRouteTable4: 61 | Description: Private route table 4 62 | Type: String 63 | Default: '' 64 | 65 | VpcId: 66 | Description: The VPC ID to create subnets in 67 | Type: AWS::EC2::VPC::Id 68 | 69 | Conditions: 70 | PublicSubnet2Id_IsSet: !Not 71 | - !Equals 72 | - !Ref 'PublicSubnet2Id' 73 | - '' 74 | 75 | PublicSubnet3Id_IsSet: !Not 76 | - !Equals 77 | - !Ref 'PublicSubnet3Id' 
78 | - '' 79 | 80 | PublicSubnet4Id_IsSet: !Not 81 | - !Equals 82 | - !Ref 'PublicSubnet4Id' 83 | - '' 84 | 85 | Resources: 86 | Eip1: 87 | Type: AWS::EC2::EIP 88 | 89 | Eip2: 90 | Type: AWS::EC2::EIP 91 | Condition: PublicSubnet2Id_IsSet 92 | 93 | Eip3: 94 | Type: AWS::EC2::EIP 95 | Condition: PublicSubnet3Id_IsSet 96 | 97 | Eip4: 98 | Type: AWS::EC2::EIP 99 | Condition: PublicSubnet4Id_IsSet 100 | 101 | NATGateway1: 102 | Type: AWS::EC2::NatGateway 103 | Properties: 104 | AllocationId: !GetAtt Eip1.AllocationId 105 | SubnetId: !Ref PublicSubnet1Id 106 | Tags: 107 | - Key: Name 108 | Value: !Join 109 | - '-' 110 | - - !Ref 'VPCName' 111 | - nat-1 112 | 113 | NATGateway2: 114 | Type: AWS::EC2::NatGateway 115 | Condition: PublicSubnet2Id_IsSet 116 | Properties: 117 | AllocationId: !GetAtt Eip2.AllocationId 118 | SubnetId: !Ref PublicSubnet2Id 119 | Tags: 120 | - Key: Name 121 | Value: !Join 122 | - '-' 123 | - - !Ref 'VPCName' 124 | - nat-2 125 | 126 | NATGateway3: 127 | Type: AWS::EC2::NatGateway 128 | Condition: PublicSubnet3Id_IsSet 129 | Properties: 130 | AllocationId: !GetAtt Eip3.AllocationId 131 | SubnetId: !Ref PublicSubnet3Id 132 | Tags: 133 | - Key: Name 134 | Value: !Join 135 | - '-' 136 | - - !Ref 'VPCName' 137 | - nat-3 138 | 139 | NATGateway4: 140 | Type: AWS::EC2::NatGateway 141 | Condition: PublicSubnet4Id_IsSet 142 | Properties: 143 | AllocationId: !GetAtt Eip4.AllocationId 144 | SubnetId: !Ref PublicSubnet4Id 145 | Tags: 146 | - Key: Name 147 | Value: !Join 148 | - '-' 149 | - - !Ref 'VPCName' 150 | - nat-4 151 | 152 | PrivateRouteTable1DefaultRoute: 153 | Type: AWS::EC2::Route 154 | Properties: 155 | RouteTableId: !Ref 'PrivateRouteTable1' 156 | DestinationCidrBlock: '0.0.0.0/0' 157 | NatGatewayId: !Ref 'NATGateway1' 158 | 159 | PrivateRouteTable2DefaultRoute: 160 | Type: AWS::EC2::Route 161 | Condition: PublicSubnet2Id_IsSet 162 | Properties: 163 | RouteTableId: !Ref 'PrivateRouteTable2' 164 | DestinationCidrBlock: '0.0.0.0/0' 165 | NatGatewayId: 
!Ref 'NATGateway2' 166 | 167 | PrivateRouteTable3DefaultRoute: 168 | Type: AWS::EC2::Route 169 | Condition: PublicSubnet3Id_IsSet 170 | Properties: 171 | RouteTableId: !Ref 'PrivateRouteTable3' 172 | DestinationCidrBlock: '0.0.0.0/0' 173 | NatGatewayId: !Ref 'NATGateway3' 174 | 175 | PrivateRouteTable4DefaultRoute: 176 | Type: AWS::EC2::Route 177 | Condition: PublicSubnet4Id_IsSet 178 | Properties: 179 | RouteTableId: !Ref 'PrivateRouteTable4' 180 | DestinationCidrBlock: '0.0.0.0/0' 181 | NatGatewayId: !Ref 'NATGateway4' 182 | -------------------------------------------------------------------------------- /cloudformation/vpc/private-subnet.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: Private subnets 3 | Parameters: 4 | VPCName: 5 | Description: VPC Name 6 | Type: String 7 | 8 | PrivateSubnet1: 9 | Description: The CIDR for the private subnet in availability zone 1 10 | Type: String 11 | MinLength: '9' 12 | MaxLength: '19' 13 | AllowedPattern: >- 14 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 15 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 16 | 17 | PrivateSubnet2: 18 | Description: The CIDR for the private subnet in availability zone 2 19 | Type: String 20 | MinLength: '9' 21 | MaxLength: '19' 22 | AllowedPattern: >- 23 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 24 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 25 | 26 | PrivateSubnet3: 27 | Description: The CIDR for the private subnet in availability zone 3 28 | Type: String 29 | MinLength: 
'9' 30 | MaxLength: '19' 31 | AllowedPattern: >- 32 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 33 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 34 | Default: '0.0.0.0/32' 35 | 36 | PrivateSubnet4: 37 | Description: The CIDR for the private subnet in availability zone 4 38 | Type: String 39 | MinLength: '9' 40 | MaxLength: '19' 41 | AllowedPattern: >- 42 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 43 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 44 | Default: '0.0.0.0/32' 45 | 46 | VpcId: 47 | Description: The VPC ID to create subnets in 48 | Type: AWS::EC2::VPC::Id 49 | 50 | Conditions: 51 | PrivateSubnet3_IsSet: !Not 52 | - !Equals 53 | - !Ref 'PrivateSubnet3' 54 | - '0.0.0.0/32' 55 | 56 | PrivateSubnet4_IsSet: !Not 57 | - !Equals 58 | - !Ref 'PrivateSubnet4' 59 | - '0.0.0.0/32' 60 | 61 | Resources: 62 | PrivateRouteTable1: 63 | Type: AWS::EC2::RouteTable 64 | Properties: 65 | VpcId: !Ref 'VpcId' 66 | Tags: 67 | - Key: Name 68 | Value: !Join 69 | - '-' 70 | - - !Ref 'VPCName' 71 | - private-1 72 | - Key: availability_zone 73 | Value: !Select 74 | - '0' 75 | - !GetAZs 76 | Ref: AWS::Region 77 | 78 | PrivateRouteTable2: 79 | Type: AWS::EC2::RouteTable 80 | Properties: 81 | VpcId: !Ref 'VpcId' 82 | Tags: 83 | - Key: Name 84 | Value: !Join 85 | - '-' 86 | - - !Ref 'VPCName' 87 | - private-2 88 | - Key: availability_zone 89 | Value: !Select 90 | - '1' 91 | - !GetAZs 92 | Ref: AWS::Region 93 | 94 | PrivateRouteTable3: 95 | Type: AWS::EC2::RouteTable 96 | Condition: PrivateSubnet3_IsSet 97 | Properties: 98 | VpcId: !Ref 'VpcId' 99 | Tags: 100 | - 
Key: Name 101 | Value: !Join 102 | - '-' 103 | - - !Ref 'VPCName' 104 | - private-3 105 | - Key: availability_zone 106 | Value: !Select 107 | - '2' 108 | - !GetAZs 109 | Ref: AWS::Region 110 | 111 | PrivateRouteTable4: 112 | Type: AWS::EC2::RouteTable 113 | Condition: PrivateSubnet4_IsSet 114 | Properties: 115 | VpcId: !Ref 'VpcId' 116 | Tags: 117 | - Key: Name 118 | Value: !Join 119 | - '-' 120 | - - !Ref 'VPCName' 121 | - private-4 122 | - Key: availability_zone 123 | Value: !Select 124 | - '3' 125 | - !GetAZs 126 | Ref: AWS::Region 127 | 128 | PrivateSubnet1Stack: 129 | Type: AWS::EC2::Subnet 130 | Properties: 131 | VpcId: !Ref 'VpcId' 132 | AvailabilityZone: !Select 133 | - '0' 134 | - !GetAZs 135 | Ref: AWS::Region 136 | CidrBlock: !Ref 'PrivateSubnet1' 137 | Tags: 138 | - Key: Name 139 | Value: !Join 140 | - '-' 141 | - - !Ref 'VPCName' 142 | - private-1 143 | 144 | PrivateSubnet2Stack: 145 | Type: AWS::EC2::Subnet 146 | Properties: 147 | VpcId: !Ref 'VpcId' 148 | AvailabilityZone: !Select 149 | - '1' 150 | - !GetAZs 151 | Ref: AWS::Region 152 | CidrBlock: !Ref 'PrivateSubnet2' 153 | Tags: 154 | - Key: Name 155 | Value: !Join 156 | - '-' 157 | - - !Ref 'VPCName' 158 | - private-2 159 | 160 | PrivateSubnet3Stack: 161 | Type: AWS::EC2::Subnet 162 | Condition: PrivateSubnet3_IsSet 163 | Properties: 164 | VpcId: !Ref 'VpcId' 165 | AvailabilityZone: !Select 166 | - '2' 167 | - !GetAZs 168 | Ref: AWS::Region 169 | CidrBlock: !Ref 'PrivateSubnet3' 170 | Tags: 171 | - Key: Name 172 | Value: !Join 173 | - '-' 174 | - - !Ref 'VPCName' 175 | - private-3 176 | 177 | PrivateSubnet4Stack: 178 | Type: AWS::EC2::Subnet 179 | Condition: PrivateSubnet4_IsSet 180 | Properties: 181 | VpcId: !Ref 'VpcId' 182 | AvailabilityZone: !Select 183 | - '3' 184 | - !GetAZs 185 | Ref: AWS::Region 186 | CidrBlock: !Ref 'PrivateSubnet4' 187 | Tags: 188 | - Key: Name 189 | Value: !Join 190 | - '-' 191 | - - !Ref 'VPCName' 192 | - private-4 193 | 194 | PrivateSubnet1RouteTableAssociation: 195 | 
Type: AWS::EC2::SubnetRouteTableAssociation 196 | Properties: 197 | SubnetId: !Ref 'PrivateSubnet1Stack' 198 | RouteTableId: !Ref 'PrivateRouteTable1' 199 | 200 | PrivateSubnet2RouteTableAssociation: 201 | Type: AWS::EC2::SubnetRouteTableAssociation 202 | Properties: 203 | SubnetId: !Ref 'PrivateSubnet2Stack' 204 | RouteTableId: !Ref 'PrivateRouteTable2' 205 | 206 | PrivateSubnet3RouteTableAssociation: 207 | Type: AWS::EC2::SubnetRouteTableAssociation 208 | Condition: PrivateSubnet3_IsSet 209 | Properties: 210 | SubnetId: !Ref 'PrivateSubnet3Stack' 211 | RouteTableId: !Ref 'PrivateRouteTable3' 212 | 213 | PrivateSubnet4RouteTableAssociation: 214 | Type: AWS::EC2::SubnetRouteTableAssociation 215 | Condition: PrivateSubnet4_IsSet 216 | Properties: 217 | SubnetId: !Ref 'PrivateSubnet4Stack' 218 | RouteTableId: !Ref 'PrivateRouteTable4' 219 | 220 | Outputs: 221 | PrivateRouteTable1: 222 | Value: !Ref 'PrivateRouteTable1' 223 | 224 | PrivateRouteTable2: 225 | Value: !Ref 'PrivateRouteTable2' 226 | 227 | PrivateRouteTable3: 228 | Condition: PrivateSubnet3_IsSet 229 | Value: !Ref 'PrivateRouteTable3' 230 | 231 | PrivateRouteTable4: 232 | Condition: PrivateSubnet4_IsSet 233 | Value: !Ref 'PrivateRouteTable4' 234 | 235 | PrivateSubnet1Cidr: 236 | Value: !Ref 'PrivateSubnet1' 237 | 238 | PrivateSubnet1Id: 239 | Value: !Ref 'PrivateSubnet1Stack' 240 | 241 | PrivateSubnet1AZ: 242 | Value: !GetAtt 'PrivateSubnet1Stack.AvailabilityZone' 243 | 244 | PrivateSubnet2Cidr: 245 | Value: !Ref 'PrivateSubnet2' 246 | 247 | PrivateSubnet2Id: 248 | Value: !Ref 'PrivateSubnet2Stack' 249 | 250 | PrivateSubnet2AZ: 251 | Value: !GetAtt 'PrivateSubnet2Stack.AvailabilityZone' 252 | 253 | PrivateSubnet3Cidr: 254 | Condition: PrivateSubnet3_IsSet 255 | Value: !Ref 'PrivateSubnet3' 256 | 257 | PrivateSubnet3Id: 258 | Condition: PrivateSubnet3_IsSet 259 | Value: !Ref 'PrivateSubnet3Stack' 260 | 261 | PrivateSubnet3AZ: 262 | Condition: PrivateSubnet3_IsSet 263 | Value: !GetAtt 
'PrivateSubnet3Stack.AvailabilityZone' 264 | 265 | PrivateSubnet4Cidr: 266 | Condition: PrivateSubnet4_IsSet 267 | Value: !Ref 'PrivateSubnet4' 268 | 269 | PrivateSubnet4Id: 270 | Condition: PrivateSubnet4_IsSet 271 | Value: !Ref 'PrivateSubnet4Stack' 272 | 273 | PrivateSubnet4AZ: 274 | Condition: PrivateSubnet4_IsSet 275 | Value: !GetAtt 'PrivateSubnet4Stack.AvailabilityZone' 276 | -------------------------------------------------------------------------------- /cloudformation/vpc/public-subnet.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: Public subnets with internet gateway 3 | Parameters: 4 | VPCName: 5 | Description: VPC Name 6 | Type: String 7 | 8 | EnableIpv6: 9 | Description: Boolean for amazon provided IPv6 CIDR block 10 | Type: String 11 | Default: 'false' 12 | 13 | Ipv6CidrBlocks: 14 | Description: Amazon assigned IPv6 cidr blocks 15 | Type: String 16 | Default: '' 17 | 18 | VPCIpv6CidrBlock: 19 | Description: Unused, but please do not delete. 
20 | Type: String 21 | Default: '' 22 | 23 | PublicSubnet1: 24 | Description: The CIDR for the public subnet in availability zone 1 25 | Type: String 26 | MinLength: '9' 27 | MaxLength: '19' 28 | AllowedPattern: >- 29 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 30 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 31 | 32 | PublicSubnet2: 33 | Description: The CIDR for the public subnet in availability zone 2 34 | Type: String 35 | MinLength: '9' 36 | MaxLength: '19' 37 | AllowedPattern: >- 38 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 39 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 40 | 41 | PublicSubnet3: 42 | Description: The CIDR for the public subnet in availability zone 3 43 | Type: String 44 | MinLength: '9' 45 | MaxLength: '19' 46 | AllowedPattern: >- 47 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 48 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 49 | Default: '0.0.0.0/32' 50 | 51 | PublicSubnet4: 52 | Description: The CIDR for the public subnet in availability zone 4 53 | Type: String 54 | MinLength: '9' 55 | MaxLength: '19' 56 | AllowedPattern: >- 57 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 58 | ConstraintDescription: Must be a valid IP CIDR 
range of the form x.x.x.x/x 59 | Default: '0.0.0.0/32' 60 | 61 | VpcId: 62 | Description: The VPC ID to create subnets in 63 | Type: AWS::EC2::VPC::Id 64 | 65 | Conditions: 66 | EnableIpv6_IsSet: !Not 67 | - !Equals 68 | - !Ref 'EnableIpv6' 69 | - 'false' 70 | 71 | PublicSubnet3_IsSet: !Not 72 | - !Equals 73 | - !Ref 'PublicSubnet3' 74 | - '0.0.0.0/32' 75 | 76 | PublicSubnet3Ipv6_IsSet: !And 77 | - !Not 78 | - !Equals 79 | - !Ref 'PublicSubnet3' 80 | - '0.0.0.0/32' 81 | - !Condition 'EnableIpv6_IsSet' 82 | 83 | PublicSubnet4_IsSet: !Not 84 | - !Equals 85 | - !Ref 'PublicSubnet4' 86 | - '0.0.0.0/32' 87 | 88 | PublicSubnet4Ipv6_IsSet: !And 89 | - !Not 90 | - !Equals 91 | - !Ref 'PublicSubnet4' 92 | - '0.0.0.0/32' 93 | - !Condition 'EnableIpv6_IsSet' 94 | 95 | Resources: 96 | InternetGateway: 97 | Type: AWS::EC2::InternetGateway 98 | Properties: 99 | Tags: 100 | - Key: Name 101 | Value: !Ref 'VPCName' 102 | 103 | InternetGatewayAttachment: 104 | Type: AWS::EC2::VPCGatewayAttachment 105 | DependsOn: InternetGateway 106 | Properties: 107 | VpcId: !Ref 'VpcId' 108 | InternetGatewayId: !Ref 'InternetGateway' 109 | 110 | PublicRouteTable: 111 | Type: AWS::EC2::RouteTable 112 | Properties: 113 | VpcId: !Ref 'VpcId' 114 | Tags: 115 | - Key: Name 116 | Value: !Join 117 | - '-' 118 | - - !Ref 'VPCName' 119 | - public 120 | 121 | PublicSubnet1Stack: 122 | Type: AWS::EC2::Subnet 123 | Properties: 124 | VpcId: !Ref 'VpcId' 125 | AvailabilityZone: !Select 126 | - '0' 127 | - !GetAZs 128 | Ref: AWS::Region 129 | CidrBlock: !Ref 'PublicSubnet1' 130 | Ipv6CidrBlock: !If 131 | - EnableIpv6_IsSet 132 | - !Join 133 | - '' 134 | - - !Select 135 | - 0 136 | - !Split 137 | - '00::/56' 138 | - !Select 139 | - 0 140 | - - !Ref 'Ipv6CidrBlocks' 141 | - '00::/64' 142 | - !Ref 'AWS::NoValue' 143 | Tags: 144 | - Key: Name 145 | Value: !Join 146 | - '-' 147 | - - !Ref 'VPCName' 148 | - public-1 149 | 150 | PublicSubnet2Stack: 151 | Type: AWS::EC2::Subnet 152 | Properties: 153 | 
MapPublicIpOnLaunch: true 154 | VpcId: !Ref 'VpcId' 155 | AvailabilityZone: !Select 156 | - '1' 157 | - !GetAZs 158 | Ref: AWS::Region 159 | CidrBlock: !Ref 'PublicSubnet2' 160 | Ipv6CidrBlock: !If 161 | - EnableIpv6_IsSet 162 | - !Join 163 | - '' 164 | - - !Select 165 | - 0 166 | - !Split 167 | - '00::/56' 168 | - !Select 169 | - 0 170 | - - !Ref 'Ipv6CidrBlocks' 171 | - '01::/64' 172 | - !Ref 'AWS::NoValue' 173 | Tags: 174 | - Key: Name 175 | Value: !Join 176 | - '-' 177 | - - !Ref 'VPCName' 178 | - public-2 179 | 180 | PublicSubnet3Stack: 181 | Condition: PublicSubnet3_IsSet 182 | Type: AWS::EC2::Subnet 183 | Properties: 184 | VpcId: !Ref 'VpcId' 185 | AvailabilityZone: !Select 186 | - '2' 187 | - !GetAZs 188 | Ref: AWS::Region 189 | CidrBlock: !Ref 'PublicSubnet3' 190 | Ipv6CidrBlock: !If 191 | - EnableIpv6_IsSet 192 | - !Join 193 | - '' 194 | - - !Select 195 | - 0 196 | - !Split 197 | - '00::/56' 198 | - !Select 199 | - 0 200 | - - !Ref 'Ipv6CidrBlocks' 201 | - '02::/64' 202 | - !Ref 'AWS::NoValue' 203 | Tags: 204 | - Key: Name 205 | Value: !Join 206 | - '-' 207 | - - !Ref 'VPCName' 208 | - public-3 209 | 210 | PublicSubnet4Stack: 211 | Type: AWS::EC2::Subnet 212 | Condition: PublicSubnet4_IsSet 213 | Properties: 214 | VpcId: !Ref 'VpcId' 215 | AvailabilityZone: !Select 216 | - '3' 217 | - !GetAZs 218 | Ref: AWS::Region 219 | CidrBlock: !Ref 'PublicSubnet4' 220 | Ipv6CidrBlock: !If 221 | - EnableIpv6_IsSet 222 | - !Join 223 | - '' 224 | - - !Select 225 | - 0 226 | - !Split 227 | - '00::/56' 228 | - !Select 229 | - 0 230 | - - !Ref 'Ipv6CidrBlocks' 231 | - '03::/64' 232 | - !Ref 'AWS::NoValue' 233 | Tags: 234 | - Key: Name 235 | Value: !Join 236 | - '-' 237 | - - !Ref 'VPCName' 238 | - public-4 239 | 240 | PublicSubnet1RouteTableAssociation: 241 | Type: AWS::EC2::SubnetRouteTableAssociation 242 | Properties: 243 | SubnetId: !Ref 'PublicSubnet1Stack' 244 | RouteTableId: !Ref 'PublicRouteTable' 245 | 246 | PublicSubnet2RouteTableAssociation: 247 | Type: 
AWS::EC2::SubnetRouteTableAssociation 248 | Properties: 249 | SubnetId: !Ref 'PublicSubnet2Stack' 250 | RouteTableId: !Ref 'PublicRouteTable' 251 | 252 | PublicSubnet3RouteTableAssociation: 253 | Type: AWS::EC2::SubnetRouteTableAssociation 254 | Condition: PublicSubnet3_IsSet 255 | Properties: 256 | SubnetId: !Ref 'PublicSubnet3Stack' 257 | RouteTableId: !Ref 'PublicRouteTable' 258 | 259 | PublicSubnet4RouteTableAssociation: 260 | Condition: PublicSubnet4_IsSet 261 | Type: AWS::EC2::SubnetRouteTableAssociation 262 | Properties: 263 | SubnetId: !Ref 'PublicSubnet4Stack' 264 | RouteTableId: !Ref 'PublicRouteTable' 265 | 266 | PublicRouteTableDefaultRoute: 267 | Type: AWS::EC2::Route 268 | DependsOn: InternetGatewayAttachment 269 | Properties: 270 | RouteTableId: !Ref 'PublicRouteTable' 271 | DestinationCidrBlock: '0.0.0.0/0' 272 | GatewayId: !Ref 'InternetGateway' 273 | 274 | PublicRouteTableIpv6DefaultRoute: 275 | Type: AWS::EC2::Route 276 | DependsOn: InternetGatewayAttachment 277 | Condition: EnableIpv6_IsSet 278 | Properties: 279 | RouteTableId: !Ref 'PublicRouteTable' 280 | DestinationIpv6CidrBlock: ::/0 281 | GatewayId: !Ref 'InternetGateway' 282 | 283 | Outputs: 284 | PublicRouteTable: 285 | Value: !Ref 'PublicRouteTable' 286 | 287 | PublicSubnet1Cidr: 288 | Value: !Ref 'PublicSubnet1' 289 | 290 | PublicSubnet1Id: 291 | Value: !Ref 'PublicSubnet1Stack' 292 | 293 | PublicSubnet1AZ: 294 | Value: !GetAtt 'PublicSubnet1Stack.AvailabilityZone' 295 | 296 | PublicSubnet2Cidr: 297 | Value: !Ref 'PublicSubnet2' 298 | 299 | PublicSubnet2Id: 300 | Value: !Ref 'PublicSubnet2Stack' 301 | 302 | PublicSubnet2AZ: 303 | Value: !GetAtt 'PublicSubnet2Stack.AvailabilityZone' 304 | 305 | PublicSubnet3Cidr: 306 | Condition: PublicSubnet3_IsSet 307 | Value: !Ref 'PublicSubnet3' 308 | 309 | PublicSubnet3Id: 310 | Condition: PublicSubnet3_IsSet 311 | Value: !Ref 'PublicSubnet3Stack' 312 | 313 | PublicSubnet3AZ: 314 | Condition: PublicSubnet3_IsSet 315 | Value: !GetAtt 
'PublicSubnet3Stack.AvailabilityZone' 316 | 317 | PublicSubnet4Cidr: 318 | Condition: PublicSubnet4_IsSet 319 | Value: !Ref 'PublicSubnet4' 320 | 321 | PublicSubnet4Id: 322 | Condition: PublicSubnet4_IsSet 323 | Value: !Ref 'PublicSubnet4Stack' 324 | 325 | PublicSubnet4AZ: 326 | Condition: PublicSubnet4_IsSet 327 | Value: !GetAtt 'PublicSubnet4Stack.AvailabilityZone' 328 | 329 | InternetGatewayId: 330 | Value: !Ref 'InternetGateway' 331 | -------------------------------------------------------------------------------- /cloudformation/vpc/vpc.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: VPC with public and private subnets across arbitrary availability zones 3 | Parameters: 4 | VPCName: 5 | Description: VPC Name 6 | Type: String 7 | 8 | EnableIpv6: 9 | Description: Boolean for amazon provided IPv6 CIDR block 10 | Type: String 11 | Default: 'false' 12 | 13 | VPCSubnetCidr: 14 | Description: VPC Subnet CIDR specification 15 | Type: String 16 | MinLength: '9' 17 | MaxLength: '19' 18 | AllowedPattern: >- 19 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 20 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 21 | 22 | PublicSubnet1: 23 | Description: The CIDR for the public subnet in availability zone 1 24 | Type: String 25 | MinLength: '9' 26 | MaxLength: '19' 27 | AllowedPattern: >- 28 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 29 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 30 | 31 | PublicSubnet2: 32 | Description: The CIDR for the public subnet in 
availability zone 2 33 | Type: String 34 | MinLength: '9' 35 | MaxLength: '19' 36 | AllowedPattern: >- 37 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 38 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 39 | 40 | PublicSubnet3: 41 | Description: The CIDR for the public subnet in availability zone 3 42 | Type: String 43 | MinLength: '9' 44 | MaxLength: '19' 45 | AllowedPattern: >- 46 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 47 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 48 | Default: '0.0.0.0/32' 49 | 50 | PublicSubnet4: 51 | Description: The CIDR for the public subnet in availability zone 4 52 | Type: String 53 | MinLength: '9' 54 | MaxLength: '19' 55 | AllowedPattern: >- 56 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 57 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 58 | Default: '0.0.0.0/32' 59 | 60 | PrivateSubnet1: 61 | Description: The CIDR for the private subnet in availability zone 1 62 | Type: String 63 | MinLength: '9' 64 | MaxLength: '19' 65 | AllowedPattern: >- 66 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 67 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 68 | Default: '0.0.0.0/32' 69 | 70 | PrivateSubnet2: 71 
| Description: The CIDR for the private subnet in availability zone 2 72 | Type: String 73 | MinLength: '9' 74 | MaxLength: '19' 75 | AllowedPattern: >- 76 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 77 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 78 | Default: '0.0.0.0/32' 79 | 80 | PrivateSubnet3: 81 | Description: The CIDR for the private subnet in availability zone 3 82 | Type: String 83 | MinLength: '9' 84 | MaxLength: '19' 85 | AllowedPattern: >- 86 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 87 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 88 | Default: '0.0.0.0/32' 89 | 90 | PrivateSubnet4: 91 | Description: The CIDR for the private subnet in availability zone 4 92 | Type: String 93 | MinLength: '9' 94 | MaxLength: '19' 95 | AllowedPattern: >- 96 | ^([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5])).([0-9]|[1-9][0-9]|1[0-9]{2}|2([0-4][0-9]|5[0-5]))/([0-9]|[12][0-9]|3[0-2]) 97 | ConstraintDescription: Must be a valid IP CIDR range of the form x.x.x.x/x 98 | Default: '0.0.0.0/32' 99 | 100 | StacksURL: 101 | Description: The S3 URL to pocket-cloudformation 102 | Type: String 103 | Default: https://s3.amazonaws.com/pocket-proxy-cloudformation 104 | 105 | Conditions: 106 | EnableIpv6_IsSet: !Not 107 | - !Equals 108 | - !Ref 'EnableIpv6' 109 | - 'false' 110 | 111 | PublicSubnet3_IsSet: !Not 112 | - !Equals 113 | - !Ref 'PublicSubnet3' 114 | - '0.0.0.0/32' 115 | 116 | PublicSubnet4_IsSet: !Not 117 | - !Equals 118 | - !Ref 'PublicSubnet4' 119 | 
- '0.0.0.0/32' 120 | 121 | PrivateSubnet3_IsSet: !Not 122 | - !Equals 123 | - !Ref 'PrivateSubnet3' 124 | - '0.0.0.0/32' 125 | 126 | PrivateSubnet4_IsSet: !Not 127 | - !Equals 128 | - !Ref 'PrivateSubnet4' 129 | - '0.0.0.0/32' 130 | 131 | Resources: 132 | VPCIpv6CidrBlock: 133 | Type: AWS::EC2::VPCCidrBlock 134 | Condition: EnableIpv6_IsSet 135 | Properties: 136 | AmazonProvidedIpv6CidrBlock: true 137 | VpcId: !Ref 'VPC' 138 | 139 | VPC: 140 | Type: AWS::EC2::VPC 141 | Properties: 142 | EnableDnsSupport: 'true' 143 | EnableDnsHostnames: 'true' 144 | CidrBlock: !Ref 'VPCSubnetCidr' 145 | Tags: 146 | - Key: Name 147 | Value: !Ref 'VPCName' 148 | 149 | PublicSubnet: 150 | Type: AWS::CloudFormation::Stack 151 | Properties: 152 | TemplateURL: !Join 153 | - / 154 | - - !Ref 'StacksURL' 155 | - public-subnet.yaml 156 | TimeoutInMinutes: '60' 157 | Parameters: 158 | VpcId: !Ref 'VPC' 159 | VPCName: !Ref 'VPCName' 160 | EnableIpv6: !Ref 'EnableIpv6' 161 | Ipv6CidrBlocks: !If 162 | - EnableIpv6_IsSet 163 | - !Join 164 | - ',' 165 | - !GetAtt 'VPC.Ipv6CidrBlocks' 166 | - !Ref 'AWS::NoValue' 167 | VPCIpv6CidrBlock: !If 168 | - EnableIpv6_IsSet 169 | - !Ref 'VPCIpv6CidrBlock' 170 | - !Ref 'AWS::NoValue' 171 | PublicSubnet1: !Ref 'PublicSubnet1' 172 | PublicSubnet2: !Ref 'PublicSubnet2' 173 | PublicSubnet3: !Ref 'PublicSubnet3' 174 | PublicSubnet4: !Ref 'PublicSubnet4' 175 | 176 | PrivateSubnet: 177 | Type: AWS::CloudFormation::Stack 178 | Properties: 179 | TemplateURL: !Join 180 | - / 181 | - - !Ref 'StacksURL' 182 | - private-subnet.yaml 183 | TimeoutInMinutes: '60' 184 | Parameters: 185 | VpcId: !Ref 'VPC' 186 | VPCName: !Ref 'VPCName' 187 | PrivateSubnet1: !Ref 'PrivateSubnet1' 188 | PrivateSubnet2: !Ref 'PrivateSubnet2' 189 | PrivateSubnet3: !Ref 'PrivateSubnet3' 190 | PrivateSubnet4: !Ref 'PrivateSubnet4' 191 | 192 | NAT: 193 | Type: AWS::CloudFormation::Stack 194 | Properties: 195 | TemplateURL: !Join 196 | - / 197 | - - !Ref 'StacksURL' 198 | - nat.yaml 199 | 
TimeoutInMinutes: '60' 200 | Parameters: 201 | VpcId: !Ref 'VPC' 202 | VPCName: !Ref 'VPCName' 203 | PrivateSubnet1Id: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet1Id' 204 | PrivateSubnet2Id: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet2Id' 205 | PrivateSubnet3Id: !If 206 | - PrivateSubnet3_IsSet 207 | - !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet3Id' 208 | - !Ref 'AWS::NoValue' 209 | PrivateSubnet4Id: !If 210 | - PrivateSubnet4_IsSet 211 | - !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet4Id' 212 | - !Ref 'AWS::NoValue' 213 | PublicSubnet1Id: !GetAtt 'PublicSubnet.Outputs.PublicSubnet1Id' 214 | PublicSubnet2Id: !GetAtt 'PublicSubnet.Outputs.PublicSubnet2Id' 215 | PublicSubnet3Id: !If 216 | - PublicSubnet3_IsSet 217 | - !GetAtt 'PublicSubnet.Outputs.PublicSubnet3Id' 218 | - !Ref 'AWS::NoValue' 219 | PublicSubnet4Id: !If 220 | - PublicSubnet4_IsSet 221 | - !GetAtt 'PublicSubnet.Outputs.PublicSubnet4Id' 222 | - !Ref 'AWS::NoValue' 223 | PrivateRouteTable1: !GetAtt 'PrivateSubnet.Outputs.PrivateRouteTable1' 224 | PrivateRouteTable2: !GetAtt 'PrivateSubnet.Outputs.PrivateRouteTable2' 225 | PrivateRouteTable3: !If 226 | - PrivateSubnet3_IsSet 227 | - !GetAtt 'PrivateSubnet.Outputs.PrivateRouteTable3' 228 | - !Ref 'AWS::NoValue' 229 | PrivateRouteTable4: !If 230 | - PrivateSubnet4_IsSet 231 | - !GetAtt 'PrivateSubnet.Outputs.PrivateRouteTable4' 232 | - !Ref 'AWS::NoValue' 233 | 234 | Outputs: 235 | VPCName: 236 | Description: VPC Name 237 | Value: !Ref 'VPCName' 238 | Export: 239 | Name: !Sub "${AWS::StackName}-VPCName" 240 | 241 | VpcId: 242 | Description: The ID of the VPC that was created. 
243 | Value: !Ref 'VPC' 244 | Export: 245 | Name: !Sub "${AWS::StackName}-VpcId" 246 | 247 | Ipv6CidrBlocks: 248 | Condition: EnableIpv6_IsSet 249 | Description: Amazon assigned IPv6 CIDR blocks 250 | Value: !Join 251 | - ',' 252 | - !GetAtt 'VPC.Ipv6CidrBlocks' 253 | Export: 254 | Name: !Sub '${VPCName}-ipv6cidrblocks' 255 | 256 | PublicSubnet1Cidr: 257 | Description: The CIDR notated address of PublicSubnet1 258 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet1Cidr' 259 | Export: 260 | Name: !Sub "${AWS::StackName}-PublicSubnet1Cidr" 261 | 262 | PublicSubnet1Id: 263 | Description: The subnet ID of PublicSubnet1 264 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet1Id' 265 | Export: 266 | Name: !Sub "${AWS::StackName}-PublicSubnet1Id" 267 | 268 | PublicSubnet1AZ: 269 | Description: The availability zone of PublicSubnet1 270 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet1AZ' 271 | Export: 272 | Name: !Sub "${AWS::StackName}-PublicSubnet1AZ" 273 | 274 | PublicSubnet2Cidr: 275 | Description: The CIDR notated address of PublicSubnet2 276 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet2Cidr' 277 | Export: 278 | Name: !Sub "${AWS::StackName}-PublicSubnet2Cidr" 279 | 280 | PublicSubnet2Id: 281 | Description: The subnet ID of PublicSubnet2 282 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet2Id' 283 | Export: 284 | Name: !Sub "${AWS::StackName}-PublicSubnet2Id" 285 | 286 | PublicSubnet2AZ: 287 | Description: The availability zone of PublicSubnet2 288 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet2AZ' 289 | Export: 290 | Name: !Sub "${AWS::StackName}-PublicSubnet2AZ" 291 | 292 | PublicSubnet3Cidr: 293 | Description: The CIDR notated address of PublicSubnet3 294 | Condition: PublicSubnet3_IsSet 295 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet3Cidr' 296 | Export: 297 | Name: !Sub "${AWS::StackName}-PublicSubnet3Cidr" 298 | 299 | PublicSubnet3Id: 300 | Description: The subnet ID of PublicSubnet3 301 | Condition: PublicSubnet3_IsSet 302 | Value: 
!GetAtt 'PublicSubnet.Outputs.PublicSubnet3Id' 303 | Export: 304 | Name: !Sub "${AWS::StackName}-PublicSubnet3Id" 305 | 306 | PublicSubnet3AZ: 307 | Description: The availability zone of PublicSubnet3 308 | Condition: PublicSubnet3_IsSet 309 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet3AZ' 310 | Export: 311 | Name: !Sub "${AWS::StackName}-PublicSubnet3AZ" 312 | 313 | PublicSubnet4Cidr: 314 | Description: The CIDR notated address of PublicSubnet4 315 | Condition: PublicSubnet4_IsSet 316 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet4Cidr' 317 | Export: 318 | Name: !Sub "${AWS::StackName}-PublicSubnet4Cidr" 319 | 320 | PublicSubnet4Id: 321 | Description: The subnet ID of PublicSubnet4 322 | Condition: PublicSubnet4_IsSet 323 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet4Id' 324 | Export: 325 | Name: !Sub "${AWS::StackName}-PublicSubnet4Id" 326 | 327 | PublicSubnet4AZ: 328 | Description: The availability zone of PublicSubnet4 329 | Condition: PublicSubnet4_IsSet 330 | Value: !GetAtt 'PublicSubnet.Outputs.PublicSubnet4AZ' 331 | Export: 332 | Name: !Sub "${AWS::StackName}-PublicSubnet4AZ" 333 | 334 | PrivateSubnet1Cidr: 335 | Description: The CIDR notated address of PrivateSubnet1 336 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet1Cidr' 337 | Export: 338 | Name: !Sub "${AWS::StackName}-PrivateSubnet1Cidr" 339 | 340 | PrivateSubnet1Id: 341 | Description: The subnet ID of PrivateSubnet1 342 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet1Id' 343 | Export: 344 | Name: !Sub "${AWS::StackName}-PrivateSubnet1Id" 345 | 346 | PrivateSubnet1AZ: 347 | Description: The availability zone of PrivateSubnet1 348 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet1AZ' 349 | Export: 350 | Name: !Sub "${AWS::StackName}-PrivateSubnet1AZ" 351 | 352 | PrivateSubnet2Cidr: 353 | Description: The CIDR notated address of PrivateSubnet2 354 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet2Cidr' 355 | Export: 356 | Name: !Sub 
"${AWS::StackName}-PrivateSubnet2Cidr" 357 | 358 | PrivateSubnet2Id: 359 | Description: The subnet ID of PrivateSubnet2 360 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet2Id' 361 | Export: 362 | Name: !Sub "${AWS::StackName}-PrivateSubnet2Id" 363 | 364 | PrivateSubnet2AZ: 365 | Description: The availability zone of PrivateSubnet2 366 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet2AZ' 367 | Export: 368 | Name: !Sub "${AWS::StackName}-PrivateSubnet2AZ" 369 | 370 | PrivateSubnet3Cidr: 371 | Description: The CIDR notated address of PrivateSubnet3 372 | Condition: PrivateSubnet3_IsSet 373 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet3Cidr' 374 | Export: 375 | Name: !Sub "${AWS::StackName}-PrivateSubnet3Cidr" 376 | 377 | PrivateSubnet3Id: 378 | Description: The subnet ID of PrivateSubnet3 379 | Condition: PrivateSubnet3_IsSet 380 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet3Id' 381 | Export: 382 | Name: !Sub "${AWS::StackName}-PrivateSubnet3Id" 383 | 384 | PrivateSubnet3AZ: 385 | Description: The availability zone of PrivateSubnet3 386 | Condition: PrivateSubnet3_IsSet 387 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet3AZ' 388 | Export: 389 | Name: !Sub "${AWS::StackName}-PrivateSubnet3AZ" 390 | 391 | PrivateSubnet4Cidr: 392 | Description: The CIDR notated address of PrivateSubnet4 393 | Condition: PrivateSubnet4_IsSet 394 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet4Cidr' 395 | Export: 396 | Name: !Sub "${AWS::StackName}-PrivateSubnet4Cidr" 397 | 398 | PrivateSubnet4Id: 399 | Description: The subnet ID of PrivateSubnet4 400 | Condition: PrivateSubnet4_IsSet 401 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet4Id' 402 | Export: 403 | Name: !Sub "${AWS::StackName}-PrivateSubnet4Id" 404 | 405 | PrivateSubnet4AZ: 406 | Description: The availability zone of PrivateSubnet4 407 | Condition: PrivateSubnet4_IsSet 408 | Value: !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet4AZ' 409 | Export: 410 | Name: !Sub 
"${AWS::StackName}-PrivateSubnet4AZ" 411 | 412 | PublicSubnetIds: 413 | Description: Comma delimited list of public subnet IDs 414 | Value: !Join 415 | - ',' 416 | - - !GetAtt 'PublicSubnet.Outputs.PublicSubnet1Id' 417 | - !GetAtt 'PublicSubnet.Outputs.PublicSubnet2Id' 418 | - !If [PublicSubnet3_IsSet, !GetAtt 'PublicSubnet.Outputs.PublicSubnet3Id', !Ref 'AWS::NoValue'] 419 | - !If [PublicSubnet4_IsSet, !GetAtt 'PublicSubnet.Outputs.PublicSubnet4Id', !Ref 'AWS::NoValue'] 420 | Export: 421 | Name: !Sub "${AWS::StackName}-PublicSubnetIds" 422 | 423 | PrivateSubnetIds: 424 | Description: Comma delimited list of private subnet IDs 425 | Value: !Join 426 | - ',' 427 | - - !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet1Id' 428 | - !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet2Id' 429 | - !If [PrivateSubnet3_IsSet, !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet3Id', !Ref 'AWS::NoValue'] 430 | - !If [PrivateSubnet4_IsSet, !GetAtt 'PrivateSubnet.Outputs.PrivateSubnet4Id', !Ref 'AWS::NoValue'] 431 | Export: 432 | Name: !Sub "${AWS::StackName}-PrivateSubnetIds" 433 | -------------------------------------------------------------------------------- /cloudformation/vpc/vpc_parameters.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "ParameterKey": "VPCName", 4 | "ParameterValue": "PocketProxy" 5 | }, 6 | { 7 | "ParameterKey": "VPCSubnetCidr", 8 | "ParameterValue": "172.30.0.0/16" 9 | }, 10 | { 11 | "ParameterKey": "PublicSubnet1", 12 | "ParameterValue": "172.30.0.0/20" 13 | }, 14 | { 15 | "ParameterKey": "PublicSubnet2", 16 | "ParameterValue": "172.30.16.0/20" 17 | }, 18 | { 19 | "ParameterKey": "PublicSubnet3", 20 | "ParameterValue": "172.30.32.0/20" 21 | }, 22 | { 23 | "ParameterKey": "PublicSubnet4", 24 | "ParameterValue": "172.30.48.0/20" 25 | }, 26 | { 27 | "ParameterKey": "PrivateSubnet1", 28 | "ParameterValue": "172.30.128.0/20" 29 | }, 30 | { 31 | "ParameterKey": "PrivateSubnet2", 32 | "ParameterValue": "172.30.144.0/20" 
33 | }, 34 | { 35 | "ParameterKey": "PrivateSubnet3", 36 | "ParameterValue": "172.30.160.0/20" 37 | }, 38 | { 39 | "ParameterKey": "PrivateSubnet4", 40 | "ParameterValue": "172.30.176.0/20" 41 | } 42 | ] -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | # This file must exist for pytest to work properly. 2 | # https://stackoverflow.com/a/50610630/732649 -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | 3 | services: 4 | nginx: 5 | build: images/nginx 6 | container_name: ng01 7 | ports: 8 | - "80:80" 9 | 10 | depends_on: 11 | - app 12 | 13 | app: 14 | build: 15 | dockerfile: images/app/Dockerfile 16 | context: . 17 | args: 18 | UID: ${UID:-10001} 19 | GID: ${GID:-10001} 20 | 21 | command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] 22 | environment: 23 | - APP_ENV=development 24 | - GEOIP_S3_BUCKET=pocket-geoip 25 | - ADZERK_API_KEY=${ADZERK_API_KEY} 26 | - ADZERK_NETWORK_ID=10250 27 | - SENTRY_DSN=${SENTRY_DSN} 28 | 29 | ports: 30 | - 8000:8000 31 | 32 | volumes: 33 | - .:/app:ro 34 | - .:/opt/project/src:ro 35 | 36 | depends_on: 37 | - s3 38 | 39 | s3: 40 | image: pocket/fake-s3:20190604 41 | ports: 42 | - "4569:4569" 43 | 44 | volumes: 45 | - "fake_s3_vol:/fakes3/data" 46 | 47 | volumes: 48 | fake_s3_vol: {} 49 | -------------------------------------------------------------------------------- /gunicorn.py: -------------------------------------------------------------------------------- 1 | from app.config import Settings 2 | 3 | settings = Settings() 4 | 5 | bind = settings.gunicorn_bind 6 | worker_class = settings.gunicorn_worker_class 7 | workers = settings.gunicorn_workers 8 | 
-------------------------------------------------------------------------------- /images/app/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.6 as base 2 | 3 | ENV PIPENV_VENV_IN_PROJECT=1 4 | 5 | RUN python -m pip install pipenv 6 | 7 | COPY Pipfile Pipfile.lock ./ 8 | 9 | RUN pipenv install --deploy 10 | 11 | FROM python:3.11.6-slim 12 | 13 | ARG UID=${UID:-10001} 14 | ARG GID=${GID:-10001} 15 | 16 | RUN groupadd -g $GID app; \ 17 | useradd -g $GID -u $UID -m -s /bin/bash app; \ 18 | mkdir /app; \ 19 | chown app:app /app 20 | 21 | COPY --from=base /.venv /.venv 22 | 23 | WORKDIR /app 24 | 25 | COPY --chown=app:app . . 26 | 27 | USER app 28 | 29 | ENV PATH=/.venv/bin:${PATH} 30 | 31 | CMD ["gunicorn", "--config", "gunicorn.py", "app.main:app"] 32 | -------------------------------------------------------------------------------- /images/nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:1.21 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y curl 5 | 6 | RUN rm /etc/nginx/conf.d/default.conf 7 | 8 | COPY nginx.conf /etc/nginx/nginx.conf 9 | -------------------------------------------------------------------------------- /images/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | 3 | user nobody nogroup; 4 | # 'user nobody nobody;' for systems with 'nobody' as a group instead 5 | error_log /var/log/nginx/error.log warn; 6 | pid /var/run/nginx.pid; 7 | 8 | events { 9 | use epoll; 10 | worker_connections 2048; 11 | accept_mutex off; # set to 'on' if nginx worker_processes > 1 12 | } 13 | 14 | http { 15 | include mime.types; 16 | # fallback in case we can't determine a type 17 | default_type application/octet-stream; 18 | 19 | log_format custom '[$time_iso8601] "$request" $status $body_bytes_sent "$proxy_add_x_forwarded_for" "$http_user_agent"'; 20 | 21 | upstream 
app { 22 | # fail_timeout=0 means we always retry an upstream even if it failed 23 | # to return a good HTTP response 24 | 25 | # for UNIX domain socket setups 26 | #server unix:/tmp/gunicorn.sock fail_timeout=0; 27 | 28 | # for a TCP configuration 29 | server 127.0.0.1:8000 fail_timeout=0; 30 | } 31 | 32 | server { 33 | listen 80 default_server; 34 | 35 | ## Set HSTS header 36 | add_header Strict-Transport-Security "max-age=63072000; preload"; 37 | 38 | ## Size Limits & Buffer Overflows 39 | client_body_buffer_size 1K; 40 | client_header_buffer_size 1k; 41 | client_max_body_size 1k; 42 | large_client_header_buffers 2 1k; 43 | 44 | # set the correct host(s) for your site 45 | #server_name 127.0.0.1; 46 | # prevent version information from leaking 47 | server_tokens off; 48 | 49 | access_log /var/log/nginx/access.log custom; 50 | 51 | keepalive_timeout 5; 52 | 53 | location / { 54 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 55 | proxy_set_header X-Forwarded-Proto $scheme; 56 | proxy_set_header Host $http_host; 57 | # we don't want nginx trying to do something clever with 58 | # redirects, we set the Host: header above already. 59 | proxy_redirect off; 60 | proxy_pass http://app; 61 | } 62 | 63 | gzip on; 64 | gzip_types application/json; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /images/s3/download.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | set -e 3 | 4 | SOURCE_BUCKET="pocket-geoip-dev" 5 | FAKE_ENDPOINT="http://localhost:4569" 6 | FAKE_BUCKET="pocket-geoip" 7 | AWS_PROFILE="default" 8 | 9 | Print () { 10 | GREEN=`tput setaf 2` 11 | NC=`tput sgr0` # No Color 12 | printf "${GREEN}${1}${NC}\n" 13 | } 14 | 15 | Warn () { 16 | YELLOW=`tput setaf 3` 17 | NC=`tput sgr0` # No Color 18 | printf "${YELLOW}${1}${NC}\n" 19 | } 20 | 21 | # Code in the finish function always runs. 
22 | function finish { 23 | if [ -d "$TMPDIR" ]; then 24 | Print "Removing $TMPDIR" 25 | rm -rf $TMPDIR 26 | fi 27 | } 28 | trap finish EXIT 29 | 30 | # Parse arguments 31 | while getopts ":p:b:" opt; do 32 | case $opt in 33 | b) SOURCE_BUCKET=$OPTARG 34 | ;; 35 | \?) Warn "Invalid option -$OPTARG" 36 | ;; 37 | esac 38 | done 39 | 40 | TMPDIR=$(mktemp -d) 41 | Print "Downloading $SOURCE_BUCKET to $TMPDIR" 42 | aws s3 cp s3://$SOURCE_BUCKET $TMPDIR --recursive 43 | 44 | Print "Creating fake_s3 aws profile" 45 | aws configure set aws_access_key_id 'foobar' --profile 'fake_s3' 46 | aws configure set aws_secret_access_key 'foobar' --profile 'fake_s3' 47 | 48 | Print "Copying $TMPDIR to fake s3" 49 | aws --profile 'fake_s3' --endpoint-url $FAKE_ENDPOINT s3 cp $TMPDIR s3://$FAKE_BUCKET --recursive 50 | -------------------------------------------------------------------------------- /openapi/openapi.yml: -------------------------------------------------------------------------------- 1 | openapi: 3.0.0 2 | security: [] 3 | servers: 4 | - url: https://spocs.getpocket.com 5 | info: 6 | description: "APIs to interact with sponsored content (\"spocs\") for Firefox, while preserving privacy." 7 | version: '2.1' 8 | title: Firefox Spocs 9 | paths: 10 | /spocs: 11 | post: 12 | summary: Get sponsored content 13 | description: Get a list of spocs based on region and pocket_id from AdZerk. The IP address is used to deduce a rough geographic region, for example "Texas" in the U.S. or "England" in the U.K. The IP is not stored or shared with AdZerk to preserve privacy. 
14 | parameters: 15 | - in: query 16 | name: site 17 | schema: 18 | type: integer 19 | format: int32 20 | minimum: 1 21 | maximum: 2147483647 22 | required: false 23 | description: override siteId in ad decision requests 24 | example: 2500 25 | - in: query 26 | name: region 27 | schema: 28 | type: string 29 | required: false 30 | description: override region in keywords of ad decision requests for testing 31 | - in: query 32 | name: country 33 | schema: 34 | type: string 35 | required: false 36 | description: override country in keywords of ad decision requests for testing 37 | requestBody: 38 | required: true 39 | content: 40 | application/json: 41 | schema: 42 | $ref: '#/components/schemas/SpocRequest' 43 | examples: 44 | version_one: 45 | summary: Request from client that does not support collections, FireFox version <= 74 46 | value: 47 | version: 1 48 | consumer_key: "40249-e88c401e1b1f2242d9e441c4" 49 | pocket_id: "{12345678-8901-2345-aaaa-bbbbbbcccccc}" 50 | version_one_collection_req: 51 | summary: Request for collection placements with version=1 52 | value: 53 | version: 1 54 | consumer_key: "40249-e88c401e1b1f2242d9e441c4" 55 | pocket_id: "{12345678-8901-2345-aaaa-bbbbbbcccccc}" 56 | placements: 57 | - name: "collections-div" 58 | ad_types: 59 | - 1234 60 | zone_ids: 61 | - 5000 62 | count: 10 63 | version_two_collection_req: 64 | summary: Request for collection placements with version=2 65 | value: 66 | version: 2 67 | consumer_key: "40249-e88c401e1b1f2242d9e441c4" 68 | pocket_id: "{12345678-8901-2345-aaaa-bbbbbbcccccc}" 69 | placements: 70 | - name: "collections-div" 71 | ad_types: 72 | - 1234 73 | zone_ids: 74 | - 5000 75 | count: 10 76 | responses: 77 | '200': 78 | description: Responds with settings and a list of spocs. 
79 | content: 80 | application/json: 81 | schema: 82 | type: object 83 | additionalProperties: 84 | $ref: "#/components/schemas/SpocFeed" 85 | properties: 86 | settings: 87 | $ref: '#/components/schemas/Settings' 88 | __debug__: 89 | description: Informational object returned in non-prod environments 90 | type: object 91 | additionalProperties: true 92 | /user: 93 | delete: 94 | summary: Delete a user's personal data from AdZerk 95 | description: Used when a user opts-out of sponsored content to delete the user's data from AdZerk. 96 | requestBody: 97 | required: true 98 | content: 99 | application/json: 100 | schema: 101 | type: object 102 | required: 103 | - pocket_id 104 | properties: 105 | pocket_id: 106 | description: ID that uniquely identifies a session. 107 | example: "{12345678-8901-2345-aaaa-bbbbbbcccccc}" 108 | type: string 109 | responses: 110 | '200': 111 | description: Successfully deleted user data from AdZerk. 112 | 113 | components: 114 | schemas: 115 | SpocRequest: 116 | type: object 117 | required: 118 | - version 119 | - consumer_key 120 | - pocket_id 121 | - placements 122 | additionalProperties: false 123 | properties: 124 | version: 125 | type: integer 126 | description: API version 127 | format: int32 128 | minimum: 1 129 | maximum: 2 130 | example: 2 131 | consumer_key: 132 | type: string 133 | description: Identifies that the request is coming from Firefox. 134 | example: 40249-e88c401e1b1f2242d9e441c4 135 | pocket_id: 136 | type: string 137 | description: ID that uniquely identifies a session. 
138 | example: "{12345678-8901-2345-aaaa-bbbbbbcccccc}" 139 | pattern: "\\A{[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}}\\Z" 140 | placements: 141 | type: array 142 | items: 143 | $ref: "#/components/schemas/Placement" 144 | site: 145 | type: integer 146 | format: int32 147 | minimum: 1 148 | maximum: 2147483647 149 | description: override siteId in ad decision requests 150 | example: 2500 151 | country: 152 | type: string 153 | description: override country in keywords of ad decision requests for testing 154 | region: 155 | type: string 156 | description: override region in keywords of ad decision requests for testing 157 | 158 | Placement: 159 | type: object 160 | description: https://dev.adzerk.com/reference/request 161 | required: 162 | - name 163 | additionalProperties: false 164 | properties: 165 | name: 166 | type: string 167 | example: spocs 168 | description: Corresponds to the key in the response object. 169 | ad_types: 170 | type: array 171 | description: https://dev.adzerk.com/docs/ad-sizes 172 | items: 173 | type: integer 174 | format: int32 175 | example: 1234 176 | minimum: 1 177 | maximum: 2147483647 178 | zone_ids: 179 | type: array 180 | description: https://dev.adzerk.com/docs/zones-overview 181 | items: 182 | type: integer 183 | format: int32 184 | example: 123456 185 | minimum: 1 186 | maximum: 2147483647 187 | count: 188 | type: integer 189 | example: 20 190 | minimum: 1 191 | maximum: 20 192 | description: number of spocs to return for this placement 193 | 194 | #### Settings #### 195 | 196 | Settings: 197 | type: object 198 | additionalProperties: false 199 | required: 200 | - feature_flags 201 | - spocsPerNewTabs 202 | - domainAffinityParameterSets 203 | - timeSegments 204 | properties: 205 | spocsPerNewTabs: 206 | type: integer 207 | minimum: 1 208 | example: 1 209 | domainAffinityParameterSets: 210 | type: object 211 | additionalProperties: 212 | $ref: '#/components/schemas/DomainAffinityParameterSet' 213 | timeSegments: 214 | type: array 
215 | items: 216 | $ref: '#/components/schemas/TimeSegment' 217 | feature_flags: 218 | type: object 219 | $ref: '#/components/schemas/FeatureFlags' 220 | 221 | FeatureFlags: 222 | type: object 223 | additionalProperties: false 224 | required: 225 | - spoc_v2 226 | - collections 227 | properties: 228 | spoc_v2: 229 | type: boolean 230 | collections: 231 | type: boolean 232 | 233 | DomainAffinityParameterSet: 234 | type: object 235 | additionalProperties: false 236 | required: 237 | - recencyFactor 238 | - frequencyFactor 239 | - combinedDomainFactor 240 | - perfectCombinedDomainScore 241 | - multiDomainBoost 242 | - itemScoreFactor 243 | properties: 244 | recencyFactor: 245 | type: number 246 | frequencyFactor: 247 | type: number 248 | combinedDomainFactor: 249 | type: number 250 | perfectFrequencyVisits: 251 | type: number 252 | perfectCombinedDomainScore: 253 | type: number 254 | multiDomainBoost: 255 | type: number 256 | itemScoreFactor: 257 | type: number 258 | 259 | TimeSegment: 260 | type: object 261 | additionalProperties: false 262 | required: 263 | - id 264 | - startTime 265 | - endTime 266 | - weightPosition 267 | properties: 268 | id: 269 | type: string 270 | startTime: 271 | type: integer 272 | endTime: 273 | type: integer 274 | weightPosition: 275 | example: 1 276 | 277 | #### Spocs #### 278 | 279 | SpocFeed: 280 | oneOf: 281 | - type: array 282 | items: 283 | $ref: "#/components/schemas/SpocFeedItem" 284 | - type: object 285 | additionalProperties: false 286 | required: 287 | - title 288 | - flight_id 289 | properties: 290 | title: 291 | type: string 292 | example: "Best of the Web" 293 | flight_id: 294 | type: integer 295 | example: 4321 296 | sponsor: 297 | type: string 298 | example: NextAdvisor 299 | context: 300 | type: string 301 | example: "Sponsored by NextAdvisor" 302 | items: 303 | type: array 304 | items: 305 | $ref: "#/components/schemas/SpocFeedItem" 306 | 307 | Shim: 308 | type: object 309 | additionalProperties: false 310 | properties: 
311 | click: 312 | type: string 313 | example: "1234123asdf4tYadsfQ,xY-01BU12" 314 | impression: 315 | type: string 316 | example: "a0c3943asdf4tYadsf300,xY-01BU9aadc" 317 | delete: 318 | type: string 319 | example: "fdea123asdf4tYadsf1000,xY-01BUa654" 320 | save: 321 | type: string 322 | example: "4567123asdf4tYadsfQcda,xY-01BU123" 323 | 324 | Caps: 325 | type: object 326 | additionalProperties: false 327 | required: 328 | - lifetime 329 | - flight 330 | - campaign 331 | properties: 332 | lifetime: 333 | type: integer 334 | example: 50 335 | flight: 336 | type: object 337 | additionalProperties: false 338 | required: 339 | - count 340 | - period 341 | properties: 342 | count: 343 | type: integer 344 | example: 10 345 | period: 346 | type: integer 347 | description: Period in seconds 348 | example: 86400 349 | campaign: 350 | type: object 351 | additionalProperties: false 352 | required: 353 | - count 354 | - period 355 | properties: 356 | count: 357 | type: integer 358 | example: 10 359 | period: 360 | type: integer 361 | description: Period in seconds 362 | example: 86400 363 | 364 | SpocFeedItem: 365 | type: object 366 | additionalProperties: false 367 | properties: 368 | campaign_id: 369 | type: integer 370 | example: 784 371 | caps: 372 | type: object 373 | $ref: '#/components/schemas/Caps' 374 | collection_title: 375 | type: string 376 | description: Shared title if all ads are one collection 377 | context: 378 | type: string 379 | description: Deprecated. Use sponsor field instead. 
380 | example: Sponsored by NextAdvisor 381 | cta: 382 | type: string 383 | description: Text to display on CTA button 384 | example: Learn more 385 | domain: 386 | type: string 387 | example: arstechnica.com 388 | domain_affinities: 389 | type: object 390 | additionalProperties: 391 | type: number 392 | example: 393 | vanguard.com: 0.9956 394 | wealthsimple.com: 0.9193 395 | excerpt: 396 | type: string 397 | example: Driving excerpt 398 | flight_id: 399 | type: integer 400 | example: 432 401 | id: 402 | type: integer 403 | example: 30295 404 | image_src: 405 | type: string 406 | example: https://img-getpocket.cdn.mozilla.net/ad.gif 407 | is_video: 408 | type: boolean 409 | item_score: 410 | type: number 411 | format: float 412 | example: 0.2 413 | min_score: 414 | type: number 415 | format: float 416 | example: 0.1 417 | parameter_set: 418 | type: string 419 | example: default 420 | personalization_models: 421 | type: object 422 | additionalProperties: true 423 | priority: 424 | type: integer 425 | description: The priority order. 1-100, 1 is highest priority. 
426 | minimum: 1 427 | maximum: 100 428 | raw_image_src: 429 | type: string 430 | example: https://kevel/ad.gif 431 | shim: 432 | type: object 433 | $ref: '#/components/schemas/Shim' 434 | sponsor: 435 | type: string 436 | example: NextAdvisor 437 | sponsored_by_override: 438 | type: string 439 | example: NextAdvisor 440 | title: 441 | type: string 442 | example: Why driving is hard—even for AIs 443 | url: 444 | type: string 445 | example: http://url 446 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Tests 2 | 3 | ## Unit Testing 4 | 5 | Run all test cases: 6 | ```bash 7 | python -m unittest discover tests/ 8 | ``` 9 | Or run a specific test case: 10 | ```bash 11 | python -m unittest tests.test_adzerk_transform.test_to_spoc 12 | ``` 13 | 14 | ## Load Testing 15 | 16 | [Serverless Artillery](https://github.com/Nordstrom/serverless-artillery) is used for load testing this service. 17 | 18 | ### Installation 19 | 1. `cd tests/load` 20 | 2. Follow the [Serverless Artillery installation instructions](https://github.com/Nordstrom/serverless-artillery#installation). 21 | 1. `npm install serverless` 22 | 2. `npm install serverless-artillery` 23 | 3. Check that the installation succeeded: `slsart --version` 24 | 3. `npm install artillery-plugin-cloudwatch` 25 | 26 | ### Run test 27 | 1. `cd tests/load` 28 | 2. `slsart invoke --stage dev #for dev` 29 | 30 | ## Local speed test 31 | Locally run 500 requests in parallel. This is useful to: 32 | 1. Verify whether the code is running asynchronously. 33 | 2. Do a performance test in a matter of seconds. Running a load test is still required to get an accurate result. 34 | ```shell script 35 | ./tests/scripts/timer.sh 36 | ``` 37 | 38 | ## Profiling 39 | Measure which lines take up the most CPU time.
This is useful to identify if any part of the code is taking much more time than it should. 40 | ```shell script 41 | gunicorn -c ./tests/scripts/wsgi_profiler_conf.py "app.main:create_app()" 42 | ``` 43 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pocket/proxy-server/306202e77a700c4e9ee676875f67a73595493045/tests/__init__.py -------------------------------------------------------------------------------- /tests/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pocket/proxy-server/306202e77a700c4e9ee676875f67a73595493045/tests/api/__init__.py -------------------------------------------------------------------------------- /tests/api/test_api.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import responses 4 | import schemathesis 5 | from aioresponses import aioresponses 6 | 7 | from tests.fixtures.mock_factory import get_mocked_geolocation_factory 8 | from tests.fixtures.mock_decision import mock_decision_2, mock_collection_response 9 | 10 | __FACTORY = get_mocked_geolocation_factory() 11 | with patch("app.geolocation.factory.Factory.get_instance", return_value=__FACTORY): 12 | from app.main import app 13 | 14 | schema = schemathesis.from_path("openapi/openapi.yml", app=app) 15 | 16 | 17 | MOCK_DECISIONS_RESPONSE = { 18 | "user": { 19 | "key": "{12345678-8901-2345-aaaa-bbbbbbcccccc}", 20 | }, 21 | "decisions": { 22 | "spocs": [mock_decision_2] 23 | } 24 | } 25 | 26 | MOCK_DECISIONS_COLLECTION_RESPONSE = { 27 | "user": { 28 | "key": "{12345678-8901-2345-aaaa-bbbbbbcccccc}", 29 | }, 30 | "decisions": { 31 | "spocs": [mock_collection_response] 32 | } 33 | } 34 | 35 | 36 | @responses.activate 37 | @schema.parametrize(endpoint="/user") 38 | 
def test_delete_api(case: schemathesis.Case) -> None: 39 | # Mock call to forget API 40 | responses.delete("https://e-10250.adzerk.net/udb/10250/") 41 | 42 | # Call Pocket Proxy and validate response 43 | response = case.call_asgi() 44 | case.validate_response(response) 45 | 46 | 47 | @responses.activate 48 | @schema.parametrize(endpoint="/spocs") 49 | def test_spocs_api(case: schemathesis.Case) -> None: 50 | with aioresponses() as m: 51 | # Mock call to decisions API 52 | m.post("https://e-10250.adzerk.net/api/v2", payload=MOCK_DECISIONS_RESPONSE) 53 | 54 | # Call Pocket Proxy and validate response 55 | response = case.call_asgi() 56 | case.validate_response(response) 57 | 58 | 59 | def version_two(context, body): 60 | return body.get("version") == 2 61 | 62 | 63 | # clients pass in version=2 if they support collections 64 | @responses.activate 65 | @schema.hooks.apply(version_two, name="filter_body") 66 | @schema.parametrize(endpoint="/spocs") 67 | def test_spocs_collection_api(case: schemathesis.Case) -> None: 68 | with aioresponses() as m: 69 | # Mock call to decisions API 70 | m.post("https://e-10250.adzerk.net/api/v2", payload=MOCK_DECISIONS_COLLECTION_RESPONSE) 71 | 72 | # Call Pocket Proxy and validate response 73 | response = case.call_asgi() 74 | case.validate_response(response) -------------------------------------------------------------------------------- /tests/fixtures/GeoIP2-City-Test.mmdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pocket/proxy-server/306202e77a700c4e9ee676875f67a73595493045/tests/fixtures/GeoIP2-City-Test.mmdb -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pocket/proxy-server/306202e77a700c4e9ee676875f67a73595493045/tests/fixtures/__init__.py 
-------------------------------------------------------------------------------- /tests/fixtures/mock_decision.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | mock_response = { 4 | "adId": 111, 5 | "creativeId": 222, 6 | "flightId": 333, 7 | "campaignId": 1000, 8 | "priorityId": 555, 9 | "clickUrl": "http://e-123.adzerk.net/r?e=12345&s=12345", 10 | "contents": [ 11 | { 12 | "type": "html", 13 | "template": "image", 14 | "data": { 15 | "imageUrl": "http://static.adzerk.net/cat.jpg", 16 | "title": "ZOMG A CAT", 17 | "width": 350, 18 | "height": 350, 19 | "ctTitle": "title 1000", 20 | "ctUrl": "url", 21 | "ctDomain": "ctDOmain", 22 | "ctExcerpt": "excerpt", 23 | "ctSponsor": "sponsor", 24 | "ctFullimagepath": "blah", 25 | "ctMin_score": 0.01, 26 | "ctItem_score": 0.01, 27 | "ctDomain_affinities": "travel" 28 | } 29 | } 30 | ], 31 | "impressionUrl": "http://e-123.adzerk.net/i.gif?e=12345&s=12345", 32 | "events": [ 33 | { 34 | "id": 12, 35 | "url": "http://e-123.adzerk.net/e.gif?e=12345&s=12345" 36 | }, 37 | { 38 | "id": 13, 39 | "url": "http://e-123.adzerk.net/e.gif?e=12345&s=12345" 40 | }, 41 | { 42 | "id": 20, 43 | "url": "http://e-123.adzerk.net/e.gif?e=12345&s=12345" 44 | }, 45 | { 46 | "id": 17, 47 | "url": "http://e-123.adzerk.net/e.gif?e=12345&s=12345" 48 | } 49 | ], 50 | "pricing": { 51 | "price": 5, 52 | "clearPrice": 2.01, 53 | "revenue": 0.002, 54 | "rateType": 2, 55 | "eCPM": 5 56 | } 57 | } 58 | 59 | mock_response_900 = { 60 | "adId": 111, 61 | "creativeId": 222, 62 | "flightId": 333, 63 | "campaignId": 900, 64 | "priorityId": 555, 65 | "clickUrl": "http://e-123.adzerk.net/r?e=12345&s=12345", 66 | "contents": [ 67 | { 68 | "type": "html", 69 | "template": "image", 70 | "data": { 71 | "imageUrl": "http://static.adzerk.net/cat.jpg", 72 | "title": "ZOMG A CAT", 73 | "width": 350, 74 | "height": 350, 75 | "ctTitle": "title 900", 76 | "ctUrl": "url", 77 | "ctDomain": "ctDOmain", 78 | 
"ctExcerpt": "excerpt", 79 | "ctSponsor": "sponsor", 80 | "ctFullimagepath": "blah", 81 | "ctMin_score": 0.01, 82 | "ctItem_score": 0.01, 83 | "ctDomain_affinities": "travel" 84 | } 85 | } 86 | ], 87 | "impressionUrl": "http://e-123.adzerk.net/i.gif?e=12345&s=12345", 88 | "events": [ 89 | { 90 | "id": 12, 91 | "url": "http://e-123.adzerk.net/e.gif?e=12345&s=12345" 92 | }, 93 | { 94 | "id": 13, 95 | "url": "http://e-123.adzerk.net/e.gif?e=12345&s=12345" 96 | }, 97 | { 98 | "id": 20, 99 | "url": "http://e-123.adzerk.net/e.gif?e=12345&s=12345" 100 | }, 101 | { 102 | "id": 17, 103 | "url": "http://e-123.adzerk.net/e.gif?e=12345&s=12345" 104 | } 105 | ], 106 | "pricing": { 107 | "price": 5, 108 | "clearPrice": 2.01, 109 | "revenue": 0.002, 110 | "rateType": 2, 111 | "eCPM": 5 112 | } 113 | } 114 | 115 | mock_decision_2 = { 116 | "adId": 2, 117 | "creativeId": 9142593, 118 | "flightId": 8525375, 119 | "campaignId": 887195, 120 | "priorityId": 147517, 121 | "clickUrl": "https://e-10250.adzerk.net/r?e=jq&s=s2", 122 | "impressionUrl": "https://e-10250.adzerk.net/i.gif?e=ke1&s=s3", 123 | "contents": [ 124 | { 125 | "type": "raw", 126 | "data": { 127 | "ctUrl": "https://example.com/?key=foobar", 128 | "ctDomain_affinities": "publishers", 129 | "ctDomain": "wallmarket.com", 130 | "ctTitle": "Refresh Your Space for Spring", 131 | "ctExcerpt": "Get up to 50% off furniture, bedding, and more.", 132 | "ctFullimagepath": "https://cdn.net/25a.jpg", 133 | "ctSponsor": "WallMarket", 134 | "ctIsVideo": "", 135 | "ctImage": "25a.jpg", 136 | "fileName": "25a.jpg" 137 | } 138 | } 139 | ], 140 | "events": [ 141 | { 142 | "id": 17, 143 | "url": "https://e-10250.adzerk.net/e.gif?e=eyJ2&s=Y6" 144 | }, 145 | { 146 | "id": 20, 147 | "url": "https://e-10250.adzerk.net/e.gif?e=wfQ&s=Rj-6" 148 | } 149 | ] 150 | } 151 | 152 | mock_decision_3_cta = deepcopy(mock_decision_2) 153 | mock_decision_3_cta['adId'] = 3 154 | mock_decision_3_cta['contents'][0]['data']['ctCta'] = "Learn more" 155 | 156 | 
mock_collection_response = deepcopy(mock_response_900) 157 | mock_collection_response['adId'] = 4 158 | mock_collection_response['contents'][0]['data']['ctCollectionTitle'] = "Best of the Web" 159 | 160 | mock_decision_5_topics = deepcopy(mock_decision_2) 161 | mock_decision_5_topics['adId'] = 5 162 | mock_decision_5_topics['contents'][0]['body'] = "{\"topic_arts_and_entertainment\":\"\",\"topic_autos_and_vehicles\":\"true\",\"topic_beauty_and_fitness\":\"true\"}" 163 | 164 | mock_decision_6_no_sponsor = deepcopy(mock_decision_2) 165 | mock_decision_6_no_sponsor['adId'] = 6 166 | del mock_decision_6_no_sponsor['contents'][0]['data']['ctSponsor'] 167 | 168 | mock_decision_7_is_video = deepcopy(mock_decision_2) 169 | mock_decision_7_is_video['adId'] = 7 170 | mock_decision_7_is_video['contents'][0]['data']['ctIsVideo'] = " Yes " 171 | 172 | mock_decision_8_blank_sponsored_by_override = deepcopy(mock_decision_2) 173 | mock_decision_8_blank_sponsored_by_override['adId'] = 8 174 | mock_decision_8_blank_sponsored_by_override['contents'][0]['data']['ctSponsoredByOverride'] = "BLANK " 175 | 176 | mock_decision_9_sponsored_by_override = deepcopy(mock_decision_2) 177 | mock_decision_9_sponsored_by_override['adId'] = 9 178 | mock_decision_9_sponsored_by_override['contents'][0]['data']['ctSponsoredByOverride'] = "Brought by blank" 179 | 180 | mock_decision_10_missing_priority = deepcopy(mock_decision_2) 181 | mock_decision_10_missing_priority['adId'] = 10 182 | del mock_decision_10_missing_priority['priorityId'] 183 | -------------------------------------------------------------------------------- /tests/fixtures/mock_factory.py: -------------------------------------------------------------------------------- 1 | import geoip2.database 2 | import os 3 | 4 | 5 | def get_mocked_geolocation_factory(): 6 | dir_path = os.path.dirname(os.path.realpath(__file__)) 7 | file_path = os.path.join(dir_path, 'GeoIP2-City-Test.mmdb') 8 | return geoip2.database.Reader(file_path) 9 | 
-------------------------------------------------------------------------------- /tests/fixtures/mock_placements.py: -------------------------------------------------------------------------------- 1 | mock_spocs_placement = [ 2 | { 3 | "name": "spocs", 4 | "ad_types": [ 5 | 2401, 6 | 3617 7 | ] 8 | } 9 | ] 10 | 11 | mock_placements = [ 12 | { 13 | "name": "top-sites", 14 | "ad_types": [91, 3617], 15 | "zone_ids": [5000], 16 | }, 17 | { 18 | "name": "text-promo", 19 | "ad_types": [99, 400], 20 | "zone_ids": [5000], 21 | } 22 | ] 23 | 24 | mock_collection_placements = [ 25 | { 26 | "name": "sponsored-collection", 27 | "ad_types": [ 28 | 2401, 29 | 3617 30 | ], 31 | "zone_ids": [ 32 | 217692 33 | ], 34 | "count": 20 35 | }, 36 | { 37 | "name": "spocs", 38 | "ad_types": [ 39 | 2401, 40 | 3617 41 | ], 42 | "zone_ids": [ 43 | 217995 44 | ], 45 | "count": 20 46 | } 47 | ] 48 | -------------------------------------------------------------------------------- /tests/fixtures/mock_spoc.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | mock_spoc_2 = { 4 | "id": 2, 5 | "flight_id": 8525375, 6 | "campaign_id": 887195, 7 | "title": "Refresh Your Space for Spring", 8 | "url": "https://example.com/?key=foobar", 9 | "domain": "wallmarket.com", 10 | "excerpt": "Get up to 50% off furniture, bedding, and more.", 11 | "priority": 1, 12 | "context": "Sponsored by WallMarket", 13 | "sponsor": "WallMarket", 14 | "raw_image_src": "https://cdn.net/25a.jpg", 15 | "image_src": "https://img-getpocket.cdn.mozilla.net/direct?url=https%3A//cdn.net/25a.jpg&resize=w618-h310", 16 | "shim": { 17 | "click": "0,jq,s2", 18 | "impression": "1,ke1,s3", 19 | "delete": "2,eyJ2,Y6", 20 | "save": "2,wfQ,Rj-6" 21 | }, 22 | "parameter_set": "default", 23 | "caps": { 24 | "lifetime": 50, 25 | "campaign": { 26 | "count": 10, 27 | "period": 86400 28 | }, 29 | "flight": { 30 | "count": 10, 31 | "period": 86400 32 | } 33 | }, 34 | 
"domain_affinities": { 35 | "example.com": 1 36 | }, 37 | "personalization_models": {}, 38 | "min_score": 0.1, 39 | "item_score": 0.2, 40 | } 41 | 42 | mock_spoc_3_cta = deepcopy(mock_spoc_2) 43 | mock_spoc_3_cta["id"] = 3 44 | mock_spoc_3_cta["cta"] = "Learn more" 45 | 46 | mock_collection_spoc_2 = deepcopy(mock_spoc_2) 47 | mock_collection_spoc_2["collection_title"] = "Best of the Web" 48 | mock_collection_spoc_3 = deepcopy(mock_spoc_3_cta) 49 | mock_collection_spoc_3["collection_title"] = "Best of the Web" 50 | 51 | mock_collection = { 52 | "title": "Best of the Web", 53 | "sponsor": "WallMarket", 54 | "context": "Sponsored by WallMarket", 55 | "flight_id": 8525375, 56 | "items": [deepcopy(mock_spoc_2), deepcopy(mock_spoc_3_cta)] 57 | } 58 | 59 | mock_spoc_5_topics = deepcopy(mock_spoc_2) 60 | mock_spoc_5_topics["id"] = 5 61 | mock_spoc_5_topics["personalization_models"] = {"autos_and_vehicles":1, "beauty_and_fitness": 1} 62 | 63 | mock_spoc_6_no_sponsor = deepcopy(mock_spoc_2) 64 | mock_spoc_6_no_sponsor["id"] = 6 65 | del mock_spoc_6_no_sponsor["sponsor"] 66 | mock_spoc_6_no_sponsor["context"] = "" 67 | 68 | mock_spoc_7_is_video = deepcopy(mock_spoc_2) 69 | mock_spoc_7_is_video["id"] = 7 70 | mock_spoc_7_is_video["is_video"] = True 71 | 72 | mock_spoc_8_blank_sponsored_by_override = deepcopy(mock_spoc_2) 73 | mock_spoc_8_blank_sponsored_by_override["id"] = 8 74 | mock_spoc_8_blank_sponsored_by_override["sponsored_by_override"] = "" 75 | 76 | mock_spoc_9_sponsored_by_override = deepcopy(mock_spoc_2) 77 | mock_spoc_9_sponsored_by_override["id"] = 9 78 | mock_spoc_9_sponsored_by_override["sponsored_by_override"] = "Brought by blank" 79 | 80 | mock_spoc_10_missing_priority = deepcopy(mock_spoc_2) 81 | mock_spoc_10_missing_priority["id"] = 10 82 | mock_spoc_10_missing_priority["priority"] = 100 83 | -------------------------------------------------------------------------------- /tests/load/script.yml: 
-------------------------------------------------------------------------------- 1 | # Thank you for trying serverless-artillery! 2 | # This default script is intended to get you started quickly. 3 | # There is a lot more that Artillery can do. 4 | # You can find great documentation of the possibilities at: 5 | # https://artillery.io/docs/ 6 | config: 7 | # this hostname will be used as a prefix for each URI in the flow unless a complete URI is specified 8 | target: "https://spocs.getpocket.com" 9 | phases: 10 | - duration: 3600 11 | arrivalRate: 10 12 | rampTo: 7000 13 | name: "Warm up AdZerk" 14 | - duration: 10800 15 | arrivalRate: 7000 16 | name: "Sustained 100% of max load" 17 | plugins: 18 | cloudwatch: 19 | namespace: "serverless-artillery-myperftestservice-loadtest" 20 | 21 | scenarios: 22 | - flow: 23 | - post: 24 | url: "/spocs" 25 | json: 26 | version: 1 27 | consumer_key: "40249-e88c401e1b1f2242d9e441c4" 28 | pocket_id: "{12345678-1234-5678-90ab-1234567890ab}" 29 | qs: 30 | slsart: 1 31 | -------------------------------------------------------------------------------- /tests/load/serverless.yml: -------------------------------------------------------------------------------- 1 | # See tutorial 7, step 4: 2 | # https://github.com/Nordstrom/serverless-artillery#4-customize-deployment-assets-to-add-at-least-one-subscription 3 | 4 | # We're excited that this project has provided you enough value that you are looking at its code! 5 | # 6 | # This is a standard [Serverless Framework](https://www.serverless.com) project and you should 7 | # feel welcome to customize it to your needs and delight. 8 | # 9 | # If you do something super cool and would like to share the capability, please open a PR against 10 | # https://www.github.com/Nordstrom/serverless-artillery 11 | # 12 | # Thanks! 
13 | 14 | # If the following value is changed, your service may be duplicated (this value is used to build the CloudFormation 15 | # Template script's name) 16 | service: serverless-artillery-dev 17 | 18 | provider: 19 | name: aws 20 | runtime: nodejs10.x 21 | iamRoleStatements: 22 | # This policy allows the function to invoke itself which is important if the script is larger than a single 23 | # function can produce 24 | - Effect: 'Allow' 25 | Action: 26 | - 'lambda:InvokeFunction' 27 | Resource: 28 | 'Fn::Join': 29 | - ':' 30 | - 31 | - 'arn:aws:lambda' 32 | - Ref: 'AWS::Region' 33 | - Ref: 'AWS::AccountId' 34 | - 'function' 35 | - '${self:service}-${opt:stage, self:provider.stage}-loadGenerator*' # must match function name 36 | # This policy allows the function to publish notifications to the SNS topic defined below with logical ID monitoringAlerts 37 | - Effect: 'Allow' 38 | Action: 39 | - 'sns:Publish' 40 | Resource: 41 | Ref: monitoringAlerts # must match the SNS topic's logical ID 42 | - Effect: 'Allow' 43 | Action: 44 | - 'cloudwatch:PutMetricData' 45 | Resource: 46 | - '*' 47 | 48 | functions: 49 | loadGenerator: # !!Do not edit this name!! 50 | handler: handler.handler # the serverlessArtilleryLoadTester handler() method can be found in the handler.js source file 51 | timeout: 300 # set timeout to be 5 minutes (max for Lambda) 52 | environment: 53 | TOPIC_ARN: 54 | Ref: monitoringAlerts 55 | TOPIC_NAME: 56 | 'Fn::GetAtt': 57 | - monitoringAlerts 58 | - TopicName 59 | events: 60 | - schedule: 61 | name: '${self:service}-${opt:stage, self:provider.stage}-monitoring' # !!Do not edit this name!! 62 | description: The scheduled event for running the function in monitoring mode 63 | rate: rate(1 minute) 64 | ######################################################################################################################## 65 | ### !! BEFORE ENABLING... !!! 66 | ### 0. 
Change `'>>': script.yml` below to reference the script you want to use for monitoring if that is not its name. 67 | ### The script must be in this directory or a subdirectory. 68 | ### 1. Modify your `script.yml` to provide the details of invoking every important surface of your service, as per 69 | ### https://artillery.io/docs 70 | ### 2. To receive alerts when errors exceed the budget: 71 | ### i. Add a `match` clause to your requests, specifying your expectations of a successful request. This relatively 72 | ### undocumented feature is implemented at: https://github.com/shoreditch-ops/artillery/blob/82bdcdfc32ce4407bb197deff2cee13b4ecbab3b/core/lib/engine_util.js#L318 73 | ### We would welcome the contribution of a plugin replacing this as discussed in https://github.com/Nordstrom/serverless-artillery/issues/116 74 | ### ii. Modify the `monitoringAlerts` SNS Topic below, uncommenting `Subscription` and providing subscriptions for any 75 | ### alerts that might be raised by the monitoring function. (To help you out, we've provided commented-out examples) 76 | ### (After all, what good is monitoring if no one is listening?) 77 | ### 3. Deploy your new assets/updated service using `slsart deploy` 78 | ### 4. [As appropriate] approve the subscription verifications for the SNS topic that will be sent following its creation 79 | ### 5. Re-deploy whenever you update your monitoring script 80 | ######################################################################################################################## 81 | enabled: false 82 | input: 83 | '>>': script.yml 84 | mode: monitoring 85 | resources: 86 | Resources: 87 | monitoringAlerts: # !!Do not edit this name!!
88 | Type: 'AWS::SNS::Topic' 89 | Properties: 90 | DisplayName: '${self:service} Monitoring Alerts' 91 | # Subscription: # docs at https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-sns-subscription.html 92 | # - Endpoint: http:/// # the endpoint is an URL beginning with "http://" 93 | # Protocol: http 94 | # - Endpoint: https:/// # the endpoint is a URL beginning with "https://" 95 | # Protocol: https 96 | # - Endpoint: @ # the endpoint is an email address 97 | # Protocol: email 98 | # - Endpoint: @ # the endpoint is an email address 99 | # Protocol: email-json 100 | # - Endpoint: # the endpoint is a phone number of an SMS-enabled device 101 | # Protocol: sms 102 | # - Endpoint: # the endpoint is the ARN of an Amazon SQS queue 103 | # Protocol: sqs 104 | # - Endpoint: # the endpoint is the EndpointArn of a mobile app and device. 105 | # Protocol: application 106 | # - Endpoint: # the endpoint is the ARN of an AWS Lambda function. 107 | # Protocol: lambda 108 | -------------------------------------------------------------------------------- /tests/scripts/timer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # \nocite{Tange2011a} 3 | 4 | time seq 500 | parallel --jobs 500 "curl -s -X POST \ 5 | http://localhost/spocs \ 6 | -H 'Accept: */*' \ 7 | -H 'Content-Type: application/json' \ 8 | -H 'cache-control: no-cache' \ 9 | -d '{ 10 | \"version\":1, 11 | \"consumer_key\":\"1234\", 12 | \"pocket_id\":\"5678\" 13 | }' > /dev/null" 14 | -------------------------------------------------------------------------------- /tests/scripts/wsgi_profiler_conf.py: -------------------------------------------------------------------------------- 1 | 2 | import cProfile 3 | import pstats 4 | from io import StringIO 5 | import logging 6 | import os 7 | import time 8 | 9 | PROFILE_LIMIT = int(os.environ.get("PROFILE_LIMIT", 30)) 10 | PROFILER = bool(int(os.environ.get("PROFILER", 1))) 11 | 12 | print(""" 
13 | # ** USAGE: 14 | $ PROFILE_LIMIT=100 gunicorn -c ./wsgi_profiler_conf.py wsgi 15 | # ** TIME MEASUREMENTS ONLY: 16 | $ PROFILER=0 gunicorn -c ./wsgi_profiler_conf.py wsgi 17 | """) 18 | 19 | 20 | def profiler_enable(worker, req): 21 | worker.profile = cProfile.Profile() 22 | worker.profile.enable() 23 | worker.log.info("PROFILING %d: %s" % (worker.pid, req.uri)) 24 | 25 | 26 | def profiler_summary(worker, req): 27 | s = StringIO() 28 | worker.profile.disable() 29 | ps = pstats.Stats(worker.profile, stream=s).sort_stats('time', 'cumulative') 30 | ps.print_stats(PROFILE_LIMIT) 31 | 32 | logging.error("\n[%d] [INFO] [%s] URI %s" % (worker.pid, req.method, req.uri)) 33 | logging.error("[%d] [INFO] %s" % (worker.pid, s.getvalue())) 34 | 35 | 36 | def pre_request(worker, req): 37 | worker.start_time = time.time() 38 | if PROFILER is True: 39 | profiler_enable(worker, req) 40 | 41 | 42 | def post_request(worker, req, *args): 43 | total_time = time.time() - worker.start_time 44 | logging.error("\n[%d] [INFO] [%s] Load Time: %.3fs\n" % ( 45 | worker.pid, req.method, total_time)) 46 | if PROFILER is True: 47 | profiler_summary(worker, req) 48 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pocket/proxy-server/306202e77a700c4e9ee676875f67a73595493045/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_adzerk_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | from unittest.mock import patch, Mock 4 | import responses 5 | import requests.exceptions 6 | 7 | from app.adzerk.api import Api 8 | from tests.fixtures.mock_placements import mock_placements, mock_spocs_placement 9 | 10 | 11 | class TestAdZerkApi(TestCase): 12 | 13 | def setUp(self): 14 | 
# Reset cache expiration time 15 | Api.priority_cache_expires_at = None 16 | 17 | @responses.activate 18 | def test_delete_user(self): 19 | url = 'https://e-10250.adzerk.net/udb/10250/?userKey=%7B123%7D' 20 | responses.add(responses.DELETE, url, status=200) 21 | 22 | api = Api(pocket_id="{123}", api_key="DUMMY_123") 23 | api.delete_user() 24 | 25 | self.assertEqual(1, len(responses.calls)) 26 | 27 | request = responses.calls[0].request 28 | self.assertEqual(url, request.url) 29 | self.assertEqual('DUMMY_123', request.headers['X-Adzerk-ApiKey']) 30 | 31 | @responses.activate 32 | def test_update_api_key(self): 33 | url = 'https://e-10250.adzerk.net/udb/10250/?userKey=%7B123%7D' 34 | responses.add(responses.DELETE, url, status=401) 35 | 36 | api = Api(pocket_id="{123}", api_key="OUT_OF_DATE_123") 37 | # Exception is raised when AdZerk responds with a bad status code. 38 | with self.assertRaises(requests.exceptions.HTTPError): 39 | api.delete_user() 40 | 41 | self.assertEqual(1, len(responses.calls)) 42 | 43 | def test_keywords(self): 44 | api = Api(pocket_id="{123}", country='US', region='CA') 45 | body = api.get_decision_body() 46 | self.assertTrue('US' in body['keywords']) 47 | self.assertTrue('US-CA' in body['keywords']) 48 | 49 | def test_missing_region(self): 50 | api = Api(pocket_id="{123}", country='US', region='') 51 | body = api.get_decision_body() 52 | self.assertEqual(['US'], body['keywords']) 53 | 54 | def test_keywords_empty(self): 55 | api = Api(pocket_id="{123}") 56 | body = api.get_decision_body() 57 | self.assertFalse('keywords' in body) 58 | 59 | def test_new_placements(self): 60 | api = Api(pocket_id="{123}", placements=mock_placements) 61 | body = api.get_decision_body() 62 | self.assertTrue(2, len(body['placements'])) 63 | for p in body['placements']: 64 | self.assertEqual(10250, p['networkId']) 65 | self.assertEqual(1070098, p['siteId']) 66 | self.assertEqual(10, p['count']) 67 | self.assertEqual([5000], p['zoneIds']) 68 | 69 | def 
test_default_zone(self): 70 | api = Api(pocket_id="{123}", placements=mock_spocs_placement) 71 | body = api.get_decision_body() 72 | self.assertTrue(1, len(body['placements'])) 73 | for p in body['placements']: 74 | self.assertEqual([217995], p['zoneIds']) 75 | 76 | @responses.activate 77 | def test_site_is_not_stored_in_conf(self): 78 | api = Api(pocket_id="{123}", country='US', region='CA', site=1084367) 79 | body = api.get_decision_body() 80 | self.assertEqual(1084367, body['placements'][0]['siteId']) 81 | 82 | api = Api(pocket_id="{123}", country='US', region='CA') 83 | body = api.get_decision_body() 84 | self.assertEqual(1070098, body['placements'][0]['siteId']) 85 | -------------------------------------------------------------------------------- /tests/unit/test_adzerk_transform.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from unittest.mock import patch 3 | 4 | from app.adzerk.transform import \ 5 | to_spoc, tracking_url_to_shim, is_collection, to_collection, get_personalization_models 6 | from tests.fixtures.mock_spoc import * 7 | from tests.fixtures.mock_decision import * 8 | 9 | 10 | class TestAdZerkTransform(TestCase): 11 | 12 | @patch.dict('app.conf.domain_affinities', {"publishers": {'example.com': 1}}) 13 | def test_to_spoc(self): 14 | self.assertEqual(mock_spoc_2, to_spoc(mock_decision_2)) 15 | 16 | @patch.dict('app.conf.domain_affinities', {"publishers": {'example.com': 1}}) 17 | def test_to_spoc_cta(self): 18 | self.assertEqual(mock_spoc_3_cta, to_spoc(mock_decision_3_cta)) 19 | 20 | @patch.dict('app.conf.domain_affinities', {"publishers": {'example.com': 1}}) 21 | def test_to_spoc_topics(self): 22 | self.assertEqual(mock_spoc_5_topics, to_spoc(mock_decision_5_topics)) 23 | 24 | @patch.dict('app.conf.domain_affinities', {"publishers": {'example.com': 1}}) 25 | def test_to_spoc_no_sponsor(self): 26 | self.assertEqual(mock_spoc_6_no_sponsor, 
to_spoc(mock_decision_6_no_sponsor)) 27 | 28 | @patch.dict('app.conf.domain_affinities', {"publishers": {'example.com': 1}}) 29 | def test_to_spoc_is_video(self): 30 | self.assertEqual(mock_spoc_7_is_video, to_spoc(mock_decision_7_is_video)) 31 | 32 | @patch.dict('app.conf.domain_affinities', {"publishers": {'example.com': 1}}) 33 | def test_to_spoc_sponsored_by_override(self): 34 | self.assertEqual(mock_spoc_8_blank_sponsored_by_override, to_spoc(mock_decision_8_blank_sponsored_by_override)) 35 | self.assertEqual(mock_spoc_9_sponsored_by_override, to_spoc(mock_decision_9_sponsored_by_override)) 36 | 37 | @patch.dict('app.conf.domain_affinities', {"publishers": {'example.com': 1}}) 38 | def test_missing_priority_id(self): 39 | self.assertEqual(mock_spoc_10_missing_priority, to_spoc(mock_decision_10_missing_priority)) 40 | 41 | def test_tracking_url_to_shim(self): 42 | self.assertEqual('0,eyJ,Zz', tracking_url_to_shim('https://e-10250.adzerk.net/r?e=eyJ&s=Zz')) 43 | self.assertEqual('1,a,b', tracking_url_to_shim('https://e-10250.adzerk.net/i.gif?s=b&e=a')) 44 | self.assertEqual('2,123,1', tracking_url_to_shim('https://e-10250.adzerk.net/e.gif?e=123&s=1')) 45 | 46 | with self.assertRaises(Exception): 47 | tracking_url_to_shim('https://e-10250.adzerk.net/x.gif?e=123&s=1') 48 | 49 | def test_is_collection(self): 50 | self.assertEqual(False, is_collection([mock_spoc_2])) 51 | self.assertEqual(False, is_collection([mock_spoc_2, mock_collection_spoc_2])) 52 | self.assertEqual(True, is_collection([mock_collection_spoc_2])) 53 | self.assertEqual(True, is_collection([mock_collection_spoc_2, mock_collection_spoc_3])) 54 | 55 | def test_to_collection(self): 56 | self.assertEqual(mock_collection, to_collection([mock_collection_spoc_2, mock_collection_spoc_3])) 57 | 58 | def test_get_topics(self): 59 | self.assertEqual( 60 | {'business':1, 'technology': 1}, 61 | get_personalization_models({'topic_business': 'true', 'topic_technology': True})) 62 | 63 | self.assertEqual( 64 | {}, 
65 | get_personalization_models({'topic_business': '', 'topic_technology': ''})) 66 | 67 | self.assertEqual( 68 | {'business':1}, 69 | get_personalization_models({'topic_business': 'true', 'topic_technology': 'false'})) 70 | 71 | self.assertEqual( 72 | {'arts_and_entertainment':1}, 73 | get_personalization_models({'other_property_business': 'true', 'topic_arts_and_entertainment': 'true'})) 74 | -------------------------------------------------------------------------------- /tests/unit/test_adzerk_validation.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from app.adzerk.validation import validate_image_url 4 | 5 | 6 | class TestAdZerkValidation(TestCase): 7 | 8 | def test_valid_urls(self): 9 | self.assertTrue(validate_image_url('https://s.zkcdn.net/Advertisers/3a781523241f4e1293caad4ffbf2e2cb.jpg')) 10 | 11 | def test_invalid_urls(self): 12 | with self.assertRaises(Exception): 13 | self.assertFalse(validate_image_url('http://s.zkcdn.net/Advertisers/3a.jpg')) 14 | 15 | with self.assertRaises(Exception): 16 | self.assertFalse(validate_image_url('https://example.com/Advertisers/3a.jpg')) 17 | 18 | with self.assertRaises(Exception): 19 | self.assertFalse(validate_image_url('https://sxzkcdn.net/https://s.zkcdn.net/Advertisers/3a.jpg')) 20 | -------------------------------------------------------------------------------- /tests/unit/test_app.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from copy import deepcopy 3 | from unittest.mock import patch 4 | 5 | from fastapi.testclient import TestClient 6 | 7 | from tests.fixtures.mock_decision import mock_response, mock_response_900, mock_collection_response 8 | from tests.fixtures.mock_placements import mock_placements, mock_collection_placements 9 | 10 | 11 | class TestApp(unittest.TestCase): 12 | """ 13 | TODO: find a way to create a mocked response from Geo 14 | so that we can test 
the response with spocs targeted to a specific location 15 | """ 16 | mock_response_map = {'default': [mock_response]} 17 | mock_placement_map = {'top-sites': [mock_response], 'text-promo': [mock_response_900]} 18 | mock_collection_placement_map = {'sponsored-collection': [mock_collection_response], 'spocs': [mock_response]} 19 | 20 | @classmethod 21 | def create_client_no_geo_locs(cls) -> TestClient: 22 | from app.main import app 23 | return TestClient(app=app) 24 | 25 | @classmethod 26 | def get_request_body(cls, without=None, placements=None, update=None): 27 | ret = { 28 | "version": "1", 29 | "consumer_key": "12345-test-consumer-key", 30 | "pocket_id": "{12345678-1234-5678-90ab-1234567890ab}" 31 | } 32 | if placements: 33 | ret['placements'] = placements 34 | if without: 35 | ret.pop(without) 36 | if update: 37 | ret.update(update) 38 | return ret 39 | 40 | 41 | """ 42 | Tests: Pulse 43 | """ 44 | 45 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 46 | def test_app_pulse(self, mock_geo): 47 | with self.create_client_no_geo_locs() as client: 48 | resp = client.get('/pulse') 49 | self.assertEqual(resp.json(), {"pulse" : "ok"}) 50 | 51 | 52 | """ 53 | Tests: Health 54 | """ 55 | 56 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 57 | def test_app_health(self, mock_geo): 58 | with self.create_client_no_geo_locs() as client: 59 | resp = client.get('/health') 60 | self.assertEqual(resp.json(), {"health" : "ok"}) 61 | 62 | 63 | """ 64 | Tests: spocs 65 | """ 66 | 67 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 68 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 69 | def test_app_spocs_production_valid(self, mock_geo, mock_adzerk): 70 | with self.create_client_no_geo_locs() as client: 71 | resp = client.post('/spocs', json=self.get_request_body()) 72 | self.assertEqual(resp.status_code, 200) 73 | 74 | 
@patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 75 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 76 | def test_app_spocs_production_valid_with_country_region(self, mock_geo, mock_adzerk): 77 | country_region = { 78 | 'country': 'CA', 79 | 'region': 'ON', 80 | } 81 | with self.create_client_no_geo_locs() as client: 82 | resp = client.post('/spocs', json=self.get_request_body(update=country_region)) 83 | self.assertEqual(resp.status_code, 200) 84 | 85 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 86 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 87 | def test_app_spocs_production_valid_with_country_region_query_param(self, mock_geo, mock_adzerk): 88 | with self.create_client_no_geo_locs() as client: 89 | resp = client.post('/spocs?country=CA&region=ON', json=self.get_request_body()) 90 | self.assertEqual(resp.status_code, 200) 91 | 92 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 93 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_collection_placement_map) 94 | def test_app_spocs_collection_v1(self, mock_geo, mock_adzerk): 95 | """ 96 | API version 1 returns the collection as an array for backwards compatibility.
97 | """ 98 | request_body = self.get_request_body(placements=mock_collection_placements) 99 | with self.create_client_no_geo_locs() as client: 100 | resp = client.post('/spocs', json=request_body) 101 | self.assertEqual(resp.status_code, 200) 102 | self.assertEqual(resp.json()['sponsored-collection'][0]['collection_title'], 'Best of the Web') 103 | 104 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 105 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_collection_placement_map) 106 | def test_app_spocs_collection_v2(self, mock_geo, mock_adzerk): 107 | """ 108 | API version 2 returns the collection as an object, with collection-level fields pulled up. 109 | """ 110 | request_body = self.get_request_body(placements=mock_collection_placements, update={'version': '2'}) 111 | with self.create_client_no_geo_locs() as client: 112 | resp = client.post('/spocs', json=request_body) 113 | self.assertEqual(resp.status_code, 200) 114 | self.assertEqual(resp.json()['spocs'][0]['title'], 'title 1000') 115 | 116 | collection = resp.json()['sponsored-collection'] 117 | self.assertEqual(collection['title'], 'Best of the Web') 118 | self.assertEqual(collection['sponsor'], 'sponsor') 119 | self.assertEqual(collection['flight_id'], 333) 120 | self.assertEqual(collection['items'][0]['title'], 'title 900') 121 | self.assertTrue('collection_title' not in collection['items'][0]) 122 | 123 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 124 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 125 | def test_app_spocs_production_invalid_no_version(self, mock_geo, mock_adzerk): 126 | with self.create_client_no_geo_locs() as client: 127 | resp = client.post('/spocs', json=self.get_request_body(without='version')) 128 | self.assertEqual(resp.status_code, 400) 129 | 130 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 131 | 
@patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 132 | def test_app_spocs_production_invalid_no_pocket_id(self, mock_geo, mock_adzerk): 133 | with self.create_client_no_geo_locs() as client: 134 | resp = client.post('/spocs', json=self.get_request_body(without='pocket_id')) 135 | self.assertEqual(resp.status_code, 400) 136 | 137 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 138 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 139 | def test_app_spocs_production_invalid_no_consumer_key(self, mock_geo, mock_adzerk): 140 | with self.create_client_no_geo_locs() as client: 141 | resp = client.post('/spocs', json=self.get_request_body(without='consumer_key')) 142 | self.assertEqual(resp.status_code, 400) 143 | 144 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 145 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 146 | def test_app_spocs_production_invalid_pocket_id(self, mock_geo, mock_adzerk): 147 | data = self.get_request_body() 148 | data['pocket_id'] = 'invalid' 149 | with self.create_client_no_geo_locs() as client: 150 | resp = client.post('/spocs', json=data) 151 | self.assertEqual(resp.status_code, 400) 152 | 153 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 154 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 155 | def test_app_spocs_production_unrecognized_field(self, mock_geo, mock_adzerk): 156 | data = self.get_request_body() 157 | data['invalid'] = 'something' 158 | with self.create_client_no_geo_locs() as client: 159 | resp = client.post('/spocs', json=data) 160 | self.assertEqual(resp.status_code, 400) 161 | 162 | 163 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 164 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 165 | def 
test_app_spocs_production_invalid_version_value(self, mock_geo, mock_adzerk): 166 | data = self.get_request_body() 167 | data['version'] = '2'' /**/ should sanitize input' 168 | with self.create_client_no_geo_locs() as client: 169 | resp = client.post('/spocs', json=data) 170 | self.assertEqual(resp.status_code, 400) 171 | 172 | 173 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 174 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 175 | def test_app_spocs_production_invalid_content_type(self, mock_geo, mock_adzerk): 176 | with self.create_client_no_geo_locs() as client: 177 | resp = client.post( 178 | '/spocs', 179 | headers={'Content-Type': 'text'}, 180 | data=self.get_request_body() 181 | ) 182 | self.assertEqual(resp.status_code, 400) 183 | 184 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 185 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 186 | def test_app_spocs_staging_production_valid_query_param(self, mock_geo, mock_adzerk): 187 | """ 188 | This test would be more useful if we checked that we got different responses based on the site. 
189 | But currently not sure how to return a mocked response based on site, or even to test 190 | :param mock_geo: 191 | :param mock_adzerk: 192 | :return: 193 | """ 194 | with self.create_client_no_geo_locs() as client: 195 | resp = client.post('/spocs?site=12345', json=self.get_request_body()) 196 | self.assertEqual(resp.status_code, 200) 197 | 198 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 199 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_response_map) 200 | def test_app_spocs_staging_production_valid(self, mock_geo, mock_adzerk): 201 | site = { 202 | 'site': '12345' 203 | } 204 | with self.create_client_no_geo_locs() as client: 205 | resp = client.post('/spocs', json=self.get_request_body(update=site)) 206 | self.assertEqual(resp.status_code, 200) 207 | 208 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 209 | def test_app_spocs_production_valid_placements(self, mock_geo): 210 | with self.create_client_no_geo_locs() as client: 211 | resp = client.post('/spocs', json=self.get_request_body(placements=mock_placements)) 212 | self.assertEqual(200, resp.status_code) 213 | 214 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 215 | def test_app_spocs_production_invalid_placement_name(self, mock_geo): 216 | bad_placements = deepcopy(mock_placements) 217 | bad_placements[0].pop('name') 218 | with self.create_client_no_geo_locs() as client: 219 | resp = client.post('/spocs', json=self.get_request_body(placements=bad_placements)) 220 | self.assertEqual(400, resp.status_code) 221 | 222 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 223 | def test_app_spocs_production_unknown_placement_field(self, mock_geo): 224 | bad_placements = deepcopy(mock_placements) 225 | bad_placements[0]['adTypess'] = ['test'] 226 | with self.create_client_no_geo_locs() as client: 227 | resp = client.post('/spocs', 
json=self.get_request_body(placements=bad_placements)) 228 | self.assertEqual(400, resp.status_code) 229 | 230 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 231 | def test_app_spocs_production_invalid_json(self, mock_geo): 232 | with self.create_client_no_geo_locs() as client: 233 | resp = client.post('/spocs', json='${${[]}}') 234 | self.assertEqual(400, resp.status_code) 235 | 236 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 237 | def test_app_spocs_production_invalid_placement(self, mock_geo): 238 | bad_placements = deepcopy(mock_placements) 239 | bad_placements[0] = 'map[ad_types:[3120] name:foo zone_ids:[280143]]' 240 | with self.create_client_no_geo_locs() as client: 241 | resp = client.post('/spocs', json=self.get_request_body(placements=bad_placements)) 242 | self.assertEqual(400, resp.status_code) 243 | 244 | @patch('app.provider.geo_provider.GeolocationProvider.__init__', return_value=None) 245 | @patch('app.adzerk.api.Api.get_decisions', return_value=mock_placement_map) 246 | def test_app_spocs_production_valid_placements_call(self, mock_geo, mock_dec): 247 | bad_placements = deepcopy(mock_placements) 248 | with self.create_client_no_geo_locs() as client: 249 | resp = client.post('/spocs', json=self.get_request_body(placements=bad_placements)) 250 | self.assertEqual(200, resp.status_code) 251 | result = resp.json() 252 | self.assertTrue('top-sites' in result) 253 | self.assertEqual(1000, result['top-sites'][0]['campaign_id']) 254 | self.assertEqual('title 1000', result['top-sites'][0]['title']) 255 | self.assertTrue('text-promo' in result) 256 | self.assertEqual(900, result['text-promo'][0]['campaign_id']) 257 | self.assertEqual('title 900', result['text-promo'][0]['title']) 258 | -------------------------------------------------------------------------------- /tests/unit/test_app_validation.py: -------------------------------------------------------------------------------- 
1 | from unittest import TestCase 2 | 3 | from app.validation import is_valid_pocket_id 4 | 5 | 6 | class TestAppValidation(TestCase): 7 | 8 | def test_valid_pocket_id(self): 9 | self.assertTrue(is_valid_pocket_id('{12347fff-00b0-aaaa-0978-189231239808}')) 10 | # Uppercase is allowed: 11 | self.assertTrue(is_valid_pocket_id('{12347fff-00F0-AAAA-0978-189231feb808}')) 12 | 13 | def test_invalid_pocket_id(self): 14 | self.assertFalse(is_valid_pocket_id('{}')) 15 | # 'g' not allowed: 16 | self.assertFalse(is_valid_pocket_id('{g2345678-0000-aaaa-0978-189231239808}')) 17 | # Has to be correct length: 18 | self.assertFalse(is_valid_pocket_id('{12345678-0000-aaaa-0978-18923123980}')) 19 | self.assertFalse(is_valid_pocket_id('{12345678-0000-aaaa-0978-1892312398080}')) 20 | # Spaces not allowed: 21 | self.assertFalse(is_valid_pocket_id('{12345678 0000-aaaa-0978-189231239808}')) 22 | # Angular brackets not allowed: 23 | self.assertFalse(is_valid_pocket_id('{-0000-aaaa-0978-189231239808}')) 24 | -------------------------------------------------------------------------------- /tests/unit/test_geolocation_factory.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase, mock 2 | 3 | from app.geolocation.factory import Factory 4 | 5 | 6 | class TestGeoLocationFactory(TestCase): 7 | @mock.patch("boto3.session.Session") 8 | def test_s3(self, MockSession): 9 | f = Factory() 10 | 11 | assert MockSession.called 12 | assert f.storage_provider == "S3" 13 | 14 | @mock.patch("google.cloud.storage.Client") 15 | def test_gcs(self, MockClient): 16 | with mock.patch.dict("os.environ", {"GEOIP_GCS_BUCKET": "acme"}): 17 | f = Factory() 18 | 19 | assert MockClient.called 20 | assert f.storage_provider == "GCS" 21 | -------------------------------------------------------------------------------- /tests/unit/test_geolocation_provider.py: -------------------------------------------------------------------------------- 1 | from unittest 
import TestCase
from unittest.mock import patch

from app.provider.geo_provider import GeolocationProvider
from tests.fixtures.mock_factory import get_mocked_geolocation_factory


class TestGeolocationProvider(TestCase):
    """Tests for the GeolocationProvider singleton over a mocked factory."""

    __FACTORY = get_mocked_geolocation_factory()

    # NOTE on all tests below: @patch decorators inject mocks bottom-up, so
    # the FIRST mock parameter is the Factory.get_instance patch and the
    # second is the boto3 client patch. The original parameter names
    # (mock_s3, mock_geofactory) were swapped; fixed here.
    @patch('boto3.session.Session.client', return_value=None)
    @patch('app.geolocation.factory.Factory.get_instance', return_value=__FACTORY)
    def test_geolocation_no_setattr(self, mock_geofactory, mock_s3):
        """The provider forbids attribute assignment once instantiated."""
        glp = GeolocationProvider()
        with self.assertRaises(AttributeError) as ctx:
            glp.__setattr__('__PROVIDER_INSTANCE', 'something else')
        self.assertEqual('Already instantiated', str(ctx.exception))

    @patch('boto3.session.Session.client', return_value=None)
    @patch('app.geolocation.factory.Factory.get_instance', return_value=__FACTORY)
    def test_geolocation_valid_ip(self, mock_geofactory, mock_s3):
        """A known IP resolves to city, timezone, country and region."""
        glp = GeolocationProvider()
        city = glp.get_city('216.160.83.56')
        self.assertEqual('Milton', city.city.name)
        self.assertEqual('America/Los_Angeles', city.location.time_zone)
        self.assertEqual('US', glp.get_country(city))
        self.assertEqual('WA', glp.get_region(city))

    @patch('boto3.session.Session.client', return_value=None)
    @patch('app.geolocation.factory.Factory.get_instance', return_value=__FACTORY)
    def test_geolocation_invalid_ip(self, mock_geofactory, mock_s3):
        """An address absent from the database raises a RuntimeError.

        Fix: the original try/except silently PASSED when no exception was
        raised; assertRaises fails the test in that case.
        """
        glp = GeolocationProvider()
        with self.assertRaises(RuntimeError) as ctx:
            glp.get_city('127.0.0.1')
        self.assertEqual('The address 127.0.0.1 is not in the database.', str(ctx.exception))

    @patch('boto3.session.Session.client', return_value=None)
    @patch('app.geolocation.factory.Factory.get_instance', return_value=__FACTORY)
    def test_geolocation_no_city(self, mock_geofactory, mock_s3):
        """get_country/get_region tolerate a missing (None) city record."""
        glp = GeolocationProvider()
self.assertIsNone(glp.get_country(None)) 46 | self.assertIsNone(glp.get_region(None)) 47 | -------------------------------------------------------------------------------- /tests/unit/test_telemetry_handler.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import gzip 3 | import json 4 | import logging 5 | import os 6 | import random 7 | 8 | from datetime import datetime, timezone 9 | from unittest import mock, TestCase 10 | from unittest.mock import patch, call 11 | from app.telemetry.handler import handle_message, ping_adzerk, record_metrics 12 | 13 | 14 | def make_encoded_shim(timestamp_millis): 15 | kevel_shim = f'{{"v":"1.11","av":1963769,"at":3617,"bt":0,"cm":56469793,"ch":36848,"ck":{{}},"cr":340374584,"di":"f9bee25375f147888123a33bfdf871c8","dj":0,"ii":"80740184383e4e5793ec76705421b78a","dm":3,"fc":515667491,"fl":2000,"ip":"34.105.7.247","kw":"us,us-ct","mk":"us","nw":10250,"pc":0.93,"op":0.93,"ec":0,"gm":0,"ep":null,"pr":147518,"rt":2,"rs":500,"sa":"55","sb":"i-0983b4c7d16eabe85","sp":172421,"st":1070098,"uk":"{{4ef12475-fb03-4ac8-881f-8637bfbc76a7}}","zn":217758,"ts":{timestamp_millis},"pn":"spocs","gc":true,"gC":true,"gs":"none","dc":1,"tz":"UTC","ba":1,"fq":1}}' 16 | return base64.b64encode(bytes(kevel_shim, 'utf-8')).rstrip(b'=').decode() 17 | 18 | class TestTelemetryHandler(TestCase): 19 | @patch('app.telemetry.handler.record_metrics') 20 | @patch('app.telemetry.handler.ping_adzerk') 21 | def test_handle_message_legacy_ping(self, mock_ping_adzerk, mock_record_metrics): 22 | telemetry = {'tiles': [{'shim': '0,foo,bar'}, {'shim': '1,1,2'}, {'shim': '2,a,b'}]} 23 | data = base64.b64encode(gzip.compress(json.dumps(telemetry).encode('utf-8'))) 24 | submission_timestamp = '2024-04-24T21:02:18.123456Z' 25 | attributes = {'document_namespace': 'activity-stream', 'document_type': 'impression-stats', 26 | 'user_agent_version': 121, 'submission_timestamp': submission_timestamp} 27 | 28 | 
handle_message(event={'data': data, 'attributes': attributes}, context={}) 29 | mock_ping_adzerk.assert_has_calls([ 30 | call('0,foo,bar'), 31 | call('1,1,2'), 32 | call('2,a,b'), 33 | ]) 34 | mock_record_metrics.assert_has_calls([ 35 | call('0,foo,bar', submission_timestamp, 'activity-stream', 121), 36 | call('1,1,2', submission_timestamp, 'activity-stream', 121), 37 | call('2,a,b', submission_timestamp, 'activity-stream', 121), 38 | ]) 39 | 40 | @patch('app.telemetry.handler.record_metrics') 41 | @patch('app.telemetry.handler.ping_adzerk') 42 | def test_handle_message_legacy_ping_no_version(self, mock_ping_adzerk, mock_record_metrics): 43 | telemetry = {'tiles': [{'shim': '0,foo,bar'}, {'shim': '1,1,2'}, {'shim': '2,a,b'}]} 44 | data = base64.b64encode(gzip.compress(json.dumps(telemetry).encode('utf-8'))) 45 | submission_timestamp = '2024-04-24T21:02:18.123456Z' 46 | attributes = {'document_namespace': 'activity-stream', 'document_type': 'impression-stats', 47 | 'submission_timestamp': submission_timestamp} 48 | 49 | handle_message(event={'data': data, 'attributes': attributes}, context={}) 50 | mock_ping_adzerk.assert_not_called() 51 | mock_record_metrics.assert_not_called() 52 | 53 | @patch('app.telemetry.handler.record_metrics') 54 | @patch('app.telemetry.handler.ping_adzerk') 55 | def test_handle_message_android_spoc_ping(self, mock_ping_adzerk, mock_record_metrics): 56 | telemetry = {'metrics': {'text': {'pocket.spoc_shim': '0,foo,bar'}}} 57 | data = base64.b64encode(gzip.compress(json.dumps(telemetry).encode('utf-8'))) 58 | submission_timestamp = '2024-04-24T21:02:18.123456Z' 59 | attributes = {'document_namespace': 'org-mozilla-firefox', 'document_type': 'spoc', 60 | 'user_agent_version': 121, 'submission_timestamp': submission_timestamp} 61 | 62 | handle_message(event={'data': data, 'attributes': attributes}, context={}) 63 | mock_ping_adzerk.assert_called_with('0,foo,bar') 64 | mock_record_metrics.assert_called_with('0,foo,bar', submission_timestamp, 
'org-mozilla-firefox', 121) 65 | 66 | @patch('app.telemetry.handler.record_metrics') 67 | @patch('app.telemetry.handler.ping_adzerk') 68 | def test_handle_message_desktop_spoc_ping(self, mock_ping_adzerk, mock_record_metrics): 69 | telemetry = {'metrics': {'text': {'pocket.shim': '0,foo,bar'}}} 70 | data = base64.b64encode(gzip.compress(json.dumps(telemetry).encode('utf-8'))) 71 | submission_timestamp = '2024-04-24T21:02:18.123456Z' 72 | attributes = {'document_namespace': 'firefox-desktop', 'document_type': 'spoc', 73 | 'user_agent_version': 122, 'submission_timestamp': submission_timestamp} 74 | 75 | handle_message(event={'data': data, 'attributes': attributes}, context={}) 76 | mock_ping_adzerk.assert_called_with('0,foo,bar') 77 | mock_record_metrics.assert_called_with('0,foo,bar', submission_timestamp, 'firefox-desktop', 122) 78 | 79 | @patch('app.telemetry.handler.ping_adzerk') 80 | def test_handle_message_desktop_spoc_ping_old_version(self, mock_ping_adzerk): 81 | telemetry = {'metrics': {'text': {'pocket.shim': '0,foo,bar'}}} 82 | data = base64.b64encode(gzip.compress(json.dumps(telemetry).encode('utf-8'))) 83 | attributes = {'document_namespace': 'firefox-desktop', 'document_type': 'spoc', 84 | 'user_agent_version': 121} 85 | 86 | handle_message(event={'data': data, 'attributes': attributes}, context={}) 87 | mock_ping_adzerk.assert_not_called() 88 | 89 | @patch('app.telemetry.handler.ping_adzerk') 90 | def test_handle_message_desktop_spoc_ping_no_version(self, mock_ping_adzerk): 91 | telemetry = {'metrics': {'text': {'pocket.shim': '0,foo,bar'}}} 92 | data = base64.b64encode(gzip.compress(json.dumps(telemetry).encode('utf-8'))) 93 | attributes = {'document_namespace': 'firefox-desktop', 'document_type': 'spoc'} 94 | 95 | handle_message(event={'data': data, 'attributes': attributes}, context={}) 96 | mock_ping_adzerk.assert_not_called() 97 | 98 | @patch('app.telemetry.handler.ping_adzerk') 99 | def test_handle_unknown_namespace(self, mock_ping_adzerk): 100 
| telemetry = {'metrics': {'text': {'pocket.shim': '0,foo,bar'}}} 101 | data = base64.b64encode(gzip.compress(json.dumps(telemetry).encode('utf-8'))) 102 | attributes = {'document_namespace': 'firefox-temp', 'document_type': 'spoc', 103 | 'user_agent_version': 122} 104 | 105 | handle_message(event={'data': data, 'attributes': attributes}, context={}) 106 | mock_ping_adzerk.assert_not_called() 107 | 108 | @patch('app.telemetry.handler.ping_adzerk') 109 | def test_handle_unknown_doctype(self, mock_ping_adzerk): 110 | telemetry = {'metrics': {'text': {'pocket.shim': '0,foo,bar'}}} 111 | data = base64.b64encode(gzip.compress(json.dumps(telemetry).encode('utf-8'))) 112 | attributes = {'document_namespace': 'firefox-desktop', 'document_type': 'temp', 113 | 'user_agent_version': 122} 114 | 115 | handle_message(event={'data': data, 'attributes': attributes}, context={}) 116 | mock_ping_adzerk.assert_not_called() 117 | 118 | @patch('urllib.request.urlopen') 119 | def test_ping_adzerk_r(self, mock_urlopen): 120 | ping_adzerk('0,foo,bar') 121 | mock_urlopen.assert_called_once_with("https://e-10250.adzerk.net/r?e=foo&s=bar") 122 | 123 | @patch('urllib.request.urlopen') 124 | def test_ping_adzerk_i(self, mock_urlopen): 125 | ping_adzerk('1,foo,bar') 126 | mock_urlopen.assert_called_once_with("https://e-10250.adzerk.net/i.gif?e=foo&s=bar") 127 | 128 | @patch('urllib.request.urlopen') 129 | def test_ping_adzerk_e(self, mock_urlopen): 130 | ping_adzerk('2,foo,bar') 131 | mock_urlopen.assert_called_once_with("https://e-10250.adzerk.net/e.gif?e=foo&s=bar") 132 | 133 | @patch('google.cloud.logging') 134 | @patch('logging.info') 135 | def test_record_metrics_no_sampling(self, mock_logging, mock_google_logging): 136 | os.environ["METRICS_SAMPLE_RATE"] = "0" 137 | shim = make_encoded_shim(1713971071000) 138 | record_metrics(f'2,{shim},bar', '2024-04-24T21:02:18.123456Z', "firefox-desktop", 125) 139 | mock_logging.assert_not_called() 140 | 141 | @patch('google.cloud.logging') 142 | 
@patch('logging.info') 143 | def test_record_metrics_sampling_misconfigured(self, mock_logging, mock_google_logging): 144 | os.environ["METRICS_SAMPLE_RATE"] = "true" 145 | shim = make_encoded_shim(1713971071000) 146 | record_metrics(f'2,{shim},bar', '2024-04-24T21:02:18.123456Z', "firefox-desktop", 125) 147 | mock_logging.assert_not_called() 148 | 149 | @patch('google.cloud.logging') 150 | @patch('logging.info') 151 | def test_record_metrics_sample_rate_excluded(self, mock_logging, mock_google_logging): 152 | # seed random to ensure random sample is excluded 153 | random.seed(0) 154 | os.environ["METRICS_SAMPLE_RATE"] = "500" 155 | shim = make_encoded_shim(1713971071000) 156 | record_metrics(f'2,{shim},bar', '2024-04-24T21:02:18.123456Z', "firefox-desktop", 125) 157 | mock_logging.assert_not_called() 158 | 159 | @patch('google.cloud.logging') 160 | @patch('logging.info') 161 | def test_record_metrics_shim_not_parsable(self, mock_logging, mock_google_logging): 162 | os.environ["METRICS_SAMPLE_RATE"] = "1000" 163 | record_metrics(f'2,shim,bar', '2024-04-24T21:02:18.123456Z', "firefox-desktop", 125) 164 | mock_logging.assert_not_called() 165 | 166 | @patch('google.cloud.logging') 167 | @patch('logging.info') 168 | def test_record_metrics_timestamp_not_parsable(self, mock_logging, mock_google_logging): 169 | os.environ["METRICS_SAMPLE_RATE"] = "1000" 170 | shim = make_encoded_shim(1713971071000) 171 | record_metrics(f'2,{shim},bar', 'invalid-timestamp', "firefox-desktop", 125) 172 | mock_logging.assert_not_called() 173 | 174 | @patch('app.telemetry.handler.get_now', mock.MagicMock(return_value=datetime(2024, 4, 25, 15, 4, 44, 686803, tzinfo=timezone.utc))) 175 | @patch('google.cloud.logging') 176 | @patch('logging.info') 177 | def test_log_metrics_sample_rate_included(self, mock_logging, mock_google_logging): 178 | # seed random to ensure random sample is included 179 | random.seed(0) 180 | os.environ["METRICS_SAMPLE_RATE"] = "900" 181 | shim = 
make_encoded_shim(1713971071000) 182 | record_metrics(f'2,{shim},bar', '2024-04-24T21:02:18.123456Z', "firefox-desktop", 125) 183 | json_fields = { 184 | "glean_latency": 64946563, 185 | "adserver_latency": 86413686, 186 | "namespace": "firefox-desktop", 187 | "user_agent_version": 125 188 | } 189 | mock_logging.assert_called_once_with("metrics", extra={"json_fields": json_fields}) 190 | --------------------------------------------------------------------------------