├── .circleci
└── config.yml
├── .editorconfig
├── .eslintrc.yaml
├── .gitignore
├── .npmrc
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── bin
└── client.js
├── config
├── dev.yml
├── dynamic.js
└── production.yml
├── docs
├── metrics.md
├── requests-and-responses.md
└── systems_diagram.png
├── functions
├── accept-test.js
├── accept.js
├── heartbeat-test.js
├── heartbeat.js
├── mockEndpoints.js
├── periodicMetrics-test.js
├── periodicMetrics.js
├── processQueueItem-test.js
├── processQueueItem.js
├── version-test.js
└── version.js
├── lib
├── constants.js
├── logging.js
├── metrics-test.js
├── metrics.js
├── sentry.js
├── test-setup.js
└── utils.js
├── package-lock.json
├── package.json
├── renovate.json
├── serverless.local.yml-dist
└── serverless.yml
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | jobs:
3 | test:
4 | docker:
5 | - image: circleci/node:8.11.4@sha256:372dbff602066c00561c11b8a19d85665001b9c13ec82f87247e1910e9fbd63b
6 | working_directory: ~/repo
7 | steps:
8 | - checkout
9 | - restore_cache:
10 | keys:
11 | - v1-dependencies-{{ checksum "package.json" }}
12 | - v1-dependencies-
13 | - run: npm install
14 | - save_cache:
15 | paths:
16 | - node_modules
17 | key: v1-dependencies-{{ checksum "package.json" }}
18 | - run: npm run lint
19 | - run: npm test
20 | - persist_to_workspace:
21 | root: .
22 | paths:
23 | - ./*
24 | deploy_dev:
25 | docker:
26 | - image: circleci/node:8.11.4@sha256:372dbff602066c00561c11b8a19d85665001b9c13ec82f87247e1910e9fbd63b
27 | working_directory: ~/repo
28 | steps:
29 | - attach_workspace:
30 | at: .
31 | - run: npm run deploy:master
32 |
33 | workflows:
34 | version: 2
35 | test_and_deploy:
36 | jobs:
37 | - test
38 | - deploy_dev:
39 | requires:
40 | - test
41 | filters:
42 | branches:
43 | only: master
44 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # http://editorconfig.org
2 | root = true
3 |
4 | [*]
5 | indent_style = space
6 | end_of_line = lf
7 | charset = utf-8
8 | trim_trailing_whitespace = true
9 | indent_size = 2
10 |
--------------------------------------------------------------------------------
/.eslintrc.yaml:
--------------------------------------------------------------------------------
1 | extends:
2 | - eslint:recommended
3 | - plugin:mozilla/recommended
4 | - plugin:node/recommended
5 |
6 | env:
7 | node: true
8 | es6: true
9 |
10 | parserOptions:
11 | ecmaVersion: 2017
12 |
13 | root: true
14 |
15 | rules:
16 | node/no-unpublished-require: off
17 | no-console: off
18 |
19 | overrides:
20 | -
21 | files:
22 | - '**/*-test.js'
23 | rules:
24 | import/unambiguous: off
25 | env:
26 | mocha: true
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .*.sw?
2 | *~
3 | *.log
4 | .serverless
5 | node_modules
6 | npm-debug.log
7 | serverless.local.yml
8 |
--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
1 | save-exact=true
2 |
3 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Community Participation Guidelines
2 |
3 | This repository is governed by Mozilla's code of conduct and etiquette guidelines.
4 | For more details, please read the
5 | [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
6 |
7 | ## How to Report
8 | For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page.
9 |
10 |
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Mozilla
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # watchdog-proxy
2 |
3 | [![CircleCI](https://circleci.com/gh/mozilla/watchdog-proxy.svg?style=svg)](https://circleci.com/gh/mozilla/watchdog-proxy)
4 |
5 | This is a simple proxy which interfaces with Microsoft's [PhotoDNA Service](https://www.microsoft.com/en-us/photodna).
6 |
7 | ## Systems Diagram
8 |
9 | ![Systems Diagram](docs/systems_diagram.png)
10 |
11 | ### Quick summary of operation
12 |
13 | 1. A third-party Consumer sends an HTTP POST request to the AWS API gateway to invoke the Accept lambda function
14 | 1. The Accept function authenticates the Consumer's credentials supplied via Hawk against a DynamoDB table
15 | 1. If the credentials & parameters are valid, details of the Consumer's submission are sent to the SQS queue and the uploaded image is saved in a private S3 bucket.
16 | 1. An Event Processor lambda function is invoked by [the AWS queue poller][poller] for each SQS message
17 | 1. The Event Processor function calls the upstream web service (i.e. PhotoDNA) with the details of a Consumer submission
18 | 1. On a response from the upstream web service, the Event Processor makes a request back to a URL included in the Consumer submission
19 | 1. Finally, on success, the Event Processor deletes the message from the SQS queue to acknowledge completion
20 |
21 | Note: images in the S3 bucket are not currently deleted, though objects in the bucket have a 30-day expiration
22 |
23 | [poller]: https://docs.aws.amazon.com/lambda/latest/dg/with-sqs.html
24 |
25 | ## Development
26 |
27 | ### Useful NPM scripts
28 |
29 | - `npm run lint` - check JS syntax & formatting
30 | - `npm run test` - run JS tests
31 | - `npm run watch` - start a file watcher that runs tests & lint
32 | - `npm run prettier` - clean up JS formatting
33 | - `npm run deploy` - deploy a stack configured for production
34 | - `npm run deploy:dev` - deploy a stack configured for development (e.g. with `ENABLE_DEV_AUTH=1`)
35 | - `npm run info` - display information about the currently deployed stack (e.g. handy for checking the stack's API URL)
36 | - `npm run logs -- -f accept -t` - watch logs for the function `accept`
37 | - `npm run client -- [--id <id> --key <key> --url <url>]` - make an authenticated request, defaults to an auto-detected service URL for your stack with credentials devuser / devkey
38 | - `npm run client -- --url https://watchdog-proxy.dev.mozaws.net` - make an authenticated request to the dev stack
39 | - `npm run client -- --help` - see further options accepted by the client
40 |
41 | ### Quickstart Notes
42 |
43 | First, ensure [node.js 8.11.1](https://nodejs.org/en/) or newer is installed. Then, the steps to get started look something like this:
44 | ```
45 | git clone git@github.com:mozilla/watchdog-proxy.git
46 | cd watchdog-proxy
47 | npm install
48 | npm start
49 | ```
50 |
51 | After cloning the repository and installing dependencies, `npm start` will launch several file watchers that build assets as needed, run unit tests, and check code quality as you edit files.
52 |
53 | For local development, create your own version of `serverless.local.yml`:
54 | 1. Copy `serverless.local.yml-dist` to `serverless.local.yml`
55 | 1. Edit `serverless.local.yml`
56 | 1. Change at least the `stage` property to a name that's unique to you
57 |
58 | You don't need to create this file if you intend to use environment variables to configure deployment. In fact, this file will override many environment variables if you use it.
59 |
60 | The next step is to get the service running on AWS. You'll need to [sign up for an account](https://aws.amazon.com/) or [request a Dev IAM account from Mozilla Cloud Operations](https://mana.mozilla.org/wiki/display/SVCOPS/Requesting+A+Dev+IAM+account+from+Cloud+Operations). (The latter is available only to Mozillians.)
61 |
62 | Optional: [Install AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/installing.html). This gives you tools to work with AWS from the command line.
63 |
64 | If you already have an AWS key ID and secret, [you can follow the quick start docs for Serverless to configure your credentials](https://serverless.com/framework/docs/providers/aws/guide/credentials#quick-setup)
65 |
66 | If you don't already have an AWS key ID and secret, [follow the guide to acquire and configure these credentials](https://serverless.com/framework/docs/providers/aws/guide/credentials/).
67 |
68 | Try deploying the service to AWS:
69 | ```
70 | npm run deploy:dev
71 | ```
72 |
73 | You should see output like the following:
74 | ```
75 | $ npm run deploy:dev
76 | Serverless: Packaging service...
77 | Serverless: Excluding development dependencies...
78 | Serverless: Creating Stack...
79 | Serverless: Checking Stack create progress...
80 | .....
81 | Serverless: Stack create finished...
82 | Serverless: Uploading CloudFormation file to S3...
83 | Serverless: Uploading artifacts...
84 | Serverless: Uploading service .zip file to S3 (6.39 MB)...
85 | Serverless: Validating template...
86 | Serverless: Updating Stack...
87 | Serverless: Checking Stack update progress...
88 | ...........................................................................
89 | Serverless: Stack update finished...
90 | Service Information
91 | service: watchdog-proxy
92 | stage: lmorchard
93 | region: us-east-1
94 | stack: watchdog-proxy-lmorchard
95 | api keys:
96 | None
97 | endpoints:
98 | GET - https://30r00qsyhf.execute-api.us-east-1.amazonaws.com/lmorchard/accept
99 | functions:
100 | accept: watchdog-proxy-lmorchard-accept
101 | pollQueue: watchdog-proxy-lmorchard-pollQueue
102 | processQueueItem: watchdog-proxy-lmorchard-processQueueItem
103 | ```
104 |
105 | If everything was successful, you should now have a running stack with an HTTPS resource to accept requests listed as one of the endpoints. Copy the listed endpoint URL and keep it handy.
106 |
107 | To send your first request, use the `client` script with the GET endpoint URL:
108 | ```
109 | npm run client
110 | ```
111 |
112 | With no options, this command should attempt to auto-detect the endpoint URL for your deployed stack. You can check to see the results of this request working its way through the stack with the following log commands:
113 | ```
114 | # Client request is accepted into the queue
115 | npm run logs -- -f accept
116 | # Client request is received from the queue
117 | npm run logs -- -f pollQueue
118 | # Queued job is processed
119 | npm run logs -- -f processQueueItem
120 | # Upstream service receives a request
121 | npm run logs -- -f mockUpstream
122 | # Client callback service receives a negative result
123 | npm run logs -- -f mockClientNegative
124 | # Client callback service receives a positive result
125 | npm run logs -- -f mockClientPositive
126 | ```
127 |
128 | If you want to remove this stack from AWS and delete everything, run `npm run remove`
129 |
130 | The [Serverless docs on workflow are useful](https://serverless.com/framework/docs/providers/aws/guide/workflow/).
131 |
132 | ### Custom stable domain name for local development
133 |
134 | By default, no custom domain name is created. You can use the semi-random domain name serverless offers on deployment and with `serverless info`.
135 |
136 | If you want to create a domain name for local development (e.g. `watchdog-proxy-lmorchard.dev.mozaws.net`):
137 |
138 | 1. Edit your `serverless.local.yml` to contain an enabled `customDomain` section with appropriate details
139 | 1. Run `npx serverless create_domain` - this only needs to be done once, to create the new custom domain name in Route53 and an accompanying CloudFront distribution
140 | 1. Run `npm run deploy:dev` to update your stack
141 |
142 | Read this Serverless Blog post for more details: https://serverless.com/blog/serverless-api-gateway-domain/
143 |
144 | ## Deployment
145 |
146 | ### Environment variables
147 |
148 | When using `serverless deploy` to deploy the stack, you can use several environment variables to alter configuration. Note that many of these are currently overridden by a `serverless.local.yml` file, if present.
149 |
150 | - `LOG_LEVEL` - (default: "info") one of the following severity levels for log verbosity in increasing order: critical, error, warn, info, debug, verbose, trace
151 | - `LOG_DEBUG` - (default: "0") set to "1" to debug logging itself
152 | - `LOG_FORMAT` - (default: "heka") set to "pretty" for human-readable, "heka" for JSON format readable by Heka
153 | - `STAGE` - Stage for building and deploying - one of `dev`, `stage`, or `production`
154 | - `NODE_ENV` - Use `production` for a more optimized production build, `development` for a development build with more verbose logging and other conveniences
155 | - `PREFIX` - a prefix string used in constructing the names of resources and functions, by default a combination of service and stage names
156 | - `HITRATE_TABLE` - name of the DynamoDB table used for tracking rate limiting, overrides the automatically named default value
157 | - `CREDENTIALS_TABLE` - name of the DynamoDB table containing user credentials, overrides the automatically named default value
158 | - `QUEUE_NAME` - name of the SQS queue used to manage incoming jobs, overrides the automatically named default value
159 | - `QUEUE_ARN` - ARN of the SQS queue used to manage incoming jobs - might seem redundant with `QUEUE_NAME`, but it's required for `production` stage to trigger the `processQueueItem` function with SQS events (done automatically for `dev` stage)
160 | - `CONTENT_BUCKET` - name of the S3 bucket used for storing images and job results, overrides the automatically named default value
161 | - `GIT_COMMIT` - The value reported by the `__version__` resource as `commit`. If not set, Serverless config will attempt to run the `git` command to discover the current commit.
162 | - `EMAIL_FROM` - email address from which alerts on positive matches are sent, *must be verified in AWS SES*
163 | - `EMAIL_TO` - email address to which all alerts on positive matches will be sent (along with positive_email parameter in requests), blank by default
164 | - `EMAIL_EXPIRES` - number of seconds for which links in a positive alert email should remain valid, defaults to one month (2592000 seconds)
165 | - `UPSTREAM_SERVICE_URL` - the URL of the production upstream web service (i.e. PhotoDNA)
166 | - `UPSTREAM_SERVICE_KEY` - the private subscription key for the upstream web service
167 | - `ENABLE_DEV_AUTH=1` - This enables a hardcoded user id / key for development (off by default)
168 | - `DISABLE_AUTH_CACHE=1` - Authentication credentials are cached in memory in the `accept` API function. This lasts until AWS recycles the container hosting the function. Setting this variable disables the cache.
169 | - `MOCK_POSITIVE_CHANCE` - a number from 0.0 to 1.0 representing the probability that the mock upstream endpoint will respond with a positive match
170 | - `METRICS_URL` - Override for Ping Centre service URL used for internal metrics. By default, the stage or production Ping Centre URL is used based on `NODE_ENV`
171 | - `METRICS_PING_PERIOD` - Number of milliseconds to wait between periodic metrics pings reporting on queue status (default: 1000)
172 | - `SENTRY_DSN` - [DSN for use with Sentry error reporting](https://docs.sentry.io/clients/node/config/#environment-variables)
173 |
174 | You can see these variables used by scripts defined in `package.json` for development convenience.
175 |
176 | #### Production deployment
177 |
178 | For a production deployment involving just the functions and no other resources, define most resources using environment variables like so:
179 |
180 | ```
181 | STAGE=production \
182 | NODE_ENV=production \
183 | PREFIX=watchdog \
184 | HITRATE_TABLE=$PREFIX-hitrate \
185 | CREDENTIALS_TABLE=$PREFIX-credentials \
186 | CONTENT_BUCKET=watchdog-content \
187 | QUEUE_NAME=$PREFIX-messages \
188 | QUEUE_ARN=arn:aws:sqs:$AWS_REGION:$AWS_ACCOUNT_ID:$QUEUE_NAME \
189 | UPSTREAM_SERVICE_URL=https://api.microsoftmoderator.com/photodna/v1.0/Match \
190 | UPSTREAM_SERVICE_KEY={secret service key} \
191 | npm run deploy
192 | ```
193 |
194 | This should select [the `production` config settings](./config/production.yml), which defines functions but omits resources or IAM statements. So, all those dependencies should be created separately and identified via environment variables.
195 |
196 | ## Managing client credentials
197 |
198 | Managing client credentials for [Hawk authentication][] is currently a manual
199 | process of direct edits to the Credentials table in Dynamo DB - no tooling has
200 | been created for this yet.
201 |
202 | Each item in the Credentials table consists of these properties:
203 |
204 | * `id` - the client's ID
205 | * `key` - a secret key
206 | * `algorithm` - the HMAC hash algorithm to use
207 |
208 | The `id` and `key` properties are mandatory and should both be unique values.
209 | Neither of these are hashed, since the Hawk algorithm requires access to them
210 | in plaintext. The `id` value will appear in log messages and metric pings, but
211 | `key` should remain secret.
212 |
213 | The `algorithm` property is optional - if empty, a default of "`sha256`" will
214 | be used. The `algorithm` used should be given to the client implementor along with
215 | the `id` and `key`, because it is necessary as part of constructing a Hawk auth
216 | request.
217 |
218 | The set of alternate values for `algorithm` are described as part of the
219 | [`crypto.createHmac` function](https://nodejs.org/api/crypto.html#crypto_crypto_createhmac_algorithm_key_options) supplied by node.js - TL;DR:
220 |
221 | > The algorithm is dependent on the available algorithms supported by the version of OpenSSL on the platform. Examples are 'sha256', 'sha512', etc. On recent releases of OpenSSL, openssl list -digest-algorithms (openssl list-message-digest-algorithms for older versions of OpenSSL) will display the available digest algorithms.
222 |
223 | [hawk authentication]: https://github.com/hueniverse/hawk#readme
224 |
--------------------------------------------------------------------------------
/bin/client.js:
--------------------------------------------------------------------------------
1 | const fs = require("fs");
2 | const Hawk = require("hawk");
3 | const request = require("request-promise-native");
4 | const program = require("commander");
5 | const packageData = require("../package.json");
6 |
7 | let endpointURL = null;
8 |
// CLI entry point: parse options, build a Hawk-signed multipart POST to the
// deployed stack's /accept endpoint, and resolve with the response body.
async function main() {
  program
    .version(packageData.version)
    .usage("[options]")
    // NOTE(review): value placeholders (e.g. "<url>") are required here —
    // without them commander treats each option as a boolean flag, so
    // program.url / program.id / etc. would be `true` instead of the value
    // the user supplied, and the Hawk header / form fields below would be
    // built from booleans.
    .option("-U, --url <url>", "Service base URL (auto-detected by default)")
    .option("-u, --id <id>", "User id")
    .option("-k, --key <key>", "User key")
    .option("-i, --image <path>", "Image path")
    .option("-n, --notes <text>", "Notes text")
    .option("-N, --negative <url>", "Negative URL")
    .option("-P, --positive <url>", "Positive URL")
    .option("-e, --email <email>", "Email for positives")
    .parse(process.argv);

  // Base URL for the stack: --url option, or auto-discovered from the
  // current Serverless stack's CloudFormation outputs.
  const baseURL = await discoverEndpointURL(program);
  const url = `${baseURL}/accept`;

  // Callback URLs default to the stack's own mock client endpoints.
  const negative_uri = program.negative || `${baseURL}/mock/client/negative`;
  const positive_uri = program.positive || `${baseURL}/mock/client/positive`;
  // (All three URLs are guaranteed non-empty at this point, so the old
  // "Missing required URL" guard was unreachable and has been removed.)

  // Sign the request with Hawk; the dev defaults match the credentials
  // enabled by ENABLE_DEV_AUTH=1 on the server side.
  const { header: Authorization } = Hawk.client.header(url, "POST", {
    credentials: {
      id: program.id || "devuser",
      key: program.key || "devkey",
      algorithm: "sha256",
    },
  });

  // Multipart form body; falls back to the tiny built-in test image when no
  // --image path is supplied.
  const formData = {
    image: program.image ? fs.createReadStream(program.image) : DEFAULT_IMAGE,
    negative_uri,
    positive_uri,
  };
  if (program.email) {
    formData.positive_email = program.email;
  }
  if (program.notes) {
    formData.notes = program.notes;
  }

  return request({
    method: "POST",
    url,
    headers: { Authorization },
    formData,
  });
}
69 |
// Resolve the service base URL for requests.
//
// Order of precedence:
//   1. the --url CLI option, if given;
//   2. the module-level `endpointURL` cache, if already discovered;
//   3. the "ServiceEndpoint" CloudFormation output of the current
//      Serverless stack (requires local Serverless config + AWS creds).
//
// Throws an Error when no endpoint can be determined.
async function discoverEndpointURL(program) {
  if (program.url) {
    return program.url;
  }
  if (!endpointURL) {
    // Attempt to discover the current stack's accept URL if missing option
    const Serverless = require("serverless");
    const serverless = new Serverless({ interactive: false });
    await serverless.init();
    await serverless.variables.populateService();
    const stackName = serverless.providers.aws.naming.getStackName();
    const stackInfo = await serverless.providers.aws.request(
      "CloudFormation",
      "describeStacks",
      { StackName: stackName }
    );
    stackInfo.Stacks[0].Outputs.forEach(({ OutputKey, OutputValue }) => {
      if (OutputKey === "ServiceEndpoint") {
        endpointURL = OutputValue;
        console.log(`Discovered endpoint URL: ${endpointURL}`);
      }
    });
  }
  if (!endpointURL) {
    // Throw a real Error (not a bare string) so the top-level
    // .catch(err => ... err.message) reports a useful message instead of
    // printing "ERROR undefined".
    throw new Error("Could not discover endpoint URL");
  }
  return endpointURL;
}
98 |
// NOTE: This is a tiny image of lmorchard's face used if --image not supplied
// (this is probably overkill)
// Decoded from base64 into a Buffer so it can be attached directly as the
// multipart `image` field when the caller gives no --image path.
const DEFAULT_IMAGE = Buffer.from(
  "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAABmJLR0QA/wD/AP+gvaeTAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAB3RJTUUH3wEDFCMicI+/LQAAAB1pVFh0Q29tbWVudAAAAAAAQ3JlYXRlZCB3aXRoIEdJTVBkLmUHAAAKeElEQVRYw3WWyY+kyVmHnzeWb8nMLzNr33qZdvfsC8YzY8/AGBnLLPYBiRsWB+DGX4FaPnD0gTM3xImThYTBSGBxAJlZPMN4mJ6aqe7p6qWqu5asrMz81lg45CBxIaRQHEIRb8Qv3njen/z5n/5F1DZBRDDGopSAaKwWQKO0ICJAQClNDAHnHQrBaE0kgCiUCCIQgRDishOIMRK8EONyXfTLuYjDeY9c230+EjWiIkoJKgpRLTdDhCgGpQISBCOK1kV815BYjUlSfHSoaFEx4sUR8RCEQCBEQEAChAhBAgQBQElAIohVOkZRiAjLmJEY1VejoFAEHbFYrFKECKIUPgSMSkiTDC2axBpiBNc21K6lCxWtc8TYEflKnSCIihAjIhAwSGqTGGNEYgQRRBQRUAKCoESBRGJQIEKiNSKaLB1gtULrDJGIkYQQI9CijEErw2R2znR+tlRBhBDAhY5IJBWDMQbZKdai0oLoiBYDKGzURBEumjlN2+JjQIkisxkaRT8vsFlK27QkNkPC/z5XgAAeR54NCQhGNYhb0DQ1EsHHgA+OVKcoAmaoFUQP3mAiSBIIKIJJiJXGRUhEU/RGZGmGeIVJEuq6ppcW1K4kuI6AgI8EcSCayITRcJNMQwcMdELb1YSo6XxFCAGFwvSMIsSAxIgxioDQieKyKfE0aIHEWsb9EcOsoOglXJQlhoCEDhsjNssxOsUHR9u1NM6R6hyjFM4bptMjNooCIxonBukCBkuWZhhtBN8KOkLwghO4dC21a4hOGOYjxv2CF67c5Ldee5Wi1+fjg32eTCaMBgPuHz/g3uP7eNWgVEqa9CgKy3Q2o5rP6I3HjFe3IbTLpO5m9K0hRKFuZsjbu7tRaYsWRescVee4CJbLak6Ins3xDm8++wJ/9N3fZW9rC1sUNAoWviMfFRw/eMjdz7/g3V9+wOGjR1w0y+zXGmLwKGMYFuv0jCc0FfgWEwMoCCGgr42K2yIKa8EaQ9CW86rCR8/qcMjGeJ3vf/Ntbu3uMRiNsGtDsrUhxdVNertj1p7Z4earL/D6y69xY7TJ5cWU0+kEiDRdS0BIbQaiGa0/g+2NObooseIgdBjnPFopgjYYK1Rlw6KekaUF1ozItKZIEjQRjSe6DskKJDfL76UEkycMb+7w+voKKxurrPz85/zyzieUTUnwniRNMNbwwz/7E4SGH//4r/DlAmsFk5gERAOglSWzHZnR+Oho2gqR4RKzWoEopJcj/QQUxBiXjJCIsoKs5zzzrVf5g/6AKIH/+Ohder0BiVXkecrGZsHZaUuR96kmHS5LUfmgwFqNaIM2lrVizMZwxMDmpDZBiSaIIElCTAxiDRGACCrCkqxfwQbsWo+rv/Ysb77xFiEojF5eri4rJqenTM4uKMsZkmqyTGG00QgWbTXWJCBQ5BmzZkHdlIQYaEOk85FoEzBmGen/aaIEM0pIixQlwnh1zGw+Z7GY8V/vf8zFdEqILcNeH6PAnJUVF7M5EMhtn7WBYT6vmJZTUpMS2eHB02PquuWZ8ha3iiGmH4kiSwUA1P85gAiiBdM3eP8VdrM+iTR88O8/I/oWaWcstKUSwbi2oetq6tZRmw7f5ASlGaY5WdLDojh4+CXn4xkf3rtH8d4veOsbb3DjxjVGm+tUk0tq8czmFSpAlqf0Vgf4TvA+0nQd1vZ5etIhNod2xnQ6YVkAQRKTRmJESEA8PZPw8t4WjYdp6bh19Qob6xuMemMmM0fdwdpwwEpf2Nvbo1xUzMqWyxp07klDYHN7mw/e/5jz6RNimiDdgqOnR2htWXQVeWIIaGblJQag31/DqpyqmyES6bwHLbShIQmWsoq8cXOL5PoQk/QwBA6P7/N3P/kJw5Uhf/i936OtBZNnrG6M2H3pOl/ePWB39xXuPjnCNg0TkyCqIdicPC+4aCNV6TE3rr7C0cljtLSsDDcoyymda0l0j9xkzOsF2+tbpGnB9e0tetmQrmv5bP8Of/mjH3Fw9Jibm9uoLpAXOb2b21zOLsnyIeiOpilxraN1JUorJGooa9aNohgY1PHJETFEQh1IXR/lEiZVg7aaqyurSISNok+UIbXPqELk4PARL9+8wdramNe/+wYHh6dkKyvY4ZighcWk4jffeQsf4fzkhKat6FxL3XjmDZzMIvdOLjk8n2Iy1cOqDK9rLpoTLtszGufxLlD0UpQZ8M3EspELV9ZXUD2L7Wqqco6IYPOEX//Oy5x9eoRxffIctp/bYVFecvT0lCAe13XgIyuDlKQYUPpAdXlJ9ArTuQ6HZ9FOSUyytEtoeiZha7zC01mDD4HBYECWJaAVwTuMtkhiQAnJKGd7bwO1N0KyJSOePjzmwdFjOudx0eG8o1vMyYxCfKSIjpPaY1IpmLXnrI7GlFVNJJKYFKME13qIkUenJ9ShparmtPOOo7MTzudTRg+uoi4XxGlJf9BH5i2kCe28Zf+TuyzKOVU5IcsNWoTL+QV1XSGdR3RH1zmM2Ijxnq6OuC4g0ZAazeb6Kr5r0Dpy//ghj8+f8vG9fQ4O75NmfWKEJ//0U5I0Z1gMubp3jWvsUtgNDt7f57/37zCdnhJiQ0x7rKys8GQ2R1yLjZ7ohCSA7iXj216ga1pefu1NggSUq1kpenTlgjKmzMqWcX/ALz76kLJuoO64Ot5iYDLaqubR6RH7D77g+NExW9mQO/uf8dN/+QdOpsegNYUxGJ1wPj2jCzDTmjbLqLMck8QaozWVgaPDz0hTixOhrFqaoAhKmDcVH+5/ypW1Tb7+4oukrfC10Q7D1U1mzjGLNR/f/YjZ9IKDe/f58FefMi0XGDtCqxQXZMkVmy29Y5YwGqzinUPfXO3fzm2gsAFiTec6nEoofeR4csH55Sl9ERrX0Rv0efXWc5zNp9TWsBDHQtVs724Sicybiouq4u3f/g0uPrzPw/k5yhaobEyapizKOXPnaJsOV88p60v07qh327We2mucHtNEuJifcLGY0vmWjWh5dmuP9a1t9g8P2VldZ3djh9SmjPoFveGInRvX+OTOAR99/gWvfesN3vn+W+weBi7u3eNg8QhHijMr5GlKLi0DFUmVYWAtalYHKt3Djq+ze+0VZuUlrWuJMZCKsCmWLAov3XieK5u7vHvngPsnE0bFgJ3tTXZ29jj48px/fPEl73z723zn99+mnS7oHjzlVjLmmS7SLZ6ymF8gvR16w69hbIrgcd6h11eu317bfIXfefuH/Nt7/8jl/JgYPYIhFcVOzGjmC67sXePF51/iyaxksphzNnnKvaPHtOmAT/YPeO65a/zgj7+HxMj+X/8rs88fcjY/53R+yjRWNMaS2AGkBcGuonRCV5Xo17/xg9vF2hp//7O/5Xx2SBQPMXxV2zVbJFgUZ0ePSZOEq1eu0oZA6yLapPR6fa7vrPH1V59n9uCIO3/zz5z/52fE4Dm5PONJc8oxjtK3JOkIEZjP53hyktEOsrX2bJxW53RthRCIRGLwCBqthGdjj03Spf8zQlaMGG5usrG5xbgYM8gH+FlNczZBJhWpg8L0UBL5/OwBH02/4D2pqGLL+uoL5L11qroGAqnN+B9wdEpntVlsVgAAAABJRU5ErkJggg==",
  "base64"
);
105 |
// Run the client; print the service response, or a failure message on error.
main()
  .then(result => console.log(result))
  .catch(err => {
    console.error("ERROR", err.message);
  });
109 |
--------------------------------------------------------------------------------
/config/dev.yml:
--------------------------------------------------------------------------------
1 | custom:
2 | resourceNames:
3 | hitrate: ${self:custom.prefix}-hitrate
4 | credentials: ${self:custom.prefix}-credentials
5 | sqs: ${self:custom.prefix}-messages
6 | contentBucket: ${self:custom.prefix}-content
7 | process: ${self:custom.prefix}-processQueueItem
8 |
9 | customDomain:
10 | domainName: ${self:service}.dev.mozaws.net
11 | certificateArn: 'arn:aws:acm:us-east-1:927034868273:certificate/61e462bd-410e-48b6-95da-fa5501430d1d'
12 | basePath: ''
13 | stage: ${self:provider.stage}
14 | createRoute53Record: true
15 | enabled: true
16 |
17 | remover:
18 | buckets:
19 | - ${self:custom.resourceNames.contentBucket}
20 |
21 | provider:
22 | iamRoleStatements:
23 | - Effect: Allow
24 | Action:
25 | - ses:SendEmail
26 | Resource:
27 | - arn:aws:ses:*:*:*
28 | - Effect: Allow
29 | Action:
30 | - dynamodb:GetItem
31 | - dynamodb:PutItem
32 | - dynamodb:DeleteItem
33 | - dynamodb:UpdateItem
34 | - dynamodb:Query
35 | - dynamodb:Scan
36 | Resource:
37 | - arn:aws:dynamodb:*:*:table/${self:custom.resourceNames.credentials}
38 | - arn:aws:dynamodb:*:*:table/${self:custom.resourceNames.hitrate}
39 | - Effect: Allow
40 | Action:
41 | - lambda:InvokeFunction
42 | Resource: arn:aws:lambda:*:*:function:${self:custom.resourceNames.process}
43 | - Effect: Allow
44 | Action:
45 | - sqs:ChangeMessageVisibility
46 | - sqs:ChangeMessageVisibilityBatch
47 | - sqs:DeleteMessage
48 | - sqs:DeleteMessageBatch
49 | - sqs:GetQueueAttributes
50 | - sqs:GetQueueUrl
51 | - sqs:ReceiveMessage
52 | - sqs:SendMessage
53 | - sqs:SendMessageBatch
54 | Resource: arn:aws:sqs:*:*:${self:custom.resourceNames.sqs}
55 | - Effect: Allow
56 | Action:
57 | - s3:GetObject
58 | - s3:PutObject
59 | - s3:PutObjectAcl
60 | - s3:DeleteObject
61 | - s3:ListBucket
62 | Resource:
63 | - arn:aws:s3:::${self:custom.resourceNames.contentBucket}
64 | - arn:aws:s3:::${self:custom.resourceNames.contentBucket}/*
65 |
66 | resources:
67 | Resources:
68 |
69 | S3BucketContent:
70 | Type: AWS::S3::Bucket
71 | Properties:
72 | BucketName: ${self:custom.resourceNames.contentBucket}
73 | LifecycleConfiguration:
74 | Rules:
75 | - Id: DailyCleanup
76 | Status: Enabled
77 | ExpirationInDays: 180
78 |
79 | Messages:
80 | Type: AWS::SQS::Queue
81 | Properties:
82 | QueueName: ${self:custom.resourceNames.sqs}
83 | MessageRetentionPeriod: 1209600
84 | VisibilityTimeout: 60
85 | RedrivePolicy:
86 | deadLetterTargetArn:
87 | Fn::GetAtt:
88 | - MessagesDeadLetterQueue
89 | - Arn
90 | maxReceiveCount: 3
91 |
92 | MessagesDeadLetterQueue:
93 | Type: AWS::SQS::Queue
94 | Properties:
95 | QueueName: ${self:custom.resourceNames.sqs}-dead-letter-queue
96 | MessageRetentionPeriod: 1209600
97 |
98 | HitRate:
99 | Type: AWS::DynamoDB::Table
100 | Properties:
101 | TableName: ${self:custom.resourceNames.hitrate}
102 | AttributeDefinitions:
103 | - AttributeName: id
104 | AttributeType: S
105 | KeySchema:
106 | - AttributeName: id
107 | KeyType: HASH
108 | ProvisionedThroughput:
109 | ReadCapacityUnits: 5
110 | WriteCapacityUnits: 5
111 | TimeToLiveSpecification:
112 | AttributeName: expiresAt
113 | Enabled: true
114 |
115 | Credentials:
116 | Type: AWS::DynamoDB::Table
117 | Properties:
118 | TableName: ${self:custom.resourceNames.credentials}
119 | AttributeDefinitions:
120 | - AttributeName: id
121 | AttributeType: S
122 | KeySchema:
123 | - AttributeName: id
124 | KeyType: HASH
125 | ProvisionedThroughput:
126 | ReadCapacityUnits: 5
127 | WriteCapacityUnits: 5
128 |
129 | functions:
130 |
131 | version:
132 | handler: functions/version.handler
133 | name: ${self:custom.prefix}-version
134 | environment: ${self:custom.fnEnv}
135 | events:
136 | - http:
137 | path: __version__
138 | method: get
139 |
140 | heartbeat:
141 | handler: functions/heartbeat.handler
142 | name: ${self:custom.prefix}-heartbeat
143 | environment: ${self:custom.fnEnv}
144 | events:
145 | - http:
146 | path: __heartbeat__
147 | method: get
148 |
149 | periodicMetrics:
150 | timeout: 60
151 | handler: functions/periodicMetrics.handler
152 | name: ${self:custom.prefix}-periodicMetrics
153 | environment: ${self:custom.fnEnv}
154 | events:
155 | - schedule: rate(1 minute)
156 |
157 | accept:
158 | handler: functions/accept.post
159 | name: ${self:custom.prefix}-accept
160 | environment: ${self:custom.fnEnv}
161 | events:
162 | - http:
163 | path: accept
164 | method: post
165 |
166 | processQueueItem:
167 | timeout: 60
168 | handler: functions/processQueueItem.handler
169 | name: ${self:custom.resourceNames.process}
170 | environment: ${self:custom.fnEnv}
171 | reservedConcurrency: 5
172 | events:
173 | - sqs:
174 | batchSize: 5
175 | arn:
176 | Fn::GetAtt:
177 | - Messages
178 | - Arn
179 |
180 | # TODO: Find a way to exclude these functions from prod deployments
181 | # See https://stackoverflow.com/questions/47718004/exclude-lambda-function-from-deploy-to-a-particular-stage
182 | mockUpstream:
183 | handler: functions/mockEndpoints.upstreamPost
184 | name: ${self:custom.prefix}-mockUpstream
185 | environment: ${self:custom.fnEnv}
186 | events:
187 | - http:
188 | path: mock/upstream
189 | method: post
190 |
191 | mockClientNegative:
192 | handler: functions/mockEndpoints.clientNegativePost
193 | name: ${self:custom.prefix}-mockClientNegative
194 | environment: ${self:custom.fnEnv}
195 | events:
196 | - http:
197 | path: mock/client/negative
198 | method: post
199 |
200 | mockClientPositive:
201 | handler: functions/mockEndpoints.clientPositivePost
202 | name: ${self:custom.prefix}-mockClientPositive
203 | environment: ${self:custom.fnEnv}
204 | events:
205 | - http:
206 | path: mock/client/positive
207 | method: post
208 |
209 | mockLogPost:
210 | handler: functions/mockEndpoints.logPost
211 | name: ${self:custom.prefix}-mockLogPost
212 | events:
213 | - http:
214 | path: mock/log
215 | method: post
216 |
--------------------------------------------------------------------------------
/config/dynamic.js:
--------------------------------------------------------------------------------
1 | const { promisify } = require("util");
2 | const { exec: execOrig } = require("child_process");
3 | const exec = promisify(execOrig);
4 |
5 | module.exports = async () => {
6 | let GIT_COMMIT;
7 | if (process.env.GIT_COMMIT) {
8 | ({ GIT_COMMIT } = process.env);
9 | } else {
10 | ({ stdout: GIT_COMMIT = "" } = await exec(
11 | 'git --no-pager log --format=format:"%H" -1'
12 | ));
13 | }
14 |
15 | return {
16 | GIT_COMMIT,
17 | };
18 | };
19 |
--------------------------------------------------------------------------------
/config/production.yml:
--------------------------------------------------------------------------------
1 | custom:
2 | customDomain:
3 | enabled: false
4 |
5 | resourceNames:
6 | hitrate: ${env:HITRATE_TABLE}
7 | credentials: ${env:CREDENTIALS_TABLE}
8 | sqs: ${env:QUEUE_NAME}
9 | sqsArn: ${env:QUEUE_ARN}
10 | contentBucket: ${env:CONTENT_BUCKET}
11 | process: ${self:custom.prefix}-processQueueItem
12 |
13 | remover: {}
14 |
15 | resources:
16 | Resources: {}
17 |
18 | provider:
19 | iamRoleStatements: {}
20 |
21 | functions:
22 |
23 | version:
24 | handler: functions/version.handler
25 | name: ${self:custom.prefix}-version
26 | environment: ${self:custom.fnEnv}
27 | events:
28 | - http:
29 | path: __version__
30 | method: get
31 |
32 | heartbeat:
33 | handler: functions/heartbeat.handler
34 | name: ${self:custom.prefix}-heartbeat
35 | environment: ${self:custom.fnEnv}
36 | events:
37 | - http:
38 | path: __heartbeat__
39 | method: get
40 |
41 | accept:
42 | handler: functions/accept.post
43 | name: ${self:custom.prefix}-accept
44 | environment: ${self:custom.fnEnv}
45 | events:
46 | - http:
47 | path: accept
48 | method: post
49 |
50 | processQueueItem:
51 | timeout: 60
52 | handler: functions/processQueueItem.handler
53 | name: ${self:custom.resourceNames.process}
54 | environment: ${self:custom.fnEnv}
55 | reservedConcurrency: 5
56 | events:
57 | - sqs:
58 | batchSize: 5
59 | arn: ${self:custom.resourceNames.sqsArn}
60 |
--------------------------------------------------------------------------------
/docs/metrics.md:
--------------------------------------------------------------------------------
1 | # Watchdog Metrics
2 | *Last Update: 2018-06-08*
3 |
4 | ## Analysis
5 | Questions we want to answer with metrics data include:
6 |
7 | - Overall throughput performance:
  - Time from consumer submission until submission to PhotoDNA (time in queue)
9 | - Response from PhotoDNA (time waiting for reply)
10 | - Response to consumer API (time to reply)
11 | - The sum of the above to give an easy health measure
12 | - Throughput data for positive identifications since they require
13 | manual intervention:
14 | - Number of positively flagged images
15 | - Breakdown of images not yet reviewed and under review
16 | - Number of images confirmed vs falsely identified
17 | - The number of items in the message queue
18 | - Total number of images processed
19 | - Breakdown of positive vs negative responses
20 |
21 | Each of these should be available globally, as well as broken down per consumer
22 | application.
23 |
24 |
25 | ## Collection
26 | This project uses Ping Centre to collect metrics data. Pings will be sent as
27 | JSON blobs. All pings will include the following fields:
28 | - *topic*: used by Ping Centre. In this case always "watchdog-proxy": string
29 | - *timestamp*: Using UNIX epoch time in milliseconds (i.e. `Date.now()` in JavaScript): number
30 |
31 |
32 | ## Events
33 | Additional fields submitted are described below.
34 |
35 | ### A new item is submitted from a consumer
36 | - *consumer_name*: the name of the consumer submitting the request: string
37 | - *event*: "new_item": string
38 | - *watchdog_id*: the ID assigned to the task: string
39 | - *type*: Content-Type of item submitted (eg. 'image/png' or 'image/jpg'): string
40 |
41 | Example:
42 | ```
43 | {
44 | "topic": "watchdog-proxy",
  "timestamp": 1534784298646,

  "consumer_name": "screenshots",
  "event": "new_item",
  "watchdog_id": "9ad08ec4-be1a-4327-b4ef-282bed37621f",
  "type": "image/png"
51 | }
52 | ```
53 |
54 | ### Periodic queue metrics
55 | The `periodicMetrics` function gets called via CloudWatch alarm every 60
56 | seconds and runs for most of 60 seconds before exiting. (This is a hack to work
57 | around lacking support for long-running functions in Amazon Lambda.)
58 |
59 | Metrics pings will be sent once per second (plus processing time) while the
60 | function is running. This period is configurable via the `METRICS_PING_PERIOD`
61 | environment variable during deployment.
62 |
63 | The metrics sent in the ping will contain:
64 | - *event*: "poller_heartbeat": string
65 | - *poller_id*: UUID given by Lambda to the current invocation of the `pollQueue` function
66 | - *items_in_queue*: Number of items in the queue before the worker removes any: integer
67 | - *items_in_progress*: Number of items being processed: integer
68 | - *items_in_waiting*: Number of items waiting to be queued: integer
69 |
70 | Example:
71 | ```
72 | {
73 | "topic": "watchdog-proxy",
  "timestamp": 1534784298646,
75 |
76 | "event": "poller_heartbeat",
77 | "poller_id": "31417de1-b3ef-4e90-be3c-e5116d459d1d",
78 | "items_in_queue": 1504,
79 | "items_in_progress": 22,
80 | "items_in_waiting": 38
81 | }
82 | ```
83 |
84 | ### A worker processes a queue item
85 | For *each* item fetched from the queue by the poller, the `processQueueItem` function will be invoked. That function, in turn, will send these metrics:
86 | - *event*: "worker_works": string
87 | - *worker_id*: UUID given by Lambda to the current invocation of the `processQueueItem` function
88 | - *consumer_name*: the ID of the consumer submitting the request: string
89 | - *watchdog_id*: the ID assigned to the task: string
90 | - *photodna_tracking_id*: ID from PhotoDNA: string
91 | - *is_match*: Whether the response was positive or negative: boolean
92 | - *is_error*: Was the response an error?: boolean
93 | - *timing_sent*: time (in ms) to send item to PhotoDNA: integer
94 | - *timing_received*: time (in ms) before response from PhotoDNA: integer
95 | - *timing_submitted*: time (in ms) to finish sending a response to consumer's report URL: integer
96 |
97 | Example:
98 | ```
99 | {
100 | "topic": "watchdog-proxy",
  "timestamp": 1534784298646,

  "event": "worker_works",
  "worker_id": "8cdb1e6b-7e15-489d-b171-e7a05781c5da",
  "consumer_name": "screenshots",
  "watchdog_id": "9ad08ec4-be1a-4327-b4ef-282bed37621f",
107 | "photodna_tracking_id": "1_photodna_a0e3d02b-1a0a-4b38-827f-764acd288c25",
108 | "is_match": false,
109 | "is_error": false,
110 |
111 | "timing_sent": 89,
112 | "timing_received": 161,
113 | "timing_submitted": 35
114 | }
115 | ```
116 |
--------------------------------------------------------------------------------
/docs/requests-and-responses.md:
--------------------------------------------------------------------------------
1 | # Request and Response Formats
2 |
3 | This document lists the request and response body formats _sent from_ Watchdog to its consumers.
4 |
5 | ## Responses From /accept
6 |
7 | ### 201
8 | ```javascript
9 | {
10 | id,
11 | negative_uri,
12 | positive_uri,
13 | positive_email,
14 | }
15 | ```
16 |
17 | Property | Notes
18 | ---------| -----
19 | id | A generated ID string.
20 | negative_uri | The negative result callback URI from the consumer's request.
21 | positive_uri | The positive result callback URI from the consumer's request.
22 | positive_email | The list of email addresses to receive a positive match notification from the consumer's request.
23 |
24 | ### 400
25 | ```javascript
26 | {
27 | error,
28 | }
29 | ```
30 |
31 | Property | Notes
32 | ---------| -----
33 | error | Watchdog cannot [parse the consumer's request](https://github.com/mscdex/busboy) or a required field is not in the request.
34 |
35 | ### 401
36 | ```javascript
37 | {
38 | error,
39 | }
40 | ```
41 |
42 | Property | Notes
43 | ---------| -----
44 | error | An error message from [Hawk](https://github.com/hueniverse/hawk).
45 |
46 | ## Callback Request
47 |
48 | This is a POST to one of the callback URIs the consumer sent in its submission. The request body format is identical for positive and negative match callbacks.
49 |
50 | ```javascript
51 | {
52 | watchdog_id,
53 | positive,
54 | notes,
55 | error,
56 | response,
57 | }
58 | ```
59 |
60 | Property | Notes
61 | ---------| -----
62 | watchdog_id | A generated ID. This is the same ID in the 201 response from `/accept`.
63 | positive | A boolean to indicate whether the image was a positive match. True when the request is sent to the positive callback URI, and false when POSTed to the negative callback URI.
64 | notes | The (optional) notes the consumer included in its submission.
65 | error | A boolean to indicate whether an error occurred upstream at PhotoDNA. When this is true, the `positive` value should be ignored. A list of error status codes is under "Response 200" at the [PhotoDNA documentation for its `match` endpoint](https://developer.microsoftmoderator.com/docs/services/57c7426e2703740ec4c9f4c3/operations/57c7426f27037407c8cc69e6). (See the `response` property below.)
response | The full response from PhotoDNA. The PhotoDNA response status code can be found in this property.
67 |
--------------------------------------------------------------------------------
/docs/systems_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/watchdog-proxy/7eadadeb633b126a6e953f158ecc4f3866f6caf5/docs/systems_diagram.png
--------------------------------------------------------------------------------
/functions/accept-test.js:
--------------------------------------------------------------------------------
1 | const sinon = require("sinon");
2 | const { expect } = require("chai");
3 | const Hawk = require("hawk");
4 |
5 | const { DEV_CREDENTIALS, DEFAULT_HAWK_ALGORITHM } = require("../lib/constants");
6 |
7 | const {
8 | mocks,
9 | makePromiseFn,
10 | env: { UPSTREAM_SERVICE_URL, CREDENTIALS_TABLE, QUEUE_NAME, CONTENT_BUCKET },
11 | constants: { QueueUrl, requestId },
12 | } = global;
13 |
14 | const Metrics = require("../lib/metrics");
15 | const accept = require("./accept");
16 |
// Tests for the /accept endpoint handler (functions/accept.js): Hawk
// authentication outcomes and multipart content submission handling.
describe("functions/accept.post", () => {
  let metricsStub;

  beforeEach(() => {
    global.resetMocks();
    // Enable dev credentials and disable the in-memory auth cache so each
    // test fully controls how credentials are looked up.
    process.env.ENABLE_DEV_AUTH = "1";
    process.env.DISABLE_AUTH_CACHE = "1";
    metricsStub = sinon.stub(Metrics, "newItem");
  });

  afterEach(() => {
    metricsStub.restore();
  });

  describe("Hawk authentication", () => {
    // Shared assertion: a 401 with the Hawk challenge header.
    const expectHawkUnauthorized = result => {
      expect(result.statusCode).to.equal(401);
      expect(result.headers["WWW-Authenticate"]).to.equal("Hawk");
    };

    it("responds with 401 Unauthorized with disabled dev credentials", async () => {
      process.env.ENABLE_DEV_AUTH = null;

      const id = "devuser";
      const { key, algorithm } = DEV_CREDENTIALS[id];
      const result = await acceptPost({
        httpMethod: "POST",
        proto: "https",
        host: "example.com",
        port: 443,
        path: "/prod/accept",
        id,
        key,
        algorithm,
      });
      expectHawkUnauthorized(result);
    });

    it("responds with 401 Unauthorized with bad id", async () => {
      const badid = "somerando";
      const key = "realkey";

      // Empty DynamoDB result simulates an unknown credential id.
      mocks.getItem.returns(makePromiseFn({}));

      const result = await acceptPost({
        httpMethod: "POST",
        proto: "https",
        host: "example.com",
        port: 443,
        path: "/prod/accept",
        id: badid,
        key,
        algorithm: DEFAULT_HAWK_ALGORITHM,
      });

      expect(mocks.getItem.lastCall.args[0]).to.deep.equal({
        TableName: CREDENTIALS_TABLE,
        Key: { id: badid },
        AttributesToGet: ["key", "algorithm"],
      });
      expectHawkUnauthorized(result);
    });

    it("responds with 401 Unauthorized with bad key", async () => {
      const id = "realuser";
      const key = "realkey";
      const badkey = "badkey";
      const algorithm = "sha256";

      mocks.getItem.returns(makePromiseFn({ Item: { key, algorithm } }));

      const result = await acceptPost({
        httpMethod: "POST",
        proto: "https",
        host: "example.com",
        port: 443,
        path: "/prod/accept",
        id,
        key: badkey,
        algorithm: DEFAULT_HAWK_ALGORITHM,
      });

      expect(mocks.getItem.lastCall.args[0]).to.deep.equal({
        TableName: CREDENTIALS_TABLE,
        Key: { id },
        AttributesToGet: ["key", "algorithm"],
      });
      expectHawkUnauthorized(result);
    });

    it("responds with 201 Created with enabled dev credentials", async () => {
      const id = "devuser";
      const { key, algorithm } = DEV_CREDENTIALS[id];

      const result = await acceptPost({
        httpMethod: "POST",
        proto: "https",
        host: "example.com",
        port: 443,
        path: "/prod/accept",
        id,
        key,
        algorithm,
      });

      // Dev credentials don't hit the database
      expect(mocks.getItem.notCalled).to.be.true;
      expect(result.statusCode).to.equal(201);
    });

    it("responds with 201 Created with real valid credentials", async () => {
      const id = "realuser";
      const key = "realkey";
      const algorithm = "sha256";

      mocks.getItem.returns(makePromiseFn({ Item: { key, algorithm } }));

      const result = await acceptPost({
        httpMethod: "POST",
        proto: "https",
        host: "example.com",
        port: 443,
        path: "/prod/accept",
        id,
        key,
        algorithm,
      });

      expect(mocks.getItem.lastCall.args[0]).to.deep.equal({
        TableName: CREDENTIALS_TABLE,
        Key: { id },
        AttributesToGet: ["key", "algorithm"],
      });
      expect(result.statusCode).to.equal(201);
    });
  });

  describe("Content submission", () => {
    it("responds with 400 if missing a required field", async () => {
      const id = "devuser";
      const { key, algorithm } = DEV_CREDENTIALS[id];
      const body = Object.assign({}, DEFAULT_POST_BODY);
      delete body.image;

      process.env.METRICS_URL = "https://example.com";

      const result = await acceptPost({
        httpMethod: "POST",
        proto: "https",
        host: "example.com",
        port: 443,
        path: "/prod/accept",
        id,
        key,
        algorithm,
        body,
      });

      expect(result.statusCode).to.equal(400);
      expect(JSON.parse(result.body).error).to.equal(
        'Required "image" is missing'
      );
    });

    it("accepts a properly authorized image submission", async () => {
      const id = "devuser";
      const { key, algorithm } = DEV_CREDENTIALS[id];
      const imageContent = "1234";
      const imageContentType = "image/jpeg";
      const body = Object.assign({}, DEFAULT_POST_BODY, {
        image: {
          filename: "image.jpg",
          contentType: imageContentType,
          content: imageContent,
        },
      });

      const result = await acceptPost({
        httpMethod: "POST",
        proto: "https",
        host: "example.com",
        port: 443,
        path: "/prod/accept",
        id,
        key,
        algorithm,
        body,
      });

      const imageKey = `image-${requestId}`;

      // First S3 put: the raw image content.
      expect(mocks.putObject.args[0][0]).to.deep.equal({
        Bucket: CONTENT_BUCKET,
        Key: imageKey,
        Body: Buffer.from(imageContent),
        ContentType: imageContentType,
      });
      expect(mocks.getQueueUrl.args[0][0]).to.deep.equal({
        QueueName: QUEUE_NAME,
      });

      const message = mocks.sendMessage.args[0][0];
      const messageBody = JSON.parse(message.MessageBody);

      expect(message.QueueUrl).to.equal(QueueUrl);
      expect("datestamp" in messageBody).to.be.true;
      expect(messageBody.upstreamServiceUrl).to.equal(UPSTREAM_SERVICE_URL);
      expect(messageBody.id).to.equal(requestId);
      expect(messageBody.user).to.equal(id);
      ["negative_uri", "positive_uri", "positive_email", "notes"].forEach(
        name => expect(messageBody[name]).to.equal(body[name])
      );
      expect(messageBody.image).to.equal(imageKey);

      // Second S3 put: the JSON copy of the queued request.
      expect(mocks.putObject.args[1][0]).to.deep.equal({
        Bucket: CONTENT_BUCKET,
        Key: `${imageKey}-request.json`,
        Body: message.MessageBody,
        ContentType: "application/json",
      });

      expect(metricsStub.called).to.be.true;
      expect(metricsStub.args[0][0]).to.deep.include({
        consumer_name: id,
        watchdog_id: requestId,
        type: imageContentType,
      });

      expect(result.statusCode).to.equal(201);
    });
  });
});
249 |
// Baseline valid submission used by acceptPost; individual tests clone and
// tweak this object rather than mutating it.
const DEFAULT_POST_BODY = {
  negative_uri: "https://example.com/negative",
  positive_uri: "https://example.com/positive",
  positive_email: "positive@example.com",
  notes: "foobar",
  image: {
    filename: "image.jpg",
    contentType: "image/jpeg",
    content: "1234123412341234",
  },
};
261 |
// Build a Hawk-signed multipart request event and invoke the accept handler.
// Returns the handler's HTTP-style response object.
async function acceptPost({
  httpMethod,
  proto,
  host,
  port,
  path,
  id,
  key,
  algorithm,
  body = DEFAULT_POST_BODY,
}) {
  const { contentType, encodedBody } = buildBody(body);
  const url = `${proto}://${host}:${port}${path}`;
  const credentials = { id, key, algorithm };
  const { header: authHeader } = Hawk.client.header(url, httpMethod, {
    credentials,
  });
  const event = {
    path,
    httpMethod,
    headers: {
      Host: host,
      "X-Forwarded-Port": port,
      "Content-Type": contentType,
      Authorization: authHeader,
    },
    body: encodedBody,
    requestContext: { path, requestId },
  };
  return accept.post(event, {});
}
296 |
// Encode a data object as a multipart/form-data request body.
//
// String values become plain form fields; object values are treated as file
// uploads ({ filename, contentType, content }) and get filename and
// Content-Type part headers.
//
// Returns { contentType, encodedBody } where contentType carries the
// multipart boundary used in encodedBody.
function buildBody(data) {
  const boundary = "--------------------------065117214804889366770750";
  const contentType = `multipart/form-data; boundary=${boundary}`;

  const encString = (name, value) =>
    `Content-Disposition: form-data; name="${name}"\r\n` +
    "\r\n" +
    value +
    "\r\n";

  // Fix: interpolate the destructured filename (previously emitted the
  // literal text "$(unknown)" and never used the filename at all).
  const encFile = (name, { filename, contentType, content }) =>
    `Content-Disposition: form-data; name="${name}"; filename="${filename}"\r\n` +
    `Content-Type: ${contentType}\r\n` +
    "\r\n" +
    content +
    "\r\n";

  const encodedBody = [
    `--${boundary}\r\n`,
    Object.entries(data)
      .map(
        ([name, value]) =>
          typeof value == "string"
            ? encString(name, value)
            : encFile(name, value)
      )
      .join("--" + boundary + "\r\n"),
    `--${boundary}--`,
  ].join("");

  return { contentType, encodedBody };
}
329 |
--------------------------------------------------------------------------------
/functions/accept.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | const Hawk = require("hawk");
4 | const Busboy = require("busboy");
5 | const AWS = require("aws-sdk");
6 | const S3 = new AWS.S3({ apiVersion: "2006-03-01" });
7 | const SQS = new AWS.SQS({ apiVersion: "2012-11-05" });
8 | const documentClient = new AWS.DynamoDB.DocumentClient();
9 | const { DEV_CREDENTIALS, DEFAULT_HAWK_ALGORITHM } = require("../lib/constants");
10 | const Sentry = require("../lib/sentry");
11 | const Metrics = require("../lib/metrics");
12 | const { md5 } = require("../lib/utils.js");
13 |
14 | const REQUIRED_FIELDS = ["image", "negative_uri", "positive_uri"];
15 |
16 | const Raven = Sentry();
17 |
/**
 * POST /accept handler.
 *
 * Flow: authenticate the consumer via Hawk, parse and validate the multipart
 * submission, store the image and a JSON copy of the request in S3, enqueue
 * a work item in SQS, and record a "new item" metrics event.
 *
 * Responses: 201 with { id, negative_uri, positive_uri, positive_email } on
 * success; 401 (with a WWW-Authenticate: Hawk header) on failed
 * authentication; 400 when the body is unparsable or a required field is
 * missing.
 */
module.exports.post = async function(event, context) {
  const log = require("../lib/logging")({
    name: "accept",
    isRequest: true,
    event,
    context,
  });

  const {
    UPSTREAM_SERVICE_URL,
    QUEUE_NAME: QueueName,
    CONTENT_BUCKET: Bucket,
  } = process.env;

  log.verbose("env", {
    UPSTREAM_SERVICE_URL,
    QueueName,
    Bucket,
  });

  const {
    headers,
    queryStringParameters: params,
    requestContext: { path, requestId },
  } = event;

  log.verbose("event", { headers, params, path, requestId });

  const {
    Host: host,
    Authorization: authorization,
    "X-Forwarded-Port": port = 80,
  } = headers;

  // Authenticate the request; credentials are resolved by lookupCredentials.
  let authArtifacts;
  try {
    ({ artifacts: authArtifacts } = await Hawk.server.authenticate(
      {
        method: "POST",
        url: path,
        params,
        host,
        port,
        authorization,
      },
      lookupCredentials
    ));
    log.commonFields.uid = authArtifacts.id;
  } catch (err) {
    Raven.captureException(err);
    log.error("authInvalid", { authorization, error: err.message });
    return response(
      401,
      { error: err.message },
      { "WWW-Authenticate": "Hawk" }
    );
  }

  // Parse the multipart body and enforce required fields.
  let body, negative_uri, positive_uri, positive_email, notes, image;
  try {
    body = await parseRequestBody(event);
    REQUIRED_FIELDS.forEach(name => {
      if (!body[name]) {
        log.warn("requestInvalid", { field: name });
        throw { message: `Required "${name}" is missing` };
      }
    });
    // TODO: More input validation here?
    ({ negative_uri, positive_uri, positive_email, notes, image } = body);

    log.debug("body", {
      negative_uri,
      positive_uri,
      positive_email,
      notes,
      image: {
        filename: image.filename,
        contentEncoding: image.contentEncoding,
        contentType: image.contentType,
        dataMD5: md5(image.data || ""),
      },
    });
  } catch (err) {
    Raven.captureException(err);
    return response(400, { error: err.message });
  }

  const imageKey = `image-${requestId}`;

  // "__MOCK__" routes the worker to this deployment's own mock upstream.
  const upstreamServiceUrl =
    UPSTREAM_SERVICE_URL !== "__MOCK__"
      ? UPSTREAM_SERVICE_URL
      : "https://" +
        event.headers.Host +
        "/" +
        event.requestContext.stage +
        "/mock/upstream";

  const messageData = {
    datestamp: new Date().toISOString(),
    upstreamServiceUrl,
    id: requestId,
    user: authArtifacts.id,
    negative_uri,
    positive_uri,
    positive_email,
    notes,
    image: imageKey,
  };
  log.verbose("enqueue", messageData);

  const MessageBody = JSON.stringify(messageData);

  // Store the raw image content in S3.
  const imagePutResult = await S3.putObject({
    Bucket,
    Key: imageKey,
    ContentType: image.contentType,
    Body: image.data,
  }).promise();

  log.verbose("imagePutResult", { imagePutResult });

  // Store a JSON copy of the queued request alongside the image.
  const requestPutResult = await S3.putObject({
    Bucket,
    Key: `${imageKey}-request.json`,
    ContentType: "application/json",
    Body: MessageBody,
  }).promise();

  log.verbose("requestPutResult", { requestPutResult });

  const { QueueUrl } = await SQS.getQueueUrl({ QueueName }).promise();
  const queueSendResult = await SQS.sendMessage({
    QueueUrl,
    MessageBody,
  }).promise();

  log.verbose("queueSendResult", { QueueUrl, queueSendResult });

  const metricsResult = await Metrics.newItem({
    consumer_name: authArtifacts.id,
    watchdog_id: requestId,
    type: image.contentType,
  });

  log.verbose("metricsResult", { metricsResult });

  const responseData = {
    id: requestId,
    negative_uri,
    positive_uri,
    positive_email,
  };
  log.info("summary", responseData);
  return response(201, responseData);
};
174 |
// Build a Lambda proxy-style JSON response. Extra headers are merged on top
// of the default Content-Type.
function response(statusCode, body, headers = {}) {
  const mergedHeaders = Object.assign(
    { "Content-Type": "application/json" },
    headers
  );
  const payload = JSON.stringify(body);
  return { statusCode, headers: mergedHeaders, body: payload };
}
182 |
// Read the request's content type, tolerating either header-name casing
// (API Gateway can deliver "content-type" or "Content-Type").
function getContentType(event) {
  return event.headers["content-type"] || event.headers["Content-Type"];
}
190 |
/**
 * Parse a multipart/form-data request body with Busboy.
 *
 * Resolves to an object keyed by field name: plain fields map to their
 * string values; file fields map to { filename, contentEncoding,
 * contentType, data } with data as a Buffer.
 *
 * Fix: reject with an Error instead of a bare string — the caller builds
 * its 400 response from err.message, which was undefined for a string
 * rejection.
 */
function parseRequestBody(event) {
  return new Promise((resolve, reject) => {
    const result = {};
    const busboy = new Busboy({
      headers: { "content-type": getContentType(event) },
    });
    busboy.on(
      "file",
      (fieldname, file, filename, contentEncoding, contentType) => {
        result[fieldname] = { filename, contentEncoding, contentType };
        const parts = [];
        file.on("data", data => parts.push(data));
        file.on("end", () => (result[fieldname].data = Buffer.concat(parts)));
      }
    );
    busboy.on("field", (fieldname, value) => (result[fieldname] = value));
    busboy.on("error", error => reject(new Error(`Parse error: ${error}`)));
    busboy.on("finish", () => resolve(result));
    busboy.write(event.body, event.isBase64Encoded ? "base64" : "binary");
    busboy.end();
  });
}
213 |
// In-memory credentials lookup cache, only lasts until next deployment or
// container is recycled. Saves a DynamoDB hit and ~900ms for most requests
const credentialsCache = {};

// Resolve Hawk credentials for an id: dev credentials (when enabled), then
// the warm-container cache (unless disabled), then DynamoDB. Returns null
// for an unknown id.
async function lookupCredentials(id) {
  const {
    ENABLE_DEV_AUTH,
    DISABLE_AUTH_CACHE,
    CREDENTIALS_TABLE: TableName,
  } = process.env;

  // Development credentials bypass the database entirely.
  if (ENABLE_DEV_AUTH === "1" && id in DEV_CREDENTIALS) {
    return DEV_CREDENTIALS[id];
  }

  // Serve from the warm-container cache unless caching is disabled.
  if (DISABLE_AUTH_CACHE !== "1" && id in credentialsCache) {
    return credentialsCache[id];
  }

  const result = await documentClient
    .get({
      TableName,
      Key: { id },
      AttributesToGet: ["key", "algorithm"],
    })
    .promise();

  if (!result.Item) {
    return null;
  }

  const { key, algorithm = DEFAULT_HAWK_ALGORITHM } = result.Item;
  credentialsCache[id] = { id, key, algorithm };
  return credentialsCache[id];
}
251 |
--------------------------------------------------------------------------------
/functions/heartbeat-test.js:
--------------------------------------------------------------------------------
1 | const { expect } = require("chai");
2 |
3 | // NOTE: Import the test subject as late as possible so that the mocks work
4 | const heartbeat = require("./heartbeat");
5 |
describe("functions/heartbeat.handler", () => {
  it("responds with 200 OK", async () => {
    // Minimal API Gateway-style event; the handler's response does not
    // depend on its contents.
    const result = await heartbeat.handler({
      path: "/dev/__heartbeat__",
      httpMethod: "GET",
      headers: {},
    });
    expect(result.statusCode).to.equal(200);
    expect(JSON.parse(result.body)).to.deep.equal({ status: "OK" });
  });
});
17 |
--------------------------------------------------------------------------------
/functions/heartbeat.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
// Health-check endpoint: logs the request and always reports OK.
module.exports.handler = async function(event = {}, context = {}) {
  // Request-scoped logger; the logging module is loaded inside the handler.
  const log = require("../lib/logging")({
    name: "heartbeat",
    isRequest: true,
    event,
    context,
  });
  log.info("summary");
  const body = JSON.stringify({ status: "OK" });
  return {
    statusCode: 200,
    headers: { "Content-Type": "application/json" },
    body,
  };
};
17 |
--------------------------------------------------------------------------------
/functions/mockEndpoints.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | module.exports.upstreamPost = async (event, context) => {
4 | const { MOCK_POSITIVE_CHANCE = 0.1 } = process.env;
5 | console.log("upstream", event.body);
6 | return response(
7 | 200,
8 | Object.assign(
9 | {},
10 | baseMatchResponse,
11 | // TODO: Find a more deterministic way to simulate pos/neg match
12 | { IsMatch: Math.random() < parseFloat(MOCK_POSITIVE_CHANCE) }
13 | )
14 | );
15 | };
16 |
17 | module.exports.clientNegativePost = async (event, context) => {
18 | console.log("negative", event.body);
19 | return response(200, { status: "OK" });
20 | };
21 |
22 | module.exports.clientPositivePost = async (event, context) => {
23 | console.log("positive", event.body);
24 | return response(200, { status: "OK" });
25 | };
26 |
27 | module.exports.logPost = async (event, context) => {
28 | console.log("body", event.body);
29 | return response(200, { status: "OK" });
30 | };
31 |
32 | function response(statusCode, body, headers = {}) {
33 | return {
34 | statusCode,
35 | headers: Object.assign({ "Content-Type": "application/json" }, headers),
36 | body: JSON.stringify(body),
37 | };
38 | }
39 |
40 | const baseMatchResponse = {
41 | Status: {
42 | Code: 3000,
43 | Description: "OK",
44 | Exception: null,
45 | },
46 | ContentId: null,
47 | IsMatch: false,
48 | MatchDetails: {
49 | AdvancedInfo: [],
50 | MatchFlags: [],
51 | },
52 | XPartnerCustomerId: null,
53 | TrackingId:
54 | "WUS_418b5903425346a1b1451821c5cd06ee_57c7457ae3a97812ecf8bde9_ddba296dab39454aa00cf0b17e0eb7bf",
55 | EvaluateResponse: null,
56 | };
57 |
--------------------------------------------------------------------------------
/functions/periodicMetrics-test.js:
--------------------------------------------------------------------------------
1 | const { expect } = require("chai");
2 | const sinon = require("sinon");
3 |
4 | const { resetMocks, mocks } = global;
5 |
6 | const periodicMetrics = require("./periodicMetrics");
7 |
describe("functions/periodicMetrics.handler", () => {
  const subject = periodicMetrics.handler;

  beforeEach(() => {
    resetMocks();
  });

  it("should exit when execution time is close to expired", async () => {
    process.env.METRICS_PING_PERIOD = 100;
    const event = {};
    // 100ms remaining is below pingPeriod + 1000, so the handler's loop
    // body should never run and no SQS calls should be made.
    const getRemainingTimeInMillis = sinon.stub().returns(100);
    const context = {
      awsRequestId: "foo",
      getRemainingTimeInMillis,
    };

    await subject(event, context);

    const sqsCalls = mocks.getQueueAttributes.args;
    expect(sqsCalls.length).to.equal(0);
  });

  it("should send a metrics ping based on queue status", async () => {
    const event = {};
    // The first remaining-time check passes (1101ms), the second fails
    // (90ms), so exactly one loop iteration — one ping — runs.
    const getRemainingTimeInMillis = sinon
      .stub()
      .onCall(0)
      .returns(1101)
      .onCall(1)
      .returns(90);
    const context = {
      awsRequestId: "foo",
      getRemainingTimeInMillis,
    };

    await subject(event, context);

    const sqsCalls = mocks.getQueueAttributes.args;
    expect(sqsCalls.length).to.equal(1);

    const postCalls = mocks.requestPost.args;
    expect(postCalls.length).to.equal(1);

    expect(postCalls[0][0].body).to.deep.include({
      event: "poller_heartbeat",
      poller_id: context.awsRequestId,
      items_in_queue: 200,
      items_in_progress: 2,
      items_in_waiting: 20,
    });
  });
});
60 |
--------------------------------------------------------------------------------
/functions/periodicMetrics.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | const AWS = require("aws-sdk");
4 | const SQS = new AWS.SQS({ apiVersion: "2012-11-05" });
5 | const Sentry = require("../lib/sentry");
6 | const Metrics = require("../lib/metrics");
7 | const { wait } = require("../lib/utils.js");
8 |
9 | const Raven = Sentry();
10 |
// Lambda entry point: repeatedly emits queue-heartbeat metrics pings until
// the function's remaining execution time runs low.
module.exports.handler = async function(event = {}, context = {}) {
  const log = require("../lib/logging")({
    name: "periodicMetrics",
    event,
    context,
  });

  const { DEFAULT_METRICS_PING_PERIOD } = require("../lib/constants");

  // Period comes from the environment, falling back to the project default
  // when unset or not parseable as a number.
  const period =
    parseInt(process.env.METRICS_PING_PERIOD, 10) ||
    DEFAULT_METRICS_PING_PERIOD;

  let sentPings = 0;
  log.debug("start");
  // Keep a 1-second safety margin so a ping is never started that cannot
  // finish before the Lambda is killed.
  for (;;) {
    if (context.getRemainingTimeInMillis() <= period + 1000) {
      break;
    }
    try {
      await sendHeartbeatMetrics(log, process.env, context);
      sentPings++;
    } catch (err) {
      Raven.captureException(err);
      log.error("error", { err });
    }
    log.verbose("pause", {
      pingPeriod: period,
      remaining: context.getRemainingTimeInMillis(),
    });
    await wait(period);
  }
  log.verbose("exit", { pingCount: sentPings });
  log.info("summary");
};
44 |
// Fetch approximate queue-depth statistics from SQS and forward them as a
// poller_heartbeat metrics ping, logging how long the SQS round trip took.
const sendHeartbeatMetrics = async (
  log,
  { QUEUE_NAME },
  { awsRequestId: poller_id }
) => {
  const startedAt = Date.now();
  const { QueueUrl } = await SQS.getQueueUrl({
    QueueName: QUEUE_NAME,
  }).promise();
  const attribsResult = await SQS.getQueueAttributes({
    QueueUrl,
    AttributeNames: [
      "ApproximateNumberOfMessages",
      "ApproximateNumberOfMessagesDelayed",
      "ApproximateNumberOfMessagesNotVisible",
    ],
  }).promise();
  log.debug("getQueueAttributesDuration", {
    duration: Date.now() - startedAt,
  });

  // Guard against a missing Attributes payload from SQS.
  const attrs = attribsResult.Attributes || {};

  const pingData = {
    poller_id,
    items_in_queue: parseInt(attrs.ApproximateNumberOfMessages, 10),
    items_in_progress: parseInt(attrs.ApproximateNumberOfMessagesNotVisible, 10),
    items_in_waiting: parseInt(attrs.ApproximateNumberOfMessagesDelayed, 10),
  };
  log.debug("pingData", { pingData });
  return Metrics.pollerHeartbeat(pingData);
};
82 |
--------------------------------------------------------------------------------
/functions/processQueueItem-test.js:
--------------------------------------------------------------------------------
1 | const sinon = require("sinon");
2 | const { expect } = require("chai");
3 |
4 | const {
5 | makePromiseFn,
6 | mocks,
7 | env: { CONTENT_BUCKET, UPSTREAM_SERVICE_URL, UPSTREAM_SERVICE_KEY },
8 | constants: { ReceiptHandle },
9 | } = global;
10 |
11 | const awsRequestId = "test-uuid";
12 |
13 | const Metrics = require("../lib/metrics");
14 | const processQueueItem = require("./processQueueItem");
15 |
// Pipeline tests for the worker: each case drives handler() with a mocked
// upstream response and checks the resulting S3 / SES / callback side
// effects against the shared fixtures declared at the bottom of this file.
describe("functions/processQueueItem.handler", () => {
  let metricsStub;

  beforeEach(() => {
    global.resetMocks();
    metricsStub = sinon.stub(Metrics, "workerWorks");
  });

  afterEach(() => {
    metricsStub.restore();
  });

  it("hits negative_uri on negative match from upstream service", async () => {
    mocks.requestPost
      .onCall(0)
      .resolves(negativeMatchResponse)
      .onCall(1)
      .resolves({});
    await expectCommonItemProcessed(false);

    expect(mocks.sendEmail.called).to.be.false;

    // Negative matches clean up both the image and its request record.
    const deleteCalls = mocks.deleteObject.args;
    expect(deleteCalls[0][0]).to.deep.equal({
      Bucket: CONTENT_BUCKET,
      Key: `${defaultMessage.image}`,
    });
    expect(deleteCalls[1][0]).to.deep.equal({
      Bucket: CONTENT_BUCKET,
      Key: `${defaultMessage.image}-request.json`,
    });
  });

  it("hits positive_uri on positive match from upstream service", async () => {
    const {
      id,
      user,
      negative_uri,
      positive_uri,
      positive_email,
      notes,
      image,
    } = defaultMessage;

    mocks.requestPost
      .onCall(0)
      .resolves(positiveMatchResponse)
      .onCall(1)
      .resolves({});
    await expectCommonItemProcessed(true);

    expect(mocks.sendEmail.called).to.be.true;
    const sendEmailCall = mocks.sendEmail.args[0][0];
    expect(sendEmailCall).to.deep.include({
      Source: global.env.EMAIL_FROM,
      Destination: {
        ToAddresses: [defaultMessage.positive_email],
      },
    });
    [id, user].forEach(v =>
      expect(sendEmailCall.Message.Subject.Data).to.include(v)
    );
    [id, user, notes].forEach(v =>
      expect(sendEmailCall.Message.Body.Text.Data).to.include(v)
    );

    // Positive matches persist the upstream response alongside the image.
    const putObjectCall = mocks.putObject.args[0][0];
    expect(putObjectCall.Bucket).to.equal(CONTENT_BUCKET);
    expect(putObjectCall.Key).to.equal(`${defaultMessage.image}-response.json`);
    expect(putObjectCall.ContentType).to.equal("application/json");
    expect(JSON.parse(putObjectCall.Body)).to.deep.equal({
      id,
      user,
      negative_uri,
      positive_uri,
      positive_email,
      notes,
      image,
      response: positiveMatchResponse,
    });
  });

  it("flags an error on non-3000 status from upstream service", async () => {
    mocks.requestPost
      .onCall(0)
      .resolves(errorMatchResponse)
      .onCall(1)
      .resolves({});
    await expectCommonItemProcessed(false, true);
  });

  it("pauses for rate limiting", async () => {
    // Mock the hitrate table, but only the first three should matter.
    mocks.scanItems
      .onCall(0)
      .returns(makePromiseFn({ Count: 3 }))
      .onCall(1)
      .returns(makePromiseFn({ Count: 2 }))
      .onCall(2)
      .returns(makePromiseFn({ Count: 1 }))
      .onCall(3)
      .returns(makePromiseFn({ Count: 1 }));

    mocks.requestPost
      .onCall(0)
      .resolves(negativeMatchResponse)
      .onCall(1)
      .resolves({});

    await expectCommonItemProcessed(false);

    // Scan should be called 3 times to reflect pausing for rate limit.
    const scanCalls = mocks.scanItems.args;
    expect(scanCalls.length).to.equal(3);
  });

  // Shared assertions exercised by every case: signed-URL generation, the
  // upstream service request, the client callback request, and the
  // worker_works metrics ping.
  const expectCommonItemProcessed = async (positive, error = false) => {
    const body = makeBody();
    const signedImageUrl = "https://example.s3.amazonaws.com/some-image";
    const signedRequestUrl = "https://example.s3.amazonaws.com/some-request";
    const signedResponseUrl = "https://example.s3.amazonaws.com/some-response";
    process.env.METRICS_URL = "https://example.com";

    mocks.getSignedUrl
      .onCall(0)
      .returns(signedImageUrl)
      .onCall(1)
      .returns(signedImageUrl)
      .onCall(2)
      .returns(signedRequestUrl)
      .onCall(3)
      .returns(signedResponseUrl);

    await processQueueItem.handler(
      { Records: [{ receiptHandle: ReceiptHandle, body }] },
      { awsRequestId }
    );

    expect(mocks.getSignedUrl.args[0]).to.deep.equal([
      "getObject",
      {
        Bucket: CONTENT_BUCKET,
        Expires: 600,
        Key: defaultMessage.image,
      },
    ]);

    expect(mocks.requestPost.args[0][0]).to.deep.equal({
      url: `${UPSTREAM_SERVICE_URL}?enhance`,
      headers: {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": UPSTREAM_SERVICE_KEY,
      },
      json: true,
      body: {
        DataRepresentation: "URL",
        Value: signedImageUrl,
      },
    });

    let upstreamResponse;
    if (error) {
      upstreamResponse = errorMatchResponse;
    } else if (positive) {
      upstreamResponse = positiveMatchResponse;
    } else {
      upstreamResponse = negativeMatchResponse;
    }

    expect(mocks.requestPost.args[1][0]).to.deep.equal({
      url: defaultMessage[positive ? "positive_uri" : "negative_uri"],
      headers: {
        "Content-Type": "application/json",
      },
      json: true,
      body: {
        error,
        watchdog_id: defaultMessage.id,
        notes: defaultMessage.notes,
        response: upstreamResponse,
        positive,
      },
    });

    const response = positive ? positiveMatchResponse : negativeMatchResponse;
    expect(metricsStub.called).to.be.true;
    expect(metricsStub.args[0][0]).to.deep.include({
      consumer_name: defaultMessage.user,
      worker_id: awsRequestId,
      watchdog_id: defaultMessage.id,
      photodna_tracking_id: response.TrackingId,
      is_error: false,
      is_match: response.IsMatch,
    });
    expect(metricsStub.args[0][0]).to.include.keys(
      "timing_sent",
      "timing_received",
      "timing_submitted"
    );
  };
});
217 |
// Canned upstream (PhotoDNA-style) response: no match, OK status (3000).
const negativeMatchResponse = {
  Status: {
    Code: 3000,
    Description: "OK",
    Exception: null,
  },
  ContentId: null,
  IsMatch: false,
  MatchDetails: {
    AdvancedInfo: [],
    MatchFlags: [],
  },
  XPartnerCustomerId: null,
  TrackingId:
    "WUS_418b5903425346a1b1451821c5cd06ee_57c7457ae3a97812ecf8bde9_ddba296dab39454aa00cf0b17e0eb7bf",
  EvaluateResponse: null,
};

// Same shape as the negative response but with a non-3000 status code,
// which the worker treats as an upstream error.
const errorMatchResponse = Object.assign({}, negativeMatchResponse, {
  Status: {
    Code: 3208,
    Description: "image too large",
    Exception: null,
  },
});

// Canned response representing a positive match carrying one match flag.
const positiveMatchResponse = {
  Status: {
    Code: 3000,
    Description: "OK",
    Exception: null,
  },
  ContentId: null,
  IsMatch: true,
  MatchDetails: {
    AdvancedInfo: [],
    MatchFlags: [
      {
        AdvancedInfo: [
          {
            Key: "MatchId",
            Value: "117721",
          },
        ],
        Source: "Test",
        Violations: ["A1"],
      },
    ],
  },
  XPartnerCustomerId: null,
  TrackingId:
    "WUS_418b5903425346a1b1451821c5cd06ee_57c7457ae3a97812ecf8bde9_0709e0136ee342e993092edceecbc407",
  EvaluateResponse: null,
};
272 |
// Canonical queue message used by every test case in this file.
const defaultMessage = {
  datestamp: "2018-07-31T12:00:00Z",
  upstreamServiceUrl: UPSTREAM_SERVICE_URL,
  id: "8675309",
  user: "devuser",
  negative_uri: "https://example.com/negative?id=123",
  positive_uri: "https://example.com/positive?id=123",
  positive_email: "foo@example.com",
  notes: "this is a test",
  image: "image-8675309",
};

// Serialize defaultMessage (with optional per-test overrides) into an SQS
// message body string.
const makeBody = (overrides = {}) =>
  JSON.stringify(Object.assign({}, defaultMessage, overrides));
287 |
--------------------------------------------------------------------------------
/functions/processQueueItem.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | const AWS = require("aws-sdk");
4 | const S3 = new AWS.S3({ apiVersion: "2006-03-01" });
5 | const SES = new AWS.SES({ apiVersion: "2010-12-01" });
6 | const documentClient = new AWS.DynamoDB.DocumentClient();
7 | const request = require("request-promise-native");
8 | const { RATE_LIMIT, RATE_PERIOD, RATE_WAIT } = require("../lib/constants");
9 | const Sentry = require("../lib/sentry");
10 | const Metrics = require("../lib/metrics");
11 | const { wait, epochNow } = require("../lib/utils.js");
12 |
13 | const Raven = Sentry();
14 |
15 | exports.handler = async function(event = {}, context = {}) {
16 | const { Records } = event;
17 | const log = require("../lib/logging")({
18 | name: "processQueueItem",
19 | event,
20 | context,
21 | });
22 |
23 | const results = [];
24 | for (let idx = 0; idx < Records.length; idx++) {
25 | const result = await exports.handleOne(Records[idx], context);
26 | results.push(result);
27 | }
28 |
29 | log.debug("done", { resultCount: results.length });
30 | log.info("summary", { recordCount: Records.length });
31 | return results;
32 | };
33 |
// Subject line for positive-match alert emails.
const emailSubject = ({ id, user }) =>
  "[watchdog-proxy] Positive match for " + user + " (" + id + ")";
36 |
// Plain-text body for positive-match alert emails. The signed S3 URLs
// interpolated here stop working after expirationDate.
const emailBody = ({
  id,
  datestamp,
  user,
  notes,
  imageUrl,
  requestUrl,
  responseUrl,
  expirationDate,
  upstreamServiceResponse,
}) => `
Watchdog ID:
${id}

Client application:
${user}

Datestamp:
${datestamp}

Notes:
${notes}

Match metadata:
${JSON.stringify(upstreamServiceResponse, null, " ")}

NOTE: The following URLs will expire and stop working after ${expirationDate}.

Image URL:
${imageUrl}

Request JSON:
${requestUrl}

Response JSON:
${responseUrl}
`;
74 |
// Process one SQS record end-to-end: rate-limit, submit the image to the
// upstream matching service, persist/clean up S3 artifacts, optionally email
// an alert, call the client back, and always emit a worker_works metrics
// ping. Resolves with the watchdog id even on handled errors.
exports.handleOne = async function(event, context) {
  const log = require("../lib/logging")({
    name: "processQueueItem.worker",
    event,
    context,
  });
  log.info("summary");

  const { body } = event;
  const { awsRequestId } = context;

  const {
    HITRATE_TABLE,
    CONTENT_BUCKET: Bucket,
    UPSTREAM_SERVICE_KEY,
    EMAIL_FROM,
    EMAIL_TO,
    EMAIL_EXPIRES,
  } = process.env;

  log.verbose("env", {
    HITRATE_TABLE,
    Bucket,
    EMAIL_FROM,
    EMAIL_TO,
    EMAIL_EXPIRES,
  });

  const parsedBody = JSON.parse(body);
  log.verbose("parsedBody", { parsedBody });

  const {
    datestamp,
    upstreamServiceUrl,
    id,
    user,
    negative_uri,
    positive_uri,
    positive_email,
    notes,
    image,
  } = parsedBody;

  // Start constructing metrics ping data here, so that if there are any
  // exceptions we can at least send out a partially filled-in ping with
  // is_error: true
  const metricsPing = {
    consumer_name: user,
    worker_id: awsRequestId,
    watchdog_id: id,
    photodna_tracking_id: null,
    is_match: false,
    is_error: false,
    timing_sent: null,
    timing_received: null,
    timing_submitted: null,
  };

  log.info("processing", { id });

  // Report a failure to Sentry, mark the ping as an error, and (by default)
  // finish the record via done(). Pass isDone=false to keep processing.
  const handleError = async (err, logType, extra = {}, isDone = true) => {
    Raven.captureException(err);
    metricsPing.is_error = true;
    log.error(logType, Object.assign({ err }, extra));
    return isDone ? done() : Promise.resolve();
  };

  // Always emit the worker_works metrics ping before resolving with the id.
  const done = async () => {
    const metricsResult = await Metrics.workerWorks(metricsPing);
    log.verbose("metricsResult", { metricsResult });
    return id;
  };

  // Step #1: Handle rate limiting and pause if necessary
  try {
    // Pause if we're at the rate limit for current expiration window
    let rateLimited = false;
    do {
      const data = await documentClient
        .scan({
          TableName: HITRATE_TABLE,
          FilterExpression: "expiresAt > :now",
          ExpressionAttributeValues: { ":now": epochNow() },
        })
        .promise();

      log.verbose("hitRateData", { data });

      if (data.Count >= RATE_LIMIT) {
        log.info("pausing");
        rateLimited = true;
        await wait(RATE_WAIT);
      } else {
        rateLimited = false;
      }
    } while (rateLimited);

    // Count the current request in hitrate
    const hitRatePutResult = await documentClient
      .put({
        TableName: HITRATE_TABLE,
        Item: {
          id,
          timestamp: epochNow(),
          // RATE_PERIOD is in ms; expiresAt is epoch seconds.
          expiresAt: epochNow() + Math.floor(RATE_PERIOD / 1000),
        },
      })
      .promise();

    log.verbose("hitRatePutResult", { hitRatePutResult });
  } catch (err) {
    return handleError(err, "hitRateError");
  }

  // Step #2: Make a request to the upstream service
  let upstreamServiceResponse, IsMatch;
  try {
    const imageUrl = S3.getSignedUrl("getObject", {
      Bucket,
      Key: image,
      Expires: 600, // 600 seconds = 10 minutes (S3 Expires is in seconds)
    });

    log.verbose("imageUrl", { imageUrl });

    metricsPing.timing_sent = Date.now() - Date.parse(datestamp);

    const timingReceivedStart = Date.now();
    upstreamServiceResponse = await request.post({
      url: `${upstreamServiceUrl}?enhance`,
      headers: {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": UPSTREAM_SERVICE_KEY,
      },
      json: true,
      body: {
        DataRepresentation: "URL",
        Value: imageUrl,
      },
    });
    metricsPing.timing_received = Date.now() - timingReceivedStart;
    metricsPing.photodna_tracking_id = upstreamServiceResponse.TrackingId;

    ({ IsMatch } = upstreamServiceResponse);
    metricsPing.is_match = IsMatch;

    log.verbose("upstreamServiceResponse", { upstreamServiceResponse });
  } catch (err) {
    return handleError(err, "upstreamServiceError");
  }

  // Step #3: Handle the response from the upstream service
  if (!IsMatch) {
    try {
      // Step #3a: On negative match, clean up the image and request details.
      const deleteResult = await Promise.all([
        S3.deleteObject({ Bucket, Key: `${image}` }).promise(),
        S3.deleteObject({ Bucket, Key: `${image}-request.json` }).promise(),
      ]);
      log.verbose("deleteResult", { deleteResult });
    } catch (err) {
      return handleError(err, "deleteError");
    }
  } else {
    try {
      // Step #3b: On positive match, store the details of the match response.
      const putResult = await S3.putObject({
        Bucket,
        Key: `${image}-response.json`,
        ContentType: "application/json",
        Body: JSON.stringify({
          id,
          user,
          negative_uri,
          positive_uri,
          positive_email,
          notes,
          image,
          response: upstreamServiceResponse,
        }),
      }).promise();

      log.verbose("putResult", { putResult });
    } catch (err) {
      return handleError(err, "putError");
    }
  }

  // Step #4: Send an email alert on positive match.
  if (IsMatch) {
    try {
      // Recipients: the submitter's positive_email plus an optional
      // operator address from the environment. Skip entirely when either
      // the sender or all recipients are missing.
      const ToAddresses = [];
      if (positive_email) {
        ToAddresses.push(positive_email);
      }
      if (EMAIL_TO) {
        ToAddresses.push(EMAIL_TO);
      }
      if (EMAIL_FROM && ToAddresses.length) {
        const URL_TTL_IN_SEC = parseInt(EMAIL_EXPIRES, 10);
        const imageUrl = S3.getSignedUrl("getObject", {
          Bucket,
          Key: image,
          Expires: URL_TTL_IN_SEC,
        });

        const requestUrl = S3.getSignedUrl("getObject", {
          Bucket,
          Key: `${image}-request.json`,
          Expires: URL_TTL_IN_SEC,
        });

        const responseUrl = S3.getSignedUrl("getObject", {
          Bucket,
          Key: `${image}-response.json`,
          Expires: URL_TTL_IN_SEC,
        });

        const expirationDate = new Date(
          Date.now() + URL_TTL_IN_SEC * 1000
        ).toISOString();

        const emailParams = {
          Source: EMAIL_FROM,
          Destination: { ToAddresses },
          Message: {
            Subject: {
              Charset: "UTF-8",
              Data: emailSubject({ id, user }),
            },
            Body: {
              Text: {
                Charset: "UTF-8",
                Data: emailBody({
                  id,
                  datestamp,
                  user,
                  notes,
                  imageUrl,
                  requestUrl,
                  responseUrl,
                  expirationDate,
                  upstreamServiceResponse,
                }),
              },
            },
          },
        };
        log.verbose("emailParams", { emailParams });

        const emailResult = await SES.sendEmail(emailParams).promise();
        log.verbose("emailResult", { emailResult });
        log.info("sentEmail", { messageId: emailResult.MessageId });
      }
    } catch (err) {
      // Do not bail out on an error from email, we can still send a callback
      await handleError(err, "emailError", {}, false);
    }
  }

  // Step #5: Send a callback request to the client service
  const callbackUrl = IsMatch ? positive_uri : negative_uri;
  try {
    const timingSubmittedStart = Date.now();
    // Any non-3000 (or missing) upstream status is reported as an error.
    const upstreamIsError =
      !upstreamServiceResponse ||
      !upstreamServiceResponse.Status ||
      upstreamServiceResponse.Status.Code !== 3000;
    const callbackResult = await request.post({
      url: callbackUrl,
      headers: {
        "Content-Type": "application/json",
      },
      json: true,
      body: {
        watchdog_id: id,
        positive: upstreamServiceResponse.IsMatch,
        notes,
        error: upstreamIsError,
        response: upstreamServiceResponse,
      },
    });
    metricsPing.timing_submitted = Date.now() - timingSubmittedStart;
    log.verbose("callbackResult", { callbackResult });
  } catch (err) {
    return handleError(err, "callbackError", { callbackUrl });
  }

  return done();
};
365 |
--------------------------------------------------------------------------------
/functions/version-test.js:
--------------------------------------------------------------------------------
1 | const { expect } = require("chai");
2 | const packageMeta = require("../package.json");
3 |
4 | // NOTE: Import the test subject as late as possible so that the mocks work
5 | const version = require("./version");
6 |
// Verifies the /__version__ endpoint reports the deployed commit and the
// package's version / repository metadata.
describe("functions/version.handler", () => {
  it("responds with deployed version information", async () => {
    const GIT_COMMIT = "8675309";
    process.env.GIT_COMMIT = GIT_COMMIT;
    const result = await version.handler({
      path: "/dev/__version__",
      httpMethod: "GET",
      headers: {},
    });
    expect(result.statusCode).to.equal(200);
    const payload = JSON.parse(result.body);
    expect(payload).to.deep.equal({
      commit: GIT_COMMIT,
      version: packageMeta.version,
      source: packageMeta.repository.url,
    });
  });
});
24 |
--------------------------------------------------------------------------------
/functions/version.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | const packageMeta = require("../package.json");
3 |
// HTTP handler exposing the deployed git commit plus package metadata as a
// JSON body (Dockerflow-style version endpoint).
module.exports.handler = async function(event = {}, context = {}) {
  const log = require("../lib/logging")({
    name: "version",
    isRequest: true,
    event,
    context,
  });
  log.info("summary");

  const { GIT_COMMIT: commit = "" } = process.env;
  const payload = {
    commit,
    version: packageMeta.version,
    source: packageMeta.repository.url,
  };
  return {
    statusCode: 200,
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  };
};
23 |
--------------------------------------------------------------------------------
/lib/constants.js:
--------------------------------------------------------------------------------
module.exports = {
  // Default Hawk HMAC algorithm for client credentials.
  DEFAULT_HAWK_ALGORITHM: "sha256",
  // Built-in credentials intended for local development only.
  DEV_CREDENTIALS: {
    devuser: {
      key: "devkey",
      algorithm: "sha256",
    },
  },
  // Rate limiting: at most RATE_LIMIT active hits per RATE_PERIOD (ms);
  // a rate-limited worker re-checks after waiting RATE_WAIT (ms).
  RATE_LIMIT: 5,
  RATE_PERIOD: 1000,
  RATE_WAIT: 100,
  // Queue polling settings — presumably SQS long-poll seconds and an
  // inter-poll delay in ms; their consumers are not in this file, confirm
  // against the poller code.
  MAX_LONG_POLL_PERIOD: 20,
  POLL_DELAY: 100,
  // Default heartbeat interval (ms) used by functions/periodicMetrics.js
  // when METRICS_PING_PERIOD is not set.
  DEFAULT_METRICS_PING_PERIOD: 1000,
  // Ping-centre ingestion endpoints selected by lib/metrics.js per NODE_ENV.
  TILES_STAGE_URL: "https://onyx_tiles.stage.mozaws.net/v3/links/ping-centre",
  TILES_PROD_URL: "https://tiles.services.mozilla.com/v3/links/ping-centre",
};
18 |
--------------------------------------------------------------------------------
/lib/logging.js:
--------------------------------------------------------------------------------
// Configure logging and wrap mozlog methods in decorators that automatically
// include function context and event information

// Logging knobs are read from the environment with safe defaults.
const {
  LOG_LEVEL = "info",
  LOG_FORMAT = "heka",
  LOG_DEBUG = "0",
  GIT_COMMIT = "",
} = process.env;

// Shared mozlog root; named per-module loggers are derived from it below.
const mozlog = require("mozlog")({
  app: "watchdog-proxy",
  level: LOG_LEVEL,
  fmt: LOG_FORMAT,
  debug: LOG_DEBUG === "1",
});
16 |
17 | module.exports = ({ name, event, context, isRequest = false }) => {
18 | const startTime = Date.now();
19 |
20 | const selector = isRequest ? selectRequest : selectBase;
21 |
22 | const log = mozlog(name);
23 |
24 | const out = {};
25 | out.commonFields = {
26 | version: GIT_COMMIT,
27 | };
28 | LOG_LEVELS.forEach(
29 | level =>
30 | (out[level] = (op, fields = {}) =>
31 | log[level](
32 | op,
33 | selector({
34 | startTime,
35 | event,
36 | context,
37 | fields,
38 | commonFields: out.commonFields,
39 | })
40 | ))
41 | );
42 | return out;
43 | };
44 |
// mozlog method names exposed on every logger built by this module,
// ordered from lowest to highest severity.
const LOG_LEVELS = [
  "trace",
  "verbose",
  "debug",
  "info",
  "warn",
  "error",
  "critical",
];
54 |
// Log-field selectors: build the structured field object attached to each
// log record, depending on whether the logger wraps an HTTP request.

// For request loggers: merge API Gateway event fields with the base fields.
const selectRequest = ({
  startTime,
  event,
  context,
  fields = {},
  commonFields = {},
}) =>
  Object.assign(
    selectRequestEvent(event),
    // Fix: forward commonFields so request-scoped logs also carry the
    // shared fields (e.g. the deployed version); previously they were
    // accepted here but silently dropped.
    selectBase({ startTime, context, fields, commonFields })
  );

// Base fields for every record: timestamps, Lambda context, shared fields,
// then per-call fields (later sources win on key collisions).
const selectBase = ({ startTime, context, fields = {}, commonFields = {} }) =>
  Object.assign(
    { timestamp: Date.now(), t: Date.now() - startTime },
    selectContext(context),
    commonFields,
    fields
  );

// https://docs.aws.amazon.com/lambda/latest/dg/eventsources.html#eventsources-api-gateway-request
const selectRequestEvent = ({
  path,
  httpMethod: method,
  headers: {
    Host: hostname,
    "User-Agent": agent,
    "X-Forwarded-For": remoteAddressChain,
  },
}) => ({
  path,
  method,
  remoteAddressChain,
  agent,
  hostname,
});

// https://docs.aws.amazon.com/lambda/latest/dg/nodejs-prog-model-context.html
const selectContext = ({
  awsRequestId: rid,
  functionName,
  functionVersion,
  memoryLimitInMB,
}) => ({
  rid,
  functionName,
  functionVersion,
  memoryLimitInMB,
});
104 |
--------------------------------------------------------------------------------
/lib/metrics-test.js:
--------------------------------------------------------------------------------
1 | const { expect } = require("chai");
2 | const { mocks } = global;
3 | const { TILES_STAGE_URL, TILES_PROD_URL } = require("./constants");
4 | const Metrics = require("./metrics");
5 |
// Tests for the metrics pings: endpoint selection in ping(), and the exact
// body each named ping helper sends (extra properties must be dropped).
describe("lib/metrics", () => {
  beforeEach(() => {
    global.resetMocks();
  });

  describe("Metrics", () => {
    describe("ping", () => {
      const subject = Metrics.ping;

      const expectPostURL = async url => {
        await subject({ foo: true });
        expect(mocks.requestPost.called).to.be.true;
        expect(mocks.requestPost.args[0][0].url).to.equal(url);
      };

      it("uses METRICS_URL env var when available", async () => {
        process.env.METRICS_URL = "https://example.com";
        await expectPostURL(process.env.METRICS_URL);
        delete process.env.METRICS_URL;
      });

      it("uses staging URL when NODE_ENV===development", async () => {
        process.env.NODE_ENV = "development";
        await expectPostURL(TILES_STAGE_URL);
      });

      it("uses production URL when NODE_ENV===production", async () => {
        process.env.NODE_ENV = "production";
        await expectPostURL(TILES_PROD_URL);
      });
    });

    // Shared assertion: the posted body is exactly topic + event + params
    // (plus a numeric timestamp); the extra "ignored" property must not
    // survive the helper's property whitelist.
    const expectPostBody = async (subject, event, params) => {
      await subject(Object.assign({ ignored: "extra" }, params));
      const body = mocks.requestPost.args[0][0].body;
      expect(body).to.include.key("timestamp");
      // Hacky test to assert that the timestamp is roughly equivalent to the
      // current time in milliseconds - e.g. not an ISO8601 string or other
      expect(Date.now() - parseInt(body.timestamp) < 1000).to.be.true;
      delete body.timestamp;
      expect(body).to.deep.equal(
        Object.assign(
          {
            topic: "watchdog-proxy",
            event,
          },
          params
        )
      );
    };

    describe("newItem", () => {
      const subject = Metrics.newItem;
      it("sends expected properties", async () => {
        await expectPostBody(subject, "new_item", {
          consumer_name: "foo",
          watchdog_id: "bar",
          type: "baz",
        });
      });
    });

    describe("pollerHeartbeat", () => {
      const subject = Metrics.pollerHeartbeat;
      it("sends expected properties", async () => {
        await expectPostBody(subject, "poller_heartbeat", {
          poller_id: "123",
          items_in_queue: "456",
          items_in_progress: "789",
          items_in_waiting: "012",
        });
      });
    });

    describe("workerWorks", () => {
      const subject = Metrics.workerWorks;
      it("sends expected properties", async () => {
        await expectPostBody(subject, "worker_works", {
          consumer_name: "qwe",
          worker_id: "ytr",
          watchdog_id: "rty",
          photodna_tracking_id: "uio",
          is_match: "asd",
          is_error: "fgh",
          timing_retrieved: "jkl",
          timing_sent: "zxc",
          timing_received: "vbn",
          timing_submitted: "mnb",
        });
      });
    });
  });
});
99 |
--------------------------------------------------------------------------------
/lib/metrics.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | const request = require("request-promise-native");
4 |
5 | const { TILES_STAGE_URL, TILES_PROD_URL } = require("./constants");
6 |
const Metrics = (module.exports = {
  // POST a single event to the ping-centre service. The endpoint is taken
  // from the METRICS_URL env var when set; otherwise NODE_ENV selects the
  // production or staging tiles URL.
  ping: async (data = {}) => {
    const { METRICS_URL, NODE_ENV } = process.env;
    const url =
      METRICS_URL ||
      (NODE_ENV === "production" ? TILES_PROD_URL : TILES_STAGE_URL);
    const body = Object.assign(
      {
        topic: "watchdog-proxy",
        timestamp: Date.now(),
      },
      data
    );
    return request.post({
      url,
      headers: { "Content-Type": "application/json" },
      json: true,
      body,
    });
  },

  // Ping sent when a new item is accepted into the queue. Destructuring
  // whitelists the allowed properties; anything else is dropped.
  newItem: ({ consumer_name, watchdog_id, type }) =>
    Metrics.ping({
      event: "new_item",
      consumer_name,
      watchdog_id,
      type,
    }),

  // Periodic queue-status heartbeat emitted by the poller.
  pollerHeartbeat: ({
    poller_id,
    items_in_queue,
    items_in_progress,
    items_in_waiting,
  }) =>
    Metrics.ping({
      event: "poller_heartbeat",
      poller_id,
      items_in_queue,
      items_in_progress,
      items_in_waiting,
    }),

  // Per-item worker result ping, including the timing breakdown.
  workerWorks: ({
    consumer_name,
    worker_id,
    watchdog_id,
    photodna_tracking_id,
    is_match,
    is_error,
    timing_retrieved,
    timing_sent,
    timing_received,
    timing_submitted,
  }) =>
    Metrics.ping({
      event: "worker_works",
      consumer_name,
      worker_id,
      watchdog_id,
      photodna_tracking_id,
      is_match,
      is_error,
      timing_retrieved,
      timing_sent,
      timing_received,
      timing_submitted,
    }),
});
81 |
--------------------------------------------------------------------------------
/lib/sentry.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | const { logError } = require("./utils.js");
3 |
4 | const Raven = require("raven");
5 |
6 | module.exports = () => {
7 | const { SENTRY_DSN, GIT_COMMIT } = process.env;
8 | Raven.config(SENTRY_DSN, { release: GIT_COMMIT }).install(err =>
9 | logError("Sentry install failed", err)
10 | );
11 | return Raven;
12 | };
13 |
--------------------------------------------------------------------------------
/lib/test-setup.js:
--------------------------------------------------------------------------------
1 | const sinon = require("sinon");
2 | const AWS = require("aws-sdk");
3 | const request = require("request-promise-native");
4 | const mockRequire = require("mock-require");
5 |
// Fake environment; resetMocks() copies these values onto process.env
// before every test.
global.env = {
  CONFIG_TABLE: "test-config",
  CREDENTIALS_TABLE: "test-credentials",
  HITRATE_TABLE: "test-hitrate",
  QUEUE_NAME: "test-queue",
  CONTENT_BUCKET: "test-bucket",
  PROCESS_QUEUE_FUNCTION: "process-queue-item",
  UPSTREAM_SERVICE_URL: "https://api.example.com/v1.0/Match",
  UPSTREAM_SERVICE_KEY: "1234567890",
  EMAIL_FROM: "lorchard@mozilla.com",
  EMAIL_TO: "",
  EMAIL_EXPIRES: 600,
  LOG_LEVEL: process.env.LOG_LEVEL || "critical",
};

// Canned AWS-shaped fixture values shared across the test files.
global.constants = {
  ETag: '"ae1e7accaab42504a930ecc6e6aa34c2"',
  QueueUrl: "https://example.com/sqs/",
  QueueAttributes: {
    ApproximateNumberOfMessages: "200",
    ApproximateNumberOfMessagesDelayed: "20",
    ApproximateNumberOfMessagesNotVisible: "2",
  },
  MessageId: "abba123",
  requestId: "8675309",
  ReceiptHandle: "5551212",
  defaultS3GetObjectResponse: {
    AcceptRanges: "bytes",
    Expiration:
      'expiry-date="Sat, 09 Jun 2018 00:00:00 GMT", rule-id="DailyCleanup"',
    LastModified: "2018-05-09T22:56:51.000Z",
    ContentLength: 20,
    ETag: '"ae1e7accaab42504a930ecc6e6aa34c2"',
    ContentType: "image/jpeg",
    Metadata: {},
    Body: Buffer.from("THIS IS NOT AN IMAGE"),
  },
};

// Replace the Sentry client module so tests never report real events.
const mockRaven = {
  config: () => mockRaven,
  install: () => {},
  captureException: () => {},
};
mockRequire("raven", mockRaven);

// Shrunken rate-limit constants so the rate-limiting code paths run fast
// under test; resetMocks() restores these after any per-test overrides.
const defaultConstantsModule = Object.assign({}, require("./constants"), {
  RATE_PERIOD: 500,
  RATE_LIMIT: 2,
  RATE_WAIT: 10,
  MIN_HEARTBEAT_PERIOD: 0,
});
global.constantsModule = Object.assign({}, defaultConstantsModule);
mockRequire("./constants", global.constantsModule);

global.mocks = {};
// Build an AWS-SDK-style result: an object whose .promise() resolves to out.
global.makePromiseFn = out => ({ promise: () => Promise.resolve(out) });
// A sinon stub that always returns such a promise-wrapped result.
global.makePromiseStub = out => sinon.stub().returns(global.makePromiseFn(out));
64 |
65 | global.resetMocks = () => {
66 | const {
67 | mocks,
68 | makePromiseStub,
69 | constants: { QueueUrl, QueueAttributes, MessageId, ETag },
70 | } = global;
71 |
72 | Object.assign(global.constantsModule, defaultConstantsModule);
73 | Object.assign(process.env, global.env);
74 | Object.values(global.mocks).forEach(mock => mock.resetHistory());
75 |
76 | const pSQS = AWS.SQS.prototype;
77 | const pSES = AWS.SES.prototype;
78 | const pS3 = AWS.S3.prototype;
79 | const pDocumentClient = AWS.DynamoDB.DocumentClient.prototype;
80 | const pLambda = AWS.Lambda.prototype;
81 |
82 | Object.assign(mocks, {
83 | sendEmail: (pSES.sendEmail = makePromiseStub({})),
84 | deleteMessage: (pSQS.deleteMessage = makePromiseStub({})),
85 | queryItems: (pDocumentClient.query = makePromiseStub({})),
86 | scanItems: (pDocumentClient.scan = makePromiseStub({ Count: 0 })),
87 | getItem: (pDocumentClient.get = makePromiseStub({})),
88 | putItem: (pDocumentClient.put = makePromiseStub({})),
89 | deleteItem: (pDocumentClient.delete = makePromiseStub({})),
90 | getQueueAttributes: (pSQS.getQueueAttributes = makePromiseStub({
91 | Attributes: QueueAttributes,
92 | })),
93 | getQueueUrl: (pSQS.getQueueUrl = makePromiseStub({ QueueUrl })),
94 | getSignedUrl: (pS3.getSignedUrl = sinon.stub().returns("")),
95 | putObject: (pS3.putObject = makePromiseStub({ ETag })),
96 | deleteObject: (pS3.deleteObject = makePromiseStub({})),
97 | requestPost: (request.post = sinon.stub().resolves({})),
98 | sendMessage: (pSQS.sendMessage = makePromiseStub({ MessageId })),
99 | receiveMessage: (pSQS.receiveMessage = makePromiseStub({ MessageId })),
100 | invoke: (pLambda.invoke = makePromiseStub({})),
101 | });
102 | };
103 |
104 | global.resetMocks();
105 |
--------------------------------------------------------------------------------
/lib/utils.js:
--------------------------------------------------------------------------------
1 | const crypto = require("crypto");
2 |
3 | const jsonPretty = data => JSON.stringify(data, null, " ");
4 |
5 | const md5 = data =>
6 | crypto
7 | .createHash("md5")
8 | .update(data)
9 | .digest("hex");
10 |
11 | const wait = delay => new Promise(resolve => setTimeout(resolve, delay));
12 |
13 | const epochNow = () => Math.floor(Date.now() / 1000);
14 |
15 | module.exports = {
16 | jsonPretty,
17 | md5,
18 | wait,
19 | epochNow,
20 | };
21 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "watchdog-proxy",
3 | "version": "0.0.1",
4 | "description": "Rate limiting proxy for watchdog requests",
5 | "scripts": {
6 | "precommit": "lint-staged && npm run test:js",
7 | "deploy": "cross-env NODE_ENV=production serverless deploy",
8 | "deploy:dev": "cross-env NODE_ENV=development ENABLE_DEV_AUTH=1 serverless deploy",
9 | "deploy:master": "cross-env STAGE=dev NODE_ENV=development ENABLE_DEV_AUTH=1 UPSTREAM_SERVICE_URL=https://watchdog-proxy.dev.mozaws.net/mock/upstream UPSTREAM_SERVICE_KEY=__MOCK__ METRICS_URL=https://watchdog-proxy.dev.mozaws.net/mock/log EMAIL_FROM=lorchard@mozilla.com serverless deploy",
10 | "info": "serverless info",
11 | "lint": "npm-run-all lint:*",
12 | "lint:js": "eslint .",
13 | "prettier": "prettier --trailing-comma=es5 --write \"{functions,lib,bin}/**/*.js\"",
14 | "logs": "serverless logs",
15 | "remove": "serverless remove",
16 | "start": "npm run watch",
17 | "test": "npm-run-all test:*",
18 | "test:js": "mocha --require lib/test-setup.js --recursive \"{functions,lib}/**/*-test.js\"",
19 | "watch": "npm-run-all --parallel watch:*",
20 | "watch:lint": "onchange \"{functions,lib}/**/*.js\" -v -i -p -- npm run lint",
21 | "watch:test": "onchange \"{functions,lib}/**/*.js\" -v -i -p -- npm run test",
22 | "client": "node bin/client.js"
23 | },
24 | "engines": {
25 | "node": ">=8"
26 | },
27 | "lint-staged": {
28 | "*.js": [
29 | "npm run prettier",
30 | "git add"
31 | ]
32 | },
33 | "homepage": "https://github.com/mozilla/watchdog-proxy/",
34 | "repository": {
35 | "type": "git",
36 | "url": "https://github.com/mozilla/watchdog-proxy.git"
37 | },
  "author": "Les Orchard",
39 | "license": "MPL-2.0",
40 | "devDependencies": {
41 | "aws-sdk": "2.411.0",
42 | "chai": "4.2.0",
43 | "commander": "2.19.0",
44 | "cross-env": "5.2.0",
45 | "eslint": "5.14.1",
46 | "eslint-plugin-mozilla": "1.1.1",
47 | "eslint-plugin-no-unsanitized": "3.0.2",
48 | "husky": "1.3.1",
49 | "lint-staged": "8.1.4",
50 | "mocha": "6.0.2",
51 | "mock-require": "3.0.3",
52 | "npm-run-all": "4.1.5",
53 | "onchange": "5.2.0",
54 | "prettier": "1.16.4",
55 | "serverless": "1.38.0",
56 | "serverless-apigw-binary": "0.4.4",
57 | "serverless-domain-manager": "2.6.13",
58 | "serverless-s3-remover": "0.6.0",
59 | "serverless-sqs-alarms-plugin": "0.1.7",
60 | "sinon": "7.2.4"
61 | },
62 | "dependencies": {
63 | "busboy": "0.3.0",
64 | "eslint-plugin-node": "8.0.1",
65 | "hawk": "7.0.10",
66 | "raven": "2.6.4",
67 | "mozlog": "2.2.0",
68 | "request": "2.88.0",
69 | "request-promise-native": "1.0.7"
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": [
3 | "config:base",
4 | "group:all"
5 | ],
  "patch": { "enabled": false },
7 | "schedule": ["before 5am on monday"]
8 | }
9 |
--------------------------------------------------------------------------------
/serverless.local.yml-dist:
--------------------------------------------------------------------------------
1 | # Change this to a name that's unique to you
2 | stage: your-name-here-dev
3 |
4 | # Using __MOCK__ values here causes the stack to use its own internal mock
5 | # endpoints. Otherwise, you can configure a real upstream service URL and key
6 | upstreamService:
7 | url: __MOCK__
8 | key: __MOCK__
9 |
10 | # Uncomment and change the following properties to set up a custom domain for
11 | # your own dev stack
12 | customDomain:
13 | enabled: false
14 | # enabled: true
15 | # stage: your-name-here-dev
16 | # domainName: watchdog-proxy-your-name.dev.mozaws.net
17 | # certificateArn: 'arn:aws:acm:us-east-1:927034868273:certificate/61e462bd-410e-48b6-95da-fa5501430d1d'
18 | # basePath: ''
19 | # createRoute53Record: true
20 |
--------------------------------------------------------------------------------
/serverless.yml:
--------------------------------------------------------------------------------
# Serverless Framework deployment manifest for the watchdog proxy.
# Per-developer overrides live in serverless.local.yml (template:
# serverless.local.yml-dist); stage-specific resources/functions live in
# config/<stage>.yml with config/dev.yml as the fallback.
service: ${file(serverless.local.yml):service, "watchdog-proxy"}

custom:
  # Layered configuration: local file overrides, values computed at deploy
  # time (config/dynamic.js), then the per-stage YAML.
  localConfig: ${file(serverless.local.yml)}
  dynamicConfig: ${file(config/dynamic.js)}
  perStageConfig: ${file(config/${self:custom.stage}.yml), file(config/dev.yml)}
  region: ${self:provider.region}
  # NOTE: this stage expression is duplicated in provider.stage below — keep
  # the two in sync when changing the fallback chain.
  stage: ${env:STAGE, self:custom.localConfig.stage, opt:stage, 'dev'}
  prefixAuto: ${self:service}-${self:custom.stage}
  prefix: ${env:PREFIX, self:custom.prefixAuto}
  resourceNames: ${self:custom.perStageConfig.custom.resourceNames}
  customDomain: ${file(serverless.local.yml):customDomain, self:custom.perStageConfig.custom.customDomain}
  remover: ${self:custom.perStageConfig.custom.remover}

  # Content types API Gateway must treat as binary (serverless-apigw-binary).
  apigwBinary:
    types:
      - 'multipart/form-data'

  # Environment variables shared by the Lambda functions (referenced from the
  # per-stage function definitions).
  fnEnv:
    NODE_ENV: ${env:NODE_ENV,"production"}
    GIT_COMMIT: ${self:custom.dynamicConfig.GIT_COMMIT}
    LOG_INFO: ${env:LOG_INFO, "1"}
    LOG_DEBUG: ${env:LOG_DEBUG, "0"}
    DISABLE_AUTH_CACHE: ${env:DISABLE_AUTH_CACHE, "0"}
    ENABLE_DEV_AUTH: ${env:ENABLE_DEV_AUTH,"0"}
    UPSTREAM_SERVICE_URL: ${env:UPSTREAM_SERVICE_URL, self:custom.localConfig.upstreamService.url}
    UPSTREAM_SERVICE_KEY: ${env:UPSTREAM_SERVICE_KEY, self:custom.localConfig.upstreamService.key}
    SERVICE_STAGE: ${self:custom.stage}
    SERVICE_PREFIX: ${self:custom.prefix}
    HITRATE_TABLE: ${self:custom.resourceNames.hitrate}
    CREDENTIALS_TABLE: ${self:custom.resourceNames.credentials}
    QUEUE_NAME: ${self:custom.resourceNames.sqs}
    CONTENT_BUCKET: ${self:custom.resourceNames.contentBucket}
    PROCESS_QUEUE_FUNCTION: ${self:custom.resourceNames.process}
    METRICS_URL: ${env:METRICS_URL,""}
    EMAIL_FROM: ${env:EMAIL_FROM,""}
    EMAIL_TO: ${env:EMAIL_TO,""}
    # NOTE(review): reads the EMAIL_EXPIRATION env var but exposes it to code
    # as EMAIL_EXPIRES — confirm the name mismatch is intentional.
    EMAIL_EXPIRES: ${env:EMAIL_EXPIRATION,"2592000"}
    MOCK_POSITIVE_CHANCE: ${env:MOCK_POSITIVE_CHANCE,"0.1"}
    METRICS_PING_PERIOD: ${env:METRICS_PING_PERIOD,"1000"}
    SENTRY_DSN: ${env:SENTRY_DSN,""}
    SENTRY_RELEASE: ${env:SENTRY_RELEASE,env:GIT_COMMIT,self:custom.dynamicConfig.GIT_COMMIT}

provider:
  name: aws
  runtime: nodejs8.10
  # Duplicates custom.stage above; keep both fallback chains identical.
  stage: ${env:STAGE, self:custom.localConfig.stage, opt:stage, 'dev'}
  region: ${env:AWS_REGION, "us-east-1"}
  memorySize: 128
  iamRoleStatements: ${self:custom.perStageConfig.provider.iamRoleStatements}

plugins:
  - serverless-s3-remover
  - serverless-apigw-binary
  - serverless-domain-manager

# Keep docs, helpers, and test files out of the deployed artifact.
package:
  exclude:
    - docs/**
    - helpers/**
    - test/**
    - functions/**/*-test.js

resources: ${self:custom.perStageConfig.resources}

functions: ${self:custom.perStageConfig.functions}
67 |
--------------------------------------------------------------------------------