├── .github
│   ├── header-checker-lint.yml
│   └── workflows
│       ├── ci.yaml
│       └── terraform.yml
├── .gitignore
├── .gitmodules
├── .mvn
│   └── wrapper
│       ├── MavenWrapperDownloader.java
│       └── maven-wrapper.properties
├── LICENSE
├── README.md
├── SECURITY.md
├── attestation
│   └── pom.xml
├── ci
│   ├── FirestoreTesting.Dockerfile
│   ├── build.sh
│   ├── common.sh
│   ├── dependencies.sh
│   ├── firebase.sh
│   ├── generate-templates.sh
│   └── integration.sh
├── cloudbuild.yaml
├── config
│   └── firebase
│       ├── README.md
│       ├── firebase.json
│       ├── firestore.rules
│       ├── package.json
│       └── rules.test.js
├── container-build.yaml
├── df-flex-template-base-image
│   ├── Dockerfile
│   └── java_template_launcher
├── docs
│   ├── code-of-conduct.md
│   └── contributing.md
├── logging.properties
├── model
│   └── pom.xml
├── mvn-settings.xml
├── mvnw
├── mvnw.cmd
├── pipeline
│   └── pom.xml
├── pom.xml
├── src
│   ├── main
│   │   └── java
│   │       └── com
│   │           └── google
│   │               └── exposurenotification
│   │                   └── privateanalytics
│   │                       └── ingestion
│   │                           ├── attestation
│   │                           │   └── AbstractDeviceAttestation.java
│   │                           ├── model
│   │                           │   └── DataShare.java
│   │                           └── pipeline
│   │                               ├── AWSFederatedAuthHelper.java
│   │                               ├── BatchWriterFn.java
│   │                               ├── DataProcessorManifest.java
│   │                               ├── DateFilterFn.java
│   │                               ├── DeletionPipeline.java
│   │                               ├── FirestoreConnector.java
│   │                               ├── IngestionPipeline.java
│   │                               ├── IngestionPipelineOptions.java
│   │                               └── PrioSerializationHelper.java
│   ├── proto
│   │   └── analytics.proto
│   └── test
│       ├── java
│       │   └── com
│       │       └── google
│       │           └── exposurenotification
│       │               └── privateanalytics
│       │                   └── ingestion
│       │                       ├── model
│       │                       │   └── DataShareTest.java
│       │                       └── pipeline
│       │                           ├── DataProcessorManifestTest.java
│       │                           ├── DateFilterFnTest.java
│       │                           ├── DeletionPipelineIT.java
│       │                           ├── FirestoreClientTestUtils.java
│       │                           ├── IngestionPipelineIT.java
│       │                           ├── IngestionPipelineOptionsTest.java
│       │                           ├── IngestionPipelineTest.java
│       │                           ├── PrioSerializationHelperTest.java
│       │                           └── TestAttestation.java
│       └── resources
│           └── com
│               └── google
│                   └── exposurenotification
│                       └── privateanalytics
│                           └── ingestion
│                               └── pipeline
│                                   └── test-manifest.json
├── templates
│   ├── dataflow-deletion-metadata-template.json
│   ├── dataflow-flex-template.json
│   ├── dataflow-ingestion-metadata-template.json
│   ├── scheduler-deletion-template.tmpl
│   └── scheduler-ingestion-template.tmpl
└── terraform
    ├── README.md
    ├── dataflow.tf
    ├── firestore.tf
    ├── gcr.tf
    ├── iam.tf
    ├── main.tf
    ├── scheduler.tf
    └── variables.tf
/.github/header-checker-lint.yml:
--------------------------------------------------------------------------------
1 | # Checks if license header exists and if the year is correct
2 | # for files modified by a pull request
3 | # For more information:
4 | # https://github.com/googleapis/repo-automation-bots/tree/master/packages/header-checker-lint#header-checker-lint
5 | allowedCopyrightHolders: ["Google LLC"]
6 | allowedLicenses: ["Apache-2.0"]
7 | sourceFileExtensions: ["java"]
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2021 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | on:
17 | push:
18 | branches:
19 | - main
20 | pull_request:
21 | name: ci
22 | jobs:
23 | units:
24 | runs-on: ubuntu-latest
25 | steps:
26 | - uses: actions/checkout@v2
27 | with:
28 | submodules: 'true'
29 | - uses: actions/setup-java@v2
30 | with:
31 | java-version: 11
32 | distribution: 'adopt'
33 | cache: maven
34 | - run: java -version
35 | - run: ci/build.sh
36 | env:
37 | JOB_TYPE: test
38 | dependencies:
39 | runs-on: ubuntu-latest
40 | steps:
41 | - uses: actions/checkout@v2
42 | with:
43 | submodules: 'true'
44 | - uses: actions/setup-java@v2
45 | with:
46 | java-version: 11
47 | distribution: 'adopt'
48 | cache: maven
49 | - run: java -version
50 | - run: ci/dependencies.sh
51 | lint:
52 | runs-on: ubuntu-latest
53 | steps:
54 | - uses: actions/checkout@v2
55 | with:
56 | submodules: 'true'
57 | - uses: actions/setup-java@v2
58 | with:
59 | java-version: 11
60 | distribution: 'adopt'
61 | cache: maven
62 | - run: java -version
63 | - run: ci/build.sh
64 | env:
65 | JOB_TYPE: lint
--------------------------------------------------------------------------------
/.github/workflows/terraform.yml:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2021 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | name: terraform
17 |
18 | on:
19 | pull_request:
20 | paths:
21 | - 'terraform/**'
22 | - '.github/workflows/terraform*'
23 |
24 | jobs:
25 | lint:
26 | name: 'lint'
27 | runs-on: 'ubuntu-latest'
28 |
29 | steps:
30 | - name: 'checkout'
31 | uses: 'actions/checkout@v2'
32 |
33 | - name: 'setup-terraform'
34 | uses: 'hashicorp/setup-terraform@v1'
35 | with:
36 | terraform_version: '0.15.0'
37 |
38 | - name: 'init'
39 | working-directory: './terraform'
40 | run: 'terraform init'
41 |
42 | - name: 'validate'
43 | working-directory: './terraform'
44 | run: 'terraform validate'
45 |
46 | - name: 'fmt'
47 | working-directory: './terraform'
48 | run: 'terraform fmt -diff -check'
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/.DS_Store
2 | credentials/
3 | target/
4 | **/.flattened-pom.xml
5 | */pom.xml.tag
6 | **/pom.xml.releaseBackup
7 | **/pom.xml.versionsBackup
8 | **/pom.xml.next
9 | **/release.properties
10 | **/dependency-reduced-pom.xml
11 | **/buildNumber.properties
12 | **/.mvn/timing.properties
13 | **/.mvn/wrapper/maven-wrapper.jar
14 | **/.idea/
15 | **/node_modules/
16 | **/**enpa-ingestion*.iml
17 | **/enpa-ingestion.ipr
18 | **/enpa-ingestion.iws
19 | **/**firestore-debug.log
20 | **/**ui-debug.log
21 | **/package-lock.json
22 |
23 | # Visual Code Studio
24 | .classpath
25 | .factorypath
26 | .project
27 | .settings/
28 | .vscode/
29 |
30 | # terraform
31 | terraform/.terraform/tmp
32 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "third_party/prio-server"]
2 | path = third_party/prio-server
3 | url = https://github.com/abetterinternet/prio-server
4 |
--------------------------------------------------------------------------------
/.mvn/wrapper/MavenWrapperDownloader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2007-present the original author or authors.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | import java.net.*;
17 | import java.io.*;
18 | import java.nio.channels.*;
19 | import java.util.Properties;
20 |
21 | public class MavenWrapperDownloader {
22 |
23 | private static final String WRAPPER_VERSION = "0.5.6";
24 | /**
25 | * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided.
26 | */
27 | private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
28 | + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";
29 |
30 | /**
31 | * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
32 | * use instead of the default one.
33 | */
34 | private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
35 | ".mvn/wrapper/maven-wrapper.properties";
36 |
37 | /**
38 | * Path where the maven-wrapper.jar will be saved to.
39 | */
40 | private static final String MAVEN_WRAPPER_JAR_PATH =
41 | ".mvn/wrapper/maven-wrapper.jar";
42 |
43 | /**
44 | * Name of the property which should be used to override the default download url for the wrapper.
45 | */
46 | private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";
47 |
48 | public static void main(String args[]) {
49 | System.out.println("- Downloader started");
50 | File baseDirectory = new File(args[0]);
51 | System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());
52 |
53 | // If the maven-wrapper.properties exists, read it and check if it contains a custom
54 | // wrapperUrl parameter.
55 | File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
56 | String url = DEFAULT_DOWNLOAD_URL;
57 | if(mavenWrapperPropertyFile.exists()) {
58 | FileInputStream mavenWrapperPropertyFileInputStream = null;
59 | try {
60 | mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile);
61 | Properties mavenWrapperProperties = new Properties();
62 | mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
63 | url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
64 | } catch (IOException e) {
65 | System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
66 | } finally {
67 | try {
68 | if(mavenWrapperPropertyFileInputStream != null) {
69 | mavenWrapperPropertyFileInputStream.close();
70 | }
71 | } catch (IOException e) {
72 | // Ignore ...
73 | }
74 | }
75 | }
76 | System.out.println("- Downloading from: " + url);
77 |
78 | File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
79 | if(!outputFile.getParentFile().exists()) {
80 | if(!outputFile.getParentFile().mkdirs()) {
81 | System.out.println(
82 | "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
83 | }
84 | }
85 | System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
86 | try {
87 | downloadFileFromURL(url, outputFile);
88 | System.out.println("Done");
89 | System.exit(0);
90 | } catch (Throwable e) {
91 | System.out.println("- Error downloading");
92 | e.printStackTrace();
93 | System.exit(1);
94 | }
95 | }
96 |
97 | private static void downloadFileFromURL(String urlString, File destination) throws Exception {
98 | if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
99 | String username = System.getenv("MVNW_USERNAME");
100 | char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
101 | Authenticator.setDefault(new Authenticator() {
102 | @Override
103 | protected PasswordAuthentication getPasswordAuthentication() {
104 | return new PasswordAuthentication(username, password);
105 | }
106 | });
107 | }
108 | URL website = new URL(urlString);
109 | ReadableByteChannel rbc;
110 | rbc = Channels.newChannel(website.openStream());
111 | FileOutputStream fos = new FileOutputStream(destination);
112 | fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
113 | fos.close();
114 | rbc.close();
115 | }
116 |
117 | }
118 |
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip
2 | wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![ci](https://github.com/google/exposure-notifications-private-analytics-ingestion/actions/workflows/ci.yaml/badge.svg)](https://github.com/google/exposure-notifications-private-analytics-ingestion/actions/workflows/ci.yaml)
2 |
3 | # Exposure Notification Private Analytics Ingestion
4 |
5 | This repository contains implementations for [Apache Beam](https://beam.apache.org/)
6 | batch pipelines to process private data shares stored in Firestore according
7 | to the Exposure Notification Private Analytics protocol. It assumes private data
8 | shares uploaded as Firestore documents, as done in the
9 | [Exposure Notification Express template app](https://github.com/google/exposure-notifications-android/blob/4b7b461282b2ede6fb2a93488c6d628440052c8d/app/src/main/java/com/google/android/apps/exposurenotification/privateanalytics/PrivateAnalyticsFirestoreRepository.java#L42).
10 | These documents contain encrypted packets using the [Prio](https://crypto.stanford.edu/prio/)
11 | protocol. The pipeline implementation converts them into the format that
12 | downstream Prio data processing servers expect, defined in the Avro schema
13 | [here](https://github.com/abetterinternet/prio-server/tree/master/avro-schema).
14 |
15 | This implementation makes use of Firestore as a scalable NoSQL database for subsequent
16 | batching and aggregation. Alternative implementations might operate a custom
17 | backend endpoint to accumulate the packets, or use a pubsub mechanism. Since the
18 | packets are encrypted on device, the channel over which the packets travel need
19 | not be trusted.
20 |
21 | This repository also contains the Firebase configuration to secure Firestore
22 | with [security rules](config/firebase/firestore.rules) as well as
23 | [Terraform scripts](terraform/main.tf) to bring up required infrastructure.
24 |
25 | ## Setup
26 |
27 | ### Multiple Maven modules
28 |
29 | The project is structured into multiple Maven modules to allow outside
30 | implementations of attestation to be incorporated. Implementations need only
31 | depend on the DataShare model module, and a Maven profile can be added to get
32 | them included in the pipeline module build. The pipeline pulls available implementations dynamically.
33 |
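For example, an out-of-tree attestation implementation can be compiled against the
model artifact alone. A minimal sketch (the coordinates are the ones used by this
project's `attestation` module; the exact flags are illustrative):

```sh
# Install only the DataShare model module (and what it needs) into the local
# Maven repository, so an external attestation implementation can declare a
# dependency on
# com.google.exposurenotification.privateanalytics.ingestion:enpa-ingestion-model.
./mvnw -pl model -am install -DskipTests
```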
34 | Since there aren't too many individual classes that make up each module, and
35 | since they are only meant to be packaged and executed together, we use a single
36 | source tree for all modules.
37 |
38 | Follow the
39 | [Getting started with Google Cloud Dataflow](https://github.com/GoogleCloudPlatform/java-docs-samples/blob/master/dataflow/README.md)
40 | page. You will need the following:
41 |
42 | 1. Set up a
43 | [Google Cloud project](https://console.cloud.google.com/projectcreate) or use an existing one.
44 | Then [import the Google Cloud project into Firebase](https://cloud.google.com/firestore/docs/client/get-firebase).
45 |
46 | 1. [Enable APIs](https://console.cloud.google.com/flows/enableapi?apiid=containerregistry.googleapis.com,cloudbuild.googleapis.com):
47 | Container Registry, Cloud Build, Cloud Datastore and Dataflow.
48 |
49 | 1. [Create an asymmetric signing key](https://cloud.google.com/kms/docs/creating-asymmetric-keys#create_an_asymmetric_signing_key)
50 |
51 | 1. [Create a storage bucket](https://cloud.google.com/storage/docs/creating-buckets)
52 | for your outputs.
53 |
54 | 1. Create a service account with permissions for [Firestore](https://cloud.google.com/datastore/docs/access/iam#iam_roles),
55 | [reading the KMS key](https://cloud.google.com/kms/docs/reference/permissions-and-roles),
56 | [Dataflow](https://cloud.google.com/dataflow/docs/concepts/access-control#roles),
57 | and [Cloud Storage](https://cloud.google.com/storage/docs/access-control/iam).
58 |
59 | 1. Download a key for your service account and store it as `credentials.json`.
60 | Keep those credentials safe!
61 |
62 | ### Clone Submodules
63 | A submodule is needed to access the Avro definitions of the Prio classes.
64 |
65 | ```sh
66 | git submodule update --init
67 | ```
68 |
69 | ### Useful Environment Variables
70 |
71 | Setting the following environment variables can be handy when working in the
72 | project. Replace values in `[...]`.
73 |
74 | ```sh
75 | export PROJECT="[my-google-cloud-ingestion-project-id]"
76 | export GOOGLE_APPLICATION_CREDENTIALS="credentials.json"
77 | export TEMPLATES="gs://[my-cloud-storage-bucket]/templates"
78 | export PHA_OUTPUT="gs://[my-cloud-storage-bucket]/output/pha"
79 | export FACILITATOR_OUTPUT="gs://[my-cloud-storage-bucket]/output/facilitator"
80 | export KEY_RESOURCE_NAME="projects/[some-ingestion-project]/locations/global/keyRings/[some-signature-key-ring]/cryptoKeys/[some-signature-key]/cryptoKeyVersions/1"
81 | ```
82 |
83 | ## Testing
84 |
85 | ### Unit Tests
86 |
87 | To run unit tests:
88 |
89 | ```shell script
90 | ./mvnw test
91 | ```
92 |
93 | ### Integration Tests
94 |
95 | Integration tests run against an actual test project, so the environment
96 | variables described above need to be set:
97 |
98 | ```shell script
99 | ./mvnw verify
100 | ```
101 |
102 | ## Running the Pipeline
103 |
104 | There are two pipelines. One reads Prio data shares from Firestore and
105 | generates the outputs which the PHA and Facilitator data share processors will
106 | consume. The other deletes expired or already processed data shares from
107 | Firestore.
108 |
109 | Both take as options the window of time to cover, in the form of a start
110 | time and a duration. When not supplied, the start time for the ingestion pipeline
111 | is calculated from the current time, rounded back to the previous window of length
112 | `duration`. The deletion pipeline goes back two windows, leaving a safety
113 | margin so that unprocessed data shares are not deleted.
114 |
115 | ### Locally
116 |
117 | To run the ingestion pipeline locally:
118 |
119 | ```sh
120 |
121 | ./mvnw compile exec:java \
122 | -Djava.util.logging.config.file=logging.properties \
123 | -Dexec.mainClass=com.google.exposurenotification.privateanalytics.ingestion.pipeline.IngestionPipeline \
124 | -Dexec.args="--keyResourceName=$KEY_RESOURCE_NAME --phaOutput=$PHA_OUTPUT --facilitatorOutput=$FACILITATOR_OUTPUT"
125 | ```
126 |
127 | To run the deletion pipeline:
128 |
129 | ```sh
130 | ./mvnw compile exec:java \
131 | -Djava.util.logging.config.file=logging.properties \
132 | -Dexec.mainClass=com.google.exposurenotification.privateanalytics.ingestion.pipeline.DeletionPipeline \
133 | -Dexec.args="--project=$PROJECT"
134 | ```
135 |
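Both pipelines also accept an explicit window. A minimal sketch for the ingestion
pipeline, assuming the options are named `--startTime` (seconds since the epoch)
and `--duration` (seconds); check `IngestionPipelineOptions.java` for the
authoritative names and units:

```sh
# Process an explicit one-hour window instead of the derived default.
# The option names below are assumptions; see IngestionPipelineOptions.java.
./mvnw compile exec:java \
  -Djava.util.logging.config.file=logging.properties \
  -Dexec.mainClass=com.google.exposurenotification.privateanalytics.ingestion.pipeline.IngestionPipeline \
  -Dexec.args="--keyResourceName=$KEY_RESOURCE_NAME --phaOutput=$PHA_OUTPUT --facilitatorOutput=$FACILITATOR_OUTPUT --startTime=1609459200 --duration=3600"
```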
136 | ### In Google Cloud Dataflow
137 |
138 | #### From local build
139 |
140 | ```sh
141 | export SERVICE_ACCOUNT_EMAIL=$(egrep -o '[^"]+@[^"]+\.iam\.gserviceaccount\.com' $GOOGLE_APPLICATION_CREDENTIALS)
142 |
143 | export BEAM_ARGS=(
144 | "--keyResourceName=$KEY_RESOURCE_NAME"
145 | "--phaOutput=$PHA_OUTPUT"
146 | "--facilitatorOutput=$FACILITATOR_OUTPUT"
147 | "--runner=DataflowRunner"
148 | "--region=us-central1"
149 | "--serviceAccount=$SERVICE_ACCOUNT_EMAIL"
150 | )
151 | ./mvnw compile exec:java \
152 | -Dexec.mainClass=com.google.exposurenotification.privateanalytics.ingestion.pipeline.IngestionPipeline \
153 | -Dexec.args="${BEAM_ARGS[*]}"
154 | ```
155 |
156 | #### From Flex Template
157 |
158 | See [below](#building-a-flex-template-and-launch-container) for how to generate the Flex Template.
159 |
160 | ```sh
161 | export SERVICE_ACCOUNT_EMAIL=$(egrep -o '[^"]+@[^"]+\.iam\.gserviceaccount\.com' $GOOGLE_APPLICATION_CREDENTIALS)
162 |
163 | gcloud dataflow flex-template run "ingestion-pipeline-$USER-`date +%Y%m%d-%H%M%S`" \
164 | --template-file-gcs-location "$TEMPLATE_PATH" \
165 | --parameters project="$PROJECT" \
166 | --parameters keyResourceName="$KEY_RESOURCE_NAME" \
167 | --parameters phaOutput="$PHA_OUTPUT" \
168 | --parameters facilitatorOutput="$FACILITATOR_OUTPUT" \
169 | --service-account-email "$SERVICE_ACCOUNT_EMAIL" \
170 | --region "us-central1"
171 | ```
172 |
173 | ## Building
174 |
175 | We generate a [templated dataflow job](https://cloud.google.com/dataflow/docs/guides/templates/overview#templated-dataflow-jobs)
176 | that takes all pipeline options as runtime parameters.
177 |
178 | ### Building a Flex Template and Launch Container
179 |
180 | To build the launch containers, there are Maven profiles for the ingestion and deletion pipelines.
181 |
182 | To build the ingestion pipeline launch container, setting a git-derived version:
183 |
184 | ```sh
185 | ./mvnw -Pingestion-container-build -Dcontainer-version=$(git describe --tags --always --dirty=-dirty) \
186 | -Dcontainer_registry_tag_prefix='gcr.io/[YOUR_CLOUD_PROJECT]' package
187 | ```
188 |
189 | To build the ingestion pipeline with a custom attestation implementation,
190 | include the additional `attestation` profile, which assumes the package is
191 | available in any of your configured maven repositories
192 | (in .m2/settings.xml or local mvn-settings.xml):
193 |
194 | ```sh
195 | ./mvnw -Pingestion-container-build,attestation -Dcontainer-version=$(git describe --tags --always --dirty=-dirty) \
196 | -Dcontainer_registry_tag_prefix='gcr.io/[YOUR_CLOUD_PROJECT]' package
197 | ```
198 |
199 | To build the deletion pipeline launch container, setting a git-derived version:
200 |
201 | ```sh
202 | ./mvnw -Pdeletion-container-build -Dcontainer-version=$(git describe --tags --always --dirty=-dirty) \
203 | -Dcontainer_registry_tag_prefix='gcr.io/[YOUR_CLOUD_PROJECT]' package
204 | ```
205 |
206 | Built containers get automatically published to the `container_registry_tag_prefix` you set. E.g. for Google container
207 | registry: `gcr.io/[YOUR_CLOUD_PROJECT]/ingestion-pipeline:$VERSION` and `gcr.io/[YOUR_CLOUD_PROJECT]/deletion-pipeline:$VERSION`
208 | respectively.
209 |
210 | To generate the Flex Template Metadata files and upload them to GCS run:
211 |
212 | *The following commands require the Node.js `json` tool: `npm install -g json`.*
213 | Use the same `container_registry_tag_prefix` as in the builds above.
214 |
215 | ```sh
216 | export VERSION=$(git describe --tags --always --dirty=-dirty)
217 |
218 | json -f templates/dataflow-flex-template.json \
219 | -e "this.metadata=`cat templates/dataflow-ingestion-metadata-template.json`" \
220 | -e "this.image='gcr.io/[YOUR_CLOUD_PROJECT]/ingestion-pipeline:$VERSION'" > ingestion-pipeline-$VERSION.json
221 |
222 | json -f templates/dataflow-flex-template.json \
223 | -e "this.metadata=`cat templates/dataflow-deletion-metadata-template.json`" \
224 | -e "this.image='gcr.io/[YOUR_CLOUD_PROJECT]/deletion-pipeline:$VERSION'" > deletion-pipeline-$VERSION.json
225 |
226 | gsutil cp ingestion-pipeline-$VERSION.json $TEMPLATES
227 | gsutil cp deletion-pipeline-$VERSION.json $TEMPLATES
228 |
229 | gsutil -h "Content-Type:application/json" cp templates/scheduler-ingestion-template.tmpl \
230 | $TEMPLATES/scheduler-ingestion-template-$VERSION.tmpl
231 | gsutil -h "Content-Type:application/json" cp templates/scheduler-deletion-template.tmpl \
232 | $TEMPLATES/scheduler-deletion-template-$VERSION.tmpl
233 |
234 | unset VERSION
235 | ```
236 |
237 | ## Contributing
238 |
239 | Contributions to this repository are always welcome and highly encouraged.
240 |
241 | See [CONTRIBUTING](docs/contributing.md) for more information on how to get started.
242 |
243 | ## License
244 |
245 | Apache 2.0 - See [LICENSE](LICENSE) for more information.
246 |
247 | *This is not an official Google product*
248 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | To report a suspected vulnerability, please contact
4 | exposure-notifications-feedback@google.com and include the steps to
5 | reproduce the vulnerability.
6 |
--------------------------------------------------------------------------------
/attestation/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
17 |
18 | 4.0.0
19 |
20 | com.google.exposurenotification.privateanalytics.ingestion
21 | enpa-ingestion
22 | ${revision}
23 | ../pom.xml
24 |
25 | com.google.exposurenotification.privateanalytics.ingestion
26 | enpa-ingestion-attestation
27 | jar
28 |
29 |
30 | ../src/main/java
31 | ../src/test/java
32 |
33 |
34 | org.apache.maven.plugins
35 | maven-compiler-plugin
36 | ${maven-compiler-plugin.version}
37 |
38 |
39 |
40 | com.google.auto.service
41 | auto-service
42 | ${auto-service.version}
43 |
44 |
45 |
46 | **/attestation/**/*.java
47 | **/generated-sources/**
48 |
49 |
50 | **/attestation/**
51 | **/DataShareTest.java
52 |
53 |
54 |
55 |
56 | org.codehaus.mojo
57 | build-helper-maven-plugin
58 | 3.0.0
59 |
60 |
61 | generate-sources
62 |
63 | add-source
64 |
65 |
66 |
67 |
68 | ../third_party/android-key-attestation/server/src/main
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 | com.github.os72
77 | protoc-jar-maven-plugin
78 | 3.11.4
79 |
80 |
81 | generate-sources
82 |
83 | run
84 |
85 |
86 | 3.13.0
87 |
88 | ../src/proto
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 | com.google.exposurenotification.privateanalytics.ingestion
100 | enpa-ingestion-model
101 | ${revision}
102 |
103 |
104 | org.apache.beam
105 | beam-sdks-java-core
106 | ${beam.version}
107 |
108 |
109 | org.bouncycastle
110 | bcpkix-jdk15on
111 | 1.61
112 | compile
113 |
114 |
115 | org.apache.beam
116 | beam-runners-core-construction-java
117 | ${beam.version}
118 |
119 |
120 | org.apache.beam
121 | beam-runners-direct-java
122 | ${beam.version}
123 |
124 |
125 |
126 |
127 |
--------------------------------------------------------------------------------
/ci/FirestoreTesting.Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | FROM openjdk:17-jdk-alpine3.14
16 |
17 | RUN apk add --no-cache nodejs npm bash
18 | RUN npm i -g firebase-tools
19 | RUN firebase emulators:exec --only firestore "npm -v"
20 |
21 | WORKDIR /workspace
22 |
--------------------------------------------------------------------------------
/ci/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -eo pipefail
17 |
18 | ## Get the directory of the build script
19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}"))
20 | ## cd to the parent directory, i.e. the root of the git repo
21 | cd ${scriptDir}/..
22 |
23 | # include common functions
24 | source ${scriptDir}/common.sh
25 |
26 | # Print out Java version
27 | java -version
28 | echo ${JOB_TYPE}
29 |
30 | RETURN_CODE=0
31 | set +e
32 |
33 | git submodule update --init
34 |
35 | case ${JOB_TYPE} in
36 | test)
37 | ./mvnw test
38 | RETURN_CODE=$?
39 | ;;
40 | lint)
41 | ./mvnw com.coveo:fmt-maven-plugin:check
42 | RETURN_CODE=$?
43 | if [[ ${RETURN_CODE} != 0 ]]; then
44 | echo "To fix formatting errors, run: mvn com.coveo:fmt-maven-plugin:format"
45 | fi
46 | ;;
47 | esac
48 |
49 | echo "exiting with ${RETURN_CODE}"
50 | exit ${RETURN_CODE}
51 |
--------------------------------------------------------------------------------
/ci/common.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | function retry_with_backoff {
17 | attempts_left=$1
18 | sleep_seconds=$2
19 | shift 2
20 | command=$@
21 |
22 |
23 | # store current flag state
24 | flags=$-
25 |
26 | # allow failures to continue
27 | set +e
28 | echo "${command}"
29 | ${command}
30 | exit_code=$?
31 |
32 | # restore "e" flag
33 | if [[ ${flags} =~ e ]]
34 | then set -e
35 | else set +e
36 | fi
37 |
38 | if [[ $exit_code == 0 ]]
39 | then
40 | return 0
41 | fi
42 |
43 | # failure
44 | if [[ ${attempts_left} > 0 ]]
45 | then
46 | echo "failure (${exit_code}), sleeping ${sleep_seconds}..."
47 | sleep ${sleep_seconds}
48 | new_attempts=$((${attempts_left} - 1))
49 | new_sleep=$((${sleep_seconds} * 2))
50 | retry_with_backoff ${new_attempts} ${new_sleep} ${command}
51 | fi
52 |
53 | return $exit_code
54 | }
55 |
56 | ## Helper functions
57 | function now() { date +"%Y-%m-%d %H:%M:%S" | tr -d '\n'; }
58 | function msg() { println "$*" >&2; }
59 | function println() { printf '%s\n' "$(now) $*"; }
--------------------------------------------------------------------------------
/ci/dependencies.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -eo pipefail
17 |
18 | ## Get the directory of the build script
19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}"))
20 | ## cd to the parent directory, i.e. the root of the git repo
21 | cd ${scriptDir}/..
22 |
23 | # include common functions
24 | source ${scriptDir}/common.sh
25 |
26 | # Print out Java
27 | java -version
28 |
29 | export MAVEN_OPTS="-Xmx1024m -XX:MaxPermSize=128m"
30 |
31 | # this should run maven enforcer
32 | retry_with_backoff 3 10 \
33 | ./mvnw install -B -V \
34 | -DskipTests=true \
35 | -Dclirr.skip=true
36 |
37 | ./mvnw -B dependency:analyze
--------------------------------------------------------------------------------
/ci/firebase.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2021 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | ## Run Firebase integration tests
17 | echo "************ Running Firebase integration tests script ************"
18 | ## Get the directory of the build script
19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}"))
20 | ## cd to the parent directory, i.e. the root of the git repo
21 | cd ${scriptDir}/..
22 | cd config/firebase
23 |
24 | echo "************ Installing npm testing library and jest ************"
25 | npm init -y
26 | npm i @firebase/testing jest
27 | echo "************ Dependencies installed successfully! ************"
28 |
29 | echo "************ Executing rules.test.js ************"
30 | firebase emulators:exec --project=$PROJECT --only firestore "npm run test"
31 |
--------------------------------------------------------------------------------
/ci/generate-templates.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -eo pipefail
17 |
18 | ## Get the directory of the build script
19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}"))
20 | ## cd to the parent directory, i.e. the root of the git repo
21 | cd ${scriptDir}/..
22 |
23 | # include common functions
24 | source ${scriptDir}/common.sh
25 |
26 | apk add --update npm
27 |
28 | # Print out versions
29 | gsutil --version
30 | npm install -g json
31 | json --version
32 |
33 | export VERSION=$(git describe --tags --always --dirty=-dirty)
34 |
35 | # Generate Dataflow Flex Templates, version and upload to GCS
36 | json -f templates/dataflow-flex-template.json \
37 | -e "this.metadata=$(cat templates/dataflow-ingestion-metadata-template.json)" \
38 | -e "this.image='gcr.io/enpa-infra/ingestion-pipeline:$VERSION'" > ingestion-pipeline-$VERSION.json
39 |
40 | json -f templates/dataflow-flex-template.json \
41 | -e "this.metadata=$(cat templates/dataflow-deletion-metadata-template.json)" \
42 | -e "this.image='gcr.io/enpa-infra/deletion-pipeline:$VERSION'" > deletion-pipeline-$VERSION.json
43 |
44 | json -f templates/dataflow-flex-template.json \
45 | -e "this.metadata=$(cat templates/dataflow-ingestion-metadata-template.json)" \
46 | -e "this.image='gcr.io/enpa-public-assets/ingestion-pipeline:$VERSION'" > public-ingestion-pipeline-$VERSION.json
47 |
48 | json -f templates/dataflow-flex-template.json \
49 | -e "this.metadata=$(cat templates/dataflow-deletion-metadata-template.json)" \
50 | -e "this.image='gcr.io/enpa-public-assets/deletion-pipeline:$VERSION'" > public-deletion-pipeline-$VERSION.json
51 |
52 | gsutil cp ingestion-pipeline-$VERSION.json gs://enpa-pipeline-specs/
53 | gsutil cp deletion-pipeline-$VERSION.json gs://enpa-pipeline-specs/
54 | gsutil cp public-ingestion-pipeline-$VERSION.json gs://enpa-pipeline-specs/
55 | gsutil cp public-deletion-pipeline-$VERSION.json gs://enpa-pipeline-specs/
56 |
57 | # Version and upload scheduler templates to GCS
58 | gsutil -h "Content-Type:application/json" cp templates/scheduler-ingestion-template.tmpl gs://enpa-pipeline-specs/scheduler-ingestion-template-$VERSION.tmpl
59 | gsutil -h "Content-Type:application/json" cp templates/scheduler-deletion-template.tmpl gs://enpa-pipeline-specs/scheduler-deletion-template-$VERSION.tmpl
60 |
61 | # Version Firestore Security Rules and upload to GCS
62 | gsutil -h "Content-Type:text/plain" cp config/firebase/firestore.rules gs://enpa-infra/security-rules/firestore-$VERSION.rules
63 |
--------------------------------------------------------------------------------
/ci/integration.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -eo pipefail
17 |
18 | ## Get the directory of the build script
19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}"))
20 | ## cd to the parent directory, i.e. the root of the git repo
21 | cd ${scriptDir}/..
22 |
23 | # include common functions
24 | source ${scriptDir}/common.sh
25 |
26 | # Print out Java
27 | java -version
28 |
29 | RETURN_CODE=0
30 | set +e
31 |
32 | git submodule update --init
33 |
34 | retry_with_backoff 3 10 \
35 | ./mvnw clean verify
36 |
37 | # enable once we can write to the firewalled sonarqube instance
38 | #retry_with_backoff 3 10 \
39 | # ./mvnw -Pcoverage clean verify sonar:sonar -Dsonar.projectKey=enpa-ingestion -Dsonar.host.url=http://10.128.0.2:9000 -Dsonar.login=$SONAR_LOGIN
40 |
--------------------------------------------------------------------------------
/cloudbuild.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | steps:
16 | ###########################################################
17 | # Step 1: Retrieve the cached .m2 directory from GCS
18 | ###########################################################
19 | - name: gcr.io/cloud-builders/gsutil
20 | id: fetch-mvn-cache
21 | args:
22 | - '-m'
23 | - 'rsync'
24 | - '-r'
25 | - 'gs://${_BUCKET}/cache/.m2'
26 | - '/cache/.m2'
27 | volumes:
28 | - path: '/cache/.m2'
29 | name: 'm2_cache'
30 |
31 | ###########################################################
32 | # Step 2: Run project integration tests using Maven
33 | ###########################################################
34 | - name: gcr.io/cloud-devrel-public-resources/java11
35 | id: pr-integration-tests
36 | waitFor:
37 | - fetch-mvn-cache
38 | entrypoint: bash
39 | args:
40 | - "-e"
41 | - "./ci/integration.sh"
42 | volumes:
43 | - path: '/cache/.m2'
44 | name: 'm2_cache'
45 | env:
46 | - MAVEN_OPTS=-Dmaven.repo.local=/cache/.m2
47 | - PROJECT=enpa-integration-testing
48 | - KEY_RESOURCE_NAME=projects/enpa-integration-testing/locations/global/keyRings/appa-signature-key/cryptoKeys/appa-signature-key/cryptoKeyVersions/1
49 |
50 | ###########################################################
51 | # Step 3: Run Firebase integration tests using npm
52 | ###########################################################
53 | - name: gcr.io/enpa-public-assets/firestore-tesing:v1
54 | id: firebase-integration-tests
55 | waitFor:
56 | - '-'
57 | entrypoint: bash
58 | args:
59 | - "-e"
60 | - "./ci/firebase.sh"
61 | env:
62 | - PROJECT=enpa-integration-testing
63 |
64 | ###########################################################
65 | # Step 4: Update cached .m2 directory on GCS with any
66 | # additional dependencies downloaded during the
67 | # build.
68 | ###########################################################
69 | - name: gcr.io/cloud-builders/gsutil
70 | id: sync-mvn-cache
71 | waitFor:
72 | - pr-integration-tests
73 | args:
74 | - '-m'
75 | - 'rsync'
76 | - '-r'
77 | - '/cache/.m2'
78 | - 'gs://${_BUCKET}/cache/.m2/'
79 | volumes:
80 | - path: '/cache/.m2'
81 | name: 'm2_cache'
82 |
83 | serviceAccount: 'projects/enpa-infra/serviceAccounts/development-integration-tests@enpa-integration-testing.iam.gserviceaccount.com'
84 |
85 | substitutions:
86 | # Default value
87 | _BUCKET: 'enpa-infra'
88 |
89 | options:
90 | # Use higher CPU machines so the caching and build steps are faster.
91 | machineType: 'N1_HIGHCPU_32'
--------------------------------------------------------------------------------
/config/firebase/README.md:
--------------------------------------------------------------------------------
1 | # Firebase Config
2 |
3 | Install the [Firebase CLI](https://firebase.google.com/docs/cli).
4 |
5 | ## Firestore Security Rules
6 |
7 | This provides a sample way to configure Firestore documents for
8 | Exposure Notifications Private Analytics.
9 |
10 | ### Testing
11 |
12 | First install the emulator, npm testing library and jest:
13 |
14 | ```shell script
15 | npm install -g firebase-tools
16 | firebase setup:emulators:firestore
17 | npm init -y
18 | npm i @firebase/testing
19 | npm i jest
20 | ```
21 |
22 | Then start the emulator and execute the test script:
23 |
24 | ```shell script
25 | firebase emulators:exec --only firestore "npm run test"
26 | ```
27 |
28 | ### Deploying
29 |
30 | Log in to Firebase as
31 | follows:
32 |
33 | ```shell script
34 | firebase login
35 | ```
36 |
37 | You can update your project's Firestore Security Policy with these rules as
38 | follows:
39 |
40 | ```shell script
41 | firebase deploy --only firestore:rules
42 | ```
43 |
--------------------------------------------------------------------------------
/config/firebase/firebase.json:
--------------------------------------------------------------------------------
1 | {
2 | "firestore": {
3 | "rules": "firestore.rules"
4 | },
5 | "emulators": {
6 | "firestore": {
7 | "host": "localhost",
8 | "port": "8080"
9 | },
10 | "ui": {
11 | "enabled": true,
12 | "host": "localhost",
13 | "port": 4000
14 | }
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/config/firebase/firestore.rules:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | rules_version = '2';
15 | service cloud.firestore {
16 | match /databases/{database}/documents {
17 |
18 | // Limited rules execution environment and functions are constrained:
19 | // https://firebase.google.com/docs/rules/rules-language#function
20 | // https://firebase.google.com/docs/reference/rules/index-all
21 | // https://github.com/google/cel-spec/blob/master/doc/langdef.md
22 | function quickPad(datePart) {
23 | return datePart.size() == 1 ? "0" + datePart : datePart;
24 | }
25 |
26 | // Assemble YYYY-MM-DD-HH from timestamp
27 | function buildDatePath(t) {
28 | return string(t.year()) + '-'
29 | + quickPad(string(t.month())) + '-'
30 | + quickPad(string(t.day())) + '-'
31 | + quickPad(string(t.hours()))
32 | }
33 |
34 | // Check date path against timestamp, allowing for clock skew
35 | function checkDatePath(datePath, t) {
36 | return datePath == buildDatePath(t + duration.value(1, 'h'))
37 | || datePath == buildDatePath(t)
38 | || datePath == buildDatePath(t - duration.value(1, 'h'));
39 | }
40 |
41 | // There are no filter() or map() functions available to us, and recursion
42 | // is not allowed, so we have to unroll the check of the list manually.
43 | // https://groups.google.com/g/google-cloud-firestore-discuss/c/S9BqqUcR4Lc/m/4CRjqfMyBwAJ
44 | function checkCertLengths(chain) {
45 | return checkCertLength(chain, 1)
46 | && checkCertLength(chain, 2)
47 | && checkCertLength(chain, 3)
48 | && checkCertLength(chain, 4)
49 | && checkCertLength(chain, 5)
50 | && checkCertLength(chain, 6)
51 | && checkCertLength(chain, 7)
52 | && checkCertLength(chain, 8)
53 | && checkCertLength(chain, 9)
54 | && checkCertLength(chain, 10);
55 | }
56 |
57 | // CEL implementations aren't guaranteed to do short circuit evaluation of
58 | // logical operators, so we use conditional operators here to avoid out of
59 | // bounds exceptions/NPEs.
60 | // https://github.com/google/cel-spec/blob/master/doc/langdef.md#logical-operators
61 | // Fun times.
62 | function checkCertLength(chain, i) {
63 | return chain.size() > i ? chain[i].size() < 30000 : true;
64 | }
65 | function safeCheckSize(s, len) {
66 | return s != null ? s.size() < len : true;
67 | }
68 |
69 | // Check schema of uploaded document
70 | function checkFields(d) {
71 | return
72 | // Check top level required fields
73 | d.payload != null
74 | && d.signature != null
75 | && d.certificateChain != null
76 | // nothing extraneous at top level
77 | && d.keys().toSet().hasOnly(['certificateChain', 'signature', 'payload'])
78 | // Check `payload` required fields
79 | // `uuid`, `created` already enforced elsewhere
80 | && d.payload.schemaVersion != null
81 | && d.payload.encryptedDataShares != null
82 | // TODO: support arbitrary number of servers here
83 | && d.payload.encryptedDataShares.size() == 2
84 | && d.payload.encryptedDataShares[0].payload != null
85 | && d.payload.encryptedDataShares[1].payload != null
86 | // check sizes
87 | && d.signature.size() < 200
88 | && d.certificateChain.size() >= 3
89 | && d.certificateChain.size() <= 10
90 | && checkCertLengths(d.certificateChain)
91 | && d.payload.uuid.size() < 50
92 | && safeCheckSize(d.payload.encryptedDataShares[0].encryptionKeyId, 100)
93 | && safeCheckSize(d.payload.encryptedDataShares[1].encryptionKeyId, 100)
94 | && d.payload.encryptedDataShares[0].payload.size() < 100000
95 | && d.payload.encryptedDataShares[1].payload.size() < 100000
96 | // nothing extraneous at payload level
97 | && d.payload.keys().toSet().hasOnly([
98 | 'uuid', 'created', 'schemaVersion', 'encryptedDataShares', 'prioParams'
99 | ])
100 | // share per server
101 | && d.payload.encryptedDataShares.size() == d.payload.prioParams.numberServers
102 | // Check prioParams required fields
103 | && d.payload.prioParams != null
104 | && d.payload.prioParams.bins != null
105 | && d.payload.prioParams.epsilon != null
106 | && d.payload.prioParams.epsilon > 1
107 | && d.payload.prioParams.numberServers != null
108 | && d.payload.prioParams.prime != null
109 | // nothing extraneous at prioParams level
110 | && d.payload.prioParams.keys().toSet().hasOnly([
111 | 'bins', 'epsilon', 'hammingWeight', 'numberServers', 'prime'
112 | ]);
113 | }
114 |
115 | // Check metric name
116 | function checkMetricName(n) {
117 | return n in [
118 | // Metric for testing
119 | 'fakeMetric-v1',
120 | // Currently supported metrics:
121 | // https://github.com/google/exposure-notifications-android/tree/master/app/src/main/java/com/google/android/apps/exposurenotification/privateanalytics/metrics
122 | 'CodeVerified-v1',
123 | 'CodeVerifiedWithReportType14d-v1',
124 | 'CodeVerifiedWithReportType14d-v2',
125 | 'DateExposure-v1',
126 | 'DateExposure-v2',
127 | 'DateExposure14d-v3',
128 | 'histogramMetric-v1',
129 | 'histogramMetric-v2',
130 | 'KeysUploaded-v1',
131 | 'KeysUploadedVaccineStatus-v1',
132 | 'KeysUploadedVaccineStatus-v2',
133 | 'KeysUploadedVaccineStatus14d-v3',
134 | 'KeysUploadedWithReportType14d-v1',
135 | 'KeysUploadedWithReportType14d-v2',
136 | 'PeriodicExposureNotification-v1',
137 | 'PeriodicExposureNotification14d-v1',
138 | 'PeriodicExposureNotification14d-v2',
139 | 'PeriodicExposureNotification14d-v3',
140 | 'PeriodicExposureNotificationInteraction-v1',
141 | 'SecondaryAttack14d-v1',
142 | 'SecondaryAttack14d-v2'
143 | ];
144 | }
145 |
146 | match /{top}/{uuid}/{date}/{metricName} {
147 | allow create: if request.resource.data.payload.uuid == uuid
148 | && top.matches('uuid[0-9]*')
149 | && request.resource.data.payload.created == request.time
150 | // Don't ingest anything with auth tokens attached
151 | && request.auth == null
152 | && checkDatePath(date, request.resource.data.payload.created)
153 | && checkFields(request.resource.data)
154 | && checkMetricName(metricName);
155 | }
156 | }
157 | }
158 |
--------------------------------------------------------------------------------
/config/firebase/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "firebaseconfigstest",
3 | "version": "1.0.0",
4 | "description": "test suite for configurations",
5 | "main": "rules_test.js",
6 | "scripts": {
7 | "test": "jest --env=node --verbose --forceExit --runInBand"
8 | },
9 | "dependencies": {
10 | "@firebase/testing": "^0.20.11",
11 | "jest": "^26.6.3"
12 | },
13 | "keywords": [],
14 | "author": "",
15 | "license": "ISC",
16 | "devDependencies": {}
17 | }
18 |
--------------------------------------------------------------------------------
/config/firebase/rules.test.js:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | const firebase = require('@firebase/testing');
15 | const fs = require('fs');
16 | const path = require('path');
17 | const assert = require('assert');
18 |
19 | const projectId = "emulator-test-project"
20 | const adminApp = firebase.initializeAdminApp({ projectId: projectId });
21 |
22 | beforeAll(async () => {
23 | await firebase.loadFirestoreRules({
24 | projectId: projectId,
25 | rules: fs.readFileSync("firestore.rules", "utf8")
26 | });
27 | const doc = adminApp.firestore().collection('uuid').doc('preexisting')
28 | .collection('2020-09-03-13').doc('fakeMetric-v1');
29 | await doc.set({
30 | 'payload': {
31 | 'created': firebase.firestore.FieldValue.serverTimestamp(),
32 | 'uuid': 'preexisting',
33 | }
34 | });
35 | });
36 |
37 | function getPath(date) {
38 | return date.toISOString().split('T')[0] + "-"
39 | + date.toISOString().split('T')[1].split(':')[0];
40 | }
41 |
42 | function correctContents(uuid = 'foo') {
43 | return {
44 | 'payload': {
45 | 'created': firebase.firestore.FieldValue.serverTimestamp(),
46 | 'uuid': uuid,
47 | 'schemaVersion': '1',
48 | 'encryptedDataShares': [
49 | {
50 | 'payload': 'payload1',
51 | 'encryptionKeyId': 'key1'
52 | },
53 | {
54 | 'payload': 'payload2',
55 | 'encryptionKeyId': 'key2'
56 | }
57 | ],
58 | 'prioParams': {
59 | 'bins': 1,
60 | 'epsilon': 2,
61 | 'hammingWeight': 3,
62 | 'numberServers': 2,
63 | 'prime': 5
64 | }
65 | },
66 | 'certificateChain': ['cert1', 'cert2', 'cert3'],
67 | 'signature': 'sig'
68 | };
69 | }
70 |
71 | describe('Tests of document writes and access', () => {
72 | const app = firebase.initializeTestApp({
73 | projectId: projectId,
74 | auth: null
75 | });
76 | const db = app.firestore()
77 | const datefmt = getPath(new Date());
78 | it('document cannot be written at wrong path',
79 | async () => {
80 | const doc = db.collection('random').doc('wrongpath');
81 | await firebase.assertFails(doc.set(correctContents()));
82 | });
83 | it('document cannot be written without payload',
84 | async () => {
85 | const doc = db.collection('uuid').doc('nopayload')
86 | .collection(datefmt).doc('fakeMetric-v1');
87 | contents = correctContents('nopayload');
88 | delete contents['payload'];
89 | await firebase.assertFails(doc.set(contents));
90 | });
91 | it('document cannot be written without uuid',
92 | async () => {
93 | const doc = db.collection('uuid').doc('nouuidfield')
94 | .collection(datefmt).doc('fakeMetric-v1');
95 | contents = correctContents();
96 | delete contents['payload']['uuid'] ;
97 | await firebase.assertFails(doc.set(contents));
98 | });
99 | it('document cannot be written without created field',
100 | async () => {
101 | const doc = db.collection('uuid').doc('nocreated')
102 | .collection(datefmt).doc('fakeMetric-v1');
103 | contents = correctContents('nocreated');
104 | delete contents['payload']['created'];
105 | await firebase.assertFails(doc.set(contents));
106 | });
107 | it('document cannot be written with extraneous field',
108 | async () => {
109 | const doc = db.collection('uuid').doc('extraneous')
110 | .collection(datefmt).doc('fakeMetric-v1');
111 | contents = correctContents('extraneous');
112 | contents['payload']['prioParams']['banana'] = "extra field";
113 | await firebase.assertFails(doc.set(contents));
114 | });
115 | it('documents cannot be created at very old path',
116 | async () => {
117 | var oldDate = new Date();
118 | oldDate.setHours(oldDate.getHours() - 2);
119 | const doc = db.collection('uuid').doc('old')
120 | .collection(getPath(oldDate)).doc('fakeMetric-v1');
121 | await firebase.assertFails(doc.set(correctContents('old')));
122 | });
123 | it('documents cannot be created with very large uuids',
124 | async () => {
125 | longuuid = 'x'.repeat(1000);
126 | const doc = db.collection('uuid').doc(longuuid)
127 | .collection(datefmt).doc('fakeMetric-v1');
128 | contents = correctContents(longuuid);
129 | contents['payload']['uuid'] = longuuid;
130 | await firebase.assertFails(doc.set(contents));
131 | });
132 | it('documents cannot be created with very large signatures',
133 | async () => {
134 | const doc = db.collection('uuid').doc('longsig')
135 | .collection(datefmt).doc('fakeMetric-v1');
136 | contents = correctContents('longsig');
137 | contents['signature'] = 'x'.repeat(1000);
138 | await firebase.assertFails(doc.set(contents));
139 | });
140 | it('documents cannot be created with very long certificate chains',
141 | async () => {
142 | const doc = db.collection('uuid').doc('longchain')
143 | .collection(datefmt).doc('fakeMetric-v1');
144 | contents = correctContents('longchain');
145 | contents['certificateChain'] = Array(12).fill('cert')
146 | await firebase.assertFails(doc.set(contents));
147 | });
148 | it('documents cannot be created with a large certificate',
149 | async () => {
150 | const doc = db.collection('uuid').doc('longcert')
151 | .collection(datefmt).doc('fakeMetric-v1');
152 | contents = correctContents('longcert');
153 | contents['certificateChain'].push('x'.repeat(50000));
154 | await firebase.assertFails(doc.set(contents));
155 | });
156 | it('correct documents can be created',
157 | async () => {
158 | const doc = db.collection('uuid').doc('correct1')
159 | .collection(datefmt).doc('fakeMetric-v1');
160 | await firebase.assertSucceeds(doc.set(correctContents('correct1')));
161 | });
162 | it('documents can be created at slightly off path',
163 | async () => {
164 | var oldDate = new Date();
165 | oldDate.setHours(oldDate.getHours() - 1);
166 | const doc = db.collection('uuid').doc('correct2')
167 | .collection(getPath(oldDate)).doc('fakeMetric-v1');
168 | await firebase.assertSucceeds(doc.set(correctContents('correct2')));
169 | });
170 | it('documents can be created at sharded top level collection',
171 | async () => {
172 | var oldDate = new Date();
173 | oldDate.setHours(oldDate.getHours() - 1);
174 | const doc = db.collection('uuid24').doc('correct3')
175 | .collection(getPath(oldDate)).doc('fakeMetric-v1');
176 | await firebase.assertSucceeds(doc.set(correctContents('correct3')));
177 | });
178 | it('document cannot be deleted',
179 | async () => {
180 | const doc = db.collection('uuid').doc('preexisting')
181 | .collection('2020-09-03-13').doc('fakeMetric-v1');
182 | await firebase.assertFails(doc.delete());
183 | });
184 | it('document cannot be updated',
185 | async () => {
186 | const doc = db.collection('uuid').doc('preexisting')
187 | .collection('2020-09-03-13').doc('fakeMetric-v1');
188 | await firebase.assertFails(doc.update(correctContents('preexisting')));
189 | });
190 | it('document cannot be read',
191 | async () => {
192 | const doc = db.collection('uuid').doc('preexisting')
193 | .collection('2020-09-03-13').doc('fakeMetric-v1');
194 | await firebase.assertFails(doc.get());
195 | });
196 | it('check final state of firestore',
197 | async () => {
198 | const querySnapshot = await adminApp.firestore()
199 | .collectionGroup('uuid').get();
200 | foundUuids = []
201 | querySnapshot.forEach((doc) => {
202 | foundUuids.push(doc.data()['payload']['uuid']);
203 | });
204 | assert.notStrictEqual(foundUuids,
205 | [ 'correct1', 'correct2', 'correct3', 'preexisting' ])
206 | });
207 | });
208 |
--------------------------------------------------------------------------------
/container-build.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | steps:
16 | ###########################################################
17 | # Step 1: Retrieve the cached .m2 directory from GCS
18 | ###########################################################
19 | - name: 'gcr.io/cloud-builders/gsutil'
20 | id: fetch-mvn-cache
21 | args:
22 | - '-m'
23 | - 'rsync'
24 | - '-r'
25 | - 'gs://${_BUCKET}/cache/.m2'
26 | - '/cache/.m2'
27 | volumes:
28 | - path: '/cache/.m2'
29 | name: 'm2_cache'
30 |
31 | ###########################################################
32 | # Step 2: Download submodules
33 | ###########################################################
34 | - name: gcr.io/cloud-builders/git
35 | waitFor:
36 | - '-'
37 | id: update-submodules
38 | args: ['submodule', 'update', '--init']
39 |
40 | ###########################################################
41 | # Step 3: Retag cloned git repo hash
42 | ###########################################################
43 | - name: gcr.io/cloud-builders/git
44 | waitFor:
45 | - '-'
46 | id: recreate-tag
47 | args: ['tag', '$_TAG']
48 |
49 | ###########################################################
50 | # Step 4: Rebuild Dataflow Flex Template base image
51 | ###########################################################
52 | - name: gcr.io/cloud-builders/docker
53 | id: rebuild-dataflow-flex-base-image
54 | waitFor:
55 | - '-'
56 | args:
57 | - 'build'
58 | - '-t'
59 | - 'gcr.io/enpa-public-assets/distroless-flex-template-launcher:11'
60 | - 'df-flex-template-base-image'
61 |
62 | ###########################################################
63 | # Step 5: Publish Dataflow Flex Template base image
64 | ###########################################################
65 | - name: gcr.io/cloud-builders/docker
66 | id: push-dataflow-flex-base-image
67 | waitFor:
68 | - rebuild-dataflow-flex-base-image
69 | args:
70 | - 'push'
71 | - 'gcr.io/enpa-public-assets/distroless-flex-template-launcher:11'
72 |
73 | ###########################################################
74 | # Step 6: Package and Deploy to Github maven repo
75 | ###########################################################
76 | - name: gcr.io/cloud-devrel-public-resources/java11
77 | id: package-deploy-mvn-registry
78 | waitFor:
79 | - update-submodules
80 | - recreate-tag
81 | - fetch-mvn-cache
82 | entrypoint: bash
83 | args:
84 | - '-c'
85 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -U -s mvn-settings.xml -DskipTests -Drevision=$(${_VERSION}) deploy'
86 | env:
87 | - 'PROJECT=enpa-integration-testing'
88 | - 'KEY_RESOURCE_NAME=projects/enpa-integration-testing/locations/global/keyRings/appa-signature-key/cryptoKeys/appa-signature-key/cryptoKeyVersions/1'
89 | secretEnv: ['GITHUB_TOKEN']
90 |
91 | ###########################################################
92 | # Step 7: Build and Publish Public Ingestion Container Image
93 | ###########################################################
94 | - name: gcr.io/cloud-devrel-public-resources/java11
95 | id: public-ingestion-container-build
96 | waitFor:
97 | - update-submodules
98 | - recreate-tag
99 | - fetch-mvn-cache
100 | - push-dataflow-flex-base-image
101 | entrypoint: bash
102 | args:
103 | - '-c'
104 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -DskipTests -Pingestion-container-build -Dcontainer-version=$(${_VERSION}) -Dcontainer_registry_tag_prefix="gcr.io/enpa-public-assets" -Drevision=$(${_VERSION}) package'
105 |
106 | ###########################################################
107 | # Step 8: Build and Publish Public Deletion Container Image
108 | ###########################################################
109 | - name: gcr.io/cloud-devrel-public-resources/java11
110 | id: public-deletion-container-build
111 | waitFor:
112 | - update-submodules
113 | - recreate-tag
114 | - fetch-mvn-cache
115 | - push-dataflow-flex-base-image
116 | entrypoint: bash
117 | args:
118 | - '-c'
119 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -DskipTests -Pdeletion-container-build -Dcontainer-version=$(${_VERSION}) -Dcontainer_registry_tag_prefix="gcr.io/enpa-public-assets" -Drevision=$(${_VERSION}) package'
120 |
121 | ###########################################################
122 | # Step 9: Build and Publish Ingestion Container Image
123 | ###########################################################
124 | - name: gcr.io/cloud-devrel-public-resources/java11
125 | id: ingestion-container-build
126 | waitFor:
127 | - update-submodules
128 | - recreate-tag
129 | - fetch-mvn-cache
130 | - push-dataflow-flex-base-image
131 | entrypoint: bash
132 | args:
133 | - '-c'
134 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -s mvn-settings.xml -DskipTests -Pingestion-container-build,attestation -Dcontainer-version=$(${_VERSION}) -Dcontainer_registry_tag_prefix="gcr.io/enpa-infra" -Drevision=$(${_VERSION}) package'
135 | secretEnv: ['GITHUB_TOKEN']
136 |
137 | ###########################################################
138 | # Step 10: Build and Publish Deletion Container Image
139 | ###########################################################
140 | - name: gcr.io/cloud-devrel-public-resources/java11
141 | id: deletion-container-build
142 | waitFor:
143 | - update-submodules
144 | - recreate-tag
145 | - fetch-mvn-cache
146 | - push-dataflow-flex-base-image
147 | entrypoint: bash
148 | args:
149 | - '-c'
150 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -s mvn-settings.xml -DskipTests -Pdeletion-container-build,attestation -Dcontainer-version=$(${_VERSION}) -Dcontainer_registry_tag_prefix="gcr.io/enpa-infra" -Drevision=$(${_VERSION}) package'
151 | secretEnv: ['GITHUB_TOKEN']
152 |
153 | ###########################################################
154 | # Step 11: Generate Templates for Dataflow jobs and Firebase
155 | ###########################################################
156 | - name: gcr.io/google.com/cloudsdktool/cloud-sdk:alpine
157 | id: flex-template-generator-and-uploader
158 | waitFor:
159 | - recreate-tag
160 | args:
161 | - "./ci/generate-templates.sh"
162 | env:
163 | - PROJECT_ID=$PROJECT_ID
164 |
165 | ###########################################################
166 | # Step 12: Update cached .m2 directory on GCS with any
167 | # additional dependencies downloaded during the
168 | # build.
169 | ###########################################################
170 | - name: gcr.io/cloud-builders/gsutil
171 | id: sync-mvn-cache
172 | waitFor:
173 | - ingestion-container-build
174 | - deletion-container-build
175 | args:
176 | - '-m'
177 | - 'rsync'
178 | - '-r'
179 | - '/cache/.m2'
180 | - 'gs://${_BUCKET}/cache/.m2/'
181 | volumes:
182 | - path: '/cache/.m2'
183 | name: 'm2_cache'
184 |
185 | images:
186 | - 'gcr.io/enpa-public-assets/distroless-flex-template-launcher:11'
187 |
188 | substitutions:
189 | # Default value
190 | _BUCKET: 'enpa-infra'
191 | _DEFAULT_TAG: '${SHORT_SHA}-SNAPSHOT'
192 | _TAG: ${TAG_NAME:-$_DEFAULT_TAG} #default value will be SHORT_SHA-SNAPSHOT
193 | _VERSION: 'git describe --tags --always --dirty=-dirty'
194 |
195 | secrets:
196 | - kmsKeyName: projects/enpa-infra/locations/global/keyRings/cloudbuild-keyring/cryptoKeys/cloudbuild
197 | secretEnv:
198 | GITHUB_TOKEN: 'CiQAzNSb40phOg8+Rtn21yuiJuqJA3CKm5YWgigMwpA5lsM42NgSUQA/9gK92tb89IansK/cTpYuAJpf9PUZ7Lvse94FqFmk/mVULLISsoNr6+39npTZAG2el7cRQ22LozB5lwE9dZGywalT7xHxs46+nywy/ix8Qg=='
199 |
200 | options:
201 | # Use higher CPU machines so the caching and build steps are faster.
202 | machineType: 'N1_HIGHCPU_32'
203 |
204 | serviceAccount: 'projects/enpa-infra/serviceAccounts/development-integration-tests@enpa-integration-testing.iam.gserviceaccount.com'
--------------------------------------------------------------------------------
/df-flex-template-base-image/Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | FROM gcr.io/distroless/java:11
16 |
17 | COPY java_template_launcher /opt/google/dataflow/java_template_launcher
18 |
--------------------------------------------------------------------------------
/df-flex-template-base-image/java_template_launcher:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/exposure-notifications-private-analytics-ingestion/e4cb1ba2529cc055f68cb0895b2785919ee2ab11/df-flex-template-base-image/java_template_launcher
--------------------------------------------------------------------------------
/docs/code-of-conduct.md:
--------------------------------------------------------------------------------
1 | # Google Open Source Community Guidelines
2 |
3 | At Google, we recognize and celebrate the creativity and collaboration of open
4 | source contributors and the diversity of skills, experiences, cultures, and
5 | opinions they bring to the projects and communities they participate in.
6 |
7 | Every one of Google's open source projects and communities are inclusive
8 | environments, based on treating all individuals respectfully, regardless of
9 | gender identity and expression, sexual orientation, disabilities,
10 | neurodiversity, physical appearance, body size, ethnicity, nationality, race,
11 | age, religion, or similar personal characteristic.
12 |
13 | We value diverse opinions, but we value respectful behavior more.
14 |
15 | Respectful behavior includes:
16 |
17 | * Being considerate, kind, constructive, and helpful.
18 | * Not engaging in demeaning, discriminatory, harassing, hateful, sexualized, or
19 | physically threatening behavior, speech, and imagery.
20 | * Not engaging in unwanted physical contact.
21 |
22 | Some Google open source projects [may adopt][] an explicit project code of
23 | conduct, which may have additional detailed expectations for participants. Most
24 | of those projects will use our [modified Contributor Covenant][].
25 |
26 | [may adopt]: https://opensource.google/docs/releasing/preparing/#conduct
27 | [modified Contributor Covenant]: https://opensource.google/docs/releasing/template/CODE_OF_CONDUCT/
28 |
29 | ## Resolve peacefully
30 |
31 | We do not believe that all conflict is necessarily bad; healthy debate and
32 | disagreement often yields positive results. However, it is never okay to be
33 | disrespectful.
34 |
35 | If you see someone behaving disrespectfully, you are encouraged to address the
36 | behavior directly with those involved. Many issues can be resolved quickly and
37 | easily, and this gives people more control over the outcome of their dispute.
38 | If you are unable to resolve the matter for any reason, or if the behavior is
39 | threatening or harassing, report it. We are dedicated to providing an
40 | environment where participants feel welcome and safe.
41 |
42 | ## Reporting problems
43 |
44 | Some Google open source projects may adopt a project-specific code of conduct.
45 | In those cases, a Google employee will be identified as the Project Steward,
46 | who will receive and handle reports of code of conduct violations. In the event
47 | that a project hasn’t identified a Project Steward, you can report problems by
48 | emailing opensource@google.com.
49 |
50 | We will investigate every complaint, but you may not receive a direct response.
51 | We will use our discretion in determining when and how to follow up on reported
52 | incidents, which may range from not taking action to permanent expulsion from
53 | the project and project-sponsored spaces. We will notify the accused of the
54 | report and provide them an opportunity to discuss it before any action is
55 | taken. The identity of the reporter will be omitted from the details of the
56 | report supplied to the accused. In potentially harmful situations, such as
57 | ongoing harassment or threats to anyone's safety, we may take action without
58 | notice.
59 |
60 | *This document was adapted from the [IndieWeb Code of Conduct][] and can also
61 | be found at <https://opensource.google/conduct/>.*
62 |
63 | [IndieWeb Code of Conduct]: https://indieweb.org/code-of-conduct
64 |
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google/conduct/).
29 |
--------------------------------------------------------------------------------
/logging.properties:
--------------------------------------------------------------------------------
1 | handlers=java.util.logging.ConsoleHandler
2 | .level=ALL
3 |
--------------------------------------------------------------------------------
/model/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
17 |
20 | 4.0.0
21 |
22 | com.google.exposurenotification.privateanalytics.ingestion
23 | enpa-ingestion
24 | ${revision}
25 | ../pom.xml
26 |
27 | com.google.exposurenotification.privateanalytics.ingestion
28 | enpa-ingestion-model
29 | jar
30 |
31 |
32 | ../src/main/java
33 | ../src/test/java
34 |
35 |
36 | org.apache.maven.plugins
37 | maven-compiler-plugin
38 | ${maven-compiler-plugin.version}
39 |
40 |
41 |
42 | com.google.auto.value
43 | auto-value
44 | ${auto-value.version}
45 |
46 |
47 | **/model/**/*.java
48 | **/model/**
49 |
50 |
51 |
52 | org.apache.maven.plugins
53 | maven-jar-plugin
54 | 3.2.0
55 |
56 |
57 |
58 | test-jar
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 | com.google.cloud
69 | google-cloud-firestore
70 | 2.0.0
71 |
72 |
73 |
74 | com.google.firebase
75 | firebase-admin
76 | 7.0.0
77 |
78 |
79 |
80 | org.apache.beam
81 | beam-sdks-java-core
82 | ${beam.version}
83 |
84 |
85 |
86 |
87 |
--------------------------------------------------------------------------------
/mvn-settings.xml:
--------------------------------------------------------------------------------
1 | <settings xmlns="http://maven.apache.org/SETTINGS/1.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2 |     xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
3 |   <activeProfiles>
4 |     <activeProfile>github</activeProfile>
5 |   </activeProfiles>
6 |
7 |   <profiles>
8 |     <profile>
9 |       <id>github</id>
10 |       <repositories>
11 |         <repository>
12 |           <id>central</id>
13 |           <url>https://repo1.maven.org/maven2</url>
14 |           <releases>
15 |             <enabled>true</enabled>
16 |           </releases>
17 |         </repository>
18 |         <repository>
19 |           <id>github-public</id>
20 |           <name>ENPA Public Apache Maven Packages</name>
21 |           <url>https://maven.pkg.github.com/google/exposure-notifications-private-analytics-ingestion</url>
22 |           <snapshots>
23 |             <enabled>true</enabled>
24 |           </snapshots>
25 |         </repository>
26 |         <repository>
27 |           <id>github-private</id>
28 |           <name>ENPA Private Apache Maven Packages</name>
29 |           <url>https://maven.pkg.github.com/googleprivate/enpa-ingestion-infra</url>
30 |           <snapshots>
31 |             <enabled>true</enabled>
32 |           </snapshots>
33 |         </repository>
34 |       </repositories>
35 |     </profile>
36 |   </profiles>
37 |
38 |   <servers>
39 |     <server>
40 |       <id>github-public</id>
41 |       <username>x-access-token</username>
42 |       <password>${env.GITHUB_TOKEN}</password>
43 |     </server>
44 |     <server>
45 |       <id>github-private</id>
46 |       <username>x-access-token</username>
47 |       <password>${env.GITHUB_TOKEN}</password>
48 |     </server>
49 |   </servers>
50 | </settings>
51 |
--------------------------------------------------------------------------------
/mvnw.cmd:
--------------------------------------------------------------------------------
1 | @REM ----------------------------------------------------------------------------
2 | @REM Licensed to the Apache Software Foundation (ASF) under one
3 | @REM or more contributor license agreements. See the NOTICE file
4 | @REM distributed with this work for additional information
5 | @REM regarding copyright ownership. The ASF licenses this file
6 | @REM to you under the Apache License, Version 2.0 (the
7 | @REM "License"); you may not use this file except in compliance
8 | @REM with the License. You may obtain a copy of the License at
9 | @REM
10 | @REM https://www.apache.org/licenses/LICENSE-2.0
11 | @REM
12 | @REM Unless required by applicable law or agreed to in writing,
13 | @REM software distributed under the License is distributed on an
14 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | @REM KIND, either express or implied. See the License for the
16 | @REM specific language governing permissions and limitations
17 | @REM under the License.
18 | @REM ----------------------------------------------------------------------------
19 |
20 | @REM ----------------------------------------------------------------------------
21 | @REM Maven Start Up Batch script
22 | @REM
23 | @REM Required ENV vars:
24 | @REM JAVA_HOME - location of a JDK home dir
25 | @REM
26 | @REM Optional ENV vars
27 | @REM M2_HOME - location of maven2's installed home dir
28 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
29 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending
30 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
31 | @REM e.g. to debug Maven itself, use
32 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
33 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
34 | @REM ----------------------------------------------------------------------------
35 |
36 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
37 | @echo off
38 | @REM set title of command window
39 | title %0
40 | @REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
41 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%
42 |
43 | @REM set %HOME% to equivalent of $HOME
44 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")
45 |
46 | @REM Execute a user defined script before this one
47 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
48 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending
49 | if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat"
50 | if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd"
51 | :skipRcPre
52 |
53 | @setlocal
54 |
55 | set ERROR_CODE=0
56 |
57 | @REM To isolate internal variables from possible post scripts, we use another setlocal
58 | @setlocal
59 |
60 | @REM ==== START VALIDATION ====
61 | if not "%JAVA_HOME%" == "" goto OkJHome
62 |
63 | echo.
64 | echo Error: JAVA_HOME not found in your environment. >&2
65 | echo Please set the JAVA_HOME variable in your environment to match the >&2
66 | echo location of your Java installation. >&2
67 | echo.
68 | goto error
69 |
70 | :OkJHome
71 | if exist "%JAVA_HOME%\bin\java.exe" goto init
72 |
73 | echo.
74 | echo Error: JAVA_HOME is set to an invalid directory. >&2
75 | echo JAVA_HOME = "%JAVA_HOME%" >&2
76 | echo Please set the JAVA_HOME variable in your environment to match the >&2
77 | echo location of your Java installation. >&2
78 | echo.
79 | goto error
80 |
81 | @REM ==== END VALIDATION ====
82 |
83 | :init
84 |
85 | @REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
86 | @REM Fallback to current working directory if not found.
87 |
88 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
89 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir
90 |
91 | set EXEC_DIR=%CD%
92 | set WDIR=%EXEC_DIR%
93 | :findBaseDir
94 | IF EXIST "%WDIR%"\.mvn goto baseDirFound
95 | cd ..
96 | IF "%WDIR%"=="%CD%" goto baseDirNotFound
97 | set WDIR=%CD%
98 | goto findBaseDir
99 |
100 | :baseDirFound
101 | set MAVEN_PROJECTBASEDIR=%WDIR%
102 | cd "%EXEC_DIR%"
103 | goto endDetectBaseDir
104 |
105 | :baseDirNotFound
106 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
107 | cd "%EXEC_DIR%"
108 |
109 | :endDetectBaseDir
110 |
111 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig
112 |
113 | @setlocal EnableExtensions EnableDelayedExpansion
114 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
115 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%
116 |
117 | :endReadAdditionalConfig
118 |
119 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
120 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
121 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
122 |
123 | set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
124 |
125 | FOR /F "tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
126 | IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B
127 | )
128 |
129 | @REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
130 | @REM This allows using the maven wrapper in projects that prohibit checking in binary data.
131 | if exist %WRAPPER_JAR% (
132 | if "%MVNW_VERBOSE%" == "true" (
133 | echo Found %WRAPPER_JAR%
134 | )
135 | ) else (
136 | if not "%MVNW_REPOURL%" == "" (
137 | SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
138 | )
139 | if "%MVNW_VERBOSE%" == "true" (
140 | echo Couldn't find %WRAPPER_JAR%, downloading it ...
141 | echo Downloading from: %DOWNLOAD_URL%
142 | )
143 |
144 | powershell -Command "&{"^
145 | "$webclient = new-object System.Net.WebClient;"^
146 | "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^
147 | "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^
148 | "}"^
149 | "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^
150 | "}"
151 | if "%MVNW_VERBOSE%" == "true" (
152 | echo Finished downloading %WRAPPER_JAR%
153 | )
154 | )
155 | @REM End of extension
156 |
157 | @REM Provide a "standardized" way to retrieve the CLI args that will
158 | @REM work with both Windows and non-Windows executions.
159 | set MAVEN_CMD_LINE_ARGS=%*
160 |
161 | %MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
162 | if ERRORLEVEL 1 goto error
163 | goto end
164 |
165 | :error
166 | set ERROR_CODE=1
167 |
168 | :end
169 | @endlocal & set ERROR_CODE=%ERROR_CODE%
170 |
171 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost
172 | @REM check for post script, once with legacy .bat ending and once with .cmd ending
173 | if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat"
174 | if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd"
175 | :skipRcPost
176 |
177 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
178 | if "%MAVEN_BATCH_PAUSE%" == "on" pause
179 |
180 | if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE%
181 |
182 | exit /B %ERROR_CODE%
183 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
17 |
20 | 4.0.0
21 |
22 | com.google.exposurenotification.privateanalytics.ingestion
23 | enpa-ingestion
24 | ${revision}
25 |
26 | pom
27 |
28 |
29 | model
30 | attestation
31 | pipeline
32 |
33 |
34 |
35 | 20210413_RC01-SNAPSHOT
36 | change.me/changme
37 | 20210426_01
38 | 11
39 | 11
40 | 2.34.0
41 | 2.1
42 | 2.10.2
43 | 2.10.5
44 | 4.13.1
45 | 3.7.0
46 | 1.6.0
47 | 3.0.2
48 | 3.1.0
49 | 3.0.0
50 | 1.7.25
51 | 2.4.6
52 | 2.8.5
53 | 2.22.0
54 | ${project.version}
55 | 1.0-rc7
56 | 1.7.4
57 |
58 |
59 |
60 |
61 |
62 | org.codehaus.mojo
63 | flatten-maven-plugin
64 | 1.2.2
65 |
66 | true
67 |
68 |
69 |
70 | flatten
71 | process-resources
72 |
73 | flatten
74 |
75 |
76 |
77 | flatten.clean
78 | clean
79 |
80 | clean
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 | org.codehaus.mojo
90 | exec-maven-plugin
91 | ${maven-exec-plugin.version}
92 |
93 | true
94 | java
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 | com.google.cloud
105 | libraries-bom
106 | 22.0.0
107 | pom
108 | import
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 | com.google.guava
117 | guava
118 | [30.0-jre,)
119 |
120 |
121 | joda-time
122 | joda-time
123 | ${joda.version}
124 |
125 |
126 | org.slf4j
127 | slf4j-api
128 | ${slf4j.version}
129 |
130 |
131 | org.slf4j
132 | slf4j-jdk14
133 | ${slf4j.version}
134 | runtime
135 |
136 |
137 | org.hamcrest
138 | hamcrest-core
139 | ${hamcrest.version}
140 |
141 |
142 | org.hamcrest
143 | hamcrest-library
144 | ${hamcrest.version}
145 |
146 |
147 | junit
148 | junit
149 | ${junit.version}
150 | test
151 |
152 |
153 | com.google.auto.value
154 | auto-value
155 | ${auto-value.version}
156 | provided
157 |
158 |
159 | com.google.auto.service
160 | auto-service-annotations
161 | ${auto-service.version}
162 |
163 |
164 | org.mockito
165 | mockito-all
166 | 1.10.19
167 | test
168 |
169 |
170 | com.google.truth
171 | truth
172 | 1.0.1
173 | test
174 |
175 |
176 | com.google.protobuf
177 | protobuf-java
178 | 3.16.1
179 |
180 |
181 | org.apache.avro
182 | avro
183 | 1.10.0
184 |
185 |
186 | org.apache.avro
187 | avro-compiler
188 | 1.10.0
189 |
190 |
191 | org.apache.avro
192 | avro-maven-plugin
193 | 1.10.0
194 |
195 |
196 | com.google.code.gson
197 | gson
198 | 2.8.5
199 | compile
200 |
201 |
202 | com.google.truth.extensions
203 | truth-java8-extension
204 | 1.0.1
205 | test
206 |
207 |
208 |
209 |
210 |
211 | github-public
212 | GitHub Packages
213 | https://maven.pkg.github.com/google/exposure-notifications-private-analytics-ingestion
214 |
215 |
216 |
217 |
--------------------------------------------------------------------------------
/src/main/java/com/google/exposurenotification/privateanalytics/ingestion/attestation/AbstractDeviceAttestation.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.attestation;
17 |
18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare;
19 | import org.apache.beam.sdk.metrics.Counter;
20 | import org.apache.beam.sdk.metrics.Metrics;
21 | import org.apache.beam.sdk.options.PipelineOptions;
22 | import org.apache.beam.sdk.transforms.PTransform;
23 | import org.apache.beam.sdk.values.PCollection;
24 |
25 | /**
26 | * Class to extend to implement some form of check that data originated from a real device.
27 | *
28 |  * <p>We use a PTransform rather than a Filter to allow flexibility in implementations. E.g., an
29 | * implementation might want to compute various distributions over the set of data coming in rather
30 | * than make a strictly local decision as to whether a given DataShare is attested.
31 | */
32 | public abstract class AbstractDeviceAttestation
33 |     extends PTransform<PCollection<DataShare>, PCollection<DataShare>> {
34 |
35 | // Counters for the number of elements processed and eventually accepted.
36 | protected static final Counter processedCounter =
37 | Metrics.counter(AbstractDeviceAttestation.class, "processed");
38 | protected static final Counter acceptedCounter =
39 | Metrics.counter(AbstractDeviceAttestation.class, "accepted");
40 |
41 | /** @return a non-null class object if the attestation has pipeline options to be registered */
42 |   public abstract Class<? extends PipelineOptions> getOptionsClass();
43 | }
44 |
--------------------------------------------------------------------------------
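For orientation, here is a minimal sketch (not part of the repository) of how a concrete subclass of the AbstractDeviceAttestation transform above might look; the class name and pass-through behavior are invented for illustration, and only standard Beam SDK calls (ParDo/DoFn) are used:

import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;

// Hypothetical example: a pass-through attestation that accepts every DataShare and
// only increments the counters inherited from AbstractDeviceAttestation.
public class PassThroughAttestation extends AbstractDeviceAttestation {

  @Override
  public PCollection<DataShare> expand(PCollection<DataShare> input) {
    return input.apply(
        ParDo.of(
            new DoFn<DataShare, DataShare>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                processedCounter.inc();
                // A real implementation would verify the device signature/certificate chain here.
                acceptedCounter.inc();
                c.output(c.element());
              }
            }));
  }

  @Override
  public Class<? extends PipelineOptions> getOptionsClass() {
    // This sketch registers no additional pipeline options.
    return null;
  }
}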
/src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/AWSFederatedAuthHelper.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import com.amazonaws.auth.*;
19 | import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
20 | import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
21 | import com.amazonaws.services.securitytoken.model.AssumeRoleWithWebIdentityRequest;
22 | import com.amazonaws.services.securitytoken.model.AssumeRoleWithWebIdentityResult;
23 | import com.amazonaws.services.securitytoken.model.Credentials;
24 | import com.google.auth.oauth2.GoogleCredentials;
25 | import com.google.auth.oauth2.IdTokenProvider;
26 | import java.io.IOException;
27 |
28 | public class AWSFederatedAuthHelper {
29 |
30 | private AWSFederatedAuthHelper() {}
31 |
32 | public static void setupAWSAuth(IngestionPipelineOptions options, String role, String region)
33 | throws IOException {
34 | GoogleCredentials credentials = GoogleCredentials.getApplicationDefault();
35 | if (!(credentials instanceof IdTokenProvider)) {
36 | throw new IllegalArgumentException("Credentials are not an instance of IdTokenProvider.");
37 | }
38 |
39 | AWSSecurityTokenService stsClient =
40 | AWSSecurityTokenServiceClientBuilder.standard()
41 | .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials()))
42 | .withRegion(region)
43 | .build();
44 |
45 | /**
46 | * Obtain credentials for the IAM role. Note that you cannot assume the role of an AWS root
47 | * account. Amazon S3 will deny access. You must use credentials for an IAM user or an IAM role.
48 | */
49 | AssumeRoleWithWebIdentityRequest roleRequest =
50 | new AssumeRoleWithWebIdentityRequest()
51 | .withRoleArn(role)
52 | .withRoleSessionName("enpa-gcp-aws-session")
53 | .withWebIdentityToken(
54 | ((IdTokenProvider) credentials)
55 | .idTokenWithAudience("enpa-gcp-aws", null)
56 | .getTokenValue());
57 |
58 | AssumeRoleWithWebIdentityResult roleResponse = stsClient.assumeRoleWithWebIdentity(roleRequest);
59 | Credentials sessionCredentials = roleResponse.getCredentials();
60 |
61 | // Create a BasicSessionCredentials object that contains the credentials you just retrieved.
62 | BasicSessionCredentials awsCredentials =
63 | new BasicSessionCredentials(
64 | sessionCredentials.getAccessKeyId(),
65 | sessionCredentials.getSecretAccessKey(),
66 | sessionCredentials.getSessionToken());
67 |
68 | options.setAwsCredentialsProvider(new AWSStaticCredentialsProvider(awsCredentials));
69 | options.setAwsRegion(region);
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/BatchWriterFn.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import com.google.cloud.kms.v1.AsymmetricSignResponse;
19 | import com.google.cloud.kms.v1.CryptoKeyVersionName;
20 | import com.google.cloud.kms.v1.Digest;
21 | import com.google.cloud.kms.v1.KeyManagementServiceClient;
22 | import com.google.common.collect.ImmutableList;
23 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare;
24 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.DataShareMetadata;
25 | import com.google.protobuf.ByteString;
26 | import java.io.IOException;
27 | import java.nio.ByteBuffer;
28 | import java.nio.channels.WritableByteChannel;
29 | import java.security.MessageDigest;
30 | import java.security.NoSuchAlgorithmException;
31 | import java.time.Clock;
32 | import java.time.Duration;
33 | import java.time.Instant;
34 | import java.time.ZoneOffset;
35 | import java.time.format.DateTimeFormatter;
36 | import java.util.ArrayList;
37 | import java.util.List;
38 | import java.util.UUID;
39 | import java.util.concurrent.TimeUnit;
40 | import org.abetterinternet.prio.v1.PrioBatchSignature;
41 | import org.abetterinternet.prio.v1.PrioDataSharePacket;
42 | import org.abetterinternet.prio.v1.PrioIngestionHeader;
43 | import org.apache.beam.sdk.io.FileSystems;
44 | import org.apache.beam.sdk.io.fs.ResourceId;
45 | import org.apache.beam.sdk.metrics.Counter;
46 | import org.apache.beam.sdk.metrics.Metrics;
47 | import org.apache.beam.sdk.transforms.DoFn;
48 | import org.apache.beam.sdk.util.MimeTypes;
49 | import org.apache.beam.sdk.values.KV;
50 | import org.slf4j.Logger;
51 | import org.slf4j.LoggerFactory;
52 |
53 | /** Function to write files (header, data records, signature) for a batch of {@link DataShare} */
54 | public class BatchWriterFn extends DoFn<KV<DataShareMetadata, Iterable<DataShare>>, Void> {
55 |
56 | public static final String INGESTION_HEADER_SUFFIX = ".batch";
57 | public static final String DATASHARE_PACKET_SUFFIX = ".batch.avro";
58 | public static final String HEADER_SIGNATURE_SUFFIX = ".batch.sig";
59 |
60 | private static final Logger LOG = LoggerFactory.getLogger(BatchWriterFn.class);
61 | private static final Duration KMS_WAIT_TIME = Duration.ofSeconds(30);
62 | private static final DateTimeFormatter DATE_TIME_FORMATTER =
63 | DateTimeFormatter.ofPattern("/yyyy/MM/dd/HH/mm/");
64 |
65 | private static final Counter dataSharesInBatch =
66 | Metrics.counter(BatchWriterFn.class, "dataSharesInBatch");
67 |
68 | private static final Counter failedDataShares =
69 | Metrics.counter(BatchWriterFn.class, "failedDataShares");
70 |
71 | private static final Counter batchesProcessed =
72 | Metrics.counter(BatchWriterFn.class, "batchesProcessed");
73 |
74 | private static final Counter successfulBatches =
75 | Metrics.counter(BatchWriterFn.class, "successfulBatches");
76 |
77 | private static final Counter failedBatches =
78 | Metrics.counter(BatchWriterFn.class, "failedBatches");
79 |
80 | private transient KeyManagementServiceClient client;
81 | private transient CryptoKeyVersionName keyVersionName;
82 |
83 | // Uses pipeline options, otherwise could've lived in @Setup
84 | @StartBundle
85 | public void startBundle(StartBundleContext context) throws IOException {
86 | client = KeyManagementServiceClient.create();
87 | IngestionPipelineOptions options =
88 | context.getPipelineOptions().as(IngestionPipelineOptions.class);
89 | keyVersionName = CryptoKeyVersionName.parse(options.getKeyResourceName());
90 | }
91 |
92 | @FinishBundle
93 | public void finishBundle() {
94 | client.shutdown();
95 | LOG.info("Waiting for KMS Client to shutdown.");
96 | try {
97 | client.awaitTermination(KMS_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS);
98 | } catch (InterruptedException e) {
99 | LOG.warn("Interrupted while waiting for client shutdown", e);
100 | Thread.currentThread().interrupt();
101 | }
102 | }
103 |
104 | @ProcessElement
105 | public void processElement(ProcessContext c) {
106 | IngestionPipelineOptions options = c.getPipelineOptions().as(IngestionPipelineOptions.class);
107 |
108 | String phaPrefix = options.getPhaOutput();
109 | String facilitatorPrefix = options.getFacilitatorOutput();
110 |
111 | long startTime =
112 | IngestionPipelineOptions.calculatePipelineStart(
113 | options.getStartTime(), options.getDuration(), 1, Clock.systemUTC());
114 | long duration = options.getDuration();
115 |
116 |     KV<DataShareMetadata, Iterable<DataShare>> input = c.element();
117 | DataShareMetadata metadata = input.getKey();
118 | batchesProcessed.inc();
119 | LOG.info("Processing batch: {}", metadata);
120 | // batch size explicitly chosen so that these lists fit in memory on a single worker
121 |     List<PrioDataSharePacket> phaPackets = new ArrayList<>();
122 |     List<PrioDataSharePacket> facilitatorPackets = new ArrayList<>();
123 | for (DataShare dataShare : input.getValue()) {
124 |       List<PrioDataSharePacket> split = PrioSerializationHelper.splitPackets(dataShare);
125 | if (split.size() != DataShare.NUMBER_OF_SERVERS) {
126 | // Checks exist to discard such data shares prior to reaching this point.
127 | throw new IllegalArgumentException(
128 | "Share split into more than hardcoded number of servers");
129 | }
130 | // First packet always goes to PHA
131 | phaPackets.add(split.get(0));
132 | facilitatorPackets.add(split.get(1));
133 | }
134 |
135 | String date =
136 | Instant.ofEpochSecond(startTime + duration)
137 | .atOffset(ZoneOffset.UTC)
138 | .format(DATE_TIME_FORMATTER);
139 | String aggregateId = metadata.getMetricName() + date;
140 |     // In case of dataflow runner retries, it's useful to make the batch UUID deterministic so
141 | // that files that may already have been written are overwritten, instead of new files created.
142 | byte[] seed = (aggregateId + metadata.getBatchId()).getBytes();
143 | UUID batchId = UUID.nameUUIDFromBytes(seed);
144 | String phaFilePath =
145 | phaPrefix + ((phaPrefix.endsWith("/")) ? "" : "/") + aggregateId + batchId.toString();
146 | String facilitatorPath =
147 | facilitatorPrefix
148 | + ((facilitatorPrefix.endsWith("/")) ? "" : "/")
149 | + aggregateId
150 | + batchId.toString();
151 |
152 | try {
153 | // Write to PHA Output Destination
154 | LOG.info("PHA Output: {}", phaFilePath);
155 | writeBatch(
156 | options,
157 | startTime,
158 | duration,
159 | metadata,
160 | batchId,
161 | phaFilePath,
162 | phaPackets,
163 | options.getPhaAwsBucketRole(),
164 | options.getPhaAwsBucketRegion());
165 |
166 | // Write to Facilitator Output Destination
167 | LOG.info("Facilitator Output: {}", facilitatorPath);
168 | writeBatch(
169 | options,
170 | startTime,
171 | duration,
172 | metadata,
173 | batchId,
174 | facilitatorPath,
175 | facilitatorPackets,
176 | options.getFacilitatorAwsBucketRole(),
177 | options.getFacilitatorAwsBucketRegion());
178 |
179 | successfulBatches.inc();
180 | dataSharesInBatch.inc(phaPackets.size());
181 | } catch (IOException | NoSuchAlgorithmException e) {
182 | LOG.error("Unable to serialize Packet/Header/Sig file for PHA or facilitator", e);
183 | failedBatches.inc();
184 | failedDataShares.inc(phaPackets.size());
185 | }
186 | }
187 |
188 | /** Writes the triplet of files defined per batch of data shares (packet file, header, and sig) */
189 | private void writeBatch(
190 | IngestionPipelineOptions options,
191 | long startTime,
192 | long duration,
193 | DataShareMetadata metadata,
194 | UUID uuid,
195 | String filenamePrefix,
196 |       List<PrioDataSharePacket> packets,
197 | String awsBucketRole,
198 | String awsBucketRegion)
199 | throws IOException, NoSuchAlgorithmException {
200 |
201 | if (filenamePrefix.startsWith("s3://")) {
202 | AWSFederatedAuthHelper.setupAWSAuth(options, awsBucketRole, awsBucketRegion);
203 | FileSystems.setDefaultPipelineOptions(options);
204 | }
205 | // write PrioDataSharePackets in this batch to file
206 | ByteBuffer packetsByteBuffer =
207 | PrioSerializationHelper.serializeRecords(
208 | packets, PrioDataSharePacket.class, PrioDataSharePacket.getClassSchema());
209 | writeToFile(filenamePrefix + DATASHARE_PACKET_SUFFIX, packetsByteBuffer);
210 |
211 | MessageDigest sha256 = MessageDigest.getInstance("SHA-256");
212 | byte[] packetsBytesHashDigest = sha256.digest(packetsByteBuffer.array());
213 | // create Header and write to file
214 | PrioIngestionHeader header =
215 | PrioSerializationHelper.createHeader(
216 | metadata, packetsBytesHashDigest, uuid, startTime, duration);
217 |
218 | ByteBuffer headerBytes =
219 | PrioSerializationHelper.serializeRecords(
220 | ImmutableList.of(header),
221 | PrioIngestionHeader.class,
222 | PrioIngestionHeader.getClassSchema());
223 | writeToFile(filenamePrefix + INGESTION_HEADER_SUFFIX, headerBytes);
224 |
225 | byte[] hashHeader = sha256.digest(headerBytes.array());
226 | Digest digestHeader = Digest.newBuilder().setSha256(ByteString.copyFrom(hashHeader)).build();
227 |
228 | AsymmetricSignResponse result = client.asymmetricSign(keyVersionName, digestHeader);
229 | PrioBatchSignature signature =
230 | PrioBatchSignature.newBuilder()
231 | .setBatchHeaderSignature(result.getSignature().asReadOnlyByteBuffer())
232 | .setKeyIdentifier(keyVersionName.toString())
233 | .build();
234 | ByteBuffer signatureBytes =
235 | PrioSerializationHelper.serializeRecords(
236 | ImmutableList.of(signature),
237 | PrioBatchSignature.class,
238 | PrioBatchSignature.getClassSchema());
239 | writeToFile(filenamePrefix + HEADER_SIGNATURE_SUFFIX, signatureBytes);
240 | }
241 |
242 | static void writeToFile(String filename, ByteBuffer contents) throws IOException {
243 | LOG.info("Writing output file: {}", filename);
244 | ResourceId resourceId = FileSystems.matchNewResource(filename, false);
245 | try (WritableByteChannel out = FileSystems.create(resourceId, MimeTypes.TEXT)) {
246 | out.write(contents);
247 | }
248 | }
249 | }
250 |
--------------------------------------------------------------------------------
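As a rough illustration of the output naming scheme in BatchWriterFn above, the snippet below prints the three object names written for one batch; the prefix, metric name, window end, and batch-ID seed are hypothetical values, not taken from the repository:

import java.util.UUID;

public class BatchNamingExample {
  public static void main(String[] args) {
    // Hypothetical values; the real ones come from IngestionPipelineOptions and DataShareMetadata.
    String phaPrefix = "gs://example-pha-bucket/ingestion";      // options.getPhaOutput()
    String aggregateId = "fakeMetric-v1" + "/2021/04/26/12/00/"; // metricName + formatted window end
    // Deterministic batch UUID, so Dataflow retries overwrite the same files instead of creating new ones.
    UUID batchId = UUID.nameUUIDFromBytes((aggregateId + "batch-0").getBytes());
    String base = phaPrefix + "/" + aggregateId + batchId;
    System.out.println(base + ".batch.avro"); // data share packets
    System.out.println(base + ".batch");      // ingestion header
    System.out.println(base + ".batch.sig");  // header signature
  }
}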
/src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DataProcessorManifest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
18 |
19 | import com.google.gson.JsonObject;
20 | import com.google.gson.JsonParser;
21 | import java.io.IOException;
22 | import java.io.InputStreamReader;
23 | import java.net.URL;
24 |
25 | /**
26 | * Encapsulation of the specific manifest for a PHA or Facilitator data processor.
27 | *
28 | * See
29 | * https://docs.google.com/document/d/1MdfM3QT63ISU70l63bwzTrxr93Z7Tv7EDjLfammzo6Q/edit#bookmark=id.8skgn5yx33ae
30 | * https://github.com/abetterinternet/prio-server/blob/main/manifest-updater/manifest/types.go
31 | */
32 | public class DataProcessorManifest {
33 |
34 | private static final String AWS_BUCKET_PREFIX = "s3://";
35 |
36 | private static final String INGESTION_BUCKET = "ingestion-bucket";
37 | private static final String INGESTION_IDENTITY = "ingestion-identity";
38 |
39 | private final String manifestUrl;
40 |
41 | private String bucket;
42 |
43 | private String awsBucketRegion;
44 |
45 | private String awsBucketName;
46 |
47 | private String awsRole;
48 |
49 | private boolean isAwsBucket;
50 |
51 | public DataProcessorManifest(String manifestUrl) {
52 | this.manifestUrl = manifestUrl;
53 | this.isAwsBucket = false;
54 | if (!"".equals(manifestUrl)) {
55 | init();
56 | }
57 | }
58 |
59 | public String getIngestionBucket() {
60 | return bucket;
61 | }
62 |
63 | public String getAwsBucketRegion() {
64 | return awsBucketRegion;
65 | }
66 |
67 | public String getAwsBucketName() {
68 | return awsBucketName;
69 | }
70 |
71 | public String getAwsRole() {
72 | return awsRole;
73 | }
74 |
75 | public boolean isAwsBucket() {
76 | return isAwsBucket;
77 | }
78 |
79 | private void init() {
80 | try {
81 | JsonObject manifestJson = fetchAndParseJson();
82 | bucket = manifestJson.get(INGESTION_BUCKET).getAsString();
83 |
84 | if (bucket.startsWith(AWS_BUCKET_PREFIX)) {
85 | isAwsBucket = true;
86 | String bucketInfo = bucket.substring(AWS_BUCKET_PREFIX.length());
87 | String[] regionName = bucketInfo.split("/");
88 | if (regionName.length != 2) {
89 | throw new IllegalArgumentException(
90 | "Ingestion bucket not in correct format of {AWS region}/{name}");
91 | }
92 |
93 | awsBucketRegion = regionName[0];
94 | awsBucketName = regionName[1];
95 | if (manifestJson.get(INGESTION_IDENTITY) == null) {
96 | throw new IllegalArgumentException(
97 | "Ingestion identity must be specified with AWS buckets");
98 | } else {
99 | awsRole = manifestJson.get(INGESTION_IDENTITY).getAsString();
100 | }
101 | }
102 |
103 | } catch (IOException e) {
104 | throw new ManifestProcessingRuntimeException("Unable to fetch and parse manifest", e);
105 | }
106 | }
107 |
108 | private JsonObject fetchAndParseJson() throws IOException {
109 | URL url = new URL(manifestUrl);
110 | InputStreamReader manifestReader = new InputStreamReader(url.openStream());
111 | return new JsonParser().parse(manifestReader).getAsJsonObject();
112 | }
113 |
114 | protected class ManifestProcessingRuntimeException extends RuntimeException {
115 |
116 | public ManifestProcessingRuntimeException(String message, Throwable cause) {
117 | super(message, cause);
118 | }
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
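As a rough usage sketch of the DataProcessorManifest class above; the manifest URL and field values are hypothetical, not taken from the repository:

// Hypothetical manifest contents behind the URL:
//   "ingestion-bucket":   "s3://us-west-1/example-ingestion-bucket"
//   "ingestion-identity": "arn:aws:iam::123456789012:role/example-ingestion-role"
DataProcessorManifest manifest =
    new DataProcessorManifest("https://example.com/facilitator-manifest.json");
if (manifest.isAwsBucket()) {
  String region = manifest.getAwsBucketRegion(); // "us-west-1"
  String bucket = manifest.getAwsBucketName();   // "example-ingestion-bucket"
  String role = manifest.getAwsRole();           // the "ingestion-identity" value
}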
/src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DateFilterFn.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare;
19 | import java.time.Clock;
20 | import java.util.HashMap;
21 | import java.util.Map;
22 | import org.apache.beam.sdk.metrics.Counter;
23 | import org.apache.beam.sdk.metrics.Metrics;
24 | import org.apache.beam.sdk.transforms.DoFn;
25 | import org.slf4j.Logger;
26 | import org.slf4j.LoggerFactory;
27 |
28 | /** A DoFn that filters data shares in a particular time window */
29 | public class DateFilterFn extends DoFn<DataShare, DataShare> {
30 |
31 | private static final Logger LOG = LoggerFactory.getLogger(DateFilterFn.class);
32 |
33 |   private final Map<String, Counter> dateFilterIncluded = new HashMap<>();
34 |   private final Map<String, Counter> dateFilterExcluded = new HashMap<>();
35 |
36 | @ProcessElement
37 | public void processElement(ProcessContext c) {
38 | String metricName = c.element().getDataShareMetadata().getMetricName();
39 | if (!dateFilterIncluded.containsKey(metricName)) {
40 | dateFilterIncluded.put(
41 | metricName, Metrics.counter(DateFilterFn.class, "dateFilterIncluded_" + metricName));
42 | dateFilterExcluded.put(
43 | metricName, Metrics.counter(DateFilterFn.class, "dateFilterExcluded_" + metricName));
44 | }
45 |
46 | if (c.element().getCreatedMs() == null || c.element().getCreatedMs() == 0) {
47 | LOG.warn("Skipping document with no creation timestamp: {}", c.element().getPath());
48 | return;
49 | }
50 | IngestionPipelineOptions options = c.getPipelineOptions().as(IngestionPipelineOptions.class);
51 |
52 | long startTime =
53 | IngestionPipelineOptions.calculatePipelineStart(
54 | options.getStartTime(), options.getDuration(), 1, Clock.systemUTC());
55 | long duration = options.getDuration();
56 |
57 | if (c.element().getCreatedMs() >= startTime * 1000
58 | && c.element().getCreatedMs() < (startTime + duration) * 1000) {
59 | LOG.debug("Included: {}", c.element());
60 | dateFilterIncluded.get(metricName).inc();
61 | c.output(c.element());
62 | } else {
63 | LOG.trace("Excluded: {}", c.element());
64 | dateFilterExcluded.get(metricName).inc();
65 | }
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
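To make the window check in DateFilterFn above concrete, a small self-contained sketch with made-up numbers (assuming, as the code implies, that calculatePipelineStart returns the window start in epoch seconds and getCreatedMs returns epoch milliseconds):

public class DateFilterWindowExample {
  public static void main(String[] args) {
    long startTime = 1_600_000_000L;     // hypothetical window start, epoch seconds
    long duration = 3_600L;              // hypothetical window length, seconds
    long createdMs = 1_600_001_234_000L; // hypothetical creation time, epoch milliseconds
    boolean included =
        createdMs >= startTime * 1000 && createdMs < (startTime + duration) * 1000;
    System.out.println(included); // true: 1,600,001,234 s lies in [1,600,000,000 s, 1,600,003,600 s)
  }
}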
/src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DeletionPipeline.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
18 |
19 | import com.google.exposurenotification.privateanalytics.ingestion.pipeline.FirestoreConnector.FirestorePartitionQueryCreation;
20 | import com.google.firestore.v1.RunQueryResponse;
21 | import com.google.firestore.v1.Write;
22 | import java.time.Clock;
23 | import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
24 | import org.apache.beam.sdk.Pipeline;
25 | import org.apache.beam.sdk.PipelineResult;
26 | import org.apache.beam.sdk.io.gcp.firestore.FirestoreIO;
27 | import org.apache.beam.sdk.io.gcp.firestore.RpcQosOptions;
28 | import org.apache.beam.sdk.metrics.MetricResults;
29 | import org.apache.beam.sdk.options.PipelineOptionsFactory;
30 | import org.apache.beam.sdk.transforms.MapElements;
31 | import org.apache.beam.sdk.transforms.SimpleFunction;
32 | import org.slf4j.Logger;
33 | import org.slf4j.LoggerFactory;
34 |
35 | /** Pipeline to delete processed data shares from Firestore. */
36 | public class DeletionPipeline {
37 |
38 | private static final Logger LOG = LoggerFactory.getLogger(DeletionPipeline.class);
39 |
40 | static void buildDeletionPipeline(IngestionPipelineOptions options, Pipeline pipeline) {
41 | DataflowPipelineOptions dataflowPipelineOptions = options.as(DataflowPipelineOptions.class);
42 | RpcQosOptions.Builder rpcQosOptionsBuilder = RpcQosOptions.newBuilder();
43 | int maxNumWorkers = dataflowPipelineOptions.getMaxNumWorkers();
44 | if (maxNumWorkers > 0) {
45 | rpcQosOptionsBuilder.withHintMaxNumWorkers(maxNumWorkers);
46 | }
47 | long startTime =
48 | IngestionPipelineOptions.calculatePipelineStart(
49 | options.getStartTime(), options.getDuration(), 2, Clock.systemUTC());
50 | pipeline
51 | .apply(new FirestorePartitionQueryCreation(startTime))
52 | .apply(FirestoreIO.v1().read().partitionQuery().withNameOnlyQuery().build())
53 | .apply(FirestoreIO.v1().read().runQuery().build())
54 | .apply(FirestoreConnector.filterRunQueryResponseHasDocument())
55 | .apply(
56 | MapElements.via(
57 |                 new SimpleFunction<RunQueryResponse, Write>() {
58 | @Override
59 | public Write apply(RunQueryResponse input) {
60 | return Write.newBuilder().setDelete(input.getDocument().getName()).build();
61 | }
62 | }))
63 | .apply(
64 | FirestoreIO.v1()
65 | .write()
66 | .batchWrite()
67 | .withRpcQosOptions(rpcQosOptionsBuilder.build())
68 | .build());
69 | }
70 |
71 | public static void main(String[] args) {
72 | PipelineOptionsFactory.register(IngestionPipelineOptions.class);
73 | IngestionPipelineOptions options =
74 | PipelineOptionsFactory.fromArgs(args).withValidation().as(IngestionPipelineOptions.class);
75 | try {
76 | Pipeline pipeline = Pipeline.create(options);
77 | buildDeletionPipeline(options, pipeline);
78 | PipelineResult result = pipeline.run();
79 | result.waitUntilFinish();
80 | MetricResults metrics = result.metrics();
81 | LOG.info("Metrics:\n\n{}", metrics);
82 | } catch (UnsupportedOperationException ignore) {
83 | // Known issue that this can throw when generating a template:
84 | // https://issues.apache.org/jira/browse/BEAM-9337
85 | } catch (Exception e) {
86 | LOG.error("Exception thrown during pipeline run.", e);
87 | }
88 | }
89 | }
90 |
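
A minimal launch sketch for the pipeline above, assuming it sits in the same package as DeletionPipeline and is invoked with flags from IngestionPipelineOptions (defined later in this listing); the project id and window values are placeholders, not taken from the source.

    // Hypothetical launcher; "my-gcp-project" and the window values are illustrative.
    public class DeletionPipelineLaunchSketch {
      public static void main(String[] args) {
        DeletionPipeline.main(
            new String[] {
              "--runner=DataflowRunner",           // or DirectRunner for a local run
              "--project=my-gcp-project",          // placeholder GCP project id
              "--firestoreProject=my-gcp-project", // falls back to --project when left empty
              "--startTime=1608033600",            // UTC seconds; omit to derive from the clock
              "--duration=43200"                   // 12-hour deletion window
            });
      }
    }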
--------------------------------------------------------------------------------
/src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/FirestoreConnector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import com.google.firestore.v1.DatabaseRootName;
19 | import com.google.firestore.v1.PartitionQueryRequest;
20 | import com.google.firestore.v1.RunQueryResponse;
21 | import com.google.firestore.v1.StructuredQuery;
22 | import com.google.firestore.v1.StructuredQuery.CollectionSelector;
23 | import com.google.firestore.v1.StructuredQuery.Direction;
24 | import com.google.firestore.v1.StructuredQuery.FieldReference;
25 | import com.google.firestore.v1.StructuredQuery.Order;
26 | import java.net.InetAddress;
27 | import java.net.UnknownHostException;
28 | import java.time.Duration;
29 | import java.time.temporal.ChronoUnit;
30 | import java.util.ArrayList;
31 | import java.util.List;
32 | import java.util.Locale;
33 | import org.apache.beam.sdk.metrics.Counter;
34 | import org.apache.beam.sdk.metrics.Metrics;
35 | import org.apache.beam.sdk.transforms.Create;
36 | import org.apache.beam.sdk.transforms.DoFn;
37 | import org.apache.beam.sdk.transforms.Filter;
38 | import org.apache.beam.sdk.transforms.PTransform;
39 | import org.apache.beam.sdk.transforms.ParDo;
40 | import org.apache.beam.sdk.transforms.SimpleFunction;
41 | import org.apache.beam.sdk.values.PBegin;
42 | import org.apache.beam.sdk.values.PCollection;
43 | import org.slf4j.Logger;
44 | import org.slf4j.LoggerFactory;
45 | import org.threeten.bp.LocalDateTime;
46 | import org.threeten.bp.ZoneOffset;
47 | import org.threeten.bp.format.DateTimeFormatter;
48 |
49 | /**
50 | * Primitive beam connector for Firestore specific to this application.
51 | *
52 |  * <p>For a general purpose connector see https://issues.apache.org/jira/browse/BEAM-8376
53 | */
54 | public class FirestoreConnector {
55 |
56 | private static final Logger LOG = LoggerFactory.getLogger(FirestoreConnector.class);
57 |
58 | private static final long SECONDS_IN_HOUR = Duration.of(1, ChronoUnit.HOURS).getSeconds();
59 |
60 | // Order must be name ascending. Right now, this is the only ordering that the
61 | // Firestore SDK supports.
62 | private static final String NAME_FIELD = "__name__";
63 |
64 | private static final Counter queriesGenerated =
65 | Metrics.counter(FirestoreConnector.class, "queriesGenerated");
66 |
67 | /**
68 |    * PTransform which will generate the necessary PartitionQueryRequests for processing documents.
69 | */
70 | public static final class FirestorePartitionQueryCreation
71 |       extends PTransform<PBegin, PCollection<PartitionQueryRequest>> {
72 | private final long start;
73 |
74 | public FirestorePartitionQueryCreation(long start) {
75 | this.start = start;
76 | }
77 |
78 | @Override
79 |     public PCollection<PartitionQueryRequest> expand(PBegin input) {
80 | IngestionPipelineOptions options =
81 | (IngestionPipelineOptions) input.getPipeline().getOptions();
82 | LOG.info("Using start time in seconds of {}", start);
83 | long backwardHours = options.getGraceHoursBackwards();
84 |         // To correctly compute how many hours forward we need to look when including the
85 | // duration, we need to compute:
86 | // ceil ( forwardHours + durationInSeconds / 3600 )
87 | // Because Java division rounds down, we compute it as:
88 | // forwardHours + ( duration + 3599 ) / 3600.
89 | long forwardHours =
90 | options.getGraceHoursForwards()
91 | + (options.getDuration() + (SECONDS_IN_HOUR - 1)) / SECONDS_IN_HOUR;
92 | LOG.info(
93 | "{} Querying Firestore for documents in date range: {} to {}.",
94 | getLogPrefix(),
95 | formatDateTime(start - backwardHours * SECONDS_IN_HOUR),
96 | formatDateTime(start + forwardHours * SECONDS_IN_HOUR));
97 |
98 | return input
99 | .apply("Begin", Create.of(generateQueries(start, backwardHours, forwardHours)))
100 | .apply(
101 | "Create PartitionQuery",
102 | ParDo.of(
103 |                 new DoFn<StructuredQuery, PartitionQueryRequest>() {
104 | @ProcessElement
105 | public void processElement(ProcessContext context) {
106 | IngestionPipelineOptions options =
107 | context.getPipelineOptions().as(IngestionPipelineOptions.class);
108 | String path =
109 | "".equals(options.getFirestoreProject())
110 | ? getParentPath(options.getProject())
111 | : getParentPath(options.getFirestoreProject());
112 | PartitionQueryRequest request =
113 | PartitionQueryRequest.newBuilder()
114 | .setPartitionCount(options.getPartitionCount())
115 | .setParent(path)
116 | .setStructuredQuery(context.element())
117 | .build();
118 | context.output(request);
119 | }
120 | }));
121 | }
122 | }
123 |
124 | /**
125 | * If a query has zero results, a {@link RunQueryResponse} without a document will still be
126 | * returned. Provide a filter which can be used to filter to only {@code RunQueryResponse} which
127 | * have documents.
128 | */
129 |   static Filter<RunQueryResponse> filterRunQueryResponseHasDocument() {
130 | return Filter.by(
131 |         new SimpleFunction<RunQueryResponse, Boolean>() {
132 | @Override
133 | public Boolean apply(RunQueryResponse input) {
134 | return input.hasDocument();
135 | }
136 | });
137 | }
138 |
139 |   private static Iterable<StructuredQuery> generateQueries(
140 | long startTime, long backwardHours, long forwardHours) {
141 |     List<StructuredQuery> structuredQueries = new ArrayList<>();
142 | // Each document in Firestore is stored under a Date collection with the format:
143 | // yyyy-MM-dd-HH.
144 | // To query all documents uploaded around startTime within the specified window, construct
145 | // a query for each hour within the window: [startTime - backwardHours, startTime +
146 | // forwardHours].
147 | for (long i = (-1 * backwardHours); i <= forwardHours; i++) {
148 | long timeToQuery = startTime + i * SECONDS_IN_HOUR;
149 | // Reformat the date to mirror the format of documents in Firestore: yyyy-MM-dd-HH.
150 | String formattedDateTime = formatDateTime(timeToQuery);
151 | // Construct and output query.
152 | StructuredQuery query =
153 | StructuredQuery.newBuilder()
154 | .addFrom(
155 | CollectionSelector.newBuilder()
156 | .setCollectionId(formattedDateTime)
157 | .setAllDescendants(true)
158 | .build())
159 | .addOrderBy(
160 | Order.newBuilder()
161 | .setField(FieldReference.newBuilder().setFieldPath(NAME_FIELD).build())
162 | .setDirection(Direction.ASCENDING)
163 | .build())
164 | .build();
165 | structuredQueries.add(query);
166 | queriesGenerated.inc();
167 | }
168 | LOG.info("{} Generated {} Firestore queries.", getLogPrefix(), structuredQueries.size());
169 | return structuredQueries;
170 | }
171 |
172 | private static String getParentPath(String projectId) {
173 | return DatabaseRootName.format(projectId, "(default)") + "/documents";
174 | }
175 |
176 | // Formats a time given in epoch seconds in the format: yyyy-MM-dd-HH
177 | public static String formatDateTime(Long time) {
178 | LocalDateTime dateTimeToQuery = LocalDateTime.ofEpochSecond(time, 0, ZoneOffset.UTC);
179 | // Reformat the date to mirror the format of documents in Firestore: yyyy-MM-dd-HH.
180 | DateTimeFormatter formatter =
181 | DateTimeFormatter.ofPattern("yyyy-MM-dd-HH", Locale.US).withZone(ZoneOffset.UTC);
182 | return formatter.format(dateTimeToQuery);
183 | }
184 |
185 |   // TODO: use org.slf4j.MDC (mapped diagnostic context) or something cooler here
186 | private static String getLogPrefix() {
187 | String host = "unknown";
188 | try {
189 | InetAddress address = InetAddress.getLocalHost();
190 | host = address.getHostName();
191 | } catch (UnknownHostException ignore) {
192 | }
193 | return "["
194 | + host
195 | + "|"
196 | + ProcessHandle.current().pid()
197 | + "|"
198 | + Thread.currentThread().getName()
199 | + "] - ";
200 | }
201 | }
202 |
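
A worked sketch, assuming it sits in the same package as FirestoreConnector, of the hour buckets the connector queries; the start time and grace values are illustrative, and the formula mirrors the comment in FirestorePartitionQueryCreation.

    // With startTime = 1609459200 (2021-01-01-00 UTC), graceHoursBackwards = 1,
    // graceHoursForwards = 1, and duration = 3600s:
    // forwardHours = 1 + (3600 + 3599) / 3600 = 2, so hours -1 through +2 are queried.
    public class HourBucketSketch {
      public static void main(String[] args) {
        long start = 1609459200L;
        for (long h = -1; h <= 2; h++) {
          // Prints: 2020-12-31-23, 2021-01-01-00, 2021-01-01-01, 2021-01-01-02
          System.out.println(FirestoreConnector.formatDateTime(start + h * 3600));
        }
      }
    }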
--------------------------------------------------------------------------------
/src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/IngestionPipelineOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import com.amazonaws.auth.AWSCredentialsProvider;
19 | import java.time.Clock;
20 | import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
21 | import org.apache.beam.sdk.io.aws.options.AwsOptions;
22 | import org.apache.beam.sdk.options.Default;
23 | import org.apache.beam.sdk.options.Description;
24 |
25 | /** Specific options for the pipeline. */
26 | public interface IngestionPipelineOptions extends DataflowPipelineOptions {
27 |
28 | int UNSPECIFIED_START = -1;
29 |
30 | /** Firestore Project */
31 | @Description("Firestore Project")
32 | @Default.String("")
33 | String getFirestoreProject();
34 |
35 | void setFirestoreProject(String value);
36 |
37 | /** PHA Manifest file URL. */
38 | @Description("PHA Manifest file URL")
39 | @Default.String("")
40 | String getPhaManifestURL();
41 |
42 | void setPhaManifestURL(String value);
43 |
44 | /** PHA AWS bucket region. */
45 | @Description("PHA AWS bucket region")
46 | @Default.String("")
47 | String getPhaAwsBucketRegion();
48 |
49 | void setPhaAwsBucketRegion(String value);
50 |
51 | /** PHA AWS bucket name. */
52 | @Description("PHA AWS bucket name")
53 | @Default.String("")
54 | String getPhaAwsBucketName();
55 |
56 | void setPhaAwsBucketName(String value);
57 |
58 | /** PHA AWS bucket role. */
59 | @Description("PHA AWS bucket role")
60 | @Default.String("")
61 | String getPhaAwsBucketRole();
62 |
63 | void setPhaAwsBucketRole(String value);
64 |
65 | /**
66 | * Directory to place output files for PHA. If the directory does not exist, then it will
67 | * automatically be created.
68 | *
69 |    * <p>If set, this flag overrides an output location set in the PHA manifest file.
70 | */
71 | @Description(
72 | "Directory to place output files for PHA (Should end in 2-letter state abbreviation).")
73 | @Default.String("")
74 | String getPhaOutput();
75 |
76 | void setPhaOutput(String value);
77 |
78 | /** Facilitator Manifest file URL. */
79 | @Description("Facilitator Manifest file URL")
80 | @Default.String("")
81 | String getFacilitatorManifestURL();
82 |
83 | void setFacilitatorManifestURL(String value);
84 |
85 | /** Facilitator AWS bucket region. */
86 | @Description("Facilitator AWS bucket region")
87 | @Default.String("")
88 | String getFacilitatorAwsBucketRegion();
89 |
90 | void setFacilitatorAwsBucketRegion(String value);
91 |
92 | /** Facilitator AWS bucket name. */
93 | @Description("Facilitator AWS bucket name")
94 | @Default.String("")
95 | String getFacilitatorAwsBucketName();
96 |
97 | void setFacilitatorAwsBucketName(String value);
98 |
99 | /** Facilitator AWS bucket role. */
100 | @Description("Facilitator AWS bucket role")
101 | @Default.String("")
102 | String getFacilitatorAwsBucketRole();
103 |
104 | void setFacilitatorAwsBucketRole(String value);
105 |
106 | /**
107 | * Directory to place output files for Facilitator. If the directory does not exist, then it will
108 | * automatically be created.
109 | *
110 |    * <p>If set, this flag overrides an output location set in the Facilitator manifest file.
111 | */
112 | @Description(
113 | "Directory to place output files for Facilitator. (Should end in 2-letter state"
114 | + " abbreviation)")
115 | @Default.String("")
116 | String getFacilitatorOutput();
117 |
118 | void setFacilitatorOutput(String value);
119 |
120 | /**
121 | * Start time of window to process. Used to filter documents that have been read from Firestore on
122 | * the "Creation" field. Defaults to current time rounded down to previous alignment period based
123 | * on the duration.
124 | */
125 | @Description("Start time in UTC seconds of documents to process")
126 | @Default.Long(UNSPECIFIED_START)
127 | Long getStartTime();
128 |
129 | void setStartTime(Long value);
130 |
131 | /**
132 | * Duration of time window to process. Used to filter documents that have been read from Firestore
133 | * on the "Creation" field.
134 | */
135 | @Description("Duration of window in seconds")
136 | @Default.Long(3600)
137 | Long getDuration();
138 |
139 | void setDuration(Long value);
140 |
141 | /**
142 | * Hours to look before startTime when querying Firestore collection. Used to construct document
143 | * path for Firestore reads.
144 | */
145 | @Description(
146 | "Hours to read backwards from startTime. Used to construct document path for Firestore"
147 | + " reads.")
148 | @Default.Long(1)
149 | Long getGraceHoursBackwards();
150 |
151 | void setGraceHoursBackwards(Long value);
152 |
153 | /**
154 |    * Hours to look after startTime when querying Firestore. Used to construct document path for
155 | * Firestore reads.
156 | */
157 | @Description(
158 | "Hours to read forward from startTime. Used to construct document path for Firestore"
159 | + " reads.")
160 | @Default.Long(1)
161 | Long getGraceHoursForwards();
162 |
163 | void setGraceHoursForwards(Long value);
164 |
165 | /** Maximum number of query partitions to create for running Firestore read. */
166 | @Description("Maximum number of partitions to create for Firestore query.")
167 | @Default.Long(20)
168 | Long getPartitionCount();
169 |
170 | void setPartitionCount(Long value);
171 |
172 | /** Batch size of individual files. */
173 | @Description("Batch size of individual files.")
174 | @Default.Long(200000)
175 | Long getBatchSize();
176 |
177 | void setBatchSize(Long value);
178 |
179 | /** Batch size of Firestore batch deletes. */
180 | @Description("Batch size of Firestore deletes.")
181 | @Default.Long(100)
182 | Long getDeleteBatchSize();
183 |
184 | void setDeleteBatchSize(Long value);
185 |
186 | /**
187 | * Signing key resource name. See https://cloud.google.com/kms/docs/resource-hierarchy E.g.,
188 | * projects/$PROJECT_NAME/locations/global/keyRings/$RING/cryptoKeys/$KEY/cryptoKeyVersions/$VERSION
189 | */
190 | @Description("KMS resource name for signature generation")
191 | @Default.String("")
192 | String getKeyResourceName();
193 |
194 | void setKeyResourceName(String value);
195 |
196 | /** Whether to check device hardware attestations */
197 | @Description("Verify device attestations")
198 | @Default.Boolean(true)
199 | Boolean getDeviceAttestation();
200 |
201 | void setDeviceAttestation(Boolean value);
202 |
203 | @Description("AWS region used by the AWS client")
204 | String getAwsRegion();
205 |
206 | void setAwsRegion(String value);
207 |
208 | @Description(
209 | "The credential instance that should be used to authenticate against AWS services. The option value must contain \"@type\" field and an AWS Credentials Provider class name as the field value. Refer to DefaultAWSCredentialsProviderChain Javadoc for usage help. For example, to specify the AWS key ID and secret, specify the following: {\"@type\": \"AWSStaticCredentialsProvider\", \"awsAccessKeyId\":\"\", \"awsSecretKey\":\"\"}")
210 | @Default.InstanceFactory(AwsOptions.AwsUserCredentialsFactory.class)
211 | AWSCredentialsProvider getAwsCredentialsProvider();
212 |
213 | void setAwsCredentialsProvider(AWSCredentialsProvider value);
214 |
215 | /**
216 |    * @return {@code startTime} from options/flags if set. Otherwise, rounds the current time down
217 |    *     to a multiple of {@code duration} and steps back {@code numWindows} windows of that length.
218 | */
219 | static long calculatePipelineStart(long start, long duration, int numWindows, Clock clock) {
220 | if (start != UNSPECIFIED_START) {
221 | return start;
222 | }
223 | return ((clock.instant().getEpochSecond() / duration) - numWindows) * duration;
224 | }
225 | }
226 |
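
A worked sketch of the fallback in calculatePipelineStart when --startTime is unset; the clock value mirrors IngestionPipelineOptionsTest later in this listing, and the sketch is assumed to sit in the same package as IngestionPipelineOptions.

    import java.time.Clock;
    import java.time.Instant;
    import java.time.ZoneOffset;

    public class PipelineStartSketch {
      public static void main(String[] args) {
        // 1608067718 / 3600 = 446685 (integer division); stepping back numWindows = 1 window
        // and multiplying by 3600 again gives 1608062400, one hour before the truncated boundary.
        Clock fixedClock = Clock.fixed(Instant.ofEpochSecond(1608067718), ZoneOffset.UTC);
        long start =
            IngestionPipelineOptions.calculatePipelineStart(
                IngestionPipelineOptions.UNSPECIFIED_START, 3600, 1, fixedClock);
        System.out.println(start); // 1608062400
      }
    }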
--------------------------------------------------------------------------------
/src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/PrioSerializationHelper.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare;
19 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.DataShareMetadata;
20 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.EncryptedShare;
21 | import java.io.ByteArrayOutputStream;
22 | import java.io.File;
23 | import java.io.IOException;
24 | import java.lang.reflect.InvocationTargetException;
25 | import java.nio.ByteBuffer;
26 | import java.util.ArrayList;
27 | import java.util.List;
28 | import java.util.UUID;
29 | import org.abetterinternet.prio.v1.PrioDataSharePacket;
30 | import org.abetterinternet.prio.v1.PrioIngestionHeader;
31 | import org.apache.avro.Schema;
32 | import org.apache.avro.file.DataFileReader;
33 | import org.apache.avro.file.DataFileWriter;
34 | import org.apache.avro.io.DatumReader;
35 | import org.apache.avro.io.DatumWriter;
36 | import org.apache.avro.specific.SpecificDatumReader;
37 | import org.apache.avro.specific.SpecificDatumWriter;
38 | import org.apache.avro.specific.SpecificRecordBase;
39 | import org.apache.avro.util.Utf8;
40 | import org.slf4j.Logger;
41 | import org.slf4j.LoggerFactory;
42 |
43 | /**
44 | * Helpers for serializing and deserializing Prio data shares into (or from) the Apache Avro file
45 | * format.
46 | */
47 | public class PrioSerializationHelper {
48 |
49 | private PrioSerializationHelper() {}
50 |
51 | private static final Logger LOG = LoggerFactory.getLogger(PrioSerializationHelper.class);
52 |
53 |   public static <T extends SpecificRecordBase> ByteBuffer serializeRecords(
54 |       List<T> records, Class<T> recordClass, Schema schema) throws IOException {
55 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
56 |     DatumWriter<T> dataShareDatumWriter = new SpecificDatumWriter<>(recordClass);
57 |     try (DataFileWriter<T> dataFileWriter = new DataFileWriter<>(dataShareDatumWriter)) {
58 | dataFileWriter.create(schema, outputStream);
59 |
60 | for (T record : records) {
61 | dataFileWriter.append(record);
62 | }
63 |
64 | dataFileWriter.flush();
65 | dataFileWriter.close();
66 | }
67 | return ByteBuffer.wrap(outputStream.toByteArray());
68 | }
69 |
70 |   public static <T extends SpecificRecordBase> List<T> deserializeRecords(
71 |       Class<T> recordClass, String pathname)
72 | throws IOException, IllegalAccessException, InstantiationException {
73 |     DatumReader<T> datumReader = new SpecificDatumReader<>(recordClass);
74 |     List<T> results = new ArrayList<>();
75 |     try (DataFileReader<T> dataFileReader = new DataFileReader<>(new File(pathname), datumReader)) {
76 | T record;
77 | while (dataFileReader.hasNext()) {
78 | try {
79 | record = recordClass.getDeclaredConstructor().newInstance();
80 | record = dataFileReader.next(record);
81 | results.add(record);
82 | } catch (InvocationTargetException | NoSuchMethodException e) {
83 | LOG.error("PrioSerializationHelper Record instance creation error:", e);
84 | }
85 | }
86 | }
87 | return results;
88 | }
89 |
90 | public static PrioIngestionHeader createHeader(
91 | DataShareMetadata metadata, byte[] digest, UUID uuid, long startTime, long duration) {
92 | return PrioIngestionHeader.newBuilder()
93 | .setBatchUuid(new Utf8(uuid.toString()))
94 | .setName(new Utf8(metadata.getMetricName()))
95 | .setBatchStartTime(startTime)
96 | .setBatchEndTime(startTime + duration)
97 | .setNumberOfServers(metadata.getNumberOfServers())
98 | .setBins(metadata.getBins())
99 | .setHammingWeight(metadata.getHammingWeight())
100 | .setPrime(metadata.getPrime())
101 | .setEpsilon(metadata.getEpsilon())
102 | .setPacketFileDigest(ByteBuffer.wrap(digest))
103 | .build();
104 | }
105 |
106 |   public static List<PrioDataSharePacket> splitPackets(DataShare dataShare) {
107 |     List<EncryptedShare> encryptedDataShares = dataShare.getEncryptedDataShares();
108 |     List<PrioDataSharePacket> splitDataShares = new ArrayList<>();
109 | for (EncryptedShare encryptedShare : encryptedDataShares) {
110 | splitDataShares.add(
111 | PrioDataSharePacket.newBuilder()
112 | .setEncryptedPayload(ByteBuffer.wrap(encryptedShare.getEncryptedPayload()))
113 | .setEncryptionKeyId(null)
114 | .setRPit(dataShare.getRPit())
115 | .setUuid(dataShare.getUuid())
116 | .setVersionConfiguration(null)
117 | .setDeviceNonce(null)
118 | .build());
119 | }
120 | return splitDataShares;
121 | }
122 | }
123 |
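
A minimal Avro round-trip sketch for the helpers above, assuming it sits in the same pipeline package (so PrioSerializationHelper and BatchWriterFn resolve without imports); the header values and the /tmp path are placeholders, and the write call mirrors the usage in PrioSerializationHelperTest later in this listing.

    import java.nio.ByteBuffer;
    import java.util.List;
    import org.abetterinternet.prio.v1.PrioIngestionHeader;

    public class AvroRoundTripSketch {
      public static void main(String[] args) throws Exception {
        PrioIngestionHeader header =
            PrioIngestionHeader.newBuilder()
                .setBatchUuid("batch-uuid")
                .setName("metricName")
                .setBins(8)
                .setEpsilon(2.0)
                .setPrime(4293918721L)
                .setNumberOfServers(2)
                .setHammingWeight(1)
                .setBatchStartTime(1600000000)
                .setBatchEndTime(1600003600)
                .setPacketFileDigest(ByteBuffer.wrap(new byte[] {1, 2, 3}))
                .build();

        // Serialize to Avro bytes, write them to disk, then read them back.
        ByteBuffer bytes =
            PrioSerializationHelper.serializeRecords(
                List.of(header), PrioIngestionHeader.class, PrioIngestionHeader.getClassSchema());
        BatchWriterFn.writeToFile("/tmp/header.avro", bytes);
        List<PrioIngestionHeader> roundTripped =
            PrioSerializationHelper.deserializeRecords(PrioIngestionHeader.class, "/tmp/header.avro");
        System.out.println(roundTripped.size()); // 1
      }
    }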
--------------------------------------------------------------------------------
/src/proto/analytics.proto:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | syntax = "proto2";
16 |
17 | package com.google.exposurenotification.privateanalytics.ingestion;
18 |
19 | option java_multiple_files = true;
20 |
21 | // Parameters for the Prio algorithm https://crypto.stanford.edu/prio/
22 | message PrioAlgorithmParameters {
23 | // Number of bins for this aggregation.
24 | optional int32 bins = 1;
25 |
26 | optional double epsilon = 2;
27 |
28 | // The value of prime p used in this aggregation. The prime needs to be
29 | // congruent to 1 modulo 2 * next_power_two(bins + 1) to support Fourier
30 | // transform.
31 | // The aggregation will be performed modulo this prime number. Usually, the
32 |   // prime is chosen large enough so that the aggregation happens over the integers.
33 | //
34 | optional uint64 prime = 3;
35 | // The number of servers that will be involved in the aggregation.
36 | //
37 | // Currently, the library only supports two servers.
38 | optional int32 number_servers = 4;
39 |
40 | // If specified, the Hamming weight of the vector will be verified during the
41 |   // validity check on the server. Furthermore, the differential privacy noise will
42 | // be selected by replacing the input with a random "hamming_weight"-hot
43 | // vector with probability 1/(1+e^epsilon).
44 | // If not specified, the input will be randomized element-wise.
45 | optional int32 hamming_weight = 5;
46 | }
47 |
48 | message ResponseStatus {
49 | // NEXT ID: 4
50 | enum StatusCode {
51 | // Operation experienced an unknown failure
52 | UNKNOWN_FAILURE = 0;
53 |
54 | // Operation finished successfully
55 | OK = 1;
56 |
57 | // Operation was cancelled
58 | CANCELLED_FAILURE = 2;
59 |
60 | // Operation failed due to an invalid parameter error
61 | INVALID_PARAMETER_FAILURE = 3;
62 | }
63 |
64 | // The status code of the response
65 | optional StatusCode status_code = 1 [default = UNKNOWN_FAILURE];
66 |
67 | // Detailed error message
68 | optional string error_details = 2;
69 | }
70 |
71 | // Response message for packet creation
72 | message CreatePacketsResponse {
73 | // Status of the response
74 | optional ResponseStatus response_status = 1;
75 |
76 | // A list of datashares to be dispatched to server(s).
77 | repeated bytes shares = 2;
78 | }
79 |
80 | // Parameters for packet creation
81 | message CreatePacketsParameters {
82 | // Parameters for Prio.
83 | optional PrioAlgorithmParameters prio_parameters = 1;
84 |
85 | // Public keys of the servers, represented as a Base64 encoded string.
86 | repeated string public_keys = 2;
87 |
88 | // Input data. Represented as an array of binary bits in an uint32 array.
89 | repeated uint32 data_bits = 3;
90 | }
91 |
92 | message Payload {
93 | optional string uuid = 1;
94 | optional string created = 2 [deprecated = true];
95 | optional PrioAlgorithmParameters prio_params = 3;
96 | optional int32 schema_version = 4;
97 | optional CreatePacketsResponse packets_response = 5;
98 | }
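
A hedged sketch of building the parameters message through the standard protobuf Java codegen for this file (java_multiple_files is set and no java_package override appears, so the Java package is assumed to match the proto package); the concrete values, including the example prime, are illustrative only.

    import com.google.exposurenotification.privateanalytics.ingestion.PrioAlgorithmParameters;

    public class PrioParametersSketch {
      public static void main(String[] args) {
        // Example prime: 4293918721 = 2^32 - 2^20 + 1. Since prime - 1 is divisible by 2^20,
        // prime is congruent to 1 modulo 2 * next_power_two(bins + 1) for any bins below 2^19.
        PrioAlgorithmParameters params =
            PrioAlgorithmParameters.newBuilder()
                .setBins(8)
                .setEpsilon(2.0)
                .setPrime(4293918721L)
                .setNumberServers(2) // the library currently supports exactly two servers
                .build();
        System.out.println(params);
      }
    }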
--------------------------------------------------------------------------------
/src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DataProcessorManifestTest.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
16 |
17 | import static com.google.common.truth.Truth.assertThat;
18 |
19 | import java.net.MalformedURLException;
20 | import java.net.URL;
21 | import org.junit.Assert;
22 | import org.junit.Test;
23 | import org.junit.runner.RunWith;
24 | import org.junit.runners.JUnit4;
25 |
26 | /** Unit tests for {@link DataProcessorManifest}. */
27 | @RunWith(JUnit4.class)
28 | public class DataProcessorManifestTest {
29 |
30 | @Test
31 | public void testParsing() {
32 | URL manifestUrl =
33 | getClass()
34 | .getResource(
35 | "/com/google/exposurenotification/privateanalytics/ingestion/pipeline/test-manifest.json");
36 | DataProcessorManifest manifest = new DataProcessorManifest(manifestUrl.toString());
37 | assertThat(manifest.getIngestionBucket())
38 | .isEqualTo("s3://us-west-1/prio-demo-gcp-test-pha-1-ingestor-1-ingestion");
39 | assertThat(manifest.getAwsBucketRegion()).isEqualTo("us-west-1");
40 | assertThat(manifest.getAwsBucketName())
41 | .isEqualTo("prio-demo-gcp-test-pha-1-ingestor-1-ingestion");
42 | assertThat(manifest.getAwsRole())
43 | .isEqualTo("arn:aws:iam::12345678:role/AWSRoleAssumedByGCPSvcAcc");
44 | }
45 |
46 | @Test
47 | public void testInvalidURLParsing() throws MalformedURLException {
48 | URL manifestUrl = new URL("http://nothing/to/see/here");
49 | Assert.assertThrows(
50 | DataProcessorManifest.ManifestProcessingRuntimeException.class,
51 | () -> new DataProcessorManifest(manifestUrl.toString()));
52 | }
53 | }
54 |
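
A hypothetical illustration of the region/name split implied by the assertions above; this is not the actual DataProcessorManifest implementation, only the decomposition of the "s3://<region>/<bucket-name>" value the test asserts.

    public class IngestionBucketSplitSketch {
      public static void main(String[] args) {
        String ingestionBucket = "s3://us-west-1/prio-demo-gcp-test-pha-1-ingestor-1-ingestion";
        String withoutScheme = ingestionBucket.substring("s3://".length());
        int slash = withoutScheme.indexOf('/');
        System.out.println(withoutScheme.substring(0, slash));  // us-west-1
        System.out.println(withoutScheme.substring(slash + 1)); // prio-demo-gcp-test-pha-1-ingestor-1-ingestion
      }
    }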
--------------------------------------------------------------------------------
/src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DateFilterFnTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare;
19 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.DataShareMetadata;
20 | import java.util.Arrays;
21 | import java.util.Collections;
22 | import java.util.List;
23 | import org.apache.beam.sdk.testing.PAssert;
24 | import org.apache.beam.sdk.testing.TestPipeline;
25 | import org.apache.beam.sdk.testing.ValidatesRunner;
26 | import org.apache.beam.sdk.transforms.Create;
27 | import org.apache.beam.sdk.transforms.ParDo;
28 | import org.apache.beam.sdk.values.PCollection;
29 | import org.junit.Rule;
30 | import org.junit.Test;
31 | import org.junit.experimental.categories.Category;
32 | import org.junit.runner.RunWith;
33 | import org.junit.runners.JUnit4;
34 |
35 | /** Unit tests for {@link DateFilterFn}. */
36 | @RunWith(JUnit4.class)
37 | public class DateFilterFnTest {
38 |
39 | public transient IngestionPipelineOptions options =
40 | TestPipeline.testingPipelineOptions().as(IngestionPipelineOptions.class);
41 |
42 | @Rule public final transient TestPipeline pipeline = TestPipeline.fromOptions(options);
43 |
44 | @Test
45 | @Category(ValidatesRunner.class)
46 | public void testDateFilter() {
47 | DataShareMetadata meta = DataShareMetadata.builder().setMetricName("sampleMetric").build();
48 |     List<DataShare> dataShares =
49 | Arrays.asList(
50 | DataShare.builder()
51 | .setPath("id1")
52 | .setCreatedMs(1000L)
53 | .setDataShareMetadata(meta)
54 | .build(),
55 | DataShare.builder()
56 | .setPath("id2")
57 | .setCreatedMs(2000L)
58 | .setDataShareMetadata(meta)
59 | .build(),
60 | DataShare.builder()
61 | .setPath("id3")
62 | .setCreatedMs(3000L)
63 | .setDataShareMetadata(meta)
64 | .build(),
65 | DataShare.builder().setPath("missing").setDataShareMetadata(meta).build());
66 |
67 | options.setStartTime(2L);
68 | options.setDuration(1L);
69 | options.setDeviceAttestation(false);
70 |
71 |     PCollection<DataShare> input = pipeline.apply(Create.of(dataShares));
72 |
73 |     PCollection<DataShare> output = input.apply(ParDo.of(new DateFilterFn()));
74 |
75 | PAssert.that(output)
76 | .containsInAnyOrder(
77 | Collections.singletonList(
78 | DataShare.builder()
79 | .setPath("id2")
80 | .setCreatedMs(2000L)
81 | .setDataShareMetadata(meta)
82 | .build()));
83 | pipeline.run().waitUntilFinish();
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DeletionPipelineIT.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
16 |
17 | import static com.google.common.truth.Truth.assertThat;
18 | import static com.google.exposurenotification.privateanalytics.ingestion.pipeline.FirestoreConnector.formatDateTime;
19 | import static org.junit.Assert.assertThrows;
20 |
21 | import com.google.api.core.ApiFutures;
22 | import com.google.api.gax.core.FixedCredentialsProvider;
23 | import com.google.api.gax.rpc.NotFoundException;
24 | import com.google.auth.oauth2.GoogleCredentials;
25 | import com.google.cloud.firestore.v1.FirestoreClient;
26 | import com.google.cloud.firestore.v1.FirestoreClient.ListDocumentsPagedResponse;
27 | import com.google.cloud.firestore.v1.FirestoreSettings;
28 | import com.google.common.collect.Iterables;
29 | import com.google.common.collect.Streams;
30 | import com.google.firestore.v1.BatchWriteRequest;
31 | import com.google.firestore.v1.DatabaseRootName;
32 | import com.google.firestore.v1.Document;
33 | import com.google.firestore.v1.GetDocumentRequest;
34 | import com.google.firestore.v1.ListDocumentsRequest;
35 | import com.google.firestore.v1.Write;
36 | import java.io.IOException;
37 | import java.util.ArrayList;
38 | import java.util.List;
39 | import java.util.UUID;
40 | import java.util.concurrent.ExecutionException;
41 | import java.util.concurrent.ThreadLocalRandom;
42 | import java.util.concurrent.TimeUnit;
43 | import java.util.stream.Collectors;
44 | import java.util.stream.IntStream;
45 | import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
46 | import org.apache.beam.sdk.PipelineResult;
47 | import org.apache.beam.sdk.metrics.MetricNameFilter;
48 | import org.apache.beam.sdk.metrics.MetricsFilter;
49 | import org.apache.beam.sdk.testing.NeedsRunner;
50 | import org.apache.beam.sdk.testing.TestPipeline;
51 | import org.junit.After;
52 | import org.junit.Before;
53 | import org.junit.Rule;
54 | import org.junit.Test;
55 | import org.junit.experimental.categories.Category;
56 | import org.junit.runner.RunWith;
57 | import org.junit.runners.JUnit4;
58 |
59 | /** Integration tests for {@link DeletionPipeline}. */
60 | @RunWith(JUnit4.class)
61 | public class DeletionPipelineIT {
62 |
63 | // Randomize document creation time to avoid collisions between simultaneously running tests.
64 |   // FirestoreConnector will query all documents with created times within one hour of this time.
65 | static final long CREATION_TIME = ThreadLocalRandom.current().nextLong(0L, 1500000000L);
66 | static final long DURATION = 10800L;
67 | static final String PROJECT = System.getenv("PROJECT");
68 | // Randomize test collection name to avoid collisions between simultaneously running tests.
69 | static final String TEST_COLLECTION_NAME =
70 | "uuid" + UUID.randomUUID().toString().replace("-", "_");
71 | static final String KEY_RESOURCE_NAME = System.getenv("KEY_RESOURCE_NAME");
72 | static final DatabaseRootName DATABASE_ROOT_NAME = DatabaseRootName.of(PROJECT, "(default)");
73 | static final String BASE_COLLECTION_NAME =
74 | String.format("%s/documents/%s", DATABASE_ROOT_NAME, TEST_COLLECTION_NAME);
75 |
76 |   List<String> documentNames;
77 | FirestoreClient client;
78 |
79 | public transient IngestionPipelineOptions testOptions =
80 | TestPipeline.testingPipelineOptions().as(IngestionPipelineOptions.class);
81 |
82 | @Rule public final transient TestPipeline testPipeline = TestPipeline.fromOptions(testOptions);
83 |
84 | @Before
85 | public void setUp() throws IOException {
86 | documentNames = new ArrayList<>();
87 | client = getFirestoreClient();
88 | }
89 |
90 | @After
91 | public void tearDown() throws ExecutionException, InterruptedException {
92 | cleanUpParentResources(client);
93 | FirestoreClientTestUtils.shutdownFirestoreClient(client);
94 | }
95 |
96 | @Test
97 | @Category(NeedsRunner.class)
98 | public void testFirestoreDeleterDeletesDocs() throws InterruptedException {
99 | testOptions.as(DataflowPipelineOptions.class).setMaxNumWorkers(1);
100 | testOptions.setStartTime(CREATION_TIME);
101 | testOptions.setProject(PROJECT);
102 | testOptions.setDuration(DURATION);
103 | testOptions.setKeyResourceName(KEY_RESOURCE_NAME);
104 | int numDocs = 500;
105 | seedDatabase(numDocs);
106 |
107 | DeletionPipeline.buildDeletionPipeline(testOptions, testPipeline);
108 | PipelineResult result = testPipeline.run();
109 | result.waitUntilFinish();
110 |
111 | // Assert that processed documents have been deleted.
112 | documentNames.forEach(
113 | name ->
114 | assertThrows(NotFoundException.class, () -> fetchDocumentFromFirestore(name, client)));
115 | MetricNameFilter documentsDeletedMetricName =
116 | MetricNameFilter.named(
117 | "org.apache.beam.sdk.io.gcp.firestore.FirestoreV1.BatchWrite", "writes_successful");
118 | long documentsDeleted =
119 | result
120 | .metrics()
121 | .queryMetrics(MetricsFilter.builder().addNameFilter(documentsDeletedMetricName).build())
122 | .getCounters()
123 | .iterator()
124 | .next()
125 | .getCommitted();
126 | assertThat(documentsDeleted).isEqualTo(numDocs);
127 | }
128 |
129 | private static FirestoreClient getFirestoreClient() throws IOException {
130 | FirestoreSettings settings =
131 | FirestoreSettings.newBuilder()
132 | .setCredentialsProvider(
133 | FixedCredentialsProvider.create(GoogleCredentials.getApplicationDefault()))
134 | .build();
135 | return FirestoreClient.create(settings);
136 | }
137 |
138 | private static void cleanUpParentResources(FirestoreClient client)
139 | throws ExecutionException, InterruptedException {
140 | ListDocumentsPagedResponse documents =
141 | client.listDocuments(
142 | ListDocumentsRequest.newBuilder()
143 | .setParent("projects/" + PROJECT + "/databases/(default)/documents")
144 | .setCollectionId(TEST_COLLECTION_NAME)
145 | .build());
146 | ApiFutures.allAsList(
147 | Streams.stream(Iterables.partition(documents.iterateAll(), 500))
148 | .map(
149 | docs ->
150 | docs.stream()
151 | .map(Document::getName)
152 | .map(name -> Write.newBuilder().setDelete(name).build())
153 | .collect(Collectors.toList()))
154 | .map(DeletionPipelineIT::getBatchWriteRequest)
155 | .map(request -> client.batchWriteCallable().futureCall(request))
156 | .collect(Collectors.toList()))
157 | .get();
158 | }
159 |
160 | private static Document fetchDocumentFromFirestore(String path, FirestoreClient client) {
161 | return client.getDocument(GetDocumentRequest.newBuilder().setName(path).build());
162 | }
163 |
164 | private void seedDatabase(int numDocsToSeed) throws InterruptedException {
165 | // Adding a wait here to give the Firestore instance time to initialize before attempting
166 | // to connect.
167 | TimeUnit.SECONDS.sleep(1);
168 | documentNames =
169 | IntStream.rangeClosed(1, numDocsToSeed)
170 | .mapToObj(
171 | i ->
172 | String.format(
173 | "%s/testDoc%05d/%s/metric1",
174 | BASE_COLLECTION_NAME, i, formatDateTime(CREATION_TIME)))
175 | .collect(Collectors.toList());
176 |
177 |     List<BatchWriteRequest> batchWriteRequests =
178 | Streams.stream(Iterables.partition(documentNames, 500))
179 | .map(
180 | names ->
181 | names.stream()
182 | .map(
183 | name ->
184 | Write.newBuilder()
185 | .setUpdate(Document.newBuilder().setName(name).build())
186 | .build())
187 | .collect(Collectors.toList()))
188 | .map(DeletionPipelineIT::getBatchWriteRequest)
189 | .collect(Collectors.toList());
190 |
191 | for (BatchWriteRequest batchWriteRequest : batchWriteRequests) {
192 | client.batchWrite(batchWriteRequest);
193 | }
194 | }
195 |
196 |   private static BatchWriteRequest getBatchWriteRequest(List<Write> writes) {
197 | return BatchWriteRequest.newBuilder()
198 | .setDatabase(DATABASE_ROOT_NAME.toString())
199 | .addAllWrites(writes)
200 | .build();
201 | }
202 | }
203 |
--------------------------------------------------------------------------------
/src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/FirestoreClientTestUtils.java:
--------------------------------------------------------------------------------
1 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
2 |
3 | import com.google.cloud.firestore.v1.FirestoreClient;
4 | import java.time.Duration;
5 | import java.util.concurrent.TimeUnit;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | public final class FirestoreClientTestUtils {
10 | private static final Logger LOG = LoggerFactory.getLogger(FirestoreClientTestUtils.class);
11 |
12 | static final Duration FIRESTORE_SHUTDOWN_TIMEOUT = Duration.ofSeconds(30);
13 |
14 | static void shutdownFirestoreClient(FirestoreClient client) {
15 | client.shutdown();
16 | LOG.info("Waiting for FirestoreClient to shutdown.");
17 | try {
18 | client.awaitTermination(FIRESTORE_SHUTDOWN_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS);
19 | } catch (InterruptedException e) {
20 | LOG.warn("Interrupted while waiting for client shutdown", e);
21 | Thread.currentThread().interrupt();
22 | }
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/IngestionPipelineOptionsTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2021 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import static com.google.common.truth.Truth.assertThat;
19 |
20 | import java.time.Clock;
21 | import java.time.Instant;
22 | import java.time.ZoneId;
23 | import org.junit.Test;
24 | import org.junit.runner.RunWith;
25 | import org.junit.runners.JUnit4;
26 |
27 | /** Unit tests for {@link IngestionPipelineOptions}. */
28 | @RunWith(JUnit4.class)
29 | public class IngestionPipelineOptionsTest {
30 |
31 | @Test
32 | public void testCalculatePipelineStart() {
33 | assertThat(IngestionPipelineOptions.calculatePipelineStart(123, 5, 1, Clock.systemUTC()))
34 | .isEqualTo(123);
35 | assertThat(IngestionPipelineOptions.calculatePipelineStart(123, 5, 4, Clock.systemUTC()))
36 | .isEqualTo(123);
37 | assertThat(
38 | IngestionPipelineOptions.calculatePipelineStart(
39 | IngestionPipelineOptions.UNSPECIFIED_START,
40 | 10,
41 | 1,
42 | Clock.fixed(Instant.ofEpochSecond(32), ZoneId.systemDefault())))
43 | .isEqualTo(20);
44 | assertThat(
45 | IngestionPipelineOptions.calculatePipelineStart(
46 | IngestionPipelineOptions.UNSPECIFIED_START,
47 | 10,
48 | 2,
49 | Clock.fixed(Instant.ofEpochSecond(32), ZoneId.systemDefault())))
50 | .isEqualTo(10);
51 | assertThat(
52 | IngestionPipelineOptions.calculatePipelineStart(
53 | IngestionPipelineOptions.UNSPECIFIED_START,
54 | 10,
55 | 1,
56 | Clock.fixed(Instant.ofEpochSecond(20), ZoneId.systemDefault())))
57 | .isEqualTo(10);
58 | assertThat(
59 | IngestionPipelineOptions.calculatePipelineStart(
60 | IngestionPipelineOptions.UNSPECIFIED_START,
61 | // default ingestion pipeline window
62 | // https://github.com/google/exposure-notifications-private-analytics-ingestion/blob/ebf484edf5969d2b7113534db7450f61a937ecf0/terraform/variables.tf#L79
63 | 3600,
64 | 1,
65 | Clock.fixed(Instant.ofEpochSecond(1608067718), ZoneId.of("UTC"))))
66 | .isEqualTo(1608062400);
67 | assertThat(
68 | IngestionPipelineOptions.calculatePipelineStart(
69 | IngestionPipelineOptions.UNSPECIFIED_START,
70 | // default deletion pipeline window
71 | // https://github.com/google/exposure-notifications-private-analytics-ingestion/blob/ebf484edf5969d2b7113534db7450f61a937ecf0/terraform/variables.tf#L91
72 | 43200,
73 | 2,
74 | Clock.fixed(Instant.ofEpochSecond(1608033600), ZoneId.of("UTC"))))
75 | .isEqualTo(1607947200);
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/IngestionPipelineTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare;
19 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.DataShareMetadata;
20 | import java.util.ArrayList;
21 | import java.util.Arrays;
22 | import java.util.Collections;
23 | import java.util.List;
24 | import org.apache.beam.sdk.testing.PAssert;
25 | import org.apache.beam.sdk.testing.TestPipeline;
26 | import org.apache.beam.sdk.testing.ValidatesRunner;
27 | import org.apache.beam.sdk.transforms.Count;
28 | import org.apache.beam.sdk.transforms.Create;
29 | import org.apache.beam.sdk.transforms.Keys;
30 | import org.apache.beam.sdk.transforms.Values;
31 | import org.apache.beam.sdk.values.KV;
32 | import org.apache.beam.sdk.values.PCollection;
33 | import org.junit.Rule;
34 | import org.junit.Test;
35 | import org.junit.experimental.categories.Category;
36 | import org.junit.runner.RunWith;
37 | import org.junit.runners.JUnit4;
38 |
39 | /** Unit tests for {@link IngestionPipeline}. */
40 | @RunWith(JUnit4.class)
41 | public class IngestionPipelineTest {
42 |
43 | public transient IngestionPipelineOptions options =
44 | TestPipeline.testingPipelineOptions().as(IngestionPipelineOptions.class);
45 |
46 | @Rule public final transient TestPipeline pipeline = TestPipeline.fromOptions(options);
47 |
48 | @Test
49 | @Category(ValidatesRunner.class)
50 | public void processDataSharesValid() {
51 | options.setStartTime(1L);
52 | options.setDuration(2L);
53 | options.setBatchSize(1L);
54 | options.setDeviceAttestation(false);
55 |
56 | DataShareMetadata meta = DataShareMetadata.builder().setMetricName("sampleMetric").build();
57 |     List<String> certs = new ArrayList<>();
58 | certs.add("cert1");
59 | certs.add("cert2");
60 | certs.add("cert3");
61 |     List<DataShare> inputData =
62 | Arrays.asList(
63 | DataShare.builder()
64 | .setCertificateChain(certs)
65 | .setPath("id1")
66 | .setCreatedMs(1000L)
67 | .setDataShareMetadata(meta)
68 | .build(),
69 | DataShare.builder()
70 | .setCertificateChain(certs)
71 | .setPath("id2")
72 | .setCreatedMs(2000L)
73 | .setDataShareMetadata(meta)
74 | .build(),
75 | DataShare.builder()
76 | .setCertificateChain(certs)
77 | .setPath("id3")
78 | .setCreatedMs(4000L)
79 | .setDataShareMetadata(meta)
80 | .build(),
81 | DataShare.builder()
82 | .setCertificateChain(certs)
83 | .setPath("missing")
84 | .setDataShareMetadata(meta)
85 | .build());
86 |
87 |     PCollection<KV<DataShareMetadata, List<DataShare>>> actualOutput =
88 | IngestionPipeline.processDataShares(pipeline.apply(Create.of(inputData)));
89 |
90 |     List<List<DataShare>> expectedValues =
91 | Arrays.asList(
92 | Collections.singletonList(
93 | DataShare.builder()
94 | .setPath("id1")
95 | .setCreatedMs(1000L)
96 | .setCertificateChain(certs)
97 | .setDataShareMetadata(meta)
98 | .build()),
99 | Collections.singletonList(
100 | DataShare.builder()
101 | .setPath("id2")
102 | .setCreatedMs(2000L)
103 | .setCertificateChain(certs)
104 | .setDataShareMetadata(meta)
105 | .build()));
106 | PAssert.that(actualOutput.apply(Keys.create()).apply(Count.globally())).containsInAnyOrder(2L);
107 | PAssert.that(actualOutput.apply(Values.create())).containsInAnyOrder(expectedValues);
108 | pipeline.run().waitUntilFinish();
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/PrioSerializationHelperTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline;
17 |
18 | import static com.google.common.truth.Truth.assertThat;
19 | import static org.junit.Assert.assertEquals;
20 |
21 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare;
22 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.EncryptedShare;
23 | import java.io.File;
24 | import java.io.IOException;
25 | import java.net.URL;
26 | import java.nio.ByteBuffer;
27 | import java.util.ArrayList;
28 | import java.util.List;
29 | import org.abetterinternet.prio.v1.PrioDataSharePacket;
30 | import org.abetterinternet.prio.v1.PrioIngestionHeader;
31 | import org.junit.Rule;
32 | import org.junit.Test;
33 | import org.junit.rules.TemporaryFolder;
34 | import org.junit.runner.RunWith;
35 | import org.junit.runners.JUnit4;
36 |
37 | /** Tests for {@link PrioSerializationHelper}. */
38 | @RunWith(JUnit4.class)
39 | public class PrioSerializationHelperTest {
40 |
41 | @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();
42 |
43 | @Test
44 | public void testPrioBatchHeaderSerialization()
45 | throws IOException, InstantiationException, IllegalAccessException {
46 |     List<PrioIngestionHeader> ingestionHeaders = new ArrayList<>();
47 | PrioIngestionHeader header1 =
48 | PrioIngestionHeader.newBuilder()
49 | .setBatchUuid("id123")
50 | .setName("secretname")
51 | .setBins(123)
52 | .setEpsilon(3.14)
53 | .setPrime(7)
54 | .setNumberOfServers(3)
55 | .setHammingWeight(5)
56 | .setBatchStartTime(1600000000)
57 | .setBatchEndTime(1700000000)
58 | .setPacketFileDigest(ByteBuffer.wrap("placeholder1".getBytes()))
59 | .build();
60 | PrioIngestionHeader header2 =
61 | PrioIngestionHeader.newBuilder()
62 | .setBatchUuid("id987")
63 | .setName("simplename")
64 | .setBins(4)
65 | .setEpsilon(2.71)
66 | .setPrime(13)
67 | .setNumberOfServers(5)
68 | .setHammingWeight(8)
69 | .setBatchStartTime(1650000000)
70 | .setBatchEndTime(1710000000)
71 | .setPacketFileDigest(ByteBuffer.wrap("placeholder2".getBytes()))
72 | .build();
73 | ingestionHeaders.add(header1);
74 | ingestionHeaders.add(header2);
75 | File serializedHeaders = tmpFolder.newFile();
76 | ByteBuffer resultBytes =
77 | PrioSerializationHelper.serializeRecords(
78 | ingestionHeaders, PrioIngestionHeader.class, PrioIngestionHeader.getClassSchema());
79 |
80 | BatchWriterFn.writeToFile(serializedHeaders.getAbsolutePath(), resultBytes);
81 |     List<PrioIngestionHeader> deserializedHeaders =
82 | PrioSerializationHelper.deserializeRecords(
83 | PrioIngestionHeader.class, serializedHeaders.getAbsolutePath());
84 | assertEquals(ingestionHeaders, deserializedHeaders);
85 | }
86 |
87 | @Test
88 | public void testPrioDataSharePacketSerialization()
89 | throws IOException, InstantiationException, IllegalAccessException {
90 |     List<PrioDataSharePacket> dataSharePackets = new ArrayList<>();
91 | PrioDataSharePacket dataSharePacket1 =
92 | PrioDataSharePacket.newBuilder()
93 | .setDeviceNonce(ByteBuffer.wrap(new byte[] {0x07, 0x08, 0x09}))
94 | .setEncryptionKeyId("verysecretandsecurevalue1")
95 | .setRPit(1234567890)
96 | .setUuid("uniqueuserid1")
97 | .setVersionConfiguration("v1.0")
98 | .setEncryptedPayload(ByteBuffer.wrap(new byte[] {0x01, 0x02, 0x03, 0x04, 0x05}))
99 | .build();
100 |
101 | PrioDataSharePacket dataSharePacket2 =
102 | PrioDataSharePacket.newBuilder()
103 | .setDeviceNonce(ByteBuffer.wrap(new byte[] {0x10, 0x11, 0x12}))
104 | .setEncryptionKeyId("verysecretandsecurevalue2")
105 | .setRPit(987654321)
106 | .setUuid("uniqueuserid2")
107 | .setVersionConfiguration("v2.0")
108 | .setEncryptedPayload(ByteBuffer.wrap(new byte[] {0x06, 0x07, 0x08, 0x09, 0x10}))
109 | .build();
110 | dataSharePackets.add(dataSharePacket1);
111 | dataSharePackets.add(dataSharePacket2);
112 |
113 | File serializedDataShares = tmpFolder.newFile();
114 | ByteBuffer resultBytes =
115 | PrioSerializationHelper.serializeRecords(
116 | dataSharePackets, PrioDataSharePacket.class, PrioDataSharePacket.getClassSchema());
117 | BatchWriterFn.writeToFile(serializedDataShares.getAbsolutePath(), resultBytes);
118 |     List<PrioDataSharePacket> deserializedHeaders =
119 | PrioSerializationHelper.deserializeRecords(
120 | PrioDataSharePacket.class, serializedDataShares.getAbsolutePath());
121 | assertEquals(dataSharePackets, deserializedHeaders);
122 | }
123 |
124 | @Test
125 | public void testSplitPackets() {
126 | DataShare share =
127 | DataShare.builder()
128 | .setSchemaVersion(2)
129 | .setEncryptedDataShares(
130 | List.of(
131 | EncryptedShare.builder()
132 | .setEncryptedPayload("pha".getBytes())
133 | .setEncryptionKeyId("55NdHuhCjyR3PtTL0A7WRiaIgURhTmlkNw5dbFsKL70=")
134 | .build(),
135 | EncryptedShare.builder()
136 | .setEncryptedPayload("facilitator".getBytes())
137 | .setEncryptionKeyId("facilitator-key-id")
138 | .build()))
139 | .setRPit(2L)
140 | .setUuid("someuuid")
141 | .build();
142 | URL manifestUrl =
143 | getClass()
144 | .getResource(
145 | "/com/google/exposurenotification/privateanalytics/ingestion/pipeline/test-manifest.json");
146 | DataProcessorManifest phaManifest = new DataProcessorManifest(manifestUrl.toString());
147 |
148 |     List<PrioDataSharePacket> packets = PrioSerializationHelper.splitPackets(share);
149 | assertThat(packets).hasSize(2);
150 | assertThat(packets.get(0).getEncryptionKeyId()).isNull();
151 | assertThat(packets.get(1).getEncryptionKeyId()).isNull();
152 | }
153 | }
154 |
--------------------------------------------------------------------------------
/src/test/resources/com/google/exposurenotification/privateanalytics/ingestion/pipeline/test-manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "format": 0,
3 | "ingestion-bucket": "s3://us-west-1/prio-demo-gcp-test-pha-1-ingestor-1-ingestion",
4 | "ingestion-identity": "arn:aws:iam::12345678:role/AWSRoleAssumedByGCPSvcAcc",
5 | "peer-validation-bucket": "gs://prio-demo-gcp-test-pha-1-ingestor-1-peer-validation",
6 | "batch-signing-public-keys": {
7 | "demo-gcp-test-pha-1-ingestor-1-batch-signing-key": {
8 | "public-key": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEGBGNmLNT7TFvm59vbUdXvQCDGfXI\nta5HDGjpbKYKiINvbIsVES9oN2HPd7P2vITZrbWkpbLPGGwOa0srglXlSQ==\n-----END PUBLIC KEY-----\n",
9 | "expiration": "2021-01-30T22:21:39Z"
10 | }
11 | },
12 | "packet-encryption-certificates": {
13 | "demo-gcp-test-pha-1-ingestion-packet-decryption-key": {
14 | "certificate": "-----BEGIN CERTIFICATE-----\nMIIEqjCCA5KgAwIBAgITAPrxbgrqnx6Q94QbiX20Fu0soTANBgkqhkiG9w0BAQsF\nADAiMSAwHgYDVQQDDBdGYWtlIExFIEludGVybWVkaWF0ZSBYMTAeFw0yMDExMDEy\nMTIxMzlaFw0yMTAxMzAyMTIxMzlaMDkxNzA1BgNVBAMTLnRlc3QtcGhhLTEuZGVt\nby1nY3AuY2VydGlmaWNhdGVzLmlzcmctcHJpby5vcmcwWTATBgcqhkjOPQIBBggq\nhkjOPQMBBwNCAASo+KHdSgwWyQuuMadoecgsTNhKkEPYAWbtMp7mKwxBHfU38AKM\npjvJRXbazirrUyZrz7uYTQT6noLBX6Wc8DX2o4ICizCCAocwDgYDVR0PAQH/BAQD\nAgeAMB0GA1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjAMBgNVHRMBAf8EAjAA\nMB0GA1UdDgQWBBSF7/1eWAfGiRIFo2jvskZV9d/mvDAfBgNVHSMEGDAWgBTAzANG\nuVggzFxycPPhLssgpvVoOjB3BggrBgEFBQcBAQRrMGkwMgYIKwYBBQUHMAGGJmh0\ndHA6Ly9vY3NwLnN0Zy1pbnQteDEubGV0c2VuY3J5cHQub3JnMDMGCCsGAQUFBzAC\nhidodHRwOi8vY2VydC5zdGctaW50LXgxLmxldHNlbmNyeXB0Lm9yZy8wOQYDVR0R\nBDIwMIIudGVzdC1waGEtMS5kZW1vLWdjcC5jZXJ0aWZpY2F0ZXMuaXNyZy1wcmlv\nLm9yZzBMBgNVHSAERTBDMAgGBmeBDAECATA3BgsrBgEEAYLfEwEBATAoMCYGCCsG\nAQUFBwIBFhpodHRwOi8vY3BzLmxldHNlbmNyeXB0Lm9yZzCCAQQGCisGAQQB1nkC\nBAIEgfUEgfIA8AB3ALDMg+Wl+X1rr3wJzChJBIcqx+iLEyxjULfG/SbhbGx3AAAB\ndYXm2UAAAAQDAEgwRgIhAIl5MRKoTJT/+nhn0e/hDnOpiR4cm9cKAz9Rm8Yc/BAd\nAiEAuG1qnfYjEJ+55p7v8hrz2WSlGdukVsWAvCjw2lZERVYAdQAD7fHal3a284w0\nHjntnXB6dXA2nPmETzJ/6eFBODYbYAAAAXWF5ts1AAAEAwBGMEQCIEdfDFslikvA\nDUz5U9rNIcoYeRiWhOQ9ifMmRO6VyoFpAiBN6nvO/669OgtbR2YYa07NXr8b61lK\n+MFxfRCf5tkR5zANBgkqhkiG9w0BAQsFAAOCAQEAVnW5nGLQ26tXi5KHkyEVsrs1\n0u9S8Xp6PvMjnNRXuvC0u5b9QZ82COdVB5Y+bPav/Is7ppGtaWxd6/ZAZwCAbYzV\nooW54gv1NHjScs0F+Rx+TDWUW9W2SBqO5BB9Ei3wctu1vZFJ5IkjtbSLuLV0szqj\noNbdeCT3LZkvnGZgz3J5eyLDEsbf6KfW+7RJ8NPodjGmM91VTdOdUgxjwQQHOnYg\n+Zk0TRdwgfLg+o17l+Ng4BonGNaayge+VTj7smaOHslbtu9psangzAIK+KFBXSsz\nrljqSePLGWVHlRrZ5Fv2I9xKWiJKQDO+fWMCc4KNVBkKt3VGbmPwFRmlu4JzfQ==\n-----END CERTIFICATE-----\n\n-----BEGIN CERTIFICATE-----\nMIIEqzCCApOgAwIBAgIRAIvhKg5ZRO08VGQx8JdhT+UwDQYJKoZIhvcNAQELBQAw\nGjEYMBYGA1UEAwwPRmFrZSBMRSBSb290IFgxMB4XDTE2MDUyMzIyMDc1OVoXDTM2\nMDUyMzIyMDc1OVowIjEgMB4GA1UEAwwXRmFrZSBMRSBJbnRlcm1lZGlhdGUgWDEw\nggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDtWKySDn7rWZc5ggjz3ZB0\n8jO4xti3uzINfD5sQ7Lj7hzetUT+wQob+iXSZkhnvx+IvdbXF5/yt8aWPpUKnPym\noLxsYiI5gQBLxNDzIec0OIaflWqAr29m7J8+NNtApEN8nZFnf3bhehZW7AxmS1m0\nZnSsdHw0Fw+bgixPg2MQ9k9oefFeqa+7Kqdlz5bbrUYV2volxhDFtnI4Mh8BiWCN\nxDH1Hizq+GKCcHsinDZWurCqder/afJBnQs+SBSL6MVApHt+d35zjBD92fO2Je56\ndhMfzCgOKXeJ340WhW3TjD1zqLZXeaCyUNRnfOmWZV8nEhtHOFbUCU7r/KkjMZO9\nAgMBAAGjgeMwgeAwDgYDVR0PAQH/BAQDAgGGMBIGA1UdEwEB/wQIMAYBAf8CAQAw\nHQYDVR0OBBYEFMDMA0a5WCDMXHJw8+EuyyCm9Wg6MHoGCCsGAQUFBwEBBG4wbDA0\nBggrBgEFBQcwAYYoaHR0cDovL29jc3Auc3RnLXJvb3QteDEubGV0c2VuY3J5cHQu\nb3JnLzA0BggrBgEFBQcwAoYoaHR0cDovL2NlcnQuc3RnLXJvb3QteDEubGV0c2Vu\nY3J5cHQub3JnLzAfBgNVHSMEGDAWgBTBJnSkikSg5vogKNhcI5pFiBh54DANBgkq\nhkiG9w0BAQsFAAOCAgEABYSu4Il+fI0MYU42OTmEj+1HqQ5DvyAeyCA6sGuZdwjF\nUGeVOv3NnLyfofuUOjEbY5irFCDtnv+0ckukUZN9lz4Q2YjWGUpW4TTu3ieTsaC9\nAFvCSgNHJyWSVtWvB5XDxsqawl1KzHzzwr132bF2rtGtazSqVqK9E07sGHMCf+zp\nDQVDVVGtqZPHwX3KqUtefE621b8RI6VCl4oD30Olf8pjuzG4JKBFRFclzLRjo/h7\nIkkfjZ8wDa7faOjVXx6n+eUQ29cIMCzr8/rNWHS9pYGGQKJiY2xmVC9h12H99Xyf\nzWE9vb5zKP3MVG6neX1hSdo7PEAb9fqRhHkqVsqUvJlIRmvXvVKTwNCP3eCjRCCI\nPTAvjV+4ni786iXwwFYNz8l3PmPLCyQXWGohnJ8iBm+5nk7O2ynaPVW0U2W+pt2w\nSVuvdDM5zGv2f9ltNWUiYZHJ1mmO97jSY/6YfdOUH66iRtQtDkHBRdkNBsMbD+Em\n2TgBldtHNSJBfB3pm9FblgOcJ0FSWcUDWJ7vO0+NTXlgrRofRT6pVywzxVo6dND0\nWzYlTWeUVsO40xJqhgUQRER9YLOLxJ0O6C8i0xFxAMKOtSdodMB3RIwt7RFQ0uyt\nn5Z5MqkYhlMI3J1tPRTp1nEt9fyGspBOO05gi148Qasp+3N+svqKomoQglNoAxU=\n-----END CERTIFICATE-----\n"
15 | }
16 | }
17 | }
--------------------------------------------------------------------------------
/templates/dataflow-deletion-metadata-template.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ENPA Deletion Pipeline",
3 | "description": "A pipeline that can be used to delete data in Firestore",
4 | "parameters": [
5 | {
6 | "name": "firestoreProject",
7 | "label": "Firestore project ID",
8 | "isOptional": true,
9 |       "helpText": "Google Cloud project ID where the Firestore instance lives."
10 | },
11 | {
12 | "name": "startTime",
13 | "label": "Start time",
14 | "helpText": "Start time in seconds of documents to process.",
15 | "isOptional": true,
16 | "regexes": [
17 | "[0-9]+"
18 | ]
19 | },
20 | {
21 | "name": "duration",
22 | "label": "Duration",
23 | "helpText": "Duration of window in seconds.",
24 | "isOptional": true,
25 | "regexes": [
26 | "[0-9]+"
27 | ]
28 | },
29 | {
30 | "name": "graceHoursBackwards",
31 | "label": "Grace period backwards",
32 | "helpText": "Hours to read backwards from startTime.",
33 | "isOptional": true,
34 | "regexes": [
35 | "[0-9]+"
36 | ]
37 | },
38 | {
39 | "name": "graceHoursForwards",
40 | "label": "Grace period forwards",
41 | "helpText": "Hours to read forward from startTime.",
42 | "isOptional": true,
43 | "regexes": [
44 | "[0-9]+"
45 | ]
46 | },
47 | {
48 | "name": "deleteBatchSize",
49 | "label": "Delete batch size",
50 | "helpText": "Batch size of Firestore batch deletes.",
51 | "isOptional": true,
52 | "regexes": [
53 | "[0-9]+"
54 | ]
55 | }
56 | ]
57 | }
58 |
--------------------------------------------------------------------------------
/templates/dataflow-flex-template.json:
--------------------------------------------------------------------------------
1 | {
2 | "image": "gcr.io/enpa-infra/ingestion-pipeline:0.1.0-SNAPSHOT",
3 | "metadata": {},
4 | "sdkInfo": {
5 | "language": "JAVA"
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/templates/dataflow-ingestion-metadata-template.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ENPA Ingestion Pipeline",
3 | "description": "A batch processing pipeline that can be used to ingest private data shares according to the Exposure Notification Private Analytics protocol",
4 | "parameters": [
5 | {
6 | "name": "firestoreProject",
7 | "label": "Firestore project ID",
8 | "isOptional": true,
9 |       "helpText": "Google Cloud project ID where the Firestore instance lives."
10 | },
11 | {
12 | "name": "phaOutput",
13 | "label": "PHA output",
14 |       "helpText": "File prefix for output files for PHA.",
15 | "isOptional": true
16 | },
17 | {
18 | "name": "phaManifestURL",
19 | "label": "PHA Manifest URL",
20 |       "helpText": "Location of PHA Manifest file.",
21 | "isOptional": true
22 | },
23 | {
24 | "name": "facilitatorOutput",
25 | "label": "Facilitator output",
26 | "helpText": "File prefix for output files for Facilitator.",
27 | "isOptional": true
28 | },
29 | {
30 | "name": "facilitatorManifestURL",
31 | "label": "Facilitator Manifest URL",
32 |       "helpText": "Location of Facilitator Manifest file.",
33 | "isOptional": true
34 | },
35 | {
36 | "name": "startTime",
37 | "label": "Start time",
38 | "helpText": "Start time in seconds of documents to process.",
39 | "isOptional": true,
40 | "regexes": [
41 | "[0-9]+"
42 | ]
43 | },
44 | {
45 | "name": "duration",
46 | "label": "Duration",
47 | "helpText": "Duration of window in seconds.",
48 | "isOptional": true,
49 | "regexes": [
50 | "[0-9]+"
51 | ]
52 | },
53 | {
54 | "name": "graceHoursBackwards",
55 | "label": "Grace period backwards",
56 | "helpText": "Hours to read backwards from startTime.",
57 | "isOptional": true,
58 | "regexes": [
59 | "[0-9]+"
60 | ]
61 | },
62 | {
63 | "name": "graceHoursForwards",
64 | "label": "Grace period forwards",
65 | "helpText": "Hours to read forward from startTime.",
66 | "isOptional": true,
67 | "regexes": [
68 | "[0-9]+"
69 | ]
70 | },
71 | {
72 | "name": "minimumParticipantCount",
73 | "label": "Minimum participant count",
74 | "helpText": "Minimum count of participants to preserve privacy.",
75 | "isOptional": true,
76 | "regexes": [
77 | "[0-9]+"
78 | ]
79 | },
80 | {
81 | "name": "partitionCount",
82 | "label": "Partition count",
83 | "helpText": "Maximum number of partitions to create for Firestore query.",
84 | "isOptional": true,
85 | "regexes": [
86 | "[0-9]+"
87 | ]
88 | },
89 | {
90 | "name": "batchSize",
91 | "label": "Batch size",
92 | "helpText": "Batch size of individual files.",
93 | "isOptional": true,
94 | "regexes": [
95 | "[0-9]+"
96 | ]
97 | },
98 | {
99 | "name": "deviceAttestation",
100 | "label": "Device attestation",
101 | "helpText": "Verify device attestations.",
102 | "isOptional": true,
103 | "regexes": [
104 | "true|false"
105 | ]
106 | },
107 | {
108 | "name": "keyResourceName",
109 | "label": "Key resource name",
110 | "helpText": "KMS resource name for signature generation.",
111 | "isOptional": true,
112 | "regexes": [
113 | "projects/[^/]+/locations/[^/]+/keyRings/[^/]+/cryptoKeys/.+"
114 | ]
115 | },
116 | {
117 | "name": "autoscalingAlgorithm",
118 | "label": "Autoscaling algorithm",
119 | "helpText": "Type of autoscaling to use",
120 | "isOptional": true,
121 | "regexes": [
122 | "NONE|THROUGHPUT_BASED"
123 | ]
124 | },
125 | {
126 | "name": "packageName",
127 | "label": "Android package name",
128 | "helpText": "Android package name to use during certificate checking.",
129 | "isOptional": true
130 | },
131 | {
132 | "name": "packageSignatureDigest",
133 | "label": "Android package signature digest",
134 | "helpText": "Android package signature digest to use during certificate checking.",
135 | "isOptional": true
136 | }
137 | ]
138 | }
139 |
--------------------------------------------------------------------------------
/templates/scheduler-deletion-template.tmpl:
--------------------------------------------------------------------------------
1 | {
2 | "launchParameter": {
3 | "jobName": "${pipeline_name}",
4 | "parameters": {%{ if start_time > 0 }
5 | "startTime": "${start_time}",%{ endif }%{ if autoscaling_algorithm != "" }
6 | "autoscalingAlgorithm": "${autoscaling_algorithm}",%{ endif }
7 |       "duration": "${window}"
8 | },
9 | "environment": {
10 | "machineType": "${machine_type}",
11 | "numWorkers": "${worker_count}",
12 | "maxWorkers": "${max_worker_count}",
13 | "serviceAccountEmail": "${service_account}"
14 | },
15 | "containerSpecGcsPath": "gs://enpa-pipeline-specs/deletion-pipeline-${pipeline_version}.json"
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/templates/scheduler-ingestion-template.tmpl:
--------------------------------------------------------------------------------
1 | {
2 | "launchParameter": {
3 | "jobName": "${pipeline_name}",
4 | "parameters": {
5 | "batchSize": "${batch_size}",
6 | "keyResourceName": "${key_id}",
7 | %{~if dev_project~}
8 | "phaOutput": "gs://${project}/output/pha",
9 | "facilitatorOutput": "gs://${project}/output/facilitator",
10 | %{~else~}
11 | "facilitatorManifestURL": "${facilitator_manifest_url}",
12 | "phaManifestURL": "${pha_manifest_url}",
13 | %{~endif~}
14 | "tempLocation": "${temp_location}",
15 |       "duration": "${window}",%{ if start_time > 0 }
16 |       "startTime": "${start_time}",%{ endif }%{ if autoscaling_algorithm != "" }
17 |       "autoscalingAlgorithm": "${autoscaling_algorithm}",%{ endif }%{ if package_name != "" }
18 |       "packageName": "${package_name}",%{ endif }%{ if package_signature_digest != "" }
19 |       "packageSignatureDigest": "${package_signature_digest}",%{ endif }
20 | "deviceAttestation": "${enable_device_attestation}"
21 | },
22 | "environment": {
23 | "machineType": "${machine_type}",
24 | "numWorkers": "${worker_count}",
25 | "maxWorkers": "${max_worker_count}",
26 | "serviceAccountEmail": "${service_account}"
27 | },
28 | "containerSpecGcsPath": "gs://enpa-pipeline-specs/ingestion-pipeline-${pipeline_version}.json"
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/terraform/README.md:
--------------------------------------------------------------------------------
1 | ENPA Ingestion Terraform
2 | ================================================================================
3 |
4 | This [terraform](https://terraform.io) module configures and provisions the
5 | infrastructure for a single PHA's instance of ENPA.
6 |
7 | Prerequisites
8 | --------------------------------------------------------------------------------
9 |
10 | Before applying this module, there are a few manual steps:
11 |
12 | * create the Google Cloud project
13 | * associate a billing account
14 | * enable Firebase, App Engine, and Firestore
15 | * make sure you have [`gsutil`] and [the `firebase` CLI] installed
16 |
17 | [`gsutil`]: https://cloud.google.com/storage/docs/gsutil_install
18 | [the `firebase` CLI]: https://firebase.google.com/docs/cli
19 |
20 | Example Invocations
21 | --------------------------------------------------------------------------------
22 |
23 | A typical production project would look like this:
24 |
25 | ```terraform
26 | module "pha_example" {
27 | source = "github.com/google/exposure-notifications-private-analytics-ingestion.git//terraform"
28 |
29 | project = "enpa-ingestion-example"
30 | region = "us-west2"
31 | locality = "example"
32 |
33 | facilitator_manifest_url = "https://facil.example.com/example-g-enpa-manifest.json"
34 | pha_manifest_url = "https://pha.example.com/example-g-enpa-manifest.json"
35 |
36 | pipeline_version = "0.2.6"
37 | }
38 | ```
39 |
40 | You can also create dev projects, which write their output to GCS instead of
41 | shipping it upstream:
42 |
43 | ```terraform
44 | module "pha_dev_example" {
45 | source = "github.com/google/exposure-notifications-private-analytics-ingestion.git//terraform"
46 |
47 | project = "enpa-ingestion-example-dev"
48 | region = "us-west2"
49 |
50 | dev_project = true
51 |
52 | pipeline_version = "0.2.6"
53 | }
54 | ```
55 |
56 | Available Parameters
57 | --------------------------------------------------------------------------------
58 |
59 | #### Required Parameters
60 |
61 | * `project` -- the ID of the Google Cloud project to install in
62 | * `pipeline_version` -- the version of the pipelines to run. Corresponds to tags in this repository.
63 |
64 | A production instance also requires:
65 |
66 | * `facilitator_manifest_url`
67 | * `pha_manifest_url`
68 |
69 | A development instance requires the `dev_project` flag. You should not specify
70 | manifest URLs on a dev project.
71 |
72 | #### Suggested Parameters
73 |
74 | You will almost always want to set these:
75 |
76 | * `region` -- the Google Cloud region to use (default: `us-central1`)
77 |
78 | You should also review the defaults of the following (a combined example follows this list):
79 |
80 | * `locality` -- the locality string assigned to this ENPA PHA setup; inferred from `project` if not set
81 | * `enable_device_attestation` -- whether to enable device attestation (default: true)
82 | * `ingestion_schedule` -- the schedule on which to run the ingestion pipeline (default: 30 minutes past each hour)
83 | * `deletion_schedule` -- the schedule on which to run the deletion pipeline (default: 0600 and 1800 UTC)
84 | * `ingestion_window` -- the length of the window the ingestion pipeline uses to look for new data (default: one hour)
85 | * `deletion_window` -- the length of the window the deletion pipeline uses to look for data to remove (default: 12 hours)
86 |
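As a sketch, here is how the suggested parameters might be combined in a single module block. The project name and manifest URLs are illustrative, and the parameter values simply restate the defaults described above; this is not a recommendation for any particular deployment:

```terraform
module "pha_example_suggested" {
  source = "github.com/google/exposure-notifications-private-analytics-ingestion.git//terraform"

  project          = "enpa-ingestion-example"   # illustrative project ID
  pipeline_version = "0.2.6"

  facilitator_manifest_url = "https://facil.example.com/example-g-enpa-manifest.json"
  pha_manifest_url         = "https://pha.example.com/example-g-enpa-manifest.json"

  # Suggested parameters (values shown restate the module defaults unless noted)
  region                    = "us-west2"        # default is us-central1
  locality                  = "example"         # otherwise inferred from the project ID
  enable_device_attestation = true
  ingestion_schedule        = "30 * * * *"      # 30 minutes past each hour
  deletion_schedule         = "0 6,18 * * *"    # 0600 and 1800 UTC
  ingestion_window          = 3600              # one hour, in seconds
  deletion_window           = 43200             # twelve hours, in seconds
}
```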
87 | #### Dataflow Tuning Parameters
88 |
89 | These parameters let you tweak the Dataflow pipelines (see the sketch after this list):
90 |
91 | * `batch_size` -- the number of records per batch (default: 100,000)
92 | * `ingestion_machine_type` -- the machine type used by the ingestion pipeline (default: `n1-standard-4`)
93 | * `ingestion_worker_count` -- the number of workers used by the ingestion pipeline (default: 10)
94 | * `ingestion_autoscaling_algorithm` -- the algorithm to use for autoscaling the ingestion pipeline (default: `THROUGHPUT_BASED`)
95 | * `deletion_machine_type` -- the machine type used by the deletion pipeline (default: `n1-standard-2`)
96 | * `deletion_worker_count` -- the number of workers used by the deletion pipeline (default: 10)
97 | * `deletion_autoscaling_algorithm` -- the algorithm to use for autoscaling the deletion pipeline (default: `THROUGHPUT_BASED`)
98 | * `package_signature_digest` -- Android package signature digest to use during certificate checking (default: not set)
99 | * `package_name` -- Android package name to use during certificate checking (default: not set)
100 |
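Similarly, a sketch of a module block that sets the Dataflow tuning parameters explicitly. The values mirror the defaults listed above, and the Android package name and signature digest are hypothetical placeholders, not real identifiers:

```terraform
module "pha_example_tuning" {
  source = "github.com/google/exposure-notifications-private-analytics-ingestion.git//terraform"

  project          = "enpa-ingestion-example"   # illustrative project ID
  pipeline_version = "0.2.6"

  # Dataflow tuning (values mirror the defaults listed above)
  batch_size                      = 100000
  ingestion_machine_type          = "n1-standard-4"
  ingestion_worker_count          = 10
  ingestion_autoscaling_algorithm = "THROUGHPUT_BASED"
  deletion_machine_type           = "n1-standard-2"
  deletion_worker_count           = 10
  deletion_autoscaling_algorithm  = "THROUGHPUT_BASED"

  # Only relevant when checking attestations against a specific APK; both values are placeholders
  package_name             = "com.example.enpa"
  package_signature_digest = "0000000000000000000000000000000000000000000000000000000000000000"
}
```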
101 | #### Internal Parameters
102 |
103 | You will generally leave these alone, but they're listed here for completeness (a short sketch follows the list):
104 |
105 | * `enable_pipelines` -- whether to schedule pipeline runs (default: true)
106 | * `manifest_bucket` -- the GCS bucket in which the generated Prio manifest is stored (default: prio-manifests)
107 |
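And a minimal sketch of overriding the internal parameters, for example to provision the infrastructure without scheduling any pipeline runs. The project ID is illustrative and the values shown restate the defaults:

```terraform
module "pha_example_internal" {
  source = "github.com/google/exposure-notifications-private-analytics-ingestion.git//terraform"

  project          = "enpa-ingestion-example-dev"   # illustrative project ID
  pipeline_version = "0.2.6"
  dev_project      = true

  enable_pipelines = false             # provision everything, but don't schedule pipeline runs
  manifest_bucket  = "prio-manifests"  # the default manifest bucket
}
```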
--------------------------------------------------------------------------------
/terraform/dataflow.tf:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2020, Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | resource "google_service_account" "dataflow" {
18 | project = var.project
19 | account_id = "dataflow-job-runner"
20 |
21 | display_name = "Dataflow Job Runner"
22 | description = "Service account for dataflow pipelines"
23 | }
24 |
25 | resource "google_project_service" "dataflow" {
26 | project = var.project
27 | service = "dataflow.googleapis.com"
28 |
29 | disable_dependent_services = false
30 | disable_on_destroy = false
31 | }
32 |
33 | resource "google_project_iam_member" "dataflow" {
34 | project = var.project
35 | role = "roles/${each.value}"
36 | member = "serviceAccount:${google_service_account.dataflow.email}"
37 |
38 | ### FIXME: these roles are almost certainly overly broad. We should create a
39 | ### custom role that grants only the permissions required.
40 | for_each = toset([
41 | "cloudkms.signer",
42 | "containerregistry.ServiceAgent",
43 | "dataflow.admin",
44 | "dataflow.developer",
45 | "dataflow.worker",
46 | "datastore.user",
47 | "editor",
48 | "iam.serviceAccountUser",
49 | ])
50 | }
51 |
--------------------------------------------------------------------------------
/terraform/firestore.tf:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2020, Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | locals {
18 | temp_dir = "${path.module}/.terraform/tmp"
19 | }
20 |
21 | data "google_storage_object_signed_url" "firestore_rules" {
22 | bucket = var.templates_bucket
23 | path = "security-rules/firestore-${var.pipeline_version}.rules"
24 | duration = "5m"
25 | }
26 |
27 | data "http" "firestore_rules" {
28 | url = data.google_storage_object_signed_url.firestore_rules.signed_url
29 | }
30 |
31 | resource "local_file" "firestore_rules" {
32 | filename = "${local.temp_dir}/firestore.rules"
33 |
34 |   # the content is not really sensitive in the normal sense; it's just enormous
35 | # and easier to elide than to scroll through.
36 | sensitive_content = data.http.firestore_rules.body
37 |
38 | file_permission = "0644"
39 | directory_permission = "0755"
40 | }
41 |
42 | resource "local_file" "firebase_json" {
43 | filename = "${local.temp_dir}/firebase.json"
44 | content = jsonencode({ firestore = { rules = "firestore.rules" } })
45 |
46 | file_permission = "0644"
47 | directory_permission = "0755"
48 | }
49 |
50 | resource "null_resource" "firestore_security_rules" {
51 | triggers = {
52 | # if either of the config files changes, upload the rules
53 | config = local_file.firebase_json.content
54 | rules = local_file.firestore_rules.content
55 |
56 | # if the version changes upload the rules even if the files didn't change
57 | version = var.pipeline_version
58 | }
59 |
60 | provisioner "local-exec" {
61 | command = "firebase deploy --only firestore:rules --project ${var.project}"
62 | working_dir = local.temp_dir
63 |
64 | environment = {
65 | GOOGLE_APPLICATION_CREDENTIALS = "${abspath(path.root)}/credentials.json"
66 | }
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/terraform/gcr.tf:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2021, Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | resource "google_storage_bucket_iam_member" "gcr_bucket" {
18 | bucket = "artifacts.enpa-infra.appspot.com"
19 | role = "roles/storage.objectViewer"
20 | member = "serviceAccount:${google_service_account.dataflow.email}"
21 | }
--------------------------------------------------------------------------------
/terraform/iam.tf:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2021, Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | resource "google_project_iam_custom_role" "terraform_service_account_role" {
18 | project = var.project
19 | role_id = "ENPATerraformRunner"
20 | title = "Terraform Runner for ENPA Infra provisioning"
21 | permissions = [
22 | "cloudkms.cryptoKeyVersions.create",
23 | "cloudkms.cryptoKeyVersions.destroy",
24 | "cloudkms.cryptoKeyVersions.get",
25 | "cloudkms.cryptoKeyVersions.viewPublicKey",
26 | "cloudkms.cryptoKeys.create",
27 | "cloudkms.cryptoKeys.get",
28 | "cloudkms.cryptoKeys.update",
29 | "cloudkms.keyRings.create",
30 | "cloudkms.keyRings.get",
31 | "cloudscheduler.jobs.create",
32 | "cloudscheduler.jobs.delete",
33 | "cloudscheduler.jobs.get",
34 | "firebase.projects.get",
35 | "firebaserules.releases.list",
36 | "firebaserules.releases.update",
37 | "firebaserules.rulesets.create",
38 | "firebaserules.rulesets.delete",
39 | "firebaserules.rulesets.get",
40 | "firebaserules.rulesets.test",
41 | "iam.roles.create",
42 | "iam.roles.delete",
43 | "iam.roles.get",
44 | "iam.roles.list",
45 | "iam.roles.update",
46 | "iam.serviceAccounts.actAs",
47 | "iam.serviceAccounts.create",
48 | "iam.serviceAccounts.delete",
49 | "iam.serviceAccounts.get",
50 | "resourcemanager.projects.get",
51 | "resourcemanager.projects.getIamPolicy",
52 | "resourcemanager.projects.setIamPolicy",
53 | "serviceusage.services.disable",
54 | "serviceusage.services.enable",
55 | "serviceusage.services.get",
56 | "serviceusage.services.list",
57 | "storage.buckets.create",
58 | "storage.buckets.delete",
59 | "storage.buckets.get",
60 | ]
61 | }
62 |
63 | resource "google_project_iam_member" "terraform_service_account_permissions" {
64 | project = var.project
65 | role = google_project_iam_custom_role.terraform_service_account_role.id
66 | member = "serviceAccount:${var.terraform_svc_account_email}"
67 | }
68 |
69 | resource "google_project_iam_binding" "owners" {
70 | depends_on = [google_project_iam_member.terraform_service_account_permissions]
71 | project = var.project
72 | role = "roles/owner"
73 | members = var.project_owners_list
74 | }
75 |
--------------------------------------------------------------------------------
/terraform/main.tf:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2020, Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | locals {
18 | services = [
19 | "iam.googleapis.com",
20 | "cloudkms.googleapis.com"
21 | ]
22 |
23 | app_engine_location = (var.region == "us-central1" ? "us-central" : var.region)
24 | locality = (var.locality != "" ? var.locality : replace(var.project, "enpa-ingestion-", ""))
25 | }
26 |
27 | resource "google_project_service" "apis" {
28 | for_each = toset(local.services)
29 | project = var.project
30 | service = each.value
31 |
32 | disable_dependent_services = false
33 | disable_on_destroy = true
34 | }
35 |
36 | resource "google_storage_bucket" "bucket" {
37 | project = var.project
38 | name = var.project
39 |
40 | location = var.region
41 | storage_class = "STANDARD"
42 |
43 | # when true, all objects in the bucket will be deleted if terraform tries to
44 | # delete the bucket. Setting it to false is an added level of safety.
45 | force_destroy = false
46 | }
47 |
48 | resource "google_kms_key_ring" "keyring" {
49 | project = var.project
50 | name = "enpa-signing-key-ring"
51 | location = var.region
52 |
53 | lifecycle {
54 | prevent_destroy = true
55 | }
56 |
57 | depends_on = [
58 | google_project_service.apis["cloudkms.googleapis.com"]
59 | ]
60 | }
61 |
62 | resource "google_kms_crypto_key" "key" {
63 | name = "enpa-signing-key"
64 | key_ring = google_kms_key_ring.keyring.id
65 | purpose = "ASYMMETRIC_SIGN"
66 |
67 | version_template {
68 | algorithm = "EC_SIGN_P256_SHA256"
69 | protection_level = "HSM"
70 | }
71 | }
72 |
73 | data "google_kms_crypto_key_version" "key" {
74 | crypto_key = google_kms_crypto_key.key.id
75 | }
76 |
77 | resource "google_storage_bucket_object" "manifest" {
78 | name = "${local.locality}-g-enpa-manifest.json"
79 | bucket = var.manifest_bucket
80 |
81 | cache_control = "no-cache,max-age=0"
82 | content_type = "application/json"
83 |
84 | ### FIXME: our keys currently don't expire so the expiration date is just a
85 | ### random value I pulled out of an example file. It should be changed to
86 | ### something meaningful.
87 | content = <<-EOF
88 | {
89 | "format": 1,
90 | "server-identity": {
91 | "gcp-service-account-email": "${google_service_account.dataflow.email}",
92 | "gcp-service-account-id": "${google_service_account.dataflow.unique_id}"
93 | },
94 | "batch-signing-public-keys": {
95 | "${google_kms_crypto_key.key.id}/cryptoKeyVersions/${coalesce(data.google_kms_crypto_key_version.key.version, "0")}": {
96 | "public-key": "${replace(try(data.google_kms_crypto_key_version.key.public_key[0].pem, ""), "\n", "\\n")}",
97 | "expiration": "20211231T000000Z"
98 | }
99 | }
100 | }
101 | EOF
102 | }
103 |
104 | resource "google_storage_object_acl" "manifest" {
105 | # this needs to be output_name in order to recreate the ACL if the object is
106 | # recreated
107 | object = google_storage_bucket_object.manifest.output_name
108 | bucket = var.manifest_bucket
109 |
110 | predefined_acl = (var.dev_project ? "authenticatedRead" : "publicRead")
111 | }
112 |
--------------------------------------------------------------------------------
/terraform/scheduler.tf:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2020, Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | locals {
18 | flex_template_launch_endpoint = "https://dataflow.googleapis.com/v1b3/projects/${var.project}/locations/${var.region}/flexTemplates:launch"
19 | }
20 |
21 | resource "google_project_service" "scheduler" {
22 | project = var.project
23 | service = "cloudscheduler.googleapis.com"
24 |
25 | disable_dependent_services = false
26 | disable_on_destroy = false
27 | }
28 |
29 | data "http" "ingestion_template" {
30 | url = "https://storage.googleapis.com/enpa-pipeline-specs/scheduler-ingestion-template-${var.pipeline_version}.tmpl"
31 | }
32 |
33 | data "template_file" "ingestion" {
34 | template = data.http.ingestion_template.body
35 |
36 | vars = {
37 | pipeline_name = "ingestion-pipeline-${lower(replace(replace(var.pipeline_version, ".", "-"), "_", "-"))}"
38 | start_time = var.ingestion_start_time
39 | autoscaling_algorithm = var.ingestion_autoscaling_algorithm
40 | batch_size = var.batch_size
41 | dev_project = var.dev_project
42 | enable_device_attestation = var.enable_device_attestation
43 | facilitator_manifest_url = var.facilitator_manifest_url
44 | key_id = "projects/${var.project}/locations/${var.region}/keyRings/${google_kms_key_ring.keyring.name}/cryptoKeys/${google_kms_crypto_key.key.name}/cryptoKeyVersions/1",
45 | machine_type = var.ingestion_machine_type
46 | pha_manifest_url = var.pha_manifest_url
47 | pipeline_version = var.pipeline_version
48 | project = var.project
49 | region = var.region
50 | service_account = google_service_account.dataflow.email
51 | temp_location = "${google_storage_bucket.bucket.url}/temp"
52 | window = var.ingestion_window
53 | worker_count = var.ingestion_worker_count
54 | max_worker_count = var.ingestion_max_worker_count
55 | package_signature_digest = var.package_signature_digest
56 | package_name = var.package_name
57 | }
58 | }
59 |
60 | resource "google_cloud_scheduler_job" "ingestion" {
61 | project = var.project
62 | name = "ingestion-pipeline"
63 | region = var.region
64 |
65 | # the GCP provider currently does not support pausing/resuming scheduler jobs,
66 | # so if we want to disable a job the best workaround we have is to schedule it
67 |   # far into the future. Unfortunately, due to the cron format, the best we can do
68 | # is "one year from now", where "now" means the time at which I'm typing this
69 | # comment.
70 | #
71 | # Since we don't expect this project to live for another year it should be
72 | # fine, but don't be surprised if your pipeline runs at noon UTC on December
73 | # 15th.
74 | schedule = (var.enable_pipelines ? var.ingestion_schedule : "0 12 15 12 *")
75 | time_zone = "Etc/UTC"
76 |
77 | http_target {
78 | oauth_token {
79 | service_account_email = google_service_account.dataflow.email
80 | }
81 |
82 | http_method = "POST"
83 | uri = local.flex_template_launch_endpoint
84 | body = base64encode(data.template_file.ingestion.rendered)
85 | }
86 |
87 | depends_on = [
88 | google_project_service.scheduler
89 | ]
90 | }
91 |
92 | data "http" "deletion_template" {
93 | url = "https://storage.googleapis.com/enpa-pipeline-specs/scheduler-deletion-template-${var.pipeline_version}.tmpl"
94 | }
95 |
96 | data "template_file" "deletion" {
97 | template = data.http.deletion_template.body
98 |
99 | vars = {
100 | pipeline_name = "deletion-pipeline-${lower(replace(replace(var.pipeline_version, ".", "-"), "_", "-"))}"
101 | start_time = var.deletion_start_time
102 | autoscaling_algorithm = var.deletion_autoscaling_algorithm
103 | machine_type = var.deletion_machine_type
104 | pipeline_version = var.pipeline_version
105 | service_account = google_service_account.dataflow.email
106 | window = var.deletion_window
107 | worker_count = var.deletion_worker_count
108 | max_worker_count = var.deletion_max_worker_count
109 | }
110 | }
111 |
112 | resource "google_cloud_scheduler_job" "deletion" {
113 | project = var.project
114 | name = "deletion-pipeline"
115 | region = var.region
116 |
117 | # see comment in the ingestion job definition for info about this magic value
118 | schedule = (var.enable_pipelines ? var.deletion_schedule : "0 12 15 12 *")
119 | time_zone = "Etc/UTC"
120 |
121 | http_target {
122 | oauth_token {
123 | service_account_email = google_service_account.dataflow.email
124 | }
125 |
126 | http_method = "POST"
127 | uri = local.flex_template_launch_endpoint
128 | body = base64encode(data.template_file.deletion.rendered)
129 | }
130 |
131 | depends_on = [
132 | google_project_service.scheduler
133 | ]
134 | }
135 |
--------------------------------------------------------------------------------
/terraform/variables.tf:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2020, Google LLC.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | ### Required
18 |
19 | variable "project" {
20 | type = string
21 | description = "The ID of the Google Cloud project created previously. Required."
22 | }
23 |
24 | variable "project_owners_list" {
25 | type = list(string)
26 | description = "The list of fully-qualified owners (user:, group:, serviceAccount:) of the project"
27 | }
28 |
29 | variable "pipeline_version" {
30 | type = string
31 |   description = "The version of the pipeline code to run; corresponds to release tags in this repository. Required."
32 |
33 | }
34 |
35 | variable "dev_project" {
36 | type = bool
37 | description = "Should this project be configured for development? Defaults to false."
38 | default = false
39 | }
40 |
41 | # should only be set if dev_project is false
42 | variable "facilitator_manifest_url" {
43 | type = string
44 | description = "The facilitator manifest URL"
45 | default = ""
46 | }
47 |
48 | # should only be set if dev_project is false
49 | variable "pha_manifest_url" {
50 | type = string
51 | description = "The PHA manifest URL"
52 | default = ""
53 | }
54 |
55 | ### Suggested
56 |
57 | variable "region" {
58 | type = string
59 | description = "The Google Cloud region in which to create regional resources. Defaults to us-central1."
60 | default = "us-central1"
61 | }
62 |
63 | variable "locality" {
64 | type = string
65 | description = "The locality string of the ENPA installation. Optional to override locality setting."
66 | default = ""
67 | }
68 |
69 | variable "enable_device_attestation" {
70 | type = bool
71 | description = "Dataflow tuning parameter: whether to enable device attestation. Defaults to true."
72 | default = true
73 | }
74 |
75 | variable "ingestion_schedule" {
76 | type = string
77 | description = "a string describing the schedule for ingestion jobs, in cron format. Default: '30 * * * *' (30 minutes past each hour)"
78 | default = "30 * * * *"
79 | }
80 |
81 | variable "ingestion_window" {
82 | type = number
83 | description = "Dataflow tuning parameter: the length (in seconds) of the window that the ingestion pipeline will use to look for new records. Defaults to one hour (3600 seconds)."
84 | default = 3600
85 | }
86 |
87 | variable "deletion_schedule" {
88 | type = string
89 | description = "a string describing the schedule for deletion jobs, in cron format. Default: '0 6,18 * * *' (0600 and 1800, UTC)"
90 | default = "0 6,18 * * *"
91 | }
92 |
93 | variable "deletion_window" {
94 | type = number
95 | description = "Dataflow tuning parameter: the length (in seconds) of the window that the deletion pipeline will use to look for records. Defaults to twelve hours (43200 seconds)."
96 | default = 43200
97 | }
98 |
99 | ### Pipeline Tuning
100 |
101 | variable "batch_size" {
102 | type = number
103 | description = "Dataflow tuning parameter: the number of records per batch. Defaults to 100,000."
104 | default = 100000
105 | }
106 |
107 | variable "ingestion_start_time" {
108 | type = number
109 | description = "Start time in UTC seconds of documents to process for the ingestion pipeline. Defaults to 0 (not set)"
110 | default = 0
111 | }
112 |
113 | variable "ingestion_machine_type" {
114 | type = string
115 | description = "Dataflow tuning parameter: the type of machine to use for the ingestion pipeline. Defaults to n1-standard-4."
116 | default = "n1-standard-4"
117 | }
118 |
119 | variable "ingestion_worker_count" {
120 | type = number
121 | description = "Dataflow tuning parameter: the number of workers used by the ingestion pipeline. Defaults to 10."
122 | default = 10
123 | }
124 |
125 | variable "ingestion_max_worker_count" {
126 | type = number
127 | description = "Dataflow tuning parameter: the number of maximum workers used by the ingestion pipeline. Defaults to 15."
128 | default = 15
129 | }
130 |
131 | variable "ingestion_autoscaling_algorithm" {
132 | type = string
133 | description = "Dataflow tuning parameter: the autoscaling algorithm used by the ingestion pipeline. Can be either THROUGHPUT_BASED or NONE. Defaults to NOT SET."
134 | default = ""
135 | }
136 |
137 | variable "package_signature_digest" {
138 | type = string
139 | description = "Android package signature digest to use during certificate checking. Defaults to NOT SET"
140 | default = ""
141 | }
142 |
143 | variable "package_name" {
144 | type = string
145 | description = "Android package name to use during certificate checking. Defaults to NOT SET"
146 | default = ""
147 | }
148 |
149 | variable "deletion_start_time" {
150 |
151 | type = number
152 | description = "Start time in UTC seconds of documents to process for the deletion pipeline. Defaults to 0 (not set)"
153 | default = 0
154 | }
155 |
156 | variable "deletion_machine_type" {
157 | type = string
158 |   description = "Dataflow tuning parameter: the type of machine to use for the deletion pipeline. Defaults to n1-standard-2."
159 | default = "n1-standard-2"
160 | }
161 |
162 | variable "deletion_worker_count" {
163 | type = number
164 | description = "Dataflow tuning parameter: the number of workers used by the deletion pipeline. Defaults to 10."
165 | default = 10
166 | }
167 |
168 | variable "deletion_max_worker_count" {
169 | type = number
170 | description = "Dataflow tuning parameter: the number of maximum workers used by the deletion pipeline. Defaults to 20."
171 | default = 20
172 | }
173 |
174 | variable "deletion_autoscaling_algorithm" {
175 | type = string
176 | description = "Dataflow tuning parameter: the autoscaling algorithm used by the deletion pipeline. Can be either THROUGHPUT_BASED or NONE. Defaults to NOT SET."
177 | default = ""
178 | }
179 |
180 | ### Internals
181 |
182 | variable "enable_pipelines" {
183 | type = bool
184 | description = "Whether to enable the scheduling of dataflow pipelines. Defaults to true."
185 | default = true
186 | }
187 |
188 | variable "manifest_bucket" {
189 | type = string
190 | description = "The bucket in which to store the generated manifest. Defaults to 'prio-manifests'."
191 | default = "prio-manifests"
192 | }
193 |
194 | variable "templates_bucket" {
195 | type = string
196 |   description = "The bucket from which templates are fetched. Defaults to 'enpa-infra'."
197 | default = "enpa-infra"
198 | }
199 |
200 | variable "terraform_svc_account_email" {
201 | type = string
202 | description = "The email address of the Terraform Runner service account"
203 | }
--------------------------------------------------------------------------------