├── .github ├── header-checker-lint.yml └── workflows │ ├── ci.yaml │ └── terraform.yml ├── .gitignore ├── .gitmodules ├── .mvn └── wrapper │ ├── MavenWrapperDownloader.java │ └── maven-wrapper.properties ├── LICENSE ├── README.md ├── SECURITY.md ├── attestation └── pom.xml ├── ci ├── FirestoreTesting.Dockerfile ├── build.sh ├── common.sh ├── dependencies.sh ├── firebase.sh ├── generate-templates.sh └── integration.sh ├── cloudbuild.yaml ├── config └── firebase │ ├── README.md │ ├── firebase.json │ ├── firestore.rules │ ├── package.json │ └── rules.test.js ├── container-build.yaml ├── df-flex-template-base-image ├── Dockerfile └── java_template_launcher ├── docs ├── code-of-conduct.md └── contributing.md ├── logging.properties ├── model └── pom.xml ├── mvn-settings.xml ├── mvnw ├── mvnw.cmd ├── pipeline └── pom.xml ├── pom.xml ├── src ├── main │ └── java │ │ └── com │ │ └── google │ │ └── exposurenotification │ │ └── privateanalytics │ │ └── ingestion │ │ ├── attestation │ │ └── AbstractDeviceAttestation.java │ │ ├── model │ │ └── DataShare.java │ │ └── pipeline │ │ ├── AWSFederatedAuthHelper.java │ │ ├── BatchWriterFn.java │ │ ├── DataProcessorManifest.java │ │ ├── DateFilterFn.java │ │ ├── DeletionPipeline.java │ │ ├── FirestoreConnector.java │ │ ├── IngestionPipeline.java │ │ ├── IngestionPipelineOptions.java │ │ └── PrioSerializationHelper.java ├── proto │ └── analytics.proto └── test │ ├── java │ └── com │ │ └── google │ │ └── exposurenotification │ │ └── privateanalytics │ │ └── ingestion │ │ ├── model │ │ └── DataShareTest.java │ │ └── pipeline │ │ ├── DataProcessorManifestTest.java │ │ ├── DateFilterFnTest.java │ │ ├── DeletionPipelineIT.java │ │ ├── FirestoreClientTestUtils.java │ │ ├── IngestionPipelineIT.java │ │ ├── IngestionPipelineOptionsTest.java │ │ ├── IngestionPipelineTest.java │ │ ├── PrioSerializationHelperTest.java │ │ └── TestAttestation.java │ └── resources │ └── com │ └── google │ └── exposurenotification │ └── privateanalytics │ └── ingestion │ └── pipeline │ └── test-manifest.json ├── templates ├── dataflow-deletion-metadata-template.json ├── dataflow-flex-template.json ├── dataflow-ingestion-metadata-template.json ├── scheduler-deletion-template.tmpl └── scheduler-ingestion-template.tmpl └── terraform ├── README.md ├── dataflow.tf ├── firestore.tf ├── gcr.tf ├── iam.tf ├── main.tf ├── scheduler.tf └── variables.tf /.github/header-checker-lint.yml: -------------------------------------------------------------------------------- 1 | # Checks if license header exists and if the year is correct 2 | # for files modified by a pull request 3 | # For more information: 4 | # https://github.com/googleapis/repo-automation-bots/tree/master/packages/header-checker-lint#header-checker-lint 5 | allowedCopyrightHolders: ["Google LLC"] 6 | allowedLicenses: ["Apache-2.0"] 7 | sourceFileExtensions: ["java"] -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | on: 17 | push: 18 | branches: 19 | - main 20 | pull_request: 21 | name: ci 22 | jobs: 23 | units: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v2 27 | with: 28 | submodules: 'true' 29 | - uses: actions/setup-java@v2 30 | with: 31 | java-version: 11 32 | distribution: 'adopt' 33 | cache: maven 34 | - run: java -version 35 | - run: ci/build.sh 36 | env: 37 | JOB_TYPE: test 38 | dependencies: 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v2 42 | with: 43 | submodules: 'true' 44 | - uses: actions/setup-java@v2 45 | with: 46 | java-version: 11 47 | distribution: 'adopt' 48 | cache: maven 49 | - run: java -version 50 | - run: ci/dependencies.sh 51 | lint: 52 | runs-on: ubuntu-latest 53 | steps: 54 | - uses: actions/checkout@v2 55 | with: 56 | submodules: 'true' 57 | - uses: actions/setup-java@v2 58 | with: 59 | java-version: 11 60 | distribution: 'adopt' 61 | cache: maven 62 | - run: java -version 63 | - run: ci/build.sh 64 | env: 65 | JOB_TYPE: lint -------------------------------------------------------------------------------- /.github/workflows/terraform.yml: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | name: terraform 17 | 18 | on: 19 | pull_request: 20 | paths: 21 | - 'terraform/**' 22 | - '.github/workflows/terraform*' 23 | 24 | jobs: 25 | lint: 26 | name: 'lint' 27 | runs-on: 'ubuntu-latest' 28 | 29 | steps: 30 | - name: 'checkout' 31 | uses: 'actions/checkout@v2' 32 | 33 | - name: 'setup-terraform' 34 | uses: 'hashicorp/setup-terraform@v1' 35 | with: 36 | terraform_version: '0.15.0' 37 | 38 | - name: 'init' 39 | working-directory: './terraform' 40 | run: 'terraform init' 41 | 42 | - name: 'validate' 43 | working-directory: './terraform' 44 | run: 'terraform validate' 45 | 46 | - name: 'fmt' 47 | working-directory: './terraform' 48 | run: 'terraform fmt -diff -check' 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | credentials/ 3 | target/ 4 | **/.flattened-pom.xml 5 | */pom.xml.tag 6 | **/pom.xml.releaseBackup 7 | **/pom.xml.versionsBackup 8 | **/pom.xml.next 9 | **/release.properties 10 | **/dependency-reduced-pom.xml 11 | **/buildNumber.properties 12 | **/.mvn/timing.properties 13 | **/.mvn/wrapper/maven-wrapper.jar 14 | **/.idea/ 15 | **/node_modules/ 16 | **/**enpa-ingestion*.iml 17 | **/enpa-ingestion.ipr 18 | **/enpa-ingestion.iws 19 | **/**firestore-debug.log 20 | **/**ui-debug.log 21 | **/package-lock.json 22 | 23 | # Visual Code Studio 24 | .classpath 25 | .factorypath 26 | .project 27 | .settings/ 28 | .vscode/ 29 | 30 | # terraform 31 | terraform/.terraform/tmp 32 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/prio-server"] 2 | path = third_party/prio-server 3 | url = https://github.com/abetterinternet/prio-server 4 | -------------------------------------------------------------------------------- /.mvn/wrapper/MavenWrapperDownloader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-present the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import java.net.*; 17 | import java.io.*; 18 | import java.nio.channels.*; 19 | import java.util.Properties; 20 | 21 | public class MavenWrapperDownloader { 22 | 23 | private static final String WRAPPER_VERSION = "0.5.6"; 24 | /** 25 | * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided. 26 | */ 27 | private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/" 28 | + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar"; 29 | 30 | /** 31 | * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to 32 | * use instead of the default one. 
33 | */ 34 | private static final String MAVEN_WRAPPER_PROPERTIES_PATH = 35 | ".mvn/wrapper/maven-wrapper.properties"; 36 | 37 | /** 38 | * Path where the maven-wrapper.jar will be saved to. 39 | */ 40 | private static final String MAVEN_WRAPPER_JAR_PATH = 41 | ".mvn/wrapper/maven-wrapper.jar"; 42 | 43 | /** 44 | * Name of the property which should be used to override the default download url for the wrapper. 45 | */ 46 | private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl"; 47 | 48 | public static void main(String args[]) { 49 | System.out.println("- Downloader started"); 50 | File baseDirectory = new File(args[0]); 51 | System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath()); 52 | 53 | // If the maven-wrapper.properties exists, read it and check if it contains a custom 54 | // wrapperUrl parameter. 55 | File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH); 56 | String url = DEFAULT_DOWNLOAD_URL; 57 | if(mavenWrapperPropertyFile.exists()) { 58 | FileInputStream mavenWrapperPropertyFileInputStream = null; 59 | try { 60 | mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile); 61 | Properties mavenWrapperProperties = new Properties(); 62 | mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream); 63 | url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url); 64 | } catch (IOException e) { 65 | System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'"); 66 | } finally { 67 | try { 68 | if(mavenWrapperPropertyFileInputStream != null) { 69 | mavenWrapperPropertyFileInputStream.close(); 70 | } 71 | } catch (IOException e) { 72 | // Ignore ... 73 | } 74 | } 75 | } 76 | System.out.println("- Downloading from: " + url); 77 | 78 | File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH); 79 | if(!outputFile.getParentFile().exists()) { 80 | if(!outputFile.getParentFile().mkdirs()) { 81 | System.out.println( 82 | "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'"); 83 | } 84 | } 85 | System.out.println("- Downloading to: " + outputFile.getAbsolutePath()); 86 | try { 87 | downloadFileFromURL(url, outputFile); 88 | System.out.println("Done"); 89 | System.exit(0); 90 | } catch (Throwable e) { 91 | System.out.println("- Error downloading"); 92 | e.printStackTrace(); 93 | System.exit(1); 94 | } 95 | } 96 | 97 | private static void downloadFileFromURL(String urlString, File destination) throws Exception { 98 | if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) { 99 | String username = System.getenv("MVNW_USERNAME"); 100 | char[] password = System.getenv("MVNW_PASSWORD").toCharArray(); 101 | Authenticator.setDefault(new Authenticator() { 102 | @Override 103 | protected PasswordAuthentication getPasswordAuthentication() { 104 | return new PasswordAuthentication(username, password); 105 | } 106 | }); 107 | } 108 | URL website = new URL(urlString); 109 | ReadableByteChannel rbc; 110 | rbc = Channels.newChannel(website.openStream()); 111 | FileOutputStream fos = new FileOutputStream(destination); 112 | fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); 113 | fos.close(); 114 | rbc.close(); 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | 
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip 2 | wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![ci](https://github.com/google/exposure-notifications-private-analytics-ingestion/actions/workflows/ci.yaml/badge.svg)](https://github.com/google/exposure-notifications-private-analytics-ingestion/actions/workflows/ci.yaml) 2 | 3 | # Exposure Notification Private Analytics Ingestion 4 | 5 | This repository contains implementations for [Apache Beam](https://beam.apache.org/) 6 | batch pipelines to process private data shares stored in Firestore according 7 | to the Exposure Notification Private Analytics protocol. It assumes private data 8 | shares are uploaded as Firestore documents, as done in the 9 | [Exposure Notification Express template app](https://github.com/google/exposure-notifications-android/blob/4b7b461282b2ede6fb2a93488c6d628440052c8d/app/src/main/java/com/google/android/apps/exposurenotification/privateanalytics/PrivateAnalyticsFirestoreRepository.java#L42). 10 | These documents contain packets encrypted using the [Prio](https://crypto.stanford.edu/prio/) 11 | protocol. The pipeline implementation converts them into the format that 12 | downstream Prio data processing servers expect, defined in the Avro schema 13 | [here](https://github.com/abetterinternet/prio-server/tree/master/avro-schema). 14 | 15 | This implementation makes use of Firestore as a scalable NoSQL database for subsequent 16 | batching and aggregation. Alternative implementations might operate a custom 17 | backend endpoint to accumulate the packets, or use a pub/sub mechanism. Since the 18 | packets are encrypted on device, the channel over which the packets travel need 19 | not be trusted. 20 | 21 | This repository also contains the Firebase configuration to secure Firestore 22 | with [security rules](config/firebase/firestore.rules) as well as 23 | [Terraform scripts](terraform/main.tf) to bring up the required infrastructure. 24 | 25 | ## Setup 26 | 27 | ### Multiple Maven modules 28 | 29 | The project is structured into multiple Maven modules to allow incorporation of 30 | outside implementations of attestation. Implementations need only depend on the 31 | DataShare model module, and a profile can be added to get the implementation included in the 32 | pipeline module build. The pipeline picks up available implementations dynamically. 33 | 34 | Since there aren't too many individual classes that make up each module, and 35 | since they are only meant to be packaged and executed together, we use a single 36 | source tree for all modules. 37 | 38 | Follow the 39 | [Getting started with Google Cloud Dataflow](https://github.com/GoogleCloudPlatform/java-docs-samples/blob/master/dataflow/README.md) 40 | page. You will need the following: 41 | 42 | 1. Set up a 43 | [Google Cloud project](https://console.cloud.google.com/projectcreate) or use an existing one. 44 | Then [import the Google Cloud project into Firebase](https://cloud.google.com/firestore/docs/client/get-firebase). 45 | 46 | 1. [Enable APIs](https://console.cloud.google.com/flows/enableapi?apiid=containerregistry.googleapis.com,cloudbuild.googleapis.com): 47 | Container Registry, Cloud Build, Cloud Datastore and Dataflow. 48 | 49 | 1. 
[Create an asymmetric signing key](https://cloud.google.com/kms/docs/creating-asymmetric-keys#create_an_asymmetric_signing_key) 50 | 51 | 1. [Create a storage bucket](https://cloud.google.com/storage/docs/creating-buckets) 52 | for your outputs. 53 | 54 | 1. Create a service account with permissions for [Firestore](https://cloud.google.com/datastore/docs/access/iam#iam_roles), 55 | [reading the KMS key](https://cloud.google.com/kms/docs/reference/permissions-and-roles), 56 | [Dataflow](https://cloud.google.com/dataflow/docs/concepts/access-control#roles), 57 | and [Cloud Storage](https://cloud.google.com/storage/docs/access-control/iam). 58 | 59 | 1. Download a key for your service account and store it as `credentials.json`. 60 | Keep those credentials safe! 61 | 62 | ### Clone Submodules 63 | A submodule is needed to access the Avro definitions of the Prio classes. 64 | 65 | ```sh 66 | git submodule update --init 67 | ``` 68 | 69 | ### Useful Environment Variables 70 | 71 | Setting the following environment variables can be handy when working in the 72 | project. Replace values in `[...]`. 73 | 74 | ```sh 75 | export PROJECT="[my-google-cloud-ingestion-project-id]" 76 | export GOOGLE_APPLICATION_CREDENTIALS="credentials.json" 77 | export TEMPLATES="gs://[my-cloud-storage-bucket]/templates" 78 | export PHA_OUTPUT="gs://[my-cloud-storage-bucket]/output/pha" 79 | export FACILITATOR_OUTPUT="gs://[my-cloud-storage-bucket]/output/facilitator" 80 | export KEY_RESOURCE_NAME="projects/[some-ingestion-project]/locations/global/keyRings/[some-signature-key-ring]/cryptoKeys/[some-signature-key]/cryptoKeyVersions/1" 81 | ``` 82 | 83 | ## Testing 84 | 85 | ### Unit Tests 86 | 87 | To run unit tests: 88 | 89 | ```shell script 90 | ./mvnw test 91 | ``` 92 | 93 | ### Integration Tests 94 | 95 | Integration tests run against an actual test project, so the environment 96 | variables above (in particular `GOOGLE_APPLICATION_CREDENTIALS`) need to be set: 97 | 98 | ```shell script 99 | ./mvnw verify 100 | ``` 101 | 102 | ## Running the Pipeline 103 | 104 | There are two pipelines. One reads Prio data shares from Firestore and 105 | generates the outputs that the PHA and Facilitator data share processors will 106 | consume. The other deletes expired or already processed data shares from 107 | Firestore. 108 | 109 | Both pipelines take as options the window of time to cover, in the form of a start 110 | time and a duration. When not supplied, the start time for the ingestion pipeline is 111 | calculated from the current time, rounded back to the previous window of length 112 | `duration`. For the deletion pipeline, the start time goes back two windows, to provide a 113 | safety margin against deleting unprocessed data shares.
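For example, a run covering an explicit window (rather than one derived from the current time) could pass the window options alongside the other pipeline arguments, as in the sketch below. It assumes `--startTime` (window start as epoch seconds) and `--duration` (window length in seconds) are the option names exposed by `IngestionPipelineOptions`; check that class, or the Dataflow template metadata under `templates/`, for the exact names and units before relying on them.

```sh
# Sketch only: run the ingestion pipeline over an explicit one-hour window.
# --startTime/--duration are assumed option names; verify them against
# IngestionPipelineOptions before use.
START_TIME=$(date -u -d '2 hours ago' +%s)   # window start, epoch seconds (GNU date)
WINDOW_DURATION=3600                         # window length in seconds

./mvnw compile exec:java \
  -Djava.util.logging.config.file=logging.properties \
  -Dexec.mainClass=com.google.exposurenotification.privateanalytics.ingestion.pipeline.IngestionPipeline \
  -Dexec.args="--keyResourceName=$KEY_RESOURCE_NAME --phaOutput=$PHA_OUTPUT --facilitatorOutput=$FACILITATOR_OUTPUT --startTime=$START_TIME --duration=$WINDOW_DURATION"
```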
114 | 115 | ### Locally 116 | 117 | To run the ingestion pipeline locally: 118 | 119 | ```sh 120 | 121 | ./mvnw compile exec:java \ 122 | -Djava.util.logging.config.file=logging.properties \ 123 | -Dexec.mainClass=com.google.exposurenotification.privateanalytics.ingestion.pipeline.IngestionPipeline \ 124 | -Dexec.args="--keyResourceName=$KEY_RESOURCE_NAME --phaOutput=$PHA_OUTPUT --facilitatorOutput=$FACILITATOR_OUTPUT" 125 | ``` 126 | 127 | To run the deletion pipeline: 128 | 129 | ```sh 130 | ./mvnw compile exec:java \ 131 | -Djava.util.logging.config.file=logging.properties \ 132 | -Dexec.mainClass=com.google.exposurenotification.privateanalytics.ingestion.pipeline.DeletionPipeline \ 133 | -Dexec.args="--project=$PROJECT" 134 | ``` 135 | 136 | ### In Google Cloud Dataflow 137 | 138 | #### From local build 139 | 140 | ```sh 141 | export SERVICE_ACCOUNT_EMAIL=$(egrep -o '[^"]+@[^"]+\.iam\.gserviceaccount\.com' $GOOGLE_APPLICATION_CREDENTIALS) 142 | 143 | BEAM_ARGS=( 144 | "--keyResourceName=$KEY_RESOURCE_NAME" 145 | "--phaOutput=$PHA_OUTPUT" 146 | "--facilitatorOutput=$FACILITATOR_OUTPUT" 147 | "--runner=DataflowRunner" 148 | "--region=us-central1" 149 | "--serviceAccount=$SERVICE_ACCOUNT_EMAIL" 150 | ) 151 | ./mvnw compile exec:java \ 152 | -Dexec.mainClass=com.google.exposurenotification.privateanalytics.ingestion.pipeline.IngestionPipeline \ 153 | -Dexec.args="${BEAM_ARGS[*]}" 154 | ``` 155 | 156 | #### From Flex Template 157 | 158 | See [below](#building-a-flex-template-and-launch-container) for how to generate the Flex Template. 159 | 160 | ```sh 161 | export SERVICE_ACCOUNT_EMAIL=$(egrep -o '[^"]+@[^"]+\.iam\.gserviceaccount\.com' $GOOGLE_APPLICATION_CREDENTIALS) 162 | 163 | gcloud dataflow flex-template run "ingestion-pipeline-$USER-`date +%Y%m%d-%H%M%S`" \ 164 | --template-file-gcs-location "$TEMPLATE_PATH" \ 165 | --parameters project="$PROJECT" \ 166 | --parameters keyResourceName="$KEY_RESOURCE_NAME" \ 167 | --parameters phaOutput="$PHA_OUTPUT" \ 168 | --parameters facilitatorOutput="$FACILITATOR_OUTPUT" \ 169 | --service-account-email "$SERVICE_ACCOUNT_EMAIL" \ 170 | --region "us-central1" 171 | ``` 172 | 173 | ## Building 174 | 175 | We generate a [templated Dataflow job](https://cloud.google.com/dataflow/docs/guides/templates/overview#templated-dataflow-jobs) 176 | that takes all pipeline options as runtime parameters. 177 | 178 | ### Building a Flex Template and Launch Container 179 | 180 | To build the launch containers, we added Maven profiles for the ingestion and deletion pipelines. 
181 | 182 | To build the ingestion pipeline launch container, setting a git-derived version: 183 | 184 | ```sh 185 | ./mvnw -Pingestion-container-build -Dcontainer-version=$(git describe --tags --always --dirty=-dirty) \ 186 | -Dcontainer_registry_tag_prefix='gcr.io/[YOUR_CLOUD_PROJECT]' package 187 | ``` 188 | 189 | To build the ingestion pipeline with a custom attestation implementation, 190 | include the additional `attestation` profile, which assumes the package is 191 | available in any of your configured Maven repositories 192 | (in `.m2/settings.xml` or the local `mvn-settings.xml`): 193 | 194 | ```sh 195 | ./mvnw -Pingestion-container-build,attestation -Dcontainer-version=$(git describe --tags --always --dirty=-dirty) \ 196 | -Dcontainer_registry_tag_prefix='gcr.io/[YOUR_CLOUD_PROJECT]' package 197 | ``` 198 | 199 | To build the deletion pipeline launch container, setting a git-derived version: 200 | 201 | ```sh 202 | ./mvnw -Pdeletion-container-build -Dcontainer-version=$(git describe --tags --always --dirty=-dirty) \ 203 | -Dcontainer_registry_tag_prefix='gcr.io/[YOUR_CLOUD_PROJECT]' package 204 | ``` 205 | 206 | Built containers are automatically published to the `container_registry_tag_prefix` you set, e.g. for Google Container 207 | Registry: `gcr.io/[YOUR_CLOUD_PROJECT]/ingestion-pipeline:$VERSION` and `gcr.io/[YOUR_CLOUD_PROJECT]/deletion-pipeline:$VERSION` 208 | respectively. 209 | 210 | To generate the Flex Template metadata files and upload them to GCS, run: 211 | 212 | *The following commands require the Node.js `json` tool (`npm install -g json`).* 213 | Use the same `container_registry_tag_prefix` as in the builds above. 214 | 215 | ```sh 216 | export VERSION=$(git describe --tags --always --dirty=-dirty) 217 | 218 | json -f templates/dataflow-flex-template.json \ 219 | -e "this.metadata=`cat templates/dataflow-ingestion-metadata-template.json`" \ 220 | -e "this.image='gcr.io/[YOUR_CLOUD_PROJECT]/ingestion-pipeline:$VERSION'" > ingestion-pipeline-$VERSION.json 221 | 222 | json -f templates/dataflow-flex-template.json \ 223 | -e "this.metadata=`cat templates/dataflow-deletion-metadata-template.json`" \ 224 | -e "this.image='gcr.io/[YOUR_CLOUD_PROJECT]/deletion-pipeline:$VERSION'" > deletion-pipeline-$VERSION.json 225 | 226 | gsutil cp ingestion-pipeline-$VERSION.json $TEMPLATES 227 | gsutil cp deletion-pipeline-$VERSION.json $TEMPLATES 228 | 229 | gsutil -h "Content-Type:application/json" cp templates/scheduler-ingestion-template.tmpl \ 230 | $TEMPLATES/scheduler-ingestion-template-$VERSION.tmpl 231 | gsutil -h "Content-Type:application/json" cp templates/scheduler-deletion-template.tmpl \ 232 | $TEMPLATES/scheduler-deletion-template-$VERSION.tmpl 233 | 234 | unset VERSION 235 | ``` 236 | 237 | ## Contributing 238 | 239 | Contributions to this repository are always welcome and highly encouraged. 240 | 241 | See [CONTRIBUTING](docs/contributing.md) for more information on how to get started. 242 | 243 | ## License 244 | 245 | Apache 2.0 - See [LICENSE](LICENSE) for more information. 246 | 247 | *This is not an official Google product* 248 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | To report a suspected vulnerability, please contact 4 | exposure-notifications-feedback@google.com and include the steps to 5 | reproduce the vulnerability. 
6 | -------------------------------------------------------------------------------- /attestation/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 | 4.0.0 19 | 20 | com.google.exposurenotification.privateanalytics.ingestion 21 | enpa-ingestion 22 | ${revision} 23 | ../pom.xml 24 | 25 | com.google.exposurenotification.privateanalytics.ingestion 26 | enpa-ingestion-attestation 27 | jar 28 | 29 | 30 | ../src/main/java 31 | ../src/test/java 32 | 33 | 34 | org.apache.maven.plugins 35 | maven-compiler-plugin 36 | ${maven-compiler-plugin.version} 37 | 38 | 39 | 40 | com.google.auto.service 41 | auto-service 42 | ${auto-service.version} 43 | 44 | 45 | 46 | **/attestation/**/*.java 47 | **/generated-sources/** 48 | 49 | 50 | **/attestation/** 51 | **/DataShareTest.java 52 | 53 | 54 | 55 | 56 | org.codehaus.mojo 57 | build-helper-maven-plugin 58 | 3.0.0 59 | 60 | 61 | generate-sources 62 | 63 | add-source 64 | 65 | 66 | 67 | 68 | ../third_party/android-key-attestation/server/src/main 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | com.github.os72 77 | protoc-jar-maven-plugin 78 | 3.11.4 79 | 80 | 81 | generate-sources 82 | 83 | run 84 | 85 | 86 | 3.13.0 87 | 88 | ../src/proto 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | com.google.exposurenotification.privateanalytics.ingestion 100 | enpa-ingestion-model 101 | ${revision} 102 | 103 | 104 | org.apache.beam 105 | beam-sdks-java-core 106 | ${beam.version} 107 | 108 | 109 | org.bouncycastle 110 | bcpkix-jdk15on 111 | 1.61 112 | compile 113 | 114 | 115 | org.apache.beam 116 | beam-runners-core-construction-java 117 | ${beam.version} 118 | 119 | 120 | org.apache.beam 121 | beam-runners-direct-java 122 | ${beam.version} 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /ci/FirestoreTesting.Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM openjdk:17-jdk-alpine3.14 16 | 17 | RUN apk add --no-cache nodejs npm bash 18 | RUN npm i -g firebase-tools 19 | RUN firebase emulators:exec --only firestore "npm -v" 20 | 21 | WORKDIR /workspace 22 | -------------------------------------------------------------------------------- /ci/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -eo pipefail 17 | 18 | ## Get the directory of the build script 19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}")) 20 | ## cd to the parent directory, i.e. the root of the git repo 21 | cd ${scriptDir}/.. 22 | 23 | # include common functions 24 | source ${scriptDir}/common.sh 25 | 26 | # Print out Java version 27 | java -version 28 | echo ${JOB_TYPE} 29 | 30 | RETURN_CODE=0 31 | set +e 32 | 33 | git submodule update --init 34 | 35 | case ${JOB_TYPE} in 36 | test) 37 | ./mvnw test 38 | RETURN_CODE=$? 39 | ;; 40 | lint) 41 | ./mvnw com.coveo:fmt-maven-plugin:check 42 | RETURN_CODE=$? 43 | if [[ ${RETURN_CODE} != 0 ]]; then 44 | echo "To fix formatting errors, run: mvn com.coveo:fmt-maven-plugin:format" 45 | fi 46 | ;; 47 | esac 48 | 49 | echo "exiting with ${RETURN_CODE}" 50 | exit ${RETURN_CODE} 51 | -------------------------------------------------------------------------------- /ci/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | function retry_with_backoff { 17 | attempts_left=$1 18 | sleep_seconds=$2 19 | shift 2 20 | command=$@ 21 | 22 | 23 | # store current flag state 24 | flags=$- 25 | 26 | # allow a failures to continue 27 | set +e 28 | echo "${command}" 29 | ${command} 30 | exit_code=$? 31 | 32 | # restore "e" flag 33 | if [[ ${flags} =~ e ]] 34 | then set -e 35 | else set +e 36 | fi 37 | 38 | if [[ $exit_code == 0 ]] 39 | then 40 | return 0 41 | fi 42 | 43 | # failure 44 | if [[ ${attempts_left} > 0 ]] 45 | then 46 | echo "failure (${exit_code}), sleeping ${sleep_seconds}..." 47 | sleep ${sleep_seconds} 48 | new_attempts=$((${attempts_left} - 1)) 49 | new_sleep=$((${sleep_seconds} * 2)) 50 | retry_with_backoff ${new_attempts} ${new_sleep} ${command} 51 | fi 52 | 53 | return $exit_code 54 | } 55 | 56 | ## Helper functionss 57 | function now() { date +"%Y-%m-%d %H:%M:%S" | tr -d '\n'; } 58 | function msg() { println "$*" >&2; } 59 | function println() { printf '%s\n' "$(now) $*"; } -------------------------------------------------------------------------------- /ci/dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -eo pipefail 17 | 18 | ## Get the directory of the build script 19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}")) 20 | ## cd to the parent directory, i.e. the root of the git repo 21 | cd ${scriptDir}/.. 22 | 23 | # include common functions 24 | source ${scriptDir}/common.sh 25 | 26 | # Print out Java 27 | java -version 28 | 29 | export MAVEN_OPTS="-Xmx1024m -XX:MaxPermSize=128m" 30 | 31 | # this should run maven enforcer 32 | retry_with_backoff 3 10 \ 33 | ./mvnw install -B -V \ 34 | -DskipTests=true \ 35 | -Dclirr.skip=true 36 | 37 | ./mvnw -B dependency:analyze -------------------------------------------------------------------------------- /ci/firebase.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | ## Run Firebase integration tests 17 | echo "************ Running Firebase integration tests script ************" 18 | ## Get the directory of the build script 19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}")) 20 | ## cd to the parent directory, i.e. the root of the git repo 21 | cd ${scriptDir}/.. 22 | cd config/firebase 23 | 24 | echo "************ Installing npm testing library and jest ************" 25 | npm init -y 26 | npm i @firebase/testing jest 27 | echo "************ Dependencies installed successfully! ************" 28 | 29 | echo "************ Executing rules.test.js ************" 30 | firebase emulators:exec --project=$PROJECT --only firestore "npm run test" 31 | -------------------------------------------------------------------------------- /ci/generate-templates.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -eo pipefail 17 | 18 | ## Get the directory of the build script 19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}")) 20 | ## cd to the parent directory, i.e. the root of the git repo 21 | cd ${scriptDir}/.. 
22 | 23 | # include common functions 24 | source ${scriptDir}/common.sh 25 | 26 | apk add --update npm 27 | 28 | # Print out versions 29 | gsutil --version 30 | npm install -g json 31 | json --version 32 | 33 | export VERSION=$(git describe --tags --always --dirty=-dirty) 34 | 35 | # Generate Dataflow Flex Templates, version and upload to GCS 36 | json -f templates/dataflow-flex-template.json \ 37 | -e "this.metadata=$(cat templates/dataflow-ingestion-metadata-template.json)" \ 38 | -e "this.image='gcr.io/enpa-infra/ingestion-pipeline:$VERSION'" > ingestion-pipeline-$VERSION.json 39 | 40 | json -f templates/dataflow-flex-template.json \ 41 | -e "this.metadata=$(cat templates/dataflow-deletion-metadata-template.json)" \ 42 | -e "this.image='gcr.io/enpa-infra/deletion-pipeline:$VERSION'" > deletion-pipeline-$VERSION.json 43 | 44 | json -f templates/dataflow-flex-template.json \ 45 | -e "this.metadata=$(cat templates/dataflow-ingestion-metadata-template.json)" \ 46 | -e "this.image='gcr.io/enpa-public-assets/ingestion-pipeline:$VERSION'" > public-ingestion-pipeline-$VERSION.json 47 | 48 | json -f templates/dataflow-flex-template.json \ 49 | -e "this.metadata=$(cat templates/dataflow-deletion-metadata-template.json)" \ 50 | -e "this.image='gcr.io/enpa-public-assets/deletion-pipeline:$VERSION'" > public-deletion-pipeline-$VERSION.json 51 | 52 | gsutil cp ingestion-pipeline-$VERSION.json gs://enpa-pipeline-specs/ 53 | gsutil cp deletion-pipeline-$VERSION.json gs://enpa-pipeline-specs/ 54 | gsutil cp public-ingestion-pipeline-$VERSION.json gs://enpa-pipeline-specs/ 55 | gsutil cp public-deletion-pipeline-$VERSION.json gs://enpa-pipeline-specs/ 56 | 57 | # Version and upload scheduler templates to GCS 58 | gsutil -h "Content-Type:application/json" cp templates/scheduler-ingestion-template.tmpl gs://enpa-pipeline-specs/scheduler-ingestion-template-$VERSION.tmpl 59 | gsutil -h "Content-Type:application/json" cp templates/scheduler-deletion-template.tmpl gs://enpa-pipeline-specs/scheduler-deletion-template-$VERSION.tmpl 60 | 61 | # Version Firestore Security Rules and upload to GCS 62 | gsutil -h "Content-Type:text/plain" cp config/firebase/firestore.rules gs://enpa-infra/security-rules/firestore-$VERSION.rules 63 | -------------------------------------------------------------------------------- /ci/integration.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -eo pipefail 17 | 18 | ## Get the directory of the build script 19 | scriptDir=$(realpath $(dirname "${BASH_SOURCE[0]}")) 20 | ## cd to the parent directory, i.e. the root of the git repo 21 | cd ${scriptDir}/.. 
22 | 23 | # include common functions 24 | source ${scriptDir}/common.sh 25 | 26 | # Print out Java 27 | java -version 28 | 29 | RETURN_CODE=0 30 | set +e 31 | 32 | git submodule update --init 33 | 34 | retry_with_backoff 3 10 \ 35 | ./mvnw clean verify 36 | 37 | # enable once we can write to the firewalled sonarqube instance 38 | #retry_with_backoff 3 10 \ 39 | # ./mvnw -Pcoverage clean verify sonar:sonar -Dsonar.projectKey=enpa-ingestion -Dsonar.host.url=http://10.128.0.2:9000 -Dsonar.login=$SONAR_LOGIN 40 | -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | ########################################################### 17 | # Step 1: Retrieve the cached .m2 directory from GCS 18 | ########################################################### 19 | - name: gcr.io/cloud-builders/gsutil 20 | id: fetch-mvn-cache 21 | args: 22 | - '-m' 23 | - 'rsync' 24 | - '-r' 25 | - 'gs://${_BUCKET}/cache/.m2' 26 | - '/cache/.m2' 27 | volumes: 28 | - path: '/cache/.m2' 29 | name: 'm2_cache' 30 | 31 | ########################################################### 32 | # Step 2: Run project integration tests using Maven 33 | ########################################################### 34 | - name: gcr.io/cloud-devrel-public-resources/java11 35 | id: pr-integration-tests 36 | waitFor: 37 | - fetch-mvn-cache 38 | entrypoint: bash 39 | args: 40 | - "-e" 41 | - "./ci/integration.sh" 42 | volumes: 43 | - path: '/cache/.m2' 44 | name: 'm2_cache' 45 | env: 46 | - MAVEN_OPTS=-Dmaven.repo.local=/cache/.m2 47 | - PROJECT=enpa-integration-testing 48 | - KEY_RESOURCE_NAME=projects/enpa-integration-testing/locations/global/keyRings/appa-signature-key/cryptoKeys/appa-signature-key/cryptoKeyVersions/1 49 | 50 | ########################################################### 51 | # Step 3: Run Firebase integration tests using npm 52 | ########################################################### 53 | - name: gcr.io/enpa-public-assets/firestore-tesing:v1 54 | id: firebase-integration-tests 55 | waitFor: 56 | - '-' 57 | entrypoint: bash 58 | args: 59 | - "-e" 60 | - "./ci/firebase.sh" 61 | env: 62 | - PROJECT=enpa-integration-testing 63 | 64 | ########################################################### 65 | # Step 4: Update cached .m2 directory on GCS with any 66 | # additional dependencies downloaded during the 67 | # build. 
68 | ########################################################### 69 | - name: gcr.io/cloud-builders/gsutil 70 | id: sync-mvn-cache 71 | waitFor: 72 | - pr-integration-tests 73 | args: 74 | - '-m' 75 | - 'rsync' 76 | - '-r' 77 | - '/cache/.m2' 78 | - 'gs://${_BUCKET}/cache/.m2/' 79 | volumes: 80 | - path: '/cache/.m2' 81 | name: 'm2_cache' 82 | 83 | serviceAccount: 'projects/enpa-infra/serviceAccounts/development-integration-tests@enpa-integration-testing.iam.gserviceaccount.com' 84 | 85 | substitutions: 86 | # Default value 87 | _BUCKET: 'enpa-infra' 88 | 89 | options: 90 | # Use higher CPU machines so the caching and build steps are faster. 91 | machineType: 'N1_HIGHCPU_32' -------------------------------------------------------------------------------- /config/firebase/README.md: -------------------------------------------------------------------------------- 1 | # Firebase Config 2 | 3 | Install the [Firebase CLI](https://firebase.google.com/docs/cli). 4 | 5 | ## Firestore Security Rules 6 | 7 | This provides a sample way to configure Firestore documents for 8 | Exposure Notifications Private Analytics. 9 | 10 | ### Testing 11 | 12 | First install the emulator, npm testing library and jest: 13 | 14 | ```shell script 15 | npm install -g firebase-tools 16 | firebase setup:emulators:firestore 17 | npm init -y 18 | npm i @firebase/testing 19 | npm i jest 20 | ``` 21 | 22 | Then start the emulator and execute the test script: 23 | 24 | ```shell script 25 | firebase emulators:exec --only firestore "npm run test" 26 | ``` 27 | 28 | ### Deploying 29 | 30 | Login firebase as 31 | follows: 32 | 33 | ```shell script 34 | firebase login 35 | ``` 36 | 37 | You can update your projects Firestore Security Policy with these rules as 38 | follows: 39 | 40 | ```shell script 41 | firebase deploy --only firestore:rules 42 | ``` 43 | -------------------------------------------------------------------------------- /config/firebase/firebase.json: -------------------------------------------------------------------------------- 1 | { 2 | "firestore": { 3 | "rules": "firestore.rules" 4 | }, 5 | "emulators": { 6 | "firestore": { 7 | "host": "localhost", 8 | "port": "8080" 9 | }, 10 | "ui": { 11 | "enabled": true, 12 | "host": "localhost", 13 | "port": 4000 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /config/firebase/firestore.rules: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | rules_version = '2'; 15 | service cloud.firestore { 16 | match /databases/{database}/documents { 17 | 18 | // Limited rules execution environment and functions are constrained: 19 | // https://firebase.google.com/docs/rules/rules-language#function 20 | // https://firebase.google.com/docs/reference/rules/index-all 21 | // https://github.com/google/cel-spec/blob/master/doc/langdef.md 22 | function quickPad(datePart) { 23 | return datePart.size() == 1 ? "0" + datePart : datePart; 24 | } 25 | 26 | // Assemble YYYY-MM-DD-HH from timestamp 27 | function buildDatePath(t) { 28 | return string(t.year()) + '-' 29 | + quickPad(string(t.month())) + '-' 30 | + quickPad(string(t.day())) + '-' 31 | + quickPad(string(t.hours())) 32 | } 33 | 34 | // Check date path against timestamp, allowing for clock skew 35 | function checkDatePath(datePath, t) { 36 | return datePath == buildDatePath(t + duration.value(1, 'h')) 37 | || datePath == buildDatePath(t) 38 | || datePath == buildDatePath(t - duration.value(1, 'h')); 39 | } 40 | 41 | // There are no filter() or map() functions available to us, and recursion 42 | // is not allowed, so we have to unroll the check of the list manually. 43 | // https://groups.google.com/g/google-cloud-firestore-discuss/c/S9BqqUcR4Lc/m/4CRjqfMyBwAJ 44 | function checkCertLengths(chain) { 45 | return checkCertLength(chain, 1) 46 | && checkCertLength(chain, 2) 47 | && checkCertLength(chain, 3) 48 | && checkCertLength(chain, 4) 49 | && checkCertLength(chain, 5) 50 | && checkCertLength(chain, 6) 51 | && checkCertLength(chain, 7) 52 | && checkCertLength(chain, 8) 53 | && checkCertLength(chain, 9) 54 | && checkCertLength(chain, 10); 55 | } 56 | 57 | // CEL implementations aren't guaranteed to do short circuit evaluation of 58 | // logical operators, so we use conditional operators here to avoid out of 59 | // bounds exceptions/NPEs. 60 | // https://github.com/google/cel-spec/blob/master/doc/langdef.md#logical-operators 61 | // Fun times. 62 | function checkCertLength(chain, i) { 63 | return chain.size() > i ? chain[i].size() < 30000 : true; 64 | } 65 | function safeCheckSize(s, len) { 66 | return s != null ? 
s.size() < len : true; 67 | } 68 | 69 | // Check schema of uploaded document 70 | function checkFields(d) { 71 | return 72 | // Check top level required fields 73 | d.payload != null 74 | && d.signature != null 75 | && d.certificateChain != null 76 | // nothing extraneous at top level 77 | && d.keys().toSet().hasOnly(['certificateChain', 'signature', 'payload']) 78 | // Check `payload` required fields 79 | // `uuid`, `created` already enforced elsewhere 80 | && d.payload.schemaVersion != null 81 | && d.payload.encryptedDataShares != null 82 | // TODO: support arbitrary number of servers here 83 | && d.payload.encryptedDataShares.size() == 2 84 | && d.payload.encryptedDataShares[0].payload != null 85 | && d.payload.encryptedDataShares[1].payload != null 86 | // check sizes 87 | && d.signature.size() < 200 88 | && d.certificateChain.size() >= 3 89 | && d.certificateChain.size() <= 10 90 | && checkCertLengths(d.certificateChain) 91 | && d.payload.uuid.size() < 50 92 | && safeCheckSize(d.payload.encryptedDataShares[0].encryptionKeyId, 100) 93 | && safeCheckSize(d.payload.encryptedDataShares[1].encryptionKeyId, 100) 94 | && d.payload.encryptedDataShares[0].payload.size() < 100000 95 | && d.payload.encryptedDataShares[1].payload.size() < 100000 96 | // nothing extraneous at payload level 97 | && d.payload.keys().toSet().hasOnly([ 98 | 'uuid', 'created', 'schemaVersion', 'encryptedDataShares', 'prioParams' 99 | ]) 100 | // share per server 101 | && d.payload.encryptedDataShares.size() == d.payload.prioParams.numberServers 102 | // Check prioParams required fields 103 | && d.payload.prioParams != null 104 | && d.payload.prioParams.bins != null 105 | && d.payload.prioParams.epsilon != null 106 | && d.payload.prioParams.epsilon > 1 107 | && d.payload.prioParams.numberServers != null 108 | && d.payload.prioParams.prime != null 109 | // nothing extraneous at prioParams level 110 | && d.payload.prioParams.keys().toSet().hasOnly([ 111 | 'bins', 'epsilon', 'hammingWeight', 'numberServers', 'prime' 112 | ]); 113 | } 114 | 115 | // Check metric name 116 | function checkMetricName(n) { 117 | return n in [ 118 | // Metric for testing 119 | 'fakeMetric-v1', 120 | // Currently supported metrics: 121 | // https://github.com/google/exposure-notifications-android/tree/master/app/src/main/java/com/google/android/apps/exposurenotification/privateanalytics/metrics 122 | 'CodeVerified-v1', 123 | 'CodeVerifiedWithReportType14d-v1', 124 | 'CodeVerifiedWithReportType14d-v2', 125 | 'DateExposure-v1', 126 | 'DateExposure-v2', 127 | 'DateExposure14d-v3', 128 | 'histogramMetric-v1', 129 | 'histogramMetric-v2', 130 | 'KeysUploaded-v1', 131 | 'KeysUploadedVaccineStatus-v1', 132 | 'KeysUploadedVaccineStatus-v2', 133 | 'KeysUploadedVaccineStatus14d-v3', 134 | 'KeysUploadedWithReportType14d-v1', 135 | 'KeysUploadedWithReportType14d-v2', 136 | 'PeriodicExposureNotification-v1', 137 | 'PeriodicExposureNotification14d-v1', 138 | 'PeriodicExposureNotification14d-v2', 139 | 'PeriodicExposureNotification14d-v3', 140 | 'PeriodicExposureNotificationInteraction-v1', 141 | 'SecondaryAttack14d-v1', 142 | 'SecondaryAttack14d-v2' 143 | ]; 144 | } 145 | 146 | match /{top}/{uuid}/{date}/{metricName} { 147 | allow create: if request.resource.data.payload.uuid == uuid 148 | && top.matches('uuid[0-9]*') 149 | && request.resource.data.payload.created == request.time 150 | // Don't ingest anything with auth tokens attached 151 | && request.auth == null 152 | && checkDatePath(date, request.resource.data.payload.created) 153 | && 
checkFields(request.resource.data) 154 | && checkMetricName(metricName); 155 | } 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /config/firebase/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "firebaseconfigstest", 3 | "version": "1.0.0", 4 | "description": "test suite for configurations", 5 | "main": "rules_test.js", 6 | "scripts": { 7 | "test": "jest --env=node --verbose --forceExit --runInBand" 8 | }, 9 | "dependencies": { 10 | "@firebase/testing": "^0.20.11", 11 | "jest": "^26.6.3" 12 | }, 13 | "keywords": [], 14 | "author": "", 15 | "license": "ISC", 16 | "devDependencies": {} 17 | } 18 | -------------------------------------------------------------------------------- /config/firebase/rules.test.js: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | const firebase = require('@firebase/testing'); 15 | const fs = require('fs'); 16 | const path = require('path'); 17 | const assert = require('assert'); 18 | 19 | const projectId = "emulator-test-project" 20 | const adminApp = firebase.initializeAdminApp({ projectId: projectId }); 21 | 22 | beforeAll(async () => { 23 | await firebase.loadFirestoreRules({ 24 | projectId: projectId, 25 | rules: fs.readFileSync("firestore.rules", "utf8") 26 | }); 27 | const doc = adminApp.firestore().collection('uuid').doc('preexisting') 28 | .collection('2020-09-03-13').doc('fakeMetric-v1'); 29 | await doc.set({ 30 | 'payload': { 31 | 'created': firebase.firestore.FieldValue.serverTimestamp(), 32 | 'uuid': 'preexisting', 33 | } 34 | }); 35 | }); 36 | 37 | function getPath(date) { 38 | return date.toISOString().split('T')[0] + "-" 39 | + date.toISOString().split('T')[1].split(':')[0]; 40 | } 41 | 42 | function correctContents(uuid = 'foo') { 43 | return { 44 | 'payload': { 45 | 'created': firebase.firestore.FieldValue.serverTimestamp(), 46 | 'uuid': uuid, 47 | 'schemaVersion': '1', 48 | 'encryptedDataShares': [ 49 | { 50 | 'payload': 'payload1', 51 | 'encryptionKeyId': 'key1' 52 | }, 53 | { 54 | 'payload': 'payload2', 55 | 'encryptionKeyId': 'key2' 56 | } 57 | ], 58 | 'prioParams': { 59 | 'bins': 1, 60 | 'epsilon': 2, 61 | 'hammingWeight': 3, 62 | 'numberServers': 2, 63 | 'prime': 5 64 | } 65 | }, 66 | 'certificateChain': ['cert1', 'cert2', 'cert3'], 67 | 'signature': 'sig' 68 | }; 69 | } 70 | 71 | describe('Tests of document writes and access', () => { 72 | const app = firebase.initializeTestApp({ 73 | projectId: projectId, 74 | auth: null 75 | }); 76 | const db = app.firestore() 77 | const datefmt = getPath(new Date()); 78 | it('document cannot be written at wrong path', 79 | async () => { 80 | const doc = db.collection('random').doc('wrongpath'); 81 | await firebase.assertFails(doc.set(correctContents())); 82 | }); 83 | it('document cannot be written without payload', 84 | 
async () => { 85 | const doc = db.collection('uuid').doc('nopayload') 86 | .collection(datefmt).doc('fakeMetric-v1'); 87 | contents = correctContents('nopayload'); 88 | delete contents['payload']; 89 | await firebase.assertFails(doc.set(contents)); 90 | }); 91 | it('document cannot be written without uuid', 92 | async () => { 93 | const doc = db.collection('uuid').doc('nouuidfield') 94 | .collection(datefmt).doc('fakeMetric-v1'); 95 | contents = correctContents(); 96 | delete contents['payload']['uuid'] ; 97 | await firebase.assertFails(doc.set(contents)); 98 | }); 99 | it('document cannot be written without created field', 100 | async () => { 101 | const doc = db.collection('uuid').doc('nocreated') 102 | .collection(datefmt).doc('fakeMetric-v1'); 103 | contents = correctContents('nocreated'); 104 | delete contents['payload']['created']; 105 | await firebase.assertFails(doc.set(contents)); 106 | }); 107 | it('document cannot be written with extraneous field', 108 | async () => { 109 | const doc = db.collection('uuid').doc('extraneous') 110 | .collection(datefmt).doc('fakeMetric-v1'); 111 | contents = correctContents('extraneous'); 112 | contents['payload']['prioParams']['banana'] = "extra field"; 113 | await firebase.assertFails(doc.set(contents)); 114 | }); 115 | it('documents cannot be created at very old path', 116 | async () => { 117 | var oldDate = new Date(); 118 | oldDate.setHours(oldDate.getHours() - 2); 119 | const doc = db.collection('uuid').doc('old') 120 | .collection(getPath(oldDate)).doc('fakeMetric-v1'); 121 | await firebase.assertFails(doc.set(correctContents('old'))); 122 | }); 123 | it('documents cannot be created with very large uuids', 124 | async () => { 125 | longuuid = 'x'.repeat(1000); 126 | const doc = db.collection('uuid').doc(longuuid) 127 | .collection(datefmt).doc('fakeMetric-v1'); 128 | contents = correctContents(longuuid); 129 | contents['payload']['uuid'] = longuuid; 130 | await firebase.assertFails(doc.set(contents)); 131 | }); 132 | it('documents cannot be created with very large signatures', 133 | async () => { 134 | const doc = db.collection('uuid').doc('longsig') 135 | .collection(datefmt).doc('fakeMetric-v1'); 136 | contents = correctContents('longsig'); 137 | contents['signature'] = 'x'.repeat(1000); 138 | await firebase.assertFails(doc.set(contents)); 139 | }); 140 | it('documents cannot be created with very long certificate chains', 141 | async () => { 142 | const doc = db.collection('uuid').doc('longchain') 143 | .collection(datefmt).doc('fakeMetric-v1'); 144 | contents = correctContents('longchain'); 145 | contents['certificateChain'] = Array(12).fill('cert') 146 | await firebase.assertFails(doc.set(contents)); 147 | }); 148 | it('documents cannot be created with a large certificate', 149 | async () => { 150 | const doc = db.collection('uuid').doc('longcert') 151 | .collection(datefmt).doc('fakeMetric-v1'); 152 | contents = correctContents('longcert'); 153 | contents['certificateChain'].push('x'.repeat(50000)); 154 | await firebase.assertFails(doc.set(contents)); 155 | }); 156 | it('correct documents can be created', 157 | async () => { 158 | const doc = db.collection('uuid').doc('correct1') 159 | .collection(datefmt).doc('fakeMetric-v1'); 160 | await firebase.assertSucceeds(doc.set(correctContents('correct1'))); 161 | }); 162 | it('documents can be created at slightly off path', 163 | async () => { 164 | var oldDate = new Date(); 165 | oldDate.setHours(oldDate.getHours() - 1); 166 | const doc = db.collection('uuid').doc('correct2') 167 | 
.collection(getPath(oldDate)).doc('fakeMetric-v1'); 168 | await firebase.assertSucceeds(doc.set(correctContents('correct2'))); 169 | }); 170 | it('documents can be created at sharded top level collection', 171 | async () => { 172 | var oldDate = new Date(); 173 | oldDate.setHours(oldDate.getHours() - 1); 174 | const doc = db.collection('uuid24').doc('correct3') 175 | .collection(getPath(oldDate)).doc('fakeMetric-v1'); 176 | await firebase.assertSucceeds(doc.set(correctContents('correct3'))); 177 | }); 178 | it('document cannot be deleted', 179 | async () => { 180 | const doc = db.collection('uuid').doc('preexisting') 181 | .collection('2020-09-03-13').doc('fakeMetric-v1'); 182 | await firebase.assertFails(doc.delete()); 183 | }); 184 | it('document cannot be updated', 185 | async () => { 186 | const doc = db.collection('uuid').doc('preexisting') 187 | .collection('2020-09-03-13').doc('fakeMetric-v1'); 188 | await firebase.assertFails(doc.update(correctContents('preexisting'))); 189 | }); 190 | it('document cannot be read', 191 | async () => { 192 | const doc = db.collection('uuid').doc('preexisting') 193 | .collection('2020-09-03-13').doc('fakeMetric-v1'); 194 | await firebase.assertFails(doc.get()); 195 | }); 196 | it('check final state of firestore', 197 | async () => { 198 | const querySnapshot = await adminApp.firestore() 199 | .collectionGroup('uuid').get(); 200 | foundUuids = [] 201 | querySnapshot.forEach((doc) => { 202 | foundUuids.push(doc.data()['payload']['uuid']); 203 | }); 204 | assert.notStrictEqual(foundUuids, 205 | [ 'correct1', 'correct2', 'correct3', 'preexisting' ]) 206 | }); 207 | }); 208 | -------------------------------------------------------------------------------- /container-build.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | steps: 16 | ########################################################### 17 | # Step 1: Retrieve the cached .m2 directory from GCS 18 | ########################################################### 19 | - name: 'gcr.io/cloud-builders/gsutil' 20 | id: fetch-mvn-cache 21 | args: 22 | - '-m' 23 | - 'rsync' 24 | - '-r' 25 | - 'gs://${_BUCKET}/cache/.m2' 26 | - '/cache/.m2' 27 | volumes: 28 | - path: '/cache/.m2' 29 | name: 'm2_cache' 30 | 31 | ########################################################### 32 | # Step 2: Download submodules 33 | ########################################################### 34 | - name: gcr.io/cloud-builders/git 35 | waitFor: 36 | - '-' 37 | id: update-submodules 38 | args: ['submodule', 'update', '--init'] 39 | 40 | ########################################################### 41 | # Step 3: Retag cloned git repo hash 42 | ########################################################### 43 | - name: gcr.io/cloud-builders/git 44 | waitFor: 45 | - '-' 46 | id: recreate-tag 47 | args: ['tag', '$_TAG'] 48 | 49 | ########################################################### 50 | # Step 4: Rebuild Dataflow Flex Template base image 51 | ########################################################### 52 | - name: gcr.io/cloud-builders/docker 53 | id: rebuild-dataflow-flex-base-image 54 | waitFor: 55 | - '-' 56 | args: 57 | - 'build' 58 | - '-t' 59 | - 'gcr.io/enpa-public-assets/distroless-flex-template-launcher:11' 60 | - 'df-flex-template-base-image' 61 | 62 | ########################################################### 63 | # Step 5: Publish Dataflow Flex Template base image 64 | ########################################################### 65 | - name: gcr.io/cloud-builders/docker 66 | id: push-dataflow-flex-base-image 67 | waitFor: 68 | - rebuild-dataflow-flex-base-image 69 | args: 70 | - 'push' 71 | - 'gcr.io/enpa-public-assets/distroless-flex-template-launcher:11' 72 | 73 | ########################################################### 74 | # Step 6: Package and Deploy to Github maven repo 75 | ########################################################### 76 | - name: gcr.io/cloud-devrel-public-resources/java11 77 | id: package-deploy-mvn-registry 78 | waitFor: 79 | - update-submodules 80 | - recreate-tag 81 | - fetch-mvn-cache 82 | entrypoint: bash 83 | args: 84 | - '-c' 85 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -U -s mvn-settings.xml -DskipTests -Drevision=$(${_VERSION}) deploy' 86 | env: 87 | - 'PROJECT=enpa-integration-testing' 88 | - 'KEY_RESOURCE_NAME=projects/enpa-integration-testing/locations/global/keyRings/appa-signature-key/cryptoKeys/appa-signature-key/cryptoKeyVersions/1' 89 | secretEnv: ['GITHUB_TOKEN'] 90 | 91 | ########################################################### 92 | # Step 7: Build and Publish Public Ingestion Container Image 93 | ########################################################### 94 | - name: gcr.io/cloud-devrel-public-resources/java11 95 | id: public-ingestion-container-build 96 | waitFor: 97 | - update-submodules 98 | - recreate-tag 99 | - fetch-mvn-cache 100 | - push-dataflow-flex-base-image 101 | entrypoint: bash 102 | args: 103 | - '-c' 104 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -DskipTests -Pingestion-container-build -Dcontainer-version=$(${_VERSION}) -Dcontainer_registry_tag_prefix="gcr.io/enpa-public-assets" -Drevision=$(${_VERSION}) package' 105 | 106 | ########################################################### 107 | # Step 8: Build and Publish Public Deletion Container Image 108 | 
########################################################### 109 | - name: gcr.io/cloud-devrel-public-resources/java11 110 | id: public-deletion-container-build 111 | waitFor: 112 | - update-submodules 113 | - recreate-tag 114 | - fetch-mvn-cache 115 | - push-dataflow-flex-base-image 116 | entrypoint: bash 117 | args: 118 | - '-c' 119 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -DskipTests -Pdeletion-container-build -Dcontainer-version=$(${_VERSION}) -Dcontainer_registry_tag_prefix="gcr.io/enpa-public-assets" -Drevision=$(${_VERSION}) package' 120 | 121 | ########################################################### 122 | # Step 9: Build and Publish Ingestion Container Image 123 | ########################################################### 124 | - name: gcr.io/cloud-devrel-public-resources/java11 125 | id: ingestion-container-build 126 | waitFor: 127 | - update-submodules 128 | - recreate-tag 129 | - fetch-mvn-cache 130 | - push-dataflow-flex-base-image 131 | entrypoint: bash 132 | args: 133 | - '-c' 134 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -s mvn-settings.xml -DskipTests -Pingestion-container-build,attestation -Dcontainer-version=$(${_VERSION}) -Dcontainer_registry_tag_prefix="gcr.io/enpa-infra" -Drevision=$(${_VERSION}) package' 135 | secretEnv: ['GITHUB_TOKEN'] 136 | 137 | ########################################################### 138 | # Step 10: Build and Publish Deletion Container Image 139 | ########################################################### 140 | - name: gcr.io/cloud-devrel-public-resources/java11 141 | id: deletion-container-build 142 | waitFor: 143 | - update-submodules 144 | - recreate-tag 145 | - fetch-mvn-cache 146 | - push-dataflow-flex-base-image 147 | entrypoint: bash 148 | args: 149 | - '-c' 150 | - 'source ci/common.sh && retry_with_backoff 3 10 ./mvnw -s mvn-settings.xml -DskipTests -Pdeletion-container-build,attestation -Dcontainer-version=$(${_VERSION}) -Dcontainer_registry_tag_prefix="gcr.io/enpa-infra" -Drevision=$(${_VERSION}) package' 151 | secretEnv: ['GITHUB_TOKEN'] 152 | 153 | ########################################################### 154 | # Step 11: Generate Templates for Dataflow jobs and Firebase 155 | ########################################################### 156 | - name: gcr.io/google.com/cloudsdktool/cloud-sdk:alpine 157 | id: flex-template-generator-and-uploader 158 | waitFor: 159 | - recreate-tag 160 | args: 161 | - "./ci/generate-templates.sh" 162 | env: 163 | - PROJECT_ID=$PROJECT_ID 164 | 165 | ########################################################### 166 | # Step 12: Update cached .m2 directory on GCS with any 167 | # additional dependencies downloaded during the 168 | # build. 
169 | ########################################################### 170 | - name: gcr.io/cloud-builders/gsutil 171 | id: sync-mvn-cache 172 | waitFor: 173 | - ingestion-container-build 174 | - deletion-container-build 175 | args: 176 | - '-m' 177 | - 'rsync' 178 | - '-r' 179 | - '/cache/.m2' 180 | - 'gs://${_BUCKET}/cache/.m2/' 181 | volumes: 182 | - path: '/cache/.m2' 183 | name: 'm2_cache' 184 | 185 | images: 186 | - 'gcr.io/enpa-public-assets/distroless-flex-template-launcher:11' 187 | 188 | substitutions: 189 | # Default value 190 | _BUCKET: 'enpa-infra' 191 | _DEFAULT_TAG: '${SHORT_SHA}-SNAPSHOT' 192 | _TAG: ${TAG_NAME:-$_DEFAULT_TAG} #default value will be SHORT_SHA-SNAPSHOT 193 | _VERSION: 'git describe --tags --always --dirty=-dirty' 194 | 195 | secrets: 196 | - kmsKeyName: projects/enpa-infra/locations/global/keyRings/cloudbuild-keyring/cryptoKeys/cloudbuild 197 | secretEnv: 198 | GITHUB_TOKEN: 'CiQAzNSb40phOg8+Rtn21yuiJuqJA3CKm5YWgigMwpA5lsM42NgSUQA/9gK92tb89IansK/cTpYuAJpf9PUZ7Lvse94FqFmk/mVULLISsoNr6+39npTZAG2el7cRQ22LozB5lwE9dZGywalT7xHxs46+nywy/ix8Qg==' 199 | 200 | options: 201 | # Use higher CPU machines so the caching and build steps are faster. 202 | machineType: 'N1_HIGHCPU_32' 203 | 204 | serviceAccount: 'projects/enpa-infra/serviceAccounts/development-integration-tests@enpa-integration-testing.iam.gserviceaccount.com' -------------------------------------------------------------------------------- /df-flex-template-base-image/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM gcr.io/distroless/java:11 16 | 17 | COPY java_template_launcher /opt/google/dataflow/java_template_launcher 18 | -------------------------------------------------------------------------------- /df-flex-template-base-image/java_template_launcher: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/exposure-notifications-private-analytics-ingestion/e4cb1ba2529cc055f68cb0895b2785919ee2ab11/df-flex-template-base-image/java_template_launcher -------------------------------------------------------------------------------- /docs/code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Google Open Source Community Guidelines 2 | 3 | At Google, we recognize and celebrate the creativity and collaboration of open 4 | source contributors and the diversity of skills, experiences, cultures, and 5 | opinions they bring to the projects and communities they participate in. 
6 | 7 | Every one of Google's open source projects and communities are inclusive 8 | environments, based on treating all individuals respectfully, regardless of 9 | gender identity and expression, sexual orientation, disabilities, 10 | neurodiversity, physical appearance, body size, ethnicity, nationality, race, 11 | age, religion, or similar personal characteristic. 12 | 13 | We value diverse opinions, but we value respectful behavior more. 14 | 15 | Respectful behavior includes: 16 | 17 | * Being considerate, kind, constructive, and helpful. 18 | * Not engaging in demeaning, discriminatory, harassing, hateful, sexualized, or 19 | physically threatening behavior, speech, and imagery. 20 | * Not engaging in unwanted physical contact. 21 | 22 | Some Google open source projects [may adopt][] an explicit project code of 23 | conduct, which may have additional detailed expectations for participants. Most 24 | of those projects will use our [modified Contributor Covenant][]. 25 | 26 | [may adopt]: https://opensource.google/docs/releasing/preparing/#conduct 27 | [modified Contributor Covenant]: https://opensource.google/docs/releasing/template/CODE_OF_CONDUCT/ 28 | 29 | ## Resolve peacefully 30 | 31 | We do not believe that all conflict is necessarily bad; healthy debate and 32 | disagreement often yields positive results. However, it is never okay to be 33 | disrespectful. 34 | 35 | If you see someone behaving disrespectfully, you are encouraged to address the 36 | behavior directly with those involved. Many issues can be resolved quickly and 37 | easily, and this gives people more control over the outcome of their dispute. 38 | If you are unable to resolve the matter for any reason, or if the behavior is 39 | threatening or harassing, report it. We are dedicated to providing an 40 | environment where participants feel welcome and safe. 41 | 42 | ## Reporting problems 43 | 44 | Some Google open source projects may adopt a project-specific code of conduct. 45 | In those cases, a Google employee will be identified as the Project Steward, 46 | who will receive and handle reports of code of conduct violations. In the event 47 | that a project hasn’t identified a Project Steward, you can report problems by 48 | emailing opensource@google.com. 49 | 50 | We will investigate every complaint, but you may not receive a direct response. 51 | We will use our discretion in determining when and how to follow up on reported 52 | incidents, which may range from not taking action to permanent expulsion from 53 | the project and project-sponsored spaces. We will notify the accused of the 54 | report and provide them an opportunity to discuss it before any action is 55 | taken. The identity of the reporter will be omitted from the details of the 56 | report supplied to the accused. In potentially harmful situations, such as 57 | ongoing harassment or threats to anyone's safety, we may take action without 58 | notice. 59 | 60 | *This document was adapted from the [IndieWeb Code of Conduct][] and can also 61 | be found at .* 62 | 63 | [IndieWeb Code of Conduct]: https://indieweb.org/code-of-conduct 64 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 
5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). 29 | -------------------------------------------------------------------------------- /logging.properties: -------------------------------------------------------------------------------- 1 | handlers=java.util.logging.ConsoleHandler 2 | .level=ALL 3 | -------------------------------------------------------------------------------- /model/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 20 | 4.0.0 21 | 22 | com.google.exposurenotification.privateanalytics.ingestion 23 | enpa-ingestion 24 | ${revision} 25 | ../pom.xml 26 | 27 | com.google.exposurenotification.privateanalytics.ingestion 28 | enpa-ingestion-model 29 | jar 30 | 31 | 32 | ../src/main/java 33 | ../src/test/java 34 | 35 | 36 | org.apache.maven.plugins 37 | maven-compiler-plugin 38 | ${maven-compiler-plugin.version} 39 | 40 | 41 | 42 | com.google.auto.value 43 | auto-value 44 | ${auto-value.version} 45 | 46 | 47 | **/model/**/*.java 48 | **/model/** 49 | 50 | 51 | 52 | org.apache.maven.plugins 53 | maven-jar-plugin 54 | 3.2.0 55 | 56 | 57 | 58 | test-jar 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | com.google.cloud 69 | google-cloud-firestore 70 | 2.0.0 71 | 72 | 73 | 74 | com.google.firebase 75 | firebase-admin 76 | 7.0.0 77 | 78 | 79 | 80 | org.apache.beam 81 | beam-sdks-java-core 82 | ${beam.version} 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /mvn-settings.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | github 5 | 6 | 7 | 8 | 9 | github 10 | 11 | 12 | central 13 | https://repo1.maven.org/maven2 14 | 15 | true 16 | 17 | 18 | 19 | github-public 20 | ENPA Public Apache Maven Packages 21 | https://maven.pkg.github.com/google/exposure-notifications-private-analytics-ingestion 22 | 23 | true 24 | 25 | 26 | 27 | github-private 28 | ENPA Private Apache Maven Packages 29 | https://maven.pkg.github.com/googleprivate/enpa-ingestion-infra 30 | 31 | true 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | github-public 41 | x-access-token 42 | ${env.GITHUB_TOKEN} 43 | 44 | 45 | github-private 46 | x-access-token 47 | ${env.GITHUB_TOKEN} 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /mvnw.cmd: -------------------------------------------------------------------------------- 1 | @REM ---------------------------------------------------------------------------- 2 | @REM Licensed to the Apache Software Foundation (ASF) under one 
3 | @REM or more contributor license agreements. See the NOTICE file 4 | @REM distributed with this work for additional information 5 | @REM regarding copyright ownership. The ASF licenses this file 6 | @REM to you under the Apache License, Version 2.0 (the 7 | @REM "License"); you may not use this file except in compliance 8 | @REM with the License. You may obtain a copy of the License at 9 | @REM 10 | @REM https://www.apache.org/licenses/LICENSE-2.0 11 | @REM 12 | @REM Unless required by applicable law or agreed to in writing, 13 | @REM software distributed under the License is distributed on an 14 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | @REM KIND, either express or implied. See the License for the 16 | @REM specific language governing permissions and limitations 17 | @REM under the License. 18 | @REM ---------------------------------------------------------------------------- 19 | 20 | @REM ---------------------------------------------------------------------------- 21 | @REM Maven Start Up Batch script 22 | @REM 23 | @REM Required ENV vars: 24 | @REM JAVA_HOME - location of a JDK home dir 25 | @REM 26 | @REM Optional ENV vars 27 | @REM M2_HOME - location of maven2's installed home dir 28 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands 29 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending 30 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven 31 | @REM e.g. to debug Maven itself, use 32 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 33 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files 34 | @REM ---------------------------------------------------------------------------- 35 | 36 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' 37 | @echo off 38 | @REM set title of command window 39 | title %0 40 | @REM enable echoing by setting MAVEN_BATCH_ECHO to 'on' 41 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% 42 | 43 | @REM set %HOME% to equivalent of $HOME 44 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") 45 | 46 | @REM Execute a user defined script before this one 47 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre 48 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending 49 | if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" 50 | if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" 51 | :skipRcPre 52 | 53 | @setlocal 54 | 55 | set ERROR_CODE=0 56 | 57 | @REM To isolate internal variables from possible post scripts, we use another setlocal 58 | @setlocal 59 | 60 | @REM ==== START VALIDATION ==== 61 | if not "%JAVA_HOME%" == "" goto OkJHome 62 | 63 | echo. 64 | echo Error: JAVA_HOME not found in your environment. >&2 65 | echo Please set the JAVA_HOME variable in your environment to match the >&2 66 | echo location of your Java installation. >&2 67 | echo. 68 | goto error 69 | 70 | :OkJHome 71 | if exist "%JAVA_HOME%\bin\java.exe" goto init 72 | 73 | echo. 74 | echo Error: JAVA_HOME is set to an invalid directory. >&2 75 | echo JAVA_HOME = "%JAVA_HOME%" >&2 76 | echo Please set the JAVA_HOME variable in your environment to match the >&2 77 | echo location of your Java installation. >&2 78 | echo. 79 | goto error 80 | 81 | @REM ==== END VALIDATION ==== 82 | 83 | :init 84 | 85 | @REM Find the project base dir, i.e. the directory that contains the folder ".mvn". 86 | @REM Fallback to current working directory if not found. 
87 | 88 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% 89 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir 90 | 91 | set EXEC_DIR=%CD% 92 | set WDIR=%EXEC_DIR% 93 | :findBaseDir 94 | IF EXIST "%WDIR%"\.mvn goto baseDirFound 95 | cd .. 96 | IF "%WDIR%"=="%CD%" goto baseDirNotFound 97 | set WDIR=%CD% 98 | goto findBaseDir 99 | 100 | :baseDirFound 101 | set MAVEN_PROJECTBASEDIR=%WDIR% 102 | cd "%EXEC_DIR%" 103 | goto endDetectBaseDir 104 | 105 | :baseDirNotFound 106 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR% 107 | cd "%EXEC_DIR%" 108 | 109 | :endDetectBaseDir 110 | 111 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig 112 | 113 | @setlocal EnableExtensions EnableDelayedExpansion 114 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a 115 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% 116 | 117 | :endReadAdditionalConfig 118 | 119 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" 120 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" 121 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain 122 | 123 | set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 124 | 125 | FOR /F "tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( 126 | IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B 127 | ) 128 | 129 | @REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central 130 | @REM This allows using the maven wrapper in projects that prohibit checking in binary data. 131 | if exist %WRAPPER_JAR% ( 132 | if "%MVNW_VERBOSE%" == "true" ( 133 | echo Found %WRAPPER_JAR% 134 | ) 135 | ) else ( 136 | if not "%MVNW_REPOURL%" == "" ( 137 | SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 138 | ) 139 | if "%MVNW_VERBOSE%" == "true" ( 140 | echo Couldn't find %WRAPPER_JAR%, downloading it ... 141 | echo Downloading from: %DOWNLOAD_URL% 142 | ) 143 | 144 | powershell -Command "&{"^ 145 | "$webclient = new-object System.Net.WebClient;"^ 146 | "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^ 147 | "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^ 148 | "}"^ 149 | "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^ 150 | "}" 151 | if "%MVNW_VERBOSE%" == "true" ( 152 | echo Finished downloading %WRAPPER_JAR% 153 | ) 154 | ) 155 | @REM End of extension 156 | 157 | @REM Provide a "standardized" way to retrieve the CLI args that will 158 | @REM work with both Windows and non-Windows executions. 
159 | set MAVEN_CMD_LINE_ARGS=%* 160 | 161 | %MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* 162 | if ERRORLEVEL 1 goto error 163 | goto end 164 | 165 | :error 166 | set ERROR_CODE=1 167 | 168 | :end 169 | @endlocal & set ERROR_CODE=%ERROR_CODE% 170 | 171 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost 172 | @REM check for post script, once with legacy .bat ending and once with .cmd ending 173 | if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" 174 | if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" 175 | :skipRcPost 176 | 177 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' 178 | if "%MAVEN_BATCH_PAUSE%" == "on" pause 179 | 180 | if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% 181 | 182 | exit /B %ERROR_CODE% 183 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 20 | 4.0.0 21 | 22 | com.google.exposurenotification.privateanalytics.ingestion 23 | enpa-ingestion 24 | ${revision} 25 | 26 | pom 27 | 28 | 29 | model 30 | attestation 31 | pipeline 32 | 33 | 34 | 35 | 20210413_RC01-SNAPSHOT 36 | change.me/changme 37 | 20210426_01 38 | 11 39 | 11 40 | 2.34.0 41 | 2.1 42 | 2.10.2 43 | 2.10.5 44 | 4.13.1 45 | 3.7.0 46 | 1.6.0 47 | 3.0.2 48 | 3.1.0 49 | 3.0.0 50 | 1.7.25 51 | 2.4.6 52 | 2.8.5 53 | 2.22.0 54 | ${project.version} 55 | 1.0-rc7 56 | 1.7.4 57 | 58 | 59 | 60 | 61 | 62 | org.codehaus.mojo 63 | flatten-maven-plugin 64 | 1.2.2 65 | 66 | true 67 | 68 | 69 | 70 | flatten 71 | process-resources 72 | 73 | flatten 74 | 75 | 76 | 77 | flatten.clean 78 | clean 79 | 80 | clean 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | org.codehaus.mojo 90 | exec-maven-plugin 91 | ${maven-exec-plugin.version} 92 | 93 | true 94 | java 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | com.google.cloud 105 | libraries-bom 106 | 22.0.0 107 | pom 108 | import 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | com.google.guava 117 | guava 118 | [30.0-jre,) 119 | 120 | 121 | joda-time 122 | joda-time 123 | ${joda.version} 124 | 125 | 126 | org.slf4j 127 | slf4j-api 128 | ${slf4j.version} 129 | 130 | 131 | org.slf4j 132 | slf4j-jdk14 133 | ${slf4j.version} 134 | runtime 135 | 136 | 137 | org.hamcrest 138 | hamcrest-core 139 | ${hamcrest.version} 140 | 141 | 142 | org.hamcrest 143 | hamcrest-library 144 | ${hamcrest.version} 145 | 146 | 147 | junit 148 | junit 149 | ${junit.version} 150 | test 151 | 152 | 153 | com.google.auto.value 154 | auto-value 155 | ${auto-value.version} 156 | provided 157 | 158 | 159 | com.google.auto.service 160 | auto-service-annotations 161 | ${auto-service.version} 162 | 163 | 164 | org.mockito 165 | mockito-all 166 | 1.10.19 167 | test 168 | 169 | 170 | com.google.truth 171 | truth 172 | 1.0.1 173 | test 174 | 175 | 176 | com.google.protobuf 177 | protobuf-java 178 | 3.16.1 179 | 180 | 181 | org.apache.avro 182 | avro 183 | 1.10.0 184 | 185 | 186 | org.apache.avro 187 | avro-compiler 188 | 1.10.0 189 | 190 | 191 | org.apache.avro 192 | avro-maven-plugin 193 | 1.10.0 194 | 195 | 196 | com.google.code.gson 197 | gson 198 | 2.8.5 199 | compile 200 | 201 | 202 | com.google.truth.extensions 203 | truth-java8-extension 204 | 1.0.1 205 | test 206 | 207 | 208 | 209 | 210 | 211 | github-public 212 | GitHub Packages 213 | 
https://maven.pkg.github.com/google/exposure-notifications-private-analytics-ingestion 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /src/main/java/com/google/exposurenotification/privateanalytics/ingestion/attestation/AbstractDeviceAttestation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.attestation; 17 | 18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare; 19 | import org.apache.beam.sdk.metrics.Counter; 20 | import org.apache.beam.sdk.metrics.Metrics; 21 | import org.apache.beam.sdk.options.PipelineOptions; 22 | import org.apache.beam.sdk.transforms.PTransform; 23 | import org.apache.beam.sdk.values.PCollection; 24 | 25 | /** 26 | * Class to extend to implement some form of check that data originated from a real device. 27 | * 28 | *

We use a PTransform rather than a Filter to allow flexibility in implementations. E.g., an 29 | * implementation might want to compute various distributions over the set of data coming in rather 30 | * than make a strictly local decision as to whether a given DataShare is attested. 31 | */ 32 | public abstract class AbstractDeviceAttestation 33 | extends PTransform, PCollection> { 34 | 35 | // Counters for the number of elements processed and eventually accepted. 36 | protected static final Counter processedCounter = 37 | Metrics.counter(AbstractDeviceAttestation.class, "processed"); 38 | protected static final Counter acceptedCounter = 39 | Metrics.counter(AbstractDeviceAttestation.class, "accepted"); 40 | 41 | /** @return a non-null class object if the attestation has pipeline options to be registered */ 42 | public abstract Class getOptionsClass(); 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/AWSFederatedAuthHelper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import com.amazonaws.auth.*; 19 | import com.amazonaws.services.securitytoken.AWSSecurityTokenService; 20 | import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; 21 | import com.amazonaws.services.securitytoken.model.AssumeRoleWithWebIdentityRequest; 22 | import com.amazonaws.services.securitytoken.model.AssumeRoleWithWebIdentityResult; 23 | import com.amazonaws.services.securitytoken.model.Credentials; 24 | import com.google.auth.oauth2.GoogleCredentials; 25 | import com.google.auth.oauth2.IdTokenProvider; 26 | import java.io.IOException; 27 | 28 | public class AWSFederatedAuthHelper { 29 | 30 | private AWSFederatedAuthHelper() {} 31 | 32 | public static void setupAWSAuth(IngestionPipelineOptions options, String role, String region) 33 | throws IOException { 34 | GoogleCredentials credentials = GoogleCredentials.getApplicationDefault(); 35 | if (!(credentials instanceof IdTokenProvider)) { 36 | throw new IllegalArgumentException("Credentials are not an instance of IdTokenProvider."); 37 | } 38 | 39 | AWSSecurityTokenService stsClient = 40 | AWSSecurityTokenServiceClientBuilder.standard() 41 | .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) 42 | .withRegion(region) 43 | .build(); 44 | 45 | /** 46 | * Obtain credentials for the IAM role. Note that you cannot assume the role of an AWS root 47 | * account. Amazon S3 will deny access. You must use credentials for an IAM user or an IAM role. 
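The PTransform-based extension point of AbstractDeviceAttestation described above can be illustrated with a minimal sketch of a concrete attestation. The generic parameters (elided in this listing are assumed to be PCollection<DataShare> in and out, and Class<? extends PipelineOptions> for getOptionsClass), the class name, and the DataShare.getCertificateChain() accessor are assumptions made for the sketch, not code from this repository; a real implementation would cryptographically verify the chain and signature rather than merely check for their presence.

package com.google.exposurenotification.privateanalytics.ingestion.attestation;

import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;

/** Sketch of a device attestation: keeps only shares that carry a certificate chain. */
public class PresenceOnlyDeviceAttestation extends AbstractDeviceAttestation {

  @Override
  public PCollection<DataShare> expand(PCollection<DataShare> input) {
    return input.apply(
        "PresenceOnlyAttestation",
        ParDo.of(
            new DoFn<DataShare, DataShare>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                processedCounter.inc();
                DataShare share = c.element();
                // getCertificateChain() is an assumed accessor standing in for real
                // verification of the certificate chain and signature.
                if (share.getCertificateChain() != null
                    && !share.getCertificateChain().isEmpty()) {
                  acceptedCounter.inc();
                  c.output(share);
                }
              }
            }));
  }

  @Override
  public Class<? extends PipelineOptions> getOptionsClass() {
    // This sketch registers no additional pipeline options.
    return null;
  }
}

Because the check runs as a PTransform over the whole PCollection, an implementation is also free to compute distributions across all incoming shares, as the class comment above notes, instead of making a purely local accept or reject decision.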
48 | */ 49 | AssumeRoleWithWebIdentityRequest roleRequest = 50 | new AssumeRoleWithWebIdentityRequest() 51 | .withRoleArn(role) 52 | .withRoleSessionName("enpa-gcp-aws-session") 53 | .withWebIdentityToken( 54 | ((IdTokenProvider) credentials) 55 | .idTokenWithAudience("enpa-gcp-aws", null) 56 | .getTokenValue()); 57 | 58 | AssumeRoleWithWebIdentityResult roleResponse = stsClient.assumeRoleWithWebIdentity(roleRequest); 59 | Credentials sessionCredentials = roleResponse.getCredentials(); 60 | 61 | // Create a BasicSessionCredentials object that contains the credentials you just retrieved. 62 | BasicSessionCredentials awsCredentials = 63 | new BasicSessionCredentials( 64 | sessionCredentials.getAccessKeyId(), 65 | sessionCredentials.getSecretAccessKey(), 66 | sessionCredentials.getSessionToken()); 67 | 68 | options.setAwsCredentialsProvider(new AWSStaticCredentialsProvider(awsCredentials)); 69 | options.setAwsRegion(region); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/BatchWriterFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
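A short usage sketch for the helper above. The role ARN and region are placeholders rather than values from this repository; the same two calls appear further below in BatchWriterFn.writeBatch whenever an output prefix starts with s3://.

package com.google.exposurenotification.privateanalytics.ingestion.pipeline;

import java.io.IOException;
import org.apache.beam.sdk.io.FileSystems;

final class AwsAuthUsageSketch {
  static void configureForS3(IngestionPipelineOptions options) throws IOException {
    // Exchange the worker's Google credentials for temporary AWS session credentials
    // and attach them to the pipeline options (placeholder role ARN and region).
    AWSFederatedAuthHelper.setupAWSAuth(
        options, "arn:aws:iam::123456789012:role/example-ingestion-writer", "us-west-2");
    // Let Beam's S3 filesystem pick up the credentials and region that were just set.
    FileSystems.setDefaultPipelineOptions(options);
  }
}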
15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import com.google.cloud.kms.v1.AsymmetricSignResponse; 19 | import com.google.cloud.kms.v1.CryptoKeyVersionName; 20 | import com.google.cloud.kms.v1.Digest; 21 | import com.google.cloud.kms.v1.KeyManagementServiceClient; 22 | import com.google.common.collect.ImmutableList; 23 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare; 24 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.DataShareMetadata; 25 | import com.google.protobuf.ByteString; 26 | import java.io.IOException; 27 | import java.nio.ByteBuffer; 28 | import java.nio.channels.WritableByteChannel; 29 | import java.security.MessageDigest; 30 | import java.security.NoSuchAlgorithmException; 31 | import java.time.Clock; 32 | import java.time.Duration; 33 | import java.time.Instant; 34 | import java.time.ZoneOffset; 35 | import java.time.format.DateTimeFormatter; 36 | import java.util.ArrayList; 37 | import java.util.List; 38 | import java.util.UUID; 39 | import java.util.concurrent.TimeUnit; 40 | import org.abetterinternet.prio.v1.PrioBatchSignature; 41 | import org.abetterinternet.prio.v1.PrioDataSharePacket; 42 | import org.abetterinternet.prio.v1.PrioIngestionHeader; 43 | import org.apache.beam.sdk.io.FileSystems; 44 | import org.apache.beam.sdk.io.fs.ResourceId; 45 | import org.apache.beam.sdk.metrics.Counter; 46 | import org.apache.beam.sdk.metrics.Metrics; 47 | import org.apache.beam.sdk.transforms.DoFn; 48 | import org.apache.beam.sdk.util.MimeTypes; 49 | import org.apache.beam.sdk.values.KV; 50 | import org.slf4j.Logger; 51 | import org.slf4j.LoggerFactory; 52 | 53 | /** Function to write files (header, data records, signature) for a batch of {@link DataShare} */ 54 | public class BatchWriterFn extends DoFn>, Void> { 55 | 56 | public static final String INGESTION_HEADER_SUFFIX = ".batch"; 57 | public static final String DATASHARE_PACKET_SUFFIX = ".batch.avro"; 58 | public static final String HEADER_SIGNATURE_SUFFIX = ".batch.sig"; 59 | 60 | private static final Logger LOG = LoggerFactory.getLogger(BatchWriterFn.class); 61 | private static final Duration KMS_WAIT_TIME = Duration.ofSeconds(30); 62 | private static final DateTimeFormatter DATE_TIME_FORMATTER = 63 | DateTimeFormatter.ofPattern("/yyyy/MM/dd/HH/mm/"); 64 | 65 | private static final Counter dataSharesInBatch = 66 | Metrics.counter(BatchWriterFn.class, "dataSharesInBatch"); 67 | 68 | private static final Counter failedDataShares = 69 | Metrics.counter(BatchWriterFn.class, "failedDataShares"); 70 | 71 | private static final Counter batchesProcessed = 72 | Metrics.counter(BatchWriterFn.class, "batchesProcessed"); 73 | 74 | private static final Counter successfulBatches = 75 | Metrics.counter(BatchWriterFn.class, "successfulBatches"); 76 | 77 | private static final Counter failedBatches = 78 | Metrics.counter(BatchWriterFn.class, "failedBatches"); 79 | 80 | private transient KeyManagementServiceClient client; 81 | private transient CryptoKeyVersionName keyVersionName; 82 | 83 | // Uses pipeline options, otherwise could've lived in @Setup 84 | @StartBundle 85 | public void startBundle(StartBundleContext context) throws IOException { 86 | client = KeyManagementServiceClient.create(); 87 | IngestionPipelineOptions options = 88 | context.getPipelineOptions().as(IngestionPipelineOptions.class); 89 | keyVersionName = CryptoKeyVersionName.parse(options.getKeyResourceName()); 90 | } 91 | 92 | @FinishBundle 93 | 
public void finishBundle() { 94 | client.shutdown(); 95 | LOG.info("Waiting for KMS Client to shutdown."); 96 | try { 97 | client.awaitTermination(KMS_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS); 98 | } catch (InterruptedException e) { 99 | LOG.warn("Interrupted while waiting for client shutdown", e); 100 | Thread.currentThread().interrupt(); 101 | } 102 | } 103 | 104 | @ProcessElement 105 | public void processElement(ProcessContext c) { 106 | IngestionPipelineOptions options = c.getPipelineOptions().as(IngestionPipelineOptions.class); 107 | 108 | String phaPrefix = options.getPhaOutput(); 109 | String facilitatorPrefix = options.getFacilitatorOutput(); 110 | 111 | long startTime = 112 | IngestionPipelineOptions.calculatePipelineStart( 113 | options.getStartTime(), options.getDuration(), 1, Clock.systemUTC()); 114 | long duration = options.getDuration(); 115 | 116 | KV> input = c.element(); 117 | DataShareMetadata metadata = input.getKey(); 118 | batchesProcessed.inc(); 119 | LOG.info("Processing batch: {}", metadata); 120 | // batch size explicitly chosen so that these lists fit in memory on a single worker 121 | List phaPackets = new ArrayList<>(); 122 | List facilitatorPackets = new ArrayList<>(); 123 | for (DataShare dataShare : input.getValue()) { 124 | List split = PrioSerializationHelper.splitPackets(dataShare); 125 | if (split.size() != DataShare.NUMBER_OF_SERVERS) { 126 | // Checks exist to discard such data shares prior to reaching this point. 127 | throw new IllegalArgumentException( 128 | "Share split into more than hardcoded number of servers"); 129 | } 130 | // First packet always goes to PHA 131 | phaPackets.add(split.get(0)); 132 | facilitatorPackets.add(split.get(1)); 133 | } 134 | 135 | String date = 136 | Instant.ofEpochSecond(startTime + duration) 137 | .atOffset(ZoneOffset.UTC) 138 | .format(DATE_TIME_FORMATTER); 139 | String aggregateId = metadata.getMetricName() + date; 140 | // In case of dataflow runner retries, its useful to make the batch UUID deterministic so 141 | // that files that may already have been written are overwritten, instead of new files created. 142 | byte[] seed = (aggregateId + metadata.getBatchId()).getBytes(); 143 | UUID batchId = UUID.nameUUIDFromBytes(seed); 144 | String phaFilePath = 145 | phaPrefix + ((phaPrefix.endsWith("/")) ? "" : "/") + aggregateId + batchId.toString(); 146 | String facilitatorPath = 147 | facilitatorPrefix 148 | + ((facilitatorPrefix.endsWith("/")) ? 
"" : "/") 149 | + aggregateId 150 | + batchId.toString(); 151 | 152 | try { 153 | // Write to PHA Output Destination 154 | LOG.info("PHA Output: {}", phaFilePath); 155 | writeBatch( 156 | options, 157 | startTime, 158 | duration, 159 | metadata, 160 | batchId, 161 | phaFilePath, 162 | phaPackets, 163 | options.getPhaAwsBucketRole(), 164 | options.getPhaAwsBucketRegion()); 165 | 166 | // Write to Facilitator Output Destination 167 | LOG.info("Facilitator Output: {}", facilitatorPath); 168 | writeBatch( 169 | options, 170 | startTime, 171 | duration, 172 | metadata, 173 | batchId, 174 | facilitatorPath, 175 | facilitatorPackets, 176 | options.getFacilitatorAwsBucketRole(), 177 | options.getFacilitatorAwsBucketRegion()); 178 | 179 | successfulBatches.inc(); 180 | dataSharesInBatch.inc(phaPackets.size()); 181 | } catch (IOException | NoSuchAlgorithmException e) { 182 | LOG.error("Unable to serialize Packet/Header/Sig file for PHA or facilitator", e); 183 | failedBatches.inc(); 184 | failedDataShares.inc(phaPackets.size()); 185 | } 186 | } 187 | 188 | /** Writes the triplet of files defined per batch of data shares (packet file, header, and sig) */ 189 | private void writeBatch( 190 | IngestionPipelineOptions options, 191 | long startTime, 192 | long duration, 193 | DataShareMetadata metadata, 194 | UUID uuid, 195 | String filenamePrefix, 196 | List packets, 197 | String awsBucketRole, 198 | String awsBucketRegion) 199 | throws IOException, NoSuchAlgorithmException { 200 | 201 | if (filenamePrefix.startsWith("s3://")) { 202 | AWSFederatedAuthHelper.setupAWSAuth(options, awsBucketRole, awsBucketRegion); 203 | FileSystems.setDefaultPipelineOptions(options); 204 | } 205 | // write PrioDataSharePackets in this batch to file 206 | ByteBuffer packetsByteBuffer = 207 | PrioSerializationHelper.serializeRecords( 208 | packets, PrioDataSharePacket.class, PrioDataSharePacket.getClassSchema()); 209 | writeToFile(filenamePrefix + DATASHARE_PACKET_SUFFIX, packetsByteBuffer); 210 | 211 | MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); 212 | byte[] packetsBytesHashDigest = sha256.digest(packetsByteBuffer.array()); 213 | // create Header and write to file 214 | PrioIngestionHeader header = 215 | PrioSerializationHelper.createHeader( 216 | metadata, packetsBytesHashDigest, uuid, startTime, duration); 217 | 218 | ByteBuffer headerBytes = 219 | PrioSerializationHelper.serializeRecords( 220 | ImmutableList.of(header), 221 | PrioIngestionHeader.class, 222 | PrioIngestionHeader.getClassSchema()); 223 | writeToFile(filenamePrefix + INGESTION_HEADER_SUFFIX, headerBytes); 224 | 225 | byte[] hashHeader = sha256.digest(headerBytes.array()); 226 | Digest digestHeader = Digest.newBuilder().setSha256(ByteString.copyFrom(hashHeader)).build(); 227 | 228 | AsymmetricSignResponse result = client.asymmetricSign(keyVersionName, digestHeader); 229 | PrioBatchSignature signature = 230 | PrioBatchSignature.newBuilder() 231 | .setBatchHeaderSignature(result.getSignature().asReadOnlyByteBuffer()) 232 | .setKeyIdentifier(keyVersionName.toString()) 233 | .build(); 234 | ByteBuffer signatureBytes = 235 | PrioSerializationHelper.serializeRecords( 236 | ImmutableList.of(signature), 237 | PrioBatchSignature.class, 238 | PrioBatchSignature.getClassSchema()); 239 | writeToFile(filenamePrefix + HEADER_SIGNATURE_SUFFIX, signatureBytes); 240 | } 241 | 242 | static void writeToFile(String filename, ByteBuffer contents) throws IOException { 243 | LOG.info("Writing output file: {}", filename); 244 | ResourceId resourceId = 
FileSystems.matchNewResource(filename, false); 245 | try (WritableByteChannel out = FileSystems.create(resourceId, MimeTypes.TEXT)) { 246 | out.write(contents); 247 | } 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DataProcessorManifest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 18 | 19 | import com.google.gson.JsonObject; 20 | import com.google.gson.JsonParser; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.net.URL; 24 | 25 | /** 26 | * Encapsulation of the specific manifest for a PHA or Facilitator data processor. 27 | * 28 | *
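The deterministic batch naming used in BatchWriterFn.processElement above can be checked in isolation. Because UUID.nameUUIDFromBytes is a pure function of its input bytes, a Dataflow retry that recomputes the same aggregate id and batch id targets the same file names and overwrites the earlier attempt; the metric name and batch id below are made-up values.

import java.nio.charset.StandardCharsets;
import java.util.UUID;

final class DeterministicBatchIdSketch {
  public static void main(String[] args) {
    // Made-up aggregate id (metric name + "/yyyy/MM/dd/HH/mm/") and batch id.
    byte[] seed =
        ("fakeMetric-v1/2021/04/26/14/00/" + "batch-0").getBytes(StandardCharsets.UTF_8);
    UUID firstAttempt = UUID.nameUUIDFromBytes(seed);
    UUID retryAttempt = UUID.nameUUIDFromBytes(seed);
    // Name-based (type 3) UUIDs from equal seeds are equal, so the retry writes the
    // same .batch, .batch.avro and .batch.sig paths instead of creating new ones.
    System.out.println(firstAttempt.equals(retryAttempt)); // prints "true"
  }
}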

See 29 | * https://docs.google.com/document/d/1MdfM3QT63ISU70l63bwzTrxr93Z7Tv7EDjLfammzo6Q/edit#bookmark=id.8skgn5yx33ae 30 | * https://github.com/abetterinternet/prio-server/blob/main/manifest-updater/manifest/types.go 31 | */ 32 | public class DataProcessorManifest { 33 | 34 | private static final String AWS_BUCKET_PREFIX = "s3://"; 35 | 36 | private static final String INGESTION_BUCKET = "ingestion-bucket"; 37 | private static final String INGESTION_IDENTITY = "ingestion-identity"; 38 | 39 | private final String manifestUrl; 40 | 41 | private String bucket; 42 | 43 | private String awsBucketRegion; 44 | 45 | private String awsBucketName; 46 | 47 | private String awsRole; 48 | 49 | private boolean isAwsBucket; 50 | 51 | public DataProcessorManifest(String manifestUrl) { 52 | this.manifestUrl = manifestUrl; 53 | this.isAwsBucket = false; 54 | if (!"".equals(manifestUrl)) { 55 | init(); 56 | } 57 | } 58 | 59 | public String getIngestionBucket() { 60 | return bucket; 61 | } 62 | 63 | public String getAwsBucketRegion() { 64 | return awsBucketRegion; 65 | } 66 | 67 | public String getAwsBucketName() { 68 | return awsBucketName; 69 | } 70 | 71 | public String getAwsRole() { 72 | return awsRole; 73 | } 74 | 75 | public boolean isAwsBucket() { 76 | return isAwsBucket; 77 | } 78 | 79 | private void init() { 80 | try { 81 | JsonObject manifestJson = fetchAndParseJson(); 82 | bucket = manifestJson.get(INGESTION_BUCKET).getAsString(); 83 | 84 | if (bucket.startsWith(AWS_BUCKET_PREFIX)) { 85 | isAwsBucket = true; 86 | String bucketInfo = bucket.substring(AWS_BUCKET_PREFIX.length()); 87 | String[] regionName = bucketInfo.split("/"); 88 | if (regionName.length != 2) { 89 | throw new IllegalArgumentException( 90 | "Ingestion bucket not in correct format of {AWS region}/{name}"); 91 | } 92 | 93 | awsBucketRegion = regionName[0]; 94 | awsBucketName = regionName[1]; 95 | if (manifestJson.get(INGESTION_IDENTITY) == null) { 96 | throw new IllegalArgumentException( 97 | "Ingestion identity must be specified with AWS buckets"); 98 | } else { 99 | awsRole = manifestJson.get(INGESTION_IDENTITY).getAsString(); 100 | } 101 | } 102 | 103 | } catch (IOException e) { 104 | throw new ManifestProcessingRuntimeException("Unable to fetch and parse manifest", e); 105 | } 106 | } 107 | 108 | private JsonObject fetchAndParseJson() throws IOException { 109 | URL url = new URL(manifestUrl); 110 | InputStreamReader manifestReader = new InputStreamReader(url.openStream()); 111 | return new JsonParser().parse(manifestReader).getAsJsonObject(); 112 | } 113 | 114 | protected class ManifestProcessingRuntimeException extends RuntimeException { 115 | 116 | public ManifestProcessingRuntimeException(String message, Throwable cause) { 117 | super(message, cause); 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DateFilterFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
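To make the bucket parsing in DataProcessorManifest above concrete: for a manifest whose ingestion-bucket uses the AWS form {AWS region}/{name}, the accessors resolve as sketched below. The URL, bucket name, and role ARN are hypothetical examples, and the constructor fetches the manifest over HTTP, so the snippet only runs against a server that actually serves such a document.

package com.google.exposurenotification.privateanalytics.ingestion.pipeline;

final class ManifestParsingSketch {
  public static void main(String[] args) {
    // Hypothetical manifest served at the URL, e.g.
    //   {"ingestion-bucket": "s3://us-west-1/example-ingestion-bucket",
    //    "ingestion-identity": "arn:aws:iam::123456789012:role/example-ingestion-writer"}
    DataProcessorManifest manifest =
        new DataProcessorManifest("https://example.com/pha-manifest.json");

    System.out.println(manifest.isAwsBucket());        // true: the bucket starts with "s3://"
    System.out.println(manifest.getAwsBucketRegion()); // us-west-1
    System.out.println(manifest.getAwsBucketName());   // example-ingestion-bucket
    System.out.println(manifest.getAwsRole());         // arn:aws:iam::123456789012:role/example-ingestion-writer
  }
}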
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare; 19 | import java.time.Clock; 20 | import java.util.HashMap; 21 | import java.util.Map; 22 | import org.apache.beam.sdk.metrics.Counter; 23 | import org.apache.beam.sdk.metrics.Metrics; 24 | import org.apache.beam.sdk.transforms.DoFn; 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | /** A DoFn that filters data shares in a particular time window */ 29 | public class DateFilterFn extends DoFn { 30 | 31 | private static final Logger LOG = LoggerFactory.getLogger(DateFilterFn.class); 32 | 33 | private final Map dateFilterIncluded = new HashMap<>(); 34 | private final Map dateFilterExcluded = new HashMap<>(); 35 | 36 | @ProcessElement 37 | public void processElement(ProcessContext c) { 38 | String metricName = c.element().getDataShareMetadata().getMetricName(); 39 | if (!dateFilterIncluded.containsKey(metricName)) { 40 | dateFilterIncluded.put( 41 | metricName, Metrics.counter(DateFilterFn.class, "dateFilterIncluded_" + metricName)); 42 | dateFilterExcluded.put( 43 | metricName, Metrics.counter(DateFilterFn.class, "dateFilterExcluded_" + metricName)); 44 | } 45 | 46 | if (c.element().getCreatedMs() == null || c.element().getCreatedMs() == 0) { 47 | LOG.warn("Skipping document with no creation timestamp: {}", c.element().getPath()); 48 | return; 49 | } 50 | IngestionPipelineOptions options = c.getPipelineOptions().as(IngestionPipelineOptions.class); 51 | 52 | long startTime = 53 | IngestionPipelineOptions.calculatePipelineStart( 54 | options.getStartTime(), options.getDuration(), 1, Clock.systemUTC()); 55 | long duration = options.getDuration(); 56 | 57 | if (c.element().getCreatedMs() >= startTime * 1000 58 | && c.element().getCreatedMs() < (startTime + duration) * 1000) { 59 | LOG.debug("Included: {}", c.element()); 60 | dateFilterIncluded.get(metricName).inc(); 61 | c.output(c.element()); 62 | } else { 63 | LOG.trace("Excluded: {}", c.element()); 64 | dateFilterExcluded.get(metricName).inc(); 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DeletionPipeline.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
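The inclusion test in DateFilterFn above compares a millisecond creation timestamp against a window whose bounds are held in seconds. A worked example with made-up numbers:

final class DateWindowSketch {
  public static void main(String[] args) {
    long startTime = 1_619_366_400L;     // window start, epoch seconds (made-up value)
    long duration = 3_600L;              // window length, seconds
    long createdMs = 1_619_368_000_000L; // document creation time, epoch milliseconds

    // Same comparison as DateFilterFn: scale the window bounds to milliseconds.
    boolean included =
        createdMs >= startTime * 1000 && createdMs < (startTime + duration) * 1000;

    // 1,619,368,000,000 lies in [1,619,366,400,000, 1,619,370,000,000), so this prints "true".
    System.out.println(included);
  }
}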
15 | */ 16 | 17 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 18 | 19 | import com.google.exposurenotification.privateanalytics.ingestion.pipeline.FirestoreConnector.FirestorePartitionQueryCreation; 20 | import com.google.firestore.v1.RunQueryResponse; 21 | import com.google.firestore.v1.Write; 22 | import java.time.Clock; 23 | import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; 24 | import org.apache.beam.sdk.Pipeline; 25 | import org.apache.beam.sdk.PipelineResult; 26 | import org.apache.beam.sdk.io.gcp.firestore.FirestoreIO; 27 | import org.apache.beam.sdk.io.gcp.firestore.RpcQosOptions; 28 | import org.apache.beam.sdk.metrics.MetricResults; 29 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 30 | import org.apache.beam.sdk.transforms.MapElements; 31 | import org.apache.beam.sdk.transforms.SimpleFunction; 32 | import org.slf4j.Logger; 33 | import org.slf4j.LoggerFactory; 34 | 35 | /** Pipeline to delete processed data shares from Firestore. */ 36 | public class DeletionPipeline { 37 | 38 | private static final Logger LOG = LoggerFactory.getLogger(DeletionPipeline.class); 39 | 40 | static void buildDeletionPipeline(IngestionPipelineOptions options, Pipeline pipeline) { 41 | DataflowPipelineOptions dataflowPipelineOptions = options.as(DataflowPipelineOptions.class); 42 | RpcQosOptions.Builder rpcQosOptionsBuilder = RpcQosOptions.newBuilder(); 43 | int maxNumWorkers = dataflowPipelineOptions.getMaxNumWorkers(); 44 | if (maxNumWorkers > 0) { 45 | rpcQosOptionsBuilder.withHintMaxNumWorkers(maxNumWorkers); 46 | } 47 | long startTime = 48 | IngestionPipelineOptions.calculatePipelineStart( 49 | options.getStartTime(), options.getDuration(), 2, Clock.systemUTC()); 50 | pipeline 51 | .apply(new FirestorePartitionQueryCreation(startTime)) 52 | .apply(FirestoreIO.v1().read().partitionQuery().withNameOnlyQuery().build()) 53 | .apply(FirestoreIO.v1().read().runQuery().build()) 54 | .apply(FirestoreConnector.filterRunQueryResponseHasDocument()) 55 | .apply( 56 | MapElements.via( 57 | new SimpleFunction() { 58 | @Override 59 | public Write apply(RunQueryResponse input) { 60 | return Write.newBuilder().setDelete(input.getDocument().getName()).build(); 61 | } 62 | })) 63 | .apply( 64 | FirestoreIO.v1() 65 | .write() 66 | .batchWrite() 67 | .withRpcQosOptions(rpcQosOptionsBuilder.build()) 68 | .build()); 69 | } 70 | 71 | public static void main(String[] args) { 72 | PipelineOptionsFactory.register(IngestionPipelineOptions.class); 73 | IngestionPipelineOptions options = 74 | PipelineOptionsFactory.fromArgs(args).withValidation().as(IngestionPipelineOptions.class); 75 | try { 76 | Pipeline pipeline = Pipeline.create(options); 77 | buildDeletionPipeline(options, pipeline); 78 | PipelineResult result = pipeline.run(); 79 | result.waitUntilFinish(); 80 | MetricResults metrics = result.metrics(); 81 | LOG.info("Metrics:\n\n{}", metrics); 82 | } catch (UnsupportedOperationException ignore) { 83 | // Known issue that this can throw when generating a template: 84 | // https://issues.apache.org/jira/browse/BEAM-9337 85 | } catch (Exception e) { 86 | LOG.error("Exception thrown during pipeline run.", e); 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/FirestoreConnector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * 
Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import com.google.firestore.v1.DatabaseRootName; 19 | import com.google.firestore.v1.PartitionQueryRequest; 20 | import com.google.firestore.v1.RunQueryResponse; 21 | import com.google.firestore.v1.StructuredQuery; 22 | import com.google.firestore.v1.StructuredQuery.CollectionSelector; 23 | import com.google.firestore.v1.StructuredQuery.Direction; 24 | import com.google.firestore.v1.StructuredQuery.FieldReference; 25 | import com.google.firestore.v1.StructuredQuery.Order; 26 | import java.net.InetAddress; 27 | import java.net.UnknownHostException; 28 | import java.time.Duration; 29 | import java.time.temporal.ChronoUnit; 30 | import java.util.ArrayList; 31 | import java.util.List; 32 | import java.util.Locale; 33 | import org.apache.beam.sdk.metrics.Counter; 34 | import org.apache.beam.sdk.metrics.Metrics; 35 | import org.apache.beam.sdk.transforms.Create; 36 | import org.apache.beam.sdk.transforms.DoFn; 37 | import org.apache.beam.sdk.transforms.Filter; 38 | import org.apache.beam.sdk.transforms.PTransform; 39 | import org.apache.beam.sdk.transforms.ParDo; 40 | import org.apache.beam.sdk.transforms.SimpleFunction; 41 | import org.apache.beam.sdk.values.PBegin; 42 | import org.apache.beam.sdk.values.PCollection; 43 | import org.slf4j.Logger; 44 | import org.slf4j.LoggerFactory; 45 | import org.threeten.bp.LocalDateTime; 46 | import org.threeten.bp.ZoneOffset; 47 | import org.threeten.bp.format.DateTimeFormatter; 48 | 49 | /** 50 | * Primitive beam connector for Firestore specific to this application. 51 | * 52 | *

For a general purpose connector see https://issues.apache.org/jira/browse/BEAM-8376 53 | */ 54 | public class FirestoreConnector { 55 | 56 | private static final Logger LOG = LoggerFactory.getLogger(FirestoreConnector.class); 57 | 58 | private static final long SECONDS_IN_HOUR = Duration.of(1, ChronoUnit.HOURS).getSeconds(); 59 | 60 | // Order must be name ascending. Right now, this is the only ordering that the 61 | // Firestore SDK supports. 62 | private static final String NAME_FIELD = "__name__"; 63 | 64 | private static final Counter queriesGenerated = 65 | Metrics.counter(FirestoreConnector.class, "queriesGenerated"); 66 | 67 | /** 68 | * PTransform which generates the PartitionQueryRequests needed to process documents. 69 | */ 70 | public static final class FirestorePartitionQueryCreation 71 | extends PTransform<PBegin, PCollection<PartitionQueryRequest>> { 72 | private final long start; 73 | 74 | public FirestorePartitionQueryCreation(long start) { 75 | this.start = start; 76 | } 77 | 78 | @Override 79 | public PCollection<PartitionQueryRequest> expand(PBegin input) { 80 | IngestionPipelineOptions options = 81 | (IngestionPipelineOptions) input.getPipeline().getOptions(); 82 | LOG.info("Using start time in seconds of {}", start); 83 | long backwardHours = options.getGraceHoursBackwards(); 84 | // To correctly compute how many hours forward we need to look at, when including the 85 | // duration, we need to compute: 86 | // ceil ( forwardHours + durationInSeconds / 3600 ) 87 | // Because Java division rounds down, we compute it as: 88 | // forwardHours + ( duration + 3599 ) / 3600. 89 | long forwardHours = 90 | options.getGraceHoursForwards() 91 | + (options.getDuration() + (SECONDS_IN_HOUR - 1)) / SECONDS_IN_HOUR; 92 | LOG.info( 93 | "{} Querying Firestore for documents in date range: {} to {}.", 94 | getLogPrefix(), 95 | formatDateTime(start - backwardHours * SECONDS_IN_HOUR), 96 | formatDateTime(start + forwardHours * SECONDS_IN_HOUR)); 97 | 98 | return input 99 | .apply("Begin", Create.of(generateQueries(start, backwardHours, forwardHours))) 100 | .apply( 101 | "Create PartitionQuery", 102 | ParDo.of( 103 | new DoFn<StructuredQuery, PartitionQueryRequest>() { 104 | @ProcessElement 105 | public void processElement(ProcessContext context) { 106 | IngestionPipelineOptions options = 107 | context.getPipelineOptions().as(IngestionPipelineOptions.class); 108 | String path = 109 | "".equals(options.getFirestoreProject()) 110 | ? getParentPath(options.getProject()) 111 | : getParentPath(options.getFirestoreProject()); 112 | PartitionQueryRequest request = 113 | PartitionQueryRequest.newBuilder() 114 | .setPartitionCount(options.getPartitionCount()) 115 | .setParent(path) 116 | .setStructuredQuery(context.element()) 117 | .build(); 118 | context.output(request); 119 | } 120 | })); 121 | } 122 | } 123 | 124 | /** 125 | * If a query has zero results, a {@link RunQueryResponse} without a document will still be 126 | * returned. Provide a filter which can be used to keep only those {@code RunQueryResponse}s that 127 | * have documents. 128 | */ 129 | static Filter<RunQueryResponse> filterRunQueryResponseHasDocument() { 130 | return Filter.by( 131 | new SimpleFunction<RunQueryResponse, Boolean>() { 132 | @Override 133 | public Boolean apply(RunQueryResponse input) { 134 | return input.hasDocument(); 135 | } 136 | }); 137 | } 138 | 139 | private static Iterable<StructuredQuery> generateQueries( 140 | long startTime, long backwardHours, long forwardHours) { 141 | List<StructuredQuery> structuredQueries = new ArrayList<>(); 142 | // Each document in Firestore is stored under a Date collection with the format: 143 | // yyyy-MM-dd-HH.
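// (Illustrative example: a share written at 2020-12-15T22:10Z in UTC would sit under the "2020-12-15-22" collection; see formatDateTime below.)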
144 | // To query all documents uploaded around startTime within the specified window, construct 145 | // a query for each hour within the window: [startTime - backwardHours, startTime + 146 | // forwardHours]. 147 | for (long i = (-1 * backwardHours); i <= forwardHours; i++) { 148 | long timeToQuery = startTime + i * SECONDS_IN_HOUR; 149 | // Reformat the date to mirror the format of documents in Firestore: yyyy-MM-dd-HH. 150 | String formattedDateTime = formatDateTime(timeToQuery); 151 | // Construct and output query. 152 | StructuredQuery query = 153 | StructuredQuery.newBuilder() 154 | .addFrom( 155 | CollectionSelector.newBuilder() 156 | .setCollectionId(formattedDateTime) 157 | .setAllDescendants(true) 158 | .build()) 159 | .addOrderBy( 160 | Order.newBuilder() 161 | .setField(FieldReference.newBuilder().setFieldPath(NAME_FIELD).build()) 162 | .setDirection(Direction.ASCENDING) 163 | .build()) 164 | .build(); 165 | structuredQueries.add(query); 166 | queriesGenerated.inc(); 167 | } 168 | LOG.info("{} Generated {} Firestore queries.", getLogPrefix(), structuredQueries.size()); 169 | return structuredQueries; 170 | } 171 | 172 | private static String getParentPath(String projectId) { 173 | return DatabaseRootName.format(projectId, "(default)") + "/documents"; 174 | } 175 | 176 | // Formats a time given in epoch seconds in the format: yyyy-MM-dd-HH 177 | public static String formatDateTime(Long time) { 178 | LocalDateTime dateTimeToQuery = LocalDateTime.ofEpochSecond(time, 0, ZoneOffset.UTC); 179 | // Reformat the date to mirror the format of documents in Firestore: yyyy-MM-dd-HH. 180 | DateTimeFormatter formatter = 181 | DateTimeFormatter.ofPattern("yyyy-MM-dd-HH", Locale.US).withZone(ZoneOffset.UTC); 182 | return formatter.format(dateTimeToQuery); 183 | } 184 | 185 | // TODO: use org.slf4j.MDC (mapped diagnostic content) or something cooler here 186 | private static String getLogPrefix() { 187 | String host = "unknown"; 188 | try { 189 | InetAddress address = InetAddress.getLocalHost(); 190 | host = address.getHostName(); 191 | } catch (UnknownHostException ignore) { 192 | } 193 | return "[" 194 | + host 195 | + "|" 196 | + ProcessHandle.current().pid() 197 | + "|" 198 | + Thread.currentThread().getName() 199 | + "] - "; 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/IngestionPipelineOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import com.amazonaws.auth.AWSCredentialsProvider; 19 | import java.time.Clock; 20 | import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; 21 | import org.apache.beam.sdk.io.aws.options.AwsOptions; 22 | import org.apache.beam.sdk.options.Default; 23 | import org.apache.beam.sdk.options.Description; 24 | 25 | /** Specific options for the pipeline. */ 26 | public interface IngestionPipelineOptions extends DataflowPipelineOptions { 27 | 28 | int UNSPECIFIED_START = -1; 29 | 30 | /** Firestore Project */ 31 | @Description("Firestore Project") 32 | @Default.String("") 33 | String getFirestoreProject(); 34 | 35 | void setFirestoreProject(String value); 36 | 37 | /** PHA Manifest file URL. */ 38 | @Description("PHA Manifest file URL") 39 | @Default.String("") 40 | String getPhaManifestURL(); 41 | 42 | void setPhaManifestURL(String value); 43 | 44 | /** PHA AWS bucket region. */ 45 | @Description("PHA AWS bucket region") 46 | @Default.String("") 47 | String getPhaAwsBucketRegion(); 48 | 49 | void setPhaAwsBucketRegion(String value); 50 | 51 | /** PHA AWS bucket name. */ 52 | @Description("PHA AWS bucket name") 53 | @Default.String("") 54 | String getPhaAwsBucketName(); 55 | 56 | void setPhaAwsBucketName(String value); 57 | 58 | /** PHA AWS bucket role. */ 59 | @Description("PHA AWS bucket role") 60 | @Default.String("") 61 | String getPhaAwsBucketRole(); 62 | 63 | void setPhaAwsBucketRole(String value); 64 | 65 | /** 66 | * Directory to place output files for PHA. If the directory does not exist, then it will 67 | * automatically be created. 68 | * 69 | *

If set, this flag overrides an output location set in the PHA manifest file. 70 | */ 71 | @Description( 72 | "Directory to place output files for PHA (Should end in 2-letter state abbreviation).") 73 | @Default.String("") 74 | String getPhaOutput(); 75 | 76 | void setPhaOutput(String value); 77 | 78 | /** Facilitator Manifest file URL. */ 79 | @Description("Facilitator Manifest file URL") 80 | @Default.String("") 81 | String getFacilitatorManifestURL(); 82 | 83 | void setFacilitatorManifestURL(String value); 84 | 85 | /** Facilitator AWS bucket region. */ 86 | @Description("Facilitator AWS bucket region") 87 | @Default.String("") 88 | String getFacilitatorAwsBucketRegion(); 89 | 90 | void setFacilitatorAwsBucketRegion(String value); 91 | 92 | /** Facilitator AWS bucket name. */ 93 | @Description("Facilitator AWS bucket name") 94 | @Default.String("") 95 | String getFacilitatorAwsBucketName(); 96 | 97 | void setFacilitatorAwsBucketName(String value); 98 | 99 | /** Facilitator AWS bucket role. */ 100 | @Description("Facilitator AWS bucket role") 101 | @Default.String("") 102 | String getFacilitatorAwsBucketRole(); 103 | 104 | void setFacilitatorAwsBucketRole(String value); 105 | 106 | /** 107 | * Directory to place output files for Facilitator. If the directory does not exist, then it will 108 | * automatically be created. 109 | * 110 | *

If set, this flag overrides an output location set in the Facilitator manifest file. 111 | */ 112 | @Description( 113 | "Directory to place output files for Facilitator. (Should end in 2-letter state" 114 | + " abbreviation)") 115 | @Default.String("") 116 | String getFacilitatorOutput(); 117 | 118 | void setFacilitatorOutput(String value); 119 | 120 | /** 121 | * Start time of window to process. Used to filter documents that have been read from Firestore on 122 | * the "Creation" field. Defaults to current time rounded down to previous alignment period based 123 | * on the duration. 124 | */ 125 | @Description("Start time in UTC seconds of documents to process") 126 | @Default.Long(UNSPECIFIED_START) 127 | Long getStartTime(); 128 | 129 | void setStartTime(Long value); 130 | 131 | /** 132 | * Duration of time window to process. Used to filter documents that have been read from Firestore 133 | * on the "Creation" field. 134 | */ 135 | @Description("Duration of window in seconds") 136 | @Default.Long(3600) 137 | Long getDuration(); 138 | 139 | void setDuration(Long value); 140 | 141 | /** 142 | * Hours to look before startTime when querying Firestore collection. Used to construct document 143 | * path for Firestore reads. 144 | */ 145 | @Description( 146 | "Hours to read backwards from startTime. Used to construct document path for Firestore" 147 | + " reads.") 148 | @Default.Long(1) 149 | Long getGraceHoursBackwards(); 150 | 151 | void setGraceHoursBackwards(Long value); 152 | 153 | /** 154 | * Hours to look before startTime when querying Firestore. Used to construct document path for 155 | * Firestore reads. 156 | */ 157 | @Description( 158 | "Hours to read forward from startTime. Used to construct document path for Firestore" 159 | + " reads.") 160 | @Default.Long(1) 161 | Long getGraceHoursForwards(); 162 | 163 | void setGraceHoursForwards(Long value); 164 | 165 | /** Maximum number of query partitions to create for running Firestore read. */ 166 | @Description("Maximum number of partitions to create for Firestore query.") 167 | @Default.Long(20) 168 | Long getPartitionCount(); 169 | 170 | void setPartitionCount(Long value); 171 | 172 | /** Batch size of individual files. */ 173 | @Description("Batch size of individual files.") 174 | @Default.Long(200000) 175 | Long getBatchSize(); 176 | 177 | void setBatchSize(Long value); 178 | 179 | /** Batch size of Firestore batch deletes. */ 180 | @Description("Batch size of Firestore deletes.") 181 | @Default.Long(100) 182 | Long getDeleteBatchSize(); 183 | 184 | void setDeleteBatchSize(Long value); 185 | 186 | /** 187 | * Signing key resource name. See https://cloud.google.com/kms/docs/resource-hierarchy E.g., 188 | * projects/$PROJECT_NAME/locations/global/keyRings/$RING/cryptoKeys/$KEY/cryptoKeyVersions/$VERSION 189 | */ 190 | @Description("KMS resource name for signature generation") 191 | @Default.String("") 192 | String getKeyResourceName(); 193 | 194 | void setKeyResourceName(String value); 195 | 196 | /** Whether to check device hardware attestations */ 197 | @Description("Verify device attestations") 198 | @Default.Boolean(true) 199 | Boolean getDeviceAttestation(); 200 | 201 | void setDeviceAttestation(Boolean value); 202 | 203 | @Description("AWS region used by the AWS client") 204 | String getAwsRegion(); 205 | 206 | void setAwsRegion(String value); 207 | 208 | @Description( 209 | "The credential instance that should be used to authenticate against AWS services. 
The option value must contain \"@type\" field and an AWS Credentials Provider class name as the field value. Refer to DefaultAWSCredentialsProviderChain Javadoc for usage help. For example, to specify the AWS key ID and secret, specify the following: {\"@type\": \"AWSStaticCredentialsProvider\", \"awsAccessKeyId\":\"\", \"awsSecretKey\":\"\"}") 210 | @Default.InstanceFactory(AwsOptions.AwsUserCredentialsFactory.class) 211 | AWSCredentialsProvider getAwsCredentialsProvider(); 212 | 213 | void setAwsCredentialsProvider(AWSCredentialsProvider value); 214 | 215 | /** 216 | * @return {@code startTime} from options/flags if set. Otherwise, rounds current time down to 217 | * start of {@code numWindows} windows back of length {@code duration} option/flag. 218 | */ 219 | static long calculatePipelineStart(long start, long duration, int numWindows, Clock clock) { 220 | if (start != UNSPECIFIED_START) { 221 | return start; 222 | } 223 | return ((clock.instant().getEpochSecond() / duration) - numWindows) * duration; 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/main/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/PrioSerializationHelper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare; 19 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.DataShareMetadata; 20 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.EncryptedShare; 21 | import java.io.ByteArrayOutputStream; 22 | import java.io.File; 23 | import java.io.IOException; 24 | import java.lang.reflect.InvocationTargetException; 25 | import java.nio.ByteBuffer; 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | import java.util.UUID; 29 | import org.abetterinternet.prio.v1.PrioDataSharePacket; 30 | import org.abetterinternet.prio.v1.PrioIngestionHeader; 31 | import org.apache.avro.Schema; 32 | import org.apache.avro.file.DataFileReader; 33 | import org.apache.avro.file.DataFileWriter; 34 | import org.apache.avro.io.DatumReader; 35 | import org.apache.avro.io.DatumWriter; 36 | import org.apache.avro.specific.SpecificDatumReader; 37 | import org.apache.avro.specific.SpecificDatumWriter; 38 | import org.apache.avro.specific.SpecificRecordBase; 39 | import org.apache.avro.util.Utf8; 40 | import org.slf4j.Logger; 41 | import org.slf4j.LoggerFactory; 42 | 43 | /** 44 | * Helpers for serializing and deserializing Prio data shares into (or from) the Apache Avro file 45 | * format. 
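 * For example (illustrative call only), {@code serializeRecords(headers, PrioIngestionHeader.class, PrioIngestionHeader.getClassSchema())} returns a ByteBuffer holding the Avro-encoded batch of headers.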
46 | */ 47 | public class PrioSerializationHelper { 48 | 49 | private PrioSerializationHelper() {} 50 | 51 | private static final Logger LOG = LoggerFactory.getLogger(PrioSerializationHelper.class); 52 | 53 | public static <T extends SpecificRecordBase> ByteBuffer serializeRecords( 54 | List<T> records, Class<T> recordClass, Schema schema) throws IOException { 55 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 56 | DatumWriter<T> dataShareDatumWriter = new SpecificDatumWriter<>(recordClass); 57 | try (DataFileWriter<T> dataFileWriter = new DataFileWriter<>(dataShareDatumWriter)) { 58 | dataFileWriter.create(schema, outputStream); 59 | 60 | for (T record : records) { 61 | dataFileWriter.append(record); 62 | } 63 | 64 | dataFileWriter.flush(); 65 | dataFileWriter.close(); 66 | } 67 | return ByteBuffer.wrap(outputStream.toByteArray()); 68 | } 69 | 70 | public static <T extends SpecificRecordBase> List<T> deserializeRecords( 71 | Class<T> recordClass, String pathname) 72 | throws IOException, IllegalAccessException, InstantiationException { 73 | DatumReader<T> datumReader = new SpecificDatumReader<>(recordClass); 74 | List<T> results = new ArrayList<>(); 75 | try (DataFileReader<T> dataFileReader = new DataFileReader<>(new File(pathname), datumReader)) { 76 | T record; 77 | while (dataFileReader.hasNext()) { 78 | try { 79 | record = recordClass.getDeclaredConstructor().newInstance(); 80 | record = dataFileReader.next(record); 81 | results.add(record); 82 | } catch (InvocationTargetException | NoSuchMethodException e) { 83 | LOG.error("PrioSerializationHelper Record instance creation error:", e); 84 | } 85 | } 86 | } 87 | return results; 88 | } 89 | 90 | public static PrioIngestionHeader createHeader( 91 | DataShareMetadata metadata, byte[] digest, UUID uuid, long startTime, long duration) { 92 | return PrioIngestionHeader.newBuilder() 93 | .setBatchUuid(new Utf8(uuid.toString())) 94 | .setName(new Utf8(metadata.getMetricName())) 95 | .setBatchStartTime(startTime) 96 | .setBatchEndTime(startTime + duration) 97 | .setNumberOfServers(metadata.getNumberOfServers()) 98 | .setBins(metadata.getBins()) 99 | .setHammingWeight(metadata.getHammingWeight()) 100 | .setPrime(metadata.getPrime()) 101 | .setEpsilon(metadata.getEpsilon()) 102 | .setPacketFileDigest(ByteBuffer.wrap(digest)) 103 | .build(); 104 | } 105 | 106 | public static List<PrioDataSharePacket> splitPackets(DataShare dataShare) { 107 | List<EncryptedShare> encryptedDataShares = dataShare.getEncryptedDataShares(); 108 | List<PrioDataSharePacket> splitDataShares = new ArrayList<>(); 109 | for (EncryptedShare encryptedShare : encryptedDataShares) { 110 | splitDataShares.add( 111 | PrioDataSharePacket.newBuilder() 112 | .setEncryptedPayload(ByteBuffer.wrap(encryptedShare.getEncryptedPayload())) 113 | .setEncryptionKeyId(null) 114 | .setRPit(dataShare.getRPit()) 115 | .setUuid(dataShare.getUuid()) 116 | .setVersionConfiguration(null) 117 | .setDeviceNonce(null) 118 | .build()); 119 | } 120 | return splitDataShares; 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/proto/analytics.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | syntax = "proto2"; 16 | 17 | package com.google.exposurenotification.privateanalytics.ingestion; 18 | 19 | option java_multiple_files = true; 20 | 21 | // Parameters for the Prio algorithm https://crypto.stanford.edu/prio/ 22 | message PrioAlgorithmParameters { 23 | // Number of bins for this aggregation. 24 | optional int32 bins = 1; 25 | 26 | optional double epsilon = 2; 27 | 28 | // The value of prime p used in this aggregation. The prime needs to be 29 | // congruent to 1 modulo 2 * next_power_two(bins + 1) to support Fourier 30 | // transform. 31 | // The aggregation will be performed modulo this prime number. Usually, the prime 32 | // is chosen large enough so that the aggregation happens over the integers. 33 | // 34 | optional uint64 prime = 3; 35 | // The number of servers that will be involved in the aggregation. 36 | // 37 | // Currently, the library only supports two servers. 38 | optional int32 number_servers = 4; 39 | 40 | // If specified, the Hamming weight of the vector will be verified during the 41 | // validity check on the server. Furthermore, the differential privacy noise 42 | // will be selected by replacing the input with a random "hamming_weight"-hot 43 | // vector with probability 1/(1+e^epsilon). 44 | // If not specified, the input will be randomized element-wise. 45 | optional int32 hamming_weight = 5; 46 | } 47 | 48 | message ResponseStatus { 49 | // NEXT ID: 4 50 | enum StatusCode { 51 | // Operation experienced an unknown failure 52 | UNKNOWN_FAILURE = 0; 53 | 54 | // Operation finished successfully 55 | OK = 1; 56 | 57 | // Operation was cancelled 58 | CANCELLED_FAILURE = 2; 59 | 60 | // Operation failed due to an invalid parameter error 61 | INVALID_PARAMETER_FAILURE = 3; 62 | } 63 | 64 | // The status code of the response 65 | optional StatusCode status_code = 1 [default = UNKNOWN_FAILURE]; 66 | 67 | // Detailed error message 68 | optional string error_details = 2; 69 | } 70 | 71 | // Response message for packet creation 72 | message CreatePacketsResponse { 73 | // Status of the response 74 | optional ResponseStatus response_status = 1; 75 | 76 | // A list of datashares to be dispatched to server(s). 77 | repeated bytes shares = 2; 78 | } 79 | 80 | // Parameters for packet creation 81 | message CreatePacketsParameters { 82 | // Parameters for Prio. 83 | optional PrioAlgorithmParameters prio_parameters = 1; 84 | 85 | // Public keys of the servers, represented as a Base64 encoded string. 86 | repeated string public_keys = 2; 87 | 88 | // Input data. Represented as an array of binary bits in a uint32 array.
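// For example (illustrative values), a one-hot contribution over four bins would be encoded as data_bits = [0, 1, 0, 0].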
89 | repeated uint32 data_bits = 3; 90 | } 91 | 92 | message Payload { 93 | optional string uuid = 1; 94 | optional string created = 2 [deprecated = true]; 95 | optional PrioAlgorithmParameters prio_params = 3; 96 | optional int32 schema_version = 4; 97 | optional CreatePacketsResponse packets_response = 5; 98 | } -------------------------------------------------------------------------------- /src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DataProcessorManifestTest.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 16 | 17 | import static com.google.common.truth.Truth.assertThat; 18 | 19 | import java.net.MalformedURLException; 20 | import java.net.URL; 21 | import org.junit.Assert; 22 | import org.junit.Test; 23 | import org.junit.runner.RunWith; 24 | import org.junit.runners.JUnit4; 25 | 26 | /** Unit tests for {@link DataProcessorManifest}. */ 27 | @RunWith(JUnit4.class) 28 | public class DataProcessorManifestTest { 29 | 30 | @Test 31 | public void testParsing() { 32 | URL manifestUrl = 33 | getClass() 34 | .getResource( 35 | "/com/google/exposurenotification/privateanalytics/ingestion/pipeline/test-manifest.json"); 36 | DataProcessorManifest manifest = new DataProcessorManifest(manifestUrl.toString()); 37 | assertThat(manifest.getIngestionBucket()) 38 | .isEqualTo("s3://us-west-1/prio-demo-gcp-test-pha-1-ingestor-1-ingestion"); 39 | assertThat(manifest.getAwsBucketRegion()).isEqualTo("us-west-1"); 40 | assertThat(manifest.getAwsBucketName()) 41 | .isEqualTo("prio-demo-gcp-test-pha-1-ingestor-1-ingestion"); 42 | assertThat(manifest.getAwsRole()) 43 | .isEqualTo("arn:aws:iam::12345678:role/AWSRoleAssumedByGCPSvcAcc"); 44 | } 45 | 46 | @Test 47 | public void testInvalidURLParsing() throws MalformedURLException { 48 | URL manifestUrl = new URL("http://nothing/to/see/here"); 49 | Assert.assertThrows( 50 | DataProcessorManifest.ManifestProcessingRuntimeException.class, 51 | () -> new DataProcessorManifest(manifestUrl.toString())); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DateFilterFnTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare; 19 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.DataShareMetadata; 20 | import java.util.Arrays; 21 | import java.util.Collections; 22 | import java.util.List; 23 | import org.apache.beam.sdk.testing.PAssert; 24 | import org.apache.beam.sdk.testing.TestPipeline; 25 | import org.apache.beam.sdk.testing.ValidatesRunner; 26 | import org.apache.beam.sdk.transforms.Create; 27 | import org.apache.beam.sdk.transforms.ParDo; 28 | import org.apache.beam.sdk.values.PCollection; 29 | import org.junit.Rule; 30 | import org.junit.Test; 31 | import org.junit.experimental.categories.Category; 32 | import org.junit.runner.RunWith; 33 | import org.junit.runners.JUnit4; 34 | 35 | /** Unit tests for {@link DateFilterFn}. */ 36 | @RunWith(JUnit4.class) 37 | public class DateFilterFnTest { 38 | 39 | public transient IngestionPipelineOptions options = 40 | TestPipeline.testingPipelineOptions().as(IngestionPipelineOptions.class); 41 | 42 | @Rule public final transient TestPipeline pipeline = TestPipeline.fromOptions(options); 43 | 44 | @Test 45 | @Category(ValidatesRunner.class) 46 | public void testDateFilter() { 47 | DataShareMetadata meta = DataShareMetadata.builder().setMetricName("sampleMetric").build(); 48 | List<DataShare> dataShares = 49 | Arrays.asList( 50 | DataShare.builder() 51 | .setPath("id1") 52 | .setCreatedMs(1000L) 53 | .setDataShareMetadata(meta) 54 | .build(), 55 | DataShare.builder() 56 | .setPath("id2") 57 | .setCreatedMs(2000L) 58 | .setDataShareMetadata(meta) 59 | .build(), 60 | DataShare.builder() 61 | .setPath("id3") 62 | .setCreatedMs(3000L) 63 | .setDataShareMetadata(meta) 64 | .build(), 65 | DataShare.builder().setPath("missing").setDataShareMetadata(meta).build()); 66 | 67 | options.setStartTime(2L); 68 | options.setDuration(1L); 69 | options.setDeviceAttestation(false); 70 | 71 | PCollection<DataShare> input = pipeline.apply(Create.of(dataShares)); 72 | 73 | PCollection<DataShare> output = input.apply(ParDo.of(new DateFilterFn())); 74 | 75 | PAssert.that(output) 76 | .containsInAnyOrder( 77 | Collections.singletonList( 78 | DataShare.builder() 79 | .setPath("id2") 80 | .setCreatedMs(2000L) 81 | .setDataShareMetadata(meta) 82 | .build())); 83 | pipeline.run().waitUntilFinish(); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/DeletionPipelineIT.java: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 16 | 17 | import static com.google.common.truth.Truth.assertThat; 18 | import static com.google.exposurenotification.privateanalytics.ingestion.pipeline.FirestoreConnector.formatDateTime; 19 | import static org.junit.Assert.assertThrows; 20 | 21 | import com.google.api.core.ApiFutures; 22 | import com.google.api.gax.core.FixedCredentialsProvider; 23 | import com.google.api.gax.rpc.NotFoundException; 24 | import com.google.auth.oauth2.GoogleCredentials; 25 | import com.google.cloud.firestore.v1.FirestoreClient; 26 | import com.google.cloud.firestore.v1.FirestoreClient.ListDocumentsPagedResponse; 27 | import com.google.cloud.firestore.v1.FirestoreSettings; 28 | import com.google.common.collect.Iterables; 29 | import com.google.common.collect.Streams; 30 | import com.google.firestore.v1.BatchWriteRequest; 31 | import com.google.firestore.v1.DatabaseRootName; 32 | import com.google.firestore.v1.Document; 33 | import com.google.firestore.v1.GetDocumentRequest; 34 | import com.google.firestore.v1.ListDocumentsRequest; 35 | import com.google.firestore.v1.Write; 36 | import java.io.IOException; 37 | import java.util.ArrayList; 38 | import java.util.List; 39 | import java.util.UUID; 40 | import java.util.concurrent.ExecutionException; 41 | import java.util.concurrent.ThreadLocalRandom; 42 | import java.util.concurrent.TimeUnit; 43 | import java.util.stream.Collectors; 44 | import java.util.stream.IntStream; 45 | import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; 46 | import org.apache.beam.sdk.PipelineResult; 47 | import org.apache.beam.sdk.metrics.MetricNameFilter; 48 | import org.apache.beam.sdk.metrics.MetricsFilter; 49 | import org.apache.beam.sdk.testing.NeedsRunner; 50 | import org.apache.beam.sdk.testing.TestPipeline; 51 | import org.junit.After; 52 | import org.junit.Before; 53 | import org.junit.Rule; 54 | import org.junit.Test; 55 | import org.junit.experimental.categories.Category; 56 | import org.junit.runner.RunWith; 57 | import org.junit.runners.JUnit4; 58 | 59 | /** Integration tests for {@link DeletionPipeline}. */ 60 | @RunWith(JUnit4.class) 61 | public class DeletionPipelineIT { 62 | 63 | // Randomize document creation time to avoid collisions between simultaneously running tests. 64 | // FirestoreReader will query all documents with created times within one hour of this time. 65 | static final long CREATION_TIME = ThreadLocalRandom.current().nextLong(0L, 1500000000L); 66 | static final long DURATION = 10800L; 67 | static final String PROJECT = System.getenv("PROJECT"); 68 | // Randomize test collection name to avoid collisions between simultaneously running tests. 
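// e.g. "uuid3f2504e0_4f89_11d3_9a0c_0305e82c3301" (illustrative value; generated from a random UUID below).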
69 | static final String TEST_COLLECTION_NAME = 70 | "uuid" + UUID.randomUUID().toString().replace("-", "_"); 71 | static final String KEY_RESOURCE_NAME = System.getenv("KEY_RESOURCE_NAME"); 72 | static final DatabaseRootName DATABASE_ROOT_NAME = DatabaseRootName.of(PROJECT, "(default)"); 73 | static final String BASE_COLLECTION_NAME = 74 | String.format("%s/documents/%s", DATABASE_ROOT_NAME, TEST_COLLECTION_NAME); 75 | 76 | List documentNames; 77 | FirestoreClient client; 78 | 79 | public transient IngestionPipelineOptions testOptions = 80 | TestPipeline.testingPipelineOptions().as(IngestionPipelineOptions.class); 81 | 82 | @Rule public final transient TestPipeline testPipeline = TestPipeline.fromOptions(testOptions); 83 | 84 | @Before 85 | public void setUp() throws IOException { 86 | documentNames = new ArrayList<>(); 87 | client = getFirestoreClient(); 88 | } 89 | 90 | @After 91 | public void tearDown() throws ExecutionException, InterruptedException { 92 | cleanUpParentResources(client); 93 | FirestoreClientTestUtils.shutdownFirestoreClient(client); 94 | } 95 | 96 | @Test 97 | @Category(NeedsRunner.class) 98 | public void testFirestoreDeleterDeletesDocs() throws InterruptedException { 99 | testOptions.as(DataflowPipelineOptions.class).setMaxNumWorkers(1); 100 | testOptions.setStartTime(CREATION_TIME); 101 | testOptions.setProject(PROJECT); 102 | testOptions.setDuration(DURATION); 103 | testOptions.setKeyResourceName(KEY_RESOURCE_NAME); 104 | int numDocs = 500; 105 | seedDatabase(numDocs); 106 | 107 | DeletionPipeline.buildDeletionPipeline(testOptions, testPipeline); 108 | PipelineResult result = testPipeline.run(); 109 | result.waitUntilFinish(); 110 | 111 | // Assert that processed documents have been deleted. 112 | documentNames.forEach( 113 | name -> 114 | assertThrows(NotFoundException.class, () -> fetchDocumentFromFirestore(name, client))); 115 | MetricNameFilter documentsDeletedMetricName = 116 | MetricNameFilter.named( 117 | "org.apache.beam.sdk.io.gcp.firestore.FirestoreV1.BatchWrite", "writes_successful"); 118 | long documentsDeleted = 119 | result 120 | .metrics() 121 | .queryMetrics(MetricsFilter.builder().addNameFilter(documentsDeletedMetricName).build()) 122 | .getCounters() 123 | .iterator() 124 | .next() 125 | .getCommitted(); 126 | assertThat(documentsDeleted).isEqualTo(numDocs); 127 | } 128 | 129 | private static FirestoreClient getFirestoreClient() throws IOException { 130 | FirestoreSettings settings = 131 | FirestoreSettings.newBuilder() 132 | .setCredentialsProvider( 133 | FixedCredentialsProvider.create(GoogleCredentials.getApplicationDefault())) 134 | .build(); 135 | return FirestoreClient.create(settings); 136 | } 137 | 138 | private static void cleanUpParentResources(FirestoreClient client) 139 | throws ExecutionException, InterruptedException { 140 | ListDocumentsPagedResponse documents = 141 | client.listDocuments( 142 | ListDocumentsRequest.newBuilder() 143 | .setParent("projects/" + PROJECT + "/databases/(default)/documents") 144 | .setCollectionId(TEST_COLLECTION_NAME) 145 | .build()); 146 | ApiFutures.allAsList( 147 | Streams.stream(Iterables.partition(documents.iterateAll(), 500)) 148 | .map( 149 | docs -> 150 | docs.stream() 151 | .map(Document::getName) 152 | .map(name -> Write.newBuilder().setDelete(name).build()) 153 | .collect(Collectors.toList())) 154 | .map(DeletionPipelineIT::getBatchWriteRequest) 155 | .map(request -> client.batchWriteCallable().futureCall(request)) 156 | .collect(Collectors.toList())) 157 | .get(); 158 | } 159 | 160 | private 
static Document fetchDocumentFromFirestore(String path, FirestoreClient client) { 161 | return client.getDocument(GetDocumentRequest.newBuilder().setName(path).build()); 162 | } 163 | 164 | private void seedDatabase(int numDocsToSeed) throws InterruptedException { 165 | // Adding a wait here to give the Firestore instance time to initialize before attempting 166 | // to connect. 167 | TimeUnit.SECONDS.sleep(1); 168 | documentNames = 169 | IntStream.rangeClosed(1, numDocsToSeed) 170 | .mapToObj( 171 | i -> 172 | String.format( 173 | "%s/testDoc%05d/%s/metric1", 174 | BASE_COLLECTION_NAME, i, formatDateTime(CREATION_TIME))) 175 | .collect(Collectors.toList()); 176 | 177 | List batchWriteRequests = 178 | Streams.stream(Iterables.partition(documentNames, 500)) 179 | .map( 180 | names -> 181 | names.stream() 182 | .map( 183 | name -> 184 | Write.newBuilder() 185 | .setUpdate(Document.newBuilder().setName(name).build()) 186 | .build()) 187 | .collect(Collectors.toList())) 188 | .map(DeletionPipelineIT::getBatchWriteRequest) 189 | .collect(Collectors.toList()); 190 | 191 | for (BatchWriteRequest batchWriteRequest : batchWriteRequests) { 192 | client.batchWrite(batchWriteRequest); 193 | } 194 | } 195 | 196 | private static BatchWriteRequest getBatchWriteRequest(List writes) { 197 | return BatchWriteRequest.newBuilder() 198 | .setDatabase(DATABASE_ROOT_NAME.toString()) 199 | .addAllWrites(writes) 200 | .build(); 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/FirestoreClientTestUtils.java: -------------------------------------------------------------------------------- 1 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 2 | 3 | import com.google.cloud.firestore.v1.FirestoreClient; 4 | import java.time.Duration; 5 | import java.util.concurrent.TimeUnit; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | public final class FirestoreClientTestUtils { 10 | private static final Logger LOG = LoggerFactory.getLogger(FirestoreClientTestUtils.class); 11 | 12 | static final Duration FIRESTORE_SHUTDOWN_TIMEOUT = Duration.ofSeconds(30); 13 | 14 | static void shutdownFirestoreClient(FirestoreClient client) { 15 | client.shutdown(); 16 | LOG.info("Waiting for FirestoreClient to shutdown."); 17 | try { 18 | client.awaitTermination(FIRESTORE_SHUTDOWN_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS); 19 | } catch (InterruptedException e) { 20 | LOG.warn("Interrupted while waiting for client shutdown", e); 21 | Thread.currentThread().interrupt(); 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/IngestionPipelineOptionsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import static com.google.common.truth.Truth.assertThat; 19 | 20 | import java.time.Clock; 21 | import java.time.Instant; 22 | import java.time.ZoneId; 23 | import org.junit.Test; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.JUnit4; 26 | 27 | /** Unit tests for {@link IngestionPipelineOptions}. */ 28 | @RunWith(JUnit4.class) 29 | public class IngestionPipelineOptionsTest { 30 | 31 | @Test 32 | public void testCalculatePipelineStart() { 33 | assertThat(IngestionPipelineOptions.calculatePipelineStart(123, 5, 1, Clock.systemUTC())) 34 | .isEqualTo(123); 35 | assertThat(IngestionPipelineOptions.calculatePipelineStart(123, 5, 4, Clock.systemUTC())) 36 | .isEqualTo(123); 37 | assertThat( 38 | IngestionPipelineOptions.calculatePipelineStart( 39 | IngestionPipelineOptions.UNSPECIFIED_START, 40 | 10, 41 | 1, 42 | Clock.fixed(Instant.ofEpochSecond(32), ZoneId.systemDefault()))) 43 | .isEqualTo(20); 44 | assertThat( 45 | IngestionPipelineOptions.calculatePipelineStart( 46 | IngestionPipelineOptions.UNSPECIFIED_START, 47 | 10, 48 | 2, 49 | Clock.fixed(Instant.ofEpochSecond(32), ZoneId.systemDefault()))) 50 | .isEqualTo(10); 51 | assertThat( 52 | IngestionPipelineOptions.calculatePipelineStart( 53 | IngestionPipelineOptions.UNSPECIFIED_START, 54 | 10, 55 | 1, 56 | Clock.fixed(Instant.ofEpochSecond(20), ZoneId.systemDefault()))) 57 | .isEqualTo(10); 58 | assertThat( 59 | IngestionPipelineOptions.calculatePipelineStart( 60 | IngestionPipelineOptions.UNSPECIFIED_START, 61 | // default ingestion pipeline window 62 | // https://github.com/google/exposure-notifications-private-analytics-ingestion/blob/ebf484edf5969d2b7113534db7450f61a937ecf0/terraform/variables.tf#L79 63 | 3600, 64 | 1, 65 | Clock.fixed(Instant.ofEpochSecond(1608067718), ZoneId.of("UTC")))) 66 | .isEqualTo(1608062400); 67 | assertThat( 68 | IngestionPipelineOptions.calculatePipelineStart( 69 | IngestionPipelineOptions.UNSPECIFIED_START, 70 | // default deletion pipeline window 71 | // https://github.com/google/exposure-notifications-private-analytics-ingestion/blob/ebf484edf5969d2b7113534db7450f61a937ecf0/terraform/variables.tf#L91 72 | 43200, 73 | 2, 74 | Clock.fixed(Instant.ofEpochSecond(1608033600), ZoneId.of("UTC")))) 75 | .isEqualTo(1607947200); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/IngestionPipelineTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare; 19 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.DataShareMetadata; 20 | import java.util.ArrayList; 21 | import java.util.Arrays; 22 | import java.util.Collections; 23 | import java.util.List; 24 | import org.apache.beam.sdk.testing.PAssert; 25 | import org.apache.beam.sdk.testing.TestPipeline; 26 | import org.apache.beam.sdk.testing.ValidatesRunner; 27 | import org.apache.beam.sdk.transforms.Count; 28 | import org.apache.beam.sdk.transforms.Create; 29 | import org.apache.beam.sdk.transforms.Keys; 30 | import org.apache.beam.sdk.transforms.Values; 31 | import org.apache.beam.sdk.values.KV; 32 | import org.apache.beam.sdk.values.PCollection; 33 | import org.junit.Rule; 34 | import org.junit.Test; 35 | import org.junit.experimental.categories.Category; 36 | import org.junit.runner.RunWith; 37 | import org.junit.runners.JUnit4; 38 | 39 | /** Unit tests for {@link IngestionPipeline}. */ 40 | @RunWith(JUnit4.class) 41 | public class IngestionPipelineTest { 42 | 43 | public transient IngestionPipelineOptions options = 44 | TestPipeline.testingPipelineOptions().as(IngestionPipelineOptions.class); 45 | 46 | @Rule public final transient TestPipeline pipeline = TestPipeline.fromOptions(options); 47 | 48 | @Test 49 | @Category(ValidatesRunner.class) 50 | public void processDataSharesValid() { 51 | options.setStartTime(1L); 52 | options.setDuration(2L); 53 | options.setBatchSize(1L); 54 | options.setDeviceAttestation(false); 55 | 56 | DataShareMetadata meta = DataShareMetadata.builder().setMetricName("sampleMetric").build(); 57 | List<String> certs = new ArrayList<>(); 58 | certs.add("cert1"); 59 | certs.add("cert2"); 60 | certs.add("cert3"); 61 | List<DataShare> inputData = 62 | Arrays.asList( 63 | DataShare.builder() 64 | .setCertificateChain(certs) 65 | .setPath("id1") 66 | .setCreatedMs(1000L) 67 | .setDataShareMetadata(meta) 68 | .build(), 69 | DataShare.builder() 70 | .setCertificateChain(certs) 71 | .setPath("id2") 72 | .setCreatedMs(2000L) 73 | .setDataShareMetadata(meta) 74 | .build(), 75 | DataShare.builder() 76 | .setCertificateChain(certs) 77 | .setPath("id3") 78 | .setCreatedMs(4000L) 79 | .setDataShareMetadata(meta) 80 | .build(), 81 | DataShare.builder() 82 | .setCertificateChain(certs) 83 | .setPath("missing") 84 | .setDataShareMetadata(meta) 85 | .build()); 86 | 87 | PCollection<KV<DataShareMetadata, List<DataShare>>> actualOutput = 88 | IngestionPipeline.processDataShares(pipeline.apply(Create.of(inputData))); 89 | 90 | List<List<DataShare>> expectedValues = 91 | Arrays.asList( 92 | Collections.singletonList( 93 | DataShare.builder() 94 | .setPath("id1") 95 | .setCreatedMs(1000L) 96 | .setCertificateChain(certs) 97 | .setDataShareMetadata(meta) 98 | .build()), 99 | Collections.singletonList( 100 | DataShare.builder() 101 | .setPath("id2") 102 | .setCreatedMs(2000L) 103 | .setCertificateChain(certs) 104 | .setDataShareMetadata(meta) 105 | .build())); 106 | PAssert.that(actualOutput.apply(Keys.create()).apply(Count.globally())).containsInAnyOrder(2L); 107 | PAssert.that(actualOutput.apply(Values.create())).containsInAnyOrder(expectedValues); 108 | pipeline.run().waitUntilFinish(); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/test/java/com/google/exposurenotification/privateanalytics/ingestion/pipeline/PrioSerializationHelperTest.java:
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.google.exposurenotification.privateanalytics.ingestion.pipeline; 17 | 18 | import static com.google.common.truth.Truth.assertThat; 19 | import static org.junit.Assert.assertEquals; 20 | 21 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare; 22 | import com.google.exposurenotification.privateanalytics.ingestion.model.DataShare.EncryptedShare; 23 | import java.io.File; 24 | import java.io.IOException; 25 | import java.net.URL; 26 | import java.nio.ByteBuffer; 27 | import java.util.ArrayList; 28 | import java.util.List; 29 | import org.abetterinternet.prio.v1.PrioDataSharePacket; 30 | import org.abetterinternet.prio.v1.PrioIngestionHeader; 31 | import org.junit.Rule; 32 | import org.junit.Test; 33 | import org.junit.rules.TemporaryFolder; 34 | import org.junit.runner.RunWith; 35 | import org.junit.runners.JUnit4; 36 | 37 | /** Tests for {@link PrioSerializationHelper}. */ 38 | @RunWith(JUnit4.class) 39 | public class PrioSerializationHelperTest { 40 | 41 | @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); 42 | 43 | @Test 44 | public void testPrioBatchHeaderSerialization() 45 | throws IOException, InstantiationException, IllegalAccessException { 46 | List ingestionHeaders = new ArrayList<>(); 47 | PrioIngestionHeader header1 = 48 | PrioIngestionHeader.newBuilder() 49 | .setBatchUuid("id123") 50 | .setName("secretname") 51 | .setBins(123) 52 | .setEpsilon(3.14) 53 | .setPrime(7) 54 | .setNumberOfServers(3) 55 | .setHammingWeight(5) 56 | .setBatchStartTime(1600000000) 57 | .setBatchEndTime(1700000000) 58 | .setPacketFileDigest(ByteBuffer.wrap("placeholder1".getBytes())) 59 | .build(); 60 | PrioIngestionHeader header2 = 61 | PrioIngestionHeader.newBuilder() 62 | .setBatchUuid("id987") 63 | .setName("simplename") 64 | .setBins(4) 65 | .setEpsilon(2.71) 66 | .setPrime(13) 67 | .setNumberOfServers(5) 68 | .setHammingWeight(8) 69 | .setBatchStartTime(1650000000) 70 | .setBatchEndTime(1710000000) 71 | .setPacketFileDigest(ByteBuffer.wrap("placeholder2".getBytes())) 72 | .build(); 73 | ingestionHeaders.add(header1); 74 | ingestionHeaders.add(header2); 75 | File serializedHeaders = tmpFolder.newFile(); 76 | ByteBuffer resultBytes = 77 | PrioSerializationHelper.serializeRecords( 78 | ingestionHeaders, PrioIngestionHeader.class, PrioIngestionHeader.getClassSchema()); 79 | 80 | BatchWriterFn.writeToFile(serializedHeaders.getAbsolutePath(), resultBytes); 81 | List deserializedHeaders = 82 | PrioSerializationHelper.deserializeRecords( 83 | PrioIngestionHeader.class, serializedHeaders.getAbsolutePath()); 84 | assertEquals(ingestionHeaders, deserializedHeaders); 85 | } 86 | 87 | @Test 88 | public void testPrioDataSharePacketSerialization() 89 | throws IOException, InstantiationException, IllegalAccessException { 90 | List 
dataSharePackets = new ArrayList<>(); 91 | PrioDataSharePacket dataSharePacket1 = 92 | PrioDataSharePacket.newBuilder() 93 | .setDeviceNonce(ByteBuffer.wrap(new byte[] {0x07, 0x08, 0x09})) 94 | .setEncryptionKeyId("verysecretandsecurevalue1") 95 | .setRPit(1234567890) 96 | .setUuid("uniqueuserid1") 97 | .setVersionConfiguration("v1.0") 98 | .setEncryptedPayload(ByteBuffer.wrap(new byte[] {0x01, 0x02, 0x03, 0x04, 0x05})) 99 | .build(); 100 | 101 | PrioDataSharePacket dataSharePacket2 = 102 | PrioDataSharePacket.newBuilder() 103 | .setDeviceNonce(ByteBuffer.wrap(new byte[] {0x10, 0x11, 0x12})) 104 | .setEncryptionKeyId("verysecretandsecurevalue2") 105 | .setRPit(987654321) 106 | .setUuid("uniqueuserid2") 107 | .setVersionConfiguration("v2.0") 108 | .setEncryptedPayload(ByteBuffer.wrap(new byte[] {0x06, 0x07, 0x08, 0x09, 0x10})) 109 | .build(); 110 | dataSharePackets.add(dataSharePacket1); 111 | dataSharePackets.add(dataSharePacket2); 112 | 113 | File serializedDataShares = tmpFolder.newFile(); 114 | ByteBuffer resultBytes = 115 | PrioSerializationHelper.serializeRecords( 116 | dataSharePackets, PrioDataSharePacket.class, PrioDataSharePacket.getClassSchema()); 117 | BatchWriterFn.writeToFile(serializedDataShares.getAbsolutePath(), resultBytes); 118 | List deserializedHeaders = 119 | PrioSerializationHelper.deserializeRecords( 120 | PrioDataSharePacket.class, serializedDataShares.getAbsolutePath()); 121 | assertEquals(dataSharePackets, deserializedHeaders); 122 | } 123 | 124 | @Test 125 | public void testSplitPackets() { 126 | DataShare share = 127 | DataShare.builder() 128 | .setSchemaVersion(2) 129 | .setEncryptedDataShares( 130 | List.of( 131 | EncryptedShare.builder() 132 | .setEncryptedPayload("pha".getBytes()) 133 | .setEncryptionKeyId("55NdHuhCjyR3PtTL0A7WRiaIgURhTmlkNw5dbFsKL70=") 134 | .build(), 135 | EncryptedShare.builder() 136 | .setEncryptedPayload("facilitator".getBytes()) 137 | .setEncryptionKeyId("facilitator-key-id") 138 | .build())) 139 | .setRPit(2L) 140 | .setUuid("someuuid") 141 | .build(); 142 | URL manifestUrl = 143 | getClass() 144 | .getResource( 145 | "/com/google/exposurenotification/privateanalytics/ingestion/pipeline/test-manifest.json"); 146 | DataProcessorManifest phaManifest = new DataProcessorManifest(manifestUrl.toString()); 147 | 148 | List packets = PrioSerializationHelper.splitPackets(share); 149 | assertThat(packets).hasSize(2); 150 | assertThat(packets.get(0).getEncryptionKeyId()).isNull(); 151 | assertThat(packets.get(1).getEncryptionKeyId()).isNull(); 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/test/resources/com/google/exposurenotification/privateanalytics/ingestion/pipeline/test-manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "format": 0, 3 | "ingestion-bucket": "s3://us-west-1/prio-demo-gcp-test-pha-1-ingestor-1-ingestion", 4 | "ingestion-identity": "arn:aws:iam::12345678:role/AWSRoleAssumedByGCPSvcAcc", 5 | "peer-validation-bucket": "gs://prio-demo-gcp-test-pha-1-ingestor-1-peer-validation", 6 | "batch-signing-public-keys": { 7 | "demo-gcp-test-pha-1-ingestor-1-batch-signing-key": { 8 | "public-key": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEGBGNmLNT7TFvm59vbUdXvQCDGfXI\nta5HDGjpbKYKiINvbIsVES9oN2HPd7P2vITZrbWkpbLPGGwOa0srglXlSQ==\n-----END PUBLIC KEY-----\n", 9 | "expiration": "2021-01-30T22:21:39Z" 10 | } 11 | }, 12 | "packet-encryption-certificates": { 13 | 
"demo-gcp-test-pha-1-ingestion-packet-decryption-key": { 14 | "certificate": "-----BEGIN CERTIFICATE-----\nMIIEqjCCA5KgAwIBAgITAPrxbgrqnx6Q94QbiX20Fu0soTANBgkqhkiG9w0BAQsF\nADAiMSAwHgYDVQQDDBdGYWtlIExFIEludGVybWVkaWF0ZSBYMTAeFw0yMDExMDEy\nMTIxMzlaFw0yMTAxMzAyMTIxMzlaMDkxNzA1BgNVBAMTLnRlc3QtcGhhLTEuZGVt\nby1nY3AuY2VydGlmaWNhdGVzLmlzcmctcHJpby5vcmcwWTATBgcqhkjOPQIBBggq\nhkjOPQMBBwNCAASo+KHdSgwWyQuuMadoecgsTNhKkEPYAWbtMp7mKwxBHfU38AKM\npjvJRXbazirrUyZrz7uYTQT6noLBX6Wc8DX2o4ICizCCAocwDgYDVR0PAQH/BAQD\nAgeAMB0GA1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjAMBgNVHRMBAf8EAjAA\nMB0GA1UdDgQWBBSF7/1eWAfGiRIFo2jvskZV9d/mvDAfBgNVHSMEGDAWgBTAzANG\nuVggzFxycPPhLssgpvVoOjB3BggrBgEFBQcBAQRrMGkwMgYIKwYBBQUHMAGGJmh0\ndHA6Ly9vY3NwLnN0Zy1pbnQteDEubGV0c2VuY3J5cHQub3JnMDMGCCsGAQUFBzAC\nhidodHRwOi8vY2VydC5zdGctaW50LXgxLmxldHNlbmNyeXB0Lm9yZy8wOQYDVR0R\nBDIwMIIudGVzdC1waGEtMS5kZW1vLWdjcC5jZXJ0aWZpY2F0ZXMuaXNyZy1wcmlv\nLm9yZzBMBgNVHSAERTBDMAgGBmeBDAECATA3BgsrBgEEAYLfEwEBATAoMCYGCCsG\nAQUFBwIBFhpodHRwOi8vY3BzLmxldHNlbmNyeXB0Lm9yZzCCAQQGCisGAQQB1nkC\nBAIEgfUEgfIA8AB3ALDMg+Wl+X1rr3wJzChJBIcqx+iLEyxjULfG/SbhbGx3AAAB\ndYXm2UAAAAQDAEgwRgIhAIl5MRKoTJT/+nhn0e/hDnOpiR4cm9cKAz9Rm8Yc/BAd\nAiEAuG1qnfYjEJ+55p7v8hrz2WSlGdukVsWAvCjw2lZERVYAdQAD7fHal3a284w0\nHjntnXB6dXA2nPmETzJ/6eFBODYbYAAAAXWF5ts1AAAEAwBGMEQCIEdfDFslikvA\nDUz5U9rNIcoYeRiWhOQ9ifMmRO6VyoFpAiBN6nvO/669OgtbR2YYa07NXr8b61lK\n+MFxfRCf5tkR5zANBgkqhkiG9w0BAQsFAAOCAQEAVnW5nGLQ26tXi5KHkyEVsrs1\n0u9S8Xp6PvMjnNRXuvC0u5b9QZ82COdVB5Y+bPav/Is7ppGtaWxd6/ZAZwCAbYzV\nooW54gv1NHjScs0F+Rx+TDWUW9W2SBqO5BB9Ei3wctu1vZFJ5IkjtbSLuLV0szqj\noNbdeCT3LZkvnGZgz3J5eyLDEsbf6KfW+7RJ8NPodjGmM91VTdOdUgxjwQQHOnYg\n+Zk0TRdwgfLg+o17l+Ng4BonGNaayge+VTj7smaOHslbtu9psangzAIK+KFBXSsz\nrljqSePLGWVHlRrZ5Fv2I9xKWiJKQDO+fWMCc4KNVBkKt3VGbmPwFRmlu4JzfQ==\n-----END CERTIFICATE-----\n\n-----BEGIN CERTIFICATE-----\nMIIEqzCCApOgAwIBAgIRAIvhKg5ZRO08VGQx8JdhT+UwDQYJKoZIhvcNAQELBQAw\nGjEYMBYGA1UEAwwPRmFrZSBMRSBSb290IFgxMB4XDTE2MDUyMzIyMDc1OVoXDTM2\nMDUyMzIyMDc1OVowIjEgMB4GA1UEAwwXRmFrZSBMRSBJbnRlcm1lZGlhdGUgWDEw\nggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDtWKySDn7rWZc5ggjz3ZB0\n8jO4xti3uzINfD5sQ7Lj7hzetUT+wQob+iXSZkhnvx+IvdbXF5/yt8aWPpUKnPym\noLxsYiI5gQBLxNDzIec0OIaflWqAr29m7J8+NNtApEN8nZFnf3bhehZW7AxmS1m0\nZnSsdHw0Fw+bgixPg2MQ9k9oefFeqa+7Kqdlz5bbrUYV2volxhDFtnI4Mh8BiWCN\nxDH1Hizq+GKCcHsinDZWurCqder/afJBnQs+SBSL6MVApHt+d35zjBD92fO2Je56\ndhMfzCgOKXeJ340WhW3TjD1zqLZXeaCyUNRnfOmWZV8nEhtHOFbUCU7r/KkjMZO9\nAgMBAAGjgeMwgeAwDgYDVR0PAQH/BAQDAgGGMBIGA1UdEwEB/wQIMAYBAf8CAQAw\nHQYDVR0OBBYEFMDMA0a5WCDMXHJw8+EuyyCm9Wg6MHoGCCsGAQUFBwEBBG4wbDA0\nBggrBgEFBQcwAYYoaHR0cDovL29jc3Auc3RnLXJvb3QteDEubGV0c2VuY3J5cHQu\nb3JnLzA0BggrBgEFBQcwAoYoaHR0cDovL2NlcnQuc3RnLXJvb3QteDEubGV0c2Vu\nY3J5cHQub3JnLzAfBgNVHSMEGDAWgBTBJnSkikSg5vogKNhcI5pFiBh54DANBgkq\nhkiG9w0BAQsFAAOCAgEABYSu4Il+fI0MYU42OTmEj+1HqQ5DvyAeyCA6sGuZdwjF\nUGeVOv3NnLyfofuUOjEbY5irFCDtnv+0ckukUZN9lz4Q2YjWGUpW4TTu3ieTsaC9\nAFvCSgNHJyWSVtWvB5XDxsqawl1KzHzzwr132bF2rtGtazSqVqK9E07sGHMCf+zp\nDQVDVVGtqZPHwX3KqUtefE621b8RI6VCl4oD30Olf8pjuzG4JKBFRFclzLRjo/h7\nIkkfjZ8wDa7faOjVXx6n+eUQ29cIMCzr8/rNWHS9pYGGQKJiY2xmVC9h12H99Xyf\nzWE9vb5zKP3MVG6neX1hSdo7PEAb9fqRhHkqVsqUvJlIRmvXvVKTwNCP3eCjRCCI\nPTAvjV+4ni786iXwwFYNz8l3PmPLCyQXWGohnJ8iBm+5nk7O2ynaPVW0U2W+pt2w\nSVuvdDM5zGv2f9ltNWUiYZHJ1mmO97jSY/6YfdOUH66iRtQtDkHBRdkNBsMbD+Em\n2TgBldtHNSJBfB3pm9FblgOcJ0FSWcUDWJ7vO0+NTXlgrRofRT6pVywzxVo6dND0\nWzYlTWeUVsO40xJqhgUQRER9YLOLxJ0O6C8i0xFxAMKOtSdodMB3RIwt7RFQ0uyt\nn5Z5MqkYhlMI3J1tPRTp1nEt9fyGspBOO05gi148Qasp+3N+svqKomoQglNoAxU=\n-----END CERTIFICATE-----\n" 15 | } 16 | } 17 | } 
-------------------------------------------------------------------------------- /templates/dataflow-deletion-metadata-template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ENPA Deletion Pipeline", 3 | "description": "A pipeline that can be used to delete data in Firestore", 4 | "parameters": [ 5 | { 6 | "name": "firestoreProject", 7 | "label": "Firestore project ID", 8 | "isOptional": true, 9 | "helpText": "Google Cloud Project Id where Firestore instance lives." 10 | }, 11 | { 12 | "name": "startTime", 13 | "label": "Start time", 14 | "helpText": "Start time in seconds of documents to process.", 15 | "isOptional": true, 16 | "regexes": [ 17 | "[0-9]+" 18 | ] 19 | }, 20 | { 21 | "name": "duration", 22 | "label": "Duration", 23 | "helpText": "Duration of window in seconds.", 24 | "isOptional": true, 25 | "regexes": [ 26 | "[0-9]+" 27 | ] 28 | }, 29 | { 30 | "name": "graceHoursBackwards", 31 | "label": "Grace period backwards", 32 | "helpText": "Hours to read backwards from startTime.", 33 | "isOptional": true, 34 | "regexes": [ 35 | "[0-9]+" 36 | ] 37 | }, 38 | { 39 | "name": "graceHoursForwards", 40 | "label": "Grace period forwards", 41 | "helpText": "Hours to read forward from startTime.", 42 | "isOptional": true, 43 | "regexes": [ 44 | "[0-9]+" 45 | ] 46 | }, 47 | { 48 | "name": "deleteBatchSize", 49 | "label": "Delete batch size", 50 | "helpText": "Batch size of Firestore batch deletes.", 51 | "isOptional": true, 52 | "regexes": [ 53 | "[0-9]+" 54 | ] 55 | } 56 | ] 57 | } 58 | -------------------------------------------------------------------------------- /templates/dataflow-flex-template.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "gcr.io/enpa-infra/ingestion-pipeline:0.1.0-SNAPSHOT", 3 | "metadata": {}, 4 | "sdkInfo": { 5 | "language": "JAVA" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /templates/dataflow-ingestion-metadata-template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ENPA Ingestion Pipeline", 3 | "description": "A batch processing pipeline that can be used to ingest private data shares according to the Exposure Notification Private Analytics protocol", 4 | "parameters": [ 5 | { 6 | "name": "firestoreProject", 7 | "label": "Firestore project ID", 8 | "isOptional": true, 9 | "helpText": "Google Cloud Project Id where Firestore instance lives." 
10 | }, 11 | { 12 | "name": "phaOutput", 13 | "label": "PHA output", 14 | "helpText": "File prefix for output files for PHA", 15 | "isOptional": true 16 | }, 17 | { 18 | "name": "phaManifestURL", 19 | "label": "PHA Manifest URL", 20 | "helpText": "Location of PHA Manifest file", 21 | "isOptional": true 22 | }, 23 | { 24 | "name": "facilitatorOutput", 25 | "label": "Facilitator output", 26 | "helpText": "File prefix for output files for Facilitator.", 27 | "isOptional": true 28 | }, 29 | { 30 | "name": "facilitatorManifestURL", 31 | "label": "Facilitator Manifest URL", 32 | "helpText": "Location of Facilitator Manifest file", 33 | "isOptional": true 34 | }, 35 | { 36 | "name": "startTime", 37 | "label": "Start time", 38 | "helpText": "Start time in seconds of documents to process.", 39 | "isOptional": true, 40 | "regexes": [ 41 | "[0-9]+" 42 | ] 43 | }, 44 | { 45 | "name": "duration", 46 | "label": "Duration", 47 | "helpText": "Duration of window in seconds.", 48 | "isOptional": true, 49 | "regexes": [ 50 | "[0-9]+" 51 | ] 52 | }, 53 | { 54 | "name": "graceHoursBackwards", 55 | "label": "Grace period backwards", 56 | "helpText": "Hours to read backwards from startTime.", 57 | "isOptional": true, 58 | "regexes": [ 59 | "[0-9]+" 60 | ] 61 | }, 62 | { 63 | "name": "graceHoursForwards", 64 | "label": "Grace period forwards", 65 | "helpText": "Hours to read forward from startTime.", 66 | "isOptional": true, 67 | "regexes": [ 68 | "[0-9]+" 69 | ] 70 | }, 71 | { 72 | "name": "minimumParticipantCount", 73 | "label": "Minimum participant count", 74 | "helpText": "Minimum count of participants to preserve privacy.", 75 | "isOptional": true, 76 | "regexes": [ 77 | "[0-9]+" 78 | ] 79 | }, 80 | { 81 | "name": "partitionCount", 82 | "label": "Partition count", 83 | "helpText": "Maximum number of partitions to create for Firestore query.", 84 | "isOptional": true, 85 | "regexes": [ 86 | "[0-9]+" 87 | ] 88 | }, 89 | { 90 | "name": "batchSize", 91 | "label": "Batch size", 92 | "helpText": "Batch size of individual files.", 93 | "isOptional": true, 94 | "regexes": [ 95 | "[0-9]+" 96 | ] 97 | }, 98 | { 99 | "name": "deviceAttestation", 100 | "label": "Device attestation", 101 | "helpText": "Verify device attestations.", 102 | "isOptional": true, 103 | "regexes": [ 104 | "true|false" 105 | ] 106 | }, 107 | { 108 | "name": "keyResourceName", 109 | "label": "Key resource name", 110 | "helpText": "KMS resource name for signature generation.", 111 | "isOptional": true, 112 | "regexes": [ 113 | "projects/[^/]+/locations/[^/]+/keyRings/[^/]+/cryptoKeys/.+" 114 | ] 115 | }, 116 | { 117 | "name": "autoscalingAlgorithm", 118 | "label": "Autoscaling algorithm", 119 | "helpText": "Type of autoscaling to use", 120 | "isOptional": true, 121 | "regexes": [ 122 | "NONE|THROUGHPUT_BASED" 123 | ] 124 | }, 125 | { 126 | "name": "packageName", 127 | "label": "Android package name", 128 | "helpText": "Android package name to use during certificate checking.", 129 | "isOptional": true 130 | }, 131 | { 132 | "name": "packageSignatureDigest", 133 | "label": "Android package signature digest", 134 | "helpText": "Android package signature digest to use during certificate checking.", 135 | "isOptional": true 136 | } 137 | ] 138 | } 139 | -------------------------------------------------------------------------------- /templates/scheduler-deletion-template.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "launchParameter": { 3 | "jobName": "${pipeline_name}", 4 | "parameters": {%{ if 
start_time > 0 } 5 | "startTime": "${start_time}",%{ endif }%{ if autoscaling_algorithm != "" } 6 | "autoscalingAlgorithm": "${autoscaling_algorithm}",%{ endif } 7 | "duration":"${window}" 8 | }, 9 | "environment": { 10 | "machineType": "${machine_type}", 11 | "numWorkers": "${worker_count}", 12 | "maxWorkers": "${max_worker_count}", 13 | "serviceAccountEmail": "${service_account}" 14 | }, 15 | "containerSpecGcsPath": "gs://enpa-pipeline-specs/deletion-pipeline-${pipeline_version}.json" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /templates/scheduler-ingestion-template.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "launchParameter": { 3 | "jobName": "${pipeline_name}", 4 | "parameters": { 5 | "batchSize": "${batch_size}", 6 | "keyResourceName": "${key_id}", 7 | %{~if dev_project~} 8 | "phaOutput": "gs://${project}/output/pha", 9 | "facilitatorOutput": "gs://${project}/output/facilitator", 10 | %{~else~} 11 | "facilitatorManifestURL": "${facilitator_manifest_url}", 12 | "phaManifestURL": "${pha_manifest_url}", 13 | %{~endif~} 14 | "tempLocation": "${temp_location}", 15 | "duration":"${window}",%{ if start_time > 0 } 16 | "startTime": "${start_time}",%{ endif }%{ if autoscaling_algorithm != "" } 17 | "autoscalingAlgorithm": "${autoscaling_algorithm}",%{ endif }%{ if package_name != "" } 18 | "packageName":"${package_name}",%{ endif }%{ if package_signature_digest != "" } 19 | "packageSignatureDigest":"${package_signature_digest}",%{ endif } 20 | "deviceAttestation": "${enable_device_attestation}" 21 | }, 22 | "environment": { 23 | "machineType": "${machine_type}", 24 | "numWorkers": "${worker_count}", 25 | "maxWorkers": "${max_worker_count}", 26 | "serviceAccountEmail": "${service_account}" 27 | }, 28 | "containerSpecGcsPath": "gs://enpa-pipeline-specs/ingestion-pipeline-${pipeline_version}.json" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /terraform/README.md: -------------------------------------------------------------------------------- 1 | ENPA Ingestion Terraform 2 | ================================================================================ 3 | 4 | This [terraform](https://terraform.io) module configures and provisions the 5 | infrastructure for a single PHA's instance of ENPA. 
6 | 7 | Prerequisites 8 | -------------------------------------------------------------------------------- 9 | 10 | Before applying this module there are a few manual steps: 11 | 12 | * create the Google Cloud project 13 | * associate a billing account 14 | * enable Firebase, AppEngine, and Firestore 15 | * make sure you have [`gsutil`] and [the `firebase` CLI] installed 16 | 17 | [`gsutil`]: https://cloud.google.com/storage/docs/gsutil_install 18 | [the `firebase` CLI]: https://firebase.google.com/docs/cli 19 | 20 | Example Invocations 21 | -------------------------------------------------------------------------------- 22 | 23 | A typical production project would look like this: 24 | 25 | ```terraform 26 | module "pha_example" { 27 | source = "github.com/google/exposure-notifications-private-analytics-ingestion.git//terraform" 28 | 29 | project = "enpa-ingestion-example" 30 | region = "us-west2" 31 | locality = "example" 32 | 33 | facilitator_manifest_url = "https://facil.example.com/example-g-enpa-manifest.json" 34 | pha_manifest_url = "https://pha.example.com/example-g-enpa-manifest.json" 35 | 36 | pipeline_version = "0.2.6" 37 | } 38 | ``` 39 | 40 | You can also create dev projects which write their output to GCS instead of 41 | shipping it upstream: 42 | 43 | ```terraform 44 | module "pha_dev_example" { 45 | source = "github.com/google/exposure-notifications-private-analytics-ingestion.git//terraform" 46 | 47 | project = "enpa-ingestion-example-dev" 48 | region = "us-west2" 49 | 50 | dev_project = true 51 | 52 | pipeline_version = "0.2.6" 53 | } 54 | ``` 55 | 56 | Available Parameters 57 | -------------------------------------------------------------------------------- 58 | 59 | #### Required Parameters 60 | 61 | * `project` -- the ID of the Google Cloud project to install in 62 | * `pipeline_version` -- the version of the pipelines to run. Corresponds to tags in this repository. 63 | 64 | A production instance also requires: 65 | 66 | * `facilitator_manifest_url` 67 | * `pha_manifest_url` 68 | 69 | A development instance requires the `dev_project` flag. You should not specify 70 | manifest URLs on a dev project. 
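Because `pipeline_version` corresponds to tags in this repository, you may also want the module source itself pinned to a matching revision. Terraform git module sources accept a `ref` query parameter; the sketch below assumes a tag named after the release, so check the repository's tags for the exact naming before copying it.

```terraform
module "pha_example_pinned" {
  # "?ref=..." pins the module code to a git tag or branch; the tag name here is
  # an assumption -- substitute whatever tag matches your pipeline_version.
  source = "github.com/google/exposure-notifications-private-analytics-ingestion.git//terraform?ref=v0.2.6"

  project  = "enpa-ingestion-example"
  region   = "us-west2"
  locality = "example"

  facilitator_manifest_url = "https://facil.example.com/example-g-enpa-manifest.json"
  pha_manifest_url         = "https://pha.example.com/example-g-enpa-manifest.json"

  pipeline_version = "0.2.6"
}
```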
71 | 72 | #### Suggested Parameters 73 | 74 | You will almost always want to set these: 75 | 76 | * `region` -- the Google Cloud region to use (default: `us-central1`) 77 | 78 | You should also review the defaults of the following: 79 | 80 | * `locality` -- Locality string assigned to ENPA PHA setup, inferred from `project` if not set 81 | * `enable_device_attestation` -- whether to enable device attestation (default: true) 82 | * `ingestion_schedule` -- the schedule on which to run the ingestion pipeline (default: 30 minutes past each hour) 83 | * `deletion_schedule` -- the schedule on which to run the deletion pipeline (default: 0600 and 1800 UTC) 84 | * `ingestion_window` -- the length of the window the ingestion pipeline uses to look for new data (default: one hour) 85 | * `deletion_window` -- the length of the window the deletion pipeline uses to look for data to remove (default: 12 hours) 86 | 87 | #### Dataflow Tuning Parameters 88 | 89 | These parameters let you tweak the Dataflow pipelines: 90 | 91 | * `batch_size` -- the number of records per batch (default: 100,000) 92 | * `ingestion_machine_type` -- the machine type used by the ingestion pipeline (default: `n1-standard-4`) 93 | * `ingestion_worker_count` -- the number of workers used by the ingestion pipeline (default: 10) 94 | * `ingestion_autoscaling_algorithm` -- the algorithm to use for autoscaling the ingestion pipeline (default: `THROUGHPUT_BASED`) 95 | * `deletion_machine_type` -- the machine type used by the deletion pipeline (default: `n1-standard-2`) 96 | * `deletion_worker_count` -- the number of workers used by the deletion pipeline (default: 10) 97 | * `deletion_autoscaling_algorithm` -- the algorithm to use for autoscaling the deletion pipeline (default: `THROUGHPUT_BASED`) 98 | * `package_signature_digest` -- Android package signature digest to use during certificate checking (default: not set) 99 | * `package_name` -- Android package name to use during certificate checking (default: not set) 100 | 101 | #### Internal Parameters 102 | 103 | You will generally leave these alone, but they're listed here for completeness: 104 | 105 | * `enable_pipelines` -- whether to schedule pipeline runs (default: true) 106 | * `manifest_bucket` -- the GCS bucket in which to store the generated prio manifest (default: prio-manifests) 107 | -------------------------------------------------------------------------------- /terraform/dataflow.tf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2020, Google LLC. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # 16 | 17 | resource "google_service_account" "dataflow" { 18 | project = var.project 19 | account_id = "dataflow-job-runner" 20 | 21 | display_name = "Dataflow Job Runner" 22 | description = "Service account for dataflow pipelines" 23 | } 24 | 25 | resource "google_project_service" "dataflow" { 26 | project = var.project 27 | service = "dataflow.googleapis.com" 28 | 29 | disable_dependent_services = false 30 | disable_on_destroy = false 31 | } 32 | 33 | resource "google_project_iam_member" "dataflow" { 34 | project = var.project 35 | role = "roles/${each.value}" 36 | member = "serviceAccount:${google_service_account.dataflow.email}" 37 | 38 | ### FIXME: these roles are almost certainly overly broad. We should create a 39 | ### custom role that grants only the permissions required. 40 | for_each = toset([ 41 | "cloudkms.signer", 42 | "containerregistry.ServiceAgent", 43 | "dataflow.admin", 44 | "dataflow.developer", 45 | "dataflow.worker", 46 | "datastore.user", 47 | "editor", 48 | "iam.serviceAccountUser", 49 | ]) 50 | } 51 | -------------------------------------------------------------------------------- /terraform/firestore.tf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2020, Google LLC. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | locals { 18 | temp_dir = "${path.module}/.terraform/tmp" 19 | } 20 | 21 | data "google_storage_object_signed_url" "firestore_rules" { 22 | bucket = var.templates_bucket 23 | path = "security-rules/firestore-${var.pipeline_version}.rules" 24 | duration = "5m" 25 | } 26 | 27 | data "http" "firestore_rules" { 28 | url = data.google_storage_object_signed_url.firestore_rules.signed_url 29 | } 30 | 31 | resource "local_file" "firestore_rules" { 32 | filename = "${local.temp_dir}/firestore.rules" 33 | 34 | # the content is not really sensitive in the normal sense, it's just enormous 35 | # and easier to elide than to scroll through. 
36 | sensitive_content = data.http.firestore_rules.body 37 | 38 | file_permission = "0644" 39 | directory_permission = "0755" 40 | } 41 | 42 | resource "local_file" "firebase_json" { 43 | filename = "${local.temp_dir}/firebase.json" 44 | content = jsonencode({ firestore = { rules = "firestore.rules" } }) 45 | 46 | file_permission = "0644" 47 | directory_permission = "0755" 48 | } 49 | 50 | resource "null_resource" "firestore_security_rules" { 51 | triggers = { 52 | # if either of the config files changes, upload the rules 53 | config = local_file.firebase_json.content 54 | rules = local_file.firestore_rules.content 55 | 56 | # if the version changes upload the rules even if the files didn't change 57 | version = var.pipeline_version 58 | } 59 | 60 | provisioner "local-exec" { 61 | command = "firebase deploy --only firestore:rules --project ${var.project}" 62 | working_dir = local.temp_dir 63 | 64 | environment = { 65 | GOOGLE_APPLICATION_CREDENTIALS = "${abspath(path.root)}/credentials.json" 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /terraform/gcr.tf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2021, Google LLC. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | resource "google_storage_bucket_iam_member" "gcr_bucket" { 18 | bucket = "artifacts.enpa-infra.appspot.com" 19 | role = "roles/storage.objectViewer" 20 | member = "serviceAccount:${google_service_account.dataflow.email}" 21 | } -------------------------------------------------------------------------------- /terraform/iam.tf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2021, Google LLC. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | resource "google_project_iam_custom_role" "terraform_service_account_role" { 18 | project = var.project 19 | role_id = "ENPATerraformRunner" 20 | title = "Terraform Runner for ENPA Infra provisioning" 21 | permissions = [ 22 | "cloudkms.cryptoKeyVersions.create", 23 | "cloudkms.cryptoKeyVersions.destroy", 24 | "cloudkms.cryptoKeyVersions.get", 25 | "cloudkms.cryptoKeyVersions.viewPublicKey", 26 | "cloudkms.cryptoKeys.create", 27 | "cloudkms.cryptoKeys.get", 28 | "cloudkms.cryptoKeys.update", 29 | "cloudkms.keyRings.create", 30 | "cloudkms.keyRings.get", 31 | "cloudscheduler.jobs.create", 32 | "cloudscheduler.jobs.delete", 33 | "cloudscheduler.jobs.get", 34 | "firebase.projects.get", 35 | "firebaserules.releases.list", 36 | "firebaserules.releases.update", 37 | "firebaserules.rulesets.create", 38 | "firebaserules.rulesets.delete", 39 | "firebaserules.rulesets.get", 40 | "firebaserules.rulesets.test", 41 | "iam.roles.create", 42 | "iam.roles.delete", 43 | "iam.roles.get", 44 | "iam.roles.list", 45 | "iam.roles.update", 46 | "iam.serviceAccounts.actAs", 47 | "iam.serviceAccounts.create", 48 | "iam.serviceAccounts.delete", 49 | "iam.serviceAccounts.get", 50 | "resourcemanager.projects.get", 51 | "resourcemanager.projects.getIamPolicy", 52 | "resourcemanager.projects.setIamPolicy", 53 | "serviceusage.services.disable", 54 | "serviceusage.services.enable", 55 | "serviceusage.services.get", 56 | "serviceusage.services.list", 57 | "storage.buckets.create", 58 | "storage.buckets.delete", 59 | "storage.buckets.get", 60 | ] 61 | } 62 | 63 | resource "google_project_iam_member" "terraform_service_account_permissions" { 64 | project = var.project 65 | role = google_project_iam_custom_role.terraform_service_account_role.id 66 | member = "serviceAccount:${var.terraform_svc_account_email}" 67 | } 68 | 69 | resource "google_project_iam_binding" "owners" { 70 | depends_on = [google_project_iam_member.terraform_service_account_permissions] 71 | project = var.project 72 | role = "roles/owner" 73 | members = var.project_owners_list 74 | } 75 | -------------------------------------------------------------------------------- /terraform/main.tf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2020, Google LLC. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | locals { 18 | services = [ 19 | "iam.googleapis.com", 20 | "cloudkms.googleapis.com" 21 | ] 22 | 23 | app_engine_location = (var.region == "us-central1" ? "us-central" : var.region) 24 | locality = (var.locality != "" ? 
var.locality : replace(var.project, "enpa-ingestion-", "")) 25 | } 26 | 27 | resource "google_project_service" "apis" { 28 | for_each = toset(local.services) 29 | project = var.project 30 | service = each.value 31 | 32 | disable_dependent_services = false 33 | disable_on_destroy = true 34 | } 35 | 36 | resource "google_storage_bucket" "bucket" { 37 | project = var.project 38 | name = var.project 39 | 40 | location = var.region 41 | storage_class = "STANDARD" 42 | 43 | # when true, all objects in the bucket will be deleted if terraform tries to 44 | # delete the bucket. Setting it to false is an added level of safety. 45 | force_destroy = false 46 | } 47 | 48 | resource "google_kms_key_ring" "keyring" { 49 | project = var.project 50 | name = "enpa-signing-key-ring" 51 | location = var.region 52 | 53 | lifecycle { 54 | prevent_destroy = true 55 | } 56 | 57 | depends_on = [ 58 | google_project_service.apis["cloudkms.googleapis.com"] 59 | ] 60 | } 61 | 62 | resource "google_kms_crypto_key" "key" { 63 | name = "enpa-signing-key" 64 | key_ring = google_kms_key_ring.keyring.id 65 | purpose = "ASYMMETRIC_SIGN" 66 | 67 | version_template { 68 | algorithm = "EC_SIGN_P256_SHA256" 69 | protection_level = "HSM" 70 | } 71 | } 72 | 73 | data "google_kms_crypto_key_version" "key" { 74 | crypto_key = google_kms_crypto_key.key.id 75 | } 76 | 77 | resource "google_storage_bucket_object" "manifest" { 78 | name = "${local.locality}-g-enpa-manifest.json" 79 | bucket = var.manifest_bucket 80 | 81 | cache_control = "no-cache,max-age=0" 82 | content_type = "application/json" 83 | 84 | ### FIXME: our keys currently don't expire so the expiration date is just a 85 | ### random value I pulled out of an example file. It should be changed to 86 | ### something meaningful. 87 | content = <<-EOF 88 | { 89 | "format": 1, 90 | "server-identity": { 91 | "gcp-service-account-email": "${google_service_account.dataflow.email}", 92 | "gcp-service-account-id": "${google_service_account.dataflow.unique_id}" 93 | }, 94 | "batch-signing-public-keys": { 95 | "${google_kms_crypto_key.key.id}/cryptoKeyVersions/${coalesce(data.google_kms_crypto_key_version.key.version, "0")}": { 96 | "public-key": "${replace(try(data.google_kms_crypto_key_version.key.public_key[0].pem, ""), "\n", "\\n")}", 97 | "expiration": "20211231T000000Z" 98 | } 99 | } 100 | } 101 | EOF 102 | } 103 | 104 | resource "google_storage_object_acl" "manifest" { 105 | # this needs to be output_name in order to recreate the ACL if the object is 106 | # recreated 107 | object = google_storage_bucket_object.manifest.output_name 108 | bucket = var.manifest_bucket 109 | 110 | predefined_acl = (var.dev_project ? "authenticatedRead" : "publicRead") 111 | } 112 | -------------------------------------------------------------------------------- /terraform/scheduler.tf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2020, Google LLC. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | locals { 18 | flex_template_launch_endpoint = "https://dataflow.googleapis.com/v1b3/projects/${var.project}/locations/${var.region}/flexTemplates:launch" 19 | } 20 | 21 | resource "google_project_service" "scheduler" { 22 | project = var.project 23 | service = "cloudscheduler.googleapis.com" 24 | 25 | disable_dependent_services = false 26 | disable_on_destroy = false 27 | } 28 | 29 | data "http" "ingestion_template" { 30 | url = "https://storage.googleapis.com/enpa-pipeline-specs/scheduler-ingestion-template-${var.pipeline_version}.tmpl" 31 | } 32 | 33 | data "template_file" "ingestion" { 34 | template = data.http.ingestion_template.body 35 | 36 | vars = { 37 | pipeline_name = "ingestion-pipeline-${lower(replace(replace(var.pipeline_version, ".", "-"), "_", "-"))}" 38 | start_time = var.ingestion_start_time 39 | autoscaling_algorithm = var.ingestion_autoscaling_algorithm 40 | batch_size = var.batch_size 41 | dev_project = var.dev_project 42 | enable_device_attestation = var.enable_device_attestation 43 | facilitator_manifest_url = var.facilitator_manifest_url 44 | key_id = "projects/${var.project}/locations/${var.region}/keyRings/${google_kms_key_ring.keyring.name}/cryptoKeys/${google_kms_crypto_key.key.name}/cryptoKeyVersions/1", 45 | machine_type = var.ingestion_machine_type 46 | pha_manifest_url = var.pha_manifest_url 47 | pipeline_version = var.pipeline_version 48 | project = var.project 49 | region = var.region 50 | service_account = google_service_account.dataflow.email 51 | temp_location = "${google_storage_bucket.bucket.url}/temp" 52 | window = var.ingestion_window 53 | worker_count = var.ingestion_worker_count 54 | max_worker_count = var.ingestion_max_worker_count 55 | package_signature_digest = var.package_signature_digest 56 | package_name = var.package_name 57 | } 58 | } 59 | 60 | resource "google_cloud_scheduler_job" "ingestion" { 61 | project = var.project 62 | name = "ingestion-pipeline" 63 | region = var.region 64 | 65 | # the GCP provider currently does not support pausing/resuming scheduler jobs, 66 | # so if we want to disable a job the best workaround we have is to schedule it 67 | # far into the future. Unfortunately due to the cron format the best we can do 68 | # is "one year from now", where "now" means the time at which I'm typing this 69 | # comment. 70 | # 71 | # Since we don't expect this project to live for another year it should be 72 | # fine, but don't be surprised if your pipeline runs at noon UTC on December 73 | # 15th. 74 | schedule = (var.enable_pipelines ? 
var.ingestion_schedule : "0 12 15 12 *") 75 | time_zone = "Etc/UTC" 76 | 77 | http_target { 78 | oauth_token { 79 | service_account_email = google_service_account.dataflow.email 80 | } 81 | 82 | http_method = "POST" 83 | uri = local.flex_template_launch_endpoint 84 | body = base64encode(data.template_file.ingestion.rendered) 85 | } 86 | 87 | depends_on = [ 88 | google_project_service.scheduler 89 | ] 90 | } 91 | 92 | data "http" "deletion_template" { 93 | url = "https://storage.googleapis.com/enpa-pipeline-specs/scheduler-deletion-template-${var.pipeline_version}.tmpl" 94 | } 95 | 96 | data "template_file" "deletion" { 97 | template = data.http.deletion_template.body 98 | 99 | vars = { 100 | pipeline_name = "deletion-pipeline-${lower(replace(replace(var.pipeline_version, ".", "-"), "_", "-"))}" 101 | start_time = var.deletion_start_time 102 | autoscaling_algorithm = var.deletion_autoscaling_algorithm 103 | machine_type = var.deletion_machine_type 104 | pipeline_version = var.pipeline_version 105 | service_account = google_service_account.dataflow.email 106 | window = var.deletion_window 107 | worker_count = var.deletion_worker_count 108 | max_worker_count = var.deletion_max_worker_count 109 | } 110 | } 111 | 112 | resource "google_cloud_scheduler_job" "deletion" { 113 | project = var.project 114 | name = "deletion-pipeline" 115 | region = var.region 116 | 117 | # see comment in the ingestion job definition for info about this magic value 118 | schedule = (var.enable_pipelines ? var.deletion_schedule : "0 12 15 12 *") 119 | time_zone = "Etc/UTC" 120 | 121 | http_target { 122 | oauth_token { 123 | service_account_email = google_service_account.dataflow.email 124 | } 125 | 126 | http_method = "POST" 127 | uri = local.flex_template_launch_endpoint 128 | body = base64encode(data.template_file.deletion.rendered) 129 | } 130 | 131 | depends_on = [ 132 | google_project_service.scheduler 133 | ] 134 | } 135 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2020, Google LLC. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | ### Required 18 | 19 | variable "project" { 20 | type = string 21 | description = "The ID of the Google Cloud project created previously. Required." 22 | } 23 | 24 | variable "project_owners_list" { 25 | type = list(string) 26 | description = "The list of fully-qualified owners (user:, group:, serviceAccount:) of the project" 27 | } 28 | 29 | variable "pipeline_version" { 30 | type = string 31 | description = "Dataflow tuning parameter: the version of the pipeline code. Required." 32 | 33 | } 34 | 35 | variable "dev_project" { 36 | type = bool 37 | description = "Should this project be configured for development? Defaults to false." 
38 | default = false 39 | } 40 | 41 | # should only be set if dev_project is false 42 | variable "facilitator_manifest_url" { 43 | type = string 44 | description = "The facilitator manifest URL" 45 | default = "" 46 | } 47 | 48 | # should only be set if dev_project is false 49 | variable "pha_manifest_url" { 50 | type = string 51 | description = "The PHA manifest URL" 52 | default = "" 53 | } 54 | 55 | ### Suggested 56 | 57 | variable "region" { 58 | type = string 59 | description = "The Google Cloud region in which to create regional resources. Defaults to us-central1." 60 | default = "us-central1" 61 | } 62 | 63 | variable "locality" { 64 | type = string 65 | description = "The locality string of the ENPA installation. Optional to override locality setting." 66 | default = "" 67 | } 68 | 69 | variable "enable_device_attestation" { 70 | type = bool 71 | description = "Dataflow tuning parameter: whether to enable device attestation. Defaults to true." 72 | default = true 73 | } 74 | 75 | variable "ingestion_schedule" { 76 | type = string 77 | description = "a string describing the schedule for ingestion jobs, in cron format. Default: '30 * * * *' (30 minutes past each hour)" 78 | default = "30 * * * *" 79 | } 80 | 81 | variable "ingestion_window" { 82 | type = number 83 | description = "Dataflow tuning parameter: the length (in seconds) of the window that the ingestion pipeline will use to look for new records. Defaults to one hour (3600 seconds)." 84 | default = 3600 85 | } 86 | 87 | variable "deletion_schedule" { 88 | type = string 89 | description = "a string describing the schedule for deletion jobs, in cron format. Default: '0 6,18 * * *' (0600 and 1800, UTC)" 90 | default = "0 6,18 * * *" 91 | } 92 | 93 | variable "deletion_window" { 94 | type = number 95 | description = "Dataflow tuning parameter: the length (in seconds) of the window that the deletion pipeline will use to look for records. Defaults to twelve hours (43200 seconds)." 96 | default = 43200 97 | } 98 | 99 | ### Pipeline Tuning 100 | 101 | variable "batch_size" { 102 | type = number 103 | description = "Dataflow tuning parameter: the number of records per batch. Defaults to 100,000." 104 | default = 100000 105 | } 106 | variable "ingestion_start_time" { 107 | 108 | type = number 109 | description = "Start time in UTC seconds of documents to process for the ingestion pipeline. Defaults to 0 (not set)" 110 | default = 0 111 | } 112 | 113 | variable "ingestion_machine_type" { 114 | type = string 115 | description = "Dataflow tuning parameter: the type of machine to use for the ingestion pipeline. Defaults to n1-standard-4." 116 | default = "n1-standard-4" 117 | } 118 | 119 | variable "ingestion_worker_count" { 120 | type = number 121 | description = "Dataflow tuning parameter: the number of workers used by the ingestion pipeline. Defaults to 10." 122 | default = 10 123 | } 124 | 125 | variable "ingestion_max_worker_count" { 126 | type = number 127 | description = "Dataflow tuning parameter: the number of maximum workers used by the ingestion pipeline. Defaults to 15." 128 | default = 15 129 | } 130 | 131 | variable "ingestion_autoscaling_algorithm" { 132 | type = string 133 | description = "Dataflow tuning parameter: the autoscaling algorithm used by the ingestion pipeline. Can be either THROUGHPUT_BASED or NONE. Defaults to NOT SET." 134 | default = "" 135 | } 136 | 137 | variable "package_signature_digest" { 138 | type = string 139 | description = "Android package signature digest to use during certificate checking. 
Defaults to NOT SET" 140 | default = "" 141 | } 142 | 143 | variable "package_name" { 144 | type = string 145 | description = "Android package name to use during certificate checking. Defaults to NOT SET" 146 | default = "" 147 | } 148 | 149 | variable "deletion_start_time" { 150 | 151 | type = number 152 | description = "Start time in UTC seconds of documents to process for the deletion pipeline. Defaults to 0 (not set)" 153 | default = 0 154 | } 155 | 156 | variable "deletion_machine_type" { 157 | type = string 158 | description = "Dataflow tuning parameter: the type of machine to use for the deletion pipeline. Defaults to n1-standard-2." 159 | default = "n1-standard-2" 160 | } 161 | 162 | variable "deletion_worker_count" { 163 | type = number 164 | description = "Dataflow tuning parameter: the number of workers used by the deletion pipeline. Defaults to 10." 165 | default = 10 166 | } 167 | 168 | variable "deletion_max_worker_count" { 169 | type = number 170 | description = "Dataflow tuning parameter: the number of maximum workers used by the deletion pipeline. Defaults to 20." 171 | default = 20 172 | } 173 | 174 | variable "deletion_autoscaling_algorithm" { 175 | type = string 176 | description = "Dataflow tuning parameter: the autoscaling algorithm used by the deletion pipeline. Can be either THROUGHPUT_BASED or NONE. Defaults to NOT SET." 177 | default = "" 178 | } 179 | 180 | ### Internals 181 | 182 | variable "enable_pipelines" { 183 | type = bool 184 | description = "Whether to enable the scheduling of dataflow pipelines. Defaults to true." 185 | default = true 186 | } 187 | 188 | variable "manifest_bucket" { 189 | type = string 190 | description = "The bucket in which to store the generated manifest. Defaults to 'prio-manifests'." 191 | default = "prio-manifests" 192 | } 193 | 194 | variable "templates_bucket" { 195 | type = string 196 | description = "The bucket from which templates are fetched. Defaults to 'enpa-infra'." 197 | default = "enpa-infra" 198 | } 199 | 200 | variable "terraform_svc_account_email" { 201 | type = string 202 | description = "The email address of the Terraform Runner service account" 203 | } --------------------------------------------------------------------------------
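For convenience, here is a sketch of a module invocation that exercises several of the variables defined above. It is illustrative only: the module source follows the README examples, and every project ID, email address, and tuning value is a placeholder to replace with your own.

```terraform
module "pha_example_tuned" {
  # Same module source as the README examples; pin it with "?ref=..." if desired.
  source = "github.com/google/exposure-notifications-private-analytics-ingestion.git//terraform"

  # Variables without defaults in variables.tf (illustrative values).
  project                     = "enpa-ingestion-example"
  pipeline_version            = "0.2.6"
  project_owners_list         = ["group:enpa-admins@example.com"]
  terraform_svc_account_email = "terraform-runner@enpa-ingestion-example.iam.gserviceaccount.com"

  # Production instances also need the two manifest URLs.
  facilitator_manifest_url = "https://facil.example.com/example-g-enpa-manifest.json"
  pha_manifest_url         = "https://pha.example.com/example-g-enpa-manifest.json"

  # A few of the tuning variables documented above (values are arbitrary).
  region                          = "us-west2"
  locality                        = "example"
  batch_size                      = 50000
  ingestion_machine_type          = "n1-standard-8"
  ingestion_worker_count          = 5
  ingestion_autoscaling_algorithm = "THROUGHPUT_BASED"
  deletion_window                 = 21600
}
```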