├── _config.yml ├── bin_icon.png ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── cloudbuild.yaml ├── AUTHORS ├── ApacheHeader ├── src ├── test │ ├── resources │ │ ├── deidentify_request_with_inspect_template_name.json │ │ ├── single_surrogate_info_type_transform_reid_request.json │ │ ├── single_surrogate_record_primitive_type_transform_deid_template.json │ │ ├── non_reversabe_transformation_deid_template.json │ │ ├── single_surrogate_record_primitive_type_transform_reid_request.json │ │ ├── single_surrogate_info_type_transform_deid_template.json │ │ ├── single_surrogate_record_primitive_type_transform_reid_with_inspect_template_request.json │ │ ├── multiple_surrogate_record_info_type_reid_request.json │ │ └── multiple_surrogate_record_info_type_transforms_deid_config.json │ └── java │ │ └── com │ │ └── google │ │ └── cloud │ │ └── solutions │ │ └── bqremoteencryptionfn │ │ ├── testing │ │ ├── stubs │ │ │ ├── dlp │ │ │ │ ├── TestBase64DlpStubs.java │ │ │ │ ├── MappingDeidentifyTemplateCallerFactory.java │ │ │ │ ├── VerifyingDeidentifyCallerFactory.java │ │ │ │ ├── RequestSizeLimitingDeidentifyFactory.java │ │ │ │ ├── VerifyingReidentifyCallerFactory.java │ │ │ │ ├── PatchyDlpStub.java │ │ │ │ └── Base64EncodingDlpStub.java │ │ │ ├── TestingBackgroundResource.java │ │ │ └── BaseUnaryApiFuture.java │ │ ├── SimpleBigQueryRemoteFnRequestMaker.java │ │ └── JsonMapper.java │ │ ├── fns │ │ └── dlp │ │ │ ├── DlpConfigTest.java │ │ │ ├── RowsToTableFnTest.java │ │ │ └── DlpRequestBatchExecutorTest.java │ │ └── BqTransformFnAppTest.java └── main │ ├── resources │ ├── dlp.properties │ ├── aes.properties │ └── application.properties │ └── java │ └── com │ └── google │ └── cloud │ └── solutions │ └── bqremoteencryptionfn │ ├── UserAgentHeaderProvider.java │ ├── BigQueryRemoteFnRequest.java │ ├── TransformFnFactory.java │ ├── BigQueryRemoteFnResponse.java │ ├── fns │ ├── IdentityFn.java │ ├── UnaryStringArgFn.java │ ├── dlp │ │ ├── 
DlpConfig.java │ │ ├── DlpFn.java │ │ ├── DlpReIdRequestMaker.java │ │ └── DlpRequestBatchExecutor.java │ ├── Base64Fn.java │ └── AesFn.java │ ├── TransformFn.java │ ├── BqTransformFnApp.java │ └── BigQueryFnCallController.java ├── settings.gradle ├── gradle.properties ├── .github ├── renovate.json └── workflows │ └── main.yml ├── CONTRIBUTING.md ├── .gitignore ├── sample_dlp_deid_config.json ├── variables.tf ├── gradlew.bat ├── .terraform.lock.hcl ├── gradlew ├── main.tf ├── LICENSE └── README.md /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /bin_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/bigquery-dlp-remote-function/HEAD/bin_icon.png -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/bigquery-dlp-remote-function/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | steps: 3 | - name: gradle:8-jdk21 4 | args: [ 5 | 'gradle', 6 | 'build', 7 | 'jib', 8 | '-Djib.to.image=${_CONTAINER_IMAGE_NAME}', 9 | '-x', 10 | 'test' 11 | ] 12 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip 4 | networkTimeout=10000 5 | 
validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of bigquery-dlp-remote-function significant contributors. 2 | # 3 | # This does not necessarily list everyone who has contributed code, 4 | # especially since many employees of one corporation may be contributing. 5 | # To see the full list of contributors, see the revision history in 6 | # source control. 7 | Google LLC 8 | Anant Damle 9 | -------------------------------------------------------------------------------- /ApacheHeader: -------------------------------------------------------------------------------- 1 | Copyright $YEAR Google LLC 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
-------------------------------------------------------------------------------- /src/test/resources/deidentify_request_with_inspect_template_name.json: -------------------------------------------------------------------------------- 1 | { 2 | "parent": "projects/test-project-id/locations/test-region1", 3 | "item": { 4 | "table": { 5 | "headers": [{ 6 | "name": "bqfnvalue" 7 | }], 8 | "rows": [{ 9 | "values": [{ 10 | "stringValue": "Anant" 11 | }] 12 | }, { 13 | "values": [{ 14 | "stringValue": "Damle" 15 | }] 16 | }] 17 | } 18 | }, 19 | "inspectTemplateName": "testing-inspect-template-name", 20 | "deidentifyTemplateName": "projects/test-project-id/locations/test-region1/deidentifyTemplates/template1" 21 | } -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | rootProject.name = 'bigquery-dlp-remote-function' 18 | -------------------------------------------------------------------------------- /src/main/resources/dlp.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2022 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | dlp.valueColName=bqfnvalue 18 | dlp.requestCellCount=1600 19 | dlp.requestBytes=500000 -------------------------------------------------------------------------------- /src/main/resources/aes.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2022 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | aesKey=${AES_KEY:default-aes-key} 18 | aesKeyType=${AES_KEY_TYPE:BASE64_KEY} 19 | aesCipherType=${AES_CIPHER_TYPE:AES/CBC/PKCS5PADDING} 20 | aesIvParameterBase64=${AES_IV_PARAMETER_BASE64:default-iv} -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/stubs/dlp/TestBase64DlpStubs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp; 18 | 19 | public class TestBase64DlpStubs {} 20 | -------------------------------------------------------------------------------- /src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2022 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | server.port=${PORT:8080} 17 | spring.mvc.converters.preferred-json-mapper=gson 18 | spring.gson.lenient=true 19 | spring.gson.serialize-nulls=false 20 | spring.cloud.gcp.config.project-id=${PROJECT_ID:default-project-id} 21 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2022 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | org.gradle.jvmargs=--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ 17 | --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \ 18 | --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \ 19 | --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \ 20 | --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED 21 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "config:recommended", 5 | ":semanticPrefixFixDepsChoreOthers", 6 | ":ignoreModulesAndTests", 7 | "group:monorepos", 8 | "group:recommended", 9 | "replacements:all", 10 | "workarounds:all" 11 | ], 12 | "dependencyDashboardApproval": true, 13 | "packageRules": [ 14 | { 15 | "extends": [ 16 | "packages:googleapis" 17 | ], 18 | "groupName": "googleapis packages" 19 | }, 20 | { 21 | "groupName": "github-workflow", 22 | "matchFileNames": [ 23 | ".github/workflows/main.yml" 24 | ] 25 | }, 26 | { 27 | "groupName": "terraform", 28 | "matchFileNames": [ 29 | "main.tf", 30 | ".terraform/**", 31 | ".terraform.lock.hcl" 32 | ] 33 | }, 34 | { 35 | "groupName": "gradle", 36 | "matchFileNames": [ 37 | "build.gradle", 38 | "gradle.properties", 39 | "gradle/**" 40 | ] 41 | } 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /src/test/resources/single_surrogate_info_type_transform_reid_request.json: -------------------------------------------------------------------------------- 1 | { 2 | "parent": "projects/test-project-id/locations/test-region1", 3 | "inspectConfig": { 4 | "customInfoTypes": [{ 5 | "infoType": { 6 | "name": "BQ_TOK_FN" 7 | }, 8 | "surrogateType": { 9 | } 10 | }] 11 | }, 12 | "reidentifyConfig": { 13 | "infoTypeTransformations": { 14 | "transformations": [{ 15 | 
"infoTypes": [{ 16 | "name": "BQ_TOK_FN" 17 | }], 18 | "primitiveTransformation": { 19 | "cryptoDeterministicConfig": { 20 | "cryptoKey": { 21 | "kmsWrapped": { 22 | "wrappedKey": "some+wrapped+key", 23 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 24 | } 25 | }, 26 | "surrogateInfoType": { 27 | "name": "BQ_TOK_FN" 28 | } 29 | } 30 | } 31 | }] 32 | }, 33 | "transformationErrorHandling": { 34 | "leaveUntransformed": { 35 | } 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /src/test/resources/single_surrogate_record_primitive_type_transform_deid_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "projects/norse-strata-355005/locations/asia-southeast1/deidentifyTemplates/record-type-primitive-transform", 3 | "displayName": "record-type-primitive-transform", 4 | "createTime": "2022-08-19T12:56:08.628127Z", 5 | "updateTime": "2022-08-19T12:56:08.628127Z", 6 | "deidentifyConfig": { 7 | "recordTransformations": { 8 | "fieldTransformations": [ 9 | { 10 | "fields": [ 11 | { 12 | "name": "bqfnvalue" 13 | } 14 | ], 15 | "primitiveTransformation": { 16 | "cryptoDeterministicConfig": { 17 | "cryptoKey": { 18 | "kmsWrapped": { 19 | "wrappedKey": "some+wrapped+key", 20 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 21 | } 22 | }, 23 | "surrogateInfoType": { 24 | "name": "BQ_EE" 25 | } 26 | } 27 | } 28 | } 29 | ] 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/test/resources/non_reversabe_transformation_deid_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "projects/test-project-id/locations/test-region1/deidentifyTemplates/template1", 3 | "displayName": "Email Id and Phone number masker", 4 | "description": 
"De-identifies emails and phone numbers with unwrapped key.", 5 | "createTime": "2022-08-09T16:13:35.867327Z", 6 | "updateTime": "2022-08-17T05:58:16.867823Z", 7 | "deidentifyConfig": { 8 | "infoTypeTransformations": { 9 | "transformations": [ 10 | { 11 | "infoTypes": [ 12 | { 13 | "name": "EMAIL_ADDRESS" 14 | }, 15 | { 16 | "name": "PHONE_NUMBER" 17 | } 18 | ], 19 | "primitiveTransformation": { 20 | "cryptoHashConfig": { 21 | "cryptoKey": { 22 | "kmsWrapped": { 23 | "wrappedKey": "some+wrapped+key", 24 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 25 | } 26 | } 27 | } 28 | } 29 | } 30 | ] 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to 4 | follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License Agreement. You (or your employer) retain the 9 | copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of 10 | the project. Head over to <https://cla.developers.google.com/> to see your current agreements on file or to sign a new 11 | one. 12 | 13 | You generally only need to submit a CLA once, so if you've already submitted one 14 | (even if it was for a different project), you probably don't need to do it again. 15 | 16 | ## Code reviews 17 | 18 | All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. 19 | Consult 20 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests. 
21 | 22 | ## Community Guidelines 23 | 24 | This project follows 25 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 26 | -------------------------------------------------------------------------------- /src/test/resources/single_surrogate_record_primitive_type_transform_reid_request.json: -------------------------------------------------------------------------------- 1 | { 2 | "parent": "projects/test-project-id/locations/test-region1", 3 | "inspectConfig": { 4 | "customInfoTypes": [ 5 | { 6 | "infoType": { 7 | "name": "BQ_EE" 8 | }, 9 | "surrogateType": { 10 | } 11 | } 12 | ] 13 | }, 14 | "reidentifyConfig": { 15 | "recordTransformations": { 16 | "fieldTransformations": [ 17 | { 18 | "fields": [ 19 | { 20 | "name": "bqfnvalue" 21 | } 22 | ], 23 | "primitiveTransformation": { 24 | "cryptoDeterministicConfig": { 25 | "cryptoKey": { 26 | "kmsWrapped": { 27 | "wrappedKey": "some+wrapped+key", 28 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 29 | } 30 | }, 31 | "surrogateInfoType": { 32 | "name": "BQ_EE" 33 | } 34 | } 35 | } 36 | } 37 | ] 38 | }, 39 | "transformationErrorHandling": { 40 | "leaveUntransformed": { 41 | } 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /src/test/resources/single_surrogate_info_type_transform_deid_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "projects/test-project-id/locations/test-region1/deidentifyTemplates/template1", 3 | "displayName": "Email Id and Phone number masker", 4 | "description": "De-identifies emails and phone numbers with unwrapped key.", 5 | "createTime": "2022-08-09T16:13:35.867327Z", 6 | "updateTime": "2022-08-17T05:58:16.867823Z", 7 | "deidentifyConfig": { 8 | "infoTypeTransformations": { 9 | "transformations": [ 10 | { 11 | "infoTypes": [ 12 | { 13 | "name": "EMAIL_ADDRESS" 14 | }, 15 | { 16 | 
"name": "PHONE_NUMBER" 17 | } 18 | ], 19 | "primitiveTransformation": { 20 | "cryptoDeterministicConfig": { 21 | "cryptoKey": { 22 | "kmsWrapped": { 23 | "wrappedKey": "some+wrapped+key", 24 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 25 | } 26 | }, 27 | "surrogateInfoType": { 28 | "name": "BQ_TOK_FN" 29 | } 30 | } 31 | } 32 | } 33 | ] 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/UserAgentHeaderProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn; 18 | 19 | import com.google.api.gax.rpc.HeaderProvider; 20 | import com.google.common.collect.ImmutableMap; 21 | import java.util.Map; 22 | import org.springframework.stereotype.Component; 23 | 24 | @Component 25 | public class UserAgentHeaderProvider implements HeaderProvider { 26 | private static final ImmutableMap USER_AGENT = 27 | ImmutableMap.of("user-agent", "cloud-solutions/bigquery-dlp-remote-function-v0.1"); 28 | 29 | @Override 30 | public Map getHeaders() { 31 | return USER_AGENT; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/resources/single_surrogate_record_primitive_type_transform_reid_with_inspect_template_request.json: -------------------------------------------------------------------------------- 1 | { 2 | "parent": "projects/test-project-id/locations/test-region1", 3 | "inspectConfig": { 4 | "customInfoTypes": [ 5 | { 6 | "infoType": { 7 | "name": "BQ_EE" 8 | }, 9 | "surrogateType": { 10 | } 11 | } 12 | ] 13 | }, 14 | "inspectTemplateName": "testing-inspect-template", 15 | "reidentifyConfig": { 16 | "recordTransformations": { 17 | "fieldTransformations": [ 18 | { 19 | "fields": [ 20 | { 21 | "name": "bqfnvalue" 22 | } 23 | ], 24 | "primitiveTransformation": { 25 | "cryptoDeterministicConfig": { 26 | "cryptoKey": { 27 | "kmsWrapped": { 28 | "wrappedKey": "some+wrapped+key", 29 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 30 | } 31 | }, 32 | "surrogateInfoType": { 33 | "name": "BQ_EE" 34 | } 35 | } 36 | } 37 | } 38 | ] 39 | }, 40 | "transformationErrorHandling": { 41 | "leaveUntransformed": { 42 | } 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/BigQueryRemoteFnRequest.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn; 18 | 19 | import java.util.List; 20 | import java.util.Map; 21 | import javax.annotation.Nullable; 22 | 23 | /** 24 | * BigQuery Remote Function Request data model 25 | * 26 | *

27 | * 28 | * @see 30 | * Input Format 31 | */ 32 | public record BigQueryRemoteFnRequest( 33 | String requestId, 34 | String caller, 35 | String sessionUser, 36 | @Nullable Map userDefinedContext, 37 | List> calls) {} 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore terraform folder 2 | .terraform/ 3 | !.terraform.lock.hcl 4 | terraform.tfstate* 5 | 6 | # Ignore any offline repositories the user may have created. 7 | **/offline-repository/**/* 8 | 9 | # Ignore files generated by the Gradle build process. 10 | **/.gradle/**/* 11 | **/.gogradle/**/* 12 | **/.nb-gradle/**/* 13 | **/gogradle.lock 14 | **/build/**/* 15 | .test-infra/**/vendor/**/* 16 | sdks/**/vendor/**/* 17 | runners/**/vendor/**/* 18 | # Cache of project 19 | **/.gradletasknamecache 20 | !src/**/build/ 21 | 22 | # Ignore Gradle GUI config 23 | gradle-app.setting 24 | 25 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 26 | !gradle-wrapper.jar 27 | 28 | # Ignore files generated by the Maven build process. 29 | **/bin/**/* 30 | **/dependency-reduced-pom.xml 31 | **/target/**/* 32 | 33 | # Ignore IntelliJ files. 34 | **/.idea/**/* 35 | **/*.iml 36 | **/*.ipr 37 | **/*.iws 38 | **/out/**/* 39 | 40 | # Ignore Eclipse files. 41 | **/.classpath 42 | **/.project 43 | **/.factorypath 44 | **/.checkstyle 45 | **/.fbExcludeFilterFile 46 | **/.apt_generated/**/* 47 | **/.settings/**/* 48 | 49 | # Ignore Visual Studio Code files. 50 | **/.vscode/**/* 51 | 52 | # Ignore files that end with '~', since they are most likely auto-save files 53 | # produced by a text editor. 54 | **/*~ 55 | 56 | # Ignore MacOSX files. 
57 | **/.DS_Store/**/* 58 | **/.DS_Store -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/TransformFnFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn; 18 | 19 | import java.util.Map; 20 | import javax.annotation.Nonnull; 21 | 22 | /** Factory interface for creating objects of a {@link TransformFn} implementation. */ 23 | public interface TransformFnFactory { 24 | 25 | /** 26 | * Returns an instance of {@link TransformFn} implementation. 27 | * 28 | * @param options the implementation specific configuration 29 | */ 30 | T createFn(@Nonnull Map options); 31 | 32 | /** 33 | * Returns the name of the encryption algorithm implemented. Needs to be unique for all loaded 34 | * function classes. 
35 | */ 36 | String getFnName(); 37 | } 38 | -------------------------------------------------------------------------------- /sample_dlp_deid_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "deidentifyTemplate" : { 3 | "displayName" : "Email Id and Phone number masker", 4 | "description":"De-identifies emails and phone numbers with unwrapped key.", 5 | "deidentifyConfig": { 6 | "infoTypeTransformations": { 7 | "transformations": [ 8 | { 9 | "infoTypes": [ 10 | { 11 | "name": "EMAIL_ADDRESS" 12 | } 13 | ], 14 | "primitiveTransformation": { 15 | "cryptoDeterministicConfig": { 16 | "cryptoKey": { 17 | "unwrapped": { 18 | "key": "VWtYcDJzNXY4eS9CP0UoSCtNYlFlU2hWbVlxM3Q2dzk=" 19 | } 20 | }, 21 | "surrogateInfoType": { 22 | "name": "BQ_TRF_EMAIL" 23 | } 24 | } 25 | } 26 | }, 27 | { 28 | "infoTypes": [ 29 | { 30 | "name": "PHONE_NUMBER" 31 | } 32 | ], 33 | "primitiveTransformation": { 34 | "cryptoDeterministicConfig": { 35 | "cryptoKey": { 36 | "unwrapped": { 37 | "key": "VWtYcDJzNXY4eS9CP0UoSCtNYlFlU2hWbVlxM3Q2dzk=" 38 | } 39 | }, 40 | "surrogateInfoType": { 41 | "name": "BQ_TRF_PH" 42 | } 43 | } 44 | } 45 | } 46 | ] 47 | } 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/BigQueryRemoteFnResponse.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn; 18 | 19 | import static com.google.common.base.Preconditions.checkNotNull; 20 | 21 | import java.util.List; 22 | 23 | /** 24 | * BigQuery Remote Function Response data model 25 | * 26 | *

27 | * 28 | * @see 30 | * Output Format 31 | */ 32 | public record BigQueryRemoteFnResponse(List replies, String errorMessage) { 33 | 34 | public static BigQueryRemoteFnResponse withReplies(List replies) { 35 | return new BigQueryRemoteFnResponse(checkNotNull(replies), null); 36 | } 37 | 38 | public static BigQueryRemoteFnResponse withErrorMessage(String errorMessage) { 39 | return new BigQueryRemoteFnResponse(null, errorMessage); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /variables.tf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | ## Define variables 18 | 19 | variable "project_id" { 20 | type = string 21 | } 22 | 23 | variable "region" { 24 | type = string 25 | } 26 | 27 | variable "artifact_registry_name" { 28 | type = string 29 | default = "bq-remote-functions" 30 | } 31 | 32 | variable "bq_dataset" { 33 | type = string 34 | default = "fns" 35 | } 36 | 37 | variable "dlp_deid_template_json_file" { 38 | type = string 39 | default = "sample_dlp_deid_config.json" 40 | } 41 | 42 | variable "dlp_inspect_template_full_path" { 43 | default = "" 44 | } 45 | 46 | variable "service_name" { 47 | default = "bq-transform-fns" 48 | } 49 | 50 | variable "user_os" { 51 | type = string 52 | default = "linux" 53 | description = "The OS of the person running the Terraform script. Options: [linux, darwin]" 54 | validation { 55 | condition = contains(["linux","darwin"], var.user_os) 56 | error_message = "Supported OS Options: [linux, darwin]" 57 | } 58 | } -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/SimpleBigQueryRemoteFnRequestMaker.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing; 18 | 19 | import static com.google.cloud.solutions.bqremoteencryptionfn.testing.JsonMapper.toJson; 20 | import static com.google.common.collect.ImmutableList.toImmutableList; 21 | 22 | import com.google.cloud.solutions.bqremoteencryptionfn.BigQueryRemoteFnRequest; 23 | import com.google.common.collect.ImmutableMap; 24 | import java.util.Arrays; 25 | import java.util.List; 26 | import java.util.Map; 27 | 28 | public class SimpleBigQueryRemoteFnRequestMaker { 29 | 30 | public static String testRequest(Map options, List... calls) { 31 | var objCalls = 32 | Arrays.stream(calls) 33 | .map(l -> (List) l.stream().map(x -> (Object) x).collect(toImmutableList())) 34 | .collect(toImmutableList()); 35 | 36 | return toJson( 37 | new BigQueryRemoteFnRequest( 38 | "testRequestId", 39 | "testCallerId", 40 | "testSessionUser@somedomain.com", 41 | ImmutableMap.copyOf(options), 42 | objCalls)); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/stubs/TestingBackgroundResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs; 18 | 19 | import com.google.api.gax.core.BackgroundResource; 20 | import java.io.Serializable; 21 | import java.util.concurrent.TimeUnit; 22 | 23 | public final class TestingBackgroundResource implements BackgroundResource, Serializable { 24 | 25 | private boolean closed; 26 | private boolean shutdown; 27 | 28 | public TestingBackgroundResource() { 29 | this.closed = false; 30 | this.shutdown = false; 31 | } 32 | 33 | @Override 34 | public void close() { 35 | closed = true; 36 | } 37 | 38 | @Override 39 | public void shutdown() { 40 | close(); 41 | shutdown = true; 42 | } 43 | 44 | @Override 45 | public boolean isShutdown() { 46 | return shutdown; 47 | } 48 | 49 | @Override 50 | public boolean isTerminated() { 51 | return (closed && shutdown); 52 | } 53 | 54 | @Override 55 | public void shutdownNow() { 56 | shutdown = true; 57 | } 58 | 59 | @Override 60 | public boolean awaitTermination(long l, TimeUnit timeUnit) { 61 | shutdown(); 62 | return shutdown; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/IdentityFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns; 18 | 19 | import com.google.cloud.solutions.bqremoteencryptionfn.TransformFnFactory; 20 | import java.util.List; 21 | import java.util.Map; 22 | import javax.annotation.Nonnull; 23 | import org.springframework.stereotype.Component; 24 | 25 | /** Simple Pass-through function, does not transform the input. */ 26 | public final class IdentityFn extends UnaryStringArgFn { 27 | 28 | public static final String FN_NAME = "identity"; 29 | 30 | @Component 31 | public static class IdentityTransformFnFactory implements TransformFnFactory { 32 | @Override 33 | public IdentityFn createFn(@Nonnull Map options) { 34 | return new IdentityFn(); 35 | } 36 | 37 | @Override 38 | public String getFnName() { 39 | return FN_NAME; 40 | } 41 | } 42 | 43 | @Override 44 | public String getName() { 45 | return FN_NAME; 46 | } 47 | 48 | @Override 49 | public List deidentifyUnaryRow(List rows) { 50 | return rows; 51 | } 52 | 53 | @Override 54 | public List reidentifyUnaryRow(List rows) { 55 | return rows; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/UnaryStringArgFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns; 18 | 19 | import static com.google.common.collect.ImmutableList.toImmutableList; 20 | 21 | import com.google.cloud.solutions.bqremoteencryptionfn.TransformFn; 22 | import java.util.List; 23 | 24 | /** 25 | * Base class for transform functions that take a single argument for deidentify and reidentify 26 | * operations. 27 | */ 28 | public abstract class UnaryStringArgFn implements TransformFn { 29 | 30 | @Override 31 | public final List deidentify(List> rows) throws Exception { 32 | return deidentifyUnaryRow(makeUnaryArgumentRow(rows)); 33 | } 34 | 35 | @Override 36 | public final List reidentify(List> rows) throws Exception { 37 | return reidentifyUnaryRow(makeUnaryArgumentRow(rows)); 38 | } 39 | 40 | private List makeUnaryArgumentRow(List> calledRows) { 41 | return calledRows.stream().map(r -> r.get(0)).map(Object::toString).collect(toImmutableList()); 42 | } 43 | 44 | protected abstract List deidentifyUnaryRow(List rows) throws Exception; 45 | 46 | protected abstract List reidentifyUnaryRow(List rows) throws Exception; 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/dlp/DlpConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns.dlp; 18 | 19 | import com.google.common.base.Strings; 20 | import com.google.gson.FieldNamingPolicy; 21 | import com.google.gson.Gson; 22 | import java.lang.reflect.Field; 23 | import java.util.Map; 24 | 25 | /** Configuration Model for request options in a BigQuery remote function call. */ 26 | public record DlpConfig(String deidTemplate, String inspectTemplate) { 27 | 28 | private static final Gson jsonMapper = 29 | new Gson() 30 | .newBuilder() 31 | .setFieldNamingStrategy( 32 | (Field f) -> "dlp-" + FieldNamingPolicy.LOWER_CASE_WITH_DASHES.translateName(f)) 33 | .create(); 34 | 35 | public static DlpConfig fromJson(Map nodeTree) { 36 | return jsonMapper.fromJson(jsonMapper.toJsonTree(nodeTree), DlpConfig.class); 37 | } 38 | 39 | public static DlpConfig fromJson(String json) { 40 | return jsonMapper.fromJson(json, DlpConfig.class); 41 | } 42 | 43 | public boolean hasInspectTemplate() { 44 | return !Strings.isNullOrEmpty(inspectTemplate); 45 | } 46 | 47 | public boolean hasDlpDeidTemplate() { 48 | return !Strings.isNullOrEmpty(deidTemplate); 49 | } 50 | 51 | public String toJson() { 52 | return jsonMapper.toJson(this); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
---
name: Build and Validate

on:
  push:
    branches: [ "main" ]
  pull_request:
    types: [ opened, synchronize, reopened ]

permissions:
  actions: read
  contents: read
  security-events: write

jobs:
  build_and_test:
    runs-on: ubuntu-latest
    name: Gradle build and CodeQL
    steps:
      - uses: actions/checkout@v4
      - name: Set up JDK 21
        uses: actions/setup-java@v4
        with:
          java-version: '21'
          distribution: 'temurin'
      # The key is derived from the Gradle build files so that a dependency or
      # wrapper change produces a fresh cache entry. With a constant key the
      # cache is never saved again after its first hit. restore-keys lets a
      # changed build still start from the most recent older cache.
      - name: Cache Deps
        uses: actions/cache@v4
        with:
          path: |
            ~/.gradle/caches
            ~/.gradle/wrapper
          key: build-deps-${{ runner.os }}-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
          restore-keys: |
            build-deps-${{ runner.os }}-

      - name: Setup Gradle
        uses: gradle/gradle-build-action@v3

      # CodeQL is initialized before the build step and analyzed after it.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: java
          queries: security-and-quality

      - name: Build and Test
        run: ./gradlew clean test build assemble

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3

      - name: Upload CodeCov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          fail_ci_if_error: true
          verbose: true
"customInfoTypes": [{ 5 | "infoType": { 6 | "name": "BQ_EM" 7 | }, 8 | "surrogateType": { 9 | } 10 | }, { 11 | "infoType": { 12 | "name": "BQ_PH" 13 | }, 14 | "surrogateType": { 15 | } 16 | }] 17 | }, 18 | "reidentifyConfig": { 19 | "recordTransformations": { 20 | "fieldTransformations": [{ 21 | "fields": [{ 22 | "name": "bqfnvalue" 23 | }], 24 | "infoTypeTransformations": { 25 | "transformations": [{ 26 | "infoTypes": [{ 27 | "name": "BQ_EM" 28 | }], 29 | "primitiveTransformation": { 30 | "cryptoDeterministicConfig": { 31 | "cryptoKey": { 32 | "kmsWrapped": { 33 | "wrappedKey": "some+other+base64+em", 34 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 35 | } 36 | }, 37 | "surrogateInfoType": { 38 | "name": "BQ_EM" 39 | } 40 | } 41 | } 42 | }, { 43 | "infoTypes": [{ 44 | "name": "BQ_PH" 45 | }], 46 | "primitiveTransformation": { 47 | "cryptoDeterministicConfig": { 48 | "cryptoKey": { 49 | "kmsWrapped": { 50 | "wrappedKey": "some+base64+key+pg==", 51 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 52 | } 53 | }, 54 | "surrogateInfoType": { 55 | "name": "BQ_PH" 56 | } 57 | } 58 | } 59 | }] 60 | } 61 | }] 62 | }, 63 | "transformationErrorHandling": { 64 | "leaveUntransformed": { 65 | } 66 | } 67 | } 68 | } -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/JsonMapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing; 18 | 19 | import com.google.gson.Gson; 20 | import com.google.gson.reflect.TypeToken; 21 | import com.google.protobuf.InvalidProtocolBufferException; 22 | import com.google.protobuf.Message; 23 | import com.google.protobuf.util.JsonFormat; 24 | 25 | public final class JsonMapper { 26 | 27 | private static final Gson jsonMapper = new Gson(); 28 | 29 | public static String toJson(T obj) { 30 | try { 31 | if (obj instanceof Message proto) { 32 | return JsonFormat.printer().print(proto); 33 | } 34 | 35 | return jsonMapper.toJson(obj); 36 | } catch (InvalidProtocolBufferException e) { 37 | return ""; 38 | } 39 | } 40 | 41 | public static T fromJson(String json, Class clazz) { 42 | return jsonMapper.fromJson(json, TypeToken.get(clazz)); 43 | } 44 | 45 | @SuppressWarnings("unchecked") // Use of generics for creation of Proto message from JSON. 
46 | public static T jsonToProto(String json, Class protoClazz) { 47 | try { 48 | var builder = (Message.Builder) protoClazz.getMethod("newBuilder").invoke(null); 49 | JsonFormat.parser().merge(json, builder); 50 | return (T) builder.build(); 51 | } catch (Exception exception) { 52 | throw new RuntimeException("error converting\n" + json, exception); 53 | } 54 | } 55 | 56 | private JsonMapper() {} 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/TransformFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn; 18 | 19 | import java.util.List; 20 | 21 | /** Interface describing a general contract for any tokenization algorithm. */ 22 | public interface TransformFn { 23 | 24 | /** 25 | * Returns Deidentified/Encrypted list of strings for the provided list of messages using the 26 | * specific tokenization technique. The order of output should be the same as the order of input. 27 | * 28 | * @param rows UTF-8 encoded message strings 29 | * @return the deidentified list of messages in the same order as input. 30 | * @throws Exception when any exception occours in tokenization. 
31 | */ 32 | List deidentify(List> rows) throws Exception; 33 | 34 | /** 35 | * Returns Reidentified/Decrypted list of strings for the provided list of encrypted messages 36 | * using the specific tokenization technique. The order of output should be the same as the order 37 | * of input. 38 | * 39 | * @param rows UTF-8 encoded encrypted message strings 40 | * @return the ReIdentified/Decrypted list of messages in the same order as input. 41 | * @throws Exception when any exception occurs or the input messages are not in the same 42 | * encryption format. 43 | */ 44 | List reidentify(List> rows) throws Exception; 45 | 46 | /** 47 | * Returns the name of the encryption algorithm implemented. Needs to be unique for all loaded 48 | * function classes. 49 | */ 50 | String getName(); 51 | } 52 | -------------------------------------------------------------------------------- /src/test/resources/multiple_surrogate_record_info_type_transforms_deid_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "projects/test-project-id/locations/test-region1/deidentifyTemplates/template2", 3 | "displayName": "Record Transform", 4 | "createTime": "2022-08-10T04:52:49.601567Z", 5 | "updateTime": "2022-08-19T08:36:05.899487Z", 6 | "deidentifyConfig": { 7 | "recordTransformations": { 8 | "fieldTransformations": [ 9 | { 10 | "fields": [ 11 | { 12 | "name": "bqfnvalue" 13 | } 14 | ], 15 | "infoTypeTransformations": { 16 | "transformations": [ 17 | { 18 | "infoTypes": [ 19 | { 20 | "name": "EMAIL_ADDRESS" 21 | } 22 | ], 23 | "primitiveTransformation": { 24 | "cryptoDeterministicConfig": { 25 | "cryptoKey": { 26 | "kmsWrapped": { 27 | "wrappedKey": "some+other+base64+em", 28 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 29 | } 30 | }, 31 | "surrogateInfoType": { 32 | "name": "BQ_EM" 33 | } 34 | } 35 | } 36 | }, 37 | { 38 | "infoTypes": [ 39 | { 40 | "name": 
"PHONE_NUMBER" 41 | } 42 | ], 43 | "primitiveTransformation": { 44 | "cryptoDeterministicConfig": { 45 | "cryptoKey": { 46 | "kmsWrapped": { 47 | "wrappedKey": "some+base64+key+ph", 48 | "cryptoKeyName": "projects/test-project/locations/asia-southeast1/keyRings/dlp-encryption-key/cryptoKeys/key1" 49 | } 50 | }, 51 | "surrogateInfoType": { 52 | "name": "BQ_PH" 53 | } 54 | } 55 | } 56 | } 57 | ] 58 | } 59 | } 60 | ] 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/stubs/dlp/MappingDeidentifyTemplateCallerFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp; 18 | 19 | import static com.google.common.truth.Truth.assertThat; 20 | 21 | import com.google.api.gax.rpc.ApiCallContext; 22 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture; 23 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture.ApiFutureFactory; 24 | import com.google.common.collect.ImmutableMap; 25 | import com.google.privacy.dlp.v2.DeidentifyTemplate; 26 | import com.google.privacy.dlp.v2.GetDeidentifyTemplateRequest; 27 | import java.util.Map; 28 | 29 | public class MappingDeidentifyTemplateCallerFactory 30 | extends ApiFutureFactory { 31 | 32 | private final ImmutableMap nameTemplateMap; 33 | 34 | public MappingDeidentifyTemplateCallerFactory(Map nameTemplateMap) { 35 | super(GetDeidentifyTemplateRequest.class, DeidentifyTemplate.class); 36 | this.nameTemplateMap = ImmutableMap.copyOf(nameTemplateMap); 37 | } 38 | 39 | public static MappingDeidentifyTemplateCallerFactory using( 40 | Map nameTemplateMap) { 41 | return new MappingDeidentifyTemplateCallerFactory(nameTemplateMap); 42 | } 43 | 44 | @Override 45 | public BaseUnaryApiFuture create( 46 | GetDeidentifyTemplateRequest request, ApiCallContext context) { 47 | return new BaseUnaryApiFuture<>() { 48 | @Override 49 | public DeidentifyTemplate get() { 50 | assertThat(nameTemplateMap).containsKey(request.getName()); 51 | return nameTemplateMap.get(request.getName()); 52 | } 53 | }; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/Base64Fn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns; 18 | 19 | import static com.google.common.collect.ImmutableList.toImmutableList; 20 | 21 | import com.google.cloud.solutions.bqremoteencryptionfn.TransformFnFactory; 22 | import com.google.common.collect.ImmutableList; 23 | import java.nio.charset.StandardCharsets; 24 | import java.util.Base64; 25 | import java.util.List; 26 | import java.util.Map; 27 | import javax.annotation.Nonnull; 28 | import org.springframework.stereotype.Component; 29 | 30 | /** 31 | * Implementation to convert a given input to Base64 encoded String using {@link 32 | * java.util.Base64.Encoder} 33 | */ 34 | public final class Base64Fn extends UnaryStringArgFn { 35 | 36 | public static final String FN_NAME = "base64"; 37 | 38 | @Component 39 | public static class Base64TransformFnFactory implements TransformFnFactory { 40 | @Override 41 | public Base64Fn createFn(@Nonnull Map options) { 42 | return new Base64Fn(); 43 | } 44 | 45 | @Override 46 | public String getFnName() { 47 | return FN_NAME; 48 | } 49 | } 50 | 51 | @Override 52 | public String getName() { 53 | return FN_NAME; 54 | } 55 | 56 | @Override 57 | public ImmutableList deidentifyUnaryRow(List rows) { 58 | var encoder = Base64.getEncoder(); 59 | 60 | return rows.stream() 61 | .map(row -> row.getBytes(StandardCharsets.UTF_8)) 62 | .map(encoder::encodeToString) 63 | .collect(toImmutableList()); 64 | } 65 | 66 | @Override 67 | public ImmutableList reidentifyUnaryRow(List rows) { 68 | var decoder = Base64.getDecoder(); 
69 | 70 | return rows.stream() 71 | .map(decoder::decode) 72 | .map(b -> new String(b, StandardCharsets.UTF_8)) 73 | .collect(toImmutableList()); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/stubs/BaseUnaryApiFuture.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs; 18 | 19 | import com.google.api.core.ApiFuture; 20 | import com.google.api.gax.rpc.ApiCallContext; 21 | import java.io.Serializable; 22 | import java.util.concurrent.ExecutionException; 23 | import java.util.concurrent.Executor; 24 | import java.util.concurrent.TimeUnit; 25 | 26 | public abstract class BaseUnaryApiFuture implements ApiFuture, Serializable { 27 | 28 | public abstract static class ApiFutureFactory implements Serializable { 29 | 30 | private final Class requestClass; 31 | private final Class responseClass; 32 | 33 | public ApiFutureFactory(Class requestClass, Class responseClass) { 34 | this.requestClass = requestClass; 35 | this.responseClass = responseClass; 36 | } 37 | 38 | public abstract BaseUnaryApiFuture create(RequestT request, ApiCallContext context); 39 | 40 | public final boolean matchIO(Class requestClass, Class responseClass) { 41 | return (this.requestClass.equals(requestClass) && this.responseClass.equals(responseClass)); 42 | } 43 | } 44 | 45 | @Override 46 | public final void addListener(Runnable runnable, Executor executor) { 47 | executor.execute(runnable); 48 | } 49 | 50 | @Override 51 | public final boolean cancel(boolean b) { 52 | return false; 53 | } 54 | 55 | @Override 56 | public final boolean isCancelled() { 57 | return false; 58 | } 59 | 60 | @Override 61 | public final boolean isDone() { 62 | return true; 63 | } 64 | 65 | @Override 66 | @SuppressWarnings("NullableProblems") 67 | public final ResponseT get(long l, TimeUnit timeUnit) 68 | throws ExecutionException, InterruptedException { 69 | return get(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/stubs/dlp/VerifyingDeidentifyCallerFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Google 
LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp; 18 | 19 | import com.google.api.gax.rpc.ApiCallContext; 20 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture; 21 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture.ApiFutureFactory; 22 | import com.google.common.truth.extensions.proto.ProtoTruth; 23 | import com.google.privacy.dlp.v2.DeidentifyContentRequest; 24 | import com.google.privacy.dlp.v2.DeidentifyContentResponse; 25 | import java.util.concurrent.ExecutionException; 26 | 27 | public class VerifyingDeidentifyCallerFactory 28 | extends ApiFutureFactory { 29 | 30 | private final DeidentifyContentRequest expectedRequest; 31 | private final ApiFutureFactory deidFactory; 32 | 33 | public VerifyingDeidentifyCallerFactory( 34 | DeidentifyContentRequest expectedRequest, 35 | ApiFutureFactory deidFactory) { 36 | super(DeidentifyContentRequest.class, DeidentifyContentResponse.class); 37 | this.expectedRequest = expectedRequest; 38 | this.deidFactory = deidFactory; 39 | } 40 | 41 | @Override 42 | public BaseUnaryApiFuture create( 43 | DeidentifyContentRequest request, ApiCallContext context) { 44 | return new BaseUnaryApiFuture() { 45 | @Override 46 | public DeidentifyContentResponse get() throws InterruptedException, ExecutionException { 47 | 
ProtoTruth.assertThat(request) 48 | .ignoringFields(DeidentifyContentRequest.ITEM_FIELD_NUMBER) 49 | .isEqualTo(expectedRequest); 50 | 51 | return deidFactory.create(request, context).get(); 52 | } 53 | }; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/BqTransformFnApp.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn; 18 | 19 | import com.google.cloud.dlp.v2.DlpServiceClient; 20 | import com.google.cloud.dlp.v2.DlpServiceSettings; 21 | import com.google.cloud.solutions.bqremoteencryptionfn.fns.dlp.DlpFn.DlpClientFactory; 22 | import org.apache.coyote.http11.AbstractHttp11Protocol; 23 | import org.springframework.boot.SpringApplication; 24 | import org.springframework.boot.autoconfigure.SpringBootApplication; 25 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 26 | import org.springframework.boot.web.embedded.tomcat.TomcatServletWebServerFactory; 27 | import org.springframework.boot.web.server.WebServerFactoryCustomizer; 28 | import org.springframework.context.annotation.Bean; 29 | import org.springframework.context.annotation.Configuration; 30 | import org.springframework.context.annotation.Profile; 31 | 32 | @SpringBootApplication 33 | @EnableConfigurationProperties 34 | @Configuration 35 | public class BqTransformFnApp { 36 | 37 | public static void main(String[] args) { 38 | SpringApplication.run(BqTransformFnApp.class, args); 39 | } 40 | 41 | @Bean 42 | @Profile("!test") 43 | public DlpClientFactory defaultDlpClientFactory(UserAgentHeaderProvider userAgentHeaderProvider) { 44 | return () -> 45 | DlpServiceClient.create( 46 | DlpServiceSettings.newBuilder().setHeaderProvider(userAgentHeaderProvider).build()); 47 | } 48 | 49 | // Enable Keep-Alive HTTP Response header 50 | @Bean 51 | public WebServerFactoryCustomizer tomcatCustomizer() { 52 | return (tomcat) -> 53 | tomcat.addConnectorCustomizers( 54 | (connector) -> { 55 | if (connector.getProtocolHandler() 56 | instanceof AbstractHttp11Protocol protocolHandler) { 57 | protocolHandler.setKeepAliveTimeout(300000); 58 | protocolHandler.setMaxKeepAliveRequests(100); 59 | protocolHandler.setUseKeepAliveResponseHeader(true); 60 | } 61 | }); 62 | } 63 | } 64 | 
-------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/dlp/DlpConfigTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns.dlp; 18 | 19 | import static com.google.common.truth.Truth.assertThat; 20 | 21 | import com.google.common.collect.ImmutableMap; 22 | import org.json.JSONException; 23 | import org.junit.Test; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.JUnit4; 26 | import org.skyscreamer.jsonassert.JSONAssert; 27 | 28 | @RunWith(JUnit4.class) 29 | public class DlpConfigTest { 30 | 31 | @Test 32 | public void fromJson_map_valid() { 33 | var testJson = ImmutableMap.of("dlp-deid-template", "my-template-id"); 34 | assertThat(DlpConfig.fromJson(testJson)).isEqualTo(new DlpConfig("my-template-id", null)); 35 | } 36 | 37 | @Test 38 | public void fromJson_string_valid() { 39 | assertThat(DlpConfig.fromJson("{\"dlp-deid-template\": \"my-template-id\"}")) 40 | .isEqualTo(new DlpConfig("my-template-id", null)); 41 | } 42 | 43 | @Test 44 | public void toJson_valid() throws JSONException { 45 | var json = new DlpConfig("my-template-id", null).toJson(); 46 | JSONAssert.assertEquals("{\"dlp-deid-template\": 
\"my-template-id\"}", json, true); 47 | } 48 | 49 | @Test 50 | public void fromJson_mapWithInspectTemplate_valid() { 51 | var testJson = 52 | ImmutableMap.of( 53 | "dlp-deid-template", "my-template-id", "dlp-inspect-template", "my-inspect-template"); 54 | assertThat(DlpConfig.fromJson(testJson)) 55 | .isEqualTo(new DlpConfig("my-template-id", "my-inspect-template")); 56 | } 57 | 58 | @Test 59 | public void fromJson_stringWithInspectTemplate_valid() { 60 | assertThat( 61 | DlpConfig.fromJson( 62 | "{\"dlp-deid-template\": \"my-template-id\", \"dlp-inspect-template\":\"my-inspect-template\"}")) 63 | .isEqualTo(new DlpConfig("my-template-id", "my-inspect-template")); 64 | } 65 | 66 | @Test 67 | public void toJson_withInspectTemplate_valid() throws JSONException { 68 | var json = new DlpConfig("my-template-id", "my-inspect-template").toJson(); 69 | JSONAssert.assertEquals( 70 | "{\"dlp-deid-template\": \"my-template-id\", \"dlp-inspect-template\": \"my-inspect-template\"}", 71 | json, 72 | true); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/stubs/dlp/RequestSizeLimitingDeidentifyFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp; 18 | 19 | import com.google.api.gax.rpc.ApiCallContext; 20 | import com.google.api.gax.rpc.InvalidArgumentException; 21 | import com.google.api.gax.rpc.StatusCode; 22 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture; 23 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture.ApiFutureFactory; 24 | import com.google.privacy.dlp.v2.ContentItem; 25 | import com.google.privacy.dlp.v2.DeidentifyContentRequest; 26 | import com.google.privacy.dlp.v2.DeidentifyContentResponse; 27 | 28 | public class RequestSizeLimitingDeidentifyFactory 29 | extends ApiFutureFactory { 30 | 31 | private final int expectedRowCount; 32 | 33 | public RequestSizeLimitingDeidentifyFactory(int expectedRowCount) { 34 | super(DeidentifyContentRequest.class, DeidentifyContentResponse.class); 35 | this.expectedRowCount = expectedRowCount; 36 | } 37 | 38 | @Override 39 | public BaseUnaryApiFuture create( 40 | DeidentifyContentRequest request, ApiCallContext context) { 41 | return new BaseUnaryApiFuture<>() { 42 | @Override 43 | public DeidentifyContentResponse get() { 44 | 45 | if (request.getItem().getTable().getRowsCount() > expectedRowCount) { 46 | throw new InvalidArgumentException( 47 | new RuntimeException( 48 | "Too many findings to de-identify. 
Retry with a smaller request."), 49 | new StatusCode() { 50 | @Override 51 | public Code getCode() { 52 | return Code.INVALID_ARGUMENT; 53 | } 54 | 55 | @Override 56 | public Object getTransportCode() { 57 | return Code.INVALID_ARGUMENT.getHttpStatusCode(); 58 | } 59 | }, 60 | true); 61 | } 62 | 63 | return DeidentifyContentResponse.newBuilder() 64 | .setItem(ContentItem.newBuilder().setTable(request.getItem().getTable())) 65 | .build(); 66 | } 67 | }; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/stubs/dlp/VerifyingReidentifyCallerFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp; 18 | 19 | import com.google.api.gax.rpc.ApiCallContext; 20 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture; 21 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture.ApiFutureFactory; 22 | import com.google.common.truth.extensions.proto.ProtoTruth; 23 | import com.google.privacy.dlp.v2.ReidentifyContentRequest; 24 | import com.google.privacy.dlp.v2.ReidentifyContentResponse; 25 | import java.util.concurrent.ExecutionException; 26 | 27 | public class VerifyingReidentifyCallerFactory 28 | extends ApiFutureFactory { 29 | 30 | private final ReidentifyContentRequest expectedRequest; 31 | private final ApiFutureFactory reidFactory; 32 | 33 | private VerifyingReidentifyCallerFactory( 34 | ReidentifyContentRequest expectedRequest, 35 | ApiFutureFactory reidFactory) { 36 | super(ReidentifyContentRequest.class, ReidentifyContentResponse.class); 37 | this.expectedRequest = expectedRequest; 38 | this.reidFactory = reidFactory; 39 | } 40 | 41 | public static VerifyingReidentifyCallerFactory withExpectedRequest( 42 | ReidentifyContentRequest expectedRequest) { 43 | return new VerifyingReidentifyCallerFactory(expectedRequest, null); 44 | } 45 | 46 | public VerifyingReidentifyCallerFactory withReidFactory( 47 | ApiFutureFactory reidFactory) { 48 | return new VerifyingReidentifyCallerFactory(expectedRequest, reidFactory); 49 | } 50 | 51 | @Override 52 | public BaseUnaryApiFuture create( 53 | ReidentifyContentRequest request, ApiCallContext context) { 54 | return new BaseUnaryApiFuture<>() { 55 | @Override 56 | public ReidentifyContentResponse get() throws InterruptedException, ExecutionException { 57 | ProtoTruth.assertThat(request) 58 | .ignoringFields(ReidentifyContentRequest.ITEM_FIELD_NUMBER) 59 | .isEqualTo(expectedRequest); 60 | 61 | return reidFactory.create(request, context).get(); 62 | } 63 | }; 64 | } 65 
| } 66 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | @rem SPDX-License-Identifier: Apache-2.0 17 | @rem 18 | 19 | @if "%DEBUG%"=="" @echo off 20 | @rem ########################################################################## 21 | @rem 22 | @rem Gradle startup script for Windows 23 | @rem 24 | @rem ########################################################################## 25 | 26 | @rem Set local scope for the variables with windows NT shell 27 | if "%OS%"=="Windows_NT" setlocal 28 | 29 | set DIRNAME=%~dp0 30 | if "%DIRNAME%"=="" set DIRNAME=. 31 | @rem This is normally unused 32 | set APP_BASE_NAME=%~n0 33 | set APP_HOME=%DIRNAME% 34 | 35 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 36 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 37 | 38 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 39 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 40 | 41 | @rem Find java.exe 42 | if defined JAVA_HOME goto findJavaFromJavaHome 43 | 44 | set JAVA_EXE=java.exe 45 | %JAVA_EXE% -version >NUL 2>&1 46 | if %ERRORLEVEL% equ 0 goto execute 47 | 48 | echo. 
1>&2 49 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 50 | echo. 1>&2 51 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 52 | echo location of your Java installation. 1>&2 53 | 54 | goto fail 55 | 56 | :findJavaFromJavaHome 57 | set JAVA_HOME=%JAVA_HOME:"=% 58 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 59 | 60 | if exist "%JAVA_EXE%" goto execute 61 | 62 | echo. 1>&2 63 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 64 | echo. 1>&2 65 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 66 | echo location of your Java installation. 1>&2 67 | 68 | goto fail 69 | 70 | :execute 71 | @rem Setup the command line 72 | 73 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 74 | 75 | 76 | @rem Execute Gradle 77 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 78 | 79 | :end 80 | @rem End local scope for the variables with windows NT shell 81 | if %ERRORLEVEL% equ 0 goto mainEnd 82 | 83 | :fail 84 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 85 | rem the _cmd.exe /c_ return code! 86 | set EXIT_CODE=%ERRORLEVEL% 87 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 88 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 89 | exit /b %EXIT_CODE% 90 | 91 | :mainEnd 92 | if "%OS%"=="Windows_NT" endlocal 93 | 94 | :omega 95 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/BigQueryFnCallController.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn; 18 | 19 | import static com.google.common.base.Preconditions.checkNotNull; 20 | 21 | import com.google.cloud.solutions.bqremoteencryptionfn.fns.IdentityFn.IdentityTransformFnFactory; 22 | import com.google.common.flogger.GoogleLogger; 23 | import java.util.List; 24 | import java.util.Map; 25 | import org.springframework.beans.factory.annotation.Autowired; 26 | import org.springframework.web.bind.annotation.PostMapping; 27 | import org.springframework.web.bind.annotation.RequestBody; 28 | import org.springframework.web.bind.annotation.RestController; 29 | 30 | /** 31 | * The main REST Controller that provides BigQuery Remote function compliant endpoint 32 | * 33 | * @see > transformFnFactories; 46 | 47 | @PostMapping("/") 48 | public BigQueryRemoteFnResponse process(@RequestBody BigQueryRemoteFnRequest request) { 49 | try { 50 | var options = 51 | checkNotNull(request.userDefinedContext(), "userDefinedContext is required. Found null."); 52 | var callMode = identifyCallMode(options); 53 | var algo = checkNotNull(options.get(TRANSFORM_ALGO_KEY), "Invalid Algorithm. 
Found null"); 54 | 55 | var transformFn = 56 | transformFnFactories.stream() 57 | .filter(factory -> factory.getFnName().equals(algo)) 58 | .findFirst() 59 | .orElseGet(IdentityTransformFnFactory::new) 60 | .createFn(options); 61 | 62 | var replies = 63 | switch (callMode) { 64 | case DEIDENTIFY -> transformFn.deidentify(request.calls()); 65 | case REIDENTIFY -> transformFn.reidentify(request.calls()); 66 | }; 67 | 68 | return BigQueryRemoteFnResponse.withReplies(replies); 69 | } catch (Exception exp) { 70 | logger.atInfo().withCause(exp).log("error processing request"); 71 | return BigQueryRemoteFnResponse.withErrorMessage(exp.getMessage()); 72 | } 73 | } 74 | 75 | private static CallMode identifyCallMode(Map userContext) { 76 | var callMode = userContext.get(CALL_MODE_KEY); 77 | return callMode == null ? CallMode.DEIDENTIFY : CallMode.valueOf(callMode.toUpperCase()); 78 | } 79 | 80 | public enum CallMode { 81 | DEIDENTIFY, 82 | REIDENTIFY 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
3 | 4 | provider "registry.terraform.io/hashicorp/google" { 5 | version = "6.12.0" 6 | constraints = ">= 4.0.0" 7 | hashes = [ 8 | "h1:D6eBvo6Qs3OXXMwB2vFBhKuiL/TMNu5bBtjznSPbBH0=", 9 | "h1:JuVRL27b9/g/FdblPwGZEacECBZRIuHZ+nzRgpIDduo=", 10 | "h1:MgQU4YOpoJQngarNuPfvNKF4r7YyqYii7DBOiiaKBEw=", 11 | "h1:OQp1STR+bhXfBQKs02krIJuELZ2c4GmGhlBd4bSq128=", 12 | "h1:OvpG2uAeuIc19TFOQrr+s7Ded6sVBripLWLIzeWn90U=", 13 | "h1:PmkvZRF8v7SszpUP4l9gXfhlVwq3z+HrTE39qusLrBw=", 14 | "h1:WZulOZ1tthsLm/w7t7yy3C7JZ8QIXfO+QMEEW1pjrAY=", 15 | "h1:Yez7eMayTGGmX/LCIDD3Xgnktpv/701g2zotNmg6u2o=", 16 | "h1:nQTLHstv9bSviqpeBRzCfUG1lD8j+3f6Daxuv32At8k=", 17 | "h1:rvZHMkoxkHrBYQXb/waoZiD2oo3FS1AF8HoWHlb6SN8=", 18 | "h1:zpwamvGGsck1qNunJeyfolgiBDOJpXVINPzLLSECpjY=", 19 | "zh:14701aa307a832d99f567b8056a4c5e4ee5a403d984c98f024deee7507a3f29c", 20 | "zh:344eca00ffb2643c2fa7f52f069b659d50bb4c9369df4cad96ea0fadb54282c8", 21 | "zh:5fb57c0acfd4d30a39941900040d5518a909d8c975af0c4366a7bfd0d0bb09a8", 22 | "zh:617a77048a5b9aa568e8bc706cc84307a237b2dd0e49709028b283f8bbe42475", 23 | "zh:677837a05fefe0342cf4d4bdc494e8fd4d62331cac947820e73df37e8f512688", 24 | "zh:7b79f6e02474eef4a1480fc6589afb63ed16b25bf019b6056f9838e2845e2ef8", 25 | "zh:7d891fceb5b15e81240d829f42e1a36e4c812bfc1abe7856756e59101932205f", 26 | "zh:97f1e0ac799faf382426e070e888fac36b0867597b460dc95b0e7f657de21ba9", 27 | "zh:9855f2f2f5919ff6a6a2c982439c910d28c8978ad18cd8f549a5d1ba9b4dc4c3", 28 | "zh:ac551367180eb396af2a50244e80243d333d600a76002e29935262d76a02290b", 29 | "zh:c354f34e6579933d21a98ce7f31f4ef8aeaceb04cfaedaff6d3f3c0be56b2c79", 30 | "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", 31 | ] 32 | } 33 | 34 | provider "registry.terraform.io/hashicorp/null" { 35 | version = "3.2.1" 36 | hashes = [ 37 | "h1:FbGfc+muBsC17Ohy5g806iuI1hQc4SIexpYCrQHQd8w=", 38 | "zh:58ed64389620cc7b82f01332e27723856422820cfd302e304b5f6c3436fb9840", 39 | "zh:62a5cc82c3b2ddef7ef3a6f2fedb7b9b3deff4ab7b414938b08e51d6e8be87cb", 40 | 
"zh:63cff4de03af983175a7e37e52d4bd89d990be256b16b5c7f919aff5ad485aa5", 41 | "zh:74cb22c6700e48486b7cabefa10b33b801dfcab56f1a6ac9b6624531f3d36ea3", 42 | "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", 43 | "zh:79e553aff77f1cfa9012a2218b8238dd672ea5e1b2924775ac9ac24d2a75c238", 44 | "zh:a1e06ddda0b5ac48f7e7c7d59e1ab5a4073bbcf876c73c0299e4610ed53859dc", 45 | "zh:c37a97090f1a82222925d45d84483b2aa702ef7ab66532af6cbcfb567818b970", 46 | "zh:e4453fbebf90c53ca3323a92e7ca0f9961427d2f0ce0d2b65523cc04d5d999c2", 47 | "zh:e80a746921946d8b6761e77305b752ad188da60688cfd2059322875d363be5f5", 48 | "zh:fbdb892d9822ed0e4cb60f2fedbdbb556e4da0d88d3b942ae963ed6ff091e48f", 49 | "zh:fca01a623d90d0cad0843102f9b8b9fe0d3ff8244593bd817f126582b52dd694", 50 | ] 51 | } 52 | 53 | provider "registry.terraform.io/hashicorp/random" { 54 | version = "3.5.1" 55 | hashes = [ 56 | "h1:VSnd9ZIPyfKHOObuQCaKfnjIHRtR7qTw19Rz8tJxm+k=", 57 | "zh:04e3fbd610cb52c1017d282531364b9c53ef72b6bc533acb2a90671957324a64", 58 | "zh:119197103301ebaf7efb91df8f0b6e0dd31e6ff943d231af35ee1831c599188d", 59 | "zh:4d2b219d09abf3b1bb4df93d399ed156cadd61f44ad3baf5cf2954df2fba0831", 60 | "zh:6130bdde527587bbe2dcaa7150363e96dbc5250ea20154176d82bc69df5d4ce3", 61 | "zh:6cc326cd4000f724d3086ee05587e7710f032f94fc9af35e96a386a1c6f2214f", 62 | "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", 63 | "zh:b6d88e1d28cf2dfa24e9fdcc3efc77adcdc1c3c3b5c7ce503a423efbdd6de57b", 64 | "zh:ba74c592622ecbcef9dc2a4d81ed321c4e44cddf7da799faa324da9bf52a22b2", 65 | "zh:c7c5cde98fe4ef1143bd1b3ec5dc04baf0d4cc3ca2c5c7d40d17c0e9b2076865", 66 | "zh:dac4bad52c940cd0dfc27893507c1e92393846b024c5a9db159a93c534a3da03", 67 | "zh:de8febe2a2acd9ac454b844a4106ed295ae9520ef54dc8ed2faf29f12716b602", 68 | "zh:eab0d0495e7e711cca367f7d4df6e322e6c562fc52151ec931176115b83ed014", 69 | ] 70 | } 71 | -------------------------------------------------------------------------------- 
/src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/stubs/dlp/PatchyDlpStub.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp; 18 | 19 | import com.google.api.core.ApiFuture; 20 | import com.google.api.gax.rpc.ApiCallContext; 21 | import com.google.api.gax.rpc.UnaryCallable; 22 | import com.google.cloud.dlp.v2.stub.DlpServiceStub; 23 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture.ApiFutureFactory; 24 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.TestingBackgroundResource; 25 | import com.google.common.collect.ImmutableList; 26 | import com.google.privacy.dlp.v2.DeidentifyContentRequest; 27 | import com.google.privacy.dlp.v2.DeidentifyContentResponse; 28 | import com.google.privacy.dlp.v2.DeidentifyTemplate; 29 | import com.google.privacy.dlp.v2.GetDeidentifyTemplateRequest; 30 | import com.google.privacy.dlp.v2.ReidentifyContentRequest; 31 | import com.google.privacy.dlp.v2.ReidentifyContentResponse; 32 | import java.io.Serializable; 33 | import java.util.List; 34 | import java.util.concurrent.TimeUnit; 35 | import java.util.function.Supplier; 36 | 37 | public class PatchyDlpStub extends DlpServiceStub implements Serializable { 38 
| protected final TestingBackgroundResource testingBackgroundResource = 39 | new TestingBackgroundResource(); 40 | 41 | protected final ImmutableList> callableFactories; 42 | 43 | public PatchyDlpStub(List> callableFactories) { 44 | this.callableFactories = ImmutableList.copyOf(callableFactories); 45 | } 46 | 47 | public static PatchyDlpStub using(List> callableFactories) { 48 | return new PatchyDlpStub(callableFactories); 49 | } 50 | 51 | private static final class PatchyCallable extends UnaryCallable 52 | implements Serializable { 53 | private final ApiFutureFactory factory; 54 | 55 | public PatchyCallable(ApiFutureFactory factory) { 56 | this.factory = factory; 57 | } 58 | 59 | @Override 60 | public ApiFuture futureCall(X request, ApiCallContext context) { 61 | return factory.create(request, context); 62 | } 63 | } 64 | 65 | @SuppressWarnings("unchecked") // Checks are done in PatchyUnaryCallable#matchIO 66 | protected UnaryCallable findCallable( 67 | Class inputClass, 68 | Class outputClass, 69 | Supplier> defaultCallableGetter) { 70 | if (testingBackgroundResource.isShutdown() || testingBackgroundResource.isTerminated()) { 71 | throw new RuntimeException("Stub already shutdown or terminated"); 72 | } 73 | 74 | return callableFactories.stream() 75 | .filter(factory -> factory.matchIO(inputClass, outputClass)) 76 | .findFirst() 77 | .map(PatchyCallable::new) 78 | .map(p -> (UnaryCallable) p) 79 | .orElseGet(defaultCallableGetter); 80 | } 81 | 82 | @Override 83 | public UnaryCallable 84 | deidentifyContentCallable() { 85 | return findCallable( 86 | DeidentifyContentRequest.class, 87 | DeidentifyContentResponse.class, 88 | super::deidentifyContentCallable); 89 | } 90 | 91 | @Override 92 | public UnaryCallable 93 | reidentifyContentCallable() { 94 | return findCallable( 95 | ReidentifyContentRequest.class, 96 | ReidentifyContentResponse.class, 97 | super::reidentifyContentCallable); 98 | } 99 | 100 | @Override 101 | public UnaryCallable 102 | 
getDeidentifyTemplateCallable() { 103 | return findCallable( 104 | GetDeidentifyTemplateRequest.class, 105 | DeidentifyTemplate.class, 106 | super::getDeidentifyTemplateCallable); 107 | } 108 | 109 | @Override 110 | public void shutdown() { 111 | testingBackgroundResource.shutdown(); 112 | } 113 | 114 | @Override 115 | public boolean isShutdown() { 116 | return testingBackgroundResource.isShutdown(); 117 | } 118 | 119 | @Override 120 | public boolean isTerminated() { 121 | return testingBackgroundResource.isTerminated(); 122 | } 123 | 124 | @Override 125 | public void shutdownNow() { 126 | testingBackgroundResource.shutdownNow(); 127 | } 128 | 129 | @Override 130 | public boolean awaitTermination(long l, TimeUnit timeUnit) { 131 | return testingBackgroundResource.awaitTermination(l, timeUnit); 132 | } 133 | 134 | @Override 135 | public void close() { 136 | testingBackgroundResource.close(); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/AesFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns; 18 | 19 | import com.google.cloud.solutions.bqremoteencryptionfn.TransformFnFactory; 20 | import com.google.common.collect.ImmutableList; 21 | import com.google.common.io.BaseEncoding; 22 | import java.nio.charset.StandardCharsets; 23 | import java.security.GeneralSecurityException; 24 | import java.util.Base64; 25 | import java.util.List; 26 | import java.util.Map; 27 | import javax.annotation.Nonnull; 28 | import javax.crypto.Cipher; 29 | import javax.crypto.spec.IvParameterSpec; 30 | import javax.crypto.spec.SecretKeySpec; 31 | import org.springframework.beans.factory.annotation.Value; 32 | import org.springframework.context.annotation.PropertySource; 33 | import org.springframework.stereotype.Component; 34 | 35 | /** 36 | * Implementation of AES Encryption. The actual Key type is passed as a parameter and is dependent 37 | * on the KeyString length. 38 | */ 39 | public final class AesFn extends UnaryStringArgFn { 40 | 41 | public static final String FN_NAME = "aes"; 42 | public static final String AES_CIPHER_TYPE_KEY = "aes-cipher-type"; 43 | public static final String AES_IV_PARAMETER_KEY = "aes-iv-parameter-base64"; 44 | 45 | public enum AesKeyType { 46 | UTF8_KEY, 47 | BASE64_KEY 48 | } 49 | 50 | @Component 51 | @PropertySource("classpath:aes.properties") 52 | public static class AesTransformFnFactory implements TransformFnFactory { 53 | 54 | @Value("${aesKey}") 55 | private String aesKeyString; 56 | 57 | @Value("${aesKeyType}") 58 | private AesKeyType aesKeyType; 59 | 60 | @Value("${aesCipherType}") 61 | private String cipherType; 62 | 63 | @Value("${aesIvParameterBase64}") 64 | private String ivParameterBase64; 65 | 66 | @Override 67 | public String getFnName() { 68 | return FN_NAME; 69 | } 70 | 71 | @Override 72 | public AesFn createFn(@Nonnull Map options) { 73 | return new AesFn( 74 | aesKeyString, 75 | aesKeyType, 76 | options.getOrDefault(AES_CIPHER_TYPE_KEY, cipherType), 
77 | options.getOrDefault(AES_IV_PARAMETER_KEY, ivParameterBase64)); 78 | } 79 | } 80 | 81 | private final String keyString; 82 | private final AesKeyType keyType; 83 | private final String cipherTransformType; 84 | private final String ivParameterBase64; 85 | 86 | public AesFn( 87 | String keyString, AesKeyType keyType, String cipherTransformType, String ivParameterBase64) { 88 | this.keyString = keyString; 89 | this.keyType = keyType; 90 | this.cipherTransformType = cipherTransformType; 91 | this.ivParameterBase64 = ivParameterBase64; 92 | } 93 | 94 | @Override 95 | public List deidentifyUnaryRow(List rows) throws Exception { 96 | var encryptCipher = makeCipher(Cipher.ENCRYPT_MODE); 97 | 98 | var encoder = Base64.getEncoder(); 99 | 100 | ImmutableList.Builder replies = ImmutableList.builder(); 101 | 102 | for (String element : rows) { 103 | var bytes = element.getBytes(StandardCharsets.UTF_8); 104 | replies.add(encoder.encodeToString(encryptCipher.doFinal(bytes))); 105 | } 106 | return replies.build(); 107 | } 108 | 109 | @Override 110 | public List reidentifyUnaryRow(List rows) throws Exception { 111 | 112 | var decryptCipher = makeCipher(Cipher.DECRYPT_MODE); 113 | 114 | var decoder = Base64.getDecoder(); 115 | 116 | ImmutableList.Builder replies = ImmutableList.builder(); 117 | 118 | for (String base64String : rows) { 119 | var bytes = decoder.decode(base64String); 120 | replies.add(new String(decryptCipher.doFinal(bytes), StandardCharsets.UTF_8)); 121 | } 122 | return replies.build(); 123 | } 124 | 125 | @Override 126 | public String getName() { 127 | return FN_NAME; 128 | } 129 | 130 | private Cipher makeCipher(int opMode) throws GeneralSecurityException { 131 | 132 | var keyBytes = 133 | switch (keyType) { 134 | case UTF8_KEY -> keyString.getBytes(StandardCharsets.UTF_8); 135 | case BASE64_KEY -> BaseEncoding.base64().decode(keyString); 136 | }; 137 | 138 | var secretKey = new SecretKeySpec(keyBytes, "AES"); 139 | var cipher = 
Cipher.getInstance(cipherTransformType); 140 | 141 | if (!cipherTransformType.toUpperCase().contains("ECB")) { 142 | var ivBytes = Base64.getDecoder().decode(ivParameterBase64); 143 | cipher.init(opMode, secretKey, new IvParameterSpec(ivBytes)); 144 | } else { 145 | cipher.init(opMode, secretKey); 146 | } 147 | 148 | return cipher; 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/dlp/RowsToTableFnTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns.dlp; 18 | 19 | import static com.google.common.collect.ImmutableList.toImmutableList; 20 | import static com.google.common.truth.Truth.assertThat; 21 | import static org.junit.Assert.assertThrows; 22 | 23 | import com.google.common.collect.ImmutableList; 24 | import com.google.common.flogger.GoogleLogger; 25 | import com.google.common.truth.extensions.proto.ProtoTruth; 26 | import java.util.List; 27 | import java.util.stream.IntStream; 28 | import org.junit.Before; 29 | import org.junit.Test; 30 | import org.junit.experimental.runners.Enclosed; 31 | import org.junit.runner.RunWith; 32 | import org.junit.runners.JUnit4; 33 | import org.junit.runners.Parameterized; 34 | import org.junit.runners.Parameterized.Parameters; 35 | 36 | @RunWith(Enclosed.class) 37 | public final class RowsToTableFnTest { 38 | 39 | @RunWith(Parameterized.class) 40 | public static final class ParameterizedTests { 41 | 42 | private final int maxCellCount; 43 | private final int maxBytes; 44 | private final List testRows; 45 | 46 | private final int expectedTablesCount; 47 | 48 | private DlpRequestBatchExecutor executor; 49 | 50 | public ParameterizedTests( 51 | String testCaseName, 52 | int maxCellCount, 53 | int maxBytes, 54 | List testRows, 55 | int expectedTablesCount) { 56 | this.maxCellCount = maxCellCount; 57 | this.maxBytes = maxBytes; 58 | this.testRows = testRows; 59 | this.expectedTablesCount = expectedTablesCount; 60 | 61 | GoogleLogger.forEnclosingClass().atInfo().log("testCase: %s", testCaseName); 62 | } 63 | 64 | @Before 65 | public void buildExecutor() { 66 | executor = 67 | DlpRequestBatchExecutor.builder() 68 | .setDlpColumnName("testDlpColumn") 69 | .setRequestMaxBytes(maxBytes) 70 | .setRequestCellCount(maxCellCount) 71 | .build(); 72 | } 73 | 74 | @Test 75 | public void apply_valid() { 76 | 77 | var tables = executor.rowsToTableFn().apply(testRows); 78 | 79 | 
assertThat(tables).hasSize(expectedTablesCount); 80 | tables.forEach( 81 | table -> { 82 | assertThat(table.getHeadersCount()).isEqualTo(1); 83 | assertThat(table.getHeadersList().get(0).getName()).isEqualTo("testDlpColumn"); 84 | ProtoTruth.assertThat(table).serializedSize().isLessThan(maxBytes); 85 | }); 86 | } 87 | 88 | @Parameters(name = "{0}") 89 | public static ImmutableList testingParameters() { 90 | return ImmutableList.builder() 91 | .add( 92 | new Object[] { 93 | /* testCaseName= */ "Exact row split", 94 | /* maxCellCount= */ 10, 95 | /* maxBytes= */ DlpRequestBatchExecutor.REQUEST_MAX_BYTES, 96 | /* testRows= */ makeRows("Some String", 50), 97 | /* expectedTablesCount= */ 5 98 | }) 99 | .add( 100 | new Object[] { 101 | /* testCaseName= */ "Extra table, with remainder of rows", 102 | /* maxCellCount= */ 20, 103 | /* maxBytes= */ DlpRequestBatchExecutor.REQUEST_MAX_BYTES, 104 | /* testRows= */ makeRows("TwentyStringers", 58), 105 | /* expectedTablesCount= */ 3 106 | }) 107 | .add( 108 | new Object[] { 109 | /* testCaseName= */ "Tables split for maxBytes", 110 | /* maxCellCount= */ 50000, 111 | /* maxBytes= */ 100, 112 | /* testRows= */ makeRows("iBaseStringToMakeFiftyBytesSizeOfStringWhyHard", 10), 113 | /* expectedTablesCount= */ 10 114 | }) 115 | .build(); 116 | } 117 | 118 | private static List makeRows(String base, int rowCount) { 119 | return IntStream.range(0, rowCount) 120 | .boxed() 121 | .map(i -> String.format("%s %03d", base, i)) 122 | .collect(toImmutableList()); 123 | } 124 | } 125 | 126 | @RunWith(JUnit4.class) 127 | public static final class ExceptionTests { 128 | 129 | @Test 130 | public void singleElementMoreThanMaxBytes_throwsRuntimeException() { 131 | 132 | var fn = 133 | DlpRequestBatchExecutor.builder() 134 | .setDlpColumnName("testDlpColumn") 135 | .setRequestMaxBytes(50) 136 | .setRequestCellCount(10000) 137 | .build() 138 | .rowsToTableFn(); 139 | 140 | var runtimeException = 141 | assertThrows( 142 | RuntimeException.class, 143 | 
() -> 144 | fn.apply( 145 | List.of( 146 | "iBaseStringToMakeFiftyBytesSizeOfStringWhyHardiBaseStringToMakeFiftyBytesSizeOfStringWhyHard"))); 147 | 148 | assertThat(runtimeException) 149 | .hasMessageThat() 150 | .startsWith("Single Row size greater than DLP limit."); 151 | } 152 | 153 | @Test 154 | public void maxBytesMoreThanMaxBytes_throwsRuntimeException() { 155 | 156 | var executor = 157 | DlpRequestBatchExecutor.builder() 158 | .setDlpColumnName("testDlpColumn") 159 | .setRequestMaxBytes(DlpRequestBatchExecutor.REQUEST_MAX_BYTES + 1) 160 | .setRequestCellCount(1) 161 | .build(); 162 | 163 | var runtimeException = assertThrows(IllegalArgumentException.class, executor::rowsToTableFn); 164 | 165 | assertThat(runtimeException) 166 | .hasMessageThat() 167 | .startsWith("Provided DLP requestMaxBytes (500001) is more than maximum (500000)"); 168 | } 169 | 170 | @Test 171 | public void dlpRowsMoreThanMaxRowCount_throwsRuntimeException() { 172 | 173 | var executor = 174 | DlpRequestBatchExecutor.builder() 175 | .setDlpColumnName("testDlpColumn") 176 | .setRequestMaxBytes(DlpRequestBatchExecutor.REQUEST_MAX_BYTES) 177 | .setRequestCellCount(50001) 178 | .build(); 179 | 180 | var runtimeException = assertThrows(IllegalArgumentException.class, executor::rowsToTableFn); 181 | 182 | assertThat(runtimeException) 183 | .hasMessageThat() 184 | .startsWith("Provided DLP requestCellCount (50001) is more than maximum (50000)"); 185 | } 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/dlp/DlpFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns.dlp; 18 | 19 | import static com.google.common.base.Strings.isNullOrEmpty; 20 | 21 | import com.google.cloud.dlp.v2.DlpServiceClient; 22 | import com.google.cloud.solutions.bqremoteencryptionfn.TransformFnFactory; 23 | import com.google.cloud.solutions.bqremoteencryptionfn.fns.UnaryStringArgFn; 24 | import com.google.privacy.dlp.v2.*; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.regex.Pattern; 28 | import javax.annotation.Nonnull; 29 | import org.springframework.beans.factory.annotation.Value; 30 | import org.springframework.context.annotation.PropertySource; 31 | import org.springframework.stereotype.Component; 32 | 33 | /** 34 | * Implements the Google Cloud DLP based tokenization using the provided Deidentify Templates. The 35 | * Function does not manage DLP batch sizes, which can potentially throw an error. 36 | * 37 | *

The surrogateType information from the DeIdentifyConfig is used to construct a CustomInfoTypes 44 | * list for inspection and the InfoTypeTransformations' InfoType selectors from DeidentifyConfig are 45 | * modified to match the surrogate values. 46 | * 47 | * @see T throwUnknownTransformationException() { 209 | throw new RuntimeException("Unknown Transform Exception"); 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | # SPDX-License-Identifier: Apache-2.0 19 | # 20 | 21 | ############################################################################## 22 | # 23 | # Gradle start up script for POSIX generated by Gradle. 24 | # 25 | # Important for running: 26 | # 27 | # (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is 28 | # noncompliant, but you have some other compliant shell such as ksh or 29 | # bash, then to run this script, type that shell name before the whole 30 | # command line, like: 31 | # 32 | # ksh Gradle 33 | # 34 | # Busybox and similar reduced shells will NOT work, because this script 35 | # requires all of these POSIX shell features: 36 | # * functions; 37 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 38 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 39 | # * compound commands having a testable exit status, especially «case»; 40 | # * various built-in commands including «command», «set», and «ulimit». 41 | # 42 | # Important for patching: 43 | # 44 | # (2) This script targets any POSIX shell, so it avoids extensions provided 45 | # by Bash, Ksh, etc; in particular arrays are avoided. 46 | # 47 | # The "traditional" practice of packing multiple parameters into a 48 | # space-separated string is a well documented source of bugs and security 49 | # problems, so this is (mostly) avoided, by progressively accumulating 50 | # options in "$@", and eventually passing that to Java. 51 | # 52 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 53 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 54 | # see the in-line comments for details. 55 | # 56 | # There are tweaks for specific operating systems such as AIX, CygWin, 57 | # Darwin, MinGW, and NonStop. 58 | # 59 | # (3) This script is generated from the Groovy template 60 | # https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 61 | # within the Gradle project. 62 | # 63 | # You can find Gradle at https://github.com/gradle/gradle/. 
64 | # 65 | ############################################################################## 66 | 67 | # Attempt to set APP_HOME 68 | 69 | # Resolve links: $0 may be a link 70 | app_path=$0 71 | 72 | # Need this for daisy-chained symlinks. 73 | while 74 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 75 | [ -h "$app_path" ] 76 | do 77 | ls=$( ls -ld "$app_path" ) 78 | link=${ls#*' -> '} 79 | case $link in #( 80 | /*) app_path=$link ;; #( 81 | *) app_path=$APP_HOME$link ;; 82 | esac 83 | done 84 | 85 | # This is normally unused 86 | # shellcheck disable=SC2034 87 | APP_BASE_NAME=${0##*/} 88 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 89 | APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! -x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 
133 | fi 134 | else 135 | JAVACMD=java 136 | if ! command -v java >/dev/null 2>&1 137 | then 138 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 139 | 140 | Please set the JAVA_HOME variable in your environment to match the 141 | location of your Java installation." 142 | fi 143 | fi 144 | 145 | # Increase the maximum file descriptors if we can. 146 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 147 | case $MAX_FD in #( 148 | max*) 149 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 150 | # shellcheck disable=SC2039,SC3045 151 | MAX_FD=$( ulimit -H -n ) || 152 | warn "Could not query maximum file descriptor limit" 153 | esac 154 | case $MAX_FD in #( 155 | '' | soft) :;; #( 156 | *) 157 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 158 | # shellcheck disable=SC2039,SC3045 159 | ulimit -n "$MAX_FD" || 160 | warn "Could not set maximum file descriptor limit to $MAX_FD" 161 | esac 162 | fi 163 | 164 | # Collect all arguments for the java command, stacking in reverse order: 165 | # * args from the command line 166 | # * the main class name 167 | # * -classpath 168 | # * -D...appname settings 169 | # * --module-path (only if needed) 170 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
171 | 172 | # For Cygwin or MSYS, switch paths to Windows format before running java 173 | if "$cygwin" || "$msys" ; then 174 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 175 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 176 | 177 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 178 | 179 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 180 | for arg do 181 | if 182 | case $arg in #( 183 | -*) false ;; # don't mess with options #( 184 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 185 | [ -e "$t" ] ;; #( 186 | *) false ;; 187 | esac 188 | then 189 | arg=$( cygpath --path --ignore --mixed "$arg" ) 190 | fi 191 | # Roll the args list around exactly as many times as the number of 192 | # args, so each arg winds up back in the position where it started, but 193 | # possibly modified. 194 | # 195 | # NB: a `for` loop captures its iteration list before it begins, so 196 | # changing the positional parameters here affects neither the number of 197 | # iterations, nor the values presented in `arg`. 198 | shift # remove old arg 199 | set -- "$@" "$arg" # push replacement arg 200 | done 201 | fi 202 | 203 | 204 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 205 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 206 | 207 | # Collect all arguments for the java command: 208 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 209 | # and any embedded shellness will be escaped. 210 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 211 | # treated as '${Hostname}' itself on the command line. 212 | 213 | set -- \ 214 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 215 | -classpath "$CLASSPATH" \ 216 | org.gradle.wrapper.GradleWrapperMain \ 217 | "$@" 218 | 219 | # Stop when "xargs" is not available. 220 | if ! 
command -v xargs >/dev/null 2>&1 221 | then 222 | die "xargs is not available" 223 | fi 224 | 225 | # Use "xargs" to parse quoted args. 226 | # 227 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 228 | # 229 | # In Bash we could simply go: 230 | # 231 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 232 | # set -- "${ARGS[@]}" "$@" 233 | # 234 | # but POSIX shell has neither arrays nor command substitution, so instead we 235 | # post-process each arg (as a line of input to sed) to backslash-escape any 236 | # character that might be a shell metacharacter, then use eval to reverse 237 | # that process (while maintaining the separation between arguments), and wrap 238 | # the whole thing up as a single "set" statement. 239 | # 240 | # This will of course break if any of these variables contains a newline or 241 | # an unmatched quote. 242 | # 243 | 244 | eval "set -- $( 245 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 246 | xargs -n1 | 247 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 248 | tr '\n' ' ' 249 | )" '"$@"' 250 | 251 | exec "$JAVACMD" "$@" 252 | -------------------------------------------------------------------------------- /main.tf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | ###################################### 18 | ## Initializing Cloud Services ## 19 | ###################################### 20 | 21 | terraform { 22 | required_providers { 23 | google = { 24 | source = "hashicorp/google" 25 | version = ">= 4.0.0" 26 | } 27 | } 28 | 29 | provider_meta "google" { 30 | module_name = "cloud-solutions/deploy-bigquery-dlp-remote-function-v0.1" 31 | } 32 | } 33 | 34 | 35 | provider "google" { 36 | billing_project = var.project_id 37 | project = var.project_id 38 | region = var.region 39 | } 40 | 41 | ################################### 42 | ## Creating Cloud Resources ## 43 | ################################### 44 | 45 | 46 | resource "google_service_account" "run_service_account" { 47 | account_id = "${var.service_name}-runner" 48 | project = var.project_id 49 | } 50 | 51 | resource "google_project_iam_member" "grant_role_to_sa" { 52 | for_each = toset([ 53 | "roles/dlp.reader", 54 | "roles/dlp.user", 55 | ]) 56 | project = var.project_id 57 | role = each.key 58 | member = "serviceAccount:${google_service_account.run_service_account.email}" 59 | } 60 | 61 | resource "google_artifact_registry_repository" "image_registry" { 62 | format = "DOCKER" 63 | repository_id = var.artifact_registry_name 64 | project = var.project_id 65 | location = var.region 66 | } 67 | 68 | resource "google_service_account" "build_service_account" { 69 | account_id = "${var.service_name}-builder" 70 | project = var.project_id 71 | } 72 | 73 | resource "google_project_iam_member" "grant_role_to_build_sa" { 74 | project = var.project_id 75 | role = "roles/cloudbuild.builds.builder" 76 | member = "serviceAccount:${google_service_account.build_service_account.email}" 77 | } 78 | 79 | resource "google_storage_bucket" "cloud_build_bucket" { 80 | project = var.project_id 81 | location = var.region 82 | name = "build_bucket_${var.service_name}" 83 | uniform_bucket_level_access = true 84 | public_access_prevention = "enforced" 85 | force_destroy = true 86 | 
} 87 | 88 | resource "google_storage_bucket_iam_member" "builder_iam_bucket" { 89 | for_each = toset([ 90 | "roles/cloudbuild.builds.builder" 91 | ]) 92 | bucket = google_storage_bucket.cloud_build_bucket.name 93 | member = "serviceAccount:${google_service_account.build_service_account.email}" 94 | role = each.key 95 | } 96 | 97 | 98 | ## Create Image using Cloud Build and store in artifact registry 99 | resource "random_id" "build_version" { 100 | byte_length = 8 101 | 102 | keepers = { 103 | project_id = var.project_id 104 | region = var.region 105 | } 106 | } 107 | 108 | resource "null_resource" "build_function_image" { 109 | depends_on = [ 110 | google_artifact_registry_repository.image_registry, 111 | google_storage_bucket_iam_member.builder_iam_bucket 112 | ] 113 | 114 | triggers = { 115 | project_id = var.project_id 116 | region = var.region 117 | full_image_path = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.image_registry.name}/${var.service_name}:${random_id.build_version.hex}" 118 | } 119 | 120 | provisioner "local-exec" { 121 | when = create 122 | command = <Redaction Limits. 
41 | * @see Content Limits 42 | */ 43 | public final class DlpRequestBatchExecutor { 44 | 45 | private static final GoogleLogger logger = GoogleLogger.forEnclosingClass(); 46 | 47 | public static final int REQUEST_MAX_CELL_COUNT = 50000; 48 | 49 | public static final int REQUEST_MAX_BYTES = 500000; 50 | 51 | private final int requestCellCount; 52 | 53 | private final int requestMaxBytes; 54 | 55 | private final String dlpColumnName; 56 | private final DlpClientFactory dlpClientFactory; 57 | 58 | private final Function> dlpCallFnFactory; 59 | private final Function> tableToDlpRequestFnFactory; 60 | 61 | private final Function dlpRequestToTableFn; 62 | private final Function dlpResponseToTableFn; 63 | 64 | private DlpRequestBatchExecutor( 65 | int requestCellCount, 66 | int requestMaxBytes, 67 | String dlpColumnName, 68 | DlpClientFactory dlpClientFactory, 69 | Function> dlpCallFnFactory, 70 | Function> tableToDlpRequestFnFactory, 71 | Function dlpRequestToTableFn, 72 | Function dlpResponseToTableFn) { 73 | this.requestCellCount = requestCellCount; 74 | this.requestMaxBytes = requestMaxBytes; 75 | this.dlpColumnName = dlpColumnName; 76 | this.dlpClientFactory = dlpClientFactory; 77 | this.dlpCallFnFactory = dlpCallFnFactory; 78 | this.tableToDlpRequestFnFactory = tableToDlpRequestFnFactory; 79 | this.dlpRequestToTableFn = dlpRequestToTableFn; 80 | this.dlpResponseToTableFn = dlpResponseToTableFn; 81 | } 82 | 83 | public List process(List rows) throws Exception { 84 | 85 | try (var dlpClient = dlpClientFactory.newClient()) { 86 | 87 | var requestMaker = tableToDlpRequestFnFactory.apply(dlpClient); 88 | var rowToTableFn = rowsToTableFn(); 89 | var tableToRowsFn = new TableToRowsFn(); 90 | 91 | return rowToTableFn.apply(rows).stream() 92 | .map(requestMaker) 93 | .map(new RetryingDlpCaller(dlpClient)) 94 | .flatMap(List::stream) 95 | .map(dlpResponseToTableFn) 96 | .map(tableToRowsFn) 97 | .flatMap(List::stream) 98 | .collect(toList()); 99 | } 100 | } 101 | 102 | /** 
103 | * Implements exponential down-sizing of request payload when DLP content API requests to send 104 | * smaller requests by throwing {@link InvalidArgumentException}. 105 | */ 106 | private class RetryingDlpCaller implements Function> { 107 | 108 | private final DlpServiceClient dlpClient; 109 | 110 | public RetryingDlpCaller(DlpServiceClient dlpClient) { 111 | this.dlpClient = dlpClient; 112 | } 113 | 114 | @Override 115 | public List apply(DlpRequestT dlpRequest) { 116 | var dlpCallFn = dlpCallFnFactory.apply(dlpClient); 117 | var retries = 0; 118 | 119 | var requestsToSend = List.of(dlpRequest); 120 | 121 | do { 122 | try { 123 | 124 | logger.atInfo().log("Sending Try(%s): ", retries); 125 | 126 | return requestsToSend.stream().map(dlpCallFn).toList(); 127 | 128 | } catch (InvalidArgumentException invalidArgumentException) { 129 | 130 | logger.atWarning().log( 131 | "DLP Caller InvalidArgument: msg: %s, retryable: %s", 132 | invalidArgumentException.getMessage(), invalidArgumentException.isRetryable()); 133 | 134 | if (!invalidArgumentException.isRetryable() 135 | || !invalidArgumentException 136 | .getMessage() 137 | .toLowerCase() 138 | .contains("retry with a smaller request")) { 139 | throw invalidArgumentException; 140 | } 141 | } 142 | 143 | requestsToSend = 144 | requestsToSend.stream().map(this::splitRowsToHalf).flatMap(List::stream).toList(); 145 | 146 | } while (retries++ < 10); 147 | 148 | throw new RuntimeException("unable to receive DLP Response after retries"); 149 | } 150 | 151 | private List splitRowsToHalf(DlpRequestT dlpRequest) { 152 | 153 | var table = dlpRequestToTableFn.apply(dlpRequest); 154 | 155 | var rows = table.getRowsList(); 156 | var rowCount = table.getRowsCount(); 157 | var splitPoint = rowCount / 2; 158 | 159 | logger.atInfo().log("Splitting Table: new RowSize: %s", splitPoint); 160 | 161 | return Stream.of( 162 | table.toBuilder().clearRows().addAllRows(rows.subList(0, splitPoint)).build(), 163 | 
table.toBuilder().clearRows().addAllRows(rows.subList(splitPoint, rowCount)).build()) 164 | .map(subTable -> tableToDlpRequestFnFactory.apply(dlpClient).apply(subTable)) 165 | .toList(); 166 | } 167 | } 168 | 169 | @VisibleForTesting 170 | Function, List> rowsToTableFn() { 171 | return new RowsToTableFn(); 172 | } 173 | 174 | private final class RowsToTableFn implements Function, List
> { 175 | 176 | private RowsToTableFn() { 177 | checkArgument( 178 | requestCellCount <= REQUEST_MAX_CELL_COUNT, 179 | "Provided DLP requestCellCount (%s) is more than maximum (%s)", 180 | requestCellCount, 181 | REQUEST_MAX_CELL_COUNT); 182 | 183 | checkArgument( 184 | requestMaxBytes <= REQUEST_MAX_BYTES, 185 | "Provided DLP requestMaxBytes (%s) is more than maximum (%s)", 186 | requestMaxBytes, 187 | REQUEST_MAX_BYTES); 188 | } 189 | 190 | @Override 191 | public List
apply(List rows) { 192 | var requestTableBuilder = ImmutableList.
builder(); 193 | 194 | var accTable = newTable(); 195 | 196 | for (var stringRow : rows) { 197 | var tableRow = convertStringToRow(stringRow); 198 | 199 | if (tableRow.getSerializedSize() >= requestMaxBytes) { 200 | throw new RuntimeException( 201 | String.format( 202 | "Single Row size greater than DLP limit. Found %s bytes", 203 | tableRow.getSerializedSize())); 204 | } 205 | 206 | if (accTable.getRowsCount() + 1 > requestCellCount 207 | || accTable.getSerializedSize() + tableRow.getSerializedSize() >= requestMaxBytes) { 208 | 209 | requestTableBuilder.add(accTable); 210 | accTable = newTable(); 211 | } 212 | 213 | accTable = accTable.toBuilder().addRows(tableRow).build(); 214 | } 215 | 216 | var requestTables = requestTableBuilder.add(accTable).build(); 217 | logger.atInfo().log("Created %s tables from %s rows", requestTables.size(), rows.size()); 218 | return requestTables; 219 | } 220 | 221 | private Row convertStringToRow(String value) { 222 | return Row.newBuilder().addValues(Value.newBuilder().setStringValue(value)).build(); 223 | } 224 | 225 | private Table newTable() { 226 | return Table.newBuilder().addHeaders(FieldId.newBuilder().setName(dlpColumnName)).build(); 227 | } 228 | } 229 | 230 | private final class TableToRowsFn implements Function> { 231 | private int getHeaderIndex(List headers) { 232 | int headerIndex = 0; 233 | for (; headerIndex < headers.size(); headerIndex++) { 234 | if (headers.get(headerIndex).getName().equals(dlpColumnName)) { 235 | return headerIndex; 236 | } 237 | } 238 | 239 | throw new RuntimeException( 240 | String.format("required Table header (%s) not found in: %s", dlpColumnName, headers)); 241 | } 242 | 243 | @Override 244 | public List apply(Table table) { 245 | final int headerIndex = getHeaderIndex(table.getHeadersList()); 246 | 247 | return table.getRowsList().stream() 248 | .map(r -> r.getValues(headerIndex)) 249 | .map(com.google.privacy.dlp.v2.Value::getStringValue) 250 | .collect(toImmutableList()); 251 | } 252 | } 
253 | 254 | public static Builder builder() { 255 | return new Builder<>(); 256 | } 257 | 258 | public static class Builder { 259 | private int requestCellCount; 260 | private int requestMaxBytes; 261 | private String dlpColumnName; 262 | private DlpClientFactory dlpClientFactory; 263 | private Function> dlpCallFnFactory; 264 | private Function> tableToDlpRequestFnFactory; 265 | private Function dlpRequestToTableFn; 266 | private Function dlpResponseToTableFn; 267 | 268 | public Builder() { 269 | this.requestCellCount = REQUEST_MAX_CELL_COUNT; 270 | this.requestMaxBytes = REQUEST_MAX_BYTES; 271 | } 272 | 273 | public Builder setRequestCellCount(int requestCellCount) { 274 | this.requestCellCount = requestCellCount; 275 | return this; 276 | } 277 | 278 | public Builder setRequestMaxBytes(int requestMaxBytes) { 279 | this.requestMaxBytes = requestMaxBytes; 280 | return this; 281 | } 282 | 283 | public Builder setDlpColumnName(String dlpColumnName) { 284 | this.dlpColumnName = dlpColumnName; 285 | return this; 286 | } 287 | 288 | public Builder setDlpClientFactory( 289 | DlpClientFactory dlpClientFactory) { 290 | this.dlpClientFactory = dlpClientFactory; 291 | return this; 292 | } 293 | 294 | public Builder setDlpCallFnFactory( 295 | Function> dlpCallFnFactory) { 296 | this.dlpCallFnFactory = dlpCallFnFactory; 297 | return this; 298 | } 299 | 300 | public Builder setTableToDlpRequestFnFactory( 301 | Function> tableToDlpRequestFnFactory) { 302 | this.tableToDlpRequestFnFactory = tableToDlpRequestFnFactory; 303 | return this; 304 | } 305 | 306 | public Builder setDlpRequestToTableFn( 307 | Function dlpRequestToTableFn) { 308 | this.dlpRequestToTableFn = dlpRequestToTableFn; 309 | return this; 310 | } 311 | 312 | public Builder setDlpResponseToTableFn( 313 | Function dlpResponseToTableFn) { 314 | this.dlpResponseToTableFn = dlpResponseToTableFn; 315 | return this; 316 | } 317 | 318 | public DlpRequestBatchExecutor build() { 319 | return new DlpRequestBatchExecutor<>( 320 
| requestCellCount, 321 | requestMaxBytes, 322 | dlpColumnName, 323 | dlpClientFactory, 324 | dlpCallFnFactory, 325 | tableToDlpRequestFnFactory, 326 | dlpRequestToTableFn, 327 | dlpResponseToTableFn); 328 | } 329 | } 330 | } 331 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Use custom encryption in BigQuery using Remote Function, Cloud Run and Data Loss Prevention 2 | 3 | *Summary: Learn how to use Remote Functions to de/re-identify data with DLP in BigQuery using SQL.* 4 | 5 | [![Build and Validate](https://github.com/GoogleCloudPlatform/bigquery-dlp-remote-function/actions/workflows/main.yml/badge.svg)](https://github.com/GoogleCloudPlatform/bigquery-dlp-remote-function/actions/workflows/main.yml) 6 | [![codecov](https://codecov.io/gh/GoogleCloudPlatform/bigquery-dlp-remote-function/branch/main/graph/badge.svg?token=B25A0dD36P)](https://codecov.io/gh/GoogleCloudPlatform/bigquery-dlp-remote-function) 7 | 8 | This document discusses how to detect and de-identify sensitive data like personally identifiable information (PII) in 9 | BigQuery tables with simple SQL-based functions, using Cloud Data Loss Prevention 10 | [(Cloud DLP)](https://cloud.google.com/dlp). De-identification techniques like encryption let you preserve the utility 11 | of your data for joining or analytics while reducing the risk of handling the data by de-identifying/obfuscating the raw 12 | sensitive identifiers. 13 | 14 | Enterprises may have policy or regulatory requirements to only store de-identified data in their data warehouses, and a need 15 | to quickly re-identify the de-identified data for report generation. 
To minimize the risk of handling large volumes of 16 | sensitive data, you can use an 17 | [automated data transformation pipeline](https://github.com/GoogleCloudPlatform/auto-data-tokenize) to create 18 | de-identified datasets that can be used for migrating from on-premises to cloud. You can use this project to either 19 | replace that pipeline with a SQL query for de-identification and quick re-identification or only for re-identification. 20 | 21 | Cloud DLP can inspect the data for sensitive information when the dataset has not been characterized, by using 22 | [more than 100 built-in classifiers](https://cloud.google.com/dlp/docs/infotypes-reference). Using DLP to de-identify the 23 | data requires complex data pipelines/applications. This solution aims to help your data analysts/engineers/scientists 24 | to achieve the same outcome through simple SQL functions using 25 | BigQuery [Remote Functions](https://cloud.google.com/blog/products/data-analytics/extending-bigquery-functions) which 26 | are a powerful way to extend the functionality of BigQuery. 27 | 28 | This document demonstrates a reference implementation of de-identifying structured data in 29 | [BigQuery](https://cloud.google.com/bigquery) using DLP and a remote function 30 | (hosted on [Cloud Run](https://cloud.google.com/run)). 31 | 32 | This document is intended for an audience whose responsibilities include data security, data processing, or data 33 | analytics. This guide assumes that you're familiar with data processing and data privacy, without the need to be an 34 | expert, and that you can run simple shell and SQL scripts. 
35 | 36 | ## Objectives 37 | 38 | * Deploy Cloud Run service providing DLP based encryption functionality 39 | * Create BigQuery Remote functions that use DLP de-identify templates 40 | * Verify data encryption in BigQuery using a SQL query 41 | 42 | ## Costs 43 | 44 | This tutorial uses billable components of Google Cloud, including the following: 45 | 46 | * [BigQuery](https://cloud.google.com/bigquery/pricing) 47 | * [Cloud Build](https://cloud.google.com/build/pricing) 48 | * [Cloud Run](https://cloud.google.com/run/pricing) 49 | * [Cloud Secret Manager](https://cloud.google.com/secret-manager/pricing) 50 | * [Data Loss Prevention](https://cloud.google.com/dlp/pricing) 51 | 52 | Use the [pricing calculator](https://cloud.google.com/products/calculator) to generate a cost estimate based on your 53 | projected usage. 54 | 55 | ## Architecture 56 | 57 | ![bigquery external tokenization functions](ext_bq_fn_dlp.svg) 58 | 59 | ## Before you begin 60 | 61 | For this tutorial, you need a Google Cloud [project](https://cloud.google.com/resource-manager/docs/cloud-platform-resource-hierarchy#projects). To make 62 | cleanup easiest at the end of the tutorial, we recommend that you create a new project for this tutorial. 63 | 64 | 1. [Create a Google Cloud project](https://console.cloud.google.com/projectselector2/home/dashboard). 65 | 1. Make sure that [billing is enabled](https://support.google.com/cloud/answer/6293499#enable-billing) for your Google 66 | Cloud project. 67 | 1. [Open Cloud Shell](https://console.cloud.google.com/?cloudshell=true). 68 | 69 | At the bottom of the Cloud Console, a [Cloud Shell](https://cloud.google.com/shell/docs/features) session opens and 70 | displays a command-line prompt. Cloud Shell is a shell environment with the Cloud SDK already installed, including 71 | the [gcloud](https://cloud.google.com/sdk/gcloud/) command-line tool, and with values already set for your current 72 | project. 
It can take a few seconds for the session to initialize. 73 | 74 | 1. In Cloud Shell, clone the source repository and go to the directory for this tutorial: 75 | 76 | git clone https://github.com/GoogleCloudPlatform/bigquery-dlp-remote-function.git 77 | cd bigquery-dlp-remote-function/ 78 | 79 | 2. Enable all the required Google Cloud APIs 80 | 81 | ```shell 82 | gcloud services enable \ 83 | artifactregistry.googleapis.com \ 84 | bigquery.googleapis.com \ 85 | bigqueryconnection.googleapis.com \ 86 | cloudbuild.googleapis.com \ 87 | cloudkms.googleapis.com \ 88 | containerregistry.googleapis.com \ 89 | dlp.googleapis.com \ 90 | run.googleapis.com \ 91 | secretmanager.googleapis.com 92 | ``` 93 | 94 | ## Deployment script 95 | 96 | 1. Authenticate using User [Application Default Credentials ("ADCs")](https://cloud.google.com/sdk/gcloud/reference/auth/application-default) as a primary authentication method. 97 | ```shell 98 | gcloud auth application-default login 99 | ``` 100 | 101 | 2. Initialize and run the Terraform script to create all resources: 102 | 103 | ```shell 104 | terraform init && \ 105 | terraform apply 106 | ``` 107 | 108 | 3. Once the script successfully completes resources creation, 109 | visit [BigQuery Console](https://console.cloud.google.com/bigquery) 110 | to run the test SQL script 111 | 112 | ```sql 113 | SELECT 114 | pii_column, 115 | fns.dlp_freetext_encrypt(pii_column) AS dlp_encrypted, 116 | fns.dlp_freetext_decrypt(fns.dlp_freetext_encrypt(pii_column)) AS dlp_decrypted 117 | FROM 118 | UNNEST( 119 | [ 120 | 'My name is John Doe. My email is john@doe.com', 121 | 'Some non PII data', 122 | '212-233-4532', 123 | 'some script with simple number 1234']) AS pii_column 124 | ``` 125 | 126 | The query will produce a result similar to following table: 127 | 128 |
129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 |
| Row | pii_column | dlp_encrypted | dlp_decrypted |
| --- | --- | --- | --- |
| 1 | My name is John Doe. My email is john@doe.com | My name is John Doe. My email is BQ_TRF_EMAIL(40):AQy6lGvwKR+AiiRqJpEr+nBzZUzOcjXkXamUugU= | My name is John Doe. My email is john@doe.com |
| 2 | Some non PII data | Some non PII data | Some non PII data |
| 3 | 212-233-4532 | BQ_TRF_PH(40):AbovCRcD0RwhqTJh1SuQmEfXERQoAmOKxwC2jc8= | 212-233-4532 |
| 4 | some script with simple number 1234 | some script with simple number 1234 | some script with simple number 1234 |
162 | 163 | ## Detailed Deployment steps 164 | 165 | If you want to customize the deployment, use the following steps: 166 | 167 | ### Setting up your environment 168 | 169 | 1. Enable the required Google Cloud APIs (Artifact Registry, BigQuery, BigQuery Connection, Cloud Build, Cloud KMS, Container Registry, DLP, IAM, Cloud Run, and Secret Manager). This command reads `PROJECT_ID`, so set the variables shown in the next step first: 170 | 171 | ```shell 172 | gcloud services enable --project "${PROJECT_ID}" \ 173 | artifactregistry.googleapis.com \ 174 | bigquery.googleapis.com \ 175 | bigqueryconnection.googleapis.com \ 176 | cloudbuild.googleapis.com \ 177 | cloudkms.googleapis.com \ 178 | containerregistry.googleapis.com \ 179 | dlp.googleapis.com \ 180 | iam.googleapis.com \ 181 | run.googleapis.com \ 182 | secretmanager.googleapis.com 183 | ``` 184 | 185 | 1. In Cloud Shell, set the project, the [Cloud Region](https://cloud.google.com/compute/docs/regions-zones#available) that you want to create your BigQuery and Cloud Run resources in, and the resource names: 186 | 187 | ```shell 188 | PROJECT_ID="" 189 | REGION="" 190 | ARTIFACT_REGISTRY_NAME="" 191 | CLOUD_RUN_SERVICE_NAME="bq-transform-fns" 192 | ``` 193 | 194 | ### Create Service Account for Cloud Run service 195 | 196 | For fine-grained access control, using a specialized service account for 197 | individual services is recommended. 198 | 199 | 1. Create a service account: 200 | 201 | ```shell 202 | RUNNER_SA_NAME="${CLOUD_RUN_SERVICE_NAME}-runner" 203 | RUNNER_SA_EMAIL="${RUNNER_SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com" 204 | 205 | gcloud iam service-accounts create "${RUNNER_SA_NAME}" \ 206 | --project="${PROJECT_ID}" \ 207 | --description "Runner for BigQuery remote function execution" \ 208 | --display-name "${RUNNER_SA_NAME}" 209 | ``` 210 | 211 | 1. 
Grant permissions to the service account to access DLP 212 | 213 | ```shell 214 | gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ 215 | --member="serviceAccount:${RUNNER_SA_EMAIL}" \ 216 | --role='roles/dlp.deidentifyTemplatesReader' 217 | 218 | gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ 219 | --member="serviceAccount:${RUNNER_SA_EMAIL}" \ 220 | --role='roles/dlp.user' 221 | ``` 222 | 223 | ### Create Artifact Registry 224 | This is a containerized SpringBoot application. 225 | Create an [Artifact Registry](https://cloud.google.com/artifact-registry) to store the application's container image 226 | 227 | ```shell 228 | gcloud artifacts repositories create "${ARTIFACT_REGISTRY_NAME}" \ 229 | --repository-format=docker \ 230 | --location="${REGION}" \ 231 | --description="Docker repository for Bigquery Functions" \ 232 | --project="${PROJECT_ID}" 233 | ``` 234 | 235 | ### Deploy Cloud Run service 236 | 237 | 1. Build the application container image using [Cloud Build](https://cloud.google.com/build): 238 | ```shell 239 | gcloud builds submit . \ 240 | --project="${PROJECT_ID}" \ 241 | --substitutions=_CONTAINER_IMAGE_NAME="${REGION}-docker.pkg.dev/${PROJECT_ID}/${ARTIFACT_REGISTRY_NAME}/${CLOUD_RUN_SERVICE_NAME}:latest" \ 242 | --machine-type=e2-highcpu-8 243 | ``` 244 | 245 | 2. Deploy Cloud Run by compiling and deploying Container : 246 | 247 | ```shell 248 | gcloud beta run deploy ${CLOUD_RUN_SERVICE_NAME} \ 249 | --image="${REGION}-docker.pkg.dev/${PROJECT_ID}/${ARTIFACT_REGISTRY_NAME}/${CLOUD_RUN_SERVICE_NAME}:latest" \ 250 | --execution-environment=gen2 \ 251 | --platform=managed \ 252 | --region="${REGION}" \ 253 | --service-account="${RUNNER_SA_EMAIL}" \ 254 | --update-env-vars=PROJECT_ID=${PROJECT_ID} \ 255 | --no-allow-unauthenticated \ 256 | --project ${PROJECT_ID} 257 | ``` 258 | 259 | 1. 
Retrieve and save the Cloud Run URL: 260 | 261 | ```shell 262 | RUN_URL="$(gcloud run services describe ${CLOUD_RUN_SERVICE_NAME} --region ${REGION} --project ${PROJECT_ID} --format="get(status.address.url)")" 263 | ``` 264 | 265 | ### Create DLP De-identify Templates 266 | 267 | DLP [De-identify templates](https://cloud.google.com/dlp/docs/creating-templates-deid) make de-identification and inspection configurations easy to document and manage. 268 | 269 | ```shell 270 | DEID_TEMPLATE=$(curl -X POST \ 271 | -H "Authorization: Bearer `gcloud auth print-access-token`" \ 272 | -H "Accept: application/json" \ 273 | -H "Content-Type: application/json" \ 274 | -H "X-Goog-User-Project: ${PROJECT_ID}" \ 275 | --data-binary "@sample_dlp_deid_config.json" \ 276 | "https://dlp.googleapis.com/v2/projects/${PROJECT_ID}/locations/${REGION}/deidentifyTemplates") 277 | 278 | DEID_TEMPLATE_NAME=$(echo ${DEID_TEMPLATE} | jq -r '.name') 279 | ``` 280 | 281 | > **Note:** Recommended practice is to use 282 | > [KMS Wrapped Key for DLP de-identification](https://cloud.google.com/dlp/docs/create-wrapped-key). 283 | > 284 | > This document uses an unwrapped key to keep the demo simple. 285 | 286 | ### Create BigQuery Remote Functions 287 | 288 | 1. Create a BigQuery connection for accessing Cloud Run: 289 | 290 | ```shell 291 | bq mk --connection \ 292 | --display_name='External transform function connection' \ 293 | --connection_type=CLOUD_RESOURCE \ 294 | --project_id="${PROJECT_ID}" \ 295 | --location="${REGION}" \ 296 | ext-${CLOUD_RUN_SERVICE_NAME} 297 | ``` 298 | 299 | 1. Find the BigQuery Service Account used for the connection: 300 | 301 | ```shell 302 | CONNECTION_SA="$(bq --project_id ${PROJECT_ID} --format json show --connection ${PROJECT_ID}.${REGION}.ext-${CLOUD_RUN_SERVICE_NAME} | jq -r '.cloudResource.serviceAccountId')" 303 | ``` 304 | 305 | 1. 
Grant the BigQuery connection Service Account Cloud Run Invoker role for accessing the Cloud Run: 306 | 307 | ```shell 308 | gcloud projects add-iam-policy-binding ${PROJECT_ID} \ 309 | --member="serviceAccount:${CONNECTION_SA}" \ 310 | --role='roles/run.invoker' 311 | ``` 312 | 313 | ### Create BQ Dataset for remote functions 314 | 315 | 1. Define the BigQuery dataset to create remote functions: 316 | 317 | ```shell 318 | BQ_FUNCTION_DATASET="fns" 319 | ``` 320 | 321 | 1. Create the dataset if it doesn't exist: 322 | 323 | ```shell 324 | bq mk --dataset \ 325 | --project_id ${PROJECT_ID} \ 326 | --location ${REGION} \ 327 | ${BQ_FUNCTION_DATASET} 328 | ``` 329 | 330 | ### Create BigQuery Remote functions for DLP 331 | 332 | 1. Create DLP de-identification function: 333 | 334 | DLP uses default inspection template, if you want to use your custom [Inspection template](https://cloud.google.com/dlp/docs/concepts-templates#the_inspecttemplate_and_deidentifytemplate_objects), 335 | please add following to the `user_defined_context`: `('dlp-inspect-template', '')` 336 | 337 | ```shell 338 | bq query --project_id ${PROJECT_ID} \ 339 | --use_legacy_sql=false \ 340 | "CREATE OR REPLACE FUNCTION ${BQ_FUNCTION_DATASET}.dlp_freetext_encrypt(v STRING) 341 | RETURNS STRING 342 | REMOTE WITH CONNECTION \`${PROJECT_ID}.${REGION}.ext-${CLOUD_RUN_SERVICE_NAME}\` 343 | OPTIONS (endpoint = '${RUN_URL}', user_defined_context = [('mode', 'deidentify'),('algo','dlp'),('dlp-deid-template','${DEID_TEMPLATE_NAME}')]);" 344 | ``` 345 | 346 | 1. 
Create DLP re-identification function: 347 | 348 | DLP uses default inspection template, if you want to use your custom [Inspection template](https://cloud.google.com/dlp/docs/concepts-templates#the_inspecttemplate_and_deidentifytemplate_objects), 349 | please add following to the `user_defined_context`: `('dlp-inspect-template', '')` 350 | 351 | ```shell 352 | bq query --project_id ${PROJECT_ID} \ 353 | --use_legacy_sql=false \ 354 | "CREATE OR REPLACE FUNCTION ${BQ_FUNCTION_DATASET}.dlp_freetext_decrypt(v STRING) 355 | RETURNS STRING 356 | REMOTE WITH CONNECTION \`${PROJECT_ID}.${REGION}.ext-${CLOUD_RUN_SERVICE_NAME}\` 357 | OPTIONS (endpoint = '${RUN_URL}', user_defined_context = [('mode', 'reidentify'),('algo','dlp'),('dlp-deid-template','${DEID_TEMPLATE_NAME}')]);" 358 | ``` 359 | 360 | 361 | ## Verify De-identification and Re-identification 362 | 363 | Execute the following query to observe that the remote function is deidentifying 364 | and reidentifying the data using SQL: 365 | 366 | 1. Using [BigQuery Workspace](https://console.cloud.google.com/bigquery) 367 | 368 | ```sql 369 | SELECT 370 | pii_column, 371 | fns.dlp_freetext_encrypt(pii_column) AS dlp_encrypted, 372 | fns.dlp_freetext_decrypt(fns.dlp_freetext_encrypt(pii_column)) AS dlp_decrypted 373 | FROM 374 | UNNEST( 375 | [ 376 | 'My name is John Doe. My email is john@doe.com', 377 | 'Some non PII data', 378 | '212-233-4532', 379 | 'some script with simple number 1234']) AS pii_column 380 | ``` 381 | 382 | 1. Using Cloud Shell run the following command to run the query on BigQuery: 383 | 384 | ```shell 385 | bq query --project_id ${PROJECT_ID} \ 386 | --use_legacy_sql=false \ 387 | " 388 | SELECT 389 | pii_column, 390 | ${BQ_FUNCTION_DATASET}.dlp_freetext_encrypt(pii_column) AS dlp_encrypted, 391 | ${BQ_FUNCTION_DATASET}.dlp_freetext_decrypt(${BQ_FUNCTION_DATASET}.dlp_freetext_encrypt(pii_column)) AS dlp_decrypted 392 | FROM 393 | UNNEST( 394 | [ 395 | 'My name is John Doe. 
My email is john@doe.com', 396 | 'Some non PII data' 397 | ]) AS pii_column" 398 | ``` 399 | 400 | Expected Output: 401 | 402 | 403 | | pii_column | dlp_encrypted | dlp_decrypted | aes_encrypted | aes_decrypted | 404 | | ----------- | ----------- | ----------- | ----------- | ----------- | 405 | | My name is John Doe. My email is john@doe.com | My name is John Doe. My email is BQ_TOK_FN(40):AQy6lGvwKR+AiiRqJpEr+nBzZUzOcjXkXamUugU= | My name is John Doe. My email is john@doe.com | qWnwDEY3bDTCV/azveHnxUm24z/DvUcVLZWHyN4Rg16ISvqswcuYXNXsOyI4/d8M | My name is John Doe. My email is john@doe.com | 406 | | Some non PII data | Some non PII data | Some non PII data | 37Agm90CqzGXwerJxai/Tf89ffRN9FpPkuuDW+rz7ns= | Some non PII data | 407 | 408 | ## Limitations 409 | 410 | * The de-identification and re-identification processing occurs through a Cloud Run service. 411 | Please provision the Cloud Run CPU/Memory in accordance with your compute requirements 412 | * When using DLP consider following: 413 | * Use at least `--cpu=4 --memory=8Gi` for Cloud Run instance 414 | * When using DLP consider following limits for throughput and cost considerations: 415 | * Limit to 10000 or fewer items for DLP processing 416 | * Ensure BigQuery, Cloud Run service and DLP templates are in the same cloud region 417 | 418 | ## Cleaning up 419 | 420 |

421 | Caution: Deleting a project has the following effects: 422 |
    423 |
  • Everything in the project is deleted. If you used an existing project for this tutorial, when you delete it, you also delete any other work you've done in the project.
  • 424 |
  • Custom project IDs are lost. When you created this project, you might have created a custom project ID that you want to use in the future. To preserve the URLs that use the project ID, such as an appspot.com URL, delete selected resources inside the project instead of deleting the whole project.
  • 425 |
426 | If you plan to explore multiple tutorials and quickstarts, reusing projects can help you avoid exceeding project quota limits. 427 |
428 | 429 | To avoid incurring charges to your Google Cloud account for the resources used in this tutorial, you can delete the project: 430 | 431 | 1. In the Cloud Console, go to the [**Manage resources** page](https://console.cloud.google.com/iam-admin/projects). 432 | 1. In the project list, select the project that you want to delete and then click **Delete** ![delete](bin_icon.png). 433 | 1. In the dialog, type the project ID and then click **Shut down** to delete the project. 434 | 435 | 436 | ## What's next 437 | 438 | * Learn more about [BigQuery Remote Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions) 439 | * Learn more about [Cloud DLP](https://cloud.google.com/dlp) 440 | * Learn more about [Cloud KMS](https://cloud.google.com/kms) 441 | * Learn about [Inspecting storage and databases for sensitive data](https://cloud.google.com/dlp/docs/inspecting-storage) 442 | * Handling [De-identification and re-identification of PII in large-scale datasets using DLP](https://cloud.google.com/solutions/de-identification-re-identification-pii-using-cloud-dlp) 443 | 444 | ## Disclaimer 445 | **License**: Apache 2.0 446 | 447 | This is not an official Google product. 448 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/BqTransformFnAppTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn; 18 | 19 | import static com.google.cloud.solutions.bqremoteencryptionfn.testing.JsonMapper.fromJson; 20 | import static com.google.cloud.solutions.bqremoteencryptionfn.testing.JsonMapper.jsonToProto; 21 | import static com.google.cloud.solutions.bqremoteencryptionfn.testing.SimpleBigQueryRemoteFnRequestMaker.testRequest; 22 | import static com.google.common.truth.Truth.assertThat; 23 | import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; 24 | 25 | import com.google.cloud.dlp.v2.DlpServiceClient; 26 | import com.google.cloud.solutions.bqremoteencryptionfn.fns.dlp.DlpFn.DlpClientFactory; 27 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture.ApiFutureFactory; 28 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp.*; 29 | import com.google.common.collect.ImmutableList; 30 | import com.google.common.collect.ImmutableSet; 31 | import com.google.common.flogger.GoogleLogger; 32 | import com.google.common.io.Resources; 33 | import com.google.privacy.dlp.v2.DeidentifyContentRequest; 34 | import com.google.privacy.dlp.v2.DeidentifyTemplate; 35 | import com.google.privacy.dlp.v2.ReidentifyContentRequest; 36 | import java.io.BufferedReader; 37 | import java.io.IOException; 38 | import java.io.InputStreamReader; 39 | import java.nio.charset.StandardCharsets; 40 | import java.util.List; 41 | import java.util.Map; 42 | import java.util.stream.Collectors; 43 | import org.junit.Before; 44 | import org.junit.ClassRule; 45 | import org.junit.Rule; 46 | import org.junit.Test; 47 | import org.junit.runner.RunWith; 48 | import org.junit.runners.Parameterized; 49 | import org.junit.runners.Parameterized.Parameters; 50 | import org.springframework.beans.factory.annotation.Autowired; 51 | import 
org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.TestConfiguration;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Profile;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.junit4.rules.SpringClassRule;
import org.springframework.test.context.junit4.rules.SpringMethodRule;
import org.springframework.test.web.servlet.MockMvc;
import org.springframework.test.web.servlet.result.MockMvcResultMatchers;

/**
 * Parameterized end-to-end test of the remote-function HTTP endpoint.
 *
 * <p>Each parameter set POSTs a BigQuery remote-function request JSON to {@code "/"} through
 * {@link MockMvc} and compares the decoded {@link BigQueryRemoteFnResponse} with the expected
 * value. DLP calls are served by a {@code PatchyDlpStub} assembled from the factories supplied by
 * the parameter set, so no real DLP service is contacted.
 */
@RunWith(Parameterized.class)
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.MOCK)
@Import(BqTransformFnAppTest.TestDlpClientFactoryConfiguration.class)
@ActiveProfiles("test")
@AutoConfigureMockMvc
public final class BqTransformFnAppTest {

  private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

  /** De-identify template resource names shared by the DLP parameter sets. */
  private static final String DEID_TEMPLATE_1 =
      "projects/test-project-id/locations/test-region1/deidentifyTemplates/template1";

  private static final String DEID_TEMPLATE_2 =
      "projects/test-project-id/locations/test-region1/deidentifyTemplates/template2";

  // Fixed AES key / IV system properties, set in a static initializer so they exist before the
  // Spring rules below start the application context.
  static {
    System.setProperty("AES_KEY", "2lDNBd0hHgCZ+1/P+fWO+g==");
    System.setProperty("AES_IV_PARAMETER_BASE64", "/t2/6YFewDgoHeQM1QBZdw==");
  }

  // Spring support through rules, because the runner slot is taken by Parameterized.
  @ClassRule public static final SpringClassRule SPRING_CLASS_RULE = new SpringClassRule();
  @Rule public final SpringMethodRule springMethodRule = new SpringMethodRule();

  @Autowired MockMvc mockMvc;

  @Autowired TestDlpClientFactoryConfiguration dlpClientFactoryConfiguration;

  private final String testRequestJson;
  private final BigQueryRemoteFnResponse expectedResult;

  // NOTE(review): the generic arguments were lost in the extracted source; ApiFutureFactory<?, ?>
  // is reconstructed from the ApiFutureFactory import -- confirm against BaseUnaryApiFuture.
  private final List<ApiFutureFactory<?, ?>> factories;

  /**
   * Receives one row produced by {@link #testParameters()}.
   *
   * @param testCaseName display name of the parameter set (also logged)
   * @param testRequestJson request body to POST
   * @param expectedResult response the endpoint is expected to return
   * @param factories DLP call factories to install for this parameter set
   */
  public BqTransformFnAppTest(
      String testCaseName,
      String testRequestJson,
      BigQueryRemoteFnResponse expectedResult,
      List<ApiFutureFactory<?, ?>> factories) {
    this.testRequestJson = testRequestJson;
    this.expectedResult = expectedResult;
    this.factories = factories;

    logger.atInfo().log("Starting Testcase: %s", testCaseName);
  }

  /** Hands this parameter set's factories to the test configuration that builds DLP clients. */
  @Before
  public void setApiFactories() {
    dlpClientFactoryConfiguration.factories = this.factories;
  }

  /** Posts the request and checks for HTTP 200 and the expected response payload. */
  @Test
  public void operation_valid() throws Exception {
    mockMvc
        .perform(post("/").contentType("application/json").content(testRequestJson))
        .andExpect(MockMvcResultMatchers.status().isOk())
        .andExpect(
            result ->
                assertThat(
                        fromJson(
                            result.getResponse().getContentAsString(),
                            BigQueryRemoteFnResponse.class))
                    .isEqualTo(expectedResult));
  }

  /**
   * Builds the parameter sets: {name, request JSON, expected response, DLP call factories}.
   *
   * <p>Names are kept unique because they are the only identifier shown for a failing row.
   *
   * @throws IOException if a JSON test resource cannot be read
   */
  @Parameters(name = "{0}")
  public static ImmutableList<Object[]> testParameters() throws IOException {

    var base64Stub =
        new Base64EncodingDlpStub(ImmutableSet.of("bqfnvalue"), "test-project-id", "test-region1");

    return ImmutableList.<Object[]>builder()
        .add(
            new Object[] {
              /* testName= */ "No-Op Deidentify",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "deidentify", "algo", "identity"),
                  List.of("Anant"),
                  List.of("Damle")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "Identity ReIdentify",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "reidentify", "algo", "identity"),
                  List.of("Anant"),
                  List.of("Damle")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "Base64 Deidentify",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "deidentify", "algo", "base64"),
                  List.of("Anant"),
                  List.of("Damle")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(
                  List.of("QW5hbnQ=", "RGFtbGU="), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "Base64 ReIdentify",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "reidentify", "algo", "base64"),
                  List.of("QW5hbnQ="),
                  List.of("RGFtbGU=")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "AES128-ECB Deidentify",
              /* testRequestJson= */ testRequest(
                  Map.of(
                      "mode", "deidentify",
                      "algo", "aes",
                      "aes-cipher-type", "AES/ECB/PKCS5PADDING"),
                  List.of("Anant"),
                  List.of("Damle")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(
                  List.of("nrUwN61laFc115jyyQHmng==", "JCKtXkM8spJLyZdAqZKf/g=="), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "AES128-ECB ReIdentify",
              /* testRequestJson= */ testRequest(
                  Map.of(
                      "mode", "reidentify",
                      "algo", "aes",
                      "aes-cipher-type", "AES/ECB/PKCS5PADDING"),
                  List.of("nrUwN61laFc115jyyQHmng=="),
                  List.of("JCKtXkM8spJLyZdAqZKf/g==")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "AES128 (default: CBC) Deidentify",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "deidentify", "algo", "aes"), List.of("Anant"), List.of("Damle")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(
                  List.of("VhxcfvLBLRy8ag4DVl+7yQ==", "vjVNUHd2cpR0S8XLqhR+VQ=="), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "AES128 (default: CBC) ReIdentify",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "reidentify", "algo", "aes"),
                  List.of("VhxcfvLBLRy8ag4DVl+7yQ=="),
                  List.of("vjVNUHd2cpR0S8XLqhR+VQ==")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "AES128 CBC User provided ivParameter Deidentify",
              /* testRequestJson= */ testRequest(
                  Map.of(
                      "mode", "deidentify",
                      "algo", "aes",
                      "aes-iv-parameter-base64", "VGhpc0lzVGVzdFZlY3Rvcg=="),
                  List.of("Anant"),
                  List.of("Damle")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(
                  List.of("8MWXmtCTjwOlpBopOGQZfg==", "m3XXwCieBwdWi700D9yZdg=="), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "AES128 CBC User provided ivParameter ReIdentify",
              /* testRequestJson= */ testRequest(
                  Map.of(
                      "mode", "reidentify",
                      "algo", "aes",
                      "aes-iv-parameter-base64", "VGhpc0lzVGVzdFZlY3Rvcg=="),
                  List.of("8MWXmtCTjwOlpBopOGQZfg=="),
                  List.of("m3XXwCieBwdWi700D9yZdg==")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of()
            })
        .add(
            new Object[] {
              /* testName= */ "DLP deidentify",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "deidentify", "algo", "dlp", "dlp-deid-template", DEID_TEMPLATE_1),
                  List.of("Anant"),
                  List.of("Damle")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(
                  List.of("QW5hbnQ=", "RGFtbGU="), null),
              /* factories= */ List.of(
                  base64Stub.deidentifyFactory(), base64Stub.reidentifyFactory())
            })
        .add(
            new Object[] {
              /* testName= */ "DLP deidentify with inspect-Template",
              /* testRequestJson= */ testRequest(
                  Map.of(
                      "mode", "deidentify",
                      "algo", "dlp",
                      "dlp-deid-template", DEID_TEMPLATE_1,
                      "dlp-inspect-template", "testing-inspect-template-name"),
                  List.of("Anant"),
                  List.of("Damle")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(
                  List.of("QW5hbnQ=", "RGFtbGU="), null),
              /* factories= */ List.of(
                  new VerifyingDeidentifyCallerFactory(
                      jsonToProto(
                          loadResourceAsString(
                              "deidentify_request_with_inspect_template_name.json"),
                          DeidentifyContentRequest.class),
                      base64Stub.deidentifyFactory()),
                  base64Stub.reidentifyFactory())
            })
        .add(
            new Object[] {
              /* testName= */ "DLP reidentify Single Surrogate",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "reidentify", "algo", "dlp", "dlp-deid-template", DEID_TEMPLATE_1),
                  List.of("QW5hbnQ="),
                  List.of("RGFtbGU=")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of(
                  base64Stub.deidentifyFactory(),
                  MappingDeidentifyTemplateCallerFactory.using(
                      Map.of(
                          DEID_TEMPLATE_1,
                          jsonToProto(
                              loadResourceAsString(
                                  "single_surrogate_info_type_transform_deid_template.json"),
                              DeidentifyTemplate.class))),
                  VerifyingReidentifyCallerFactory.withExpectedRequest(
                          jsonToProto(
                              loadResourceAsString(
                                  "single_surrogate_info_type_transform_reid_request.json"),
                              ReidentifyContentRequest.class))
                      .withReidFactory(base64Stub.reidentifyFactory()))
            })
        .add(
            new Object[] {
              /* testName= */ "DLP reidentify Record Transform Two Surrogates",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "reidentify", "algo", "dlp", "dlp-deid-template", DEID_TEMPLATE_2),
                  List.of("QW5hbnQ="),
                  List.of("RGFtbGU=")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of(
                  base64Stub.deidentifyFactory(),
                  MappingDeidentifyTemplateCallerFactory.using(
                      Map.of(
                          DEID_TEMPLATE_2,
                          jsonToProto(
                              loadResourceAsString(
                                  "multiple_surrogate_record_info_type_transforms_deid_config.json"),
                              DeidentifyTemplate.class))),
                  VerifyingReidentifyCallerFactory.withExpectedRequest(
                          jsonToProto(
                              loadResourceAsString(
                                  "multiple_surrogate_record_info_type_reid_request.json"),
                              ReidentifyContentRequest.class))
                      .withReidFactory(base64Stub.reidentifyFactory()))
            })
        .add(
            new Object[] {
              /* testName= */ "DLP reidentify Record Transform Primitive",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "reidentify", "algo", "dlp", "dlp-deid-template", DEID_TEMPLATE_2),
                  List.of("QW5hbnQ="),
                  List.of("RGFtbGU=")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of(
                  base64Stub.deidentifyFactory(),
                  MappingDeidentifyTemplateCallerFactory.using(
                      Map.of(
                          DEID_TEMPLATE_2,
                          jsonToProto(
                              loadResourceAsString(
                                  "single_surrogate_record_primitive_type_transform_deid_template.json"),
                              DeidentifyTemplate.class))),
                  VerifyingReidentifyCallerFactory.withExpectedRequest(
                          jsonToProto(
                              loadResourceAsString(
                                  "single_surrogate_record_primitive_type_transform_reid_request.json"),
                              ReidentifyContentRequest.class))
                      .withReidFactory(base64Stub.reidentifyFactory()))
            })
        .add(
            new Object[] {
              // Renamed: this row previously reused the name of the row above, which made the
              // two indistinguishable in test reports.
              /* testName= */ "DLP reidentify Record Transform Primitive with inspect-Template",
              /* testRequestJson= */ testRequest(
                  Map.of(
                      "mode", "reidentify",
                      "algo", "dlp",
                      "dlp-deid-template", DEID_TEMPLATE_2,
                      "dlp-inspect-template", "testing-inspect-template"),
                  List.of("QW5hbnQ="),
                  List.of("RGFtbGU=")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(List.of("Anant", "Damle"), null),
              /* factories= */ List.of(
                  base64Stub.deidentifyFactory(),
                  MappingDeidentifyTemplateCallerFactory.using(
                      Map.of(
                          DEID_TEMPLATE_2,
                          jsonToProto(
                              loadResourceAsString(
                                  "single_surrogate_record_primitive_type_transform_deid_template.json"),
                              DeidentifyTemplate.class))),
                  VerifyingReidentifyCallerFactory.withExpectedRequest(
                          jsonToProto(
                              loadResourceAsString(
                                  "single_surrogate_record_primitive_type_transform_reid_with_inspect_template_request.json"),
                              ReidentifyContentRequest.class))
                      .withReidFactory(base64Stub.reidentifyFactory()))
            })
        .add(
            new Object[] {
              /* testName= */ "DLP reidentify Unsupported Transformation",
              /* testRequestJson= */ testRequest(
                  Map.of("mode", "reidentify", "algo", "dlp", "dlp-deid-template", DEID_TEMPLATE_2),
                  List.of("QW5hbnQ="),
                  List.of("RGFtbGU=")),
              /* expectedResult= */ new BigQueryRemoteFnResponse(
                  null, "Unsupported ReId Primitive Transform CRYPTO_HASH_CONFIG"),
              /* factories= */ List.of(
                  base64Stub.deidentifyFactory(),
                  MappingDeidentifyTemplateCallerFactory.using(
                      Map.of(
                          DEID_TEMPLATE_2,
                          jsonToProto(
                              loadResourceAsString(
                                  "non_reversabe_transformation_deid_template.json"),
                              DeidentifyTemplate.class))))
            })
        .build();
  }

  /**
   * Test-profile configuration that supplies the {@link DlpClientFactory} bean; each client it
   * creates is backed by a {@code PatchyDlpStub} built from the currently installed factories.
   */
  @TestConfiguration
  @Profile("test")
  public static class TestDlpClientFactoryConfiguration {
    // Reassigned by setApiFactories() before every test; read lazily when a client is created.
    // NOTE(review): element type reconstructed (generics lost in extraction) -- confirm.
    private List<ApiFutureFactory<?, ?>> factories;

    @Bean
    public DlpClientFactory testDlpClientFactory() {
      return () -> DlpServiceClient.create(PatchyDlpStub.using(factories));
    }
  }

  /**
   * Reads a classpath resource as UTF-8 text, joining its lines with {@code "\n"}.
   *
   * @param sourcePath resource name resolved through {@link Resources#getResource(String)}
   * @return the resource content without a trailing line terminator
   * @throws IOException if the resource stream cannot be opened or read
   */
  private static String loadResourceAsString(String sourcePath) throws IOException {
    try (var reader =
        new BufferedReader(
            new InputStreamReader(
                Resources.getResource(sourcePath).openStream(), StandardCharsets.UTF_8))) {
      return reader.lines().collect(Collectors.joining("\n"));
    }
  }
}

--------------------------------------------------------------------------------

38 | * 39 | * @see Deidentify Templates 40 | */ 41 | public final class DlpFn extends UnaryStringArgFn { 42 | public static final String FN_NAME = "dlp"; 43 | 44 | private static final Pattern TEMPLATE_LOCATION_REGEX = 45 | Pattern.compile("^projects/(?[^/]+)/locations/(?[^/]+)"); 46 | 47 | /** Factory Interface to manage client creation. */ 48 | public interface DlpClientFactory { 49 | DlpServiceClient newClient() throws Exception; 50 | } 51 | 52 | @Component 53 | @PropertySource("classpath:dlp.properties") 54 | public static class DlpTransformFnFactory implements TransformFnFactory { 55 | private final String dlpColName; 56 | 57 | private final int requestCellCount; 58 | 59 | private final int requestBytes; 60 | private final DlpClientFactory dlpClientFactory; 61 | 62 | public DlpTransformFnFactory( 63 | @Value("${dlp.valueColName}") String dlpColName, 64 | @Value("${dlp.requestCellCount}") int requestCellCount, 65 | @Value("${dlp.requestBytes}") int requestBytes, 66 | DlpClientFactory dlpClientFactory) { 67 | this.dlpColName = dlpColName; 68 | this.requestCellCount = requestCellCount; 69 | this.requestBytes = requestBytes; 70 | this.dlpClientFactory = dlpClientFactory; 71 | } 72 | 73 | @Override 74 | public DlpFn createFn(@Nonnull Map options) { 75 | return new DlpFn( 76 | requestCellCount, 77 | requestBytes, 78 | dlpColName, 79 | DlpConfig.fromJson(options), 80 | dlpClientFactory); 81 | } 82 | 83 | @Override 84 | public String getFnName() { 85 | return FN_NAME; 86 | } 87 | } 88 | 89 | private final String dlpColName; 90 | 91 | private final int requestCellCount; 92 | 93 | private final int requestBytes; 94 | private final DlpConfig dlpConfig; 95 | private final DlpClientFactory dlpClientFactory; 96 | 97 | private DlpFn( 98 | int requestCellCount, 99 | int requestBytes, 100 | String dlpColName, 101 | DlpConfig dlpConfig, 102 | DlpClientFactory dlpClientFactory) { 103 | this.requestCellCount = requestCellCount; 104 | this.requestBytes = requestBytes; 105 | 
this.dlpColName = dlpColName; 106 | this.dlpConfig = dlpConfig; 107 | this.dlpClientFactory = dlpClientFactory; 108 | } 109 | 110 | @Override 111 | public List deidentifyUnaryRow(List rows) throws Exception { 112 | return DlpRequestBatchExecutor.builder() 113 | .setDlpColumnName(dlpColName) 114 | .setRequestCellCount(requestCellCount) 115 | .setRequestMaxBytes(requestBytes) 116 | .setDlpClientFactory(dlpClientFactory) 117 | .setDlpCallFnFactory(dlpClient -> dlpClient::deidentifyContent) 118 | .setTableToDlpRequestFnFactory( 119 | dlpClient -> 120 | table -> 121 | DeidentifyContentRequest.newBuilder() 122 | .setParent(extractDlpParent(dlpConfig.deidTemplate())) 123 | .setDeidentifyTemplateName(dlpConfig.deidTemplate()) 124 | .setInspectTemplateName( 125 | dlpConfig.hasInspectTemplate() ? dlpConfig.inspectTemplate() : "") 126 | .setItem(ContentItem.newBuilder().setTable(table).build()) 127 | .build()) 128 | .setDlpRequestToTableFn(deidRequest -> deidRequest.getItem().getTable()) 129 | .setDlpResponseToTableFn(deidResponse -> deidResponse.getItem().getTable()) 130 | .build() 131 | .process(rows); 132 | } 133 | 134 | @Override 135 | public List reidentifyUnaryRow(List rows) throws Exception { 136 | return DlpRequestBatchExecutor.builder() 137 | .setDlpColumnName(dlpColName) 138 | .setRequestCellCount(requestCellCount) 139 | .setRequestMaxBytes(requestBytes) 140 | .setDlpClientFactory(dlpClientFactory) 141 | .setDlpCallFnFactory(dlpClient -> dlpClient::reidentifyContent) 142 | .setTableToDlpRequestFnFactory( 143 | dlpClient -> { 144 | var deidentifyConfig = 145 | dlpClient.getDeidentifyTemplate(dlpConfig.deidTemplate()).getDeidentifyConfig(); 146 | return (table) -> 147 | DlpReIdRequestMaker.forConfig(deidentifyConfig) 148 | .makeRequest(ContentItem.newBuilder().setTable(table)) 149 | .toBuilder() 150 | .setParent(extractDlpParent(dlpConfig.deidTemplate())) 151 | .setInspectTemplateName( 152 | dlpConfig.hasInspectTemplate() ? 
dlpConfig.inspectTemplate() : "") 153 | .build(); 154 | }) 155 | .setDlpRequestToTableFn(reidRequest -> reidRequest.getItem().getTable()) 156 | .setDlpResponseToTableFn(reidResponse -> reidResponse.getItem().getTable()) 157 | .build() 158 | .process(rows); 159 | } 160 | 161 | @Override 162 | public String getName() { 163 | return FN_NAME; 164 | } 165 | 166 | private static String extractDlpParent(String dlpTemplateName) { 167 | 168 | var matcher = TEMPLATE_LOCATION_REGEX.matcher(dlpTemplateName); 169 | if (!matcher.find()) { 170 | throw new RuntimeException("Invalid DLP Template name"); 171 | } 172 | 173 | var location = matcher.group("location"); 174 | 175 | var parentBuilder = new StringBuilder().append("projects/").append(matcher.group("project")); 176 | 177 | if (isNullOrEmpty(location) || !location.equals("global")) { 178 | parentBuilder.append("/locations/").append(location); 179 | } 180 | 181 | return parentBuilder.toString(); 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/testing/stubs/dlp/Base64EncodingDlpStub.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp; 18 | 19 | import static com.google.common.collect.ImmutableList.toImmutableList; 20 | import static com.google.common.truth.Truth.assertThat; 21 | 22 | import com.google.api.gax.rpc.ApiCallContext; 23 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture; 24 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.BaseUnaryApiFuture.ApiFutureFactory; 25 | import com.google.common.collect.ImmutableList; 26 | import com.google.common.collect.Streams; 27 | import com.google.privacy.dlp.v2.ContentItem; 28 | import com.google.privacy.dlp.v2.DeidentifyContentRequest; 29 | import com.google.privacy.dlp.v2.DeidentifyContentResponse; 30 | import com.google.privacy.dlp.v2.ReidentifyContentRequest; 31 | import com.google.privacy.dlp.v2.ReidentifyContentResponse; 32 | import com.google.privacy.dlp.v2.Table; 33 | import com.google.privacy.dlp.v2.Value; 34 | import java.io.Serializable; 35 | import java.nio.ByteBuffer; 36 | import java.util.Base64; 37 | import java.util.Set; 38 | import java.util.function.Function; 39 | 40 | public class Base64EncodingDlpStub implements Serializable { 41 | private final String projectId; 42 | private final String location; 43 | private final Set transformColumnIds; 44 | 45 | public Base64EncodingDlpStub(Set transformColumnIds, String projectId, String location) { 46 | this.transformColumnIds = transformColumnIds; 47 | this.projectId = projectId; 48 | this.location = location; 49 | } 50 | 51 | public ApiFutureFactory deidentifyFactory() { 52 | 53 | return new ApiFutureFactory<>(DeidentifyContentRequest.class, DeidentifyContentResponse.class) { 54 | @Override 55 | public BaseUnaryApiFuture create( 56 | DeidentifyContentRequest request, ApiCallContext context) { 57 | return new BaseUnaryApiFuture<>() { 58 | @Override 59 | public DeidentifyContentResponse get() { 60 | var actioner = 61 | new 
Base64Actioner(Base64EncodingDlpStub::encodeBase64Value, request.getParent()); 62 | 63 | return DeidentifyContentResponse.newBuilder() 64 | .setItem( 65 | ContentItem.newBuilder() 66 | .setTable(actioner.checkAndTransformRows(request.getItem().getTable()))) 67 | .build(); 68 | } 69 | }; 70 | } 71 | }; 72 | } 73 | 74 | public ApiFutureFactory reidentifyFactory() { 75 | 76 | return new ApiFutureFactory<>(ReidentifyContentRequest.class, ReidentifyContentResponse.class) { 77 | @Override 78 | public BaseUnaryApiFuture create( 79 | ReidentifyContentRequest request, ApiCallContext context) { 80 | return new BaseUnaryApiFuture<>() { 81 | @Override 82 | public ReidentifyContentResponse get() { 83 | var actioner = 84 | new Base64Actioner(Base64EncodingDlpStub::decodeBase64String, request.getParent()); 85 | 86 | return ReidentifyContentResponse.newBuilder() 87 | .setItem( 88 | ContentItem.newBuilder() 89 | .setTable(actioner.checkAndTransformRows(request.getItem().getTable()))) 90 | .build(); 91 | } 92 | }; 93 | } 94 | }; 95 | } 96 | 97 | private final class Base64Actioner { 98 | 99 | private final Function elementTransformer; 100 | private final String callParent; 101 | 102 | public Base64Actioner(Function elementTransformer, String callParent) { 103 | this.elementTransformer = elementTransformer; 104 | this.callParent = callParent; 105 | } 106 | 107 | private void checkCallParent() { 108 | assertThat(callParent).startsWith(String.format("projects/%s", projectId)); 109 | 110 | if (!location.equals("global")) { 111 | assertThat(callParent) 112 | .isEqualTo(String.format("projects/%s/locations/%s", projectId, location)); 113 | } 114 | } 115 | 116 | private Table checkAndTransformRows(Table table) { 117 | 118 | checkCallParent(); 119 | 120 | var headers = table.getHeadersList(); 121 | 122 | var updatedRows = 123 | table.getRowsList().stream() 124 | .map( 125 | row -> { 126 | //noinspection UnstableApiUsage 127 | ImmutableList updatedValues = 128 | Streams.zip( 129 | 
headers.stream(), 130 | row.getValuesList().stream(), 131 | (header, value) -> { 132 | if (transformColumnIds.contains(header.getName())) { 133 | return elementTransformer.apply(value); 134 | } 135 | return value; 136 | }) 137 | .collect(toImmutableList()); 138 | 139 | return row.toBuilder().clearValues().addAllValues(updatedValues).build(); 140 | }) 141 | .collect(toImmutableList()); 142 | 143 | return Table.newBuilder().addAllHeaders(headers).addAllRows(updatedRows).build(); 144 | } 145 | } 146 | 147 | private static Value encodeBase64Value(Value value) { 148 | 149 | byte[] bytes = null; 150 | 151 | switch (value.getTypeCase()) { 152 | case INTEGER_VALUE: 153 | bytes = ByteBuffer.allocate(Long.BYTES).putLong(value.getIntegerValue()).array(); 154 | break; 155 | case FLOAT_VALUE: 156 | bytes = ByteBuffer.allocate(Double.BYTES).putDouble(value.getIntegerValue()).array(); 157 | break; 158 | case STRING_VALUE: 159 | bytes = value.getStringValue().getBytes(); 160 | break; 161 | case BOOLEAN_VALUE: 162 | bytes = ByteBuffer.allocate(Integer.BYTES).putInt(value.getBooleanValue() ? 
1 : 0).array(); 163 | break; 164 | case TIMESTAMP_VALUE: 165 | case TIME_VALUE: 166 | case DATE_VALUE: 167 | case DAY_OF_WEEK_VALUE: 168 | case TYPE_NOT_SET: 169 | return Value.getDefaultInstance(); 170 | } 171 | 172 | return Value.newBuilder().setStringValue(Base64.getEncoder().encodeToString(bytes)).build(); 173 | } 174 | 175 | private static Value decodeBase64String(Value value) { 176 | 177 | if (!value.getTypeCase().equals(Value.TypeCase.STRING_VALUE)) { 178 | throw new RuntimeException("non-string value not expected"); 179 | } 180 | 181 | return Value.newBuilder() 182 | .setStringValue(new String(Base64.getDecoder().decode(value.getStringValue()))) 183 | .build(); 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/dlp/DlpRequestBatchExecutorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns.dlp; 18 | 19 | import static com.google.common.collect.ImmutableList.toImmutableList; 20 | import static com.google.common.truth.Truth.assertThat; 21 | 22 | import com.google.cloud.dlp.v2.DlpServiceClient; 23 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp.Base64EncodingDlpStub; 24 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp.PatchyDlpStub; 25 | import com.google.cloud.solutions.bqremoteencryptionfn.testing.stubs.dlp.RequestSizeLimitingDeidentifyFactory; 26 | import com.google.common.collect.ImmutableSet; 27 | import com.google.privacy.dlp.v2.ContentItem; 28 | import com.google.privacy.dlp.v2.DeidentifyContentRequest; 29 | import com.google.privacy.dlp.v2.DeidentifyContentResponse; 30 | import com.google.privacy.dlp.v2.ReidentifyContentRequest; 31 | import com.google.privacy.dlp.v2.ReidentifyContentResponse; 32 | import java.util.List; 33 | import java.util.function.Function; 34 | import java.util.stream.IntStream; 35 | import org.junit.Test; 36 | import org.junit.runner.RunWith; 37 | import org.junit.runners.JUnit4; 38 | 39 | @RunWith(JUnit4.class) 40 | public class DlpRequestBatchExecutorTest { 41 | 42 | @Test 43 | public void process_correctlyCreatedDlpSizedTables_valid() throws Exception { 44 | 45 | var testRows = makeRows("TwentyStringers", 60000); 46 | var expectedTableRequests = 4; 47 | 48 | var base64EncodingStub = 49 | new Base64EncodingDlpStub(ImmutableSet.of("bqfnvalue"), "test-project-id", "global"); 50 | 51 | var dlpServiceClient = 52 | DlpServiceClient.create( 53 | PatchyDlpStub.using( 54 | List.of( 55 | base64EncodingStub.deidentifyFactory(), 56 | base64EncodingStub.reidentifyFactory()))); 57 | 58 | RequestMeasuringDlpCaller 59 | deidRequestMeasuringDlpCaller = 60 | new RequestMeasuringDlpCaller<>(dlpClient -> dlpClient::deidentifyContent); 61 | 62 | RequestMeasuringDlpCaller 63 | reidRequestMeasuringDlpCaller = 64 | 
new RequestMeasuringDlpCaller<>(dlpClient -> dlpClient::reidentifyContent); 65 | 66 | var deidRows = 67 | DlpRequestBatchExecutor.builder() 68 | .setDlpColumnName("dlpColumnName") 69 | .setDlpClientFactory(() -> dlpServiceClient) 70 | .setDlpCallFnFactory(deidRequestMeasuringDlpCaller) 71 | .setTableToDlpRequestFnFactory( 72 | dlpClient -> 73 | table -> 74 | DeidentifyContentRequest.newBuilder() 75 | .setParent("projects/test-project-id") 76 | .setItem(ContentItem.newBuilder().setTable(table)) 77 | .build()) 78 | .setDlpRequestToTableFn(deidRequest -> deidRequest.getItem().getTable()) 79 | .setDlpResponseToTableFn(deidResponse -> deidResponse.getItem().getTable()) 80 | .build() 81 | .process(testRows); 82 | 83 | var reidRows = 84 | DlpRequestBatchExecutor.builder() 85 | .setDlpColumnName("dlpColumnName") 86 | .setDlpClientFactory(() -> dlpServiceClient) 87 | .setDlpCallFnFactory(reidRequestMeasuringDlpCaller) 88 | .setTableToDlpRequestFnFactory( 89 | dlpClient -> 90 | table -> 91 | ReidentifyContentRequest.newBuilder() 92 | .setParent("projects/test-project-id") 93 | .setItem(ContentItem.newBuilder().setTable(table)) 94 | .build()) 95 | .setDlpRequestToTableFn(reidRequest -> reidRequest.getItem().getTable()) 96 | .setDlpResponseToTableFn(reidResponse -> reidResponse.getItem().getTable()) 97 | .build() 98 | .process(deidRows); 99 | 100 | assertThat(deidRows).hasSize(testRows.size()); 101 | assertThat(reidRows).containsAtLeastElementsIn(testRows).inOrder(); 102 | assertThat(deidRequestMeasuringDlpCaller.getRequestCalls()).isEqualTo(expectedTableRequests); 103 | assertThat(reidRequestMeasuringDlpCaller.getRequestCalls()).isEqualTo(expectedTableRequests); 104 | } 105 | 106 | @Test 107 | public void process_splitsTheTableWhenRequested_valid() throws Exception { 108 | 109 | var testRows = makeRows("SplittingTest", 1000); 110 | var expectedTableRequests = 3; // 1st = 1000 rows, 2,3 = 500 rows (1000/2) 111 | 112 | var dlpServiceClient = 113 | DlpServiceClient.create( 114 | 
PatchyDlpStub.using(List.of(new RequestSizeLimitingDeidentifyFactory(600)))); 115 | 116 | RequestMeasuringDlpCaller 117 | deidRequestMeasuringDlpCaller = 118 | new RequestMeasuringDlpCaller<>(dlpClient -> dlpClient::deidentifyContent); 119 | 120 | var deidRows = 121 | DlpRequestBatchExecutor.builder() 122 | .setDlpColumnName("dlpColumnName") 123 | .setDlpClientFactory(() -> dlpServiceClient) 124 | .setDlpCallFnFactory(deidRequestMeasuringDlpCaller) 125 | .setTableToDlpRequestFnFactory( 126 | dlpClient -> 127 | table -> 128 | DeidentifyContentRequest.newBuilder() 129 | .setParent("projects/test-project-id") 130 | .setItem(ContentItem.newBuilder().setTable(table)) 131 | .build()) 132 | .setDlpRequestToTableFn(deidRequest -> deidRequest.getItem().getTable()) 133 | .setDlpResponseToTableFn(deidResponse -> deidResponse.getItem().getTable()) 134 | .build() 135 | .process(testRows); 136 | 137 | assertThat(deidRows).hasSize(testRows.size()); 138 | assertThat(deidRequestMeasuringDlpCaller.getRequestCalls()).isEqualTo(expectedTableRequests); 139 | } 140 | 141 | private static List makeRows(String base, int rowCount) { 142 | return IntStream.range(0, rowCount) 143 | .boxed() 144 | .map(i -> String.format("%s %06d", base, i)) 145 | .collect(toImmutableList()); 146 | } 147 | 148 | public static final class RequestMeasuringDlpCaller 149 | implements Function> { 150 | 151 | private int requestCount; 152 | 153 | private final Function> actualFn; 154 | 155 | public RequestMeasuringDlpCaller( 156 | Function> actualFn) { 157 | this.requestCount = 0; 158 | this.actualFn = actualFn; 159 | } 160 | 161 | @Override 162 | public Function apply(DlpServiceClient dlpServiceClient) { 163 | return dlpRequestT -> { 164 | requestCount++; 165 | return actualFn.apply(dlpServiceClient).apply(dlpRequestT); 166 | }; 167 | } 168 | 169 | public int getRequestCalls() { 170 | return requestCount; 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- 
/src/main/java/com/google/cloud/solutions/bqremoteencryptionfn/fns/dlp/DlpReIdRequestMaker.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.solutions.bqremoteencryptionfn.fns.dlp; 18 | 19 | import static com.google.common.base.Preconditions.checkNotNull; 20 | import static java.util.stream.Collectors.toList; 21 | 22 | import com.google.privacy.dlp.v2.ContentItem; 23 | import com.google.privacy.dlp.v2.CustomInfoType; 24 | import com.google.privacy.dlp.v2.CustomInfoType.SurrogateType; 25 | import com.google.privacy.dlp.v2.DeidentifyConfig; 26 | import com.google.privacy.dlp.v2.FieldTransformation; 27 | import com.google.privacy.dlp.v2.InfoType; 28 | import com.google.privacy.dlp.v2.InfoTypeTransformations; 29 | import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation; 30 | import com.google.privacy.dlp.v2.InspectConfig; 31 | import com.google.privacy.dlp.v2.PrimitiveTransformation; 32 | import com.google.privacy.dlp.v2.PrimitiveTransformation.TransformationCase; 33 | import com.google.privacy.dlp.v2.RecordTransformations; 34 | import com.google.privacy.dlp.v2.ReidentifyContentRequest; 35 | import com.google.privacy.dlp.v2.TransformationErrorHandling; 36 | import java.util.List; 37 | import java.util.function.Function; 38 | import 
java.util.stream.Stream; 39 | 40 | /** 41 | * Transform a given {@link DeidentifyConfig} to {@link ReidentifyContentRequest}. 42 | * 43 | *