├── __init__.py ├── eval ├── __init__.py ├── testdata │ ├── BUILD │ └── expected_results ├── run_pipeline.py ├── results.proto └── BUILD ├── common ├── __init__.py ├── BUILD ├── gcsutil.py ├── testutil.py └── beam_testutil.py ├── dlp ├── __init__.py ├── testdata │ ├── input.csv │ ├── BUILD │ ├── batch_config.json │ ├── multi_column_config.json │ ├── multi_column_request.json │ ├── batch_request.json │ ├── request.json │ └── config.json ├── mae_testdata │ ├── sample.xml │ ├── sample.dtd │ └── BUILD ├── experimental_deid_config.json ├── radiology_deid_config.json ├── BUILD └── run_deid.py ├── .gitignore ├── deid_app ├── backend │ ├── __init__.py │ ├── config.py │ └── BUILD ├── frontend │ ├── src │ │ ├── dlp-demo │ │ │ ├── dlp-demo.component.css │ │ │ ├── dlp-text-demo │ │ │ │ ├── dlp-text-demo.component.css │ │ │ │ ├── dlp-text-demo.component.html │ │ │ │ ├── dlp-text-demo.component.ts │ │ │ │ └── dlp-text-demo.component.spec.ts │ │ │ ├── dlp-demo.component.html │ │ │ ├── dlp-demo.component.ts │ │ │ ├── dlp-demo.component.spec.ts │ │ │ └── dlp-image-demo │ │ │ │ ├── dlp-image-demo.component.html │ │ │ │ ├── dlp-image-demo.component.css │ │ │ │ ├── dlp-image-demo.component.spec.ts │ │ │ │ └── dlp-image-demo.component.ts │ │ ├── deidentify │ │ │ ├── upload-notes │ │ │ │ ├── upload-notes.component.css │ │ │ │ ├── upload-notes.component.html │ │ │ │ ├── upload-notes.component.ts │ │ │ │ └── upload-notes.component.spec.ts │ │ │ ├── current-jobs │ │ │ │ ├── current-jobs.component.css │ │ │ │ ├── current-jobs.component.spec.ts │ │ │ │ ├── current-jobs.component.ts │ │ │ │ └── current-jobs.component.html │ │ │ ├── run-deidentify │ │ │ │ ├── run-deidentify.component.css │ │ │ │ └── run-deidentify.component.spec.ts │ │ │ ├── csv-upload │ │ │ │ ├── csv-upload.component.css │ │ │ │ ├── csv-upload.component.spec.ts │ │ │ │ ├── csv-upload.component.html │ │ │ │ └── csv-upload.component.ts │ │ │ ├── deid_job.ts │ │ │ ├── compare-data │ │ │ │ ├── compare-data.component.css │ │ │ │ 
├── compare-data.component.spec.ts │ │ │ │ └── compare-data.component.html │ │ │ ├── deidentify.component.html │ │ │ ├── deidentify.component.ts │ │ │ ├── deidentify.component.css │ │ │ └── deidentify.component.spec.ts │ │ ├── common │ │ │ ├── bigquery-new-table.css │ │ │ ├── bigquery-table.css │ │ │ ├── display-option.ts │ │ │ ├── submit_component.css │ │ │ ├── submit_component.ts │ │ │ ├── submit_component.html │ │ │ ├── bigquery-table.html │ │ │ ├── bigquery-new-table.html │ │ │ ├── bigquery-table.ts │ │ │ └── bigquery-new-table.ts │ │ ├── environments │ │ │ ├── environment.prod.ts │ │ │ └── environment.ts │ │ ├── favicon.ico │ │ ├── styles.css │ │ ├── evaluate │ │ │ ├── eval-stats │ │ │ │ ├── eval-stats.component.css │ │ │ │ ├── eval-stats.component.spec.ts │ │ │ │ ├── eval-stats.component.html │ │ │ │ └── eval-stats.component.ts │ │ │ ├── eval-pipeline │ │ │ │ ├── eval-pipeline.component.css │ │ │ │ └── eval-pipeline.component.spec.ts │ │ │ ├── evaluate.component.html │ │ │ ├── evaluate.component.css │ │ │ ├── evaluate.component.ts │ │ │ ├── eval_job.ts │ │ │ └── evaluate.component.spec.ts │ │ ├── app │ │ │ ├── app.component.css │ │ │ ├── app.component.ts │ │ │ ├── routing.module.spec.ts │ │ │ ├── material.module.spec.ts │ │ │ ├── routing.module.ts │ │ │ ├── app.component.spec.ts │ │ │ ├── app.component.html │ │ │ ├── material.module.ts │ │ │ └── app.module.ts │ │ ├── tsconfig.app.json │ │ ├── tsconfig.spec.json │ │ ├── tslint.json │ │ ├── browserslist │ │ ├── main.ts │ │ ├── index.html │ │ ├── test.ts │ │ ├── karma.conf.js │ │ ├── services │ │ │ ├── http_interceptor.ts │ │ │ ├── error_handler.ts │ │ │ └── dlp-demo.service.spec.ts │ │ └── polyfills.ts │ ├── proxy.conf.json │ ├── e2e │ │ ├── src │ │ │ ├── app.po.ts │ │ │ └── app.e2e-spec.ts │ │ ├── tsconfig.e2e.json │ │ └── protractor.conf.js │ ├── tsconfig.json │ ├── README.md │ ├── package.json │ └── tslint.json └── BUILD ├── physionet ├── __init__.py ├── docker │ ├── Dockerfile │ └── cloudbuild.yaml ├── 
bigquery_to_gcs.py ├── gcs_to_bigquery.py ├── bigquery_to_gcs_lib_test.py ├── physionet_to_mae.py ├── run_deid.py ├── gcs_to_bigquery_lib.py ├── bigquery_to_gcs_lib.py ├── physionet_to_mae_lib_test.py └── physionet_to_mae_lib.py ├── offline_tools └── redactor │ ├── .gitignore │ ├── gradle │ └── wrapper │ │ ├── gradle-wrapper.jar │ │ └── gradle-wrapper.properties │ ├── examples │ └── tag_remover │ │ ├── src │ │ ├── test │ │ │ ├── resources │ │ │ │ ├── basic.dcm │ │ │ │ └── basic-redacted.dcm │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── google │ │ │ │ └── cloud │ │ │ │ └── healthcare │ │ │ │ └── deid │ │ │ │ └── remover │ │ │ │ └── TagRemoverTest.java │ │ └── main │ │ │ └── java │ │ │ └── com │ │ │ └── google │ │ │ └── cloud │ │ │ └── healthcare │ │ │ └── deid │ │ │ └── remover │ │ │ └── TagRemover.java │ │ └── build.gradle │ ├── lib │ ├── gradle.properties │ └── src │ │ └── main │ │ └── proto │ │ └── DicomConfig.proto │ ├── settings.gradle │ ├── build.gradle │ ├── README.md │ └── gradlew.bat ├── mae ├── images │ └── firewall_rule.png ├── docker │ ├── xstartup │ └── Dockerfile ├── BUILD ├── remove_invalid_characters.py ├── bq_to_xml.py └── txt_to_xml.py ├── README.md ├── requirements ├── requirements.txt └── BUILD ├── six.BUILD ├── mist ├── docker │ ├── cloudbuild.yaml │ └── Dockerfile ├── bigquery_to_gcs.py ├── gcs_to_bigquery.py ├── bigquery_to_gcs_lib_test.py ├── run_mist.py ├── README.md ├── gcs_to_bigquery_lib_test.py ├── bigquery_to_gcs_lib.py ├── BUILD └── gcs_to_bigquery_lib.py ├── CONTRIBUTING.md ├── setup.py └── WORKSPACE /__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dlp/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bazel-* 2 | *.pyc -------------------------------------------------------------------------------- /deid_app/backend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /physionet/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /offline_tools/redactor/.gitignore: -------------------------------------------------------------------------------- 1 | .gradle -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-demo.component.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/upload-notes/upload-notes.component.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-text-demo/dlp-text-demo.component.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /physionet/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM 
google/cloud-sdk:latest 2 | 3 | ADD . / 4 | -------------------------------------------------------------------------------- /deid_app/frontend/src/common/bigquery-new-table.css: -------------------------------------------------------------------------------- 1 | mat-form-field { 2 | width: 100%; 3 | } 4 | -------------------------------------------------------------------------------- /deid_app/frontend/src/common/bigquery-table.css: -------------------------------------------------------------------------------- 1 | mat-form-field { 2 | width: 100%; 3 | } 4 | 5 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/current-jobs/current-jobs.component.css: -------------------------------------------------------------------------------- 1 | table { 2 | width: 100%; 3 | } 4 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/upload-notes/upload-notes.component.html: -------------------------------------------------------------------------------- 1 |

2 | upload-notes works! 3 |

4 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-text-demo/dlp-text-demo.component.html: -------------------------------------------------------------------------------- 1 |

2 | dlp-text-demo works! 3 |

4 | -------------------------------------------------------------------------------- /deid_app/frontend/src/environments/environment.prod.ts: -------------------------------------------------------------------------------- 1 | export const environment = { 2 | production: true 3 | }; 4 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/run-deidentify/run-deidentify.component.css: -------------------------------------------------------------------------------- 1 | mat-form-field { 2 | width: 100%; 3 | } 4 | -------------------------------------------------------------------------------- /mae/images/firewall_rule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/healthcare-deid/HEAD/mae/images/firewall_rule.png -------------------------------------------------------------------------------- /deid_app/frontend/src/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/healthcare-deid/HEAD/deid_app/frontend/src/favicon.ico -------------------------------------------------------------------------------- /dlp/testdata/input.csv: -------------------------------------------------------------------------------- 1 | name,patient_id,record_number,note,comments 2 | Bruce,222,1,Mr. 
Banner lives in Manhattan,Follow up at 1-212-555-1234 3 | -------------------------------------------------------------------------------- /deid_app/frontend/proxy.conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "/api": { 3 | "target": "http://localhost:5000", 4 | "secure": false, 5 | "changeOrigin": true 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /deid_app/frontend/src/styles.css: -------------------------------------------------------------------------------- 1 | @import '~@angular/material/prebuilt-themes/indigo-pink.css'; 2 | html, 3 | body { 4 | font-family: "Open Sans", sans-serif; 5 | } 6 | -------------------------------------------------------------------------------- /offline_tools/redactor/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/healthcare-deid/HEAD/offline_tools/redactor/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /deid_app/frontend/src/evaluate/eval-stats/eval-stats.component.css: -------------------------------------------------------------------------------- 1 | table, mat-form-field { 2 | width: 100%; 3 | } 4 | 5 | mat-cell, .mat-cell { 6 | padding: 12px 16px 12px 0; 7 | } 8 | -------------------------------------------------------------------------------- /deid_app/frontend/src/app/app.component.css: -------------------------------------------------------------------------------- 1 | .title { 2 | margin: 0 auto; 3 | } 4 | 5 | mat-sidenav { 6 | width: 300px; 7 | } 8 | 9 | mat-button-toggle-group { 10 | width: 100% 11 | } 12 | 13 | -------------------------------------------------------------------------------- /deid_app/frontend/src/common/display-option.ts: -------------------------------------------------------------------------------- 1 | 
/** 2 | * Represents an option available in a select element. 3 | */ 4 | export interface DisplayOption { 5 | value: string; 6 | displayString: string; 7 | } 8 | -------------------------------------------------------------------------------- /offline_tools/redactor/examples/tag_remover/src/test/resources/basic.dcm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/healthcare-deid/HEAD/offline_tools/redactor/examples/tag_remover/src/test/resources/basic.dcm -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-demo.component.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 |
8 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/csv-upload/csv-upload.component.css: -------------------------------------------------------------------------------- 1 | mat-form-field { 2 | width: 100%; 3 | } 4 | 5 | .upload-item { 6 | display: flex; 7 | align-items: center; 8 | justify-content: center; 9 | } 10 | -------------------------------------------------------------------------------- /offline_tools/redactor/examples/tag_remover/src/test/resources/basic-redacted.dcm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleCloudPlatform/healthcare-deid/HEAD/offline_tools/redactor/examples/tag_remover/src/test/resources/basic-redacted.dcm -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Healthcare De-Id 2 | 3 | This project contains tools to run various tools for de-identifying medical 4 | records on Google Cloud Platform. 5 | 6 | For example, see physionet/README.md for info on running PhysioNet De-Id. 
7 | -------------------------------------------------------------------------------- /deid_app/frontend/src/tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "../out-tsc/app", 5 | "types": [] 6 | }, 7 | "exclude": [ 8 | "test.ts", 9 | "**/*.spec.ts" 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /offline_tools/redactor/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-5.4.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /deid_app/frontend/e2e/src/app.po.ts: -------------------------------------------------------------------------------- 1 | import { browser, by, element } from 'protractor'; 2 | 3 | export class AppPage { 4 | navigateTo() { 5 | return browser.get('/'); 6 | } 7 | 8 | getParagraphText() { 9 | return element(by.css('app-root h1')).getText(); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /deid_app/frontend/e2e/tsconfig.e2e.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "../out-tsc/app", 5 | "module": "commonjs", 6 | "target": "es5", 7 | "types": [ 8 | "jasmine", 9 | "jasminewd2", 10 | "node" 11 | ] 12 | } 13 | } -------------------------------------------------------------------------------- /deid_app/frontend/src/app/app.component.ts: -------------------------------------------------------------------------------- 1 | import {Component} from '@angular/core'; 2 | 3 | @Component({ 4 
| selector: 'app-root', 5 | templateUrl: './app.component.html', 6 | styleUrls: ['./app.component.css'] 7 | }) 8 | export class AppComponent { 9 | title = 'Healthcare Deid'; 10 | } 11 | -------------------------------------------------------------------------------- /dlp/mae_testdata/sample.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /deid_app/frontend/src/common/submit_component.css: -------------------------------------------------------------------------------- 1 | button.pipeline-submit { 2 | margin: 10px 0; 3 | width: 40%; 4 | } 5 | 6 | div.pipeline-submit { 7 | display: flex; 8 | align-items: center; 9 | justify-content: center; 10 | } 11 | 12 | .pipeline-spinner { 13 | margin-bottom: 10px; 14 | } 15 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/deid_job.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Represents a Deid job along with the information needed for that job. 
3 | */ 4 | export interface DeidJob { 5 | id: number; 6 | name: string; 7 | originalQuery: string; 8 | deidTable: string; 9 | status?: number; 10 | logTrace?: string; 11 | timestamp: Date; 12 | } 13 | -------------------------------------------------------------------------------- /offline_tools/redactor/lib/gradle.properties: -------------------------------------------------------------------------------- 1 | group=com.google.cloud.healthcare 2 | name=offline-dicom-redactor 3 | version=1.0.0-SNAPSHOT 4 | 5 | signing.keyId= 6 | signing.password= 7 | signing.secretKeyRingFile= 8 | 9 | ossrhUsername= 10 | ossrhPassword= -------------------------------------------------------------------------------- /requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | apache_beam 2 | google-apitools==0.5.26 # Newer versions are incompatible with six 1.10.0. 3 | google-auth-httplib2 4 | google-cloud-storage 5 | fastavro 6 | flask 7 | sqlalchemy 8 | flask_sqlalchemy 9 | jsonschema 10 | pymysql 11 | six==1.10.0 12 | jinja2 13 | markupsafe 14 | click 15 | pyarrow 16 | attrs 17 | pyrsistent 18 | -------------------------------------------------------------------------------- /deid_app/frontend/src/tsconfig.spec.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "../out-tsc/spec", 5 | "types": [ 6 | "jasmine", 7 | "node" 8 | ] 9 | }, 10 | "files": [ 11 | "test.ts", 12 | "polyfills.ts" 13 | ], 14 | "include": [ 15 | "**/*.spec.ts", 16 | "**/*.d.ts" 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /six.BUILD: -------------------------------------------------------------------------------- 1 | # Python 2 and 3 compatibility utils. 
https://pypi.python.org/pypi/six 2 | 3 | genrule( 4 | name = "copy_six", 5 | srcs = ["six-1.10.0/six.py"], 6 | outs = ["six.py"], 7 | cmd = "cp $< $(@)", 8 | ) 9 | 10 | py_library( 11 | name = "six", 12 | srcs = ["six.py"], 13 | srcs_version = "PY2AND3", 14 | visibility = ["//visibility:public"], 15 | ) 16 | -------------------------------------------------------------------------------- /deid_app/frontend/src/app/routing.module.spec.ts: -------------------------------------------------------------------------------- 1 | import { RoutingModule } from './routing.module'; 2 | 3 | describe('RoutingModule', () => { 4 | let routingModule: RoutingModule; 5 | 6 | beforeEach(() => { 7 | routingModule = new RoutingModule(); 8 | }); 9 | 10 | it('should create an instance', () => { 11 | expect(routingModule).toBeTruthy(); 12 | }); 13 | }); 14 | -------------------------------------------------------------------------------- /deid_app/frontend/src/evaluate/eval-pipeline/eval-pipeline.component.css: -------------------------------------------------------------------------------- 1 | mat-form-field { 2 | width: 100%; 3 | } 4 | 5 | button.pipeline-submit { 6 | margin: 10px 0; 7 | width: 40%; 8 | } 9 | 10 | div.pipeline-submit { 11 | display: flex; 12 | align-items: center; 13 | justify-content: center; 14 | } 15 | 16 | .pipeline-spinner { 17 | margin-bottom: 10px; 18 | } 19 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-demo.component.ts: -------------------------------------------------------------------------------- 1 | import { Component, OnInit } from '@angular/core'; 2 | 3 | @Component({ 4 | selector: 'app-dlp-demo', 5 | templateUrl: './dlp-demo.component.html', 6 | styleUrls: ['./dlp-demo.component.css'] 7 | }) 8 | export class DlpDemoComponent implements OnInit { 9 | 10 | constructor() { } 11 | 12 | ngOnInit() { 13 | } 14 | 15 | } 16 | 
-------------------------------------------------------------------------------- /mae/docker/xstartup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export XKL_XMODMAP_DISABLE=1 4 | unset SESSION_MANAGER 5 | unset DBUS_SESSION_BUS_ADDRESS 6 | 7 | [ -x /etc/vnc/xstartup ] && exec /etc/vnc/xstartup 8 | [ -r $HOME/.Xresources ] && xrdb $HOME/.Xresources 9 | xsetroot -solid grey 10 | vncconfig -iconic & 11 | 12 | gnome-panel & 13 | gnome-settings-daemon & 14 | metacity & 15 | nautilus & 16 | gnome-terminal & -------------------------------------------------------------------------------- /deid_app/frontend/e2e/src/app.e2e-spec.ts: -------------------------------------------------------------------------------- 1 | import { AppPage } from './app.po'; 2 | 3 | describe('workspace-project App', () => { 4 | let page: AppPage; 5 | 6 | beforeEach(() => { 7 | page = new AppPage(); 8 | }); 9 | 10 | it('should display welcome message', () => { 11 | page.navigateTo(); 12 | expect(page.getParagraphText()).toEqual('Welcome to frontend!'); 13 | }); 14 | }); 15 | -------------------------------------------------------------------------------- /deid_app/frontend/src/app/material.module.spec.ts: -------------------------------------------------------------------------------- 1 | import { AppMaterialModule } from './material.module'; 2 | 3 | describe('AppMaterialModule', () => { 4 | let materialModule: AppMaterialModule; 5 | 6 | beforeEach(() => { 7 | materialModule = new AppMaterialModule(); 8 | }); 9 | 10 | it('should create an instance', () => { 11 | expect(materialModule).toBeTruthy(); 12 | }); 13 | }); 14 | -------------------------------------------------------------------------------- /deid_app/frontend/src/tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tslint.json", 3 | "rules": { 4 | "directive-selector": [ 5 | true, 6 | "attribute", 7 | "app", 8 | 
"camelCase" 9 | ], 10 | "component-selector": [ 11 | true, 12 | "element", 13 | "app", 14 | "kebab-case" 15 | ] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/upload-notes/upload-notes.component.ts: -------------------------------------------------------------------------------- 1 | import { Component, OnInit } from '@angular/core'; 2 | 3 | @Component({ 4 | selector: 'app-upload-notes', 5 | templateUrl: './upload-notes.component.html', 6 | styleUrls: ['./upload-notes.component.css'] 7 | }) 8 | export class UploadNotesComponent implements OnInit { 9 | 10 | constructor() { } 11 | 12 | ngOnInit() { 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-text-demo/dlp-text-demo.component.ts: -------------------------------------------------------------------------------- 1 | import { Component, OnInit } from '@angular/core'; 2 | 3 | @Component({ 4 | selector: 'app-dlp-text-demo', 5 | templateUrl: './dlp-text-demo.component.html', 6 | styleUrls: ['./dlp-text-demo.component.css'] 7 | }) 8 | export class DlpTextDemoComponent implements OnInit { 9 | 10 | constructor() { } 11 | 12 | ngOnInit() { 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /deid_app/frontend/src/evaluate/evaluate.component.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Compare manual and automatic labeling 11 | 12 | 13 |
14 | 15 | -------------------------------------------------------------------------------- /deid_app/frontend/src/browserslist: -------------------------------------------------------------------------------- 1 | # This file is currently used by autoprefixer to adjust CSS to support the below specified browsers 2 | # For additional information regarding the format and rule options, please see: 3 | # https://github.com/browserslist/browserslist#queries 4 | # 5 | # For IE 9-11 support, please remove 'not' from the last line of the file and adjust as needed 6 | 7 | > 0.5% 8 | last 2 versions 9 | Firefox ESR 10 | not dead 11 | not IE 9-11 -------------------------------------------------------------------------------- /deid_app/frontend/src/main.ts: -------------------------------------------------------------------------------- 1 | import 'hammerjs'; 2 | 3 | import {enableProdMode} from '@angular/core'; 4 | import {platformBrowserDynamic} from '@angular/platform-browser-dynamic'; 5 | 6 | import {AppModule} from './app/app.module'; 7 | import {environment} from './environments/environment'; 8 | 9 | if (environment.production) { 10 | enableProdMode(); 11 | } 12 | 13 | platformBrowserDynamic().bootstrapModule(AppModule).catch( 14 | err => console.log(err)); 15 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/compare-data/compare-data.component.css: -------------------------------------------------------------------------------- 1 | #compare-text-row { 2 | margin: 10px; 3 | display:flex; 4 | flex-direction:row; 5 | justify-content: space-around; 6 | } 7 | 8 | .compare-text-col { 9 | display: flex; 10 | flex-direction: column; 11 | justify-content: space-around; 12 | width: 45%; 13 | background-color: white; 14 | padding: 10px; 15 | } 16 | 17 | mat-form-field { 18 | width: 100%; 19 | } 20 | 21 | .data { 22 | flex-direction: row; 23 | } 24 | 
-------------------------------------------------------------------------------- /deid_app/frontend/src/common/submit_component.ts: -------------------------------------------------------------------------------- 1 | import {Component, Input, OnInit} from '@angular/core'; 2 | import {FormGroup} from '@angular/forms'; 3 | 4 | /** 5 | * Submit with spinner while running. 6 | */ 7 | @Component({ 8 | selector: 'app-submit', 9 | templateUrl: './submit_component.html', 10 | styleUrls: [ 11 | './submit_component.css', 12 | ] 13 | }) 14 | export class SubmitComponent { 15 | @Input() submitPlaceholder: string; 16 | @Input() submitForm: FormGroup; 17 | waiting = false; 18 | } 19 | -------------------------------------------------------------------------------- /deid_app/frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compileOnSave": false, 3 | "compilerOptions": { 4 | "baseUrl": "./", 5 | "outDir": "./dist/out-tsc", 6 | "sourceMap": true, 7 | "declaration": false, 8 | "module": "es2015", 9 | "moduleResolution": "node", 10 | "emitDecoratorMetadata": true, 11 | "experimentalDecorators": true, 12 | "target": "es5", 13 | "typeRoots": [ 14 | "node_modules/@types" 15 | ], 16 | "lib": [ 17 | "es2017", 18 | "dom" 19 | ] 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/deidentify.component.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |
17 | 18 | -------------------------------------------------------------------------------- /deid_app/frontend/src/common/submit_component.html: -------------------------------------------------------------------------------- 1 |
2 | 9 |
10 |
11 | 15 | 16 |
17 | -------------------------------------------------------------------------------- /deid_app/frontend/src/evaluate/evaluate.component.css: -------------------------------------------------------------------------------- 1 | input[type=number]::-webkit-inner-spin-button, 2 | input[type=number]::-webkit-outer-spin-button { 3 | -webkit-appearance: none; 4 | margin: 0; 5 | } 6 | 7 | .dlp-container { 8 | margin: 20px auto; 9 | padding: 0 15px; 10 | background-color: white; 11 | } 12 | @media (min-width: 768px) { 13 | .dlp-container { 14 | width: 750px; 15 | } 16 | } 17 | @media (min-width: 992px) { 18 | .dlp-container { 19 | width: 970px; 20 | } 21 | 22 | } 23 | @media (min-width: 1200px) { 24 | .dlp-container { 25 | width: 1170px; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /deid_app/frontend/src/evaluate/evaluate.component.ts: -------------------------------------------------------------------------------- 1 | import {Component, OnInit} from '@angular/core'; 2 | import {DlpDemoService} from '../services/dlp-demo.service'; 3 | 4 | @Component({ 5 | selector: 'app-evaluate', 6 | templateUrl: './evaluate.component.html', 7 | styleUrls: ['./evaluate.component.css'] 8 | }) 9 | export class EvaluateComponent implements OnInit { 10 | constructor(private dlpDemoService: DlpDemoService) {} 11 | 12 | ngOnInit() { 13 | this.dlpDemoService.refreshEvalJobs(); 14 | this.dlpDemoService.refreshDatasets(); 15 | this.dlpDemoService.refreshProject(); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/deidentify.component.ts: -------------------------------------------------------------------------------- 1 | import {Component, OnInit} from '@angular/core'; 2 | import {DlpDemoService} from '../services/dlp-demo.service'; 3 | 4 | @Component({ 5 | selector: 'app-deidentify', 6 | templateUrl: './deidentify.component.html', 7 | styleUrls: 
['./deidentify.component.css'] 8 | }) 9 | export class DeidentifyComponent implements OnInit { 10 | constructor(private dlpDemoService: DlpDemoService) {} 11 | 12 | ngOnInit() { 13 | this.dlpDemoService.refreshDeidJobs(); 14 | this.dlpDemoService.refreshDatasets(); 15 | this.dlpDemoService.refreshProject(); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /dlp/mae_testdata/sample.dtd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /deid_app/frontend/src/evaluate/eval_job.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Represents an Eval job along with the information needed for that job. 3 | */ 4 | export interface EvalJob { 5 | id: number; 6 | name: string; 7 | findings: string; 8 | goldens: string; 9 | stats: string; 10 | debug: string; 11 | status: number; 12 | logTrace: string; 13 | timestamp: Date; 14 | } 15 | 16 | /** 17 | * Represents an Eval stats entry. 
18 | */ 19 | export interface EvalStats { 20 | infoType: string; 21 | recall?: number; 22 | precision?: number; 23 | fScore?: number; 24 | truePositives?: number; 25 | falsePositives?: number; 26 | falseNegatives?: number; 27 | timestamp: Date; 28 | } 29 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/deidentify.component.css: -------------------------------------------------------------------------------- 1 | input[type=number]::-webkit-inner-spin-button, 2 | input[type=number]::-webkit-outer-spin-button { 3 | -webkit-appearance: none; 4 | margin: 0; 5 | } 6 | 7 | .dlp-container { 8 | margin: 20px 0; 9 | padding-right: 15px; 10 | padding-left: 15px; 11 | margin-right: auto; 12 | margin-left: auto; 13 | background-color: white; 14 | } 15 | @media (min-width: 768px) { 16 | .dlp-container { 17 | width: 750px; 18 | } 19 | } 20 | @media (min-width: 992px) { 21 | .dlp-container { 22 | width: 970px; 23 | } 24 | 25 | } 26 | @media (min-width: 1200px) { 27 | .dlp-container { 28 | width: 1170px; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /deid_app/frontend/src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Deid App 6 | 7 | 8 | 9 | 10 | 12 | 14 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /mist/docker/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Google Cloud Build config for building a docker container with 2 | # MIST. Sample usage: 3 | # gcloud builds submit . 
--config=cloudbuild.yaml \ 4 | # --project=${PROJECTNAME?} 5 | 6 | steps: 7 | - name: 'ubuntu' 8 | args: ['mkdir', 'docker-build'] 9 | - name: 'ubuntu' 10 | args: ['cp', 'Dockerfile', 'docker-build/'] 11 | - name: 'gcr.io/cloud-builders/docker' 12 | args: ['build', '-t', 'gcr.io/${PROJECT_ID}/mist:latest', 'docker-build/'] 13 | - name: 'gcr.io/cloud-builders/docker' 14 | args: ['build', '-t', 'gcr.io/${PROJECT_ID}/mist:${BUILD_ID}', 'docker-build/'] 15 | 16 | images: 17 | - 'gcr.io/${PROJECT_ID}/mist:latest' 18 | - 'gcr.io/${PROJECT_ID}/mist:${BUILD_ID}' 19 | -------------------------------------------------------------------------------- /deid_app/frontend/src/environments/environment.ts: -------------------------------------------------------------------------------- 1 | // This file can be replaced during build by using the `fileReplacements` array. 2 | // `ng build ---prod` replaces `environment.ts` with `environment.prod.ts`. 3 | // The list of file replacements can be found in `angular.json`. 4 | 5 | export const environment = { 6 | production: false, 7 | }; 8 | 9 | /* 10 | * In development mode, for easier debugging, you can ignore zone related error 11 | * stack frames such as `zone.run`/`zoneDelegate.invokeTask` by importing the 12 | * below file. Don't forget to comment it out in production mode 13 | * because it will have a performance impact when errors are thrown 14 | */ 15 | // import 'zone.js/dist/zone-error'; // Included with Angular CLI. 16 | -------------------------------------------------------------------------------- /deid_app/frontend/src/common/bigquery-table.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 5 | 7 | {{dataset}} 8 | 9 | 10 | 11 | 12 | 13 | 15 | 17 | {{table}} 18 | 19 | 20 | 21 |
22 | -------------------------------------------------------------------------------- /requirements/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | licenses(["notice"]) # Apache License 2.0 16 | 17 | exports_files(["requirements.txt"]) 18 | -------------------------------------------------------------------------------- /offline_tools/redactor/settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | rootProject.name = 'redactor' 18 | include 'lib', 'examples:tag_remover' -------------------------------------------------------------------------------- /deid_app/frontend/src/evaluate/eval-stats/eval-stats.component.spec.ts: -------------------------------------------------------------------------------- 1 | import {async, ComponentFixture, TestBed} from '@angular/core/testing'; 2 | 3 | import {EvalStatsComponent} from './eval-stats.component'; 4 | 5 | describe('EvalStatsComponent', () => { 6 | let component: EvalStatsComponent; 7 | let fixture: ComponentFixture; 8 | 9 | beforeEach(async(() => { 10 | TestBed.configureTestingModule({declarations: [EvalStatsComponent]}) 11 | .compileComponents(); 12 | })); 13 | 14 | beforeEach(() => { 15 | fixture = TestBed.createComponent(EvalStatsComponent); 16 | component = fixture.componentInstance; 17 | fixture.detectChanges(); 18 | }); 19 | 20 | it('should create', () => { 21 | expect(component).toBeTruthy(); 22 | }); 23 | }); 24 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/csv-upload/csv-upload.component.spec.ts: -------------------------------------------------------------------------------- 1 | import {async, ComponentFixture, TestBed} from '@angular/core/testing'; 2 | 3 | import {CsvUploadComponent} from './csv-upload.component'; 4 | 5 | describe('CsvUploadComponent', () => { 6 | let component: CsvUploadComponent; 7 | let fixture: ComponentFixture; 8 | 9 | beforeEach(async(() => { 10 | TestBed.configureTestingModule({declarations: [CsvUploadComponent]}) 11 | .compileComponents(); 12 | })); 13 | 14 | beforeEach(() => { 15 | fixture = TestBed.createComponent(CsvUploadComponent); 16 | component = fixture.componentInstance; 17 | fixture.detectChanges(); 18 | }); 19 | 20 | it('should create', () => { 21 | expect(component).toBeTruthy(); 22 | }); 23 | }); 24 | -------------------------------------------------------------------------------- 
/deid_app/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Build rules for tools that run Healthcare DeID on GCP. 16 | 17 | licenses(["notice"]) # Apache License 2.0 18 | 19 | exports_files(["LICENSE"]) 20 | -------------------------------------------------------------------------------- /deid_app/frontend/src/app/routing.module.ts: -------------------------------------------------------------------------------- 1 | import { NgModule } from '@angular/core'; 2 | import { RouterModule, Routes } from '@angular/router'; 3 | 4 | import { DeidentifyComponent } from '../deidentify/deidentify.component'; 5 | import { EvaluateComponent } from '../evaluate/evaluate.component'; 6 | import { DlpDemoComponent } from '../dlp-demo/dlp-demo.component'; 7 | 8 | const routes: Routes = [ 9 | { path: '', redirectTo: '/dlpdemo', pathMatch: 'full' }, 10 | { path: 'dlpdemo', component: DlpDemoComponent }, 11 | { path: 'deidentify', component: DeidentifyComponent }, 12 | { path: 'evaluate', component: EvaluateComponent }, 13 | ]; 14 | 15 | @NgModule({ 16 | imports: [ 17 | RouterModule.forRoot(routes) 18 | ], 19 | exports: [ RouterModule ], 20 | }) 21 | export class RoutingModule { } 22 | 23 | -------------------------------------------------------------------------------- 
/deid_app/frontend/src/evaluate/eval-pipeline/eval-pipeline.component.spec.ts: -------------------------------------------------------------------------------- 1 | import {async, ComponentFixture, TestBed} from '@angular/core/testing'; 2 | 3 | import {EvalPipelineComponent} from './eval-pipeline.component'; 4 | 5 | describe('EvalPipelineComponent', () => { 6 | let component: EvalPipelineComponent; 7 | let fixture: ComponentFixture; 8 | 9 | beforeEach(async(() => { 10 | TestBed.configureTestingModule({declarations: [EvalPipelineComponent]}) 11 | .compileComponents(); 12 | })); 13 | 14 | beforeEach(() => { 15 | fixture = TestBed.createComponent(EvalPipelineComponent); 16 | component = fixture.componentInstance; 17 | fixture.detectChanges(); 18 | }); 19 | 20 | it('should create', () => { 21 | expect(component).toBeTruthy(); 22 | }); 23 | }); 24 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/compare-data/compare-data.component.spec.ts: -------------------------------------------------------------------------------- 1 | import { async, ComponentFixture, TestBed } from '@angular/core/testing'; 2 | 3 | import { CompareDataComponent } from './compare-data.component'; 4 | 5 | describe('CompareDataComponent', () => { 6 | let component: CompareDataComponent; 7 | let fixture: ComponentFixture; 8 | 9 | beforeEach(async(() => { 10 | TestBed.configureTestingModule({ 11 | declarations: [ CompareDataComponent ] 12 | }) 13 | .compileComponents(); 14 | })); 15 | 16 | beforeEach(() => { 17 | fixture = TestBed.createComponent(CompareDataComponent); 18 | component = fixture.componentInstance; 19 | fixture.detectChanges(); 20 | }); 21 | 22 | it('should create', () => { 23 | expect(component).toBeTruthy(); 24 | }); 25 | }); 26 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/upload-notes/upload-notes.component.spec.ts: 
-------------------------------------------------------------------------------- 1 | import { async, ComponentFixture, TestBed } from '@angular/core/testing'; 2 | 3 | import { UploadNotesComponent } from './upload-notes.component'; 4 | 5 | describe('UploadNotesComponent', () => { 6 | let component: UploadNotesComponent; 7 | let fixture: ComponentFixture; 8 | 9 | beforeEach(async(() => { 10 | TestBed.configureTestingModule({ 11 | declarations: [ UploadNotesComponent ] 12 | }) 13 | .compileComponents(); 14 | })); 15 | 16 | beforeEach(() => { 17 | fixture = TestBed.createComponent(UploadNotesComponent); 18 | component = fixture.componentInstance; 19 | fixture.detectChanges(); 20 | }); 21 | 22 | it('should create', () => { 23 | expect(component).toBeTruthy(); 24 | }); 25 | }); 26 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-text-demo/dlp-text-demo.component.spec.ts: -------------------------------------------------------------------------------- 1 | import { async, ComponentFixture, TestBed } from '@angular/core/testing'; 2 | 3 | import { DlpTextDemoComponent } from './dlp-text-demo.component'; 4 | 5 | describe('DlpTextDemoComponent', () => { 6 | let component: DlpTextDemoComponent; 7 | let fixture: ComponentFixture; 8 | 9 | beforeEach(async(() => { 10 | TestBed.configureTestingModule({ 11 | declarations: [ DlpTextDemoComponent ] 12 | }) 13 | .compileComponents(); 14 | })); 15 | 16 | beforeEach(() => { 17 | fixture = TestBed.createComponent(DlpTextDemoComponent); 18 | component = fixture.componentInstance; 19 | fixture.detectChanges(); 20 | }); 21 | 22 | it('should create', () => { 23 | expect(component).toBeTruthy(); 24 | }); 25 | }); 26 | -------------------------------------------------------------------------------- /deid_app/frontend/src/test.ts: -------------------------------------------------------------------------------- 1 | // This file is required by karma.conf.js and loads 
recursively all the .spec and framework files 2 | 3 | import 'zone.js/dist/zone-testing'; 4 | import { getTestBed } from '@angular/core/testing'; 5 | import { 6 | BrowserDynamicTestingModule, 7 | platformBrowserDynamicTesting, 8 | } from '@angular/platform-browser-dynamic/testing'; 9 | 10 | // tslint:disable-next-line:no-any This is a generated file required by karma.conf.js 11 | declare const require: any; 12 | 13 | // First, initialize the Angular testing environment. 14 | getTestBed().initTestEnvironment( 15 | BrowserDynamicTestingModule, 16 | platformBrowserDynamicTesting() 17 | ); 18 | // Then we find all the tests. 19 | const context = require.context('./', true, /\.spec\.ts$/); 20 | // And load the modules. 21 | context.keys().map(context); 22 | -------------------------------------------------------------------------------- /eval/testdata/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | package( 16 | default_visibility = ["//visibility:public"], 17 | ) 18 | 19 | licenses(["notice"]) # Apache 2.0 20 | 21 | exports_files([ 22 | "expected_results", 23 | ]) 24 | -------------------------------------------------------------------------------- /offline_tools/redactor/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | allprojects { 18 | repositories { 19 | google() 20 | jcenter() 21 | mavenCentral() 22 | maven { url 'http://www.dcm4che.org/maven2/' } 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /dlp/mae_testdata/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | package( 16 | default_visibility = ["//visibility:public"], 17 | ) 18 | 19 | licenses(["notice"]) # Apache 2.0 20 | 21 | exports_files([ 22 | "sample.xml", 23 | "sample.dtd", 24 | ]) 25 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-demo.component.spec.ts: -------------------------------------------------------------------------------- 1 | import {NO_ERRORS_SCHEMA} from '@angular/core'; 2 | import {async, ComponentFixture, TestBed} from '@angular/core/testing'; 3 | 4 | import {DlpDemoComponent} from './dlp-demo.component'; 5 | 6 | describe('DlpDemoComponent', () => { 7 | let component: DlpDemoComponent; 8 | let fixture: ComponentFixture; 9 | 10 | beforeEach(async(() => { 11 | TestBed 12 | .configureTestingModule( 13 | {declarations: [DlpDemoComponent], schemas: [NO_ERRORS_SCHEMA]}) 14 | .compileComponents(); 15 | })); 16 | 17 | beforeEach(() => { 18 | fixture = TestBed.createComponent(DlpDemoComponent); 19 | component = fixture.componentInstance; 20 | fixture.detectChanges(); 21 | }); 22 | 23 | it('should create', () => { 24 | expect(component).toBeTruthy(); 25 | }); 26 | }); 27 | -------------------------------------------------------------------------------- /deid_app/frontend/src/evaluate/evaluate.component.spec.ts: -------------------------------------------------------------------------------- 1 | import {NO_ERRORS_SCHEMA} from '@angular/core'; 2 | import {async, ComponentFixture, TestBed} from '@angular/core/testing'; 3 | 4 | import {EvaluateComponent} from './evaluate.component'; 5 | 6 | describe('EvaluateComponent', () => { 7 | let component: EvaluateComponent; 8 | let fixture: ComponentFixture; 9 | 10 | beforeEach(async(() => { 11 | TestBed 12 | .configureTestingModule( 13 | {declarations: [EvaluateComponent], schemas: [NO_ERRORS_SCHEMA]}) 14 | 
.compileComponents(); 15 | })); 16 | 17 | beforeEach(() => { 18 | fixture = TestBed.createComponent(EvaluateComponent); 19 | component = fixture.componentInstance; 20 | fixture.detectChanges(); 21 | }); 22 | 23 | it('should create', () => { 24 | expect(component).toBeTruthy(); 25 | }); 26 | }); 27 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-image-demo/dlp-image-demo.component.html: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 | 9 |
10 |
11 | 12 |
13 |
14 |

Original Image

15 |
16 | 17 |
18 |
19 |
20 |

Redacted Image

21 |
22 | 23 |
24 |
25 |
26 | 27 | -------------------------------------------------------------------------------- /deid_app/frontend/e2e/protractor.conf.js: -------------------------------------------------------------------------------- 1 | // Protractor configuration file, see link for more information 2 | // https://github.com/angular/protractor/blob/master/lib/config.ts 3 | 4 | const { SpecReporter } = require('jasmine-spec-reporter'); 5 | 6 | exports.config = { 7 | allScriptsTimeout: 11000, 8 | specs: [ 9 | './src/**/*.e2e-spec.ts' 10 | ], 11 | capabilities: { 12 | 'browserName': 'chrome' 13 | }, 14 | directConnect: true, 15 | baseUrl: 'http://localhost:4200/', 16 | framework: 'jasmine', 17 | jasmineNodeOpts: { 18 | showColors: true, 19 | defaultTimeoutInterval: 30000, 20 | print: function() {} 21 | }, 22 | onPrepare() { 23 | require('ts-node').register({ 24 | project: require('path').join(__dirname, './tsconfig.e2e.json') 25 | }); 26 | jasmine.getEnv().addReporter(new SpecReporter({ spec: { displayStacktrace: true } })); 27 | } 28 | }; -------------------------------------------------------------------------------- /deid_app/frontend/src/common/bigquery-new-table.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 5 | 7 | {{dataset}} 8 | 9 | 10 | 11 | 12 | 13 | 18 | 19 | 21 | {{table}} 22 | 23 | 24 | 25 |
26 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/deidentify.component.spec.ts: -------------------------------------------------------------------------------- 1 | import {NO_ERRORS_SCHEMA} from '@angular/core'; 2 | import {async, ComponentFixture, TestBed} from '@angular/core/testing'; 3 | 4 | import {DeidentifyComponent} from './deidentify.component'; 5 | 6 | describe('DeidentifyComponent', () => { 7 | let component: DeidentifyComponent; 8 | let fixture: ComponentFixture; 9 | 10 | beforeEach(async(() => { 11 | TestBed 12 | .configureTestingModule( 13 | {declarations: [DeidentifyComponent], schemas: [NO_ERRORS_SCHEMA]}) 14 | .compileComponents(); 15 | })); 16 | 17 | beforeEach(() => { 18 | fixture = TestBed.createComponent(DeidentifyComponent); 19 | component = fixture.componentInstance; 20 | fixture.detectChanges(); 21 | }); 22 | 23 | it('should create', () => { 24 | expect(component).toBeTruthy(); 25 | }); 26 | }); 27 | -------------------------------------------------------------------------------- /deid_app/frontend/src/app/app.component.spec.ts: -------------------------------------------------------------------------------- 1 | import {NO_ERRORS_SCHEMA} from '@angular/core'; 2 | import {async, TestBed} from '@angular/core/testing'; 3 | 4 | import {AppComponent} from './app.component'; 5 | 6 | describe('AppComponent', () => { 7 | beforeEach(async(() => { 8 | TestBed 9 | .configureTestingModule( 10 | {declarations: [AppComponent], schemas: [NO_ERRORS_SCHEMA]}) 11 | .compileComponents(); 12 | })); 13 | 14 | it('should create the app', async(() => { 15 | const fixture = TestBed.createComponent(AppComponent); 16 | const app = fixture.debugElement.componentInstance; 17 | expect(app).toBeTruthy(); 18 | })); 19 | 20 | it(`should have as title 'healthcare deid'`, async(() => { 21 | const fixture = TestBed.createComponent(AppComponent); 22 | const app = fixture.debugElement.componentInstance; 23 | 
expect(app.title).toEqual('Healthcare Deid'); 24 | })); 25 | }); 26 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-image-demo/dlp-image-demo.component.css: -------------------------------------------------------------------------------- 1 | #dlp-demo-image-row{ 2 | display:flex; 3 | flex-direction:row; 4 | justify-content: space-around; 5 | height: 500px; 6 | } 7 | 8 | .dlp-img-column { 9 | display: flex; 10 | flex-direction: column; 11 | justify-content: space-around; 12 | width: 40%; 13 | } 14 | 15 | .dlp-demo-img { 16 | margin: 20px; 17 | display: flex; 18 | width: 100%; 19 | height: 100%; 20 | flex-direction: column; 21 | border-radius: 25px; 22 | background-color: #E8E8E8; 23 | } 24 | 25 | .dlp-demo-img > img { 26 | object-fit: scale-down; 27 | width: 100%; 28 | height: 100%; 29 | border-radius: 25px; 30 | border: 2px solid; 31 | } 32 | 33 | .top-bar-container { 34 | margin-top: 10px; 35 | display: flex; 36 | flex-direction: row; 37 | justify-content: space-between; 38 | place-items: baseline; 39 | width: 100%; 40 | font-weight: 300; 41 | } 42 | 43 | .top-bar-item { 44 | margin-left: 20px; 45 | } 46 | -------------------------------------------------------------------------------- /deid_app/frontend/src/app/app.component.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Dlp Demo 6 | 7 | 8 | Deidentify 9 | 10 | 11 | Evaluate 12 | 13 | 14 | 15 | 16 | 17 | 18 | 21 |
22 | {{title}} 23 |
24 |
25 | 26 |
27 |
28 | 29 | -------------------------------------------------------------------------------- /dlp/testdata/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | package( 16 | default_visibility = ["//visibility:public"], 17 | ) 18 | 19 | licenses(["notice"]) # Apache 2.0 20 | 21 | exports_files([ 22 | "batch_config.json", 23 | "batch_request.json", 24 | "config.json", 25 | "multi_column_config.json", 26 | "multi_column_request.json", 27 | "request.json", 28 | "input.csv", 29 | ]) 30 | -------------------------------------------------------------------------------- /deid_app/frontend/README.md: -------------------------------------------------------------------------------- 1 | # Frontend 2 | 3 | This project was generated with [Angular CLI](https://github.com/angular/angular-cli) version 6.1.5. 4 | 5 | ## Development server 6 | 7 | Run `ng serve` for a dev server. Navigate to `http://localhost:4200/`. The app will automatically reload if you change any of the source files. 8 | 9 | ## Code scaffolding 10 | 11 | Run `ng generate component component-name` to generate a new component. You can also use `ng generate directive|pipe|service|class|guard|interface|enum|module`. 12 | 13 | ## Build 14 | 15 | Run `ng build` to build the project. 
The build artifacts will be stored in the `dist/` directory. Use the `--prod` flag for a production build. 16 | 17 | ## Running unit tests 18 | 19 | Run `ng test` to execute the unit tests via [Karma](https://karma-runner.github.io). 20 | 21 | ## Running end-to-end tests 22 | 23 | Run `ng e2e` to execute the end-to-end tests via [Protractor](http://www.protractortest.org/). 24 | 25 | ## Further help 26 | 27 | To get more help on the Angular CLI use `ng help` or go check out the [Angular CLI README](https://github.com/angular/angular-cli/blob/master/README.md). 28 | -------------------------------------------------------------------------------- /deid_app/frontend/src/karma.conf.js: -------------------------------------------------------------------------------- 1 | // Karma configuration file, see link for more information 2 | // https://karma-runner.github.io/1.0/config/configuration-file.html 3 | 4 | /** 5 | * @param {JSON!} config configuration file for testing. 6 | */ 7 | module.exports = function (config) { 8 | config.set({ 9 | basePath: '', 10 | frameworks: ['jasmine', '@angular-devkit/build-angular'], 11 | plugins: [ 12 | require('karma-jasmine'), 13 | require('karma-chrome-launcher'), 14 | require('karma-jasmine-html-reporter'), 15 | require('karma-coverage-istanbul-reporter'), 16 | require('@angular-devkit/build-angular/plugins/karma') 17 | ], 18 | client: { 19 | clearContext: false // leave Jasmine Spec Runner output visible in browser 20 | }, 21 | coverageIstanbulReporter: { 22 | dir: require('path').join(__dirname, '../coverage'), 23 | reports: ['html', 'lcovonly'], 24 | fixWebpackSourcePaths: true 25 | }, 26 | reporters: ['progress', 'kjhtml'], 27 | port: 9876, 28 | colors: true, 29 | logLevel: config.LOG_INFO, 30 | autoWatch: true, 31 | browsers: ['Chrome'], 32 | singleRun: false 33 | }); 34 | }; 35 | -------------------------------------------------------------------------------- /deid_app/frontend/src/services/http_interceptor.ts: 
-------------------------------------------------------------------------------- 1 | import {HttpEvent, HttpHandler, HttpRequest} from '@angular/common/http'; 2 | import {HttpErrorResponse, HttpInterceptor} from '@angular/common/http'; 3 | import {Injectable} from '@angular/core'; 4 | import {Observable, throwError} from 'rxjs'; 5 | import {catchError} from 'rxjs/operators'; 6 | 7 | import {ErrorHandler} from './error_handler'; 8 | 9 | /** 10 | * Ensures that any error responses from the server are handled properly. 11 | */ 12 | @Injectable() 13 | export class RequestInterceptor implements HttpInterceptor { 14 | constructor(private handler: ErrorHandler) {} 15 | 16 | /** 17 | * Implement the intercept function that gets called on every 18 | * incoming/outgoing call between app and server. 19 | */ 20 | // tslint:disable:no-any error can be of any type by definition. 21 | intercept(request: HttpRequest, next: HttpHandler): 22 | Observable> { 23 | // tslint:enable:no-any error can be of any type by definition. 
24 | return next.handle(request).pipe(catchError((error, caught) => { 25 | this.handler.handleError(error); 26 | return throwError(error); 27 | })); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/current-jobs/current-jobs.component.spec.ts: -------------------------------------------------------------------------------- 1 | import {HttpClientTestingModule} from '@angular/common/http/testing'; 2 | import {async, ComponentFixture, TestBed} from '@angular/core/testing'; 3 | import {BrowserAnimationsModule} from '@angular/platform-browser/animations'; 4 | 5 | import {AppMaterialModule} from '../../app/material.module'; 6 | 7 | import {CurrentJobsComponent} from './current-jobs.component'; 8 | 9 | describe('CurrentJobsComponent', () => { 10 | let component: CurrentJobsComponent; 11 | let fixture: ComponentFixture; 12 | 13 | beforeEach(async(() => { 14 | TestBed 15 | .configureTestingModule({ 16 | imports: [ 17 | BrowserAnimationsModule, HttpClientTestingModule, 18 | AppMaterialModule 19 | ], 20 | declarations: [CurrentJobsComponent], 21 | }) 22 | .compileComponents(); 23 | })); 24 | 25 | beforeEach(() => { 26 | fixture = TestBed.createComponent(CurrentJobsComponent); 27 | component = fixture.componentInstance; 28 | fixture.detectChanges(); 29 | }); 30 | 31 | it('should create', () => { 32 | expect(component).toBeTruthy(); 33 | }); 34 | }); 35 | -------------------------------------------------------------------------------- /physionet/docker/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Google Cloud Build config for building a docker container with 2 | # physionet DeID. Sample usage: 3 | # gcloud builds submit . 
--config=cloudbuild.yaml \ 4 | # --project=${PROJECT:?} --substitutions="_PHYSIONET_VERSION=1.1" 5 | 6 | steps: 7 | - name: 'gcr.io/cloud-builders/wget' 8 | args: ['https://physionet.org/physiotools/sources/deid/deid-${_PHYSIONET_VERSION}.tar.gz'] 9 | - name: 'ubuntu' 10 | args: ['tar', '-xzf', 'deid-${_PHYSIONET_VERSION}.tar.gz'] 11 | - name: 'ubuntu' 12 | args: ['mkdir', 'docker-build'] 13 | - name: 'ubuntu' 14 | args: ['cp', 'Dockerfile', 'docker-build/'] 15 | - name: 'ubuntu' 16 | args: ['cp', 'deid-${_PHYSIONET_VERSION}/deid.pl', 'docker-build/'] 17 | - name: 'ubuntu' 18 | args: ['cp', '-r', 'deid-${_PHYSIONET_VERSION}/lists', 'docker-build/'] 19 | - name: 'ubuntu' 20 | args: ['cp', '-r', 'deid-${_PHYSIONET_VERSION}/dict', 'docker-build/'] 21 | - name: 'gcr.io/cloud-builders/docker' 22 | args: ['build', '-t', 'gcr.io/${PROJECT_ID}/physionet:latest', 'docker-build/'] 23 | - name: 'gcr.io/cloud-builders/docker' 24 | args: ['build', '-t', 'gcr.io/${PROJECT_ID}/physionet:deid-${_PHYSIONET_VERSION}', 'docker-build/'] 25 | 26 | images: 27 | - 'gcr.io/${PROJECT_ID}/physionet:latest' 28 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/csv-upload/csv-upload.component.html: -------------------------------------------------------------------------------- 1 |
2 |

Upload CSV to BigQuery

3 |
4 | 5 | 9 | 10 | 11 | 12 | 13 |
14 | 17 | 22 | 23 | {{selectedCsv.value.name}} 24 | 25 |
26 | 27 | 29 | 30 |
31 |
32 | -------------------------------------------------------------------------------- /offline_tools/redactor/examples/tag_remover/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | plugins { 18 | id 'java' 19 | id 'application' 20 | } 21 | 22 | buildDir = "/tmp/gradle_build/redactor/examples" 23 | 24 | dependencies { 25 | implementation project(':lib') 26 | implementation 'commons-cli:commons-cli:1.4' 27 | 28 | testImplementation 'junit:junit:4.12' 29 | testImplementation 'com.google.truth:truth:0.39' 30 | testImplementation 'commons-io:commons-io:2.6' 31 | } 32 | 33 | mainClassName = 'com.google.cloud.healthcare.deid.remover.TagRemover' 34 | 35 | jar { 36 | manifest { 37 | attributes 'Main-Class': mainClassName 38 | } 39 | } -------------------------------------------------------------------------------- /deid_app/frontend/src/services/error_handler.ts: -------------------------------------------------------------------------------- 1 | import {Injectable} from '@angular/core'; 2 | import {MatSnackBar} from '@angular/material/snack-bar'; 3 | 4 | /** 5 | * Represents an error returned from the DlpDemo backend. 6 | */ 7 | interface DlpServiceError { 8 | text: string; 9 | error: number; 10 | } 11 | 12 | /** 13 | * Handles errors and display their message in a snack bar. 
14 | */ 15 | @Injectable() 16 | export class ErrorHandler { 17 | constructor( 18 | public snackbar: MatSnackBar, 19 | ) {} 20 | 21 | /** 22 | * Opens a SnackBar with the error details for the user. 23 | */ 24 | // tslint:disable-next-line:no-any error can be of any type by definition. 25 | handleError(err: any) { 26 | let message = 'Server Error'; 27 | if (this.isDlpServiceError(err.error) && err.error.text) { 28 | message = err.error.text; 29 | } 30 | this.snackbar.open(message, 'close'); 31 | } 32 | 33 | // tslint:disable-next-line:no-any error can be of any type by definition. 34 | private isDlpServiceError(errorObject: any): errorObject is DlpServiceError { 35 | return errorObject.text !== undefined && errorObject.error !== undefined && 36 | typeof errorObject.text === 'string' && 37 | typeof errorObject.error === 'number'; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /mist/bigquery_to_gcs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Stand-alone executable version of bigquery_to_gcs_lib.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import argparse 20 | import logging 21 | import sys 22 | 23 | from mist import bigquery_to_gcs_lib 24 | 25 | 26 | def main(): 27 | logging.getLogger().setLevel(logging.INFO) 28 | 29 | parser = argparse.ArgumentParser( 30 | description=('Read from BigQuery to MIST format.')) 31 | bigquery_to_gcs_lib.add_all_args(parser) 32 | args, extra_args = parser.parse_known_args(sys.argv[1:]) 33 | 34 | bigquery_to_gcs_lib.run_pipeline( 35 | args.input_query, args.output_path, extra_args) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /mist/gcs_to_bigquery.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Stand-alone executable version of gcs_to_bigquery_lib.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import argparse 20 | import logging 21 | import sys 22 | 23 | from mist import gcs_to_bigquery_lib 24 | 25 | 26 | def main(): 27 | logging.getLogger().setLevel(logging.INFO) 28 | 29 | parser = argparse.ArgumentParser( 30 | description=('Read from MIST files in GCS to BigQuery.')) 31 | gcs_to_bigquery_lib.add_all_args(parser) 32 | args, extra_args = parser.parse_known_args(sys.argv[1:]) 33 | 34 | gcs_to_bigquery_lib.run_pipeline( 35 | args.input_pattern, args.output_table, extra_args) 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /physionet/bigquery_to_gcs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Stand-alone executable version of bigquery_to_gcs_lib.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import argparse 20 | import logging 21 | import sys 22 | 23 | from physionet import bigquery_to_gcs_lib 24 | 25 | 26 | def main(): 27 | logging.getLogger().setLevel(logging.INFO) 28 | 29 | parser = argparse.ArgumentParser( 30 | description=('Read from BigQuery to PhysioNet format.')) 31 | bigquery_to_gcs_lib.add_all_args(parser) 32 | args, extra_args = parser.parse_known_args(sys.argv[1:]) 33 | 34 | bigquery_to_gcs_lib.run_pipeline( 35 | args.input_query, args.output_file, extra_args) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /physionet/gcs_to_bigquery.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Stand-alone executable version of gcs_to_bigquery_lib.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import argparse 20 | import logging 21 | import sys 22 | 23 | from physionet import gcs_to_bigquery_lib 24 | 25 | 26 | def main(): 27 | logging.getLogger().setLevel(logging.INFO) 28 | 29 | parser = argparse.ArgumentParser( 30 | description=('Read from PhysioNet files in GCS to BigQuery.')) 31 | gcs_to_bigquery_lib.add_all_args(parser) 32 | args, extra_args = parser.parse_known_args(sys.argv[1:]) 33 | 34 | gcs_to_bigquery_lib.run_pipeline( 35 | args.input_pattern, args.output_table, extra_args) 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/current-jobs/current-jobs.component.ts: -------------------------------------------------------------------------------- 1 | import {Component, OnDestroy, OnInit} from '@angular/core'; 2 | import {MatTableDataSource} from '@angular/material/table'; 3 | import {Subscription} from 'rxjs'; 4 | 5 | import {DlpDemoService} from '../../services/dlp-demo.service'; 6 | import {DeidJob} from '../deid_job'; 7 | 8 | /** 9 | * Displays the deidentify jobs that the user have created and ran. 10 | */ 11 | @Component({ 12 | selector: 'app-current-jobs', 13 | templateUrl: './current-jobs.component.html', 14 | styleUrls: [ 15 | './current-jobs.component.css', 16 | '../deidentify.component.css', 17 | ] 18 | }) 19 | export class CurrentJobsComponent implements OnInit, OnDestroy { 20 | private readonly subscriptions = new Subscription(); 21 | 22 | dataSource = new MatTableDataSource(); 23 | displayedColumns: string[] = 24 | ['id', 'name', 'originalQuery', 'deidTable', 'status', 'timestamp']; 25 | 26 | constructor(private dlpDemoService: DlpDemoService) {} 27 | 28 | ngOnInit() { 29 | /* Updates the dataSource whenever the with Deid Jobs. 
*/ 30 | this.subscriptions.add( 31 | this.dlpDemoService.deidJobs.subscribe((jobs: DeidJob[]) => { 32 | this.dataSource.data = jobs; 33 | })); 34 | } 35 | 36 | ngOnDestroy() { 37 | this.subscriptions.unsubscribe(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-image-demo/dlp-image-demo.component.spec.ts: -------------------------------------------------------------------------------- 1 | import {HttpClientTestingModule} from '@angular/common/http/testing'; 2 | import {async, ComponentFixture, TestBed} from '@angular/core/testing'; 3 | import {By} from '@angular/platform-browser'; 4 | 5 | import {DlpImageDemoComponent} from './dlp-image-demo.component'; 6 | 7 | describe('DlpImageDemoComponent', () => { 8 | let component: DlpImageDemoComponent; 9 | let fixture: ComponentFixture; 10 | 11 | beforeEach(async(() => { 12 | TestBed 13 | .configureTestingModule({ 14 | imports: [HttpClientTestingModule], 15 | declarations: [DlpImageDemoComponent] 16 | }) 17 | .compileComponents(); 18 | })); 19 | 20 | beforeEach(() => { 21 | fixture = TestBed.createComponent(DlpImageDemoComponent); 22 | component = fixture.componentInstance; 23 | fixture.detectChanges(); 24 | }); 25 | 26 | it('should create', () => { 27 | expect(component).toBeTruthy(); 28 | }); 29 | 30 | it('should handle file change event', () => { 31 | const input = 32 | fixture.debugElement.query(By.css('input[type=file]')).nativeElement; 33 | 34 | spyOn(component, 'redactImage'); 35 | input.dispatchEvent(new Event('change')); 36 | expect(component.redactImage).toHaveBeenCalled(); 37 | }); 38 | }); 39 | -------------------------------------------------------------------------------- /mist/bigquery_to_gcs_lib_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Tests for bigquery_to_gcs_lib.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import unittest 20 | 21 | from mist import bigquery_to_gcs_lib 22 | 23 | 24 | class BigqueryToGcsTest(unittest.TestCase): 25 | 26 | def test_to_mist_record(self): 27 | row = {'patient_id': 999, 'note': 'test note'} 28 | self.assertEqual( 29 | '||||START_OF_RECORD||||999\ntest note', 30 | bigquery_to_gcs_lib.map_to_mist_record(row)) 31 | 32 | def test_to_mist_record_bad_input(self): 33 | self.assertIsNone(bigquery_to_gcs_lib.map_to_mist_record( 34 | {'patient_id': 999, 'record_number': 1})) 35 | 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 
13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Coding Style 26 | 27 | All code contributions should follow Google language-specific 28 | [Style Guides](https://google.github.io/styleguide/). 29 | 30 | ## Community Guidelines 31 | 32 | This project follows 33 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 34 | -------------------------------------------------------------------------------- /physionet/bigquery_to_gcs_lib_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Tests for bigquery_to_gcs_lib.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import unittest 20 | 21 | from physionet import bigquery_to_gcs_lib 22 | 23 | 24 | class BigqueryToGcsTest(unittest.TestCase): 25 | 26 | def test_to_physionet_record(self): 27 | row = {'patient_id': 999, 'record_number': 1, 'note': 'test note'} 28 | self.assertEqual( 29 | 'START_OF_RECORD=999||||1||||\ntest note\n||||END_OF_RECORD', 30 | bigquery_to_gcs_lib.map_to_physionet_record(row)) 31 | 32 | def test_to_physionet_record_bad_input(self): 33 | self.assertIsNone(bigquery_to_gcs_lib.map_to_physionet_record( 34 | {'patient_id': 999, 'record_number': 1})) 35 | 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /mae/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | 3 | ENV DEBIAN_FRONTEND noninteractive 4 | ENV USER root 5 | 6 | RUN apt-get update && apt-get install -y --no-install-recommends ubuntu-desktop 7 | 8 | RUN apt-get update && \ 9 | apt-get install -y gnome-panel gnome-settings-daemon metacity nautilus gnome-terminal && \ 10 | apt-get install -y tightvncserver && \ 11 | mkdir /root/.vnc 12 | 13 | RUN apt-get install -y openjdk-8-jdk 14 | 15 | # Install Google Cloud SDK 16 | RUN apt-get install -y curl 17 | RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \ 18 | echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | \ 19 | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list 20 | RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - 21 | RUN apt-get update && apt-get install -y google-cloud-sdk 22 | 23 | RUN mkdir -p /root/.config/nautilus && \ 24 | chmod 700 /root/.config/nautilus 25 | 26 | ADD xstartup /root/.vnc/xstartup 27 | RUN chmod 755 /root/.vnc/xstartup 28 | 29 | ADD 
https://github.com/keighrim/mae-annotation/releases/download/v2.0.9/mae-2.0.9-fatjar.jar /root/ 30 | RUN chmod 755 /root/mae-2.0.9-fatjar.jar 31 | 32 | # Can be overridden by adding `--env PASSWORD=mypassword` to the `docker run` 33 | # command. 34 | ENV PASSWORD password 35 | 36 | CMD printf "$PASSWORD\n$PASSWORD\n\n" | vncpasswd && \ 37 | export PASSWORD="" && \ 38 | /usr/bin/vncserver :1 -geometry 1280x800 -depth 24 && tail -f /root/.vnc/*:1.log 39 | 40 | EXPOSE 5901 41 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup module for the healthcare_deid DLP pipeline. 2 | 3 | All of the code necessary to run the pipeline is packaged into a source 4 | distribution that is uploaded to the --staging_location specified on the command 5 | line. The source distribution is then installed on the workers before they 6 | start running. 7 | 8 | When remotely executing the pipeline, `--setup_file path/to/setup.py` must be 9 | added to the pipeline's command line. 10 | """ 11 | 12 | import os 13 | import setuptools 14 | 15 | 16 | # Add required python packages that should be installed over and above the 17 | # standard DataFlow worker environment. Version restrictions are supported if 18 | # necessary. 19 | REQUIRED_PACKAGES = [ 20 | 'apache_beam[gcp]', 21 | 'google-api-python-client', 22 | 'google-cloud-storage', 23 | 'six==1.10.0', 24 | ] 25 | 26 | packages = ['common', 'dlp', 'physionet'] 27 | package_dir = {p: p for p in packages} 28 | # Use eval from bazel-bin so we get the generated results_pb2.py file. 29 | # If it doesn't exist, then the job is another pipeline that doesn't need eval. 
30 | eval_bazel_path = 'bazel-bin/eval/run_pipeline.runfiles/__main__/eval' 31 | if os.path.exists(eval_bazel_path): 32 | packages.append('eval') 33 | package_dir['eval'] = eval_bazel_path 34 | 35 | setuptools.setup( 36 | name='healthcare_deid', 37 | version='0.0.1', 38 | package_dir=package_dir, 39 | description='Healthcare Deid pipeline package.', 40 | install_requires=REQUIRED_PACKAGES, 41 | packages=packages) 42 | -------------------------------------------------------------------------------- /mist/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ubuntu:14.04 16 | 17 | RUN apt-get update 18 | RUN apt-get install -y openjdk-7-jdk python python-dev python-virtualenv unzip wget 19 | 20 | # Set up the repository to install google-cloud-sdk. 21 | RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list 22 | RUN wget -O - https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - 23 | RUN apt-get update 24 | RUN apt-get install -y google-cloud-sdk 25 | 26 | # Download and install MIST. 
27 | RUN wget -O mist.zip https://sourceforge.net/projects/mist-deid/files/latest/download?source=files 28 | RUN unzip mist.zip -d mist 29 | RUN mist/`ls mist`/install.sh 30 | 31 | # The path is dependent on the version, so we create a symlink to the 32 | # installation directory and point $MAT_PKG_HOME at it. 33 | RUN ln -s /mist/`ls mist`/src/MAT /mist_home 34 | ENV MAT_PKG_HOME="/mist_home" 35 | -------------------------------------------------------------------------------- /deid_app/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "0.0.0", 4 | "scripts": { 5 | "ng": "ng", 6 | "start": "ng serve", 7 | "build": "ng build", 8 | "test": "ng test", 9 | "lint": "ng lint", 10 | "e2e": "ng e2e" 11 | }, 12 | "private": true, 13 | "dependencies": { 14 | "@angular/animations": "^6.1.9", 15 | "@angular/cdk": "^6.4.7", 16 | "@angular/common": "^6.1.0", 17 | "@angular/compiler": "^6.1.0", 18 | "@angular/core": "^6.1.0", 19 | "@angular/forms": "^6.1.0", 20 | "@angular/http": "^6.1.0", 21 | "@angular/material": "^6.4.7", 22 | "@angular/platform-browser": "^6.1.0", 23 | "@angular/platform-browser-dynamic": "^6.1.0", 24 | "@angular/router": "^6.1.0", 25 | "core-js": "^2.5.4", 26 | "hammerjs": "^2.0.8", 27 | "rxjs": "^6.0.0", 28 | "zone.js": "~0.8.26" 29 | }, 30 | "devDependencies": { 31 | "@angular-devkit/build-angular": "~0.7.0", 32 | "@angular/cli": "~6.1.5", 33 | "@angular/compiler-cli": "^6.1.0", 34 | "@angular/language-service": "^6.1.0", 35 | "@types/jasmine": "~2.8.6", 36 | "@types/jasminewd2": "~2.0.3", 37 | "@types/node": "~8.9.4", 38 | "codelyzer": "~4.2.1", 39 | "jasmine-core": "~2.99.1", 40 | "jasmine-spec-reporter": "~4.2.1", 41 | "karma": "~1.7.1", 42 | "karma-chrome-launcher": "~2.2.0", 43 | "karma-coverage-istanbul-reporter": "~2.0.0", 44 | "karma-jasmine": "~1.1.1", 45 | "karma-jasmine-html-reporter": "^0.2.2", 46 | "protractor": "~5.4.0", 47 | "ts-node": 
"~5.0.1", 48 | "tslint": "~5.9.1", 49 | "typescript": "~2.7.2" 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /physionet/physionet_to_mae.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Stand-alone executable version of physionet_to_mae_lib.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import argparse 20 | import logging 21 | import sys 22 | 23 | from physionet import physionet_to_mae_lib 24 | 25 | 26 | def main(): 27 | logging.getLogger().setLevel(logging.INFO) 28 | 29 | parser = argparse.ArgumentParser( 30 | description=('Convert files from PhysioNet to MAE.')) 31 | physionet_to_mae_lib.add_all_args(parser) 32 | args, pipeline_args = parser.parse_known_args(sys.argv[1:]) 33 | # --project is used both as a local arg and a pipeline arg, so parse it, then 34 | # add it to pipeline_args as well. 
35 | pipeline_args += ['--project', args.project] 36 | 37 | physionet_to_mae_lib.run_pipeline(args.input_pattern, args.mae_output_dir, 38 | args.mae_task_name, args.project, 39 | pipeline_args) 40 | 41 | 42 | if __name__ == '__main__': 43 | main() 44 | -------------------------------------------------------------------------------- /mist/run_mist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Stand-alone executable version of run_mist_lib.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import argparse 20 | import logging 21 | import sys 22 | 23 | from mist import run_mist_lib 24 | from google.cloud import storage 25 | 26 | 27 | def main(): 28 | logging.getLogger().setLevel(logging.INFO) 29 | 30 | parser = argparse.ArgumentParser( 31 | description=('Run MIST on Google Cloud Platform.')) 32 | run_mist_lib.add_all_args(parser) 33 | args = parser.parse_args(sys.argv[1:]) 34 | 35 | storage_client = storage.Client(args.project) 36 | 37 | errors = run_mist_lib.run_pipeline( 38 | args.input_pattern, args.output_directory, args.model_filename, 39 | args.project, args.log_directory, args.max_num_threads, 40 | args.service_account, storage_client) 41 | 42 | if errors: 43 | logging.error(errors) 44 | return 1 45 | 46 | logging.info('Ran MIST and put output in %s', args.output_directory) 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /deid_app/frontend/src/app/material.module.ts: -------------------------------------------------------------------------------- 1 | import {NgModule} from '@angular/core'; 2 | import {MatAutocompleteModule} from '@angular/material/autocomplete'; 3 | import {MatButtonModule} from '@angular/material/button'; 4 | import {MatButtonToggleModule} from '@angular/material/button-toggle'; 5 | import {MatCheckboxModule} from '@angular/material/checkbox'; 6 | import {MatDividerModule} from '@angular/material/divider'; 7 | import {MatFormFieldModule} from '@angular/material/form-field'; 8 | import {MatIconModule} from '@angular/material/icon'; 9 | import {MatInputModule} from '@angular/material/input'; 10 | import {MatListModule} from '@angular/material/list'; 11 | import {MatProgressSpinnerModule} from '@angular/material/progress-spinner'; 12 | import {MatSelectModule} from '@angular/material/select'; 13 | import {MatSidenavModule} 
from '@angular/material/sidenav'; 14 | import {MatSnackBarModule} from '@angular/material/snack-bar'; 15 | import {MatTableModule} from '@angular/material/table'; 16 | import {MatTabsModule} from '@angular/material/tabs'; 17 | import {MatToolbarModule} from '@angular/material/toolbar'; 18 | 19 | /** 20 | * The Angular Material Module. Groups material modules and components and 21 | * exports them to the AppModule. 22 | */ 23 | @NgModule({ 24 | exports: [ 25 | MatAutocompleteModule, 26 | MatListModule, 27 | MatIconModule, 28 | MatInputModule, 29 | MatToolbarModule, 30 | MatButtonModule, 31 | MatSidenavModule, 32 | MatTabsModule, 33 | MatFormFieldModule, 34 | MatButtonToggleModule, 35 | MatSelectModule, 36 | MatTableModule, 37 | MatProgressSpinnerModule, 38 | MatDividerModule, 39 | MatSnackBarModule, 40 | MatCheckboxModule, 41 | ] 42 | }) 43 | export class AppMaterialModule { 44 | } 45 | 46 | -------------------------------------------------------------------------------- /physionet/run_deid.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Stand-alone executable version of run_deid.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import argparse 20 | import logging 21 | import sys 22 | 23 | from physionet import run_deid_lib 24 | from google.cloud import storage 25 | 26 | 27 | def main(): 28 | logging.getLogger().setLevel(logging.INFO) 29 | 30 | parser = argparse.ArgumentParser( 31 | description=('Run Physionet DeID on Google Cloud Platform.')) 32 | run_deid_lib.add_all_args(parser) 33 | args = parser.parse_args(sys.argv[1:]) 34 | 35 | storage_client = storage.Client(args.project) 36 | 37 | errors = run_deid_lib.run_pipeline( 38 | args.input_pattern, args.output_directory, args.config_file, args.project, 39 | args.log_directory, args.dict_directory, args.lists_directory, 40 | args.max_num_threads, args.include_original_in_pn_output, 41 | args.service_account, storage_client) 42 | 43 | if errors: 44 | logging.error(errors) 45 | return 1 46 | 47 | logging.info('Ran PhysioNet DeID and put output in %s', args.output_directory) 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /deid_app/frontend/src/common/bigquery-table.ts: -------------------------------------------------------------------------------- 1 | import {Component, Input, OnInit} from '@angular/core'; 2 | import {FormBuilder, FormControl, FormGroup, Validators} from '@angular/forms'; 3 | import {Observable} from 'rxjs'; 4 | import {filter, map, switchMap} from 'rxjs/operators'; 5 | 6 | import {DlpDemoService} from '../services/dlp-demo.service'; 7 | 8 | /** 9 | * A selector for an existing table from the user's BQ tables. 
10 | */ 11 | @Component({ 12 | selector: 'bigquery-table', 13 | templateUrl: './bigquery-table.html', 14 | styleUrls: [ 15 | './bigquery-table.css', 16 | ] 17 | }) 18 | export class BigQueryTable implements OnInit { 19 | @Input() bqTableForm: FormGroup; 20 | 21 | bqTables: Observable; 22 | 23 | get dataset(): FormControl { 24 | return this.bqTableForm.get('dataset') as FormControl; 25 | } 26 | 27 | get table(): FormControl { 28 | return this.bqTableForm.get('table') as FormControl; 29 | } 30 | 31 | constructor( 32 | protected dlpDemoService: DlpDemoService, 33 | protected formBuilder: FormBuilder, 34 | ) {} 35 | 36 | ngOnInit() { 37 | /* Configure a listener to get the tables of an input dataset. */ 38 | this.bqTables = this.dataset.valueChanges.pipe( 39 | filter(dataset => dataset !== null && dataset !== ''), 40 | switchMap(dataset => this.dlpDemoService.getTables(dataset)), 41 | map(datasetTables => datasetTables.tables)); 42 | } 43 | 44 | /** 45 | * Creates a FormGroup object to be associated with the BigQueryTable 46 | * component. 47 | */ 48 | static buildEntry(): FormGroup { 49 | return new FormGroup({ 50 | dataset: new FormControl('', Validators.required), 51 | table: new FormControl('', Validators.required), 52 | }); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/current-jobs/current-jobs.component.html: -------------------------------------------------------------------------------- 1 |
2 |

DLP Deid Jobs

3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 |
ID {{job.id}} Name {{job.name}} Input Data {{job.originalQuery}} Redacted Table {{job.deidTable}} Status {{job.status || "pending"}} Timestamp {{job.timestamp}}
45 |
46 | 47 | -------------------------------------------------------------------------------- /dlp/testdata/batch_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "columns": { 3 | "passThrough": [ 4 | {"name": "patient_id", "type": "stringValue"}, 5 | {"name": "record_number", "type": "integerValue"} 6 | ], 7 | "inspect": [ 8 | {"name": "note", "type": "stringValue"} 9 | ] 10 | }, 11 | "infoTypeTransformations": [ 12 | { 13 | "infoTypes": [ 14 | {"name": "PHONE_NUMBER"}, 15 | {"name": "US_CENSUS_NAME"}, 16 | {"name": "US_FEMALE_NAME"}, 17 | {"name": "US_MALE_NAME"}, 18 | {"name": "KnownFirstName"}, 19 | {"name": "KnownLastName"}, 20 | {"name": "DatasetFirstName"} 21 | ], 22 | "primitiveTransformation": { 23 | "replaceWithInfoTypeConfig": {} 24 | } 25 | } 26 | ], 27 | "keyColumns": ["patient_id", "record_number"], 28 | "tagCategories": [ 29 | { 30 | "name": "NAME", 31 | "infoTypes": ["US_CENSUS_NAME"] 32 | }, 33 | { 34 | "name": "FIRST_NAME", 35 | "infoTypes": [ 36 | "US_FEMALE_NAME", "US_MALE_NAME", "KnownFirstName", "DatasetFirstName" 37 | ] 38 | }, 39 | { 40 | "name": "LAST_NAME", 41 | "infoTypes": ["KnownLastName"] 42 | }, 43 | { 44 | "name": "PHONE", 45 | "infoTypes": ["PHONE_NUMBER"] 46 | }, 47 | { 48 | "name": "UNKNOWN_CLASSIFICATION_TYPE", 49 | "infoTypes": [] 50 | } 51 | ], 52 | "perRowTypes": [ 53 | { 54 | "columnName": "first_name", 55 | "infoTypeName": "KnownFirstName" 56 | }, 57 | { 58 | "columnName": "last_name", 59 | "infoTypeName": "KnownLastName" 60 | } 61 | ], 62 | "perDatasetTypes": [ 63 | { 64 | "infoTypes": [ 65 | { 66 | "columnName": "first_name", 67 | "infoTypeName": "DatasetFirstName" 68 | } 69 | ] 70 | } 71 | ] 72 | } 73 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/run-deidentify/run-deidentify.component.spec.ts: -------------------------------------------------------------------------------- 1 | import 
{HttpClientTestingModule} from '@angular/common/http/testing'; 2 | import {async, ComponentFixture, TestBed} from '@angular/core/testing'; 3 | import {FormsModule, ReactiveFormsModule} from '@angular/forms'; 4 | import {BrowserAnimationsModule} from '@angular/platform-browser/animations'; 5 | 6 | import {AppMaterialModule} from '../../app/material.module'; 7 | 8 | import {RunDeidentifyComponent} from './run-deidentify.component'; 9 | 10 | describe('RunDeidentifyComponent', () => { 11 | let component: RunDeidentifyComponent; 12 | let fixture: ComponentFixture; 13 | 14 | beforeEach(async(() => { 15 | TestBed 16 | .configureTestingModule({ 17 | imports: [ 18 | BrowserAnimationsModule, HttpClientTestingModule, 19 | ReactiveFormsModule, FormsModule, AppMaterialModule 20 | ], 21 | declarations: [RunDeidentifyComponent], 22 | }) 23 | .compileComponents(); 24 | })); 25 | 26 | beforeEach(() => { 27 | fixture = TestBed.createComponent(RunDeidentifyComponent); 28 | component = fixture.componentInstance; 29 | component.ngOnInit(); 30 | fixture.detectChanges(); 31 | }); 32 | 33 | it('should create', () => { 34 | expect(component).toBeTruthy(); 35 | }); 36 | 37 | it('should be invalid when empty', () => { 38 | expect(component.dlpForm.valid).toBeFalsy(); 39 | }); 40 | 41 | it('output table name should be valid', () => { 42 | component.outputTable.setValue('invalid table'); 43 | expect(component.outputTable.valid).toBeFalsy(); 44 | component.outputTable.setValue('33startWithNum'); 45 | expect(component.outputTable.valid).toBeFalsy(); 46 | component.outputTable.setValue('validName'); 47 | expect(component.outputTable.valid).toBeTruthy(); 48 | }); 49 | }); 50 | -------------------------------------------------------------------------------- /mae/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | load("@my_deps//:requirements.bzl", "requirement") 16 | 17 | package( 18 | default_visibility = ["//visibility:public"], 19 | ) 20 | 21 | # Build rules for DeID evaluation tools. 22 | 23 | licenses(["notice"]) # Apache License 2.0 24 | 25 | py_binary( 26 | name = "upload_files_to_bq", 27 | srcs = ["upload_files_to_bq.py"], 28 | python_version = "PY3", 29 | srcs_version = "PY2AND3", 30 | deps = [ 31 | requirement("apache_beam"), 32 | requirement("pyarrow"), 33 | requirement("numpy"), 34 | requirement("fastavro"), 35 | requirement("google-api-core"), 36 | requirement("google-apitools"), 37 | requirement("google-cloud-storage"), 38 | "//common:gcsutil", 39 | ], 40 | ) 41 | 42 | py_binary( 43 | name = "remove_invalid_characters", 44 | srcs = ["remove_invalid_characters.py"], 45 | python_version = "PY3", 46 | srcs_version = "PY2AND3", 47 | deps = [ 48 | "//common:mae", 49 | ], 50 | ) 51 | 52 | py_binary( 53 | name = "bq_to_xml", 54 | srcs = ["bq_to_xml.py"], 55 | python_version = "PY2", 56 | ) 57 | 58 | py_binary( 59 | name = "txt_to_xml", 60 | srcs = ["txt_to_xml.py"], 61 | python_version = "PY2", 62 | ) 63 | -------------------------------------------------------------------------------- /eval/run_pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Evaluate DeID findings against a 'golden' baseline. 16 | 17 | All input/output files should be on Google Cloud Storage. 18 | 19 | Requires Apache Beam client and Google Python API Client: 20 | pip install --upgrade apache_beam 21 | pip install --upgrade google-api-python-client 22 | """ 23 | 24 | from __future__ import absolute_import 25 | 26 | import argparse 27 | import logging 28 | import sys 29 | 30 | from eval import run_pipeline_lib 31 | 32 | 33 | def main(): 34 | logging.getLogger().setLevel(logging.INFO) 35 | 36 | parser = argparse.ArgumentParser( 37 | description='Evaluate DeID findings on Google Cloud.') 38 | run_pipeline_lib.add_all_args(parser) 39 | args, pipeline_args = parser.parse_known_args(sys.argv[1:]) 40 | 41 | errors = run_pipeline_lib.run_pipeline( 42 | args.mae_input_pattern, args.mae_golden_dir, args.results_dir, 43 | args.mae_input_query, args.mae_golden_table, 44 | args.write_per_note_stats_to_gcs, args.results_table, 45 | args.per_note_results_table, args.debug_output_table, 46 | args.types_to_ignore or [], None, pipeline_args) 47 | 48 | if errors: 49 | logging.error(errors) 50 | return 1 51 | 52 | logging.info('Ran eval.') 53 | 54 | if __name__ == '__main__': 55 | main() 56 | -------------------------------------------------------------------------------- /dlp/testdata/multi_column_config.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "columns": { 3 | "inspect": [ 4 | {"name": "note", "type": "stringValue"}, 5 | {"name": "last_name", "type": "stringValue"} 6 | ], 7 | "passThrough": [ 8 | {"name": "patient_id", "type": "stringValue"}, 9 | {"name": "record_number", "type": "integerValue"} 10 | ] 11 | }, 12 | "infoTypeTransformations": [ 13 | { 14 | "infoTypes": [ 15 | {"name": "PHONE_NUMBER"}, 16 | {"name": "US_CENSUS_NAME"}, 17 | {"name": "US_FEMALE_NAME"}, 18 | {"name": "US_MALE_NAME"}, 19 | {"name": "KnownFirstName"}, 20 | {"name": "KnownLastName"}, 21 | {"name": "DatasetFirstName"} 22 | ], 23 | "primitiveTransformation": { 24 | "replaceWithInfoTypeConfig": {} 25 | } 26 | } 27 | ], 28 | "keyColumns": ["patient_id", "record_number"], 29 | "tagCategories": [ 30 | { 31 | "name": "NAME", 32 | "infoTypes": ["US_CENSUS_NAME"] 33 | }, 34 | { 35 | "name": "FIRST_NAME", 36 | "infoTypes": [ 37 | "US_FEMALE_NAME", "US_MALE_NAME", "KnownFirstName", "DatasetFirstName" 38 | ] 39 | }, 40 | { 41 | "name": "LAST_NAME", 42 | "infoTypes": ["KnownLastName"] 43 | }, 44 | { 45 | "name": "PHONE", 46 | "infoTypes": ["PHONE_NUMBER"] 47 | }, 48 | { 49 | "name": "UNKNOWN_CLASSIFICATION_TYPE", 50 | "infoTypes": [] 51 | } 52 | ], 53 | "perRowTypes": [ 54 | { 55 | "columnName": "first_name", 56 | "infoTypeName": "KnownFirstName" 57 | }, 58 | { 59 | "columnName": "last_name", 60 | "infoTypeName": "KnownLastName" 61 | } 62 | ], 63 | "perDatasetTypes": [ 64 | { 65 | "infoTypes": [ 66 | { 67 | "columnName": "first_name", 68 | "infoTypeName": "DatasetFirstName" 69 | } 70 | ] 71 | } 72 | ] 73 | } 74 | -------------------------------------------------------------------------------- /dlp/testdata/multi_column_request.json: -------------------------------------------------------------------------------- 1 | { 2 | "deidentifyConfig": { 3 | "recordTransformations": { 4 | "fieldTransformations": [ 5 | { 6 | "fields": [{"name": "note"}, {"name": 
"last_name"}], 7 | "infoTypeTransformations": { 8 | "transformations": [ 9 | { 10 | "infoTypes": [{"name": "PHONE_NUMBER"}, 11 | {"name": "US_CENSUS_NAME"}, 12 | {"name": "US_FEMALE_NAME"}, 13 | {"name": "US_MALE_NAME"}, 14 | {"name": "KnownFirstName"}, 15 | {"name": "KnownLastName"}, 16 | {"name": "DatasetFirstName"}], 17 | "primitiveTransformation": {"replaceWithInfoTypeConfig": {}} 18 | } 19 | ] 20 | } 21 | } 22 | ] 23 | } 24 | }, 25 | 26 | "inspectConfig": { 27 | "infoTypes": [{"name": "PHONE_NUMBER"}, 28 | {"name": "US_CENSUS_NAME"}, 29 | {"name": "US_FEMALE_NAME"}, 30 | {"name": "US_MALE_NAME"}], 31 | "customInfoTypes": [ 32 | {"dictionary": 33 | {"wordList": {"words": ["Boaty"]}}, 34 | "infoType": {"name": "DatasetFirstName"}}, 35 | {"dictionary": 36 | {"wordList": {"words": ["Boaty"]}}, 37 | "infoType": {"name": "KnownFirstName"}}, 38 | {"dictionary": 39 | {"wordList": {"words": ["McBoatface"]}}, 40 | "infoType": {"name": "KnownLastName"}}]}, 41 | 42 | "item": {"table": { 43 | "headers": [{"name": "note"}, {"name": "last_name"}, 44 | {"name": "patient_id"}, {"name": "record_number"}], 45 | "rows": [{"values": [ 46 | {"stringValue": "McBoatface"}, 47 | {"stringValue": "text and PID and MORE PID"}, 48 | {"stringValue": "111"}, {"integerValue": "1"} 49 | ]}] 50 | }} 51 | } 52 | -------------------------------------------------------------------------------- /dlp/testdata/batch_request.json: -------------------------------------------------------------------------------- 1 | { 2 | "deidentifyConfig": { 3 | "recordTransformations": { 4 | "fieldTransformations": [ 5 | { 6 | "fields": [{"name": "note"}], 7 | "infoTypeTransformations": { 8 | "transformations": [{ 9 | "infoTypes": [{"name": "PHONE_NUMBER"}, 10 | {"name": "US_CENSUS_NAME"}, 11 | {"name": "US_FEMALE_NAME"}, 12 | {"name": "US_MALE_NAME"}, 13 | {"name": "KnownFirstName"}, 14 | {"name": "KnownLastName"}, 15 | {"name": "DatasetFirstName"}], 16 | "primitiveTransformation": {"replaceWithInfoTypeConfig": {}} 
17 | }] 18 | } 19 | } 20 | ] 21 | } 22 | }, 23 | 24 | "inspectConfig": { 25 | "infoTypes": [{"name": "PHONE_NUMBER"}, 26 | {"name": "US_CENSUS_NAME"}, 27 | {"name": "US_FEMALE_NAME"}, 28 | {"name": "US_MALE_NAME"}], 29 | "customInfoTypes": [ 30 | {"dictionary": 31 | {"wordList": {"words": ["Boaty", "Zephod"]}}, 32 | "infoType": {"name": "DatasetFirstName"}}, 33 | {"dictionary": 34 | {"wordList": {"words": ["Boaty", "Zephod"]}}, 35 | "infoType": {"name": "KnownFirstName"}}, 36 | {"dictionary": 37 | {"wordList": {"words": ["McBoatface", "Beeblebrox"]}}, 38 | "infoType": {"name": "KnownLastName"}}]}, 39 | 40 | "item": {"table": { 41 | "rows": [{"values": [{"stringValue": "text and PID and MORE PID"}, 42 | {"stringValue": "111"}, {"integerValue": "1"}]}, 43 | {"values": [{"stringValue": "note2 text"}, 44 | {"stringValue": "222"}, {"integerValue": "2"}]}], 45 | "headers": [ 46 | {"name": "note"}, {"name": "patient_id"}, {"name": "record_number"}] 47 | }} 48 | } 49 | -------------------------------------------------------------------------------- /mist/README.md: -------------------------------------------------------------------------------- 1 | # MIST on GCP 2 | 3 | This package contains tools to run [MIST](http://mist-deid.sourceforge.net) on 4 | Google Cloud. The example commands use the [bazel build system](http://bazel.build/versions/master/docs/install.html), but can also be 5 | run directly (i.e.`python mist/xxx.py`) if $PYTHONPATH includes this 6 | package. 7 | 8 | ## How to run 9 | 10 | ### BigQuery -> GCS 11 | 12 | bigquery_to_gcs.py runs a query on BigQuery and writes the data to the given 13 | path in Google Cloud Storage. 
14 | 15 | ```shell 16 | bazel run mist:bigquery_to_gcs -- \ 17 | --input_query "select patient_id, note from [${PROJECT?}:${DATASET?}.deid]" \ 18 | --output_path gs://${BUCKET?}/deid/mist/bq-output \ 19 | --project ${PROJECT?} 20 | ``` 21 | 22 | ### Run MIST 23 | 24 | run_mist.py runs MIST via the Pipelines API on a sharded file in GCS. 25 | The MIST code and configuration have been packaged into a Docker container 26 | at gcr.io/genomics-api-test/physionet. Running it requires having the Google 27 | Python API client and Google Cloud Storage client installed: 28 | 29 | ```shell 30 | pip install --upgrade apache_beam 31 | pip install --upgrade google-api-python-client 32 | pip install --upgrade google-cloud-storage 33 | ``` 34 | 35 | Example usage (run with `--help` for more about the arguments): 36 | 37 | ```shell 38 | bazel run mist:run_mist -- \ 39 | --model_filename gs://${BUCKET?}/deid/mist/model \ 40 | --project ${PROJECT?} \ 41 | --log_directory gs://${BUCKET?}/deid/mist/logs \ 42 | --input_pattern gs://${BUCKET?}/deid/mist/input/notes* \ 43 | --output_directory gs://${BUCKET?}/deid/mist/output 44 | ``` 45 | 46 | ### GCS -> BigQuery 47 | 48 | gcs_to_bigquery.py grabs data from a given Google Cloud Storage path and puts 49 | it into BigQuery.
50 | 51 | Example usage: 52 | 53 | ```shell 54 | bazel run mist:gcs_to_bigquery -- \ 55 | --input_pattern gs://${BUCKET?}/deid/mist/bq-output* \ 56 | --output_table "${PROJECT?}:${DATASET?}.deid_output" 57 | ``` 58 | -------------------------------------------------------------------------------- /dlp/testdata/request.json: -------------------------------------------------------------------------------- 1 | { 2 | "deidentifyConfig": { 3 | "recordTransformations": { 4 | "fieldTransformations": [ 5 | { 6 | "fields": [{"name": "note"}], 7 | "infoTypeTransformations": { 8 | "transformations": [{ 9 | "infoTypes": [{"name": "PHONE_NUMBER"}, 10 | {"name": "US_CENSUS_NAME"}, 11 | {"name": "US_FEMALE_NAME"}, 12 | {"name": "US_MALE_NAME"}, 13 | {"name": "KnownFirstName"}, 14 | {"name": "KnownLastName"}, 15 | {"name": "DatasetFirstName"}], 16 | "primitiveTransformation": {"replaceWithInfoTypeConfig": {}} 17 | }] 18 | } 19 | }, 20 | {"fields": [{"name": "field_transform_col"}], 21 | "primitiveTransformation": {"redactConfig": {}}} 22 | ] 23 | } 24 | }, 25 | 26 | "inspectConfig": { 27 | "infoTypes": [{"name": "PHONE_NUMBER"}, 28 | {"name": "US_CENSUS_NAME"}, 29 | {"name": "US_FEMALE_NAME"}, 30 | {"name": "US_MALE_NAME"}], 31 | "customInfoTypes": [ 32 | {"dictionary": 33 | {"wordList": {"words": ["Boaty"]}}, 34 | "infoType": {"name": "DatasetFirstName"}}, 35 | {"dictionary": 36 | {"wordList": {"words": ["Boaty"]}}, 37 | "infoType": {"name": "KnownFirstName"}}, 38 | {"dictionary": 39 | {"wordList": {"words": ["McBoatface"]}}, 40 | "infoType": {"name": "KnownLastName"}}]}, 41 | 42 | "item": {"table": { 43 | "rows": [{"values": [{"stringValue": "text and PID and MORE PID"}, 44 | {"stringValue": "111"}, {"integerValue": "1"}, 45 | {"stringValue": "transform me!"}]}], 46 | "headers": [ 47 | {"name": "note"}, {"name": "patient_id"}, {"name": "record_number"}, 48 | {"name": "field_transform_col"}] 49 | }} 50 | } 51 | 
-------------------------------------------------------------------------------- /offline_tools/redactor/examples/tag_remover/src/test/java/com/google/cloud/healthcare/deid/remover/TagRemoverTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Google LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.healthcare.deid.remover; 18 | 19 | import java.io.File; 20 | import org.apache.commons.io.FileUtils; 21 | import org.junit.Assert; 22 | import org.junit.Rule; 23 | import org.junit.Test; 24 | import org.junit.rules.TemporaryFolder; 25 | import org.junit.runner.RunWith; 26 | import org.junit.runners.JUnit4; 27 | 28 | /** Test basic DICOM tag removal. 
*/ 29 | @RunWith(JUnit4.class) 30 | public final class TagRemoverTest { 31 | @Rule 32 | public TemporaryFolder folder = new TemporaryFolder(); 33 | @Test 34 | public void basicRedaction() throws Exception { 35 | File expectedFile = new File( 36 | TagRemoverTest.class.getClassLoader().getResource("basic-redacted.dcm").getFile()); 37 | File inFile = new File( 38 | TagRemoverTest.class.getClassLoader().getResource("basic.dcm").getFile()); 39 | FileUtils.copyFileToDirectory(inFile, folder.getRoot()); 40 | String inPath = folder.getRoot() + "/basic.dcm"; 41 | String outPath = folder.getRoot() + "/basic-redacted.dcm"; 42 | String[] args = new String[]{"-i", inPath, "-o", outPath, "-t", "PatientName", "00081080"}; 43 | TagRemover.main(args); 44 | Assert.assertTrue(FileUtils.contentEquals(expectedFile, new File(outPath))); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /deid_app/backend/config.py: -------------------------------------------------------------------------------- 1 | """Provides configuration for running the flask server. 2 | 3 | helps with separating the configuration from the actual app. 
4 | """ 5 | 6 | from __future__ import absolute_import 7 | 8 | import os 9 | 10 | import google.auth 11 | 12 | APP_CREDENTIALS = 'GOOGLE_APPLICATION_CREDENTIALS' 13 | if APP_CREDENTIALS not in os.environ or not os.environ[APP_CREDENTIALS]: 14 | raise Exception('You must specify service account credentials in the ' 15 | 'GOOGLE_APPLICATION_CREDENTIALS environment variable.') 16 | _, default_project = google.auth.default() 17 | 18 | 19 | class Config(object): 20 | """Config parameters to run the Deid App server and DB.""" 21 | SECRET_KEY = os.environ.get('SECRET_KEY') or 'dev' 22 | SERVER_NAME = 'localhost:5000' 23 | PROJECT_ID = os.environ.get('PROJECT_ID') or default_project 24 | DLP_API_NAME = 'dlp' 25 | DEID_CONFIG_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), 26 | 'sample_deid_config.json') 27 | CLOUDSQL_USER = os.environ.get('CLOUDSQL_USER') 28 | CLOUDSQL_PASSWORD = os.environ.get('CLOUDSQL_PASSWORD') 29 | CLOUDSQL_DATABASE = os.environ.get('CLOUDSQL_DATABASE') 30 | CLOUDSQL_CONNECTION_NAME = os.environ.get('CLOUDSQL_CONNECTION_NAME') 31 | SQLALCHEMY_TRACK_MODIFICATIONS = False 32 | LOCAL_SQLALCHEMY_DATABASE_URI = ( 33 | 'mysql+pymysql://{user}:{password}@127.0.0.1:3306/{database}').format( 34 | user=CLOUDSQL_USER, password=CLOUDSQL_PASSWORD, 35 | database=CLOUDSQL_DATABASE) 36 | LIVE_SQLALCHEMY_DATABASE_URI = ( 37 | 'mysql+pymysql://{user}:{password}@localhost/{database}' 38 | '?unix_socket=/cloudsql/{connection_name}').format( 39 | user=CLOUDSQL_USER, password=CLOUDSQL_PASSWORD, 40 | database=CLOUDSQL_DATABASE, connection_name=CLOUDSQL_CONNECTION_NAME) 41 | if os.environ.get('GAE_INSTANCE'): 42 | SQLALCHEMY_DATABASE_URI = LIVE_SQLALCHEMY_DATABASE_URI 43 | else: 44 | SQLALCHEMY_DATABASE_URI = LOCAL_SQLALCHEMY_DATABASE_URI 45 | -------------------------------------------------------------------------------- /deid_app/frontend/src/dlp-demo/dlp-image-demo/dlp-image-demo.component.ts: 
-------------------------------------------------------------------------------- 1 | import {Component} from '@angular/core'; 2 | import {DlpDemoService} from '../../services/dlp-demo.service'; 3 | 4 | /** 5 | * This component accepts an image file and redacts it. The redacted version is 6 | * displayed once available 7 | */ 8 | @Component({ 9 | selector: 'app-dlp-image-demo', 10 | templateUrl: './dlp-image-demo.component.html', 11 | styleUrls: ['./dlp-image-demo.component.css'] 12 | }) 13 | export class DlpImageDemoComponent { 14 | base64OrigImg?: string; 15 | base64RedactedImg?: string; 16 | mimeImageType?: string; 17 | 18 | constructor(private readonly dlpDemoService: DlpDemoService) {} 19 | 20 | /** 21 | * returns the header of a byte64 image in order to append it later to a valid 22 | * byte stream to display the image. 23 | */ 24 | private getBase64Header(base64Img: string): string { 25 | return base64Img.split(',')[0]; 26 | } 27 | 28 | /** 29 | * Processes a received image from the user and requests the redacted version 30 | * from the backend. 31 | * @param images the list of files that the user has selected. Only the first 32 | * file within the list will be processed. 33 | */ 34 | redactImage(images: FileList): void { 35 | if (images.length < 1) { 36 | console.log('no image selected'); 37 | return; 38 | } 39 | const reader = new FileReader(); 40 | const selectedImage = images.item(0); 41 | this.mimeImageType = selectedImage.type; 42 | reader.onload = (doneEvent: Event) => { 43 | this.base64OrigImg = reader.result; 44 | this.dlpDemoService.redactImage(this.mimeImageType, this.base64OrigImg) 45 | .subscribe(data => { 46 | /* append the byte stream to the base64 header. 
*/ 47 | this.base64RedactedImg = 48 | `${this.getBase64Header(this.base64OrigImg)},${ 49 | data.redactedByteStream}`; 50 | }); 51 | }; 52 | reader.readAsDataURL(selectedImage); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /deid_app/backend/BUILD: -------------------------------------------------------------------------------- 1 | # This is the web demo for showcasing healthcare Deid capabilities 2 | 3 | load("@my_deps//:requirements.bzl", "requirement") 4 | 5 | package( 6 | default_visibility = ["//visibility:public"], 7 | ) 8 | 9 | licenses(["notice"]) # Apache License 2.0 10 | 11 | py_library( 12 | name = "config", 13 | srcs = ["config.py"], 14 | srcs_version = "PY2AND3", 15 | ) 16 | 17 | py_binary( 18 | name = "model", 19 | srcs = ["model.py"], 20 | python_version = "PY3", 21 | srcs_version = "PY2AND3", 22 | deps = [":model_lib"], 23 | ) 24 | 25 | py_library( 26 | name = "model_lib", 27 | srcs = ["model.py"], 28 | srcs_version = "PY2AND3", 29 | deps = [ 30 | ":config", 31 | requirement("flask"), 32 | requirement("werkzeug"), 33 | requirement("itsdangerous"), 34 | requirement("click"), 35 | requirement("jinja2"), 36 | requirement("markupsafe"), 37 | requirement("sqlalchemy"), 38 | requirement("flask_sqlalchemy"), 39 | requirement("pymysql"), 40 | ], 41 | ) 42 | 43 | py_binary( 44 | name = "server", 45 | srcs = [ 46 | "__init__.py", 47 | "server.py", 48 | ], 49 | data = [ 50 | "sample_deid_config.json", 51 | ], 52 | python_version = "PY3", 53 | srcs_version = "PY2AND3", 54 | deps = [ 55 | ":config", 56 | ":model_lib", 57 | requirement("flask"), 58 | requirement("werkzeug"), 59 | requirement("itsdangerous"), 60 | requirement("click"), 61 | requirement("jinja2"), 62 | requirement("markupsafe"), 63 | requirement("google-cloud-core"), 64 | requirement("google-cloud-storage"), 65 | "//common:gcsutil", 66 | "//common:unicodecsv", 67 | "//dlp:run_deid_lib", 68 | "//eval:run_pipeline_lib", 69 | 
requirement("jsonschema"), 70 | requirement("functools32"), 71 | requirement("pyrsistent"), 72 | requirement("attrs"), 73 | ], 74 | ) 75 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/compare-data/compare-data.component.html: -------------------------------------------------------------------------------- 1 |
2 |

Compare Jobs

3 |
4 | 5 | 9 | 11 | 13 | ID: {{job.id}} - Name: {{job.name}} 14 | 15 | 16 | 17 | 18 | 19 | 21 | 23 | Patient ID: {{record.patientId}} - Record Number: 24 | {{record.recordNumber}} 25 | 26 | 27 | 28 |
29 |
30 | 31 |
33 |
34 |

Original Text

35 |
36 | 37 | 38 | 41 | 42 | 43 |
44 |
45 |
46 |

Deidentified Text

47 |
48 | 49 | 50 | 53 | 54 | 55 |
56 |
57 |
def run(file_pattern, output_dir):
  """Copy each matched file into output_dir with invalid XML chars stripped.

  Args:
    file_pattern: Glob pattern selecting the input files.
    output_dir: Directory where cleaned files are written under the same
      basename.
  """
  logging.info('Matched files: %s', glob.glob(file_pattern))
  for in_path in glob.glob(file_pattern):
    logging.info('Loading file: "%s"', in_path)
    with open(in_path) as src:
      raw_contents = src.read()
    out_path = os.path.join(output_dir, os.path.basename(in_path))
    with open(out_path, 'w') as dst:
      dst.write(mae.remove_invalid_characters(raw_contents))


def main(argv):
  """Parse command-line flags and run the cleanup."""
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser(
      description="Remove characters that aren't valid in XML 1.0.")
  parser.add_argument('--input_pattern', type=str, required=True)
  parser.add_argument('--output_dir', type=str, required=True)
  opts, _ = parser.parse_known_args(argv[1:])
  run(opts.input_pattern, opts.output_dir)
  logging.info('Complete. Output is in "%s".', opts.output_dir)
"columnName": "last_name", 68 | "infoTypeName": "KnownLastName" 69 | } 70 | ], 71 | "perDatasetTypes": [ 72 | { 73 | "infoTypes": [ 74 | { 75 | "columnName": "first_name", 76 | "infoTypeName": "DatasetFirstName" 77 | } 78 | ] 79 | } 80 | ] 81 | } 82 | -------------------------------------------------------------------------------- /offline_tools/redactor/lib/src/main/proto/DicomConfig.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | syntax = "proto2"; 16 | 17 | package redactor; 18 | 19 | option java_package = "com.google.cloud.healthcare.deid.redactor.protos"; 20 | option java_outer_classname = "DicomConfigProtos"; 21 | 22 | // Specifies the parameters needed for de-identification of DICOM stores. 23 | message DicomConfig { 24 | // List of tags to be filtered. 25 | message TagFilterList { 26 | // Tags to be filtered. Tags must be DICOM Data Elements, as defined at: 27 | // http://dicom.nema.org/medical/dicom/current/output/html/part06.html#table_6-1. 28 | // They may be provided by "Keyword" or "Tag". For example "PatientID", 29 | // "00100010". 30 | repeated string tags = 1; 31 | } 32 | 33 | // Profile that determines which tags to keep/remove. 34 | enum TagFilterProfile { 35 | // No tag filtration profile provided. Same as CHC_BASIC. 
36 | TAG_FILTER_PROFILE_UNSPECIFIED = 0; 37 | // Redacts basic tags which commonly contain sensitive data. 38 | CHC_BASIC = 1; 39 | } 40 | 41 | // Determines tag filtering method (meaning which tags to keep/remove). 42 | oneof tag_filter { 43 | // List of tags to keep. Remove all other tags. 44 | TagFilterList keep_list = 1; 45 | // List of tags to remove. Keep all other tags. 46 | TagFilterList remove_list = 2; 47 | // Tag filtering profile that determines which tags to keep/remove. 48 | TagFilterProfile filter_profile = 3; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /deid_app/frontend/src/common/bigquery-new-table.ts: -------------------------------------------------------------------------------- 1 | import {Component} from '@angular/core'; 2 | import {FormBuilder, FormControl, FormGroup, Validators} from '@angular/forms'; 3 | import {combineLatest, Observable} from 'rxjs'; 4 | import {map, startWith} from 'rxjs/operators'; 5 | 6 | import {DlpDemoService} from '../services/dlp-demo.service'; 7 | 8 | import {BigQueryTable} from './bigquery-table'; 9 | 10 | 11 | /** 12 | * A selector for a new table from the user's BQ tables. 13 | */ 14 | @Component({ 15 | selector: 'bigquery-new-table', 16 | templateUrl: './bigquery-new-table.html', 17 | styleUrls: [ 18 | './bigquery-new-table.css', 19 | ] 20 | }) 21 | export class BigQueryNewTable extends BigQueryTable { 22 | private tableFilter: Observable; 23 | 24 | constructor( 25 | dlpDemoService: DlpDemoService, 26 | formBuilder: FormBuilder, 27 | ) { 28 | super(dlpDemoService, formBuilder); 29 | } 30 | 31 | ngOnInit() { 32 | super.ngOnInit(); 33 | 34 | /* Get an Observable of the user's input. */ 35 | const userInput = this.table.valueChanges.pipe(startWith('')); 36 | 37 | /* Combine the user's input and the bqTables to generate filter value. 
*/ 38 | this.tableFilter = 39 | combineLatest(userInput, this.bqTables) 40 | .pipe(map(([input, tables]) => this.includesFilter(input, tables))); 41 | } 42 | 43 | /** 44 | * Filters a list based on whether an entry contains a value as a subsequence. 45 | * @param value tests the list entries against. 46 | * @param list the original list that the filter will go through. 47 | */ 48 | private includesFilter(value: string, list: string[]): string[] { 49 | if (!list) { 50 | return []; 51 | } 52 | const filterValue = value.toLowerCase(); 53 | return list.filter(entry => entry.toLowerCase().includes(filterValue)); 54 | } 55 | 56 | static buildEntry(): FormGroup { 57 | return new FormGroup({ 58 | dataset: new FormControl('', Validators.required), 59 | table: new FormControl( 60 | '', 61 | [Validators.required, Validators.pattern('^[A-Za-z][A-Za-z0-9_]*$')]), 62 | }); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /mist/gcs_to_bigquery_lib_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class GcsToBigQueryTest(unittest.TestCase):
  """Unit tests for the GCS-to-BigQuery record parsing helpers."""

  def testMapToBqInputs(self):
    """map_to_bq_inputs splits a record into patient_id and note."""
    expected = {'patient_id': 111,
                'note': 'text input\nmultiple lines'}
    self.assertEqual(
        expected,
        gcs_to_bigquery_lib.map_to_bq_inputs(
            '111\ntext input\nmultiple lines'))

    # A trailing newline on the record is tolerated.
    self.assertEqual(
        expected,
        gcs_to_bigquery_lib.map_to_bq_inputs(
            '111\ntext input\nmultiple lines\n'))

    # An empty note is still a valid record.
    expected['note'] = ''
    self.assertEqual(
        expected,
        gcs_to_bigquery_lib.map_to_bq_inputs('111\n '))

  @patch('apache_beam.io.filesystems.FileSystems')
  def testMapFileToRecords(self, mock_filesystems):
    """map_file_to_records yields one string per START_OF_RECORD marker."""
    first = ['||||START_OF_RECORD||||1\n', 'some\n', 'contents']
    second = ['||||START_OF_RECORD||||2\n', 'more\n', 'contents']
    mock_filesystems.open.return_value = first + second

    records = list(gcs_to_bigquery_lib.map_file_to_records('filename'))

    self.assertEqual(['1\nsome\ncontents', '2\nmore\ncontents'], records)
4 | 5 | ## Getting Started 6 | 7 | ### Building 8 | 9 | The DICOM redactor library can be built using [Gradle](https://gradle.org/). Please refer to these [instructions](https://gradle.org/install/) to build Gradle for your system. 10 | 11 | To build the library and examples: 12 | 13 | ```shell 14 | cd redactor 15 | ./gradlew build 16 | ``` 17 | 18 | ### Running unit tests 19 | 20 | ```shell 21 | cd redactor 22 | ./gradlew test 23 | ``` 24 | 25 | ## Configuration 26 | 27 | The library's redaction is primarily configured using [protobuf](https://developers.google.com/protocol-buffers/). The configuration is similar to the [DicomConfig](https://cloud.google.com/healthcare/docs/reference/rpc/google.cloud.healthcare.v1beta1/deidentify#dicomconfig) for the deidentify operation in Google's Cloud Healthcare API (although the predefined filter profiles differ). 28 | 29 | The user can configure which tags to redact/remove in one of 3 ways: 30 | 31 | 1. keep_list - a list of DICOM tags to keep untouched. Other tags are removed. 32 | 2. remove_list - a list of DICOM tags to remove. Other tags are kept untouched. 33 | 3. filter_profile - a predefined profile that will keep and remove particular tags. 34 | 35 | See the full configuration [proto](lib/src/main/proto/DicomConfig.proto) for more info. 36 | 37 | To view the sepcific tags removed for a certain profile, see the relevant [textproto](lib/src/main/resource/chc_basic.textproto). 38 | 39 | ## UID Regeneration 40 | 41 | Regardless of the configuration, several UIDs will always be regenerated: SOPInstanceUID, StudyInstanceUID, SeriesInstanceUID, and MediaStorageSOPInstanceUID. 42 | By default, these will be regenerated using the [UUID Derived UID](http://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_B.2.html) method. Optionally, when constucting a redactor, the user can specify their own prefix to use for the regenerated UIDs. 
43 | 44 | ## Sample script 45 | 46 | A [command line utility](examples) for redacting tags using the library has been included. To run: 47 | 48 | ```shell 49 | cd redactor 50 | ./gradlew examples:tag_remover:run --args='-i in.dcm -o out.dcm -t PatientName' 51 | ``` 52 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") 16 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") 17 | 18 | http_archive( 19 | name = "six_archive", # Required by @com_google_protobuf. 20 | build_file = "@com_google_protobuf//:six.BUILD", 21 | url = "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz", 22 | ) 23 | 24 | bind( 25 | name = "six", 26 | actual = "@six_archive//:six", 27 | ) 28 | 29 | git_repository( 30 | name = "bazel_skylib", # Required by @com_google_protobuf. 
31 | branch = "master", 32 | remote = "https://github.com/bazelbuild/bazel-skylib.git", 33 | ) 34 | 35 | http_archive( 36 | name = "com_google_protobuf", 37 | strip_prefix = "protobuf-master", 38 | urls = ["https://github.com/protocolbuffers/protobuf/archive/master.zip"], 39 | ) 40 | 41 | load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") 42 | 43 | protobuf_deps() 44 | 45 | git_repository( 46 | name = "io_bazel_rules_python", 47 | remote = "https://github.com/bazelbuild/rules_python.git", 48 | commit = "3e167dcfb17356c68588715ed324c5e9b76f391d", 49 | ) 50 | 51 | # For PIP support: 52 | load("@io_bazel_rules_python//python:pip.bzl", "pip_repositories") 53 | 54 | pip_repositories() 55 | 56 | load("@io_bazel_rules_python//python:pip.bzl", "pip_import") 57 | # This rule translates the specified requirements.txt into 58 | # @my_deps//:requirements.bzl, which itself exposes a pip_install method. 59 | pip_import( 60 | name = "my_deps", 61 | requirements = "//requirements:requirements.txt", 62 | ) 63 | 64 | # Load the pip_install symbol for my_deps, and create the dependencies' 65 | # repositories. 66 | load("@my_deps//:requirements.bzl", "pip_install") 67 | pip_install() 68 | -------------------------------------------------------------------------------- /common/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | load("@my_deps//:requirements.bzl", "requirement") 16 | 17 | package( 18 | default_visibility = ["//visibility:public"], 19 | ) 20 | 21 | licenses(["notice"]) # Apache License 2.0 22 | 23 | py_library( 24 | name = "run_docker", 25 | srcs = ["run_docker.py"], 26 | srcs_version = "PY2AND3", 27 | deps = [ 28 | ":gcsutil", 29 | requirement("google-auth-httplib2"), 30 | ], 31 | ) 32 | 33 | py_test( 34 | name = "run_docker_test", 35 | srcs = ["run_docker_test.py"], 36 | python_version = "PY2", 37 | srcs_version = "PY2AND3", 38 | deps = [ 39 | ":gcsutil", 40 | ":run_docker", 41 | ], 42 | ) 43 | 44 | py_library( 45 | name = "mae", 46 | srcs = ["mae.py"], 47 | srcs_version = "PY2AND3", 48 | ) 49 | 50 | py_test( 51 | name = "mae_test", 52 | srcs = ["mae_test.py"], 53 | python_version = "PY2", 54 | srcs_version = "PY2AND3", 55 | deps = [ 56 | ":mae", 57 | ], 58 | ) 59 | 60 | py_library( 61 | name = "gcsutil", 62 | srcs = ["gcsutil.py"], 63 | srcs_version = "PY2AND3", 64 | ) 65 | 66 | py_library( 67 | name = "testutil", 68 | srcs = ["testutil.py"], 69 | srcs_version = "PY2AND3", 70 | ) 71 | 72 | py_library( 73 | name = "beam_testutil", 74 | srcs = ["beam_testutil.py"], 75 | srcs_version = "PY2AND3", 76 | deps = [ 77 | ":testutil", 78 | requirement("apache_beam"), 79 | requirement("pyarrow"), 80 | requirement("numpy"), 81 | requirement("fastavro"), 82 | requirement("google-api-core"), 83 | requirement("google-apitools"), 84 | ], 85 | ) 86 | 87 | py_library( 88 | name = "unicodecsv", 89 | srcs = ["unicodecsv.py"], 90 | srcs_version = "PY2AND3", 91 | ) 92 | -------------------------------------------------------------------------------- /eval/testdata/expected_results: -------------------------------------------------------------------------------- 1 | strict_entity_matching_results { 2 | micro_average_results { 3 | true_positives: 3 4 | false_positives: 5 5 
| false_negatives: 4 6 | precision: 0.375 7 | recall: 0.428571 8 | f_score: 0.4 9 | } 10 | macro_average_results { 11 | precision: 0.416667 12 | recall: 0.416667 13 | f_score: 0.416667 14 | } 15 | per_type_micro_average_results { 16 | info_type_category: "TypeA" 17 | stats { 18 | true_positives: 2 19 | false_positives: 4 20 | false_negatives: 3 21 | precision: 0.333333 22 | recall: 0.4 23 | f_score: 0.363636 24 | } 25 | } 26 | per_type_micro_average_results { 27 | info_type_category: "TypeB" 28 | stats { 29 | true_positives: 1 30 | precision: 1.0 31 | recall: 1.0 32 | f_score: 1.0 33 | } 34 | } 35 | per_type_micro_average_results { 36 | info_type_category: "TypeX" 37 | stats { 38 | false_positives: 1 39 | recall: nan 40 | f_score: nan 41 | error_message: "Recall has denominator of zero. f-score is NaN" 42 | } 43 | } 44 | per_type_micro_average_results { 45 | info_type_category: "TypeY" 46 | stats { 47 | false_negatives: 1 48 | precision: nan 49 | f_score: nan 50 | error_message: "Precision has denominator of zero. 
def run(input_query, output_dir, task_name, id_columns, target_column):
  """Query BigQuery and write each row as a MAE XML file on local disk.

  Args:
    input_query: Legacy-SQL query producing the rows to export.
    output_dir: Local directory that receives one .xml file per row.
    task_name: MAE task name embedded in the XML template.
    id_columns: Columns joined with '-' to build each output filename.
    target_column: Column whose text is written into the XML body.

  Raises:
    Exception: If output_dir points at a GCS bucket.
  """
  if output_dir.startswith('gs://'):
    raise Exception('Writing the output to a GCS bucket is not supported; '
                    'please write to a local directory. You can then upload '
                    'your files using "gsutil cp".')
  client = bigquery.Client()
  config = bigquery.job.QueryJobConfig()
  config.use_legacy_sql = True
  rows = client.query(input_query, job_config=config).result()

  for row in rows:
    row_id = '-'.join(str(row.get(col)) for col in id_columns)
    out_path = os.path.join(output_dir, row_id + '.xml')
    with codecs.open(out_path, 'w', encoding='utf-8') as out:
      out.write(TEMPLATE.format(task_name, row.get(target_column)))

  logging.info('Output written to "%s"', output_dir)


def main(argv):
  """Parse flags, check for credentials, and run the export."""
  logging.getLogger().setLevel(logging.INFO)

  cred_var = 'GOOGLE_APPLICATION_CREDENTIALS'
  # bigquery.Client() needs service-account credentials; fail fast if unset.
  if cred_var not in os.environ or not os.environ[cred_var]:
    raise Exception('You must specify service account credentials in the '
                    'GOOGLE_APPLICATION_CREDENTIALS environment variable.')

  parser = argparse.ArgumentParser(
      description='Download BigQuery data and format it as MAE XML.')
  parser.add_argument('--input_query', type=str, required=True)
  parser.add_argument('--local_output_dir', type=str, required=True)
  parser.add_argument('--task_name', type=str, default='InspectPhiTask')
  parser.add_argument('--id_columns', type=str,
                      default='patient_id,record_number')
  parser.add_argument('--target_column', type=str,
                      default='note')
  opts = parser.parse_args(argv[1:])

  run(opts.input_query, opts.local_output_dir, opts.task_name,
      opts.id_columns.split(','), opts.target_column)
def run_pipeline(input_pattern, output_table, pipeline_args):
  """Parse PhysioNet records from GCS and load them into BigQuery.

  Args:
    input_pattern: GCS pattern matching the input files.
    output_table: Destination BigQuery table spec.
    pipeline_args: Extra flags forwarded to the Beam runner.
  """
  pipeline = beam.Pipeline(options=PipelineOptions(pipeline_args))
  sink = beam.io.BigQuerySink(
      output_table,
      schema='patient_id:INTEGER, record_number:INTEGER, note:STRING',
      write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE)
  _ = (pipeline
       | 'match_files' >> beam.Create(f2pn.match_files(input_pattern))
       | 'to_records' >> beam.FlatMap(f2pn.map_file_to_records)
       | 'parse_physionet_record' >> beam.Map(f2pn.parse_physionet_record)
       | 'write' >> beam.io.Write(sink))
  outcome = pipeline.run().wait_until_finish()
  logging.info('GCS to BigQuery result: %s', outcome)


def add_args(parser):
  """Register the flags needed when embedded in a larger program."""
  parser.add_argument('--output_table', type=str, required=True,
                      help='BigQuery table to store output data.')


# Arguments below are only needed when this module runs as a stand-alone
# tool; a larger program supplies the input pattern itself.
def add_all_args(parser):
  """Register every flag needed for stand-alone use."""
  parser.add_argument('--input_pattern', type=str, required=True,
                      help='GCS pattern to read input from.')
  add_args(parser)
def run(file_pattern, output_dir, task_name):
  """Wrap each matched text file in the MAE XML template.

  Reads every file matching file_pattern as UTF-8 and writes
  '<basename>.xml' into output_dir with the contents embedded in TEMPLATE.

  Args:
    file_pattern: Glob pattern selecting the input text files.
    output_dir: Directory receiving the generated XML files.
    task_name: MAE task name inserted into the template.
  """
  logging.info('Matched files: %s', glob.glob(file_pattern))
  for in_path in glob.glob(file_pattern):
    logging.info('Loading file: "%s"', in_path)
    with codecs.open(in_path, encoding='utf-8') as src:
      text = src.read()
    out_path = os.path.join(output_dir, os.path.basename(in_path)) + '.xml'
    with codecs.open(out_path, 'w', encoding='utf-8') as dst:
      dst.write(TEMPLATE.format(task_name, text))


def main(argv):
  """Parse command-line flags and convert the files."""
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser(
      description='Convert text files to MAE XML files.')
  parser.add_argument('--input_pattern', type=str, required=True)
  parser.add_argument('--output_dir', type=str, required=True)
  parser.add_argument('--task_name', type=str, required=True)
  opts, _ = parser.parse_known_args(argv[1:])
  run(opts.input_pattern, opts.output_dir, opts.task_name)
  logging.info('Complete. Output is in "%s".', opts.output_dir)
Output is in "%s".', args.output_dir) 64 | 65 | 66 | if __name__ == '__main__': 67 | main(sys.argv) 68 | -------------------------------------------------------------------------------- /dlp/experimental_deid_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "experimentalConfig": "default", 3 | "columns": { 4 | "passThrough": [ 5 | {"name": "patient_id", "type": "stringValue"}, 6 | {"name": "record_number", "type": "integerValue"} 7 | ], 8 | "inspect": [ 9 | {"name": "note", "type": "stringValue"} 10 | ] 11 | }, 12 | "infoTypeTransformations": [ 13 | { 14 | "infoTypes": [ 15 | {"name": "PERSON_NAME"}, 16 | {"name": "PHONE_NUMBER"}, 17 | {"name": "LOCATION"}, 18 | {"name": "DATE"}, 19 | {"name": "TIME"}, 20 | {"name": "AGE"}, 21 | {"name": "UNKNOWN_IDENTIFIER"}, 22 | {"name": "EMAIL_ADDRESS"} 23 | ], 24 | "primitiveTransformation": { 25 | "replaceWithInfoTypeConfig": {} 26 | } 27 | } 28 | ], 29 | "keyColumns": ["patient_id", "record_number"], 30 | "tagCategories": [ 31 | { 32 | "name": "NAME", 33 | "infoTypes": ["PERSON_NAME"] 34 | }, 35 | { 36 | "name": "AGE", 37 | "infoTypes": ["AGE"] 38 | }, 39 | { 40 | "name": "ORGANIZATION", 41 | "infoTypes": [] 42 | }, 43 | { 44 | "name": "ID", 45 | "infoTypes": ["UNKNOWN_IDENTIFIER"] 46 | }, 47 | { 48 | "name": "DATE", 49 | "infoTypes": ["DATE", "TIME"] 50 | }, 51 | { 52 | "name": "STREET", 53 | "infoTypes": [] 54 | }, 55 | { 56 | "name": "CITY", 57 | "infoTypes": [] 58 | }, 59 | { 60 | "name": "ZIPCODE", 61 | "infoTypes": [] 62 | }, 63 | { 64 | "name": "STATE", 65 | "infoTypes": [] 66 | }, 67 | { 68 | "name": "COUNTRY", 69 | "infoTypes": [] 70 | }, 71 | { 72 | "name": "LOCATION", 73 | "infoTypes": ["LOCATION"] 74 | }, 75 | { 76 | "name": "HOSPITAL", 77 | "infoTypes": [] 78 | }, 79 | { 80 | "name": "PHONE", 81 | "infoTypes": ["PHONE_NUMBER"] 82 | }, 83 | { 84 | "name": "EMAIL", 85 | "infoTypes": ["EMAIL_ADDRESS"] 86 | }, 87 | { 88 | "name": "URL", 89 | "infoTypes": [] 90 | 
}, 91 | { 92 | "name": "PROFESSION", 93 | "infoTypes": [] 94 | }, 95 | { 96 | "name": "FIRST_NAME", 97 | "infoTypes": [] 98 | }, 99 | { 100 | "name": "LAST_NAME", 101 | "infoTypes": [] 102 | }, 103 | { 104 | "name": "UNKNOWN_CLASSIFICATION_TYPE", 105 | "infoTypes": [] 106 | } 107 | ] 108 | } 109 | -------------------------------------------------------------------------------- /dlp/radiology_deid_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "experimentalConfig": "radiology", 3 | "columns": { 4 | "passThrough": [ 5 | {"name": "patient_id", "type": "stringValue"}, 6 | {"name": "record_number", "type": "integerValue"} 7 | ], 8 | "inspect": [ 9 | {"name": "note", "type": "stringValue"} 10 | ] 11 | }, 12 | "infoTypeTransformations": [ 13 | { 14 | "infoTypes": [ 15 | {"name": "PERSON_NAME"}, 16 | {"name": "PHONE_NUMBER"}, 17 | {"name": "LOCATION"}, 18 | {"name": "DATE"}, 19 | {"name": "TIME"}, 20 | {"name": "AGE"}, 21 | {"name": "UNKNOWN_IDENTIFIER"}, 22 | {"name": "EMAIL_ADDRESS"} 23 | ], 24 | "primitiveTransformation": { 25 | "replaceWithInfoTypeConfig": {} 26 | } 27 | } 28 | ], 29 | "keyColumns": ["patient_id", "record_number"], 30 | "tagCategories": [ 31 | { 32 | "name": "NAME", 33 | "infoTypes": ["PERSON_NAME"] 34 | }, 35 | { 36 | "name": "AGE", 37 | "infoTypes": ["AGE"] 38 | }, 39 | { 40 | "name": "ORGANIZATION", 41 | "infoTypes": [] 42 | }, 43 | { 44 | "name": "ID", 45 | "infoTypes": ["UNKNOWN_IDENTIFIER"] 46 | }, 47 | { 48 | "name": "DATE", 49 | "infoTypes": ["DATE", "TIME"] 50 | }, 51 | { 52 | "name": "STREET", 53 | "infoTypes": [] 54 | }, 55 | { 56 | "name": "CITY", 57 | "infoTypes": [] 58 | }, 59 | { 60 | "name": "ZIPCODE", 61 | "infoTypes": [] 62 | }, 63 | { 64 | "name": "STATE", 65 | "infoTypes": [] 66 | }, 67 | { 68 | "name": "COUNTRY", 69 | "infoTypes": [] 70 | }, 71 | { 72 | "name": "LOCATION", 73 | "infoTypes": ["LOCATION"] 74 | }, 75 | { 76 | "name": "HOSPITAL", 77 | "infoTypes": [] 78 | }, 79 | 
{ 80 | "name": "PHONE", 81 | "infoTypes": ["PHONE_NUMBER"] 82 | }, 83 | { 84 | "name": "EMAIL", 85 | "infoTypes": ["EMAIL_ADDRESS"] 86 | }, 87 | { 88 | "name": "URL", 89 | "infoTypes": [] 90 | }, 91 | { 92 | "name": "PROFESSION", 93 | "infoTypes": [] 94 | }, 95 | { 96 | "name": "FIRST_NAME", 97 | "infoTypes": [] 98 | }, 99 | { 100 | "name": "LAST_NAME", 101 | "infoTypes": [] 102 | }, 103 | { 104 | "name": "UNKNOWN_CLASSIFICATION_TYPE", 105 | "infoTypes": [] 106 | } 107 | ] 108 | } 109 | -------------------------------------------------------------------------------- /mist/bigquery_to_gcs_lib.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Beam pipeline that converts BigQuery data to MIST data in GCS. 
16 | 17 | 18 | Requires Apache Beam client: 19 | pip install --upgrade apache_beam 20 | """ 21 | 22 | from __future__ import absolute_import 23 | 24 | import logging 25 | 26 | import apache_beam as beam 27 | from apache_beam.options.pipeline_options import PipelineOptions 28 | 29 | 30 | def map_to_mist_record(row): 31 | """Put the table row into our MIST format.""" 32 | if 'patient_id' not in row or 'note' not in row: 33 | logging.error('Missing one or more of (patient_id, note): %s', row) 34 | return None 35 | # There is no offical "MIST Format", so we use: 36 | # ||||START_OF_RECORD||||\n 37 | return '||||START_OF_RECORD||||%s\n%s' % (row['patient_id'], row['note']) 38 | 39 | 40 | def run_pipeline(input_query, output_path, pipeline_args): 41 | p = beam.Pipeline(options=PipelineOptions(pipeline_args)) 42 | _ = (p 43 | | 'read' >> beam.io.Read(beam.io.BigQuerySource(query=input_query)) 44 | | 'to_mist' >> beam.Map(map_to_mist_record) 45 | | 'write' >> beam.io.WriteToText(output_path)) 46 | result = p.run().wait_until_finish() 47 | 48 | logging.info('BigQuery to GCS result: %s', result) 49 | 50 | 51 | def add_args(parser): 52 | """Add command-line arguments to the program.""" 53 | parser.add_argument( 54 | '--input_query', type=str, required=True, 55 | help=('BigQuery query to provide input data. Must yield rows with 2 ' 56 | 'fields: (patient_id, note).')) 57 | 58 | 59 | # Add arguments that won't be explicitly specified when this module is used as 60 | # part of a larger program. These args are only needed when this is run as a 61 | # stand-alone tool. 
62 | def add_all_args(parser): 63 | """Add command-line arguments to the program.""" 64 | parser.add_argument('--output_path', type=str, required=True, 65 | help='GCS path to write the output to.') 66 | add_args(parser) 67 | -------------------------------------------------------------------------------- /deid_app/frontend/src/services/dlp-demo.service.spec.ts: -------------------------------------------------------------------------------- 1 | import {HttpClientTestingModule, HttpTestingController} from '@angular/common/http/testing'; 2 | import {getTestBed, inject, TestBed} from '@angular/core/testing'; 3 | 4 | import {environment} from '../environments/environment'; 5 | 6 | import { 7 | DlpDemoService, 8 | RedactImgResponse, 9 | ListJobsResponse, 10 | } from './dlp-demo.service'; 11 | 12 | describe('DlpDemoService', () => { 13 | let service: DlpDemoService; 14 | let httpMock: HttpTestingController; 15 | 16 | beforeEach(() => { 17 | TestBed.configureTestingModule( 18 | {imports: [HttpClientTestingModule], providers: [DlpDemoService]}); 19 | const testbed = getTestBed(); 20 | httpMock = testbed.get(HttpTestingController); 21 | service = testbed.get(DlpDemoService); 22 | }); 23 | 24 | it('should be created', () => { 25 | expect(service).toBeTruthy(); 26 | }); 27 | 28 | it('should return a RedactImgResponse observable', () => { 29 | const result: RedactImgResponse = { 30 | redactedByteStream: 'redactedStream', 31 | }; 32 | const imageType = 'jpeg'; 33 | const fakeImgStream = 'origSteam'; 34 | 35 | let redactImageResponse: RedactImgResponse; 36 | service.redactImage(imageType, fakeImgStream).subscribe(res => { 37 | redactImageResponse = res; 38 | }); 39 | 40 | const req = httpMock.expectOne(`${environment.server}/api/demo/image`); 41 | expect(req.request.method).toBe('POST'); 42 | expect(req.cancelled).toBeFalsy(); 43 | req.flush(result); 44 | expect(redactImageResponse).toBeDefined(); 45 | expect(redactImageResponse.redactedByteStream).toEqual('redactedStream'); 46 
| httpMock.verify(); 47 | }); 48 | 49 | it('should return a ListJobsResponse Observable', () => { 50 | const result: ListJobsResponse = { 51 | jobs: [ 52 | { 53 | id: 1, 54 | name: 'test', 55 | originalQuery: 'select * from test_table', 56 | deidTable: 'result', 57 | status: 200, 58 | timestamp: new Date(), 59 | }, 60 | ] 61 | }; 62 | let listJobsResponse: ListJobsResponse; 63 | service.getDeidJobs().subscribe(res => { 64 | listJobsResponse = res; 65 | }); 66 | 67 | const req = httpMock.expectOne( 68 | `${environment.server}/api/deidentify`); 69 | expect(req.request.method).toBe('GET'); 70 | expect(req.cancelled).toBeFalsy(); 71 | req.flush(result); 72 | expect(listJobsResponse).toBeDefined(); 73 | expect(listJobsResponse.jobs[0]).toEqual(result.jobs[0]); 74 | httpMock.verify(); 75 | }); 76 | }); 77 | -------------------------------------------------------------------------------- /physionet/bigquery_to_gcs_lib.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Beam pipeline that converts BigQuery data to PhysioNet records. 
16 | 17 | 18 | Requires Apache Beam client: 19 | pip install --upgrade apache_beam 20 | """ 21 | 22 | from __future__ import absolute_import 23 | 24 | import logging 25 | 26 | import apache_beam as beam 27 | from apache_beam.options.pipeline_options import PipelineOptions 28 | 29 | 30 | def map_to_physionet_record(row): 31 | """Put the table row into PhysioNet DeID format.""" 32 | if 'patient_id' not in row or 'record_number' not in row or 'note' not in row: 33 | logging.error( 34 | 'Missing one or more of (patient_id, record_number, note): %s', row) 35 | return None 36 | return 'START_OF_RECORD=%s||||%s||||\n%s\n||||END_OF_RECORD' % ( 37 | row['patient_id'], row['record_number'], row['note']) 38 | 39 | 40 | def run_pipeline(input_query, output_file, pipeline_args): 41 | p = beam.Pipeline(options=PipelineOptions(pipeline_args)) 42 | _ = (p 43 | | 'read' >> beam.io.Read(beam.io.BigQuerySource(query=input_query)) 44 | | 'to_physionet' >> beam.Map(map_to_physionet_record) 45 | | 'write' >> beam.io.WriteToText(output_file)) 46 | result = p.run().wait_until_finish() 47 | 48 | logging.info('BigQuery to GCS result: %s', result) 49 | 50 | 51 | def add_args(parser): 52 | """Add command-line arguments to the program.""" 53 | parser.add_argument( 54 | '--input_query', type=str, required=True, 55 | help=('BigQuery query to provide input data. Must yield rows with 3 ' 56 | 'fields: (patient_id, record_number, note).')) 57 | 58 | 59 | # Add arguments that won't be explicitly specified when this module is used as 60 | # part of a larger program. These args are only needed when this is run as a 61 | # stand-alone tool. 
62 | def add_all_args(parser): 63 | """Add command-line arguments to the program.""" 64 | parser.add_argument('--output_file', type=str, required=True, 65 | help='GCS directory to write the output to.') 66 | add_args(parser) 67 | -------------------------------------------------------------------------------- /eval/results.proto: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | syntax = "proto3"; 18 | 19 | package eval; 20 | 21 | message Results { 22 | // Aggregate results for binary token matching comparison. 23 | AggregateMatchingResults binary_token_matching_results = 1; 24 | 25 | // Aggregate results for strict entity matching comparison. 26 | AggregateMatchingResults strict_entity_matching_results = 2; 27 | 28 | // Per-note binary token matching results. 29 | repeated IndividualResult individual_binary_token_matching_results = 3; 30 | } 31 | 32 | message AggregateMatchingResults { 33 | // TP, FP, FN (true/false positive/negatives) are summed across the individual 34 | // metrics and used in aggregate to calculate these scores. 35 | Stats micro_average_results = 1; 36 | // The results are the average of all the individually-calculated results 37 | // (e.g. precision is the mean of all the individual precision scores). 
38 | // true_positive, false_positive, and false_negative are not used here. 39 | Stats macro_average_results = 2; 40 | // Stats broken out per-infoType. 41 | repeated PerTypeStats per_type_micro_average_results = 3; 42 | } 43 | 44 | message PerTypeStats { 45 | // The info type category. 46 | string info_type_category = 1; 47 | // Results for findings of this type. 48 | Stats stats = 2; 49 | } 50 | 51 | message Stats { 52 | // Number of correct PII detections 53 | int32 true_positives = 1; 54 | 55 | // Number of incorrect PII detections 56 | int32 false_positives = 2; 57 | 58 | // Number of PII detections that were missed 59 | int32 false_negatives = 3; 60 | 61 | // Precision = True Positives / (True Positives + False Positives) 62 | float precision = 4; 63 | 64 | // Recall = True Positives / (True Positives + False Negatives) 65 | float recall = 5; 66 | 67 | // F-Score = Harmonic mean of precision and recall 68 | float f_score = 6; 69 | 70 | // If an error occurred, this will contain an explanation. 71 | string error_message = 7; 72 | } 73 | 74 | message IndividualResult { 75 | // String that uniquely identifies the record (e.g. patient ID + 76 | // record number). 77 | string record_id = 1; 78 | 79 | // The raw data and derived data. 80 | Stats stats = 2; 81 | } 82 | -------------------------------------------------------------------------------- /eval/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | load("@com_google_protobuf//:protobuf.bzl", "py_proto_library") 16 | load("@my_deps//:requirements.bzl", "requirement") 17 | 18 | package( 19 | default_visibility = ["//visibility:public"], 20 | ) 21 | 22 | # Build rules for DeID evaluation tools. 23 | 24 | licenses(["notice"]) # Apache License 2.0 25 | 26 | py_library( 27 | name = "eval_lib", 28 | srcs = ["eval_lib.py"], 29 | srcs_version = "PY2AND3", 30 | deps = [":results_py_pb2"], 31 | ) 32 | 33 | py_test( 34 | name = "eval_lib_test", 35 | srcs = ["eval_lib_test.py"], 36 | srcs_version = "PY2AND3", 37 | deps = [ 38 | ":eval_lib", 39 | ":results_py_pb2", 40 | ], 41 | ) 42 | 43 | py_library( 44 | name = "run_pipeline_lib", 45 | srcs = ["run_pipeline_lib.py"], 46 | srcs_version = "PY2AND3", 47 | deps = [ 48 | ":eval_lib", 49 | ":results_py_pb2", 50 | requirement("apache_beam"), 51 | requirement("pyarrow"), 52 | requirement("numpy"), 53 | requirement("fastavro"), 54 | requirement("google-api-core"), 55 | requirement("google-apitools"), 56 | requirement("google-cloud-storage"), 57 | "//common:gcsutil", 58 | ], 59 | ) 60 | 61 | py_test( 62 | name = "run_pipeline_lib_test", 63 | srcs = ["run_pipeline_lib_test.py"], 64 | data = ["//eval/testdata:expected_results"], 65 | deps = [ 66 | ":results_py_pb2", 67 | ":run_pipeline_lib", 68 | requirement("apache_beam"), 69 | requirement("pyarrow"), 70 | requirement("numpy"), 71 | requirement("fastavro"), 72 | requirement("google-api-core"), 73 | requirement("google-apitools"), 74 | "//common:beam_testutil", 75 | "//common:testutil", 76 | ], 77 | ) 78 | 79 | py_binary( 80 | name = "run_pipeline", 81 | srcs = ["run_pipeline.py"], 82 | python_version = "PY3", 83 | deps = [ 84 | ":run_pipeline_lib", 85 | requirement("google-cloud-storage"), 86 | ], 87 | ) 88 | 89 | proto_library( 90 | name = "results_proto", 91 | srcs = ["results.proto"], 92 | ) 93 | 94 | 
py_proto_library( 95 | name = "results_py_pb2", 96 | srcs = ["results.proto"], 97 | ) 98 | -------------------------------------------------------------------------------- /deid_app/frontend/src/app/app.module.ts: -------------------------------------------------------------------------------- 1 | import {HTTP_INTERCEPTORS, HttpClientModule} from '@angular/common/http'; 2 | import {NgModule} from '@angular/core'; 3 | import {FormsModule, ReactiveFormsModule} from '@angular/forms'; 4 | import {BrowserModule} from '@angular/platform-browser'; 5 | import {BrowserAnimationsModule} from '@angular/platform-browser/animations'; 6 | 7 | import {BigQueryNewTable} from '../common/bigquery-new-table'; 8 | import {BigQueryTable} from '../common/bigquery-table'; 9 | import {SubmitComponent} from '../common/submit_component'; 10 | import {CompareDataComponent} from '../deidentify/compare-data/compare-data.component'; 11 | import {CsvUploadComponent} from '../deidentify/csv-upload/csv-upload.component'; 12 | import {CurrentJobsComponent} from '../deidentify/current-jobs/current-jobs.component'; 13 | import {DeidentifyComponent} from '../deidentify/deidentify.component'; 14 | import {RunDeidentifyComponent} from '../deidentify/run-deidentify/run-deidentify.component'; 15 | import {UploadNotesComponent} from '../deidentify/upload-notes/upload-notes.component'; 16 | import {DlpDemoComponent} from '../dlp-demo/dlp-demo.component'; 17 | import {DlpImageDemoComponent} from '../dlp-demo/dlp-image-demo/dlp-image-demo.component'; 18 | import {DlpTextDemoComponent} from '../dlp-demo/dlp-text-demo/dlp-text-demo.component'; 19 | import {EvalPipelineComponent} from '../evaluate/eval-pipeline/eval-pipeline.component'; 20 | import {EvalStatsComponent} from '../evaluate/eval-stats/eval-stats.component'; 21 | import {EvaluateComponent} from '../evaluate/evaluate.component'; 22 | import {DlpDemoService} from '../services/dlp-demo.service'; 23 | import {RequestInterceptor} from 
'../services/http_interceptor'; 24 | import {ErrorHandler} from '../services/error_handler'; 25 | 26 | import {AppComponent} from './app.component'; 27 | import {AppMaterialModule} from './material.module'; 28 | import {RoutingModule} from './routing.module'; 29 | 30 | @NgModule({ 31 | declarations: [ 32 | AppComponent, 33 | DeidentifyComponent, 34 | DlpDemoComponent, 35 | EvaluateComponent, 36 | UploadNotesComponent, 37 | RunDeidentifyComponent, 38 | BigQueryTable, 39 | BigQueryNewTable, 40 | CompareDataComponent, 41 | CurrentJobsComponent, 42 | DlpImageDemoComponent, 43 | DlpTextDemoComponent, 44 | EvalPipelineComponent, 45 | EvalStatsComponent, 46 | CsvUploadComponent, 47 | SubmitComponent, 48 | ], 49 | imports: [ 50 | BrowserModule, 51 | BrowserAnimationsModule, 52 | AppMaterialModule, 53 | RoutingModule, 54 | HttpClientModule, 55 | FormsModule, 56 | ReactiveFormsModule, 57 | ], 58 | providers: [ 59 | ErrorHandler, 60 | {provide: HTTP_INTERCEPTORS, useClass: RequestInterceptor, multi: true}, 61 | DlpDemoService, 62 | ], 63 | bootstrap: [AppComponent] 64 | }) 65 | export class AppModule { 66 | } 67 | -------------------------------------------------------------------------------- /common/gcsutil.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | """Get files from a GCS bucket matching a pattern.""" 17 | 18 | from __future__ import absolute_import 19 | 20 | import logging 21 | import re 22 | 23 | 24 | class GcsFileName(object): 25 | """Holds a gs:// filename with bucket and blob components.""" 26 | 27 | def __init__(self, bucket, blob): 28 | self.bucket = bucket 29 | self.blob = blob 30 | 31 | @classmethod 32 | def from_path(cls, path): 33 | # Split the input path to get the bucket name and path within the bucket. 34 | re_match = re.match(r'gs://([\w-]+)/(.*)', path) 35 | if not re_match or len(re_match.groups()) != 2: 36 | err = ('Failed to parse input path: "{0}". Expected: ' 37 | 'gs://bucket-name/path/to/file'.format(path)) 38 | logging.error(err) 39 | raise Exception(err) 40 | return cls(re_match.group(1), re_match.group(2)) 41 | 42 | def string(self): 43 | return 'gs://{0}/{1}'.format(self.bucket, self.blob) 44 | 45 | def __str__(self): 46 | return self.string() 47 | 48 | 49 | def find_files(pattern, storage_client): 50 | """Find files on GCS matching the given pattern.""" 51 | f = GcsFileName.from_path(pattern) 52 | bucket_name = f.bucket 53 | file_pattern = f.blob 54 | 55 | # The storage client doesn't take a pattern, just a prefix, so we presume here 56 | # that the only special/regex-like characters used are '?' and '*', and take 57 | # the longest prefix that doesn't contain either of those. 58 | file_prefix = file_pattern 59 | re_result = re.search(r'(.*?)[\?|\*]', file_pattern) 60 | if re_result: 61 | file_prefix = re_result.group(1) 62 | 63 | # Convert file_pattern to a regex by escaping the string, explicitly 64 | # converting the characters we want to treat specially (* and ?), and 65 | # appending '\Z' to the end of the pattern so we match only the full string. 
66 | file_pattern_as_regex = ( 67 | re.escape(file_pattern).replace('\\*', '.*').replace('\\?', '.') + r'\Z') 68 | 69 | bucket = storage_client.lookup_bucket(bucket_name) 70 | if not bucket: 71 | raise Exception('Could not find bucket: "{}"'.format(bucket_name)) 72 | for blob in bucket.list_blobs(prefix=file_prefix): 73 | if not re.match(file_pattern_as_regex, blob.name): 74 | continue 75 | yield GcsFileName(bucket_name, blob.name) 76 | -------------------------------------------------------------------------------- /dlp/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | load("@my_deps//:requirements.bzl", "requirement") 16 | 17 | package( 18 | default_visibility = ["//visibility:public"], 19 | ) 20 | 21 | # Build rules for DLP API DeID tools. 
22 | 23 | licenses(["notice"]) # Apache License 2.0 24 | 25 | py_library( 26 | name = "run_deid_lib", 27 | srcs = ["run_deid_lib.py"], 28 | srcs_version = "PY2AND3", 29 | deps = [ 30 | requirement("apache_beam"), 31 | requirement("pyarrow"), 32 | requirement("numpy"), 33 | requirement("fastavro"), 34 | requirement("google-api-core"), 35 | requirement("google-apitools"), 36 | requirement("google-auth-httplib2"), # buildcleaner: keep 37 | "//common:mae", 38 | "//common:unicodecsv", 39 | ], 40 | ) 41 | 42 | py_test( 43 | name = "run_deid_lib_test", 44 | srcs = ["run_deid_lib_test.py"], 45 | data = [ 46 | ":sample_deid_config.json", 47 | "//dlp/mae_testdata:sample.dtd", 48 | "//dlp/mae_testdata:sample.xml", 49 | "//dlp/testdata:batch_config.json", 50 | "//dlp/testdata:batch_request.json", 51 | "//dlp/testdata:config.json", 52 | "//dlp/testdata:input.csv", 53 | "//dlp/testdata:multi_column_config.json", 54 | "//dlp/testdata:multi_column_request.json", 55 | "//dlp/testdata:request.json", 56 | ], 57 | deps = [ 58 | ":run_deid_lib", 59 | "//common:beam_testutil", 60 | "//common:testutil", 61 | requirement("httplib2"), 62 | ], 63 | ) 64 | 65 | py_binary( 66 | name = "run_deid", 67 | srcs = ["run_deid.py"], 68 | python_version = "PY3", 69 | srcs_version = "PY2AND3", 70 | deps = [ 71 | ":run_deid_lib", 72 | requirement("google-cloud-storage"), 73 | ], 74 | ) 75 | 76 | py_binary( 77 | name = "redact_image", 78 | srcs = ["redact_image.py"], 79 | python_version = "PY3", 80 | srcs_version = "PY2AND3", 81 | deps = [ 82 | ":run_deid_lib", 83 | requirement("google-cloud-core"), 84 | ], 85 | ) 86 | 87 | py_binary( 88 | name = "inspect_dicom", 89 | srcs = ["inspect_dicom.py"], 90 | python_version = "PY3", 91 | srcs_version = "PY2AND3", 92 | deps = [ 93 | ":run_deid_lib", 94 | requirement("google-cloud-core"), 95 | ], 96 | ) 97 | -------------------------------------------------------------------------------- /deid_app/frontend/src/deidentify/csv-upload/csv-upload.component.ts: 
-------------------------------------------------------------------------------- 1 | import {Component, OnDestroy, OnInit, ViewChild} from '@angular/core'; 2 | import {FormControl, FormGroup, Validators} from '@angular/forms'; 3 | import {MatSnackBar} from '@angular/material/snack-bar'; 4 | import {Observable, Subscription} from 'rxjs'; 5 | import {finalize} from 'rxjs/operators'; 6 | 7 | import {BigQueryNewTable} from '../../common/bigquery-new-table'; 8 | import {DlpDemoService} from '../../services/dlp-demo.service'; 9 | 10 | /** 11 | * Allows the user to upload a csv file to BigQuery table. 12 | */ 13 | @Component({ 14 | selector: 'app-csv-upload', 15 | templateUrl: './csv-upload.component.html', 16 | styleUrls: [ 17 | './csv-upload.component.css', 18 | '../deidentify.component.css', 19 | ] 20 | }) 21 | export class CsvUploadComponent implements OnInit, OnDestroy { 22 | private readonly subscriptions = new Subscription(); 23 | 24 | @ViewChild('submitComponent', {static: true}) submitComp; 25 | readonly submitPlaceholder = 'Upload'; 26 | 27 | csvForm = new FormGroup({ 28 | project: new FormControl(''), 29 | bqTable: BigQueryNewTable.buildEntry(), 30 | file: new FormControl('', Validators.required), 31 | }); 32 | 33 | constructor( 34 | private dlpDemoService: DlpDemoService, 35 | public snackBar: MatSnackBar, 36 | ) {} 37 | 38 | get selectedCsv(): FormControl { 39 | return this.csvForm.get('file') as FormControl; 40 | } 41 | 42 | get bqTable(): FormGroup { 43 | return this.csvForm.get('bqTable') as FormGroup; 44 | } 45 | 46 | ngOnInit() { 47 | /* Get the project name from the server. */ 48 | this.subscriptions.add(this.dlpDemoService.project.subscribe( 49 | project => this.csvForm.patchValue({project}))); 50 | } 51 | 52 | ngOnDestroy() { 53 | this.subscriptions.unsubscribe(); 54 | } 55 | 56 | /** 57 | * Reads and uploads a CSV from the user to the backend. The file is stored 58 | * in BigQuery. 59 | * @param csvFiles the list of files that the user has selected. 
Only the 60 | * first file within the list will be processed. 61 | */ 62 | selectCsv(csvFiles: FileList): void { 63 | if (csvFiles.length < 1) { 64 | return; 65 | } 66 | const selectedCsv = csvFiles.item(0); 67 | this.selectedCsv.setValue(selectedCsv); 68 | } 69 | 70 | submit() { 71 | const formData = new FormData(); 72 | const formVal = this.csvForm.value; 73 | formData.append('dataset', formVal.bqTable.dataset); 74 | formData.append('table', formVal.bqTable.table); 75 | formData.append('csv', formVal.file, formVal.file.name); 76 | this.submitComp.waiting = true; 77 | this.dlpDemoService.uploadCsv(formData) 78 | .pipe(finalize(() => this.submitComp.waiting = false)) 79 | .subscribe(res => { 80 | const message = 'Upload successful!'; 81 | this.snackBar.open(message, 'Dismiss', { 82 | duration: 3000, 83 | }); 84 | }); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /physionet/physionet_to_mae_lib_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Tests for physionet.physionet_to_mae_lib.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import unittest 20 | 21 | from common import testutil 22 | from physionet import physionet_to_mae_lib 23 | from mock import Mock 24 | from mock import patch 25 | 26 | 27 | class PhysionetToMaeLibTest(unittest.TestCase): 28 | 29 | @patch('apache_beam.io.filesystems.FileSystems') 30 | @patch('google.cloud.storage.Client') 31 | def testRunPipeline(self, fake_client_fn, mock_filesystems_fn): 32 | result1 = Mock(metadata_list=[Mock(path='bucketname/file-00000-of-00001')]) 33 | mock_filesystems_fn.match.return_value = [result1] 34 | mock_filesystems_fn.open = testutil.fake_open 35 | fake_client_fn.return_value = testutil.FakeStorageClient() 36 | testutil.set_gcs_file('bucketname/file-00000-of-00001.phi', 37 | """ 38 | Patient 1\tNote 1 39 | 17\t17\t20 40 | Patient 1\tNote 2 41 | 0\t0\t3 42 | 8\t8\t16""") 43 | testutil.set_gcs_file('bucketname/file-00000-of-00001.text', 44 | """ 45 | START_OF_RECORD=1||||1|||| 46 | mundane text and PHI 47 | ||||END_OF_RECORD 48 | START_OF_RECORD=1||||2|||| 49 | PHI and MORE PHI as well 50 | ||||END_OF_RECORD""") 51 | 52 | physionet_to_mae_lib.run_pipeline( 53 | 'gs://bucketname/file-?????-of-?????', 'gs://bucketname/output/', 54 | 'InspectPhiTask', 'project-id', pipeline_args=None) 55 | 56 | expected_file1 = """ 57 | 58 | 59 | 60 | 61 | 62 | """ 63 | expected_file2 = """ 64 | 65 | 66 | 67 | 68 | 69 | 70 | """ 71 | self.assertEqual(expected_file1, 72 | testutil.get_gcs_file('bucketname/output/1-1.xml')) 73 | self.assertEqual(expected_file2, 74 | testutil.get_gcs_file('bucketname/output/1-2.xml')) 75 | 76 | if __name__ == '__main__': 77 | unittest.main() 78 | -------------------------------------------------------------------------------- /dlp/run_deid.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Run Google Data Loss Prevention API DeID. 16 | 17 | All input/output files should be on Google Cloud Storage. 18 | 19 | Requires Apache Beam client and Google Python API Client: 20 | pip install --upgrade apache_beam 21 | pip install --upgrade google-api-python-client 22 | """ 23 | 24 | from __future__ import absolute_import 25 | 26 | import argparse 27 | from datetime import datetime 28 | import logging 29 | import os 30 | import sys 31 | 32 | from dlp import run_deid_lib 33 | import google.auth 34 | from google.cloud import bigquery 35 | from google.cloud import storage 36 | 37 | 38 | def main(): 39 | logging.getLogger().setLevel(logging.INFO) 40 | 41 | parser = argparse.ArgumentParser( 42 | description='Run Data Loss Prevention (DLP) DeID on Google Cloud.') 43 | run_deid_lib.add_all_args(parser) 44 | args, pipeline_args = parser.parse_known_args(sys.argv[1:]) 45 | 46 | var = 'GOOGLE_APPLICATION_CREDENTIALS' 47 | if var not in os.environ or not os.environ[var]: 48 | raise Exception('You must specify service account credentials in the ' 49 | 'GOOGLE_APPLICATION_CREDENTIALS environment variable.') 50 | _, default_project = google.auth.default() 51 | 52 | # Parse --project and re-add it to the pipeline args, swapping it out for the 53 | # default if it's not set. 
54 | project = args.project 55 | if not project: 56 | project = default_project 57 | pipeline_args += ['--project', project] 58 | 59 | bq_client = bigquery.Client(project) 60 | bq_config_fn = None 61 | if hasattr(bigquery.job, 'QueryJobConfig'): 62 | bq_config_fn = bigquery.job.QueryJobConfig 63 | 64 | if not args.deid_config_file: 65 | raise Exception('Must provide DeID Config.') 66 | deid_config_json = run_deid_lib.parse_config_file(args.deid_config_file) 67 | timestamp = datetime.utcnow() 68 | 69 | errors = run_deid_lib.run_pipeline( 70 | args.input_query, args.input_table, args.deid_table, args.findings_table, 71 | args.mae_dir, args.mae_table, deid_config_json, args.mae_task_name, 72 | project, storage.Client, bq_client, bq_config_fn, args.dlp_api_name, 73 | args.batch_size, args.dtd_dir, args.input_csv, args.output_csv, timestamp, 74 | pipeline_args) 75 | 76 | if errors: 77 | logging.error(errors) 78 | return 1 79 | 80 | logging.info('Ran DLP API DeID.') 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /mist/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | load("@my_deps//:requirements.bzl", "requirement") 16 | 17 | package( 18 | default_visibility = ["//visibility:public"], 19 | ) 20 | 21 | # Build rules for MIST-on-GCP tools. 22 | 23 | licenses(["notice"]) # Apache License 2.0 24 | 25 | # Rules for running MIST on Docker on GCP. 26 | py_library( 27 | name = "run_mist_lib", 28 | srcs = ["run_mist_lib.py"], 29 | srcs_version = "PY2AND3", 30 | deps = [ 31 | "//common:run_docker", 32 | ], 33 | ) 34 | 35 | py_binary( 36 | name = "run_mist", 37 | srcs = ["run_mist.py"], 38 | python_version = "PY3", 39 | srcs_version = "PY2AND3", 40 | deps = [ 41 | ":run_mist_lib", 42 | requirement("google-cloud-storage"), 43 | ], 44 | ) 45 | 46 | # BigQuery -> GCS rules. 47 | py_library( 48 | name = "bigquery_to_gcs_lib", 49 | srcs = ["bigquery_to_gcs_lib.py"], 50 | deps = [ 51 | requirement("apache_beam"), 52 | requirement("pyarrow"), 53 | requirement("numpy"), 54 | requirement("fastavro"), 55 | requirement("google-api-core"), 56 | requirement("google-apitools"), 57 | ], 58 | ) 59 | 60 | py_binary( 61 | name = "bigquery_to_gcs", 62 | srcs = ["bigquery_to_gcs.py"], 63 | python_version = "PY2", 64 | deps = [":bigquery_to_gcs_lib"], 65 | ) 66 | 67 | py_test( 68 | name = "bigquery_to_gcs_lib_test", 69 | srcs = ["bigquery_to_gcs_lib_test.py"], 70 | python_version = "PY2", 71 | deps = [":bigquery_to_gcs_lib"], 72 | ) 73 | 74 | # GCS -> BigQuery rules. 
# Beam pipeline library that pushes MIST/PhysioNet records from GCS into
# BigQuery (see gcs_to_bigquery_lib.py).
py_library(
    name = "gcs_to_bigquery_lib",
    srcs = ["gcs_to_bigquery_lib.py"],
    deps = [
        requirement("apache_beam"),
        requirement("pyarrow"),
        requirement("numpy"),
        requirement("fastavro"),
        requirement("google-api-core"),
        requirement("google-apitools"),
        # File-matching / record-parsing helpers shared with the
        # physionet pipelines.
        "//physionet:files_to_physionet_records",
    ],
)

# NOTE: these Beam targets still run under Python 2 only.
py_binary(
    name = "gcs_to_bigquery",
    srcs = ["gcs_to_bigquery.py"],
    python_version = "PY2",
    deps = [":gcs_to_bigquery_lib"],
)

py_test(
    name = "gcs_to_bigquery_lib_test",
    srcs = ["gcs_to_bigquery_lib_test.py"],
    python_version = "PY2",
    deps = [":gcs_to_bigquery_lib"],
)
2 |

Statistics For Evaluation Jobs

3 | 4 | 8 | 10 | 12 | ID: {{job.id}} - Name: {{job.name}} 13 | 14 | 15 | 16 | 17 |
18 |

Stats Table

19 | 20 | 23 | 24 | 25 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 |
Info Type {{stat.infoType}} Recall {{(stat.recall | percent:'1.0-2') || 'undefined'}} Precision {{(stat.precision | percent:'1.0-2') || 'undefined'}} FScore {{(stat.fScore | percent:'1.0-2') || 'undefined'}} True Positives {{stat.truePositives}} False Positives {{stat.falsePositives}} False Negatives {{stat.falseNegatives}}
74 |
75 |
import {Component, OnDestroy, OnInit} from '@angular/core';
import {FormControl, FormGroup} from '@angular/forms';
import {MatTableDataSource} from '@angular/material/table';
import {combineLatest, Observable, Subscription} from 'rxjs';
import {filter, map} from 'rxjs/operators';
import {startWith, switchMap} from 'rxjs/operators';

import {DlpDemoService} from '../../services/dlp-demo.service';
import {EvalJob, EvalStats} from '../eval_job';

/**
 * This component displays the results generated in the statistics table after
 * running the Evaluation pipeline.
 */
@Component({
  selector: 'app-eval-stats',
  templateUrl: './eval-stats.component.html',
  styleUrls: ['./eval-stats.component.css', '../evaluate.component.css']
})
export class EvalStatsComponent implements OnInit, OnDestroy {
  /** Collects every stream subscription so ngOnDestroy can tear them down. */
  private readonly subscriptions = new Subscription();

  /**
   * Jobs matching the user's current autocomplete input.
   * NOTE(review): generic type parameters appear to have been lost in
   * extraction — presumably Observable of EvalJob[]; confirm against repo.
   */
  jobsFilter: Observable;
  /** Form control backing the job-selection autocomplete field. */
  statsJobForm = new FormControl('');
  /** Whether the stats table is shown; hidden while a fetch is in flight. */
  displayTable = false;

  /** Stats fetched for the selected job (presumably EvalStats[] — verify). */
  private statsResults: Observable;
  dataSource = new MatTableDataSource();
  /** Column order must match the matColumnDef names in the template. */
  displayedColumns: string[] = [
    'infoType', 'recall', 'precision', 'fScore', 'truePositives',
    'falsePositives', 'falseNegatives'
  ];

  constructor(private dlpDemoService: DlpDemoService) {}

  ngOnInit() {
    /* Get an Observable of the user's input. startWith('') seeds the
     * autocomplete so all jobs are listed before the user types. */
    const jobUserInput =
        this.statsJobForm.valueChanges.pipe(startWith(''));

    /* Combine the user's input and the evalJobs to generate the filter
     * value. The control emits a string while typing and an EvalJob object
     * once an option is selected, hence the typeof check. */
    this.jobsFilter =
        combineLatest(this.dlpDemoService.evalJobs, jobUserInput)
            .pipe(map(([jobs, input]) => {
              let filterValue: string;
              if (typeof input === 'string') {
                filterValue = input.toLowerCase();
              } else {
                filterValue = input.name.toLowerCase();
              }
              return jobs.filter(
                  entry => entry.name.toLowerCase().includes(filterValue));
            }));

    /* Make an API call to retrieve new job stats. Only object emissions
     * (an actual job selection) trigger a fetch; switchMap cancels any
     * in-flight request when a new job is picked. */
    this.statsResults = jobUserInput.pipe(
        filter(input => typeof input !== 'string'),
        switchMap((job: EvalJob) => {
          this.displayTable = false;
          return this.dlpDemoService.getEvalStats(job.id);
        }));

    /* Hook the table up to the stats results. */
    this.subscriptions.add(this.statsResults.subscribe((stats: EvalStats[]) => {
      this.displayTable = true;
      this.dataSource.data = stats;
    }));
  }

  ngOnDestroy() {
    this.subscriptions.unsubscribe();
  }

  /**
   * displayWith function for the autocomplete: renders a selected EvalJob
   * as its name, or '' when nothing is selected.
   */
  displayJob(job: EvalJob|undefined): string {
    if (job) {
      return job.name;
    }
    return '';
  }

  /** Filters the visible table rows by the given text. */
  setFilterTable(filterValue: string): void {
    this.dataSource.filter = filterValue.trim().toLowerCase();
  }
}
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Utilities for unit tests.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | import posixpath 20 | 21 | _fake_gcs = {} 22 | 23 | 24 | class _FakeReader(object): 25 | 26 | def __init__(self, contents): 27 | self.contents = contents.split('\n') 28 | for i in range(len(self.contents)-1): 29 | self.contents[i] += '\n' 30 | 31 | def __iter__(self): 32 | return self.contents.__iter__() 33 | 34 | 35 | def fake_open(filename): 36 | gs_prefix = 'gs://' 37 | if filename.startswith(gs_prefix): 38 | filename = filename[len(gs_prefix):] 39 | return _FakeReader(_fake_gcs[filename]) 40 | 41 | 42 | def set_gcs_file(filename, contents): 43 | _fake_gcs[filename] = contents 44 | 45 | 46 | def append_to_gcs_file(filename, contents): 47 | if filename not in _fake_gcs: 48 | return set_gcs_file(filename, contents) 49 | _fake_gcs[filename] += contents 50 | 51 | 52 | def get_gcs_file(filename): 53 | return _fake_gcs[filename] 54 | 55 | 56 | class _FakeBlob(object): 57 | 58 | def __init__(self, bucket_name, file_name): 59 | self._file_name = posixpath.join(bucket_name, file_name) 60 | self.name = file_name 61 | 62 | def upload_from_string(self, contents): 63 | _fake_gcs[self._file_name] = contents 64 | 65 | def download_as_string(self): 66 | return _fake_gcs[self._file_name] 67 | 68 | 69 | class _FakeBucket(object): 70 | """Fake GCS bucket object.""" 71 | 72 | def __init__(self, bucket_name): 73 | self._bucket_name = bucket_name 74 | 75 | def blob(self, file_name): 76 | return _FakeBlob(self._bucket_name, file_name) 77 | 78 | def list_blobs(self, prefix): 79 | blobs = [] 80 | for name in _fake_gcs: 81 | full_prefix = posixpath.join(self._bucket_name, prefix) 82 | if name.startswith(full_prefix): 83 | blob_name = name[len(self._bucket_name)+1:] 84 | blobs.append(_FakeBlob(self._bucket_name, blob_name)) 85 | return blobs 86 | 87 | def get_blob(self, name): 
88 | if posixpath.join(self._bucket_name, name) not in _fake_gcs: 89 | raise Exception('blob {0} not found in bucket {1}', 90 | name, self._bucket_name) 91 | return _FakeBlob(self._bucket_name, name) 92 | 93 | 94 | class FakeStorageClient(object): 95 | 96 | def get_bucket(self, bucket_name): 97 | return _FakeBucket(bucket_name) 98 | 99 | def lookup_bucket(self, bucket_name): 100 | return _FakeBucket(bucket_name) 101 | 102 | -------------------------------------------------------------------------------- /mist/gcs_to_bigquery_lib.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Beam pipeline that pushes PhysioNet records to BigQuery. 
"""Beam pipeline that pushes MIST-formatted records from GCS to BigQuery.

Requires Apache Beam client:
  pip install --upgrade apache_beam
"""

from __future__ import absolute_import

import logging

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

from physionet import files_to_physionet_records as f2pn


def map_file_to_records(file_path):
  """Parse MIST records from the given file.

  Reads the whole file via the Beam filesystem layer and splits it on the
  ||||START_OF_RECORD|||| delimiter. Chunks without a newline cannot hold a
  "<patient_id>\\n<note>" pair (e.g. text before the first delimiter), so
  they are skipped.
  """
  reader = beam.io.filesystems.FileSystems.open(file_path)
  text = ''.join([line for line in reader])
  for record in text.split('||||START_OF_RECORD||||'):
    if '\n' not in record:
      continue
    yield record


def map_to_bq_inputs(text):
  """Parse one record's text into a BigQuery row dict.

  Args:
    text: record contents of the form "<patient_id>\\n<note>".

  Returns:
    A dict with 'patient_id' (int) and 'note' (str) keys matching the sink
    schema below, or None when either piece is empty.
  """
  # There is no official "MIST Format", so we use:
  # ||||START_OF_RECORD||||\n
  patient_id, note = text.split('\n', 1)
  if not patient_id or not note:
    return
  output = {
      'patient_id': int(patient_id),
      'note': note.strip()
  }
  return output


def run_pipeline(input_pattern, output_table, pipeline_args):
  """Read the records from GCS and write them to BigQuery."""
  p = beam.Pipeline(options=PipelineOptions(pipeline_args))
  _ = (p |
       'match_files' >> beam.Create(f2pn.match_files(input_pattern)) |
       'to_records' >> beam.FlatMap(map_file_to_records) |
       'map_to_bq_inputs' >> beam.Map(map_to_bq_inputs) |
       # WRITE_TRUNCATE: each run replaces the table's previous contents.
       'write' >> beam.io.Write(beam.io.BigQuerySink(
           output_table,
           schema='patient_id:INTEGER, note:STRING',
           write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE)))
  result = p.run().wait_until_finish()
  logging.info('GCS to BigQuery result: %s', result)


def add_args(parser):
  """Add command-line arguments to the program."""
  parser.add_argument('--output_table', type=str, required=True,
                      help='BigQuery table to store output data.')


# Add arguments
that won't be explicitly specified when this module is used as 77 | # part of a larger program. These args are only needed when this is run as a 78 | # stand-alone tool. 79 | def add_all_args(parser): 80 | """Add command-line arguments to the program.""" 81 | parser.add_argument('--input_pattern', type=str, required=True, 82 | help='GCS pattern to read input from.') 83 | add_args(parser) 84 | -------------------------------------------------------------------------------- /offline_tools/redactor/examples/tag_remover/src/main/java/com/google/cloud/healthcare/deid/remover/TagRemover.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Google LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.healthcare.deid.remover; 18 | 19 | import com.google.cloud.healthcare.deid.redactor.DicomRedactor; 20 | import com.google.cloud.healthcare.deid.redactor.protos.DicomConfigProtos.DicomConfig; 21 | import java.io.BufferedInputStream; 22 | import java.io.BufferedOutputStream; 23 | import java.io.File; 24 | import java.io.FileInputStream; 25 | import java.io.FileOutputStream; 26 | import java.io.InputStream; 27 | import java.io.OutputStream; 28 | import java.util.Arrays; 29 | import java.util.List; 30 | import org.apache.commons.cli.CommandLine; 31 | import org.apache.commons.cli.CommandLineParser; 32 | import org.apache.commons.cli.DefaultParser; 33 | import org.apache.commons.cli.HelpFormatter; 34 | import org.apache.commons.cli.Option; 35 | import org.apache.commons.cli.Options; 36 | import org.apache.commons.cli.ParseException; 37 | 38 | /** 39 | * TagRemover is a command line utility that removes tags from DICOM files. 40 | */ 41 | public final class TagRemover { 42 | 43 | public static void main(String[] args) throws Exception { 44 | Options options = new Options(); 45 | Option input = new Option("i", "input", true, "input DICOM file path"); 46 | input.setRequired(true); 47 | options.addOption(input); 48 | Option output = new Option("o", "output", true, "output DICOM file path"); 49 | output.setRequired(true); 50 | options.addOption(output); 51 | Option tags = new Option("t", "tags", true, "DICOM tags to redact"); 52 | tags.setRequired(true); 53 | tags.setArgs(Option.UNLIMITED_VALUES); 54 | options.addOption(tags); 55 | 56 | CommandLineParser parser = new DefaultParser(); 57 | HelpFormatter formatter = new HelpFormatter(); 58 | CommandLine cmd; 59 | 60 | try { 61 | cmd = parser.parse(options, args); 62 | } catch (ParseException e) { 63 | System.out.println(e.getMessage()); 64 | formatter.printHelp("tagremove", options); 65 | return; 66 | } 67 | 68 | InputStream is = 69 | new BufferedInputStream(new FileInputStream(new 
File(cmd.getOptionValue("input")))); 70 | OutputStream os = 71 | new BufferedOutputStream(new FileOutputStream(new File(cmd.getOptionValue("output")))); 72 | 73 | List tagList = Arrays.asList(cmd.getOptionValues("tags")); 74 | DicomConfig config = DicomConfig.newBuilder().setRemoveList( 75 | DicomConfig.TagFilterList.newBuilder().addAllTags(tagList)).build(); 76 | 77 | DicomRedactor redactor = new DicomRedactor(config); 78 | redactor.redact(is, os); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /common/beam_testutil.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
"""Utilities for unit tests.

Beam-facing fakes: a bounded in-memory source, a write transform that
appends to the fake GCS store from common.testutil, and a fake BigQuery
sink backed by a module-level dict so tests can assert on written rows.
"""

from __future__ import absolute_import

import collections
import sys

import apache_beam as beam
from apache_beam.coders.coders import ToStringCoder
from apache_beam.io import iobase
from common import testutil

# Module-level fake BigQuery database: table name -> list of written rows.
_fake_bq_db = collections.defaultdict(list)


class FakeSource(iobase.BoundedSource):
  """Bounded source that yields a fixed, in-memory list of records."""

  def __init__(self):
    self._records = []

  def get_range_tracker(self, unused_a, unused_b):
    # No range splitting is needed for the in-memory fake.
    return None

  def read(self, unused_range_tracker):
    for record in self._records:
      yield record


class DummyWriteTransform(beam.PTransform):
  """A transform that replaces iobase.WriteToText in tests."""

  def __init__(self, filename=None):
    # Strip the scheme so the name matches the keys used by testutil's
    # fake GCS store.
    gs_prefix = 'gs://'
    if filename.startswith(gs_prefix):
      filename = filename[len(gs_prefix):]
    self.filename = filename

  class WriteDoFn(beam.DoFn):
    """DoFn to write to fake GCS."""

    def __init__(self, filename):
      self.filename = filename
      self.file_obj = None
      self.coder = ToStringCoder()

    def start_bundle(self):
      pass

    def process(self, element):
      e = element
      # On Python 2 the element may be unicode; encode it to str first.
      if sys.version < '3':
        e = self.coder.encode(element)
      testutil.append_to_gcs_file(self.filename, e + '\n')

    def finish_bundle(self):
      pass

  def expand(self, pcoll):
    return pcoll | 'DummyWriteForTesting' >> beam.ParDo(
        DummyWriteTransform.WriteDoFn(self.filename))


class _FakeBqWriter(iobase.Writer):
  """Writer that appends rows to the in-memory table."""

  def __init__(self, table_name):
    self._table_name = table_name
    # Creating a writer truncates any previous contents of the table,
    # mirroring a WRITE_TRUNCATE disposition.
    _fake_bq_db[table_name] = []

  def write(self, value):
    _fake_bq_db[self._table_name].append(value)

  def close(self):
    pass


class FakeSink(iobase.Sink):
  """Fake BigQuery sink object."""

  def __init__(self, table_name):
    self._writer = _FakeBqWriter(table_name)

  def initialize_write(self):
    pass

  def open_writer(self, unused_init_result, unused_uid):
    # All bundles share one writer; fine for single-threaded tests.
    return self._writer

  def pre_finalize(self, unused_init_result, unused_writer_results):
    pass

  def finalize_write(self, unused_access_token, unused_table_names,
                     unused_pre_finalize_results=None):
    pass


def get_table(table_name):
  """Return the rows written to the given fake BigQuery table."""
  return _fake_bq_db[table_name]


# --- physionet/physionet_to_mae_lib.py ---
# Copyright 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16 | 17 | Requires Apache Beam client: 18 | pip install --upgrade apache_beam 19 | """ 20 | 21 | from __future__ import absolute_import 22 | 23 | import logging 24 | import posixpath 25 | 26 | import apache_beam as beam 27 | from apache_beam.options.pipeline_options import PipelineOptions 28 | 29 | from common import gcsutil 30 | from common import mae 31 | from physionet import files_to_physionet_records as f2pn 32 | from google.cloud import storage 33 | 34 | 35 | def write_mae(mae_result, project, mae_dir): 36 | """Write the MAE results to GCS.""" 37 | storage_client = storage.Client(project) 38 | filename = '{}.xml'.format(mae_result.record_id) 39 | gcs_name = gcsutil.GcsFileName.from_path(mae_dir) 40 | bucket = storage_client.get_bucket(gcs_name.bucket) 41 | blob = bucket.blob(posixpath.join(gcs_name.blob, filename)) 42 | blob.upload_from_string(mae_result.mae_xml) 43 | 44 | 45 | def run_pipeline(input_pattern, output_dir, mae_task_name, project, 46 | pipeline_args): 47 | """Read the physionet records from GCS and write them out as MAE.""" 48 | p = beam.Pipeline(options=PipelineOptions(pipeline_args)) 49 | _ = (p | 50 | 'match_files' >> beam.Create(f2pn.match_files(input_pattern)) | 51 | 'to_records' >> beam.FlatMap(f2pn.map_phi_to_findings) | 52 | 'generate_mae' >> beam.Map(mae.generate_mae, mae_task_name, {}, 53 | ['patient_id', 'record_number']) | 54 | 'write_mae' >> beam.Map(write_mae, project, output_dir) 55 | ) 56 | result = p.run().wait_until_finish() 57 | logging.info('GCS to BigQuery result: %s', result) 58 | 59 | 60 | def add_args(parser, include_project=True): 61 | """Add command-line arguments to the program.""" 62 | parser.add_argument('--mae_output_dir', type=str, required=True, 63 | help='GCS directory to store output data.') 64 | parser.add_argument('--mae_task_name', type=str, required=False, 65 | help='Task name to use in generated MAE files.', 66 | default='InspectPhiTask') 67 | if include_project: 68 | parser.add_argument('--project', 
type=str, required=True, 69 | help='GCP project to run as.') 70 | 71 | 72 | # Add arguments that won't be explicitly specified when this module is used as 73 | # part of a larger program. These args are only needed when this is run as a 74 | # stand-alone tool. 75 | def add_all_args(parser): 76 | """Add command-line arguments to the program.""" 77 | parser.add_argument('--input_pattern', type=str, required=True, 78 | help='GCS pattern to read input from.') 79 | add_args(parser) 80 | -------------------------------------------------------------------------------- /deid_app/frontend/tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "rulesDirectory": [ 3 | "node_modules/codelyzer" 4 | ], 5 | "rules": { 6 | "arrow-return-shorthand": true, 7 | "callable-types": true, 8 | "class-name": true, 9 | "comment-format": [ 10 | true, 11 | "check-space" 12 | ], 13 | "curly": true, 14 | "deprecation": { 15 | "severity": "warn" 16 | }, 17 | "eofline": true, 18 | "forin": true, 19 | "import-blacklist": [ 20 | true, 21 | "rxjs/Rx" 22 | ], 23 | "import-spacing": true, 24 | "indent": [ 25 | true, 26 | "spaces" 27 | ], 28 | "interface-over-type-literal": true, 29 | "label-position": true, 30 | "max-line-length": [ 31 | true, 32 | 140 33 | ], 34 | "member-access": false, 35 | "member-ordering": [ 36 | true, 37 | { 38 | "order": [ 39 | "static-field", 40 | "instance-field", 41 | "static-method", 42 | "instance-method" 43 | ] 44 | } 45 | ], 46 | "no-arg": true, 47 | "no-bitwise": true, 48 | "no-console": [ 49 | true, 50 | "debug", 51 | "info", 52 | "time", 53 | "timeEnd", 54 | "trace" 55 | ], 56 | "no-construct": true, 57 | "no-debugger": true, 58 | "no-duplicate-super": true, 59 | "no-empty": false, 60 | "no-empty-interface": true, 61 | "no-eval": true, 62 | "no-inferrable-types": [ 63 | true, 64 | "ignore-params" 65 | ], 66 | "no-misused-new": true, 67 | "no-non-null-assertion": true, 68 | "no-shadowed-variable": true, 69 | 
"no-string-literal": false, 70 | "no-string-throw": true, 71 | "no-switch-case-fall-through": true, 72 | "no-trailing-whitespace": true, 73 | "no-unnecessary-initializer": true, 74 | "no-unused-expression": true, 75 | "no-use-before-declare": true, 76 | "no-var-keyword": true, 77 | "object-literal-sort-keys": false, 78 | "one-line": [ 79 | true, 80 | "check-open-brace", 81 | "check-catch", 82 | "check-else", 83 | "check-whitespace" 84 | ], 85 | "prefer-const": true, 86 | "quotemark": [ 87 | true, 88 | "single" 89 | ], 90 | "radix": true, 91 | "semicolon": [ 92 | true, 93 | "always" 94 | ], 95 | "triple-equals": [ 96 | true, 97 | "allow-null-check" 98 | ], 99 | "typedef-whitespace": [ 100 | true, 101 | { 102 | "call-signature": "nospace", 103 | "index-signature": "nospace", 104 | "parameter": "nospace", 105 | "property-declaration": "nospace", 106 | "variable-declaration": "nospace" 107 | } 108 | ], 109 | "unified-signatures": true, 110 | "variable-name": false, 111 | "whitespace": [ 112 | true, 113 | "check-branch", 114 | "check-decl", 115 | "check-operator", 116 | "check-separator", 117 | "check-type" 118 | ], 119 | "no-output-on-prefix": true, 120 | "use-input-property-decorator": true, 121 | "use-output-property-decorator": true, 122 | "use-host-property-decorator": true, 123 | "no-input-rename": true, 124 | "no-output-rename": true, 125 | "use-life-cycle-interface": true, 126 | "use-pipe-transform-interface": true, 127 | "component-class-suffix": true, 128 | "directive-class-suffix": true 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /deid_app/frontend/src/polyfills.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * This file includes polyfills needed by Angular and is loaded before the app. 3 | * You can add your own extra polyfills to this file. 4 | * 5 | * This file is divided into 2 sections: 6 | * 1. Browser polyfills. 
These are applied before loading ZoneJS and are sorted by browsers. 7 | * 2. Application imports. Files imported after ZoneJS that should be loaded before your main 8 | * file. 9 | * 10 | * The current setup is for so-called "evergreen" browsers; the last versions of browsers that 11 | * automatically update themselves. This includes Safari >= 10, Chrome >= 55 (including Opera), 12 | * Edge >= 13 on the desktop, and iOS 10 and Chrome on mobile. 13 | * 14 | * Learn more in https://angular.io/docs/ts/latest/guide/browser-support.html 15 | */ 16 | 17 | /*************************************************************************************************** 18 | * BROWSER POLYFILLS 19 | */ 20 | 21 | /** 22 | * IE9, IE10 and IE11 requires all of the following polyfills 23 | */ 24 | // import 'core-js/es6/symbol'; 25 | // import 'core-js/es6/object'; 26 | // import 'core-js/es6/function'; 27 | // import 'core-js/es6/parse-int'; 28 | // import 'core-js/es6/parse-float'; 29 | // import 'core-js/es6/number'; 30 | // import 'core-js/es6/math'; 31 | // import 'core-js/es6/string'; 32 | // import 'core-js/es6/date'; 33 | // import 'core-js/es6/array'; 34 | // import 'core-js/es6/regexp'; 35 | // import 'core-js/es6/map'; 36 | // import 'core-js/es6/weak-map'; 37 | // import 'core-js/es6/set'; 38 | 39 | /** IE10 and IE11 requires the following for NgClass support on SVG elements */ 40 | // import 'classlist.js'; // Run `npm install --save classlist.js`. 41 | 42 | /** IE10 and IE11 requires the following for the Reflect API. */ 43 | // import 'core-js/es6/reflect'; 44 | 45 | 46 | /** 47 | * Evergreen browsers require these. 48 | */ 49 | // Used for reflect-metadata in JIT. If you use AOT (and only Angular decorators), you can remove. 50 | import 'core-js/es7/reflect'; 51 | 52 | 53 | /** 54 | * Web Animations `@angular/platform-browser/animations` 55 | * Only required if AnimationBuilder is used within the application and using IE/Edge or Safari. 
56 | * Standard animation support in Angular DOES NOT require any polyfills (as of Angular 6.0). 57 | */ 58 | // import 'web-animations-js'; // Run `npm install --save web-animations-js`. 59 | 60 | /** 61 | * By default, zone.js will patch all possible macroTask and DomEvents 62 | * user can disable parts of macroTask/DomEvents patch by setting following flags 63 | */ 64 | 65 | // (window as any).__Zone_disable_requestAnimationFrame = true; // disable patch requestAnimationFrame 66 | // (window as any).__Zone_disable_on_property = true; // disable patch onProperty such as onclick 67 | // (window as any).__zone_symbol__BLACK_LISTED_EVENTS = ['scroll', 'mousemove']; // disable patch specified eventNames 68 | 69 | /* 70 | * in IE/Edge developer tools, the addEventListener will also be wrapped by zone.js 71 | * with the following flag, it will bypass `zone.js` patch for IE/Edge 72 | */ 73 | // (window as any).__Zone_enable_cross_context_check = true; 74 | 75 | /*************************************************************************************************** 76 | * Zone JS is required by default for Angular itself. 77 | */ 78 | import 'zone.js/dist/zone'; // Included with Angular CLI. 79 | 80 | 81 | 82 | /*************************************************************************************************** 83 | * APPLICATION IMPORTS 84 | */ 85 | -------------------------------------------------------------------------------- /offline_tools/redactor/gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem ############################################################################## 2 | @rem # Copyright 2019 Google LLC 3 | @rem # 4 | @rem # Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem # you may not use this file except in compliance with the License. 
@rem # You may obtain a copy of the License at
@rem #
@rem #     https://www.apache.org/licenses/LICENSE-2.0
@rem #
@rem # Unless required by applicable law or agreed to in writing, software
@rem # distributed under the License is distributed on an "AS IS" BASIS,
@rem # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem # See the License for the specific language governing permissions and
@rem # limitations under the License.
@rem ##############################################################################

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem APP_HOME is the directory containing this script (trailing backslash kept).
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

@rem No JAVA_HOME: fall back to java.exe on PATH and probe that it runs.
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Strip any quotes from JAVA_HOME before building the java.exe path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants
@rem (the win9xME_args path is vestigial Win9x/ME handling kept by the
@rem standard Gradle wrapper template; on NT it is entered unconditionally).

if not "%OS%" == "Windows_NT" goto win9xME_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega