├── .flake8 ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug.md │ ├── feature_request.md │ └── general-issue.md ├── dependabot.yml ├── pull_request_template.md └── workflows │ └── gh-pages.yml ├── .gitignore ├── .gitmodules ├── .semgrepignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Config ├── LICENSE ├── NOTICE ├── README.md ├── README_zh.md ├── buildspec.yml ├── cfnnag_global_disable.txt ├── cfnnag_global_suppress_list.txt ├── codescan-prebuild-custom.sh ├── deployment ├── build-open-source-dist.sh ├── build-s3-dist.sh ├── cdk-solution-helper │ ├── README.md │ ├── index.js │ └── package.json ├── helper.py └── solution_config ├── docs ├── en │ ├── about-premium-edition.md │ ├── architecture-details │ │ ├── architecture-details.md │ │ └── services-in-the-solution.md │ ├── architecture-overview │ │ ├── architecture.md │ │ └── well-architected-pillars.md │ ├── contributors.md │ ├── deployment │ │ ├── deployment.md │ │ ├── images │ │ │ ├── AuthingAuthenticationCofiguration.jpg │ │ │ ├── AuthingCallbackURL.jpg │ │ │ ├── AuthingEndpointInformation.jpg │ │ │ ├── CloudformationOutput.png │ │ │ ├── CognitoAppType.jpg │ │ │ ├── CognitoCallbackURL.jpg │ │ │ ├── CognitoClientId.png │ │ │ ├── CognitoConnectScopes.jpg │ │ │ ├── CognitoHostedUI.jpg │ │ │ ├── CognitoUserpoolId.png │ │ │ ├── OktaAcess.png │ │ │ ├── OktaAppIntegration.png │ │ │ ├── OktaCallbackURL.jpg │ │ │ ├── OktaClientId.jpg │ │ │ ├── OktaCreateApp.png │ │ │ └── OktaIssuerUrl.jpg │ │ └── template.md │ ├── developer-guide │ │ ├── api-access.md │ │ ├── api-preview.md │ │ ├── datasource.md │ │ ├── identifier.md │ │ ├── images │ │ │ ├── api-access-3.png │ │ │ ├── api-access-5.png │ │ │ ├── api-access-6.png │ │ │ └── lambda-console.png │ │ ├── job.md │ │ ├── openapi.json │ │ └── source.md │ ├── faq.md │ ├── images │ │ ├── arch.png │ │ ├── batch-create-datasource-failed.png │ │ ├── batch-create-datasource-succeeded.png │ │ ├── batch_create_datasource_enablemarcos.png │ │ ├── 
data-protection-pain-points.png │ │ ├── edit-icon.png │ │ ├── how-sdps-works.png │ │ ├── launch-stack.png │ │ ├── orgs-ctrl.png │ │ ├── portal-catalog-256.png │ │ ├── portal-identifiers-256.png │ │ ├── portal-summary-256.png │ │ └── system-design.png │ ├── index.md │ ├── notices.md │ ├── plan-deployment │ │ ├── cost.md │ │ ├── regions.md │ │ └── security.md │ ├── revisions.md │ ├── solution-overview │ │ ├── concepts-and-definitions.md │ │ └── features-and-benefits.md │ ├── troubleshooting.md │ ├── uninstall.md │ ├── update │ │ ├── images │ │ │ ├── DeleteENI.png │ │ │ ├── InputUrl.jpg │ │ │ └── SelectStack.png │ │ └── update.md │ └── user-guide │ │ ├── appendix-build-in-identifiers-eu-gdpr.md │ │ ├── appendix-built-in-identifiers.md │ │ ├── appendix-built-in-supported-datatypes.md │ │ ├── appendix-database-proxy.md │ │ ├── appendix-organization.md │ │ ├── appendix-permissions.md │ │ ├── check-result-dashboard.md │ │ ├── data-catalog-create-glue.md │ │ ├── data-catalog-create-jdbc.md │ │ ├── data-catalog-create-rds.md │ │ ├── data-catalog-create-s3.md │ │ ├── data-catalog-delete.md │ │ ├── data-catalog-export.md │ │ ├── data-catalog-labels.md │ │ ├── data-catalog-sync.md │ │ ├── data-identifiers.md │ │ ├── data-source.md │ │ ├── discovery-job-create.md │ │ ├── discovery-job-details.md │ │ ├── discovery-job-pause-and-cancel.md │ │ ├── discovery-job-report.md │ │ ├── discovery-job-rerun-and-duplicate.md │ │ └── get-started.md ├── index.html ├── mkdocs.base.yml ├── mkdocs.en.yml ├── mkdocs.zh.yml └── zh │ ├── about-premium-edition.md │ ├── architecture-details │ ├── architecture-details.md │ └── services-in-the-solution.md │ ├── architecture-overview │ ├── architecture.md │ └── well-architected-pillars.md │ ├── contributors.md │ ├── deployment │ ├── deployment.md │ ├── images │ │ ├── AuthingAuthenticationCofiguration.jpg │ │ ├── AuthingCallbackURL.jpg │ │ ├── AuthingEndpointInformation.jpg │ │ ├── CloudformationOutput.png │ │ ├── CognitoAppType.jpg │ │ ├── 
CognitoCallbackURL.jpg │ │ ├── CognitoClientId.png │ │ ├── CognitoConnectScopes.jpg │ │ ├── CognitoHostedUI.jpg │ │ ├── CognitoUserpoolId.png │ │ ├── OktaAcess.png │ │ ├── OktaAppIntegration.png │ │ ├── OktaCallbackURL.jpg │ │ ├── OktaClientId.jpg │ │ ├── OktaCreateApp.png │ │ └── OktaIssuerUrl.jpg │ └── template.md │ ├── developer-guide │ ├── api-access.md │ ├── api-preview.md │ ├── datasource.md │ ├── identifier.md │ ├── images │ │ ├── api-access-3.png │ │ ├── api-access-5.png │ │ ├── api-access-6.png │ │ └── lambda-console.png │ ├── job.md │ ├── openapi.json │ └── source.md │ ├── faq.md │ ├── images │ ├── account-list-cn.png │ ├── arch.png │ ├── batch-create-datasource-failed.png │ ├── batch-create-datasource-succeeded.png │ ├── batch_create_datasource_enablemarcos.png │ ├── cn-custom-identifier-create.png │ ├── cn-custom-identifier.png │ ├── cn-dashboard-search.png │ ├── cn-dashboard.png │ ├── cn-identifier-list.png │ ├── cn-identifier-to-template.png │ ├── cn-job-status-progress.png │ ├── cn-job-status.png │ ├── cn-s3-authorize.png │ ├── data-protection-pain-points.png │ ├── edit-icon.png │ ├── how-sdps-works.png │ ├── job-list-cn.png │ ├── launch-stack.png │ ├── orgs-ctrl.png │ ├── portal-catalog-256.png │ ├── portal-identifiers-256.png │ ├── portal-summary-256.png │ └── system-design.png │ ├── index.md │ ├── notices.md │ ├── plan-deployment │ ├── cost.md │ ├── regions.md │ └── security.md │ ├── revisions.md │ ├── solution-overview │ ├── concepts.md │ └── features-and-benefits.md │ ├── troubleshooting.md │ ├── uninstall.md │ ├── update │ ├── images │ │ ├── DeleteENI.png │ │ ├── InputUrl.jpg │ │ └── SelectStack.png │ └── update.md │ └── user-guide │ ├── appendix-build-in-identifiers-eu-gdpr.md │ ├── appendix-built-in-identifiers.md │ ├── appendix-built-in-supported-datatypes.md │ ├── appendix-database-proxy.md │ ├── appendix-organization.md │ ├── appendix-permissions.md │ ├── check-result-dashboard.md │ ├── check-result-data-catalog.md │ ├── 
data-catalog-create-glue.md │ ├── data-catalog-create-jdbc.md │ ├── data-catalog-create-rds.md │ ├── data-catalog-create-s3.md │ ├── data-catalog-delete.md │ ├── data-catalog-export.md │ ├── data-catalog-labels.md │ ├── data-catalog-sync.md │ ├── data-identifiers.md │ ├── data-source.md │ ├── discovery-job-create.md │ ├── discovery-job-details.md │ ├── discovery-job-pause-and-cancel.md │ ├── discovery-job-report.md │ ├── discovery-job-rerun-and-duplicate.md │ └── get-started.md ├── semgrep.sarif ├── sonar-project.properties ├── source ├── .viperlightignore ├── .viperlightrc ├── constructs │ ├── .eslintrc.json │ ├── README.md │ ├── api │ │ ├── README.md │ │ ├── __init__.py │ │ ├── catalog │ │ │ ├── __init__.py │ │ │ ├── crud.py │ │ │ ├── main.py │ │ │ ├── sample_service.py │ │ │ ├── schemas.py │ │ │ ├── service.py │ │ │ └── service_dashboard.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── abilities.py │ │ │ ├── concurrent_upload2s3.py │ │ │ ├── constant.py │ │ │ ├── db_base_col.py │ │ │ ├── enum.py │ │ │ ├── exception_handler.py │ │ │ ├── log_formatter.py │ │ │ ├── query_condition.py │ │ │ ├── reference_parameter.py │ │ │ ├── request_wrapper.py │ │ │ └── response_wrapper.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── crud.py │ │ │ ├── main.py │ │ │ ├── schemas.py │ │ │ └── service.py │ │ ├── data_source │ │ │ ├── __init__.py │ │ │ ├── crud.py │ │ │ ├── glue_database_detector.py │ │ │ ├── jdbc_database.py │ │ │ ├── jdbc_detector.py │ │ │ ├── jdbc_schema.py │ │ │ ├── main.py │ │ │ ├── rds_detector.py │ │ │ ├── resource_list.py │ │ │ ├── s3_detector.py │ │ │ ├── schemas.py │ │ │ └── service.py │ │ ├── db │ │ │ ├── __init__.py │ │ │ ├── database.py │ │ │ ├── models_catalog.py │ │ │ ├── models_config.py │ │ │ ├── models_data_source.py │ │ │ ├── models_discovery_job.py │ │ │ ├── models_label.py │ │ │ ├── models_template.py │ │ │ └── models_version.py │ │ ├── discovery_job │ │ │ ├── __init__.py │ │ │ ├── crud.py │ │ │ ├── main.py │ │ │ ├── schemas.py │ │ │ └── 
service.py │ │ ├── label │ │ │ ├── __init__.py │ │ │ ├── crud.py │ │ │ ├── main.py │ │ │ ├── schemas.py │ │ │ └── service.py │ │ ├── lambda │ │ │ ├── auto_sync_data.py │ │ │ ├── controller.py │ │ │ ├── crawler_event.py │ │ │ ├── forward_message.py │ │ │ ├── portal_config.py │ │ │ ├── rds_schema_inspector.py │ │ │ └── sync_crawler_results.py │ │ ├── logging.conf │ │ ├── main.py │ │ ├── organization │ │ │ └── organization.py │ │ ├── pytest.ini │ │ ├── pytest │ │ │ ├── test_auth.py │ │ │ ├── test_data_source.py │ │ │ ├── test_labels.py │ │ │ └── test_query.py │ │ ├── requirements.txt │ │ ├── search │ │ │ ├── crud.py │ │ │ ├── main.py │ │ │ └── schemas.py │ │ ├── template │ │ │ ├── __init__.py │ │ │ ├── crud.py │ │ │ ├── main.py │ │ │ ├── schemas.py │ │ │ └── service.py │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── list_tool.py │ │ │ ├── mytime.py │ │ │ └── pydantic_tool.py │ │ └── version │ │ │ ├── __init__.py │ │ │ ├── crud.py │ │ │ ├── main.py │ │ │ └── service.py │ ├── bin │ │ └── main.ts │ ├── cdk.json │ ├── config │ │ ├── batch_create │ │ │ ├── datasource │ │ │ │ └── template │ │ │ │ │ ├── batch_create_jdbc_datasource-cn.xlsx │ │ │ │ │ └── batch_create_jdbc_datasource-en.xlsx │ │ │ └── identifier │ │ │ │ └── template │ │ │ │ ├── batch_create_identifier-cn.xlsx │ │ │ │ └── batch_create_identifier-en.xlsx │ │ ├── job │ │ │ └── script │ │ │ │ ├── glue-job-unstructured.py │ │ │ │ ├── glue-job.py │ │ │ │ ├── glue-sample-job.py │ │ │ │ └── job_extra_files.zip │ │ └── provider.json │ ├── lib │ │ ├── admin-region-stack.ts │ │ ├── admin-stack.ts │ │ ├── admin │ │ │ ├── acm-stack.ts │ │ │ ├── acm │ │ │ │ ├── acm.py │ │ │ │ └── requirements.txt │ │ │ ├── alb-stack.ts │ │ │ ├── api-stack.ts │ │ │ ├── call-region-stack.ts │ │ │ ├── cognito-post-stack.ts │ │ │ ├── cognito-post │ │ │ │ ├── cognito-post.py │ │ │ │ └── requirements.txt │ │ │ ├── cognito-stack.ts │ │ │ ├── database │ │ │ │ ├── 1.0.0-1.0.1 │ │ │ │ │ ├── 20_update.sql │ │ │ │ │ └── 99_version.sql │ │ │ │ ├── 
1.0.1-1.0.2 │ │ │ │ │ ├── 20_update.sql │ │ │ │ │ └── 99_version.sql │ │ │ │ ├── 1.0.x-1.1.0 │ │ │ │ │ ├── 20_update.sql │ │ │ │ │ └── 99_version.sql │ │ │ │ ├── 1.1.0-1.1.2 │ │ │ │ │ ├── 20_update.sql │ │ │ │ │ └── 99_version.sql │ │ │ │ ├── init_db.py │ │ │ │ ├── requirements.txt │ │ │ │ └── whole │ │ │ │ │ ├── 00_db.sql │ │ │ │ │ ├── 01_config.sql │ │ │ │ │ ├── 01_version.sql │ │ │ │ │ ├── 10_catalog.sql │ │ │ │ │ ├── 10_data_source.sql │ │ │ │ │ ├── 10_discovery_job.sql │ │ │ │ │ ├── 10_label.sql │ │ │ │ │ ├── 10_template.sql │ │ │ │ │ ├── 11_account.sql │ │ │ │ │ ├── 90_init.sql │ │ │ │ │ └── 99_version.sql │ │ │ ├── delete-resources-stack.ts │ │ │ ├── delete-resources │ │ │ │ ├── delete_resources.py │ │ │ │ └── requirements.txt │ │ │ ├── glue-stack.ts │ │ │ ├── glue │ │ │ │ ├── add_partition.py │ │ │ │ └── requirements.txt │ │ │ ├── rds-stack.ts │ │ │ ├── region │ │ │ │ ├── call_region.py │ │ │ │ └── requirements.txt │ │ │ ├── sqs-stack.ts │ │ │ └── vpc-stack.ts │ │ ├── agent-stack.ts │ │ ├── agent │ │ │ ├── AgentRole-stack.ts │ │ │ ├── CrawlerEventbridge-stack.ts │ │ │ ├── DeleteAgentResources-stack.ts │ │ │ ├── DiscoveryJob-stack.ts │ │ │ ├── DiscoveryJob.json │ │ │ ├── RenameResources-stack.ts │ │ │ ├── delete-agent-resources │ │ │ │ ├── delete_agent_resources.py │ │ │ │ └── requirements.txt │ │ │ ├── rename-resources │ │ │ │ ├── rename_resources.py │ │ │ │ └── requirements.txt │ │ │ ├── split-job │ │ │ │ ├── requirements.txt │ │ │ │ └── split_job.py │ │ │ └── unstructured-crawler │ │ │ │ └── UnstructuredCrawler.py │ │ ├── common │ │ │ ├── bucket-stack.ts │ │ │ ├── build-config.ts │ │ │ ├── constants.ts │ │ │ ├── parameter.ts │ │ │ └── solution-info.ts │ │ └── it-stack.ts │ ├── package.json │ └── tsconfig.json ├── containers │ └── document-pii-detection │ │ ├── Dockerfile │ │ ├── __init__.py │ │ ├── fd_model │ │ └── det.onnx │ │ ├── main.py │ │ ├── ocr_model │ │ ├── classifier.onnx │ │ ├── det_standard.onnx │ │ ├── keys_v1.txt │ │ └── rec_standard.onnx │ │ 
├── parser_factory.py │ │ ├── parsers │ │ ├── __init__.py │ │ ├── doc_parser.py │ │ ├── email_parser.py │ │ ├── html_parser.py │ │ ├── image_analysis │ │ │ ├── __init__.py │ │ │ ├── face_detection │ │ │ │ ├── __init__.py │ │ │ │ └── face_detection_main.py │ │ │ └── general_ocr │ │ │ │ ├── __init__.py │ │ │ │ ├── imaug │ │ │ │ ├── __init__.py │ │ │ │ └── operators.py │ │ │ │ ├── ocr_main.py │ │ │ │ └── postprocess │ │ │ │ ├── __init__.py │ │ │ │ ├── cls_postprocess.py │ │ │ │ ├── db_postprocess.py │ │ │ │ └── rec_postprocess.py │ │ ├── image_parser.py │ │ ├── parser.py │ │ ├── pdf_parser.py │ │ └── txt_parser.py │ │ ├── requirements.txt │ │ └── utils.py ├── portal │ ├── .eslintrc.json │ ├── .gitignore │ ├── .prettierrc.json │ ├── .well-known │ │ └── security.txt │ ├── Dockerfile │ ├── README.md │ ├── config │ │ ├── env.js │ │ ├── getHttpsConfig.js │ │ ├── jest │ │ │ ├── babelTransform.js │ │ │ ├── cssTransform.js │ │ │ └── fileTransform.js │ │ ├── modules.js │ │ ├── paths.js │ │ ├── webpack.config.js │ │ ├── webpack │ │ │ └── persistentCache │ │ │ │ └── createEnvironmentHash.js │ │ └── webpackDevServer.config.js │ ├── nginx-config │ │ ├── nginx.conf │ │ └── start_nginx.sh │ ├── package-lock.json │ ├── package.json │ ├── public │ │ ├── aws-exports.json │ │ ├── geo.json │ │ ├── icon.png │ │ ├── index.html │ │ ├── locales │ │ │ ├── en │ │ │ │ ├── account.json │ │ │ │ ├── catalog.json │ │ │ │ ├── common.json │ │ │ │ ├── datasource.json │ │ │ │ ├── identifier.json │ │ │ │ ├── info.json │ │ │ │ ├── job.json │ │ │ │ ├── summary.json │ │ │ │ ├── template.json │ │ │ │ └── timeline.json │ │ │ └── zh │ │ │ │ ├── account.json │ │ │ │ ├── catalog.json │ │ │ │ ├── common.json │ │ │ │ ├── datasource.json │ │ │ │ ├── identifier.json │ │ │ │ ├── info.json │ │ │ │ ├── job.json │ │ │ │ ├── summary.json │ │ │ │ ├── template.json │ │ │ │ └── timeline.json │ │ ├── logos │ │ │ ├── 1.svg │ │ │ ├── 2.svg │ │ │ ├── 3.svg │ │ │ ├── 4.svg │ │ │ ├── 5.svg │ │ │ ├── 6.svg │ │ │ └── source │ │ │ 
│ ├── db.svg │ │ │ │ ├── glue.svg │ │ │ │ ├── rds.svg │ │ │ │ └── s3.svg │ │ └── robots.txt │ ├── scripts │ │ ├── build.js │ │ ├── start.js │ │ └── test.js │ ├── src │ │ ├── App.test.tsx │ │ ├── App.tsx │ │ ├── apis │ │ │ ├── account-manager │ │ │ │ └── api.ts │ │ │ ├── config │ │ │ │ └── api.ts │ │ │ ├── dashboard │ │ │ │ └── api.ts │ │ │ ├── data-catalog │ │ │ │ └── api.ts │ │ │ ├── data-job │ │ │ │ └── api.ts │ │ │ ├── data-source │ │ │ │ └── api.ts │ │ │ ├── data-template │ │ │ │ └── api.ts │ │ │ ├── label │ │ │ │ └── api.ts │ │ │ ├── props │ │ │ │ └── api.ts │ │ │ └── query │ │ │ │ └── api.ts │ │ ├── common │ │ │ ├── ConterLink.tsx │ │ │ ├── HelpInfo.tsx │ │ │ ├── IdentifierTypeSelect.tsx │ │ │ ├── LabelModal.tsx │ │ │ ├── PropsModal.tsx │ │ │ ├── PropsSelect.tsx │ │ │ ├── ProviderTab.tsx │ │ │ ├── RuleKeywords.tsx │ │ │ ├── external-link-group.tsx │ │ │ └── separated-list │ │ │ │ ├── index.tsx │ │ │ │ └── styles.module.scss │ │ ├── enum │ │ │ └── common_types.ts │ │ ├── i18n.ts │ │ ├── index.scss │ │ ├── index.tsx │ │ ├── pages │ │ │ ├── account-management │ │ │ │ ├── componments │ │ │ │ │ └── AccountList.tsx │ │ │ │ ├── index.tsx │ │ │ │ ├── style.scss │ │ │ │ └── types │ │ │ │ │ └── account_type.ts │ │ │ ├── add-account │ │ │ │ ├── componments │ │ │ │ │ ├── AccountForm.tsx │ │ │ │ │ └── AddAccountInfo.tsx │ │ │ │ ├── index.tsx │ │ │ │ ├── style.scss │ │ │ │ └── types │ │ │ │ │ └── add_account_type.ts │ │ │ ├── batch-operation │ │ │ │ └── index.tsx │ │ │ ├── common-alert │ │ │ │ ├── index.tsx │ │ │ │ ├── style.scss │ │ │ │ └── types │ │ │ │ │ └── data_config.ts │ │ │ ├── common-badge │ │ │ │ ├── componments │ │ │ │ │ └── Options.tsx │ │ │ │ ├── index.tsx │ │ │ │ ├── style.scss │ │ │ │ └── types │ │ │ │ │ └── badge_type.ts │ │ │ ├── create-identifier │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── create-job │ │ │ │ ├── components │ │ │ │ │ ├── AdvancedSettings.tsx │ │ │ │ │ ├── JobPreview.tsx │ │ │ │ │ ├── JobSettings.tsx │ │ │ │ │ ├── 
SelectGlueCatalog.tsx │ │ │ │ │ ├── SelectJDBCCatalog.tsx │ │ │ │ │ ├── SelectProvider.tsx │ │ │ │ │ ├── SelectRDSCatalog.tsx │ │ │ │ │ └── SelectS3Catalog.tsx │ │ │ │ ├── index.tsx │ │ │ │ ├── indexOld.tsx │ │ │ │ ├── style.scss │ │ │ │ └── types │ │ │ │ │ └── create_data_type.ts │ │ │ ├── data-catalog │ │ │ │ ├── componments │ │ │ │ │ ├── CatalogDetailList.tsx │ │ │ │ │ ├── CatalogList.tsx │ │ │ │ │ ├── DetailModal.tsx │ │ │ │ │ ├── IdentifierFilterTag.tsx │ │ │ │ │ └── SchemaModal.tsx │ │ │ │ ├── index.tsx │ │ │ │ ├── style.scss │ │ │ │ └── types │ │ │ │ │ └── data_config.ts │ │ │ ├── data-job │ │ │ │ ├── componments │ │ │ │ │ ├── JobCatalogs.tsx │ │ │ │ │ ├── JobDetailModal.tsx │ │ │ │ │ ├── JobHistory.tsx │ │ │ │ │ └── JobProperties.tsx │ │ │ │ ├── index.tsx │ │ │ │ ├── style.scss │ │ │ │ └── types │ │ │ │ │ └── job_list_type.ts │ │ │ ├── data-source-connection │ │ │ │ ├── componments │ │ │ │ │ ├── Alert.tsx │ │ │ │ │ ├── DataSourceCatalog.tsx │ │ │ │ │ ├── DataSourceInfo.tsx │ │ │ │ │ ├── DataSourceList.tsx │ │ │ │ │ ├── JDBCConnection.tsx │ │ │ │ │ ├── JDBCConnectionEdit.tsx │ │ │ │ │ └── SourceBadge.tsx │ │ │ │ ├── index.tsx │ │ │ │ ├── style.scss │ │ │ │ └── types │ │ │ │ │ ├── data_config.ts │ │ │ │ │ └── s3_selector_config.ts │ │ │ ├── data-source-delete │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── data-tag │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── data-template │ │ │ │ ├── componments │ │ │ │ │ ├── AddCustomIdentfier.tsx │ │ │ │ │ └── AddIndentifierTable.tsx │ │ │ │ ├── index.tsx │ │ │ │ ├── style.scss │ │ │ │ └── types │ │ │ │ │ └── template_type.ts │ │ │ ├── error-badge │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── glue-job │ │ │ │ ├── componments │ │ │ │ │ ├── GlueJobCatalog.tsx │ │ │ │ │ ├── GlueJobProgress.tsx │ │ │ │ │ └── RunStatusPercent.tsx │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── left-menu │ │ │ │ ├── CustomBreadCrumb.tsx │ │ │ │ ├── Navigation.tsx │ │ │ │ └── style.scss │ │ │ ├── login-callback │ 
│ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── no-access │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── page-header │ │ │ │ ├── PageHeader.tsx │ │ │ │ └── style.scss │ │ │ ├── page-spinner │ │ │ │ └── index.tsx │ │ │ ├── public_style.scss │ │ │ ├── resources-filter │ │ │ │ ├── hook.ts │ │ │ │ └── index.tsx │ │ │ ├── right-modal │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── signed-in-page │ │ │ │ ├── LayoutHeader.tsx │ │ │ │ └── style.scss │ │ │ ├── summary │ │ │ │ ├── comps │ │ │ │ │ ├── Charts.tsx │ │ │ │ │ ├── DataLocations.tsx │ │ │ │ │ ├── Overview.tsx │ │ │ │ │ └── charts │ │ │ │ │ │ ├── AmazonGlue.tsx │ │ │ │ │ │ ├── AmazonRDS.tsx │ │ │ │ │ │ ├── AmazonS3.tsx │ │ │ │ │ │ ├── JDBC.tsx │ │ │ │ │ │ └── items │ │ │ │ │ │ ├── CircleChart.tsx │ │ │ │ │ │ ├── CommonPieChart.tsx │ │ │ │ │ │ ├── CustomLineChart.tsx │ │ │ │ │ │ ├── GlueCatalogOvervie.tsx │ │ │ │ │ │ ├── HorizontalBarChart.tsx │ │ │ │ │ │ ├── IdentifierTable.tsx │ │ │ │ │ │ ├── JDBCCatalogOvervie.tsx │ │ │ │ │ │ ├── MapChart.tsx │ │ │ │ │ │ ├── MapMarker.tsx │ │ │ │ │ │ ├── Pagination.tsx │ │ │ │ │ │ ├── RDSCatalogOverview.tsx │ │ │ │ │ │ ├── RegionSelector.tsx │ │ │ │ │ │ ├── S3CatalogOverview.tsx │ │ │ │ │ │ └── TableData.tsx │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── system-settings │ │ │ │ ├── index.tsx │ │ │ │ └── typs │ │ │ │ │ └── config-typs.ts │ │ │ ├── template-delete │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ ├── template-identifiers │ │ │ │ ├── index.tsx │ │ │ │ ├── style.scss │ │ │ │ ├── tables │ │ │ │ │ └── IdentifierTable.tsx │ │ │ │ └── types │ │ │ │ │ └── identifiers_type.ts │ │ │ ├── time-line │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ │ └── top-data-coverage │ │ │ │ ├── index.tsx │ │ │ │ └── style.scss │ │ ├── react-app-env.d.ts │ │ ├── reportWebVitals.ts │ │ ├── routers │ │ │ ├── index.tsx │ │ │ └── routerEnum.tsx │ │ ├── setupTests.ts │ │ ├── tools │ │ │ ├── apiRequest.tsx │ │ │ └── tools.tsx │ │ └── ts │ │ │ ├── common-alert │ │ │ 
└── types.ts │ │ │ ├── common-badge │ │ │ └── types.ts │ │ │ ├── common.ts │ │ │ ├── dashboard │ │ │ └── types.ts │ │ │ ├── data-catalog │ │ │ └── types.ts │ │ │ ├── data-source-connection │ │ │ └── types.ts │ │ │ ├── resources-filter │ │ │ └── types.ts │ │ │ ├── right-modal │ │ │ └── types.ts │ │ │ ├── tools │ │ │ └── types.ts │ │ │ ├── top-data-coverage │ │ │ └── types.ts │ │ │ └── types.ts │ └── tsconfig.json └── run-all-tests.sh └── tsconfig.json /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | import-order-style = google 3 | max-line-length = 120 4 | ignore = 5 | E203,W191 6 | exclude = 7 | build 8 | .git 9 | __pycache__ 10 | .tox 11 | venv/ 12 | tmp* 13 | deployment/ 14 | cdk.out 15 | node_modules 16 | 17 | max-complexity = 10 18 | require-code = True -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @awslabs/sdps-codeowners -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: Report a bug to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | --- 11 | name: "\U0001F41B Bug Report" 12 | about: Report a bug 13 | title: "(Lambda@Edge/module name): short issue description" 14 | labels: bug, needs-triage 15 | --- 16 | 17 | 20 | 21 | 22 | 23 | 24 | ### Reproduction Steps 25 | 26 | 27 | ### What did you expect to happen? 28 | 29 | 30 | ### What actually happened? 
31 | 32 | 33 | ### Environment 34 | 35 | - **CDK CLI Version :** 36 | - **Framework Version:** 37 | - **Language (Version):** 38 | 39 | ### Other 40 | 41 | 42 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for sensitive-data-protection-on-aws 4 | title: '' 5 | labels: feature-request 6 | assignees: '' 7 | 8 | --- 9 | 10 | --- 11 | name: "\U0001F680 Feature Request" 12 | about: Request a new feature 13 | title: "(module name): short issue description" 14 | labels: feature-request, needs-triage 15 | --- 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | ### Use Case 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | ### Proposed Solution 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | ### Other 40 | 41 | 45 | 46 | 47 | 48 | 49 | 50 | * [ ] :wave: I may be able to implement this feature request 51 | * [ ] :warning: This feature might incur a breaking change 52 | 53 | --- 54 | 55 | This is a :rocket: Feature Request 56 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general-issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General Issue 3 | about: Create a new issue 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | --- 11 | name: "\U00002753 General Issue" 12 | about: Create a new issue 13 | title: "(module name): short issue description" 14 | labels: needs-triage, guidance 15 | --- 16 | 17 | ## :question: General Issue 18 | 19 | 23 | 24 | ### The Question 25 | 31 | 32 | ### Environment 33 | 34 | - **CDK CLI Version :** 35 | - **Framework Version:** 36 | - **Language (Version):** 37 | 38 | 39 | ### Other information 40 | 41 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: 
-------------------------------------------------------------------------------- 1 | **What is the current behavior?** 2 | 3 | Please describe the current behavior of the code that you are trying to modify. This could include any issues or limitations that you are addressing. 4 | 5 | **What is the updated behavior?** 6 | 7 | Please describe the changes you are making to the code's behavior. This could include new features or enhancements that you are adding, as well as how they differ from the current behavior. 8 | 9 | **Checklist** 10 | 11 | Please check off the following items before submitting your pull request: 12 | 13 | - [ ] I have tested my changes. 14 | - [ ] No sensitive information is included. 15 | - [ ] I've reviewed my changes to ensure they won't cause any new issues that could affect the stability or performance of the codebase. 16 | 17 | Please check off the following items if these changes include API modifications: 18 | 19 | - [ ] I confirm these changes have been tested with the [authorization validation steps](https://github.com/awslabs/sensitive-data-protection-on-aws/blob/main/CONTRIBUTING.md#contributing-via-pull-requests) and won't break the authorization token verification mechanism. 20 | - [ ] I confirm these changes won't cause security issues. 
-------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - docs/** 9 | 10 | jobs: 11 | deploy: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | with: 16 | fetch-depth: 0 # Fetch all history for .GitInfo and .Lastmod 17 | 18 | - name: Setup Python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: '3.8' 22 | 23 | - name: Install dependencies 24 | run: | 25 | python3 -m pip install mkdocs # install mkdocs 26 | python3 -m pip install mkdocs-material # install material theme 27 | python3 -m pip install mkdocs-include-markdown-plugin==3.8.1 28 | python3 -m pip install mkdocs-macros-plugin 29 | - name: Build mkdocs 30 | run: | 31 | mkdocs build -f ./docs/mkdocs.en.yml 32 | mkdocs build -f ./docs/mkdocs.zh.yml 33 | cp -av ./docs/index.html ./docs/site 34 | - name: Deploy 35 | uses: peaceiris/actions-gh-pages@v3 36 | if: ${{ github.ref == 'refs/heads/main' }} 37 | with: 38 | github_token: ${{ secrets.GITHUB_TOKEN }} 39 | publish_dir: ./docs/site -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "source/portal/lambda/layer/_build_layer"] 2 | path = source/portal/lambda/layer/_build_layer 3 | url = https://github.com/nowfox/build-lambda-layer-python 4 | -------------------------------------------------------------------------------- /.semgrepignore: -------------------------------------------------------------------------------- 1 | # 忽略所有在 tests 目录下的文件 2 | tests/ 3 | 4 | # 忽略所有的 .json 文件 5 | *.json 6 | 7 | # 忽略特定的文件 8 | .gitignore 9 | deployment/cdk-solution-helper/index.js 10 | deployment/cdk-solution-helper/index.js 11 | deployment/cdk-solution-helper/index.js 12 | 
deployment/helper.py 13 | source/portal/config/env.js 14 | source/portal/config/modules.js 15 | source/portal/config/modules.js 16 | source/portal/config/modules.js 17 | source/portal/config/paths.js 18 | source/portal/nginx-config/start_nginx.sh 19 | .github/* 20 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /Config: -------------------------------------------------------------------------------- 1 | package.Aws-sensitive-data-protection = { 2 | interfaces = (1.0); 3 | 4 | # Use NoOpBuild. See https://w.amazon.com/index.php/BrazilBuildSystem/NoOpBuild 5 | build-system = no-op; 6 | build-tools = { 7 | 1.0 = { 8 | NoOpBuild = 1.0; 9 | }; 10 | }; 11 | 12 | # Use runtime-dependencies for when you want to bring in additional 13 | # packages when deploying. 14 | # Use dependencies instead if you intend for these dependencies to 15 | # be exported to other packages that build against you. 16 | dependencies = { 17 | 1.0 = { 18 | }; 19 | }; 20 | 21 | runtime-dependencies = { 22 | 1.0 = { 23 | }; 24 | }; 25 | 26 | }; 27 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /cfnnag_global_disable.txt: -------------------------------------------------------------------------------- 1 | # Instructions 2 | # ------------ 3 | # 1) Create a SIM ticket for cfn_nag bypass 4 | # 2) Place the URL for your ticket below in sim_ticket: 5 | # 3) Put a reason for the bypass below in reason: 6 | # 4) Management approval is required. Place the Amazon ID of the manager in approval: 7 | # 5) Rename this file to .cfnnag_global_disable 8 | sim_ticket: 9 | reason: 10 | approval: 11 | -------------------------------------------------------------------------------- /cfnnag_global_suppress_list.txt: -------------------------------------------------------------------------------- 1 | # Instructions 2 | # ------------ 3 | # 1) Add any cfn_nag rules that don't apply to this solution, providing a reason for each item 4 | # 2) Rename this file to .cfnnag_global_suppress_list 5 | # Reference: https://github.com/stelligent/cfn_nag#global-blacklist 6 | --- 7 | RulesToSuppress: 8 | - id: W89 9 | reason: Lambda functions in this solution do not need to be deployed in a VPC 10 | -------------------------------------------------------------------------------- /deployment/cdk-solution-helper/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cdk-solution-helper", 3 | "version": "0.1.0", 4 | "devDependencies": { 5 | "fs": "0.0.1-security" 6 | }, 7 | "dependencies": { 8 | "fs": "0.0.1-security" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /deployment/solution_config: -------------------------------------------------------------------------------- 1 | SOLUTION_ID='SO0xyz' 2 | SOLUTION_NAME='Sensitive Data Protection' 3 | SOLUTION_TRADEMARKEDNAME='aws-sensitive-data-protection-solution' -------------------------------------------------------------------------------- 
/docs/en/about-premium-edition.md: -------------------------------------------------------------------------------- 1 | Sensitive Data Protection Premium Edition (SDP-PE) is a solution offered by AWS Professional Services (the Greater China Regions). It includes not only the features of SDP (this open-source project), but also features provided by AWS Professional Services. 2 | 3 | The features of SDP-PE include, but are not limited to, the following: 4 | 5 | 6 | ### Data inspection 7 | 8 | - Industry-specific classification templates (e.g. for automobile industry) 9 | - Document inspection (PDF, TXT, etc.) 10 | - Image inspection (face recognition, OCR, car license, etc.) 11 | 12 | ### Data masking 13 | 14 | - Data masking rule configuration 15 | - Data masking 16 | - Image masking (face recognition, OCR, car license, etc.) 17 | - Document data masking (PDF, TXT, etc.) 18 | 19 | ### Auditing and reporting 20 | 21 | - Auditing all API calls (data inspection, data masking) through the solution 22 | - Viewing dashboard and downloading reports for all records of API calls 23 | 24 | For more details, please contact AWS Sales (the Greater China Regions) for further information and a price quote. -------------------------------------------------------------------------------- /docs/en/architecture-details/services-in-the-solution.md: -------------------------------------------------------------------------------- 1 | The following AWS services are included in this solution: 2 | 3 | | AWS service | Description | 4 | | --- | --- | 5 | | [Application Load Balancer](https://aws.amazon.com/alb/) | **Core**. To distribute the frontend web UI assets. | 6 | | [Amazon ECR](https://aws.amazon.com/ecr/) | **Core**. To store Docker images. | 7 | | [AWS Lambda](https://aws.amazon.com/lambda/) | **Core**. To serve as a target for the application load balancer. | 8 | | [AWS Step Functions](https://aws.amazon.com/step-functions/) | **Supporting**. To control job processing. 
| 9 | | [AWS Glue](https://aws.amazon.com/glue/) | **Supporting**. To take inventory of data sources and to be invoked for sensitive data detection. | 10 | | [Amazon RDS](https://aws.amazon.com/rds/) | **Supporting**. To set up, operate, and scale a relational database in the cloud with just a few clicks. | 11 | | [Amazon SQS](https://aws.amazon.com/sqs/) | **Supporting**. To allow the Step Functions to send messages to the detection job queue. | 12 | | [Amazon SageMaker](https://aws.amazon.com/sagemaker/) | **Supporting**. To pre-process unstructured data. | 13 | -------------------------------------------------------------------------------- /docs/en/contributors.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | - Chen, Haiyun 4 | - Cui, Hubin 5 | - Gu, George 6 | - Hao, Liang 7 | - Han, Xu 8 | - Ji, Junxiang 9 | - Jia, Ting 10 | - Li, Xiujuan 11 | - Lv, Ning 12 | - Qin, Dehua 13 | - Su, Fan 14 | - Wang, Yu 15 | - Yi, Ke 16 | - Yi, Yan 17 | - Zhang, Junzhong 18 | -------------------------------------------------------------------------------- /docs/en/deployment/images/AuthingAuthenticationCofiguration.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/AuthingAuthenticationCofiguration.jpg -------------------------------------------------------------------------------- /docs/en/deployment/images/AuthingCallbackURL.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/AuthingCallbackURL.jpg -------------------------------------------------------------------------------- /docs/en/deployment/images/AuthingEndpointInformation.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/AuthingEndpointInformation.jpg -------------------------------------------------------------------------------- /docs/en/deployment/images/CloudformationOutput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/CloudformationOutput.png -------------------------------------------------------------------------------- /docs/en/deployment/images/CognitoAppType.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/CognitoAppType.jpg -------------------------------------------------------------------------------- /docs/en/deployment/images/CognitoCallbackURL.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/CognitoCallbackURL.jpg -------------------------------------------------------------------------------- /docs/en/deployment/images/CognitoClientId.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/CognitoClientId.png -------------------------------------------------------------------------------- /docs/en/deployment/images/CognitoConnectScopes.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/CognitoConnectScopes.jpg -------------------------------------------------------------------------------- /docs/en/deployment/images/CognitoHostedUI.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/CognitoHostedUI.jpg -------------------------------------------------------------------------------- /docs/en/deployment/images/CognitoUserpoolId.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/CognitoUserpoolId.png -------------------------------------------------------------------------------- /docs/en/deployment/images/OktaAcess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/OktaAcess.png -------------------------------------------------------------------------------- /docs/en/deployment/images/OktaAppIntegration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/OktaAppIntegration.png -------------------------------------------------------------------------------- /docs/en/deployment/images/OktaCallbackURL.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/OktaCallbackURL.jpg -------------------------------------------------------------------------------- /docs/en/deployment/images/OktaClientId.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/OktaClientId.jpg -------------------------------------------------------------------------------- /docs/en/deployment/images/OktaCreateApp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/OktaCreateApp.png -------------------------------------------------------------------------------- /docs/en/deployment/images/OktaIssuerUrl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/deployment/images/OktaIssuerUrl.jpg -------------------------------------------------------------------------------- /docs/en/developer-guide/api-preview.md: -------------------------------------------------------------------------------- 1 | This document only lists the main operation interfaces for each module. 
For more details, please refer to [OPENAPI](../openapi.json) 2 | 3 | ## APIs Related to Data Sources 4 | | url | Function | Rate Limit (requests per second) | 5 | |--------------------|-------------------|-------| 6 | | /data-source/add_account | Add Account | 10 | 7 | | /data-source/delete_account | Delete Account | 10 | 8 | | /data-source/add-jdbc-conn | Add Data Source | 10 | 9 | | /data-source/update-jdbc-conn | Edit Data Source | 10 | 10 | | /data-source/delete-jdbc | Delete Data Source | 10 | 11 | 12 | 13 | ## APIs Related to Job 14 | | url | Function | Rate Limit (requests per second) | 15 | |--------------------|-------------------|-------| 16 | | /discovery-jobs | Create Job | 10 | 17 | | /discovery-jobs/{job_id}/start | Start Job | 10 | 18 | 19 | ## APIs Related to Identifier 20 | | url | Function | Rate Limit (requests per second) | 21 | |--------------------|-------------------|-------| 22 | | /template/identifiers | Create Recognition Rule | 10 | 23 | | /template/template-mappings | Add Recognition Rule to Template | 10 | -------------------------------------------------------------------------------- /docs/en/developer-guide/images/api-access-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/developer-guide/images/api-access-3.png -------------------------------------------------------------------------------- /docs/en/developer-guide/images/api-access-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/developer-guide/images/api-access-5.png -------------------------------------------------------------------------------- /docs/en/developer-guide/images/api-access-6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/developer-guide/images/api-access-6.png -------------------------------------------------------------------------------- /docs/en/developer-guide/images/lambda-console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/developer-guide/images/lambda-console.png -------------------------------------------------------------------------------- /docs/en/developer-guide/source.md: -------------------------------------------------------------------------------- 1 | Visit our [GitHub repository](https://github.com/awslabs/sensitive-data-protection-on-aws) to download the templates and scripts for this solution. The Sensitive Data Protection on AWS Solution template is generated using the [AWS Cloud Development Kit (CDK)][cdk]. Refer to the [README.md](https://github.com/awslabs/sensitive-data-protection-on-aws/blob/main/README.md) file for additional information. 
2 | 3 | [cdk]: http://aws.amazon.com/cdk/ -------------------------------------------------------------------------------- /docs/en/images/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/arch.png -------------------------------------------------------------------------------- /docs/en/images/batch-create-datasource-failed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/batch-create-datasource-failed.png -------------------------------------------------------------------------------- /docs/en/images/batch-create-datasource-succeeded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/batch-create-datasource-succeeded.png -------------------------------------------------------------------------------- /docs/en/images/batch_create_datasource_enablemarcos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/batch_create_datasource_enablemarcos.png -------------------------------------------------------------------------------- /docs/en/images/data-protection-pain-points.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/data-protection-pain-points.png -------------------------------------------------------------------------------- 
/docs/en/images/edit-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/edit-icon.png -------------------------------------------------------------------------------- /docs/en/images/how-sdps-works.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/how-sdps-works.png -------------------------------------------------------------------------------- /docs/en/images/launch-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/launch-stack.png -------------------------------------------------------------------------------- /docs/en/images/orgs-ctrl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/orgs-ctrl.png -------------------------------------------------------------------------------- /docs/en/images/portal-catalog-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/portal-catalog-256.png -------------------------------------------------------------------------------- /docs/en/images/portal-identifiers-256.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/portal-identifiers-256.png -------------------------------------------------------------------------------- /docs/en/images/portal-summary-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/portal-summary-256.png -------------------------------------------------------------------------------- /docs/en/images/system-design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/images/system-design.png -------------------------------------------------------------------------------- /docs/en/notices.md: -------------------------------------------------------------------------------- 1 | Customers are responsible for making their own independent assessment of the information in this document. This document: (a) is for informational purposes only, (b) represents Amazon Web Services current product offerings and practices, which are subject to change without notice, and (c) does not create any commitments or assurances from Amazon Web Services and its affiliates, suppliers or licensors. Amazon Web Services products or services are provided “as is” without warranties, representations, or conditions of any kind, whether express or implied. Amazon Web Services responsibilities and liabilities to its customers are controlled by Amazon Web Services agreements, and this document is not part of, nor does it modify, any agreement between Amazon Web Services and its customers. 
2 | 3 | The Sensitive Data Protection on AWS solution is licensed under the terms of the Apache License Version 2.0 available at [The Apache Software Foundation][foundation]. 4 | 5 | [foundation]: https://www.apache.org/licenses/LICENSE-2.0 -------------------------------------------------------------------------------- /docs/en/plan-deployment/regions.md: -------------------------------------------------------------------------------- 1 | # Regional Deployment 2 | 3 | This solution uses services which may not be currently available in all AWS Regions. Launch this solution in an AWS Region where required services are available. For the most current availability by Region, refer to the AWS Regional Services List. 4 | 5 | ## Supported regions for deployment in AWS Global Regions 6 | 7 | | Region Name | Region ID | 8 | |----------|-------| 9 | | US East (N. Virginia) Region | us-east-1 10 | | US East (Ohio) Region | us-east-2 11 | | US West (N. California) Region | us-west-1 12 | | US West (Oregon) Region | us-west-2 13 | | Asia Pacific (Mumbai) Region | ap-south-1 14 | | Asia Pacific (Tokyo) Region | ap-northeast-1 15 | | Asia Pacific (Seoul) Region | ap-northeast-2 16 | | Asia Pacific (Singapore) Region | ap-southeast-1 17 | | Asia Pacific (Sydney) Region | ap-southeast-2 18 | | Canada (Central) Region | ca-central-1 19 | | Europe (Ireland) Region | eu-west-1 20 | | Europe (London) Region | eu-west-2 21 | | Europe (Paris) Region | eu-west-3 22 | | Europe (Frankfurt) Region | eu-central-1 23 | | South America (Sao Paulo) Region | sa-east-1 24 | 25 | 26 | ## Supported regions for deployment in AWS China Regions 27 | 28 | | Region Name | Region ID | 29 | |----------|-------| 30 | | AWS China (Beijing) Region operated by Sinnet | cn-north-1 31 | | AWS China (Ningxia) Region operated by NWCD | cn-northwest-1 -------------------------------------------------------------------------------- /docs/en/revisions.md: 
-------------------------------------------------------------------------------- 1 | | Date | Modifications | 2 | |--------------|----------------------------------------------------------------| 3 | | June 2023 | Initial release version. Support for AWS data sources: S3 and Amazon RDS for scanning sensitive data in structured data | 4 | | December 2023| Release v1.1.0.
1) Support for unstructured data scanning in S3.
2) Support for new AWS data sources: self-hosted databases in EC2 and Glue data catalogs.
3) Support for databases in other clouds (Tencent/Google): databases that can be connected via JDBC. | 5 | -------------------------------------------------------------------------------- /docs/en/solution-overview/concepts-and-definitions.md: -------------------------------------------------------------------------------- 1 | # Concepts and definitions 2 | - **Data source**: AWS resources where data is stored, such as Amazon S3, and Amazon RDS. 3 | - **Data catalog**: A repository of metadata of a data source, allowing you to manage data at the column level. For example, you can view the table schema, sample data of a particular column, and add labels to specific data fields. 4 | - **Data identifier**: The rule used to detect data. You can define custom data identifiers using RegEx and keywords. 5 | - **Classification template**: A collection of data identifiers. Data identifiers are rules used to detect data. 6 | - **Sensitive data discovery job**: A job that uses a template to detect sensitive data. The job automatically labels sensitive data in the data catalog. 7 | - **Glue job**: A job that is triggered by a sensitive data discovery job to scan sensitive data using AWS Glue. One discovery job can trigger AWS Glue jobs in multiple AWS accounts in a distributed manner. -------------------------------------------------------------------------------- /docs/en/solution-overview/features-and-benefits.md: -------------------------------------------------------------------------------- 1 | The solution includes the following features: 2 | 3 | **Data discovery**: supports various data sources, such as Amazon S3 and Amazon RDS, across multiple AWS accounts. The solution allows you to easily create a data catalog and run sensitive data discovery jobs. The solution leverages not only pattern-based discovery but also ML classification based on deep learning Natural Language Processing (NLP) and Named Entity Recognition (NER). 
4 | 5 | **Flexible data classification**: defines data classification templates for detecting privacy data, such as personal information. The solution allows you to define custom sensitive data types or choose from over 200 built-in data types. 6 | 7 | **Centralized data visualization**: provides the dashboards with an overview of the data catalogs and sensitive data status, such as data location, and data sources. 8 | -------------------------------------------------------------------------------- /docs/en/uninstall.md: -------------------------------------------------------------------------------- 1 | To uninstall the solution, you must delete the AWS CloudFormation stack. 2 | 3 | You can use either the AWS Management Console or the AWS Command Line Interface (AWS CLI) to delete the CloudFormation stack. 4 | 5 | **Time to uninstall:** Approximately 60 minutes 6 | 7 | ## Uninstall the stack using the AWS Management Console 8 | 9 | 1. Sign in to the AWS CloudFormation console. 10 | 1. Select this solution’s installation parent stack. 11 | 1. Choose **Delete**. 12 | 13 | ## Uninstall the stack using AWS Command Line Interface 14 | 15 | Determine whether the AWS Command Line Interface (AWS CLI) is available in your environment. For installation instructions, refer to [What Is the AWS Command Line Interface][aws-cli] in the *AWS CLI User Guide*. After confirming that the AWS CLI is available, run the following command. 
16 | 17 | ```bash 18 | aws cloudformation delete-stack --stack-name YOUR_STACK_NAME --region YOUR_AWS_REGION 19 | ``` 20 | 21 | 22 | [cloudformation-console]: https://console.aws.amazon.com/cloudformation/home 23 | [aws-cli]: https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html 24 | -------------------------------------------------------------------------------- /docs/en/update/images/DeleteENI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/update/images/DeleteENI.png -------------------------------------------------------------------------------- /docs/en/update/images/InputUrl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/update/images/InputUrl.jpg -------------------------------------------------------------------------------- /docs/en/update/images/SelectStack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/en/update/images/SelectStack.png -------------------------------------------------------------------------------- /docs/en/user-guide/appendix-organization.md: -------------------------------------------------------------------------------- 1 | You can use [AWS Organizations](https://aws.amazon.com/organizations/) to manage automated deployment of monitored accounts. In AWS CloudFormation, you can configure [StackSet](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/what-is-cfnstacksets.html) to deploy the Agent stack in the target Organizational Unit (OU).
After you have configured the deployment, the Agent stack will be automatically deployed to the specified region of the account under the OU. Finally, deploy the IT stack to the Organizations management account or to the corresponding CloudFormation delegated administrator account under Organizations. You can then add member accounts [via Organizations](../user-guide/data-source.md#add-aws-accounts-via-organization). 2 | 3 | ![orgs-ctrl](docs/../../images/orgs-ctrl.png) 4 | 5 | ## Steps 6 | 7 | 1. Deploy the Admin CloudFormation stack in the Admin account. 8 | 2. Register a delegated administrator for StackSets in the Organizations management account. For more information, refer to [Register a delegated administrator](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/stacksets-orgs-delegated-admin.html). 9 | 3. Deploy the IT CloudFormation stack. 10 | 4. Create a role for the solution Admin API. 11 | 5. Create a StackSet for the Agent CloudFormation stack. 12 | 6. Deploy to Organization/OU(s). 13 | 7. Add member accounts [via Organizations](../user-guide/data-source.md#add-aws-accounts-via-organization). 14 | 8. Retrieve deployment stacks and member accounts. -------------------------------------------------------------------------------- /docs/en/user-guide/appendix-permissions.md: -------------------------------------------------------------------------------- 1 | # Appendix: Permissions for agent CloudFormation stack 2 | 3 | The solution follows the least privilege principle to grant permissions to monitored account(s) when deploying the CloudFormation template. 4 | 5 | The permissions can be described at a high level: 6 | 7 | - (Data source) Amazon S3: read-only permission for data source scanning. 8 | - (Data source) Amazon RDS: read-only permission for data source scanning. 9 | - AWS Secrets Manager: read-only permission. If an RDS database is secured with secrets, the solution reads the credentials from Secrets Manager. 10 | - AWS Glue: write permission.
The Glue Data Catalog, Glue crawlers, and Glue jobs are used. Glue is triggered by Step Functions. 11 | - AWS Step Functions: resource created. Step Functions is used to orchestrate Glue jobs for data discovery. 12 | - AWS Lambda: resource created. 13 | - Amazon CloudWatch: write permission. Lambda logs will be stored in CloudWatch. 14 | 15 | 16 | !!! Info "For more information" 17 | You can view specific permission details in [Template for Monitored account (Agent template)](../deployment/template.md). -------------------------------------------------------------------------------- /docs/en/user-guide/check-result-dashboard.md: -------------------------------------------------------------------------------- 1 | # Review the Results 2 | 3 | ## View Summary Statistics 4 | 5 | In the left-side menu, select **Overview**. You can see data statistics as shown in the following image. 6 | 7 | ![dashboard](docs/../../images/cn-dashboard.png) 8 | 9 | ## Query Sensitive Data 10 | 11 | In the statistics panel, in the **Popular Data Identifiers** section, you can perform a reverse search on sensitive data. This lets you quickly locate the data sources that contain sensitive data. 12 | ![dashboard-search](docs/../../images/cn-dashboard-search.png) 13 | -------------------------------------------------------------------------------- /docs/en/user-guide/data-catalog-create-glue.md: -------------------------------------------------------------------------------- 1 | # Connect to Data Sources 2 | After adding an AWS account, you can connect to AWS Glue Data Catalogs to scan data sources that use AWS Glue as a data catalog (metadata catalog). 3 | 4 | ## Connect to AWS Glue Data Source 5 | 6 | !!! Info "Supported Big Data Data Types" 7 | For specific data formats supported by AWS Glue, please refer to [Built-in Classifiers in AWS Glue](https://docs.aws.amazon.com/glue/latest/dg/add-classifier.html). 8 | 9 | The solution also supports Glue Hudi tables. 10 | 11 | 1.
On the **Connect Data Sources** page, click an account to open its details page. 12 | 2. On the **Glue Data Catalogs** tab, select a Glue connection, then choose **Sync to Data Catalog**. 13 | 3. You will see the catalog status turn to gray `PENDING`, indicating the connection is starting (about 3 minutes). 14 | 4. When you see the catalog status turn to green `ACTIVE`, it means the Glue Data Catalog has been synchronized to the SDP platform's data catalog. 15 | 16 | At this point, you have successfully connected to the Glue Data Catalog and can proceed to the next steps. 17 | -------------------------------------------------------------------------------- /docs/en/user-guide/data-catalog-delete.md: -------------------------------------------------------------------------------- 1 | You can delete data catalogs if you no longer need them. 2 | ## Delete data catalogs for S3 3 | 4 | 1. On the **Connect to data source** page, click one account to open its details page. 5 | 2. In the **S3** tab, select an S3 bucket, and choose **Delete data catalog** from the **Actions** list. 6 | 7 | ## Delete data catalogs for RDS 8 | 9 | 1. On the **Connect to data source** page, click one account to open its details page. 10 | 2. Choose the **Amazon RDS** tab. 11 | 3. Select an RDS instance, and choose **Delete data catalog** from the **Actions** list. -------------------------------------------------------------------------------- /docs/en/user-guide/data-catalog-export.md: -------------------------------------------------------------------------------- 1 | You can export the latest data catalogs. 2 | 3 | 1. On the **Browse data catalogs** page, click the **Export data catalogs** button. 4 | 2. Choose either **Download .xlsx file** (Microsoft Excel format) or **Download .csv file**. 5 | 6 | The exported file contains all column-level details of the current data catalogs. The file schema is shown below.
7 | 8 | | S3 | RDS | 9 | |-----------------------|------------------------| 10 | | account_id | account_id | 11 | | region | region | 12 | | s3_bucket | rds_instance_id | 13 | | folder_name | table_name | 14 | | column_name | column_name | 15 | | identifiers | identifiers | 16 | | sample_data | sample_data | 17 | | bucket_catalog_label | instance_catalog_label | 18 | | folder_catalog_label | table_catalog_label | 19 | | comment | comment | -------------------------------------------------------------------------------- /docs/en/user-guide/data-catalog-sync.md: -------------------------------------------------------------------------------- 1 | ## What is data catalog? 2 | A data catalog is a repository of metadata about a data source (Amazon S3, Amazon RDS). With data catalogs, you can view column-level information about your data. 3 | 4 | ## When are the data catalogs synchronized with data source? 5 | 6 | The solution synchronizes the data catalogs with the data source in the following situations: 7 | - Sync to data catalog (manual). Please refer to [Connect to data source](data-source.md). 8 | - Run sensitive data discovery job (automatic). 9 | 10 | !!! Info "For more information" 11 | Synchronizing a data catalog does not affect the labels on an existing data catalog.
12 | 13 | | AWS resource | Data source change | Sync to data catalog | Run sensitive data discovery jobs | 14 | | --- | --- | --- | --- | 15 | | S3 | bucket created | Y | Y | 16 | | S3 | bucket deleted | Y | Y | 17 | | S3 | object created | Y | Y | 18 | | S3 | object deleted | Y | Y | 19 | | S3 | object(in bucket root) created | Y | Y | 20 | | S3 | object(in bucket root) deleted | N | N | 21 | | S3 | object updated (timestamp changed) | Y | Y | 22 | | RDS | instance created | Y | Y | 23 | | RDS | instance deleted | Y | Y | 24 | | RDS | instance updated | Y | Y | 25 | | RDS | database created | Y | Y | 26 | | RDS | database deleted | Y | Y | 27 | | RDS | table created | Y | Y | 28 | | RDS | table deleted | Y | Y | 29 | | RDS | table updated | Y | Y | 30 | | RDS | column created | Y | Y | 31 | | RDS | column deleted | Y | Y | 32 | | RDS | column updated | Y | Y | 33 | -------------------------------------------------------------------------------- /docs/en/user-guide/discovery-job-details.md: -------------------------------------------------------------------------------- 1 | Sensitive data discovery jobs consist of Glue jobs running in monitored AWS accounts (the same accounts as the data sources). 2 | 3 | * Return to the list of sensitive data tasks, you can see the job status as `Running`. 4 | * To view task progress: Click on the task, in the sidebar, click **Task Run Details**. 5 | ![edit-icon](docs/../../images/cn-job-status.png) 6 | 7 | * Initially, the progress may remain at 0%. Do not worry, as the system is checking for any changes in the data structure. The progress will update once the actual data scan begins. 8 | ![edit-icon](docs/../../images/cn-job-status-progress.png) 9 | !!! Info "Run Duration" 10 | The duration depends on the sampling rate, the tables to be scanned, and the number of identifiers in the template. 
11 | For example: For one instance with 400 tables, a scan depth of 30, and 21 rules in the template, it might take approximately 25 minutes. 12 | Different S3 buckets/database instances are scanned in parallel by the backend. 13 | 14 | * Wait for the Glue job status to change to `SUCCEEDED`. This indicates that the scanning task is complete. 15 | * If the Glue job fails, you can click on the `FAILED` status to view its error logs. 16 | 17 | ## Download Classification Template Snapshot 18 | You can download a snapshot of the template as it was at the start of the job. The snapshot shows which data identifiers the job was using. 19 | 20 | On the **Job Details** page, select **Download Snapshot** to download the template snapshot in JSON format (.json). 21 | -------------------------------------------------------------------------------- /docs/en/user-guide/discovery-job-pause-and-cancel.md: -------------------------------------------------------------------------------- 1 | You can pause or resume only a scheduled job, that is, its future scheduled runs. A discovery job run that is already in progress cannot be paused or resumed. 2 | 3 | To pause a scheduled job, on the **Run Sensitive Data Discovery Jobs** page, click **Actions** and select **Pause**. For instance, if you scheduled a monthly job on the first day of every month and ran a job once in January, choosing **Pause** will prevent the discovery job from being executed in February. 4 | 5 | To resume a paused job, on the **Run Sensitive Data Discovery Jobs** page, click **Actions** and select **Continue**. 6 | -------------------------------------------------------------------------------- /docs/en/user-guide/discovery-job-rerun-and-duplicate.md: -------------------------------------------------------------------------------- 1 | ## Re-run a Discovery Job 2 | On the **Run Sensitive Data Discovery Jobs** page, click **Actions** and select **Execute once**. You can create a new discovery job and run it with the same settings as the previous run. 
3 | 4 | ## Duplicate a Discovery Job 5 | On the **Run Sensitive Data Discovery Jobs** page, click **Actions** and select **Duplicate**. You can duplicate a job setting and modify it to start a new job. -------------------------------------------------------------------------------- /docs/en/user-guide/get-started.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | After successfully [deploying the solution](../deployment/deployment.md), you can access the console to detect sensitive data. 3 | 4 | - **Step 1**: [Connect to Data Sources](data-source.md) Add AWS accounts and create data catalogs. 5 | - **Step 2**: [Define Classification Templates](data-classification-template.md) Manage data identifiers within templates to define sensitive data. 6 | - **Step 3**: [Run Sensitive Data Discovery Jobs](discovery-job-create.md) Detect sensitive data by creating and managing data discovery jobs. 7 | - **Step 4**: [Review Results](data-catalog-sync.md) View the metadata (such as table structures) of data sources through the data catalog, see updated data catalogs, and check the dashboard on the "Summary" page. 
8 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | -------------------------------------------------------------------------------- /docs/mkdocs.base.yml: -------------------------------------------------------------------------------- 1 | site_name: Sensitive Data Protection on AWS 2 | site_url: https://awslabs.github.io/sensitive-data-protection-on-aws/ 3 | repo_url: https://github.com/awslabs/sensitive-data-protection-on-aws 4 | repo_name: awslabs/sensitive-data-protection-on-aws 5 | 6 | theme: 7 | favicon: https://s3.cn-north-1.amazonaws.com.cn/aws-assets-prod/libra-css/images/site/fav/favicon.ico 8 | logo: https://s3.cn-north-1.amazonaws.com.cn/aws-assets-prod/libra-css/images/site/fav/favicon.ico 9 | name: material 10 | palette: 11 | - media: "(prefers-color-scheme: light)" 12 | scheme: default 13 | toggle: 14 | icon: material/toggle-switch-off-outline 15 | name: Switch to dark mode 16 | - media: "(prefers-color-scheme: dark)" 17 | scheme: slate 18 | toggle: 19 | icon: material/toggle-switch 20 | name: Switch to light mode 21 | 22 | extra: 23 | generator: false 24 | copyright: Copyright © 2020 - 2023 Amazon Web Services 25 | alternate: 26 | - name: English 27 | link: /sensitive-data-protection-on-aws/en/ 28 | lang: en 29 | - name: 简体中文 30 | link: /sensitive-data-protection-on-aws/zh/ 31 | lang: zh 32 | bucket: aws-gcr-solutions 33 | version: develop 34 | 35 | plugins: 36 | - search 37 | - include-markdown 38 | - macros 39 | 40 | markdown_extensions: 41 | - admonition 42 | - attr_list 43 | - pymdownx.details 44 | - pymdownx.superfences 45 | 46 | 47 | -------------------------------------------------------------------------------- /docs/zh/about-premium-edition.md: -------------------------------------------------------------------------------- 1 | Sensitive Data Protection Premium Edition(SDP-PE)是由 AWS 
专业服务团队(大中华区)提供的解决方案。它不仅包括 SDP(这个开源项目)的主体功能,还包括 AWS 专业服务团队提供的功能。 2 | 3 | SDP-PE 的功能包括但不限于以下内容: 4 | 5 | ### 数据检查 6 | 7 | - 特定行业的数据分类分级模板(例如,用于汽车行业) 8 | - 文档检查(PDF、TXT 等) 9 | - 图像检查(人脸识别、OCR、车牌等) 10 | 11 | ### 数据脱敏 12 | 13 | - 数据脱敏规则配置 14 | - 数据脱敏 15 | - 图像脱敏(人脸识别、OCR、车牌等) 16 | - 文档脱敏(PDF、TXT 等) 17 | 18 | ### 审计和报告 19 | 20 | - 通过本解决方案审计所有 API 调用(数据检查和数据脱敏)。 21 | - 查看仪表板并下载所有 API 调用记录的报告。 22 | 23 | 如需更多详细信息,请联系 AWS 销售(大中华区)获取进一步信息和价格报价。 -------------------------------------------------------------------------------- /docs/zh/architecture-details/architecture-details.md: -------------------------------------------------------------------------------- 1 | # 解决方案如何运作 2 | 3 | 本节描述了构成此解决方案的组件和 AWS 服务以及高层次系统设计。 4 | 5 | ![高层次系统设计](docs/../../images/system-design.png) 6 | **AWS 上的敏感数据保护高层次系统设计** 7 | 8 | 如图所示,集中式敏感数据治理帐户是管理员帐户。解决方案用户(通常为安全审计员)可以在部署 **Admin** 堆栈后通过 Web 门户访问解决方案。用户可以在部署 **Agent** 堆栈并登录 Web 门户后浏览数据目录并在监控帐户中执行敏感数据检测作业。 9 | 10 | 多个监控帐户与管理员帐户连接,具有数据源访问和作业执行权限,以便管理员帐户可以在指定的监控帐户中调用作业处理器模型进行敏感数据检测。 11 | 12 | ## 管理员帐户中的模块 13 | 14 | - **Web 门户(UI)**:解决方案管理员或普通用户可以通过 Web 门户访问解决方案。它提供安全的用户访问管理和解决方案的 Web UI。 15 | 16 | - **数据源管理(DSM)**:DSM 负责通过数据源检测器从监控帐户中检索数据源并存储数据源结构。用户可以探索监控帐户中的数据存储,例如 S3 存储桶和 RDS 实例。 17 | 18 | - **数据目录管理(DCM)**:DCM 可以发现 DSM 中数据源的最新模式(通常称为元数据)。该模式包括表列等信息在 RDS 数据库中以及敏感数据检测作业运行后的敏感数据检测结果。 19 | 20 | - **作业控制器(JC)**:作业控制器负责在监控帐户中执行检测作业并将检测结果收集回管理员帐户。它可以配置作业按用户定义的时间表或根据需要运行。 21 | 22 | - **模板配置(TC)**:检测模板存储在 TC 模型中。它包含内置模板和自定义模板。JC 可以检索模板以运行作业处理器。 23 | 24 | - **帐户管理(AM)**:监控 AWS 帐户由 AM 模型管理。 25 | 26 | ## 监控帐户中的模块 27 | 28 | - **作业处理器**:作业处理器是敏感数据检测的运行容器,由作业控制器调用。作业处理器将原始数据读取到检测引擎进行检测,并将分析结果和运行状态发送到作业控制器。 29 | 30 | - **检测引擎**:检测引擎模型是具有 AI/ML 支持功能的核心敏感数据检测引擎。它从作业处理器接收数据,使用预先训练的 ML 模型或模式识别敏感数据类型。 31 | -------------------------------------------------------------------------------- /docs/zh/architecture-details/services-in-the-solution.md: -------------------------------------------------------------------------------- 1 | 此解决方案使用了以下 AWS 服务: 2 | 3 | | AWS 服务 | 描述 | 4 
| | --- | --- | 5 | | [Application Load Balancer](https://aws.amazon.com/alb/) | **核心组件**。用于分发前端 Web UI 资产。 | 6 | | [Amazon ECR](https://aws.amazon.com/ecr/) | **核心组件**。用于存储 Docker 镜像。 | 7 | | [AWS Lambda](https://aws.amazon.com/lambda/) | **核心组件**。作为应用程序负载均衡器的目标。 | 8 | | [AWS Step Functions](https://aws.amazon.com/step-functions/) | **支持**。任务流程控制 | 9 | | [AWS Glue](https://aws.amazon.com/glue/) | **支持**。用于清点数据源。被调用以进行敏感数据检测。 | 10 | | [Amazon RDS](https://aws.amazon.com/rds/) | **支持**。仅需几次点击即可在云中设置、操作和扩展关系数据库。 | 11 | | [Amazon SQS](https://aws.amazon.com/sqs/) | **支持**。让 Step Functions 向检测作业队列发送消息。 | 12 | | [Amazon SageMaker](https://aws.amazon.com/sagemaker/) | **支持**。非结构化数据预处理。 | 13 | -------------------------------------------------------------------------------- /docs/zh/architecture-overview/architecture.md: -------------------------------------------------------------------------------- 1 | 使用默认参数部署此解决方案将在AWS云中构建以下环境。 2 | 3 | ![架构图](docs/../../images/arch.png) 4 | **AWS上的敏感数据保护架构** 5 | 6 | 1. [应用负载均衡器](https://aws.amazon.com/alb/)用于分发托管在[AWS Lambda](https://aws.amazon.com/lambda/)中的解决方案前端Web UI资源。 7 | 2. 身份提供者用于用户身份验证。 8 | 3. AWS Lambda函数打包为Docker镜像,并存储在[Amazon ECR(弹性容器注册表)](https://aws.amazon.com/ecr/)中。 9 | 4. 后端Lambda函数是应用负载均衡器的目标。 10 | 5. 后端Lambda函数调用监控账户中的[AWS Step Functions](https://aws.amazon.com/step-functions/)来进行敏感数据检测。 11 | 6. 在[AWS Step Functions](https://aws.amazon.com/step-functions/)工作流中,[AWS Glue](https://aws.amazon.com/glue/) Crawler运行以列出结构化数据源,并将其作为元数据表存储在Glue数据库中。[Amazon SageMaker](https://aws.amazon.com/sagemaker/) processing job用于预处理S3桶中的非结构化文件,并将元数据存储在Glue数据库中。AWS Glue Job用于检测敏感数据。 12 | 7. Glue作业运行后,Step Functions会将消息发送到[Amazon SQS(简单队列服务)](https://aws.amazon.com/sqs/)中的检测作业队列。 13 | 8. Lambda函数从Amazon SQS中处理消息。 14 | 9. 
[Amazon Athena](https://aws.amazon.com/athena/)查询检测结果,并将其保存到[Amazon RDS(关系型数据库服务)](https://aws.amazon.com/rds/)中的MySQL实例中。 15 | 16 | 该解决方案使用AWS Glue服务作为构建受监控账户中的数据目录和调用Glue作业以检测敏感数据个人身份识别信息(PII)的核心。分布式的Glue作业在每个受监控的账户中运行,而管理员账户包含了跨AWS账户的集中式数据源数据目录。这是AWS推荐的数据网格(Data Mesh)概念的一种实现方式。 17 | 18 | 更具体地说,该解决方案引入了事件驱动的流程,并使用AWS IAM角色在管理员账户和受监控账户之间触发和通信以进行敏感数据发现作业。管理员账户可以启动PII检测作业并检索数据目录。所有受监控的AWS账户都被允许连接到管理员账户,管理员账户能够区分和访问这些受监控的账户。 -------------------------------------------------------------------------------- /docs/zh/contributors.md: -------------------------------------------------------------------------------- 1 | # 贡献者 2 | 3 | - Chen, Haiyun 4 | - Cui, Hubin 5 | - Gu, George 6 | - Hao, Liang 7 | - Han, Xu 8 | - Ji, Junxiang 9 | - Jia, Ting 10 | - Li, Xiujuan 11 | - Lv, Ning 12 | - Qin, Dehua 13 | - Su, Fan 14 | - Wang, Yu 15 | - Wang, Xudong 16 | - Yi, Ke 17 | - Yi, Yan 18 | - Zhang, Junzhong 19 | -------------------------------------------------------------------------------- /docs/zh/deployment/images/AuthingAuthenticationCofiguration.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/AuthingAuthenticationCofiguration.jpg -------------------------------------------------------------------------------- /docs/zh/deployment/images/AuthingCallbackURL.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/AuthingCallbackURL.jpg -------------------------------------------------------------------------------- /docs/zh/deployment/images/AuthingEndpointInformation.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/AuthingEndpointInformation.jpg -------------------------------------------------------------------------------- /docs/zh/deployment/images/CloudformationOutput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/CloudformationOutput.png -------------------------------------------------------------------------------- /docs/zh/deployment/images/CognitoAppType.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/CognitoAppType.jpg -------------------------------------------------------------------------------- /docs/zh/deployment/images/CognitoCallbackURL.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/CognitoCallbackURL.jpg -------------------------------------------------------------------------------- /docs/zh/deployment/images/CognitoClientId.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/CognitoClientId.png -------------------------------------------------------------------------------- /docs/zh/deployment/images/CognitoConnectScopes.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/CognitoConnectScopes.jpg -------------------------------------------------------------------------------- /docs/zh/deployment/images/CognitoHostedUI.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/CognitoHostedUI.jpg -------------------------------------------------------------------------------- /docs/zh/deployment/images/CognitoUserpoolId.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/CognitoUserpoolId.png -------------------------------------------------------------------------------- /docs/zh/deployment/images/OktaAcess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/OktaAcess.png -------------------------------------------------------------------------------- /docs/zh/deployment/images/OktaAppIntegration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/OktaAppIntegration.png -------------------------------------------------------------------------------- /docs/zh/deployment/images/OktaCallbackURL.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/OktaCallbackURL.jpg -------------------------------------------------------------------------------- /docs/zh/deployment/images/OktaClientId.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/OktaClientId.jpg -------------------------------------------------------------------------------- /docs/zh/deployment/images/OktaCreateApp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/OktaCreateApp.png -------------------------------------------------------------------------------- /docs/zh/deployment/images/OktaIssuerUrl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/deployment/images/OktaIssuerUrl.jpg -------------------------------------------------------------------------------- /docs/zh/developer-guide/api-access.md: -------------------------------------------------------------------------------- 1 | 如果需要调用API实现数据检测,请先在lambda的环境变量里配置ApiKey,系统会通过请求头里面的指定字段验证ApiKey的真实性,具体操作请参考本文档。 2 | 3 | ## 自定义安全密钥 4 | 5 | - 步骤一:登录aws后台,选择lambda选项,进入lambda控制台 6 | 7 | ![lambda控制台](images/lambda-console.png) 8 | 9 | - 步骤二:选择右上角对应的部署region, 进入Functions列表页 10 | - 步骤三: 在搜索框里输入APIAPIFunction,在下拉列表中选择命中的Function 11 | 12 | ![api-fun](images/api-access-3.png) 13 | 14 | - 步骤四: 点击Function name链接,进入该Function页面,选中进入 Configuration 标签页 15 | - 步骤五: 选中左侧的Environment variables标签,点击右边面板右上角的edit按钮,添加环境变量 16 | 17 | ![env-tab](images/api-access-5.png) 18 | 19 
| - 步骤六: 在环境变量编辑页面,新增key-value,key为固定值“ApiKey”,value为用户自定义的内容,后续访问api的时候会以这个value值做校验 20 | 21 | ![edit-env](images/api-access-6.png) 22 | 23 | ## 安全验证 24 | 25 | 所有 API 使用 API 密钥进行安全验证,所有 API 请求都应在 HTTP 标头中包含您的 API 密钥,`x-api-key` 如下所示: 26 | 27 | ```config 28 | x-api-key: xxxxxxxxxxxxxxxxxxxx 29 | ``` 30 | -------------------------------------------------------------------------------- /docs/zh/developer-guide/api-preview.md: -------------------------------------------------------------------------------- 1 | 本文仅列出每个模块主要操作接口。更详细请参考[OPENAPI](../openapi.json) 2 | 3 | ## 数据源相关接口 4 | | url | 接口功能 | 频率限制(次/秒) | 5 | |--------------------|-------------------|-------| 6 | | /data-source/add_account | 添加账号 | 10 | 7 | | /data-source/delete_account | 删除账号 | 10 | 8 | | /data-source/add-jdbc-conn | 添加数据源 | 10 | 9 | | /data-source/update-jdbc-conn | 编辑数据源 | 10 | 10 | | /data-source/delete-jdbc | 删除数据源 | 10 | 11 | 12 | 13 | ## 检测任务相关接口 14 | | url | 接口功能 | 频率限制(次/秒) | 15 | |--------------------|-------------------|-------| 16 | | /discovery-jobs | 创建任务 | 10 | 17 | | /discovery-jobs/{job_id}/start | 启动任务 | 10 | 18 | 19 | ## 数据识别规则相关接口 20 | | url | 接口功能 | 频率限制(次/秒) | 21 | |--------------------|-------------------|-------| 22 | | /template/identifiers | 创建识别规则 | 10 | 23 | | /template/template-mappings | 将识别规则添加进模版 | 10 | -------------------------------------------------------------------------------- /docs/zh/developer-guide/images/api-access-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/developer-guide/images/api-access-3.png -------------------------------------------------------------------------------- /docs/zh/developer-guide/images/api-access-5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/developer-guide/images/api-access-5.png -------------------------------------------------------------------------------- /docs/zh/developer-guide/images/api-access-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/developer-guide/images/api-access-6.png -------------------------------------------------------------------------------- /docs/zh/developer-guide/images/lambda-console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/developer-guide/images/lambda-console.png -------------------------------------------------------------------------------- /docs/zh/developer-guide/source.md: -------------------------------------------------------------------------------- 1 | 访问我们的[GitHub存储库](https://github.com/awslabs/sensitive-data-protection-on-aws),下载此解决方案的源文件。敏感数据保护解决方案的模板是使用[云开发工具包(CDK)](http://aws.amazon.com/cdk/)生成的。有关更多信息,请参阅[README.md](https://github.com/awslabs/sensitive-data-protection-on-aws/blob/main/README.md)文件。 -------------------------------------------------------------------------------- /docs/zh/images/account-list-cn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/account-list-cn.png -------------------------------------------------------------------------------- /docs/zh/images/arch.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/arch.png -------------------------------------------------------------------------------- /docs/zh/images/batch-create-datasource-failed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/batch-create-datasource-failed.png -------------------------------------------------------------------------------- /docs/zh/images/batch-create-datasource-succeeded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/batch-create-datasource-succeeded.png -------------------------------------------------------------------------------- /docs/zh/images/batch_create_datasource_enablemarcos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/batch_create_datasource_enablemarcos.png -------------------------------------------------------------------------------- /docs/zh/images/cn-custom-identifier-create.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/cn-custom-identifier-create.png -------------------------------------------------------------------------------- /docs/zh/images/cn-custom-identifier.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/cn-custom-identifier.png -------------------------------------------------------------------------------- /docs/zh/images/cn-dashboard-search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/cn-dashboard-search.png -------------------------------------------------------------------------------- /docs/zh/images/cn-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/cn-dashboard.png -------------------------------------------------------------------------------- /docs/zh/images/cn-identifier-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/cn-identifier-list.png -------------------------------------------------------------------------------- /docs/zh/images/cn-identifier-to-template.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/cn-identifier-to-template.png -------------------------------------------------------------------------------- /docs/zh/images/cn-job-status-progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/cn-job-status-progress.png 
-------------------------------------------------------------------------------- /docs/zh/images/cn-job-status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/cn-job-status.png -------------------------------------------------------------------------------- /docs/zh/images/cn-s3-authorize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/cn-s3-authorize.png -------------------------------------------------------------------------------- /docs/zh/images/data-protection-pain-points.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/data-protection-pain-points.png -------------------------------------------------------------------------------- /docs/zh/images/edit-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/edit-icon.png -------------------------------------------------------------------------------- /docs/zh/images/how-sdps-works.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/how-sdps-works.png -------------------------------------------------------------------------------- /docs/zh/images/job-list-cn.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/job-list-cn.png -------------------------------------------------------------------------------- /docs/zh/images/launch-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/launch-stack.png -------------------------------------------------------------------------------- /docs/zh/images/orgs-ctrl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/orgs-ctrl.png -------------------------------------------------------------------------------- /docs/zh/images/portal-catalog-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/portal-catalog-256.png -------------------------------------------------------------------------------- /docs/zh/images/portal-identifiers-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/portal-identifiers-256.png -------------------------------------------------------------------------------- /docs/zh/images/portal-summary-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/portal-summary-256.png -------------------------------------------------------------------------------- 
/docs/zh/images/system-design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/images/system-design.png -------------------------------------------------------------------------------- /docs/zh/index.md: -------------------------------------------------------------------------------- 1 | 敏感数据保护解决方案(Sensitive Data Protection on AWS, SDP)允许企业客户创建数据目录,发现、保护和可视化多个 AWS 账户中的敏感数据。该解决方案让您不必手动标记以跟踪敏感数据(如个人身份信息(PII)和分类信息)。 2 | 3 | 该解决方案提供了一种自助式 Web 应用程序的自动化数据保护方法。您可以使用自己的数据分类模板执行定期或按需敏感数据发现作业。此外,您可以访问诸如所有 AWS 账户中存储的敏感数据条目总数、包含最多敏感数据的账户以及敏感数据所在的数据源等指标。 4 | 5 | ![工作原理](images/how-sdps-works.png) 6 | 7 | 该解决方案帮助企业客户(如涉及安全或大数据业务的公司)安全团队实施以下数据保护措施: 8 | 9 | - 对数百个 AWS 账户进行集中管理 10 | - 自动发现数据资产 11 | - 敏感数据检测和自动标记 12 | - 与其他 AWS 服务或应用程序集成 13 | 14 | 本指南提供了解决方案、其参考架构和组件、部署规划考虑因素以及将解决方案部署到 Amazon Web Services (AWS) 云端的配置步骤的概述。 15 | 16 | 使用此导航表快速查找这些问题的答案: 17 | 18 | | 如果您想要…… | 阅读…… | 19 | |----------|--------| 20 | | 了解运行此解决方案的成本 | [成本](./plan-deployment/cost.md) | 21 | | 了解此解决方案的安全考虑因素 | [安全](./plan-deployment/security.md) | 22 | | 了解此解决方案支持哪些 AWS 区域 | [支持部署的 AWS 区域](./plan-deployment/regions.md) | 23 | | 查看或下载此解决方案中包含的 AWS CloudFormation 模板,以自动部署此解决方案的基础设施资源(“堆栈”) | [AWS CloudFormation 模板](./deployment/template.md) | 24 | 25 | 本指南面向具有在 AWS 云上架构实践经验的 IT 架构师、开发人员、DevOps 和数据工程师。 26 | 27 | 本解决方案仅提供技术方案,您应自行负责遵守适用于您的数据保护相关法规。 -------------------------------------------------------------------------------- /docs/zh/notices.md: -------------------------------------------------------------------------------- 1 | 客户有责任对本文档中的信息进行独立评估。本文档:(a)仅供参考,(b)代表 Amazon Web Services 当前的产品和实践,这些产品和实践可能会在不经通知的情况下发生变化,(c)不会产生来自 Amazon Web Services 及其关联公司、供应商或许可方的任何承诺或保证。Amazon Web Services 的产品或服务按“原样”提供,不带任何明示或暗示的保证、陈述或条件。Amazon Web Services 对其客户的责任和义务由 Amazon Web Services 协议控制,本文档不属于也不会修改 Amazon Web Services 与其客户之间的任何协议。 2 | 3 | AWS 上的敏感数据保护解决方案根据 
Apache 许可证版本 2.0 的条款获得许可,该许可证可在 [Apache 软件基金会][foundation] 获取。 4 | 5 | [foundation]: https://www.apache.org/licenses/LICENSE-2.0 -------------------------------------------------------------------------------- /docs/zh/plan-deployment/regions.md: -------------------------------------------------------------------------------- 1 | 此解决方案使用的服务可能尚未在所有 AWS 区域中提供。在需要的服务可用的 AWS 区域中启动此解决方案。有关按区域的最新可用性,请参阅 AWS 区域服务列表。 2 | 3 | ## AWS 全球区域支持部署的区域 4 | 5 | 6 | | 区域名称 | 区域 ID | 7 | |----------|-------| 8 | | 美国东部(弗吉尼亚北部)区域 | us-east-1 9 | | 美国东部(俄亥俄州)区域 | us-east-2 10 | | 美国西部(加利福尼亚北部)区域 | us-west-1 11 | | 美国西部(俄勒冈州)区域 | us-west-2 12 | | 亚太地区(孟买)区域 | ap-south-1 13 | | 亚太地区(东京)区域 | ap-northeast-1 14 | | 亚太地区(首尔)区域 | ap-northeast-2 15 | | 亚太地区(新加坡)区域 | ap-southeast-1 16 | | 亚太地区(悉尼)区域 | ap-southeast-2 17 | | 加拿大(中部)区域 | ca-central-1 18 | | 欧洲(爱尔兰)区域 | eu-west-1 19 | | 欧洲(伦敦)区域 | eu-west-2 20 | | 欧洲(巴黎)区域 | eu-west-3 21 | | 欧洲(法兰克福)区域 | eu-central-1 22 | | 南美洲(圣保罗)区域 | sa-east-1 23 | 24 | 25 | ## AWS 中国区域支持部署的区域 26 | 27 | 28 | | 区域名称 | 区域 ID | 29 | |----------|-------| 30 | | 由光环新网运营的 AWS 中国(北京)区域 | cn-north-1 31 | | 由西云数据运营的 AWS 中国(宁夏)区域 | cn-northwest-1 32 | -------------------------------------------------------------------------------- /docs/zh/revisions.md: -------------------------------------------------------------------------------- 1 | | 日期 | 修改 | 2 | |--------------|-------------------| 3 | | 2023年6月 | 首次发布版本
支持AWS数据源:S3和Amazon RDS的结构化数据的敏感数据扫描 | 4 | | 2023年12月 | 发布版本1.1.0
1) 支持S3非结构化数据扫描。
2) 支持新的AWS数据源:EC2自建数据库和Glue数据目录。
3) 支持其他云(Tencent/Google)的数据库:可以通过JDBC连接的数据库。 | -------------------------------------------------------------------------------- /docs/zh/solution-overview/concepts.md: -------------------------------------------------------------------------------- 1 | # 术语和概念 2 | 3 | - **数据源**:存储数据的 AWS 资源,例如 Amazon S3 和 Amazon RDS。 4 | - **数据目录**:数据源的元数据存储库,允许您在列级别管理数据。例如,您可以查看表模式、特定列的样本数据并为特定数据字段添加标签。 5 | - **数据标识符**:用于检测数据的规则。您可以使用 RegEx 和关键字定义自定义数据标识符。 6 | - **数据分类模板**:数据标识符的集合。数据标识符是用于检测数据的规则。 7 | - **敏感数据发现作业**:使用模板检测敏感数据的作业。作业会自动在数据目录中标记敏感数据。 8 | - **Glue 作业**:由敏感数据发现作业触发,使用 AWS Glue 扫描敏感数据的作业。一个发现作业可以在分布式方式下触发多个 AWS 帐户中的 AWS Glue 作业。 -------------------------------------------------------------------------------- /docs/zh/solution-overview/features-and-benefits.md: -------------------------------------------------------------------------------- 1 | 该解决方案包括以下功能: 2 | 3 | **数据发现**:支持多个 AWS 帐户中的各种数据源,例如 Amazon S3 和 Amazon RDS。解决方案允许您轻松创建数据目录并运行敏感数据发现作业。解决方案不仅利用基于模式的发现,还基于深度学习自然语言处理(NLP)和命名实体识别(NER)进行 ML 分类。 4 | 5 | **灵活的数据分类**:定义用于检测隐私数据(例如个人信息)的数据分类模板。解决方案允许您定义自定义敏感数据类型或从 200 多种内置数据类型中选择。 6 | 7 | **集中式数据可视化**:提供带有数据目录和敏感数据状态概览的仪表板,例如数据位置和数据源。 8 | 9 | -------------------------------------------------------------------------------- /docs/zh/troubleshooting.md: -------------------------------------------------------------------------------- 1 | # 故障排查 2 | 3 | ## 关于连接账号及连接数据源 4 | 5 | **连接RDS时提示错误信息:```At least one security group must open all ingress ports.```** 6 | 7 | 给RDS分配所在VPC的default安全组。这个是Glue的要求,详见[https://docs.aws.amazon.com/glue/latest/dg/glue-troubleshooting-errors.html](https://docs.aws.amazon.com/glue/latest/dg/glue-troubleshooting-errors.html)。 8 | 9 | 当您在RDS配了安全组之后,您需要删除SDP平台上对应数据目录,重新点击“同步至数据目录”。这样操作后台会重新获取一下RDS最新的配置,进而创建数据目录成功。 10 | 11 | **连接RDS时,`VPC S3 endpoint validation failed for SubnetId: subnet-000000. VPC: vpc-111111. 
Reason: Could not find S3 endpoint or NAT gateway for subnetId: subnet-000000 in Vpc`** 12 | 13 | 程序连接时有检查,一般不会出现该问题。手动删除后NAT Gateway或S3 endpoint(type为Gateway)会报这个错,添加NAT Gateway或S3 endpoint并配置路由后解决。 14 | 15 | **删除待检测账号中的Agent CloudFormation后,在main账号删除AWS账号时提示错误:`An error occurred (AccessDenied) when calling the AssumeRole operation: User: arn:aws:sts::5566xxxxxxx:assumed-role/SDPSAPIRole-us-east-1/SDPS-Admin-APIAPIFunction719F975A-yEQ3iOlIYK1F is not authorized to perform: sts:AssumeRole on resource: arn:aws:iam::8614xxxxxxx:role/SDPSRoleForAdmin-us-east-1`** 16 | 17 | Agent CloudFormation删除后需要等待5-8分钟,才能在主账号进行删除操作,否则会出现这个错误。出现错误请尝试在10分钟后再对main账号中的AWS账号进行删除。 18 | 19 | 20 | ## 关于敏感数据发现任务 21 | 22 | **在中国区域(China Regions)使用解决方案时,无法下载模板快照和报告文件。** 23 | 24 | 由于模板快照和报告从S3采用预签名方式下载,因此Admin所在账号必须做ICP备案或ICP Exception. 25 | -------------------------------------------------------------------------------- /docs/zh/uninstall.md: -------------------------------------------------------------------------------- 1 | 要卸载解决方案,您必须删除 AWS CloudFormation 堆栈。 2 | 3 | 您可以使用 AWS 管理控制台或 AWS 命令行界面(AWS CLI)删除 CloudFormation 堆栈。 4 | 5 | ## 使用 AWS 管理控制台卸载堆栈 6 | 7 | 1. 登录 AWS CloudFormation 控制台。 8 | 1. 选择此解决方案的安装父堆栈。 9 | 1. 
选择 **删除**。 10 | 11 | ## 使用 AWS 命令行界面卸载堆栈 12 | 13 | 确定您的环境中是否可用 AWS 命令行界面(AWS CLI)。有关安装说明,请参阅 *AWS CLI 用户指南* 中的 [什么是 AWS 命令行界面][aws-cli]。确认 AWS CLI 可用后,运行以下命令。 14 | 15 | ```bash 16 | aws cloudformation delete-stack --stack-name <installation-stack-name> --region <aws-region> 17 | ``` 18 | 19 | [cloudformation-console]: https://console.aws.amazon.com/cloudformation/home 20 | [aws-cli]: https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html -------------------------------------------------------------------------------- /docs/zh/update/images/DeleteENI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/update/images/DeleteENI.png -------------------------------------------------------------------------------- /docs/zh/update/images/InputUrl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/update/images/InputUrl.jpg -------------------------------------------------------------------------------- /docs/zh/update/images/SelectStack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/docs/zh/update/images/SelectStack.png -------------------------------------------------------------------------------- /docs/zh/update/update.md: -------------------------------------------------------------------------------- 1 | 2 | **升级所需时间**:大约20分钟 3 | 4 | ## 升级概述 5 | !!! Important "重要提示" 6 | 目前不支持从1.0升级到1.1,请删除1.0后重新部署1.1。 7 | 如果确实有该需求,请联系我们。 8 | !!! 
Important "重要提示" 9 | 请在更新前确认没有任何任务在运行。 10 | 本页用于指导已部署老版本,如何升级到最新版本。 11 | 按照以下步骤在AWS上升级此解决方案。 12 | 13 | - 步骤一:在您的AWS管理员账户中,升级**Admin**堆栈 14 | - 步骤二:在被监控账户中,升级**Agent**堆栈 15 | 16 | 17 | ## 升级步骤 18 | 19 | ### 步骤一:升级管理员堆栈 20 | !!! Important "重要提示" 21 | 更新堆栈模板地址必须和原堆栈模板地址保持一致,否则会升级失败。 22 | 即:原堆栈使用新建VPC模板,新堆栈也必须使用新建VPC模板;同样,原堆栈使用现有VPC模板,新堆栈也必须使用现有VPC模板 23 | 1. 登录到AWS管理控制台,进入到CloudFormation服务,选择以前部署的堆栈,再点击**更新**按钮。 24 | ![Select Stack](images/SelectStack.png) 25 | 2. 选择**替换当前模板**,然后在**Amazon S3 URL**输入框中输入对应的模板,然后点击**下一步**。模板地址参考[模板信息](../deployment/template.md)。 26 | ![Input Url](images/InputUrl.jpg) 27 | 3. 在**指定堆栈详细信息**页面上,各个参数保持不变,然后点击**下一步**。 28 | 4. 在**配置堆栈选项**页面上,各个参数保持不变,然后点击**下一步**。 29 | 5. 在**审核**页面上,查看并确认设置。选中3个“我确认”的复选框,点击**提交**按钮以更新堆栈。 30 | 6. 从1.1.0升级到后续版本时,请在CloudFormation更新5分钟后,手动删除4个状态为**Available**的ENI:2个描述内容包含**PortalConfigFunction**,2个描述内容包含**PortalFunction**。 31 | ![Delete ENI](images/DeleteENI.png) 32 | 7. 等待约20分钟,以确保成功更新了所有相关资源。您可以选择“资源”和“事件”选项卡查看堆栈的状态。 33 | 更新成功后,即可重新打开管理员页面。 34 | !!! Important "重要提示" 35 | 需要手动删除ENI,否则升级会失败。 36 | 37 | ### 步骤二:升级Agent堆栈 38 | 39 | 操作步骤和升级管理员堆栈相同,注意在输入模板地址时,输入Agent模板地址即可。 40 | !!! Important "重要提示" 41 | 42 | Agent必须和管理员版本同时升级,否则版本不匹配时,任务运行会报错。 -------------------------------------------------------------------------------- /docs/zh/user-guide/appendix-organization.md: -------------------------------------------------------------------------------- 1 | 您可以使用 [AWS Organizations](https://aws.amazon.com/organizations/) 管理监控帐户的自动化部署。在 AWS CloudFormation 中,您可以配置 [StackSet](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/what-is-cfnstacksets.html) 在目标组织单元(OU)中部署 Agent 堆栈。配置部署后,Agent 堆栈将自动部署到 OU 下帐户的指定区域。最后,您需要将 IT 堆栈部署到 Organizations 管理帐户或 Organizations 下的相应 CloudFormation 委托帐户,然后,您可以通过 Organizations [添加成员帐户](../user-guide/data-source.md#add-aws-accounts-via-organization)。 2 | 3 | ![orgs-ctrl](docs/../../images/orgs-ctrl.png) 4 | 5 | ## 操作步骤 6 | 1. 在 Admin account 中部署 Admin CloudFormation 堆栈 7 | 2. 
在组织管理帐户中的 StackSets 中注册委托管理员。[注册委托管理员](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/stacksets-orgs-delegated-admin.html) 8 | 3. 部署 IT CloudFormation 堆栈 9 | 4. 为 Admin API 创建角色 10 | 5. 为 Agent CloudFormation 堆栈创建 StackSet 11 | 6. 部署到组织/OU 12 | 7. 通过组织[添加成员帐户](../user-guide/data-source.md#add-aws-accounts-via-organization) 13 | 8. 检索部署堆栈和成员帐户 -------------------------------------------------------------------------------- /docs/zh/user-guide/check-result-dashboard.md: -------------------------------------------------------------------------------- 1 | # 查看结果 2 | 3 | ## 查看总览统计 4 | 5 | 左侧菜单中,选择**总览**。您可以看到数据统计,如下图。 6 | 7 | ![edit-icon](docs/../../images/cn-dashboard.png) 8 | 9 | ## 查询敏感数据 10 | 11 | 在统计面板中,**热门数据标识符**板块,您可以对敏感数据进行反向搜索。可以快速 12 | -------------------------------------------------------------------------------- /docs/zh/user-guide/check-result-data-catalog.md: -------------------------------------------------------------------------------- 1 | # 查看结果 2 | 3 | ## 浏览数据目录 4 | 数据目录是数据源的元数据存储库。使用数据目录,您可以查看数据的列级信息。 5 | 6 | 7 | # 标记数据目录 8 | 9 | 数据目录为您的数据源提供元数据。您可以为其添加/更新标签以提供更多元数据信息。 10 | 11 | ## 敏感数据标记(自动或手动) 12 | 敏感数据作业完成后,"隐私字段" 将根据作业结果自动标记。数据目录中的列级数据将使用数据标识符进行标记。 13 | 14 | 您始终可以手动更新数据目录中的隐私字段。 15 | 16 | 在 **浏览数据目录** 页面: 17 | 18 | - 在 **S3** 标签页中,无论是在存储桶还是文件夹级别,您都可以单击 ![edit-icon](docs/../../images/edit-icon.png) 以从下拉列表中选择隐私标签。 19 | - 在 **RDS** 标签页中,无论是在实例还是表级别,您都可以单击 ![edit-icon](docs/../../images/edit-icon.png) 以从下拉列表中选择隐私标签。 20 | 21 | ### 自定义标记(手动) 22 | 您可以在数据目录中使用 "自定义标签" 字段添加与业务相关的标签(例如,业务线、部门、团队等)。 23 | 24 | 在 **浏览数据目录** 页面: 25 | 26 | - 在 S3 标签页中,无论是在存储桶还是文件夹级别,您都可以单击 ![edit-icon](docs/../../images/edit-icon.png) 以从下拉列表中选择自定义标签。 27 | - 在 RDS 标签页中,无论是在实例还是表级别,您都可以单击 ![edit-icon](docs/../../images/edit-icon.png) 以从下拉列表中选择自定义标签。 28 | 29 | 在下拉列表底部,单击 **管理自定义标签** 链接,将弹出一个窗口,在其中您可以 **添加/编辑/删除** 自定义标签。 30 | -------------------------------------------------------------------------------- /docs/zh/user-guide/data-catalog-create-glue.md: 
-------------------------------------------------------------------------------- 1 | # 连接到数据源 2 | 添加 AWS 帐户后,您可以连接AWS Glue Data Catalogs,用以扫描以AWS Glue为数据目录(元数据目录)的数据源。 3 | 4 | ## 连接AWS Glue数据源 5 | 6 | !!! Info "支持的大数据数据类型" 7 | 有关 AWS Glue 支持的特定数据格式,请参阅 [AWS Glue 中的内置分类器](https://docs.aws.amazon.com/glue/latest/dg/add-classifier.html)。 8 | 9 | 此外,方案还支持Glue Hudi表。 10 | 11 | 1. 在 **连接数据源** 页面上,单击一个帐户以打开其详细信息页面。 12 | 2. 在 **Glue Data Catalogs** 标签页,选择一个Glue 连接,然后选择 **同步至数据目录** 。 13 | 3. 您看到目录状态变为灰色`PENDING`,表示连接开始(约3分钟) 14 | 4. 您看到目录状态变为绿色 `ACTIVE`,则表示Glue Data Catalog已同步至SDP平台上的数据目录。 15 | 16 | 至此,您已经连接好了Glue Data Catalog,可以开始下一步操作了。 -------------------------------------------------------------------------------- /docs/zh/user-guide/data-catalog-create-rds.md: -------------------------------------------------------------------------------- 1 | # 连接到数据源 - RDS 2 | 3 | ### 前提条件 - 保持网络连通性 4 | 1. 请确认您[添加AWS账户](data-source.md)时,选择的是CloudFormation方式。如果您添加账户时,选择JDBC方式,请转至[通过EC2代理连接数据库](data-catalog-create-jdbc-database-proxy.md)进行操作。 5 | 2. 请确保待检测RDS的inbound rule上有所在安全组的自引用, 操作详见[官网文档](https://docs.aws.amazon.com/glue/latest/dg/setup-vpc-for-glue-access.html)。 6 | 3. 请确保Amazon RDS 实例所在VPC至少有1个私有子网。 7 | 4. 请确保RDS所在VPC满足以下条件之一: 1) 它具有 [VPC NAT 网关](https://docs.aws.amazon.com/vpc/latest/userguide/vpc-nat-gateway.html)。 8 | 2) 它具有 S3 & Glue & KMS & Secrets Manager服务的VPC Endpoint。 (操作详见[官网文档](https://docs.aws.amazon.com/vpc/latest/privatelink/vpc-endpoints-s3.html) )。 9 | 5. 准备好RDS的连接凭证(用户名/密码) 10 | 11 | !!! Info "如何获得RDS凭证" 12 | DBA或业务方创建一个只读的User做安全审计使用。此用户只需要数据库 SELECT(只读权限)。 13 | 14 | ## 连接Amazon RDS数据源 15 | 1. 从左侧菜单,选择 **连接数据源** 16 | 2. 选择**AWS Cloud**标签页 17 | 3. 单击进入一个AWS帐户,打开详细页面 18 | 4. 选择 **Amazon RDS** 标签页。您可以看到解决方案部署区域中的 RDS 实例列表 19 | 5. 选择一个 RDS 实例,点击按钮 **同步至数据目录** 20 | 6. 
在弹出窗口中,输入RDS凭证信息。(如果您选择Secrets Manager方式,需要提前将此RDS的用户名/密码托管在Secrets Manager。) 21 | 22 | | 参数 | 必填项 | 参数描述 | 23 | |-------------------|--------|------------------------------------------------------| 24 | | 凭证 | 是 | 选择用户名密码或SecretManager。填写数据库的用户名/密码。 | 25 | 26 | 27 | 7. 点击 **连接**。您可以等待10s关闭此窗口。 28 | 8. 您看到目录状态变为灰色`PENDING`,表示连接开始(约3分钟) 29 | 9. 您看到目录状态变为蓝色`CRAWLING`。(200张表约15分钟) 30 | 10. 您看到目录状态变为绿色 `ACTIVE`,则表示已为 RDS 实例创建了数据目录。 31 | 32 | 至此,您已经连接好RDS数据源了,可以开始下一步操作👉[定义分类分级模版](data-identifiers.md)。 -------------------------------------------------------------------------------- /docs/zh/user-guide/data-catalog-create-s3.md: -------------------------------------------------------------------------------- 1 | # 连接到数据源 - S3 2 | 添加云帐户后,您可以连接S3的数据源进行敏感数据扫描,连接也是一个授权的过程。 3 | 4 | !!! Info "支持扫描的数据/文件类型" 5 | 请参阅[附录:方案支持扫描的数据类型](appendix-built-in-supported-datatypes.md)。 6 | 7 | ### 前提条件 8 | 如果您需要扫描非结构化数据(如文档、代码、邮件、图片等),请提升Service quota。 9 | 10 | * **Global regions**:请通过[Service Quota服务](https://console.aws.amazon.com/servicequotas/home)自助提升待扫描区域的SageMaker Processing Job实例配额。 11 | * **China regions**:请联系AWS销售开“配额提升工单”,内容参考:'您好,请把本账号某区域(例如,cn-northwest-1)区域对应SageMaker Processing Job ml.m5.2xlarge实例的并行运行数量提升到100'。 12 | 13 | ## 连接S3数据源 14 | 1. 从左侧菜单,选择**连接数据源** 15 | 2. 选择**AWS Cloud**标签 16 | 3. 单击进入AWS帐户,打开详细页面。 17 | 4. 在 **Amazon S3** 标签页中,查看解决方案部署区域中的 S3 存储桶列表。 18 | 5. 选择一个S3存储桶,点击 **授权**。 或者,您也可以从 **操作** 列表中选择 **批量授权**,快速授权所有S3桶。 19 | ![edit-icon](docs/../../images/cn-s3-authorize.png) 20 | 6. 大约半分钟后,您可以看到 **授权状态** 为绿色 `ACTIVE`。 21 | 22 | 至此,您已经连接好了S3数据源,可以开始下一步操作👉[定义分类分级模版](data-identifiers.md)了。 -------------------------------------------------------------------------------- /docs/zh/user-guide/data-catalog-delete.md: -------------------------------------------------------------------------------- 1 | 如果您不再需要数据目录,可以删除它们。 2 | ## 删除 S3 的数据目录 3 | 4 | 1. 在 **连接数据源** 页面上,单击一个帐户以打开其详细信息页面。 5 | 2. 
在 **S3** 标签页中,选择一个 S3 存储桶,然后从 **操作** 列表中选择 **删除数据目录**。 6 | 7 | ## 删除 RDS 的数据目录 8 | 9 | 1. 在 **连接数据源** 页面上,单击一个帐户以打开其详细信息页面。 10 | 2. 选择 **Amazon RDS** 标签页。 11 | 3. 选择一个 RDS 实例,然后从 **操作** 列表中选择 **删除数据目录**。 -------------------------------------------------------------------------------- /docs/zh/user-guide/data-catalog-export.md: -------------------------------------------------------------------------------- 1 | 您可以导出最新的数据目录。 2 | 3 | 1. 在**浏览数据目录**页面,点击**导出数据目录**按钮。 4 | 2. 选择要下载的文件格式,可以选择**下载 .xlsx 文件**(Microsoft Excel 文件)或**下载 .csv 文件**。 5 | 6 | 导出的文件包含当前数据目录中所有列级别的详细信息,文件结构如下所示: 7 | 8 | | S3 | RDS | 9 | |-----------------------|------------------------| 10 | | 账户ID | 账户ID | 11 | | 区域 | 区域 | 12 | | S3存储桶 | RDS实例ID | 13 | | 文件夹名称 | 表名 | 14 | | 列名 | 列名 | 15 | | 标识符 | 标识符 | 16 | | 示例数据 | 示例数据 | 17 | | 存储桶目录标签 | 实例目录标签 | 18 | | 文件夹目录标签 | 表目录标签 | 19 | | 注释 | 注释 | -------------------------------------------------------------------------------- /docs/zh/user-guide/data-catalog-labels.md: -------------------------------------------------------------------------------- 1 | # 标记数据目录 2 | 3 | 数据目录为您的数据源提供元数据。您可以为其添加/更新标签以提供更多元数据信息。 4 | 5 | ## 敏感数据标记(自动或手动) 6 | 敏感数据作业完成后,"隐私字段" 将根据作业结果自动标记。数据目录中的列级数据将使用数据标识符进行标记。 7 | 8 | 您始终可以手动更新数据目录中的隐私字段。 9 | 10 | 在 **浏览数据目录** 页面: 11 | 12 | - 在 S3 标签页中,无论是在存储桶还是文件夹级别,您都可以单击 ![edit-icon](docs/../../images/edit-icon.png) 以从下拉列表中选择隐私标签。 13 | - 在 RDS/Glue/JDBC 标签页中,无论是在实例还是表级别,您都可以单击 ![edit-icon](docs/../../images/edit-icon.png) 以从下拉列表中选择隐私标签。 14 | 15 | ## 自定义标记(手动) 16 | 您可以在数据目录中使用 "自定义标签" 字段添加与业务相关的标签(例如,业务线、部门、团队等)。 17 | 18 | 在 **浏览数据目录** 页面: 19 | 20 | - 在 S3 标签页中,无论是在存储桶还是文件夹级别,您都可以单击 ![edit-icon](docs/../../images/edit-icon.png) 以从下拉列表中选择自定义标签。 21 | - 在 RDS/Glue/JDBC 标签页中,无论是在实例还是表级别,您都可以单击 ![edit-icon](docs/../../images/edit-icon.png) 以从下拉列表中选择自定义标签。 22 | 23 | 在下拉列表底部,单击 **管理自定义标签** 链接,将弹出一个窗口,在其中您可以 **添加/编辑/删除** 自定义标签。 24 | -------------------------------------------------------------------------------- 
/docs/zh/user-guide/data-catalog-sync.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## 数据目录何时与数据源同步? 4 | 5 | 该解决方案会在以下情况下将数据目录与数据源同步: 6 | 7 | - 手动同步到数据目录。请参阅[连接到数据源](data-catalog-create.md)。 8 | - 运行敏感数据发现作业(自动)。 9 | 10 | !!! Info "更多信息" 11 | 同步数据目录时不会影响现有数据目录上的标签。 12 | 13 | | AWS 资源 | 数据源更改 | 同步到数据目录 | 运行敏感数据发现作业 | 14 | | --- | --- | --- | --- | 15 | | S3 | 创建存储桶 | Y | Y | 16 | | S3 | 删除存储桶 | Y | Y | 17 | | S3 | 创建对象 | Y | Y | 18 | | S3 | 删除对象 | Y | Y | 19 | | S3 | 在存储桶根目录中创建对象 | Y | Y | 20 | | S3 | 在存储桶根目录中删除对象 | N | N | 21 | | S3 | 更新对象(时间戳更改) | Y | Y | 22 | | RDS | 创建实例 | Y | Y | 23 | | RDS | 删除实例 | Y | Y | 24 | | RDS | 更新实例 | Y | Y| - 刷新连接数据源页面时 25 | | RDS | 创建数据库 | Y | Y | 26 | | RDS | 删除数据库 | Y | Y | 27 | | RDS | 创建表 | Y | Y | 28 | | RDS | 删除表 | Y | Y | 29 | | RDS | 更新表 | Y | Y | 30 | | RDS | 创建列 | Y | Y | 31 | | RDS | 删除列 | Y | Y | 32 | | RDS | 更新列 | Y | Y | 33 | -------------------------------------------------------------------------------- /docs/zh/user-guide/data-source.md: -------------------------------------------------------------------------------- 1 | # 连接到数据源 2 | 3 | ## 添加AWS账号 4 | 5 | 1. 左侧菜单中,选择**连接数据源**。 6 | 2. 选择AWS Cloud页签。 7 | 3. 点击按钮 **添加新账户**。 8 | ![edit-icon](docs/../../images/account-list-cn.png) 9 | 4. 选择以下一种账号添加方式: 10 | 11 | #### 方式一: 通过CloudFormation授权 (适用于同AWS region,自动发现S3,RDS,AWS Glue) 12 | 1. 打开**独立AWS账户**标签页 13 | 2. 按照此页面 **步骤1** 和 **步骤2** 中的说明安装 Agent CloudFormation 堆栈(约3mins),用于授权待检测账号。详细权限信息请参阅[附录:Agent CloudFormation 堆栈的权限](./appendix-permissions.md) 14 | 3. 在页面填写待检测的AWS账户ID,选择需要数据源所在的区域。 15 | 4. 点击按钮 **添加此账户**。 16 | 17 | #### 方式二:通过JDBC方式连接数据库 (适用于多AWS region,手动添加数据库,如Redshift,EC2自建数据库,多云数据库) 18 | 5. 打开**JDBC Only**标签页。 19 | 6. 选择regions 20 | 7. 填写AWS账户ID 21 | 8. 点击按钮 **添加此账户** 22 | 23 | #### 方式三:批量添加CloudFormation授权。(适用于同AWS region,自动发现S3,RDS,AWS Glue。通过AWS Organization批量管理账号) 24 | 9. 打开**JDBC Only**标签页 25 | 10. 
按照此页面 **步骤1**、**步骤2** 和 **步骤3** 中的说明安装 Agent CloudFormation 堆栈。有关详细信息,请参阅[附录:通过组织添加 AWS 账户](appendix-organization.md)。 26 | 11. 填写AWS Organization代理账户ID 27 | 12. 点击按钮 **添加此账户** 28 | 29 | ## 添加其他云账号 30 | 31 | 1. 左侧菜单中,选择**连接数据源**。 32 | 2. 选择你需要添加的Cloud Provider的页签(如Tencent,Google)。 33 | 3. 点击按钮 **添加新账户**。 34 | 35 | #### 通过JDBC方式连接数据库 (适用于多云或者IDC,手动添加数据库) 36 | 1. 填写账户ID 37 | 2. 选择regions 38 | 3. 点击按钮 **添加此账户** -------------------------------------------------------------------------------- /docs/zh/user-guide/discovery-job-details.md: -------------------------------------------------------------------------------- 1 | 敏感数据发现作业由在监控的 AWS 账户(与数据源相同的账户)中运行的 Glue 作业组成。 2 | 3 | * 回到敏感数据任务的列表中,可以看到作业状态为 `Running`。 4 | * 查看任务进度:点击这个任务,在侧边栏,点击 **任务运行详情**。 5 | ![edit-icon](docs/../../images/cn-job-status.png) 6 | 7 | * 进度在开始的一段时间为0%,不要着急,此时后台在检测数据结构是否有变化。等实际数据扫描开始,进度才变化。 8 | ![edit-icon](docs/../../images/cn-job-status-progress.png) 9 | !!! Info "运行时长" 10 | 时间长短取决于抽样率、待检测表、模版中标识符的数量。 11 | 举例:1个实例400张表,扫描深度为30,模版中21个规则,大约需要25分钟。 12 | 不同的S3桶/数据库实例,后台会并行扫描。 13 | 14 | * 等待Glue作业状态变为`SUCCEEDED`,表示扫描任务完成。 15 | * 如果 Glue 作业失败,您可以单击 `FAILED` 状态查看其错误日志。 16 | 17 | ## 下载分类模板快照 18 | 您可以下载作业开始运行时的模板快照。快照显示了该作业正在使用哪些数据标识符。 19 | 20 | 在 **作业详细信息** 页面上,选择 **下载快照** 以 JSON 格式(.json)下载模板快照。 21 | -------------------------------------------------------------------------------- /docs/zh/user-guide/discovery-job-pause-and-cancel.md: -------------------------------------------------------------------------------- 1 | 您只能暂停或恢复预定的作业。这并不意味着暂停或恢复正在运行的发现作业。 2 | 3 | 要暂停预定的作业,在 **运行敏感数据发现作业** 页面上,单击 **操作** 并选择 **暂停**。例如,如果您在每个月的第一天安排了一个每月一次的作业,并在一月份运行了一次作业,则选择暂停将阻止发现作业在二月份执行。 4 | 5 | 要恢复暂停的作业,在 **运行敏感数据发现作业** 页面上,单击 **操作** 并选择 **继续**。 -------------------------------------------------------------------------------- /docs/zh/user-guide/discovery-job-rerun-and-duplicate.md: -------------------------------------------------------------------------------- 1 | ## 重新运行发现作业 2 | 在 **运行敏感数据发现作业** 页面上,单击 **操作** 并选择 
**执行一次**。您可以创建一个新的发现作业,并使用与上次运行相同的设置运行它。 3 | 4 | ## 复制发现作业 5 | 在 **运行敏感数据发现作业** 页面上,单击 **操作** 并选择 **复制**。您可以复制一个作业设置并修改它以启动新作业。 -------------------------------------------------------------------------------- /docs/zh/user-guide/get-started.md: -------------------------------------------------------------------------------- 1 | # 概述 2 | 成功[部署解决方案](../deployment/deployment.md)后,您可以访问控制台来检测敏感数据。 3 | 4 | - **第1步**:[连接数据源](data-source.md) 添加 AWS 账户并创建数据目录。 5 | - **第2步**:[定义分类模板](data-identifiers.md) 通过管理数据标识符在模板中定义敏感数据。 6 | - **第3步**:[运行敏感数据发现作业](discovery-job-create.md) 通过创建和管理数据发现作业来检测敏感数据。 7 | - **第4步**:[查看结果](data-catalog-sync.md) 通过数据目录可以看到数据源的元数据(如表结构)查看已更新的数据目录,并查看“摘要”页面检查仪表板。 -------------------------------------------------------------------------------- /semgrep.sarif: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/schemas/sarif-schema-2.1.0.json", 3 | "runs": [ 4 | { 5 | "invocations": [ 6 | { 7 | "executionSuccessful": true, 8 | "toolExecutionNotifications": [] 9 | } 10 | ], 11 | "results": [], 12 | "tool": { 13 | "driver": { 14 | "name": "Semgrep OSS", 15 | "rules": [], 16 | "semanticVersion": "1.66.2" 17 | } 18 | } 19 | } 20 | ], 21 | "version": "2.1.0" 22 | } -------------------------------------------------------------------------------- /source/.viperlightignore: -------------------------------------------------------------------------------- 1 | Config 2 | CODE_OF_CONDUCT.md:4 3 | CONTRIBUTING.md:51 4 | ^dist/ 5 | .typescript/node_modules 6 | .typescript/lambda/example-function-js/node_modules 7 | .typescript/lambda/layers/aws-nodesdk-custom-config/node_modules 8 | .typescript/cdk.out 9 | .typescript/lambda/layers/aws-nodesdk-custom-config/coverage 10 | .typescript/package-lock.json 11 | portal/node_modules 12 | constructs/api/common/constant.py:25 13 | constructs/api/common/constant.py:26 14 | constructs/lib/common/constants.ts 15 | 
constructs/lib/admin/database/ 16 | constructs/config/job/script/job_extra_files.zip 17 | constructs/api/pytest/test_data_source.py 18 | constructs/api/pytest/test_labels.py 19 | constructs/api/pytest/test_query.py 20 | constructs/lib/common/solution-info.ts:30 21 | constructs/config/batch_create/datasource/template/batch_create_jdbc_datasource-cn.xlsx 22 | constructs/config/batch_create/datasource/template/batch_create_jdbc_datasource-en.xlsx 23 | constructs/config/batch_create/identifier/template/batch_create_identifier-cn.xlsx 24 | constructs/config/batch_create/identifier/template/batch_create_identifier-en.xlsx 25 | 26 | [python-pipoutdated] 27 | pip=v21.1.2 28 | aws-sam-cli=v1.58.0 29 | pipenv=v2022.10.4 -------------------------------------------------------------------------------- /source/.viperlightrc: -------------------------------------------------------------------------------- 1 | { 2 | "all": true, 3 | "failOn": "medium" 4 | } 5 | -------------------------------------------------------------------------------- /source/constructs/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/constructs/api/__init__.py -------------------------------------------------------------------------------- /source/constructs/api/catalog/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/constructs/api/catalog/__init__.py -------------------------------------------------------------------------------- /source/constructs/api/common/__init__.py: -------------------------------------------------------------------------------- 
# Default part size in bytes: slightly above 5 MiB, the S3 minimum for every
# part except the last one of a multipart upload.
part_bytes = 5 * 1024 * 1024 + 10


def concurrent_upload(bucket_name, object_name, file_path, client, part_size=part_bytes):
    """Upload a local file to S3 through the multipart-upload API.

    Despite the name, parts are read and uploaded one after another.

    Args:
        bucket_name: Destination bucket.
        object_name: Destination object key.
        file_path: Path of the local file to upload.
        client: S3 client exposing ``create_multipart_upload``, ``upload_part``,
            ``complete_multipart_upload`` and ``abort_multipart_upload``.
        part_size: Number of bytes read per part. Defaults to ``part_bytes``;
            S3 rejects non-final parts smaller than 5 MiB.

    Raises:
        Whatever the client or the file read raises. The multipart upload is
        aborted first so that no orphaned parts are left in the bucket.
    """
    upload_id = client.create_multipart_upload(Bucket=bucket_name, Key=object_name)['UploadId']
    try:
        parts = []
        with open(file_path, "rb") as f:
            # iter(callable, sentinel) stops at the first empty read, i.e. EOF.
            chunks = iter(lambda: f.read(part_size), b"")
            for part_number, data in enumerate(chunks, start=1):
                part = client.upload_part(Body=data, Bucket=bucket_name, Key=object_name,
                                          UploadId=upload_id, PartNumber=part_number)
                parts.append({"PartNumber": part_number, "ETag": part["ETag"]})

        client.complete_multipart_upload(Bucket=bucket_name, Key=object_name, UploadId=upload_id,
                                         MultipartUpload={"Parts": parts})
    except Exception:
        # An unfinished multipart upload keeps its parts until it is aborted,
        # so clean up before propagating the error.
        try:
            client.abort_multipart_upload(Bucket=bucket_name, Key=object_name, UploadId=upload_id)
        except Exception:
            # Best-effort cleanup: the original failure is the more useful error.
            pass
        raise
class CustomFormatter(logging.Formatter):
    """Formatter that keeps a multi-line log entry on one line inside Lambda.

    When the module-level ``is_running_in_lambda`` flag is true, every ``\\n``
    in the formatted output is replaced with ``\\r``. Outside Lambda the
    output of ``logging.Formatter`` is returned unchanged.
    """

    def format(self, record):
        """Return the formatted text for ``record``.

        The replacement is applied to the final formatted string rather than
        to ``record.msg``. That way newlines arriving through ``%``-style
        arguments or an attached traceback are covered as well, and the
        record is not mutated for other handlers.
        """
        formatted = super().format(record)
        if is_running_in_lambda:
            return formatted.replace("\n", "\r")
        return formatted
def get_value(key: str) -> str:
    """Return the stored value for ``key``, or ``None`` when no row exists."""
    config_query = get_session().query(models.Config)
    row = config_query.filter(models.Config.config_key == key).first()
    return row.config_value if row is not None else None


def set_value(key: str, value: str):
    """Update the row for ``key``; insert it when the update matched nothing."""
    session = get_session()
    matching_rows = session.query(models.Config).filter(models.Config.config_key == key)
    updated_count = matching_rows.update({"config_value": value})
    if updated_count <= 0:
        # Nothing was updated, so the key is new: create its row.
        session.add(models.Config(config_key=key, config_value=value))
    session.commit()


def list_config():
    """Return every row of the config table."""
    session = get_session()
    return session.query(models.Config).all()
@router.get("", response_model=BaseResponse[list[schemas.ConfigBase]])
@inject_session
def list_config():
    # GET /config: hand back whatever the service layer lists.
    all_configs = service.list_config()
    return all_configs


@router.post("")
@inject_session
def set_config(configs: list[schemas.ConfigBase]):
    # POST /config: pass the submitted entries to the service layer.
    outcome = service.set_configs(configs)
    return outcome


@router.get("/subnets", response_model=BaseResponse[list[schemas.SubnetInfo]])
@inject_session
def list_subnets():
    # GET /config/subnets: subnet details come from the service layer.
    subnet_infos = service.list_subnets()
    return subnet_infos
def list_subnets():
    """Describe the admin subnets and return one ``schemas.SubnetInfo`` each.

    The subnet ids come from ``admin_subnet_ids``. A subnet without any tags
    has no ``Tags`` key in the EC2 response, so the key is read with ``.get``
    and such a subnet is reported with ``name=None`` rather than failing.
    """
    ec2_client = boto3.client('ec2')
    response = ec2_client.describe_subnets(SubnetIds=admin_subnet_ids)
    return [
        schemas.SubnetInfo(subnet_id=subnet['SubnetId'],
                           name=__get_name(subnet.get('Tags')),
                           available_ip_address_count=subnet['AvailableIpAddressCount'])
        for subnet in response['Subnets']
    ]


def __get_name(tags: list) -> str:
    """Return the value of the ``Name`` tag, or ``None`` when it is absent.

    ``tags`` may be ``None`` or empty (untagged subnet).
    """
    for tag in tags or []:
        if tag.get("Key") == "Name":
            return tag.get("Value")
    return None
def list_resources_by_database_type(database_type: str, account_id: str = None, region: str = None, condition: QueryCondition = None):
    """List the data-source resources for ``database_type``.

    S3, RDS and Glue each have a dedicated ``crud`` query. Every other type is
    converted to a provider and looked up through the JDBC query.
    """
    filters = (account_id, region, condition)
    if database_type == DatabaseType.S3.value:
        return crud.list_s3_resources(*filters)
    if database_type == DatabaseType.RDS.value:
        return crud.list_rds_resources(*filters)
    if database_type == DatabaseType.GLUE.value:
        return crud.list_glue_resources(*filters)
    # Fallback: anything that is not S3/RDS/Glue goes through the JDBC listing.
    provider = convert_database_type_2_provider(database_type)
    return crud.list_jdbc_resources_by_provider(provider, *filters)
def gen_session():
    """Create a session from ``SessionLocal`` and bind it to the current thread."""
    local_session.currentSession = SessionLocal()


def get_session() -> Session:
    """Return the session bound to the current thread by ``gen_session``."""
    current = local_session.currentSession
    return current


def close_session():
    """Close the session bound to the current thread."""
    current = local_session.currentSession
    current.close()
class Version(Base):
    """ORM mapping for the ``version`` table."""

    __tablename__ = 'version'

    # Integer primary key.
    id = sa.Column(sa.Integer(), primary_key=True)
    # Required string of up to 100 characters.
    # NOTE(review): presumably the solution/schema version string; confirm.
    value = sa.Column(sa.String(100), nullable=False)
    # Optional free-text description, up to 1000 characters.
    description = sa.Column(sa.String(1000))
    # NOTE(review): purpose not visible here (row revision counter?); confirm.
    version = sa.Column(sa.Integer())
    # Audit columns.
    # NOTE(review): presumably filled by the `before_execute` hook in
    # common/db_base_col.py, which sets these same keys; confirm.
    create_by = sa.Column(sa.String(255))
    create_time = sa.Column(sa.DateTime())
    modify_by = sa.Column(sa.String(255))
    modify_time = sa.Column(sa.DateTime())
def __check_role(agent_account_id: str) -> bool:
    """Probe whether the agent account's admin role can still be assumed.

    Calls ``sts:AssumeRole`` on
    ``{SOLUTION_NAME}RoleForAdmin-{admin_region}`` in ``agent_account_id``.

    Returns:
        True when the assume-role call succeeds, False when it raises a
        ``ClientError`` of any kind.
    """
    role_arn = (
        f'arn:{partition}:iam::{agent_account_id}:role/'
        f'{const.SOLUTION_NAME}RoleForAdmin-{admin_region}'
    )
    try:
        client_sts.assume_role(RoleArn=role_arn, RoleSessionName="CheckRole")
    except ClientError as e:
        # AccessDenied is the expected outcome once the role is gone
        # (presumably - confirm against the agent stack teardown); any other
        # error code is unexpected, so surface it above INFO level.
        if e.response['Error']['Code'] != 'AccessDenied':
            logger.warning(e)
        return False
    return True


def sync_data(input_event):
    """Handle an auto-sync event.

    Only the ``DELETE_ACCOUNT`` action is handled: the function polls (up to
    10 probes, 30 seconds apart) until the agent's role can no longer be
    assumed, then calls ``delete_account`` for that account. The account is
    deleted even if the role is still assumable after the last probe.

    Args:
        input_event: mapping with at least the keys ``"Action"`` and, for the
            delete action, ``"AccountID"``.
    """
    if input_event["Action"] != AutoSyncDataAction.DELETE_ACCOUNT.value:
        return

    agent_account_id = input_event["AccountID"]
    max_checks = 10
    # Wait until the agent's role has been deleted.
    for attempt in range(max_checks):
        logger.info(f"Check time:{attempt}")
        if not __check_role(agent_account_id):
            break
        # No point sleeping after the final probe: deletion follows regardless.
        if attempt < max_checks - 1:
            time.sleep(30)
    delete_account(Provider.AWS_CLOUD.value, agent_account_id, None)
context): 18 | logger.info(event) 19 | if 'detail' in event and 'crawlerName' in event['detail']: 20 | crawler_name = event['detail']['crawlerName'] 21 | if crawler_name.startswith(solution_name + "-"): 22 | message = sqs.send_message( 23 | QueueUrl=queue_url, 24 | MessageBody=str(event)) 25 | else: 26 | logger.info(f"crawler event not send msg because of crawler_name not valid :{event}") 27 | elif 'detail' in event and 'databaseName' in event['detail']: 28 | # customer type : if glue database/unstructured , does not have real glue connection and crawler 29 | message = sqs.send_message( 30 | QueueUrl=queue_url, 31 | MessageBody=str(event)) 32 | return { 33 | 'statusCode': 200 34 | } 35 | -------------------------------------------------------------------------------- /source/constructs/api/lambda/forward_message.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | import os 4 | 5 | logger = logging.getLogger('api') 6 | logger.setLevel(logging.INFO) 7 | admin_region = os.getenv("AdminRegion", "cn-northwest-1") 8 | sqs = boto3.resource('sqs', region_name=admin_region) 9 | 10 | 11 | def lambda_handler(event, context): 12 | for record in event['Records']: 13 | event_source_arn = record["eventSourceARN"] 14 | queue_name = event_source_arn[event_source_arn.rfind(":") + 1:] 15 | payload = record["body"] 16 | logger.info(queue_name) 17 | logger.info(payload) 18 | forward_message(queue_name, payload) 19 | 20 | 21 | def forward_message(queue_name: str, message: str): 22 | queue = sqs.get_queue_by_name(QueueName=queue_name) 23 | queue.send_message(MessageBody=message) 24 | -------------------------------------------------------------------------------- /source/constructs/api/logging.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root,api 3 | 4 | [handlers] 5 | keys=consoleHandler 6 | 7 | [formatters] 8 | keys=normalFormatter 9 | 10 | 
[logger_root] 11 | level=INFO 12 | handlers= 13 | 14 | [logger_api] 15 | level=DEBUG 16 | handlers=consoleHandler 17 | qualname=api 18 | 19 | [handler_consoleHandler] 20 | class=StreamHandler 21 | formatter=normalFormatter 22 | args=(sys.stdout,) 23 | 24 | [formatter_normalFormatter] 25 | class=common.log_formatter.CustomFormatter 26 | format=%(asctime)s [%(levelname)s] %(filename)s %(funcName)s() L%(lineno)-4d %(message)s 27 | -------------------------------------------------------------------------------- /source/constructs/api/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -v 3 | testpaths = pytest -------------------------------------------------------------------------------- /source/constructs/api/pytest/test_auth.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import pytest 5 | from main import app 6 | from fastapi.testclient import TestClient 7 | from unittest.mock import Mock 8 | 9 | 10 | @pytest.fixture 11 | def mock_online_validate(mocker): 12 | mock = Mock() 13 | mocker.patch('main.__online_validate', return_value=mock) 14 | return mock 15 | 16 | 17 | def test_auth_without_token(): 18 | client = TestClient(app) 19 | response = client.get("/version/get-latest-version") 20 | assert response.status_code == 200 21 | assert response.json() == {"code": 1003, "message": "Invalid token", "ref": None, "status": "fail"} 22 | 23 | 24 | def test_auth_with_token(mocker): 25 | mocker.patch('main.__online_validate', return_value=True) 26 | mocker.patch('main.jwt.get_unverified_claims', return_value={"username":"fake_user"}) 27 | mocker.patch('version.service.get_latest_version', return_value="0.0.0") 28 | client = TestClient(app) 29 | response = client.get("/version/get-latest-version", headers={"authorization": "Bearer fake_token"}) 30 | 
assert response.status_code == 200 31 | assert response.json() == {'status': 'success', 'code': 1001, 'message': 'Operation succeeded', 'data': '0.0.0'} -------------------------------------------------------------------------------- /source/constructs/api/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.28.70 2 | pytz==2023.3 3 | fastapi==0.109.2 4 | mangum==0.17.0 5 | sqlalchemy==1.4.44 6 | fastapi-pagination==0.12.11 7 | openpyxl==3.0.10 8 | pymysql==1.0.2 9 | sqlakeyset==1.0.1659142803 10 | requests==2.31.0 11 | urllib3==1.26.18 12 | python-jose==3.3.0 13 | pydantic==1.10.13 14 | python_multipart==0.0.6 -------------------------------------------------------------------------------- /source/constructs/api/template/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/constructs/api/template/__init__.py -------------------------------------------------------------------------------- /source/constructs/api/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/constructs/api/tools/__init__.py -------------------------------------------------------------------------------- /source/constructs/api/tools/list_tool.py: -------------------------------------------------------------------------------- 1 | def compare(list1: list, list2: list): 2 | sorted_list1 = sorted(list1) 3 | sorted_list2 = sorted(list2) 4 | return sorted_list1 == sorted_list2 5 | -------------------------------------------------------------------------------- /source/constructs/api/tools/mytime.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 
# Shared strftime/strptime patterns so formatting and parsing cannot drift apart.
_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
_DATE_FORMAT = '%Y-%m-%d'


def get_time() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return format_time(get_now())


def get_date() -> str:
    """Return the current UTC date formatted as ``YYYY-MM-DD``."""
    return get_now().strftime(_DATE_FORMAT)


def format_time(in_time: datetime) -> str:
    """Format ``in_time`` as ``YYYY-MM-DD HH:MM:SS``."""
    return in_time.strftime(_TIME_FORMAT)


def parse_time(in_str: str) -> datetime:
    """Parse a ``YYYY-MM-DD HH:MM:SS`` string into a naive ``datetime``.

    Raises:
        ValueError: if ``in_str`` does not match the expected format.
    """
    return datetime.strptime(in_str, _TIME_FORMAT)


def get_now() -> datetime:
    """Return the current UTC time as a naive ``datetime`` (``tzinfo`` is None).

    ``datetime.utcnow()`` is deprecated since Python 3.12; this yields the same
    naive-UTC value without the deprecation warning.
    """
    # Local import: the file-level import only brings ``datetime`` into scope.
    from datetime import timezone
    return datetime.now(timezone.utc).replace(tzinfo=None)
def get_latest_version():
    """Return the ``value`` of the most recently inserted ``version`` row.

    Rows are ordered by ``create_time`` and then by the auto-increment ``id``,
    both descending. ``create_by`` must not be used for ordering: the install
    scripts write the same creator for every row, so it does not distinguish
    rows. ``id`` breaks ties between rows inserted within the same second.

    Returns:
        The version string, or None when the table has no rows.
    """
    latest = (
        get_session()
        .query(models.Version.value)
        .order_by(models.Version.create_time.desc(), models.Version.id.desc())
        .first()
    )
    # ``first()`` yields None on an empty table; avoid subscripting None.
    return latest[0] if latest is not None else None
-------------------------------------------------------------------------------- /source/constructs/api/version/service.py: -------------------------------------------------------------------------------- 1 | from version import crud 2 | 3 | def get_latest_version(): 4 | return crud.get_latest_version() 5 | -------------------------------------------------------------------------------- /source/constructs/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node bin/main.ts", 3 | "context": { 4 | "solution_id": "SO8031", 5 | "solution_name": "Sensitive Data Protection", 6 | "solution_version": "0.1.0" 7 | } 8 | } -------------------------------------------------------------------------------- /source/constructs/config/batch_create/datasource/template/batch_create_jdbc_datasource-cn.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/constructs/config/batch_create/datasource/template/batch_create_jdbc_datasource-cn.xlsx -------------------------------------------------------------------------------- /source/constructs/config/batch_create/datasource/template/batch_create_jdbc_datasource-en.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/constructs/config/batch_create/datasource/template/batch_create_jdbc_datasource-en.xlsx -------------------------------------------------------------------------------- /source/constructs/config/batch_create/identifier/template/batch_create_identifier-cn.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/constructs/config/batch_create/identifier/template/batch_create_identifier-cn.xlsx -------------------------------------------------------------------------------- /source/constructs/config/batch_create/identifier/template/batch_create_identifier-en.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/constructs/config/batch_create/identifier/template/batch_create_identifier-en.xlsx -------------------------------------------------------------------------------- /source/constructs/config/job/script/job_extra_files.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/constructs/config/job/script/job_extra_files.zip -------------------------------------------------------------------------------- /source/constructs/lib/admin/acm/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.31.0 2 | urllib3==1.26.18 3 | cryptography==42.0.4 -------------------------------------------------------------------------------- /source/constructs/lib/admin/cognito-post/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.31.0 2 | urllib3==1.26.18 -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/1.0.0-1.0.1/20_update.sql: -------------------------------------------------------------------------------- 1 | alter table discovery_job 2 | add exclude_keywords varchar(1000) null after overwrite; 3 | 4 | alter table discovery_job_database 5 | add table_name varchar(1000) 
null after database_name; 6 | 7 | alter table discovery_job_run 8 | add exclude_keywords varchar(1000) null after template_snapshot_no; 9 | 10 | alter table discovery_job_run_database 11 | add table_name varchar(1000) null after database_name; 12 | 13 | alter table template_identifier 14 | add exclude_keywords varchar(1024) null after header_keywords; -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/1.0.0-1.0.1/99_version.sql: -------------------------------------------------------------------------------- 1 | insert into version (value,description,create_by,create_time,modify_by,modify_time) values ('1.0.1','upgrade install','System',now(),'System',now()); -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/1.0.1-1.0.2/20_update.sql: -------------------------------------------------------------------------------- 1 | alter table discovery_job_run_database 2 | add error_log text null after log; 3 | 4 | update discovery_job_run_database set error_log = log; -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/1.0.1-1.0.2/99_version.sql: -------------------------------------------------------------------------------- 1 | insert into version (value,description,create_by,create_time,modify_by,modify_time) values ('1.0.2','upgrade install','System',now(),'System',now()); -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/1.0.x-1.1.0/99_version.sql: -------------------------------------------------------------------------------- 1 | insert into version (value,description,create_by,create_time,modify_by,modify_time) values ('1.1.0','upgrade install','System',now(),'System',now()); -------------------------------------------------------------------------------- 
/source/constructs/lib/admin/database/1.1.0-1.1.2/20_update.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO config (config_key, config_value) VALUES ('ConcurrentRunJobNumber','10'); 2 | INSERT INTO config (config_key, config_value) VALUES ('SubJobNumberS3','80'); 3 | INSERT INTO config (config_key, config_value) VALUES ('SubJobNumberRds','3'); 4 | 5 | alter table discovery_job_database modify account_id varchar(20) null; 6 | alter table discovery_job_database modify region varchar(20) null; 7 | alter table discovery_job_database modify database_type varchar(20) null; 8 | alter table discovery_job_database modify database_name varchar(255) null; 9 | 10 | alter table catalog_database_level_classification add url varchar(2048) default null after database_name; 11 | alter table catalog_database_level_classification add description varchar(2048) default null after database_name; 12 | -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/1.1.0-1.1.2/99_version.sql: -------------------------------------------------------------------------------- 1 | insert into version (value,description,create_by,create_time,modify_by,modify_time) values ('1.1.2','upgrade install','System',now(),'System',now()); -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/requirements.txt: -------------------------------------------------------------------------------- 1 | PyMySQL==1.0.2 2 | requests==2.31.0 3 | urllib3==1.26.18 -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/whole/00_db.sql: -------------------------------------------------------------------------------- 1 | alter database sdps default character set utf8mb4; -------------------------------------------------------------------------------- 
/source/constructs/lib/admin/database/whole/01_config.sql: -------------------------------------------------------------------------------- 1 | create table config 2 | ( 3 | id int auto_increment 4 | primary key, 5 | config_key varchar(50) not null, 6 | config_value varchar(1000) null, 7 | version int null, 8 | create_by varchar(255) null, 9 | create_time datetime null, 10 | modify_by varchar(255) null, 11 | modify_time datetime null, 12 | constraint config_key_uindex 13 | unique (config_key) 14 | ); -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/whole/01_version.sql: -------------------------------------------------------------------------------- 1 | create table version 2 | ( 3 | id int auto_increment 4 | primary key, 5 | value varchar(100) not null, 6 | description varchar(1000) null, 7 | version int null, 8 | create_by varchar(255) null, 9 | create_time datetime null, 10 | modify_by varchar(255) null, 11 | modify_time datetime null 12 | ); 13 | 14 | -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/whole/10_label.sql: -------------------------------------------------------------------------------- 1 | create table label 2 | ( 3 | id int auto_increment primary key, 4 | label_name varchar(40) not null, 5 | classification varchar(20) not null, 6 | type varchar(20) not null, 7 | style_type varchar(20) not null, 8 | style_value varchar(20) not null, 9 | state varchar(20) not null, 10 | version int null, 11 | create_by varchar(255) null, 12 | create_time datetime null, 13 | modify_by varchar(255) null, 14 | modify_time datetime null, 15 | UNIQUE KEY unq_idx(classification,type,label_name), 16 | key sort_idx(modify_time) 17 | ); 18 | 19 | -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/whole/11_account.sql: 
-------------------------------------------------------------------------------- 1 | create table account 2 | ( 3 | id int auto_increment primary key, 4 | source_id int, 5 | name varchar(32) null, 6 | version int null, 7 | create_by varchar(255) null, 8 | create_time timestamp null, 9 | modify_by varchar(255) null, 10 | modify_time timestamp null 11 | ); 12 | -------------------------------------------------------------------------------- /source/constructs/lib/admin/database/whole/99_version.sql: -------------------------------------------------------------------------------- 1 | insert into version (value,description,create_by,create_time,modify_by,modify_time) values ('1.0.0','whole install','System',now(),'System',now()); 2 | insert into version (value,description,create_by,create_time,modify_by,modify_time) values ('1.0.1','whole install','System',now(),'System',now()); 3 | insert into version (value,description,create_by,create_time,modify_by,modify_time) values ('1.0.2','whole install','System',now(),'System',now()); 4 | insert into version (value,description,create_by,create_time,modify_by,modify_time) values ('1.1.0','whole install','System',now(),'System',now()); 5 | insert into version (value,description,create_by,create_time,modify_by,modify_time) values ('1.1.2','whole install','System',now(),'System',now()); -------------------------------------------------------------------------------- /source/constructs/lib/admin/delete-resources/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.31.0 2 | urllib3==1.26.18 -------------------------------------------------------------------------------- /source/constructs/lib/admin/glue/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.31.0 2 | urllib3==1.26.18 -------------------------------------------------------------------------------- 
/source/constructs/lib/admin/region/requirements.txt: -------------------------------------------------------------------------------- 1 | crhelper==2.0.11 -------------------------------------------------------------------------------- /source/constructs/lib/agent/delete-agent-resources/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.31.0 2 | urllib3==1.26.18 -------------------------------------------------------------------------------- /source/constructs/lib/agent/rename-resources/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.31.0 2 | urllib3==1.26.18 -------------------------------------------------------------------------------- /source/constructs/lib/agent/split-job/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.31.0 2 | urllib3==1.26.18 3 | pytz==2023.3 -------------------------------------------------------------------------------- /source/constructs/lib/common/build-config.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance 5 | * with the License. A copy of the License is located at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES 10 | * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions 11 | * and limitations under the License. 
/**
 * Static registry of parameter groups and parameter labels.
 *
 * The stored shapes (`Label.default`, `Parameters`, `{ default }`) look like
 * `AWS::CloudFormation::Interface` metadata — NOTE(review): confirm against
 * the stacks that read `paramGroups` / `paramLabels`.
 */
export class Parameter {
  /** Ordered list of groups; each has a display label and its parameter names. */
  static paramGroups: { Label: { default: string }; Parameters: string[] }[] = [];

  /** Display label for each parameter, keyed by parameter name. */
  static paramLabels: Record<string, { default: string }> = {};

  /** Clears every registered group and label. */
  public static init(): void {
    // Reference the class explicitly rather than `this`, so the method still
    // works when it is passed around detached from `Parameter`.
    Parameter.paramGroups = [];
    Parameter.paramLabels = {};
  }

  /**
   * Appends a parameter group.
   *
   * @param label - Display label of the group.
   * @param param - Names of the parameters that belong to the group.
   */
  public static addToParamGroups(label: string, ...param: string[]): void {
    Parameter.paramGroups.push({
      Label: { default: label },
      Parameters: param,
    });
  }

  /**
   * Sets (or overwrites) the display label of a single parameter.
   *
   * @param label - Display label.
   * @param param - Parameter name used as the key.
   */
  public static addToParamLabels(label: string, param: string): void {
    Parameter.paramLabels[param] = {
      default: label,
    };
  }
}
"CommonJS", 13 | "noEmitOnError": false, 14 | "noFallthroughCasesInSwitch": true, 15 | "noImplicitAny": true, 16 | "noImplicitReturns": true, 17 | "noImplicitThis": true, 18 | "noUnusedLocals": false, 19 | "noUnusedParameters": true, 20 | "resolveJsonModule": true, 21 | "strict": true, 22 | "strictNullChecks": true, 23 | "strictPropertyInitialization": true, 24 | "stripInternal": true, 25 | "target": "ES2019" 26 | }, 27 | "include": [ 28 | "**/*.ts" 29 | ], 30 | "exclude": [ 31 | "node_modules" 32 | ] 33 | } -------------------------------------------------------------------------------- /source/containers/document-pii-detection/Dockerfile: -------------------------------------------------------------------------------- 1 | # temp stage 2 | FROM python:3.12-slim-bullseye as builder 3 | 4 | # Install build dependencies 5 | RUN apt-get update && \ 6 | apt-get install -y --no-install-recommends gcc g++ 7 | 8 | # Install Python dependencies first 9 | COPY requirements.txt . 10 | RUN pip wheel --no-cache-dir --no-deps --wheel-dir /app/wheels -r requirements.txt 11 | 12 | # prod stage 13 | FROM python:3.12-slim-bullseye 14 | 15 | ARG FUNCTION_DIR="/opt/ml/code" 16 | 17 | WORKDIR ${FUNCTION_DIR} 18 | 19 | RUN apt-get update && apt-get install -y --no-install-recommends libmagic1 && rm -rf /var/lib/apt/lists/* 20 | 21 | COPY ocr_model/ ${FUNCTION_DIR}/ocr_model/ 22 | COPY fd_model/ ${FUNCTION_DIR}/fd_model/ 23 | COPY --from=builder /app/wheels ${FUNCTION_DIR}/wheels 24 | RUN pip install --no-cache ${FUNCTION_DIR}/wheels/* 25 | 26 | COPY parsers/ ${FUNCTION_DIR}/parsers/ 27 | COPY requirements.txt main.py parser_factory.py ${FUNCTION_DIR}/ 28 | 29 | 30 | CMD ["python3", "main.py"] -------------------------------------------------------------------------------- /source/containers/document-pii-detection/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/containers/document-pii-detection/__init__.py -------------------------------------------------------------------------------- /source/containers/document-pii-detection/fd_model/det.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/containers/document-pii-detection/fd_model/det.onnx -------------------------------------------------------------------------------- /source/containers/document-pii-detection/ocr_model/classifier.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/containers/document-pii-detection/ocr_model/classifier.onnx -------------------------------------------------------------------------------- /source/containers/document-pii-detection/ocr_model/det_standard.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/containers/document-pii-detection/ocr_model/det_standard.onnx -------------------------------------------------------------------------------- /source/containers/document-pii-detection/ocr_model/rec_standard.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/containers/document-pii-detection/ocr_model/rec_standard.onnx -------------------------------------------------------------------------------- /source/containers/document-pii-detection/parser_factory.py: 
# File extensions (lower-case, with leading dot) grouped by content category.
supported_file_types = {
    "document": [".docx", ".pdf"],
    "webpage": [".htm", ".html"],
    "email": [".eml"],
    "code": [".java", ".py", ".cpp", ".c", ".h", ".css", ".js", ".php", ".rb", ".swift", ".go", ".sql"],
    "text": [".txt", ".md", ".log"],
    "image": [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif"]
}


class ParserFactory:
    """Maps a file extension to the parser instance that handles it."""

    @staticmethod
    def create_parser(file_type, s3_client):
        """Create the parser for ``file_type``.

        Args:
            file_type: file extension including the leading dot; matched
                case-insensitively.
            s3_client: client object forwarded to the parser constructor.

        Returns:
            A new parser instance for the extension.

        Raises:
            ValueError: if the extension is not handled by any parser.
        """
        # Normalize once instead of on every comparison.
        ext = file_type.lower()

        if ext == '.pdf':
            return PdfParser(s3_client=s3_client)
        if ext in supported_file_types['text'] or ext in supported_file_types['code']:
            return TxtParser(s3_client=s3_client)
        # '.doc' is accepted here although it is not listed under "document";
        # kept as-is so existing routing does not change.
        if ext in ('.doc', '.docx'):
            return DocParser(s3_client=s3_client)
        # The remaining categories read from the table so the two cannot drift.
        if ext in supported_file_types['webpage']:
            return HtmlParser(s3_client=s3_client)
        if ext in supported_file_types['email']:
            return EmailParser(s3_client=s3_client)
        if ext in supported_file_types['image']:
            return ImageParser(s3_client=s3_client, fd_model_path='./fd_model/',
                               ocr_model_path='./ocr_model/')
        raise ValueError('Unsupported file type')
/source/containers/document-pii-detection/parsers/doc_parser.py: -------------------------------------------------------------------------------- 1 | 2 | import docx 3 | import io 4 | from .parser import BaseParser 5 | 6 | class DocParser(BaseParser): 7 | def __init__(self, s3_client): 8 | super().__init__(s3_client=s3_client) 9 | 10 | def parse_file(self, doc_stream): 11 | """ 12 | Extracts text from a doc file and returns a string of content. 13 | """ 14 | 15 | doc = docx.Document(io.BytesIO(doc_stream)) 16 | file_content = "" 17 | for para in doc.paragraphs: 18 | file_content += para.text + "\n" 19 | 20 | prev_paragraph = "" 21 | for table in doc.tables: 22 | for row in table.rows: 23 | for cell in row.cells: 24 | for paragraph in cell.paragraphs: 25 | if paragraph.text != prev_paragraph: 26 | file_content += paragraph.text + "\n" 27 | prev_paragraph = paragraph.text 28 | 29 | return [file_content] 30 | -------------------------------------------------------------------------------- /source/containers/document-pii-detection/parsers/email_parser.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | from .parser import BaseParser 4 | import quopri 5 | from email.parser import BytesParser as PyEmailParser 6 | import io 7 | import chardet 8 | 9 | class EmailParser(BaseParser): 10 | def __init__(self, s3_client): 11 | super().__init__(s3_client=s3_client) 12 | 13 | 14 | def parse_file(self, eml_stream): 15 | """ 16 | Extracts text from a eml file and returns a string of content. 
17 | """ 18 | 19 | # file_encoding = self.get_encoding(eml_stream) 20 | result = chardet.detect(eml_stream) 21 | file_encoding = result['encoding'] 22 | 23 | # with open(eml_stream) as stream: 24 | parser = PyEmailParser() 25 | # message = parser.parsebytes(io.BytesIO(eml_stream)) 26 | 27 | file_content = [] 28 | eml_content = eml_stream.decode(file_encoding) # Decode the stream if needed 29 | msg = parser.parsebytes(eml_content.encode()) 30 | 31 | # text = "" 32 | if msg.is_multipart(): 33 | for part in msg.walk(): 34 | content_type = part.get_content_type() 35 | if content_type == "text/plain": 36 | file_content.append(part.get_payload(decode=True).decode()) 37 | else: 38 | file_content.append(msg.get_payload(decode=True).decode()) 39 | 40 | 41 | return ['\n'.join(file_content)] 42 | -------------------------------------------------------------------------------- /source/containers/document-pii-detection/parsers/image_analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/containers/document-pii-detection/parsers/image_analysis/__init__.py -------------------------------------------------------------------------------- /source/containers/document-pii-detection/parsers/image_analysis/face_detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/containers/document-pii-detection/parsers/image_analysis/face_detection/__init__.py -------------------------------------------------------------------------------- /source/containers/document-pii-detection/parsers/image_analysis/general_ocr/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/containers/document-pii-detection/parsers/image_analysis/general_ocr/__init__.py -------------------------------------------------------------------------------- /source/containers/document-pii-detection/parsers/image_analysis/general_ocr/imaug/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | from .operators import * 6 | 7 | def transform(data, ops=None): 8 | """ transform """ 9 | if ops is None: 10 | ops = [] 11 | for op in ops: 12 | data = op(data) 13 | if data is None: 14 | return None 15 | return data 16 | 17 | 18 | def create_operators(op_param_list, global_config=None): 19 | """ 20 | create operators based on the config 21 | Args: 22 | params(list): a dict list, used to create some operators 23 | """ 24 | 25 | operator_mapping = { 26 | 'DecodeImage': DecodeImage, 27 | 'NormalizeImage': NormalizeImage, 28 | 'ToCHWImage': ToCHWImage, 29 | 'KeepKeys': KeepKeys, 30 | 'DetResizeForTest': DetResizeForTest 31 | } 32 | assert isinstance(op_param_list, list), ('operator config should be a list') 33 | ops = [] 34 | for operator in op_param_list: 35 | assert isinstance(operator, 36 | dict) and len(operator) == 1, "yaml format error" 37 | op_name = list(operator)[0] 38 | param = {} if operator[op_name] is None else operator[op_name] 39 | if global_config is not None: 40 | param.update(global_config) 41 | op = operator_mapping[op_name](**param) 42 | ops.append(op) 43 | return ops -------------------------------------------------------------------------------- /source/containers/document-pii-detection/parsers/image_analysis/general_ocr/postprocess/__init__.py: -------------------------------------------------------------------------------- 1 
| from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import copy 7 | 8 | __all__ = ['build_post_process'] 9 | 10 | 11 | def build_post_process(config, global_config=None): 12 | from .db_postprocess import DBPostProcess 13 | from .rec_postprocess import CTCLabelDecode, AttnLabelDecode 14 | from .cls_postprocess import ClsPostProcess 15 | 16 | support_dict = [ 17 | 'DBPostProcess', 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess' 18 | ] 19 | 20 | support_dict = { 21 | 'DBPostProcess': DBPostProcess, 22 | 'CTCLabelDecode': CTCLabelDecode, 23 | 'AttnLabelDecode': AttnLabelDecode, 24 | 'ClsPostProcess': ClsPostProcess 25 | } 26 | 27 | config = copy.deepcopy(config) 28 | module_name = config.pop('name') 29 | if global_config is not None: 30 | config.update(global_config) 31 | assert module_name in support_dict, 'post process only support {}'.format(support_dict) 32 | module_class = support_dict[module_name](**config) 33 | return module_class -------------------------------------------------------------------------------- /source/containers/document-pii-detection/parsers/image_analysis/general_ocr/postprocess/cls_postprocess.py: -------------------------------------------------------------------------------- 1 | class ClsPostProcess(object): 2 | """ Convert between text-label and text-index """ 3 | 4 | def __init__(self, label_list, **kwargs): 5 | super(ClsPostProcess, self).__init__() 6 | self.label_list = label_list 7 | 8 | def __call__(self, preds, label=None, *args, **kwargs): 9 | pred_idxs = preds.argmax(axis=1) 10 | decode_out = [(self.label_list[idx], preds[i, idx]) 11 | for i, idx in enumerate(pred_idxs)] 12 | if label is None: 13 | return decode_out 14 | label = [(self.label_list[idx], 1.0) for idx in label] 15 | return decode_out, label -------------------------------------------------------------------------------- 
/source/containers/document-pii-detection/parsers/pdf_parser.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import boto3 4 | from pypdf import PdfReader 5 | from io import BytesIO 6 | 7 | from .parser import BaseParser 8 | 9 | class PdfParser(BaseParser): 10 | def __init__(self, s3_client): 11 | super().__init__(s3_client=s3_client) 12 | 13 | 14 | def parse_file(self, pdf_stream): 15 | """ 16 | Extracts text from a PDF file and returns a list of lines. 17 | """ 18 | 19 | # Create a PDF reader object 20 | pdf_reader = PdfReader(BytesIO(pdf_stream)) 21 | file_content = [] 22 | 23 | # Loop through each page in the PDF file 24 | for page_num in range(len(pdf_reader.pages)): 25 | page = pdf_reader.pages[page_num] 26 | 27 | # Extract the text from the page and append it to the string 28 | page_content = page.extract_text() 29 | file_content.append(page_content) 30 | 31 | # print(file_content) 32 | return file_content 33 | -------------------------------------------------------------------------------- /source/containers/document-pii-detection/parsers/txt_parser.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | from .parser import BaseParser 4 | import quopri 5 | import chardet 6 | 7 | class TxtParser(BaseParser): 8 | def __init__(self, s3_client): 9 | super().__init__(s3_client=s3_client) 10 | 11 | def parse_file(self, txt_stream): 12 | """ 13 | Extracts text from a TXT file and returns a list of lines. 
14 | """ 15 | 16 | # file_encoding = self.get_encoding(txt_stream) 17 | 18 | # Read the file 19 | # with open(txt_stream, 'rb') as file: 20 | # file_content_byte = file.read() 21 | result = chardet.detect(txt_stream) 22 | encoding = result['encoding'] 23 | file_content = txt_stream.decode(encoding) 24 | # print(file_content) 25 | # if file_encoding == 'us-ascii': 26 | # file_content = quopri.decodestring(file_content).decode('utf-8', errors='ignore') 27 | 28 | return [file_content] 29 | -------------------------------------------------------------------------------- /source/containers/document-pii-detection/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.34.63 2 | six==1.16.0 3 | numpy==1.26.4 4 | onnxruntime==1.17.1 5 | Pillow==10.3.0 6 | pyclipper==1.3.0.post5 7 | Shapely==2.0.3 8 | python-dateutil==2.8.2 9 | chardet==5.2.0 10 | pypdf==4.1.0 11 | python-magic==0.4.27 12 | python-docx==1.1.0 13 | bs4==0.0.2 14 | pandas==2.2.1 15 | pyarrow==15.0.2 16 | psutil==5.9.8 17 | opencv-python-headless==4.9.0.80 18 | -------------------------------------------------------------------------------- /source/containers/document-pii-detection/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/containers/document-pii-detection/utils.py -------------------------------------------------------------------------------- /source/portal/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "browser": true, 4 | "es2021": true 5 | }, 6 | "extends": [ 7 | "eslint:recommended", 8 | "plugin:@typescript-eslint/recommended", 9 | "react-app", 10 | "prettier" 11 | ], 12 | "overrides": [], 13 | "parserOptions": { 14 | "ecmaVersion": "latest", 15 | "sourceType": "module" 16 | }, 17 | "plugins": [ 18 | "react" 
19 | ], 20 | "rules": { 21 | "eqeqeq": "warn", 22 | "react-hooks/exhaustive-deps": "off", 23 | "@typescript-eslint/no-explicit-any": "off" 24 | } 25 | } -------------------------------------------------------------------------------- /source/portal/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | 25 | aws-exports.json 26 | -------------------------------------------------------------------------------- /source/portal/.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "tabWidth": 2, 3 | "singleQuote": true 4 | } -------------------------------------------------------------------------------- /source/portal/.well-known/security.txt: -------------------------------------------------------------------------------- 1 | Contact: https://github.com/awslabs/sensitive-data-protection-on-aws 2 | Expires: 2023-12-31T18:37:07z -------------------------------------------------------------------------------- /source/portal/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM public.ecr.aws/docker/library/node:14.21.2 AS builder 3 | COPY . 
/tmp/frontend/ 4 | RUN cd /tmp/frontend/ && npm install && npm run build 5 | 6 | 7 | FROM public.ecr.aws/docker/library/nginx:1.23-alpine 8 | COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.6.0 /lambda-adapter /opt/extensions/lambda-adapter 9 | COPY --from=builder /tmp/frontend/build/ /usr/share/nginx/public/ 10 | COPY nginx-config/ /etc/nginx/ 11 | EXPOSE 8080 12 | RUN chmod +x /etc/nginx/start_nginx.sh 13 | ENTRYPOINT ["/etc/nginx/start_nginx.sh"] -------------------------------------------------------------------------------- /source/portal/config/jest/babelTransform.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const babelJest = require('babel-jest').default; 4 | 5 | const hasJsxRuntime = (() => { 6 | if (process.env.DISABLE_NEW_JSX_TRANSFORM === 'true') { 7 | return false; 8 | } 9 | 10 | try { 11 | require.resolve('react/jsx-runtime'); 12 | return true; 13 | } catch (e) { 14 | return false; 15 | } 16 | })(); 17 | 18 | module.exports = babelJest.createTransformer({ 19 | presets: [ 20 | [ 21 | require.resolve('babel-preset-react-app'), 22 | { 23 | runtime: hasJsxRuntime ? 'automatic' : 'classic', 24 | }, 25 | ], 26 | ], 27 | babelrc: false, 28 | configFile: false, 29 | }); 30 | -------------------------------------------------------------------------------- /source/portal/config/jest/cssTransform.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | // This is a custom Jest transformer turning style imports into empty objects. 4 | // http://facebook.github.io/jest/docs/en/webpack.html 5 | 6 | module.exports = { 7 | process() { 8 | return 'module.exports = {};'; 9 | }, 10 | getCacheKey() { 11 | // The output is always the same. 
12 | return 'cssTransform'; 13 | }, 14 | }; 15 | -------------------------------------------------------------------------------- /source/portal/config/jest/fileTransform.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const path = require('path'); 4 | const camelcase = require('camelcase'); 5 | 6 | // This is a custom Jest transformer turning file imports into filenames. 7 | // http://facebook.github.io/jest/docs/en/webpack.html 8 | 9 | module.exports = { 10 | process(src, filename) { 11 | const assetFilename = JSON.stringify(path.basename(filename)); 12 | 13 | if (filename.match(/\.svg$/)) { 14 | // Based on how SVGR generates a component name: 15 | // https://github.com/smooth-code/svgr/blob/01b194cf967347d43d4cbe6b434404731b87cf27/packages/core/src/state.js#L6 16 | const pascalCaseFilename = camelcase(path.parse(filename).name, { 17 | pascalCase: true, 18 | }); 19 | const componentName = `Svg${pascalCaseFilename}`; 20 | return `const React = require('react'); 21 | module.exports = { 22 | __esModule: true, 23 | default: ${assetFilename}, 24 | ReactComponent: React.forwardRef(function ${componentName}(props, ref) { 25 | return { 26 | $$typeof: Symbol.for('react.element'), 27 | type: 'svg', 28 | ref: ref, 29 | key: null, 30 | props: Object.assign({}, props, { 31 | children: ${assetFilename} 32 | }) 33 | }; 34 | }), 35 | };`; 36 | } 37 | 38 | return `module.exports = ${assetFilename};`; 39 | }, 40 | }; 41 | -------------------------------------------------------------------------------- /source/portal/config/webpack/persistentCache/createEnvironmentHash.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | const { createHash } = require('crypto'); 3 | 4 | module.exports = env => { 5 | const hash = createHash('md5'); 6 | hash.update(JSON.stringify(env)); 7 | 8 | return hash.digest('hex'); 9 | }; 10 | 
-------------------------------------------------------------------------------- /source/portal/nginx-config/start_nginx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | wrote=false 4 | 5 | function write_domain_name(){ 6 | # In the lambda environment, except for tmp, everything is read-only 7 | wrote=true 8 | csp=" add_header Content-Security-Policy \"default-src 'self' $1; img-src 'self' blob: data: ; style-src 'self' blob: data:; font-src 'self' blob: data:; script-src 'self';\";" 9 | echo $csp > /tmp/CustomDomainName.conf 10 | } 11 | if [ -n "$CustomDomainName" ]; then 12 | write_domain_name $CustomDomainName 13 | else 14 | if [ -n "$OidcIssuer" ]; then 15 | # Due to the need to access external networks to obtain authorization_endpoint, the openid configuration is not parsed. 16 | domain_name=$(echo "$OidcIssuer" | sed -n 's/^\(.*\:\/\/\)\([^\/]*\).*/\2/p') 17 | build_in_domain_names="okta.com authing.cn amazoncognito.com amazonaws.com" 18 | IFS=' ' 19 | exist=false 20 | for build_in_domain_name in $build_in_domain_names; do 21 | if [[ $domain_name == *"$build_in_domain_name"* ]]; then 22 | exist=true 23 | break 24 | fi 25 | done 26 | if [ "$exist" = false ]; then 27 | sub_domain_name=$(echo "$domain_name" | awk -F'.' 
'{print $(NF-1)"."$NF}') 28 | wildcard="*.$sub_domain_name" 29 | write_domain_name $wildcard 30 | fi 31 | fi 32 | fi 33 | if [ "$wrote" = false ]; then 34 | write_domain_name "*.okta.com *.authing.cn *.amazoncognito.com *.amazonaws.com" 35 | fi 36 | nginx -g "daemon off;" 37 | -------------------------------------------------------------------------------- /source/portal/public/aws-exports.json: -------------------------------------------------------------------------------- 1 | { 2 | "aws_project_region": "", 3 | "aws_api_endpoint": "/api", 4 | "aws_authenticationType": "AUTH_TYPE.OPENID_CONNECT", 5 | "aws_oidc_issuer": "https://dev-43241832.okta.com", 6 | "aws_oidc_client_id": "0oa7liej6xUztllZL5d7", 7 | "aws_oidc_customer_domain": "https://sdps.nowfox.com/logincallback", 8 | "aws_alb_url": "", 9 | "aws_cognito_region": "", 10 | "aws_user_pools_id": "", 11 | "aws_user_pools_web_client_id": "", 12 | "version": "v1.0.0", 13 | "backend_url": "https://sdps-dev.demo.solutions.aws.a2z.org.cn:444", 14 | "expired": 12 15 | } 16 | -------------------------------------------------------------------------------- /source/portal/public/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/sensitive-data-protection-on-aws/03c90b9e2d464d014fa2b3d3c955af136f0aa3fd/source/portal/public/icon.png -------------------------------------------------------------------------------- /source/portal/public/locales/en/catalog.json: -------------------------------------------------------------------------------- 1 | { 2 | "privacy": "Privacy: ", 3 | "customLabels": "Custom labels: ", 4 | "addToTable": "Add to Table", 5 | "dataCatalog": "Data catalog: ", 6 | "browserCatalog": "Browse data catalogs", 7 | "browserCatalogDesc": "The platform connects to data source to create and update data catalogs. 
A data catalog is a repository of metadata of data sources.", 8 | "detail": { 9 | "filterCatalogs": "Filter catalogs", 10 | "identifier": "Identifiers: ", 11 | "selectIdentifier": "Select identifier from template", 12 | "noDataIdentifier": "No data identifiers", 13 | "noDataIdentifierDesc": "No data in this catalog was identified as sensitive data." 14 | }, 15 | "list": { 16 | "filterDataCatalog": "Filter data catalogs" 17 | }, 18 | "modal": { 19 | "s3BucketDetail": "S3 bucket details", 20 | "rdsInstanceDetail": "RDS instance details", 21 | "glueDetail": "Glue catalog details", 22 | "jdbcDetail": "Custom database (JDBC) details", 23 | "customLabel": "Custom labels: " 24 | }, 25 | "schemaModal": { 26 | "detailDescInfo": "These files are structured files and were detected to have the same schema", 27 | "detailDescHeader": "These are 10 sampled data entries of this table.", 28 | "schema": "Schema", 29 | "dataPreview": "Data preview", 30 | "folderDetails": "Folder details", 31 | "tableDetails": "Table details" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /source/portal/public/locales/en/timeline.json: -------------------------------------------------------------------------------- 1 | { 2 | "timeline":"Timeline", 3 | "versionTimeline":"SDPS VERSIONS TIMELINES", 4 | "publishTime": "Publish time: ", 5 | "exportTemplates": "Export Template" 6 | } -------------------------------------------------------------------------------- /source/portal/public/locales/zh/catalog.json: -------------------------------------------------------------------------------- 1 | { 2 | "privacy": "隐私: ", 3 | "customLabels": "自定义标签: ", 4 | "addToTable": "添加到表", 5 | "dataCatalog": "数据目录: ", 6 | "browserCatalog": "浏览数据目录", 7 | "browserCatalogDesc": "该平台连接到数据源以创建和更新数据目录。数据目录是数据源元数据的存储库。", 8 | "detail": { 9 | "filterCatalogs": "筛选目录", 10 | "identifier": "标识符: ", 11 | "selectIdentifier": "请从模板中选择标识符", 12 | "noDataIdentifier": "没有数据标识符", 13 |
"noDataIdentifierDesc": "此目录中没有数据被识别为敏感数据。" 14 | }, 15 | "list": { 16 | "filterDataCatalog": "筛选数据目录" 17 | }, 18 | "modal": { 19 | "s3BucketDetail": "S3 存储桶详细信息", 20 | "rdsInstanceDetail": "RDS 实例详细信息", 21 | "glueDetail": "Glue 目录详细信息", 22 | "jdbcDetail": "自定义数据库 (JDBC) 详细信息", 23 | "customLabel": "自定义标签: " 24 | }, 25 | "schemaModal": { 26 | "detailDescInfo": "这些文件是结构化文件,被检测出具有相同的架构", 27 | "detailDescHeader": "这是此表的 10 个采样数据条目。", 28 | "schema": "架构", 29 | "dataPreview": "数据预览", 30 | "folderDetails": "文件夹详情", 31 | "tableDetails": "表格详情" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /source/portal/public/locales/zh/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "summary": { 3 | "desc": "敏感数据保护方案可让企业客户创建数据目录,跨多个AWS账户发现、保护和可视化敏感数据。", 4 | "solutionDo": "解决方案的功能", 5 | "getStarted": "开始使用", 6 | "archOfSolution": "解决方案的技术架构" 7 | }, 8 | "connect": { 9 | "desc": "使用敏感数据保护的第一步是接入AWS账户。接入后,您可以单击特定的AWS账户并查看其数据源(S3、RDS)。", 10 | "addAWSAccount": "如何添加AWS账户" 11 | }, 12 | "catalog": { 13 | "desc": "数据目录是数据源(Amazon S3、Amazon RDS)元数据的存储库。使用数据目录,您可以查看数据的列级信息。", 14 | "catalogSync": "何时同步数据目录", 15 | "useLabels": "如何在数据目录中使用标签" 16 | }, 17 | "identifier": { 18 | "desc": "数据标识符是用于检查敏感数据的规则。在此网页上,您可以管理所有标识符。您可以查看内置标识符或使用正则表达式或关键词创建自己的标识符。", 19 | "howToCreate": "如何创建自定义数据标识符", 20 | "builtInList": "内置数据标识符列表" 21 | }, 22 | "template": { 23 | "desc": "模板是数据标识符的集合。您可以通过添加数据标识符来定义模板中的敏感数据。模板将用于敏感数据发现作业。", 24 | "addToTemplate": "如何在模板中添加数据标识符" 25 | }, 26 | "runjob": { 27 | "desc": "敏感数据发现作业由一个或多个AWS Glue作业组成,用于实际数据检测。", 28 | "jobSettings": "创建作业及设置", 29 | "howToDownload": "如何下载报告", 30 | "rerunJob": "如何重新运行作业", 31 | "howToPause": "如何暂停/继续作业" 32 | }, 33 | "jobDetail": { 34 | "desc": "敏感数据发现作业由一个或多个AWS Glue作业组成,用于实际数据检测。此页面显示不同AWS账户上的所有Glue作业执行情况。", 35 | "jobDetailPages": "更多作业详细信息" 36 | }, 37 | "account": { 38 | "desc": "您可以手动添加或(通过AWS组织)自动添加AWS账户。", 39 | "manualAdd": "如何手动添加独立的AWS账户", 40 |
"batchAdd": "如何自动通过AWS组织添加批量AWS账户" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /source/portal/public/locales/zh/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtInIdentifier": "内置数据标识符", 3 | "customIdentifier": "自定义数据标识符", 4 | "filterByNameOrDesc": "按名称或描述筛选", 5 | "filterDataIndentifier": "筛选数据标识符", 6 | "dataIdentifier": "数据标识符", 7 | "addDataIdentifier": "添加数据标识符", 8 | "dataIdentifierInThisTmpl": "此模板中的数据标识符", 9 | "dataIdentifierInThisTmplDesc": "此模板中的标识符将用于敏感数据发现作业。", 10 | "lastUpdated": "上次更新时间 ", 11 | "howItWorks": "它是如何工作的", 12 | "defineTmplPrivacy": "定义隐私分类模板。", 13 | "platFormWillLabel": "该平台将使用隐私标签标记数据目录 ", 14 | "dataMatches": "如果数据与启用的数据标识符的规则相匹配。", 15 | "dataScaned": "如果已扫描数据,但未与任何已启用的数据标识符匹配。", 16 | "dataNeverScanned": "如果从未扫描过数据,则隐私标签将显示 “N/A”", 17 | "defineClassification": "定义分类模板", 18 | "defineClassificationDesc": "分类模板包含一组数据标识符。敏感数据发现作业使用分类模板来识别敏感数据。 ", 19 | "manageDataIdentifier": "管理数据识别规则", 20 | "manageDataIdentifierDesc": "数据标识符是发现敏感数据的规则。 ", 21 | "deleteDataIdentifier": "删除数据标识符", 22 | "removeDataIdentifier": "从模板中删除此数据标识符", 23 | "deleteIdentifierTips": "永久删除此数据标识符?此操作无法撤消。", 24 | "deleteIdentifierFromTemplateTipsA": "你确定从模版移除下面", 25 | "deleteIdentifierFromTemplateTipsB": "条数据标识符?", 26 | "identifierBeingUsed": "此标识符用于以下地方", 27 | "classficationTemplate": "分类模板,单击 ", 28 | "dataCatalog": "数据目录 ", 29 | "dataS3": "数据目录: S3 存储桶 ", 30 | "dataRDS": "数据目录: RDS 实例 ", 31 | "dataGlue": "数据目录: AWS Glue ", 32 | "dataJDBC": "数据目录: 自定义数据库 (JDBC) ", 33 | "removeTheseAndDelete": "请删除这些标识符,然后删除", 34 | "toAvoidTips": "为避免意外删除,我们要求您提供额外的书面同意。", 35 | "typeConfirm": "输入 “确认” 即表示同意" 36 | } 37 | -------------------------------------------------------------------------------- /source/portal/public/locales/zh/timeline.json: -------------------------------------------------------------------------------- 1 | { 2 | "timeline":"时间线", 3 | 
"versionTimeline":"SDPS 版本发布时间线", 4 | "publishTime": "发布时间: ", 5 | "exportTemplates": "导出安装模版" 6 | } -------------------------------------------------------------------------------- /source/portal/public/logos/3.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /source/portal/public/logos/5.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /source/portal/public/logos/6.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /source/portal/public/logos/source/glue.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /source/portal/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /source/portal/src/App.test.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { render, screen } from '@testing-library/react'; 3 | import App from './App'; 4 | 5 | test('renders learn react link', () => { 6 | render(); 7 | const linkElement = screen.getByText(/learn react/i); 8 | expect(linkElement).toBeInTheDocument(); 9 | }); 10 | -------------------------------------------------------------------------------- /source/portal/src/apis/account-manager/api.ts: -------------------------------------------------------------------------------- 1 | import { apiRequest } from 'tools/apiRequest'; 2 | 3 | // 获取Account list 4 | const 
getAccountList = async (params: any) => { 5 | const result = await apiRequest('post', 'data-source/list-account', params); 6 | return result; 7 | }; 8 | 9 | // 添加账号 10 | const addAccount = async (params: any) => { 11 | const result = await apiRequest('post', 'data-source/add_account', params); 12 | return result; 13 | }; 14 | 15 | // 添加成员账号 16 | const addOrgAccount = async (params: any) => { 17 | const result = await apiRequest('post', 'data-source/reload_organization_account', params); 18 | return result; 19 | }; 20 | 21 | // Delete account 22 | const deleteAccount = async (params: any) => { 23 | const result = await apiRequest('post', 'data-source/delete_account', params); 24 | return result; 25 | }; 26 | 27 | // 获取Provider region 28 | const getProviderRegions = async (params: number) => { 29 | console.log(params) 30 | const result = await apiRequest('get', `data-source/query-regions-by-provider?provider_id=${params}`, ''); 31 | return result; 32 | }; 33 | 34 | export { getAccountList, addAccount, deleteAccount, addOrgAccount, getProviderRegions }; 35 | -------------------------------------------------------------------------------- /source/portal/src/apis/config/api.ts: -------------------------------------------------------------------------------- 1 | import { apiRequest } from 'tools/apiRequest'; 2 | 3 | const getSystemConfig = async (params: any) => { 4 | const result = await apiRequest('get', 'config', params); 5 | return result; 6 | }; 7 | 8 | const getSubnetsRunIps = async (params: any) => { 9 | const result = await apiRequest('get', 'config/subnets', params); 10 | return result; 11 | }; 12 | 13 | const updateSystemConfig = async (params: any) => { 14 | const result = await apiRequest('post', 'config', params); 15 | return result; 16 | }; 17 | 18 | export { getSystemConfig, getSubnetsRunIps, updateSystemConfig }; 19 | -------------------------------------------------------------------------------- /source/portal/src/apis/label/api.ts:
-------------------------------------------------------------------------------- 1 | import { apiRequest } from 'tools/apiRequest'; 2 | 3 | // 获取查询所有Labels 4 | const requestGetAllLabels = async (params: { label_name: string }) => { 5 | const result: any = await apiRequest('get', `labels/search-labels`, params); 6 | return result; 7 | }; 8 | 9 | // 获取查询分页labels 10 | const getLabelsListByName = async (params: { 11 | page: number; 12 | size: number; 13 | label_name: string; 14 | }) => { 15 | const result: any = await apiRequest( 16 | 'post', 17 | `labels/search-detail-labels-by-page?page=${params.page}&size=${params.size}`, 18 | params 19 | ); 20 | return result; 21 | }; 22 | 23 | // 创建 Label 24 | const requestCreateLabel = async (params: { label_name: string }) => { 25 | const result: any = await apiRequest('post', `labels/create-label`, params); 26 | return result; 27 | }; 28 | 29 | // 删除 Label 30 | const requestDeleteLabel = async (params: { ids: string[] }) => { 31 | const result: any = await apiRequest( 32 | 'post', 33 | `labels/delete-labels-by-ids`, 34 | params 35 | ); 36 | return result; 37 | }; 38 | 39 | // 修改 Label 40 | const requestUpdateLabel = async (params: { 41 | id: string; 42 | label_name: string; 43 | }) => { 44 | const result: any = await apiRequest('post', `labels/update-label`, params); 45 | return result; 46 | }; 47 | 48 | export { 49 | requestGetAllLabels, 50 | getLabelsListByName, 51 | requestCreateLabel, 52 | requestDeleteLabel, 53 | requestUpdateLabel, 54 | }; 55 | -------------------------------------------------------------------------------- /source/portal/src/apis/props/api.ts: -------------------------------------------------------------------------------- 1 | import { apiRequest } from 'tools/apiRequest'; 2 | 3 | // search all props by props type 4 | const requestPropsByType = async (params: { type: string }) => { 5 | const result: any = await apiRequest( 6 | 'get', 7 | `template/list-props-by-type/${params.type}`, 8 | undefined 9 | ); 
10 | return result; 11 | }; 12 | 13 | // 创建 Props 14 | const requestCreateProps = async (params: { 15 | prop_name: string; 16 | prop_type: string; 17 | }) => { 18 | const result: any = await apiRequest('post', `template/props`, params); 19 | return result; 20 | }; 21 | 22 | // 删除 Props 23 | const requestDeleteProps = async (params: { id: string }) => { 24 | const result: any = await apiRequest( 25 | 'delete', 26 | `/template/props/${params.id}`, 27 | params 28 | ); 29 | return result; 30 | }; 31 | 32 | // 修改 Props 33 | const requestUpdateProps = async (params: { 34 | id: string; 35 | prop_name: string; 36 | prop_type: string; 37 | }) => { 38 | const result: any = await apiRequest( 39 | 'patch', 40 | `template/props/${params.id}`, 41 | params 42 | ); 43 | return result; 44 | }; 45 | 46 | export { 47 | requestPropsByType, 48 | requestCreateProps, 49 | requestDeleteProps, 50 | requestUpdateProps, 51 | }; 52 | -------------------------------------------------------------------------------- /source/portal/src/common/ConterLink.tsx: -------------------------------------------------------------------------------- 1 | import { Link } from '@cloudscape-design/components'; 2 | import React from 'react'; 3 | import '../index.scss'; 4 | 5 | export const CounterLink = ({ children }: any) => { 6 | return ( 7 | 8 | {children} 9 | 10 | ); 11 | }; 12 | -------------------------------------------------------------------------------- /source/portal/src/common/HelpInfo.tsx: -------------------------------------------------------------------------------- 1 | import { HelpPanel } from '@cloudscape-design/components'; 2 | import React from 'react'; 3 | import { ExternalLinkGroup } from './external-link-group'; 4 | import { useTranslation } from 'react-i18next'; 5 | 6 | interface LinkItemType { 7 | href: string; 8 | text: string; 9 | } 10 | 11 | interface HelpInfoProps { 12 | title: string | null; 13 | description: string | null; 14 | linkItems: LinkItemType[]; 15 | } 16 | 17 | const 
HelpInfo: React.FC = (props: HelpInfoProps) => { 18 | const { title, description, linkItems } = props; 19 | const { t } = useTranslation(); 20 | return ( 21 | {title}} 23 | footer={ 24 | 25 | } 26 | > 27 |

{description}

28 |
29 | ); 30 | }; 31 | 32 | export default HelpInfo; 33 | -------------------------------------------------------------------------------- /source/portal/src/common/IdentifierTypeSelect.tsx: -------------------------------------------------------------------------------- 1 | import { Select, SelectProps } from '@cloudscape-design/components'; 2 | import React from 'react'; 3 | import { useTranslation } from 'react-i18next'; 4 | 5 | interface IdentifierTypeSelectProps { 6 | typeValue: SelectProps.Option | null; 7 | changeType: (type: SelectProps.Option | null) => void; 8 | } 9 | 10 | const IdentifierTypeSelect: React.FC = ( 11 | props: IdentifierTypeSelectProps 12 | ) => { 13 | const { t } = useTranslation(); 14 | const { typeValue, changeType } = props; 15 | return ( 16 |
17 |