├── .gitattributes ├── .github ├── CODEOWNERS ├── actions │ ├── .helpers │ │ ├── box.sh │ │ ├── success.sh │ │ └── success_destroy.sh │ ├── terraform-deploy │ │ └── action.yml │ ├── terraform-plan │ │ └── action.yml │ ├── terraform-unlock-state │ │ └── action.yml │ └── terraform-validate │ │ └── action.yml ├── dependabot.yml ├── generate-ssl-keys.sh ├── hooks │ └── pre-commit └── workflows │ ├── cd-terraform-core.yml │ ├── cd-terraform-etl.yml │ ├── cd-terraform-networking.yml │ ├── ci-python-lambda.yml │ ├── ci-terraform-backend-setup.yml │ ├── ci-terraform-core.yml │ ├── ci-terraform-etl.yml │ ├── ci-terraform-networking.yml │ ├── deploy_terraform.yml │ ├── deploy_terraform_networking.yml │ ├── flake8_linter_python_files.yml │ ├── lint-terraform.yml │ ├── plan-terraform.yml │ ├── terraform-core-state-unlock.yml │ ├── terraform-etl-state-unlock.yml │ ├── test-python-and-lambda.yml │ ├── unlock_terraform_state.yml │ └── validate-and-lint-terraform.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Makefile ├── README.md ├── docker ├── pre-production-data-cleanup │ ├── Dockerfile │ ├── cleanup-past-x-days.sh │ ├── deploy.sh │ └── docker-compose.yml └── sql-to-parquet │ ├── Dockerfile │ ├── delete_db_snapshots_in_db.py │ ├── deploy.sh │ ├── docker-compose.yml │ └── entrypoint.sh ├── external-lib ├── Makefile └── pom.xml ├── lambdas ├── Makefile ├── calculate_max_concurrency │ └── main.py ├── export_rds_snapshot_to_s3 │ └── main.py ├── g_drive_folder_to_s3 │ ├── Dockerfile │ ├── Makefile │ ├── Pipfile │ ├── Pipfile.lock │ ├── main.py │ └── test_main.py ├── g_drive_to_s3 │ └── main.py ├── glue-failure-notifications │ ├── index.js │ ├── package-lock.json │ └── package.json ├── glue_failure_gchat_notifications │ ├── Makefile │ ├── Pipfile │ ├── Pipfile.lock │ ├── main.py │ └── test.py ├── govnotify_api_ingestion_customer_services │ └── main.py ├── govnotify_api_ingestion_housing_lbh_communal_repairs │ └── main.py ├── govnotify_api_ingestion_repairs │ └── main.py ├── icaseworks_api_ingestion │ ├── Makefile │ ├── Pipfile │ ├── Pipfile.lock │ ├── helpers.py │ ├── main.py │ └── test.py ├── kafka_test │ ├── Makefile │ ├── Pipfile │ ├── Pipfile.lock │ ├── docker-compose.yml │ ├── helpers.py │ ├── lambda-events │ │ ├── list-all-topics.json │ │ ├── list-schema-registry-subjects.json │ │ ├── read-message-from-topic.json │ │ └── send-message-to-topic.json │ ├── local_test.py │ ├── main.py │ ├── readme.md │ ├── topic-messages │ │ ├── contact_details_api.json │ │ └── tenure_api.json │ ├── topic-schemas │ │ ├── contact_details_api.json │ │ └── tenure_api.json │ └── update_schemas_in_schema_registry.sh ├── lambda_alarms_handler │ ├── Makefile │ ├── Pipfile │ ├── Pipfile.lock │ ├── main.py │ └── test.py ├── lambda_layers │ ├── Dockerfile │ ├── Makefile │ ├── requirements1.txt │ ├── requirements2.txt │ ├── requirements3.txt │ ├── requirements4.txt │ ├── requirements5.txt │ ├── requirements6.txt │ └── requirements7.txt ├── liberator_prod_to_pre_prod │ ├── main.py │ └── test.py ├── map_s3_event_to_sns_topic │ └── main.py ├── mtfh_export_lambda │ ├── Makefile │ ├── main.py │ └── test.py ├── publish_file_upload_to_sns_topic │ └── main.py ├── rds-database-snapshot-replicator │ ├── README.md │ ├── index.js │ ├── local.js │ ├── package-lock.json │ └── package.json ├── rds_snapshot_export_s3_to_s3_copier │ └── main.py ├── redshift_stage_and_load_tables │ ├── main.py │ └── stage_and_load_parquet.sql ├── requirements.test.txt ├── s3-to-s3-export-copier │ ├── README.md │ ├── index.js │ ├── local.js │ ├── package-lock.json 
│ └── package.json ├── set_budget_limit_amount │ ├── Pipfile │ ├── Pipfile.lock │ └── main.py ├── sftp_to_s3 │ ├── README.md │ ├── index.js │ ├── package-lock.json │ └── package.json ├── shutdown_notebooks │ ├── main.py │ └── test.py ├── start_s3_file_ingestion_glue_job_from_sns_topic │ └── main.py └── team_times │ ├── main.py │ └── test.py ├── notebook ├── Makefile ├── README.md ├── aws-config │ └── config ├── docker-compose.yml └── scripts │ ├── Load latest tascomi cou.ipynb │ ├── Read from S3.ipynb │ ├── Recast tascomi contacts.ipynb │ ├── address-cleaning.ipynb │ ├── address-matching.ipynb │ ├── anna-cleaning-repair-dlo.ipynb │ ├── cleaning-ElecMechFire-Door-Entry.ipynb │ ├── cleaning-ElecMechFire-Lift-Breakdown-ela.ipynb │ ├── cleaning-ElecMechFire-Lightning-Protection.ipynb │ ├── cleaning-ElecMechFire-Reactive-Rewires.ipynb │ ├── cleaning-ElecMechFire-TV-Aerials.ipynb │ ├── cleaning-ElecMechFire-communal-lighting.ipynb │ ├── cleaning-elecmechfire-emergency-lighting-service-cleaning.ipynb │ ├── cleaning-repairs-alphatrack.ipynb │ ├── cleaning-repairs-avonline.ipynb │ ├── cleaning-repairs-axis.ipynb │ ├── cleaning-repairs-dlo.ipynb │ ├── cleaning-repairs-electrical-mechanical-fire-safety-fire-alarm-aov.ipynb │ ├── cleaning-repairs-electrical-supplies.ipynb │ ├── cleaning-repairs-heritage.ipynb │ ├── cleaning-repairs-purdy.ipynb │ ├── cleaning-repairs-stannah.ipynb │ ├── clearing-repairs-electric-heating.ipynb │ ├── coordinates UDF.ipynb │ ├── coordinates.ipynb │ ├── copy_tables_to_new_s3_location.ipynb │ ├── levenshtein.ipynb │ ├── planning │ └── load-table-from-tascomi-API-endpoint.ipynb │ ├── rename_headers.ipynb │ ├── tascomi_create_column_type_dictionary.ipynb │ ├── template.ipynb │ └── test-s3-connection.ipynb ├── scripts ├── .gitignore ├── Makefile ├── README.md ├── __init__.py ├── add-glue-jobs-in-terraform.py ├── configure_redshift.py ├── configure_redshift_run.sh ├── docker-compose.yml ├── helpers │ ├── __init__.py │ ├── address_cleaning_inputs.py │ ├── athena_helpers.py │ ├── coordinates.py │ ├── damp_and_mould_inputs.py │ ├── data_quality_testing.py │ ├── database_ingestion_helpers.py │ ├── helpers.py │ ├── housing_disrepair_helpers.py │ ├── housing_gx_dq_inputs.py │ ├── housing_mmh_vulnerability_keywords.py │ ├── repairs.py │ ├── text_analysis_helpers.py │ ├── time_series_helpers.py │ └── watermarks.py ├── jobs │ ├── __init__.py │ ├── academy_data │ │ └── load_all_academy_data_into_redshift.py │ ├── address_cleaning.py │ ├── address_matching.py │ ├── copy_json_data_landing_to_raw.py │ ├── copy_manually_uploaded_csv_data_to_raw.py │ ├── copy_tables_landing_to_raw.py │ ├── copy_tables_landing_to_raw_backdated.py │ ├── data_and_insight │ │ ├── __init__.py │ │ ├── active_person_records.py │ │ ├── address_cleaning.py │ │ ├── damp_and_mould_apply_ml_model.py │ │ ├── damp_and_mould_train_ml_model.py │ │ ├── damp_and_mould_training_data_prep.py │ │ ├── electoral_register_data_to_refined.py │ │ ├── icaseworks_ingest_to_raw.py │ │ └── person_matching_module.py │ ├── env_context.py │ ├── env_enforcement │ │ ├── liberator_fpns_refined.py │ │ ├── noisework_complaints_refined.py │ │ └── noiseworks_copy_csv_to_raw.py │ ├── env_services │ │ └── spatial-enrichment-dictionary.json │ ├── google_sheets_import.py │ ├── housing │ │ ├── __init__.py │ │ ├── housing_apply_gx_dq_tests.py │ │ ├── housing_assets_reshape_gx_suite.py │ │ ├── housing_contacts_reshape_gx_suite.py │ │ ├── housing_dwellings_list_gx_suite.py │ │ ├── housing_gx_dq_metadata.py │ │ ├── housing_homeowner_record_sheet_gx_suite.py 
│ │ ├── housing_maproperty_gx_suite.py │ │ ├── housing_matenancyagreement_gx_suite.py │ │ ├── housing_mtfh_case_notes_enriched.py │ │ ├── housing_mtfh_reshape_to_refined.py │ │ ├── housing_person_reshape_gx_suite.py │ │ ├── housing_tenure_reshape_gx_suite.py │ │ ├── mtfh_json_export_to_raw.py │ │ └── rentsense_to_refined.py │ ├── housing_register_to_refined_and_trusted.py │ ├── housing_repairs │ │ ├── elec_mech_fire_communal_lighting.py │ │ ├── elec_mech_fire_door_entry_cleaning.py │ │ ├── elec_mech_fire_dpa.py │ │ ├── elec_mech_fire_electric_heating_cleaning.py │ │ ├── elec_mech_fire_electrical_supplies_cleaning.py │ │ ├── elec_mech_fire_emergency_lighting_servicing_cleaning.py │ │ ├── elec_mech_fire_fire_alarmaov_cleaning.py │ │ ├── elec_mech_fire_lift_breakdown_ela_cleaning.py │ │ ├── elec_mech_fire_lightning_protection_cleaning.py │ │ ├── elec_mech_fire_reactive_rewires_cleaning.py │ │ ├── elec_mech_fire_tv_aerials_cleaning.py │ │ ├── get_uprn_from_uhref.py │ │ ├── repairs_alpha_track_cleaning.py │ │ ├── repairs_avonline_cleaning.py │ │ ├── repairs_axis_cleaning.py │ │ ├── repairs_dlo_cleaning.py │ │ ├── repairs_herts_heritage_cleaning.py │ │ ├── repairs_purdy_cleaning.py │ │ └── repairs_stannah_cleaning.py │ ├── hr_and_od │ │ └── staff_sickness_forecasting.py │ ├── ingest_database_tables_via_jdbc_connection.py │ ├── ingest_tables_from_dynamo_db.py │ ├── levenshtein_address_matching.py │ ├── ml_jobs │ │ └── __init__.py │ ├── parking │ │ ├── parking_copy_ringgo_sftp_data_to_raw.py │ │ ├── spatial-enrichment-dictionary-preprod.json │ │ └── spatial-enrichment-dictionary.json │ ├── person_matching.py │ ├── planning │ │ ├── hackney_bank_holiday.csv │ │ ├── tascomi-column-type-dictionary.json │ │ ├── tascomi_api_ingestion.py │ │ ├── tascomi_applications_trusted.py │ │ ├── tascomi_create_daily_snapshot.py │ │ ├── tascomi_locations_trusted.py │ │ ├── tascomi_officers_trusted.py │ │ ├── tascomi_parse_tables_increments.py │ │ ├── tascomi_recast_tables_increments.py │ │ ├── tascomi_subsidiary_tables.py │ │ ├── tascomi_subsidiary_tables_trusted.py │ │ └── trusted_officers_reporting.py │ ├── recast_tables.py │ ├── rentsense_former_tenants_to_refined.py │ ├── rentsense_to_refined_and_landing.py │ ├── revenues │ │ ├── etl_ctax_live_properties.py │ │ └── etl_zerobase_ctax_live_properties.py │ ├── sandbox │ │ ├── adam_covid.py │ │ ├── covid_vaccinations_arda.py │ │ ├── covid_vaccinations_verlander.py │ │ ├── daro_covid_locations_and_vaccinations.py │ │ ├── job_script_template.py │ │ ├── marta_training_job.py │ │ ├── steve_covid_locations_and_vaccinations.py │ │ ├── stg_job_template_huu_do.py │ │ └── training_job_tim.py │ ├── spark_example.py │ ├── spreadsheet_import.py │ └── unrestricted │ │ ├── addressbasepremium_create_address_table.py │ │ ├── addressbasepremium_load_files.py │ │ ├── blpu_class_lookup.csv │ │ ├── geography-tables-dictionary.json │ │ ├── llpg_latest_to_trusted.py │ │ ├── ons_ward_lookup_may_2023.csv │ │ └── spatial_enrichment.py ├── lib │ └── .keep ├── package-helpers.sh ├── requirements.build.txt ├── requirements.test.txt ├── setup.py ├── spark_events │ └── .keep └── tests │ ├── __init__.py │ ├── conftest.py │ ├── helpers │ ├── __init_.py │ ├── assertions.py │ ├── dataframe_conversions.py │ └── dummy_logger.py │ ├── housing_repairs │ ├── test_elec_mech_fire_dpa.py │ └── test_repairs_avonline_cleaning.py │ ├── planning │ ├── test_tascomi_api_ingestion.py │ ├── test_tascomi_create_daily_snapshot.py │ ├── test_tascomi_parse_tables.py │ └── test_tascomi_recast_tables_increments.py │ ├── 
redshift_configuration │ ├── test_configure_redshift.py │ ├── test_configure_redshift_role_inheritance.py │ ├── test_configure_redshift_role_permissions.py │ ├── test_create_redshift_roles.py │ └── test_revoke_role_grants.py │ ├── stubs │ ├── column_type_dictionary.json │ └── column_type_dictionary_partial.json │ ├── test_address_cleaning.py │ ├── test_cancel_job_if_failing_quality_checks.py │ ├── test_clean_column_names.py │ ├── test_data │ ├── levenshtein_address_matching │ │ ├── addresses │ │ │ └── import_year=2021 │ │ │ │ └── import_month=12 │ │ │ │ └── import_day=16 │ │ │ │ └── import_date=20211216 │ │ │ │ └── address_sample.csv │ │ └── source │ │ │ └── import_year=2021 │ │ │ └── import_month=12 │ │ │ └── import_day=16 │ │ │ └── import_date=20211216 │ │ │ └── source_sample.csv │ └── test_spreadsheet_import.csv │ ├── test_get_all_database_tables.py │ ├── test_get_glue_env_var.py │ ├── test_get_s3_subfolders.py │ ├── test_initialise_job.py │ ├── test_levenshtein_address_matching.py │ ├── test_recast_tables.py │ ├── test_spark_example.py │ ├── test_spreadsheet_import.py │ ├── test_update_table_ingestion_details.py │ └── watermarks │ ├── __init__.py │ └── test_watermarks.py ├── state-machine-definitions └── academy_ingestion.asl.json ├── terraform ├── backend-setup │ ├── 00-init.tf │ ├── 01-inputs-required.tf │ ├── 02-inputs-optional.tf │ ├── 03-input-derived.tf │ ├── 10-aws-s3-buckets.tf │ ├── 99-outputs.tf │ ├── Makefile │ └── README.md ├── compliance │ ├── dynamodb.feature │ ├── ecr.feature │ ├── elastic-search.feature │ ├── s3.feature │ └── subnet.feature ├── config │ ├── .tflint.hcl │ ├── env.tfvars.example │ ├── prod.tfvars │ └── stg.tfvars ├── core │ ├── 00-init.tf │ ├── 01-inputs-required.tf │ ├── 02-inputs-optional.tf │ ├── 03-input-derived.tf │ ├── 04-input-redundant.tf │ ├── 05-departments.tf │ ├── 06-network.tf │ ├── 07-qlik-server.tf │ ├── 08-aws-secrets-manager.tf │ ├── 10-aws-iam-glue.tf │ ├── 10-aws-s3-buckets.tf │ ├── 11-aws-ecs.tf │ ├── 12-aws-s3-scripts.tf │ ├── 12-housing-income-collection-db-ingestion.tf │ ├── 13-mssql-ingestion.tf │ ├── 14-parking-geolive-database-ingestion.tf │ ├── 15-unrestricted-geolive-database-ingestion.tf │ ├── 16-better-conversations-tables-ingestion.tf │ ├── 17-mtfh-tables-ingestion.tf │ ├── 18-rentsense-tables-ingestion.tf │ ├── 19-liberator-iam.tf │ ├── 20-noiseworks-import-s3.tf │ ├── 22-sagemaker.tf │ ├── 23-FME-iam.tf │ ├── 24-qlik-iam.tf │ ├── 26-google-platform.tf │ ├── 27-google-service-accounts.tf │ ├── 28-aws-secrets-manager.tf │ ├── 28-glue-error-notifications.tf │ ├── 28-glue-gchat-failure-notification.tf │ ├── 29-db-snapshot-to-s3-sandbox.tf │ ├── 29-db-snapshot-to-s3.tf │ ├── 30-g-drive-to-s3.tf │ ├── 32-kafka-event-streaming.tf │ ├── 33-sftp-server.tf │ ├── 34-aws-budget-alerting.tf │ ├── 35-sync-production-to-pre-production.tf │ ├── 36-liberator-import.tf │ ├── 37-datahub.tf │ ├── 38-api-ingestion.tf │ ├── 39-housing-interim-finance-db-ingestion.tf │ ├── 40-ringgo-sftp-to-s3-ingestion.tf │ ├── 41-lambda-failure-alarms.tf │ ├── 42-lambda-alarms-handler.tf │ ├── 43-lambda-monitoring-dashboard.tf │ ├── 44-lambda-layers.tf │ ├── 45-database-migration-iam.tf │ ├── 46-mwaa-bucket-kms.tf │ ├── 47-mwaa.tf │ ├── 50-dynamodb.tf │ ├── 51-load-all-academy-data-into-redshift-serverless.tf │ ├── 81-sync-rentsense-files.tf │ ├── 82-academy-pre-production-bens-housing-needs-raw-zone.tf │ ├── 82-academy-pre-production-raw-zone-data-test.tf │ ├── 82-academy-pre-production-revenues-raw-zone.tf │ ├── 83-development-deploy-role.tf │ ├── 
84-pre-prod-data-cleanup-tasks.tf │ ├── 85-comprehend-poc-role.tf │ ├── 86-departments-extended-role.tf │ ├── 87-redshift-serverless.tf │ ├── 99-outputs.tf │ ├── Makefile │ └── state-migration.sh ├── etl │ ├── 00-init.tf │ ├── 01-inputs-required.tf │ ├── 02-inputs-optional.tf │ ├── 03-input-derived.tf │ ├── 04-input-redundant.tf │ ├── 05-aws-s3.tf │ ├── 06-aws-secrets-manager.tf │ ├── 07-department-data-sources.tf │ ├── 09-spreadsheet-imports-from-g-drive.tf │ ├── 11-aws-glue-security-configuration.tf │ ├── 12-aws-glue-crawler-parking-spreadsheets.tf │ ├── 12-aws-glue-parking-manual-upload.tf │ ├── 13-aws-glue-crawler-housing-repairs.tf │ ├── 14-aws-glue-job-electrical-mechnical-fire-safety.tf │ ├── 20-noiseworks-import-s3.tf │ ├── 24-aws-glue-housing.tf │ ├── 24-aws-glue-spatial.tf │ ├── 24-aws-glue-tascomi-data.tf │ ├── 25-aws-glue-job-revenues.tf │ ├── 33-aws-glue-backdated-liberator-data.tf │ ├── 33-aws-glue-liberator-data.tf │ ├── 34-aws-glue-crawlers.tf │ ├── 35-aws-glue-env-enforcement.tf │ ├── 36-aws-glue-job-electrical-mechnical-fire-safety-tv-aerials.tf │ ├── 37-aws-glue-job-housing-repairs.tf │ ├── 38-parking-load-redshift-tables.tf │ ├── 38-parking-team-time.tf │ ├── 40-aws-glue-job-sandbox.tf │ ├── 42-redshift.tf │ ├── 43-aws-glue-job-rentsense.tf │ ├── 44-aws-glue-job-bens-housing-needs.tf │ ├── 45-aws-glue-job-active-persons-records.tf │ ├── 46-aws-glue-job-person-record-matching.tf │ ├── 47-aws-glue-job-staff-sickness-forecasting.tf │ ├── 48-lambda-gov-notify-ingestion.tf │ ├── 49-lambda-gov-notify-ingestion-customer-services.tf │ ├── 49-lambda-gov-notify-ingestion-housing-lbh-communal-repairs.tf │ ├── 50-aws-lambda-export-dynamodb-pitr.tf │ ├── 52-aws-glue-job-active-persons-address-cleaning.tf │ ├── 53-aws-glue-housing-extract-mmh-vulnerabilities-to-refined.tf │ ├── 54-aws-glue-housing-apply-gx-dq-tests.tf │ ├── 55-aws-glue-icaseworks_ingest_etl.tf │ ├── 60-airflow-etl-used-crawlers.tf │ ├── 60-airflow-variables-and-connnections.tf │ ├── 61-aws-glue-catalog-database.tf │ ├── 99-moved.tf │ ├── 99-outputs.tf │ ├── Makefile │ └── example-glue-jobs.md ├── iam │ └── assume-role-policies.tf ├── modules │ ├── README.md │ ├── api-ingestion-lambda │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 04-resource-moves.tf │ │ ├── 10-lambda.tf │ │ └── 99-outputs.tf │ ├── aws-ecs-autoscaling-group │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ └── 05-ecs-autoscaling-group.tf │ ├── aws-ecs-docker-service │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 04-aws-iam.tf │ │ ├── 05-aws-ecs-service.tf │ │ ├── 06-aws-ecs-task.tf │ │ ├── 07-docker-pull-push.tf │ │ ├── 08-ecr.tf │ │ ├── 09-cloudwatch-event.tf │ │ ├── 99-outputs.tf │ │ └── docker_pull_push.sh │ ├── aws-ecs-fargate-task │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-aws-iam.tf │ │ ├── 11-aws-ecr.tf │ │ ├── 12-aws-ecs.tf │ │ ├── 20-cloudwatch-event.tf │ │ └── 99-outputs.tf │ ├── aws-glue-job-with-crawler │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 10-aws-glue-job.tf │ │ ├── 11-aws-glue-crawler.tf │ │ └── 99-outputs.tf │ ├── aws-glue-job │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-aws-glue-job.tf │ │ ├── 11-aws-glue-crawler.tf │ │ └── 99-outputs.tf │ ├── aws-lambda-folder-ingestion │ │ ├── 00-init.tf │ │ ├── 
01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 20-iam.tf │ │ ├── 30-lambda.tf │ │ └── 99-outputs.tf │ ├── aws-lambda-layers │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 10-lambda-layer.tf │ │ └── 99-outputs.tf │ ├── aws-lambda │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 20-iam.tf │ │ ├── 30-lambda.tf │ │ └── 99-outputs.tf │ ├── aws-step-functions │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 10-main.tf │ │ └── 99-outputs.tf │ ├── copy-from-s3-to-s3 │ │ ├── .gitignore │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 10-lambda.tf │ │ └── lambda │ │ │ ├── .gitignore │ │ │ ├── index.js │ │ │ ├── package-lock.json │ │ │ └── package.json │ ├── data-sources │ │ ├── README.md │ │ ├── aws-glue-job │ │ │ ├── 00-init.tf │ │ │ ├── 01-inputs-required.tf │ │ │ ├── 02-inputs-optional.tf │ │ │ ├── 03-inputs-derived.tf │ │ │ ├── 10-aws-glue-job.tf │ │ │ ├── 11-aws-glue-crawler.tf │ │ │ └── 99-outputs.tf │ │ ├── department │ │ │ ├── 00-init.tf │ │ │ ├── 01-inputs-required.tf │ │ │ ├── 02-inputs-optional.tf │ │ │ ├── 03-input-derived.tf │ │ │ ├── 20-aws-glue-databases.tf │ │ │ ├── 30-google.tf │ │ │ ├── 40-aws-secretsmanager.tf │ │ │ ├── 50-aws-iam-roles.tf │ │ │ ├── 70-aws-sns.tf │ │ │ └── 99-outputs.tf │ │ ├── g-drive-to-s3 │ │ │ ├── 00-init.tf │ │ │ ├── 01-inputs-required.tf │ │ │ ├── 03-input-derived.tf │ │ │ ├── 10-lambda.tf │ │ │ └── 99-outputs.tf │ │ ├── google-service-account │ │ │ ├── 00-init.tf │ │ │ ├── 00-inputs-required.tf │ │ │ ├── 01-inputs-optional.tf │ │ │ ├── 45-service-account.tf │ │ │ ├── 53-secrets-manager.tf │ │ │ └── 99-outpus.tf │ │ ├── google-sheets-glue-job │ │ │ ├── 00-init.tf │ │ │ ├── 01-inputs-required.tf │ │ │ ├── 02-inputs-optional.tf │ │ │ ├── 03-input-derived.tf │ │ │ ├── 10-aws-glue-job.tf │ │ │ └── 99-outputs.tf │ │ ├── import-data-from-spreadsheet-job │ │ │ ├── 00-init.tf │ │ │ ├── 01-inputs-required.tf │ │ │ ├── 03-input-derived.tf │ │ │ ├── 10-aws-glue-job.tf │ │ │ └── 99-outputs.tf │ │ ├── import-spreadsheet-file-from-g-drive │ │ │ ├── 00-init.tf │ │ │ ├── 01-inputs-required.tf │ │ │ ├── 02-inputs-optional.tf │ │ │ ├── 10-import-spreadsheet-file-from-g-drive.tf │ │ │ └── 99-outputs.tf │ │ └── s3-bucket │ │ │ ├── 00-init.tf │ │ │ ├── 01-inputs-required.tf │ │ │ ├── 10-s3-bucket.tf │ │ │ └── 99-outputs.tf │ ├── database-ingestion-via-jdbc-connection │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-aws-glue-connection.tf │ │ ├── 11-aws-glue-connection-crawler.tf │ │ ├── 20-aws-iam-policy.tf │ │ ├── 20-aws-iam-role.tf │ │ └── 99-outputs.tf │ ├── datahub │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 03-locals.tf │ │ ├── 04-aws-alb-frontend-react.tf │ │ ├── 04-aws-alb-gms.tf │ │ ├── 06-ecs-cluster.tf │ │ ├── 07-cloudwatch-logs.tf │ │ ├── 08-ecs-services.tf │ │ ├── 09-ssm.tf │ │ ├── 10-rds.tf │ │ ├── 11-elastic-search.tf │ │ ├── 12-iam.tf │ │ ├── 13-ecs-autoscaling-group.tf │ │ ├── 99-outputs.tf │ │ ├── Datahub.png │ │ ├── README.md │ │ └── datasource-ingestion-recipes │ │ │ └── glue-example.yml │ ├── db-snapshot-to-s3-sandbox-resources │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 10-rds-database.tf │ │ └── 11-bastion.tf │ ├── db-snapshot-to-s3 │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ 
├── 03-input-derived.tf │ │ ├── 10-s3.tf │ │ ├── 20-rds-to-s3-lambda.tf │ │ ├── 25-rds-to-s3-queue.tf │ │ ├── 30-rds-snapshot-export-service.tf │ │ ├── 40-s3-to-s3-copier-lambda.tf │ │ ├── 45-s3-to-s3-copier-queue.tf │ │ ├── 50-rds-event-subscription.tf │ │ └── 99-outputs.tf │ ├── department │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 10-aws-s3-buckets.tf │ │ ├── 20-aws-glue-databases.tf │ │ ├── 25-aws-athena.tf │ │ ├── 30-google.tf │ │ ├── 40-aws-secretsmanager.tf │ │ ├── 45-notebook.tf │ │ ├── 50-aws-iam-policies.tf │ │ ├── 50-aws-iam-roles.tf │ │ ├── 60-aws-sso.tf │ │ ├── 70-aws-sns.tf │ │ └── 99-outputs.tf │ ├── dynamodb │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-aws-dynamodb-table.tf │ │ ├── 20-aws-kms-key.tf │ │ └── 99-outputs.tf │ ├── electrical-mechnical-fire-safety-cleaning-job │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-aws-glue-data-cleaning.tf │ │ ├── 11-aws-glue-address-cleaning.tf │ │ ├── 12-aws-glue-address-matching.tf │ │ └── 99-outputs.tf │ ├── g-drive-to-s3 │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 10-lambda.tf │ │ └── 99-outputs.tf │ ├── glue-failure-alert-notifications │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-main.tf │ │ └── 99-outputs.tf │ ├── google-service-account │ │ ├── 00-init.tf │ │ ├── 00-inputs-required.tf │ │ ├── 01-inputs-optional.tf │ │ ├── 45-service-account.tf │ │ ├── 53-secrets-manager.tf │ │ └── 99-outpus.tf │ ├── google-sheets-glue-job │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 10-aws-glue-job.tf │ │ └── 99-outputs.tf │ ├── housing-repairs-google-sheets-cleaning │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-aws-glue-data-cleaning.tf │ │ ├── 11-aws-glue-address-cleaning.tf │ │ ├── 12-aws-glue-address-matching.tf │ │ └── 99-outputs.tf │ ├── import-data-from-spreadsheet-job │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 10-aws-glue-job.tf │ │ └── 99-outputs.tf │ ├── import-spreadsheet-file-from-g-drive │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 10-import-spreadsheet-file-from-g-drive.tf │ │ └── 99-outputs.tf │ ├── kafka-event-streaming │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-input-derived.tf │ │ ├── 10-dependencies.tf │ │ ├── 20-security-groups.tf │ │ ├── 30-keys.tf │ │ ├── 40-logs.tf │ │ ├── 45-glue-crawler.tf │ │ ├── 50-cluster.tf │ │ ├── 60-connect.tf │ │ ├── 70-schema.tf │ │ ├── 80-iam.tf │ │ ├── 99-outputs.tf │ │ ├── README.md │ │ └── plugins │ │ │ ├── confluentinc-kafka-connect-s3-10.0.5-merged.zip │ │ │ └── confluentinc-kafka-connect-s3-10.0.5.zip │ ├── kafka-schema-registry │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-input-derived.tf │ │ ├── 10-aws-alb.tf │ │ ├── 20-aws-ecs.tf │ │ ├── 30-register-schemas.tf │ │ ├── 40-aws-logs.tf │ │ ├── 99-outpus.tf │ │ ├── schemas │ │ │ ├── contact_details_api.json │ │ │ └── tenure_api.json │ │ └── scripts │ │ │ └── update_schemas.sh │ ├── kafka-test-lambda │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf 
│ │ ├── 10-lambda.tf │ │ ├── 20-security-groups.tf │ │ └── 99-outputs.tf │ ├── lambda-alarms-and-monitoring │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-aws-cloudwatch-alarm.tf │ │ ├── 11-aws-sns-topic.tf │ │ └── 12-aws-kms-key.tf │ ├── lambda-alarms-handler │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-aws-lambda.tf │ │ └── 99-outputs.tf │ ├── lambda-monitoring-dashboard │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ └── 10-aws-cloudwatch-dashboard.tf │ ├── qlik-sense-server │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 04-aws-s3-alb-logs.tf │ │ ├── 10-aws-ec2.tf │ │ ├── 12-aws-load-balancer.tf │ │ ├── 13-aws-ec2-pre-prod.tf │ │ ├── 14-aws-glue-catalog-database.tf │ │ ├── 15-aws-ec2-prod.tf │ │ └── 99-outputs.tf │ ├── rds-snapshot-to-s3 │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 99-outputs.tf │ │ ├── eventbridge.tf │ │ ├── iam.tf │ │ └── lambda.tf │ ├── redshift-serverless │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-aws-iam.tf │ │ ├── 20-aws-redshiftserverless-namespace.tf │ │ ├── 21-aws-secrets-manager-secret.tf │ │ ├── 22-aws-kms-key.tf │ │ ├── 23-aws-redshiftserverless-workgroup.tf │ │ ├── 24-aws-security-group.tf │ │ ├── 25-aws-redshiftserverless-usage-limit.tf │ │ └── 99-outputs.tf │ ├── redshift │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 10-redshift.tf │ │ └── 99-outputs.tf │ ├── s3-bucket-notification-emails │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-inputs-derived.tf │ │ ├── 10-main.tf │ │ └── 99-outputs.tf │ ├── s3-bucket │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 10-s3-bucket.tf │ │ └── 99-outputs.tf │ ├── sagemaker │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 09-iam.tf │ │ ├── 10-ssm.tf │ │ ├── 11-notebook.tf │ │ ├── 99-outputs.tf │ │ ├── scripts │ │ │ └── notebook-start-up.sh │ │ └── spark-magic-config.json │ ├── set-budget-limit-amount │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 08-aws-budgets.tf │ │ ├── 10-lambda.tf │ │ └── 99-outputs.tf │ └── sql-to-rds-snapshot │ │ ├── 00-init.tf │ │ ├── 01-inputs-required.tf │ │ ├── 02-inputs-optional.tf │ │ ├── 03-input-derived.tf │ │ ├── 10-ecs.tf │ │ ├── 20-cloudtrail.tf │ │ ├── 30-rds.tf │ │ ├── 40-security-group.tf │ │ ├── 99-outputs.tf │ │ └── task_definition_template.json └── networking │ ├── 00-init.tf │ ├── 01-input-required.tf │ ├── 02-input-optional.tf │ ├── 03-input-derived.tf │ ├── 04-input-redundant.tf │ ├── 10-network.tf │ ├── 11-transit-gateway.tf │ ├── 99-outputs.tf │ ├── Makefile │ └── README.md └── troubleshoot ├── Makefile ├── Troubleshoot_for_Local_Setup_on_Windows.md └── package-helpers.bat /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | 3 | *.tf text eol=lf 4 | *.tfvars text eol=lf 5 | *.json text eol=lf 6 | *.yaml text eol=lf 7 | *.yml text eol=lf 8 | *.md text eol=lf 9 | *.txt text eol=lf 10 | 
*.sh text eol=lf 11 | *.py text eol=lf 12 | 13 | *.png binary 14 | *.jpg binary 15 | *.jpeg binary 16 | *.gif binary 17 | *.zip binary -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This file specifies owners for pull request approval 2 | # See https://help.github.com/articles/about-code-owners/ 3 | 4 | * @LBHackney-IT/data-analytics-platform @LBHackney-IT/data-insight 5 | -------------------------------------------------------------------------------- /.github/actions/.helpers/box.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo $1 | sed -e 's/^/../' -e 's/$/../' -e 's/./*/g' 3 | echo $1 | sed -e 's/^/* /' -e 's/$/ */' 4 | echo $1 | sed -e 's/^/../' -e 's/$/../' -e 's/./*/g' 5 | -------------------------------------------------------------------------------- /.github/actions/.helpers/success.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cat << "EOF" 3 | /\ /\ 4 | { `---' } 5 | { O O } 6 | ~~> V <~~ 7 | \ \|/ / 8 | `-----'__ 9 | / \ `^\_ 10 | { }\ |\_\_ W 11 | | \_/ |/ / \_\_( ) 12 | \__/ /(_E \__/ 13 | ( / 14 | MM 15 | EOF 16 | -------------------------------------------------------------------------------- /.github/actions/.helpers/success_destroy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cat << "EOF" 3 | / \\ 4 | /\\ | . . \\ 5 | ////\\| || 6 | //// \\ ___//\ 7 | /// \\ \ 8 | /// |\\ | 9 | // | \\ \ \ 10 | / | \\ \ \ 11 | | \\ / / 12 | | \/ / 13 | | \\/| 14 | | \\| 15 | | \\ 16 | | | 17 | |_________\ 18 | EOF 19 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directories: 5 | - "*" 6 | schedule: 7 | interval: "weekly" 8 | day: "monday" 9 | time: "08:00" 10 | groups: 11 | straightforward-dependencies: 12 | applies-to: version-updates 13 | update-types: 14 | - minor 15 | - patch 16 | -------------------------------------------------------------------------------- /.github/hooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # They probably are but check if terraform and git are installed otherwise skip pre-commit hook 6 | if ! command -v terraform &> /dev/null 7 | then 8 | echo "Terraform not installed" 9 | exit 0; 10 | fi 11 | 12 | if ! 
command -v git &> /dev/null 13 | then 14 | echo "Git not installed" 15 | exit 0; 16 | fi 17 | 18 | #Find all the directories with terraform files in them 19 | terraform_directories=$(find "$(pwd)" -type f -name '*.tfvars' -or -name '*.tf' | sed -r 's|/[^/]+$||' | sort | uniq ) 20 | 21 | #Format the files in the terraform directories and add 22 | for directory in $terraform_directories; do 23 | if [[ "$directory" != *".terraform"* ]]; then 24 | for formatted_file in $(terraform fmt "$directory"); do 25 | git add "$(pwd)"/"$formatted_file" 26 | done 27 | fi 28 | done 29 | -------------------------------------------------------------------------------- /.github/workflows/ci-python-lambda.yml: -------------------------------------------------------------------------------- 1 | name: 'CI' 2 | on: 3 | push: 4 | branches-ignore: 5 | - "main" 6 | paths: 7 | - 'scripts/**' 8 | - 'lambdas/**' 9 | 10 | jobs: 11 | CI: 12 | uses: ./.github/workflows/test-python-and-lambda.yml 13 | -------------------------------------------------------------------------------- /.github/workflows/test-python-and-lambda.yml: -------------------------------------------------------------------------------- 1 | name: Test and Validate 2 | 3 | on: 4 | workflow_call: 5 | 6 | jobs: 7 | tests: 8 | name: Test Python Jobs & Lambda Functions 9 | runs-on: ubuntu-24.04 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@v3 13 | - name: Test Python Jobs 14 | run: | 15 | cd scripts 16 | make test 17 | - name: Test Lambda Functions 18 | run: | 19 | cd lambdas 20 | make test 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .terraform* 3 | *.tfstate* 4 | **/node_modules 5 | lambdas/*/lib 6 | lambdas/*/requirements.txt 7 | lambdas/*/key_file.json 8 | lambdas/*/venv 9 | lambdas/*/*.zip 10 | lambdas/*.zip 11 | notebook/scripts/.ipynb_checkpoints/* 12 | external-lib/target/* 13 | env.tfvars 14 | dev.tfvars 15 | google_service_account_creds*.json 16 | .DS_Store 17 | */.env 18 | notebook/aws-config/credentials 19 | .gitmessage 20 | **/.venv 21 | **/__pycache__ 22 | /venv/ 23 | .vscode/ 24 | **/*.log 25 | **/plan.out 26 | **/plan.out.json 27 | __pycache__/ 28 | spark_events/ 29 | build/ 30 | dist/ 31 | *.egg-info 32 | .pytest_cache 33 | .venv 34 | /build/ 35 | /dist/ 36 | /terraform/modules/copy-from-s3-to-s3/copy-from-s3-to-s3.zip 37 | /terraform/modules/kafka-event-streaming/local-testing/list-topics.json 38 | /terraform/modules/kafka-event-streaming/local-testing/send-message-to-topic.json 39 | /scripts/logs/ 40 | /scripts/tests/test_person_matching.py 41 | /lambdas/vonage_api_ingestion/messbook.py -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/gitguardian/ggshield 3 | rev: v1.14.2 4 | hooks: 5 | - id: ggshield 6 | language_version: python3 7 | stages: [commit] -------------------------------------------------------------------------------- /docker/pre-production-data-cleanup/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.19 2 | 3 | RUN apk add --update --no-cache \ 4 | bash \ 5 | aws-cli \ 6 | coreutils \ 7 | && rm -rf /var/cache/apk/* 8 | 9 | COPY ./cleanup-past-x-days.sh . 
10 | 11 | RUN addgroup -S docker_users \ 12 | && adduser -S docker_user -G docker_users 13 | 14 | USER docker_user 15 | 16 | CMD ./cleanup-past-x-days.sh 17 | -------------------------------------------------------------------------------- /docker/pre-production-data-cleanup/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | if [[ $ENVIRONMENT != "stg" ]] 5 | then 6 | echo "Exiting as not in pre-production environment" 7 | exit 0; 8 | fi 9 | 10 | script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 11 | 12 | terraform_dir="${script_dir}/../../terraform/core" 13 | ecr_url=$(AWS_PROFILE="" terraform -chdir=${terraform_dir} output -raw pre_prod_data_cleanup_ecr_repository_endpoint) 14 | 15 | docker build -f ${script_dir}/Dockerfile -t ${ecr_url} ${script_dir} 16 | 17 | aws ecr get-login-password --region eu-west-2 | docker login --username AWS --password-stdin $ecr_url 18 | 19 | docker push $ecr_url 20 | -------------------------------------------------------------------------------- /docker/pre-production-data-cleanup/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | services: 3 | cmd: 4 | container_name: cmd-runner 5 | build: . 6 | -------------------------------------------------------------------------------- /docker/sql-to-parquet/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | 3 | RUN apk add --update --no-cache \ 4 | bash \ 5 | unzip \ 6 | mariadb-connector-c-dev \ 7 | mysql mysql-client 8 | 9 | # Install AWS CLI 10 | RUN apk add --no-cache \ 11 | python3 \ 12 | py3-pip 13 | 14 | RUN python3 -m venv /venv 15 | ENV PATH="/venv/bin:$PATH" 16 | 17 | RUN pip3 install --upgrade pip \ 18 | && pip3 install awscli boto3 \ 19 | && rm -rf /var/cache/apk/* 20 | 21 | COPY ./entrypoint.sh . 22 | COPY ./delete_db_snapshots_in_db.py . 
23 | 24 | RUN chmod +x entrypoint.sh 25 | 26 | CMD ./entrypoint.sh 27 | -------------------------------------------------------------------------------- /docker/sql-to-parquet/delete_db_snapshots_in_db.py: -------------------------------------------------------------------------------- 1 | import os 2 | import boto3 3 | 4 | rds = boto3.client('rds') 5 | snapshots_api_response = rds.describe_db_snapshots( 6 | DBInstanceIdentifier=os.environ['RDS_INSTANCE_ID'] 7 | ) 8 | 9 | snapshots = snapshots_api_response['DBSnapshots'] 10 | 11 | print("Found ", len(snapshots), " snapshots") 12 | 13 | for snapshot in snapshots: 14 | snapshot_id = snapshot['DBSnapshotIdentifier'] 15 | 16 | if snapshot_id.startswith('awsbackup'): 17 | print("Skipping snapshot ", snapshot_id) 18 | else: 19 | print("Deleting snapshot ", snapshot_id) 20 | rds.delete_db_snapshot( 21 | DBSnapshotIdentifier=snapshot_id 22 | ) 23 | -------------------------------------------------------------------------------- /docker/sql-to-parquet/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu -o pipefail 3 | 4 | script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 5 | 6 | terraform_dir="${script_dir}/../../terraform/core" 7 | ecr_url=$(AWS_PROFILE="" terraform -chdir=${terraform_dir} output -raw liberator_dump_to_rds_snapshot_ecr_repository_worker_endpoint) 8 | 9 | docker build -f ${script_dir}/Dockerfile -t ${ecr_url} ${script_dir} 10 | 11 | aws ecr get-login-password --region eu-west-2 | docker login --username AWS --password-stdin $ecr_url 12 | 13 | docker push $ecr_url 14 | -------------------------------------------------------------------------------- /docker/sql-to-parquet/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | services: 3 | cmd: 4 | container_name: cmd-runner 5 | # Use for testing https://hackney.atlassian.net/browse/DPP-194 6 | # environment: 7 | # - IMPORT_DATE_OVERRIDE=2022-06-01 8 | build: . 
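The delete_db_snapshots_in_db.py script above reads a single describe_db_snapshots response, which the RDS API caps (100 records per call by default), so an instance with a long snapshot history could be only partially cleaned. A minimal paginated sketch of the same skip-awsbackup logic, assuming the same RDS_INSTANCE_ID environment variable:

```python
import os

import boto3

rds = boto3.client("rds")

# Walk every page of snapshots rather than only the first response.
paginator = rds.get_paginator("describe_db_snapshots")
pages = paginator.paginate(DBInstanceIdentifier=os.environ["RDS_INSTANCE_ID"])

for page in pages:
    for snapshot in page["DBSnapshots"]:
        snapshot_id = snapshot["DBSnapshotIdentifier"]
        # Leave AWS Backup managed snapshots untouched, as in the original script.
        if snapshot_id.startswith("awsbackup"):
            print("Skipping snapshot", snapshot_id)
        else:
            print("Deleting snapshot", snapshot_id)
            rds.delete_db_snapshot(DBSnapshotIdentifier=snapshot_id)
```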
9 | -------------------------------------------------------------------------------- /external-lib/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | 3 | all: target/java-lib-1.0-SNAPSHOT-jar-with-dependencies.jar target/deequ-1.0.3.jar target/pydeequ-1.0.1.zip 4 | 5 | target/java-lib-1.0-SNAPSHOT-jar-with-dependencies.jar: pom.xml 6 | mvn clean package 7 | 8 | target/deequ-1.0.3.jar: 9 | wget https://repo1.maven.org/maven2/com/amazon/deequ/deequ/1.0.3/deequ-1.0.3.jar -O target/deequ-1.0.3.jar 10 | 11 | target/pydeequ-1.0.1.zip: 12 | pip3 install -t ./target pydeequ==1.0.1 13 | cd target && zip -r pydeequ-1.0.1.zip pydeequ 14 | -------------------------------------------------------------------------------- /lambdas/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | .venv/bin/python: 4 | python3 -m venv .venv 5 | 6 | .venv/.install.stamp: .venv/bin/python requirements.test.txt 7 | .venv/bin/python -m pip install -r requirements.test.txt 8 | touch .venv/.install.stamp 9 | 10 | test: .venv/.install.stamp 11 | .venv/bin/python -m unittest */test.py 12 | -------------------------------------------------------------------------------- /lambdas/calculate_max_concurrency/main.py: -------------------------------------------------------------------------------- 1 | def calculate_max_concurrency(available_ips: int, ips_per_job: int) -> int: 2 | return int((available_ips - 2) / ips_per_job) 3 | 4 | 5 | def lambda_handler(event, context): 6 | available_ips = int(event["AvailableIPs"]) 7 | ips_per_job = int(event["Workers"]) 8 | max_concurrency = calculate_max_concurrency(available_ips, ips_per_job) 9 | return {"max_concurrency": max_concurrency} 10 | 11 | 12 | if __name__ == "__main__": 13 | lambda_handler({"AvailableIPs": "10", "Workers": "2"}, None)  # illustrative local run with example values 14 | -------------------------------------------------------------------------------- /lambdas/g_drive_folder_to_s3/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the Ubuntu docker image 2 | FROM ubuntu:latest 3 | 4 | # Install system dependencies, pipenv and zip 5 | RUN apt-get update 6 | RUN apt-get upgrade -y 7 | RUN apt-get install -y zip 8 | RUN apt-get install -y python3.11 python3-pip 9 | 10 | RUN pip3 install pipenv 11 | 12 | # Set the working directory in the container 13 | 14 | WORKDIR /app 15 | 16 | # Copy the Pipfile, Pipfile.lock and main.py to the container 17 | COPY Pipfile Pipfile.lock /app/ 18 | COPY main.py ./source/ 19 | 20 | # Install Python dependencies using pipenv 21 | RUN pipenv install 22 | RUN pipenv requirements > requirements.txt 23 | RUN pip install -t ./source/lib -r requirements.txt 24 | 25 | # Set the working directory to the container source 26 | WORKDIR /app/source 27 | RUN zip -r g_drive_folder_to_s3.zip . 28 | 29 | CMD ["python3", "main.py"] 30 | -------------------------------------------------------------------------------- /lambdas/g_drive_folder_to_s3/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all build run zip stop clean 2 | 3 | CONTAINER_NAME := lambda_docker 4 | DOCKER_IMAGE := aws_lambda 5 | 6 | # Commands 7 | all: zip clean 8 | 9 | build: 10 | docker build -t $(DOCKER_IMAGE) . 11 | 12 | run: build 13 | docker run --name=$(CONTAINER_NAME) -d $(DOCKER_IMAGE) tail -f /dev/null 14 | 15 | zip: run 16 | docker cp $(CONTAINER_NAME):/app/source/g_drive_folder_to_s3.zip .
17 | 18 | stop: 19 | docker stop $(CONTAINER_NAME) 20 | 21 | clean: stop 22 | docker rm $(CONTAINER_NAME) 23 | -------------------------------------------------------------------------------- /lambdas/g_drive_folder_to_s3/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | google-auth-oauthlib = "*" 8 | google-api-python-client = "*" 9 | 10 | [dev-packages] 11 | boto3 = "*" 12 | 13 | [requires] 14 | python_version = "3.10" 15 | -------------------------------------------------------------------------------- /lambdas/glue-failure-notifications/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "devDependencies": { 3 | "aws-sdk": "^2.993.0" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /lambdas/glue_failure_gchat_notifications/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install-requirements 2 | 3 | install-requirements: 4 | python3 -m venv venv 5 | # the requirements are generated so that the packages 6 | # could be downloaded and packaged up for the lambda 7 | 8 | . venv/bin/activate && sudo pipenv lock --requirements > requirements.txt 9 | . venv/bin/activate && sudo pip install --target ./lib -r requirements.txt 10 | rm -rf venv/ 11 | -------------------------------------------------------------------------------- /lambdas/glue_failure_gchat_notifications/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | urllib3 = "*" 8 | python-dotenv = "*" 9 | 10 | [dev-packages] 11 | 12 | [requires] 13 | python_version = "3" 14 | -------------------------------------------------------------------------------- /lambdas/icaseworks_api_ingestion/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install-requirements 2 | 3 | install-requirements: 4 | python3 -m venv venv 5 | # the requirements are generated so that the packages 6 | # could be downloaded and packaged up for the lambda 7 | 8 | . venv/bin/activate && sudo pipenv requirements > requirements.txt 9 | . 
venv/bin/activate && sudo pip install --target ./lib -r requirements.txt 10 | rm -rf venv/ 11 | -------------------------------------------------------------------------------- /lambdas/icaseworks_api_ingestion/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | python-dotenv = "*" 8 | requests = "*" 9 | pybase64 = "*" 10 | 11 | [dev-packages] 12 | boto3 = "*" 13 | 14 | [requires] 15 | python_version = "3" 16 | -------------------------------------------------------------------------------- /lambdas/icaseworks_api_ingestion/helpers.py: -------------------------------------------------------------------------------- 1 | class MockResponse: 2 | def __init__(self, json_data, status_code): 3 | self.json_data = json_data 4 | self.status_code = status_code 5 | 6 | def json(self): 7 | return self.json_data 8 | -------------------------------------------------------------------------------- /lambdas/kafka_test/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install-requirements 2 | 3 | install-requirements: 4 | python3 -m venv venv 5 | # the requirements are generated so that the packages 6 | # could be downloaded and packaged up for the lambda 7 | 8 | . venv/bin/activate && sudo pipenv lock --requirements > requirements.txt 9 | #. venv/bin/activate && sudo pip install --target ./lib -r requirements.txt 10 | . venv/bin/activate && sudo docker run -v $(PWD):/var/task "lambci/lambda:build-python3.8" /bin/sh -c "pip install --target ./lib -r requirements.txt; exit" 11 | sudo cp ../../terraform/modules/kafka-schema-registry/schemas/* ./lib/ 12 | rm -rf venv/ 13 | -------------------------------------------------------------------------------- /lambdas/kafka_test/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | python-dotenv = "*" 8 | requests = "*" 9 | pybase64 = "*" 10 | kafka-python = "*" 11 | confluent-kafka = "*" 12 | avro = "*" 13 | fastavro = "*" 14 | 15 | [dev-packages] 16 | boto3 = "*" 17 | 18 | [requires] 19 | python_version = "3" 20 | -------------------------------------------------------------------------------- /lambdas/kafka_test/lambda-events/list-all-topics.json: -------------------------------------------------------------------------------- 1 | { 2 | "operation": "list-all-topics" 3 | } -------------------------------------------------------------------------------- /lambdas/kafka_test/lambda-events/list-schema-registry-subjects.json: -------------------------------------------------------------------------------- 1 | { 2 | "operation": "list-schema-registry-subjects" 3 | } -------------------------------------------------------------------------------- /lambdas/kafka_test/lambda-events/read-message-from-topic.json: -------------------------------------------------------------------------------- 1 | { 2 | "operation": "read-message-from-topic", 3 | "topic": "tenure_api" 4 | } -------------------------------------------------------------------------------- /lambdas/kafka_test/lambda-events/send-message-to-topic.json: -------------------------------------------------------------------------------- 1 | { 2 | "operation": "send-message-to-topic", 3 | "topic": "tenure_api" 4 | } 
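The JSON files under lambda-events/ are sample invocation payloads for the kafka_test Lambda function. A minimal sketch of exercising the deployed function with one of them via boto3; the function name used here is a placeholder, not taken from this repository:

```python
import boto3

lambda_client = boto3.client("lambda")

# Send one of the sample payloads to the deployed test function.
with open("lambda-events/list-all-topics.json", "rb") as event_file:
    response = lambda_client.invoke(
        FunctionName="kafka-test-lambda",  # placeholder name
        Payload=event_file.read(),
    )

# The handler's return value comes back as a streaming body.
print(response["Payload"].read().decode("utf-8"))
```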
-------------------------------------------------------------------------------- /lambdas/kafka_test/readme.md: -------------------------------------------------------------------------------- 1 | 1. Run `docker-compose up` in this directory 2 | a. This will start the Kafka backend, the schema registry and the schema registry UI 3 | b. The schema registry UI will be available at http://localhost:8000 4 | c. Terminal output is often useful for seeing details on why things might be taking a while or are failing 5 | 6 | 2. Run the update_schemas_in_schema_registry.sh script to update all the schemas in the schema registry 7 | 8 | 3. Methods in the local_test.py file can be run locally to validate new and existing schemas and to test messages 9 | 10 | 4. main.py will be deployed as the source for the test Lambda function. These files need further development to remove some unnecessary repetition of code 11 | 12 | 5. Topic schemas in this folder are copies of the schemas stored in the schema registry Terraform module. This ensures the test scripts can be run both locally and on AWS 13 | 14 | TODO: 15 | Add Kafka UI 16 | -------------------------------------------------------------------------------- /lambdas/kafka_test/update_schemas_in_schema_registry.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | #contact details api schema 4 | contact_details_api_schema_string=$(jq -c . ./topic-schemas/contact_details_api.json | jq -R) 5 | curl -X POST -H "Content-Type: application/vnd.schemaregistry.v1+json" --data "{ \"schema\": ${contact_details_api_schema_string} }" "http://localhost:8081/subjects/contact_details_api-value/versions" 6 | 7 | #tenure api schema 8 | tenure_api_schema_string=$(jq -c . ./topic-schemas/tenure_api.json | jq -R) 9 | curl -X POST -H "Content-Type: application/vnd.schemaregistry.v1+json" --data "{ \"schema\": ${tenure_api_schema_string} }" "http://localhost:8081/subjects/tenure_api-value/versions" 10 | -------------------------------------------------------------------------------- /lambdas/lambda_alarms_handler/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install-requirements 2 | 3 | install-requirements: 4 | python3 -m venv venv 5 | # the requirements are generated so that the packages 6 | # could be downloaded and packaged up for the lambda 7 | 8 | . venv/bin/activate && sudo pipenv lock --requirements > requirements.txt 9 | .
venv/bin/activate && sudo pip install --target ./lib -r requirements.txt 10 | rm -rf venv/ 11 | -------------------------------------------------------------------------------- /lambdas/lambda_alarms_handler/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | urllib3 = "*" 8 | python-dotenv = "*" 9 | 10 | [dev-packages] 11 | 12 | [requires] 13 | python_version = "3" 14 | -------------------------------------------------------------------------------- /lambdas/lambda_layers/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the Ubuntu latest image 2 | FROM ubuntu:22.04 3 | 4 | # Define a build-time variable 5 | ARG L_N 6 | 7 | # Install system dependencies and zip 8 | RUN apt-get update && \ 9 | apt-get upgrade -y && \ 10 | apt-get install -y zip python3.11 python3-pip 11 | 12 | # Install pip 13 | RUN python3.11 -m pip install --upgrade pip 14 | 15 | # Create a non-root user and group 16 | RUN addgroup --system docker_users \ 17 | && adduser --system --ingroup docker_users docker_user 18 | 19 | # Switch to non-root user 20 | USER docker_user 21 | 22 | # Set the working directory in the container 23 | WORKDIR /app 24 | 25 | # Copy the first requirements.txt to the container 26 | COPY requirements${L_N}.txt /app/requirements${L_N}.txt 27 | 28 | # Install Python dependencies for the first layer 29 | RUN mkdir -p /app/layer${L_N}/python && \ 30 | pip install -t /app/layer${L_N}/python -r requirements${L_N}.txt && \ 31 | cd /app/layer${L_N} && \ 32 | zip -r /app/layer${L_N}.zip . 33 | -------------------------------------------------------------------------------- /lambdas/lambda_layers/Makefile: -------------------------------------------------------------------------------- 1 | CONTAINER_NAME := lambda_$(LAYER_FILE)_docker 2 | DOCKER_IMAGE := aws_lambda_$(LAYER_FILE) 3 | LAYER_NUMBER := $(patsubst layer%.zip,%,$(LAYER_FILE)) 4 | .PHONY: build run copy 5 | 6 | all: copy clean 7 | 8 | build: 9 | docker build --build-arg L_N=$(LAYER_NUMBER) -t $(DOCKER_IMAGE) . 
10 | 11 | run: build 12 | docker run --name $(CONTAINER_NAME) -d $(DOCKER_IMAGE) env 13 | 14 | copy: run 15 | docker cp $(CONTAINER_NAME):/app/$(LAYER_FILE) ./$(LAYER_FILE) 16 | 17 | stop: 18 | docker stop $(CONTAINER_NAME) 19 | 20 | clean: stop 21 | docker rm $(CONTAINER_NAME) -------------------------------------------------------------------------------- /lambdas/lambda_layers/requirements1.txt: -------------------------------------------------------------------------------- 1 | pandas==2.2.3 -------------------------------------------------------------------------------- /lambdas/lambda_layers/requirements2.txt: -------------------------------------------------------------------------------- 1 | requests==2.32.3 2 | httplib2==0.22.0 -------------------------------------------------------------------------------- /lambdas/lambda_layers/requirements3.txt: -------------------------------------------------------------------------------- 1 | notifications-python-client==10.0.1 -------------------------------------------------------------------------------- /lambdas/lambda_layers/requirements4.txt: -------------------------------------------------------------------------------- 1 | numpy==2.2.4 -------------------------------------------------------------------------------- /lambdas/lambda_layers/requirements5.txt: -------------------------------------------------------------------------------- 1 | gspread==6.2.1 2 | oauth2client==4.1.3 3 | google-api-python-client==2.170.0 4 | yagmail==0.15.293 -------------------------------------------------------------------------------- /lambdas/lambda_layers/requirements6.txt: -------------------------------------------------------------------------------- 1 | urllib3==2.4.0 -------------------------------------------------------------------------------- /lambdas/lambda_layers/requirements7.txt: -------------------------------------------------------------------------------- 1 | s3fs==2025.5.1 -------------------------------------------------------------------------------- /lambdas/mtfh_export_lambda/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install-requirements 2 | 3 | install-requirements: 4 | python3 -m venv venv 5 | # the requirements are generated so that the packages 6 | # could be downloaded and packaged up for the lambda 7 | 8 | . venv/bin/activate && sudo pipenv lock --requirements > requirements.txt 9 | . 
venv/bin/activate && sudo pip install --target ./lib -r requirements.txt 10 | rm -rf venv/ 11 | -------------------------------------------------------------------------------- /lambdas/publish_file_upload_to_sns_topic/main.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | 5 | import boto3 6 | 7 | logger = logging.getLogger() 8 | logger.setLevel(logging.INFO) 9 | 10 | 11 | def lambda_handler(event, context): 12 | topic_arn = os.environ["TOPIC_ARN"] 13 | sns = boto3.client("sns") 14 | 15 | logger.info("## event") 16 | logger.info(event) 17 | 18 | bucket_name = event["Records"][0]["s3"]["bucket"]["name"] 19 | file_key = event["Records"][0]["s3"]["object"]["key"] 20 | event_time = event["Records"][0]["eventTime"] 21 | 22 | message = f"File uploaded: {file_key} to bucket: {bucket_name} at: {event_time}" 23 | subject = f"New File Uploaded to S3: {bucket_name}/{file_key}" 24 | 25 | sns.publish( 26 | TopicArn=topic_arn, 27 | Message=message, 28 | Subject=subject, 29 | ) 30 | 31 | return { 32 | "statusCode": 200, 33 | "body": json.dumps("Email notification sent successfully!"), 34 | } 35 | -------------------------------------------------------------------------------- /lambdas/rds-database-snapshot-replicator/README.md: -------------------------------------------------------------------------------- 1 | # Local lambda set up 2 | 3 | To deploy the lambda functions using the AWS CLI, use the following commands: 4 | 5 | 1. Compress the lambda `zip -r lambdaFunc.zip .` 6 | 2. `aws lambda update-function-code --function-name rd-export-testing_lambda --zip-file fileb://lambdaFunc.zip --profile madetech-sandbox` (update the --profile to your own if needed) 7 | -------------------------------------------------------------------------------- /lambdas/rds-database-snapshot-replicator/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rds-database-snapshot-replicator", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "aws-sdk": "^2.889.0" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /lambdas/redshift_stage_and_load_tables/stage_and_load_parquet.sql: -------------------------------------------------------------------------------- 1 | BEGIN 2 | -- Create the staging table 3 | EXECUTE format( 4 | 'CREATE TABLE IF NOT EXISTS %I.%I_staging (LIKE %I.%I);', 5 | {schema_name}, 6 | {table_name}, 7 | {schema_name}, 8 | {table_name} 9 | ); 10 | -- Load data from S3 into the staging table 11 | EXECUTE format( 12 | 'COPY %I.%I_staging FROM %L FORMAT AS PARQUET iam_role %L;', 13 | {schema_name}, 14 | {table_name}, 15 | {s3_path}, 16 | {iam_role} 17 | ); 18 | -- Insert data from staging to main table 19 | EXECUTE format( 20 | 'INSERT INTO %I.%I SELECT * FROM %I.%I_staging;', 21 | {schema_name}, 22 | {table_name}, 23 | {schema_name}, 24 | {table_name} 25 | ); 26 | -- Truncate staging table 27 | EXECUTE format( 28 | 'TRUNCATE %I.%I_staging;', 29 | {schema_name}, 30 | {table_name} 31 | ); 32 | COMMIT; -------------------------------------------------------------------------------- /lambdas/requirements.test.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | google-api-python-client 3 | python-dotenv 4 | 
oauth2client 5 | pybase64 -------------------------------------------------------------------------------- /lambdas/s3-to-s3-export-copier/README.md: -------------------------------------------------------------------------------- 1 | # Local lambda set up 2 | 3 | To deploy the lambda functions using the AWS CLI, use the following commands: 4 | 5 | 1. Compress the lambda `zip -r lambdaFunc.zip .` 6 | 2. `aws lambda update-function-code --function-name rd-export-testing_lambda --zip-file fileb://lambdaFunc.zip --profile madetech-sandbox` (update the --profile to your own if needed) 7 | -------------------------------------------------------------------------------- /lambdas/s3-to-s3-export-copier/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rds-database-snapshot-replicator", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "aws-sdk": "^2.889.0" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /lambdas/set_budget_limit_amount/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | 8 | [dev-packages] 9 | boto3 = "*" 10 | 11 | [requires] 12 | python_version = "3.9" 13 | -------------------------------------------------------------------------------- /lambdas/set_budget_limit_amount/Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "a36a5392bb1e8bbc06bfaa0761e52593cf2d83b486696bf54667ba8da616c839" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.9" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": {}, 19 | "develop": {} 20 | } 21 | -------------------------------------------------------------------------------- /lambdas/sftp_to_s3/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sftp-to-s3", 3 | "version": "1.0.0", 4 | "description": "A function to stream files matching a set pattern from SFTP server to S3 bucket", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "start": "node index.js" 9 | }, 10 | "author": "", 11 | "license": "ISC", 12 | "dependencies": { 13 | "aws-sdk": "^2.913.0", 14 | "luxon": "^3.2.1", 15 | "ssh2-sftp-client": "^9.0.4" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /lambdas/shutdown_notebooks/main.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | 3 | def shutdown_notebooks(_event, _lambda_context, glueClient = None, sagemakerClient = None): 4 | sagemaker = sagemakerClient or boto3.client("sagemaker") 5 | glue = glueClient or boto3.client("glue") 6 | # get all notebook instances 7 | notebooks = sagemaker.list_notebook_instances(MaxResults=100, StatusEquals='InService')['NotebookInstances'] 8 | 9 | # stop them all 10 | for notebook in notebooks: 11 | print(f"Stopping notebook: {notebook}") 12 | sagemaker.stop_notebook_instance( 13 | NotebookInstanceName=notebook['NotebookInstanceName'] 14 | ) 15 | 
16 | # get all dev endpoints 17 | endpoint_names = glue.list_dev_endpoints(MaxResults=100)['DevEndpointNames'] 18 | 19 | # delete them all 20 | for endpoint in endpoint_names: 21 | print(f"Deleting endpoint: {endpoint}") 22 | glue.delete_dev_endpoint(EndpointName=endpoint) 23 | 24 | if __name__ == '__main__': 25 | shutdown_notebooks('event','lambda_context') 26 | -------------------------------------------------------------------------------- /notebook/Makefile: -------------------------------------------------------------------------------- 1 | save-credentials: 2 | -(aws-vault exec hackney-dataplatform-staging -- env | grep ^AWS_) > ./.env 3 | 4 | run-notebook: save-credentials 5 | docker compose up notebook 6 | 7 | run-notebook-v2: save-credentials 8 | docker compose up notebook-v2 9 | 10 | run-notebook-v3: save-credentials 11 | docker compose run --service-ports notebook-v3 12 | 13 | remove-images: 14 | -docker kill glue_jupyter 15 | docker rm glue_jupyter 16 | 17 | thrift-server: 18 | docker compose exec notebook bash -c "/home/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8/sbin/start-thriftserver.sh --hiveconf hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory --hiveconf hive.metastore.schema.verification=false --hiveconf aws.region=eu-west-2" 19 | 20 | spark-sql: 21 | docker compose exec notebook /home/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8/bin/beeline -u jdbc:hive2://localhost:10000/default -n root -p "" 22 | 23 | .PHONY: run-notebook run-notebook-v3 run-notebook-v2 24 | -------------------------------------------------------------------------------- /notebook/aws-config/config: -------------------------------------------------------------------------------- 1 | [default] 2 | region=eu-west-2 -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | spark_events/ 3 | build/ 4 | dist/ 5 | *.egg-info 6 | .pytest_cache 7 | .venv 8 | lib/* 9 | !lib/.keep -------------------------------------------------------------------------------- /scripts/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test test-watch history-server shell 2 | 3 | filename ?= "" 4 | test: 5 | filename=${filename} docker compose run unit_tests 6 | 7 | test-watch: 8 | docker compose run --entrypoint "bash -c 'pip install -r requirements.test.txt && pytest-watch ./${filename}'" unit_tests 9 | 10 | history-server: 11 | docker run --rm -p "18080:18080" -v ${PWD}/spark_events:/tmp/spark-events amazon/aws-glue-libs:glue_libs_1.0.0_image_01 bash -c 'rm $${SPARK_HOME}/jars/jersey-*-1.9.jar; $${SPARK_HOME}/bin/spark-class org.apache.spark.deploy.history.HistoryServer' 12 | 13 | shell: 14 | docker run --rm -it -w /root/scripts -v ${PWD}:/root/scripts amazon/aws-glue-libs:glue_libs_1.0.0_image_01 bash -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | services: 3 | unit_tests: 4 | container_name: unit_test_runner 5 
| image: amazon/aws-glue-libs@sha256:365ff50d6aae7774b491aa17b2eec9a07a8bcda5e2ad7f64df69f8fba2bf5d56 6 | working_dir: /root/scripts 7 | ports: 8 | - "4041:4040" 9 | entrypoint: bash -c "pip install -r ./requirements.test.txt && /home/aws-glue-libs/bin/gluepytest ./${filename}" 10 | volumes: 11 | - .:/root/scripts -------------------------------------------------------------------------------- /scripts/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/helpers/__init__.py -------------------------------------------------------------------------------- /scripts/helpers/repairs.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pyspark.sql.functions as F 3 | 4 | def clean_column_names(df): 5 | # remove full stops from column names 6 | df = df.select([F.col("`{0}`".format(c)).alias( 7 | c.replace('.', '')) for c in df.columns]) 8 | # remove trialing underscores 9 | df = df.select([F.col(col).alias(re.sub("_$", "", col)) 10 | for col in df.columns]) 11 | # lowercase and remove double underscores 12 | df = df.select([F.col(col).alias( 13 | re.sub("[^0-9a-zA-Z$]+", "_", col.lower())) for col in df.columns]) 14 | return df 15 | 16 | def map_repair_priority(data_frame, origin_column, target_column): 17 | return data_frame.withColumn(target_column, F.when(data_frame[origin_column] == "Immediate", 1) 18 | .when(data_frame[origin_column] == "Emergency", 2) 19 | .when(data_frame[origin_column] == "Urgent", 3) 20 | .when(data_frame[origin_column] == "Normal", 4) 21 | .otherwise(None)) 22 | -------------------------------------------------------------------------------- /scripts/jobs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/jobs/__init__.py -------------------------------------------------------------------------------- /scripts/jobs/data_and_insight/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/jobs/data_and_insight/__init__.py -------------------------------------------------------------------------------- /scripts/jobs/housing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/jobs/housing/__init__.py -------------------------------------------------------------------------------- /scripts/jobs/ml_jobs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/jobs/ml_jobs/__init__.py -------------------------------------------------------------------------------- /scripts/jobs/parking/spatial-enrichment-dictionary-preprod.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "cycle_hangar_waiting_list": { 3 | "database_name":"dataplatform-stg-liberator-refined-zone", 4 | "table_name":"pparking_cycle_hangar_wait_list_with_coords", 5 | "partition_keys":["import_year","import_month","import_day","import_date"], 6 | 
"date_partition_name":"import_date", 7 | "x_column":"x", 8 | "y_column":"y", 9 | "geom_format": "coords", 10 | "source_crs": "epsg:27700", 11 | "enrich_with":["housing_estate"] 12 | } 13 | }] 14 | -------------------------------------------------------------------------------- /scripts/jobs/parking/spatial-enrichment-dictionary.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "liberator_permit_llpg": { 3 | "database_name":"dataplatform-prod-liberator-raw-zone", 4 | "table_name":"liberator_permit_llpg", 5 | "partition_keys":["import_year","import_month","import_day","import_date"], 6 | "date_partition_name":"import_date", 7 | "x_column":"x", 8 | "y_column":"y", 9 | "geom_format": "coords", 10 | "source_crs": "epsg:27700", 11 | "enrich_with":["housing_estate"] 12 | } 13 | }] 14 | -------------------------------------------------------------------------------- /scripts/jobs/spark_example.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import DataFrame 2 | 3 | 4 | def only_hackney_addresses(dataframe: DataFrame) -> DataFrame: 5 | """Used to demonstrate pytest + pyspark. See test_spark_example.py 6 | """ 7 | return dataframe.filter(dataframe.council == 'Hackney') 8 | -------------------------------------------------------------------------------- /scripts/lib/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/lib/.keep -------------------------------------------------------------------------------- /scripts/package-helpers.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | set -eu -o pipefail 3 | 4 | 5 | source .venv/bin/activate 6 | python scripts/setup.py bdist_wheel 7 | -------------------------------------------------------------------------------- /scripts/requirements.build.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | wheel 3 | boto3 4 | redshift-connector==2.1.7 # used to connect with redshift 5 | -------------------------------------------------------------------------------- /scripts/requirements.test.txt: -------------------------------------------------------------------------------- 1 | pytest-watch 2 | freezegun 3 | pytest-mock 4 | pydeequ 5 | # Explicitly set urllib3 version compatible with botocore to pass CI tests 6 | urllib3>=1.25.4,<2.4 # glue 4 is using urllib3==1.25.11 so don't need add to requirements.build.txt 7 | redshift-connector==2.1.7 # used to connect with redshift 8 | -------------------------------------------------------------------------------- /scripts/spark_events/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/spark_events/.keep -------------------------------------------------------------------------------- /scripts/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/tests/__init__.py -------------------------------------------------------------------------------- /scripts/tests/helpers/__init_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/tests/helpers/__init_.py -------------------------------------------------------------------------------- /scripts/tests/helpers/assertions.py: -------------------------------------------------------------------------------- 1 | from unittest.case import TestCase 2 | 3 | def dictionaryContains(expected, actual): 4 | TestCase().assertEqual(actual, { **actual, **expected}) -------------------------------------------------------------------------------- /scripts/tests/helpers/dataframe_conversions.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import Row 2 | 3 | def list_to_dataframe(spark, list_data): 4 | return spark.createDataFrame(spark.sparkContext.parallelize( 5 | [Row(**i) for i in list_data] 6 | )) 7 | 8 | def dataframe_to_list(df): 9 | return [row.asDict() for row in df.rdd.collect()] -------------------------------------------------------------------------------- /scripts/tests/helpers/dummy_logger.py: -------------------------------------------------------------------------------- 1 | 2 | class Logger: 3 | def info(self, message): 4 | return -------------------------------------------------------------------------------- /scripts/tests/planning/test_tascomi_api_ingestion.py: -------------------------------------------------------------------------------- 1 | from freezegun import freeze_time 2 | from scripts.jobs.planning.tascomi_api_ingestion import get_days_since_last_import 3 | 4 | class TestTascomiApiIngestion: 5 | def test_get_days_since_last_import_no_days(self): 6 | today = "2012-01-14" 7 | last_import_date = "20120114" 8 | with freeze_time(today): 9 | actual_response = 
get_days_since_last_import(last_import_date) 10 | assert (actual_response == []) 11 | 12 | 13 | def test_get_days_since_last_import_two_days(self): 14 | today = "2018-03-15" 15 | last_import_date = "20180313" 16 | with freeze_time(today): 17 | actual_response = get_days_since_last_import(last_import_date) 18 | assert (actual_response == ["2018-03-13", "2018-03-14"]) -------------------------------------------------------------------------------- /scripts/tests/planning/test_tascomi_recast_tables_increments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/tests/planning/test_tascomi_recast_tables_increments.py -------------------------------------------------------------------------------- /scripts/tests/stubs/column_type_dictionary.json: -------------------------------------------------------------------------------- 1 | { 2 | "timestamp": { 3 | "MyTable": ["submit_date"], 4 | "Table2": ["another_date", "third_date"] 5 | }, 6 | "integer": { 7 | "MyTable": ["id_number"] 8 | }, 9 | "boolean": { 10 | "MyTable": ["flag"] 11 | }, 12 | "float": { 13 | "MyTable": ["measurement"] 14 | }, 15 | "long": { 16 | "MyTable": ["long_id"] 17 | }, 18 | "double": { 19 | "MyTable": ["large_measurement"] 20 | }, 21 | "date": { 22 | "MyTable": ["start_date"] 23 | } 24 | } -------------------------------------------------------------------------------- /scripts/tests/stubs/column_type_dictionary_partial.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "integer": { 4 | "MyTable": ["id_number"] 5 | }, 6 | "boolean": { 7 | "MyTable": ["flag"] 8 | } 9 | } 10 | ] -------------------------------------------------------------------------------- /scripts/tests/test_data/test_spreadsheet_import.csv: -------------------------------------------------------------------------------- 1 | header_one,header_two 2 | 1,2 3 | 1,2 -------------------------------------------------------------------------------- /scripts/tests/test_spark_example.py: -------------------------------------------------------------------------------- 1 | from scripts.jobs.spark_example import only_hackney_addresses 2 | from pyspark.sql import Row 3 | 4 | class TestSparkExample: 5 | def test_filters_only_hackney_addresses(self, spark): 6 | assert ( 7 | [ 8 | {'line1': '13', 'line2': 'Cheese Lane', 'postcode': 'E8 13HB', 'council': 'Hackney'} 9 | ] 10 | == 11 | self.only_hackney_addresses(spark, [ 12 | {'line1': '13', 'line2': 'Cheese Lane', 'postcode': 'E8 13HB', 'council': 'Hackney'}, 13 | {'line1': '13', 'line2': 'Pickle Lane', 'postcode': 'E15 13HB', 'council': 'Newham'}, 14 | ]) 15 | ) 16 | 17 | def only_hackney_addresses(self, spark, addresses): 18 | query_addresses = spark.createDataFrame(spark.sparkContext.parallelize([Row(**i) for i in addresses])) 19 | return [row.asDict() for row in only_hackney_addresses(query_addresses).rdd.collect()] 20 | -------------------------------------------------------------------------------- /scripts/tests/watermarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/scripts/tests/watermarks/__init__.py -------------------------------------------------------------------------------- /terraform/backend-setup/00-init.tf: 
-------------------------------------------------------------------------------- 1 | # Core Infrastructure 2 | provider "aws" { 3 | region = "eu-west-2" 4 | } 5 | 6 | # General 7 | terraform { 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/backend-setup/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | # Mandatory variables, that are provided by the GitHub Action CI/CD. The shouldn't be changed! 2 | variable "environment" { 3 | description = "Environment e.g. Dev, Stg, Prod, Mgmt." 4 | type = string 5 | } -------------------------------------------------------------------------------- /terraform/backend-setup/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | # Any internal local variables should be declared here. We also import the tag module for convenience 2 | module "tags" { 3 | source = "github.com/LBHackney-IT/aws-tags-lbh.git?ref=v1.1.1" 4 | 5 | application = var.application 6 | automation_build_url = var.automation_build_url 7 | confidentiality = var.confidentiality 8 | custom_tags = var.custom_tags 9 | department = var.department 10 | environment = var.environment 11 | phase = var.phase 12 | project = var.project 13 | stack = var.stack 14 | team = var.team 15 | } 16 | -------------------------------------------------------------------------------- /terraform/backend-setup/10-aws-s3-buckets.tf: -------------------------------------------------------------------------------- 1 | resource "aws_kms_key" "kms_key" { 2 | tags = module.tags.values 3 | 4 | description = "${var.project} - ${var.environment} KMS Key" 5 | deletion_window_in_days = 10 6 | enable_key_rotation = true 7 | } 8 | 9 | resource "aws_s3_bucket" "terraform_state_storage" { 10 | tags = merge(module.tags.values, { S3Backup = true }) 11 | 12 | bucket = lower("${var.project}-terraform-state") 13 | 14 | server_side_encryption_configuration { 15 | rule { 16 | apply_server_side_encryption_by_default { 17 | kms_master_key_id = aws_kms_key.kms_key.arn 18 | sse_algorithm = "aws:kms" 19 | } 20 | } 21 | } 22 | } 23 | 24 | resource "aws_s3_bucket_public_access_block" "block_public_access" { 25 | bucket = aws_s3_bucket.terraform_state_storage.id 26 | depends_on = [aws_s3_bucket.terraform_state_storage] 27 | 28 | block_public_acls = true 29 | block_public_policy = true 30 | ignore_public_acls = true 31 | restrict_public_buckets = true 32 | } 33 | -------------------------------------------------------------------------------- /terraform/backend-setup/99-outputs.tf: -------------------------------------------------------------------------------- 1 | # We make any output files clear by adding them to the 99-outputs.tf, meaning anyone can quickly check if they're consuming your module 2 | output "terraform_state_storage_arn" { 3 | description = "Terraform bucket arn" 4 | value = aws_s3_bucket.terraform_state_storage.arn 5 | } 6 | -------------------------------------------------------------------------------- /terraform/backend-setup/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: $(MAKECMDGOALS) 2 | 3 | init: 4 | aws-vault exec hackney-dataplatform-development -- terraform init 5 | 6 | plan: 7 | aws-vault exec hackney-dataplatform-development -- terraform plan -var-file="../config/env.tfvars" 8 | 9 | apply: 10 
| aws-vault exec hackney-dataplatform-development -- terraform apply -var-file="../config/env.tfvars" 11 | 12 | validate: 13 | terraform validate 14 | 15 | destroy: 16 | aws-vault exec hackney-dataplatform-development -- terraform destroy -var-file="../config/env.tfvars" 17 | 18 | lint-init: 19 | tflint --init --config="../config/.tflint.hcl" 20 | 21 | lint: 22 | aws-vault exec hackney-dataplatform-development -- tflint --var-file='../config/env.tfvars' --module --config="../config/.tflint.hcl" --loglevel=warn . 23 | 24 | -------------------------------------------------------------------------------- /terraform/backend-setup/README.md: -------------------------------------------------------------------------------- 1 | # Terraform Backend Setup 2 | 3 | This project is used to deploy state storage buckets to AWS, ensuring that they share the same tags and settings as the 4 | project whose state they manage. 5 | 6 | Since this project deploys the backend s3 buckets, it cannot itself use a backend s3 bucket, but since it should only be 7 | needed to deploy new environments or to rarely make changes to the bucket setup, we have chosen to use local state 8 | management, which we will commit to the Git repo. 9 | 10 | Therefore, in order to differentiate environment states, we have additionally chosen to use terraform workspaces. 11 | 12 | Before applying changes, please ensure that you have switched to the correct workspace using: 13 | `terraform workspace list` 14 | 15 | If you are not using the correct workspace, switch to the correct one using: 16 | `terraform workspace select ` 17 | 18 | If you need to create a new workspace, use: 19 | `terraform workspace new ` -------------------------------------------------------------------------------- /terraform/compliance/dynamodb.feature: -------------------------------------------------------------------------------- 1 | Feature: DynamoDB 2 | 3 | Scenario: Ensure BackupPolicy tag is present 4 | Given I have aws_dynamodb_table defined 5 | Then it must contain tags 6 | And it must contain BackupPolicy 7 | 8 | Scenario: Ensure point in time recovery enabled 9 | Given I have aws_dynamodb_table defined 10 | Then it must contain point_in_time_recovery 11 | And its enabled property must be true 12 | -------------------------------------------------------------------------------- /terraform/compliance/ecr.feature: -------------------------------------------------------------------------------- 1 | Feature: ECR 2 | 3 | Scenario: ECR policy must not be public 4 | Given I have aws_ecr_repository_policy defined 5 | Then it must have policy 6 | When it has statement 7 | Then it must have statement 8 | And it must have Principal 9 | And its value must not be * 10 | 11 | Scenario: ECR image scanning on push must be enabled 12 | Given I have aws_ecr_repository defined 13 | Then it must have image_scanning_configuration 14 | And its scan_on_push must be true -------------------------------------------------------------------------------- /terraform/compliance/elastic-search.feature: -------------------------------------------------------------------------------- 1 | Feature: Elastic Search 2 | 3 | Scenario: Ensure it is deployed in a VPC 4 | Given I have aws_elasticsearch_domain defined 5 | Then it must contain vpc_options 6 | 7 | Scenario: Ensure OpenSearch clusters are encrypted at rest 8 | Given I have aws_elasticsearch_domain defined 9 | Then it must contain encrypt_at_rest 10 | And its enabled property must be true 11 | 12 | Scenario: Ensure minimum instance count is
2 13 | Given I have aws_elasticsearch_domain defined 14 | Then it must contain cluster_config 15 | And it must contain instance_count 16 | And its value must be greater and equal to 2 -------------------------------------------------------------------------------- /terraform/compliance/subnet.feature: -------------------------------------------------------------------------------- 1 | Feature: Subnets 2 | 3 | Scenario: Ensure a multi-layered network architecture 4 | Given I have aws_subnet defined 5 | When I count them 6 | Then I expect the result is more than 2 -------------------------------------------------------------------------------- /terraform/config/.tflint.hcl: -------------------------------------------------------------------------------- 1 | # See https://github.com/terraform-linters/tflint/blob/master/docs/user-guide/config.md 2 | config { 3 | 4 | } 5 | 6 | plugin "aws" { 7 | enabled = true 8 | version = "0.13.0" 9 | source = "github.com/terraform-linters/tflint-ruleset-aws" 10 | 11 | deep_check = true 12 | } 13 | 14 | rule "aws_resource_missing_tags" { 15 | enabled = true 16 | tags = ["AutomationBuildUrl", "Environment", "Team", "Department", "Application", "Phase", "Stack", "Project", "Confidentiality"] 17 | exclude = ["aws_s3_bucket_object","aws_s3_object"] 18 | } 19 | 20 | rule "terraform_module_pinned_source" { 21 | enabled = false 22 | } 23 | -------------------------------------------------------------------------------- /terraform/core/04-input-redundant.tf: -------------------------------------------------------------------------------- 1 | # This file and the following variables were added to stop a series of warnings being shown during plan and apply 2 | # because the environment config files contain variables for multiple terraform modules. 3 | # Blocks are defined for the unused (and therefore redundant) variables and provide defaults so that if they aren't 4 | # provided at any point the module won't throw errors. 5 | variable "transit_gateway_private_subnets" { 6 | default = false 7 | } 8 | 9 | variable "transit_gateway_availability_zones" { 10 | default = false 11 | } 12 | 13 | variable "transit_gateway_cidr" { 14 | default = false 15 | } 16 | 17 | variable "aws_mosaic_vpc_id" { 18 | default = false 19 | } 20 | 21 | variable "aws_housing_vpc_id" { 22 | default = false 23 | } 24 | 25 | variable "aws_mosaic_prod_account_id" { 26 | default = false 27 | } 28 | 29 | variable "aws_data_platform_account_id" { 30 | default = false 31 | } 32 | 33 | variable "aws_vpc_id" { 34 | default = false 35 | } 36 | 37 | variable "aws_dp_vpc_id" { 38 | default = false 39 | } -------------------------------------------------------------------------------- /terraform/core/07-qlik-server.tf: -------------------------------------------------------------------------------- 1 | module "qlik_server" { 2 | count = local.is_live_environment ?
1 : 0 3 | 4 | source = "../modules/qlik-sense-server" 5 | tags = module.tags.values 6 | vpc_id = data.aws_vpc.network.id 7 | vpc_subnet_ids = local.subnet_ids_list 8 | instance_type = var.qlik_server_instance_type 9 | ssl_certificate_domain = var.qlik_ssl_certificate_domain 10 | identifier_prefix = local.identifier_prefix 11 | short_identifier_prefix = local.short_identifier_prefix 12 | environment = var.environment 13 | is_production_environment = local.is_production_environment 14 | is_live_environment = local.is_live_environment 15 | secrets_manager_kms_key = aws_kms_key.secrets_manager_key 16 | production_firewall_ip = var.production_firewall_ip 17 | } 18 | -------------------------------------------------------------------------------- /terraform/core/11-aws-ecs.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ecs_cluster" "workers" { 2 | tags = module.tags.values 3 | name = "${local.identifier_prefix}-workers" 4 | } -------------------------------------------------------------------------------- /terraform/core/26-google-platform.tf: -------------------------------------------------------------------------------- 1 | resource "google_project_service" "sheets_api" { 2 | count = local.is_live_environment ? 1 : 0 3 | 4 | project = var.google_project_id 5 | service = "sheets.googleapis.com" 6 | disable_dependent_services = true 7 | } 8 | 9 | resource "google_project_service" "drive_api" { 10 | count = local.is_live_environment ? 1 : 0 11 | 12 | project = var.google_project_id 13 | service = "drive.googleapis.com" 14 | disable_dependent_services = true 15 | } 16 | -------------------------------------------------------------------------------- /terraform/core/30-g-drive-to-s3.tf: -------------------------------------------------------------------------------- 1 | # This is no longer needed, but keeping this in as an example how the module should be used 2 | # module "repairs_spreadsheet" { 3 | # source = "../modules/g-drive-to-s3" 4 | # department = module.department_housing_repairs 5 | # tags = module.tags.values 6 | # identifier_prefix = local.identifier_prefix 7 | # lambda_artefact_storage_bucket = module.lambda_artefact_storage.bucket_id 8 | # zone_kms_key_arn = module.landing_zone.kms_key_arn 9 | # zone_bucket_arn = module.landing_zone.bucket_arn 10 | # zone_bucket_id = module.landing_zone.bucket_id 11 | # lambda_name = "repairs_spreadsheet" 12 | # service_area = "housing" 13 | # file_id = "1VlM80P6J8N0P3ZeU8VobBP9kMbpr1Lzq" 14 | # file_name = "Electrical-Mechnical-Fire-Safety-Temp-order-number-WC-12.10.20R1.xlsx" 15 | # } 16 | -------------------------------------------------------------------------------- /terraform/core/34-aws-budget-alerting.tf: -------------------------------------------------------------------------------- 1 | module "set_budget_limit_amount" { 2 | source = "../modules/set-budget-limit-amount" 3 | tags = module.tags.values 4 | environment = var.environment 5 | identifier_prefix = local.short_identifier_prefix 6 | lambda_artefact_storage_bucket = module.lambda_artefact_storage.bucket_id 7 | lambda_name = "set_budget_limit_amount" 8 | service_area = "housing" 9 | account_id = data.aws_caller_identity.data_platform.account_id 10 | emails_to_notify = var.emails_to_notify_with_budget_alerts 11 | } 12 | -------------------------------------------------------------------------------- /terraform/core/37-datahub.tf: -------------------------------------------------------------------------------- 1 | module "datahub" 
{ 2 | count = local.is_live_environment ? 1 : 0 3 | 4 | source = "../modules/datahub" 5 | tags = module.tags.values 6 | short_identifier_prefix = local.short_identifier_prefix 7 | identifier_prefix = local.identifier_prefix 8 | vpc_id = data.aws_vpc.network.id 9 | vpc_subnet_ids = local.subnet_ids_list 10 | is_live_environment = local.is_live_environment 11 | datahub_url = var.datahub_url 12 | kafka_properties = { 13 | kafka_zookeeper_connect = module.kafka_event_streaming[0].cluster_config.zookeeper_connect_string 14 | kafka_bootstrap_server = module.kafka_event_streaming[0].cluster_config.bootstrap_brokers_tls 15 | } 16 | schema_registry_properties = { 17 | schema_registry_url = module.kafka_event_streaming[0].schema_registry_url 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /terraform/core/42-lambda-alarms-handler.tf: -------------------------------------------------------------------------------- 1 | module "lambda_alarms_handler" { 2 | count = local.is_production_environment ? 1 : 0 3 | source = "../modules/lambda-alarms-handler" 4 | tags = module.tags.values 5 | identifier_prefix = local.short_identifier_prefix 6 | lambda_name = "lambda-alarms-handler" 7 | lambda_artefact_storage_bucket = module.lambda_artefact_storage.bucket_id 8 | 9 | lambda_environment_variables = { 10 | "SECRET_NAME" = "${local.short_identifier_prefix}lambda-alarms-handler-secret" 11 | } 12 | 13 | secret_name = "${local.short_identifier_prefix}lambda-alarms-handler-secret" 14 | secrets_manager_kms_key = aws_kms_key.secrets_manager_key 15 | } 16 | -------------------------------------------------------------------------------- /terraform/core/43-lambda-monitoring-dashboard.tf: -------------------------------------------------------------------------------- 1 | module "lambda_monitoring_dashboard" { 2 | count = local.is_production_environment ? 1 : 0 3 | source = "../modules/lambda-monitoring-dashboard" 4 | tags = module.tags.values 5 | identifier_prefix = local.short_identifier_prefix 6 | } 7 | -------------------------------------------------------------------------------- /terraform/core/50-dynamodb.tf: -------------------------------------------------------------------------------- 1 | 2 | 3 | module "watermarks_dynamodb_table" { 4 | source = "../modules/dynamodb" 5 | count = local.is_live_environment && !local.is_production_environment ? 
1 : 0 6 | 7 | name = "glue-watermarks" 8 | identifier_prefix = local.short_identifier_prefix 9 | billing_mode = "PAY_PER_REQUEST" 10 | hash_key = "jobName" 11 | range_key = "runId" 12 | table_class = "STANDARD" 13 | point_in_time_recovery_enabled = true 14 | tags = merge(module.tags.values, { BackupPolicy = title(var.environment) }) 15 | 16 | attributes = [ 17 | { 18 | name = "jobName" 19 | type = "S" 20 | }, 21 | { 22 | name = "runId" 23 | type = "S" 24 | } 25 | ] 26 | 27 | server_side_encryption_enabled = true 28 | } 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /terraform/core/81-sync-rentsense-files.tf: -------------------------------------------------------------------------------- 1 | module "copy_from_s3_to_s3" { 2 | source = "../modules/copy-from-s3-to-s3" 3 | tags = module.tags.values 4 | 5 | is_live_environment = local.is_live_environment 6 | environment = local.environment 7 | is_production_environment = local.is_production_environment 8 | lambda_name = "rentsense-s3-to-s3-export-copy" 9 | identifier_prefix = local.identifier_prefix 10 | short_identifier_prefix = local.short_identifier_prefix 11 | lambda_artefact_storage_bucket = module.lambda_artefact_storage 12 | lambda_execution_cron_schedule = "cron(0 8 * * ? *)" 13 | origin_bucket = module.refined_zone 14 | origin_path = "housing/rentsense/export/" 15 | target_bucket = { 16 | bucket_id = "feeds-pluto-mobysoft" 17 | bucket_arn = "arn:aws:s3:::feeds-pluto-mobysoft" 18 | kms_key_id = null 19 | kms_key_arn = null 20 | } 21 | target_path = var.rentsense_target_path 22 | assume_role = "arn:aws:iam::971933469343:role/customer-midas-roles-pluto-HackneyMidasRole-1M6PTJ5VS8104" 23 | } 24 | -------------------------------------------------------------------------------- /terraform/core/82-academy-pre-production-bens-housing-needs-raw-zone.tf: -------------------------------------------------------------------------------- 1 | #benefits-housing-needs raw zone crawler 2 | resource "aws_glue_crawler" "bens_housing_needs_raw_zone" { 3 | count = !local.is_production_environment ? 1 : 0 4 | tags = module.tags.values 5 | 6 | database_name = module.department_benefits_and_housing_needs.raw_zone_catalog_database_name 7 | name = "${local.short_identifier_prefix}bens-housing-needs-raw-zone" 8 | role = aws_iam_role.glue_role.arn 9 | 10 | s3_target { 11 | path = "s3://${module.raw_zone.bucket_id}/benefits-housing-needs/" 12 | } 13 | 14 | configuration = jsonencode({ 15 | Version = 1.0 16 | Grouping = { 17 | TableLevelConfiguration = 3 18 | TableGroupingPolicy = "CombineCompatibleSchemas" 19 | } 20 | CrawlerOutput = { 21 | Partitions = { 22 | AddOrUpdateBehavior = "InheritFromTable" 23 | } 24 | } 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /terraform/core/82-academy-pre-production-revenues-raw-zone.tf: -------------------------------------------------------------------------------- 1 | #revenues raw zone crawler 2 | resource "aws_glue_crawler" "revenues_raw_zone" { 3 | count = !local.is_production_environment ? 
1 : 0 4 | tags = module.tags.values 5 | 6 | database_name = module.department_revenues.raw_zone_catalog_database_name 7 | name = "${local.short_identifier_prefix}revenues-raw-zone" 8 | role = aws_iam_role.glue_role.arn 9 | 10 | s3_target { 11 | path = "s3://${module.raw_zone.bucket_id}/revenues/" 12 | } 13 | 14 | configuration = jsonencode({ 15 | Version = 1.0 16 | Grouping = { 17 | TableLevelConfiguration = 3 18 | TableGroupingPolicy = "CombineCompatibleSchemas" 19 | } 20 | CrawlerOutput = { 21 | Partitions = { 22 | AddOrUpdateBehavior = "InheritFromTable" 23 | } 24 | } 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /terraform/etl/12-aws-glue-crawler-parking-spreadsheets.tf: -------------------------------------------------------------------------------- 1 | resource "aws_glue_crawler" "raw_zone_parking_g_drive_crawler" { 2 | tags = module.department_parking_data_source.tags 3 | 4 | database_name = module.department_parking_data_source.raw_zone_catalog_database_name 5 | name = "${local.short_identifier_prefix}raw-zone-parking-g-drive" 6 | role = data.aws_iam_role.glue_role.arn 7 | 8 | s3_target { 9 | path = "s3://${module.raw_zone_data_source.bucket_id}/parking/g-drive" 10 | exclusions = local.glue_crawler_excluded_blobs 11 | } 12 | 13 | configuration = jsonencode({ 14 | Version = 1.0 15 | Grouping = { 16 | TableLevelConfiguration = 4 17 | } 18 | }) 19 | table_prefix = null 20 | } 21 | 22 | resource "aws_glue_trigger" "raw_zone_parking_spreadsheets_crawler" { 23 | tags = module.department_parking_data_source.tags 24 | 25 | name = "${local.short_identifier_prefix}parking-raw-g-drive-crawler-trigger" 26 | schedule = "cron(0 23 * * ? *)" 27 | type = "SCHEDULED" 28 | enabled = local.is_live_environment 29 | 30 | actions { 31 | crawler_name = aws_glue_crawler.raw_zone_parking_g_drive_crawler.name 32 | } 33 | } -------------------------------------------------------------------------------- /terraform/etl/40-aws-glue-job-sandbox.tf: -------------------------------------------------------------------------------- 1 | #This terraform file is used for creating glue jobs during training. 
2 | 3 | #module "job_template" { 4 | # source = "../modules/aws-glue-job" 5 | # is_live_environment = local.is_live_environment 6 | # is_production_environment = local.is_production_environment 7 | # 8 | # department = module.department_sandbox_data_source 9 | # job_name = "${local.short_identifier_prefix}job_template" 10 | # script_name = "job_script_template" 11 | # pydeequ_zip_key = data.aws_s3_bucket_object.pydeequ.key 12 | # helper_module_key = data.aws_s3_bucket_object.helpers.key 13 | # spark_ui_output_storage_id = module.spark_ui_output_storage_data_source.bucket_id 14 | # job_parameters = { 15 | # "--s3_bucket_target" = "s3://${module.refined_zone_data_source.bucket_id}/sandbox/some-target-location-in-the-refined-zone" 16 | # "--source_catalog_database" = module.department_sandbox_data_source.raw_zone_catalog_database_name 17 | # "--source_catalog_table" = "some_table_name" 18 | # } 19 | #} 20 | 21 | -------------------------------------------------------------------------------- /terraform/etl/61-aws-glue-catalog-database.tf: -------------------------------------------------------------------------------- 1 | resource "aws_glue_catalog_database" "hackney_synergy_live" { 2 | name = "hackney_synergy_live" 3 | 4 | lifecycle { 5 | prevent_destroy = true 6 | } 7 | } 8 | 9 | resource "aws_glue_catalog_database" "hackney_casemanagement_live" { 10 | name = "hackney_casemanagement_live" 11 | 12 | lifecycle { 13 | prevent_destroy = true 14 | } 15 | } 16 | 17 | resource "aws_glue_catalog_database" "housing_nec_migration_database" { 18 | name = "housing_nec_migration" 19 | 20 | lifecycle { 21 | prevent_destroy = true 22 | } 23 | } -------------------------------------------------------------------------------- /terraform/etl/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "redshift_cluster_id" { 2 | value = try(module.redshift[0].cluster_id, "") 3 | } 4 | 5 | output "redshift_iam_role_arn" { 6 | value = try(module.redshift[0].role_arn, "") 7 | } 8 | 9 | output "redshift_schemas" { 10 | value = local.redshift_schemas 11 | sensitive = true 12 | } 13 | 14 | output "redshift_users" { 15 | value = local.redshift_users 16 | sensitive = true 17 | } 18 | 19 | output "redshift_roles" { 20 | value = local.redshift_roles 21 | sensitive = true 22 | } 23 | -------------------------------------------------------------------------------- /terraform/etl/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: $(MAKECMDGOALS) 2 | 3 | init: 4 | aws-vault exec hackney-dataplatform-development -- terraform init 5 | 6 | plan: 7 | aws-vault exec hackney-dataplatform-development -- terraform plan -var-file="../config/env.tfvars" 8 | 9 | apply: 10 | aws-vault exec hackney-dataplatform-development -- terraform apply -var-file="../config/env.tfvars" --auto-approve 11 | 12 | validate: 13 | terraform validate 14 | 15 | destroy: 16 | aws-vault exec hackney-dataplatform-development -- terraform destroy -var-file="../config/env.tfvars" 17 | 18 | workspace-new: 19 | aws-vault exec hackney-dataplatform-development -- terraform workspace new ${WORKSPACE} 20 | 21 | workspace-select: 22 | aws-vault exec hackney-dataplatform-development -- terraform workspace select ${WORKSPACE} 23 | 24 | workspace-list: 25 | aws-vault exec hackney-dataplatform-development -- terraform workspace list 26 | 27 | format: 28 | terraform fmt . 
29 | 30 | lint-init: 31 | tflint --init --config="../terraform/config/.tflint.hcl" 32 | 33 | lint: 34 | aws-vault exec hackney-dataplatform-development -- tflint --var-file='../config/env.tfvars' --module --config="../terraform/config/.tflint.hcl" --loglevel=warn . 35 | -------------------------------------------------------------------------------- /terraform/iam/assume-role-policies.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_policy_document" "step_functions_assume_role" { 2 | statement { 3 | actions = ["sts:AssumeRole"] 4 | principals { 5 | type = "Service" 6 | identifiers = ["states.amazonaws.com"] 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /terraform/modules/README.md: -------------------------------------------------------------------------------- 1 | Resource Modules 2 | 3 | Please see terraform documentation which explains the usage and purpose of resource modules: [Link](https://www.terraform.io/language/modules/develop/composition#module-composition) -------------------------------------------------------------------------------- /terraform/modules/api-ingestion-lambda/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/api-ingestion-lambda/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "lambda_execution_cron_schedule" { 2 | description = "CRON expression to schedule the Lambda" 3 | type = string 4 | default = "cron(0 6 * * ? 
*)" 5 | } 6 | 7 | variable "ephemeral_storage" { 8 | description = "Amount of temporary storage in MBs" 9 | type = number 10 | default = 512 11 | } 12 | 13 | variable "lambda_timeout" { 14 | description = "Lambda time out in seconds" 15 | type = number 16 | default = 900 17 | } 18 | 19 | variable "lambda_memory_size" { 20 | description = "Memory for lambda in MBs" 21 | type = number 22 | default = 256 23 | } 24 | 25 | variable "glue_job_to_trigger" { 26 | description = "Name of Glue job to trigger once data has been ingested to S3" 27 | type = string 28 | default = "" 29 | } 30 | 31 | variable "trigger_to_run" { 32 | description = "Name of Glue trigger to start once data has been ingested to S3" 33 | type = string 34 | default = "" 35 | } 36 | 37 | -------------------------------------------------------------------------------- /terraform/modules/api-ingestion-lambda/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | 3 | locals { 4 | lambda_name_underscore = replace(lower(var.lambda_name), "/[^a-zA-Z0-9]+/", "_") 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/api-ingestion-lambda/04-resource-moves.tf: -------------------------------------------------------------------------------- 1 | moved { 2 | from = module.icaseworks_api_ingestion[0].null_resource.run_make_install_requirements 3 | to = module.icaseworks_api_ingestion[0].null_resource.run_install_requirements 4 | } -------------------------------------------------------------------------------- /terraform/modules/api-ingestion-lambda/99-outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/api-ingestion-lambda/99-outputs.tf -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-autoscaling-group/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-autoscaling-group/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "Name of ECS autoscaling group" 3 | type = string 4 | } 5 | 6 | variable "ecs_cluster_name" { 7 | description = "The ECS cluster name for the auto scaling policy" 8 | type = string 9 | } 10 | 11 | variable "ecs_service_name" { 12 | description = "The ECS service name for the auto scaling policy" 13 | type = string 14 | } 15 | 16 | variable "ecs_autoscaling_role_arn" { 17 | description = "IAM role with policy to handle autoscaling of ECS services" 18 | type = string 19 | } 20 | -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-autoscaling-group/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "autoscaling_max_capacity" { 2 | description = "The max capacity of the ECS service target." 3 | type = number 4 | default = 100 5 | } 6 | 7 | variable "autoscaling_min_capacity" { 8 | description = "The min capacity of the ECS service target" 9 | type = number 10 | default = 1 11 | } 12 | 13 | variable "cpu_target_value" { 14 | description = "The percentage CPU target value for ECS service" 15 | type = number 16 | default = 80 17 | } 18 | 19 | variable "memory_target_value" { 20 | description = "The percentage Memory target value for ECS service" 21 | type = number 22 | default = 80 23 | } 24 | 25 | variable "task_scale_in_cooldown_period" { 26 | description = "The amount of time, in seconds, after a scale in activity completes before another task scale in activity can start." 27 | type = number 28 | default = 60 29 | } 30 | 31 | variable "task_scale_out_cooldown_period" { 32 | description = "The amount of time, in seconds, after a scale out activity completes before another task scale out activity can start." 33 | type = number 34 | default = 60 35 | } 36 | -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-docker-service/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-docker-service/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_region" "current" {} 2 | 3 | data "aws_caller_identity" "current" {} -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-docker-service/07-docker-pull-push.tf: -------------------------------------------------------------------------------- 1 | resource "null_resource" "docker_pull_push" { 2 | 3 | triggers = { 4 | shell_hash = sha256("${var.container_properties.image_name}${var.container_properties.image_tag}${aws_ecr_repository.ecr.repository_url}") 5 | } 6 | 7 | provisioner "local-exec" { 8 | interpreter = ["bash", "-c"] 9 | command = "${path.module}/docker_pull_push.sh ${var.container_properties.image_name} ${var.container_properties.image_tag} ${aws_ecr_repository.ecr.repository_url} ${var.is_live_environment}" 10 | } 11 | 12 | depends_on = [aws_ecr_repository.ecr] 13 | } -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-docker-service/08-ecr.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ecr_repository" "ecr" { 2 | tags = var.tags 3 | name = "${var.short_identifier_prefix}${var.container_properties.container_name}" 4 | image_scanning_configuration { 5 | scan_on_push = true 6 | } 7 | } -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-docker-service/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "security_group_id" { 2 | value = var.container_properties.standalone_onetime_task ? "" : aws_security_group.ecs_tasks[0].id 3 | } 4 | 5 | output "service_name" { 6 | value = var.container_properties.standalone_onetime_task ? "" : aws_ecs_service.ecs_service[0].name 7 | } -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-docker-service/docker_pull_push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | image_name=$1 6 | image_tag=$2 7 | ecr_url=$3 8 | is_live_environment=$4 9 | 10 | #Pull image from docker hub 11 | docker pull "$image_name":"$image_tag" 12 | 13 | if $is_live_environment; then 14 | export AWS_PROFILE=deploy_role 15 | fi 16 | 17 | #Login to ECR 18 | # shellcheck disable=SC2091 19 | # shellcheck disable=SC2216 20 | $(aws ecr get-login-password --region eu-west-2 | docker login --username AWS --password-stdin "$ecr_url") | true 21 | 22 | #Tag image pulled from docker hub 23 | docker tag "$image_name":"$image_tag" "$ecr_url":"$image_tag" 24 | 25 | #Push tagged image to ECR 26 | docker push "$ecr_url":"$image_tag" -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-fargate-task/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-fargate-task/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "security_groups" { 2 | description = "Security groups the task should be attached to" 3 | type = list(string) 4 | default = [] 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-fargate-task/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | tasks = [for task in var.tasks : merge({ task_id = (task.task_prefix == null ? "" : task.task_prefix) }, task)] 3 | } -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-fargate-task/11-aws-ecr.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ecr_repository" "worker" { 2 | tags = var.tags 3 | name = var.operation_name 4 | image_scanning_configuration { 5 | scan_on_push = true 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /terraform/modules/aws-ecs-fargate-task/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "ecr_repository_worker_endpoint" { 2 | value = aws_ecr_repository.worker.repository_url 3 | } 4 | 5 | output "task_role" { 6 | value = aws_iam_role.task_role.arn 7 | } 8 | 9 | output "event_rule_names" { 10 | value = [for event_rule in aws_cloudwatch_event_rule.ecs_task : event_rule.name] 11 | } 12 | 13 | output "event_rule_arns" { 14 | value = [for event_rule in aws_cloudwatch_event_rule.ecs_task : event_rule.arn] 15 | } -------------------------------------------------------------------------------- /terraform/modules/aws-glue-job-with-crawler/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/aws-glue-job-with-crawler/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "job_name" { 7 | description = "Name of the AWS glue job" 8 | type = string 9 | } 10 | 11 | variable "glue_role_arn" { 12 | description = "Glue Role ARN that the job will use to excecute" 13 | type = string 14 | } 15 | 16 | variable "job_script_location" { 17 | description = "S3 URL of the location of the script for the glue job" 18 | type = string 19 | } 20 | 21 | variable "job_arguments" { 22 | description = "Arguments to pass to the glue job" 23 | type = map(string) 24 | } 25 | 26 | variable "name_prefix" { 27 | description = "Prefix to add to the name of the triggers and crawlers." 
28 | type = string 29 | } 30 | 31 | variable "database_name" { 32 | description = "Name of database" 33 | type = string 34 | } 35 | 36 | variable "table_prefix" { 37 | description = "Prefix to give to tables that are crawled" 38 | type = string 39 | } 40 | 41 | variable "s3_target_location" { 42 | description = "URL for where the target data will be stored in S3" 43 | type = string 44 | } 45 | -------------------------------------------------------------------------------- /terraform/modules/aws-glue-job-with-crawler/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "glue_crawler_excluded_blobs" { 2 | description = "A list of blobs to ignore when crawling the job" 3 | type = list(string) 4 | default = [ 5 | "*.json", 6 | "*.txt", 7 | "*.zip", 8 | "*.xlsx" 9 | ] 10 | } 11 | 12 | variable "workflow_name" { 13 | description = "Optional. Workflow to add the triggers to." 14 | type = string 15 | default = null 16 | } 17 | 18 | variable "crawler_to_trigger" { 19 | description = < 7 7 | error_message = "Job name must be at least 7 characters and include the department name." 8 | } 9 | } 10 | 11 | variable "helper_module_key" { 12 | description = "Helpers Python module S3 object key" 13 | type = string 14 | } 15 | 16 | variable "pydeequ_zip_key" { 17 | description = "Pydeequ module to be used in Glue scripts" 18 | type = string 19 | } 20 | 21 | variable "spark_ui_output_storage_id" { 22 | description = "Id of S3 bucket containing Spark UI output logs" 23 | type = string 24 | } 25 | 26 | variable "is_production_environment" { 27 | description = "A flag indicating if we are running in production for setting up automation" 28 | type = bool 29 | } 30 | 31 | variable "is_live_environment" { 32 | description = "A flag indicating if we are running in a live environment for setting up automation" 33 | type = bool 34 | } -------------------------------------------------------------------------------- /terraform/modules/aws-glue-job/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | job_name_identifier = replace(lower(var.job_name), "/[^a-zA-Z0-9]+/", "-") 3 | } -------------------------------------------------------------------------------- /terraform/modules/aws-glue-job/11-aws-glue-crawler.tf: -------------------------------------------------------------------------------- 1 | resource "aws_glue_crawler" "crawler" { 2 | count = var.crawler_details.database_name == null ? 0 : 1 3 | tags = local.tags 4 | 5 | database_name = var.crawler_details.database_name 6 | name = local.job_name_identifier 7 | role = local.glue_role_arn 8 | table_prefix = var.crawler_details.table_prefix 9 | 10 | s3_target { 11 | path = var.crawler_details.s3_target_location 12 | 13 | exclusions = var.glue_crawler_excluded_blobs 14 | } 15 | 16 | configuration = var.crawler_details.configuration 17 | } 18 | 19 | resource "aws_glue_trigger" "crawler_trigger" { 20 | count = var.crawler_details.database_name == null ?
0 : 1 21 | tags = local.tags 22 | 23 | name = "${local.job_name_identifier}-crawler-trigger" 24 | type = "CONDITIONAL" 25 | workflow_name = var.workflow_name 26 | 27 | predicate { 28 | conditions { 29 | job_name = aws_glue_job.job.name 30 | state = "SUCCEEDED" 31 | } 32 | } 33 | 34 | actions { 35 | crawler_name = aws_glue_crawler.crawler[0].name 36 | } 37 | } -------------------------------------------------------------------------------- /terraform/modules/aws-glue-job/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "crawler_name" { 2 | value = length(aws_glue_crawler.crawler) == 0 ? null : aws_glue_crawler.crawler[0].name 3 | } 4 | 5 | output "job_name" { 6 | value = aws_glue_job.job.name 7 | } 8 | 9 | output "job_arn" { 10 | value = aws_glue_job.job.arn 11 | } 12 | 13 | output "trigger_name" { 14 | value = aws_glue_trigger.job_trigger.name 15 | } 16 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda-folder-ingestion/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda-folder-ingestion/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "lambda_name" { 2 | type = string 3 | description = "Name of the Lambda Function" 4 | } 5 | 6 | variable "handler" { 7 | type = string 8 | description = "Function entrypoint in the format of file.function" 9 | } 10 | 11 | variable "lambda_artefact_storage_bucket" { 12 | type = string 13 | description = "S3 Bucket to store the Lambda artefact in" 14 | } 15 | 16 | variable "s3_key" { 17 | type = string 18 | description = "S3 Key to store the Lambda artefact in" 19 | } 20 | 21 | variable "lambda_source_dir" { 22 | type = string 23 | description = "Directory containing the Lambda Function source code" 24 | } 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda-folder-ingestion/20-iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "lambda_role" { 2 | name = "${var.identifier_prefix}${var.lambda_name}-role" 3 | assume_role_policy = jsonencode({ 4 | Version = "2012-10-17" 5 | Statement = [ 6 | { 7 | Action = "sts:AssumeRole" 8 | Effect = "Allow" 9 | Principal = { 10 | Service = "lambda.amazonaws.com" 11 | } 12 | } 13 | ] 14 | }) 15 | } 16 | 17 | data "aws_iam_policy_document" "lambda_role" { 18 | statement { 19 | actions = [ 20 | "logs:CreateLogGroup", 21 | "logs:CreateLogStream", 22 | "logs:PutLogEvents", 23 | ] 24 | effect = "Allow" 25 | resources = ["*"] 26 | } 27 | } 28 | 29 | resource "aws_iam_policy" "lambda_role" { 30 | name = lower("${var.identifier_prefix}${var.lambda_name}") 31 | policy = data.aws_iam_policy_document.lambda_role.json 32 | tags = var.tags 33 | } 34 | 35 | resource "aws_iam_role_policy_attachment" "lambda_role" { 36 | role = aws_iam_role.lambda_role.name 37 | 
policy_arn = aws_iam_policy.lambda_role.arn 38 | } 39 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda-folder-ingestion/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "lambda_function_arn" { 2 | description = "value of the lambda function arn" 3 | value = aws_lambda_function.lambda.arn 4 | } 5 | 6 | output "lambda_iam_role" { 7 | description = "name of the lambda function iam role" 8 | value = aws_iam_role.lambda_role.name 9 | } 10 | 11 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda-layers/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda-layers/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "lambda_name" { 2 | type = string 3 | description = "Name of the Lambda Function" 4 | } 5 | 6 | variable "layer_zip_file" { 7 | description = "The name of the zip file filename.zip" 8 | type = string 9 | } 10 | 11 | variable "layer_name" { 12 | description = "Name of the lambda layer" 13 | type = string 14 | } 15 | 16 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda-layers/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "identifier_prefix" { 2 | type = string 3 | description = "Environment identifier prefix" 4 | default = "" 5 | } 6 | 7 | variable "tags" { 8 | type = map(string) 9 | description = "Tags to apply to all resources" 10 | default = {} 11 | } 12 | 13 | variable "compatible_runtimes" { 14 | description = "List of compatible runtimes for the lambda layer" 15 | type = list(string) 16 | default = ["python3.10"] 17 | } 18 | 19 | variable "environment_variables" { 20 | type = map(string) 21 | description = "Environment Variables to pass to the Lambda Function" 22 | default = null 23 | } 24 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda-layers/99-outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/aws-lambda-layers/99-outputs.tf -------------------------------------------------------------------------------- /terraform/modules/aws-lambda/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "lambda_name" { 2 | type = string 3 | description = "Name of the Lambda Function" 4 | } 5 | 6 | variable "handler" { 7 | type = string 8 | description = "Function entrypoint in the format of file.function" 9 | } 10 | 11 | variable "lambda_artefact_storage_bucket" { 12 | type = string 13 | description = "S3 Bucket to store the Lambda artefact in" 14 | } 15 | 16 | variable "s3_key" { 17 | type = string 18 | description = "S3 Key to store the Lambda artefact in" 19 | } 20 | 21 | variable "lambda_source_dir" { 22 | type = string 23 | description = "Directory containing the Lambda Function source code" 24 | } 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda/20-iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "lambda_role" { 2 | name = "${var.identifier_prefix}-${var.lambda_name}-role" 3 | assume_role_policy = jsonencode({ 4 | Version = "2012-10-17" 5 | Statement = [ 6 | { 7 | Action = "sts:AssumeRole" 8 | Effect = "Allow" 9 | Principal = { 10 | Service = "lambda.amazonaws.com" 11 | } 12 | } 13 | ] 14 | }) 15 | } 16 | 17 | data "aws_iam_policy_document" "lambda_role" { 18 | statement { 19 | actions = [ 20 | "logs:CreateLogGroup", 21 | "logs:CreateLogStream", 22 | "logs:PutLogEvents", 23 | ] 24 | effect = "Allow" 25 | resources = ["*"] 26 | } 27 | } 28 | 29 | resource "aws_iam_policy" "lambda_role" { 30 | name = lower("${var.identifier_prefix}-${var.lambda_name}") 31 | policy = data.aws_iam_policy_document.lambda_role.json 32 | tags = var.tags 33 | } 34 | 35 | resource "aws_iam_role_policy_attachment" "lambda_role" { 36 | role = aws_iam_role.lambda_role.name 37 | policy_arn = aws_iam_policy.lambda_role.arn 38 | } 39 | -------------------------------------------------------------------------------- /terraform/modules/aws-lambda/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "lambda_function_arn" { 2 | description = "value of the lambda function arn" 3 | value = aws_lambda_function.lambda.arn 4 | } 5 | 6 | output "lambda_iam_role" { 7 | description = "name of the lambda function iam role" 8 | value = aws_iam_role.lambda_role.name 9 | } 10 | 11 | output "lambda_iam_role_arn" { 12 | description = "arn of the lambda function iam role" 13 | value = aws_iam_role.lambda_role.arn 14 | } 15 | 16 | output "lambda_name" { 17 | description = "name of the lambda function" 18 | value = aws_lambda_function.lambda.function_name 19 | } 20 | -------------------------------------------------------------------------------- /terraform/modules/aws-step-functions/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/aws-step-functions/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | 2 | variable "name" { 3 | type = string 4 | description = "name of the state machine" 5 | validation { 6 | condition = can(regex("^[a-zA-Z0-9-_]+$", var.name)) 7 | error_message = "name must be alphanumeric, dashes and underscores are allowed" 8 | } 9 | } 10 | 11 | variable "definition" { 12 | type = string 13 | description = "definition of the state machine" 14 | } 15 | 16 | variable "role_arn" { 17 | type = string 18 | description = "role arn for the state machine" 19 | } 20 | -------------------------------------------------------------------------------- /terraform/modules/aws-step-functions/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | type = map(string) 3 | description = "Tags to apply to all resources" 4 | default = {} 5 | } 6 | 7 | variable "identifier_prefix" { 8 | type = string 9 | description = "Environment identifier prefix" 10 | default = "" 11 | } -------------------------------------------------------------------------------- /terraform/modules/aws-step-functions/10-main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_sfn_state_machine" "step_function" { 2 | name = "${var.identifier_prefix}${var.name}" 3 | role_arn = var.role_arn 4 | definition = var.definition 5 | tags = var.tags 6 | } 7 | -------------------------------------------------------------------------------- /terraform/modules/aws-step-functions/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "arn" { 2 | description = "value of the step function arn" 3 | value = aws_sfn_state_machine.step_function.arn 4 | } -------------------------------------------------------------------------------- /terraform/modules/copy-from-s3-to-s3/.gitignore: -------------------------------------------------------------------------------- 1 | copy-from-s3-to-s3.zip -------------------------------------------------------------------------------- /terraform/modules/copy-from-s3-to-s3/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/copy-from-s3-to-s3/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "assume_role" { 2 | description = "A role to assume when copying the data" 3 | default = false 4 | type = string 5 | } 6 | 7 | variable "lambda_execution_cron_schedule" { 8 | description = "CRON expression to schedule the Lambda" 9 | type = string 10 | default = "cron(0 9 * * ? 
*)" 11 | } 12 | 13 | -------------------------------------------------------------------------------- /terraform/modules/copy-from-s3-to-s3/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | 3 | locals { 4 | lambda_timeout = 900 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/copy-from-s3-to-s3/lambda/.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules 2 | *.zip 3 | -------------------------------------------------------------------------------- /terraform/modules/copy-from-s3-to-s3/lambda/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "aws-sdk": "^2.889.0" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/README.md: -------------------------------------------------------------------------------- 1 | Data-Only Modules 2 | 3 | Please see terraform documentation which explains the usage and purpose of data source modules: [Link](https://www.terraform.io/language/modules/develop/composition#data-only-modules) -------------------------------------------------------------------------------- /terraform/modules/data-sources/aws-glue-job/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/aws-glue-job/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "job_name" { 2 | description = "Name of the AWS glue job" 3 | type = string 4 | 5 | validation { 6 | condition = length(var.job_name) > 7 7 | error_message = "Job name must be at least 7 characters and include the department name." 8 | } 9 | } -------------------------------------------------------------------------------- /terraform/modules/data-sources/aws-glue-job/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | job_name_identifier = replace(lower(var.job_name), "/[^a-zA-Z0-9]+/", "-") 3 | default_crawler_configuration = jsonencode({ 4 | Version = 1.0 5 | CrawlerOutput = { 6 | Partitions = { AddOrUpdateBehavior = "InheritFromTable" } 7 | } 8 | }) 9 | crawler_details = defaults(var.crawler_details, { 10 | configuration = local.default_crawler_configuration 11 | }) 12 | } -------------------------------------------------------------------------------- /terraform/modules/data-sources/aws-glue-job/11-aws-glue-crawler.tf: -------------------------------------------------------------------------------- 1 | data "aws_glue_crawler" "crawler" { 2 | count = var.crawler_details.database_name == null ? 0 : 1 3 | name = local.job_name_identifier 4 | } 5 | 6 | data "aws_glue_trigger" "crawler_trigger" { 7 | count = var.crawler_details.database_name == null ? 
0 : 1 8 | name = "${local.job_name_identifier}-crawler-trigger" 9 | } -------------------------------------------------------------------------------- /terraform/modules/data-sources/aws-glue-job/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "crawler_name" { 2 | value = length(data.aws_glue_crawler.crawler) == 0 ? null : data.aws_glue_crawler.crawler[0].name 3 | } 4 | 5 | output "job_name" { 6 | value = data.aws_glue_job.job.name 7 | } 8 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/department/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | configuration_aliases = [ 13 | aws.aws_hackit_account 14 | ] 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/department/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | department_identifier = replace(lower(var.name), "/[^a-zA-Z0-9]+/", "-") 3 | department_pascalcase = replace(title(replace(var.name, "/[^a-zA-Z0-9]+/", " ")), " ", "") 4 | } 5 | 6 | data "aws_caller_identity" "current" {} -------------------------------------------------------------------------------- /terraform/modules/data-sources/department/20-aws-glue-databases.tf: -------------------------------------------------------------------------------- 1 | data "aws_ssm_parameter" "raw_zone_catalog_database_name" { 2 | name = "/${var.identifier_prefix}/glue_catalog_database/${local.department_identifier}/raw_zone_catalog_database_name" 3 | } 4 | 5 | data "aws_ssm_parameter" "refined_zone_catalog_database_name" { 6 | name = "/${var.identifier_prefix}/glue_catalog_database/${local.department_identifier}/refined_zone_catalog_database_name" 7 | } 8 | 9 | data "aws_ssm_parameter" "trusted_zone_catalog_database_name" { 10 | name = "/${var.identifier_prefix}/glue_catalog_database/${local.department_identifier}/trusted_zone_catalog_database_name" 11 | } -------------------------------------------------------------------------------- /terraform/modules/data-sources/department/30-google.tf: -------------------------------------------------------------------------------- 1 | data "google_project" "project" {} 2 | 3 | module "google_service_account_data_source" { 4 | source = "../google-service-account" 5 | is_live_environment = var.is_live_environment 6 | department_name = local.department_identifier 7 | identifier_prefix = var.short_identifier_prefix 8 | } 9 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/department/40-aws-secretsmanager.tf: -------------------------------------------------------------------------------- 1 | data "aws_secretsmanager_secret" "redshift_cluster_credentials" { 2 | arn = data.aws_ssm_parameter.redshift_cluster_credentials_arn.value 3 | } 4 | 5 | data "aws_ssm_parameter" "redshift_cluster_credentials_arn" { 6 | name = 
"/${var.identifier_prefix}/redshift/${local.department_identifier}/redshift_cluster_credentials_arn" 7 | } 8 | 9 | data "aws_secretsmanager_secret_version" "redshift_creds" { 10 | secret_id = data.aws_secretsmanager_secret.redshift_cluster_credentials.id 11 | } 12 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/department/50-aws-iam-roles.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_role" "glue_agent" { 2 | name = lower("${var.identifier_prefix}-glue-${local.department_identifier}") 3 | } -------------------------------------------------------------------------------- /terraform/modules/data-sources/department/70-aws-sns.tf: -------------------------------------------------------------------------------- 1 | data "aws_sns_topic" "glue_jobs" { 2 | name = "glue-failure-notification-${var.short_identifier_prefix}${local.department_identifier}" 3 | } 4 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/g-drive-to-s3/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/g-drive-to-s3/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | 2 | variable "identifier_prefix" { 3 | description = "Project wide resource identifier prefix" 4 | type = string 5 | } 6 | 7 | variable "lambda_name" { 8 | type = string 9 | 10 | validation { 11 | condition = length(var.lambda_name) <= 51 12 | error_message = "The lambda_name must be less than 51 characters long." 
13 | } 14 | } -------------------------------------------------------------------------------- /terraform/modules/data-sources/g-drive-to-s3/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/g-drive-to-s3/10-lambda.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_role" "g_drive_to_s3_copier_lambda" { 2 | name = lower("${var.identifier_prefix}from-g-drive-${var.lambda_name}") 3 | } 4 | 5 | data "aws_s3_object" "g_drive_to_s3_copier_lambda" { 6 | bucket = var.lambda_artefact_storage_bucket 7 | key = "g_drive_to_s3.zip" 8 | } 9 | 10 | data "aws_lambda_function" "g_drive_to_s3_copier_lambda" { 11 | function_name = lower("${var.identifier_prefix}g-drive-${var.lambda_name}") 12 | } 13 | 14 | data "aws_lambda_function_event_invoke_config" "g_drive_to_s3_copier_lambda" { 15 | function_name = data.aws_lambda_function.g_drive_to_s3_copier_lambda.function_name 16 | } 17 | 18 | data "aws_cloudwatch_event_rule" "every_day_at_6" { 19 | name_prefix = "g-drive-to-s3-copier-every-day-at-6-" 20 | } 21 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/g-drive-to-s3/99-outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/data-sources/g-drive-to-s3/99-outputs.tf -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-service-account/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicit Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-service-account/00-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "is_live_environment" { 2 | description = "A flag indicating if we are running in a live environment for setting up automation" 3 | type = bool 4 | } 5 | 6 | variable "department_name" { 7 | type = string 8 | } 9 | 10 | variable "identifier_prefix" { 11 | type = string 12 | } 13 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-service-account/01-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "secret_type" { 2 | description = "Specify the type of secret to store in Secrets Manager" 3 | type = string 4 | default = "binary" 5 | 6 | validation { 7 | condition = contains(["binary", "string"], var.secret_type) 8 | error_message = "Secret type must be \"binary\" or \"string\"."
9 | } 10 | } -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-service-account/45-service-account.tf: -------------------------------------------------------------------------------- 1 | data "google_service_account" "service_account" { 2 | count = var.is_live_environment ? 1 : 0 3 | account_id = lower("${var.identifier_prefix}${var.department_name}") 4 | } 5 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-service-account/53-secrets-manager.tf: -------------------------------------------------------------------------------- 1 | data "aws_secretsmanager_secret" "sheets_credentials" { 2 | name = data.aws_ssm_parameter.sheets_credentials_name.value 3 | } 4 | 5 | data "aws_ssm_parameter" "sheets_credentials_name" { 6 | name = "/${var.identifier_prefix}${var.department_name}/secrets_manager/sheets-credential/name" 7 | } 8 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-service-account/99-outpus.tf: -------------------------------------------------------------------------------- 1 | output "email" { 2 | value = length(data.google_service_account.service_account) == 1 ? data.google_service_account.service_account[0].email : "" 3 | } 4 | 5 | output "credentials_secret" { 6 | value = data.aws_secretsmanager_secret.sheets_credentials 7 | } -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-sheets-glue-job/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-sheets-glue-job/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "sheets_credentials_name" { 2 | description = "Override the default department Google sheets credentials name" 3 | type = string 4 | default = null 5 | } 6 | 7 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-sheets-glue-job/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | dataset_name = lower(replace(var.dataset_name, "_", "-")) 3 | import_name = "${var.department.identifier}-${local.dataset_name}" 4 | full_output_path = "s3://${var.bucket_id}/${var.department.identifier}/${local.dataset_name}" 5 | sheets_credentials_name = var.sheets_credentials_name == null ? 
var.department.google_service_account.credentials_secret.name : var.sheets_credentials_name 6 | } -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-sheets-glue-job/10-aws-glue-job.tf: -------------------------------------------------------------------------------- 1 | module "google_sheet_import_data_source" { 2 | source = "../aws-glue-job" 3 | is_live_environment = local.is_live_environment 4 | is_production_environment = local.is_production_environment 5 | 6 | department = var.department 7 | job_name = "Google Sheets Import Job - ${local.import_name}" 8 | script_s3_object_key = var.google_sheets_import_script_key 9 | crawler_details = { 10 | database_name = var.glue_catalog_database_name 11 | s3_target_location = local.full_output_path 12 | table_prefix = "${var.department.identifier_snake_case}_" 13 | configuration = jsonencode({ 14 | Version = 1.0 15 | Grouping = { 16 | TableGroupingPolicy = "CombineCompatibleSchemas" 17 | } 18 | }) 19 | } 20 | } 21 | 22 | data "aws_glue_workflow" "workflow" { 23 | name = "${var.identifier_prefix}${local.import_name}" 24 | } 25 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/google-sheets-glue-job/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "glue_job_name" { 2 | description = "Glue job name" 3 | value = module.google_sheet_import_data_source.job_name 4 | } 5 | 6 | output "crawler_name" { 7 | description = "Crawler name" 8 | value = module.google_sheet_import_data_source.crawler_name 9 | } 10 | 11 | output "workflow_name" { 12 | description = "Workflow name" 13 | value = data.aws_glue_workflow.workflow.name 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/import-data-from-spreadsheet-job/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/import-data-from-spreadsheet-job/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | worksheet_key = lower(replace(replace(trimspace(var.worksheet_name), ".", ""), " ", "-")) 3 | import_name = "${var.department.identifier}-${local.worksheet_key}" 4 | } 5 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/import-data-from-spreadsheet-job/10-aws-glue-job.tf: -------------------------------------------------------------------------------- 1 | # Import test data 2 | module "spreadsheet_import_data_source" { 3 | source = "../aws-glue-job" 4 | is_live_environment = var.is_live_environment 5 | is_production_environment = var.is_production_environment 6 | 7 | department = var.department 8 | job_name = "Spreadsheet Import Job - ${var.department.identifier}-${var.glue_job_name}" 9 | script_s3_object_key = var.spreadsheet_import_script_key 10 | crawler_details = { 11 | database_name = var.glue_catalog_database_name 12 | s3_target_location = "s3://${var.raw_zone_bucket_id}/${var.department.identifier}/${var.output_folder_name}" 13 | configuration = jsonencode({ 14 | Version = 1.0 15 | Grouping = { 16 | TableLevelConfiguration = 3 17 | } 18 | }) 19 | } 20 | } 21 | 22 | data "aws_glue_workflow" "workflow" { 23 | name = "${var.identifier_prefix}${local.import_name}-${var.output_folder_name}" 24 | } 25 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/import-data-from-spreadsheet-job/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "job_name" { 2 | value = module.spreadsheet_import_data_source.job_name 3 | } 4 | 5 | output "catalog_table" { 6 | value = replace("${var.department.identifier}_${var.data_set_name}", "-", "_") 7 | } 8 | 9 | output "worksheet_key" { 10 | value = local.worksheet_key 11 | } 12 | 13 | output "workflow_name" { 14 | value = data.aws_glue_workflow.workflow.id 15 | } 16 | 17 | output "crawler_name" { 18 | value = module.spreadsheet_import_data_source.crawler_name 19 | } 20 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/import-spreadsheet-file-from-g-drive/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/import-spreadsheet-file-from-g-drive/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "worksheets" { 2 | type = map( 3 | object({ 4 | header_row_number = number 5 | worksheet_name = string 6 | }) 7 | ) 8 | default = { 9 | sheet1 : { 10 | header_row_number = 0 11 | worksheet_name = "1" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/import-spreadsheet-file-from-g-drive/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "worksheet_resources" { 2 | value = tomap({ 3 | for k in keys(module.import_data_from_spreadsheet_job_data_source) : module.import_data_from_spreadsheet_job_data_source[k].worksheet_key => { 4 | catalog_table = module.import_data_from_spreadsheet_job_data_source[k].catalog_table 5 | crawler_name = module.import_data_from_spreadsheet_job_data_source[k].crawler_name 6 | job_arn = module.import_data_from_spreadsheet_job_data_source[k].job_arn 7 | job_name = module.import_data_from_spreadsheet_job_data_source[k].job_name 8 | workflow_name = module.import_data_from_spreadsheet_job_data_source[k].workflow_name 9 | } 10 | }) 11 | } 12 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/s3-bucket/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/s3-bucket/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "bucket_identifier" { 2 | description = "URL safe bucket identifier" 3 | type = string 4 | } 5 | 6 | variable "identifier_prefix" { 7 | description = "Project wide resource identifier prefix" 8 | type = string 9 | } 10 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/s3-bucket/10-s3-bucket.tf: -------------------------------------------------------------------------------- 1 | data "aws_kms_key" "key" { 2 | key_id = lower("alias/${var.identifier_prefix}-s3-${var.bucket_identifier}") 3 | } 4 | 5 | data "aws_s3_bucket" "bucket" { 6 | bucket = lower("${var.identifier_prefix}-${var.bucket_identifier}") 7 | } 8 | 9 | data "aws_s3_bucket_policy" "bucket_policy" { 10 | bucket = data.aws_s3_bucket.bucket.id 11 | } 12 | -------------------------------------------------------------------------------- /terraform/modules/data-sources/s3-bucket/99-outputs.tf: -------------------------------------------------------------------------------- 1 | # We make any output files clear by adding them to the 99-outputs.tf, meaning anyone can quickly check if they're consuming your module 2 | output "bucket_id" { 3 | description = "Bucket id of bucket" 4 | value = data.aws_s3_bucket.bucket.bucket 5 | } 6 | 7 | output "bucket_arn" { 8 | description = "Bucket id of bucket" 9 | value = data.aws_s3_bucket.bucket.arn 10 | } 11 | 12 | output "kms_key_id" { 13 | description = "KMS Key id" 14 | value = data.aws_kms_key.key.id 15 | } 16 | 17 | output "kms_key_arn" { 18 | description = "KMS Key arn" 19 | value = data.aws_kms_key.key.arn 20 | } 21 | 22 | output "bucket_url" { 23 | description = "S3 bucket url" 24 | value = "s3://${data.aws_s3_bucket.bucket.bucket}" 25 | } 26 | -------------------------------------------------------------------------------- /terraform/modules/database-ingestion-via-jdbc-connection/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicitly Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/database-ingestion-via-jdbc-connection/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "jdbc_connection_url" { 2 | description = "The JDBC Connection Url used to connect to the source database" 3 | type = string 4 | } 5 | 6 | variable "name" { 7 | description = "Name of the dataset that will be ingested." 
8 | } 9 | 10 | variable "jdbc_connection_description" { 11 | description = "The type of connection and database that is used for data ingestion" 12 | type = string 13 | } 14 | 15 | variable "database_secret_name" { 16 | description = "Name of secret for database credentials" 17 | type = string 18 | } 19 | 20 | variable "tags" { 21 | description = "AWS tags" 22 | type = map(string) 23 | } 24 | 25 | variable "jdbc_connection_subnet" { 26 | description = "Subnet used for the JDBC connection" 27 | type = object({ 28 | id = string 29 | availability_zone = string 30 | vpc_id = string 31 | }) 32 | } 33 | 34 | variable "identifier_prefix" { 35 | description = "Project wide short resource identifier prefix" 36 | type = string 37 | } 38 | -------------------------------------------------------------------------------- /terraform/modules/database-ingestion-via-jdbc-connection/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "schema_name" { 2 | description = "Name of schema in the database containing tables to be ingested" 3 | type = string 4 | default = null 5 | } 6 | 7 | variable "create_workflow" { 8 | description = "Used to determine whether a workflow should be created for the ingestion process" 9 | type = bool 10 | default = true 11 | } 12 | 13 | variable "job_schedule" { 14 | description = "Used to set the schedule for the ingestion job" 15 | type = string 16 | default = "cron(15 0 ? * MON,TUE,WED,THU,FRI *)" 17 | } 18 | -------------------------------------------------------------------------------- /terraform/modules/database-ingestion-via-jdbc-connection/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | database_and_schema_name_lowercase = var.schema_name == null ? 
lower(local.database_name) : lower("${local.database_name}-${var.schema_name}") 3 | } 4 | -------------------------------------------------------------------------------- /terraform/modules/database-ingestion-via-jdbc-connection/20-aws-iam-role.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_policy_document" "jdbc_connection_crawler_role" { 2 | statement { 3 | actions = ["sts:AssumeRole"] 4 | 5 | principals { 6 | identifiers = ["glue.amazonaws.com"] 7 | type = "Service" 8 | } 9 | } 10 | } 11 | 12 | resource "aws_iam_role" "jdbc_connection_crawler_role" { 13 | tags = var.tags 14 | 15 | name = "${var.identifier_prefix}${local.database_and_schema_name_lowercase}-crawler-can-access-jdbc-connection" 16 | assume_role_policy = data.aws_iam_policy_document.jdbc_connection_crawler_role.json 17 | } 18 | 19 | resource "aws_iam_role_policy_attachment" "crawler_can_access_jdbc_connection" { 20 | role = aws_iam_role.jdbc_connection_crawler_role.name 21 | policy_arn = aws_iam_policy.crawler_can_access_jdbc_connection.arn 22 | } 23 | -------------------------------------------------------------------------------- /terraform/modules/database-ingestion-via-jdbc-connection/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "jdbc_connection_name" { 2 | value = aws_glue_connection.jdbc_database_ingestion.name 3 | } 4 | 5 | output "ingestion_database_name" { 6 | value = aws_glue_catalog_database.ingestion_connection.name 7 | } 8 | 9 | output "workflow_name" { 10 | value = local.workflow_name 11 | } 12 | 13 | output "crawler_name" { 14 | value = aws_glue_crawler.ingestion_database_connection.name 15 | } -------------------------------------------------------------------------------- /terraform/modules/datahub/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/datahub/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "enable_load_balancer" { 2 | type = bool 3 | default = false 4 | } 5 | 6 | variable "hub_firewall_ips" { 7 | type = list(string) 8 | default = ["192.168.20.0/28", "192.168.21.0/28"] 9 | } -------------------------------------------------------------------------------- /terraform/modules/datahub/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_vpc" "vpc" { 2 | id = var.vpc_id 3 | } 4 | 5 | data "aws_region" "current" {} 6 | 7 | data "aws_caller_identity" "current" {} 8 | -------------------------------------------------------------------------------- /terraform/modules/datahub/06-ecs-cluster.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ecs_cluster" "datahub" { 2 | tags = var.tags 3 | name = "${var.short_identifier_prefix}datahub" 4 | } -------------------------------------------------------------------------------- /terraform/modules/datahub/07-cloudwatch-logs.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_log_group" "datahub" { 2 | name = "${var.short_identifier_prefix}datahub" 3 | tags = var.tags 4 | } -------------------------------------------------------------------------------- /terraform/modules/datahub/13-ecs-autoscaling-group.tf: -------------------------------------------------------------------------------- 1 | module "datahub_ecs_gms_autoscaling_group" { 2 | source = "../aws-ecs-autoscaling-group" 3 | 4 | name = "${var.short_identifier_prefix}datahub-gms" 5 | ecs_autoscaling_role_arn = aws_iam_role.datahub_ecs_autoscale.arn 6 | ecs_cluster_name = aws_ecs_cluster.datahub.name 7 | ecs_service_name = module.datahub_gms.service_name 8 | } 9 | 10 | module "datahub_ecs_frontend_autoscaling_group" { 11 | source = "../aws-ecs-autoscaling-group" 12 | 13 | name = "${var.short_identifier_prefix}datahub-frontend" 14 | ecs_autoscaling_role_arn = aws_iam_role.datahub_ecs_autoscale.arn 15 | ecs_cluster_name = aws_ecs_cluster.datahub.name 16 | ecs_service_name = module.datahub_frontend_react.service_name 17 | } -------------------------------------------------------------------------------- /terraform/modules/datahub/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "datahub_gms_service_security_group_id" { 2 | value = module.datahub_gms.security_group_id 3 | } 4 | 5 | output "datahub_mae_security_group_id" { 6 | value = module.datahub_mae_consumer.security_group_id 7 | } 8 | 9 | output "datahub_mce_security_group_id" { 10 | value = module.datahub_mce_consumer.security_group_id 11 | } 12 | 13 | output "datahub_actions_security_group_id" { 14 | value = module.datahub_actions.security_group_id 15 | } 16 | 17 | output "datahub_kafka_setup_security_group_id" { 18 | value = module.kafka_setup.security_group_id 19 | } 20 | -------------------------------------------------------------------------------- /terraform/modules/datahub/Datahub.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/datahub/Datahub.png -------------------------------------------------------------------------------- /terraform/modules/datahub/datasource-ingestion-recipes/glue-example.yml: -------------------------------------------------------------------------------- 1 | source: 2 | type: glue 3 | config: 4 | aws_region: '${AWS_DEFAULT_REGION}' 5 | aws_role: '${AWS_ROLE}' 6 | extract_transforms: '${GLUE_EXTRACT_TRANSFORMS}' 7 | sink: 8 | type: datahub-rest 9 | config: 10 | server: '${GMS_URL}' 11 | -------------------------------------------------------------------------------- /terraform/modules/db-snapshot-to-s3-sandbox-resources/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | configuration_aliases = [ 13 | aws.aws_sandbox_account 14 | ] 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /terraform/modules/db-snapshot-to-s3-sandbox-resources/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "identifier_prefix" { 7 | description = "Project wide resource identifier prefix" 8 | type = string 9 | } 10 | 11 | variable "aws_sandbox_subnet_ids" { 12 | description = "AWS sandbox accounts subnet ids" 13 | type = list(string) 14 | } 15 | 16 | variable "aws_sandbox_account_id" { 17 | description = "Sandbox account ID" 18 | type = string 19 | } 20 | 21 | variable "aws_sandbox_vpc_id" { 22 | description = "VPC id of the sandbox account" 23 | type = string 24 | } 25 | -------------------------------------------------------------------------------- /terraform/modules/db-snapshot-to-s3/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/db-snapshot-to-s3/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | 3 | locals { 4 | lambda_timeout = 900 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/db-snapshot-to-s3/10-s3.tf: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /terraform/modules/db-snapshot-to-s3/99-outputs.tf: -------------------------------------------------------------------------------- 1 | # We make any output files clear by adding them to the 99-outputs.tf, meaning anyone can quickly check if they're consuming your module 2 | output "s3_to_s3_copier_lambda_role_arn" { 3 | description = "KMS Key arn" 4 | value = aws_iam_role.s3_to_s3_copier_lambda.arn 5 | } 6 | 7 | output "rds_snapshot_service_arn" { 8 | description = "RDS Snapshot Service ARN" 9 | value = aws_iam_role.rds_snapshot_export_service.arn 10 | } 11 | 12 | output "rds_snapshot_to_s3_lambda_role_arn" { 13 | description = "RDS Snapshot to S3 Lambda Role ARN" 14 | value = aws_iam_role.rds_snapshot_to_s3_lambda.arn 15 | } 16 | -------------------------------------------------------------------------------- /terraform/modules/department/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | configuration_aliases = [ 13 | aws.aws_hackit_account 14 | ] 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /terraform/modules/department/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | department_identifier = replace(lower(var.name), "/[^a-zA-Z0-9]+/", "-") 3 | department_pascalcase = replace(title(replace(var.name, "/[^a-zA-Z0-9]+/", " ")), " ", "") 4 | } 5 | 6 | data "aws_caller_identity" "current" {} 7 | 8 | data "aws_region" "current" {} 9 | -------------------------------------------------------------------------------- /terraform/modules/department/25-aws-athena.tf: -------------------------------------------------------------------------------- 1 | resource "aws_athena_workgroup" "department_workgroup" { 2 | tags = var.tags 3 | 4 | name = "${var.short_identifier_prefix}${local.department_identifier}" 5 | state = "ENABLED" 6 | 7 | force_destroy = !var.is_live_environment 8 | 9 | configuration { 10 | enforce_workgroup_configuration = true 11 | publish_cloudwatch_metrics_enabled = true 12 | 13 | result_configuration { 14 | output_location = "s3://${var.athena_storage_bucket.bucket_id}/${local.department_identifier}/" 15 | 16 | encryption_configuration { 17 | encryption_option = "SSE_KMS" 18 | kms_key_arn = var.athena_storage_bucket.kms_key_arn 19 | } 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /terraform/modules/department/30-google.tf: -------------------------------------------------------------------------------- 1 | data "google_project" "project" {} 2 | 3 | module "google_service_account" { 4 | source = "../google-service-account" 5 | is_live_environment = var.is_live_environment 6 | department_name = local.department_identifier 7 | identifier_prefix = var.short_identifier_prefix 8 | application = var.application 9 | google_project_id = data.google_project.project.project_id 10 | secrets_manager_kms_key_id = var.secrets_manager_kms_key.key_id 11 | tags = var.tags 12 | } 13 | -------------------------------------------------------------------------------- /terraform/modules/department/45-notebook.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | create_notebook = var.notebook_instance != null && var.is_live_environment 3 | } 4 | 5 | module "sagemaker" { 6 | count = local.create_notebook ? 1 : 0 7 | source = "../sagemaker" 8 | development_endpoint_role_arn = aws_iam_role.glue_agent.arn 9 | tags = var.tags 10 | identifier_prefix = var.short_identifier_prefix 11 | python_libs = try(var.notebook_instance.extra_python_libs, null) 12 | extra_jars = try(var.notebook_instance.extra_jars, null) 13 | instance_name = local.department_identifier 14 | github_repository = try(var.notebook_instance.github_repository, null) 15 | } -------------------------------------------------------------------------------- /terraform/modules/dynamodb/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/dynamodb/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "identifier_prefix" { 7 | description = "Project wide resource identifier prefix" 8 | type = string 9 | } 10 | 11 | variable "name" { 12 | description = "Name of the DynamoDB table" 13 | type = string 14 | default = null 15 | } 16 | 17 | variable "attributes" { 18 | description = "List of nested attribute definitions. Only required for hash_key and range_key attributes. Each attribute has two properties: name - (Required) The name of the attribute, type - (Required) Attribute type, which must be a scalar type: S, N, or B for (S)tring, (N)umber or (B)inary data" 19 | type = list(map(string)) 20 | default = [] 21 | } 22 | 23 | variable "hash_key" { 24 | description = "The attribute to use as the hash (partition) key. Must also be defined as an attribute" 25 | type = string 26 | default = null 27 | } 28 | -------------------------------------------------------------------------------- /terraform/modules/dynamodb/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" { 2 | provider = aws 3 | } 4 | -------------------------------------------------------------------------------- /terraform/modules/dynamodb/20-aws-kms-key.tf: -------------------------------------------------------------------------------- 1 | resource "aws_kms_key" "dynamodb" { 2 | description = "${var.identifier_prefix} - ${var.name} dynamodb table KMS key " 3 | deletion_window_in_days = 10 4 | enable_key_rotation = true 5 | policy = data.aws_iam_policy_document.dynamodb_key_policy.json 6 | tags = var.tags 7 | } 8 | 9 | resource "aws_kms_alias" "dynamodb_kms_alias" { 10 | name = "alias/${var.identifier_prefix}${var.name}-dynamodb" 11 | target_key_id = aws_kms_key.dynamodb.key_id 12 | } 13 | 14 | data "aws_iam_policy_document" "dynamodb_key_policy" { 15 | 16 | statement { 17 | effect = "Allow" 18 | actions = [ 19 | "kms:*" 20 | ] 21 | resources = [ 22 | "*" 23 | ] 24 | principals { 25 | type = "AWS" 26 | identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"] 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /terraform/modules/dynamodb/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "dynamodb_table_arn" { 2 | description = "ARN of the DynamoDB table" 3 | value = try(aws_dynamodb_table.this[0].arn, aws_dynamodb_table.autoscaled[0].arn, "") 4 | } 5 | 6 | output "dynamodb_table_id" { 7 | description = "ID of the DynamoDB table" 8 | value = try(aws_dynamodb_table.this[0].id, aws_dynamodb_table.autoscaled[0].id, "") 9 | } 10 | 11 | output "dynamodb_table_stream_arn" { 12 | description = "The ARN of the Table Stream. Only available when var.stream_enabled is true" 13 | value = var.stream_enabled ? try(aws_dynamodb_table.this[0].stream_arn, aws_dynamodb_table.autoscaled[0].stream_arn, "") : null 14 | } 15 | 16 | output "dynamodb_table_stream_label" { 17 | description = "A timestamp, in ISO 8601 format of the Table Stream. 
Only available when var.stream_enabled is true" 18 | value = var.stream_enabled ? try(aws_dynamodb_table.this[0].stream_label, aws_dynamodb_table.autoscaled[0].stream_label, "") : null 19 | } 20 | 21 | -------------------------------------------------------------------------------- /terraform/modules/electrical-mechnical-fire-safety-cleaning-job/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/electrical-mechnical-fire-safety-cleaning-job/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | raw_zone_catalog_database_name = var.department.raw_zone_catalog_database_name 3 | refined_zone_catalog_database_name = var.department.refined_zone_catalog_database_name 4 | } -------------------------------------------------------------------------------- /terraform/modules/electrical-mechnical-fire-safety-cleaning-job/99-outputs.tf: -------------------------------------------------------------------------------- 1 | 2 | output "address_matching_job_name" { 3 | value = module.housing_repairs_elec_mech_fire_address_matching.job_name 4 | } 5 | -------------------------------------------------------------------------------- /terraform/modules/g-drive-to-s3/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/g-drive-to-s3/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "workflow_names" { 2 | description = "A list of workflow names to be triggered after import" 3 | type = list(string) 4 | default = [] 5 | } 6 | 7 | variable "workflow_arns" { 8 | description = "A list of workflow arns to be triggered after import" 9 | type = list(string) 10 | default = [] 11 | } 12 | 13 | variable "ingestion_schedule_enabled" { 14 | description = "Flag to enable the cloud watch trigger to copy the data from g-drive to s3" 15 | type = bool 16 | default = true 17 | } 18 | 19 | -------------------------------------------------------------------------------- /terraform/modules/g-drive-to-s3/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | 3 | locals { 4 | lambda_timeout = 900 5 | lambda_memory_size = 3072 6 | } 7 | -------------------------------------------------------------------------------- /terraform/modules/g-drive-to-s3/99-outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/g-drive-to-s3/99-outputs.tf -------------------------------------------------------------------------------- /terraform/modules/glue-failure-alert-notifications/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/glue-failure-alert-notifications/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "identifier_prefix" { 7 | description = "Project wide resource identifier prefix" 8 | type = string 9 | } 10 | 11 | variable "lambda_name" { 12 | description = "Name of the lambda" 13 | type = string 14 | } 15 | 16 | variable "lambda_artefact_storage_bucket" { 17 | description = "The name of the S3 bucket where the lambda artefact will be stored" 18 | type = string 19 | } 20 | 21 | variable "secrets_manager_kms_key" { 22 | description = "The KMS Key Id to be used to encrypt the secret which stores the webhook URL" 23 | type = object({ 24 | key_id = string 25 | arn = string 26 | }) 27 | } 28 | 29 | variable "cloudwatch_event_pattern" { 30 | description = "A CloudWatch event pattern to trigger the lambda" 31 | type = string 32 | } 33 | 34 | variable "secret_name" { 35 | description = "The name of the secret which stores the webhook URL" 36 | type = string 37 | } 38 | 39 | -------------------------------------------------------------------------------- /terraform/modules/glue-failure-alert-notifications/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "lambda_environment_variables" { 2 | description = "An object containing environment variables to be used in the Lambda" 3 | type = map(string) 4 | } 5 | 6 | variable "timeout" { 7 | description = "The amount of time your Lambda Function has to run in seconds" 8 | type = number 9 | default = 3 10 | } 11 | -------------------------------------------------------------------------------- /terraform/modules/glue-failure-alert-notifications/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | -------------------------------------------------------------------------------- /terraform/modules/glue-failure-alert-notifications/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "lambda_name" { 2 | description = "Name of the lambda" 3 | value = aws_lambda_function.lambda.function_name 4 | } 5 | 6 | output "lambda_arn" { 7 | description = "ARN of the lambda" 8 | value = aws_lambda_function.lambda.arn 9 | } 10 | 11 | output "cloudwatch_event_rule_arn" { 12 | description = "ARN of the CloudWatch Event Rule" 13 | value = aws_cloudwatch_event_rule.lambda.arn 14 | } 15 | 16 | output "cloudwatch_event_rule_name" { 17 | description = "Name of the CloudWatch Event Rule" 18 | value = aws_cloudwatch_event_rule.lambda.name 19 | } 20 | -------------------------------------------------------------------------------- /terraform/modules/google-service-account/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicit Provider blocks declared, as these 3 | will be inherited from the parent Terraform.
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/google-service-account/00-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "is_live_environment" { 2 | description = "A flag indicating if we are running in a live environment for setting up automation" 3 | type = bool 4 | } 5 | 6 | variable "department_name" { 7 | type = string 8 | } 9 | 10 | variable "identifier_prefix" { 11 | type = string 12 | } 13 | 14 | variable "application" { 15 | type = string 16 | description = "For example, data-platform" 17 | } 18 | 19 | variable "google_project_id" { 20 | type = string 21 | } 22 | 23 | variable "secrets_manager_kms_key_id" { 24 | type = string 25 | description = "The KMS Key Id to be used to encrypt the secret which stores the json credentials" 26 | } 27 | 28 | variable "tags" { 29 | description = "AWS tags" 30 | type = map(string) 31 | } 32 | -------------------------------------------------------------------------------- /terraform/modules/google-service-account/01-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "secret_type" { 2 | description = "Specify the type of secret to store in Secrets Manager" 3 | type = string 4 | default = "binary" 5 | 6 | validation { 7 | condition = contains(["binary", "string"], var.secret_type) 8 | error_message = "Secret type must be \"binary\" or \"string\"." 9 | } 10 | } -------------------------------------------------------------------------------- /terraform/modules/google-service-account/45-service-account.tf: -------------------------------------------------------------------------------- 1 | resource "google_service_account" "service_account" { 2 | count = var.is_live_environment ? 1 : 0 3 | 4 | account_id = lower("${var.identifier_prefix}${var.department_name}") 5 | display_name = "${var.application} - ${title(var.department_name)}" 6 | project = var.google_project_id 7 | } 8 | 9 | resource "time_rotating" "key_rotation" { 10 | rotation_days = 35 11 | } 12 | 13 | resource "google_service_account_key" "json_credentials" { 14 | count = var.is_live_environment ? 1 : 0 15 | 16 | service_account_id = google_service_account.service_account[0].name 17 | public_key_type = "TYPE_X509_PEM_FILE" 18 | 19 | keepers = { 20 | # Arbitrary map of values that, when changed, will trigger a new key to be generated 21 | # The key will only be output the first time this resource is created, afterwards it will have a null value 22 | secret_id = aws_secretsmanager_secret.sheets_credentials.id 23 | rotation_time = time_rotating.key_rotation.rotation_days 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /terraform/modules/google-service-account/99-outpus.tf: -------------------------------------------------------------------------------- 1 | output "email" { 2 | value = length(google_service_account.service_account) == 1 ?
google_service_account.service_account[0].email : "" 3 | } 4 | 5 | output "credentials_secret" { 6 | value = aws_secretsmanager_secret.sheets_credentials 7 | } -------------------------------------------------------------------------------- /terraform/modules/google-sheets-glue-job/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/google-sheets-glue-job/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | dataset_name = lower(replace(var.dataset_name, "_", "-")) 3 | import_name = "${var.department.identifier}-${local.dataset_name}" 4 | full_output_path = "s3://${var.bucket_id}/${var.department.identifier}/google-sheets/${local.dataset_name}" 5 | sheets_credentials_name = var.sheets_credentials_name == null ? var.department.google_service_account.credentials_secret.name : var.sheets_credentials_name 6 | } -------------------------------------------------------------------------------- /terraform/modules/google-sheets-glue-job/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "glue_job_name" { 2 | description = "Glue job name" 3 | value = module.google_sheet_import.job_name 4 | } 5 | 6 | output "crawler_name" { 7 | description = "Crawler name" 8 | value = module.google_sheet_import.crawler_name 9 | } 10 | 11 | output "workflow_name" { 12 | description = "Workflow name" 13 | value = var.create_workflow ? aws_glue_workflow.workflow[0].name : null 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/housing-repairs-google-sheets-cleaning/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/housing-repairs-google-sheets-cleaning/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "number_of_workers_for_glue_job" { 2 | description = "Specify the number of workers to use for the glue job" 3 | type = number 4 | default = 6 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/housing-repairs-google-sheets-cleaning/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | glue_job_name = "${var.short_identifier_prefix}Housing Repairs - ${title(replace(var.dataset_name, "-", " "))}" 3 | raw_zone_catalog_database_name = var.department.raw_zone_catalog_database_name 4 | refined_zone_catalog_database_name = var.department.refined_zone_catalog_database_name 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/housing-repairs-google-sheets-cleaning/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "address_matching_job_name" { 2 | value = module.housing_repairs_google_sheets_address_matching.job_name 3 | } 4 | -------------------------------------------------------------------------------- /terraform/modules/import-data-from-spreadsheet-job/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicit Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/import-data-from-spreadsheet-job/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "header_row_number" { 2 | description = "Header row number (0-indexed)" 3 | type = number 4 | default = 0 5 | } 6 | 7 | variable "glue_role_arn" { 8 | description = "Role to use for Glue jobs" 9 | type = string 10 | default = null 11 | } 12 | 13 | variable "enable_bookmarking" { 14 | description = "Enable glue job bookmarking" 15 | type = bool 16 | default = false 17 | } 18 | 19 | variable "tags" { 20 | description = "AWS tags" 21 | type = map(string) 22 | default = null 23 | } 24 | -------------------------------------------------------------------------------- /terraform/modules/import-data-from-spreadsheet-job/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | worksheet_key = lower(replace(replace(trimspace(var.worksheet_name), ".", ""), " ", "-")) 3 | import_name = "${var.department.identifier}-${local.worksheet_key}" 4 | job_bookmark_option = var.enable_bookmarking ?
"job-bookmark-enable" : "job-bookmark-disable" 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/import-data-from-spreadsheet-job/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "job_name" { 2 | value = module.spreadsheet_import.job_name 3 | } 4 | 5 | output "job_arn" { 6 | value = module.spreadsheet_import.job_arn 7 | } 8 | 9 | output "catalog_table" { 10 | value = replace("${var.department.identifier}_${var.data_set_name}", "-", "_") 11 | } 12 | 13 | output "worksheet_key" { 14 | value = local.worksheet_key 15 | } 16 | 17 | output "workflow_name" { 18 | value = aws_glue_workflow.workflow.id 19 | } 20 | 21 | output "workflow_arn" { 22 | value = aws_glue_workflow.workflow.arn 23 | } 24 | 25 | output "crawler_name" { 26 | value = module.spreadsheet_import.crawler_name 27 | } 28 | -------------------------------------------------------------------------------- /terraform/modules/import-spreadsheet-file-from-g-drive/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/import-spreadsheet-file-from-g-drive/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | tags_with_department = merge(var.tags, { "PlatformDepartment" = var.department.identifier }) 3 | file_name_list = split(".", lower(var.input_file_name)) 4 | is_csv = length(local.file_name_list) > 0 && local.file_name_list[length(local.file_name_list) - 1] == "csv" ? true : false 5 | } -------------------------------------------------------------------------------- /terraform/modules/import-spreadsheet-file-from-g-drive/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "worksheet_resources" { 2 | value = tomap({ 3 | for k in keys(module.import_data_from_spreadsheet_job) : module.import_data_from_spreadsheet_job[k].worksheet_key => { 4 | catalog_table = module.import_data_from_spreadsheet_job[k].catalog_table 5 | crawler_name = module.import_data_from_spreadsheet_job[k].crawler_name 6 | job_arn = module.import_data_from_spreadsheet_job[k].job_arn 7 | job_name = module.import_data_from_spreadsheet_job[k].job_name 8 | workflow_name = module.import_data_from_spreadsheet_job[k].workflow_name 9 | } 10 | }) 11 | } 12 | -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = ">= 0.14.3" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = ">= 4.11" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/02-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/10-dependencies.tf: -------------------------------------------------------------------------------- 1 | module "kafka_dependency_storage" { 2 | source = "../s3-bucket" 3 | tags = var.tags 4 | project = var.project 5 | environment = var.environment 6 | identifier_prefix = var.identifier_prefix 7 | bucket_name = "Kafka Dependency Storage" 8 | bucket_identifier = "kafka-dependency-storage" 9 | include_backup_policy_tags = false 10 | } 11 | 12 | resource "aws_s3_object" "kafka_connector_s3" { 13 | bucket = module.kafka_dependency_storage.bucket_id 14 | key = "plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip" 15 | acl = "private" 16 | source = "${path.module}/plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip" 17 | source_hash = filemd5("${path.module}/plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip") 18 | } 19 | -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/30-keys.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | default_arn = [ 3 | "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root", 4 | ] 5 | } 6 | 7 | resource "aws_kms_key" "kafka" { 8 | tags = var.tags 9 | description = "${var.short_identifier_prefix} - Kafka Streaming" 10 | 11 | deletion_window_in_days = 10 12 | enable_key_rotation = true 13 | 14 | policy = data.aws_iam_policy_document.kafka_client_access.json 15 | } 16 | 17 | data "aws_iam_policy_document" "kafka_client_access" { 18 | statement { 19 | actions = ["kms:*"] 20 | 21 | principals { 22 | identifiers = concat(var.cross_account_lambda_roles, local.default_arn) 23 | type = "AWS" 24 | } 25 | 26 | resources = ["*"] 27 | } 28 | } 29 | 30 | resource "aws_kms_alias" "key_alias" { 31 | name = lower("alias/${var.short_identifier_prefix}kafka-${aws_msk_cluster.kafka_cluster.cluster_name}") 32 | target_key_id = aws_kms_key.kafka.key_id 33 | } -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/40-logs.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_log_group" "connector_log_group" { 2 | tags = var.tags 3 | name = "${var.short_identifier_prefix}kafka-connector" 4 | } 5 | 6 | resource "aws_cloudwatch_log_group" "broker_log_group" { 7 | tags = var.tags 8 | name = "${var.short_identifier_prefix}event-streaming-broker-logs" 9 | } -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/45-glue-crawler.tf: -------------------------------------------------------------------------------- 1 | resource "aws_glue_crawler" "crawler" { 2 | tags = var.tags 3 | 4 | database_name = var.glue_database_name 5 | name = "${var.short_identifier_prefix}event-streaming-topics-crawler" 6 | role = var.glue_iam_role 7 | 8 | s3_target { 9 | path = 
"s3://${var.s3_bucket_to_write_to.bucket_id}/event-streaming/" 10 | 11 | exclusions = ["*.json", "*.txt", "*.zip", "*.xlsx"] 12 | } 13 | 14 | schema_change_policy { 15 | delete_behavior = "DELETE_FROM_DATABASE" 16 | update_behavior = "UPDATE_IN_DATABASE" 17 | } 18 | 19 | configuration = jsonencode({ 20 | Version = 1.0, 21 | Grouping = { 22 | TableGroupingPolicy = "CombineCompatibleSchemas" 23 | TableLevelConfiguration = 3 24 | } 25 | CrawlerOutput = { 26 | Partitions = { AddOrUpdateBehavior = "InheritFromTable" } 27 | } 28 | }) 29 | } -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/50-cluster.tf: -------------------------------------------------------------------------------- 1 | resource "aws_msk_cluster" "kafka_cluster" { 2 | cluster_name = "${var.short_identifier_prefix}event-streaming" 3 | kafka_version = "2.8.1" 4 | number_of_broker_nodes = 3 5 | 6 | broker_node_group_info { 7 | instance_type = "kafka.t3.small" 8 | ebs_volume_size = 200 9 | client_subnets = var.subnet_ids 10 | security_groups = [aws_security_group.kafka.id] 11 | } 12 | 13 | encryption_info { 14 | encryption_at_rest_kms_key_arn = aws_kms_key.kafka.arn 15 | } 16 | 17 | logging_info { 18 | broker_logs { 19 | cloudwatch_logs { 20 | enabled = true 21 | log_group = aws_cloudwatch_log_group.broker_log_group.name 22 | } 23 | } 24 | } 25 | 26 | tags = var.tags 27 | } -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "schema_registry_url" { 2 | value = "http://${module.schema_registry.load_balancer_dns_name}:8081" 3 | } 4 | 5 | output "cluster_config" { 6 | value = { 7 | zookeeper_connect_string = aws_msk_cluster.kafka_cluster.zookeeper_connect_string 8 | bootstrap_brokers = aws_msk_cluster.kafka_cluster.bootstrap_brokers 9 | bootstrap_brokers_tls = aws_msk_cluster.kafka_cluster.bootstrap_brokers_tls 10 | vpc_security_groups = [aws_security_group.kafka.id] 11 | vpc_subnets = var.subnet_ids 12 | cluster_name = aws_msk_cluster.kafka_cluster.cluster_name 13 | cluster_arn = aws_msk_cluster.kafka_cluster.arn 14 | kms_key_arn = aws_kms_key.kafka.arn 15 | } 16 | } -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip -------------------------------------------------------------------------------- /terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5.zip -------------------------------------------------------------------------------- /terraform/modules/kafka-schema-registry/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 
2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = ">= 0.14.3" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = ">= 3.72" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/kafka-schema-registry/02-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} -------------------------------------------------------------------------------- /terraform/modules/kafka-schema-registry/30-register-schemas.tf: -------------------------------------------------------------------------------- 1 | resource "null_resource" "register_schemas" { 2 | for_each = toset(var.topics) 3 | triggers = { 4 | shell_hash = filesha256("${path.module}/schemas/${each.value}.json") 5 | } 6 | 7 | provisioner "local-exec" { 8 | interpreter = ["bash", "-c"] 9 | command = < $key_file 19 | chmod 400 $key_file 20 | schema_string=$(jq -c . $path_to_schema_file | jq -R) 21 | 22 | 23 | ssh -4 -i $key_file -f -M \ 24 | -L 8081:${schema_registry_url//\"}:8081 \ 25 | -o "UserKnownHostsFile=/dev/null" \ 26 | -o "StrictHostKeyChecking=no" \ 27 | -o ProxyCommand="aws ssm start-session --target %h --document AWS-StartSSHSession --parameters portNumber=%p --region=eu-west-2" \ 28 | -o ExitOnForwardFailure=yes \ 29 | ec2-user@${instance_id//\"} \ 30 | sleep 10 31 | 32 | curl -X POST -H "Content-Type: application/vnd.schemaregistry.v1+json" --data "{ \"schema\": ${schema_string} }" "http://localhost:8081/subjects/$topic_name-value/versions" 33 | 34 | rm -f $key_file 35 | -------------------------------------------------------------------------------- /terraform/modules/kafka-test-lambda/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/kafka-test-lambda/02-inputs-optional.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/kafka-test-lambda/02-inputs-optional.tf -------------------------------------------------------------------------------- /terraform/modules/kafka-test-lambda/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | -------------------------------------------------------------------------------- /terraform/modules/kafka-test-lambda/20-security-groups.tf: -------------------------------------------------------------------------------- 1 | resource "aws_security_group" "kafka-test" { 2 | name = "${var.identifier_prefix}kafka-test" 3 | tags = var.tags 4 | vpc_id = var.vpc_id 5 | description = "Specifies rules for traffic to the kafka-test lambda" 6 | 7 | egress { 8 | description = "Allow all outbound traffic within the security group" 9 | from_port = 0 10 | to_port = 0 11 | protocol = "-1" 12 | cidr_blocks = ["0.0.0.0/0"] 13 | ipv6_cidr_blocks = ["::/0"] 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /terraform/modules/kafka-test-lambda/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "security_group_id" { 2 | value = aws_security_group.kafka-test.id 3 | } 4 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-and-monitoring/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-and-monitoring/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "identifier_prefix" { 7 | description = "Project wide resource identifier prefix" 8 | type = string 9 | } 10 | 11 | variable "lambda_name" { 12 | description = "Name of the lambda" 13 | type = string 14 | } 15 | 16 | variable "project" { 17 | description = "The project name." 18 | type = string 19 | } 20 | 21 | variable "environment" { 22 | description = "Environment e.g. Dev, Stg, Prod, Mgmt." 
23 | type = string 24 | } 25 | 26 | variable "alarms_handler_lambda_name" { 27 | description = "Name of the alarms handler lambda" 28 | type = string 29 | } 30 | 31 | variable "alarms_handler_lambda_arn" { 32 | description = "ARN of the alarms handler lambda" 33 | type = string 34 | } 35 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-and-monitoring/02-inputs-optional.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/lambda-alarms-and-monitoring/02-inputs-optional.tf -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-and-monitoring/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "data_platform" { 2 | provider = aws 3 | } 4 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-and-monitoring/10-aws-cloudwatch-alarm.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_log_metric_filter" "metric_filter" { 2 | name = "${var.lambda_name}-lambda-errors" 3 | pattern = "ERROR" 4 | log_group_name = "/aws/lambda/${var.lambda_name}" 5 | 6 | metric_transformation { 7 | name = "${var.lambda_name}-lambda-errors" 8 | namespace = "DataPlatform" 9 | value = "1" 10 | default_value = "0" 11 | unit = "Count" 12 | } 13 | } 14 | 15 | resource "aws_cloudwatch_metric_alarm" "lambda_metric_alarm" { 16 | alarm_name = aws_cloudwatch_log_metric_filter.metric_filter.name 17 | comparison_operator = "GreaterThanThreshold" 18 | evaluation_periods = "1" 19 | metric_name = aws_cloudwatch_log_metric_filter.metric_filter.name 20 | namespace = "DataPlatform" 21 | period = "300" 22 | statistic = "Sum" 23 | threshold = "0" 24 | datapoints_to_alarm = "1" 25 | alarm_description = "Triggers an alarm every time there's an error in the lambda's log stream" 26 | alarm_actions = [aws_sns_topic.sns_topic.arn] 27 | } 28 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-and-monitoring/11-aws-sns-topic.tf: -------------------------------------------------------------------------------- 1 | resource "aws_sns_topic" "sns_topic" { 2 | name = "lambda-failure-notification-${var.lambda_name}" 3 | kms_master_key_id = aws_kms_key.lambda_failure_notifications_kms_key.id 4 | } 5 | 6 | resource "aws_sns_topic_subscription" "topic_subscription" { 7 | topic_arn = aws_sns_topic.sns_topic.arn 8 | protocol = "lambda" 9 | endpoint = var.alarms_handler_lambda_arn 10 | } 11 | 12 | locals { 13 | lambda_name_upper_case = replace(title(replace(var.lambda_name, "-", " ")), " ", "") 14 | } 15 | 16 | resource "aws_lambda_permission" "allow_sns_invoke" { 17 | statement_id = "Allow${local.lambda_name_upper_case}ExecutionFromSNS" 18 | action = "lambda:InvokeFunction" 19 | function_name = var.alarms_handler_lambda_name 20 | principal = "sns.amazonaws.com" 21 | source_arn = aws_sns_topic.sns_topic.arn 22 | } 23 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-handler/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 
2 | As this is a module, we don't have any explicit Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-handler/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "identifier_prefix" { 7 | description = "Project wide resource identifier prefix" 8 | type = string 9 | } 10 | 11 | variable "lambda_name" { 12 | description = "Name of the lambda" 13 | type = string 14 | } 15 | 16 | variable "lambda_artefact_storage_bucket" { 17 | type = string 18 | } 19 | 20 | variable "secret_name" { 21 | description = "Name of the secret containing the webhook URL" 22 | } 23 | 24 | variable "secrets_manager_kms_key" { 25 | description = "The KMS Key Id to be used to encrypt the secret which stores the webhook URL" 26 | type = object({ 27 | key_id = string 28 | arn = string 29 | }) 30 | } 31 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-handler/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "lambda_environment_variables" { 2 | description = "An object containing environment variables to be used in the Lambda" 3 | type = map(string) 4 | } 5 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-handler/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | -------------------------------------------------------------------------------- /terraform/modules/lambda-alarms-handler/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "lambda_name" { 2 | description = "Name of the lambda" 3 | value = aws_lambda_function.lambda.function_name 4 | } 5 | 6 | output "lambda_arn" { 7 | description = "ARN of the lambda" 8 | value = aws_lambda_function.lambda.arn 9 | } 10 | -------------------------------------------------------------------------------- /terraform/modules/lambda-monitoring-dashboard/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicit Provider blocks declared, as these 3 | will be inherited from the parent Terraform.
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/lambda-monitoring-dashboard/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "identifier_prefix" { 7 | description = "Project wide resource identifier prefix" 8 | type = string 9 | } 10 | -------------------------------------------------------------------------------- /terraform/modules/lambda-monitoring-dashboard/02-inputs-optional.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/lambda-monitoring-dashboard/02-inputs-optional.tf -------------------------------------------------------------------------------- /terraform/modules/lambda-monitoring-dashboard/03-inputs-derived.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/lambda-monitoring-dashboard/03-inputs-derived.tf -------------------------------------------------------------------------------- /terraform/modules/qlik-sense-server/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/qlik-sense-server/02-inputs-optional.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/qlik-sense-server/02-inputs-optional.tf -------------------------------------------------------------------------------- /terraform/modules/qlik-sense-server/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | 3 | #for pre-prod and prod setups only 4 | data "aws_instance" "qlik-sense-aws-instance" { 5 | filter { 6 | name = "tag:Name" 7 | values = var.is_production_environment ? ["Qlik Migration ${upper(var.environment)}"] : ["dataplatform-stg-qlik-sense-restore-8"] 8 | } 9 | } 10 | 11 | data "aws_vpc" "vpc" { 12 | id = var.vpc_id 13 | } 14 | -------------------------------------------------------------------------------- /terraform/modules/qlik-sense-server/14-aws-glue-catalog-database.tf: -------------------------------------------------------------------------------- 1 | resource "aws_glue_catalog_database" "qlik_alb_logs" { 2 | count = var.is_live_environment ? 
1 : 0 3 | name = "${var.identifier_prefix}-qlik-alb-logs" 4 | 5 | lifecycle { 6 | prevent_destroy = true 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /terraform/modules/qlik-sense-server/99-outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/qlik-sense-server/99-outputs.tf -------------------------------------------------------------------------------- /terraform/modules/rds-snapshot-to-s3/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/rds-snapshot-to-s3/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | 3 | locals { 4 | lambda_timeout = 900 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/rds-snapshot-to-s3/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "rds_snapshot_s3_to_s3_copier_lambda_role_arn" { 2 | description = "ARN for the s3_to_s3_copier_lambda_role" 3 | value = aws_iam_role.rds_snapshot_s3_to_s3_copier_lambda_role.arn 4 | } 5 | 6 | output "rds_snapshot_s3_to_s3_copier_lambda_name" { 7 | description = "Name for the s3_to_s3_copier_lambda" 8 | value = module.rds_snapshot_s3_to_s3_copier.lambda_name 9 | } 10 | 11 | output "export_rds_to_s3_snapshot_lambda_name" { 12 | description = "Name for the export_rds_to_s3_snapshot_lambda" 13 | value = module.trigger_rds_snapshot_export.lambda_name 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/redshift-serverless/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /terraform/modules/redshift-serverless/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "workgroup_base_capacity" { 2 | description = "Base capacity of the workgroup in Redshift Processing Units (RPUs)" 3 | type = number 4 | default = 32 5 | } 6 | 7 | 8 | # variable "maximimum_query_execution_time" { 9 | # description = "Max query execution time (in seconds)" 10 | # type = number 11 | # default = 3600 12 | # } 13 | -------------------------------------------------------------------------------- /terraform/modules/redshift-serverless/03-inputs-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | -------------------------------------------------------------------------------- /terraform/modules/redshift-serverless/20-aws-redshiftserverless-namespace.tf: -------------------------------------------------------------------------------- 1 | resource "aws_redshiftserverless_namespace" "namespace" { 2 | tags = var.tags 3 | 4 | namespace_name = var.namespace_name 5 | 6 | admin_user_password = aws_secretsmanager_secret_version.master_password.secret_string 7 | admin_username = var.admin_username 8 | db_name = var.db_name 9 | default_iam_role_arn = aws_iam_role.redshift_serverless_role.arn 10 | iam_roles = [aws_iam_role.redshift_serverless_role.arn] 11 | kms_key_id = aws_kms_key.key.arn 12 | 13 | # #this is not ideal and can cause headaches if roles need tweaking, but seems to be a known issue https://github.com/hashicorp/terraform-provider-aws/issues/26624 14 | # lifecycle { 15 | # ignore_changes = [ 16 | # iam_roles 17 | # ] 18 | # } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /terraform/modules/redshift-serverless/21-aws-secrets-manager-secret.tf: -------------------------------------------------------------------------------- 1 | resource "random_password" "master_password" { 2 | length = 40 3 | special = false 4 | } 5 | 6 | resource "aws_secretsmanager_secret" "master_password" { 7 | name_prefix = "${var.identifier_prefix}-redshift-serverless-${var.namespace_name}-namespace-master-password" 8 | description = "Master password for redshift serverless ${var.namespace_name} namespace" 9 | kms_key_id = var.secrets_manager_key 10 | } 11 | 12 | resource "aws_secretsmanager_secret_version" "master_password" { 13 | secret_id = aws_secretsmanager_secret.master_password.id 14 | secret_string = random_password.master_password.result 15 | } 16 | 17 | -------------------------------------------------------------------------------- /terraform/modules/redshift-serverless/22-aws-kms-key.tf: -------------------------------------------------------------------------------- 1 | resource "aws_kms_key" "key" { 2 | tags = var.tags 3 | 4 | description = "${var.identifier_prefix}-redshift-serverless-${var.namespace_name}-namespace" 5 | deletion_window_in_days = 10 6 | enable_key_rotation = true 7 | 8 | policy = data.aws_iam_policy_document.key_policy.json 9 | } 10 | 11 | data "aws_iam_policy_document" "key_policy" { 12 | statement { 13 | effect = "Allow" 14 | actions = [ 15 | "kms:*" 16 | ] 17 | 18 | resources = ["*"] 19 | 20 | principals { 21 | type = "AWS" 
22 | identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"] 23 | } 24 | 25 | } 26 | } 27 | 28 | resource "aws_kms_alias" "name" { 29 | name = lower("alias/${var.identifier_prefix}-redshift-serverless-${var.namespace_name}-namespace") 30 | target_key_id = aws_kms_key.key.id 31 | } 32 | -------------------------------------------------------------------------------- /terraform/modules/redshift-serverless/24-aws-security-group.tf: -------------------------------------------------------------------------------- 1 | resource "aws_security_group" "redshift_serverless" { 2 | tags = var.tags 3 | 4 | name = "${var.identifier_prefix}-redshift-serverless-${var.namespace_name}-namespace" 5 | description = "Restrict access to redshift serverless" 6 | vpc_id = var.vpc_id 7 | revoke_rules_on_delete = true 8 | } 9 | 10 | #TODO: lock these down 11 | resource "aws_security_group_rule" "redshift_serverless_ingress" { 12 | description = "Allow all inbound traffic" 13 | type = "ingress" 14 | from_port = 0 15 | to_port = 0 16 | protocol = "TCP" 17 | cidr_blocks = ["0.0.0.0/0"] 18 | security_group_id = aws_security_group.redshift_serverless.id 19 | } 20 | 21 | resource "aws_security_group_rule" "redshift_serverless_egress" { 22 | description = "Allows all outbound traffic" 23 | type = "egress" 24 | from_port = 0 25 | to_port = 0 26 | protocol = "TCP" 27 | cidr_blocks = ["0.0.0.0/0"] 28 | security_group_id = aws_security_group.redshift_serverless.id 29 | } 30 | 31 | -------------------------------------------------------------------------------- /terraform/modules/redshift-serverless/25-aws-redshiftserverless-usage-limit.tf: -------------------------------------------------------------------------------- 1 | resource "aws_redshiftserverless_usage_limit" "usage_limit" { 2 | resource_arn = aws_redshiftserverless_workgroup.default.arn 3 | usage_type = "serverless-compute" 4 | period = var.serverless_compute_usage_limit_period 5 | amount = var.serverless_compute_usage_limit_amount 6 | breach_action = "log" 7 | } 8 | -------------------------------------------------------------------------------- /terraform/modules/redshift-serverless/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "redshift_serverless_role_arn" { 2 | value = aws_iam_role.redshift_serverless_role.arn 3 | description = "The ARN of the IAM role used by Redshift Serverless." 4 | } 5 | 6 | -------------------------------------------------------------------------------- /terraform/modules/redshift/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/redshift/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "additional_iam_roles" { 2 | description = "Additional IAM roles to attach to the Redshift cluster" 3 | type = list(string) 4 | default = [] 5 | } 6 | 7 | variable "preferred_maintenance_window" { 8 | description = "The weekly time range (in UTC) during which automated cluster maintenance can occur" 9 | type = string 10 | default = "sun:02:00-sun:03:00" 11 | } 12 | -------------------------------------------------------------------------------- /terraform/modules/redshift/03-input-derived.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/redshift/03-input-derived.tf -------------------------------------------------------------------------------- /terraform/modules/redshift/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "role_arn" { 2 | value = aws_iam_role.redshift_role.arn 3 | } 4 | 5 | output "cluster_id" { 6 | value = aws_redshift_cluster.redshift_cluster.cluster_identifier 7 | } 8 | 9 | output "cluster_arn" { 10 | value = aws_redshift_cluster.redshift_cluster.arn 11 | } 12 | -------------------------------------------------------------------------------- /terraform/modules/s3-bucket-notification-emails/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicity Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /terraform/modules/s3-bucket-notification-emails/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "The name of the SNS topic" 3 | } 4 | 5 | variable "bucket_id" { 6 | description = "The ID of the S3 bucket to subscribe to" 7 | } 8 | 9 | variable "bucket_arn" { 10 | description = "The ARN of the S3 bucket to subscribe to" 11 | } 12 | 13 | variable "email_list" { 14 | description = "A comma separated list of email addresses to subscribe to the SNS topic" 15 | default = "" 16 | } 17 | 18 | variable "lambda_artefact_storage_bucket" { 19 | description = "S3 Bucket to store the Lambda artefact in" 20 | } -------------------------------------------------------------------------------- /terraform/modules/s3-bucket-notification-emails/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "filter_prefix" { 2 | description = "The prefix to filter on" 3 | default = "" 4 | } 5 | 6 | variable "filter_suffix" { 7 | description = "The suffix to filter on" 8 | default = "" 9 | } -------------------------------------------------------------------------------- /terraform/modules/s3-bucket-notification-emails/03-inputs-derived.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/s3-bucket-notification-emails/03-inputs-derived.tf -------------------------------------------------------------------------------- /terraform/modules/s3-bucket-notification-emails/99-outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/s3-bucket-notification-emails/99-outputs.tf -------------------------------------------------------------------------------- /terraform/modules/s3-bucket/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicit Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/s3-bucket/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "project" { 7 | description = "The project name." 8 | type = string 9 | } 10 | 11 | variable "environment" { 12 | description = "Environment e.g. dev, stg, prod, mgmt."
13 | type = string 14 | } 15 | 16 | variable "bucket_name" { 17 | description = "S3 Bucket name" 18 | type = string 19 | } 20 | 21 | variable "bucket_identifier" { 22 | description = "URL safe bucket identifier" 23 | type = string 24 | } 25 | 26 | variable "identifier_prefix" { 27 | description = "Project wide resource identifier prefix" 28 | type = string 29 | } 30 | -------------------------------------------------------------------------------- /terraform/modules/s3-bucket/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | 3 | locals { 4 | current_arn = [ 5 | "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root", 6 | ] 7 | role_arns_to_share_access_with = [for x in concat(var.role_arns_to_share_access_with, local.current_arn) : x if x != null] 8 | } 9 | -------------------------------------------------------------------------------- /terraform/modules/s3-bucket/99-outputs.tf: -------------------------------------------------------------------------------- 1 | # We make any output files clear by adding them to the 99-outputs.tf, meaning anyone can quickly check if they're consuming your module 2 | output "bucket_id" { 3 | description = "Bucket id of bucket" 4 | value = aws_s3_bucket.bucket.bucket 5 | } 6 | 7 | output "bucket_arn" { 8 | description = "Bucket ARN of bucket" 9 | value = aws_s3_bucket.bucket.arn 10 | } 11 | 12 | output "kms_key_id" { 13 | description = "KMS Key id" 14 | value = aws_kms_key.key.id 15 | } 16 | 17 | output "kms_key_arn" { 18 | description = "KMS Key arn" 19 | value = aws_kms_key.key.arn 20 | } 21 | 22 | output "bucket_url" { 23 | description = "S3 bucket url" 24 | value = "s3://${aws_s3_bucket.bucket.bucket}" 25 | } 26 | -------------------------------------------------------------------------------- /terraform/modules/sagemaker/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicit Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = ">= 0.14.3" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = ">= 3.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/sagemaker/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "identifier_prefix" { 7 | description = "Project wide resource identifier prefix" 8 | type = string 9 | } 10 | 11 | variable "development_endpoint_role_arn" { 12 | description = "The role provided controls access to data from the notebook." 
13 | type = string 14 | } 15 | 16 | variable "instance_name" { 17 | description = "Name of the notebook instance, typically set to the department name" 18 | type = string 19 | } 20 | 21 | variable "github_repository" { 22 | description = "Name of the sagemaker code repository to use as the default repository" 23 | type = string 24 | } -------------------------------------------------------------------------------- /terraform/modules/sagemaker/02-inputs-optional.tf: -------------------------------------------------------------------------------- 1 | variable "python_libs" { 2 | description = "Comma separated list of python libraries" 3 | type = string 4 | default = "" 5 | } 6 | 7 | variable "extra_jars" { 8 | description = "Comma separated list of jar files" 9 | type = string 10 | default = "" 11 | } -------------------------------------------------------------------------------- /terraform/modules/sagemaker/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} -------------------------------------------------------------------------------- /terraform/modules/sagemaker/10-ssm.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "dev_enpoint_key" { 2 | algorithm = "RSA" 3 | rsa_bits = 4096 4 | } 5 | 6 | resource "aws_ssm_parameter" "dev_enpoint_key" { 7 | tags = var.tags 8 | 9 | name = "/${var.identifier_prefix}glue-dev-endpoint-private-key/${var.instance_name}" 10 | type = "SecureString" 11 | description = "The private key for the glue development endpoint" 12 | value = tls_private_key.dev_enpoint_key.private_key_pem 13 | } 14 | -------------------------------------------------------------------------------- /terraform/modules/sagemaker/99-outputs.tf: -------------------------------------------------------------------------------- 1 | output "notebook_arn" { 2 | value = aws_sagemaker_notebook_instance.nb.arn 3 | } 4 | 5 | output "notebook_name" { 6 | value = aws_sagemaker_notebook_instance.nb.name 7 | } 8 | 9 | output "notebook_role_arn" { 10 | value = aws_iam_role.notebook.arn 11 | } 12 | 13 | output "lifecycle_configuration_arn" { 14 | value = aws_sagemaker_notebook_instance_lifecycle_configuration.sagemaker_lifecycle.arn 15 | } 16 | -------------------------------------------------------------------------------- /terraform/modules/set-budget-limit-amount/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicit Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/set-budget-limit-amount/01-inputs-required.tf: -------------------------------------------------------------------------------- 1 | variable "tags" { 2 | description = "AWS tags" 3 | type = map(string) 4 | } 5 | 6 | variable "environment" { 7 | description = "Environment e.g. Dev, Stg, Prod, Mgmt." 
8 | type = string 9 | } 10 | 11 | variable "identifier_prefix" { 12 | description = "Project wide resource identifier prefix" 13 | type = string 14 | } 15 | 16 | variable "lambda_artefact_storage_bucket" { 17 | type = string 18 | } 19 | 20 | variable "lambda_name" { 21 | type = string 22 | 23 | validation { 24 | condition = length(var.lambda_name) <= 51 25 | error_message = "The lambda_name must be 51 characters or fewer." 26 | } 27 | } 28 | 29 | variable "service_area" { 30 | description = "Name of service area where data is to be sent, e.g. 'housing'" 31 | type = string 32 | } 33 | 34 | variable "account_id" { 35 | description = "Account ID associated with budget being updated" 36 | type = string 37 | } 38 | 39 | variable "emails_to_notify" { 40 | description = "Array of emails or email groups who will be notified by the budget reporting" 41 | type = list(string) 42 | } -------------------------------------------------------------------------------- /terraform/modules/set-budget-limit-amount/02-inputs-optional.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/set-budget-limit-amount/02-inputs-optional.tf -------------------------------------------------------------------------------- /terraform/modules/set-budget-limit-amount/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | 3 | locals { 4 | lambda_timeout = 900 5 | } 6 | -------------------------------------------------------------------------------- /terraform/modules/set-budget-limit-amount/99-outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/set-budget-limit-amount/99-outputs.tf -------------------------------------------------------------------------------- /terraform/modules/sql-to-rds-snapshot/00-init.tf: -------------------------------------------------------------------------------- 1 | /* This defines the configuration of Terraform and AWS required Terraform Providers. 2 | As this is a module, we don't have any explicit Provider blocks declared, as these 3 | will be inherited from the parent Terraform. 
4 | */ 5 | terraform { 6 | required_version = "~> 1.0" 7 | 8 | required_providers { 9 | aws = { 10 | source = "hashicorp/aws" 11 | version = "~> 4.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /terraform/modules/sql-to-rds-snapshot/02-inputs-optional.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/sql-to-rds-snapshot/02-inputs-optional.tf -------------------------------------------------------------------------------- /terraform/modules/sql-to-rds-snapshot/03-input-derived.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LBHackney-IT/Data-Platform/f1a3b78db1cfbb514583b2a4b6d4e984017e6b78/terraform/modules/sql-to-rds-snapshot/03-input-derived.tf -------------------------------------------------------------------------------- /terraform/modules/sql-to-rds-snapshot/30-rds.tf: -------------------------------------------------------------------------------- 1 | resource "aws_db_subnet_group" "default" { 2 | tags = var.tags 3 | name = var.instance_name 4 | subnet_ids = var.aws_subnet_ids 5 | } 6 | 7 | resource "aws_db_instance" "ingestion_db" { 8 | allocated_storage = 15 9 | max_allocated_storage = 30 10 | engine = "mysql" 11 | engine_version = "8.0" 12 | instance_class = "db.t3.micro" 13 | identifier = var.instance_name 14 | db_subnet_group_name = aws_db_subnet_group.default.name 15 | username = "dataplatform" 16 | password = random_password.rds_password.result 17 | skip_final_snapshot = true 18 | vpc_security_group_ids = [aws_security_group.snapshot_db.id] 19 | apply_immediately = true 20 | ca_cert_identifier = "rds-ca-rsa2048-g1" 21 | } 22 | 23 | resource "random_password" "rds_password" { 24 | length = 40 25 | special = false 26 | } 27 | -------------------------------------------------------------------------------- /terraform/modules/sql-to-rds-snapshot/40-security-group.tf: -------------------------------------------------------------------------------- 1 | resource "aws_security_group" "snapshot_db" { 2 | name = var.instance_name 3 | description = "Restrict access to snapshot database" 4 | vpc_id = var.vpc_id 5 | tags = var.tags 6 | } 7 | 8 | resource "aws_security_group_rule" "allow_all_outbound_traffic" { 9 | description = "Allow all outbound traffic" 10 | security_group_id = aws_security_group.snapshot_db.id 11 | protocol = "-1" 12 | from_port = 0 13 | to_port = 0 14 | type = "egress" 15 | cidr_blocks = ["0.0.0.0/0"] 16 | ipv6_cidr_blocks = ["::/0"] 17 | } 18 | 19 | resource "aws_security_group_rule" "allow_mysql_inbound_traffic_within_the_security_group" { 20 | description = "Allow inbound traffic to MySQL within security group" 21 | security_group_id = aws_security_group.snapshot_db.id 22 | protocol = "TCP" 23 | from_port = 3306 24 | to_port = 3306 25 | type = "ingress" 26 | self = true 27 | } 28 | -------------------------------------------------------------------------------- /terraform/modules/sql-to-rds-snapshot/99-outputs.tf: -------------------------------------------------------------------------------- 1 | # We make any output files clear by adding them to the 99-outputs.tf, meaning anyone can quickly check if they're consuming your module 2 | 3 | output "ecr_repository_worker_endpoint" { 4 | value = module.sql_to_parquet.ecr_repository_worker_endpoint 5 | } 6 | 7 | output 
"rds_instance_id" { 8 | value = aws_db_instance.ingestion_db.id 9 | } 10 | 11 | output "cloudwatch_event_rule_name" { 12 | value = module.sql_to_parquet.event_rule_names[0] 13 | } 14 | output "cloudwatch_event_rule_arn" { 15 | value = module.sql_to_parquet.event_rule_arns[0] 16 | } 17 | 18 | output "rds_instance_arn" { 19 | value = aws_db_instance.ingestion_db.arn 20 | } 21 | -------------------------------------------------------------------------------- /terraform/modules/sql-to-rds-snapshot/task_definition_template.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "essential": true, 4 | "memory": 512, 5 | "name": "sql-to-parquet", 6 | "cpu": 2, 7 | "image": "${REPOSITORY_URL}:latest", 8 | "environment": [ 9 | { "name": "MYSQL_HOST", "value": ${jsonencode(MYSQL_HOST)} }, 10 | { "name": "MYSQL_USER", "value": ${jsonencode(MYSQL_USER)} }, 11 | { "name": "MYSQL_PASS", "value": ${jsonencode(MYSQL_PASS)} }, 12 | { "name": "BUCKET_NAME", "value": ${jsonencode(BUCKET_NAME)} }, 13 | { "name": "RDS_INSTANCE_ID", "value": ${jsonencode(RDS_INSTANCE_ID)} } 14 | ], 15 | "LogConfiguration": { 16 | "LogDriver": "awslogs", 17 | "Options": { 18 | "awslogs-group": "${LOG_GROUP}", 19 | "awslogs-region": "eu-west-2", 20 | "awslogs-stream-prefix": "sql-to-parquet" 21 | } 22 | } 23 | 24 | } 25 | ] 26 | -------------------------------------------------------------------------------- /terraform/networking/00-init.tf: -------------------------------------------------------------------------------- 1 | # Core Infrastructure 2 | provider "aws" { 3 | region = var.aws_deploy_region 4 | 5 | dynamic assume_role { 6 | for_each = local.environment != "dev" ? [1] : [] 7 | 8 | content { 9 | role_arn = "arn:aws:iam::${var.aws_deploy_account_id}:role/${var.aws_deploy_iam_role_name}" 10 | session_name = "Terraform" 11 | } 12 | } 13 | } 14 | 15 | provider "aws" { 16 | alias = "aws_api_account" 17 | region = var.aws_deploy_region 18 | assume_role { 19 | role_arn = "arn:aws:iam::${var.aws_api_account_id}:role/${var.aws_deploy_iam_role_name}" 20 | session_name = "Terraform" 21 | } 22 | } 23 | 24 | # General 25 | terraform { 26 | required_providers { 27 | aws = { 28 | source = "hashicorp/aws" 29 | version = "~> 4.0" 30 | } 31 | } 32 | backend "s3" { 33 | region = "eu-west-2" 34 | key = "tfstate" 35 | bucket = "dataplatform-terraform-state" 36 | encrypt = true 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /terraform/networking/03-input-derived.tf: -------------------------------------------------------------------------------- 1 | # General 2 | module "tags" { 3 | source = "github.com/LBHackney-IT/aws-tags-lbh.git?ref=v1.1.1" 4 | 5 | application = var.application 6 | automation_build_url = var.automation_build_url 7 | confidentiality = var.confidentiality 8 | custom_tags = var.custom_tags 9 | department = var.department 10 | environment = var.environment 11 | phase = var.phase 12 | project = var.project 13 | stack = var.stack 14 | team = var.team 15 | } 16 | 17 | locals { 18 | team_snake = lower(replace(var.team, " ", "-")) 19 | environment = lower(replace(var.environment, " ", "-")) 20 | application_snake = lower(replace(var.application, " ", "-")) 21 | identifier_prefix = lower("${local.application_snake}-${local.environment}") 22 | } 23 | -------------------------------------------------------------------------------- /terraform/networking/99-outputs.tf: 
-------------------------------------------------------------------------------- 1 | output "vpc_id" { 2 | description = "The ID of the VPC" 3 | value = module.core_vpc.vpc_id 4 | } 5 | 6 | output "private_subnets_ids" { 7 | description = "List of private subnets IDs" 8 | value = module.core_vpc.private_subnets 9 | } 10 | -------------------------------------------------------------------------------- /terraform/networking/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: $(MAKECMDGOALS) 2 | 3 | init: 4 | aws-vault exec hackney-dataplatform-development -- terraform init 5 | 6 | plan: 7 | aws-vault exec hackney-dataplatform-development -- terraform plan -var-file="../config/env.tfvars" 8 | 9 | apply: 10 | aws-vault exec hackney-dataplatform-development -- terraform apply -var-file="../config/env.tfvars" 11 | 12 | validate: 13 | terraform validate 14 | 15 | destroy: 16 | aws-vault exec hackney-dataplatform-development -- terraform destroy -var-file="../config/env.tfvars" 17 | 18 | lint-init: 19 | tflint --init --config="../config/.tflint.hcl" 20 | 21 | lint: 22 | aws-vault exec hackney-dataplatform-development -- tflint --var-file='../config/env.tfvars' --module --config="../config/.tflint.hcl" --loglevel=warn . -------------------------------------------------------------------------------- /terraform/networking/README.md: -------------------------------------------------------------------------------- 1 | ## Networking 2 | This project deploys networking for the Data Platform accounts. Unless you intend to modify the network configuration, 3 | you should not need to make changes to these files or run them. For a complete description of this process, please 4 | see the main README.md file. 5 | 6 | ## State Management 7 | The state for the network infrastructure is stored alongside the main module state but is not directly related. For 8 | development the state is stored in the default workspace rather than individual engineer workspaces. 9 | 10 | ## Terraform commands 11 | 12 | After running `make init`, you can run `make plan`, `make apply` and `make destroy` to run the Terraform plan/deploy/destroy commands with the development `env.tfvars` set for you. 13 | -------------------------------------------------------------------------------- /troubleshoot/package-helpers.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | call .venv\Scripts\activate 4 | python "scripts\setup.py" "bdist_wheel" --------------------------------------------------------------------------------
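
The `vpc_id` and `private_subnets_ids` values exposed in `terraform/networking/99-outputs.tf` above are intended to be read by the other Terraform stacks in this repository. A minimal sketch of how a consuming stack could do that with a `terraform_remote_state` data source is shown below, reusing the S3 backend settings declared in `terraform/networking/00-init.tf`; the exact state `key`/workspace layout used by the real core and ETL stacks is not shown in this extract, so treat the `config` block as illustrative rather than the repository's actual wiring.

```hcl
# Illustrative sketch only: reads the networking stack's outputs from its remote state.
# bucket/key/region are taken from terraform/networking/00-init.tf; the workspace
# handling of the real stacks is an assumption, not confirmed by this extract.
data "terraform_remote_state" "networking" {
  backend = "s3"

  config = {
    bucket = "dataplatform-terraform-state"
    key    = "tfstate"
    region = "eu-west-2"
  }
}

# Hypothetical usage of the exposed outputs in a consuming stack
locals {
  vpc_id             = data.terraform_remote_state.networking.outputs.vpc_id
  private_subnet_ids = data.terraform_remote_state.networking.outputs.private_subnets_ids
}
```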