├── examples ├── aws_user_tags │ ├── data.tf │ ├── outputs.tf │ ├── locals.tf │ └── variables.tf ├── outputs.tf ├── aws_data.tf ├── variables.tf ├── templates │ └── json │ │ ├── glue_service_policy.json │ │ └── glue_user_policy.json └── main.tf ├── glue_resource_policy.tf ├── glue_registry.tf ├── glue_workflow.tf ├── glue_schema.tf ├── .gitignore ├── glue_data_quality_ruleset.tf ├── glue_connection.tf ├── glue_user_defined_function.tf ├── glue_partition_index.tf ├── glue_dev_endpoint.tf ├── glue_data_catalog_encryption_settings.tf ├── glue_security_configuration.tf ├── glue_catalog_database.tf ├── glue_classifier.tf ├── glue_job.tf ├── glue_ml_transform.tf ├── glue_partition.tf ├── glue_trigger.tf ├── glue_catalog_table.tf ├── glue_crawler.tf ├── outputs.tf ├── variables.tf └── README.md /examples/aws_user_tags/data.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" { 2 | } 3 | -------------------------------------------------------------------------------- /examples/aws_user_tags/outputs.tf: -------------------------------------------------------------------------------- 1 | output "tags" { 2 | value = local.tags 3 | } -------------------------------------------------------------------------------- /examples/outputs.tf: -------------------------------------------------------------------------------- 1 | output "glue_crawler_name" { 2 | value = module.glue.glue_crawler_id 3 | } 4 | 5 | output "glue_job_name" { 6 | value = module.glue.glue_job_id 7 | } 8 | 9 | output "random_glue_connection_password" { 10 | value = module.random_glue_connection_password.password_result 11 | sensitive = true 12 | } 13 | -------------------------------------------------------------------------------- /examples/aws_data.tf: -------------------------------------------------------------------------------- 1 | # Get the user and account information 2 | data "aws_caller_identity" "current" { 3 | } 4 | 5 | # Get the 
correct AWS partition values can be: 6 | # "aws" - Public AWS partition 7 | # "aws-cn" - AWS China partition 8 | # "aws-us-gov" - US Government partition 9 | data "aws_partition" "current" { 10 | } -------------------------------------------------------------------------------- /examples/aws_user_tags/locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | tags = merge( 3 | var.custom_tags, 4 | { 5 | terraform_managed = true 6 | environment = var.environment 7 | modified = formatdate("DD-MMM-YY hh:mm:ss ZZZ", timestamp()) 8 | modified_by = data.aws_caller_identity.current.arn 9 | } 10 | ) 11 | } -------------------------------------------------------------------------------- /examples/variables.tf: -------------------------------------------------------------------------------- 1 | variable "glue_connection_user_name" { 2 | type = string 3 | description = "Glue connection user name" 4 | default = "exampleglueusername" 5 | } 6 | 7 | variable "example_tags" { 8 | type = map(any) 9 | description = "Tag values for this example" 10 | default = { 11 | "cost-center" = "00-00000.000.01" 12 | "Project" = "My Test Glue Project" 13 | } 14 | } -------------------------------------------------------------------------------- /examples/aws_user_tags/variables.tf: -------------------------------------------------------------------------------- 1 | variable "environment" { 2 | type = string 3 | description = "Environment value" 4 | validation { 5 | condition = can(regex("^(DEV|TEST|QA|PROD|STAGE)$", var.environment)) 6 | error_message = "ERROR: environment must be of type: DEV, TEST, QA, PROD or STAGE." 
7 | } 8 | } 9 | 10 | variable "custom_tags" { 11 | type = map(string) 12 | description = "Custom AWS tags provided by the user" 13 | default = {} 14 | } -------------------------------------------------------------------------------- /glue_resource_policy.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue resource policy 3 | #--------------------------------------------------- 4 | resource "aws_glue_resource_policy" "glue_resource_policy" { 5 | count = var.enable_glue_resource_policy ? 1 : 0 6 | 7 | policy = var.glue_resource_policy 8 | 9 | enable_hybrid = var.glue_resource_policy_enable_hybrid 10 | 11 | lifecycle { 12 | create_before_destroy = true 13 | ignore_changes = [] 14 | } 15 | 16 | depends_on = [] 17 | } -------------------------------------------------------------------------------- /glue_registry.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue registry 3 | #--------------------------------------------------- 4 | resource "aws_glue_registry" "glue_registry" { 5 | count = var.enable_glue_registry ? 1 : 0 6 | 7 | registry_name = var.glue_registry_name != "" ? lower(var.glue_registry_name) : "${lower(var.name)}-glue-registry-${lower(var.environment)}" 8 | 9 | description = var.glue_registry_description 10 | 11 | tags = merge( 12 | { 13 | Name = var.glue_registry_name != "" ? 
lower(var.glue_registry_name) : "${lower(var.name)}-glue-registry-${lower(var.environment)}" 14 | }, 15 | var.tags 16 | ) 17 | 18 | lifecycle { 19 | create_before_destroy = true 20 | ignore_changes = [] 21 | } 22 | 23 | depends_on = [] 24 | } -------------------------------------------------------------------------------- /glue_workflow.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue workflow 3 | #--------------------------------------------------- 4 | resource "aws_glue_workflow" "glue_workflow" { 5 | count = var.enable_glue_workflow ? 1 : 0 6 | 7 | name = var.glue_workflow_name != "" ? lower(var.glue_workflow_name) : "${lower(var.name)}-glue-workflow-${lower(var.environment)}" 8 | 9 | description = var.glue_workflow_description 10 | default_run_properties = var.glue_workflow_default_run_properties 11 | max_concurrent_runs = var.glue_workflow_max_concurrent_runs 12 | 13 | tags = merge( 14 | { 15 | Name = var.glue_workflow_name != "" ? lower(var.glue_workflow_name) : "${lower(var.name)}-glue-workflow-${lower(var.environment)}" 16 | }, 17 | var.tags 18 | ) 19 | 20 | lifecycle { 21 | create_before_destroy = true 22 | ignore_changes = [] 23 | } 24 | 25 | depends_on = [] 26 | } 27 | -------------------------------------------------------------------------------- /glue_schema.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue schema 3 | #--------------------------------------------------- 4 | resource "aws_glue_schema" "glue_schema" { 5 | count = var.enable_glue_schema ? 1 : 0 6 | 7 | schema_name = var.glue_schema_name != "" ? lower(var.glue_schema_name) : "${lower(var.name)}-glue-schema-${lower(var.environment)}" 8 | registry_arn = var.glue_schema_registry_arn != "" ? var.glue_schema_registry_arn : (var.enable_glue_registry ? 
aws_glue_registry.glue_registry.0.arn : null) 9 | data_format = var.glue_schema_data_format 10 | compatibility = var.glue_schema_compatibility 11 | schema_definition = var.glue_schema_schema_definition 12 | 13 | description = var.glue_schema_description 14 | 15 | tags = merge( 16 | { 17 | Name = var.glue_schema_name != "" ? lower(var.glue_schema_name) : "${lower(var.name)}-glue-schema-${lower(var.environment)}" 18 | }, 19 | var.tags 20 | ) 21 | 22 | lifecycle { 23 | create_before_destroy = true 24 | ignore_changes = [] 25 | } 26 | 27 | depends_on = [ 28 | aws_glue_registry.glue_registry 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Apple macOS operating system specific files 2 | # .DS_Store (Desktop Services Store) 3 | **/.DS_Store 4 | # Apple Icon file 5 | **/Icon* 6 | # Apple backup file 7 | **/*_BK 8 | 9 | # Terraform Lock files 10 | **/.terraform.lock.hcl 11 | 12 | # Local .terraform directories 13 | **/.terraform/* 14 | 15 | # .tfstate files 16 | **/*.tfstate 17 | **/*.tfstate.* 18 | **/terraform.tfstate 19 | **/terraform.tfstate.backup 20 | 21 | # Crash log files 22 | **/crash.log 23 | 24 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 25 | # passwords, private keys, and other secrets. These should not be part of version 26 | # control as they are data points which are potentially sensitive and subject 27 | # to change depending on the environment. 
28 | # 29 | **/*.tfvars 30 | 31 | # Ignore override files as they are usually used to override resources locally and so 32 | # are not checked in 33 | override.tf 34 | override.tf.json 35 | *_override.tf 36 | *_override.tf.json 37 | 38 | # Include override files you do wish to add to version control using negated pattern 39 | # 40 | # !example_override.tf 41 | !.gitignore 42 | !.pre-commit-config.yaml 43 | !.github 44 | 45 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 46 | **/*tfplan* 47 | 48 | # Ignore CLI configuration files 49 | **/.terraformrc 50 | **/terraform.rc 51 | -------------------------------------------------------------------------------- /glue_data_quality_ruleset.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue data quality ruleset 3 | #--------------------------------------------------- 4 | resource "aws_glue_data_quality_ruleset" "glue_data_quality_ruleset" { 5 | count = var.enable_glue_data_quality_ruleset ? 1 : 0 6 | 7 | name = var.glue_data_quality_ruleset_name != "" ? lower(var.glue_data_quality_ruleset_name) : "${lower(var.name)}-data-quality-ruleset-${lower(var.environment)}" 8 | 9 | description = var.glue_data_quality_ruleset_description 10 | ruleset = var.glue_data_quality_ruleset_ruleset 11 | 12 | dynamic "target_table" { 13 | iterator = target_table 14 | for_each = length(keys(var.glue_trigger_target_table)) > 0 ? [var.glue_trigger_target_table] : [] 15 | 16 | content { 17 | database_name = lookup(target_table.value, "database_name", null) 18 | table_name = lookup(target_table.value, "table_name", null) 19 | 20 | catalog_id = lookup(target_table.value, "catalog_id", null) 21 | } 22 | } 23 | 24 | tags = merge( 25 | { 26 | Name = var.glue_data_quality_ruleset_name != "" ? 
lower(var.glue_data_quality_ruleset_name) : "${lower(var.name)}-data-quality-ruleset-${lower(var.environment)}" 27 | }, 28 | var.tags 29 | ) 30 | 31 | lifecycle { 32 | create_before_destroy = true 33 | ignore_changes = [] 34 | } 35 | 36 | depends_on = [] 37 | } 38 | -------------------------------------------------------------------------------- /glue_connection.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue connection 3 | #--------------------------------------------------- 4 | resource "aws_glue_connection" "glue_connection" { 5 | count = var.enable_glue_connection ? 1 : 0 6 | 7 | name = var.glue_connection_name != "" ? lower(var.glue_connection_name) : "${lower(var.name)}-glue-connection-${lower(var.environment)}" 8 | 9 | description = var.glue_connection_description 10 | catalog_id = var.glue_connection_catalog_id 11 | connection_properties = var.glue_connection_connection_properties 12 | connection_type = upper(var.glue_connection_connection_type) 13 | match_criteria = var.glue_connection_match_criteria 14 | 15 | dynamic "physical_connection_requirements" { 16 | iterator = physical_connection_requirements 17 | for_each = var.glue_connection_physical_connection_requirements 18 | 19 | content { 20 | availability_zone = lookup(physical_connection_requirements.value, "availability_zone", null) 21 | security_group_id_list = lookup(physical_connection_requirements.value, "security_group_id_list", []) 22 | subnet_id = lookup(physical_connection_requirements.value, "subnet_id", null) 23 | } 24 | } 25 | 26 | tags = merge( 27 | { 28 | Name = var.glue_connection_name != "" ? 
lower(var.glue_connection_name) : "${lower(var.name)}-glue-connection-${lower(var.environment)}" 29 | }, 30 | var.tags 31 | ) 32 | 33 | lifecycle { 34 | create_before_destroy = true 35 | ignore_changes = [] 36 | } 37 | 38 | depends_on = [] 39 | } 40 | -------------------------------------------------------------------------------- /glue_user_defined_function.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue user defined function 3 | #--------------------------------------------------- 4 | resource "aws_glue_user_defined_function" "glue_user_defined_function" { 5 | count = var.enable_glue_user_defined_function ? 1 : 0 6 | 7 | name = var.glue_user_defined_function_name != "" ? lower(var.glue_user_defined_function_name) : "${lower(var.name)}-glue-user-defined-fun-${lower(var.environment)}" 8 | database_name = var.glue_user_defined_function_database_name != "" ? var.glue_user_defined_function_database_name : (var.enable_glue_catalog_database ? aws_glue_catalog_database.glue_catalog_database.0.name : null) 9 | class_name = var.glue_user_defined_function_class_name 10 | owner_name = var.glue_user_defined_function_owner_name 11 | owner_type = var.glue_user_defined_function_owner_type 12 | 13 | catalog_id = var.glue_user_defined_function_catalog_id != "" ? var.glue_user_defined_function_catalog_id : (var.enable_glue_catalog_database ? 
aws_glue_catalog_database.glue_catalog_database.0.catalog_id : null) 14 | 15 | dynamic "resource_uris" { 16 | iterator = resource_uris 17 | for_each = var.glue_user_defined_function_resource_uris 18 | 19 | content { 20 | resource_type = lookup(resource_uris.value, "resource_type", null) 21 | uri = lookup(resource_uris.value, "uri", null) 22 | } 23 | } 24 | 25 | lifecycle { 26 | create_before_destroy = true 27 | ignore_changes = [] 28 | } 29 | 30 | depends_on = [ 31 | aws_glue_catalog_database.glue_catalog_database 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /glue_partition_index.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue partition index 3 | #--------------------------------------------------- 4 | resource "aws_glue_partition_index" "glue_partition_index" { 5 | count = var.enable_glue_partition_index ? 1 : 0 6 | 7 | table_name = var.glue_partition_index_table_name != "" ? var.glue_partition_index_table_name : (var.enable_glue_catalog_table ? aws_glue_catalog_table.glue_catalog_table[count.index].name : null) 8 | database_name = var.glue_partition_index_database_name != "" ? var.glue_partition_index_database_name : (var.enable_glue_catalog_database ? aws_glue_catalog_database.glue_catalog_database[count.index].name : null) 9 | 10 | catalog_id = var.glue_partition_index_catalog_id 11 | 12 | dynamic "partition_index" { 13 | iterator = partition_index 14 | for_each = var.glue_partition_index_partition_index 15 | 16 | content { 17 | index_name = lookup(partition_index.value, "index_name", null) 18 | keys = lookup(partition_index.value, "keys", null) 19 | } 20 | } 21 | 22 | dynamic "timeouts" { 23 | iterator = timeouts 24 | for_each = length(keys(var.glue_partition_index_timeouts)) > 0 ? 
[var.glue_partition_index_timeouts] : [] 25 | 26 | content { 27 | create = lookup(timeouts.value, "create", null) 28 | delete = lookup(timeouts.value, "delete", null) 29 | } 30 | } 31 | 32 | lifecycle { 33 | create_before_destroy = true 34 | ignore_changes = [] 35 | } 36 | 37 | depends_on = [ 38 | aws_glue_catalog_table.glue_catalog_table, 39 | aws_glue_catalog_database.glue_catalog_database 40 | ] 41 | } -------------------------------------------------------------------------------- /glue_dev_endpoint.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue dev endpoint 3 | #--------------------------------------------------- 4 | resource "aws_glue_dev_endpoint" "glue_dev_endpoint" { 5 | count = var.enable_glue_dev_endpoint ? 1 : 0 6 | 7 | name = var.glue_dev_endpoint_name != "" ? lower(var.glue_dev_endpoint_name) : "${lower(var.name)}-glue-dev-endpoint-${lower(var.environment)}" 8 | role_arn = var.glue_dev_endpoint_role_arn 9 | 10 | arguments = var.glue_dev_endpoint_arguments 11 | extra_jars_s3_path = var.glue_dev_endpoint_extra_jars_s3_path 12 | extra_python_libs_s3_path = var.glue_dev_endpoint_extra_python_libs_s3_path 13 | glue_version = var.glue_dev_endpoint_glue_version 14 | number_of_nodes = var.glue_dev_endpoint_number_of_nodes 15 | number_of_workers = var.glue_dev_endpoint_number_of_workers 16 | public_key = var.glue_dev_endpoint_public_key 17 | public_keys = var.glue_dev_endpoint_public_keys 18 | security_configuration = var.glue_dev_endpoint_security_configuration 19 | security_group_ids = var.glue_dev_endpoint_security_group_ids 20 | subnet_id = var.glue_dev_endpoint_subnet_id 21 | worker_type = var.glue_dev_endpoint_worker_type 22 | 23 | tags = merge( 24 | { 25 | Name = var.glue_dev_endpoint_name != "" ? 
lower(var.glue_dev_endpoint_name) : "${lower(var.name)}-glue-dev-endpoint-${lower(var.environment)}" 26 | }, 27 | var.tags 28 | ) 29 | 30 | lifecycle { 31 | create_before_destroy = true 32 | ignore_changes = [] 33 | } 34 | 35 | depends_on = [] 36 | } -------------------------------------------------------------------------------- /glue_data_catalog_encryption_settings.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue data catalog encryption settings 3 | #--------------------------------------------------- 4 | resource "aws_glue_data_catalog_encryption_settings" "glue_data_catalog_encryption_settings" { 5 | count = var.enable_glue_data_catalog_encryption_settings ? 1 : 0 6 | 7 | catalog_id = var.glue_data_catalog_encryption_settings_catalog_id 8 | 9 | data_catalog_encryption_settings { 10 | dynamic "connection_password_encryption" { 11 | iterator = connection_password_encryption 12 | for_each = lookup(var.glue_data_catalog_encryption_settings_data_catalog_encryption_settings, "connection_password_encryption", []) 13 | 14 | content { 15 | aws_kms_key_id = lookup(connection_password_encryption.value, "aws_kms_key_id", null) 16 | return_connection_password_encrypted = lookup(connection_password_encryption.value, "return_connection_password_encrypted", null) 17 | } 18 | } 19 | 20 | dynamic "encryption_at_rest" { 21 | iterator = encryption_at_rest 22 | for_each = lookup(var.glue_data_catalog_encryption_settings_data_catalog_encryption_settings, "encryption_at_rest", []) 23 | 24 | content { 25 | catalog_encryption_mode = lookup(encryption_at_rest.value, "catalog_encryption_mode", null) 26 | sse_aws_kms_key_id = lookup(encryption_at_rest.value, "sse_aws_kms_key_id", null) 27 | } 28 | } 29 | } 30 | 31 | lifecycle { 32 | create_before_destroy = true 33 | ignore_changes = [] 34 | } 35 | 36 | depends_on = [ 37 | aws_glue_catalog_database.glue_catalog_database 38 | ] 39 | 
} -------------------------------------------------------------------------------- /glue_security_configuration.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS glue security configuration 3 | #--------------------------------------------------- 4 | resource "aws_glue_security_configuration" "glue_security_configuration" { 5 | count = var.enable_glue_security_configuration ? 1 : 0 6 | 7 | name = var.glue_security_configuration_name != "" ? lower(var.glue_security_configuration_name) : "${lower(var.name)}-glue-sec-conf-${lower(var.environment)}" 8 | 9 | encryption_configuration { 10 | dynamic "cloudwatch_encryption" { 11 | iterator = cloudwatch_encryption 12 | for_each = lookup(var.glue_security_configuration_encryption_configuration, "cloudwatch_encryption", []) 13 | 14 | content { 15 | cloudwatch_encryption_mode = lookup(cloudwatch_encryption.value, "cloudwatch_encryption_mode", null) 16 | kms_key_arn = lookup(cloudwatch_encryption.value, "kms_key_arn", null) 17 | } 18 | } 19 | 20 | dynamic "job_bookmarks_encryption" { 21 | iterator = job_bookmarks_encryption 22 | for_each = lookup(var.glue_security_configuration_encryption_configuration, "job_bookmarks_encryption", []) 23 | 24 | content { 25 | job_bookmarks_encryption_mode = lookup(job_bookmarks_encryption.value, "job_bookmarks_encryption_mode", null) 26 | kms_key_arn = lookup(job_bookmarks_encryption.value, "kms_key_arn", null) 27 | } 28 | } 29 | 30 | dynamic "s3_encryption" { 31 | iterator = s3_encryption 32 | for_each = lookup(var.glue_security_configuration_encryption_configuration, "s3_encryption", []) 33 | 34 | content { 35 | s3_encryption_mode = lookup(s3_encryption.value, "s3_encryption_mode", null) 36 | kms_key_arn = lookup(s3_encryption.value, "kms_key_arn", null) 37 | } 38 | } 39 | } 40 | 41 | lifecycle { 42 | create_before_destroy = true 43 | ignore_changes = [] 44 | } 45 | 46 | depends_on = [] 47 | 
} 48 | -------------------------------------------------------------------------------- /glue_catalog_database.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue catalog database 3 | #--------------------------------------------------- 4 | resource "aws_glue_catalog_database" "glue_catalog_database" { 5 | count = var.enable_glue_catalog_database ? 1 : 0 6 | 7 | name = var.glue_catalog_database_name != "" ? lower(var.glue_catalog_database_name) : "${lower(var.name)}-glue-catalog-db-${lower(var.environment)}" 8 | 9 | description = var.glue_catalog_database_description 10 | catalog_id = var.glue_catalog_database_catalog_id 11 | location_uri = var.glue_catalog_database_location_uri 12 | parameters = var.glue_catalog_database_parameters 13 | 14 | dynamic "create_table_default_permission" { 15 | iterator = create_table_default_permission 16 | for_each = length(keys(var.glue_catalog_database_create_table_default_permission)) > 0 ? [var.glue_catalog_database_create_table_default_permission] : [] 17 | 18 | content { 19 | permissions = lookup(create_table_default_permission.value, "permissions", null) 20 | 21 | 22 | dynamic "principal" { 23 | iterator = principal 24 | for_each = length(keys(lookup(create_table_default_permission.value, "principal", {}))) > 0 ? [lookup(create_table_default_permission.value, "principal", {})] : [] 25 | 26 | content { 27 | data_lake_principal_identifier = lookup(principal.value, "data_lake_principal_identifier", null) 28 | } 29 | } 30 | } 31 | } 32 | 33 | dynamic "target_database" { 34 | iterator = target_database 35 | for_each = length(keys(var.glue_catalog_database_target_database)) > 0 ? 
[var.glue_catalog_database_target_database] : [] 36 | 37 | content { 38 | catalog_id = lookup(target_database.value, "catalog_id", null) 39 | database_name = lookup(target_database.value, "database_name", null) 40 | 41 | region = lookup(target_database.value, "region", null) 42 | } 43 | } 44 | 45 | tags = merge( 46 | { 47 | Name = var.glue_catalog_database_name != "" ? lower(var.glue_catalog_database_name) : "${lower(var.name)}-glue-catalog-db-${lower(var.environment)}" 48 | }, 49 | var.tags 50 | ) 51 | 52 | lifecycle { 53 | create_before_destroy = true 54 | ignore_changes = [] 55 | } 56 | 57 | depends_on = [] 58 | } 59 | -------------------------------------------------------------------------------- /glue_classifier.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue classifier 3 | #--------------------------------------------------- 4 | resource "aws_glue_classifier" "glue_classifier" { 5 | count = var.enable_glue_classifier ? 1 : 0 6 | 7 | name = var.glue_classifier_name != "" ? 
lower(var.glue_classifier_name) : "${lower(var.name)}-glue-classifier-${lower(var.environment)}" 8 | 9 | dynamic "csv_classifier" { 10 | iterator = csv_classifier 11 | for_each = var.glue_classifier_csv_classifier 12 | 13 | content { 14 | allow_single_column = lookup(csv_classifier.value, "allow_single_column", null) 15 | contains_header = lookup(csv_classifier.value, "contains_header", null) 16 | custom_datatype_configured = lookup(csv_classifier.value, "custom_datatype_configured", null) 17 | custom_datatypes = lookup(csv_classifier.value, "custom_datatypes", null) 18 | delimiter = lookup(csv_classifier.value, "delimiter", null) 19 | disable_value_trimming = lookup(csv_classifier.value, "disable_value_trimming", null) 20 | header = lookup(csv_classifier.value, "header", null) 21 | quote_symbol = lookup(csv_classifier.value, "quote_symbol", null) 22 | } 23 | } 24 | 25 | dynamic "grok_classifier" { 26 | iterator = grok_classifier 27 | for_each = var.glue_classifier_grok_classifier 28 | 29 | content { 30 | classification = lookup(grok_classifier.value, "classification", null) 31 | custom_patterns = lookup(grok_classifier.value, "custom_patterns", null) 32 | grok_pattern = lookup(grok_classifier.value, "grok_pattern", null) 33 | } 34 | } 35 | 36 | dynamic "json_classifier" { 37 | iterator = json_classifier 38 | for_each = var.glue_classifier_json_classifier 39 | 40 | content { 41 | json_path = lookup(json_classifier.value, "json_path", null) 42 | } 43 | } 44 | 45 | dynamic "xml_classifier" { 46 | iterator = xml_classifier 47 | for_each = var.glue_classifier_xml_classifier 48 | 49 | content { 50 | classification = lookup(xml_classifier.value, "classification", null) 51 | row_tag = lookup(xml_classifier.value, "row_tag", null) 52 | } 53 | } 54 | 55 | lifecycle { 56 | create_before_destroy = true 57 | ignore_changes = [] 58 | } 59 | 60 | depends_on = [] 61 | } 62 | -------------------------------------------------------------------------------- /glue_job.tf: 
-------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue job 3 | #--------------------------------------------------- 4 | resource "aws_glue_job" "glue_job" { 5 | count = var.enable_glue_job ? 1 : 0 6 | 7 | name = var.glue_job_name != "" ? lower(var.glue_job_name) : "${lower(var.name)}-glue-job-${lower(var.environment)}" 8 | role_arn = var.glue_job_role_arn 9 | 10 | description = var.glue_job_description 11 | connections = length(var.glue_job_connections) > 0 ? var.glue_job_connections : (var.enable_glue_connection ? concat(var.glue_job_additional_connections, [element(concat(aws_glue_connection.glue_connection.*.name, [""]), 0)]) : []) 12 | default_arguments = var.glue_job_default_arguments 13 | non_overridable_arguments = var.glue_job_non_overridable_arguments 14 | glue_version = var.glue_job_glue_version 15 | execution_class = var.glue_job_execution_class 16 | max_capacity = var.glue_job_max_capacity 17 | max_retries = var.glue_job_max_retries 18 | timeout = var.glue_job_timeout 19 | security_configuration = var.glue_job_security_configuration != "" && !var.enable_glue_security_configuration ? 
var.glue_job_security_configuration : element(concat(aws_glue_security_configuration.glue_security_configuration.*.id, [""]), 0) 20 | worker_type = var.glue_job_worker_type 21 | number_of_workers = var.glue_job_number_of_workers 22 | 23 | dynamic "command" { 24 | iterator = command 25 | for_each = var.glue_job_command 26 | 27 | content { 28 | script_location = lookup(command.value, "script_location", null) 29 | 30 | name = lookup(command.value, "name", null) 31 | python_version = lookup(command.value, "python_version", null) 32 | } 33 | } 34 | 35 | dynamic "execution_property" { 36 | iterator = execution_property 37 | for_each = var.glue_job_execution_property 38 | 39 | content { 40 | max_concurrent_runs = lookup(execution_property.value, "max_concurrent_runs", 1) 41 | } 42 | } 43 | 44 | dynamic "notification_property" { 45 | iterator = notification_property 46 | for_each = var.glue_job_notification_property 47 | 48 | content { 49 | notify_delay_after = lookup(notification_property.value, "notify_delay_after", null) 50 | } 51 | } 52 | 53 | tags = merge( 54 | { 55 | Name = var.glue_job_name != "" ? lower(var.glue_job_name) : "${lower(var.name)}-glue-job-${lower(var.environment)}" 56 | }, 57 | var.tags 58 | ) 59 | 60 | lifecycle { 61 | create_before_destroy = true 62 | ignore_changes = [] 63 | } 64 | 65 | depends_on = [ 66 | aws_glue_connection.glue_connection, 67 | aws_glue_security_configuration.glue_security_configuration 68 | ] 69 | } 70 | -------------------------------------------------------------------------------- /glue_ml_transform.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue ml transform 3 | #--------------------------------------------------- 4 | resource "aws_glue_ml_transform" "glue_ml_transform" { 5 | count = var.enable_glue_ml_transform ? 1 : 0 6 | 7 | name = var.glue_ml_transform_name != "" ? 
lower(var.glue_ml_transform_name) : "${lower(var.name)}-glue-ml-transform-${lower(var.environment)}" 8 | role_arn = var.glue_ml_transform_role_arn 9 | 10 | description = var.glue_ml_transform_description 11 | glue_version = var.glue_ml_transform_glue_version 12 | max_capacity = var.glue_ml_transform_max_capacity 13 | max_retries = var.glue_ml_transform_max_retries 14 | timeout = var.glue_ml_transform_timeout 15 | worker_type = var.glue_ml_transform_worker_type 16 | 17 | number_of_workers = var.glue_ml_transform_number_of_workers 18 | 19 | dynamic "input_record_tables" { 20 | iterator = input_record_tables 21 | for_each = var.glue_ml_transform_input_record_tables 22 | 23 | content { 24 | database_name = lookup(input_record_tables.value, "database_name", (var.enable_glue_catalog_table ? element(aws_glue_catalog_table.glue_catalog_table.*.database_name, 0) : null)) 25 | table_name = lookup(input_record_tables.value, "table_name", (var.enable_glue_catalog_table ? element(aws_glue_catalog_table.glue_catalog_table.*.name, 0) : null)) 26 | 27 | catalog_id = lookup(input_record_tables.value, "catalog_id", null) 28 | connection_name = lookup(input_record_tables.value, "connection_name", null) 29 | } 30 | } 31 | 32 | dynamic "parameters" { 33 | iterator = parameters 34 | for_each = var.glue_ml_transform_parameters 35 | 36 | content { 37 | transform_type = lookup(parameters.value, "transform_type", null) 38 | 39 | dynamic "find_matches_parameters" { 40 | iterator = find_matches_parameters 41 | for_each = lookup(parameters.value, "find_matches_parameters", []) 42 | 43 | content { 44 | primary_key_column_name = lookup(find_matches_parameters.value, "primary_key_column_name", null) 45 | accuracy_cost_trade_off = lookup(find_matches_parameters.value, "accuracy_cost_trade_off", null) 46 | enforce_provided_labels = lookup(find_matches_parameters.value, "enforce_provided_labels", null) 47 | precision_recall_trade_off = lookup(find_matches_parameters.value, 
"precision_recall_trade_off", null) 48 | } 49 | } 50 | } 51 | } 52 | 53 | tags = merge( 54 | { 55 | Name = var.glue_ml_transform_name != "" ? lower(var.glue_ml_transform_name) : "${lower(var.name)}-glue-ml-transform-${lower(var.environment)}" 56 | }, 57 | var.tags 58 | ) 59 | 60 | lifecycle { 61 | create_before_destroy = true 62 | ignore_changes = [] 63 | } 64 | 65 | depends_on = [ 66 | aws_glue_catalog_table.glue_catalog_table 67 | ] 68 | } -------------------------------------------------------------------------------- /examples/templates/json/glue_service_policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "glue:*", 8 | "s3:GetBucketLocation", 9 | "s3:ListBucket", 10 | "s3:ListAllMyBuckets", 11 | "s3:GetBucketAcl", 12 | "ec2:DescribeVpcEndpoints", 13 | "ec2:DescribeRouteTables", 14 | "ec2:CreateNetworkInterface", 15 | "ec2:DeleteNetworkInterface", 16 | "ec2:DescribeNetworkInterfaces", 17 | "ec2:DescribeSecurityGroups", 18 | "ec2:DescribeSubnets", 19 | "ec2:DescribeVpcAttribute", 20 | "iam:ListRolePolicies", 21 | "iam:GetRole", 22 | "iam:GetRolePolicy", 23 | "cloudwatch:PutMetricData" 24 | ], 25 | "Resource": [ 26 | "*" 27 | ] 28 | }, 29 | { 30 | "Effect": "Allow", 31 | "Action": [ 32 | "s3:CreateBucket" 33 | ], 34 | "Resource": [ 35 | "arn:${partition}:s3:::aws-glue-*" 36 | ] 37 | }, 38 | { 39 | "Effect": "Allow", 40 | "Action": [ 41 | "s3:GetObject", 42 | "s3:PutObject", 43 | "s3:DeleteObject" 44 | ], 45 | "Resource": [ 46 | "arn:${partition}:s3:::aws-glue-*/*", 47 | "arn:${partition}:s3:::*/*aws-glue-*/*" 48 | ] 49 | }, 50 | { 51 | "Effect": "Allow", 52 | "Action": [ 53 | "s3:GetObject" 54 | ], 55 | "Resource": [ 56 | "arn:${partition}:s3:::crawler-public*", 57 | "arn:${partition}:s3:::aws-glue-*" 58 | ] 59 | }, 60 | { 61 | "Effect": "Allow", 62 | "Action": [ 63 | "logs:CreateLogGroup", 64 | "logs:CreateLogStream", 65 
| "logs:PutLogEvents", 66 | "logs:AssociateKmsKey" 67 | ], 68 | "Resource": [ 69 | "arn:${partition}:logs:*:*:/aws-glue/*" 70 | ] 71 | }, 72 | { 73 | "Effect": "Allow", 74 | "Action": [ 75 | "ec2:CreateTags", 76 | "ec2:DeleteTags" 77 | ], 78 | "Condition": { 79 | "ForAllValues:StringEquals": { 80 | "aws:TagKeys": [ 81 | "aws-glue-service-resource" 82 | ] 83 | } 84 | }, 85 | "Resource": [ 86 | "arn:${partition}:ec2:*:*:network-interface/*", 87 | "arn:${partition}:ec2:*:*:security-group/*", 88 | "arn:${partition}:ec2:*:*:instance/*" 89 | ] 90 | } 91 | ] 92 | } -------------------------------------------------------------------------------- /glue_partition.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue partition 3 | #--------------------------------------------------- 4 | resource "aws_glue_partition" "glue_partition" { 5 | count = var.enable_glue_partition ? 1 : 0 6 | 7 | database_name = var.glue_partition_database_name != "" ? var.glue_partition_database_name : (var.enable_glue_catalog_database ? aws_glue_catalog_database.glue_catalog_database[0].name : null) 8 | table_name = var.glue_partition_table_name != "" ? var.glue_partition_table_name : (var.enable_glue_catalog_table ? 
aws_glue_catalog_table.glue_catalog_table[0].name : null) 9 | partition_values = var.glue_partition_partition_values 10 | 11 | catalog_id = var.glue_partition_catalog_id 12 | parameters = var.glue_partition_parameters 13 | 14 | storage_descriptor { 15 | location = lookup(var.glue_partition_storage_descriptor, "location", null) 16 | input_format = lookup(var.glue_partition_storage_descriptor, "input_format", null) 17 | output_format = lookup(var.glue_partition_storage_descriptor, "output_format", null) 18 | compressed = lookup(var.glue_partition_storage_descriptor, "compressed", null) 19 | number_of_buckets = lookup(var.glue_partition_storage_descriptor, "number_of_buckets", null) 20 | bucket_columns = lookup(var.glue_partition_storage_descriptor, "bucket_columns", null) 21 | parameters = lookup(var.glue_partition_storage_descriptor, "parameters", null) 22 | stored_as_sub_directories = lookup(var.glue_partition_storage_descriptor, "stored_as_sub_directories", null) 23 | 24 | dynamic "columns" { 25 | iterator = columns 26 | for_each = lookup(var.glue_partition_storage_descriptor, "columns", []) 27 | 28 | content { 29 | name = lookup(columns.value, "name", null) 30 | 31 | type = lookup(columns.value, "type", null) 32 | comment = lookup(columns.value, "comment", null) 33 | } 34 | } 35 | 36 | dynamic "ser_de_info" { 37 | iterator = ser_de_info 38 | for_each = lookup(var.glue_partition_storage_descriptor, "ser_de_info", []) 39 | 40 | content { 41 | name = lookup(ser_de_info.value, "name", null) 42 | 43 | parameters = lookup(ser_de_info.value, "parameters", null) 44 | serialization_library = lookup(ser_de_info.value, "serialization_library", null) 45 | } 46 | } 47 | 48 | dynamic "sort_columns" { 49 | iterator = sort_columns 50 | for_each = lookup(var.glue_partition_storage_descriptor, "sort_columns", []) 51 | 52 | content { 53 | column = lookup(sort_columns.value, "column", null) 54 | sort_order = lookup(sort_columns.value, "sort_order", null) 55 | } 56 | } 57 | 58 | 
dynamic "skewed_info" { 59 | iterator = skewed_info 60 | for_each = lookup(var.glue_partition_storage_descriptor, "skewed_info", []) 61 | 62 | content { 63 | skewed_column_names = lookup(skewed_info.value, "skewed_column_names", null) 64 | skewed_column_value_location_maps = lookup(skewed_info.value, "skewed_column_value_location_maps", null) 65 | skewed_column_values = lookup(skewed_info.value, "skewed_column_values", null) 66 | } 67 | } 68 | } 69 | 70 | lifecycle { 71 | create_before_destroy = true 72 | ignore_changes = [] 73 | } 74 | 75 | depends_on = [ 76 | aws_glue_catalog_database.glue_catalog_database, 77 | aws_glue_catalog_table.glue_catalog_table 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /glue_trigger.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue trigger 3 | #--------------------------------------------------- 4 | resource "aws_glue_trigger" "glue_trigger" { 5 | count = var.enable_glue_trigger ? 1 : 0 6 | 7 | name = var.glue_trigger_name != "" ? lower(var.glue_trigger_name) : "${lower(var.name)}-glue-trigger-${lower(var.environment)}" 8 | type = upper(var.glue_trigger_type) 9 | 10 | description = var.glue_trigger_description 11 | enabled = var.glue_trigger_enabled 12 | schedule = var.glue_trigger_schedule 13 | workflow_name = var.glue_trigger_workflow_name != "" && !var.enable_glue_workflow ? var.glue_trigger_workflow_name : element(concat(aws_glue_workflow.glue_workflow.*.id, [""]), 0) 14 | start_on_creation = var.glue_trigger_start_on_creation 15 | dynamic "actions" { 16 | iterator = actions 17 | for_each = var.glue_trigger_actions 18 | 19 | content { 20 | arguments = lookup(actions.value, "arguments", null) 21 | # Both JobName or CrawlerName cannot be set together in an action 22 | crawler_name = lookup(actions.value, "crawler_name", (var.enable_glue_crawler && !var.enable_glue_job ? 
element(concat(aws_glue_crawler.glue_crawler.*.id, [""]), 0) : null)) 23 | job_name = lookup(actions.value, "job_name", (var.enable_glue_job && !var.enable_glue_crawler ? element(concat(aws_glue_job.glue_job.*.id, [""]), 0) : null)) 24 | timeout = lookup(actions.value, "timeout", null) 25 | security_configuration = lookup(actions.value, "security_configuration", null) 26 | 27 | dynamic "notification_property" { 28 | iterator = notification_property 29 | for_each = length(keys(lookup(actions.value, "notification_property", {}))) > 0 ? [lookup(actions.value, "notification_property", {})] : [] 30 | 31 | content { 32 | notify_delay_after = lookup(notification_property.value, "notify_delay_after", null) # leave the attribute unset when the key is absent 33 | } 34 | } 35 | } 36 | } 37 | 38 | dynamic "predicate" { 39 | iterator = predicate 40 | for_each = length(keys(var.glue_trigger_predicate)) > 0 ? [var.glue_trigger_predicate] : [] 41 | 42 | content { 43 | logical = lookup(predicate.value, "logical", null) 44 | 45 | dynamic "conditions" { 46 | iterator = conditions 47 | for_each = lookup(predicate.value, "conditions", []) 48 | 49 | content { 50 | job_name = lookup(conditions.value, "job_name", null) 51 | state = lookup(conditions.value, "state", null) 52 | crawler_name = lookup(conditions.value, "crawler_name", null) 53 | crawl_state = lookup(conditions.value, "crawl_state", null) 54 | logical_operator = lookup(conditions.value, "logical_operator", null) 55 | } 56 | } 57 | } 58 | } 59 | 60 | dynamic "event_batching_condition" { 61 | iterator = event_batching_condition 62 | for_each = var.glue_trigger_event_batching_condition 63 | 64 | content { 65 | batch_size = lookup(event_batching_condition.value, "batch_size", lookup(event_batching_condition.value, "create", null)) # key matches the attribute; the legacy "create" key is still honoured 66 | 67 | batch_window = lookup(event_batching_condition.value, "batch_window", lookup(event_batching_condition.value, "delete", null)) # key matches the attribute; the legacy "delete" key is still honoured 68 | } 69 | } 70 | 71 | dynamic "timeouts" { 72 | iterator = timeouts 73 | for_each = length(keys(var.glue_trigger_timeouts)) > 0 ? 
[var.glue_trigger_timeouts] : [] 74 | 75 | content { 76 | create = lookup(timeouts.value, "create", null) 77 | delete = lookup(timeouts.value, "delete", null) 78 | } 79 | } 80 | 81 | tags = merge( 82 | { 83 | Name = var.glue_trigger_name != "" ? lower(var.glue_trigger_name) : "${lower(var.name)}-glue-trigger-${lower(var.environment)}" 84 | }, 85 | var.tags 86 | ) 87 | 88 | lifecycle { 89 | create_before_destroy = true 90 | ignore_changes = [] 91 | } 92 | 93 | depends_on = [ 94 | aws_glue_workflow.glue_workflow, 95 | aws_glue_crawler.glue_crawler, 96 | aws_glue_job.glue_job 97 | ] 98 | } 99 | -------------------------------------------------------------------------------- /glue_catalog_table.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue catalog table 3 | #--------------------------------------------------- 4 | resource "aws_glue_catalog_table" "glue_catalog_table" { 5 | count = var.enable_glue_catalog_table ? 1 : 0 6 | 7 | name = var.glue_catalog_table_name != "" ? lower(var.glue_catalog_table_name) : "${lower(var.name)}-glue-catalog-table-${lower(var.environment)}" 8 | database_name = var.glue_catalog_table_database_name != "" && !var.enable_glue_catalog_database ? var.glue_catalog_table_database_name : element(concat(aws_glue_catalog_database.glue_catalog_database.*.name, [""]), 0) 9 | 10 | description = var.glue_catalog_table_description 11 | catalog_id = var.glue_catalog_table_catalog_id 12 | owner = var.glue_catalog_table_owner 13 | retention = var.glue_catalog_table_retention 14 | view_original_text = var.glue_catalog_table_view_original_text 15 | view_expanded_text = var.glue_catalog_table_view_expanded_text 16 | table_type = var.glue_catalog_table_table_type != null ? 
upper(var.glue_catalog_table_table_type) : var.glue_catalog_table_table_type 17 | parameters = var.glue_catalog_table_parameters 18 | 19 | dynamic "partition_index" { 20 | iterator = partition_index 21 | for_each = var.glue_catalog_table_partition_index 22 | 23 | content { 24 | index_name = lookup(partition_index.value, "index_name", null) # read from this block's own iterator 25 | keys = lookup(partition_index.value, "keys", null) 26 | } 27 | } 28 | 29 | dynamic "target_table" { 30 | iterator = target_table 31 | for_each = var.glue_catalog_table_target_table 32 | 33 | content { 34 | catalog_id = lookup(target_table.value, "catalog_id", null) # current element of var.glue_catalog_table_target_table 35 | database_name = lookup(target_table.value, "database_name", null) 36 | name = lookup(target_table.value, "name", null) 37 | } 38 | } 39 | 40 | dynamic "partition_keys" { 41 | iterator = partition_keys 42 | for_each = var.glue_catalog_table_partition_keys 43 | 44 | content { 45 | name = lookup(partition_keys.value, "name", null) 46 | 47 | type = lookup(partition_keys.value, "type", null) 48 | comment = lookup(partition_keys.value, "comment", null) 49 | } 50 | } 51 | 52 | dynamic "storage_descriptor" { 53 | iterator = storage_descriptor 54 | for_each = length(keys(var.glue_catalog_table_storage_descriptor)) > 0 ? 
[var.glue_catalog_table_storage_descriptor] : [] 55 | 56 | content { 57 | location = lookup(storage_descriptor.value, "location", null) 58 | input_format = lookup(storage_descriptor.value, "input_format", null) 59 | output_format = lookup(storage_descriptor.value, "output_format", null) 60 | compressed = lookup(storage_descriptor.value, "compressed", null) 61 | number_of_buckets = lookup(storage_descriptor.value, "number_of_buckets", null) 62 | bucket_columns = lookup(storage_descriptor.value, "bucket_columns", null) 63 | parameters = lookup(storage_descriptor.value, "parameters", null) 64 | stored_as_sub_directories = lookup(storage_descriptor.value, "stored_as_sub_directories", null) 65 | 66 | dynamic "columns" { 67 | iterator = columns 68 | for_each = lookup(storage_descriptor.value, "columns", []) 69 | 70 | content { 71 | name = lookup(columns.value, "columns_name", null) 72 | type = lookup(columns.value, "columns_type", null) 73 | comment = lookup(columns.value, "columns_comment", null) 74 | } 75 | } 76 | 77 | dynamic "ser_de_info" { 78 | iterator = ser_de_info 79 | for_each = lookup(storage_descriptor.value, "ser_de_info", []) 80 | 81 | content { 82 | name = lookup(ser_de_info.value, "ser_de_info_name", null) 83 | parameters = lookup(ser_de_info.value, "ser_de_info_parameters", null) 84 | serialization_library = lookup(ser_de_info.value, "ser_de_info_serialization_library", null) 85 | } 86 | } 87 | 88 | dynamic "sort_columns" { 89 | iterator = sort_columns 90 | for_each = lookup(storage_descriptor.value, "sort_columns", []) 91 | 92 | content { 93 | column = lookup(sort_columns.value, "sort_columns_column", null) 94 | sort_order = lookup(sort_columns.value, "sort_columns_sort_order", null) 95 | } 96 | } 97 | 98 | dynamic "skewed_info" { 99 | iterator = skewed_info 100 | for_each = lookup(storage_descriptor.value, "skewed_info", []) 101 | 102 | content { 103 | skewed_column_names = lookup(skewed_info.value, "skewed_info_skewed_column_names", null) 104 | 
skewed_column_value_location_maps = lookup(skewed_info.value, "skewed_info_skewed_column_value_location_maps", null) 105 | skewed_column_values = lookup(skewed_info.value, "skewed_info_skewed_column_values", null) 106 | } 107 | } 108 | } 109 | } 110 | 111 | lifecycle { 112 | create_before_destroy = true 113 | ignore_changes = [] 114 | } 115 | 116 | depends_on = [ 117 | aws_glue_catalog_database.glue_catalog_database 118 | ] 119 | } -------------------------------------------------------------------------------- /examples/templates/json/glue_user_policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "glue:*", 8 | "redshift:DescribeClusters", 9 | "redshift:DescribeClusterSubnetGroups", 10 | "iam:ListRoles", 11 | "iam:ListRolePolicies", 12 | "iam:GetRole", 13 | "iam:GetRolePolicy", 14 | "iam:ListAttachedRolePolicies", 15 | "ec2:DescribeSecurityGroups", 16 | "ec2:DescribeSubnets", 17 | "ec2:DescribeVpcs", 18 | "ec2:DescribeVpcEndpoints", 19 | "ec2:DescribeRouteTables", 20 | "ec2:DescribeVpcAttribute", 21 | "ec2:DescribeKeyPairs", 22 | "ec2:DescribeInstances", 23 | "rds:DescribeDBInstances", 24 | "s3:ListAllMyBuckets", 25 | "s3:ListBucket", 26 | "s3:GetBucketAcl", 27 | "s3:GetBucketLocation", 28 | "cloudformation:DescribeStacks", 29 | "cloudformation:GetTemplateSummary", 30 | "dynamodb:ListTables", 31 | "kms:ListAliases", 32 | "kms:DescribeKey", 33 | "cloudwatch:GetMetricData", 34 | "cloudwatch:ListDashboards" 35 | ], 36 | "Resource": [ 37 | "*" 38 | ] 39 | }, 40 | { 41 | "Effect": "Allow", 42 | "Action": [ 43 | "s3:GetObject", 44 | "s3:PutObject" 45 | ], 46 | "Resource": [ 47 | "arn:${partition}:s3:::aws-glue-*/*", 48 | "arn:${partition}:s3:::*/*aws-glue-*/*", 49 | "arn:${partition}:s3:::aws-glue-*" 50 | ] 51 | }, 52 | { 53 | "Effect": "Allow", 54 | "Action": [ 55 | "tag:GetResources" 56 | ], 57 | "Resource": [ 58 | "*" 59 | ] 
60 | }, 61 | { 62 | "Effect": "Allow", 63 | "Action": [ 64 | "s3:CreateBucket" 65 | ], 66 | "Resource": [ 67 | "arn:${partition}:s3:::aws-glue-*" 68 | ] 69 | }, 70 | { 71 | "Effect": "Allow", 72 | "Action": [ 73 | "logs:GetLogEvents" 74 | ], 75 | "Resource": [ 76 | "arn:${partition}:logs:*:*:/aws-glue/*" 77 | ] 78 | }, 79 | { 80 | "Effect": "Allow", 81 | "Action": [ 82 | "cloudformation:CreateStack", 83 | "cloudformation:DeleteStack" 84 | ], 85 | "Resource": "arn:${partition}:cloudformation:*:*:stack/aws-glue*/*" 86 | }, 87 | { 88 | "Effect": "Allow", 89 | "Action": [ 90 | "ec2:RunInstances" 91 | ], 92 | "Resource": [ 93 | "arn:${partition}:ec2:*:*:instance/*", 94 | "arn:${partition}:ec2:*:*:key-pair/*", 95 | "arn:${partition}:ec2:*:*:image/*", 96 | "arn:${partition}:ec2:*:*:security-group/*", 97 | "arn:${partition}:ec2:*:*:network-interface/*", 98 | "arn:${partition}:ec2:*:*:subnet/*", 99 | "arn:${partition}:ec2:*:*:volume/*" 100 | ] 101 | }, 102 | { 103 | "Effect": "Allow", 104 | "Action": [ 105 | "ec2:TerminateInstances", 106 | "ec2:CreateTags", 107 | "ec2:DeleteTags" 108 | ], 109 | "Resource": [ 110 | "arn:${partition}:ec2:*:*:instance/*" 111 | ], 112 | "Condition": { 113 | "StringLike": { 114 | "ec2:ResourceTag/aws:cloudformation:stack-id": "arn:${partition}:cloudformation:*:*:stack/aws-glue-*/*" 115 | }, 116 | "StringEquals": { 117 | "ec2:ResourceTag/aws:cloudformation:logical-id": "ZeppelinInstance" 118 | } 119 | } 120 | }, 121 | { 122 | "Action": [ 123 | "iam:PassRole" 124 | ], 125 | "Effect": "Allow", 126 | "Resource": "arn:${partition}:iam::*:role/AWSGlueServiceRole*", 127 | "Condition": { 128 | "StringLike": { 129 | "iam:PassedToService": [ 130 | "glue.amazonaws.com" 131 | ] 132 | } 133 | } 134 | }, 135 | { 136 | "Action": [ 137 | "iam:PassRole" 138 | ], 139 | "Effect": "Allow", 140 | "Resource": "arn:${partition}:iam::*:role/AWSGlueServiceNotebookRole*", 141 | "Condition": { 142 | "StringLike": { 143 | "iam:PassedToService": [ 144 | "ec2.amazonaws.com" 
145 | ] 146 | } 147 | } 148 | }, 149 | { 150 | "Action": [ 151 | "iam:PassRole" 152 | ], 153 | "Effect": "Allow", 154 | "Resource": [ 155 | "arn:${partition}:iam::*:role/service-role/AWSGlueServiceRole*" 156 | ], 157 | "Condition": { 158 | "StringLike": { 159 | "iam:PassedToService": [ 160 | "glue.amazonaws.com" 161 | ] 162 | } 163 | } 164 | } 165 | ] 166 | } -------------------------------------------------------------------------------- /glue_crawler.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue crawler 3 | #--------------------------------------------------- 4 | resource "aws_glue_crawler" "glue_crawler" { 5 | count = var.enable_glue_crawler ? 1 : 0 6 | 7 | name = var.glue_crawler_name != "" ? lower(var.glue_crawler_name) : "${lower(var.name)}-glue-crawler-${lower(var.environment)}" 8 | database_name = var.glue_crawler_database_name != "" && !var.enable_glue_catalog_database ? var.glue_crawler_database_name : element(concat(aws_glue_catalog_database.glue_catalog_database.*.name, [""]), 0) 9 | role = var.glue_crawler_role 10 | 11 | description = var.glue_crawler_description 12 | classifiers = var.glue_crawler_classifiers 13 | configuration = var.glue_crawler_configuration 14 | schedule = var.glue_crawler_schedule 15 | security_configuration = var.glue_crawler_security_configuration != "" && !var.enable_glue_security_configuration ? 
var.glue_crawler_security_configuration : element(concat(aws_glue_security_configuration.glue_security_configuration.*.id, [""]), 0) 16 | table_prefix = var.glue_crawler_table_prefix 17 | 18 | dynamic "dynamodb_target" { 19 | iterator = dynamodb_target 20 | for_each = var.glue_crawler_dynamodb_target 21 | 22 | content { 23 | path = lookup(dynamodb_target.value, "path", null) 24 | 25 | scan_all = lookup(dynamodb_target.value, "scan_all", null) 26 | scan_rate = lookup(dynamodb_target.value, "scan_rate", null) 27 | } 28 | } 29 | 30 | dynamic "jdbc_target" { 31 | iterator = jdbc_target 32 | for_each = var.glue_crawler_jdbc_target 33 | 34 | content { 35 | connection_name = lookup(jdbc_target.value, "connection_name", null) 36 | path = lookup(jdbc_target.value, "path", null) 37 | 38 | exclusions = lookup(jdbc_target.value, "exclusions", null) 39 | enable_additional_metadata = lookup(jdbc_target.value, "enable_additional_metadata", null) 40 | } 41 | } 42 | 43 | dynamic "s3_target" { 44 | iterator = s3_target 45 | for_each = var.glue_crawler_s3_target 46 | 47 | content { 48 | path = lookup(s3_target.value, "path", null) 49 | 50 | connection_name = lookup(s3_target.value, "connection_name", null) 51 | exclusions = lookup(s3_target.value, "exclusions", null) 52 | sample_size = lookup(s3_target.value, "sample_size", null) 53 | event_queue_arn = lookup(s3_target.value, "event_queue_arn", null) 54 | dlq_event_queue_arn = lookup(s3_target.value, "dlq_event_queue_arn", null) 55 | } 56 | } 57 | 58 | dynamic "delta_target" { 59 | iterator = delta_target 60 | for_each = var.glue_crawler_delta_target 61 | 62 | content { 63 | delta_tables = lookup(delta_target.value, "delta_tables", null) 64 | write_manifest = lookup(delta_target.value, "write_manifest", null) 65 | 66 | connection_name = lookup(delta_target.value, "connection_name", null) 67 | create_native_delta_table = lookup(delta_target.value, "create_native_delta_table", null) 68 | } 69 | } 70 | 71 | dynamic "iceberg_target" { 72 
| iterator = iceberg_target 73 | for_each = var.glue_crawler_iceberg_target 74 | 75 | content { 76 | paths = lookup(iceberg_target.value, "paths", null) 77 | maximum_traversal_depth = lookup(iceberg_target.value, "maximum_traversal_depth", null) 78 | 79 | connection_name = lookup(iceberg_target.value, "connection_name", null) 80 | exclusions = lookup(iceberg_target.value, "exclusions", null) 81 | } 82 | } 83 | 84 | dynamic "lake_formation_configuration" { 85 | iterator = lake_formation_configuration 86 | for_each = var.glue_crawler_lake_formation_configuration 87 | 88 | content { 89 | account_id = lookup(lake_formation_configuration.value, "account_id", null) 90 | use_lake_formation_credentials = lookup(lake_formation_configuration.value, "use_lake_formation_credentials", null) 91 | } 92 | } 93 | 94 | dynamic "catalog_target" { 95 | iterator = catalog_target 96 | for_each = length(var.glue_crawler_catalog_target) > 0 ? [var.glue_crawler_catalog_target] : [] 97 | 98 | content { 99 | database_name = lookup(catalog_target.value, "database_name", (var.enable_glue_catalog_database ? element(concat(aws_glue_catalog_database.glue_catalog_database.*.name, [""]), 0) : null)) # default to the database name, consistent with the crawler's top-level database_name 100 | tables = lookup(catalog_target.value, "tables", (var.enable_glue_catalog_table ? 
aws_glue_catalog_table.glue_catalog_table.*.name : null)) # default to the list of table names created by this module 101 | 102 | connection_name = lookup(catalog_target.value, "connection_name", null) 103 | event_queue_arn = lookup(catalog_target.value, "event_queue_arn", null) 104 | dlq_event_queue_arn = lookup(catalog_target.value, "dlq_event_queue_arn", null) 105 | } 106 | } 107 | 108 | dynamic "schema_change_policy" { 109 | iterator = schema_change_policy 110 | for_each = var.glue_crawler_schema_change_policy 111 | 112 | content { 113 | delete_behavior = lookup(schema_change_policy.value, "delete_behavior", null) 114 | update_behavior = lookup(schema_change_policy.value, "update_behavior", null) 115 | } 116 | } 117 | 118 | dynamic "mongodb_target" { 119 | iterator = mongodb_target 120 | for_each = var.glue_crawler_mongodb_target 121 | 122 | content { 123 | connection_name = lookup(mongodb_target.value, "connection_name", null) 124 | 125 | path = lookup(mongodb_target.value, "path", null) 126 | scan_all = lookup(mongodb_target.value, "scan_all", null) 127 | } 128 | } 129 | 130 | dynamic "lineage_configuration" { 131 | iterator = lineage_configuration 132 | for_each = var.glue_crawler_lineage_configuration 133 | 134 | content { 135 | crawler_lineage_settings = lookup(lineage_configuration.value, "crawler_lineage_settings", null) 136 | } 137 | } 138 | 139 | dynamic "recrawl_policy" { 140 | iterator = recrawl_policy 141 | for_each = var.glue_crawler_recrawl_policy 142 | 143 | content { 144 | recrawl_behavior = lookup(recrawl_policy.value, "recrawl_behavior", null) 145 | } 146 | } 147 | 148 | tags = merge( 149 | { 150 | Name = var.glue_crawler_name != "" ? 
lower(var.glue_crawler_name) : "${lower(var.name)}-glue-crawler-${lower(var.environment)}" 151 | }, 152 | var.tags 153 | ) 154 | 155 | lifecycle { 156 | create_before_destroy = true 157 | ignore_changes = [] 158 | } 159 | 160 | depends_on = [ 161 | aws_glue_catalog_database.glue_catalog_database, 162 | aws_glue_security_configuration.glue_security_configuration, 163 | aws_glue_catalog_table.glue_catalog_table 164 | ] 165 | } 166 | -------------------------------------------------------------------------------- /examples/main.tf: -------------------------------------------------------------------------------- 1 | # 2 | # MAINTAINER Vitaliy Natarov "vitaliy.natarov@yahoo.com" 3 | # 4 | terraform { 5 | required_version = "~> 1.0" 6 | } 7 | 8 | provider "aws" { 9 | region = "us-east-1" 10 | shared_credentials_files = [pathexpand("~/.aws/credentials")] 11 | } 12 | 13 | 14 | module "aws_user_tags" { 15 | source = "./aws_user_tags" 16 | 17 | environment = "DEV" 18 | custom_tags = { 19 | "CreatedBy" = "Vitalii Natarov" 20 | "Helping" = "Gregory Mirsky" 21 | } 22 | } 23 | 24 | # Create example glue service policy from template 25 | module "iam_glue_example_service_policy" { 26 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/iam_policy?ref=master" 27 | name = "TEST-iam-policy" 28 | environment = "stage" 29 | 30 | # Using IAM policy 31 | enable_iam_policy = true 32 | iam_policy_name = "glue_example_service_policy" 33 | iam_policy_description = "Example Glue Service Policy" 34 | iam_policy_path = "/" 35 | iam_policy_policy = templatefile( 36 | "./templates/json/glue_service_policy.json", 37 | { 38 | partition = data.aws_partition.current.partition 39 | } 40 | ) 41 | } 42 | 43 | # Create example glue user policy from template 44 | module "iam_glue_example_user_policy" { 45 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/iam_policy?ref=master" 46 | name = "TEST-iam-policy" 47 | environment = "stage" 48 | 49 | # Using IAM policy 50 | 
enable_iam_policy = true 51 | iam_policy_name = "glue_example_user_policy" 52 | iam_policy_description = "Example Glue User Policy" 53 | iam_policy_path = "/" 54 | iam_policy_policy = templatefile( 55 | "./templates/json/glue_user_policy.json", 56 | { 57 | partition = data.aws_partition.current.partition 58 | } 59 | ) 60 | } 61 | 62 | module "glue_example_admin_role" { 63 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/iam_role?ref=master" 64 | name = "test" 65 | environment = "DEV" 66 | 67 | # Using IAM role 68 | enable_iam_role = true 69 | iam_role_name = "glue_example_role" 70 | iam_role_description = "glue example role" 71 | 72 | iam_role_assume_role_policy = jsonencode( 73 | { 74 | "Version" : "2012-10-17", 75 | "Statement" : [ 76 | { 77 | "Effect" : "Allow", 78 | "Principal" : { 79 | "Service" : "glue.amazonaws.com" 80 | }, 81 | "Action" : "sts:AssumeRole" 82 | } 83 | ] 84 | } 85 | ) 86 | 87 | iam_role_force_detach_policies = true 88 | iam_role_path = "/" 89 | iam_role_max_session_duration = 3600 90 | 91 | # Using IAM role policy 92 | // enable_iam_role_policy = true 93 | // iam_role_policy_name = "my-iam-role-policy-for-testing-terraform" 94 | // iam_role_policy = file("templates/iam_role_policy.json") 95 | 96 | # Using IAM role policy attachment (AWS managed policies are owned by the literal "aws" account; only the partition field varies) 97 | enable_iam_role_policy_attachment = true 98 | iam_role_policy_attachment_policy_arns = [ 99 | // Attach AWS managed policy: AWSCloudFormationReadOnlyAccess 100 | "arn:${data.aws_partition.current.partition}:iam::aws:policy/AWSCloudFormationReadOnlyAccess", 101 | // Attach AWS managed policy: AWSGlueConsoleFullAccess 102 | "arn:${data.aws_partition.current.partition}:iam::aws:policy/AWSGlueConsoleFullAccess", 103 | // Attach AWS managed policy: AWSGlueConsoleSageMakerNotebookFullAccess 104 | 
"arn:${data.aws_partition.current.partition}:iam::aws:policy/AWSGlueConsoleSageMakerNotebookFullAccess", 105 | // Attach AWS managed policy: AWSGlueSchemaRegistryFullAccess 106 | "arn:${data.aws_partition.current.partition}:iam::aws:policy/AWSGlueSchemaRegistryFullAccess", 107 | // Attach AWS managed policy: AmazonAthenaFullAccess 108 | "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonAthenaFullAccess", 109 | // Attach AWS managed policy: AmazonS3FullAccess 110 | "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonS3FullAccess", 111 | // Attach AWS managed policy: CloudWatchLogsReadOnlyAccess 112 | "arn:${data.aws_partition.current.partition}:iam::aws:policy/CloudWatchLogsReadOnlyAccess" 113 | ] 114 | } 115 | 116 | module "s3_private_glue_catalog" { 117 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/s3?ref=master" 118 | name = "test" 119 | environment = "DEV" 120 | 121 | # AWS S3 bucket 122 | enable_s3_bucket = true 123 | s3_bucket_name = "glue-catalog-${data.aws_caller_identity.current.account_id}" 124 | s3_bucket_acl_acl = "private" 125 | 126 | # Create catalog folder in the bucket 127 | enable_s3_object = true 128 | s3_object_stack = [ 129 | { 130 | key = "/catalog" 131 | } 132 | ] 133 | 134 | tags = merge( 135 | module.aws_user_tags.tags, 136 | var.example_tags 137 | ) 138 | } 139 | 140 | # Create glue crawler bucket (account_id ensures unique name across accounts) 141 | module "s3_private_glue_crawler" { 142 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/s3?ref=master" 143 | name = "test" 144 | environment = "DEV" 145 | 146 | # AWS S3 bucket 147 | enable_s3_bucket = true 148 | s3_bucket_name = "glue-crawler-${data.aws_caller_identity.current.account_id}" 149 | s3_bucket_acl_acl = "private" 150 | 151 | # Create 
crawler folder in the bucket 152 | enable_s3_object = true 153 | s3_object_stack = [ 154 | { 155 | key = "/crawler" 156 | } 157 | ] 158 | 159 | tags = merge( 160 | module.aws_user_tags.tags, 161 | var.example_tags 162 | ) 163 | } 164 | 165 | # Create Glue job bucket (account_id ensures unique name across accounts) 166 | module "s3_private_glue_jobs" { 167 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/s3?ref=master" 168 | name = "test" 169 | environment = "DEV" 170 | 171 | # AWS S3 bucket 172 | enable_s3_bucket = true 173 | s3_bucket_name = "glue-jobs-${data.aws_caller_identity.current.account_id}" 174 | s3_bucket_acl_acl = "private" 175 | 176 | # Create jobs folder in the bucket 177 | enable_s3_object = true 178 | s3_object_stack = [ 179 | { 180 | key = "/jobs" 181 | } 182 | ] 183 | 184 | tags = merge( 185 | module.aws_user_tags.tags, 186 | var.example_tags 187 | ) 188 | } 189 | 190 | # Generate a random password for the Glue connection 191 | module "random_glue_connection_password" { 192 | source = "git@github.com:SebastianUA/terraform.git//random/modules/random?ref=master" 193 | 194 | # Generate random password 195 | enable_password = true 196 | password_length = 13 197 | password_special = true 198 | password_override_special = "_%@" 199 | } 200 | 201 | # terraform-aws-glue module 202 | module "glue" { 203 | source = "../" 204 | name = "TEST" 205 | environment = "STAGE" 206 | 207 | # AWS Glue catalog DB 208 | enable_glue_catalog_database = true 209 | glue_catalog_database_name = "test-glue-db-${data.aws_caller_identity.current.account_id}" 210 | glue_catalog_database_parameters = null 211 | # AWS Glue catalog table 212 | enable_glue_catalog_table = true 213 | glue_catalog_table_name = "test-glue-table-${data.aws_caller_identity.current.account_id}" 214 | glue_catalog_table_description = "Those resources are managed by Terraform. 
Created by Vitaliy Natarov" 215 | glue_catalog_table_table_type = "EXTERNAL_TABLE" 216 | glue_catalog_table_parameters = { 217 | "sizeKey" = 493378 218 | "tmp" = "none" 219 | "test" = "yes" 220 | "classification" = "csv" 221 | } 222 | glue_catalog_table_storage_descriptor = { 223 | location = "s3://${module.s3_private_glue_catalog.s3_bucket_id}/test" 224 | input_format = "org.apache.hadoop.mapred.TextInputFormat" 225 | output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat" 226 | 227 | columns = [ 228 | { 229 | columns_name = "oid" 230 | columns_type = "double" 231 | columns_comment = "oid" 232 | }, 233 | { 234 | columns_name = "oid2" 235 | columns_type = "double" 236 | columns_comment = "oid2" 237 | }, 238 | { 239 | columns_name = "oid3" 240 | columns_type = "double" 241 | columns_comment = "oid3" 242 | }, 243 | ] 244 | 245 | ser_de_info = [ 246 | { 247 | ser_de_info_name = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" 248 | ser_de_info_serialization_library = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" 249 | ser_de_info_parameters = tomap({ "field.delim" = "," }) 250 | } 251 | ] 252 | 253 | skewed_info = [ 254 | # No skewed columns in this example. Entries are read by glue_catalog_table.tf using these keys: 255 | # skewed_info_skewed_column_names 256 | # skewed_info_skewed_column_values 257 | # skewed_info_skewed_column_value_location_maps 258 | # (the previous entry repeated the ser_de_info_* keys, which skewed_info never reads) 259 | ] 260 | 261 | sort_columns = [] 262 | } 263 | 264 | 265 | # AWS Glue connection 266 | enable_glue_connection = true 267 | glue_connection_connection_properties = { 268 | JDBC_CONNECTION_URL = "jdbc:mysql://aws_rds_cluster.example.endpoint/exampledatabase" 269 | PASSWORD = module.random_glue_connection_password.password_result 270 | USERNAME = var.glue_connection_user_name 271 | } 272 | ##glue_connection_physical_connection_requirements = [{ 273 | # availability_zone = "zone_here" 274 | # security_group_id_list = [] 275 | # subnet_id = "subnet_here" 276 | #}] 277 | 
enable_glue_crawler = true 278 | glue_crawler_name = "" 279 | glue_crawler_role = module.glue_example_admin_role.iam_role_arn 280 | enable_glue_security_configuration = false 281 | glue_security_configuration_name = "" 282 | glue_crawler_s3_target = [ 283 | { 284 | path = "s3://${module.s3_private_glue_crawler.s3_bucket_id}/crawler" 285 | exclusions = [] 286 | } 287 | ] 288 | 289 | enable_glue_job = true 290 | glue_job_name = "" 291 | glue_job_role_arn = module.glue_example_admin_role.iam_role_arn 292 | glue_job_additional_connections = [] 293 | glue_job_execution_property = [ 294 | { 295 | max_concurrent_runs = 2 296 | } 297 | ] 298 | glue_job_command = [ 299 | { 300 | script_location = "s3://${module.s3_private_glue_jobs.s3_bucket_id}/jobs" 301 | name = "jobs" 302 | } 303 | ] 304 | tags = merge( 305 | module.aws_user_tags.tags, 306 | tomap({ 307 | "cost-center" = "00-00000.000.01", 308 | "Project" = "My Test Glue Project" 309 | }) 310 | ) 311 | 312 | depends_on = [ 313 | module.aws_user_tags 314 | ] 315 | } 316 | 317 | module "glue_trigger" { 318 | source = "../" 319 | name = "TEST" 320 | environment = "STAGE" 321 | 322 | enable_glue_trigger = true 323 | glue_trigger_name = "" 324 | glue_trigger_actions = [ 325 | { 326 | # Both JobName or CrawlerName cannot be set together in an action 327 | crawler_name = module.glue.glue_crawler_id # null 328 | job_name = null # module.glue.glue_job_id 329 | arguments = null 330 | timeout = null 331 | } 332 | ] 333 | 334 | glue_trigger_predicate = { 335 | logical = "AND" 336 | conditions = [ 337 | { 338 | job_name = "job-name-1" 339 | state = null 340 | crawler_name = null 341 | crawl_state = null 342 | logical_operator = "EQUALS" 343 | }, 344 | { 345 | job_name = "job-name-2" 346 | state = null 347 | crawler_name = null 348 | crawl_state = null 349 | logical_operator = "EQUALS" 350 | } 351 | ] 352 | } 353 | 354 | tags = merge( 355 | module.aws_user_tags.tags, 356 | tomap({ 357 | "cost-center" = "00-00000.000.01", 358 | 
"Project" = "My Test Glue Project" 359 | }) 360 | ) 361 | 362 | depends_on = [ 363 | module.glue, 364 | module.aws_user_tags 365 | ] 366 | } -------------------------------------------------------------------------------- /outputs.tf: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # AWS Glue catalog database 3 | #--------------------------------------------------- 4 | output "glue_catalog_database_arn" { 5 | description = "ARN for glue catalog database" 6 | value = element(concat(aws_glue_catalog_database.glue_catalog_database.*.arn, [""]), 0) 7 | } 8 | 9 | output "glue_catalog_database_id" { 10 | description = "ID for glue catalog database" 11 | value = element(concat(aws_glue_catalog_database.glue_catalog_database.*.id, [""]), 0) 12 | } 13 | 14 | output "glue_catalog_database_name" { 15 | description = "Name for glue catalog database" 16 | value = element(concat(aws_glue_catalog_database.glue_catalog_database.*.name, [""]), 0) 17 | } 18 | 19 | #--------------------------------------------------- 20 | # AWS Glue catalog table 21 | #--------------------------------------------------- 22 | output "glue_catalog_table_arn" { 23 | description = "ARN for glue catalog table" 24 | value = element(concat(aws_glue_catalog_table.glue_catalog_table.*.arn, [""]), 0) 25 | } 26 | 27 | output "glue_catalog_table_id" { 28 | description = "ID for glue catalog table" 29 | value = element(concat(aws_glue_catalog_table.glue_catalog_table.*.id, [""]), 0) 30 | } 31 | 32 | output "glue_catalog_table_name" { 33 | description = "Name for glue catalog table" 34 | value = element(concat(aws_glue_catalog_table.glue_catalog_table.*.name, [""]), 0) 35 | } 36 | 37 | #--------------------------------------------------- 38 | # AWS Glue classifier 39 | #--------------------------------------------------- 40 | output "glue_classifier_id" { 41 | description = "Name of the classifier" 42 | value = 
element(concat(aws_glue_classifier.glue_classifier.*.id, [""]), 0) 43 | } 44 | 45 | #--------------------------------------------------- 46 | # AWS Glue connection 47 | #--------------------------------------------------- 48 | output "glue_connection_id" { 49 | description = "Catalog ID and name of the connection" 50 | value = element(concat(aws_glue_connection.glue_connection.*.id, [""]), 0) 51 | } 52 | 53 | #--------------------------------------------------- 54 | # AWS Glue crawler 55 | #--------------------------------------------------- 56 | output "glue_crawler_id" { 57 | description = "Crawler name" 58 | value = element(concat(aws_glue_crawler.glue_crawler.*.id, [""]), 0) 59 | } 60 | 61 | output "glue_crawler_arn" { 62 | description = "The ARN of the crawler" 63 | value = element(concat(aws_glue_crawler.glue_crawler.*.arn, [""]), 0) 64 | } 65 | 66 | #--------------------------------------------------- 67 | # AWS glue security configuration 68 | #--------------------------------------------------- 69 | output "glue_security_configuration_id" { 70 | description = "Glue security configuration name" 71 | value = element(concat(aws_glue_security_configuration.glue_security_configuration.*.id, [""]), 0) 72 | } 73 | 74 | #--------------------------------------------------- 75 | # AWS Glue workflow 76 | #--------------------------------------------------- 77 | output "glue_workflow_id" { 78 | description = "Glue workflow name" 79 | value = element(concat(aws_glue_workflow.glue_workflow.*.id, [""]), 0) 80 | } 81 | 82 | #--------------------------------------------------- 83 | # AWS Glue job 84 | #--------------------------------------------------- 85 | output "glue_job_id" { 86 | description = "Glue job name" 87 | value = element(concat(aws_glue_job.glue_job.*.id, [""]), 0) 88 | } 89 | 90 | output "glue_job_arn" { 91 | description = "Amazon Resource Name (ARN) of Glue Job" 92 | value = element(concat(aws_glue_job.glue_job.*.arn, [""]), 0) 93 | } 94 | 95 | 
#--------------------------------------------------- 96 | # AWS Glue trigger 97 | #--------------------------------------------------- 98 | output "glue_trigger_id" { 99 | description = "Trigger name" 100 | value = element(concat(aws_glue_trigger.glue_trigger.*.id, [""]), 0) 101 | } 102 | 103 | output "glue_trigger_arn" { 104 | description = "Amazon Resource Name (ARN) of Glue Trigger" 105 | value = element(concat(aws_glue_trigger.glue_trigger.*.arn, [""]), 0) 106 | } 107 | 108 | #--------------------------------------------------- 109 | # AWS Glue data catalog encryption settings 110 | #--------------------------------------------------- 111 | output "glue_data_catalog_encryption_settings_id" { 112 | description = "The ID of the Data Catalog to set the security configuration for." 113 | value = element(concat(aws_glue_data_catalog_encryption_settings.glue_data_catalog_encryption_settings.*.id, [""]), 0) 114 | } 115 | 116 | #--------------------------------------------------- 117 | # AWS Glue dev endpoint 118 | #--------------------------------------------------- 119 | output "glue_dev_endpoint_id" { 120 | description = "The ID of the endpoint." 121 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.id, [""]), 0) 122 | } 123 | 124 | output "glue_dev_endpoint_arn" { 125 | description = "The ARN of the endpoint." 126 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.arn, [""]), 0) 127 | } 128 | 129 | output "glue_dev_endpoint_name" { 130 | description = "The name of the new endpoint." 131 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.name, [""]), 0) 132 | } 133 | 134 | output "glue_dev_endpoint_private_address" { 135 | description = "A private IP address to access the endpoint within a VPC, if this endpoint is created within one." 
136 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.private_address, [""]), 0) 137 | } 138 | 139 | output "glue_dev_endpoint_public_address" { 140 | description = "The public IP address used by this endpoint. The PublicAddress field is present only when you create a non-VPC endpoint." 141 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.public_address, [""]), 0) 142 | } 143 | 144 | output "glue_dev_endpoint_yarn_endpoint_address" { 145 | description = "The YARN endpoint address used by this endpoint." 146 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.yarn_endpoint_address, [""]), 0) 147 | } 148 | 149 | output "glue_dev_endpoint_zeppelin_remote_spark_interpreter_port" { 150 | description = "The Apache Zeppelin port for the remote Apache Spark interpreter." 151 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.zeppelin_remote_spark_interpreter_port, [""]), 0) 152 | } 153 | 154 | output "glue_dev_endpoint_availability_zone" { 155 | description = "The AWS availability zone where this endpoint is located." 156 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.availability_zone, [""]), 0) 157 | } 158 | 159 | output "glue_dev_endpoint_vpc_id" { 160 | description = "The ID of the VPC used by this endpoint." 161 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.vpc_id, [""]), 0) 162 | } 163 | 164 | output "glue_dev_endpoint_status" { 165 | description = "The current status of this endpoint." 166 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.status, [""]), 0) 167 | } 168 | 169 | output "glue_dev_endpoint_failure_reason" { 170 | description = "The reason for a current failure in this endpoint." 
171 | value = element(concat(aws_glue_dev_endpoint.glue_dev_endpoint.*.failure_reason, [""]), 0) 172 | } 173 | 174 | #--------------------------------------------------- 175 | # AWS Glue ml transform 176 | #--------------------------------------------------- 177 | output "glue_ml_transform_id" { 178 | description = "Glue ML Transform ID." 179 | value = element(concat(aws_glue_ml_transform.glue_ml_transform.*.id, [""]), 0) 180 | } 181 | 182 | output "glue_ml_transform_arn" { 183 | description = "Amazon Resource Name (ARN) of Glue ML Transform." 184 | value = element(concat(aws_glue_ml_transform.glue_ml_transform.*.arn, [""]), 0) 185 | } 186 | 187 | output "glue_ml_transform_label_count" { 188 | description = "The number of labels available for this transform." 189 | value = element(concat(aws_glue_ml_transform.glue_ml_transform.*.label_count, [""]), 0) 190 | } 191 | 192 | output "glue_ml_transform_schema" { 193 | description = "The object that represents the schema that this transform accepts. see Schema." 194 | value = element(concat(aws_glue_ml_transform.glue_ml_transform.*.schema, [""]), 0) 195 | } 196 | 197 | #--------------------------------------------------- 198 | # AWS Glue partition 199 | #--------------------------------------------------- 200 | output "glue_partition_id" { 201 | description = "partition id." 202 | value = element(concat(aws_glue_partition.glue_partition.*.id, [""]), 0) 203 | } 204 | 205 | output "glue_partition_creation_time" { 206 | description = "The time at which the partition was created." 207 | value = element(concat(aws_glue_partition.glue_partition.*.creation_time, [""]), 0) 208 | } 209 | 210 | output "glue_partition_last_analyzed_time" { 211 | description = "The last time at which column statistics were computed for this partition." 
212 | value = element(concat(aws_glue_partition.glue_partition.*.last_analyzed_time, [""]), 0) 213 | } 214 | 215 | output "glue_partition_last_accessed_time" { 216 | description = "The last time at which the partition was accessed." 217 | value = element(concat(aws_glue_partition.glue_partition.*.last_accessed_time, [""]), 0) 218 | } 219 | 220 | #--------------------------------------------------- 221 | # AWS Glue registry 222 | #--------------------------------------------------- 223 | output "glue_registry_id" { 224 | description = "Amazon Resource Name (ARN) of Glue Registry." 225 | value = element(concat(aws_glue_registry.glue_registry.*.id, [""]), 0) 226 | } 227 | 228 | output "glue_registry_arn" { 229 | description = "Amazon Resource Name (ARN) of Glue Registry." 230 | value = element(concat(aws_glue_registry.glue_registry.*.arn, [""]), 0) 231 | } 232 | 233 | #--------------------------------------------------- 234 | # AWS Glue resource policy 235 | #--------------------------------------------------- 236 | output "glue_resource_policy_id" { 237 | description = "The ID of Glue resource policy." 238 | value = element(concat(aws_glue_resource_policy.glue_resource_policy.*.id, [""]), 0) 239 | } 240 | 241 | #--------------------------------------------------- 242 | # AWS Glue schema 243 | #--------------------------------------------------- 244 | output "glue_schema_id" { 245 | description = "Amazon Resource Name (ARN) of the schema." 246 | value = element(concat(aws_glue_schema.glue_schema.*.id, [""]), 0) 247 | } 248 | 249 | output "glue_schema_arn" { 250 | description = "Amazon Resource Name (ARN) of the schema." 251 | value = element(concat(aws_glue_schema.glue_schema.*.arn, [""]), 0) 252 | } 253 | 254 | output "glue_schema_registry_name" { 255 | description = "The name of the Glue Registry." 
256 | value = element(concat(aws_glue_schema.glue_schema.*.registry_name, [""]), 0) 257 | } 258 | 259 | output "glue_schema_latest_schema_version" { 260 | description = "The latest version of the schema associated with the returned schema definition." 261 | value = element(concat(aws_glue_schema.glue_schema.*.latest_schema_version, [""]), 0) 262 | } 263 | 264 | output "glue_schema_next_schema_version" { 265 | description = "The next version of the schema associated with the returned schema definition." 266 | value = element(concat(aws_glue_schema.glue_schema.*.next_schema_version, [""]), 0) 267 | } 268 | 269 | output "glue_schema_schema_checkpoint" { 270 | description = "The version number of the checkpoint (the last time the compatibility mode was changed)." 271 | value = element(concat(aws_glue_schema.glue_schema.*.schema_checkpoint, [""]), 0) 272 | } 273 | 274 | #--------------------------------------------------- 275 | # AWS Glue user defined function 276 | #--------------------------------------------------- 277 | output "glue_user_defined_function_id" { 278 | description = "The id of the Glue User Defined Function." 279 | value = element(concat(aws_glue_user_defined_function.glue_user_defined_function.*.id, [""]), 0) 280 | } 281 | 282 | output "glue_user_defined_function_arn" { 283 | description = "The ARN of the Glue User Defined Function." 284 | value = element(concat(aws_glue_user_defined_function.glue_user_defined_function.*.arn, [""]), 0) 285 | } 286 | 287 | output "glue_user_defined_function_create_time" { 288 | description = "The time at which the function was created." 
289 | value = element(concat(aws_glue_user_defined_function.glue_user_defined_function.*.create_time, [""]), 0) 290 | } 291 | 292 | #--------------------------------------------------- 293 | # AWS Glue partition index 294 | #--------------------------------------------------- 295 | output "glue_partition_index_id" { 296 | description = "Catalog ID, Database name, table name, and index name." 297 | value = element(concat(aws_glue_partition_index.glue_partition_index.*.id, [""]), 0) 298 | } 299 | 300 | #--------------------------------------------------- 301 | # AWS Glue data quality ruleset 302 | #--------------------------------------------------- 303 | output "glue_data_quality_ruleset_id" { 304 | description = "ID of the Glue Data Quality Ruleset." 305 | value = element(concat(aws_glue_data_quality_ruleset.glue_data_quality_ruleset.*.id, [""]), 0) 306 | } 307 | 308 | output "glue_data_quality_ruleset_arn" { 309 | description = "ARN of the Glue Data Quality Ruleset." 310 | value = element(concat(aws_glue_data_quality_ruleset.glue_data_quality_ruleset.*.arn, [""]), 0) 311 | } 312 | 313 | output "glue_data_quality_ruleset_created_on" { 314 | description = "The time and date that this data quality ruleset was created." 315 | value = element(concat(aws_glue_data_quality_ruleset.glue_data_quality_ruleset.*.created_on, [""]), 0) 316 | } 317 | 318 | output "glue_data_quality_ruleset_last_modified_on" { 319 | description = "The time and date that this data quality ruleset was last modified." 320 | value = element(concat(aws_glue_data_quality_ruleset.glue_data_quality_ruleset.*.last_modified_on, [""]), 0) 321 | } 322 | 323 | output "glue_data_quality_ruleset_recommendation_run_id" { 324 | description = "When a ruleset was created from a recommendation run, this run ID is generated to link the two together." 
325 | value = element(concat(aws_glue_data_quality_ruleset.glue_data_quality_ruleset.*.recommendation_run_id, [""]), 0) 326 | } 327 | -------------------------------------------------------------------------------- /variables.tf: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------- 2 | # Global or/and default variables 3 | #----------------------------------------------------------- 4 | variable "name" { 5 | description = "Name to be used on all resources as prefix" 6 | default = "TEST" 7 | } 8 | 9 | variable "environment" { 10 | description = "Environment for service" 11 | default = "STAGE" 12 | } 13 | 14 | variable "tags" { 15 | description = "A map of tags, given as key-value string pairs." 16 | type = map(string) 17 | default = {} 18 | } 19 | 20 | #--------------------------------------------------- 21 | # AWS Glue catalog database 22 | #--------------------------------------------------- 23 | variable "enable_glue_catalog_database" { 24 | description = "Enable glue catalog database usage" 25 | default = false 26 | } 27 | 28 | variable "glue_catalog_database_name" { 29 | description = "The name of the database." 30 | default = "" 31 | } 32 | 33 | variable "glue_catalog_database_description" { 34 | description = "(Optional) Description of the database." 35 | default = null 36 | } 37 | 38 | variable "glue_catalog_database_catalog_id" { 39 | description = "(Optional) ID of the Glue Catalog to create the database in. If omitted, this defaults to the AWS Account ID." 40 | default = null 41 | } 42 | 43 | variable "glue_catalog_database_location_uri" { 44 | description = "(Optional) The location of the database (for example, an HDFS path)." 45 | default = null 46 | } 47 | 48 | variable "glue_catalog_database_parameters" { 49 | description = "(Optional) A list of key-value pairs that define parameters and properties of the database." 
50 | default = null 51 | } 52 | 53 | variable "glue_catalog_database_create_table_default_permission" { 54 | description = "(Optional) Creates a set of default permissions on the table for principals." 55 | default = {} 56 | } 57 | 58 | variable "glue_catalog_database_target_database" { 59 | description = "(Optional) Configuration block for a target database for resource linking." 60 | default = {} 61 | } 62 | 63 | #--------------------------------------------------- 64 | # AWS Glue catalog table 65 | #--------------------------------------------------- 66 | variable "enable_glue_catalog_table" { 67 | description = "Enable glue catalog table usage" 68 | default = false 69 | } 70 | 71 | variable "glue_catalog_table_name" { 72 | description = "Name of the table. For Hive compatibility, this must be entirely lowercase." 73 | default = "" 74 | } 75 | 76 | variable "glue_catalog_table_database_name" { 77 | description = "Name of the metadata database where the table metadata resides. For Hive compatibility, this must be all lowercase." 78 | default = "" 79 | } 80 | 81 | variable "glue_catalog_table_description" { 82 | description = "(Optional) Description of the table." 83 | default = null 84 | } 85 | 86 | variable "glue_catalog_table_catalog_id" { 87 | description = "(Optional) ID of the Glue Catalog and database to create the table in. If omitted, this defaults to the AWS Account ID plus the database name." 88 | default = null 89 | } 90 | 91 | variable "glue_catalog_table_owner" { 92 | description = "(Optional) Owner of the table." 93 | default = null 94 | } 95 | 96 | variable "glue_catalog_table_retention" { 97 | description = "(Optional) Retention time for this table." 98 | default = null 99 | } 100 | 101 | variable "glue_catalog_table_partition_keys" { 102 | description = "(Optional) A list of columns by which the table is partitioned. Only primitive types are supported as partition keys." 
103 | default = [] 104 | } 105 | 106 | variable "glue_catalog_table_view_original_text" { 107 | description = "(Optional) If the table is a view, the original text of the view; otherwise null." 108 | default = null 109 | } 110 | 111 | variable "glue_catalog_table_view_expanded_text" { 112 | description = "(Optional) If the table is a view, the expanded text of the view; otherwise null." 113 | default = null 114 | } 115 | 116 | variable "glue_catalog_table_table_type" { 117 | description = "(Optional) The type of this table (EXTERNAL_TABLE, VIRTUAL_VIEW, etc.)." 118 | default = null 119 | } 120 | 121 | variable "glue_catalog_table_parameters" { 122 | description = "(Optional) Properties associated with this table, as a list of key-value pairs." 123 | default = null 124 | } 125 | 126 | variable "glue_catalog_table_storage_descriptor" { 127 | description = "(Optional) A storage descriptor object containing information about the physical storage of this table. You can refer to the Glue Developer Guide for a full explanation of this object." 128 | default = { 129 | location = null 130 | input_format = null 131 | output_format = null 132 | compressed = null 133 | number_of_buckets = null 134 | bucket_columns = null 135 | parameters = null 136 | stored_as_sub_directories = null 137 | } 138 | } 139 | 140 | variable "glue_catalog_table_partition_index" { 141 | description = "(Optional) Configuration block for a maximum of 3 partition indexes." 142 | default = [] 143 | } 144 | 145 | variable "glue_catalog_table_target_table" { 146 | description = "(Optional) Configuration block of a target table for resource linking." 
147 | default = [] 148 | } 149 | 150 | #--------------------------------------------------- 151 | # AWS Glue classifier 152 | #--------------------------------------------------- 153 | variable "enable_glue_classifier" { 154 | description = "Enable glue classifier usage" 155 | default = false 156 | } 157 | 158 | variable "glue_classifier_name" { 159 | description = "The name of the classifier." 160 | default = "" 161 | } 162 | 163 | variable "glue_classifier_csv_classifier" { 164 | description = "(Optional) A classifier for Csv content. " 165 | default = [] 166 | } 167 | 168 | variable "glue_classifier_grok_classifier" { 169 | description = "(Optional) A classifier for grok content. " 170 | default = [] 171 | } 172 | 173 | variable "glue_classifier_json_classifier" { 174 | description = "(Optional) A classifier for json content. " 175 | default = [] 176 | } 177 | 178 | variable "glue_classifier_xml_classifier" { 179 | description = "(Optional) A classifier for xml content. " 180 | default = [] 181 | } 182 | 183 | #--------------------------------------------------- 184 | # AWS Glue connection 185 | #--------------------------------------------------- 186 | variable "enable_glue_connection" { 187 | description = "Enable glue connection usage" 188 | default = false 189 | } 190 | 191 | variable "glue_connection_name" { 192 | description = "The name of the connection." 193 | default = "" 194 | } 195 | 196 | variable "glue_connection_connection_properties" { 197 | description = "(Required) A map of key-value pairs used as parameters for this connection." 198 | default = {} 199 | } 200 | 201 | variable "glue_connection_description" { 202 | description = "(Optional) Description of the connection." 203 | default = null 204 | } 205 | 206 | variable "glue_connection_catalog_id" { 207 | description = "(Optional) The ID of the Data Catalog in which to create the connection. If none is supplied, the AWS account ID is used by default." 
208 | default = null 209 | } 210 | 211 | variable "glue_connection_connection_type" { 212 | description = "(Optional) The type of the connection. Supported are: JDBC, MONGODB. Defaults to JDBC." 213 | default = "JDBC" 214 | } 215 | 216 | variable "glue_connection_match_criteria" { 217 | description = "(Optional) A list of criteria that can be used in selecting this connection." 218 | default = null 219 | } 220 | 221 | variable "glue_connection_physical_connection_requirements" { 222 | description = "(Optional) A map of physical connection requirements, such as VPC and SecurityGroup. " 223 | default = [] 224 | } 225 | 226 | #--------------------------------------------------- 227 | # AWS Glue crawler 228 | #--------------------------------------------------- 229 | variable "enable_glue_crawler" { 230 | description = "Enable glue crawler usage" 231 | default = false 232 | } 233 | 234 | variable "glue_crawler_name" { 235 | description = "Name of the crawler." 236 | default = "" 237 | } 238 | 239 | variable "glue_crawler_database_name" { 240 | description = "Glue database where results are written." 241 | default = "" 242 | } 243 | 244 | variable "glue_crawler_role" { 245 | description = "(Required) The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources." 246 | default = "" 247 | } 248 | 249 | variable "glue_crawler_description" { 250 | description = "(Optional) Description of the crawler." 251 | default = null 252 | } 253 | 254 | variable "glue_crawler_classifiers" { 255 | description = "(Optional) List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification." 256 | default = null 257 | } 258 | 259 | variable "glue_crawler_configuration" { 260 | description = "(Optional) JSON string of configuration information." 
261 | default = null 262 | } 263 | 264 | variable "glue_crawler_schedule" { 265 | description = "(Optional) A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *)." 266 | default = null 267 | } 268 | 269 | variable "glue_crawler_security_configuration" { 270 | description = "(Optional) The name of Security Configuration to be used by the crawler" 271 | default = null 272 | } 273 | 274 | variable "glue_crawler_table_prefix" { 275 | description = "(Optional) The table prefix used for catalog tables that are created." 276 | default = null 277 | } 278 | 279 | variable "glue_crawler_delta_target" { 280 | description = "(Optional) List of nested Delta Lake target arguments" 281 | default = [] 282 | } 283 | 284 | variable "glue_crawler_iceberg_target" { 285 | description = "(Optional) List nested Iceberg target arguments." 286 | default = [] 287 | } 288 | 289 | variable "glue_crawler_lake_formation_configuration" { 290 | description = "(Optional) Specifies Lake Formation configuration settings for the crawler." 291 | default = [] 292 | } 293 | 294 | variable "glue_crawler_dynamodb_target" { 295 | description = "(Optional) List of nested DynamoDB target arguments." 296 | default = [] 297 | } 298 | 299 | variable "glue_crawler_jdbc_target" { 300 | description = "(Optional) List of nested JDBC target arguments. " 301 | default = [] 302 | } 303 | 304 | variable "glue_crawler_s3_target" { 305 | description = "(Optional) List nested Amazon S3 target arguments." 306 | default = [] 307 | } 308 | 309 | variable "glue_crawler_catalog_target" { 310 | description = "(Optional) List nested Amazon catalog target arguments." 311 | default = [] 312 | } 313 | 314 | variable "glue_crawler_schema_change_policy" { 315 | description = "(Optional) Policy for the crawler's update and deletion behavior." 
316 | default = [] 317 | } 318 | 319 | variable "glue_crawler_recrawl_policy" { 320 | description = "(Optional) A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run." 321 | default = [] 322 | } 323 | 324 | variable "glue_crawler_mongodb_target" { 325 | description = "(Optional) List nested MongoDB target arguments." 326 | default = [] 327 | } 328 | 329 | variable "glue_crawler_lineage_configuration" { 330 | description = "(Optional) Specifies data lineage configuration settings for the crawler." 331 | default = [] 332 | } 333 | 334 | #--------------------------------------------------- 335 | # AWS glue security configuration 336 | #--------------------------------------------------- 337 | variable "enable_glue_security_configuration" { 338 | description = "Enable glue security configuration usage" 339 | default = false 340 | } 341 | 342 | variable "glue_security_configuration_name" { 343 | description = "Name of the security configuration." 344 | default = "" 345 | } 346 | 347 | variable "glue_security_configuration_encryption_configuration" { 348 | description = "Set encryption configuration for Glue security configuration" 349 | default = {} 350 | } 351 | 352 | #--------------------------------------------------- 353 | # AWS Glue workflow 354 | #--------------------------------------------------- 355 | variable "enable_glue_workflow" { 356 | description = "Enable glue workflow usage" 357 | default = false 358 | } 359 | 360 | variable "glue_workflow_name" { 361 | description = "The name you assign to this workflow." 362 | default = "" 363 | } 364 | 365 | variable "glue_workflow_description" { 366 | description = "(Optional) Description of the workflow." 367 | default = null 368 | } 369 | 370 | variable "glue_workflow_default_run_properties" { 371 | description = "(Optional) A map of default run properties for this workflow. 
These properties are passed to all jobs associated to the workflow." 372 | default = null 373 | } 374 | 375 | variable "glue_workflow_max_concurrent_runs" { 376 | description = "(Optional) Prevents exceeding the maximum number of concurrent runs of any of the component jobs. If you leave this parameter blank, there is no limit to the number of concurrent workflow runs." 377 | default = null 378 | } 379 | 380 | #--------------------------------------------------- 381 | # AWS Glue job 382 | #--------------------------------------------------- 383 | variable "enable_glue_job" { 384 | description = "Enable glue job usage" 385 | default = false 386 | } 387 | 388 | variable "glue_job_name" { 389 | description = "The name you assign to this job. It must be unique in your account." 390 | default = "" 391 | } 392 | 393 | variable "glue_job_role_arn" { 394 | description = "The ARN of the IAM role associated with this job." 395 | default = null 396 | } 397 | 398 | variable "glue_job_command" { 399 | description = "(Required) The command of the job." 400 | default = [] 401 | } 402 | 403 | variable "glue_job_description" { 404 | description = "(Optional) Description of the job." 405 | default = null 406 | } 407 | 408 | variable "glue_job_connections" { 409 | description = "(Optional) The list of connections used for this job." 410 | default = [] 411 | } 412 | 413 | variable "glue_job_additional_connections" { 414 | description = "(Optional) The list of connections used for the job." 415 | default = [] 416 | } 417 | 418 | variable "glue_job_default_arguments" { 419 | description = "(Optional) The map of default arguments for this job. You can specify arguments here that your own job-execution script consumes, as well as arguments that AWS Glue itself consumes. For information about how to specify and consume your own Job arguments, see the Calling AWS Glue APIs in Python topic in the developer guide. 
For information about the key-value pairs that AWS Glue consumes to set up your job, see the Special Parameters Used by AWS Glue topic in the developer guide." 420 | default = { 421 | "--job-language" = "python" 422 | } 423 | } 424 | 425 | variable "glue_job_non_overridable_arguments" { 426 | description = "(Optional) Non-overridable arguments for this job, specified as name-value pairs." 427 | default = null 428 | } 429 | 430 | variable "glue_job_execution_property" { 431 | description = "(Optional) Execution property of the job." 432 | default = [] 433 | } 434 | 435 | variable "glue_job_glue_version" { 436 | description = "(Optional) The version of glue to use, for example '1.0'. For information about available versions, see the AWS Glue Release Notes." 437 | default = null 438 | } 439 | 440 | variable "glue_job_execution_class" { 441 | description = "(Optional) Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources. Valid value: FLEX, STANDARD." 442 | default = null 443 | } 444 | 445 | variable "glue_job_max_capacity" { 446 | description = "(Optional) The maximum number of AWS Glue data processing units (DPUs) that can be allocated when this job runs. Required when pythonshell is set, accept either 0.0625 or 1.0." 447 | default = null 448 | } 449 | 450 | variable "glue_job_max_retries" { 451 | description = "(Optional) The maximum number of times to retry this job if it fails." 452 | default = null 453 | } 454 | 455 | variable "glue_job_notification_property" { 456 | description = "(Optional) Notification property of the job." 457 | default = [] 458 | } 459 | 460 | variable "glue_job_timeout" { 461 | description = "(Optional) The job timeout in minutes. The default is 2880 minutes (48 hours)." 
462 | default = 2880 463 | } 464 | 465 | variable "glue_job_security_configuration" { 466 | description = "(Optional) The name of the Security Configuration to be associated with the job." 467 | default = null 468 | } 469 | 470 | variable "glue_job_worker_type" { 471 | description = "(Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, or G.2X." 472 | default = null 473 | } 474 | 475 | variable "glue_job_number_of_workers" { 476 | description = "(Optional) The number of workers of a defined workerType that are allocated when a job runs." 477 | default = null 478 | } 479 | 480 | #--------------------------------------------------- 481 | # AWS Glue trigger 482 | #--------------------------------------------------- 483 | variable "enable_glue_trigger" { 484 | description = "Enable glue trigger usage" 485 | default = false 486 | } 487 | 488 | variable "glue_trigger_name" { 489 | description = "The name of the trigger." 490 | default = "" 491 | } 492 | 493 | variable "glue_trigger_type" { 494 | description = "(Required) The type of trigger. Valid values are CONDITIONAL, ON_DEMAND, and SCHEDULED." 495 | default = "ON_DEMAND" 496 | } 497 | 498 | variable "glue_trigger_description" { 499 | description = "(Optional) A description of the new trigger." 500 | default = null 501 | } 502 | 503 | variable "glue_trigger_enabled" { 504 | description = "(Optional) Start the trigger. Defaults to true. Not valid to disable for ON_DEMAND type." 505 | default = null 506 | } 507 | 508 | variable "glue_trigger_schedule" { 509 | description = "(Optional) A cron expression used to specify the schedule. Time-Based Schedules for Jobs and Crawlers" 510 | default = null 511 | } 512 | 513 | variable "glue_trigger_workflow_name" { 514 | description = "(Optional) A workflow to which the trigger should be associated to. 
Every workflow graph (DAG) needs a starting trigger (ON_DEMAND or SCHEDULED type) and can contain multiple additional CONDITIONAL triggers." 515 | default = null 516 | } 517 | 518 | variable "glue_trigger_actions" { 519 | description = "(Required) List of actions initiated by this trigger when it fires. " 520 | default = [] 521 | } 522 | 523 | variable "glue_trigger_timeouts" { 524 | description = "Set timeouts for glue trigger" 525 | default = {} 526 | } 527 | 528 | variable "glue_trigger_predicate" { 529 | description = "(Optional) A predicate to specify when the new trigger should fire. Required when trigger type is CONDITIONAL" 530 | default = {} 531 | } 532 | 533 | variable "glue_trigger_start_on_creation" { 534 | description = "(Optional) Set to true to start SCHEDULED and CONDITIONAL triggers when created. True is not supported for ON_DEMAND triggers." 535 | default = null 536 | } 537 | 538 | variable "glue_trigger_event_batching_condition" { 539 | description = "(Optional) Batch condition that must be met (specified number of events received or batch time window expired) before EventBridge event trigger fires." 540 | default = [] 541 | } 542 | 543 | #--------------------------------------------------- 544 | # AWS Glue data catalog encryption settings 545 | #--------------------------------------------------- 546 | variable "enable_glue_data_catalog_encryption_settings" { 547 | description = "Enable glue data catalog encryption settings usage" 548 | default = false 549 | } 550 | 551 | variable "glue_data_catalog_encryption_settings_data_catalog_encryption_settings" { 552 | description = "Set data_catalog_encryption_settings block for Glue data catalog encryption" 553 | default = {} 554 | } 555 | 556 | variable "glue_data_catalog_encryption_settings_catalog_id" { 557 | description = "(Optional) The ID of the Data Catalog to set the security configuration for. If none is provided, the AWS account ID is used by default." 
558 | default = null 559 | } 560 | 561 | #--------------------------------------------------- 562 | # AWS Glue dev endpoint 563 | #--------------------------------------------------- 564 | variable "enable_glue_dev_endpoint" { 565 | description = "Enable glue dev endpoint usage" 566 | default = false 567 | } 568 | 569 | variable "glue_dev_endpoint_name" { 570 | description = "The name of this endpoint. It must be unique in your account." 571 | default = "" 572 | } 573 | 574 | variable "glue_dev_endpoint_role_arn" { 575 | description = "(Required) The IAM role for this endpoint." 576 | default = null 577 | } 578 | 579 | variable "glue_dev_endpoint_arguments" { 580 | description = "(Optional) A map of arguments used to configure the endpoint." 581 | default = null 582 | } 583 | 584 | variable "glue_dev_endpoint_extra_jars_s3_path" { 585 | description = "(Optional) Path to one or more Java Jars in an S3 bucket that should be loaded in this endpoint." 586 | default = null 587 | } 588 | 589 | variable "glue_dev_endpoint_extra_python_libs_s3_path" { 590 | description = "(Optional) Path(s) to one or more Python libraries in an S3 bucket that should be loaded in this endpoint. Multiple values must be complete paths separated by a comma." 591 | default = null 592 | } 593 | 594 | variable "glue_dev_endpoint_glue_version" { 595 | description = "(Optional) - Specifies the versions of Python and Apache Spark to use. Defaults to AWS Glue version 0.9." 596 | default = null 597 | } 598 | 599 | variable "glue_dev_endpoint_number_of_nodes" { 600 | description = "(Optional) The number of AWS Glue Data Processing Units (DPUs) to allocate to this endpoint. Conflicts with worker_type" 601 | default = null 602 | } 603 | 604 | variable "glue_dev_endpoint_number_of_workers" { 605 | description = "(Optional) The number of workers of a defined worker type that are allocated to this endpoint. This field is available only when you choose worker type G.1X or G.2X." 
606 | default = null 607 | } 608 | 609 | variable "glue_dev_endpoint_public_key" { 610 | description = "(Optional) The public key to be used by this endpoint for authentication." 611 | default = null 612 | } 613 | 614 | variable "glue_dev_endpoint_public_keys" { 615 | description = "(Optional) A list of public keys to be used by this endpoint for authentication." 616 | default = null 617 | } 618 | 619 | variable "glue_dev_endpoint_security_configuration" { 620 | description = "(Optional) The name of the Security Configuration structure to be used with this endpoint." 621 | default = null 622 | } 623 | 624 | variable "glue_dev_endpoint_security_group_ids" { 625 | description = "(Optional) Security group IDs for the security groups to be used by this endpoint." 626 | default = null 627 | } 628 | 629 | variable "glue_dev_endpoint_subnet_id" { 630 | description = "(Optional) The subnet ID for the new endpoint to use." 631 | default = null 632 | } 633 | 634 | variable "glue_dev_endpoint_worker_type" { 635 | description = "(Optional) The type of predefined worker that is allocated to this endpoint. Accepts a value of Standard, G.1X, or G.2X." 636 | default = null 637 | } 638 | 639 | #--------------------------------------------------- 640 | # AWS Glue ml transform 641 | #--------------------------------------------------- 642 | variable "enable_glue_ml_transform" { 643 | description = "Enable glue ml transform usage" 644 | default = false 645 | } 646 | 647 | variable "glue_ml_transform_name" { 648 | description = "The name you assign to this ML Transform. It must be unique in your account." 649 | default = "" 650 | } 651 | 652 | variable "glue_ml_transform_role_arn" { 653 | description = "(Required) The ARN of the IAM role associated with this ML Transform." 654 | default = null 655 | } 656 | 657 | variable "glue_ml_transform_input_record_tables" { 658 | description = "(Required) A list of AWS Glue table definitions used by the transform. see Input Record Tables." 
659 | default = [] 660 | } 661 | 662 | variable "glue_ml_transform_parameters" { 663 | description = "(Required) The algorithmic parameters that are specific to the transform type used. Conditionally dependent on the transform type. see Parameters." 664 | default = [] 665 | } 666 | 667 | variable "glue_ml_transform_description" { 668 | description = "(Optional) Description of the ML Transform." 669 | default = null 670 | } 671 | 672 | variable "glue_ml_transform_glue_version" { 673 | description = "(Optional) The version of glue to use, for example '1.0'. For information about available versions, see the AWS Glue Release Notes." 674 | default = null 675 | } 676 | 677 | variable "glue_ml_transform_max_capacity" { 678 | description = "(Optional) The number of AWS Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. max_capacity is a mutually exclusive option with number_of_workers and worker_type." 679 | default = null 680 | } 681 | 682 | variable "glue_ml_transform_max_retries" { 683 | description = "(Optional) The maximum number of times to retry this ML Transform if it fails." 684 | default = null 685 | } 686 | 687 | variable "glue_ml_transform_timeout" { 688 | description = "(Optional) The ML Transform timeout in minutes. The default is 2880 minutes (48 hours)." 689 | default = null 690 | } 691 | 692 | variable "glue_ml_transform_worker_type" { 693 | description = "(Optional) The type of predefined worker that is allocated when an ML Transform runs. Accepts a value of Standard, G.1X, or G.2X. Required with number_of_workers." 694 | default = null 695 | } 696 | 697 | variable "glue_ml_transform_number_of_workers" { 698 | description = "(Optional) The number of workers of a defined worker_type that are allocated when an ML Transform runs. 
Required with worker_type" 699 | default = null 700 | } 701 | 702 | #--------------------------------------------------- 703 | # AWS Glue partition 704 | #--------------------------------------------------- 705 | variable "enable_glue_partition" { 706 | description = "Enable glue partition usage" 707 | default = false 708 | } 709 | 710 | variable "glue_partition_database_name" { 711 | description = "Name of the metadata database where the table metadata resides. For Hive compatibility, this must be all lowercase." 712 | default = "" 713 | } 714 | 715 | variable "glue_partition_table_name" { 716 | description = "Table name" 717 | default = "" 718 | } 719 | 720 | variable "glue_partition_partition_values" { 721 | description = "(Required) The values that define the partition." 722 | default = [] 723 | } 724 | 725 | variable "glue_partition_catalog_id" { 726 | description = "(Optional) ID of the Glue Catalog and database to create the table in. If omitted, this defaults to the AWS Account ID plus the database name." 727 | default = null 728 | } 729 | 730 | variable "glue_partition_parameters" { 731 | description = "(Optional) Properties associated with this table, as a list of key-value pairs." 732 | default = null 733 | } 734 | 735 | variable "glue_partition_storage_descriptor" { 736 | description = "(Optional) A storage descriptor object containing information about the physical storage of this table. You can refer to the Glue Developer Guide for a full explanation of this object." 
737 | default = { 738 | location = null 739 | input_format = null 740 | output_format = null 741 | compressed = null 742 | number_of_buckets = null 743 | bucket_columns = null 744 | parameters = null 745 | stored_as_sub_directories = null 746 | } 747 | } 748 | 749 | #--------------------------------------------------- 750 | # AWS Glue registry 751 | #--------------------------------------------------- 752 | variable "enable_glue_registry" { 753 | description = "Enable glue registry usage" 754 | default = false 755 | } 756 | 757 | variable "glue_registry_name" { 758 | description = "The Name of the registry." 759 | default = "" 760 | } 761 | 762 | variable "glue_registry_description" { 763 | description = "(Optional) A description of the registry." 764 | default = null 765 | } 766 | 767 | #--------------------------------------------------- 768 | # AWS Glue resource policy 769 | #--------------------------------------------------- 770 | variable "enable_glue_resource_policy" { 771 | description = "Enable glue resource policy usage" 772 | default = false 773 | } 774 | 775 | variable "glue_resource_policy" { 776 | description = "(Required) The policy to be applied to the aws glue data catalog." 777 | default = null 778 | } 779 | 780 | variable "glue_resource_policy_enable_hybrid" { 781 | description = "(Optional) Indicates that you are using both methods to grant cross-account. Valid values are TRUE and FALSE. Note the terraform will not perform drift detetction on this field as its not return on read." 782 | default = null 783 | } 784 | 785 | #--------------------------------------------------- 786 | # AWS Glue schema 787 | #--------------------------------------------------- 788 | variable "enable_glue_schema" { 789 | description = "Enable glue schema usage" 790 | default = false 791 | } 792 | 793 | variable "glue_schema_name" { 794 | description = "The Name of the schema." 
795 | default = "" 796 | } 797 | 798 | variable "glue_schema_registry_arn" { 799 | description = "The ARN of the Glue Registry to create the schema in." 800 | default = "" 801 | } 802 | 803 | variable "glue_schema_data_format" { 804 | description = "(Required) The data format of the schema definition. Currently only AVRO is supported." 805 | default = null 806 | } 807 | 808 | variable "glue_schema_compatibility" { 809 | description = "(Required) The compatibility mode of the schema. Values values are: NONE, DISABLED, BACKWARD, BACKWARD_ALL, FORWARD, FORWARD_ALL, FULL, and FULL_ALL." 810 | default = null 811 | } 812 | 813 | variable "glue_schema_schema_definition" { 814 | description = "(Required) The schema definition using the data_format setting for schema_name." 815 | default = null 816 | } 817 | 818 | variable "glue_schema_description" { 819 | description = "(Optional) A description of the schema." 820 | default = null 821 | } 822 | 823 | #--------------------------------------------------- 824 | # AWS Glue user defined function 825 | #--------------------------------------------------- 826 | variable "enable_glue_user_defined_function" { 827 | description = "Enable glue user defined function usage" 828 | default = false 829 | } 830 | 831 | variable "glue_user_defined_function_name" { 832 | description = "The name of the function." 833 | default = "" 834 | } 835 | 836 | variable "glue_user_defined_function_database_name" { 837 | description = "The name of the Database to create the Function." 838 | default = "" 839 | } 840 | 841 | variable "glue_user_defined_function_class_name" { 842 | description = "(Required) The Java class that contains the function code." 843 | default = null 844 | } 845 | 846 | variable "glue_user_defined_function_owner_name" { 847 | description = "(Required) The owner of the function." 848 | default = null 849 | } 850 | 851 | variable "glue_user_defined_function_owner_type" { 852 | description = "(Required) The owner type. 
can be one of USER, ROLE, and GROUP." 853 | default = null 854 | } 855 | 856 | variable "glue_user_defined_function_catalog_id" { 857 | description = "(Optional) ID of the Glue Catalog to create the function in. If omitted, this defaults to the AWS Account ID." 858 | default = null 859 | } 860 | 861 | variable "glue_user_defined_function_resource_uris" { 862 | description = "(Optional) The configuration block for Resource URIs. See resource uris below for more details." 863 | default = [] 864 | } 865 | 866 | #--------------------------------------------------- 867 | # AWS Glue partition index 868 | #--------------------------------------------------- 869 | variable "enable_glue_partition_index" { 870 | description = "Enable glue partition index usage" 871 | default = false 872 | } 873 | 874 | variable "glue_partition_index_table_name" { 875 | description = "Name of the table. For Hive compatibility, this must be entirely lowercase." 876 | default = "" 877 | } 878 | 879 | variable "glue_partition_index_database_name" { 880 | description = "(Required) Name of the metadata database where the table metadata resides. For Hive compatibility, this must be all lowercase." 881 | default = "" 882 | } 883 | 884 | variable "glue_partition_index_catalog_id" { 885 | description = "(Optional) The catalog ID where the table resides." 886 | default = "" 887 | } 888 | 889 | variable "glue_partition_index_partition_index" { 890 | description = "(Required) Configuration block for a partition index." 
891 | default = [] 892 | } 893 | 894 | variable "glue_partition_index_timeouts" { 895 | description = "Set timeouts glue partition index" 896 | default = {} 897 | } 898 | 899 | #--------------------------------------------------- 900 | # AWS Glue data quality ruleset 901 | #--------------------------------------------------- 902 | variable "enable_glue_data_quality_ruleset" { 903 | description = "Enable glue data quality ruleset usage" 904 | default = false 905 | } 906 | 907 | variable "glue_data_quality_ruleset_name" { 908 | description = "Name of the data quality ruleset." 909 | default = "" 910 | } 911 | 912 | variable "glue_data_quality_ruleset_description" { 913 | description = "(Optional) Description of the data quality ruleset." 914 | default = null 915 | } 916 | 917 | variable "glue_data_quality_ruleset_ruleset" { 918 | description = "(Optional) A Data Quality Definition Language (DQDL) ruleset. For more information, see the AWS Glue developer guide." 919 | default = null 920 | } 921 | 922 | variable "glue_trigger_target_table" { 923 | description = "(Optional, Forces new resource) A Configuration block specifying a target table associated with the data quality ruleset." 924 | default = {} 925 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Work with GLUE via terraform 2 | 3 | A terraform module for making GLUE. 4 | 5 | 6 | *NOTE*: Full list of Terraform modules that I have are located here: [https://github.com/SebastianUA/terraform](https://github.com/SebastianUA/terraform) 7 | 8 | ## Usage 9 | ---------------------- 10 | Import the module and retrieve with ```terraform get``` or ```terraform get --update```. Adding a module resource to your template, e.g. 
`main.tf`: 11 | 12 | ``` 13 | # 14 | # MAINTAINER Vitaliy Natarov "vitaliy.natarov@yahoo.com" 15 | # 16 | terraform { 17 | required_version = "~> 1.0" 18 | } 19 | 20 | provider "aws" { 21 | region = "us-east-1" 22 | shared_credentials_files = [pathexpand("~/.aws/credentials")] 23 | } 24 | 25 | 26 | module "aws_user_tags" { 27 | source = "./aws_user_tags" 28 | 29 | environment = "DEV" 30 | custom_tags = { 31 | "CreatedBy" = "Vitalii Natarov" 32 | "Helping" = "Gregory Mirsky" 33 | } 34 | } 35 | 36 | # Create example glue service policy from template 37 | module "iam_glue_example_service_policy" { 38 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/iam_policy?ref=master" 39 | name = "TEST-iam-policy" 40 | environment = "stage" 41 | 42 | # Using IAM policy 43 | enable_iam_policy = true 44 | iam_policy_name = "glue_example_service_policy" 45 | iam_policy_description = "Example Glue Service Policy" 46 | iam_policy_path = "/" 47 | iam_policy_policy = templatefile( 48 | "./templates/json/glue_service_policy.json", 49 | { 50 | partition = data.aws_partition.current.partition 51 | } 52 | ) 53 | } 54 | 55 | # Create example glue user policy from template 56 | module "iam_glue_example_user_policy" { 57 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/iam_policy?ref=master" 58 | name = "TEST-iam-policy" 59 | environment = "stage" 60 | 61 | # Using IAM policy 62 | enable_iam_policy = true 63 | iam_policy_name = "glue_example_user_policy" 64 | iam_policy_description = "Example Glue User Policy" 65 | iam_policy_path = "/" 66 | iam_policy_policy = templatefile( 67 | "./templates/json/glue_user_policy.json", 68 | { 69 | partition = data.aws_partition.current.partition 70 | } 71 | ) 72 | } 73 | 74 | module "glue_example_admin_role" { 75 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/iam_role?ref=master" 76 | name = "test" 77 | environment = "DEV" 78 | 79 | # Using IAM role 80 | enable_iam_role = true 81 | iam_role_name = 
"glue_example_role" 82 | iam_role_description = "glue example role" 83 | 84 | iam_role_assume_role_policy = jsonencode( 85 | { 86 | "Version" : "2012-10-17", 87 | "Statement" : [ 88 | { 89 | "Effect" : "Allow", 90 | "Principal" : { 91 | "Service" : "glue.amazonaws.com" 92 | }, 93 | "Action" : "sts:AssumeRole" 94 | } 95 | ] 96 | } 97 | ) 98 | 99 | iam_role_force_detach_policies = true 100 | iam_role_path = "/" 101 | iam_role_max_session_duration = 3600 102 | 103 | # Using IAM role policy 104 | // enable_iam_role_policy = true 105 | // iam_role_policy_name = "my-iam-role-policy-for-testing-terraform" 106 | // iam_role_policy = file("templates/iam_role_policy.json") 107 | 108 | # Using IAM role policy attachment 109 | enable_iam_role_policy_attachment = true 110 | iam_role_policy_attachment_policy_arns = [ 111 | // Attach AWS managed policy: AWSCloudFormationReadOnlyAccess 112 | "arn:${data.aws_partition.current.partition}:iam::${data.aws_partition.current.partition}:policy/AWSCloudFormationReadOnlyAccess", 113 | // Attach AWS managed policy: AWSGlueConsoleFullAccess 114 | "arn:${data.aws_partition.current.partition}:iam::${data.aws_partition.current.partition}:policy/AWSGlueConsoleFullAccess", 115 | // Attach AWS managed policy: AWSGlueConsoleSageMakerNotebookFullAccess 116 | "arn:${data.aws_partition.current.partition}:iam::${data.aws_partition.current.partition}:policy/AWSGlueConsoleSageMakerNotebookFullAccess", 117 | // Attach AWS managed policy: AWSGlueSchemaRegistryFullAccess 118 | "arn:${data.aws_partition.current.partition}:iam::${data.aws_partition.current.partition}:policy/AWSGlueSchemaRegistryFullAccess", 119 | // Attach AWS managed policy: AmazonAthenaFullAccess 120 | "arn:${data.aws_partition.current.partition}:iam::${data.aws_partition.current.partition}:policy/AmazonAthenaFullAccess", 121 | // Attach AWS managed policy: AmazonS3FullAccess 122 | 
"arn:${data.aws_partition.current.partition}:iam::${data.aws_partition.current.partition}:policy/AmazonS3FullAccess", 123 | // Attach AWS managed policy: CloudWatchLogsReadOnlyAccess 124 | "arn:${data.aws_partition.current.partition}:iam::${data.aws_partition.current.partition}:policy/CloudWatchLogsReadOnlyAccess" 125 | ] 126 | } 127 | 128 | module "s3_private_glue_catalog" { 129 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/s3?ref=master" 130 | name = "test" 131 | environment = "DEV" 132 | 133 | # AWS S3 bucket 134 | enable_s3_bucket = true 135 | s3_bucket_name = "glue-catalog-${data.aws_caller_identity.current.account_id}" 136 | s3_bucket_acl_acl = "private" 137 | 138 | # Create test folder in the bucket 139 | enable_s3_object = true 140 | s3_object_stack = [ 141 | { 142 | key = "/catalog" 143 | } 144 | ] 145 | 146 | tags = merge( 147 | module.aws_user_tags.tags, 148 | var.example_tags 149 | ) 150 | } 151 | 152 | # Create glue crawler bucket (account_id ensures unique name across accounts) 153 | module "s3_private_glue_crawler" { 154 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/s3?ref=master" 155 | name = "test" 156 | environment = "DEV" 157 | 158 | # AWS S3 bucket 159 | enable_s3_bucket = true 160 | s3_bucket_name = "glue-crawler-${data.aws_caller_identity.current.account_id}" 161 | s3_bucket_acl_acl = "private" 162 | 163 | # Create crawler folder in the bucket 164 | enable_s3_object = true 165 | s3_object_stack = [ 166 | { 167 | key = "/crawler" 168 | } 169 | ] 170 | 171 | tags = merge( 172 | module.aws_user_tags.tags, 173 | var.example_tags 174 | ) 175 | } 176 | 177 | # Create Glue job bucket (account_id ensures unique name across accounts) 178 | module "s3_private_glue_jobs" { 179 | source = "git@github.com:SebastianUA/terraform.git//aws/modules/s3?ref=master" 180 | name = "test" 181 | environment = "DEV" 182 | 183 | # AWS S3 bucket 184 | enable_s3_bucket = true 185 | s3_bucket_name = 
"glue-jobs-${data.aws_caller_identity.current.account_id}" 186 | s3_bucket_acl_acl = "private" 187 | 188 | # Create crawler folder in the bucket 189 | enable_s3_object = true 190 | s3_object_stack = [ 191 | { 192 | key = "/jobs" 193 | } 194 | ] 195 | 196 | tags = merge( 197 | module.aws_user_tags.tags, 198 | var.example_tags 199 | ) 200 | } 201 | 202 | # Generation random password 203 | module "random_glue_connection_password" { 204 | source = "git@github.com:SebastianUA/terraform.git//random/modules/random?ref=master" 205 | 206 | # Generate random password 207 | enable_password = true 208 | password_length = 13 209 | password_special = true 210 | password_override_special = "_%@" 211 | } 212 | 213 | # terraform-aws-glue module 214 | module "glue" { 215 | source = "../" 216 | name = "TEST" 217 | environment = "STAGE" 218 | 219 | # AWS Glue catalog DB 220 | enable_glue_catalog_database = true 221 | glue_catalog_database_name = "test-glue-db-${data.aws_caller_identity.current.account_id}" 222 | glue_catalog_database_parameters = null 223 | # AWS Glue catalog table 224 | enable_glue_catalog_table = true 225 | glue_catalog_table_name = "test-glue-table-${data.aws_caller_identity.current.account_id}" 226 | glue_catalog_table_description = "Those resources are managed by Terraform. 
Created by Vitaliy Natarov" 227 | glue_catalog_table_table_type = "EXTERNAL_TABLE" 228 | glue_catalog_table_parameters = { 229 | "sizeKey" = 493378 230 | "tmp" = "none" 231 | "test" = "yes" 232 | "classification" = "csv" 233 | } 234 | glue_catalog_table_storage_descriptor = { 235 | location = "s3://${module.s3_private_glue_catalog.s3_bucket_id}/test" 236 | input_format = "org.apache.hadoop.mapred.TextInputFormat" 237 | output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat" 238 | 239 | columns = [ 240 | { 241 | columns_name = "oid" 242 | columns_type = "double" 243 | columns_comment = "oid" 244 | }, 245 | { 246 | columns_name = "oid2" 247 | columns_type = "double" 248 | columns_comment = "oid2" 249 | }, 250 | { 251 | columns_name = "oid3" 252 | columns_type = "double" 253 | columns_comment = "oid3" 254 | }, 255 | ] 256 | 257 | ser_de_info = [ 258 | { 259 | ser_de_info_name = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" 260 | ser_de_info_serialization_library = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" 261 | ser_de_info_parameters = tomap({ "field.delim" = "," }) 262 | } 263 | ] 264 | 265 | skewed_info = [ 266 | { 267 | ser_de_info_name = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" 268 | ser_de_info_serialization_library = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" 269 | ser_de_info_parameters = tomap({ "field.delim" = "," }) 270 | } 271 | ] 272 | 273 | sort_columns = [] 274 | } 275 | 276 | 277 | # AWS Glue connection 278 | enable_glue_connection = true 279 | glue_connection_connection_properties = { 280 | JDBC_CONNECTION_URL = "jdbc:mysql://aws_rds_cluster.example.endpoint/exampledatabase" 281 | PASSWORD = module.random_glue_connection_password.password_result 282 | USERNAME = var.glue_connection_user_name 283 | } 284 | ##glue_connection_physical_connection_requirements = [{ 285 | # availability_zone = "zone_here" 286 | # security_group_id_list = [] 287 | # subnet_id = "subnet_here" 288 | #}] 289 | 
enable_glue_crawler = true 290 | glue_crawler_name = "" 291 | glue_crawler_role = module.glue_example_admin_role.iam_role_arn 292 | enable_glue_security_configuration = false 293 | glue_security_configuration_name = "" 294 | glue_crawler_s3_target = [ 295 | { 296 | path = "s3://${module.s3_private_glue_crawler.s3_bucket_id}/crawler" 297 | exclusions = [] 298 | } 299 | ] 300 | 301 | enable_glue_job = true 302 | glue_job_name = "" 303 | glue_job_role_arn = module.glue_example_admin_role.iam_role_arn 304 | glue_job_additional_connections = [] 305 | glue_job_execution_property = [ 306 | { 307 | max_concurrent_runs = 2 308 | } 309 | ] 310 | glue_job_command = [ 311 | { 312 | script_location = "s3://${module.s3_private_glue_jobs.s3_bucket_id}/jobs" 313 | name = "jobs" 314 | } 315 | ] 316 | tags = merge( 317 | module.aws_user_tags.tags, 318 | tomap({ 319 | "cost-center" = "00-00000.000.01", 320 | "Project" = "My Test Glue Project" 321 | }) 322 | ) 323 | 324 | depends_on = [ 325 | module.aws_user_tags 326 | ] 327 | } 328 | 329 | module "glue_trigger" { 330 | source = "../" 331 | name = "TEST" 332 | environment = "STAGE" 333 | 334 | enable_glue_trigger = true 335 | glue_trigger_name = "" 336 | glue_trigger_actions = [ 337 | { 338 | # JobName and CrawlerName cannot both be set in the same action 339 | crawler_name = module.glue.glue_crawler_id # null 340 | job_name = null # module.glue.glue_job_id 341 | arguments = null 342 | timeout = null 343 | } 344 | ] 345 | 346 | glue_trigger_predicate = { 347 | logical = "AND" 348 | conditions = [ 349 | { 350 | job_name = "job-name-1" 351 | state = null 352 | crawler_name = null 353 | crawl_state = null 354 | logical_operator = "EQUALS" 355 | }, 356 | { 357 | job_name = "job-name-2" 358 | state = null 359 | crawler_name = null 360 | crawl_state = null 361 | logical_operator = "EQUALS" 362 | } 363 | ] 364 | } 365 | 366 | tags = merge( 367 | module.aws_user_tags.tags, 368 | tomap({ 369 | "cost-center" = "00-00000.000.01", 370 |
"Project" = "My Test Glue Project" 371 | }) 372 | ) 373 | 374 | depends_on = [ 375 | module.glue, 376 | module.aws_user_tags 377 | ] 378 | } 379 | ``` 380 | 381 | ## Module Input Variables 382 | ---------------------- 383 | - `name` - Name to be used on all resources as prefix (`default = TEST`) 384 | - `environment` - Environment for service (`default = STAGE`) 385 | - `tags` - A list of tag blocks. Each element should have keys named key, value, etc. (`default = {}`) 386 | - `enable_glue_catalog_database` - Enable glue catalog database usage (`default = False`) 387 | - `glue_catalog_database_name` - The name of the database. (`default = ""`) 388 | - `glue_catalog_database_description` - (Optional) Description of the database. (`default = null`) 389 | - `glue_catalog_database_catalog_id` - (Optional) ID of the Glue Catalog to create the database in. If omitted, this defaults to the AWS Account ID. (`default = null`) 390 | - `glue_catalog_database_location_uri` - (Optional) The location of the database (for example, an HDFS path). (`default = null`) 391 | - `glue_catalog_database_parameters` - (Optional) A list of key-value pairs that define parameters and properties of the database. (`default = null`) 392 | - `glue_catalog_database_create_table_default_permission` - (Optional) Creates a set of default permissions on the table for principals. (`default = {}`) 393 | - `glue_catalog_database_target_database` - (Optional) Configuration block for a target database for resource linking. (`default = {}`) 394 | - `enable_glue_catalog_table` - Enable glue catalog table usage (`default = False`) 395 | - `glue_catalog_table_name` - Name of the table. For Hive compatibility, this must be entirely lowercase. (`default = ""`) 396 | - `glue_catalog_table_database_name` - Name of the metadata database where the table metadata resides. For Hive compatibility, this must be all lowercase. 
(`default = ""`) 397 | - `glue_catalog_table_description` - (Optional) Description of the table. (`default = null`) 398 | - `glue_catalog_table_catalog_id` - (Optional) ID of the Glue Catalog and database to create the table in. If omitted, this defaults to the AWS Account ID plus the database name. (`default = null`) 399 | - `glue_catalog_table_owner` - (Optional) Owner of the table. (`default = null`) 400 | - `glue_catalog_table_retention` - (Optional) Retention time for this table. (`default = null`) 401 | - `glue_catalog_table_partition_keys` - (Optional) A list of columns by which the table is partitioned. Only primitive types are supported as partition keys. (`default = []`) 402 | - `glue_catalog_table_view_original_text` - (Optional) If the table is a view, the original text of the view; otherwise null. (`default = null`) 403 | - `glue_catalog_table_view_expanded_text` - (Optional) If the table is a view, the expanded text of the view; otherwise null. (`default = null`) 404 | - `glue_catalog_table_table_type` - (Optional) The type of this table (EXTERNAL_TABLE, VIRTUAL_VIEW, etc.). (`default = null`) 405 | - `glue_catalog_table_parameters` - (Optional) Properties associated with this table, as a list of key-value pairs. (`default = null`) 406 | - `glue_catalog_table_storage_descriptor` - (Optional) A storage descriptor object containing information about the physical storage of this table. You can refer to the Glue Developer Guide for a full explanation of this object. (`default = {'location': None, 'input_format': None, 'output_format': None, 'compressed': None, 'number_of_buckets': None, 'bucket_columns': None, 'parameters': None, 'stored_as_sub_directories': None}`) 407 | - `glue_catalog_table_partition_index` - (Optional) Configuration block for a maximum of 3 partition indexes. (`default = []`) 408 | - `glue_catalog_table_target_table` - (Optional) Configuration block of a target table for resource linking.
(`default = []`) 409 | - `enable_glue_classifier` - Enable glue classifier usage (`default = False`) 410 | - `glue_classifier_name` - The name of the classifier. (`default = ""`) 411 | - `glue_classifier_csv_classifier` - (Optional) A classifier for Csv content. (`default = []`) 412 | - `glue_classifier_grok_classifier` - (Optional) A classifier for grok content. (`default = []`) 413 | - `glue_classifier_json_classifier` - (Optional) A classifier for json content. (`default = []`) 414 | - `glue_classifier_xml_classifier` - (Optional) A classifier for xml content. (`default = []`) 415 | - `enable_glue_connection` - Enable glue connection usage (`default = False`) 416 | - `glue_connection_name` - The name of the connection. (`default = ""`) 417 | - `glue_connection_connection_properties` - (Required) A map of key-value pairs used as parameters for this connection. (`default = {}`) 418 | - `glue_connection_description` - (Optional) Description of the connection. (`default = null`) 419 | - `glue_connection_catalog_id` - (Optional) The ID of the Data Catalog in which to create the connection. If none is supplied, the AWS account ID is used by default. (`default = null`) 420 | - `glue_connection_connection_type` - (Optional) The type of the connection. Supported are: JDBC, MONGODB. Defaults to JDBC. (`default = JDBC`) 421 | - `glue_connection_match_criteria` - (Optional) A list of criteria that can be used in selecting this connection. (`default = null`) 422 | - `glue_connection_physical_connection_requirements` - (Optional) A map of physical connection requirements, such as VPC and SecurityGroup. (`default = []`) 423 | - `enable_glue_crawler` - Enable glue crawler usage (`default = False`) 424 | - `glue_crawler_name` - Name of the crawler. (`default = ""`) 425 | - `glue_crawler_database_name` - Glue database where results are written. 
(`default = ""`) 426 | - `glue_crawler_role` - (Required) The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources. (`default = ""`) 427 | - `glue_crawler_description` - (Optional) Description of the crawler. (`default = null`) 428 | - `glue_crawler_classifiers` - (Optional) List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification. (`default = null`) 429 | - `glue_crawler_configuration` - (Optional) JSON string of configuration information. (`default = null`) 430 | - `glue_crawler_schedule` - (Optional) A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *). (`default = null`) 431 | - `glue_crawler_security_configuration` - (Optional) The name of Security Configuration to be used by the crawler (`default = null`) 432 | - `glue_crawler_table_prefix` - (Optional) The table prefix used for catalog tables that are created. (`default = null`) 433 | - `glue_crawler_delta_target` - (Optional) List of nested Delta Lake target arguments (`default = []`) 434 | - `glue_crawler_iceberg_target` - (Optional) List nested Iceberg target arguments. (`default = []`) 435 | - `glue_crawler_lake_formation_configuration` - (Optional) Specifies Lake Formation configuration settings for the crawler. (`default = []`) 436 | - `glue_crawler_dynamodb_target` - (Optional) List of nested DynamoDB target arguments. (`default = []`) 437 | - `glue_crawler_jdbc_target` - (Optional) List of nested JDBC target arguments. (`default = []`) 438 | - `glue_crawler_s3_target` - (Optional) List nested Amazon S3 target arguments. (`default = []`) 439 | - `glue_crawler_catalog_target` - (Optional) List nested Amazon catalog target arguments. 
(`default = []`) 440 | - `glue_crawler_schema_change_policy` - (Optional) Policy for the crawler's update and deletion behavior. (`default = []`) 441 | - `glue_crawler_recrawl_policy` - (Optional) A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. (`default = []`) 442 | - `glue_crawler_mongodb_target` - (Optional) List nested MongoDB target arguments. (`default = []`) 443 | - `glue_crawler_lineage_configuration` - (Optional) Specifies data lineage configuration settings for the crawler. (`default = []`) 444 | - `enable_glue_security_configuration` - Enable glue security configuration usage (`default = False`) 445 | - `glue_security_configuration_name` - Name of the security configuration. (`default = ""`) 446 | - `glue_security_configuration_encryption_configuration` - Set encryption configuration for Glue security configuration (`default = {}`) 447 | - `enable_glue_workflow` - Enable glue workflow usage (`default = False`) 448 | - `glue_workflow_name` - The name you assign to this workflow. (`default = ""`) 449 | - `glue_workflow_description` - (Optional) Description of the workflow. (`default = null`) 450 | - `glue_workflow_default_run_properties` - (Optional) A map of default run properties for this workflow. These properties are passed to all jobs associated to the workflow. (`default = null`) 451 | - `glue_workflow_max_concurrent_runs` - (Optional) Prevents exceeding the maximum number of concurrent runs of any of the component jobs. If you leave this parameter blank, there is no limit to the number of concurrent workflow runs. (`default = null`) 452 | - `enable_glue_job` - Enable glue job usage (`default = False`) 453 | - `glue_job_name` - The name you assign to this job. It must be unique in your account. (`default = ""`) 454 | - `glue_job_role_arn` - The ARN of the IAM role associated with this job. 
(`default = null`) 455 | - `glue_job_command` - (Required) The command of the job. (`default = []`) 456 | - `glue_job_description` - (Optional) Description of the job. (`default = null`) 457 | - `glue_job_connections` - (Optional) The list of connections used for this job. (`default = []`) 458 | - `glue_job_additional_connections` - (Optional) The list of connections used for the job. (`default = []`) 459 | - `glue_job_default_arguments` - (Optional) The map of default arguments for this job. You can specify arguments here that your own job-execution script consumes, as well as arguments that AWS Glue itself consumes. For information about how to specify and consume your own Job arguments, see the Calling AWS Glue APIs in Python topic in the developer guide. For information about the key-value pairs that AWS Glue consumes to set up your job, see the Special Parameters Used by AWS Glue topic in the developer guide. (`default = {'--job-language': 'python'}`) 460 | - `glue_job_non_overridable_arguments` - (Optional) Non-overridable arguments for this job, specified as name-value pairs. (`default = null`) 461 | - `glue_job_execution_property` - (Optional) Execution property of the job. (`default = []`) 462 | - `glue_job_glue_version` - (Optional) The version of glue to use, for example '1.0'. For information about available versions, see the AWS Glue Release Notes. (`default = null`) 463 | - `glue_job_execution_class` - (Optional) Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources. Valid value: FLEX, STANDARD. (`default = null`) 464 | - `glue_job_max_capacity` - (Optional) The maximum number of AWS Glue data processing units (DPUs) that can be allocated when this job runs. Required when pythonshell is set, accept either 0.0625 or 1.0. 
(`default = null`) 465 | - `glue_job_max_retries` - (Optional) The maximum number of times to retry this job if it fails. (`default = null`) 466 | - `glue_job_notification_property` - (Optional) Notification property of the job. (`default = []`) 467 | - `glue_job_timeout` - (Optional) The job timeout in minutes. The default is 2880 minutes (48 hours). (`default = 2880`) 468 | - `glue_job_security_configuration` - (Optional) The name of the Security Configuration to be associated with the job. (`default = null`) 469 | - `glue_job_worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, or G.2X. (`default = null`) 470 | - `glue_job_number_of_workers` - (Optional) The number of workers of a defined workerType that are allocated when a job runs. (`default = null`) 471 | - `enable_glue_trigger` - Enable glue trigger usage (`default = False`) 472 | - `glue_trigger_name` - The name of the trigger. (`default = ""`) 473 | - `glue_trigger_type` - (Required) The type of trigger. Valid values are CONDITIONAL, ON_DEMAND, and SCHEDULED. (`default = ON_DEMAND`) 474 | - `glue_trigger_description` - (Optional) A description of the new trigger. (`default = null`) 475 | - `glue_trigger_enabled` - (Optional) Start the trigger. Defaults to true. Not valid to disable for ON_DEMAND type. (`default = null`) 476 | - `glue_trigger_schedule` - (Optional) A cron expression used to specify the schedule. Time-Based Schedules for Jobs and Crawlers (`default = null`) 477 | - `glue_trigger_workflow_name` - (Optional) A workflow to which the trigger should be associated to. Every workflow graph (DAG) needs a starting trigger (ON_DEMAND or SCHEDULED type) and can contain multiple additional CONDITIONAL triggers. (`default = null`) 478 | - `glue_trigger_actions` - (Required) List of actions initiated by this trigger when it fires. 
(`default = []`) 479 | - `glue_trigger_timeouts` - Set timeouts for glue trigger (`default = {}`) 480 | - `glue_trigger_predicate` - (Optional) A predicate to specify when the new trigger should fire. Required when trigger type is CONDITIONAL (`default = {}`) 481 | - `glue_trigger_start_on_creation` - (Optional) Set to true to start SCHEDULED and CONDITIONAL triggers when created. True is not supported for ON_DEMAND triggers. (`default = null`) 482 | - `glue_trigger_event_batching_condition` - (Optional) Batch condition that must be met (specified number of events received or batch time window expired) before EventBridge event trigger fires. (`default = []`) 483 | - `enable_glue_data_catalog_encryption_settings` - Enable glue data catalog encryption settings usage (`default = False`) 484 | - `glue_data_catalog_encryption_settings_data_catalog_encryption_settings` - Set data_catalog_encryption_settings block for Glue data catalog encryption (`default = {}`) 485 | - `glue_data_catalog_encryption_settings_catalog_id` - (Optional) The ID of the Data Catalog to set the security configuration for. If none is provided, the AWS account ID is used by default. (`default = null`) 486 | - `enable_glue_dev_endpoint` - Enable glue dev endpoint usage (`default = False`) 487 | - `glue_dev_endpoint_name` - The name of this endpoint. It must be unique in your account. (`default = ""`) 488 | - `glue_dev_endpoint_role_arn` - (Required) The IAM role for this endpoint. (`default = null`) 489 | - `glue_dev_endpoint_arguments` - (Optional) A map of arguments used to configure the endpoint. (`default = null`) 490 | - `glue_dev_endpoint_extra_jars_s3_path` - (Optional) Path to one or more Java Jars in an S3 bucket that should be loaded in this endpoint. (`default = null`) 491 | - `glue_dev_endpoint_extra_python_libs_s3_path` - (Optional) Path(s) to one or more Python libraries in an S3 bucket that should be loaded in this endpoint. 
Multiple values must be complete paths separated by a comma. (`default = null`) 492 | - `glue_dev_endpoint_glue_version` - (Optional) - Specifies the versions of Python and Apache Spark to use. Defaults to AWS Glue version 0.9. (`default = null`) 493 | - `glue_dev_endpoint_number_of_nodes` - (Optional) The number of AWS Glue Data Processing Units (DPUs) to allocate to this endpoint. Conflicts with worker_type (`default = null`) 494 | - `glue_dev_endpoint_number_of_workers` - (Optional) The number of workers of a defined worker type that are allocated to this endpoint. This field is available only when you choose worker type G.1X or G.2X. (`default = null`) 495 | - `glue_dev_endpoint_public_key` - (Optional) The public key to be used by this endpoint for authentication. (`default = null`) 496 | - `glue_dev_endpoint_public_keys` - (Optional) A list of public keys to be used by this endpoint for authentication. (`default = null`) 497 | - `glue_dev_endpoint_security_configuration` - (Optional) The name of the Security Configuration structure to be used with this endpoint. (`default = null`) 498 | - `glue_dev_endpoint_security_group_ids` - (Optional) Security group IDs for the security groups to be used by this endpoint. (`default = null`) 499 | - `glue_dev_endpoint_subnet_id` - (Optional) The subnet ID for the new endpoint to use. (`default = null`) 500 | - `glue_dev_endpoint_worker_type` - (Optional) The type of predefined worker that is allocated to this endpoint. Accepts a value of Standard, G.1X, or G.2X. (`default = null`) 501 | - `enable_glue_ml_transform` - Enable glue ml transform usage (`default = False`) 502 | - `glue_ml_transform_name` - The name you assign to this ML Transform. It must be unique in your account. (`default = ""`) 503 | - `glue_ml_transform_role_arn` - (Required) The ARN of the IAM role associated with this ML Transform. 
(`default = null`) 504 | - `glue_ml_transform_input_record_tables` - (Required) A list of AWS Glue table definitions used by the transform. see Input Record Tables. (`default = []`) 505 | - `glue_ml_transform_parameters` - (Required) The algorithmic parameters that are specific to the transform type used. Conditionally dependent on the transform type. see Parameters. (`default = []`) 506 | - `glue_ml_transform_description` - (Optional) Description of the ML Transform. (`default = null`) 507 | - `glue_ml_transform_glue_version` - (Optional) The version of glue to use, for example '1.0'. For information about available versions, see the AWS Glue Release Notes. (`default = null`) 508 | - `glue_ml_transform_max_capacity` - (Optional) The number of AWS Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. max_capacity is a mutually exclusive option with number_of_workers and worker_type. (`default = null`) 509 | - `glue_ml_transform_max_retries` - (Optional) The maximum number of times to retry this ML Transform if it fails. (`default = null`) 510 | - `glue_ml_transform_timeout` - (Optional) The ML Transform timeout in minutes. The default is 2880 minutes (48 hours). (`default = null`) 511 | - `glue_ml_transform_worker_type` - (Optional) The type of predefined worker that is allocated when an ML Transform runs. Accepts a value of Standard, G.1X, or G.2X. Required with number_of_workers. (`default = null`) 512 | - `glue_ml_transform_number_of_workers` - (Optional) The number of workers of a defined worker_type that are allocated when an ML Transform runs. Required with worker_type (`default = null`) 513 | - `enable_glue_partition` - Enable glue partition usage (`default = False`) 514 | - `glue_partition_database_name` - Name of the metadata database where the table metadata resides. For Hive compatibility, this must be all lowercase. 
(`default = ""`) 515 | - `glue_partition_table_name` - Table name (`default = ""`) 516 | - `glue_partition_partition_values` - (Required) The values that define the partition. (`default = []`) 517 | - `glue_partition_catalog_id` - (Optional) ID of the Glue Catalog and database to create the table in. If omitted, this defaults to the AWS Account ID plus the database name. (`default = null`) 518 | - `glue_partition_parameters` - (Optional) Properties associated with this table, as a list of key-value pairs. (`default = null`) 519 | - `glue_partition_storage_descriptor` - (Optional) A storage descriptor object containing information about the physical storage of this table. You can refer to the Glue Developer Guide for a full explanation of this object. (`default = {'location': None, 'input_format': None, 'output_format': None, 'compressed': None, 'number_of_buckets': None, 'bucket_columns': None, 'parameters': None, 'stored_as_sub_directories': None}`) 520 | - `enable_glue_registry` - Enable glue registry usage (`default = False`) 521 | - `glue_registry_name` - The Name of the registry. (`default = ""`) 522 | - `glue_registry_description` - (Optional) A description of the registry. (`default = null`) 523 | - `enable_glue_resource_policy` - Enable glue resource policy usage (`default = False`) 524 | - `glue_resource_policy` - (Required) The policy to be applied to the aws glue data catalog. (`default = null`) 525 | - `glue_resource_policy_enable_hybrid` - (Optional) Indicates that you are using both methods to grant cross-account. Valid values are TRUE and FALSE. Note the terraform will not perform drift detection on this field as it is not returned on read. (`default = null`) 526 | - `enable_glue_schema` - Enable glue schema usage (`default = False`) 527 | - `glue_schema_name` - The Name of the schema. (`default = ""`) 528 | - `glue_schema_registry_arn` - The ARN of the Glue Registry to create the schema in. 
(`default = ""`) 529 | - `glue_schema_data_format` - (Required) The data format of the schema definition. Currently only AVRO is supported. (`default = null`) 530 | - `glue_schema_compatibility` - (Required) The compatibility mode of the schema. Valid values are: NONE, DISABLED, BACKWARD, BACKWARD_ALL, FORWARD, FORWARD_ALL, FULL, and FULL_ALL. (`default = null`) 531 | - `glue_schema_schema_definition` - (Required) The schema definition using the data_format setting for schema_name. (`default = null`) 532 | - `glue_schema_description` - (Optional) A description of the schema. (`default = null`) 533 | - `enable_glue_user_defined_function` - Enable glue user defined function usage (`default = False`) 534 | - `glue_user_defined_function_name` - The name of the function. (`default = ""`) 535 | - `glue_user_defined_function_database_name` - The name of the Database to create the Function. (`default = ""`) 536 | - `glue_user_defined_function_class_name` - (Required) The Java class that contains the function code. (`default = null`) 537 | - `glue_user_defined_function_owner_name` - (Required) The owner of the function. (`default = null`) 538 | - `glue_user_defined_function_owner_type` - (Required) The owner type. can be one of USER, ROLE, and GROUP. (`default = null`) 539 | - `glue_user_defined_function_catalog_id` - (Optional) ID of the Glue Catalog to create the function in. If omitted, this defaults to the AWS Account ID. (`default = null`) 540 | - `glue_user_defined_function_resource_uris` - (Optional) The configuration block for Resource URIs. See resource uris below for more details. (`default = []`) 541 | - `enable_glue_partition_index` - Enable glue partition index usage (`default = False`) 542 | - `glue_partition_index_table_name` - Name of the table. For Hive compatibility, this must be entirely lowercase. (`default = ""`) 543 | - `glue_partition_index_database_name` - (Required) Name of the metadata database where the table metadata resides. 
For Hive compatibility, this must be all lowercase. (`default = ""`) 544 | - `glue_partition_index_catalog_id` - (Optional) The catalog ID where the table resides. (`default = ""`) 545 | - `glue_partition_index_partition_index` - (Required) Configuration block for a partition index. (`default = []`) 546 | - `glue_partition_index_timeouts` - Set timeouts for glue partition index (`default = {}`) 547 | - `enable_glue_data_quality_ruleset` - Enable glue data quality ruleset usage (`default = False`) 548 | - `glue_data_quality_ruleset_name` - Name of the data quality ruleset. (`default = ""`) 549 | - `glue_data_quality_ruleset_description` - (Optional) Description of the data quality ruleset. (`default = null`) 550 | - `glue_data_quality_ruleset_ruleset` - (Optional) A Data Quality Definition Language (DQDL) ruleset. For more information, see the AWS Glue developer guide. (`default = null`) 551 | - `glue_trigger_target_table` - (Optional, Forces new resource) A Configuration block specifying a target table associated with the data quality ruleset. 
(`default = {}`) 552 | 553 | ## Module Output Variables 554 | ---------------------- 555 | - `glue_catalog_database_arn` - ARN for glue catalog database 556 | - `glue_catalog_database_id` - ID for glue catalog database 557 | - `glue_catalog_database_name` - Name for glue catalog database 558 | - `glue_catalog_table_arn` - ARN for glue catalog table 559 | - `glue_catalog_table_id` - ID for glue catalog table 560 | - `glue_catalog_table_name` - Name for glue catalog table 561 | - `glue_classifier_id` - Name of the classifier 562 | - `glue_connection_id` - Catalog ID and name of the connection 563 | - `glue_crawler_id` - Crawler name 564 | - `glue_crawler_arn` - The ARN of the crawler 565 | - `glue_security_configuration_id` - Glue security configuration name 566 | - `glue_workflow_id` - Glue workflow name 567 | - `glue_job_id` - Glue job name 568 | - `glue_job_arn` - Amazon Resource Name (ARN) of Glue Job 569 | - `glue_trigger_id` - Trigger name 570 | - `glue_trigger_arn` - Amazon Resource Name (ARN) of Glue Trigger 571 | - `glue_data_catalog_encryption_settings_id` - The ID of the Data Catalog to set the security configuration for. 572 | - `glue_dev_endpoint_id` - The ID of the endpoint. 573 | - `glue_dev_endpoint_arn` - The ARN of the endpoint. 574 | - `glue_dev_endpoint_name` - The name of the new endpoint. 575 | - `glue_dev_endpoint_private_address` - A private IP address to access the endpoint within a VPC, if this endpoint is created within one. 576 | - `glue_dev_endpoint_public_address` - The public IP address used by this endpoint. The PublicAddress field is present only when you create a non-VPC endpoint. 577 | - `glue_dev_endpoint_yarn_endpoint_address` - The YARN endpoint address used by this endpoint. 578 | - `glue_dev_endpoint_zeppelin_remote_spark_interpreter_port` - The Apache Zeppelin port for the remote Apache Spark interpreter. 579 | - `glue_dev_endpoint_availability_zone` - The AWS availability zone where this endpoint is located. 
580 | - `glue_dev_endpoint_vpc_id` - The ID of the VPC used by this endpoint. 581 | - `glue_dev_endpoint_status` - The current status of this endpoint. 582 | - `glue_dev_endpoint_failure_reason` - The reason for a current failure in this endpoint. 583 | - `glue_ml_transform_id` - Glue ML Transform ID. 584 | - `glue_ml_transform_arn` - Amazon Resource Name (ARN) of Glue ML Transform. 585 | - `glue_ml_transform_label_count` - The number of labels available for this transform. 586 | - `glue_ml_transform_schema` - The object that represents the schema that this transform accepts. see Schema. 587 | - `glue_partition_id` - partition id. 588 | - `glue_partition_creation_time` - The time at which the partition was created. 589 | - `glue_partition_last_analyzed_time` - The last time at which column statistics were computed for this partition. 590 | - `glue_partition_last_accessed_time` - The last time at which the partition was accessed. 591 | - `glue_registry_id` - Amazon Resource Name (ARN) of Glue Registry. 592 | - `glue_registry_arn` - Amazon Resource Name (ARN) of Glue Registry. 593 | - `glue_resource_policy_id` - The ID of Glue resource policy. 594 | - `glue_schema_id` - Amazon Resource Name (ARN) of the schema. 595 | - `glue_schema_arn` - Amazon Resource Name (ARN) of the schema. 596 | - `glue_schema_registry_name` - The name of the Glue Registry. 597 | - `glue_schema_latest_schema_version` - The latest version of the schema associated with the returned schema definition. 598 | - `glue_schema_next_schema_version` - The next version of the schema associated with the returned schema definition. 599 | - `glue_schema_schema_checkpoint` - The version number of the checkpoint (the last time the compatibility mode was changed). 600 | - `glue_user_defined_function_id` - The id of the Glue User Defined Function. 601 | - `glue_user_defined_function_arn` - The ARN of the Glue User Defined Function. 
602 | - `glue_user_defined_function_create_time` - The time at which the function was created. 603 | - `glue_partition_index_id` - Catalog ID, Database name, table name, and index name. 604 | - `glue_data_quality_ruleset_id` - ID of the Glue Data Quality Ruleset. 605 | - `glue_data_quality_ruleset_arn` - ARN of the Glue Data Quality Ruleset. 606 | - `glue_data_quality_ruleset_created_on` - The time and date that this data quality ruleset was created. 607 | - `glue_data_quality_ruleset_last_modified_on` - The time and date that this data quality ruleset was last modified. 608 | - `glue_data_quality_ruleset_recommendation_run_id` - When a ruleset was created from a recommendation run, this run ID is generated to link the two together. 609 | 610 | 611 | ## Authors 612 | 613 | Created and maintained by [Vitaliy Natarov](https://github.com/SebastianUA). An email: [vitaliy.natarov@yahoo.com](mailto:vitaliy.natarov@yahoo.com). 614 | 615 | ## License 616 | 617 | Apache 2 Licensed. See [LICENSE](https://github.com/SebastianUA/terraform/blob/master/LICENSE) for full details. 618 | --------------------------------------------------------------------------------