├── .pre-commit-config.yaml
├── versions.tf
├── LICENSE
├── variables.tf
├── .gitignore
├── README.md
├── functions
│   ├── kinesis-firehose-cloudwatch-logs-json-processor-python.py
│   └── kinesis-firehose-apache-logs-processor-python.py
├── iam.tf
└── main.tf
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://github.com/antonbabenko/pre-commit-terraform
    rev: v1.7.2
    hooks:
      - id: terraform_fmt
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v1.2.3
    hooks:
      - id: check-merge-conflict
--------------------------------------------------------------------------------
/versions.tf:
--------------------------------------------------------------------------------
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 3.37.0"
    }
    null = {
      source  = "hashicorp/null"
      version = "~> 3.1.0"
    }
    archive = {
      source  = "hashicorp/archive"
      version = "~> 2.1.0"
    }
  }
  required_version = ">= 0.13"
}
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Felipe Frizzo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/variables.tf:
--------------------------------------------------------------------------------
variable "kinesis_firehose_stream_name" {
  description = "Name to be used for the Kinesis Firehose stream"
  type        = string
}

variable "kinesis_firehose_stream_backup_prefix" {
  description = "The prefix to use for the Kinesis backup"
  type        = string
  default     = "backup/"
}

variable "root_path" {
  description = "Whether the Lambda function file is located in the root path (true) or the module path (false)"
  type        = bool
  default     = false
}

variable "bucket_name" {
  description = "The bucket name"
  type        = string
}

variable "lambda_function_name" {
  description = "The Lambda function name"
  type        = string
}

variable "lambda_function_file_name" {
  description = "The Lambda function file name"
  type        = string
}

variable "glue_catalog_database_name" {
  description = "The Glue catalog database name"
  type        = string
}

variable "glue_catalog_table_name" {
  description = "The Glue catalog database table name"
  type        = string
}

variable "glue_catalog_table_columns" {
  description = "A map of table columns"
  type = map(object({
    name = string
    type = string
  }))
}

variable "cloudwatch_subscription_filter_name" {
  description = "The subscription filter name"
  type        = string
}

variable "cloudwatch_log_group_name" {
  description = "The CloudWatch log group name"
  type        = string
}

variable "cloudwatch_filter_pattern" {
  description = "The CloudWatch filter pattern"
  type        = string
}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Local .terraform directories
**/.terraform/*

# .tfstate files
*.tfstate
*.tfstate.*

# .tfvars files
*.tfvars

### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# terraform-aws-kinesis-firehose

This module creates a [Kinesis Firehose](https://aws.amazon.com/kinesis/data-firehose/) delivery stream in AWS to send CloudWatch log data to S3.

## Usage

```terraform
module "kinesis-firehose" {
  source                                = "git::https://github.com/felipefrizzo/terraform-aws-kinesis-firehose.git?ref=master"
  kinesis_firehose_stream_name          = "stream_name"
  kinesis_firehose_stream_backup_prefix = "bucket_backup_prefix"
  bucket_name                           = "bucket_name"
  root_path                             = false
  lambda_function_name                  = "lambda_function_name"
  lambda_function_file_name             = "kinesis-firehose-cloudwatch-logs-json-processor-python"
  glue_catalog_database_name            = "glue_catalog_database_name"
  glue_catalog_table_name               = "glue_catalog_table_name"
  glue_catalog_table_columns = {
    "column_name" = {
      name = "column_name"
      type = "column_type"
    }
  }
  cloudwatch_subscription_filter_name = "cloudwatch_subscription_filter_name"
  cloudwatch_log_group_name           = "cloudwatch_log_group_name"
  cloudwatch_filter_pattern           = "cloudwatch_filter_pattern"
}
```

## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-------:|:--------:|
| kinesis_firehose_stream_name | Name to be used for the Kinesis Firehose stream (e.g. `poc_logs`) | string | - | yes |
| kinesis_firehose_stream_backup_prefix | The prefix to use for the Kinesis backup (e.g. `backup_prefix`) | string | `backup/` | no |
| bucket_name | Bucket name | string | - | yes |
| root_path | Whether the Lambda function file is located in the root path or the module path (e.g. `true`) | bool | `false` | no |
| lambda_function_name | Lambda function name (e.g. `lambda_kinesis`) | string | - | yes |
| lambda_function_file_name | Lambda function file name | string | - | yes |
| glue_catalog_database_name | Glue catalog database name | string | - | yes |
| glue_catalog_table_name | Glue catalog database table name | string | - | yes |
| glue_catalog_table_columns | A map of objects describing the table columns | map | - | yes |
| cloudwatch_subscription_filter_name | Subscription filter name | string | - | yes |
| cloudwatch_log_group_name | CloudWatch log group name | string | - | yes |
| cloudwatch_filter_pattern | CloudWatch filter pattern | string | - | yes |
--------------------------------------------------------------------------------
/functions/kinesis-firehose-cloudwatch-logs-json-processor-python.py:
--------------------------------------------------------------------------------
"""
For processing data sent to Kinesis Firehose by CloudWatch logs subscription filter.

CloudWatch Logs sends to Firehose records that look like this:
{
    "messageType":"DATA_MESSAGE",
    "owner":"123456789012",
    "logGroup":"log_group_name",
    "logStream":"log_stream_name",
    "subscriptionFilters":[
        "subscription_filter_name"
    ],
    "logEvents":[
        {
            "id":"34347401063152187823588091447941432395582337638937001984",
            "timestamp":1540190731627,
            "message": "{"method":"GET", "path":"/example/12345", "format":"html", "action":"show", "status":200, "params":{ "user_id":"11111" }, "ip":"192.168.0.0", "@timestamp":"2018-10-22T06:45:31.428Z", "@version":"1", "message":"[200] GET /example/12345 (ExampleController#show)"}"
        },
        ...
    ]
}
"""
from __future__ import print_function

import base64 as b64
import gzip
import json
import logging

STATUS_OK: str = 'Ok'
DROPPED: str = 'Dropped'
FAILED: str = 'ProcessingFailed'

logger = logging.getLogger()
logger.setLevel(logging.INFO)


class DataTransformation:
    def __init__(self, records: list) -> None:
        logger.info('Start Kinesis Firehose data transformation.')
        self.records: list = records
        self.output: list = []

    def process(self) -> list:
        for record in self.records:
            record_id: str = record.get('recordId', None)
            payload: dict = self.__decompress(record.get('data', None))
            logger.info(f'Payload to be transformed: {payload}')

            message_type: str = payload.get('messageType', None)

            if message_type == 'CONTROL_MESSAGE':
                output_record = {'recordId': record_id, 'result': DROPPED}
            elif message_type == 'DATA_MESSAGE':
                data = self.__transformation(payload)
                logger.info(f'Payload after transformation: {data}')
                output_record = {
                    'recordId': record_id,
                    'result': STATUS_OK,
                    'data': self.__compress(data)
                }
            else:
                output_record = {'recordId': record_id, 'result': FAILED}
            self.output.append(output_record)

        logger.info(f'Data after finish transformation: {self.output}')
        return self.output

    def __compress(self, data) -> str:
        return b64.b64encode(data.encode('UTF-8')).decode('UTF-8')

    def __decompress(self, data) -> dict:
        return json.loads(gzip.decompress(b64.b64decode(data)))

    def __transformation(self, payload: dict) -> str:
        record = '\r\n'.join(
            e.pop('message') for e in payload.pop('logEvents', None)
        )
        return record


def lambda_handler(event, context) -> dict:
    output = DataTransformation(event.get('records', None)).process()
    return dict(records=output)
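A minimal local smoke test for the processor above, assuming it is run from the repository root; the file path, record ID, and log message below are illustrative, and `importlib` is used only because the hyphenated filename cannot be imported directly:

```python
import base64
import gzip
import importlib.util
import json

# Load the handler from the hyphenated file name (path is an assumption:
# run this from the repository root).
spec = importlib.util.spec_from_file_location(
    "json_processor",
    "functions/kinesis-firehose-cloudwatch-logs-json-processor-python.py",
)
json_processor = importlib.util.module_from_spec(spec)
spec.loader.exec_module(json_processor)

# Build a record the way CloudWatch Logs delivers it: a gzip-compressed,
# base64-encoded JSON document with a logEvents array.
payload = {
    "messageType": "DATA_MESSAGE",
    "logEvents": [{
        "id": "1",
        "timestamp": 1540190731627,
        "message": json.dumps({"method": "GET", "path": "/example/12345", "status": 200}),
    }],
}
data = base64.b64encode(gzip.compress(json.dumps(payload).encode("utf-8"))).decode("utf-8")

event = {"records": [{"recordId": "illustrative-record-id", "data": data}]}
print(json_processor.lambda_handler(event, None))
# Expected: one record with result 'Ok' and the JSON log line re-encoded in 'data'.
```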
--------------------------------------------------------------------------------
/functions/kinesis-firehose-apache-logs-processor-python.py:
--------------------------------------------------------------------------------
"""
For processing data sent to Kinesis Firehose by CloudWatch logs subscription filter.

CloudWatch Logs sends to Firehose records that look like this:
{
    "messageType":"DATA_MESSAGE",
    "owner":"123456789012",
    "logGroup":"log_group_name",
    "logStream":"log_stream_name",
    "subscriptionFilters":[
        "subscription_filter_name"
    ],
    "logEvents":[
        {
            "id":"34347401063152187823588091447941432395582337638937001984",
            "timestamp":1540190731627,
            "message": "127.0.0.1 - - [30/Jul/2006:24:59:59 +0000] "GET / HTTP/1.1" 200 195 "-" "ELB-HealthChecker/2.0""
        },
        ...
    ]
}
"""
from __future__ import print_function

import base64 as b64
import gzip
import json
import logging
import re

STATUS_OK: str = 'Ok'
DROPPED: str = 'Dropped'
FAILED: str = 'ProcessingFailed'

logger = logging.getLogger()
logger.setLevel(logging.INFO)


class DataTransformation:
    def __init__(self, records: list) -> None:
        logger.info('Start Kinesis Firehose data transformation.')
        self.records: list = records
        self.pattern: str = r"(?P<ip>[\d.]+) (\S+) (\S+) \[(?P<date>[\w:/]+\s[\+\-]\d{4})\] \"(?P<method>[A-Z.]+) (?P<path>\S+) (\S+)\" (?P<status>[\d.]+) (\S+) \"(?P<from>\w.|\S+)\" \"(?P<user_agent>\w.+)\""
        self.output: list = []
        self.fields: list = [
            'ip',
            'date',
            'method',
            'path',
            'status',
            'from',
            'user_agent'
        ]

    def process(self) -> list:
        for record in self.records:
            record_id: str = record.get('recordId', None)
            payload: dict = self.__decompress(record.get('data', None))
            logger.info(f'Payload to be transformed: {payload}')

            message_type: str = payload.get('messageType', None)

            if message_type == 'CONTROL_MESSAGE':
                output_record = {'recordId': record_id, 'result': DROPPED}
                self.output.append(output_record)
            elif message_type == 'DATA_MESSAGE':
                for data, result in self.__transformation(payload):
                    logger.info(f'Payload after transformation: {data}')
                    output_record = {
                        'recordId': record_id,
                        'result': result,
                        'data': self.__compress(data)
                    }
                    self.output.append(output_record)
            else:
                output_record = {'recordId': record_id, 'result': FAILED}
                self.output.append(output_record)

        logger.info(f'Data after finish transformation: {self.output}')
        return self.output

    def __compress(self, data) -> str:
        return b64.b64encode(json.dumps(data).encode('UTF-8')).decode('UTF-8')

    def __decompress(self, data) -> dict:
        return json.loads(gzip.decompress(b64.b64decode(data)))

    def __transformation(self, payload: dict) -> [dict, str]:
        data = None

        for event in payload.pop('logEvents', None):
            message = event.pop('message', None)
            matches = re.search(self.pattern, message)

            if matches and 'HealthChecker' not in matches.group('user_agent'):
                data = {field: matches.group(field) for field in self.fields}
                result = STATUS_OK
            elif matches and 'HealthChecker' in matches.group('user_agent'):
                logger.info('Dropped HealthChecker log message')
                result = DROPPED
            else:
                logger.info(
                    "[ERROR] The log message doesn't match with "
                    "the regex pattern"
                )
                result = FAILED

            yield [data, result]


def lambda_handler(event, context) -> dict:
    output = DataTransformation(event.get('records', None)).process()
    return dict(records=output)
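To see which fields the regular expression above extracts, the sketch below (the file path and sample access-log line are assumptions) loads the module the same way and matches one Apache-style line against the pattern:

```python
import importlib.util
import re

# Load the module from its hyphenated file name (path is an assumption:
# run this from the repository root).
spec = importlib.util.spec_from_file_location(
    "apache_processor",
    "functions/kinesis-firehose-apache-logs-processor-python.py",
)
apache_processor = importlib.util.module_from_spec(spec)
spec.loader.exec_module(apache_processor)

transformer = apache_processor.DataTransformation(records=[])
line = '192.168.0.10 - - [22/Oct/2018:06:45:31 +0000] "GET /example/12345 HTTP/1.1" 200 195 "-" "Mozilla/5.0"'

# The named groups map one-to-one onto transformer.fields.
matches = re.search(transformer.pattern, line)
print({field: matches.group(field) for field in transformer.fields})
# {'ip': '192.168.0.10', 'date': '22/Oct/2018:06:45:31 +0000', 'method': 'GET',
#  'path': '/example/12345', 'status': '200', 'from': '-', 'user_agent': 'Mozilla/5.0'}
```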
--------------------------------------------------------------------------------
/iam.tf:
--------------------------------------------------------------------------------
data "aws_iam_policy_document" "kinesis_firehose_stream_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["firehose.amazonaws.com"]
    }
  }
}

data "aws_iam_policy_document" "kinesis_firehose_access_bucket_assume_policy" {
  statement {
    effect = "Allow"

    actions = [
      "s3:AbortMultipartUpload",
      "s3:GetBucketLocation",
      "s3:GetObject",
      "s3:ListBucket",
      "s3:ListBucketMultipartUploads",
      "s3:PutObject",
    ]

    resources = [
      aws_s3_bucket.kinesis_firehose_stream_bucket.arn,
      "${aws_s3_bucket.kinesis_firehose_stream_bucket.arn}/*",
    ]
  }
}

data "aws_iam_policy_document" "kinesis_firehose_access_glue_assume_policy" {
  statement {
    effect    = "Allow"
    actions   = ["glue:GetTableVersions"]
    resources = ["*"]
  }
}

resource "aws_iam_role" "kinesis_firehose_stream_role" {
  name               = "kinesis_firehose_stream_role"
  assume_role_policy = data.aws_iam_policy_document.kinesis_firehose_stream_assume_role.json
}

resource "aws_iam_role_policy" "kinesis_firehose_access_bucket_policy" {
  name   = "kinesis_firehose_access_bucket_policy"
  role   = aws_iam_role.kinesis_firehose_stream_role.name
  policy = data.aws_iam_policy_document.kinesis_firehose_access_bucket_assume_policy.json
}

resource "aws_iam_role_policy" "kinesis_firehose_access_glue_policy" {
  name   = "kinesis_firehose_access_glue_policy"
  role   = aws_iam_role.kinesis_firehose_stream_role.name
  policy = data.aws_iam_policy_document.kinesis_firehose_access_glue_assume_policy.json
}

data "aws_iam_policy_document" "lambda_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["lambda.amazonaws.com"]
    }
  }
}

data "aws_iam_policy_document" "lambda_assume_policy" {
  statement {
    effect = "Allow"

    actions = [
      "lambda:InvokeFunction",
      "lambda:GetFunctionConfiguration",
    ]

    resources = [
      aws_lambda_function.lambda_kinesis_firehose_data_transformation.arn,
      "${aws_lambda_function.lambda_kinesis_firehose_data_transformation.arn}:*",
    ]
  }
}

data "aws_iam_policy_document" "lambda_to_cloudwatch_assume_policy" {
  statement {
    effect = "Allow"

    actions = [
      "logs:CreateLogGroup",
      "logs:CreateLogStream",
      "logs:PutLogEvents",
    ]

    resources = ["*"]
  }
}

resource "aws_iam_role" "lambda" {
  name               = "lambda_function_role"
  assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json
}

resource "aws_iam_role_policy" "lambda_policy" {
  name   = "lambda_function_policy"
  role   = aws_iam_role.kinesis_firehose_stream_role.name
  policy = data.aws_iam_policy_document.lambda_assume_policy.json
}

resource "aws_iam_role_policy" "lambda_to_cloudwatch_policy" {
  name   = "lambda_to_cloudwatch_policy"
  role   = aws_iam_role.lambda.name
  policy = data.aws_iam_policy_document.lambda_to_cloudwatch_assume_policy.json
}

data "aws_iam_policy_document" "cloudwatch_logs_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["logs.${data.aws_region.default.name}.amazonaws.com"]
    }
  }
}

data "aws_iam_policy_document" "cloudwatch_logs_assume_policy" {
  statement {
    effect    = "Allow"
    actions   = ["firehose:*"]
    resources = [aws_kinesis_firehose_delivery_stream.kinesis_firehose_stream.arn]
  }
}

resource "aws_iam_role" "cloudwatch_logs_role" {
  name               = "cloudwatch_logs_role"
  assume_role_policy = data.aws_iam_policy_document.cloudwatch_logs_assume_role.json
}

resource "aws_iam_role_policy" "cloudwatch_logs_policy" {
  name   = "cloudwatch_logs_policy"
  role   = aws_iam_role.cloudwatch_logs_role.name
  policy = data.aws_iam_policy_document.cloudwatch_logs_assume_policy.json
}
--------------------------------------------------------------------------------
/main.tf:
--------------------------------------------------------------------------------
data "aws_region" "default" {}

resource "aws_kinesis_firehose_delivery_stream" "kinesis_firehose_stream" {
  name        = var.kinesis_firehose_stream_name
  destination = "extended_s3"

  extended_s3_configuration {
    role_arn       = aws_iam_role.kinesis_firehose_stream_role.arn
    bucket_arn     = aws_s3_bucket.kinesis_firehose_stream_bucket.arn
    buffer_size    = 128
    s3_backup_mode = "Enabled"
    prefix         = "logs/"

    s3_backup_configuration {
      role_arn   = aws_iam_role.kinesis_firehose_stream_role.arn
      bucket_arn = aws_s3_bucket.kinesis_firehose_stream_bucket.arn
      prefix     = var.kinesis_firehose_stream_backup_prefix

      cloudwatch_logging_options {
        enabled         = true
        log_group_name  = aws_cloudwatch_log_group.kinesis_firehose_stream_logging_group.name
        log_stream_name = aws_cloudwatch_log_stream.kinesis_firehose_stream_logging_stream.name
      }
    }

    processing_configuration {
      enabled = true

      processors {
        type = "Lambda"

        parameters {
          parameter_name  = "LambdaArn"
          parameter_value = "${aws_lambda_function.lambda_kinesis_firehose_data_transformation.arn}:$LATEST"
        }
      }
    }

    cloudwatch_logging_options {
      enabled         = true
      log_group_name  = aws_cloudwatch_log_group.kinesis_firehose_stream_logging_group.name
      log_stream_name = aws_cloudwatch_log_stream.kinesis_firehose_stream_logging_stream.name
    }

    data_format_conversion_configuration {
      input_format_configuration {
        deserializer {
          hive_json_ser_de {}
        }
      }

      output_format_configuration {
        serializer {
          parquet_ser_de {}
        }
      }

      schema_configuration {
        database_name = aws_glue_catalog_database.glue_catalog_database.name
        table_name    = aws_glue_catalog_table.glue_catalog_table.name
        role_arn      = aws_iam_role.kinesis_firehose_stream_role.arn
      }
    }
  }
}
"/aws/kinesisfirehose/${var.kinesis_firehose_stream_name}" 69 | } 70 | 71 | resource "aws_cloudwatch_log_stream" "kinesis_firehose_stream_logging_stream" { 72 | log_group_name = aws_cloudwatch_log_group.kinesis_firehose_stream_logging_group.name 73 | name = "S3Delivery" 74 | } 75 | 76 | resource "aws_s3_bucket" "kinesis_firehose_stream_bucket" { 77 | bucket = var.bucket_name 78 | acl = "private" 79 | } 80 | 81 | locals { 82 | path_prefix = "${var.root_path ? path.root : path.module}/functions" 83 | } 84 | 85 | data "archive_file" "kinesis_firehose_data_transformation" { 86 | type = "zip" 87 | source_file = format("%s/%s.py", local.path_prefix, var.lambda_function_file_name) 88 | output_path = format("%s/%s.zip", local.path_prefix, var.lambda_function_file_name) 89 | } 90 | 91 | resource "aws_cloudwatch_log_group" "lambda_function_logging_group" { 92 | name = "/aws/lambda/${var.lambda_function_name}" 93 | } 94 | 95 | resource "aws_lambda_function" "lambda_kinesis_firehose_data_transformation" { 96 | filename = data.archive_file.kinesis_firehose_data_transformation.output_path 97 | function_name = var.lambda_function_name 98 | 99 | role = aws_iam_role.lambda.arn 100 | handler = "${var.lambda_function_file_name}.lambda_handler" 101 | source_code_hash = data.archive_file.kinesis_firehose_data_transformation.output_base64sha256 102 | runtime = "python3.6" 103 | timeout = 60 104 | } 105 | 106 | resource "aws_glue_catalog_database" "glue_catalog_database" { 107 | name = var.glue_catalog_database_name 108 | } 109 | 110 | resource "aws_glue_catalog_table" "glue_catalog_table" { 111 | name = var.glue_catalog_table_name 112 | database_name = aws_glue_catalog_database.glue_catalog_database.name 113 | 114 | parameters = { 115 | "classification" = "parquet" 116 | } 117 | 118 | storage_descriptor { 119 | input_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat" 120 | output_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat" 121 | location = "s3://${aws_s3_bucket.kinesis_firehose_stream_bucket.bucket}/" 122 | 123 | ser_de_info { 124 | name = "JsonSerDe" 125 | serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe" 126 | 127 | parameters = { 128 | "serialization.format" = 1 129 | "explicit.null" = false 130 | "parquet.compression" = "SNAPPY" 131 | } 132 | } 133 | 134 | dynamic "columns" { 135 | for_each = var.glue_catalog_table_columns 136 | content { 137 | name = columns.value["name"] 138 | type = columns.value["type"] 139 | } 140 | } 141 | } 142 | } 143 | 144 | resource "aws_cloudwatch_log_subscription_filter" "cloudwatch_subscription_filter" { 145 | name = var.cloudwatch_subscription_filter_name 146 | log_group_name = var.cloudwatch_log_group_name 147 | filter_pattern = var.cloudwatch_filter_pattern 148 | 149 | destination_arn = aws_kinesis_firehose_delivery_stream.kinesis_firehose_stream.arn 150 | distribution = "ByLogStream" 151 | 152 | role_arn = aws_iam_role.cloudwatch_logs_role.arn 153 | } 154 | --------------------------------------------------------------------------------