├── .pre-commit-config.yaml
├── versions.tf
├── LICENSE
├── variables.tf
├── .gitignore
├── README.md
├── functions
    ├── kinesis-firehose-cloudwatch-logs-json-processor-python.py
    └── kinesis-firehose-apache-logs-processor-python.py
├── iam.tf
└── main.tf


/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: git://github.com/antonbabenko/pre-commit-terraform
3 |   rev: v1.7.2
4 |   hooks:
5 |     - id: terraform_fmt
6 | - repo: git://github.com/pre-commit/pre-commit-hooks
7 |   rev: v1.2.3
8 |   hooks:
9 |     - id: check-merge-conflict


--------------------------------------------------------------------------------
/versions.tf:
--------------------------------------------------------------------------------
 1 | terraform {
 2 |   required_providers {
 3 |     aws = {
 4 |       source  = "hashicorp/aws"
 5 |       version = "~> 3.37.0"
 6 |     }
 7 |     null = {
 8 |       source  = "hashicorp/null"
 9 |       version = "~> 3.1.0"
10 |     }
11 |     archive = {
12 |       source  = "hashicorp/archive"
13 |       version = "~> 2.1.0"
14 |     }
15 |   }
16 |   required_version = ">= 0.13"
17 | }
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Felipe Frizzo
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/variables.tf:
--------------------------------------------------------------------------------
 1 | variable "kinesis_firehose_stream_name" {
 2 |   description = "Name to be use on kinesis firehose stream"
 3 |   type        = string
 4 | }
 5 | 
 6 | variable "kinesis_firehose_stream_backup_prefix" {
 7 |   description = "The prefix name to use for the kinesis backup"
 8 |   type        = string
 9 |   default     = "backup/"
10 | }
11 | 
12 | variable "root_path" {
13 |   description = "The path where the lambda function file is located is root or module path"
14 |   type        = bool
15 |   default     = false
16 | }
17 | 
18 | variable "bucket_name" {
19 |   description = "The bucket name"
20 |   type        = string
21 | }
22 | 
23 | variable "lambda_function_name" {
24 |   description = "The lambda function name"
25 |   type        = string
26 | }
27 | 
28 | variable "lambda_function_file_name" {
29 |   description = "The lambda function file name"
30 |   type        = string
31 | }
32 | 
33 | variable "glue_catalog_database_name" {
34 |   description = "The Glue catalog database name"
35 |   type        = string
36 | }
37 | 
38 | variable "glue_catalog_table_name" {
39 |   description = "The Glue catalog database table name"
40 |   type        = string
41 | }
42 | 
43 | variable "glue_catalog_table_columns" {
44 |   description = "A list of table columns"
45 |   type        = map(object({
46 |     name = string
47 |     type = string
48 |   }))
49 | }
50 | 
51 | variable "cloudwatch_subscription_filter_name" {
52 |   description = "The subscription filter name"
53 |   type        = string
54 | }
55 | 
56 | variable "cloudwatch_log_group_name" {
57 |   description = "The cloudwatch log group name"
58 |   type        = string
59 | }
60 | 
61 | variable "cloudwatch_filter_pattern" {
62 |   description = "The cloudwatch filter pattern"
63 |   type        = string
64 | }
65 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | #  Local .terraform directories
  2 | **/.terraform/*
  3 | 
  4 | # .tfstate files
  5 | *.tfstate
  6 | *.tfstate.*
  7 | 
  8 | # .tfvars files
  9 | *.tfvars
 10 | 
 11 | ### VirtualEnv template
 12 | # Virtualenv
 13 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
 14 | .Python
 15 | [Bb]in
 16 | [Ii]nclude
 17 | [Ll]ib
 18 | [Ll]ib64
 19 | [Ll]ocal
 20 | [Ss]cripts
 21 | pyvenv.cfg
 22 | .venv
 23 | pip-selfcheck.json
 24 | ### Python template
 25 | # Byte-compiled / optimized / DLL files
 26 | __pycache__/
 27 | *.py[cod]
 28 | *$py.class
 29 | 
 30 | # C extensions
 31 | *.so
 32 | 
 33 | # Distribution / packaging
 34 | build/
 35 | develop-eggs/
 36 | dist/
 37 | downloads/
 38 | eggs/
 39 | .eggs/
 40 | lib/
 41 | lib64/
 42 | parts/
 43 | sdist/
 44 | var/
 45 | wheels/
 46 | *.egg-info/
 47 | .installed.cfg
 48 | *.egg
 49 | MANIFEST
 50 | 
 51 | # PyInstaller
 52 | #  Usually these files are written by a python script from a template
 53 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 54 | *.manifest
 55 | *.spec
 56 | 
 57 | # Installer logs
 58 | pip-log.txt
 59 | pip-delete-this-directory.txt
 60 | 
 61 | # Unit test / coverage reports
 62 | htmlcov/
 63 | .tox/
 64 | .coverage
 65 | .coverage.*
 66 | .cache
 67 | nosetests.xml
 68 | coverage.xml
 69 | *.cover
 70 | .hypothesis/
 71 | .pytest_cache/
 72 | 
 73 | # Translations
 74 | *.mo
 75 | *.pot
 76 | 
 77 | # Django stuff:
 78 | *.log
 79 | local_settings.py
 80 | db.sqlite3
 81 | 
 82 | # Flask stuff:
 83 | instance/
 84 | .webassets-cache
 85 | 
 86 | # Scrapy stuff:
 87 | .scrapy
 88 | 
 89 | # Sphinx documentation
 90 | docs/_build/
 91 | 
 92 | # PyBuilder
 93 | target/
 94 | 
 95 | # Jupyter Notebook
 96 | .ipynb_checkpoints
 97 | 
 98 | # pyenv
 99 | .python-version
100 | 
101 | # celery beat schedule file
102 | celerybeat-schedule
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 | 
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 | 
119 | # Rope project settings
120 | .ropeproject
121 | 
122 | # mkdocs documentation
123 | /site
124 | 
125 | # mypy
126 | .mypy_cache/
127 | 
128 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # terraform-aws-kinesis-firehose
 2 | 
 3 | This code creates a [Kinesis Firehose]('https://aws.amazon.com/kinesis/data-firehose/') in AWS to send CloudWatch log data to S3.
 4 | 
 5 | ## Usage
 6 | 
 7 | ```terraform
 8 | module "kinesis-firehose" {
 9 |   source                                = "git::https://github.com/felipefrizzo/terraform-aws-kinesis-firehose.git?ref=master"
10 |   region                                = "aws_region"
11 |   kinesis_firehose_stream_name          = "stream_name"
12 |   kinesis_firehose_stream_backup_prefix = "bucket_backup_prefix"
13 |   bucket_name                           = "bucket_name"
14 |   root_path                             = false
15 |   lambda_function_name                  = "lambda_function_name"
16 |   lambda_function_file_name             = "kinesis-firehose-cloudwatch-logs-json-processor-python"
17 |   glue_catalog_database_name            = "glue_catalog_database_name"
18 |   glue_catalog_table_name               = "glue_catalog_table_name"
19 |   glue_catalog_table_columns            = {
20 |     "column_name" = {
21 |       name = "column_name"
22 |       type = "column_type"
23 |     }
24 |   }
25 |   cloudwatch_subscription_filter_name   = "cloudwatch_subscription_filter_name"
26 |   cloudwatch_log_group_name             = "cloudwatch_log_group_name"
27 |   cloudwatch_filter_pattern             = "cloudwatch_filter_pattern"
28 | }
29 | ```
30 | 
31 | ## Inputs
32 | 
33 | | Name | Description | Type | Default | Required |
34 | |------|-------------|:----:|:-------:|:--------:|
35 | | kinesis_firehose_stream_name | Name to be use on kinesis firehose stream (e.g. `poc_logs`) | string | - | yes |
36 | | kinesis_firehose_stream_backup_prefix | The prefix name to use for the kinesis backup (e.g. `backup_prefix`) | string | `` | no |
37 | | bucket_name | Bucket name | string | - | yes |
38 | | root_path | The path where the lambda function file is located is root or module path (e.g. `true`) | boolean | `` | no |
39 | | lambda_function_name | Lambda function name (e.g. `lambda_kinesis`) | string | - | yes |
40 | | lambda_function_file_name | Lambda function file name | string | - | yes |
41 | | glue_catalog_database_name | Glue catalog database name | string | - | yes |
42 | | glue_catalog_table_name | Glue catalog database table name | string | - | yes |
43 | | glue_catalog_table_columns | A map of object of table columns | map | `<map>` | yes |
44 | | cloudwatch_subscription_filter_name | Subscription filter name | string | - | yes |
45 | | cloudwatch_log_group_name | Cloudwatch log group name | string | - | yes |
46 | | cloudwatch_filter_pattern | Cloudwatch filter pattern | string | - | yes |
47 | 


--------------------------------------------------------------------------------
/functions/kinesis-firehose-cloudwatch-logs-json-processor-python.py:
--------------------------------------------------------------------------------
 1 | """
 2 | For processing data sent to Kinesis Firehose by CloudWatch logs subscription filter.
 3 | 
 4 | CloudWatch Logs sends to Firehose records that look like that:
 5 | {
 6 |    "messageType":"DATA_MESSAGE",
 7 |    "owner":"123456789012",
 8 |    "logGroup":"log_group_name",
 9 |    "logStream":"log_stream_name",
10 |    "subscriptionFilters":[
11 |       "subscription_filter_name"
12 |    ],
13 |    "logEvents":[
14 |       {
15 |          "id":"34347401063152187823588091447941432395582337638937001984",
16 |          "timestamp":1540190731627,
17 |          "message": "{"method":"GET", "path":"/example/12345", "format":"html", "action":"show", "status":200, "params":{ "user_id":"11111" }, "ip":"192.168.0.0", "@timestamp":"2018-10-22T06:45:31.428Z", "@version":"1", "message":"[200] GET /example/12345 (ExampleController#show)"}"
18 |       },
19 |       ...
20 |    ]
21 | }
22 | """
23 | from __future__ import print_function
24 | 
25 | import base64 as b64
26 | import gzip
27 | import json
28 | import logging
29 | 
30 | STATUS_OK: str = 'Ok'
31 | DROPPED: str = 'Dropped'
32 | FAILED: str = 'ProcessingFailed'
33 | 
34 | logger = logging.getLogger()
35 | logger.setLevel(logging.INFO)
36 | 
37 | 
38 | class DataTransformation:
39 |     def __init__(self, records: list) -> None:
40 |         logger.info('Start Kinesis Firehose data transformation.')
41 |         self.records: list = records
42 |         self.output: list = []
43 | 
44 |     def process(self) -> list:
45 |         for record in self.records:
46 |             record_id: int = record.get('recordId', None)
47 |             payload: dict = self.__decompress(record.get('data', None))
48 |             logger.info(f'Payload to be transform: {payload}')
49 | 
50 |             message_type: str = payload.get('messageType', None)
51 | 
52 |             if message_type == 'CONTROL_MESSAGE':
53 |                 output_record = {'recordId': record_id, 'result': DROPPED}
54 |             elif message_type == 'DATA_MESSAGE':
55 |                 data = self.__transformation(payload)
56 |                 logger.info(f'Payload after transformation: {data}')
57 |                 output_record = {
58 |                     'recordId': record_id,
59 |                     'result': STATUS_OK,
60 |                     'data': self.__compress(data)
61 |                 }
62 |             else:
63 |                 output_record = {'recordId': record_id, 'result': FAILED}
64 |             self.output.append(output_record)
65 | 
66 |         logger.info(f'Data after finish transformation: {self.output}')
67 |         return self.output
68 | 
69 |     def __compress(self, data) -> str:
70 |         return b64.b64encode(data.encode('UTF-8')).decode('UTF-8')
71 | 
72 |     def __decompress(self, data) -> dict:
73 |         return json.loads(gzip.decompress(b64.b64decode(data)))
74 | 
75 |     def __transformation(self, payload: dict) -> str:
76 |         record = '\r\n'.join(
77 |             e.pop('message') for e in payload.pop('logEvents', None)
78 |         )
79 |         return record
80 | 
81 | 
82 | def lambda_handler(event, context) -> dict:
83 |     output = DataTransformation(event.get('records', None)).process()
84 |     return dict(records=output)
85 | 


--------------------------------------------------------------------------------
/functions/kinesis-firehose-apache-logs-processor-python.py:
--------------------------------------------------------------------------------
  1 | """
  2 | For processing data sent to Kinesis Firehose by CloudWatch logs subscription filter.
  3 | 
  4 | CloudWatch Logs sends to Firehose records that look like that:
  5 | {
  6 |    "messageType":"DATA_MESSAGE",
  7 |    "owner":"123456789012",
  8 |    "logGroup":"log_group_name",
  9 |    "logStream":"log_stream_name",
 10 |    "subscriptionFilters":[
 11 |       "subscription_filter_name"
 12 |    ],
 13 |    "logEvents":[
 14 |       {
 15 |          "id":"34347401063152187823588091447941432395582337638937001984",
 16 |          "timestamp":1540190731627,
 17 |          "message": "127.0.0.1 - - [30/Jul/2006:24:59:59 +0000] "GET / HTTP/1.1" 200 195 "-" "ELB-HealthChecker/2.0""
 18 |       },
 19 |       ...
 20 |    ]
 21 | }
 22 | """
 23 | from __future__ import print_function
 24 | 
 25 | import base64 as b64
 26 | import gzip
 27 | import json
 28 | import logging
 29 | import re
 30 | 
 31 | STATUS_OK: str = 'Ok'
 32 | DROPPED: str = 'Dropped'
 33 | FAILED: str = 'ProcessingFailed'
 34 | 
 35 | logger = logging.getLogger()
 36 | logger.setLevel(logging.INFO)
 37 | 
 38 | 
 39 | class DataTransformation:
 40 |     def __init__(self, records: list) -> None:
 41 |         logger.info('Start Kinesis Firehose data transformation.')
 42 |         self.records: list = records
 43 |         self.pattern: str = r"(?P<ip>[\d.]+) (\S+) (\S+) \[(?P<date>[\w:/]+\s[\+\-]\d{4})\] \"(?P<method>[A-Z.]+) (?P<path>\S+) (\S+)\" (?P<status>[\d.]+) (\S+) \"(?P<from>\w.|\S+)\" \"(?P<user_agent>\w.+)\""
 44 |         self.output: list = []
 45 |         self.fields: list = [
 46 |             'ip',
 47 |             'date',
 48 |             'method',
 49 |             'path',
 50 |             'status',
 51 |             'from',
 52 |             'user_agent'
 53 |         ]
 54 | 
 55 |     def process(self) -> list:
 56 |         for record in self.records:
 57 |             record_id: int = record.get('recordId', None)
 58 |             payload: dict = self.__decompress(record.get('data', None))
 59 |             logger.info(f'Payload to be transform: {payload}')
 60 | 
 61 |             message_type: str = payload.get('messageType', None)
 62 | 
 63 |             if message_type == 'CONTROL_MESSAGE':
 64 |                 output_record = {'recordId': record_id, 'result': DROPPED}
 65 |                 self.output.append(output_record)
 66 |             elif message_type == 'DATA_MESSAGE':
 67 |                 for data, result in self.__transformation(payload):
 68 |                     logger.info(f'Payload after transformation: {data}')
 69 |                     output_record = {
 70 |                         'recordId': record_id,
 71 |                         'result': result,
 72 |                         'data': self.__compress(data)
 73 |                     }
 74 |                     self.output.append(output_record)
 75 |             else:
 76 |                 output_record = {'recordId': record_id, 'result': FAILED}
 77 |                 self.output.append(output_record)
 78 | 
 79 |         logger.info(f'Data after finish transformation: {self.output}')
 80 |         return self.output
 81 | 
 82 |     def __compress(self, data) -> str:
 83 |         return b64.b64encode(json.dumps(data).encode('UTF-8')).decode('UTF-8')
 84 |     
 85 |     def __decompress(self, data) -> dict:
 86 |         return json.loads(gzip.decompress(b64.b64decode(data)))
 87 |     
 88 |     def __transformation(self, payload: dict) -> [dict, str]:
 89 |         data = None
 90 | 
 91 |         for event in payload.pop('logEvents', None):
 92 |             message = event.pop('message', None)
 93 |             matches = re.search(self.pattern, message)
 94 | 
 95 |             if matches and 'HealthChecker' not in matches.group('user_agent'):
 96 |                 data = {field: matches.group(field) for field in self.fields}
 97 |                 result = STATUS_OK
 98 |             elif 'HealthChecker' in matches.group('user_agent'):
 99 |                 logger.info('Dropped HealthChecker log message')
100 |                 result = DROPPED
101 |             else:
102 |                 logger.info(
103 |                     "[ERROR] The log message doesn't match with "
104 |                     "the regex pattern"
105 |                 )
106 |                 result = FAILED
107 | 
108 |             yield [data, result]
109 | 
110 | 
111 | def lambda_handler(event, context) -> dict:
112 |     output = DataTransformation(event.get('records', None)).process()
113 |     return dict(records=output)
114 | 


--------------------------------------------------------------------------------
/iam.tf:
--------------------------------------------------------------------------------
  1 | data "aws_iam_policy_document" "kinesis_firehose_stream_assume_role" {
  2 |   statement {
  3 |     effect  = "Allow"
  4 |     actions = ["sts:AssumeRole"]
  5 | 
  6 |     principals {
  7 |       type        = "Service"
  8 |       identifiers = ["firehose.amazonaws.com"]
  9 |     }
 10 |   }
 11 | }
 12 | 
 13 | data "aws_iam_policy_document" "kinesis_firehose_access_bucket_assume_policy" {
 14 |   statement {
 15 |     effect = "Allow"
 16 | 
 17 |     actions = [
 18 |       "s3:AbortMultipartUpload",
 19 |       "s3:GetBucketLocation",
 20 |       "s3:GetObject",
 21 |       "s3:ListBucket",
 22 |       "s3:ListBucketMultipartUploads",
 23 |       "s3:PutObject",
 24 |     ]
 25 | 
 26 |     resources = [
 27 |       aws_s3_bucket.kinesis_firehose_stream_bucket.arn,
 28 |       "${aws_s3_bucket.kinesis_firehose_stream_bucket.arn}/*",
 29 |     ]
 30 |   }
 31 | }
 32 | 
 33 | data "aws_iam_policy_document" "kinesis_firehose_access_glue_assume_policy" {
 34 |   statement {
 35 |     effect    = "Allow"
 36 |     actions   = ["glue:GetTableVersions"]
 37 |     resources = ["*"]
 38 |   }
 39 | }
 40 | 
 41 | resource "aws_iam_role" "kinesis_firehose_stream_role" {
 42 |   name               = "kinesis_firehose_stream_role"
 43 |   assume_role_policy = data.aws_iam_policy_document.kinesis_firehose_stream_assume_role.json
 44 | }
 45 | 
 46 | resource "aws_iam_role_policy" "kinesis_firehose_access_bucket_policy" {
 47 |   name   = "kinesis_firehose_access_bucket_policy"
 48 |   role   = aws_iam_role.kinesis_firehose_stream_role.name
 49 |   policy = data.aws_iam_policy_document.kinesis_firehose_access_bucket_assume_policy.json
 50 | }
 51 | 
 52 | resource "aws_iam_role_policy" "kinesis_firehose_access_glue_policy" {
 53 |   name   = "kinesis_firehose_access_glue_policy"
 54 |   role   = aws_iam_role.kinesis_firehose_stream_role.name
 55 |   policy = data.aws_iam_policy_document.kinesis_firehose_access_glue_assume_policy.json
 56 | }
 57 | 
 58 | data "aws_iam_policy_document" "lambda_assume_role" {
 59 |   statement {
 60 |     effect  = "Allow"
 61 |     actions = ["sts:AssumeRole"]
 62 | 
 63 |     principals {
 64 |       type        = "Service"
 65 |       identifiers = ["lambda.amazonaws.com"]
 66 |     }
 67 |   }
 68 | }
 69 | 
 70 | data "aws_iam_policy_document" "lambda_assume_policy" {
 71 |   statement {
 72 |     effect = "Allow"
 73 | 
 74 |     actions = [
 75 |       "lambda:InvokeFunction",
 76 |       "lambda:GetFunctionConfiguration",
 77 |     ]
 78 | 
 79 |     resources = [
 80 |       aws_lambda_function.lambda_kinesis_firehose_data_transformation.arn,
 81 |       "${aws_lambda_function.lambda_kinesis_firehose_data_transformation.arn}:*",
 82 |     ]
 83 |   }
 84 | }
 85 | 
 86 | data "aws_iam_policy_document" "lambda_to_cloudwatch_assume_policy" {
 87 |   statement {
 88 |     effect = "Allow"
 89 | 
 90 |     actions = [
 91 |       "logs:CreateLogGroup",
 92 |       "logs:CreateLogStream",
 93 |       "logs:PutLogEvents",
 94 |     ]
 95 | 
 96 |     resources = ["*"]
 97 |   }
 98 | }
 99 | 
100 | resource "aws_iam_role" "lambda" {
101 |   name               = "lambda_function_role"
102 |   assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json
103 | }
104 | 
105 | resource "aws_iam_role_policy" "lambda_policy" {
106 |   name   = "lambda_function_policy"
107 |   role   = aws_iam_role.kinesis_firehose_stream_role.name
108 |   policy = data.aws_iam_policy_document.lambda_assume_policy.json
109 | }
110 | 
111 | resource "aws_iam_role_policy" "lambda_to_cloudwatch_policy" {
112 |   name   = "lambda_to_cloudwatch_policy"
113 |   role   = aws_iam_role.lambda.name
114 |   policy = data.aws_iam_policy_document.lambda_to_cloudwatch_assume_policy.json
115 | }
116 | 
117 | data "aws_iam_policy_document" "cloudwatch_logs_assume_role" {
118 |   statement {
119 |     effect  = "Allow"
120 |     actions = ["sts:AssumeRole"]
121 | 
122 |     principals {
123 |       type        = "Service"
124 |       identifiers = ["logs.${data.aws_region.default.name}.amazonaws.com"]
125 |     }
126 |   }
127 | }
128 | 
129 | data "aws_iam_policy_document" "cloudwatch_logs_assume_policy" {
130 |   statement {
131 |     effect    = "Allow"
132 |     actions   = ["firehose:*"]
133 |     resources = [aws_kinesis_firehose_delivery_stream.kinesis_firehose_stream.arn]
134 |   }
135 | }
136 | 
137 | resource "aws_iam_role" "cloudwatch_logs_role" {
138 |   name               = "cloudwatch_logs_role"
139 |   assume_role_policy = data.aws_iam_policy_document.cloudwatch_logs_assume_role.json
140 | }
141 | 
142 | resource "aws_iam_role_policy" "cloudwatch_logs_policy" {
143 |   name   = "cloudwatch_logs_policy"
144 |   role   = aws_iam_role.cloudwatch_logs_role.name
145 |   policy = data.aws_iam_policy_document.cloudwatch_logs_assume_policy.json
146 | }


--------------------------------------------------------------------------------
/main.tf:
--------------------------------------------------------------------------------
  1 | data "aws_region" "default" {}
  2 | 
  3 | resource "aws_kinesis_firehose_delivery_stream" "kinesis_firehose_stream" {
  4 |   name        = var.kinesis_firehose_stream_name
  5 |   destination = "extended_s3"
  6 | 
  7 |   extended_s3_configuration {
  8 |     role_arn       = aws_iam_role.kinesis_firehose_stream_role.arn
  9 |     bucket_arn     = aws_s3_bucket.kinesis_firehose_stream_bucket.arn
 10 |     buffer_size    = 128
 11 |     s3_backup_mode = "Enabled"
 12 |     prefix         = "logs/"
 13 | 
 14 |     s3_backup_configuration {
 15 |       role_arn   = aws_iam_role.kinesis_firehose_stream_role.arn
 16 |       bucket_arn = aws_s3_bucket.kinesis_firehose_stream_bucket.arn
 17 |       prefix     = var.kinesis_firehose_stream_backup_prefix
 18 | 
 19 |       cloudwatch_logging_options {
 20 |         enabled         = true
 21 |         log_group_name  = aws_cloudwatch_log_group.kinesis_firehose_stream_logging_group.name
 22 |         log_stream_name = aws_cloudwatch_log_stream.kinesis_firehose_stream_logging_stream.name
 23 |       }
 24 |     }
 25 | 
 26 |     processing_configuration {
 27 |       enabled = true
 28 | 
 29 |       processors {
 30 |         type = "Lambda"
 31 | 
 32 |         parameters {
 33 |           parameter_name  = "LambdaArn"
 34 |           parameter_value = "${aws_lambda_function.lambda_kinesis_firehose_data_transformation.arn}:$LATEST"
 35 |         }
 36 |       }
 37 |     }
 38 | 
 39 |     cloudwatch_logging_options {
 40 |       enabled         = true
 41 |       log_group_name  = aws_cloudwatch_log_group.kinesis_firehose_stream_logging_group.name
 42 |       log_stream_name = aws_cloudwatch_log_stream.kinesis_firehose_stream_logging_stream.name
 43 |     }
 44 | 
 45 |     data_format_conversion_configuration {
 46 |       input_format_configuration {
 47 |         deserializer {
 48 |           hive_json_ser_de {}
 49 |         }
 50 |       }
 51 | 
 52 |       output_format_configuration {
 53 |         serializer {
 54 |           parquet_ser_de {}
 55 |         }
 56 |       }
 57 | 
 58 |       schema_configuration {
 59 |         database_name = aws_glue_catalog_database.glue_catalog_database.name
 60 |         table_name    = aws_glue_catalog_table.glue_catalog_table.name
 61 |         role_arn      = aws_iam_role.kinesis_firehose_stream_role.arn
 62 |       }
 63 |     }
 64 |   }
 65 | }
 66 | 
 67 | resource "aws_cloudwatch_log_group" "kinesis_firehose_stream_logging_group" {
 68 |   name = "/aws/kinesisfirehose/${var.kinesis_firehose_stream_name}"
 69 | }
 70 | 
 71 | resource "aws_cloudwatch_log_stream" "kinesis_firehose_stream_logging_stream" {
 72 |   log_group_name = aws_cloudwatch_log_group.kinesis_firehose_stream_logging_group.name
 73 |   name           = "S3Delivery"
 74 | }
 75 | 
 76 | resource "aws_s3_bucket" "kinesis_firehose_stream_bucket" {
 77 |   bucket = var.bucket_name
 78 |   acl    = "private"
 79 | }
 80 | 
 81 | locals {
 82 |   path_prefix = "${var.root_path ? path.root : path.module}/functions"
 83 | }
 84 | 
 85 | data "archive_file" "kinesis_firehose_data_transformation" {
 86 |   type        = "zip"
 87 |   source_file = format("%s/%s.py", local.path_prefix, var.lambda_function_file_name)
 88 |   output_path = format("%s/%s.zip", local.path_prefix, var.lambda_function_file_name)
 89 | }
 90 | 
 91 | resource "aws_cloudwatch_log_group" "lambda_function_logging_group" {
 92 |   name = "/aws/lambda/${var.lambda_function_name}"
 93 | }
 94 | 
 95 | resource "aws_lambda_function" "lambda_kinesis_firehose_data_transformation" {
 96 |   filename      = data.archive_file.kinesis_firehose_data_transformation.output_path
 97 |   function_name = var.lambda_function_name
 98 | 
 99 |   role             = aws_iam_role.lambda.arn
100 |   handler          = "${var.lambda_function_file_name}.lambda_handler"
101 |   source_code_hash = data.archive_file.kinesis_firehose_data_transformation.output_base64sha256
102 |   runtime          = "python3.6"
103 |   timeout          = 60
104 | }
105 | 
106 | resource "aws_glue_catalog_database" "glue_catalog_database" {
107 |   name = var.glue_catalog_database_name
108 | }
109 | 
110 | resource "aws_glue_catalog_table" "glue_catalog_table" {
111 |   name          = var.glue_catalog_table_name
112 |   database_name = aws_glue_catalog_database.glue_catalog_database.name
113 | 
114 |   parameters = {
115 |     "classification" = "parquet"
116 |   }
117 | 
118 |   storage_descriptor {
119 |     input_format  = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
120 |     output_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"
121 |     location      = "s3://${aws_s3_bucket.kinesis_firehose_stream_bucket.bucket}/"
122 | 
123 |     ser_de_info {
124 |       name                  = "JsonSerDe"
125 |       serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
126 | 
127 |       parameters = {
128 |         "serialization.format" = 1
129 |         "explicit.null"        = false
130 |         "parquet.compression"  = "SNAPPY"
131 |       }
132 |     }
133 | 
134 |     dynamic "columns" {
135 |       for_each = var.glue_catalog_table_columns
136 |       content {
137 |         name = columns.value["name"]
138 |         type = columns.value["type"]
139 |       }
140 |     }
141 |   }
142 | }
143 | 
144 | resource "aws_cloudwatch_log_subscription_filter" "cloudwatch_subscription_filter" {
145 |   name           = var.cloudwatch_subscription_filter_name
146 |   log_group_name = var.cloudwatch_log_group_name
147 |   filter_pattern = var.cloudwatch_filter_pattern
148 | 
149 |   destination_arn = aws_kinesis_firehose_delivery_stream.kinesis_firehose_stream.arn
150 |   distribution    = "ByLogStream"
151 | 
152 |   role_arn = aws_iam_role.cloudwatch_logs_role.arn
153 | }
154 | 


--------------------------------------------------------------------------------