├── .gitignore
├── .pre-commit-config.yaml
├── AUTHORS.md
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── diagram.png
├── es_cleanup.py
├── es_cleanup_test.py
├── json_file
    ├── cloudwatch-target.json
    ├── es_policy.json
    └── trust_policy.json
├── requirements.txt
├── serverless.yml
├── terraform
    ├── README.md
    ├── cloudwatch.tf
    ├── iam.tf
    ├── lambda.tf
    ├── outputs.tf
    ├── sg.tf
    ├── variables.tf
    └── versions.tf
└── tests
    ├── README.md
    ├── data.tf
    ├── main.tf
    ├── main_vpc.tf
    ├── outputs.tf
    ├── run.sh
    └── variables.tf


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | **/.terraform
10 | **/terraform.tfstate*
11 | **/.terraform.tfstate*
12 | 
13 | **/*es-cleanup.zip
14 | **/*es_cleanup.zip
15 | .serverless
16 | # Distribution / packaging
17 | .Python
18 | env/
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | 
34 | # PyInstaller
35 | #  Usually these files are written by a python script from a template
36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 | 
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 | 
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *,cover
53 | .hypothesis/
54 | 
55 | # Translations
56 | *.mo
57 | *.pot
58 | 
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | 
63 | # Flask stuff:
64 | instance/
65 | .webassets-cache
66 | 
67 | # Scrapy stuff:
68 | .scrapy
69 | 
70 | # Sphinx documentation
71 | docs/_build/
72 | 
73 | # PyBuilder
74 | target/
75 | 
76 | # Jupyter Notebook
77 | .ipynb_checkpoints
78 | 
79 | # pyenv
80 | .python-version
81 | 
82 | # celery beat schedule file
83 | celerybeat-schedule
84 | 
85 | # dotenv
86 | .env
87 | 
88 | # virtualenv
89 | .venv/
90 | venv/
91 | ENV/
92 | 
93 | # Spyder project settings
94 | .spyderproject
95 | 
96 | # Rope project settings
97 | .ropeproject
98 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: git://github.com/antonbabenko/pre-commit-terraform
 3 |     rev: v1.27.0
 4 |     hooks:
 5 |       - id: terraform_fmt
 6 |       - id: terraform_docs
 7 |   - repo: git://github.com/pre-commit/pre-commit-hooks
 8 |     rev: v2.5.0
 9 |     hooks:
10 |       - id: check-json
11 |       - id: end-of-file-fixer
12 |       - id: trailing-whitespace
13 |       - id: check-case-conflict
14 |       - id: check-merge-conflict
15 |       - id: detect-private-key
16 |       - id: detect-aws-credentials
17 | 


--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
 1 | # Authors
 2 | 
 3 | * **Giulio Calzolari** - *Initial work* - [giuliocalzolari](https://github.com/giuliocalzolari)
 4 | * **Omar Tarabai** - *Terraform deployment* - [omar-tarabai](https://github.com/omar-tarabai)
 5 | * **netflash**  - [netflash](https://github.com/netflash)
 6 | * **dcodix** - *Python 3 support* - [dcodix](https://github.com/dcodix)
 7 | * **cheungpat** - *index parsing* - [cheungpat](https://github.com/cheungpat)
 8 | * **cliveza** - *terraform suffix* - [cliveza](https://github.com/cliveza)
 9 | * **fllaca** - *timeout var* - [fllaca](https://github.com/fllaca)
10 | * **johannes-gehrs** - *Fix Logic, Improve Test Coverage, and Resilience* - [johannes-gehrs](https://github.com/johannes-gehrs)
11 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Change Log
 2 | 
 3 | ## [v0.8](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.8) (2018-09-21)
 4 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.7...v0.8)
 5 | 
 6 | **Merged pull requests:**
 7 | 
 8 | - Added suffix variable [\#19](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/19) ([cliveza](https://github.com/cliveza))
 9 | 
10 | ## [v0.7](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.7) (2018-09-13)
11 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.6...v0.7)
12 | 
13 | **Implemented enhancements:**
14 | 
15 | - Code optimisation when aws\_lambda\_function.vpc\_config is solved [\#12](https://github.com/cloudreach/aws-lambda-es-cleanup/issues/12)
16 | 
17 | **Fixed bugs:**
18 | 
19 | - Code optimisation when aws\_lambda\_function.vpc\_config is solved [\#12](https://github.com/cloudreach/aws-lambda-es-cleanup/issues/12)
20 | 
21 | ## [v0.6](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.6) (2018-08-17)
22 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.5...v0.6)
23 | 
24 | **Fixed bugs:**
25 | 
26 | - Fix terraform security group [\#15](https://github.com/cloudreach/aws-lambda-es-cleanup/issues/15)
27 | - Fix index name parsed incorrectly if index format contains hyphen [\#18](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/18) ([cheungpat](https://github.com/cheungpat))
28 | 
29 | **Merged pull requests:**
30 | 
31 | - \[Resolve \#15\] Enable DNS connection on security group [\#16](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/16) ([giuliocalzolari](https://github.com/giuliocalzolari))
32 | - Update es\_policy.json [\#14](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/14) ([giuliocalzolari](https://github.com/giuliocalzolari))
33 | 
34 | ## [v0.5](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.5) (2018-02-28)
35 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.4...v0.5)
36 | 
37 | **Implemented enhancements:**
38 | 
39 | - VPC support for the Lambda function [\#10](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/10) ([MattiasGees](https://github.com/MattiasGees))
40 | 
41 | ## [v0.4](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.4) (2018-02-02)
42 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.3...v0.4)
43 | 
44 | **Implemented enhancements:**
45 | 
46 | - Add the feature that allows to pass variables from event [\#9](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/9) ([bilardi](https://github.com/bilardi))
47 | 
48 | ## [v0.3](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.3) (2017-12-12)
49 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.2...v0.3)
50 | 
51 | **Implemented enhancements:**
52 | 
53 | - Compatible with py2 and py3 [\#8](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/8) ([dcodix](https://github.com/dcodix))
54 | 
55 | **Fixed bugs:**
56 | 
57 | - Compatible with py2 and py3 [\#8](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/8) ([dcodix](https://github.com/dcodix))
58 | 
59 | **Merged pull requests:**
60 | 
61 | - typo [\#6](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/6) ([netflash](https://github.com/netflash))
62 | 
63 | ## [v0.2](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.2) (2017-11-09)
64 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.1...v0.2)
65 | 
66 | **Merged pull requests:**
67 | 
68 | - Add terraform stack [\#5](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/5) ([otarabai](https://github.com/otarabai))
69 | - update Readme [\#1](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/1) ([giuliocalzolari](https://github.com/giuliocalzolari))
70 | 
71 | ## [v0.1](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.1) (2017-05-23)
72 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | Hello! Thank you for choosing to help contribute to one of the Cloudreach OpenSource projects. There are many ways you can contribute and help is always welcome. We simply ask that you follow the contribution policies below.
 4 | 
 5 | - [Submit a Bug Report](#submit-a-bug-report)
 6 | - [Enhancement Proposal](#enhancement-proposal)
 7 | - [Contributing Code](#contributing-code)
 8 | 
 9 | ## Submit a Bug Report
10 | 
11 | Note: DO NOT include your credentials in ANY code examples, descriptions, or media you make public.
12 | 
13 | Before submitting a bug, please check our [issues page](https://github.com/cloudreach/aws-lambda-es-cleanup/issues) to see if it's already been reported.
14 | 
15 | When reporting a bug, fill out the required template, and please include as much detail as possible as it helps us resolve issues faster.
16 | 
17 | ## Enhancement Proposal
18 | 
19 | Enhancement proposals should:
20 | 
21 | * Use a descriptive title.
22 | * Provide a step-by-step description of the suggested enhancement.
23 | * Provide specific examples to demonstrate the steps.
24 | * Describe the current behaviour and explain which behaviour you expected to see instead.
25 | * Keep the scope as narrow as possible, to make it easier to implement.
26 | 
27 | Remember that this is a volunteer-driven project, and that contributions are welcome.
28 | 
29 | ## Contributing Code
30 | 
31 | Contributions should be made in response to a particular GitHub Issue. We find it easier to review code if we've already discussed what it should do, and assessed if it fits with the wider codebase.
32 | 
33 | A good pull request:
34 | 
35 | * Is clear.
36 | * Works across all supported versions of Python.
37 | * Complies with the existing codebase style ([flake8](http://flake8.pycqa.org/en/latest/), [pylint](https://www.pylint.org/)).
38 | * Includes [docstrings](https://www.python.org/dev/peps/pep-0257/) and comments for unintuitive sections of code.
39 | * Includes documentation for new features.
40 | * Is appropriately licensed (Apache 2.0).
41 | 
42 | 
43 | 
44 | # Get Started
45 | 
46 | * Clone the repository locally:
47 | 
48 | ```bash
49 |     $ git clone git@github.com:cloudreach/aws-lambda-es-cleanup.git
50 | ```
51 | 
52 | * Install your local copy into a [virtualenv](http://docs.python-guide.org/en/latest/dev/virtualenvs/). Assuming you have virtualenv installed, this is how you set up your fork for local development:
53 | 
54 | ```bash
55 |     $ cd aws-lambda-es-cleanup/
56 |     $ virtualenv env
57 |     $ source env/bin/activate
58 |     $ pip install -r requirements.txt
59 |     $ pip install -e .
60 | ```
61 | 
62 | * Create a branch for local development:
63 | 
64 | ```bash
65 |     $ git checkout -b branch-<GitHub issue number>
66 | ```
67 | 
68 | * Make sure the changes comply with the pull request guidelines in the section on [Contributing Code](#contributing-code).
69 | 
70 | * Commit your changes:
71 | 
72 | ```bash
73 |     $ git add .
74 |     $ git commit
75 | ```
76 | 
77 | * Commit messages should follow [these guidelines](https://github.com/erlang/otp/wiki/Writing-good-commit-messages).
78 | 
79 | * Push your branch to GitHub:
80 | 
81 | ```bash
82 |     $ git push origin branch-<GitHub issue number>
83 | ```
84 | 
85 | * Submit a pull request through the GitHub website.
86 | 
87 | 
88 | Credits
89 | -------
90 | 
91 | This document took inspiration from the CONTRIBUTING files of the [Atom](https://github.com/atom/atom/blob/abccce6ee9079fdaefdecb018e72ea64000e52ef/CONTRIBUTING.md) and [Boto3](https://github.com/boto/boto3/blob/e85febf46a819d901956f349afef0b0eaa4d906d/CONTRIBUTING.rst) projects.
92 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Apache Software License 2.0
 2 | 
 3 | Copyright 2020 **Cloudreach Europe Limited** or its affiliates. All Rights Reserved.
 4 | 
 5 | Licensed under the Apache License, Version 2.0 (the "License");
 6 | you may not use this file except in compliance with the License.
 7 | You may obtain a copy of the License at
 8 | 
 9 | http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and
15 | limitations under the License.
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # AWS Lambda Elasticsearch Index Cleanup
  2 | 
  3 | ## Overview
  4 | This AWS Lambda function allows you to delete the old Elasticsearch indexes using SigV4Auth authentication. You configure the AWS Elasticsearch Access Policy authorizing the Lambda Role or the AWS Account number instead of using the IP address whitelist.
  5 | 
  6 | ## Diagram
  7 | 
  8 | <p align="center">
  9 |   <img src="diagram.png">
 10 | </p>
 11 | 
 12 | 
 13 | 
 14 | ## Getting Started
 15 | ### How To install
 16 | 
 17 | Clone your repository
 18 | 
 19 | ```bash
 20 | $ git clone git@github.com:cloudreach/aws-lambda-es-cleanup.git
 21 | $ cd aws-lambda-es-cleanup/
 22 | ```
 23 | 
 24 | Edit the IAM policies in `json_file/es_policy.json` and `json_file/trust_policy.json` to suit your environment.
 25 | 
 26 | Create the IAM Role
 27 | 
 28 | ```bash
 29 | $ aws iam create-role --role-name es-cleanup-lambda \
 30 | 	--assume-role-policy-document file://json_file/trust_policy.json
 31 | 
 32 | ```
 33 | 
 34 | ```bash
 35 | $ aws iam put-role-policy --role-name es-cleanup-lambda \
 36 |     --policy-name es_cleanup \
 37 |     --policy-document file://json_file/es_policy.json
 38 | ```
 39 | 
 40 | 
 41 | If your Lambda is running inside a VPC, also attach these policies:
 42 | 
 43 | 
 44 | ```
 45 | arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
 46 | arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole
 47 | ```
 48 | 
 49 | 
 50 | Create your Lambda package
 51 | 
 52 | ```bash
 53 | $ zip es-cleanup-lambda.zip es_cleanup.py
 54 | ```
 55 | 
 56 | 
 57 | 
 58 | ### Lambda deployment
 59 | Using awscli you can create your AWS function and set the proper IAM role with the right Account ID
 60 | 
 61 | ```bash
 62 | $ export AWS_DEFAULT_REGION=eu-west-1
 63 | $ ESENDPOINT="search-es-demo-zveqnhnhjqm5flntemgmx5iuya.eu-west-1.es.amazonaws.com" #ES endpoint
 64 | 
 65 | $ aws lambda create-function \
 66 | 	--function-name es-cleanup-lambda \
 67 | 	--environment Variables={es_endpoint=$ESENDPOINT} \
 68 | 	--zip-file fileb://es-cleanup-lambda.zip \
 69 | 	--description "Elasticsearch Index Cleanup" \
 70 | 	--role arn:aws:iam::123456789012:role/es-cleanup-lambda \
 71 | 	--handler es_cleanup.lambda_handler \
 72 | 	--runtime python3.6 \
 73 | 	--timeout 180
 74 | ```
 75 | 
 76 | If you prefer to pass the variables in the invocation payload instead of the environment, create the function without `--environment`:
 77 | ```bash
 78 | $ export AWS_DEFAULT_REGION=eu-west-1
 79 | 
 80 | $ aws lambda create-function \
 81 | 	--function-name es-cleanup-lambda \
 82 | 	--zip-file fileb://es-cleanup-lambda.zip \
 83 | 	--description "Elasticsearch Index Cleanup" \
 84 | 	--role arn:aws:iam::123456789012:role/es-cleanup-lambda \
 85 | 	--handler es_cleanup.lambda_handler \
 86 | 	--runtime python3.6 \
 87 | 	--timeout 180
 88 | ```
 89 | 
 90 | ### Lambda invoke with parameters
 91 | It is possible to override the default behaviour by passing a specific payload:
 92 | 
 93 | ```bash
 94 | $ aws lambda invoke \
 95 |  --function-name es-cleanup-lambda \
 96 |  outfile --payload \
 97 |  '{"es_endpoint":"search-es-demo-zveqnhnhjqm5flntemgmx5iuya.eu-west-1.es.amazonaws.com"}'
 98 | ```
 99 | 
100 | Create your AWS Cloudwatch rule:
101 | 
102 | ```bash
103 | $ aws events put-rule \
104 | 	--name my-scheduled-rule \
105 | 	--schedule-expression 'cron(0 1 * * ? *)'
106 | 
107 | 
108 | $ aws lambda add-permission \
109 | 	--function-name es-cleanup-lambda \
110 | 	--statement-id my-scheduled-event \
111 | 	--action 'lambda:InvokeFunction' \
112 | 	--principal events.amazonaws.com \
113 | 	--source-arn arn:aws:events:eu-west-1:123456789012:rule/my-scheduled-rule
114 | 
115 | 
116 | $ aws events put-targets \
117 | 	--rule my-scheduled-rule \
118 | 	--targets file://json_file/cloudwatch-target.json
119 | ```
120 | 
121 | ### Lambda configuration and OS parameters
122 | 
123 | Using Lambda environment variables you can easily modify the behaviour of the Lambda function
124 | 
125 | | Variable Name | Example Value | Description | Default Value | Required |
126 | | --- | --- | --- | --- |  --- |
127 | | es_endpoint | search-es-demo-zveqnhnhjqm5flntemgmx5iuya.eu-west-1.es.amazonaws.com  | AWS ES fqdn | `None` | True |
128 | | index |  `logstash.*` | Regular expression selecting the indices to process; indices matching `skip_index` are never deleted | `.*` | False |
129 | | skip_index |  `\.kibana.*` | Regular expression selecting the indices to skip  | `.kibana*` | False |
130 | | index_format  | `%Y.%m.%d` | `strptime` format of the date suffix in the index name, used to evaluate the index age | `%Y.%m.%d` |  False |
131 | | delete_after | `7` | Number of days to preserve | `15` |  False |
132 | 
133 | ## Serverless Framework
134 | 
135 | Editing the file `serverless.yml`, you can deploy your function in AWS using [Serverless Framework](https://serverless.com/framework/docs/providers/aws/cli-reference/)
136 | 
137 | ```bash
138 | $ git clone git@github.com:cloudreach/aws-lambda-es-cleanup.git
139 | $ cd aws-lambda-es-cleanup/
140 | $ serverless deploy
141 | Serverless: Creating Stack...
142 | Serverless: Checking Stack create progress...
143 | .....
144 | Serverless: Stack create finished...
145 | Serverless: Packaging service...
146 | Serverless: Uploading CloudFormation file to S3...
147 | Serverless: Uploading function .zip files to S3...
148 | Serverless: Uploading service .zip file to S3 (7.13 KB)...
149 | Serverless: Updating Stack...
150 | Serverless: Checking Stack update progress...
151 | ......................
152 | Serverless: Stack update finished...
153 | Service Information
154 | service: es-cleanup-lambda
155 | stage: prod
156 | region: eu-west-1
157 | api keys:
158 |   None
159 | endpoints:
160 |   None
161 | functions:
162 |   es-cleanup-lambda: es-cleanup-lambda-prod-es-cleanup-lambda
163 | ```
164 | 
165 | ### Terraform deployment
166 | 
167 | This Lambda function can also be built using Terraform by following this [README](terraform/README.md).
168 | 
169 | ## How to Contribute
170 | 
171 | We encourage contribution to our projects, please see our [CONTRIBUTING](CONTRIBUTING.md) guide for details.
172 | 
173 | 
174 | ## License
175 | 
176 | **aws-lambda-es-cleanup** is licensed under the [Apache Software License 2.0](LICENSE.md).
177 | 
178 | ## Thanks
179 | 
180 | Keep It Cloudy ([@CloudreachKIC](https://twitter.com/cloudreachkic))
181 | 


--------------------------------------------------------------------------------
/diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudreach/aws-lambda-es-cleanup/d043cc37807c0773c0a0cc2ea4a562b44d5d77bc/diagram.png


--------------------------------------------------------------------------------
/es_cleanup.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | This AWS Lambda function deletes old Elasticsearch indices
  5 | """
  6 | import datetime
  7 | import re
  8 | import sys
  9 | import time
 10 | 
 11 | import json
 12 | import os
 13 | from botocore.auth import SigV4Auth
 14 | from botocore.awsrequest import AWSRequest
 15 | from botocore.credentials import create_credential_resolver
 16 | from botocore.httpsession import URLLib3Session
 17 | from botocore.session import get_session
 18 | 
 19 | if sys.version_info[0] == 3:
 20 |     from urllib.request import quote
 21 | else:
 22 |     from urllib import quote
 23 | 
 24 | 
 25 | class ES_Exception(Exception):
 26 |     """Exception capturing status_code from Client Request"""
 27 |     status_code = 0
 28 |     payload = ""
 29 | 
 30 |     def __init__(self, status_code, payload):
 31 |         self.status_code = status_code
 32 |         self.payload = payload
 33 |         Exception.__init__(self,
 34 |                            "ES_Exception: status_code={}, payload={}".format(
 35 |                                status_code, payload))
 36 | 
 37 | 
 38 | class ES_Cleanup(object):
 39 |     name = "lambda_es_cleanup"
 40 | 
 41 |     def __init__(self, event, context):
 42 |         """Main Class init
 43 | 
 44 |         Args:
 45 |             event (dict): AWS Cloudwatch Scheduled Event
 46 |             context (object): AWS running context
 47 |         """
 48 |         self.report = []
 49 |         self.event = event
 50 |         self.context = context
 51 | 
 52 |         self.cfg = {}
 53 |         self.cfg["es_endpoint"] = self.get_parameter("es_endpoint")
 54 |         self.cfg["index"] = self.get_parameter("index", ".*")
 55 |         self.cfg["skip_index"] = self.get_parameter("skip_index", ".kibana*")
 56 | 
 57 |         self.cfg["delete_after"] = int(self.get_parameter("delete_after", 15))
 58 |         self.cfg["es_max_retry"] = int(self.get_parameter("es_max_retry", 3))
 59 |         self.cfg["index_format"] = self.get_parameter(
 60 |             "index_format", "%Y.%m.%d")
 61 | 
 62 |         if not self.cfg["es_endpoint"]:
 63 |             raise Exception("[es_endpoint] OS variable is not set")
 64 | 
 65 |     def get_parameter(self, key_param, default_param=None):
 66 |         """helper function to retrieve specific configuration
 67 | 
 68 |         Args:
 69 |             key_param     (str): key_param to read from "event" or "environment" variable
 70 |             default_param (str): default value
 71 | 
 72 |         Returns:
 73 |             string: parameter value or None
 74 | 
 75 |         """
 76 |         return self.event.get(key_param, os.environ.get(key_param, default_param))
 77 | 
 78 |     def send_to_es(self, path, method="GET", payload={}):
 79 |         """Low-level POST data to Amazon Elasticsearch Service generating a Sigv4 signed request
 80 | 
 81 |         Args:
 82 |             path (str): path to send to ES
 83 |             method (str, optional): HTTP method default:GET
 84 |             payload (dict, optional): additional payload used during POST or PUT
 85 | 
 86 |         Returns:
 87 |             dict: json answer converted in dict
 88 | 
 89 |         Raises:
 90 |             #: Error during ES communication
 91 |             ES_Exception: Description
 92 |         """
 93 |         if not path.startswith("/"):
 94 |             path = "/" + path
 95 | 
 96 |         es_region = self.cfg["es_endpoint"].split(".")[1]
 97 | 
 98 |         headers = {
 99 |             "Host": self.cfg["es_endpoint"],
100 |             "Content-Type": "application/json"
101 |         }
102 | 
103 |         # send to ES with exponential backoff
104 |         retries = 0
105 |         while retries < int(self.cfg["es_max_retry"]):
106 |             if retries > 0:
107 |                 seconds = (2 ** retries) * .1
108 |                 time.sleep(seconds)
109 | 
110 |             req = AWSRequest(
111 |                 method=method,
112 |                 url="https://{}{}".format(
113 |                     self.cfg["es_endpoint"], quote(path)),
114 |                 data=json.dumps(payload),
115 |                 params={"format": "json"},
116 |                 headers=headers)
117 |             credential_resolver = create_credential_resolver(get_session())
118 |             credentials = credential_resolver.load_credentials()
119 |             SigV4Auth(credentials, 'es', es_region).add_auth(req)
120 | 
121 |             try:
122 |                 preq = req.prepare()
123 |                 session = URLLib3Session()
124 |                 res = session.send(preq)
125 |                 if res.status_code >= 200 and res.status_code <= 299:
126 |                     return json.loads(res.content)
127 |                 else:
128 |                     raise ES_Exception(res.status_code, res._content)
129 | 
130 |             except ES_Exception as e:
131 |                 if (e.status_code >= 500) and (e.status_code <= 599):
132 |                     retries += 1  # Candidate for retry
133 |                 else:
134 |                     raise  # Stop retrying, re-raise exception
135 | 
136 |     def delete_index(self, index_name):
137 |         """ES DELETE specific index
138 | 
139 |         Args:
140 |             index_name (str): Index name
141 | 
142 |         Returns:
143 |             dict: ES answer
144 |         """
145 |         return self.send_to_es(index_name, "DELETE")
146 | 
147 |     def get_indices(self):
148 |         """ES Get indices
149 | 
150 |         Returns:
151 |             dict: ES answer
152 |         """
153 |         return self.send_to_es("/_cat/indices")
154 | 
155 | 
class DeleteDecider(object):
    """Decides from an index name whether the index is old enough to delete."""

    def __init__(self, delete_after, idx_format, idx_regex, skip_idx_regex, today):
        """Store the retention settings.

        Args:
            delete_after (int): number of days to keep
            idx_format (str): strptime format of the date suffix
            idx_regex (str): regex an index name must match to be processed
            skip_idx_regex (str): regex of index names that are never deleted
            today (datetime.date): reference date for the age computation
        """
        self.delete_after = delete_after
        self.idx_format = idx_format
        self.idx_regex = idx_regex
        self.skip_idx_regex = skip_idx_regex
        self.today = today

    def should_delete(self, index):
        """Evaluate one index entry.

        Args:
            index (dict): entry whose "index" key holds the index name

        Returns:
            tuple: (bool, str) - the decision and a human readable reason

        Raises:
            ValueError: the name matched ``idx_regex``, was not skipped, and
                its date suffix could not be parsed with ``idx_format``
        """
        full_name = index["index"]

        # The date is everything after the last "-", widened by one "-" for
        # every "-" the date format itself contains.
        idx_split = full_name.rsplit("-", 1 + self.idx_format.count("-"))
        idx_date_str = '-'.join(idx_split[1:])
        idx_name = idx_split[0]

        if not re.search(self.idx_regex, full_name):
            return False, "index '{}' name '{}' did not match pattern '{}'".format(full_name,
                                                                                   idx_name,
                                                                                   self.idx_regex)

        earliest_to_keep = self.today - datetime.timedelta(days=self.delete_after)
        if re.search(self.skip_idx_regex, full_name):
            return False, "index matches skip condition"

        try:
            idx_date = datetime.datetime.strptime(idx_date_str, self.idx_format).date()
        except ValueError:
            # Name the offending index: the date part alone is empty for
            # names without any "-" and would make the message useless.
            raise ValueError("Unable to parse index date '{0}' of index '{1}' - "
                             "incorrect index date format set?".format(idx_date_str, full_name))

        if idx_date < earliest_to_keep:
            return True, "all conditions satisfied"

        return False, "deletion age has not been reached. " \
                      "Oldest index kept: {0}, Index Date: {1}".format(earliest_to_keep, idx_date)
190 | 
191 | 
def lambda_handler(event, context):
    """Main Lambda function: delete every index that is past its retention age.

    An index whose date suffix cannot be parsed no longer aborts the run:
    it is reported and remembered, the remaining indices are still
    processed, and a single ValueError is raised at the end so the
    invocation is still marked as failed.

    Args:
        event (dict): AWS Cloudwatch Scheduled Event
        context (object): AWS running context
    Returns:
        None
    Raises:
        ValueError: at least one index name could not be parsed with the
            configured index_format
    """
    es = ES_Cleanup(event, context)
    decider = DeleteDecider(delete_after=int(es.cfg["delete_after"]),
                            idx_regex=es.cfg["index"],
                            idx_format=es.cfg["index_format"],
                            skip_idx_regex=es.cfg["skip_index"],
                            today=datetime.date.today())

    unparsable = []
    for index in es.get_indices():
        name = index["index"]
        try:
            delete, reason = decider.should_delete(index)
        except ValueError as exc:
            # Keep going: one odd index name must not block the cleanup of
            # every index listed after it.
            print("Skipping or keeping index: {}. Reason: {}".format(name, exc))
            unparsable.append(name)
            continue

        if delete:
            print("Deleting index: {}".format(name))
            es.delete_index(name)
        else:
            print("Skipping or keeping index: {}. Reason: {}".format(name, reason))

    if unparsable:
        raise ValueError("Unable to parse the date of {} index(es): {} - "
                         "incorrect index date format set?".format(
                             len(unparsable), ", ".join(unparsable)))
214 | 
215 | 
if __name__ == '__main__':
    # Manual run with a sample CloudWatch scheduled event. The event carries
    # no es_endpoint key, so that setting has to come from the environment.
    event = {
        'id': 'cdc73f9d-aea9-11e3-9d5a-835b769c0d9c',
        'detail-type': 'Scheduled Event',
        'source': 'aws.events',
        'account': '123456789012',
        'time': '1970-01-01T00:00:00Z',
        'region': 'eu-west-1',
        'resources': ['arn:aws:events:us-east-1:123456789012:rule/my-schedule'],
        'detail': {},
    }
    lambda_handler(event, "")
229 | 


--------------------------------------------------------------------------------
/es_cleanup_test.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import unittest
 3 | 
 4 | import es_cleanup
 5 | 
 6 | IDX_REGEX = '.*'
 7 | IDX_FORMAT1 = '%Y.%m.%d'
 8 | SKIP_IDX_REGEX = 'kibana*'
 9 | 
10 | decider = es_cleanup.DeleteDecider(delete_after=4,
11 |                                    idx_format=IDX_FORMAT1,
12 |                                    idx_regex=IDX_REGEX,
13 |                                    skip_idx_regex=SKIP_IDX_REGEX,
14 |                                    today=datetime.date(2019, 12, 19))
15 | 
16 | 
class TestShouldDelete(unittest.TestCase):
    """Decisions of the module-level ``decider`` (catch-all index pattern)."""

    def test_should_be_deleted(self):
        should_delete, _ = decider.should_delete({"index": "k8s-2019.12.14"})
        self.assertTrue(should_delete)

    def test_should_not_be_deleted(self):
        should_delete, _ = decider.should_delete({"index": "k8s-2019.12.15"})
        self.assertFalse(should_delete)

    def test_should_raise_value_error(self):
        # Hyphenated date does not fit the dotted index format.
        with self.assertRaises(ValueError):
            decider.should_delete({"index": "k8s-2019-12-15"})

    def test_should_skip_indes(self):
        should_delete, reason = decider.should_delete({"index": ".kibana"})
        self.assertFalse(should_delete)
        self.assertIn("matches skip condition", reason)

    def test_should_skip_indes_2(self):
        should_delete, reason = decider.should_delete({"index": ".kibana_1"})
        self.assertFalse(should_delete)
        self.assertIn("matches skip condition", reason)
39 | 
40 | 
41 | 
42 | 
43 | 
# Second fixture: idx_regex only matches names containing app1, app2 or k8s.
decider2 = es_cleanup.DeleteDecider(
    today=datetime.date(2019, 12, 19),
    delete_after=4,
    idx_regex='app[1-2].*|k8s.*',
    skip_idx_regex='kibana.*',
    idx_format='%Y.%m.%d',
)
49 | 
50 | 
class TestShouldDelete2(unittest.TestCase):
    """Decisions of the module-level ``decider2`` (restricted index pattern)."""

    def test_should_be_deleted(self):
        should_delete, _ = decider2.should_delete({"index": "k8s-2019.12.14"})
        self.assertTrue(should_delete)

    def test_should_not_be_deleted(self):
        should_delete, _ = decider2.should_delete({"index": "k8s-2019.12.15"})
        self.assertFalse(should_delete)

    def test_should_be_deleted_app1(self):
        should_delete, _ = decider2.should_delete({"index": "app1-2019.12.14"})
        self.assertTrue(should_delete)

    def test_should_not_be_deleted_app1(self):
        # Must exercise decider2 like every other test in this class.
        should_delete, _ = decider2.should_delete({"index": "app1-2019.12.15"})
        self.assertFalse(should_delete)

    def test_should_be_deleted_app2(self):
        should_delete, _ = decider2.should_delete({"index": "app2-2019.12.14"})
        self.assertTrue(should_delete)

    def test_should_not_be_deleted_app2(self):
        should_delete, _ = decider2.should_delete({"index": "app2-2019.12.15"})
        self.assertFalse(should_delete)

    def test_should_not_be_deleted_app3(self):
        # app3 is old enough but does not match the index pattern.
        should_delete, _ = decider2.should_delete({"index": "app3-2019.12.14"})
        self.assertFalse(should_delete)

    def test_should_raise_value_error(self):
        # Hyphenated date does not fit the dotted index format.
        with self.assertRaises(ValueError):
            decider2.should_delete({"index": "k8s-2019-12-15"})

    def test_should_skip_indes(self):
        should_delete, reason = decider2.should_delete({"index": ".kibana"})
        self.assertFalse(should_delete)
        self.assertIn("matches skip condition", reason)

    def test_should_skip_indes_2(self):
        should_delete, reason = decider2.should_delete({"index": ".kibana_1"})
        self.assertFalse(should_delete)
        self.assertIn("matches skip condition", reason)
93 | 
94 | 
95 | 
96 | if __name__ == '__main__':
97 |     unittest.main()
98 | 


--------------------------------------------------------------------------------
/json_file/cloudwatch-target.json:
--------------------------------------------------------------------------------
1 | [
2 |   {
3 |     "Id": "1",
4 |     "Arn": "arn:aws:lambda:eu-west-1:123456789012:function:es-cleanup-lambda"
5 |   }
6 | ]
7 | 


--------------------------------------------------------------------------------
/json_file/es_policy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Version": "2012-10-17",
 3 |   "Statement": [
 4 |     {
 5 |       "Sid": "LambdaLogCreation",
 6 |       "Effect": "Allow",
 7 |       "Action": ["logs:*"],
 8 |       "Resource": "arn:aws:logs:*:*:*"
 9 |     },
10 |     {
11 |       "Sid": "ESPermission",
12 |       "Effect": "Allow",
13 |       "Action": [
14 |         "es:*"
15 |       ],
16 |       "Resource": "arn:aws:es:eu-west-1:123456789012:domain/es-demo/*"
17 |     }
18 |   ]
19 | }
20 | 


--------------------------------------------------------------------------------
/json_file/trust_policy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Version": "2012-10-17",
 3 |   "Statement": [
 4 |     {
 5 |       "Action": "sts:AssumeRole",
 6 |       "Principal": {
 7 |         "Service": "lambda.amazonaws.com"
 8 |       },
 9 |       "Effect": "Allow",
10 |       "Sid": ""
11 |     }
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3>=1.3.0
2 | botocore>=1.5.0
3 | 


--------------------------------------------------------------------------------
/serverless.yml:
--------------------------------------------------------------------------------
 1 | # serverless.yml
 2 | service: es-cleanup-lambda
 3 | 
 4 | provider:
 5 |   name: aws
 6 |   stage: prod
 7 |   region: eu-west-1
 8 |   profile: ${env:AWS_DEFAULT_PROFILE}
 9 |   deploymentBucket:
10 |     name: ${env:S3_DEPLOYMENT_BUCKET}
11 |   environment:
12 |     es_endpoint: search-es-demo-zveqnhnhjqm5flntemgmx5iuya.eu-west-1.es.amazonaws.com
13 |     index: ".*"
14 |     skip_index: ".kibana*"
15 |     delete_after: "7"
16 |     index_format: "%Y.%m.%d"
17 |   stackTags: # Optional CF stack tags
18 |     Owner: aws@cloudreach.com
19 |     Purpose: es-cleanup
20 |     Environment: PROD
21 |   iamRoleStatements:
22 |     - Effect: "Allow"
23 |       Action:
24 |         - "es:*"
25 |       Resource: "arn:aws:es:eu-west-1:123456789012:domain/es-demo/*"
26 |       # you must specify the full ARN  of the AWS ES Domain
27 | 
28 | 
29 | 
30 | functions:
31 |   es-cleanup-lambda:
32 |     handler: es_cleanup.lambda_handler # module part must match the packaged file es_cleanup.py
33 |     name: es-cleanup-lambda
34 |     description: ES old index removal
35 |     runtime: python3.7
36 |     memorySize: 128 # MB, given as an integer
37 |     timeout: 300
38 |     package:
39 |       exclude:
40 |         - ./**
41 |       include:
42 |         - es_cleanup.py
43 |         - LICENSE.md
44 |         - README.md
45 |         - CONTRIBUTING.md
46 |     events:
47 |       # Invoke Lambda function every night at 01:00 AM (schedule expressions are evaluated in UTC)
48 |       - schedule: cron(0 1 * * ? *)
49 | 
50 | resources:
51 |   Description: "AWS Lambda: Elasticsearch Index Cleanup"
52 | 


--------------------------------------------------------------------------------
/terraform/README.md:
--------------------------------------------------------------------------------
 1 | # Module Input Variables
 2 | 
 3 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
 4 | ## Providers
 5 | 
 6 | | Name | Version |
 7 | |------|---------|
 8 | | archive | n/a |
 9 | | aws | n/a |
10 | | null | n/a |
11 | 
12 | ## Inputs
13 | 
14 | | Name | Description | Type | Default | Required |
15 | |------|-------------|------|---------|:-----:|
16 | | delete\_after | Numbers of days to preserve | `number` | `15` | no |
17 | | es\_endpoint | AWS ES FQDN e.g. search-es-demo-xxxxxxxxxx.eu-west-1.es.amazonaws.com | `string` | n/a | yes |
18 | | index | Index/indices to process using regex, except the one matching `skip_index` regex | `string` | `".*"` | no |
19 | | index\_format | Combined with the 'index' variable, it is used to evaluate the index age | `string` | `"%Y.%m.%d"` | no |
20 | | prefix | A prefix for the resource names, this helps create multiple instances of this stack for different environments | `string` | `""` | no |
21 | | python\_version | Lambda Python version to be used | `string` | `"3.6"` | no |
22 | | schedule | Cloudwatch Cron Schedule expression for running the cleanup function | `string` | `"cron(0 3 * * ? *)"` | no |
23 | | security\_group\_ids | Additional security group IDs to add. | `list(string)` | `[]` | no |
24 | | skip\_index | Index/indices to skip | `string` | `".kibana*"` | no |
25 | | subnet\_ids | Subnet IDs you want to deploy the lambda in. Only fill this in if you want to deploy your Lambda function inside a VPC. | `list(string)` | `[]` | no |
26 | | suffix | A suffix for the resource names, this helps create multiple instances of this stack for different environments | `string` | `""` | no |
27 | | tags | Tags to apply | `map` | <pre>{<br>  "Name": "es-cleanup"<br>}</pre> | no |
28 | | timeout | Maximum lambda execution time | `number` | `300` | no |
29 | 
30 | ## Outputs
31 | 
32 | | Name | Description |
33 | |------|-------------|
34 | | cloudwatch\_event\_arn | AWS Cloudwatch Event ARN |
35 | | iam\_role\_arn | AWS IAM ARN |
36 | | lambda\_arn | AWS Lambda ARN |
37 | 
38 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
39 | 
40 | 
41 | ## pre-commit hook
42 | 
43 | This repo uses a pre-commit hook; to learn more, [click here](https://github.com/antonbabenko/pre-commit-terraform).
44 | To trigger it manually, use these commands:
45 | 
46 | ```
47 | pre-commit install
48 | pre-commit run --all-files
49 | ```
50 | 
51 | 
52 | ## Example
53 | 
54 | ```
55 | terraform {
56 |   required_version = ">= 0.12"
57 | }
58 | 
59 | provider "aws" {
60 |   region = "eu-west-1"
61 | }
62 | 
63 | module "public_es_cleanup" {
64 |   source       = "github.com/cloudreach/aws-lambda-es-cleanup.git//terraform?ref=v0.14"
65 | 
66 |   prefix       = "public_es_"
67 |   es_endpoint  = "test-es-XXXXXXX.eu-central-1.es.amazonaws.com"
68 |   delete_after = 365
69 | }
70 | 
71 | 
72 | module "vpc_es_cleanup" {
73 |   source             = "github.com/cloudreach/aws-lambda-es-cleanup.git//terraform?ref=v0.14"
74 | 
75 |   prefix             = "vpc_es_"
76 |   es_endpoint        = "vpc-gc-demo-vpc-gloo5rzcdhyiykwdlots2hdjla.eu-central-1.es.amazonaws.com"
77 |   index              = "all"
78 |   delete_after       = 30
79 |   subnet_ids         = ["subnet-d8660da2"]
80 |   security_group_ids = ["sg-02dd3aa6da1b5"]
81 | }
82 | ```
83 | 
84 | 
85 | ### Issue
86 | In order to use the new module version you must have `terraform-provider-aws` greater than `~> 2.7` and use Terraform `~> 0.12`
87 | 


--------------------------------------------------------------------------------
/terraform/cloudwatch.tf:
--------------------------------------------------------------------------------
 1 | resource "aws_cloudwatch_event_rule" "schedule" {
 2 |   name                = "${var.prefix}es-cleanup-execution-schedule${var.suffix}"
 3 |   description         = "${var.prefix}es-cleanup execution schedule${var.suffix}"
 4 |   schedule_expression = var.schedule
 5 | }
 6 | 
 7 | resource "aws_cloudwatch_event_target" "es_cleanup" {
 8 |   target_id = "${var.prefix}lambda-es-cleanup${var.suffix}"
 9 |   rule      = aws_cloudwatch_event_rule.schedule.name
10 |   arn       = aws_lambda_function.es_cleanup.arn
11 | }
12 | 
13 | resource "aws_lambda_permission" "allow_cloudwatch" {
14 |   statement_id  = "AllowExecutionFromCloudWatch"
15 |   action        = "lambda:InvokeFunction"
16 |   function_name = aws_lambda_function.es_cleanup.arn
17 |   principal     = "events.amazonaws.com"
18 |   source_arn    = aws_cloudwatch_event_rule.schedule.arn
19 | }
20 | 
21 | resource "aws_cloudwatch_log_group" "cwlog" {
22 |   name = "/aws/lambda/${var.prefix}es-cleanup${var.suffix}"
23 | }
24 | 


--------------------------------------------------------------------------------
/terraform/iam.tf:
--------------------------------------------------------------------------------
 1 | data "aws_region" "current" {
 2 | }
 3 | 
 4 | data "aws_caller_identity" "current" {
 5 | }
 6 | 
 7 | data "aws_iam_policy_document" "policy" {
 8 |   statement {
 9 |     sid    = "LambdaLogCreation"
10 |     effect = "Allow"
11 |     actions = [
12 |       "logs:CreateLogGroup",
13 |       "logs:CreateLogStream",
14 |       "logs:PutLogEvents",
15 |     ]
16 |     resources = [
17 |       "arn:aws:logs:${data.aws_region.current.name}:*:log-group:/aws/lambda/${var.prefix}es-cleanup${var.suffix}",
18 |       "arn:aws:logs:${data.aws_region.current.name}:*:log-group:/aws/lambda/${var.prefix}es-cleanup${var.suffix}:*",
19 |     ]
20 |   }
21 | 
22 |   statement {
23 |     sid    = "LambdaVPCconfig"
24 |     effect = "Allow"
25 |     actions = [
26 |       "ec2:CreateNetworkInterface",
27 |       "ec2:DescribeNetworkInterfaces",
28 |       "ec2:DeleteNetworkInterface",
29 |     ]
30 |     resources = ["*"]
31 |   }
32 | 
33 |   statement {
34 |     sid    = "ESPermission"
35 |     effect = "Allow"
36 |     actions = [
37 |       "es:*",
38 |     ]
39 |     resources = [
40 |       "arn:aws:es:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:domain/*",
41 |     ]
42 |   }
43 | }
44 | 
45 | resource "aws_iam_policy" "policy" {
46 |   name        = "${var.prefix}es-cleanup${var.suffix}"
47 |   path        = "/"
48 |   description = "Policy for ${var.prefix}es-cleanup${var.suffix} Lambda function"
49 |   policy      = data.aws_iam_policy_document.policy.json
50 | }
51 | 
52 | resource "aws_iam_role" "role" {
53 |   name = "${var.prefix}es-cleanup${var.suffix}"
54 | 
55 |   assume_role_policy = <<EOF
56 | {
57 |   "Version": "2012-10-17",
58 |   "Statement": [
59 |     {
60 |       "Effect": "Allow",
61 |       "Principal": {
62 |         "Service": "lambda.amazonaws.com"
63 |       },
64 |       "Action": "sts:AssumeRole"
65 |     }
66 |   ]
67 | }
68 | EOF
69 | 
70 | }
71 | 
72 | resource "aws_iam_role_policy_attachment" "policy_attachment" {
73 |   role       = aws_iam_role.role.name
74 |   policy_arn = aws_iam_policy.policy.arn
75 | }
76 | 
77 | resource "aws_iam_role_policy_attachment" "policy_attachment_vpc" {
78 |   count      = length(var.subnet_ids) > 0 ? 1 : 0
79 |   role       = aws_iam_role.role.name
80 |   policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole"
81 | }
82 | 


--------------------------------------------------------------------------------
/terraform/lambda.tf:
--------------------------------------------------------------------------------
 1 | data "archive_file" "es_cleanup_lambda" {
 2 |   type        = "zip"
 3 |   source_file = "${path.module}/../es_cleanup.py"
 4 |   output_path = "${path.module}/es_cleanup.zip"
 5 | }
 6 | 
 7 | locals {
 8 |   sg_ids = [element(concat(aws_security_group.lambda.*.id, [""]), 0)]
 9 | }
10 | 
11 | data "null_data_source" "lambda_file" {
12 |   inputs = {
13 |     filename = "${path.module}/es_cleanup.zip"
14 |   }
15 | }
16 | 
17 | resource "aws_lambda_function" "es_cleanup" {
18 |   filename         = data.archive_file.es_cleanup_lambda.output_path # use the archive's own path instead of the deprecated null_data_source indirection
19 |   function_name    = "${var.prefix}es-cleanup${var.suffix}"
20 |   description      = "${var.prefix}es-cleanup${var.suffix}"
21 |   timeout          = var.timeout
22 |   runtime          = "python${var.python_version}"
23 |   role             = aws_iam_role.role.arn
24 |   handler          = "es_cleanup.lambda_handler"
25 |   source_code_hash = data.archive_file.es_cleanup_lambda.output_base64sha256
26 | 
27 |   environment {
28 |     variables = {
29 |       es_endpoint  = var.es_endpoint
30 |       index        = var.index
31 |       skip_index   = var.skip_index
32 |       delete_after = var.delete_after
33 |       index_format = var.index_format
34 |     }
35 |   }
36 | 
37 |   tags = merge(
38 |     var.tags,
39 |     {
40 |       "Scope" = "${var.prefix}lambda_function_to_elasticsearch${var.suffix}"
41 |     },
42 |   )
43 | 
44 |   # This block has empty lists if no security group is created and subnet_ids is empty.
45 |   # When these lists are empty the lambda is deployed without VPC support.
46 |   vpc_config {
47 |     subnet_ids         = var.subnet_ids
48 |     security_group_ids = compact(concat(local.sg_ids, var.security_group_ids)) # compact() drops the "" placeholder from local.sg_ids
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/terraform/outputs.tf:
--------------------------------------------------------------------------------
 1 | output "iam_role_arn" {
 2 |   description = "AWS IAM ARN"
 3 |   value       = aws_iam_role.role.arn
 4 | }
 5 | 
 6 | output "lambda_arn" {
 7 |   description = "AWS Lambda ARN"
 8 |   value       = aws_lambda_function.es_cleanup.arn
 9 | }
10 | 
11 | output "cloudwatch_event_arn" {
12 |   description = "AWS Cloudwatch Event ARN"
13 |   value       = aws_cloudwatch_event_rule.schedule.arn
14 | }
15 | 


--------------------------------------------------------------------------------
/terraform/sg.tf:
--------------------------------------------------------------------------------
 1 | data "aws_subnet" "selected" {
 2 |   count = length(var.subnet_ids) > 0 ? 1 : 0
 3 |   id    = var.subnet_ids[0]
 4 | }
 5 | 
 6 | resource "aws_security_group" "lambda" {
 7 |   count       = length(var.subnet_ids) > 0 ? 1 : 0
 8 |   name        = "${var.prefix}lambda_cleanup_to_elasticsearch${var.suffix}"
 9 |   description = "${var.prefix}lambda_cleanup_to_elasticsearch${var.suffix}"
10 |   vpc_id      = data.aws_subnet.selected[0].vpc_id
11 | 
12 |   egress {
13 |     from_port   = 443
14 |     to_port     = 443
15 |     protocol    = "tcp"
16 |     cidr_blocks = ["0.0.0.0/0"]
17 |   }
18 | 
19 |   egress {
20 |     from_port   = 53
21 |     to_port     = 53
22 |     protocol    = "tcp"
23 |     cidr_blocks = ["0.0.0.0/0"]
24 |   }
25 | 
26 |   egress {
27 |     from_port   = 53
28 |     to_port     = 53
29 |     protocol    = "udp"
30 |     cidr_blocks = ["0.0.0.0/0"]
31 |   }
32 | 
33 |   tags = merge(
34 |     var.tags,
35 |     {
36 |       "Scope" = "${var.prefix}lambda_function_to_elasticsearch${var.suffix}"
37 |     },
38 |   )
39 | }
40 | 


--------------------------------------------------------------------------------
/terraform/variables.tf:
--------------------------------------------------------------------------------
 1 | variable "prefix" {
 2 |   description = "A prefix for the resource names, this helps create multiple instances of this stack for different environments"
 3 |   default     = ""
 4 |   type        = string
 5 | }
 6 | 
 7 | variable "suffix" {
 8 |   description = "A suffix for the resource names, this helps create multiple instances of this stack for different environments"
 9 |   default     = ""
10 |   type        = string
11 | }
12 | 
13 | variable "schedule" {
14 |   description = "Cloudwatch Cron Schedule expression for running the cleanup function"
15 |   default     = "cron(0 3 * * ? *)"
16 |   type        = string
17 | }
18 | 
19 | variable "timeout" {
20 |   description = "Maximum lambda execution time"
21 |   default     = 300
22 |   type        = number
23 | }
24 | 
25 | variable "es_endpoint" {
26 |   description = "AWS ES FQDN e.g. search-es-demo-xxxxxxxxxx.eu-west-1.es.amazonaws.com"
27 |   type        = string
28 | }
29 | 
30 | variable "index" {
31 |   description = "Index/indices to process using regex, except the one matching `skip_index` regex"
32 |   default     = ".*"
33 |   type        = string
34 | }
35 | 
36 | variable "skip_index" {
37 |   description = "Index/indices to skip"
38 |   default     = ".kibana*"
39 |   type        = string
40 | }
41 | 
42 | variable "delete_after" {
43 |   description = "Numbers of days to preserve"
44 |   default     = 15
45 |   type        = number
46 | }
47 | 
48 | variable "index_format" {
49 |   description = "Combined with the 'index' variable, it is used to evaluate the index age"
50 |   default     = "%Y.%m.%d"
51 |   type        = string
52 | }
53 | 
54 | variable "python_version" {
55 |   description = "Lambda Python version to be used"
56 |   default     = "3.6"
57 |   type        = string
58 | }
59 | 
60 | variable "subnet_ids" {
61 |   description = "Subnet IDs you want to deploy the lambda in. Only fill this in if you want to deploy your Lambda function inside a VPC."
62 |   type        = list(string)
63 |   default     = []
64 | }
65 | 
66 | variable "security_group_ids" {
67 |   description = "Additional security group IDs to add."
68 |   type        = list(string)
69 |   default     = []
70 | }
71 | 
72 | variable "tags" {
73 |   description = "Tags to apply"
74 |   default = {
75 |     Name = "es-cleanup"
76 |   }
77 | }
78 | 


--------------------------------------------------------------------------------
/terraform/versions.tf:
--------------------------------------------------------------------------------
1 | 
2 | terraform {
3 |   required_version = ">= 0.12"
4 | }
5 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | # ES cluster creation
 2 | 
 3 | This script will:
 4 | - provision a simple AWS ES node
 5 | - deploy the `es-cleanup` module
 6 | - create multiple indices
 7 | 
 8 | 
 9 | ## How to
10 | 
11 | Just run
12 | 
13 | ```
14 | ./run.sh
15 | 
16 | ```
17 | 
18 | ## Notes
19 | 
20 | This demo script will save the terraform state in your local folder.
21 | 
22 | Remember to destroy your test scenario using the command:
23 | 
24 | ```
25 | terraform destroy -auto-approve
26 | ```
27 | 


--------------------------------------------------------------------------------
/tests/data.tf:
--------------------------------------------------------------------------------
1 | locals {
2 |   domain_name = var.use_prefix ? join("", [var.domain_prefix, var.domain_name]) : var.domain_name
3 |   inside_vpc  = length(var.vpc_options["subnet_ids"]) > 0 ? true : false
4 | }
5 | 


--------------------------------------------------------------------------------
/tests/main.tf:
--------------------------------------------------------------------------------
  1 | terraform {
  2 |   required_version = ">= 0.12"
  3 | }
  4 | 
  5 | provider "aws" {
  6 |   region = "eu-central-1"
  7 | }
  8 | 
  9 | 
 10 | data "http" "myip" {
 11 |   url = "https://ipv4.icanhazip.com" # HTTPS: the returned address is used in the ES access policy, so it must not be spoofable in transit
 12 | }
 13 | 
 14 | data "aws_caller_identity" "current" {}
 15 | 
 16 | data "aws_iam_policy_document" "es_management_access" {
 17 |   count = false == local.inside_vpc ? 1 : 0
 18 | 
 19 |   statement {
 20 |     sid = "1"
 21 |     actions = [
 22 |       "es:*",
 23 |     ]
 24 | 
 25 |     resources = ["${aws_elasticsearch_domain.es[0].arn}/*"]
 26 | 
 27 |     principals {
 28 |       type        = "AWS"
 29 |       identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"]
 30 |     }
 31 | 
 32 |   }
 33 | 
 34 |   statement {
 35 |     sid = "2"
 36 |     actions = [
 37 |       "es:*",
 38 |     ]
 39 | 
 40 |     resources = ["${aws_elasticsearch_domain.es[0].arn}/*"]
 41 | 
 42 |     principals {
 43 |       type        = "AWS"
 44 |       identifiers = ["*"]
 45 |     }
 46 |     condition {
 47 |       test     = "IpAddress"
 48 |       variable = "aws:SourceIp"
 49 | 
 50 |       values = ["${chomp(data.http.myip.body)}/32"]
 51 |     }
 52 |   }
 53 | }
 54 | 
 55 | resource "aws_elasticsearch_domain" "es" {
 56 |   count = false == local.inside_vpc ? 1 : 0
 57 | 
 58 |   depends_on = [aws_iam_service_linked_role.es]
 59 | 
 60 |   domain_name           = local.domain_name
 61 |   elasticsearch_version = var.es_version
 62 | 
 63 |   encrypt_at_rest {
 64 |     enabled    = var.encrypt_at_rest
 65 |     kms_key_id = var.kms_key_id
 66 |   }
 67 | 
 68 |   cluster_config {
 69 |     instance_type            = var.instance_type
 70 |     instance_count           = var.instance_count
 71 |     dedicated_master_enabled = var.instance_count >= var.dedicated_master_threshold ? true : false
 72 |     dedicated_master_count   = var.instance_count >= var.dedicated_master_threshold ? 3 : 0
 73 |     dedicated_master_type    = var.instance_count >= var.dedicated_master_threshold ? var.dedicated_master_type != "false" ? var.dedicated_master_type : var.instance_type : ""
 74 |     zone_awareness_enabled   = var.es_zone_awareness
 75 |   }
 76 | 
 77 |   advanced_options = var.advanced_options
 78 | 
 79 |   node_to_node_encryption {
 80 |     enabled = var.node_to_node_encryption_enabled
 81 |   }
 82 | 
 83 |   ebs_options {
 84 |     ebs_enabled = var.ebs_volume_size > 0 ? true : false
 85 |     volume_size = var.ebs_volume_size
 86 |     volume_type = var.ebs_volume_type
 87 |   }
 88 | 
 89 |   snapshot_options {
 90 |     automated_snapshot_start_hour = var.snapshot_start_hour
 91 |   }
 92 | 
 93 |   tags = merge(
 94 |     {
 95 |       "Domain" = local.domain_name
 96 |     },
 97 |     var.tags,
 98 |   )
 99 | }
100 | 
101 | resource "aws_elasticsearch_domain_policy" "es_management_access" {
102 |   count = false == local.inside_vpc ? 1 : 0
103 | 
104 |   domain_name     = local.domain_name
105 |   access_policies = data.aws_iam_policy_document.es_management_access[0].json
106 | }
107 | 
108 | 
109 | 
110 | 
111 | module "public_es_cleanup" {
112 |   source = "../terraform/"
113 | 
114 |   prefix       = "public_es_"
115 |   es_endpoint  = element(aws_elasticsearch_domain.es.*.endpoint, 0)
116 |   delete_after = 1
117 | }
118 | 


--------------------------------------------------------------------------------
/tests/main_vpc.tf:
--------------------------------------------------------------------------------
 1 | /* A separate set of data.aws_iam_policy_document, aws_elasticsearch_domain and aws_elasticsearch_domain_policy is defined here because terraform/aws_elasticsearch_domain
 2 | currently does not properly handle a null/empty "vpc_options" block. */
 3 | 
 4 | data "aws_iam_policy_document" "es_vpc_management_access" {
 5 |   count = local.inside_vpc ? 1 : 0
 6 | 
 7 |   statement {
 8 |     actions = [
 9 |       "es:*",
10 |     ]
11 | 
12 |     resources = [
13 |       aws_elasticsearch_domain.es_vpc[0].arn,
14 |       "${aws_elasticsearch_domain.es_vpc[0].arn}/*",
15 |     ]
16 | 
17 |     principals {
18 |       type = "AWS"
19 | 
20 |       identifiers = distinct(compact(var.management_iam_roles))
21 |     }
22 |   }
23 | }
24 | 
25 | resource "aws_iam_service_linked_role" "es" {
26 |   count            = var.create_iam_service_linked_role ? 1 : 0
27 |   aws_service_name = "es.amazonaws.com"
28 | }
29 | 
30 | resource "aws_elasticsearch_domain" "es_vpc" {
31 |   count = local.inside_vpc ? 1 : 0
32 | 
33 |   depends_on = [aws_iam_service_linked_role.es]
34 | 
35 |   domain_name           = local.domain_name
36 |   elasticsearch_version = var.es_version
37 | 
38 |   encrypt_at_rest {
39 |     enabled    = var.encrypt_at_rest
40 |     kms_key_id = var.kms_key_id
41 |   }
42 | 
43 |   cluster_config {
44 |     instance_type            = var.instance_type
45 |     instance_count           = var.instance_count
46 |     dedicated_master_enabled = var.instance_count >= var.dedicated_master_threshold ? true : false
47 |     dedicated_master_count   = var.instance_count >= var.dedicated_master_threshold ? 3 : 0
48 |     dedicated_master_type    = var.instance_count >= var.dedicated_master_threshold ? var.dedicated_master_type != "false" ? var.dedicated_master_type : var.instance_type : ""
49 |     zone_awareness_enabled   = var.es_zone_awareness
50 |   }
51 | 
52 |   advanced_options = var.advanced_options
53 | 
54 |   node_to_node_encryption {
55 |     enabled = var.node_to_node_encryption_enabled
56 |   }
57 | 
58 |   vpc_options {
59 |     subnet_ids         = var.vpc_options["subnet_ids"]
60 |     security_group_ids = var.vpc_options["security_group_ids"]
61 |   }
62 | 
63 |   ebs_options {
64 |     ebs_enabled = var.ebs_volume_size > 0 ? true : false
65 |     volume_size = var.ebs_volume_size
66 |     volume_type = var.ebs_volume_type
67 |   }
68 | 
69 |   snapshot_options {
70 |     automated_snapshot_start_hour = var.snapshot_start_hour
71 |   }
72 | 
73 |   tags = merge(
74 |     {
75 |       "Domain" = local.domain_name
76 |     },
77 |     var.tags,
78 |   )
79 | }
80 | 
81 | resource "aws_elasticsearch_domain_policy" "es_vpc_management_access" {
82 |   count = local.inside_vpc ? 1 : 0
83 | 
84 |   domain_name     = local.domain_name
85 |   access_policies = data.aws_iam_policy_document.es_vpc_management_access[0].json
86 | }
87 | 


--------------------------------------------------------------------------------
/tests/outputs.tf:
--------------------------------------------------------------------------------
 1 | output "arn" {
 2 |   description = "Amazon Resource Name (ARN) of the domain"
 3 |   value = element(
 4 |     concat(
 5 |       aws_elasticsearch_domain.es_vpc.*.arn,
 6 |       aws_elasticsearch_domain.es.*.arn,
 7 |       [""],
 8 |     ),
 9 |     0,
10 |   )
11 | }
12 | 
13 | output "domain_id" {
14 |   description = "Unique identifier for the domain"
15 |   value = element(
16 |     concat(
17 |       aws_elasticsearch_domain.es_vpc.*.domain_id,
18 |       aws_elasticsearch_domain.es.*.domain_id,
19 |       [""],
20 |     ),
21 |     0,
22 |   )
23 | }
24 | 
25 | output "domain_name" {
26 |   description = "The name of the Elasticsearch domain"
27 |   value = element(
28 |     concat(
29 |       aws_elasticsearch_domain.es_vpc.*.domain_name,
30 |       aws_elasticsearch_domain.es.*.domain_name,
31 |       [""],
32 |     ),
33 |     0,
34 |   )
35 | }
36 | 
37 | output "endpoint" {
38 |   description = "Domain-specific endpoint used to submit index, search, and data upload requests"
39 |   value = element(
40 |     concat(
41 |       aws_elasticsearch_domain.es_vpc.*.endpoint,
42 |       aws_elasticsearch_domain.es.*.endpoint,
43 |       [""],
44 |     ),
45 |     0,
46 |   )
47 | }
48 | 
49 | output "kibana_endpoint" {
50 |   description = "Domain-specific endpoint for kibana without https scheme"
51 |   value = element(
52 |     concat(
53 |       aws_elasticsearch_domain.es_vpc.*.kibana_endpoint,
54 |       aws_elasticsearch_domain.es.*.kibana_endpoint,
55 |       [""],
56 |     ),
57 |     0,
58 |   )
59 | }
60 | 


--------------------------------------------------------------------------------
/tests/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | set -e
 3 | 
 4 | 
 5 | info(){
 6 |   echo "[INFO]    $1"
 7 | }
 8 | warning(){
 9 |   echo "[WARNING] $1"
10 | }
11 | error(){
12 |   echo "[ERROR]   $1"
13 | }
14 | fatal(){
15 |   echo "[FATAL]   $1" ;
16 |   exit 1
17 | }
18 | 
19 | 
20 | 
21 | 
22 | PLAN_FILE="plan.out"
23 | 
24 | terraform_init() {
25 |     info "Running terraform init"
26 |     terraform init -input=false || fatal "Could not initialize terraform"
27 | }
28 | 
29 | 
30 | terraform_validate() {
31 |     info "Running terraform validate"
32 |     terraform validate . || fatal "Could not validate terraform"
33 | }
34 | 
35 | 
36 | terraform_plan() {
37 |     info "Running terraform plan"
38 |     terraform plan -out=$PLAN_FILE || fatal "Terraform plan failed"
39 | }
40 | 
41 | terraform_apply() {
42 |     terraform_plan
43 |     info "Running terraform apply"
44 |     terraform apply  \
45 |         -lock=true \
46 |         -input=false \
47 |         -refresh=true \
48 |         -auto-approve=true \
49 |         $PLAN_FILE || fatal "Terraform apply failed"
50 |     rm $PLAN_FILE
51 | }
52 | 
53 | create_index() { # POST a small test document to the URL given as $1
54 |     curl -q -i -X POST -H 'Content-Type:application/json'  -d '{ "test": "test"}' "$1" # quoted so the URL is passed as exactly one argument
55 | }
56 | 
57 | terraform_init
58 | terraform_apply
59 | 
60 | TARGET="https://$(terraform output endpoint)" # "endpoint" is the output declared in outputs.tf
61 | 
62 | create_index "$TARGET/k8s-2022.01.01/books"
63 | create_index "$TARGET/k8s-2021.01.01/books"
64 | create_index "$TARGET/k8s-2020.01.01/books"
65 | create_index "$TARGET/k8s-2019.01.01/books"
66 | create_index "$TARGET/k8s-2018.01.01/books"
67 | create_index "$TARGET/k8s-2012.01.01/books"
68 | create_index "$TARGET/k8s-2011.01.01/books"
69 | 
70 | 
71 | curl "$TARGET/_aliases?pretty=true" # quoted so the shell never treats '?' as a glob character
72 | 


--------------------------------------------------------------------------------
/tests/variables.tf:
--------------------------------------------------------------------------------
  1 | variable "create_iam_service_linked_role" {
  2 |   description = "Whether to create IAM service linked role for AWS ElasticSearch service. Can be only one per AWS account."
  3 |   type        = bool
  4 |   default     = false
  5 | }
  6 | 
  7 | variable "domain_name" {
  8 |   description = "Domain name for Elasticsearch cluster"
  9 |   type        = string
 10 |   default     = "es-domain"
 11 | }
 12 | 
 13 | variable "es_version" {
 14 |   description = "Version of Elasticsearch to deploy (default 7.1)"
 15 |   type        = string
 16 |   default     = "7.1"
 17 | }
 18 | 
 19 | variable "instance_type" {
 20 |   description = "ES instance type for data nodes in the cluster (default t2.small.elasticsearch)"
 21 |   type        = string
 22 |   default     = "t2.small.elasticsearch"
 23 | }
 24 | 
 25 | variable "instance_count" {
 26 |   description = "Number of data nodes in the cluster (default 1)"
 27 |   type        = number
 28 |   default     = 1
 29 | }
 30 | 
 31 | variable "dedicated_master_type" {
 32 |   description = "ES instance type to be used for dedicated masters (default same as instance_type)"
 33 |   type        = string
 34 |   default     = "false"
 35 | }
 36 | 
 37 | variable "encrypt_at_rest" {
 38 |   description = "Enable encryption at rest (only specific instance family types support it: m4, c4, r4, i2, i3 default: false)"
 39 |   type        = bool
 40 |   default     = false
 41 | }
 42 | 
 43 | variable "management_iam_roles" {
 44 |   description = "List of IAM role ARNs from which to permit management traffic (default ['*']).  Note that a client must match both the IP address and the IAM role patterns in order to be permitted access."
 45 |   type        = list(string)
 46 |   default     = ["*"]
 47 | }
 48 | 
 49 | variable "management_public_ip_addresses" {
 50 |   description = "List of IP addresses from which to permit management traffic (default []).  Note that a client must match both the IP address and the IAM role patterns in order to be permitted access."
 51 |   type        = list(string)
 52 |   default     = []
 53 | }
 54 | 
 55 | variable "es_zone_awareness" { # optional flag: off unless explicitly enabled
 56 |   type        = bool
 57 |   description = "Enable zone awareness for Elasticsearch cluster (default false)"
 58 |   default     = false
 59 | }
 60 | 
 61 | variable "es_zone_awareness_count" { # optional input: 2 unless overridden
 62 |   type        = number
 63 |   description = "Number of availability zones used for data nodes (default 2)"
 64 |   default     = 2
 65 | }
 66 | 
 67 | variable "ebs_volume_size" { # optional input: size in GB, 10 unless overridden
 68 |   type        = number
 69 |   description = "Optionally use EBS volumes for data storage by specifying volume size in GB (default 10)"
 70 |   default     = 10
 71 | }
 72 | 
 73 | variable "ebs_volume_type" { # optional input: "gp2" unless overridden
 74 |   type        = string
 75 |   description = "Storage type of EBS volumes, if used (default gp2)"
 76 |   default     = "gp2"
 77 | }
 78 | 
 79 | variable "kms_key_id" { # optional input: empty string unless a key id is supplied
 80 |   type        = string
 81 |   description = "KMS key used for elasticsearch"
 82 |   default     = ""
 83 | }
 84 | 
 85 | variable "snapshot_start_hour" { # optional input: hour given in UTC, 0 unless overridden
 86 |   type        = number
 87 |   description = "Hour at which automated snapshots are taken, in UTC (default 0)"
 88 |   default     = 0
 89 | }
 90 | 
 91 | variable "vpc_options" { # map of string lists; both keys default to empty lists
 92 |   type        = map(list(string))
 93 |   description = "A map of supported vpc options"
 94 | 
 95 |   default = {
 96 |     subnet_ids         = []
 97 |     security_group_ids = []
 98 |   }
 99 | }
100 | 
101 | variable "tags" { # optional input: empty map unless tags are supplied
102 |   type        = map(string)
103 |   description = "tags to apply to all resources"
104 |   default     = {}
105 | }
106 | 
107 | variable "use_prefix" { # optional flag: on unless explicitly disabled
108 |   type        = bool
109 |   description = "Flag indicating whether or not to use the domain_prefix. Default: true"
110 |   default     = true
111 | }
112 | 
113 | variable "domain_prefix" { # optional input: "tf-" unless overridden
114 |   type        = string
115 |   description = "String to be prefixed to search domain. Default: tf-"
116 |   default     = "tf-"
117 | }
118 | 
119 | variable "dedicated_master_threshold" { # optional input: 10 unless overridden
120 |   type        = number
121 |   description = "The number of instances above which dedicated master nodes will be used. Default: 10"
122 |   default     = 10
123 | }
124 | 
125 | variable "advanced_options" { # string-to-string map; the default value is the quoted string "true", not a bool
126 |   type        = map(string)
127 |   description = "Map of key-value string pairs to specify advanced configuration options. Note that the values for these configuration options must be strings (wrapped in quotes) or they may be wrong and cause a perpetual diff, causing Terraform to want to recreate your Elasticsearch domain on every apply."
128 |   default = {
129 |     "rest.action.multi.allow_explicit_index" = "true"
130 |   }
131 | }
132 | 
133 | 
134 | variable "node_to_node_encryption_enabled" { # optional flag: off unless explicitly enabled
135 |   type        = bool
136 |   description = "Whether to enable node-to-node encryption."
137 |   default     = false
138 | }
139 | 


--------------------------------------------------------------------------------