├── .gitignore ├── .pre-commit-config.yaml ├── AUTHORS.md ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── diagram.png ├── es_cleanup.py ├── es_cleanup_test.py ├── json_file ├── cloudwatch-target.json ├── es_policy.json └── trust_policy.json ├── requirements.txt ├── serverless.yml ├── terraform ├── README.md ├── cloudwatch.tf ├── iam.tf ├── lambda.tf ├── outputs.tf ├── sg.tf ├── variables.tf └── versions.tf └── tests ├── README.md ├── data.tf ├── main.tf ├── main_vpc.tf ├── outputs.tf ├── run.sh └── variables.tf /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | **/.terraform 10 | **/terraform.tfstate* 11 | **/.terraform.tfstate* 12 | 13 | **/*es-cleanup.zip 14 | **/*es_cleanup.zip 15 | .serverless 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .venv/ 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: git://github.com/antonbabenko/pre-commit-terraform 3 | rev: v1.27.0 4 | hooks: 5 | - id: terraform_fmt 6 | - id: terraform_docs 7 | - repo: git://github.com/pre-commit/pre-commit-hooks 8 | rev: v2.5.0 9 | hooks: 10 | - id: check-json 11 | - id: end-of-file-fixer 12 | - id: trailing-whitespace 13 | - id: check-case-conflict 14 | - id: check-merge-conflict 15 | - id: detect-private-key 16 | - id: detect-aws-credentials 17 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Authors 2 | 3 | * **Giulio Calzolari** - *Initial work* - [giuliocalzo](https://github.com/giuliocalzolari) 4 | * **Omar Tarabai** - *Terraform deployment* - [omar-tarabai](https://github.com/omar-tarabai) 5 | * **netflash** - 
[netflash](https://github.com/netflash) 6 | * **dcodix** - *Python 3 support* - [dcodix](https://github.com/dcodix) 7 | * **cheungpat** - *index parsing* - [cheungpat](https://github.com/cheungpat) 8 | * **cliveza** - *terraform suffix* - [cliveza](https://github.com/cliveza) 9 | * **fllaca** - *timeout var* - [fllaca](https://github.com/fllaca) 10 | * **johannes-gehrs** - *Fix Logic, Improve Test Coverage, and Resilience* - [johannes-gehrs](https://github.com/johannes-gehrs) 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [v0.8](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.8) (2018-09-21) 4 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.7...v0.8) 5 | 6 | **Merged pull requests:** 7 | 8 | - Added suffix variable [\#19](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/19) ([cliveza](https://github.com/cliveza)) 9 | 10 | ## [v0.7](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.7) (2018-09-13) 11 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.6...v0.7) 12 | 13 | **Implemented enhancements:** 14 | 15 | - Code optimisation when aws\_lambda\_function.vpc\_config is solved [\#12](https://github.com/cloudreach/aws-lambda-es-cleanup/issues/12) 16 | 17 | **Fixed bugs:** 18 | 19 | - Code optimisation when aws\\_lambda\\_function.vpc\\_config is solved [\#12](https://github.com/cloudreach/aws-lambda-es-cleanup/issues/12) 20 | 21 | ## [v0.6](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.6) (2018-08-17) 22 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.5...v0.6) 23 | 24 | **Fixed bugs:** 25 | 26 | - Fix terraform security group [\#15](https://github.com/cloudreach/aws-lambda-es-cleanup/issues/15) 27 | - Fix index name parsed incorrectly if index format contains hyphen 
[\#18](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/18) ([cheungpat](https://github.com/cheungpat)) 28 | 29 | **Merged pull requests:** 30 | 31 | - \[Resolve \#15\] Enable DNS connection on security group [\#16](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/16) ([giuliocalzolari](https://github.com/giuliocalzolari)) 32 | - Update es\_policy.json [\#14](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/14) ([giuliocalzolari](https://github.com/giuliocalzolari)) 33 | 34 | ## [v0.5](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.5) (2018-02-28) 35 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.4...v0.5) 36 | 37 | **Implemented enhancements:** 38 | 39 | - VPC support for the Lambda function [\#10](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/10) ([MattiasGees](https://github.com/MattiasGees)) 40 | 41 | ## [v0.4](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.4) (2018-02-02) 42 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.3...v0.4) 43 | 44 | **Implemented enhancements:** 45 | 46 | - Add the feature that allows to pass variables from event [\#9](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/9) ([bilardi](https://github.com/bilardi)) 47 | 48 | ## [v0.3](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.3) (2017-12-12) 49 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.2...v0.3) 50 | 51 | **Implemented enhancements:** 52 | 53 | - Compatible with py2 and py3 [\#8](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/8) ([dcodix](https://github.com/dcodix)) 54 | 55 | **Fixed bugs:** 56 | 57 | - Compatible with py2 and py3 [\#8](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/8) ([dcodix](https://github.com/dcodix)) 58 | 59 | **Merged pull requests:** 60 | 61 | - typo [\#6](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/6) 
([netflash](https://github.com/netflash)) 62 | 63 | ## [v0.2](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.2) (2017-11-09) 64 | [Full Changelog](https://github.com/cloudreach/aws-lambda-es-cleanup/compare/v0.1...v0.2) 65 | 66 | **Merged pull requests:** 67 | 68 | - Add terraform stack [\#5](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/5) ([otarabai](https://github.com/otarabai)) 69 | - update Readme [\#1](https://github.com/cloudreach/aws-lambda-es-cleanup/pull/1) ([giuliocalzolari](https://github.com/giuliocalzolari)) 70 | 71 | ## [v0.1](https://github.com/cloudreach/aws-lambda-es-cleanup/tree/v0.1) (2017-05-23) 72 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Hello! Thank you for choosing to help contribute to one of the Cloudreach OpenSource projects. There are many ways you can contribute and help is always welcome. We simply ask that you follow the following contribution policies. 4 | 5 | - [Submit a Bug Report](#submit_a_bug_report) 6 | - [Enhancement Proposal](#enhancement_proposal) 7 | - [Contributing Code](#contributing_code) 8 | 9 | ## Submit a Bug Report 10 | 11 | Note: DO NOT include your credentials in ANY code examples, descriptions, or media you make public. 12 | 13 | Before submitting a bug, please check our [issues page](https://github.com/cloudreach/aws-lambda-es-cleanup/issues) to see if it's already been reported. 14 | 15 | When reporting a bug, fill out the required template, and please include as much detail as possible as it helps us resolve issues faster. 16 | 17 | ## Enhancement Proposal 18 | 19 | Enhancement proposals should: 20 | 21 | * Use a descriptive title. 22 | * Provide a step-by-step description of the suggested enhancement. 23 | * Provide specific examples to demonstrate the steps. 
24 | * Describe the current behaviour and explain which behaviour you expected to see instead. 25 | * Keep the scope as narrow as possible, to make it easier to implement. 26 | 27 | Remember that this is a volunteer-driven project, and that contributions are welcome. 28 | 29 | ## Contributing Code 30 | 31 | Contributions should be made in response to a particular GitHub Issue. We find it easier to review code if we've already discussed what it should do, and assessed if it fits with the wider codebase. 32 | 33 | A good pull request: 34 | 35 | * Is clear. 36 | * Works across all supported versions of Python. 37 | * Complies with the existing codebase style ([flake8](http://flake8.pycqa.org/en/latest/), [pylint](https://www.pylint.org/)). 38 | * Includes [docstrings](https://www.python.org/dev/peps/pep-0257/) and comments for unintuitive sections of code. 39 | * Includes documentation for new features. 40 | * Is appropriately licensed (Apache 2.0). 41 | 42 | 43 | 44 | # Get Started 45 | 46 | * Clone the repository locally: 47 | 48 | ```bash 49 | $ git clone git@github.com:cloudreach/aws-lambda-es-cleanup.git 50 | ``` 51 | 52 | * Install your local copy into a [virtualenv](http://docs.python-guide.org/en/latest/dev/virtualenvs/). Assuming you have virtualenv installed, this is how you set up your fork for local development: 53 | 54 | ```bash 55 | $ cd aws-lambda-es-cleanup/ 56 | $ virtualenv env 57 | $ source env/bin/activate 58 | $ pip install -r requirements.txt 59 | $ pip install -e . 60 | ``` 61 | 62 | * Create a branch for local development: 63 | 64 | ```bash 65 | $ git checkout -b branch- 66 | ``` 67 | 68 | * Make sure the changes comply with the pull request guidelines in the section on [Contributing Code](#contributing-code). 69 | 70 | * Commit your changes: 71 | 72 | ```bash 73 | $ git add . 74 | $ git commit 75 | ``` 76 | 77 | * Commit messages should follow [these guidelines](https://github.com/erlang/otp/wiki/Writing-good-commit-messages). 
78 | 79 | * Push your branch to GitHub: 80 | 81 | ```bash 82 | $ git push origin 83 | ``` 84 | 85 | * Submit a pull request through the GitHub website. 86 | 87 | 88 | Credits 89 | ------- 90 | 91 | This document took inspiration from the CONTRIBUTING files of the [Atom](https://github.com/atom/atom/blob/abccce6ee9079fdaefdecb018e72ea64000e52ef/CONTRIBUTING.md) and [Boto3](https://github.com/boto/boto3/blob/e85febf46a819d901956f349afef0b0eaa4d906d/CONTRIBUTING.rst) projects. 92 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache Software License 2.0 2 | 3 | Copyright 2020 **Cloudreach Europe Limited** or its affiliates. All Rights Reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AWS Lambda Elasticsearch Index Cleanup 2 | 3 | ## Overview 4 | This AWS Lambda function allows you to delete the old Elasticsearch indexes using SigV4Auth authentication. You configure the AWS Elasticsearch Access Policy authorizing the Lambda Role or the AWS Account number instead of using the IP address whitelist. 5 | 6 | ## Diagram 7 | 8 |

9 | 10 |

11 | 12 | 13 | 14 | ## Getting Started 15 | ### How To install 16 | 17 | Clone the repository 18 | 19 | ```bash 20 | $ git clone git@github.com:cloudreach/aws-lambda-es-cleanup.git 21 | $ cd aws-lambda-es-cleanup/ 22 | ``` 23 | 24 | Adapt the IAM policies in `json_file/es_policy.json` and `json_file/trust_policy.json` to your environment 25 | 26 | Create the IAM Role 27 | 28 | ```bash 29 | $ aws iam create-role --role-name es-cleanup-lambda \ 30 | --assume-role-policy-document file://json_file/trust_policy.json 31 | 32 | ``` 33 | 34 | ```bash 35 | $ aws iam put-role-policy --role-name es-cleanup-lambda \ 36 | --policy-name es_cleanup \ 37 | --policy-document file://json_file/es_policy.json 38 | ``` 39 | 40 | 41 | If your Lambda is running inside a VPC, also attach these policies: 42 | 43 | 44 | ``` 45 | arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole 46 | arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole 47 | ``` 48 | 49 | 50 | Create your Lambda package 51 | 52 | ```bash 53 | $ zip es-cleanup-lambda.zip es_cleanup.py 54 | ``` 55 | 56 | 57 | 58 | ### Lambda deployment 59 | Using awscli you can create your AWS function and set the proper IAM role with the right Account ID 60 | 61 | ```bash 62 | $ export AWS_DEFAULT_REGION=eu-west-1 63 | $ ESENDPOINT="search-es-demo-zveqnhnhjqm5flntemgmx5iuya.eu-west-1.es.amazonaws.com" #ES endpoint 64 | 65 | $ aws lambda create-function \ 66 | --function-name es-cleanup-lambda \ 67 | --environment Variables={es_endpoint=$ESENDPOINT} \ 68 | --zip-file fileb://es-cleanup-lambda.zip \ 69 | --description "Elasticsearch Index Cleanup" \ 70 | --role arn:aws:iam::123456789012:role/es-cleanup-lambda \ 71 | --handler es_cleanup.lambda_handler \ 72 | --runtime python3.6 \ 73 | --timeout 180 74 | ``` 75 | 76 | If you prefer to pass the settings in the invocation payload instead of environment variables: 77 | ```bash 78 | $ export AWS_DEFAULT_REGION=eu-west-1 79 | 80 | $ aws lambda create-function \ 81 | --function-name es-cleanup-lambda 
\ 82 | --zip-file fileb://es-cleanup-lambda.zip \ 83 | --description "Elasticsearch Index Cleanup" \ 84 | --role arn:aws:iam::123456789012:role/es-cleanup-lambda \ 85 | --handler es_cleanup.lambda_handler \ 86 | --runtime python3.6 \ 87 | --timeout 180 88 | ``` 89 | 90 | ### Lambda invoke with parameters 91 | It is possible to override the default behaviour by passing a specific payload: 92 | 93 | ```bash 94 | $ aws lambda invoke \ 95 | --function-name es-cleanup-lambda \ 96 | outfile --payload \ 97 | '{"es_endpoint":"search-es-demo-zveqnhnhjqm5flntemgmx5iuya.eu-west-1.es.amazonaws.com"}' 98 | ``` 99 | 100 | Create your AWS Cloudwatch rule: 101 | 102 | ```bash 103 | $ aws events put-rule \ 104 | --name my-scheduled-rule \ 105 | --schedule-expression 'cron(0 1 * * ? *)' 106 | 107 | 108 | $ aws lambda add-permission \ 109 | --function-name es-cleanup-lambda \ 110 | --statement-id my-scheduled-event \ 111 | --action 'lambda:InvokeFunction' \ 112 | --principal events.amazonaws.com \ 113 | --source-arn arn:aws:events:eu-west-1:123456789012:rule/my-scheduled-rule 114 | 115 | 116 | $ aws events put-targets \ 117 | --rule my-scheduled-rule \ 118 | --targets file://json_file/cloudwatch-target.json 119 | ``` 120 | 121 | ### Lambda configuration and OS parameters 122 | 123 | Using AWS environment variables you can easily modify the behaviour of the Lambda function 124 | 125 | | Variable Name | Example Value | Description | Default Value | Required | 126 | | --- | --- | --- | --- | --- | 127 | | es_endpoint | search-es-demo-zveqnhnhjqm5flntemgmx5iuya.eu-west-1.es.amazonaws.com | AWS ES fqdn | `None` | True | 128 | | index | `logstash*` | Regular expression selecting the index/indices to process; indices matching `skip_index` are never deleted | `.*` | False | 129 | | skip_index | `.kibana.*` | Regular expression selecting the index/indices to skip | `.kibana*` | False | 130 | | index_format | `%Y.%m.%d` | `strptime` format of the date suffix in the index name; it is used to evaluate the index age | `%Y.%m.%d` | False | 131 | 
| delete_after | `7` | Number of days to preserve | `15` | False | 132 | 133 | ## Serverless Framework 134 | 135 | After editing the file `serverless.yml`, you can deploy your function in AWS using [Serverless Framework](https://serverless.com/framework/docs/providers/aws/cli-reference/) 136 | 137 | ```bash 138 | $ git clone git@github.com:cloudreach/aws-lambda-es-cleanup.git 139 | $ cd aws-lambda-es-cleanup/ 140 | $ serverless deploy 141 | Serverless: Creating Stack... 142 | Serverless: Checking Stack create progress... 143 | ..... 144 | Serverless: Stack create finished... 145 | Serverless: Packaging service... 146 | Serverless: Uploading CloudFormation file to S3... 147 | Serverless: Uploading function .zip files to S3... 148 | Serverless: Uploading service .zip file to S3 (7.13 KB)... 149 | Serverless: Updating Stack... 150 | Serverless: Checking Stack update progress... 151 | ...................... 152 | Serverless: Stack update finished... 153 | Service Information 154 | service: es-cleanup-lambda 155 | stage: prod 156 | region: eu-west-1 157 | api keys: 158 | None 159 | endpoints: 160 | None 161 | functions: 162 | es-cleanup-lambda: es-cleanup-lambda-prod-es-cleanup-lambda 163 | ``` 164 | 165 | ### Terraform deployment 166 | 167 | This Lambda function can also be built using Terraform by following this [README](terraform/README.md). 168 | 169 | ## How to Contribute 170 | 171 | We encourage contributions to our projects; please see our [CONTRIBUTING](CONTRIBUTING.md) guide for details. 172 | 173 | 174 | ## License 175 | 176 | **aws-lambda-es-cleanup** is licensed under the [Apache Software License 2.0](LICENSE.md). 
177 | 178 | ## Thanks 179 | 180 | Keep It Cloudy ([@CloudreachKIC](https://twitter.com/cloudreachkic)) 181 | -------------------------------------------------------------------------------- /diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudreach/aws-lambda-es-cleanup/d043cc37807c0773c0a0cc2ea4a562b44d5d77bc/diagram.png -------------------------------------------------------------------------------- /es_cleanup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | This AWS Lambda function deletes old Elasticsearch indices. 5 | """ 6 | import datetime 7 | import re 8 | import sys 9 | import time 10 | 11 | import json 12 | import os 13 | from botocore.auth import SigV4Auth 14 | from botocore.awsrequest import AWSRequest 15 | from botocore.credentials import create_credential_resolver 16 | from botocore.httpsession import URLLib3Session 17 | from botocore.session import get_session 18 | 19 | if sys.version_info[0] == 3: 20 | from urllib.request import quote 21 | else: 22 | from urllib import quote 23 | 24 | 25 | class ES_Exception(Exception): 26 | """Raised for a non-2xx Elasticsearch answer; keeps its status_code and payload""" 27 | status_code = 0 28 | payload = "" 29 | 30 | def __init__(self, status_code, payload): 31 | self.status_code = status_code 32 | self.payload = payload 33 | Exception.__init__(self, 34 | "ES_Exception: status_code={}, payload={}".format( 35 | status_code, payload)) 36 | 37 | 38 | class ES_Cleanup(object): 39 | name = "lambda_es_cleanup" 40 | 41 | def __init__(self, event, context): 42 | """Read the configuration from the event, falling back to environment variables 43 | 44 | Args: 45 | event (dict): AWS Cloudwatch Scheduled Event 46 | context (object): AWS running context 47 | """ 48 | self.report = [] 49 | self.event = event 50 | self.context = context 51 | 52 | self.cfg = {} 53 | self.cfg["es_endpoint"] = self.get_parameter("es_endpoint") 
54 | self.cfg["index"] = self.get_parameter("index", ".*") 55 | self.cfg["skip_index"] = self.get_parameter("skip_index", ".kibana*") 56 | 57 | self.cfg["delete_after"] = int(self.get_parameter("delete_after", 15)) 58 | self.cfg["es_max_retry"] = int(self.get_parameter("es_max_retry", 3)) 59 | self.cfg["index_format"] = self.get_parameter( 60 | "index_format", "%Y.%m.%d") 61 | 62 | if not self.cfg["es_endpoint"]: 63 | raise Exception("[es_endpoint] OS variable is not set") 64 | 65 | def get_parameter(self, key_param, default_param=None): 66 | """helper function to retrieve specific configuration 67 | 68 | Args: 69 | key_param (str): key_param to read from "event" or "environment" variable 70 | default_param (str): default value 71 | 72 | Returns: 73 | string: parameter value or None 74 | 75 | """ 76 | return self.event.get(key_param, os.environ.get(key_param, default_param)) 77 | 78 | def send_to_es(self, path, method="GET", payload={}): 79 | """Low-level POST data to Amazon Elasticsearch Service generating a Sigv4 signed request 80 | 81 | Args: 82 | path (str): path to send to ES 83 | method (str, optional): HTTP method default:GET 84 | payload (dict, optional): additional payload used during POST or PUT 85 | 86 | Returns: 87 | dict: json answer converted in dict 88 | 89 | Raises: 90 | #: Error during ES communication 91 | ES_Exception: Description 92 | """ 93 | if not path.startswith("/"): 94 | path = "/" + path 95 | 96 | es_region = self.cfg["es_endpoint"].split(".")[1] 97 | 98 | headers = { 99 | "Host": self.cfg["es_endpoint"], 100 | "Content-Type": "application/json" 101 | } 102 | 103 | # send to ES with exponential backoff 104 | retries = 0 105 | while retries < int(self.cfg["es_max_retry"]): 106 | if retries > 0: 107 | seconds = (2 ** retries) * .1 108 | time.sleep(seconds) 109 | 110 | req = AWSRequest( 111 | method=method, 112 | url="https://{}{}".format( 113 | self.cfg["es_endpoint"], quote(path)), 114 | data=json.dumps(payload), 115 | params={"format": 
"json"}, 116 | headers=headers) 117 | credential_resolver = create_credential_resolver(get_session()) 118 | credentials = credential_resolver.load_credentials() 119 | SigV4Auth(credentials, 'es', es_region).add_auth(req) 120 | 121 | try: 122 | preq = req.prepare() 123 | session = URLLib3Session() 124 | res = session.send(preq) 125 | if res.status_code >= 200 and res.status_code <= 299: 126 | return json.loads(res.content) 127 | else: 128 | raise ES_Exception(res.status_code, res._content) 129 | 130 | except ES_Exception as e: 131 | if (e.status_code >= 500) and (e.status_code <= 599): 132 | retries += 1 # Candidate for retry 133 | else: 134 | raise # Stop retrying, re-raise exception 135 | 136 | def delete_index(self, index_name): 137 | """ES DELETE specific index 138 | 139 | Args: 140 | index_name (str): Index name 141 | 142 | Returns: 143 | dict: ES answer 144 | """ 145 | return self.send_to_es(index_name, "DELETE") 146 | 147 | def get_indices(self): 148 | """ES Get indices 149 | 150 | Returns: 151 | dict: ES answer 152 | """ 153 | return self.send_to_es("/_cat/indices") 154 | 155 | 156 | class DeleteDecider(object): 157 | def __init__(self, delete_after, idx_format, idx_regex, skip_idx_regex, today): 158 | self.delete_after = delete_after 159 | self.idx_format = idx_format 160 | self.idx_regex = idx_regex 161 | self.skip_idx_regex = skip_idx_regex 162 | self.today = today 163 | 164 | def should_delete(self, index): 165 | idx_split = index["index"].rsplit("-", 1 + self.idx_format.count("-")) 166 | idx_date_str = '-'.join(word for word in idx_split[1:]) 167 | idx_name = idx_split[0] 168 | 169 | if not re.search(self.idx_regex, index["index"]): 170 | return False, "index '{}' name '{}' did not match pattern '{}'".format(index["index"], 171 | idx_name, 172 | self.idx_regex) 173 | 174 | earliest_to_keep = self.today - datetime.timedelta(days=self.delete_after) 175 | if re.search(self.skip_idx_regex, index["index"]): 176 | return False, "index matches skip condition" 
177 | 178 | try: 179 | idx_datetime = datetime.datetime.strptime(idx_date_str, self.idx_format) 180 | idx_date = idx_datetime.date() 181 | except ValueError: 182 | raise ValueError("Unable to parse index date {0} - " 183 | "incorrect index date format set?".format(idx_date_str)) 184 | 185 | if idx_date < earliest_to_keep: 186 | return True, "all conditions satisfied" 187 | 188 | return False, "deletion age of has not been reached. " \ 189 | "Oldest index kept: {0}, Index Date: {1}".format(earliest_to_keep, idx_date) 190 | 191 | 192 | def lambda_handler(event, context): 193 | """Main Lambda function 194 | Args: 195 | event (dict): AWS Cloudwatch Scheduled Event 196 | context (object): AWS running context 197 | Returns: 198 | None 199 | """ 200 | es = ES_Cleanup(event, context) 201 | decider = DeleteDecider(delete_after=int(es.cfg["delete_after"]), 202 | idx_regex=es.cfg["index"], 203 | idx_format=es.cfg["index_format"], 204 | skip_idx_regex=es.cfg["skip_index"], 205 | today=datetime.date.today()) 206 | 207 | for index in es.get_indices(): 208 | d, reason = decider.should_delete(index) 209 | if d: 210 | print("Deleting index: {}".format(index["index"])) 211 | es.delete_index(index["index"]) 212 | else: 213 | print("Skipping or keeping index: {}. 
Reason: {}".format(index["index"], reason)) 214 | 215 | # Manual run with a sample scheduled event 216 | if __name__ == '__main__': 217 | event = { 218 | 'account': '123456789012', 219 | 'region': 'eu-west-1', 220 | 'detail': {}, 221 | 'detail-type': 'Scheduled Event', 222 | 'source': 'aws.events', 223 | 'time': '1970-01-01T00:00:00Z', 224 | 'id': 'cdc73f9d-aea9-11e3-9d5a-835b769c0d9c', 225 | 'resources': 226 | ['arn:aws:events:us-east-1:123456789012:rule/my-schedule'] 227 | } 228 | lambda_handler(event, "") 229 | -------------------------------------------------------------------------------- /es_cleanup_test.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import unittest 3 | 4 | import es_cleanup 5 | 6 | IDX_REGEX = '.*' 7 | IDX_FORMAT1 = '%Y.%m.%d' 8 | SKIP_IDX_REGEX = 'kibana*' 9 | # today=2019-12-19 with delete_after=4: only indices dated before 2019-12-15 are deletable 10 | decider = es_cleanup.DeleteDecider(delete_after=4, 11 | idx_format=IDX_FORMAT1, 12 | idx_regex=IDX_REGEX, 13 | skip_idx_regex=SKIP_IDX_REGEX, 14 | today=datetime.date(2019, 12, 19)) 15 | 16 | 17 | class TestShouldDelete(unittest.TestCase): 18 | def test_should_be_deleted(self): 19 | tuple = decider.should_delete({"index": "k8s-2019.12.14"}) 20 | self.assertTrue(tuple[0]) 21 | 22 | def test_should_not_be_deleted(self): 23 | tuple = decider.should_delete({"index": "k8s-2019.12.15"}) 24 | self.assertFalse(tuple[0]) 25 | 26 | def test_should_raise_value_error(self): 27 | with self.assertRaises(ValueError): 28 | decider.should_delete({"index": "k8s-2019-12-15"}) 29 | 30 | def test_should_skip_indes(self): 31 | tuple = decider.should_delete({"index": ".kibana"}) 32 | self.assertFalse(tuple[0]) 33 | self.assertTrue("matches skip condition" in tuple[1]) 34 | 35 | def test_should_skip_indes_2(self): 36 | tuple = decider.should_delete({"index": ".kibana_1"}) 37 | self.assertFalse(tuple[0]) 38 | self.assertTrue("matches skip condition" in tuple[1]) 39 | 40 | 41 | 42 | 43 | # Second fixture, used by TestShouldDelete2 44 | decider2 = es_cleanup.DeleteDecider(delete_after=4, 45 | idx_format='%Y.%m.%d', 46 | 
idx_regex='app[1-2].*|k8s.*', 47 | skip_idx_regex='kibana.*', 48 | today=datetime.date(2019, 12, 19)) 49 | 50 | 51 | class TestShouldDelete2(unittest.TestCase): 52 | def test_should_be_deleted(self): 53 | tuple = decider2.should_delete({"index": "k8s-2019.12.14"}) 54 | self.assertTrue(tuple[0]) 55 | 56 | def test_should_not_be_deleted(self): 57 | tuple = decider2.should_delete({"index": "k8s-2019.12.15"}) 58 | self.assertFalse(tuple[0]) 59 | 60 | def test_should_be_deleted_app1(self): 61 | tuple = decider2.should_delete({"index": "app1-2019.12.14"}) 62 | self.assertTrue(tuple[0]) 63 | 64 | def test_should_not_be_deleted_app1(self): 65 | tuple = decider.should_delete({"index": "app1-2019.12.15"}) 66 | self.assertFalse(tuple[0]) 67 | 68 | def test_should_be_deleted_app2(self): 69 | tuple = decider2.should_delete({"index": "app2-2019.12.14"}) 70 | self.assertTrue(tuple[0]) 71 | 72 | def test_should_not_be_deleted_app2(self): 73 | tuple = decider2.should_delete({"index": "app2-2019.12.15"}) 74 | self.assertFalse(tuple[0]) 75 | 76 | def test_should_not_be_deleted_app3(self): 77 | tuple = decider2.should_delete({"index": "app3-2019.12.14"}) 78 | self.assertFalse(tuple[0]) 79 | 80 | def test_should_raise_value_error(self): 81 | with self.assertRaises(ValueError): 82 | decider2.should_delete({"index": "k8s-2019-12-15"}) 83 | 84 | def test_should_skip_indes(self): 85 | tuple = decider2.should_delete({"index": ".kibana"}) 86 | self.assertFalse(tuple[0]) 87 | self.assertTrue("matches skip condition" in tuple[1]) 88 | 89 | def test_should_skip_indes_2(self): 90 | tuple = decider2.should_delete({"index": ".kibana_1"}) 91 | self.assertFalse(tuple[0]) 92 | self.assertTrue("matches skip condition" in tuple[1]) 93 | 94 | 95 | 96 | if __name__ == '__main__': 97 | unittest.main() 98 | -------------------------------------------------------------------------------- /json_file/cloudwatch-target.json: -------------------------------------------------------------------------------- 1 | [ 
2 | { 3 | "Id": "1", 4 | "Arn": "arn:aws:lambda:eu-west-1:123456789012:function:es-cleanup-lambda" 5 | } 6 | ] 7 | -------------------------------------------------------------------------------- /json_file/es_policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "LambdaLogCreation", 6 | "Effect": "Allow", 7 | "Action": ["logs:*"], 8 | "Resource": "arn:aws:logs:*:*:*" 9 | }, 10 | { 11 | "Sid": "ESPermission", 12 | "Effect": "Allow", 13 | "Action": [ 14 | "es:*" 15 | ], 16 | "Resource": "arn:aws:es:eu-west-1:123456789012:domain/es-demo/*" 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /json_file/trust_policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Action": "sts:AssumeRole", 6 | "Principal": { 7 | "Service": "lambda.amazonaws.com" 8 | }, 9 | "Effect": "Allow", 10 | "Sid": "" 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.3.0 2 | botocore>=1.5.0 3 | -------------------------------------------------------------------------------- /serverless.yml: -------------------------------------------------------------------------------- 1 | # serverless.yml 2 | service: es-cleanup-lambda 3 | 4 | provider: 5 | name: aws 6 | stage: prod 7 | region: eu-west-1 8 | profile: ${env:AWS_DEFAULT_PROFILE} 9 | deploymentBucket: 10 | name: ${env:S3_DEPLOYMENT_BUCKET} 11 | environment: 12 | es_endpoint: search-es-demo-zveqnhnhjqm5flntemgmx5iuya.eu-west-1.es.amazonaws.com 13 | index: ".*" 14 | skip_index: ".kibana*" 15 | delete_after: "7" 16 | index_format: "%Y.%m.%d" 17 | stackTags: # Optional CF stack tags 18 | Owner: aws@cloudreach.com 19 | 
Purpose: es-cleanup 20 | Environment: PROD 21 | iamRoleStatements: 22 | - Effect: "Allow" 23 | Action: 24 | - "es:*" 25 | Resource: "arn:aws:es:eu-west-1:123456789012:domain/es-demo/*" 26 | # you must specify the full ARN of the AWS ES Domain 27 | 28 | 29 | 30 | functions: 31 | es-cleanup-lambda: 32 | handler: es_cleanup.lambda_handler 33 | name: es-cleanup-lambda 34 | description: ES old index removal 35 | runtime: python3.7 36 | memorySize: 128 37 | timeout: 300 38 | package: 39 | exclude: 40 | - ./** 41 | include: 42 | - es_cleanup.py 43 | - LICENSE.md 44 | - README.md 45 | - CONTRIBUTING.md 46 | events: 47 | # Invoke Lambda function every night at 01.00 AM 48 | - schedule: cron(0 1 * * ? *) 49 | 50 | resources: 51 | Description: "AWS Lambda: Elasticsearch Index Cleanup" 52 | -------------------------------------------------------------------------------- /terraform/README.md: -------------------------------------------------------------------------------- 1 | # Module Input Variables 2 | 3 | 4 | ## Providers 5 | 6 | | Name | Version | 7 | |------|---------| 8 | | archive | n/a | 9 | | aws | n/a | 10 | | null | n/a | 11 | 12 | ## Inputs 13 | 14 | | Name | Description | Type | Default | Required | 15 | |------|-------------|------|---------|:-----:| 16 | | delete\_after | Numbers of days to preserve | `number` | `15` | no | 17 | | es\_endpoint | AWS ES FQDN e.g. 
search-es-demo-xxxxxxxxxx.eu-west-1.es.amazonaws.com | `string` | n/a | yes | 18 | | index | Index/indices to process using regex, except the one matching `skip_index` regex | `string` | `".*"` | no | 19 | | index\_format | Combined with 'index' variable is used to evaluate the index age | `string` | `"%Y.%m.%d"` | no | 20 | | prefix | A prefix for the resource names, this helps create multiple instances of this stack for different environments | `string` | `""` | no | 21 | | python\_version | Lambda Python version to be used | `string` | `"3.6"` | no | 22 | | schedule | Cloudwatch Cron Schedule expression for running the cleanup function | `string` | `"cron(0 3 * * ? *)"` | no | 23 | | security\_group\_ids | Additional security group IDs to add. | `list(string)` | `[]` | no | 24 | | skip\_index | Index/indices to skip | `string` | `".kibana*"` | no | 25 | | subnet\_ids | Subnet IDs you want to deploy the lambda in. Only fill this in if you want to deploy your Lambda function inside a VPC. | `list(string)` | `[]` | no | 26 | | suffix | A suffix for the resource names, this helps create multiple instances of this stack for different environments | `string` | `""` | no | 27 | | tags | Tags to apply | `map` |
{
"Name": "es-cleanup"
}
| no | 28 | | timeout | Maximum lambda execution time | `number` | `300` | no | 29 | 30 | ## Outputs 31 | 32 | | Name | Description | 33 | |------|-------------| 34 | | cloudwatch\_event\_arn | AWS Cloudwatch Event ARN | 35 | | iam\_role\_arn | AWS IAM ARN | 36 | | lambda\_arn | AWS Lambda ARN | 37 | 38 | 39 | 40 | 41 | ## pre-commit hook 42 | 43 | This repo uses pre-commit hooks; to know more [click here](https://github.com/antonbabenko/pre-commit-terraform). 44 | To trigger them manually, use these commands: 45 | 46 | ``` 47 | pre-commit install 48 | pre-commit run --all-files 49 | ``` 50 | 51 | 52 | ## Example 53 | 54 | ``` 55 | terraform { 56 | required_version = ">= 0.12" 57 | } 58 | 59 | provider "aws" { 60 | region = "eu-west-1" 61 | } 62 | 63 | module "public_es_cleanup" { 64 | source = "github.com/cloudreach/aws-lambda-es-cleanup.git//terraform?ref=v0.14" 65 | 66 | prefix = "public_es_" 67 | es_endpoint = "test-es-XXXXXXX.eu-central-1.es.amazonaws.com" 68 | delete_after = 365 69 | } 70 | 71 | 72 | module "vpc_es_cleanup" { 73 | source = "github.com/cloudreach/aws-lambda-es-cleanup.git//terraform?ref=v0.14" 74 | 75 | prefix = "vpc_es_" 76 | es_endpoint = "vpc-gc-demo-vpc-gloo5rzcdhyiykwdlots2hdjla.eu-central-1.es.amazonaws.com" 77 | index = "all" 78 | delete_after = 30 79 | subnet_ids = ["subnet-d8660da2"] 80 | security_group_ids = ["sg-02dd3aa6da1b5"] 81 | } 82 | ``` 83 | 84 | 85 | ### Issue 86 | In order to use the new module version you must have `terraform-provider-aws` greater than `~> 2.7` and use Terraform `~> 0.12` 87 | -------------------------------------------------------------------------------- /terraform/cloudwatch.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_event_rule" "schedule" { 2 | name = "${var.prefix}es-cleanup-execution-schedule${var.suffix}" 3 | description = "${var.prefix}es-cleanup execution schedule${var.suffix}" 4 | schedule_expression = var.schedule 5 | } 6 | 7 | 
resource "aws_cloudwatch_event_target" "es_cleanup" { 8 | target_id = "${var.prefix}lambda-es-cleanup${var.suffix}" 9 | rule = aws_cloudwatch_event_rule.schedule.name 10 | arn = aws_lambda_function.es_cleanup.arn 11 | } 12 | 13 | resource "aws_lambda_permission" "allow_cloudwatch" { 14 | statement_id = "AllowExecutionFromCloudWatch" 15 | action = "lambda:InvokeFunction" 16 | function_name = aws_lambda_function.es_cleanup.arn 17 | principal = "events.amazonaws.com" 18 | source_arn = aws_cloudwatch_event_rule.schedule.arn 19 | } 20 | 21 | resource "aws_cloudwatch_log_group" "cwlog" { 22 | name = "/aws/lambda/${var.prefix}es-cleanup${var.suffix}" 23 | } 24 | -------------------------------------------------------------------------------- /terraform/iam.tf: -------------------------------------------------------------------------------- 1 | data "aws_region" "current" { 2 | } 3 | 4 | data "aws_caller_identity" "current" { 5 | } 6 | 7 | data "aws_iam_policy_document" "policy" { 8 | statement { 9 | sid = "LambdaLogCreation" 10 | effect = "Allow" 11 | actions = [ 12 | "logs:CreateLogGroup", 13 | "logs:CreateLogStream", 14 | "logs:PutLogEvents", 15 | ] 16 | resources = [ 17 | "arn:aws:logs:${data.aws_region.current.name}:*:log-group:/aws/lambda/${var.prefix}es-cleanup${var.suffix}", 18 | "arn:aws:logs:${data.aws_region.current.name}:*:log-group:/aws/lambda/${var.prefix}es-cleanup${var.suffix}:*", 19 | ] 20 | } 21 | 22 | statement { 23 | sid = "LambdaVPCconfig" 24 | effect = "Allow" 25 | actions = [ 26 | "ec2:CreateNetworkInterface", 27 | "ec2:DescribeNetworkInterfaces", 28 | "ec2:DeleteNetworkInterface", 29 | ] 30 | resources = ["*"] 31 | } 32 | 33 | statement { 34 | sid = "ESPermission" 35 | effect = "Allow" 36 | actions = [ 37 | "es:*", 38 | ] 39 | resources = [ 40 | "arn:aws:es:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:domain/*", 41 | ] 42 | } 43 | } 44 | 45 | resource "aws_iam_policy" "policy" { 46 | name = 
"${var.prefix}es-cleanup${var.suffix}" 47 | path = "/" 48 | description = "Policy for ${var.prefix}es-cleanup${var.suffix} Lambda function" 49 | policy = data.aws_iam_policy_document.policy.json 50 | } 51 | 52 | resource "aws_iam_role" "role" { 53 | name = "${var.prefix}es-cleanup${var.suffix}" 54 | 55 | assume_role_policy = < 0 ? 1 : 0 79 | role = aws_iam_role.role.name 80 | policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" 81 | } 82 | -------------------------------------------------------------------------------- /terraform/lambda.tf: -------------------------------------------------------------------------------- 1 | data "archive_file" "es_cleanup_lambda" { 2 | type = "zip" 3 | source_file = "${path.module}/../es_cleanup.py" 4 | output_path = "${path.module}/es_cleanup.zip" 5 | } 6 | 7 | locals { 8 | sg_ids = [element(concat(aws_security_group.lambda.*.id, [""]), 0)] 9 | } 10 | 11 | data "null_data_source" "lambda_file" { 12 | inputs = { 13 | filename = "${path.module}/es_cleanup.zip" 14 | } 15 | } 16 | 17 | resource "aws_lambda_function" "es_cleanup" { 18 | filename = data.null_data_source.lambda_file.outputs.filename 19 | function_name = "${var.prefix}es-cleanup${var.suffix}" 20 | description = "${var.prefix}es-cleanup${var.suffix}" 21 | timeout = var.timeout 22 | runtime = "python${var.python_version}" 23 | role = aws_iam_role.role.arn 24 | handler = "es_cleanup.lambda_handler" 25 | source_code_hash = data.archive_file.es_cleanup_lambda.output_base64sha256 26 | 27 | environment { 28 | variables = { 29 | es_endpoint = var.es_endpoint 30 | index = var.index 31 | skip_index = var.skip_index 32 | delete_after = var.delete_after 33 | index_format = var.index_format 34 | } 35 | } 36 | 37 | tags = merge( 38 | var.tags, 39 | { 40 | "Scope" = "${var.prefix}lambda_function_to_elasticsearch${var.suffix}" 41 | }, 42 | ) 43 | 44 | # This will be a code block with empty lists if we don't create a securitygroup and the subnet_ids are 
empty. 45 | # When these lists are empty it will deploy the lambda without VPC support. 46 | vpc_config { 47 | subnet_ids = var.subnet_ids 48 | security_group_ids = compact(concat(local.sg_ids, var.security_group_ids)) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "iam_role_arn" { 2 | description = "AWS IAM ARN" 3 | value = aws_iam_role.role.arn 4 | } 5 | 6 | output "lambda_arn" { 7 | description = "AWS Lambda ARN" 8 | value = aws_lambda_function.es_cleanup.arn 9 | } 10 | 11 | output "cloudwatch_event_arn" { 12 | description = "AWS Cloudwatch Event ARN" 13 | value = aws_cloudwatch_event_rule.schedule.arn 14 | } 15 | -------------------------------------------------------------------------------- /terraform/sg.tf: -------------------------------------------------------------------------------- 1 | data "aws_subnet" "selected" { 2 | count = length(var.subnet_ids) > 0 ? 1 : 0 3 | id = var.subnet_ids[0] 4 | } 5 | 6 | resource "aws_security_group" "lambda" { 7 | count = length(var.subnet_ids) > 0 ? 
1 : 0 8 | name = "${var.prefix}lambda_cleanup_to_elasticsearch${var.suffix}" 9 | description = "${var.prefix}lambda_cleanup_to_elasticsearch${var.suffix}" 10 | vpc_id = data.aws_subnet.selected[0].vpc_id 11 | 12 | egress { 13 | from_port = 443 14 | to_port = 443 15 | protocol = "tcp" 16 | cidr_blocks = ["0.0.0.0/0"] 17 | } 18 | 19 | egress { 20 | from_port = 53 21 | to_port = 53 22 | protocol = "tcp" 23 | cidr_blocks = ["0.0.0.0/0"] 24 | } 25 | 26 | egress { 27 | from_port = 53 28 | to_port = 53 29 | protocol = "udp" 30 | cidr_blocks = ["0.0.0.0/0"] 31 | } 32 | 33 | tags = merge( 34 | var.tags, 35 | { 36 | "Scope" = "${var.prefix}lambda_function_to_elasticsearch${var.suffix}" 37 | }, 38 | ) 39 | } 40 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "prefix" { 2 | description = "A prefix for the resource names, this helps create multiple instances of this stack for different environments" 3 | default = "" 4 | type = string 5 | } 6 | 7 | variable "suffix" { 8 | description = "A suffix for the resource names, this helps create multiple instances of this stack for different environments" 9 | default = "" 10 | type = string 11 | } 12 | 13 | variable "schedule" { 14 | description = "Cloudwatch Cron Schedule expression for running the cleanup function" 15 | default = "cron(0 3 * * ? *)" 16 | type = string 17 | } 18 | 19 | variable "timeout" { 20 | description = "Maximum lambda execution time" 21 | default = 300 22 | type = number 23 | } 24 | 25 | variable "es_endpoint" { 26 | description = "AWS ES FQDN e.g. 
search-es-demo-xxxxxxxxxx.eu-west-1.es.amazonaws.com" 27 | type = string 28 | } 29 | 30 | variable "index" { 31 | description = "Index/indices to process using regex, except the one matching `skip_index` regex" 32 | default = ".*" 33 | type = string 34 | } 35 | 36 | variable "skip_index" { 37 | description = "Index/indices to skip" 38 | default = ".kibana*" 39 | type = string 40 | } 41 | 42 | variable "delete_after" { 43 | description = "Numbers of days to preserve" 44 | default = 15 45 | type = number 46 | } 47 | 48 | variable "index_format" { 49 | description = "Combined with 'index' variable is used to evaluate the index age" 50 | default = "%Y.%m.%d" 51 | type = string 52 | } 53 | 54 | variable "python_version" { 55 | description = "Lambda Python version to be used" 56 | default = "3.6" 57 | type = string 58 | } 59 | 60 | variable "subnet_ids" { 61 | description = "Subnet IDs you want to deploy the lambda in. Only fill this in if you want to deploy your Lambda function inside a VPC." 62 | type = list(string) 63 | default = [] 64 | } 65 | 66 | variable "security_group_ids" { 67 | description = "Additional security group IDs to add." 
68 | type = list(string) 69 | default = [] 70 | } 71 | 72 | variable "tags" { 73 | description = "Tags to apply" 74 | default = { 75 | Name = "es-cleanup" 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /terraform/versions.tf: -------------------------------------------------------------------------------- 1 | 2 | terraform { 3 | required_version = ">= 0.12" 4 | } 5 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # ES cluster creation 2 | 3 | This script will provision: 4 | - a simple AWS ES node 5 | - deploy the `es-cleanup` module 6 | - create multiple index 7 | 8 | 9 | ## How to 10 | 11 | Just run 12 | 13 | ``` 14 | ./run.sh 15 | 16 | ``` 17 | 18 | ## Notes 19 | 20 | This demo script will save the terraform state in you local folder. 21 | 22 | Remember to destroy your test scenario using the command: 23 | 24 | ``` 25 | terraform destroy -auto-approve 26 | ``` 27 | -------------------------------------------------------------------------------- /tests/data.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | domain_name = var.use_prefix ? join("", [var.domain_prefix, var.domain_name]) : var.domain_name 3 | inside_vpc = length(var.vpc_options["subnet_ids"]) > 0 ? true : false 4 | } 5 | -------------------------------------------------------------------------------- /tests/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 0.12" 3 | } 4 | 5 | provider "aws" { 6 | region = "eu-central-1" 7 | } 8 | 9 | 10 | data "http" "myip" { 11 | url = "http://ipv4.icanhazip.com" 12 | } 13 | 14 | data "aws_caller_identity" "current" {} 15 | 16 | data "aws_iam_policy_document" "es_management_access" { 17 | count = false == local.inside_vpc ? 
1 : 0 18 | 19 | statement { 20 | sid = "1" 21 | actions = [ 22 | "es:*", 23 | ] 24 | 25 | resources = ["${aws_elasticsearch_domain.es[0].arn}/*"] 26 | 27 | principals { 28 | type = "AWS" 29 | identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"] 30 | } 31 | 32 | } 33 | 34 | statement { 35 | sid = "2" 36 | actions = [ 37 | "es:*", 38 | ] 39 | 40 | resources = ["${aws_elasticsearch_domain.es[0].arn}/*"] 41 | 42 | principals { 43 | type = "AWS" 44 | identifiers = ["*"] 45 | } 46 | condition { 47 | test = "IpAddress" 48 | variable = "aws:SourceIp" 49 | 50 | values = ["${chomp(data.http.myip.body)}/32"] 51 | } 52 | } 53 | } 54 | 55 | resource "aws_elasticsearch_domain" "es" { 56 | count = false == local.inside_vpc ? 1 : 0 57 | 58 | depends_on = [aws_iam_service_linked_role.es] 59 | 60 | domain_name = local.domain_name 61 | elasticsearch_version = var.es_version 62 | 63 | encrypt_at_rest { 64 | enabled = var.encrypt_at_rest 65 | kms_key_id = var.kms_key_id 66 | } 67 | 68 | cluster_config { 69 | instance_type = var.instance_type 70 | instance_count = var.instance_count 71 | dedicated_master_enabled = var.instance_count >= var.dedicated_master_threshold ? true : false 72 | dedicated_master_count = var.instance_count >= var.dedicated_master_threshold ? 3 : 0 73 | dedicated_master_type = var.instance_count >= var.dedicated_master_threshold ? var.dedicated_master_type != "false" ? var.dedicated_master_type : var.instance_type : "" 74 | zone_awareness_enabled = var.es_zone_awareness 75 | } 76 | 77 | advanced_options = var.advanced_options 78 | 79 | node_to_node_encryption { 80 | enabled = var.node_to_node_encryption_enabled 81 | } 82 | 83 | ebs_options { 84 | ebs_enabled = var.ebs_volume_size > 0 ? 
true : false 85 | volume_size = var.ebs_volume_size 86 | volume_type = var.ebs_volume_type 87 | } 88 | 89 | snapshot_options { 90 | automated_snapshot_start_hour = var.snapshot_start_hour 91 | } 92 | 93 | tags = merge( 94 | { 95 | "Domain" = local.domain_name 96 | }, 97 | var.tags, 98 | ) 99 | } 100 | 101 | resource "aws_elasticsearch_domain_policy" "es_management_access" { 102 | count = false == local.inside_vpc ? 1 : 0 103 | 104 | domain_name = local.domain_name 105 | access_policies = data.aws_iam_policy_document.es_management_access[0].json 106 | } 107 | 108 | 109 | 110 | 111 | module "public_es_cleanup" { 112 | source = "../terraform/" 113 | 114 | prefix = "public_es_" 115 | es_endpoint = element(aws_elasticsearch_domain.es.*.endpoint, 0) 116 | delete_after = 1 117 | } 118 | -------------------------------------------------------------------------------- /tests/main_vpc.tf: -------------------------------------------------------------------------------- 1 | /*Add a new set of data.aws_iam_policy_document, aws_elasticsearch_domain, aws_elasticsearch_domain_policy. Because currently terraform/aws_elasticsearch_domain 2 | does not handle properly null/empty "vpc_options" */ 3 | 4 | data "aws_iam_policy_document" "es_vpc_management_access" { 5 | count = local.inside_vpc ? 1 : 0 6 | 7 | statement { 8 | actions = [ 9 | "es:*", 10 | ] 11 | 12 | resources = [ 13 | aws_elasticsearch_domain.es_vpc[0].arn, 14 | "${aws_elasticsearch_domain.es_vpc[0].arn}/*", 15 | ] 16 | 17 | principals { 18 | type = "AWS" 19 | 20 | identifiers = distinct(compact(var.management_iam_roles)) 21 | } 22 | } 23 | } 24 | 25 | resource "aws_iam_service_linked_role" "es" { 26 | count = var.create_iam_service_linked_role ? 1 : 0 27 | aws_service_name = "es.amazonaws.com" 28 | } 29 | 30 | resource "aws_elasticsearch_domain" "es_vpc" { 31 | count = local.inside_vpc ? 
1 : 0 32 | 33 | depends_on = [aws_iam_service_linked_role.es] 34 | 35 | domain_name = local.domain_name 36 | elasticsearch_version = var.es_version 37 | 38 | encrypt_at_rest { 39 | enabled = var.encrypt_at_rest 40 | kms_key_id = var.kms_key_id 41 | } 42 | 43 | cluster_config { 44 | instance_type = var.instance_type 45 | instance_count = var.instance_count 46 | dedicated_master_enabled = var.instance_count >= var.dedicated_master_threshold ? true : false 47 | dedicated_master_count = var.instance_count >= var.dedicated_master_threshold ? 3 : 0 48 | dedicated_master_type = var.instance_count >= var.dedicated_master_threshold ? var.dedicated_master_type != "false" ? var.dedicated_master_type : var.instance_type : "" 49 | zone_awareness_enabled = var.es_zone_awareness 50 | } 51 | 52 | advanced_options = var.advanced_options 53 | 54 | node_to_node_encryption { 55 | enabled = var.node_to_node_encryption_enabled 56 | } 57 | 58 | vpc_options { 59 | subnet_ids = var.vpc_options["subnet_ids"] 60 | security_group_ids = var.vpc_options["security_group_ids"] 61 | } 62 | 63 | ebs_options { 64 | ebs_enabled = var.ebs_volume_size > 0 ? true : false 65 | volume_size = var.ebs_volume_size 66 | volume_type = var.ebs_volume_type 67 | } 68 | 69 | snapshot_options { 70 | automated_snapshot_start_hour = var.snapshot_start_hour 71 | } 72 | 73 | tags = merge( 74 | { 75 | "Domain" = local.domain_name 76 | }, 77 | var.tags, 78 | ) 79 | } 80 | 81 | resource "aws_elasticsearch_domain_policy" "es_vpc_management_access" { 82 | count = local.inside_vpc ? 
1 : 0 83 | 84 | domain_name = local.domain_name 85 | access_policies = data.aws_iam_policy_document.es_vpc_management_access[0].json 86 | } 87 | -------------------------------------------------------------------------------- /tests/outputs.tf: -------------------------------------------------------------------------------- 1 | output "arn" { 2 | description = "Amazon Resource Name (ARN) of the domain" 3 | value = element( 4 | concat( 5 | aws_elasticsearch_domain.es_vpc.*.arn, 6 | aws_elasticsearch_domain.es.*.arn, 7 | [""], 8 | ), 9 | 0, 10 | ) 11 | } 12 | 13 | output "domain_id" { 14 | description = "Unique identifier for the domain" 15 | value = element( 16 | concat( 17 | aws_elasticsearch_domain.es_vpc.*.domain_id, 18 | aws_elasticsearch_domain.es.*.domain_id, 19 | [""], 20 | ), 21 | 0, 22 | ) 23 | } 24 | 25 | output "domain_name" { 26 | description = "The name of the Elasticsearch domain" 27 | value = element( 28 | concat( 29 | aws_elasticsearch_domain.es_vpc.*.domain_name, 30 | aws_elasticsearch_domain.es.*.domain_name, 31 | [""], 32 | ), 33 | 0, 34 | ) 35 | } 36 | 37 | output "endpoint" { 38 | description = "Domain-specific endpoint used to submit index, search, and data upload requests" 39 | value = element( 40 | concat( 41 | aws_elasticsearch_domain.es_vpc.*.endpoint, 42 | aws_elasticsearch_domain.es.*.endpoint, 43 | [""], 44 | ), 45 | 0, 46 | ) 47 | } 48 | 49 | output "kibana_endpoint" { 50 | description = "Domain-specific endpoint for kibana without https scheme" 51 | value = element( 52 | concat( 53 | aws_elasticsearch_domain.es_vpc.*.kibana_endpoint, 54 | aws_elasticsearch_domain.es.*.kibana_endpoint, 55 | [""], 56 | ), 57 | 0, 58 | ) 59 | } 60 | -------------------------------------------------------------------------------- /tests/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | 5 | info(){ 6 | echo "[INFO] $1" 7 | } 8 | warning(){ 9 | echo "[WARNING] $1" 10 | } 11 | error(){ 12 
| echo "[ERROR] $1" 13 | } 14 | fatal(){ 15 | echo "[FATAL] $1" ; 16 | exit 1 17 | } 18 | 19 | 20 | 21 | 22 | PLAN_FILE="plan.out" 23 | 24 | terraform_init() { 25 | info "Running terraform init" 26 | terraform init -input=false || fatal "Could not initialize terraform" 27 | } 28 | 29 | 30 | terraform_validate() { 31 | info "Running terraform validate" 32 | terraform validate . || fatal "Could not validate terraform" 33 | } 34 | 35 | 36 | terraform_plan() { 37 | info "Running terraform plan" 38 | terraform plan -out=$PLAN_FILE || fatal "Terraform plan failed" 39 | } 40 | 41 | terraform_apply() { 42 | terraform_plan 43 | info "Running terraform apply" 44 | terraform apply \ 45 | -lock=true \ 46 | -input=false \ 47 | -refresh=true \ 48 | -auto-approve=true \ 49 | $PLAN_FILE || fatal "Terraform apply failed" 50 | rm $PLAN_FILE 51 | } 52 | 53 | create_index() { 54 | curl -q -i -X POST -H 'Content-Type:application/json' -d '{ "test": "test"}' $1 55 | } 56 | 57 | terraform_init 58 | terraform_apply 59 | 60 | TARGET="https://$(terraform output endpoint)" 61 | 62 | create_index $TARGET/k8s-2022.01.01/books 63 | create_index $TARGET/k8s-2021.01.01/books 64 | create_index $TARGET/k8s-2020.01.01/books 65 | create_index $TARGET/k8s-2019.01.01/books 66 | create_index $TARGET/k8s-2018.01.01/books 67 | create_index $TARGET/k8s-2012.01.01/books 68 | create_index $TARGET/k8s-2011.01.01/books 69 | 70 | 71 | curl $TARGET/_aliases?pretty=true 72 | -------------------------------------------------------------------------------- /tests/variables.tf: -------------------------------------------------------------------------------- 1 | variable "create_iam_service_linked_role" { 2 | description = "Whether to create IAM service linked role for AWS ElasticSearch service. Can be only one per AWS account." 
3 | type = bool 4 | default = false 5 | } 6 | 7 | variable "domain_name" { 8 | description = "Domain name for Elasticsearch cluster" 9 | type = string 10 | default = "es-domain" 11 | } 12 | 13 | variable "es_version" { 14 | description = "Version of Elasticsearch to deploy (default 7.1)" 15 | type = string 16 | default = "7.1" 17 | } 18 | 19 | variable "instance_type" { 20 | description = "ES instance type for data nodes in the cluster (default t2.small.elasticsearch)" 21 | type = string 22 | default = "t2.small.elasticsearch" 23 | } 24 | 25 | variable "instance_count" { 26 | description = "Number of data nodes in the cluster (default 1)" 27 | type = number 28 | default = 1 29 | } 30 | 31 | variable "dedicated_master_type" { 32 | description = "ES instance type to be used for dedicated masters (default same as instance_type)" 33 | type = string 34 | default = "false" 35 | } 36 | 37 | variable "encrypt_at_rest" { 38 | description = "Enable encryption at rest (only specific instance family types support it: m4, c4, r4, i2, i3 default: false)" 39 | type = bool 40 | default = false 41 | } 42 | 43 | variable "management_iam_roles" { 44 | description = "List of IAM role ARNs from which to permit management traffic (default ['*']). Note that a client must match both the IP address and the IAM role patterns in order to be permitted access." 45 | type = list(string) 46 | default = ["*"] 47 | } 48 | 49 | variable "management_public_ip_addresses" { 50 | description = "List of IP addresses from which to permit management traffic (default []). Note that a client must match both the IP address and the IAM role patterns in order to be permitted access." 
51 | type = list(string) 52 | default = [] 53 | } 54 | 55 | variable "es_zone_awareness" { 56 | description = "Enable zone awareness for Elasticsearch cluster (default false)" 57 | type = bool 58 | default = false 59 | } 60 | 61 | variable "es_zone_awareness_count" { 62 | description = "Number of availability zones used for data nodes (default 2)" 63 | type = number 64 | default = 2 65 | } 66 | 67 | variable "ebs_volume_size" { 68 | description = "Optionally use EBS volumes for data storage by specifying volume size in GB (default 10)" 69 | type = number 70 | default = 10 71 | } 72 | 73 | variable "ebs_volume_type" { 74 | description = "Storage type of EBS volumes, if used (default gp2)" 75 | type = string 76 | default = "gp2" 77 | } 78 | 79 | variable "kms_key_id" { 80 | description = "KMS key used for elasticsearch" 81 | type = string 82 | default = "" 83 | } 84 | 85 | variable "snapshot_start_hour" { 86 | description = "Hour at which automated snapshots are taken, in UTC (default 0)" 87 | type = number 88 | default = 0 89 | } 90 | 91 | variable "vpc_options" { 92 | description = "A map of supported vpc options" 93 | type = map(list(string)) 94 | 95 | default = { 96 | security_group_ids = [] 97 | subnet_ids = [] 98 | } 99 | } 100 | 101 | variable "tags" { 102 | description = "tags to apply to all resources" 103 | type = map(string) 104 | default = {} 105 | } 106 | 107 | variable "use_prefix" { 108 | description = "Flag indicating whether or not to use the domain_prefix. Default: true" 109 | type = bool 110 | default = true 111 | } 112 | 113 | variable "domain_prefix" { 114 | description = "String to be prefixed to search domain. Default: tf-" 115 | type = string 116 | default = "tf-" 117 | } 118 | 119 | variable "dedicated_master_threshold" { 120 | description = "The number of instances above which dedicated master nodes will be used. 
Default: 10" 121 | type = number 122 | default = 10 123 | } 124 | 125 | variable "advanced_options" { 126 | description = "Map of key-value string pairs to specify advanced configuration options. Note that the values for these configuration options must be strings (wrapped in quotes) or they may be wrong and cause a perpetual diff, causing Terraform to want to recreate your Elasticsearch domain on every apply." 127 | type = map(string) 128 | default = { 129 | "rest.action.multi.allow_explicit_index" = "true" 130 | } 131 | } 132 | 133 | 134 | variable "node_to_node_encryption_enabled" { 135 | description = "Whether to enable node-to-node encryption." 136 | type = bool 137 | default = false 138 | } 139 | --------------------------------------------------------------------------------