├── .gitignore
├── README.md
├── elastic_search_cloudtrail_template.json
└── s3_lambda_es.py

/.gitignore:
--------------------------------------------------------------------------------
requests*

### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio

*.iml

## Directory-based project format:
.idea/
# if you remove the above rule, at least ignore the following:

# User-specific stuff:
# .idea/workspace.xml
# .idea/tasks.xml
# .idea/dictionaries

# Sensitive or high-churn files:
# .idea/dataSources.ids
# .idea/dataSources.xml
# .idea/sqlDataSources.xml
# .idea/dynamic.xml
# .idea/uiDesigner.xml

# Gradle:
# .idea/gradle.xml
# .idea/libraries

# Mongo Explorer plugin:
# .idea/mongoSettings.xml

## File-based project format:
*.ipr
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Created by .ignore support plugin (hsz.mobi)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# THIS CODE IS NOT BEING MAINTAINED

# CloudTrail Logs -> S3 -> Lambda -> AWS Elasticsearch Service

Automagically put your CloudTrail logs into Amazon Elasticsearch Service and get a nice Kibana interface for them ;)

CloudTrail: we all need it, and hey, it's fairly easy to check things in the console.
Now, if you have multiple AWS accounts it starts to get hairy; nobody wants to keep jumping between account consoles for that, right?

Enter automation to save your skin!

## Requirements

- basic knowledge of all the AWS services involved (S3, Lambda, Elasticsearch, CloudTrail)
- CloudTrail already set to store logs in an S3 bucket
- an Elasticsearch cluster ready

## Steps

Assuming some basic knowledge of the services involved:

- clone this repo
- in your terminal, install the requests module into this same folder with ```pip install requests -t .```
- edit s3_lambda_es.py, changing the following variables:
  - host = the full hostname of your AWS ES endpoint
  - region = the region where your ES cluster is located
  - access_key and secret_key = a key pair used to sign your requests to ES
- load the elastic_search_cloudtrail_template.json contents into your ES cluster (a minimal sketch for this step follows this list)
- create a new Lambda function
- to upload the code, zip this entire folder and upload it (you can leave README.md, elastic_search_cloudtrail_template.json and .gitignore out). Very important: the files must be at the root of the zip, not inside a folder.
- the handler for the function is s3_lambda_es.lambda_handler
- the Lambda role must allow read access to the CloudTrail S3 bucket and permission to create logs on CloudWatch
- on S3, create an event for ObjectCreated (All) pointing to the Lambda function
- done! Go check your Kibana now and see the data flowing in ;)
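
For the template step, here is a minimal sketch using the requests module. It assumes the ES domain's access policy allows requests from wherever you run it (otherwise the request has to be signed, the same way s3_lambda_es.py signs its calls), and it reuses the placeholder hostname from the script. Note that the first line of elastic_search_cloudtrail_template.json (`PUT /_template/logstash`) is meant for the Kibana/Sense console and has to be stripped before sending the raw JSON:

```python
import requests

# placeholder hostname - use your own AWS ES endpoint here
host = 'search-example-1234567890qwerty.so-meaz-1.es.amazonaws.com'

with open('elastic_search_cloudtrail_template.json') as f:
    lines = f.readlines()

# drop the leading console-style "PUT /_template/logstash" line, keep only the JSON body
body = ''.join(line for line in lines if not line.startswith('PUT '))

resp = requests.put('https://' + host + '/_template/logstash',
                    data=body,
                    headers={'Content-Type': 'application/json'})
print(resp.status_code, resp.text)
```

A 2xx response means the template was accepted; you can double-check it with a GET on /_template/logstash.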

## Notes

I have a more detailed post on [my blog](https://www.fernandobattistella.com.br/log_processing/2016/03/13/Cloudtrail-S3-Lambda-Elasticsearch.html).

## Issues

On the Lambda web console, in the Monitoring tab, there is a link to the function's logs on CloudWatch; if anything blows up, it will show there.
--------------------------------------------------------------------------------
/elastic_search_cloudtrail_template.json:
--------------------------------------------------------------------------------
PUT /_template/logstash
{
  "template" : "logstash-*",
  "settings" : {
    "index.refresh_interval" : "5s"
  },
  "mappings" : {
    "_default_" : {
      "_all" : {"enabled" : true, "omit_norms" : true},
      "dynamic_templates" : [ {
        "message_field" : {
          "match" : "message",
          "match_mapping_type" : "string",
          "mapping" : {
            "type" : "string", "index" : "analyzed", "omit_norms" : true,
            "fielddata" : { "format" : "enabled" }
          }
        }
      }, {
        "string_fields" : {
          "match" : "*",
          "match_mapping_type" : "string",
          "mapping" : {
            "type" : "string", "index" : "analyzed", "omit_norms" : true,
            "fielddata" : { "format" : "enabled" },
            "fields" : {
              "raw" : {"type": "string", "index" : "not_analyzed", "doc_values" : true, "ignore_above" : 256}
            }
          }
        }
      }, {
        "float_fields" : {
          "match" : "*",
          "match_mapping_type" : "float",
          "mapping" : { "type" : "float", "doc_values" : true }
        }
      }, {
        "double_fields" : {
          "match" : "*",
          "match_mapping_type" : "double",
          "mapping" : { "type" : "double", "doc_values" : true }
        }
      }, {
        "byte_fields" : {
          "match" : "*",
          "match_mapping_type" : "byte",
          "mapping" : { "type" : "byte", "doc_values" : true }
        }
      }, {
        "short_fields" : {
          "match" : "*",
          "match_mapping_type" : "short",
          "mapping" : { "type" : "short", "doc_values" : true }
        }
      }, {
        "integer_fields" : {
          "match" : "*",
          "match_mapping_type" : "integer",
          "mapping" : { "type" : "integer", "doc_values" : true }
        }
      }, {
        "long_fields" : {
          "match" : "*",
          "match_mapping_type" : "long",
          "mapping" : { "type" : "long", "doc_values" : true }
        }
      }, {
        "date_fields" : {
          "match" : "*",
          "match_mapping_type" : "date",
          "mapping" : { "type" : "date", "doc_values" : true }
        }
      }, {
        "geo_point_fields" : {
          "match" : "*",
          "match_mapping_type" : "geo_point",
          "mapping" : { "type" : "geo_point", "doc_values" : true }
        }
      } ],
      "properties" : {
        "@timestamp": { "type": "date", "doc_values" : true },
        "@version": { "type": "string", "index": "not_analyzed", "doc_values" : true },
        "geoip" : {
          "type" : "object",
          "dynamic": true,
          "properties" : {
            "ip": { "type": "ip", "doc_values" : true },
            "location" : { "type" : "geo_point", "doc_values" : true },
            "latitude" : { "type" : "float", "doc_values" : true },
            "longitude" : { "type" : "float", "doc_values" : true }
          }
        }
      }
    }
  }
}
--------------------------------------------------------------------------------
/s3_lambda_es.py:
--------------------------------------------------------------------------------
"""
Lambda function that receives an S3 event for a CloudTrail log file.
Downloads the file from the event and inserts its JSON contents into Elasticsearch.
Profit!

Signed URL code taken from the AWS docs and adapted for this script:
http://docs.aws.amazon.com/general/latest/gr/sigv4-signed-request-examples.html

Requires an access key with XXX permissions

The script will create an index for each day in the format
logstash-YYYY.MM.DD
with type cloudtrail
"""

import json
import gzip
import requests
import datetime
import hashlib
import hmac
import boto3

########################################################################################################################
# variables to be changed

# no https nor trailing slash in this one, just the full hostname of your Elasticsearch endpoint
host = 'search-example-1234567890qwerty.so-meaz-1.es.amazonaws.com'
region = 'us-east-1'

# access keys used in the url signing; for some reason the Lambda role ones didn't work for me
access_key = 'access key id'
secret_key = 'secret key'
########################################################################################################################

# variables that you shouldn't have to change, ever :)
method = 'POST'
service = 'es'
content_type = 'application/x-amz-json-1.0'


# functions used in the aws signed url
def sign(key, msg):
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()


def get_signature_key(key, date_stamp, region_name, service_name):
    k_date = sign(('AWS4' + key).encode('utf-8'), date_stamp)
    k_region = sign(k_date, region_name)
    k_service = sign(k_region, service_name)
    k_signing = sign(k_service, 'aws4_request')
    return k_signing

print 'Lambda function starting'

# defines an s3 boto client
s3 = boto3.client('s3')


# main function, started by lambda
def lambda_handler(event, context):
    print("Received event")
    # attribute bucket and file name/path to variables
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # where to save the downloaded file
    file_path = '/tmp/ctlogfile.gz'

    # downloads file to above path
    s3.download_file(bucket, key, file_path)

    # opens gz file for reading
    gzfile = gzip.open(file_path, "r")

    # loads contents of the Records key into a variable (our actual cloudtrail log entries!)
    response = json.loads(gzfile.readlines()[0])["Records"]

    # loops over the events in the json
    for i in response:
        # leave the boring and useless events out so we don't flood our elasticsearch db
        # if you want these, just remove this condition and de-indent the block below
        if not(
            i["eventSource"] == "elasticloadbalancing" and
            i["eventName"] == "describeInstanceHealth" and
            i.get("userIdentity", {}).get("userName") == "secret_username"
        ):
            """
            Prints go to cloudwatch logs ;) easy way to debug or get more information on your logs.
            I have my logs on cloudwatch set to erase in 1 day, so I go really verbose here.
            """

            print 'Sending event to elasticsearch'

            # adds @timestamp field = time of the event
            i["@timestamp"] = i["eventTime"]

            # removes .amazonaws.com from eventSource
            i["eventSource"] = i["eventSource"].split(".")[0]
            data = json.dumps(i)

            # defines the correct index name based on eventTime, so we have an index for each day on ES
            event_date = i["eventTime"].split("T")[0].replace("-", ".")

            # url endpoint for our ES cluster
            url = 'https://'+host+'/logstash-'+event_date+'/cloudtrail/'
            print "url :", url
            print "data: ", data

            # aws signed url stuff - for comments on this check their example page linked in the top comment
            t = datetime.datetime.utcnow()
            amz_date = t.strftime('%Y%m%dT%H%M%SZ')
            date_stamp = t.strftime('%Y%m%d')
            canonical_uri = '/logstash-'+event_date+'/cloudtrail/'
            canonical_querystring = ''
            canonical_headers = 'content-type:' + content_type + '\n' + \
                                'host:' + host + '\n' + \
                                'x-amz-date:' + amz_date + '\n'
            signed_headers = 'content-type;host;x-amz-date'
            payload_hash = hashlib.sha256(data).hexdigest()
            canonical_request = method + '\n' + \
                                canonical_uri + '\n' + \
                                canonical_querystring + '\n' + \
                                canonical_headers + '\n' + \
                                signed_headers + '\n' + \
                                payload_hash
            algorithm = 'AWS4-HMAC-SHA256'
            credential_scope = date_stamp + '/' + region + '/' + service + '/' + 'aws4_request'
            string_to_sign = algorithm + '\n' + \
                             amz_date + '\n' + \
                             credential_scope + '\n' + \
                             hashlib.sha256(canonical_request).hexdigest()
            signing_key = get_signature_key(secret_key, date_stamp, region, service)
            signature = hmac.new(signing_key, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()
            authorization_header = algorithm + ' ' + \
                                   'Credential=' + access_key + '/' + credential_scope + ', ' + \
                                   'SignedHeaders=' + signed_headers + ', ' + \
                                   'Signature=' + signature
            headers = {'Content-Type': content_type,
                       'X-Amz-Date': amz_date,
                       'Authorization': authorization_header}

            # sends the json to elasticsearch
            req = requests.post(url, data=data, headers=headers)
            print "status code: ", req.status_code
            print "text", req.text

            retry_counter = 1

            """
            If we fail for some reason we will retry 3 times.
            You will most likely have errors if you're copying a huge amount of logs from an old bucket
            to your new one.

            For normal usage you shouldn't have to worry about this.
            I have it in production with 90 AWS accounts pointing to the same bucket,
            and a pair of m3.mediums on the ES cluster, with 0 errors.

            I don't raise an exception on errors so we don't miss all the other entries in the file, or risk repeating any
            inserts done before the error.
            """

            # if our status code is not successful, and our retry counter is less than 4
            while req.status_code != 201 and retry_counter < 4:
                print "retry", retry_counter, "of 3 - failed sending data to elasticsearch:", req.status_code

                # send the data to ES again
                req = requests.post(url, data=data, headers=headers)

                # if it worked this time, nice! \o/
                if req.status_code == 201:
                    print "data successfully sent!"
                    print "status code: ", req.status_code
                    print "text", req.text
                retry_counter += 1

    print "all done for this file!"
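

# Below is a minimal local-test sketch: a hypothetical way to drive lambda_handler by hand.
# It assumes AWS credentials are configured locally and that the bucket/key placeholders
# below are replaced with a real gzipped CloudTrail log object before running it.
# Lambda imports this module, so this block never runs inside the function itself.
if __name__ == "__main__":
    sample_event = {
        "Records": [{
            "s3": {
                "bucket": {"name": "example-cloudtrail-bucket"},
                "object": {"key": "AWSLogs/123456789012/CloudTrail/us-east-1/2016/03/13/example.json.gz"}
            }
        }]
    }
    lambda_handler(sample_event, None)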
--------------------------------------------------------------------------------