├── LICENSE.md
├── Pipfile
├── Pipfile.lock
├── README.md
├── environment.yml
├── nuke_log_group.py
└── sweep_log_streams.py

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Seth Miller

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
boto3 = "*"

[requires]
python_version = "3.7"
--------------------------------------------------------------------------------
/Pipfile.lock:
--------------------------------------------------------------------------------
{
    "_meta": {
        "hash": {
            "sha256": "49efc3368e25cc541579c91c92b08134ed8a23fc9d5c328ef8207674428382f5"
        },
        "pipfile-spec": 6,
        "requires": {
            "python_version": "3.7"
        },
        "sources": [
            {
                "name": "pypi",
                "url": "https://pypi.org/simple",
                "verify_ssl": true
            }
        ]
    },
    "default": {
        "boto3": {
            "hashes": [
                "sha256:01f1792838981191da87bed271f2b486841a282fcd78a3b5bac98e5b85c95ba8",
                "sha256:0f1f796abf85e53a8a50b893cbeccd93ee081184453127b663da4b97d72aa0cc"
            ],
            "index": "pypi",
            "version": "==1.9.197"
        },
        "botocore": {
            "hashes": [
                "sha256:3baf129118575602ada9926f5166d82d02273c250d0feb313fc270944b27c48b",
                "sha256:dc080aed4f9b220a9e916ca29ca97a9d37e8e1d296fe89cbaeef929bf0c8066b"
            ],
            "version": "==1.12.253"
        },
        "docutils": {
            "hashes": [
                "sha256:6c4f696463b79f1fb8ba0c594b63840ebd41f059e92b31957c46b74a4599b6d0",
                "sha256:9e4d7ecfc600058e07ba661411a2b7de2fd0fafa17d1a7f7361cd47b1175c827",
                "sha256:a2aeea129088da402665e92e0b25b04b073c04b2dce4ab65caaa38b7ce2e1a99"
            ],
            "version": "==0.15.2"
        },
        "jmespath": {
            "hashes": [
                "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9",
                "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"
            ],
            "version": "==0.10.0"
        },
        "python-dateutil": {
            "hashes": [
                "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
                "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
            ],
            "markers": "python_version >= '2.7'",
            "version": "==2.8.1"
        },
        "s3transfer": {
            "hashes": [
                "sha256:6efc926738a3cd576c2a79725fed9afde92378aa5c6a957e3af010cb019fac9d",
                "sha256:b780f2411b824cb541dbcd2c713d0cb61c7d1bcadae204cdddda2b35cef493ba"
            ],
            "version": "==0.2.1"
        },
        "six": {
            "hashes": [
                "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
                "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
            ],
            "version": "==1.15.0"
        },
        "urllib3": {
            "hashes": [
                "sha256:2f3db8b19923a873b3e5256dc9c2dedfa883e33d87c690d9c7913e1f40673cdc",
                "sha256:87716c2d2a7121198ebcb7ce7cccf6ce5e9ba539041cfbaeecfb641dc0bf6acc"
            ],
            "index": "pypi",
            "version": "==1.25.8"
        }
    },
    "develop": {}
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# AWS CloudWatch Log Clean

Some simple scripts for cleaning AWS CloudWatch Logs. Useful for cleaning up after AWS Lambda functions: AWS doesn't seem to have nice rotation and cleanup for these, which leads to extremely degraded performance when "tailing" logs in CloudWatch.

**Notice:** These are destructive operations that will result in lost data if used incorrectly. I'm not responsible for any of your data that gets lost. I have personally tested these scripts and have used them as part of a maintenance procedure, but I don't guarantee they will work perfectly for you. Look over the scripts before running them; they're hopefully pretty straightforward.

## Installation

Ensure you have boto3: `pip install boto3`

These are just little helper scripts and they aren't on PyPI or anything, so just grab the zip:

```bash
wget -O aws-cloudwatch-log-clean.zip https://github.com/four43/aws-cloudwatch-log-clean/archive/master.zip \
    && unzip ./aws-cloudwatch-log-clean.zip
```

Or copy/paste the raw files into your own `.py` file. They don't depend on anything except `boto3`.
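Both scripts pick up credentials and region from `boto3`'s standard configuration chain (environment variables, `~/.aws/credentials`, instance roles, and so on), so make sure your shell is pointed at the account you intend to clean. A minimal sketch; the profile and region names here are placeholders, not something this repo defines:

```bash
# Target a specific account/region before running either script
export AWS_PROFILE=my-profile        # substitute your own profile name
export AWS_DEFAULT_REGION=us-east-1  # the region your log groups live in
```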
"markers": "python_version >= '2.7'", 55 | "version": "==2.8.1" 56 | }, 57 | "s3transfer": { 58 | "hashes": [ 59 | "sha256:6efc926738a3cd576c2a79725fed9afde92378aa5c6a957e3af010cb019fac9d", 60 | "sha256:b780f2411b824cb541dbcd2c713d0cb61c7d1bcadae204cdddda2b35cef493ba" 61 | ], 62 | "version": "==0.2.1" 63 | }, 64 | "six": { 65 | "hashes": [ 66 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", 67 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" 68 | ], 69 | "version": "==1.15.0" 70 | }, 71 | "urllib3": { 72 | "hashes": [ 73 | "sha256:2f3db8b19923a873b3e5256dc9c2dedfa883e33d87c690d9c7913e1f40673cdc", 74 | "sha256:87716c2d2a7121198ebcb7ce7cccf6ce5e9ba539041cfbaeecfb641dc0bf6acc" 75 | ], 76 | "index": "pypi", 77 | "version": "==1.25.8" 78 | } 79 | }, 80 | "develop": {} 81 | } 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AWS CloudWatch Log Clean 2 | 3 | Some simple scripts for cleaning AWS CloudWatch Logs. Useful for cleaning up after AWS Lambda Functions. AWS doesn't seem to have nice rotation and cleanup for these. Leading to extremely degraded performance for CloudWatch metric "tailing". 4 | 5 | **Notice:** These are destructive operations that will result in lost data if used incorrectly. I'm not responsible for any of your data that gets lost. I have personally tested these scripts and have used them as part of a maintenance procedure, but I don't guarantee they will work perfectly for you. Look over the scripts before running they, they're hopefully pretty straight forward. 6 | 7 | ## Installation 8 | 9 | Ensure you have boto3: `pip install -y boto3` 10 | 11 | These are just little helpers scripts and it isn't on Pypi or anything, just grab the zip: 12 | 13 | ```bash 14 | wget -O aws-loudwatch-log-clean.zip https://github.com/four43/aws-cloudwatch-log-clean/archive/master.zip \ 15 | && unzip ./aws-loudwatch-log-clean.zip 16 | ``` 17 | 18 | Or copy/paste the raw files into your own `.py` file. They don't depend on anything except `boto3` 19 | 20 | ## Usage 21 | 22 | ### nuke_log_group.py 23 | 24 | This delete a log group and replace it with a new one with the same settings. This essentially clears all log streams from the log group. Usage: 25 | 26 | ```bash 27 | ./nuke_log_group.py [log-stream-prefix] --dry-run 28 | ``` 29 | 30 | Check that everything looks good, then: 31 | 32 | ```bash 33 | ./nuke_log_group.py [log-stream-prefix] 34 | ``` 35 | 36 | **Example:** 37 | 38 | ```bash 39 | ./nuke_log_group.py /aws/lambda/my-func 40 | ``` 41 | 42 | ### sweep_log_streams.py 43 | 44 | This will clean up old and empty log streams inside of a log group. Usage: 45 | 46 | ```bash 47 | ./sweep_log_streams.py [log-stream-prefix] --dry-run 48 | ``` 49 | 50 | Check that everything looks good, then: 51 | 52 | ```bash 53 | ./sweep_log_streams.py [log-stream-prefix] 54 | ``` 55 | 56 | 57 | Example: 58 | 59 | ```bash 60 | ./sweep_log_streams.py /aws/lambda/my-func 61 | ``` 62 | 63 | ## Contributing 64 | 65 | Feedback, issues, forks, and pull requests welcome! Thanks. 
### sweep_log_streams.py

This cleans up old, empty log streams inside the log groups matching a prefix. Usage:

```bash
./sweep_log_streams.py [log-group-prefix] --dry-run
```

Check that everything looks good, then:

```bash
./sweep_log_streams.py [log-group-prefix]
```

**Example:**

```bash
./sweep_log_streams.py /aws/lambda/my-func
```
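Note that a stream is only deleted when its last event is older than the group's `retentionInDays`; groups with no retention policy are skipped entirely. If a group you want swept has no policy yet, you can set one first (again via the AWS CLI, as a sketch):

```bash
# Give the group a 14-day retention policy so the sweep has a cutoff to work from
aws logs put-retention-policy \
    --log-group-name /aws/lambda/my-func \
    --retention-in-days 14
```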
## Contributing

Feedback, issues, forks, and pull requests welcome! Thanks.
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
name: aws-cloudwatch-log-clean
channels:
  - conda-forge
dependencies:
  - boto3
--------------------------------------------------------------------------------
/nuke_log_group.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
import argparse

import boto3

client = boto3.client('logs')


def get_log_group_config(log_group_name):
    log_groups_result = client.describe_log_groups(
        logGroupNamePrefix=log_group_name,
        limit=2
    )
    log_groups = log_groups_result['logGroups']
    if len(log_groups) == 0:
        # No log group found
        raise Exception("No log groups found by name: " + log_group_name)
    elif len(log_groups) > 1:
        # Still okay if the first result matches exactly.
        if log_groups[0]['logGroupName'] != log_group_name:
            # Too many log groups found
            raise Exception("More than one log group found, be more specific: {}\n"
                            " - Nuking a log group is pretty destructive, we'll just do one at a time for now"
                            .format(log_group_name))

    log_group_config = log_groups[0]

    if log_group_config['logGroupName'] != log_group_name:
        # Name wasn't an exact match
        raise Exception("Log group search found a single match but it wasn't quite right.\n"
                        "Did you mean: " + log_group_config['logGroupName'] + " ?")
    return log_group_config


def get_log_group_metric_filters(log_group_name, next_token=None):
    opts = {
        'logGroupName': log_group_name,
        'limit': 50  # Maximum
    }
    if next_token:
        opts['nextToken'] = next_token

    metric_filters_response = client.describe_metric_filters(**opts)
    if metric_filters_response:
        for metric_filter in metric_filters_response['metricFilters']:
            yield metric_filter
        # Exhausted, try to loop with the paging token
        if 'nextToken' in metric_filters_response:
            yield from get_log_group_metric_filters(log_group_name, metric_filters_response['nextToken'])


def get_log_group_subscription_filters(log_group_name, next_token=None):
    opts = {
        'logGroupName': log_group_name,
        'limit': 50  # Maximum
    }
    if next_token:
        opts['nextToken'] = next_token

    subscription_filters_response = client.describe_subscription_filters(**opts)
    if subscription_filters_response:
        for subscription_filter in subscription_filters_response['subscriptionFilters']:
            yield subscription_filter
        # Exhausted, try to loop with the paging token
        if 'nextToken' in subscription_filters_response:
            yield from get_log_group_subscription_filters(log_group_name, subscription_filters_response['nextToken'])


def main(log_group_name, dry_run=False):
    log_group_config = get_log_group_config(log_group_name)

    log_group_tags = client.list_tags_log_group(
        logGroupName=log_group_name
    )['tags']

    # Fully pump our generators; we're deleting this resource, so make sure we have everything first.
    log_group_metric_filters = list(get_log_group_metric_filters(log_group_name))
    log_group_subscription_filters = list(get_log_group_subscription_filters(log_group_name))

    # Delete the log group
    if dry_run:
        print("Would delete: {} (but --dry-run is set)".format(log_group_name))
    else:
        print("Deleting: {}".format(log_group_name))
        client.delete_log_group(
            logGroupName=log_group_name
        )

    # Create the new log group with the same settings
    create_opts = {
        'logGroupName': log_group_name
    }
    if 'kmsKeyId' in log_group_config:
        create_opts['kmsKeyId'] = log_group_config['kmsKeyId']
    if log_group_tags:
        create_opts['tags'] = log_group_tags

    if dry_run:
        print("Would create new log group: " + log_group_name + " (but --dry-run is set)")
    else:
        print("Creating: {}".format(log_group_name))
        client.create_log_group(**create_opts)

    # Put everything back
    # Retention policy (absent from the config if the group never expires events)
    if 'retentionInDays' in log_group_config:
        if dry_run:
            print(
                "Would put new retention policy of {} days (but --dry-run is set)"
                .format(log_group_config['retentionInDays'])
            )
        else:
            print("Setting expiration to: {} days".format(log_group_config['retentionInDays']))
            client.put_retention_policy(
                logGroupName=log_group_name,
                retentionInDays=log_group_config['retentionInDays']
            )

    # Metric filters
    for metric_filter in log_group_metric_filters:
        if dry_run:
            print("Would put metric filter {} (but --dry-run is set)".format(metric_filter['filterName']))
        else:
            print("Putting metric filter {}".format(metric_filter['filterName']))
            # describe_metric_filters includes creationTime, which put_metric_filter doesn't accept
            del metric_filter['creationTime']
            client.put_metric_filter(
                **metric_filter
            )

    # Subscription filters
    for subscription_filter in log_group_subscription_filters:
        if dry_run:
            print("Would put subscription filter for {} (but --dry-run is set)"
                  .format(subscription_filter['destinationArn']))
        else:
            print("Putting subscription filter for {}".format(subscription_filter['destinationArn']))
            del subscription_filter['creationTime']
            client.put_subscription_filter(
                **subscription_filter
            )


def get_arg_parser():
    parser = argparse.ArgumentParser(description="Removes a log group and replaces it with a new, empty one with "
                                                 "the same settings")
    parser.add_argument("--dry-run",
                        dest="dry_run",
                        action="store_true",
                        help="Just print what we're going to do, don't actually do it."
                        )
    parser.add_argument(dest="log_group_name",
                        help="The log group to clean and replace. Example: '/aws/lambda/my-app'"
                        )
    return parser


if __name__ == "__main__":
    args = get_arg_parser().parse_args()
    main(args.log_group_name, args.dry_run)
--------------------------------------------------------------------------------
/sweep_log_streams.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
import argparse
from datetime import datetime, timedelta
from time import sleep

import boto3
from dateutil import tz

client = boto3.client('logs')


def print_log_group(log_group, message):
    print("[{}] {}".format(log_group['logGroupName'], message))


def get_log_groups(prefix, next_token=None):
    opts = {
        'logGroupNamePrefix': prefix,
        'limit': 50  # Maximum
    }
    if next_token:
        opts['nextToken'] = next_token
    log_groups_response = client.describe_log_groups(**opts)
    if log_groups_response:
        for log_group in log_groups_response['logGroups']:
            yield log_group
        # Exhausted, try to loop with the paging token
        if 'nextToken' in log_groups_response:
            yield from get_log_groups(prefix, log_groups_response['nextToken'])


def get_streams(log_group, next_token=None):
    opts = {
        'logGroupName': log_group['logGroupName'],
        'limit': 50  # Maximum
    }
    if next_token:
        opts['nextToken'] = next_token

    response = client.describe_log_streams(**opts)

    if response:
        for stream in response['logStreams']:
            yield stream
        if 'nextToken' in response:
            yield from get_streams(log_group, response['nextToken'])
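
# Note: the manual nextToken recursion above could also be written with botocore's
# built-in paginators (a sketch under that assumption, not wired in here):
#
#     def get_streams(log_group):
#         paginator = client.get_paginator('describe_log_streams')
#         for page in paginator.paginate(logGroupName=log_group['logGroupName']):
#             yield from page['logStreams']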

def delete_old_streams(log_group, dry_run=False):
    """
    Delete old log streams that are empty. Events get cleaned up per log_group['retentionInDays'], but the
    streams themselves don't.
    """
    print_log_group(log_group, "Checking for old streams...")

    now = datetime.utcnow().replace(tzinfo=tz.tzutc())
    if 'retentionInDays' in log_group:
        oldest_valid_event = now - timedelta(days=log_group['retentionInDays'])
    else:
        # Log group has no expiration set, we're done here.
        print_log_group(log_group, "Log group has no expiration set, skipping.")
        return

    print(" - Streams in group: " + log_group['logGroupName'])
    for stream in get_streams(log_group):

        # lastEventTimestamp sometimes doesn't update right away, and it is missing entirely if the stream
        # was created with no events
        if 'lastEventTimestamp' in stream:
            stream_time = datetime.fromtimestamp(stream['lastEventTimestamp'] / 1000, tz=tz.tzutc())
        else:
            # Fall back to the stream's creation time if we don't have a lastEventTimestamp
            stream_time = datetime.fromtimestamp(stream['creationTime'] / 1000, tz=tz.tzutc())

        if stream_time < oldest_valid_event:
            if dry_run:
                print_log_group(log_group, "Would delete stream: " + stream['logStreamName'] + " (--dry-run set)")
            else:
                print_log_group(log_group, "Deleting stream: " + stream['logStreamName'])
                client.delete_log_stream(
                    logGroupName=log_group['logGroupName'],
                    logStreamName=stream['logStreamName']
                )
                print_log_group(log_group, "Deleted stream: " + stream['logStreamName'])
                # The AWS API gets overloaded if we go too fast.
                sleep(0.2)
        else:
            print_log_group(log_group, "Checked stream, keeping: " + stream['logStreamName'])


def get_arg_parser():
    parser = argparse.ArgumentParser(description="Cleans up old and empty log streams from log groups matching a "
                                                 "provided prefix")

    parser.add_argument("--dry-run",
                        dest="dry_run",
                        action="store_true",
                        help="Just print what we're going to do, don't actually do it."
                        )

    parser.add_argument("prefix",
                        help="The log group prefix to filter for. Example: '/aws/lambda/app-staging-'"
                        )
    return parser


def main(prefix, dry_run=False):
    for log_group in get_log_groups(prefix):
        delete_old_streams(log_group, dry_run)
    print("Done")


if __name__ == "__main__":
    args = get_arg_parser().parse_args()
    main(args.prefix, args.dry_run)
--------------------------------------------------------------------------------