├── .gitignore
├── du.py
├── helpers.py
├── license.txt
├── readme.md
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 
--------------------------------------------------------------------------------
/du.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | 
3 | import argparse
4 | import boto3
5 | from botocore.exceptions import NoCredentialsError, NoRegionError
6 | from helpers import cloudwatch_bucket_size, formatted_size, print_sizes_by_dir
7 | import sys
8 | 
9 | parser = argparse.ArgumentParser(description="This script is meant to be like the `du` tool for linux, except for inspecting the disk usage of s3 buckets. It will traverse s3 buckets and provide high level disk usage information to stdout.")
10 | parser.add_argument("-p", "--profile", default='default', help="AWS credentials profile to use")
11 | parser.add_argument("-b", "--bucket", help="Bucket to examine (ex: 'com.owocki.assets')")
12 | parser.add_argument("-d", "--depth", type=int, default=1, help="Depth to examine bucket (ex: 4)")
13 | parser.add_argument("-di", "--dir", default='/', help="Directory to examine (ex: 'logs/')")
14 | 
15 | # setup
16 | try:
17 |     # args
18 |     args = parser.parse_args()
19 | 
20 |     boto3.setup_default_session(profile_name=args.profile)
21 | 
22 |     s3 = boto3.resource('s3', config=boto3.session.Config(signature_version='s3v4'))
23 | 
24 |     if not args.bucket:
25 |         buckets = s3.buckets.all()
26 |     else:
27 |         buckets = [s3.Bucket(args.bucket)]
28 | 
29 |     target_depth = args.depth
30 |     target_dir = args.dir
31 | 
32 |     # examine each bucket
33 |     for bucket in buckets:
34 |         print(bucket.name)
35 | 
36 |         # get high level stats from cloudwatch
37 |         try:
38 |             cloudwatch_bucket_size_formatted = formatted_size(cloudwatch_bucket_size(bucket.name))
39 |             print('(Cloudwatch bucket size estimate: {})'.format(cloudwatch_bucket_size_formatted))
40 |         except Exception as e:
41 |             print("Could not get cloudwatch stats: {}".format(e))
42 | 
43 |         # traverse dirs in s3 bucket
44 |         print_sizes_by_dir(bucket, _dir=target_dir, target_depth=target_depth)
45 | 
46 | except NoCredentialsError:
47 |     print("Unable to locate aws credentials. Please make sure you have the following configuration file at '~/.aws/credentials': \n\n" +
48 |           "[default]\n" +
49 |           "aws_access_key_id = YOURACCESSKEY\n" +
50 |           "aws_secret_access_key = YOURSECRETKEY\n" +
51 |           "region=YOURREGION\n")
52 |     sys.exit(1)
53 | except NoRegionError:
54 |     print("No AWS region is configured. Please add 'region=YOURREGION' to your profile in '~/.aws/credentials' or '~/.aws/config'.")
55 |     sys.exit(1)
56 | 
--------------------------------------------------------------------------------
/helpers.py:
--------------------------------------------------------------------------------
1 | import boto3
2 | from datetime import datetime, timedelta
3 | 
4 | in_memory_cache = {}
5 | 
6 | def cloudwatch_bucket_size(bucket_name):
7 |     cloudwatch = boto3.client('cloudwatch')
8 |     response = cloudwatch.get_metric_statistics(
9 |         Namespace="AWS/S3",
10 |         MetricName="BucketSizeBytes",
11 |         Dimensions=[
12 |             {
13 |                 "Name": "BucketName",
14 |                 "Value": bucket_name
15 |             },
16 |             {
17 |                 "Name": "StorageType",
18 |                 "Value": "StandardStorage"
19 |             }
20 |         ],
21 |         StartTime=datetime.now() - timedelta(days=1),
22 |         EndTime=datetime.now(),
23 |         Period=86400,
24 |         Statistics=['Average']
25 |     )
26 |     datapoints = response['Datapoints']
27 |     if len(datapoints) == 0:
28 |         raise Exception('CloudWatch is not enabled. Do you have the right region in your AWS config?')
29 |     return datapoints[0]['Average']
30 | 
31 | 
32 | def formatted_size(num, suffix='B'):
33 |     for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
34 |         if abs(num) < 1024.0:
35 |             return "%3.1f%s%s" % (num, unit, suffix)
36 |         num /= 1024.0
37 |     return "%.1f%s%s" % (num, 'Yi', suffix)
38 | 
39 | 
40 | def print_sizes_by_dir(bucket, _dir='/', target_depth=1, current_depth=0):
41 | 
42 |     # recursive bounds checking
43 |     if current_depth > target_depth:
44 |         return
45 | 
46 |     # setup
47 |     prefix = '-' * current_depth
48 | 
49 |     # get size of files in this dir
50 |     total_size = 0
51 |     if _dir == '/':
52 |         object_summary_iterator = bucket.objects.all()
53 |     else:
54 |         object_summary_iterator = bucket.objects.filter(Prefix=_dir).all()
55 |     for obj in object_summary_iterator:
56 |         total_size += obj.size
57 | 
58 |     # print out size
59 |     hr_size = formatted_size(total_size)
60 |     print(" {} {} : {}".format(prefix, _dir, hr_size))
61 | 
62 |     # iterate through next level dirs
63 |     if _dir == '/':
64 |         result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter="/")
65 |     else:
66 |         result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter="/", Prefix=_dir)
67 |     dirs = result.get('CommonPrefixes')
68 |     if dirs is not None:
69 |         for o in dirs:
70 |             dir_name = o.get('Prefix')
71 | 
72 |             # recursively traverse this dir
73 |             print_sizes_by_dir(bucket, dir_name, target_depth, current_depth + 1)
--------------------------------------------------------------------------------
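
A note on `print_sizes_by_dir` above: the `list_objects` call returns at most 1,000 keys per request, so `CommonPrefixes` can be truncated for very large prefixes. The per-object size loop is unaffected, since boto3 resource collections paginate internally. Below is a minimal sketch of the same sub-directory listing done with a paginator; the `list_subdirs` name is illustrative and not part of this repo.

```python
# Sketch only: paginate so sub-directory prefixes beyond the first 1,000
# keys are not missed. `bucket` is the same boto3 Bucket resource that
# print_sizes_by_dir receives.
def list_subdirs(bucket, _dir='/'):
    paginator = bucket.meta.client.get_paginator('list_objects_v2')
    kwargs = {'Bucket': bucket.name, 'Delimiter': '/'}
    if _dir != '/':
        kwargs['Prefix'] = _dir
    subdirs = []
    for page in paginator.paginate(**kwargs):
        for common_prefix in page.get('CommonPrefixes', []):
            subdirs.append(common_prefix['Prefix'])
    return subdirs
```
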
/license.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2016 Kevin Owocki
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # s3_disk_util
2 | 
3 | * _What_ -- A tool that lets a user visualize which S3 buckets (and parts of buckets) are using the most storage.
4 | * _Why_ -- Because I'm trying to pare down my S3 bill, and the S3 control panels (even CloudWatch) do not really provide anything similar.
5 | * _Inspiration_ -- This script is meant to be like the `du` tool for linux, except for inspecting the disk usage of S3 buckets.
6 | * _How_ -- It will traverse S3 buckets and provide high level disk usage information to stdout.
7 | 
8 | # Usage
9 | 
10 | ```bash
11 | % python3 du.py --help
12 | usage: du.py [-h] [-p PROFILE] [-b BUCKET] [-d DEPTH] [-di DIR]
13 | 
14 | This script is meant to be like the `du` tool for linux, except for inspecting
15 | the disk usage of s3 buckets. It will traverse s3 buckets and provide high
16 | level disk usage information to stdout.
17 | 
18 | optional arguments:
19 |   -h, --help            show this help message and exit
20 |   -p PROFILE, --profile PROFILE
21 |                         AWS credentials profile to use
22 |   -b BUCKET, --bucket BUCKET
23 |                         Bucket to examine (ex: 'com.owocki.assets')
24 |   -d DEPTH, --depth DEPTH
25 |                         Depth to examine bucket (ex: 4)
26 |   -di DIR, --dir DIR    Directory to examine (ex: 'logs/')
27 | ```
28 | 
29 | ## Example
30 | 
31 | ```bash
32 | % python3 du.py --depth=1 --bucket=BUCKETNAME --profile=mytestaccount
33 | BUCKETNAME
34 | (Cloudwatch bucket size estimate: 22.7GiB)
35 |   / : 22.7GiB
36 |  - DIR1/ : 22.6GiB
37 |  - DIR2/ : 452.6KiB
38 |  - DIR3/ : 1.6MiB
39 |  - DIR4/ : 119.0MiB
40 |  - DIR5/ : 0.0B
41 | 
42 | % python3 du.py --depth=2 --bucket=BUCKETNAME
43 | BUCKETNAME
44 | (Cloudwatch bucket size estimate: 22.7GiB)
45 |   / : 22.7GiB
46 |  - DIR1/ : 22.6GiB
47 |  -- DIR1/SUBDIR1/ : 31.1MiB
48 |  -- DIR1/SUBDIR2/ : 12.7GiB
49 |  -- DIR1/SUBDIR3/ : 0.0B
50 |  -- DIR1/SUBDIR4/ : 9.9GiB
51 |  - DIR2/ : 452.6KiB
52 |  -- DIR2/SUBDIR1/ : 429.5KiB
53 |  - DIR3/ : 1.6MiB
54 |  -- DIR3/SUBDIR1/ : 254.4KiB
55 |  - DIR4/ : 119.0MiB
56 |  - DIR5/ : 0.0B
57 | 
58 | ```
59 | 
60 | # Setup
61 | 
62 | 1. Create an AWS IAM user at https://console.aws.amazon.com/iam/home.
63 |     * Make sure your user has the `AmazonS3FullAccess` and `CloudWatchReadOnlyAccess` policies.
64 | 2. Use your existing `~/.aws/credentials` file and profile names, or create a config file that looks like this:
65 | ```bash
66 | % cat ~/.aws/credentials
67 | 
68 | [default]
69 | aws_access_key_id = ACCESS_KEY_GOES_HERE
70 | aws_secret_access_key = SECRET_KEY_GOES_HERE
71 | region=REGION
72 | ```
73 | 3. Clone this repo.
74 | 4. Install python3 (if needed) and boto3 (if needed).
75 |     * To install python3, instructions differ depending on your OS. Using Homebrew is probably the easiest (`brew install python3`); here are some instructions for [Mac OS X](http://www.marinamele.com/2014/07/install-python3-on-mac-os-x-and-use-virtualenv-and-virtualenvwrapper.html).
76 |     * To install boto3, run `pip install -r requirements.txt`.
77 | 5. Run `du.py` with the usage described above.
78 | 
79 | # What else
80 | 
81 | This script can run a little slowly on larger buckets. That's okay; it is a limitation inherent to the way the AWS APIs expose this information. Pipe `du.py`'s output to a file (perhaps inside a `screen` or `tmux` session) and come back later.
82 | 
--------------------------------------------------------------------------------
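
One caveat on the `(Cloudwatch bucket size estimate: ...)` lines in the example output above: `cloudwatch_bucket_size` in `helpers.py` queries `BucketSizeBytes` for the `StandardStorage` storage type only, so data kept in other storage classes is not counted in that estimate. A hedged sketch of summing the metric over several storage types follows; the function name and the particular storage-type list are illustrative, not part of the repo.

```python
import boto3
from datetime import datetime, timedelta

# Sketch only: total BucketSizeBytes across a few storage classes. A bucket
# reports datapoints only for the classes it actually uses.
def bucket_size_all_classes(bucket_name,
                            storage_types=('StandardStorage',
                                           'StandardIAStorage',
                                           'GlacierStorage')):
    cloudwatch = boto3.client('cloudwatch')
    total = 0
    for storage_type in storage_types:
        response = cloudwatch.get_metric_statistics(
            Namespace='AWS/S3',
            MetricName='BucketSizeBytes',
            Dimensions=[
                {'Name': 'BucketName', 'Value': bucket_name},
                {'Name': 'StorageType', 'Value': storage_type},
            ],
            StartTime=datetime.now() - timedelta(days=1),
            EndTime=datetime.now(),
            Period=86400,
            Statistics=['Average'],
        )
        datapoints = response['Datapoints']
        if datapoints:
            total += datapoints[0]['Average']
    return total
```
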
/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3
--------------------------------------------------------------------------------
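
For quick interactive checks, the helpers can also be imported into a Python shell instead of running `du.py` end to end. A minimal sketch, assuming the `~/.aws/credentials` setup described in the readme; the bucket name is just the example value from `du.py`'s `--bucket` help text.

```python
# Sketch only: call the helpers directly without running du.py.
import boto3
from helpers import cloudwatch_bucket_size, formatted_size

boto3.setup_default_session(profile_name='default')
size_bytes = cloudwatch_bucket_size('com.owocki.assets')
print(formatted_size(size_bytes))  # e.g. '22.7GiB'
```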