├── test.txt
├── test
│   └── .gitignore
├── requirements.txt
├── .gitignore
├── buckets
│   └── .gitignore
├── sites.txt
├── .travis.yml
├── s3utils.py
├── README.md
├── s3scanner.py
└── test_scanner.py

--------------------------------------------------------------------------------
/test.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/test/.gitignore:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
argparse
requests
awscli
sh
pytest
coloredlogs

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
venv/
venv2.7/
.idea/
__pycache__
*.pyc
.cache/
.pytest_cache

--------------------------------------------------------------------------------
/buckets/.gitignore:
--------------------------------------------------------------------------------
# Ignore everything in this directory
*
# Except this file
!.gitignore

--------------------------------------------------------------------------------
/sites.txt:
--------------------------------------------------------------------------------
flaws.cloud
arstechnica.com
lifehacker.com
gizmodo.com
reddit.com
stackoverflow.com

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: python
python:
  - "2.7"
  - "3.6"
cache: pip
install:
  - pip install -r requirements.txt
script:
  - pytest
notifications:
  email: false

--------------------------------------------------------------------------------
/s3utils.py:
--------------------------------------------------------------------------------
import sh
import requests
import os


def checkBucket(bucketName, region):
    """ Does a simple HEAD request with the Requests library and interprets the results.

    bucketName - Name of the bucket to check (may be a domain name), without protocol (http[s])
    region - An s3 region. See: https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region """

    bucketDomain = 'http://' + bucketName + '.s3-' + region + '.amazonaws.com'

    try:
        r = requests.head(bucketDomain)
    except requests.exceptions.ConnectionError:  # Couldn't resolve the hostname. Definitely not a bucket.
        message = "{0:>15} : {1}".format("[not found]", bucketName)
        return 900, message

    if r.status_code == 200:    # Successfully found a bucket!
        size = getBucketSize(bucketName)
        return 200, bucketName, region, size

    elif r.status_code == 301:  # We tried the wrong region. 'x-amz-bucket-region' header will give us the correct one.
        return 301, r.headers['x-amz-bucket-region']

    elif r.status_code == 403:  # Bucket exists, but we're not allowed to LIST it.
        return 403, bucketName, region

    elif r.status_code == 404:  # This is definitely not a valid bucket name.
        message = "{0:>15} : {1}".format("[not found]", bucketName)
        return 404, message
    else:
        raise ValueError("Got an unhandled status code back: " + str(r.status_code) + " for site: " + bucketName + ":" + region)


def dumpBucket(bucketName, region):

    # Check to make sure the bucket is open
    b = checkBucket(bucketName, region)
    if b[0] != 200:
        raise ValueError("The specified bucket is not open.")

    # Dump the bucket into the buckets folder
    bucketDir = './buckets/' + bucketName
    if not os.path.exists(bucketDir):
        os.makedirs(bucketDir)

    sh.aws('s3', 'sync', 's3://' + bucketName, bucketDir, '--no-sign-request', _fg=True)

    # Check if the folder is empty. If it is, delete it
    if not os.listdir(bucketDir):
        # Delete empty folder
        os.rmdir(bucketDir)


def getBucketSize(bucketName):
    """
    Use awscli to 'ls' the bucket, which will give us the total size of the bucket.
    NOTE:
        Function assumes the bucket exists and doesn't catch errors if it doesn't.
    """
    try:
        a = sh.aws('s3', 'ls', '--summarize', '--human-readable', '--recursive', '--no-sign-request',
                   's3://' + bucketName, _timeout=8)
    except sh.TimeoutException:
        return "Unknown Size"
    # Get the last line of the output ("Total Size: ..."), take everything to the right of the colon, and strip whitespace
    return a.splitlines()[-1].split(":")[1].strip()
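A minimal sketch of how these return tuples are meant to be consumed: on a 301 the second element carries the correct region, so the caller retries once; on a 200 the tuple holds the bucket name, region, and size. This mirrors the driver loop in s3scanner.py below; flaws.cloud is just the example bucket used throughout this repo.

```python
import s3utils as s3

result = s3.checkBucket('flaws.cloud', 'us-west-1')

if result[0] == 301:
    # Wrong region guessed; element 1 holds the region AWS redirected us to.
    result = s3.checkBucket('flaws.cloud', result[1])

if result[0] == 200:
    code, name, region, size = result
    print("{0}:{1} - {2}".format(name, region, size))  # e.g. flaws.cloud:us-west-2 - 9.1 KiB
```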
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# S3Scanner
[License: CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) [Build Status](https://travis-ci.org/sa7mon/S3Scanner)

A tool to find open S3 buckets and dump their contents :droplet:

## Using

```
# s3scanner - Find S3 buckets and dump!
#
# Author: Dan Salmon - @bltjetpack, github.com/sa7mon

positional arguments:
  buckets                      Name of text file containing buckets to check

optional arguments:
  -h, --help                   show this help message and exit
  -o, --out-file OUTFILE       Name of file to save the successfully checked buckets in (Default: buckets.txt)
  -c, --include-closed         Include found but closed buckets in the out-file
  -r, --default-region REGION  AWS region to default to (Default: us-west-1)
  -d, --dump                   Dump all found open buckets locally
```

The tool takes in a list of bucket names to check. Found S3 domains are output to file with their corresponding region in the format 'domain:region'. The tool will also dump the contents of 'open' buckets locally.

## Examples
This tool accepts the following types of bucket formats to check (see the sketch after the list):

- bucket name - `google-dev`
- domain name - `uber.com`, `sub.domain.com`
- full s3 url - `yahoo-staging.s3-us-west-2.amazonaws.com` (to easily combine with other tools like [bucket-stream](https://github.com/eth0izzle/bucket-stream))
- bucket:region - `flaws.cloud:us-west-2`
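For reference, the format detection inside s3scanner.py boils down to the following (simplified; the sample line is only illustrative):

```python
line = 'yahoo-staging.s3-us-west-2.amazonaws.com'
region = 'us-west-1'                          # default region

if '.amazonaws.com' in line:                  # full s3 url
    bucket = line[:line.rfind('.s3')]
    region = line[line.rfind('.s3') + 4:line.rfind('.amazonaws.com')]
elif ':' in line:                             # bucket:region
    bucket, region = line.split(':')
else:                                         # bare bucket name or domain name
    bucket = line
```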
```bash
> cat names.txt
flaws.cloud
google-dev
testing.microsoft.com
yelp-production.s3-us-west-1.amazonaws.com
github-dev:us-east-1
```

1. Dump all open buckets, and log both open and closed buckets to found.txt

   ```bash
   > python ./s3scanner.py --include-closed --out-file found.txt --dump names.txt
   ```
2. Just log open buckets to the default output file (buckets.txt)

   ```bash
   > python ./s3scanner.py names.txt
   ```

## Installation
1. (Optional) `virtualenv venv && source ./venv/bin/activate`
2. `pip install -r requirements.txt`
3. `python ./s3scanner.py`

(Compatibility has been tested with Python 2.7 and 3.6)

## Contributing
Issues are welcome and Pull Requests are appreciated.

| master       | [Build Status](https://travis-ci.org/sa7mon/S3Scanner) |
|:------------:|:-------------------------------------------------------:|
| enhancements | [Build Status](https://travis-ci.org/sa7mon/S3Scanner) |
| bugs         | [Build Status](https://travis-ci.org/sa7mon/S3Scanner) |

All contributions should be compatible with both Python 2.7 and 3.6. Run tests with `pytest` in 2.7 and 3.6 virtual environments.

## License
Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International [(CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/)

--------------------------------------------------------------------------------
/s3scanner.py:
--------------------------------------------------------------------------------
#########
#
# AWS S3scanner - Scans domain names for S3 buckets
#
# Author: Dan Salmon (twitter.com/bltjetpack, github.com/sa7mon)
# Created: 6/19/17
# License: Creative Commons (CC BY-NC-SA 4.0)
#
#########

import argparse
import s3utils as s3
import logging
import coloredlogs
import sys


# We want to use both formatter classes, so a custom class it is
class CustomFormatter(argparse.RawTextHelpFormatter, argparse.RawDescriptionHelpFormatter):
    pass


# Instantiate the parser
parser = argparse.ArgumentParser(description='# s3scanner - Find S3 buckets and dump!\n'
                                             '#\n'
                                             '# Author: Dan Salmon - @bltjetpack, github.com/sa7mon\n',
                                 prog='s3scanner', formatter_class=CustomFormatter)

# Declare arguments
parser.add_argument('-o', '--out-file', required=False, dest='outFile',
                    help='Name of file to save the successfully checked buckets in (Default: buckets.txt)')
parser.add_argument('-c', '--include-closed', required=False, dest='includeClosed', action='store_true',
                    help='Include found but closed buckets in the out-file')
parser.add_argument('-r', '--default-region', dest='defaultRegion', metavar='REGION',
                    help='AWS region to default to (Default: us-west-1)')
parser.add_argument('-d', '--dump', required=False, dest='dump', action='store_true',
                    help='Dump all found open buckets locally')
parser.add_argument('buckets', help='Name of text file containing buckets to check')

parser.set_defaults(defaultRegion='us-west-1')
parser.set_defaults(includeClosed=False)
parser.set_defaults(outFile='./buckets.txt')
parser.set_defaults(dump=False)

# If there are no args supplied, print the full help text instead of the short usage text
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)

# Parse the args
args = parser.parse_args()

# Create file logger
flog = logging.getLogger('s3scanner-file')
flog.setLevel(logging.DEBUG)  # Set log level for logger object

# Create file handler which logs even debug messages
fh = logging.FileHandler(args.outFile)
fh.setLevel(logging.DEBUG)

# Add the handler to logger
flog.addHandler(fh)

# Create secondary logger for logging to screen
slog = logging.getLogger('s3scanner-screen')
slog.setLevel(logging.INFO)

# Logging levels for the screen logger:
#   INFO  = found, open
#   WARN  = found, closed
#   ERROR = not found
# The levels serve no other purpose than to specify the output color

levelStyles = {
    'info': {'color': 'blue'},
    'warning': {'color': 'yellow'},
    'error': {'color': 'red'}
}

fieldStyles = {
    'asctime': {'color': 'white'}
}

# Use coloredlogs to add color to the screen logger. Define format and styles.
coloredlogs.install(level='DEBUG', logger=slog, fmt='%(asctime)s %(message)s',
                    level_styles=levelStyles, field_styles=fieldStyles)


with open(args.buckets, 'r') as f:
    for line in f:
        line = line.rstrip()            # Remove any extra whitespace
        region = args.defaultRegion

        # Determine what kind of input we're given. Options:
        #   bucket name   i.e. mybucket
        #   domain name   i.e. flaws.cloud
        #   full S3 url   i.e. flaws.cloud.s3-us-west-2.amazonaws.com
        #   bucket:region i.e. flaws.cloud:us-west-2

        if ".amazonaws.com" in line:    # We were given a full s3 url
            bucket = line[:line.rfind(".s3")]
            region = line[line.rfind(".s3") + 4:line.rfind(".amazonaws.com")]
        elif ":" in line:               # We were given a bucket in 'bucket:region' format
            region = line.split(":")[1]
            bucket = line.split(":")[0]
        else:                           # We were either given a bucket name or a domain name
            bucket = line

        result = s3.checkBucket(bucket, region)

        if result[0] == 301:
            result = s3.checkBucket(bucket, result[1])

        if result[0] in [900, 404]:     # These are our 'bucket not found' codes
            slog.error(result[1])

        elif result[0] == 403:          # Found but closed bucket. Only log if the user says to.
            message = "{0:>15} : {1}".format("[found] [closed]", result[1] + ":" + result[2])
            slog.warning(message)
            if args.includeClosed:      # If the user supplied the '--include-closed' flag, log this bucket to file
                flog.debug(result[1] + ":" + result[2])

        elif result[0] == 200:          # The only 'bucket found and open' code
            message = "{0:<7}{1:>9} : {2}".format("[found]", "[open]", result[1] + ":" + result[2] + " - " + result[3])
            slog.info(message)
            flog.debug(result[1] + ":" + result[2])
            if args.dump:
                s3.dumpBucket(bucket, result[2])
        else:
            raise ValueError("Got back unknown code from checkBucket()")
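The tests in test_scanner.py below exercise live endpoints (flaws.cloud, yahoo.com), so they need network access. As a sketch of an offline alternative, one could stub `requests.head` with pytest's built-in `monkeypatch` fixture; the bucket name here is hypothetical and this test is not part of the repo:

```python
import s3utils as s3


class FakeResponse:
    """Bare-bones stand-in for requests.Response."""
    def __init__(self, status_code, headers=None):
        self.status_code = status_code
        self.headers = headers or {}


def test_checkBucket_closed_offline(monkeypatch):
    # Pretend AWS answered 403: the bucket exists but LIST is denied.
    monkeypatch.setattr(s3.requests, 'head', lambda url: FakeResponse(403))

    assert s3.checkBucket('some-bucket', 'us-west-1') == (403, 'some-bucket', 'us-west-1')
```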
--------------------------------------------------------------------------------
/test_scanner.py:
--------------------------------------------------------------------------------
import s3utils as s3
import sh
import os
import sys
import shutil

pyVersion = sys.version_info
# pyVersion[0] can be 2 or 3


s3scannerLocation = "./"
testingFolder = "./test/"


def test_arguments():
    # Scenario 1: No arguments

    try:
        sh.python(s3scannerLocation + 's3scanner.py')
    except sh.ErrorReturnCode as e:
        assert e.stderr.decode('utf-8') == ""
        assert "usage: s3scanner [-h] [-o OUTFILE] [-c] [-r REGION] [-d] buckets" in e.stdout.decode('utf-8')


def test_checkBucket():
    """
    Scenario 1: Bucket name exists, but the region is wrong
        Expected:
            Code: 301
            Region: The region returned depends on the S3 region closest to the user. Since we don't know this,
                    just assert that it contains 2 hyphens.
        Note:
            Amazon should always give us a 301 to redirect to the nearest s3 endpoint.
            The test currently queries the ap-south-1 (Asia Pacific - Mumbai) region, so if you're running
            the test near there, change it to a region far from
            you - https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region

    Scenario 2: Bucket exists and the region is correct
        Expected:
            Code: 200
            Message: Contains the domain name and region
        Note:
            Using flaws.cloud as an example by permission of the owner (@0xdabbad00)

    """
    # Scenario 1
    result = s3.checkBucket('amazon.com', 'ap-south-1')
    assert result[0] == 301
    assert result[1].count("-") == 2

    # Scenario 2
    result = s3.checkBucket('flaws.cloud', 'us-west-2')
    assert result[0] == 200
    assert result[1] == 'flaws.cloud'
    assert result[2] == 'us-west-2'


def test_checkIncludeClosed():
    """ Verify that the '--include-closed' argument is working correctly.

    Expected:
        The bucket name 'yahoo.com' is expected to exist, but be closed. The bucket name
        and region should be included in the output buckets file in the format 'bucket:region'.
    """

    # Create an input file containing 'yahoo.com'

    inFile = testingFolder + 'test_checkIncludeClosed_in.txt'
    outFile = testingFolder + 'test_checkIncludeClosed_out.txt'

    f = open(inFile, 'w')
    f.write('yahoo.com\n')  # python will convert \n to os.linesep
    f.close()

    sh.python(s3scannerLocation + "s3scanner.py", "--out-file", outFile,
              "--include-closed", inFile)

    found = False
    with open(outFile, 'r') as g:
        for line in g:
            if 'yahoo.com' in line:
                found = True

    try:
        assert found is True
    finally:
        # Cleanup testing files
        os.remove(outFile)
        os.remove(inFile)


def test_dumpBucket():
    """
    Verify the dumpBucket() function is working as intended.

    Expected: Supplying the function with the arguments ("flaws.cloud", "us-west-2") should result in 6 files
              being downloaded into the buckets folder. The expected file sizes of each file are listed in the
              'expectedFiles' dictionary.
98 | """ 99 | 100 | # Dump the flaws.cloud bucket 101 | s3.dumpBucket("flaws.cloud", "us-west-2") 102 | 103 | # Folder to look for the files in 104 | dumpDir = './buckets/flaws.cloud/' 105 | 106 | # Expected sizes of each file 107 | expectedFiles = {'hint1.html': 2575, 'hint2.html': 1707, 'hint3.html': 1101, 'index.html': 2877, 108 | 'robots.txt': 46, 'secret-dd02c7c.html': 1051} 109 | 110 | try: 111 | # Assert number of files in the folder 112 | assert len(os.listdir(dumpDir)) == len(expectedFiles) 113 | 114 | # For each file, assert the size 115 | for file, size in expectedFiles.items(): 116 | assert os.path.getsize(dumpDir + file) == size 117 | finally: 118 | # No matter what happens with the asserts, cleanup after the test by deleting the flaws.cloud directory 119 | shutil.rmtree(dumpDir) 120 | 121 | 122 | def test_getBucketSize(): 123 | """ 124 | Scenario 1: Bucket doesn't exist 125 | Expected: 255 126 | 127 | Scenario 2: Bucket exists, listing open to public 128 | Expected: 129 | Size: 9.1 KiB 130 | Note: 131 | Using flaws.cloud as example by permission of owner (@0xdabbad00) 132 | 133 | """ 134 | 135 | # Scenario 1 136 | try: 137 | result = s3.getBucketSize('example-this-hopefully-wont-exist-123123123') 138 | except sh.ErrorReturnCode_255: 139 | assert True 140 | 141 | # Scenario 3 142 | assert s3.getBucketSize('flaws.cloud') == "9.1 KiB" 143 | 144 | 145 | def test_outputFormat(): 146 | """ 147 | Scenario: 148 | Verify that the main script outputs found buckets in the format "bucket:region" 149 | Expected: 150 | The output for flaws.cloud should be the following: "flaws.cloud:us-west-2" 151 | """ 152 | 153 | inFile = testingFolder + 'test_outputFormat_in.txt' 154 | outFile = testingFolder + 'test_outputFormat_out.txt' 155 | 156 | f = open(inFile, 'w') 157 | f.write('flaws.cloud\n') # python will convert \n to os.linesep 158 | f.close() 159 | 160 | sh.python(s3scannerLocation + '/s3scanner.py', '--out-file', outFile, inFile) 161 | 162 | found = False 163 | with open(outFile, 'r') as g: 164 | for line in g: 165 | if line.strip() == 'flaws.cloud:us-west-2': 166 | found = True 167 | 168 | try: 169 | assert found is True 170 | finally: 171 | # Cleanup testing files 172 | os.remove(outFile) 173 | os.remove(inFile) 174 | --------------------------------------------------------------------------------