├── requirements.txt ├── NOTICE ├── .gitignore ├── LICENSE-3rdparty.csv ├── Dockerfile ├── .github └── workflows │ └── docker-release.yml ├── selenium_driver.py ├── undocumented-aws-api-hunter.py ├── README.md ├── aws_connector.py ├── LICENSE └── scripts └── count_undoc_apis.py /requirements.txt: -------------------------------------------------------------------------------- 1 | selenium 2 | requests 3 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | undocumented-aws-api-hunter 2 | Copyright 2024-Present Datadog, Inc. 3 | 4 | This product includes software developed at Datadog ( 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | models 2 | incomplete 3 | reset.py 4 | logs 5 | output.log 6 | __pycache__ 7 | botocore 8 | endpoints.txt 9 | compare_output.md 10 | temp 11 | models.db 12 | venv 13 | old_models 14 | application.log 15 | -------------------------------------------------------------------------------- /LICENSE-3rdparty.csv: -------------------------------------------------------------------------------- 1 | Component,Origin,License,Copyright 2 | Selenium,Selenium,Apache-2.0,Copyright (c) 2011-2024 Software Freedom Conservancy 3 | Selenium,Selenium,Apache-2.0,Copyright (c) 2004-2011 Selenium committers 4 | Chromium,Google,BSD-3-Clause,Copyright (c) 2015 The Chromium Authors 5 | requests,Python Software Foundation,Apache-2.0,Copyright (c) 2019 Kenneth Reitz 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | WORKDIR /app 4 | 5 | COPY requirements.txt requirements.txt 6 | 7 | RUN pip3 install -r requirements.txt 8 | 9 | COPY . . 10 | 11 | RUN apt update 12 | 13 | # Find the latest Chrome deb here: https://pkgs.org/download/google-chrome-stable 14 | RUN wget -q https://dl.google.com/linux/deb/pool/main/g/google-chrome-stable/google-chrome-stable_131.0.6778.69-1_amd64.deb 15 | RUN apt install -y ./google-chrome-stable_131.0.6778.69-1_amd64.deb 16 | 17 | RUN CHROMEDRIVER_VERSION=`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE` && \ 18 | mkdir ./chromedriver && \ 19 | cd ./chromedriver && \ 20 | wget https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/131.0.6778.69/linux64/chromedriver-linux64.zip && \ 21 | unzip chromedriver-linux64.zip && \ 22 | cd chromedriver-linux64 && \ 23 | chmod +x chromedriver && \ 24 | mv chromedriver /usr/bin/chromedriver 25 | 26 | RUN useradd -m -u 1000 user 27 | RUN chown -R 1000:1000 /app 28 | 29 | USER user 30 | 31 | CMD ["python", "./undocumented-aws-api-hunter.py", "--headless"] -------------------------------------------------------------------------------- /.github/workflows/docker-release.yml: -------------------------------------------------------------------------------- 1 | name: Release docker image 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | 7 | env: 8 | REGISTRY: ghcr.io 9 | IMAGE_NAME: datadog/undocumented-aws-api-hunter 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | docker-build-push: 16 | runs-on: ubuntu-latest 17 | permissions: 18 | contents: read 19 | packages: write 20 | steps: 21 | - name: Log into registry ${{ env.REGISTRY }} 22 | uses: docker/login-action@v3 23 | with: 24 | registry: ${{ env.REGISTRY }} 25 | username: ${{ github.actor }} 26 | password: ${{ secrets.GITHUB_TOKEN }} 27 | 28 | - name: Set up QEMU 29 | uses: docker/setup-qemu-action@v3 30 | 31 | - name: Set up Docker Buildx 32 | uses: docker/setup-buildx-action@v3 33 | 34 | - name: Build and push Docker image 35 | uses: docker/build-push-action@v5 36 | with: 37 | platforms: linux/amd64 38 | push: true 39 | build-args: | 40 | VERSION=${{ github.ref_name }} 41 | tags: | 42 | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }} 43 | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest 44 | -------------------------------------------------------------------------------- /selenium_driver.py: -------------------------------------------------------------------------------- 1 | import os, time 2 | 3 | from selenium import webdriver 4 | from selenium.webdriver.common.keys import Keys 5 | from selenium.webdriver.common.by import By 6 | from selenium.webdriver.remote.remote_connection import LOGGER 7 | 8 | def create_driver(args): 9 | chrome_options = webdriver.ChromeOptions() 10 | if args.headless: 11 | chrome_options.add_argument("--headless") 12 | chrome_options.add_argument("--no-sandbox") 13 | chrome_options.add_argument("--disable-dev-shm-usage") 14 | driver = webdriver.Chrome(options=chrome_options) 15 | return driver 16 | 17 | 18 | def authenticate(driver): 19 | UAH_ACCOUNT_ID = os.getenv("UAH_ACCOUNT_ID") 20 | UAH_USERNAME = os.getenv("UAH_USERNAME") 21 | UAH_PASSWORD = os.getenv("UAH_PASSWORD") 22 | driver.get("https://us-east-1.console.aws.amazon.com/console/home?region=us-east-1") 23 | driver.find_element(By.ID, "iam_user_radio_button").click() 24 | time.sleep(1) 25 | driver.find_element(By.ID, "resolving_input").send_keys(UAH_ACCOUNT_ID) 26 | time.sleep(1) 27 | driver.find_element(By.ID, "resolving_input").send_keys(Keys.RETURN) 28 | time.sleep(1) 29 | driver.find_element(By.ID, "username").send_keys(UAH_USERNAME) 30 | time.sleep(1) 31 | driver.find_element(By.ID, "password").send_keys(UAH_PASSWORD) 32 | time.sleep(1) 33 | driver.find_element(By.ID, "password").send_keys(Keys.RETURN) 34 | time.sleep(3) 35 | return driver 36 | -------------------------------------------------------------------------------- /undocumented-aws-api-hunter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, datetime 3 | import argparse, logging, sys 4 | 5 | from selenium import webdriver 6 | from selenium.webdriver.common.keys import Keys 7 | from selenium.webdriver.common.by import By 8 | 9 | import selenium_driver 10 | import aws_connector 11 | 12 | MODEL_DIR = "./models" 13 | LOG_DIR = "./logs" 14 | ENDPOINTS_DIR = "./endpoints" 15 | 16 | 17 | def main(args): 18 | # In case this is a single query 19 | if args.single: 20 | js_content = aws_connector.fetch_service_model(args.single) 21 | aws_connector.parse_service_model(js_content, args.single, True, MODEL_DIR) 22 | exit() 23 | 24 | driver = selenium_driver.create_driver(args) 25 | driver = selenium_driver.authenticate(driver) 26 | 27 | aws_services = aws_connector.fetch_services() 28 | 29 | endpoints = load_endpoints() 30 | 31 | for service in aws_services: 32 | queried_javascript = set() 33 | url = aws_connector.process_url(service) 34 | if url is None: 35 | continue 36 | 37 | driver.get(url) 38 | 39 | endpoints = endpoints.union(aws_connector.parse_endpoints(driver.page_source)) 40 | javascript = aws_connector.find_javascript_urls(driver.page_source) 41 | for script in javascript: 42 | if script not in queried_javascript: 43 | js_content = aws_connector.fetch_service_model(script) 44 | if js_content is None: 45 | continue 46 | 47 | aws_connector.parse_service_model(js_content, script, True, MODEL_DIR) 48 | queried_javascript.add(script) 49 | 50 | with open(f"{ENDPOINTS_DIR}/endpoints.txt", 'w') as w: 51 | for item in endpoints: 52 | w.write(f"{item}\n") 53 | 54 | 55 | def load_endpoints(): 56 | to_return = set() 57 | with open(f"{ENDPOINTS_DIR}/endpoints.txt", 'r') as r: 58 | for url in r: 59 | to_return.add(url.strip()) 60 | return to_return 61 | 62 | 63 | def initialize(args): 64 | # Check for a local models directory 65 | if not os.path.isdir(MODEL_DIR): 66 | os.mkdir(MODEL_DIR) 67 | #if not os.path.isdir("./incomplete"): 68 | # os.mkdir("./incomplete") 69 | if not os.path.isdir(ENDPOINTS_DIR): 70 | os.mkdir(ENDPOINTS_DIR) 71 | if not os.path.isfile(f"{ENDPOINTS_DIR}/endpoints.txt"): 72 | open(f"{ENDPOINTS_DIR}/endpoints.txt", 'w').close() 73 | 74 | # Check needed environment variables 75 | env_vars = ["UAH_ACCOUNT_ID", "UAH_USERNAME", "UAH_PASSWORD"] 76 | for env_var in env_vars: 77 | # TODO: Fix this below 78 | if env_var not in os.environ: 79 | print(f"[!] Mising environment variable: {env_var}") 80 | print(f"[-] Terminating") 81 | exit() 82 | 83 | # Configure logging 84 | if not os.path.isdir(LOG_DIR): 85 | os.mkdir(LOG_DIR) 86 | 87 | logging.basicConfig( 88 | level=logging.INFO, 89 | format="%(message)s", 90 | handlers=[ 91 | logging.FileHandler(f"{LOG_DIR}/application.log"), 92 | logging.StreamHandler(sys.stdout) 93 | ] 94 | ) 95 | logging.getLogger('selenium').setLevel(logging.CRITICAL) 96 | logging.getLogger('requests').setLevel(logging.CRITICAL) 97 | logging.getLogger('urllib3').setLevel(logging.CRITICAL) 98 | logging.getLogger('json').setLevel(logging.CRITICAL) 99 | logging.getLogger('chardet.charsetprober').setLevel(logging.CRITICAL) 100 | logging.getLogger('chardet.universaldetector').setLevel(logging.CRITICAL) 101 | 102 | 103 | if __name__ == "__main__": 104 | parser = argparse.ArgumentParser(description="Find this pesky undocumented AWS APIs with the AWS Console") 105 | 106 | parser.add_argument('--headless', dest='headless', action='store_true', default=False, 107 | help="Do not open a visible chrome window. Headless mode. (Default: False)") 108 | parser.add_argument('--single', dest='single', action='store', type=str, 109 | help="Parses a single URL for its models.") 110 | 111 | args = parser.parse_args() 112 | 113 | initialize(args) 114 | 115 | timestamp = datetime.datetime.now() 116 | logging.info(f"{datetime.datetime.now()} INFO - Starting new run at {timestamp.strftime('%m/%d/%Y %H:%M:%S')}") 117 | 118 | main(args) 119 | 120 | timestamp = datetime.datetime.now() 121 | logging.info(f"{datetime.datetime.now()} INFO - Finished run at {timestamp.strftime('%m/%d/%Y %H:%M:%S')}") 122 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # undocumented-aws-api-hunter 2 | 3 | A tool to uncover, extract, and monitor undocumented AWS APIs from the AWS console. This tool was released at the the [fwd:cloudsec EU](https://fwdcloudsec.org/conference/europe/) talk, "[Hidden Among the Clouds: A Look at Undocumented AWS APIs](https://docs.google.com/presentation/d/1jJM_9KPfYZL60B56MQwQTym1H_A63abz2t_p_8Vo8MU/edit?usp=sharing)". This research has already uncovered some useful [tradecraft](https://frichetten.com/blog/undocumented-amplify-api-leak-account-id/), and even two [cross-tenant vulnerabilities](https://frichetten.com/blog/minor-cross-tenant-vulns-app-runner/), however due to the shear volume of undocumented APIs found there are likely many more. 4 | 5 | ## Table of Contents 6 | 7 | - [How does it work?](#how-does-it-work) 8 | - [Usage](#usage) 9 | - [Output](#output) 10 | - [How to Build Docker Container](#how-to-build-docker-container) 11 | - [Manual Installation/Usage](#manual-installationusage) 12 | - [Scripts (generate stats)](#scripts-generate-stats) 13 | - [Undocumented parameters are only compared at top level](#undocumented-parameters-are-only-compared-at-top-level) 14 | - [Author](#author) 15 | 16 | ## How does it work? 17 | 18 | The undocumented-aws-api-hunter uses [Selenium](https://www.selenium.dev/) to pilot a headless instance of [Google Chrome](https://www.google.com/chrome/) and crawl the AWS console. It starts by signing into the console using an IAM user. Next, it will extract the service pages from the search bar of the console. It will then visit each of these pages and search the JavaScript being loaded on each page for AWS service models. Once it finds a model, it will store it. 19 | 20 | ![364505916-476d7532-a6e4-491a-843c-33704819135b](https://github.com/user-attachments/assets/8133dd16-b41d-4610-a2c2-4ee3d9f9ab04) 21 | 22 | the undocumented-aws-api-hunter will deduplicate models and only store shapes, operations, and other information that is net-new. Subsequent runs of the undocumented API hunter can add new data to the extracted models. The tool will also mark the location (`download_location`) where a model or operation was downloaded from. This provides transparency and evidence of where models were found. 23 | 24 | For an example extracted dataset, please see [here](https://github.com/frichetten/aws-api-models). 25 | 26 | > [!WARNING] 27 | > From some nominal testing it appears that this tool works on M series Macs, however be aware that because this tool uses [Selenium](https://www.selenium.dev/) and hence, [Google Chrome](https://www.google.com/chrome/), there may be some funkyness on non-x86-64 machines. If you'd like to run this in production it would be best to do so on an x86 Linux machine. 28 | 29 | ## Usage 30 | 31 | Please create an IAM user in your account with console access. Then create a `.env` with the following environment variables with the associated info: `UAH_USERNAME`, `UAH_PASSWORD`, and `UAH_ACCOUNT_ID`. With those variables set you can run the tool. This user must **NOT** have any permissions. If they have any IAM policies granting permissions it runs the risk of the automation accidentally invoking something. 32 | 33 | Run the container with the following: 34 | 35 | ``` 36 | docker run -it --rm -v ${PWD}/models:/app/models -v ${PWD}/logs:/app/logs --env-file .env ghcr.io/datadog/undocumented-aws-api-hunter:latest 37 | ``` 38 | 39 | ## Output 40 | 41 | When running this tool a number of artifacts are created, including: 42 | 43 | - Models: Models are output to `/models`. Each subsequent run of the tool should use the same model directory as the tool will deduplicate based on previous findings. 44 | - Logs: Logs are output to `/logs/application.log`. This includes a running output of models, operations, and parameters found. This is particularly useful to monitor for new findings. 45 | - Endpoints: AWS will often store endpoints in the HTML (yes, that is correct) of pages in the console. This tool will extract those and store them in a file called `endpoints.txt`. This can be useful for finding API endpoints for these undocumented APIs, however it is important to stress that this is not ALL endpoints. It may return a few hundred (when tens of thousands or more exist). If you're interested in finding more API endpoints, [this](https://securitylabs.datadoghq.com/articles/non-production-endpoints-as-an-attack-surface-in-aws/) method is recommended. 46 | 47 | ## How to Build Docker Container 48 | 49 | ``` 50 | git clone https://github.com/DataDog/undocumented-aws-api-hunter.git 51 | ``` 52 | 53 | Build the Docker container: 54 | 55 | ``` 56 | docker build -t undocumented-aws-api-hunter . 57 | ``` 58 | 59 | ## Manual Installation/Usage 60 | 61 | > [!IMPORTANT] 62 | > This is only neccessary if you'd like to help with development of the project. If you just want to use it you would be much better served with the Docker option above. 63 | 64 | ``` 65 | git clone https://github.com/DataDog/undocumented-aws-api-hunter.git 66 | ``` 67 | 68 | Inside the directory, create a new [virtual environment](https://docs.python.org/3/library/venv.html) with the following command: 69 | 70 | ``` 71 | python3 -m venv ./venv 72 | ``` 73 | 74 | Activate it: 75 | 76 | ``` 77 | source ./venv/bin/activate 78 | ``` 79 | 80 | Install packages: 81 | 82 | ``` 83 | python3 -m pip install -r requirements.txt 84 | ``` 85 | 86 | Install the [ChromeDriver](https://chromedriver.chromium.org/downloads) for your operating system. This is required for Selenium. The process for this will depend on your OS so I will keep it vague. I used parts of [this](https://tecadmin.net/setup-selenium-chromedriver-on-ubuntu/) guide (once you have `chromedriver` installed you can stop. No need to complete the other steps) for a Linux machine. 87 | 88 | Please create an IAM user in your account with console access. Then export the following environment variables with the associated info: `UAH_USERNAME`, `UAH_PASSWORD`, and `UAH_ACCOUNT_ID`. With those variables set you can run the tool. This user must **NOT** have any permissions. If they have any IAM policies granting permissions it runs the risk of the automation accidentally invoking something. 89 | 90 | ``` 91 | ./undocumented-aws-api-hunter.py --headless 92 | ``` 93 | 94 | ## Scripts (generate stats) 95 | 96 | Within this repo is a scripts directory which contains some scripts for generating stats on undocumented APIs. Each stat is split into its own section to make it easier to read. As a part of generating these stats there are some gotchas/limitations that are worth noting. They are described down below. 97 | 98 | ### Undocumented parameters are only compared at top level 99 | 100 | In AWS API models, parameters for APIs are described as "shapes". These shapes are the format by which parameters are passed to the API. Shapes can be recursive, with one shape having multiple shapes within itself (they can even reference [themselves](https://github.com/boto/botocore/blob/bc89f1540e0cbb000561a72d20de9df0e92b9f4d/botocore/data/lexv2-runtime/2020-08-07/service-2.json#L532) which is fun to debug). When we compare these shapes between the botocore library and the extracted models we only compared shapes at the top level. This knowingly undercounts how many there are because down the chain there may be sub-shapes which have different fields. 101 | 102 | This undercounting is intentional because properly evaluating this is a problem to be solved. The reson is that AWS' own models are not descriptive enough to acomplish this. As an example `lambda-2015-03-32:UpdateFunctionEventInvokeConfig` has the shape DestinationConfig, this has a sub-shape OnSuccess, which itself has a member for "[Destination](https://github.com/Frichetten/aws-api-models/blob/4bc7b764593d2c2b78e3f81ff8c7027bd7048e50/models/lambda-2015-03-31-rest-json.json#L4358)". 103 | 104 | In botocore all of this is still true, however it continues on. "Destination" has a sub-member for "[DestinationArn](https://github.com/boto/botocore/blob/0ac30565017f1486b2eebf9bd90b5411f0d7f1fb/botocore/data/lambda/2015-03-31/service-2.json#L4747)". 105 | 106 | ![365281205-fa24b438-4f82-4571-9eeb-e96b4c89eb37](https://github.com/user-attachments/assets/ac98506a-38b2-49c8-af12-d2aa62774267) 107 | 108 | It is not clear why the models are not the same. My working theory is that AWS uses a lot of code generation for it's models. As a result, models are often fragmented and don't always contain the full set. As a result, it's possible that we are not properly merging shapes and missing some parts of them. Regardless of the reason why, we are unable to further analyze shapes. 109 | 110 | If you find a way to reliably (emphasis) do this, please let me know. I would love to hear about it. For now, we are only comparing the top level parameters. This has the knock-on effect of reporting fewer undocumented parameters than there actually are. 111 | 112 | ## Author 113 | 114 | This tool was written by [Nick Frichette](https://frichetten.com/) in his free time. To find more of his research on AWS, please see [Datadog Security Labs](https://securitylabs.datadoghq.com/). 115 | 116 | -------------------------------------------------------------------------------- /aws_connector.py: -------------------------------------------------------------------------------- 1 | import requests, logging, re, json, os 2 | 3 | from datetime import datetime 4 | 5 | def parse_service_model(js_content, download_location, save, MODEL_DIR): 6 | match1 = re.findall("(parse\('{\"version\":\"[\.0-9]*?\",.*?'\))", js_content) 7 | match2 = re.findall("(parse\('{\"metadata\":{\".*?'\))", js_content) 8 | matches = match1 + match2 9 | 10 | if len(matches) == 0: 11 | return 12 | 13 | for model in matches: 14 | # This is necessary to remove 2 trailing characters 15 | # and to replace some invalid JSON characters 16 | try: 17 | parsed_model = json.loads(model[7:-2].replace("\\","")) 18 | except json.decoder.JSONDecodeError as e: 19 | logging.warning(f"{datetime.now()} ERROR - Failed to parse: {model[7:-2]} from {download_location}") 20 | continue 21 | 22 | if 'metadata' not in parsed_model.keys(): 23 | logging.info(f"{datetime.now()} ERROR - No metadata found - {parsed_model}") 24 | continue 25 | 26 | if 'operations' not in parsed_model.keys(): 27 | logging.info(f"{datetime.now()} ERROR - No operations found - {parsed_model}") 28 | continue 29 | 30 | # TODO: Better handling for non-uid models (<1%) 31 | if "uid" not in parsed_model['metadata'].keys(): 32 | if "serviceFullName" in parsed_model['metadata'].keys(): 33 | #logging.info(f"[-] No UID found - {parsed_model['metadata']['serviceFullName']}") 34 | filename = f"{parsed_model['metadata']['serviceFullName']}-{parsed_model['metadata']['protocol']}" 35 | else: 36 | logging.info(f"{datetime.now()} ERROR - No UID found - unnamed") 37 | filename = "".join([item for item in parsed_model['metadata'].values() if type(item) is str]) 38 | #_mark_download_location(parsed_model, download_location) 39 | #_dump_to_file(parsed_model, filename, './incomplete') 40 | continue 41 | 42 | if not save: 43 | # Just print it 44 | print(json.dumps(parsed_model, indent=4)) 45 | # Need to determine if we have this file already 46 | elif os.path.exists(f"{MODEL_DIR}/{parsed_model['metadata']['uid']}-{parsed_model['metadata']['protocol']}.json"): 47 | # Integrate 48 | # TODO: there are some with alternative serviceFullNames and perhaps other info 49 | # Need to explore if there is enough of them to have special handling here. 50 | filename = f"{parsed_model['metadata']['uid']}-{parsed_model['metadata']['protocol']}" 51 | existing_model = _load_file(filename, MODEL_DIR) 52 | 53 | # Need to mark downloads from the new one before integrating 54 | parsed_model = _mark_download_location(parsed_model, download_location) 55 | complete_model = _integrate_models(parsed_model, existing_model) 56 | _dump_to_file(complete_model, filename, MODEL_DIR) 57 | else: 58 | logging.info(f"{datetime.now()} INFO - New model found: {parsed_model['metadata']['uid']}") 59 | parsed_model = _mark_download_location(parsed_model, download_location) 60 | filename = f"{parsed_model['metadata']['uid']}-{parsed_model['metadata']['protocol']}" 61 | _dump_to_file(parsed_model, filename, MODEL_DIR) 62 | 63 | 64 | def fetch_service_model(javascript_url): 65 | try: 66 | resp = requests.get(javascript_url, timeout=30) 67 | except Exception as e: 68 | logging.error(f"{datetime.now()} ERROR - Failed to retrieve {javascript_url} {e}") 69 | return None 70 | 71 | if resp.status_code != 200: 72 | logging.error(f"{datetime.now()} ERROR - Failed to retrieve {javascript_url}") 73 | return resp.text 74 | 75 | 76 | def fetch_services(): 77 | resp = requests.get("https://us-east-1.console.aws.amazon.com/console/home?region=us-east-1®ion=us-east-1") 78 | # 400 is not a bug. This gives the content we want :) 79 | if resp.status_code != 400: 80 | logging.critical("[X] Failed to pull service list") 81 | logging.critical("[X] Exiting") 82 | exit() 83 | 84 | match = re.findall("name=\"awsc-mezz-data\" content='(.*?)'", resp.text) 85 | return json.loads(match[0])['services'] 86 | 87 | 88 | def process_url(service): 89 | if "url" not in service.keys(): 90 | logging.error(f"[!] url not in keys for {service}") 91 | return None 92 | elif service['url'] is None: 93 | return None 94 | elif service['url'][0] == "/": 95 | return f"https://us-east-1.console.aws.amazon.com{service['url']}?region=us-east-1" 96 | else: 97 | return service['url'] 98 | 99 | 100 | def parse_endpoints(driver_content): 101 | to_return = set() 102 | match = re.findall("(?:\w+ndpoint)"\s*:\s*"\s*([^&]+)", driver_content) 103 | for item in match: 104 | to_return.add(item) 105 | return to_return 106 | 107 | 108 | def find_javascript_urls(driver_content): 109 | match = re.findall("(https?:\/\/[\w\-._~:\/?#\[\]@!$&'()*+,;=]+\.js)", driver_content) 110 | return match 111 | 112 | 113 | def _mark_download_location(model, download_location): 114 | if 'download_location' not in model['metadata'].keys(): 115 | model['metadata']['download_location'] = [download_location] 116 | elif download_location not in model['metadata']['download_location']: 117 | if len(model['metadata']['download_location']) >= 25: 118 | model['metadata']['download_location'] = model['metadata']['download_location'][:24] 119 | model['metadata']['download_location'].append(download_location) 120 | 121 | for operation in model['operations']: 122 | if 'download_location' not in model['operations'][operation].keys(): 123 | model['operations'][operation]['download_location'] = [download_location] 124 | elif download_location not in model['operations'][operation]['download_location']: 125 | if len(model['metadata']['download_location']) >= 25: 126 | model['metadata']['download_location'] = model['metadata']['download_location'][:24] 127 | model['operations'][operation]['download_location'].append(download_location) 128 | 129 | return model 130 | 131 | 132 | def _load_file(filename, MODEL_DIR): 133 | with open(f"{MODEL_DIR}/{filename}.json", "r") as r: 134 | return json.load(r) 135 | 136 | 137 | def _dump_to_file(model, filename, MODEL_DIR): 138 | filename = f"{MODEL_DIR}/{filename}.json" 139 | with open(filename, "w") as w: 140 | json.dump(model, w, indent=4) 141 | 142 | 143 | def _integrate_models(parsed_model, existing_model): 144 | # First, update the download location for the metadata 145 | if parsed_model['metadata']['download_location'][0] not in existing_model['metadata']['download_location']: 146 | if len(existing_model['metadata']['download_location']) >= 25: 147 | existing_model['metadata']['download_location'] = existing_model['metadata']['download_location'][1:] 148 | 149 | existing_model['metadata']['download_location'] += parsed_model['metadata']['download_location'] 150 | 151 | # Next deal with operations 152 | for operation in parsed_model['operations']: 153 | if operation not in existing_model['operations'].keys(): 154 | logging.info(f"{datetime.now()} INFO - Adding new operation: {existing_model['metadata']['uid']}:{operation}") 155 | existing_model['operations'][operation] = parsed_model['operations'][operation] 156 | 157 | else: 158 | if len(existing_model['operations'][operation]['download_location']) >= 25: 159 | existing_model['operations'][operation]['download_location'] = existing_model['operations'][operation]['download_location'][1:] 160 | 161 | # This operation already exists, but let's update its download_location 162 | if parsed_model['operations'][operation]['download_location'][0] not in existing_model['operations'][operation]['download_location']: 163 | existing_model['operations'][operation]['download_location'] += (parsed_model['operations'][operation]['download_location']) 164 | 165 | # This operation already exists, so let's integrate its parameters 166 | if 'input' in parsed_model['operations'][operation].keys() and 'members' in parsed_model['operations'][operation]['input'].keys(): 167 | for member in parsed_model['operations'][operation]['input']['members'].keys(): 168 | # This accounts for a weird edge case when there is no `input` key 169 | if 'input' not in existing_model['operations'][operation].keys(): 170 | existing_model['operations'][operation]['input'] = {} 171 | logging.info(f"{datetime.now()} INFO - Malformed model from AWS: {existing_model['metadata']['uid']}:{operation}") 172 | 173 | if 'members' not in existing_model['operations'][operation]['input'].keys(): 174 | existing_model['operations'][operation]['input']['members'] = {} 175 | 176 | if member not in existing_model['operations'][operation]['input']['members'].keys(): 177 | existing_model['operations'][operation]['input']['members'][member] = parsed_model['operations'][operation]['input']['members'][member] 178 | logging.info(f"{datetime.now()} INFO - Adding new input parameter: {existing_model['metadata']['uid']}:{operation}:{member}") 179 | 180 | # Now add new shapes 181 | for shape in parsed_model['shapes']: 182 | if shape not in existing_model['shapes'].keys(): 183 | existing_model['shapes'][shape] = parsed_model['shapes'][shape] 184 | 185 | return existing_model 186 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /scripts/count_undoc_apis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script will compare results found in the AWS Console to the botocore dataset. 4 | Doing this may reveal API actions that are undocumented.""" 5 | 6 | # NOTE: I intentionally remove some botocore data that does not have a UID. 7 | # This is to make things easier but it may have an attack surface. Go back and review 8 | 9 | import os, json, sys 10 | 11 | if len(sys.argv) < 3: 12 | print(f"Usage: ./count_undoc_apis.py ") 13 | exit() 14 | 15 | if "botocore" not in os.listdir("."): 16 | print(f"Error! Please download botocore locally") 17 | exit() 18 | 19 | BOTOCORE_MODELS = f"{os.path.expanduser(sys.argv[1])}/botocore/data" 20 | MODELS_DIR = os.path.expanduser(sys.argv[2]) 21 | botocore = {} 22 | 23 | # Slurp all botocore models into memory 24 | # with `uid` as the primary key 25 | for service in os.listdir(BOTOCORE_MODELS): 26 | if not os.path.isdir(f"{BOTOCORE_MODELS}/{service}"): 27 | continue 28 | 29 | for version in os.listdir(f"{BOTOCORE_MODELS}/{service}/"): 30 | if not os.path.isdir(f"{BOTOCORE_MODELS}/{service}/{version}"): 31 | continue 32 | 33 | if not os.path.exists(f"{BOTOCORE_MODELS}/{service}/{version}/service-2.json"): 34 | continue 35 | 36 | with open(f"{BOTOCORE_MODELS}/{service}/{version}/service-2.json", "r") as r: 37 | data = json.load(r) 38 | if 'uid' not in data['metadata'].keys(): 39 | continue 40 | botocore[data['metadata']['uid']] = data 41 | 42 | # Search through all crawled model definitions and compare to botocore 43 | # If something is not in botocore, alert 44 | 45 | extracted = {} 46 | modelfiles = os.listdir(MODELS_DIR) 47 | for file in modelfiles: 48 | with open(f"{MODELS_DIR}/{file}", "r") as r: 49 | data = json.load(r) 50 | if 'uid' not in data['metadata'].keys(): 51 | continue 52 | extracted[data['metadata']['uid']] = data 53 | 54 | ###################################################################################### 55 | # Count undocumented services 56 | # Methodology: This should be simple, we check the uid of each model and split on the first -20 or -19. 57 | # For example, SSO-2017-11-28 would be SSO. cleanrooms-2022-02-17 would be cleanrooms. giraffe-1986-04-08 58 | # would be giraffe. This will help separate the service from the version. 59 | botocore_services = set() 60 | for service_name, service in botocore.items(): 61 | name = service['metadata']['uid'].split("-20")[0].split("-19")[0] 62 | botocore_services.add(name) 63 | 64 | extracted_services = set() 65 | for service_name, service in extracted.items(): 66 | name = service['metadata']['uid'].split("-20")[0].split("-19")[0] 67 | extracted_services.add(name) 68 | 69 | #print("Finding undocumented services") 70 | undocumented_services_count = 0 71 | for service in extracted_services: 72 | if service not in botocore_services: 73 | #print(service) 74 | undocumented_services_count += 1 75 | print(f"Undocumented services: {undocumented_services_count}") 76 | 77 | 78 | ###################################################################################### 79 | # Count undocumented versions of documented services 80 | # Methodology: This will be slightly more complicated as we need to first check if a service 81 | # is documented and if it is, we then need to see if the version is undocumented. 82 | botocore_services = [] 83 | for service_name, service in botocore.items(): 84 | name = service['metadata']['uid'] 85 | botocore_services.append(name) 86 | 87 | extracted_services = [] 88 | for service_name, service in extracted.items(): 89 | name = service['metadata']['uid'] 90 | extracted_services.append(name) 91 | 92 | #print("Finding undocumented service versions for documented services") 93 | undocumented_service_versions_count = 0 94 | for service in extracted_services: 95 | name = service.split("-20")[0].split("-19")[0] 96 | found = any(name in substring for substring in botocore_services) 97 | if not found: 98 | continue 99 | 100 | if service not in botocore_services: 101 | #print(service) 102 | undocumented_service_versions_count += 1 103 | print(f"Undocumented service versions for documented services: {undocumented_service_versions_count}") 104 | 105 | 106 | 107 | ###################################################################################### 108 | # Count undocumented parameters for documented actions 109 | # Methodology: Substantially more complex than previous. We compare services by their UID 110 | # and by their actions. If an extracted model has a parameter that the documented model does not have, 111 | # we count that as an undocumented parameter. I know the code is rough below. When it comes to parameters 112 | # the extracted model file format differs from the documented models. This means I can't use the same 113 | # recursive function. I have taken steps to double check this output to make sure nothing is amiss. 114 | 115 | # VERY IMPORTANT: This does NOT enumerate all sub-parameters. You cannot (realistically) do this with the current model format. 116 | # As an example lambda-2015-03-32:UpdateFunctionEventInvokeConfig has the parameter DestinationConfig, this has a 117 | # sub-parameter OnSuccess, which itself has a member for "Destination". 118 | # https://github.com/Frichetten/aws-api-models/blob/4bc7b764593d2c2b78e3f81ff8c7027bd7048e50/models/lambda-2015-03-31-rest-json.json#L4358 119 | # In botocore all of this is still true, however it continues on. "Destination" has a sub-member for "DestinationArn" 120 | # https://github.com/boto/botocore/blob/0ac30565017f1486b2eebf9bd90b5411f0d7f1fb/botocore/data/lambda/2015-03-31/service-2.json#L4747 121 | # Because of these model differences we can never reconcile this. 122 | 123 | # If you find a way to reliably (emphasis) do this, please let me know. I would love to hear about it. 124 | # For now, we are only comparing the top level parameters. This has the knock-on effect of reporting 125 | # fewer undocumented parameters than there actually are. 126 | 127 | # Below you will find find_shape and find_member. These are recursive functions left over from when I was trying to find 128 | # all sub-parameters. They are not used in the final version of this script. I've kept them here for reference if 129 | # someone (or future me) wants to try and tackle this problem again. 130 | botocore_data = {} 131 | for service_name, service in botocore.items(): 132 | name = service['metadata']['uid'] 133 | botocore_data[name] = service 134 | 135 | extracted_data = {} 136 | for service_name, service in extracted.items(): 137 | name = service['metadata']['uid'] 138 | extracted_data[name] = service 139 | 140 | # This function will be used recursively to find the parameter shapes 141 | def find_shape(model, shape_name, previous_shape): 142 | flatlist = [] 143 | 144 | if model['shapes'][shape_name]['type'] == "structure": 145 | for member in model['shapes'][shape_name]['members'].keys(): 146 | # To prevent infinite recursion scenarios, break out here 147 | # Example: https://github.com/boto/botocore/blob/bc89f1540e0cbb000561a72d20de9df0e92b9f4d/botocore/data/lexv2-runtime/2020-08-07/service-2.json#L532 148 | if shape_name == model['shapes'][shape_name]['members'][member]['shape']: 149 | continue 150 | 151 | flatlist += find_shape(model, model['shapes'][shape_name]['members'][member]['shape'], member) 152 | else: 153 | flatlist.append(previous_shape) 154 | 155 | return flatlist 156 | 157 | 158 | def find_member(model, shape_name, previous_shape): 159 | flatlist = [] 160 | 161 | if model['shapes'][shape_name]['type'] == "structure": 162 | for member in model['shapes'][shape_name]['members'].keys(): 163 | if 'shape' in model['shapes'][shape_name]['members'][member].keys(): 164 | # Same anti-recursion check 165 | if shape_name == model['shapes'][shape_name]['members'][member]['shape']: 166 | continue 167 | flatlist += find_member(model, model['shapes'][shape_name]['members'][member]['shape'], member) 168 | 169 | elif "type" in model['shapes'][shape_name]['members'][member].keys() and model['shapes'][shape_name]['members'][member]['type'] == "structure": 170 | for submember in model['shapes'][shape_name]['members'][member]['members'].keys(): 171 | flatlist.append(submember) 172 | 173 | else: 174 | flatlist.append(member) 175 | else: 176 | flatlist.append(previous_shape) 177 | 178 | return flatlist 179 | 180 | #print("Finding undocumented parameters for documented actions") 181 | undocumented_parameters_count = 0 182 | # Note we iterate the botocore data because we are only interested in documented services 183 | for service_name, service in botocore_data.items(): 184 | #if service_name != "lambda-2015-03-31": 185 | # continue 186 | if service_name not in extracted_data.keys(): 187 | continue 188 | 189 | for operation_name, operation in service['operations'].items(): 190 | #if operation_name != "UpdateFunctionConfiguration": 191 | # continue 192 | if 'input' not in operation.keys(): 193 | continue 194 | if operation_name not in extracted_data[service_name]['operations'].keys(): 195 | continue 196 | if 'input' not in extracted_data[service_name]['operations'][operation_name].keys(): 197 | continue 198 | if 'members' not in extracted_data[service_name]['operations'][operation_name]['input'].keys(): 199 | continue 200 | 201 | #botocore_params = find_shape(service, operation['input']['shape'], "") 202 | #print(botocore_params) 203 | botocore_params = [] 204 | for param_name, param_value in service['shapes'][operation['input']['shape']]['members'].items(): 205 | botocore_params.append(param_name) 206 | 207 | extracted_params = [] 208 | for param_name, param_value in extracted_data[service_name]['operations'][operation_name]['input']['members'].items(): 209 | extracted_params.append(param_name) 210 | #for param_name, param_value in extracted_data[service_name]['operations'][operation_name]['input']['members'].items(): 211 | # if "shape" in param_value.keys(): 212 | # recursive_params = find_member(extracted_data[service_name], param_value['shape'], param_name) 213 | # extracted_params += recursive_params 214 | # else: 215 | # # There are 2 scenarios. The first is that the parameter is a structure. The second is that it is a simple type. 216 | # # If it is a structure, we need to extract the members. If it is a simple type, we can just add it to the list. 217 | # # If you need a good example of this, check out the "PutBotAlias" action in the "lex-models-2017-04-19" service. 218 | # if "type" in param_value.keys() and param_value['type'] == "structure": 219 | # for member in param_value['members'].keys(): 220 | # extracted_params.append(member) 221 | # else: 222 | # extracted_params.append(param_name) 223 | #extracted_params = set(extracted_params) 224 | #print(extracted_params) 225 | 226 | for param in extracted_params: 227 | if param not in botocore_params: 228 | #print(f"{service_name}:{operation_name}:{param}") 229 | undocumented_parameters_count += 1 230 | print(f"Undocumented parameters for documented actions: {undocumented_parameters_count}") 231 | 232 | 233 | 234 | ###################################################################################### 235 | # Count undocumented actions of documented services 236 | # Methodology: Slightly less complex than previous. We compare services by their UID. 237 | # If an extracted model has an action that the documented model does not have, we count that as an undocumented action. 238 | botocore_actions = {} 239 | for service_name, service in botocore.items(): 240 | name = service['metadata']['uid'] 241 | botocore_actions[name] = service['operations'] 242 | 243 | extracted_actions = {} 244 | for service_name, service in extracted.items(): 245 | name = service['metadata']['uid'] 246 | extracted_actions[name] = service['operations'] 247 | 248 | #print("Finding undocumented actions for documented services") 249 | undocumented_actions_count = 0 250 | for service_name, operations in extracted_actions.items(): 251 | if service_name not in botocore_actions.keys(): 252 | continue 253 | 254 | for operation in operations.keys(): 255 | if operation in botocore_actions[service_name].keys(): 256 | continue 257 | 258 | #print(f"{service_name}:{operation}") 259 | undocumented_actions_count += 1 260 | print(f"Undocumented actions for documented services: {undocumented_actions_count}") 261 | 262 | 263 | 264 | ###################################################################################### 265 | # Count all undocumented actions for undocumented services 266 | # Methodology: This is the easiest. If the service itself is not documented, all of its actions are undocumented. 267 | # We simply iterate through all of our extracted services and count the actions. 268 | botocore_actions = {} 269 | for service_name, service in botocore.items(): 270 | name = service['metadata']['uid'] 271 | botocore_actions[name] = service['operations'] 272 | 273 | extracted_actions = {} 274 | for service_name, service in extracted.items(): 275 | name = service['metadata']['uid'] 276 | extracted_actions[name] = service['operations'] 277 | 278 | #print("Finding undocumented actions for undocumented services") 279 | undocumented_actions_count = 0 280 | for service_name, operations in extracted_actions.items(): 281 | if service_name in botocore_actions.keys(): 282 | continue 283 | 284 | for operation in operations.keys(): 285 | #print(f"{service_name}:{operation}") 286 | undocumented_actions_count += 1 287 | 288 | print(f"Undocumented actions for undocumented services: {undocumented_actions_count}") 289 | 290 | --------------------------------------------------------------------------------