├── requirements.txt
├── NOTICE
├── .gitignore
├── LICENSE-3rdparty.csv
├── Dockerfile
├── .github
    └── workflows
    │   └── docker-release.yml
├── selenium_driver.py
├── undocumented-aws-api-hunter.py
├── README.md
├── aws_connector.py
├── LICENSE
└── scripts
    └── count_undoc_apis.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | selenium
2 | requests
3 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | undocumented-aws-api-hunter
2 | Copyright 2024-Present Datadog, Inc.
3 | 
4 | This product includes software developed at Datadog (<https://www.datadoghq.com/>).
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | models
 2 | incomplete
 3 | reset.py
 4 | logs
 5 | output.log
 6 | __pycache__
 7 | botocore
 8 | endpoints.txt
 9 | compare_output.md
10 | temp
11 | models.db
12 | venv
13 | old_models
14 | application.log
15 | 


--------------------------------------------------------------------------------
/LICENSE-3rdparty.csv:
--------------------------------------------------------------------------------
1 | Component,Origin,License,Copyright
2 | Selenium,Selenium,Apache-2.0,Copyright (c) 2011-2024 Software Freedom Conservancy
3 | Selenium,Selenium,Apache-2.0,Copyright (c) 2004-2011 Selenium committers
4 | Chromium,Google,BSD-3-Clause,Copyright (c) 2015 The Chromium Authors
5 | requests,Python Software Foundation,Apache-2.0,Copyright (c) 2019 Kenneth Reitz
6 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.11
 2 | 
 3 | WORKDIR /app
 4 | 
 5 | COPY requirements.txt requirements.txt
 6 | 
 7 | RUN pip3 install -r requirements.txt
 8 | 
 9 | COPY . .
10 | 
11 | RUN apt update
12 | 
13 | # Find the latest Chrome deb here: https://pkgs.org/download/google-chrome-stable
14 | RUN wget -q https://dl.google.com/linux/deb/pool/main/g/google-chrome-stable/google-chrome-stable_131.0.6778.69-1_amd64.deb
15 | RUN apt install -y ./google-chrome-stable_131.0.6778.69-1_amd64.deb
16 | 
# Install the ChromeDriver build that matches the pinned Chrome version above.
# The version is hard-coded in the URL, so no "latest release" lookup is needed.
RUN mkdir ./chromedriver && \
	cd ./chromedriver && \
	wget https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/131.0.6778.69/linux64/chromedriver-linux64.zip && \
	unzip chromedriver-linux64.zip && \
	cd chromedriver-linux64 && \
	chmod +x chromedriver && \
	mv chromedriver /usr/bin/chromedriver
25 | 
26 | RUN useradd -m -u 1000 user
27 | RUN chown -R 1000:1000 /app
28 | 
29 | USER user
30 | 
31 | CMD ["python", "./undocumented-aws-api-hunter.py", "--headless"]


--------------------------------------------------------------------------------
/.github/workflows/docker-release.yml:
--------------------------------------------------------------------------------
 1 | name: Release docker image
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 | 
 7 | env:
 8 |   REGISTRY: ghcr.io
 9 |   IMAGE_NAME: datadog/undocumented-aws-api-hunter
10 | 
11 | permissions:
12 |   contents: read
13 | 
14 | jobs:
15 |   docker-build-push:
16 |     runs-on: ubuntu-latest
17 |     permissions:
18 |       contents: read
19 |       packages: write
20 |     steps:
21 |       - name: Log into registry ${{ env.REGISTRY }}
22 |         uses: docker/login-action@v3
23 |         with:
24 |           registry: ${{ env.REGISTRY }}
25 |           username: ${{ github.actor }}
26 |           password: ${{ secrets.GITHUB_TOKEN }}
27 | 
28 |       - name: Set up QEMU
29 |         uses: docker/setup-qemu-action@v3
30 | 
31 |       - name: Set up Docker Buildx
32 |         uses: docker/setup-buildx-action@v3
33 | 
34 |       - name: Build and push Docker image
35 |         uses: docker/build-push-action@v5
36 |         with:
37 |           platforms: linux/amd64
38 |           push: true
39 |           build-args: |
40 |             VERSION=${{ github.ref_name }}
41 |           tags: |
42 |             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
43 |             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
44 | 


--------------------------------------------------------------------------------
/selenium_driver.py:
--------------------------------------------------------------------------------
 1 | import os, time
 2 | 
 3 | from selenium import webdriver
 4 | from selenium.webdriver.common.keys import Keys
 5 | from selenium.webdriver.common.by import By
 6 | from selenium.webdriver.remote.remote_connection import LOGGER
 7 | 
 8 | def create_driver(args):
 9 |     chrome_options = webdriver.ChromeOptions()
10 |     if args.headless:
11 |         chrome_options.add_argument("--headless")
12 |         chrome_options.add_argument("--no-sandbox")
13 |         chrome_options.add_argument("--disable-dev-shm-usage")
14 |     driver = webdriver.Chrome(options=chrome_options)
15 |     return driver
16 | 
17 | 
def authenticate(driver):
    """Sign the given driver into the AWS console as an IAM user.

    Credentials are read from the ``UAH_ACCOUNT_ID``, ``UAH_USERNAME`` and
    ``UAH_PASSWORD`` environment variables. The form is filled one field at
    a time with a fixed pause after every interaction.

    Returns the same driver that was passed in.
    """
    account_id, username, password = (
        os.getenv(name)
        for name in ("UAH_ACCOUNT_ID", "UAH_USERNAME", "UAH_PASSWORD")
    )

    driver.get("https://us-east-1.console.aws.amazon.com/console/home?region=us-east-1")
    driver.find_element(By.ID, "iam_user_radio_button").click()
    time.sleep(1)

    # (element id, keys to send, seconds to pause afterwards)
    keystrokes = (
        ("resolving_input", account_id, 1),
        ("resolving_input", Keys.RETURN, 1),
        ("username", username, 1),
        ("password", password, 1),
        ("password", Keys.RETURN, 3),
    )
    for element_id, keys, pause in keystrokes:
        driver.find_element(By.ID, element_id).send_keys(keys)
        time.sleep(pause)

    return driver
36 | 


--------------------------------------------------------------------------------
/undocumented-aws-api-hunter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import os, datetime
  3 | import argparse, logging, sys
  4 | 
  5 | from selenium import webdriver
  6 | from selenium.webdriver.common.keys import Keys
  7 | from selenium.webdriver.common.by import By
  8 | 
  9 | import selenium_driver
 10 | import aws_connector
 11 | 
 12 | MODEL_DIR = "./models"
 13 | LOG_DIR = "./logs"
 14 | ENDPOINTS_DIR = "./endpoints"
 15 | 
 16 | 
 17 | def main(args):
 18 |     # In case this is a single query
 19 |     if args.single:
 20 |         js_content = aws_connector.fetch_service_model(args.single)
 21 |         aws_connector.parse_service_model(js_content, args.single, True, MODEL_DIR)
 22 |         exit()
 23 | 
 24 |     driver = selenium_driver.create_driver(args)
 25 |     driver = selenium_driver.authenticate(driver)
 26 | 
 27 |     aws_services = aws_connector.fetch_services()
 28 | 
 29 |     endpoints = load_endpoints()
 30 | 
 31 |     for service in aws_services:
 32 |         queried_javascript = set()
 33 |         url = aws_connector.process_url(service)
 34 |         if url is None:
 35 |             continue
 36 | 
 37 |         driver.get(url)
 38 | 
 39 |         endpoints = endpoints.union(aws_connector.parse_endpoints(driver.page_source))
 40 |         javascript = aws_connector.find_javascript_urls(driver.page_source)
 41 |         for script in javascript:
 42 |             if script not in queried_javascript:
 43 |                 js_content = aws_connector.fetch_service_model(script)
 44 |                 if js_content is None:
 45 |                     continue
 46 | 
 47 |                 aws_connector.parse_service_model(js_content, script, True, MODEL_DIR)
 48 |                 queried_javascript.add(script)
 49 |     
 50 |     with open(f"{ENDPOINTS_DIR}/endpoints.txt", 'w') as w:
 51 |         for item in endpoints:
 52 |             w.write(f"{item}\n")
 53 | 
 54 | 
 55 | def load_endpoints():
 56 |     to_return = set()
 57 |     with open(f"{ENDPOINTS_DIR}/endpoints.txt", 'r') as r:
 58 |         for url in r:
 59 |             to_return.add(url.strip())
 60 |     return to_return
 61 | 
 62 | 
 63 | def initialize(args):
 64 |     # Check for a local models directory
 65 |     if not os.path.isdir(MODEL_DIR):
 66 |         os.mkdir(MODEL_DIR)
 67 |     #if not os.path.isdir("./incomplete"):
 68 |     #    os.mkdir("./incomplete")
 69 |     if not os.path.isdir(ENDPOINTS_DIR):
 70 |         os.mkdir(ENDPOINTS_DIR)
 71 |     if not os.path.isfile(f"{ENDPOINTS_DIR}/endpoints.txt"):
 72 |         open(f"{ENDPOINTS_DIR}/endpoints.txt", 'w').close()
 73 | 
 74 |     # Check needed environment variables
 75 |     env_vars = ["UAH_ACCOUNT_ID", "UAH_USERNAME", "UAH_PASSWORD"]
 76 |     for env_var in env_vars:
 77 |         # TODO: Fix this below
 78 |         if env_var not in os.environ:
 79 |             print(f"[!] Mising environment variable: {env_var}")
 80 |             print(f"[-] Terminating")
 81 |             exit()
 82 | 
 83 |     # Configure logging
 84 |     if not os.path.isdir(LOG_DIR):
 85 |         os.mkdir(LOG_DIR)
 86 | 
 87 |     logging.basicConfig(
 88 |         level=logging.INFO,
 89 |         format="%(message)s",
 90 |         handlers=[
 91 |             logging.FileHandler(f"{LOG_DIR}/application.log"),
 92 |             logging.StreamHandler(sys.stdout)
 93 |         ]
 94 |     )
 95 |     logging.getLogger('selenium').setLevel(logging.CRITICAL)
 96 |     logging.getLogger('requests').setLevel(logging.CRITICAL)
 97 |     logging.getLogger('urllib3').setLevel(logging.CRITICAL)
 98 |     logging.getLogger('json').setLevel(logging.CRITICAL)
 99 |     logging.getLogger('chardet.charsetprober').setLevel(logging.CRITICAL)
100 |     logging.getLogger('chardet.universaldetector').setLevel(logging.CRITICAL)
101 | 
102 | 
if __name__ == "__main__":
    # Command-line entry point: parse flags, set up the environment, run.
    parser = argparse.ArgumentParser(description="Find those pesky undocumented AWS APIs with the AWS Console")

    parser.add_argument('--headless', dest='headless', action='store_true', default=False,
                        help="Do not open a visible chrome window. Headless mode. (Default: False)")
    parser.add_argument('--single', dest='single', action='store', type=str,
                        help="Parses a single URL for its models.")

    args = parser.parse_args()

    initialize(args)

    # One timestamp per log line, used for both the prefix and the message.
    started = datetime.datetime.now()
    logging.info(f"{started} INFO - Starting new run at {started.strftime('%m/%d/%Y %H:%M:%S')}")

    main(args)

    finished = datetime.datetime.now()
    logging.info(f"{finished} INFO - Finished run at {finished.strftime('%m/%d/%Y %H:%M:%S')}")
122 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # undocumented-aws-api-hunter
  2 | 
  3 | A tool to uncover, extract, and monitor undocumented AWS APIs from the AWS console. This tool was released at the [fwd:cloudsec EU](https://fwdcloudsec.org/conference/europe/) talk, "[Hidden Among the Clouds: A Look at Undocumented AWS APIs](https://docs.google.com/presentation/d/1jJM_9KPfYZL60B56MQwQTym1H_A63abz2t_p_8Vo8MU/edit?usp=sharing)". This research has already uncovered some useful [tradecraft](https://frichetten.com/blog/undocumented-amplify-api-leak-account-id/), and even two [cross-tenant vulnerabilities](https://frichetten.com/blog/minor-cross-tenant-vulns-app-runner/), however due to the sheer volume of undocumented APIs found there are likely many more. 
  4 | 
  5 | ## Table of Contents
  6 | 
  7 | - [How does it work?](#how-does-it-work)
  8 | - [Usage](#usage)
  9 | - [Output](#output)
 10 | - [How to Build Docker Container](#how-to-build-docker-container)
 11 | - [Manual Installation/Usage](#manual-installationusage)
 12 | - [Scripts (generate stats)](#scripts-generate-stats)
 13 |   - [Undocumented parameters are only compared at top level](#undocumented-parameters-are-only-compared-at-top-level)
 14 | - [Author](#author)
 15 | 
 16 | ## How does it work?
 17 | 
 18 | The undocumented-aws-api-hunter uses [Selenium](https://www.selenium.dev/) to pilot a headless instance of [Google Chrome](https://www.google.com/chrome/) and crawl the AWS console. It starts by signing into the console using an IAM user. Next, it will extract the service pages from the search bar of the console. It will then visit each of these pages and search the JavaScript being loaded on each page for AWS service models. Once it finds a model, it will store it.    
 19 | 
 20 | ![364505916-476d7532-a6e4-491a-843c-33704819135b](https://github.com/user-attachments/assets/8133dd16-b41d-4610-a2c2-4ee3d9f9ab04)
 21 | 
 22 | The undocumented-aws-api-hunter will deduplicate models and only store shapes, operations, and other information that is net-new. Subsequent runs of the undocumented API hunter can add new data to the extracted models. The tool will also mark the location (`download_location`) where a model or operation was downloaded from. This provides transparency and evidence of where models were found. 
 23 | 
 24 | For an example extracted dataset, please see [here](https://github.com/frichetten/aws-api-models).
 25 | 
 26 | > [!WARNING]
 27 | > From some nominal testing it appears that this tool works on M series Macs, however be aware that because this tool uses [Selenium](https://www.selenium.dev/) and hence, [Google Chrome](https://www.google.com/chrome/), there may be some funkiness on non-x86-64 machines. If you'd like to run this in production it would be best to do so on an x86 Linux machine. 
 28 | 
 29 | ## Usage
 30 | 
 31 | Please create an IAM user in your account with console access. Then create a `.env` with the following environment variables with the associated info: `UAH_USERNAME`, `UAH_PASSWORD`, and `UAH_ACCOUNT_ID`. With those variables set you can run the tool. This user must **NOT** have any permissions. If they have any IAM policies granting permissions it runs the risk of the automation accidentally invoking something.
 32 | 
 33 | Run the container with the following:
 34 | 
 35 | ```
 36 | docker run -it --rm -v ${PWD}/models:/app/models -v ${PWD}/logs:/app/logs --env-file .env ghcr.io/datadog/undocumented-aws-api-hunter:latest
 37 | ```
 38 | 
 39 | ## Output
 40 | 
 41 | When running this tool a number of artifacts are created, including:
 42 | 
 43 | - Models: Models are output to `/models`. Each subsequent run of the tool should use the same model directory as the tool will deduplicate based on previous findings.
 44 | - Logs: Logs are output to `/logs/application.log`. This includes a running output of models, operations, and parameters found. This is particularly useful to monitor for new findings.
 45 | - Endpoints: AWS will often store endpoints in the HTML (yes, that is correct) of pages in the console. This tool will extract those and store them in a file called `endpoints.txt`. This can be useful for finding API endpoints for these undocumented APIs, however it is important to stress that this is not ALL endpoints. It may return a few hundred (when tens of thousands or more exist). If you're interested in finding more API endpoints, [this](https://securitylabs.datadoghq.com/articles/non-production-endpoints-as-an-attack-surface-in-aws/) method is recommended.
 46 | 
 47 | ## How to Build Docker Container
 48 | 
 49 | ```
 50 | git clone https://github.com/DataDog/undocumented-aws-api-hunter.git
 51 | ```
 52 | 
 53 | Build the Docker container:
 54 | 
 55 | ```
 56 | docker build -t undocumented-aws-api-hunter .
 57 | ```
 58 | 
 59 | ## Manual Installation/Usage
 60 | 
 61 | > [!IMPORTANT]  
 62 | > This is only necessary if you'd like to help with development of the project. If you just want to use it you would be much better served with the Docker option above. 
 63 | 
 64 | ```
 65 | git clone https://github.com/DataDog/undocumented-aws-api-hunter.git
 66 | ```
 67 | 
 68 | Inside the directory, create a new [virtual environment](https://docs.python.org/3/library/venv.html) with the following command:
 69 | 
 70 | ```
 71 | python3 -m venv ./venv
 72 | ```
 73 | 
 74 | Activate it:
 75 | 
 76 | ```
 77 | source ./venv/bin/activate
 78 | ```
 79 | 
 80 | Install packages:
 81 | 
 82 | ```
 83 | python3 -m pip install -r requirements.txt
 84 | ```
 85 | 
 86 | Install the [ChromeDriver](https://chromedriver.chromium.org/downloads) for your operating system. This is required for Selenium. The process for this will depend on your OS so I will keep it vague. I used parts of [this](https://tecadmin.net/setup-selenium-chromedriver-on-ubuntu/) guide (once you have `chromedriver` installed you can stop. No need to complete the other steps) for a Linux machine.
 87 | 
 88 | Please create an IAM user in your account with console access. Then export the following environment variables with the associated info: `UAH_USERNAME`, `UAH_PASSWORD`, and `UAH_ACCOUNT_ID`. With those variables set you can run the tool. This user must **NOT** have any permissions. If they have any IAM policies granting permissions it runs the risk of the automation accidentally invoking something.
 89 | 
 90 | ```
 91 | ./undocumented-aws-api-hunter.py --headless
 92 | ```
 93 | 
 94 | ## Scripts (generate stats)
 95 | 
 96 | Within this repo is a scripts directory which contains some scripts for generating stats on undocumented APIs. Each stat is split into its own section to make it easier to read. As a part of generating these stats there are some gotchas/limitations that are worth noting. They are described down below.
 97 | 
 98 | ### Undocumented parameters are only compared at top level
 99 | 
100 | In AWS API models, parameters for APIs are described as "shapes". These shapes are the format by which parameters are passed to the API. Shapes can be recursive, with one shape having multiple shapes within itself (they can even reference [themselves](https://github.com/boto/botocore/blob/bc89f1540e0cbb000561a72d20de9df0e92b9f4d/botocore/data/lexv2-runtime/2020-08-07/service-2.json#L532) which is fun to debug). When we compare these shapes between the botocore library and the extracted models we only compared shapes at the top level. This knowingly undercounts how many there are because down the chain there may be sub-shapes which have different fields. 
101 | 
102 | This undercounting is intentional because properly evaluating this is a problem to be solved. The reason is that AWS' own models are not descriptive enough to accomplish this. As an example `lambda-2015-03-31:UpdateFunctionEventInvokeConfig` has the shape DestinationConfig, this has a sub-shape OnSuccess, which itself has a member for "[Destination](https://github.com/Frichetten/aws-api-models/blob/4bc7b764593d2c2b78e3f81ff8c7027bd7048e50/models/lambda-2015-03-31-rest-json.json#L4358)".
103 | 
104 | In botocore all of this is still true, however it continues on. "Destination" has a sub-member for "[DestinationArn](https://github.com/boto/botocore/blob/0ac30565017f1486b2eebf9bd90b5411f0d7f1fb/botocore/data/lambda/2015-03-31/service-2.json#L4747)". 
105 | 
106 | ![365281205-fa24b438-4f82-4571-9eeb-e96b4c89eb37](https://github.com/user-attachments/assets/ac98506a-38b2-49c8-af12-d2aa62774267)
107 | 
108 | It is not clear why the models are not the same. My working theory is that AWS uses a lot of code generation for its models. As a result, models are often fragmented and don't always contain the full set. As a result, it's possible that we are not properly merging shapes and missing some parts of them. Regardless of the reason why, we are unable to further analyze shapes.
109 | 
110 | If you find a way to reliably (emphasis) do this, please let me know. I would love to hear about it. For now, we are only comparing the top level parameters. This has the knock-on effect of reporting fewer undocumented parameters than there actually are.
111 | 
112 | ## Author
113 | 
114 | This tool was written by [Nick Frichette](https://frichetten.com/) in his free time. To find more of his research on AWS, please see [Datadog Security Labs](https://securitylabs.datadoghq.com/).
115 | 
116 | 


--------------------------------------------------------------------------------
/aws_connector.py:
--------------------------------------------------------------------------------
  1 | import requests, logging, re, json, os
  2 | 
  3 | from datetime import datetime
  4 | 
  5 | def parse_service_model(js_content, download_location, save, MODEL_DIR):
  6 |     match1 = re.findall("(parse\('{\"version\":\"[\.0-9]*?\",.*?'\))", js_content)
  7 |     match2 = re.findall("(parse\('{\"metadata\":{\".*?'\))", js_content)
  8 |     matches = match1 + match2
  9 | 
 10 |     if len(matches) == 0:
 11 |         return
 12 | 
 13 |     for model in matches:
 14 |         # This is necessary to remove 2 trailing characters
 15 |         # and to replace some invalid JSON characters
 16 |         try:
 17 |             parsed_model = json.loads(model[7:-2].replace("\\",""))
 18 |         except json.decoder.JSONDecodeError as e:
 19 |             logging.warning(f"{datetime.now()} ERROR - Failed to parse: {model[7:-2]} from {download_location}")
 20 |             continue
 21 | 
 22 |         if 'metadata' not in parsed_model.keys():
 23 |             logging.info(f"{datetime.now()} ERROR - No metadata found - {parsed_model}")
 24 |             continue
 25 | 
 26 |         if 'operations' not in parsed_model.keys():
 27 |             logging.info(f"{datetime.now()} ERROR - No operations found - {parsed_model}")
 28 |             continue
 29 | 
 30 |         # TODO: Better handling for non-uid models (<1%)
 31 |         if "uid" not in parsed_model['metadata'].keys():
 32 |             if "serviceFullName" in parsed_model['metadata'].keys():
 33 |                 #logging.info(f"[-] No UID found - {parsed_model['metadata']['serviceFullName']}")
 34 |                 filename = f"{parsed_model['metadata']['serviceFullName']}-{parsed_model['metadata']['protocol']}"
 35 |             else:
 36 |                 logging.info(f"{datetime.now()} ERROR - No UID found - unnamed")
 37 |                 filename = "".join([item for item in parsed_model['metadata'].values() if type(item) is str])
 38 |             #_mark_download_location(parsed_model, download_location)
 39 |             #_dump_to_file(parsed_model, filename, './incomplete') 
 40 |             continue
 41 |         
 42 |         if not save:
 43 |             # Just print it
 44 |             print(json.dumps(parsed_model, indent=4))
 45 |         # Need to determine if we have this file already
 46 |         elif os.path.exists(f"{MODEL_DIR}/{parsed_model['metadata']['uid']}-{parsed_model['metadata']['protocol']}.json"):
 47 |             # Integrate
 48 |             # TODO: there are some with alternative serviceFullNames and perhaps other info
 49 |             # Need to explore if there is enough of them to have special handling here.
 50 |             filename = f"{parsed_model['metadata']['uid']}-{parsed_model['metadata']['protocol']}"
 51 |             existing_model = _load_file(filename, MODEL_DIR)
 52 | 
 53 |             # Need to mark downloads from the new one before integrating
 54 |             parsed_model = _mark_download_location(parsed_model, download_location)
 55 |             complete_model = _integrate_models(parsed_model, existing_model)
 56 |             _dump_to_file(complete_model, filename, MODEL_DIR)
 57 |         else:
 58 |             logging.info(f"{datetime.now()} INFO - New model found: {parsed_model['metadata']['uid']}")
 59 |             parsed_model = _mark_download_location(parsed_model, download_location)
 60 |             filename = f"{parsed_model['metadata']['uid']}-{parsed_model['metadata']['protocol']}"
 61 |             _dump_to_file(parsed_model, filename, MODEL_DIR)
 62 | 
 63 | 
 64 | def fetch_service_model(javascript_url):
 65 |     try:
 66 |         resp = requests.get(javascript_url, timeout=30)
 67 |     except Exception as e:
 68 |         logging.error(f"{datetime.now()} ERROR - Failed to retrieve {javascript_url} {e}")
 69 |         return None
 70 | 
 71 |     if resp.status_code != 200:
 72 |         logging.error(f"{datetime.now()} ERROR - Failed to retrieve {javascript_url}")
 73 |     return resp.text
 74 | 
 75 | 
 76 | def fetch_services():
 77 |     resp = requests.get("https://us-east-1.console.aws.amazon.com/console/home?region=us-east-1&region=us-east-1")
 78 |     # 400 is not a bug. This gives the content we want :)
 79 |     if resp.status_code != 400:
 80 |         logging.critical("[X] Failed to pull service list")
 81 |         logging.critical("[X] Exiting")
 82 |         exit()
 83 |     
 84 |     match = re.findall("name=\"awsc-mezz-data\" content='(.*?)'", resp.text)
 85 |     return json.loads(match[0])['services']
 86 | 
 87 | 
 88 | def process_url(service):
 89 |     if "url" not in service.keys():
 90 |         logging.error(f"[!] url not in keys for {service}")
 91 |         return None
 92 |     elif service['url'] is None:
 93 |         return None
 94 |     elif service['url'][0] == "/":
 95 |         return f"https://us-east-1.console.aws.amazon.com{service['url']}?region=us-east-1"
 96 |     else:
 97 |         return service['url']
 98 | 
 99 | 
def parse_endpoints(driver_content):
    """Return the set of endpoint values embedded in a page's HTML.

    Looks for HTML-escaped JSON pairs whose key ends in ``ndpoint`` (for
    example ``&quot;apiEndpoint&quot;:&quot;value&quot;``) and collects the
    value text up to the next ``&``.
    """
    # Raw string so \w and \s reach the regex engine as written.
    return set(re.findall(r"(?:\w+ndpoint)&quot;\s*:\s*&quot;\s*([^&]+)", driver_content))
106 | 
107 | 
def find_javascript_urls(driver_content):
    """Return every absolute ``http(s)`` URL ending in ``.js`` found in the text.

    Matches are returned in order of appearance and may contain duplicates.
    A ``.js`` that is immediately followed by a word character (as in
    ``.json`` or ``.jsx``) is not treated as the end of a JavaScript URL.
    """
    # Raw string keeps the regex escapes intact; the (?!\w) lookahead stops
    # "data.json" from being truncated into a bogus "data.js" URL.
    return re.findall(r"(https?:\/\/[\w\-._~:\/?#\[\]@!$&'()*+,;=]+\.js)(?!\w)", driver_content)
111 | 
112 | 
113 | def _mark_download_location(model, download_location):
114 |     if 'download_location' not in model['metadata'].keys():
115 |         model['metadata']['download_location'] = [download_location]
116 |     elif download_location not in model['metadata']['download_location']:
117 |         if len(model['metadata']['download_location']) >= 25:
118 |             model['metadata']['download_location'] = model['metadata']['download_location'][:24]
119 |         model['metadata']['download_location'].append(download_location)
120 | 
121 |     for operation in model['operations']:
122 |         if 'download_location' not in model['operations'][operation].keys():
123 |             model['operations'][operation]['download_location'] = [download_location]
124 |         elif download_location not in model['operations'][operation]['download_location']:
125 |             if len(model['metadata']['download_location']) >= 25:
126 |                 model['metadata']['download_location'] = model['metadata']['download_location'][:24]
127 |             model['operations'][operation]['download_location'].append(download_location)
128 | 
129 |     return model
130 | 
131 |         
132 | def _load_file(filename, MODEL_DIR):
133 |     with open(f"{MODEL_DIR}/{filename}.json", "r") as r:
134 |         return json.load(r)
135 | 
136 | 
137 | def _dump_to_file(model, filename, MODEL_DIR):
138 |     filename = f"{MODEL_DIR}/{filename}.json"
139 |     with open(filename, "w") as w:
140 |         json.dump(model, w, indent=4)
141 | 
142 | 
143 | def _integrate_models(parsed_model, existing_model):
144 |     # First, update the download location for the metadata
145 |     if parsed_model['metadata']['download_location'][0] not in existing_model['metadata']['download_location']:
146 |         if len(existing_model['metadata']['download_location']) >= 25:
147 |             existing_model['metadata']['download_location'] = existing_model['metadata']['download_location'][1:]
148 | 
149 |         existing_model['metadata']['download_location'] += parsed_model['metadata']['download_location']
150 | 
151 |     # Next deal with operations
152 |     for operation in parsed_model['operations']:
153 |         if operation not in existing_model['operations'].keys():
154 |             logging.info(f"{datetime.now()} INFO - Adding new operation: {existing_model['metadata']['uid']}:{operation}")
155 |             existing_model['operations'][operation] = parsed_model['operations'][operation]
156 | 
157 |         else:
158 |             if len(existing_model['operations'][operation]['download_location']) >= 25:
159 |                 existing_model['operations'][operation]['download_location'] = existing_model['operations'][operation]['download_location'][1:]
160 | 
161 |             # This operation already exists, but let's update its download_location
162 |             if parsed_model['operations'][operation]['download_location'][0] not in existing_model['operations'][operation]['download_location']:
163 |                 existing_model['operations'][operation]['download_location'] += (parsed_model['operations'][operation]['download_location'])
164 | 
165 |             # This operation already exists, so let's integrate its parameters
166 |             if 'input' in parsed_model['operations'][operation].keys() and 'members' in parsed_model['operations'][operation]['input'].keys():
167 |                 for member in parsed_model['operations'][operation]['input']['members'].keys():
168 |                     # This accounts for a weird edge case when there is no `input` key
169 |                     if 'input' not in existing_model['operations'][operation].keys():
170 |                         existing_model['operations'][operation]['input'] = {}
171 |                         logging.info(f"{datetime.now()} INFO - Malformed model from AWS: {existing_model['metadata']['uid']}:{operation}")
172 | 
173 |                     if 'members' not in existing_model['operations'][operation]['input'].keys():
174 |                         existing_model['operations'][operation]['input']['members'] = {}
175 | 
176 |                     if member not in existing_model['operations'][operation]['input']['members'].keys():
177 |                         existing_model['operations'][operation]['input']['members'][member] = parsed_model['operations'][operation]['input']['members'][member]
178 |                         logging.info(f"{datetime.now()} INFO - Adding new input parameter: {existing_model['metadata']['uid']}:{operation}:{member}")
179 | 
180 |     # Now add new shapes
181 |     for shape in parsed_model['shapes']:
182 |         if shape not in existing_model['shapes'].keys():
183 |             existing_model['shapes'][shape] = parsed_model['shapes'][shape]
184 |     
185 |     return existing_model
186 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/scripts/count_undoc_apis.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """This script will compare results found in the AWS Console to the botocore dataset.
  4 |    Doing this may reveal API actions that are undocumented."""
  5 | 
  6 | # NOTE: I intentionally remove some botocore data that does not have a UID. 
  7 | # This is to make things easier but it may have an attack surface. Go back and review
  8 | 
  9 | import os, json, sys
 10 | 
 11 | if len(sys.argv) < 3:
 12 |     print(f"Usage: ./count_undoc_apis.py <botocore path> <models paths>")
 13 |     exit()
 14 | 
 15 | if "botocore" not in os.listdir("."):
 16 |     print(f"Error! Please download botocore locally")
 17 |     exit()
 18 | 
 19 | BOTOCORE_MODELS = f"{os.path.expanduser(sys.argv[1])}/botocore/data"
 20 | MODELS_DIR = os.path.expanduser(sys.argv[2])
 21 | botocore = {}
 22 | 
 23 | # Slurp all botocore models into memory
 24 | # with `uid` as the primary key
 25 | for service in os.listdir(BOTOCORE_MODELS):
 26 |     if not os.path.isdir(f"{BOTOCORE_MODELS}/{service}"):
 27 |         continue
 28 | 
 29 |     for version in os.listdir(f"{BOTOCORE_MODELS}/{service}/"):
 30 |         if not os.path.isdir(f"{BOTOCORE_MODELS}/{service}/{version}"):
 31 |             continue
 32 | 
 33 |         if not os.path.exists(f"{BOTOCORE_MODELS}/{service}/{version}/service-2.json"):
 34 |             continue
 35 | 
 36 |         with open(f"{BOTOCORE_MODELS}/{service}/{version}/service-2.json", "r") as r:
 37 |             data = json.load(r)
 38 |             if 'uid' not in data['metadata'].keys():
 39 |                 continue
 40 |             botocore[data['metadata']['uid']] = data
 41 | 
 42 | # Search through all crawled model definitions and compare to botocore
 43 | # If something is not in botocore, alert
 44 | 
 45 | extracted = {}
 46 | modelfiles = os.listdir(MODELS_DIR)
 47 | for file in modelfiles:
 48 |     with open(f"{MODELS_DIR}/{file}", "r") as r:
 49 |         data = json.load(r)
 50 |         if 'uid' not in data['metadata'].keys():
 51 |             continue
 52 |         extracted[data['metadata']['uid']] = data
 53 | 
 54 | ######################################################################################
 55 | # Count undocumented services
 56 | # Methodology: This should be simple, we check the uid of each model and split on the first -20 or -19. 
 57 | # For example, SSO-2017-11-28 would be SSO. cleanrooms-2022-02-17 would be cleanrooms. giraffe-1986-04-08 
 58 | # would be giraffe. This will help separate the service from the version.
 59 | botocore_services = set()
 60 | for service_name, service in botocore.items():
 61 |     name = service['metadata']['uid'].split("-20")[0].split("-19")[0]
 62 |     botocore_services.add(name)
 63 | 
 64 | extracted_services = set()
 65 | for service_name, service in extracted.items():
 66 |     name = service['metadata']['uid'].split("-20")[0].split("-19")[0]
 67 |     extracted_services.add(name)
 68 | 
 69 | #print("Finding undocumented services")
 70 | undocumented_services_count = 0
 71 | for service in extracted_services:
 72 |     if service not in botocore_services:
 73 |         #print(service)
 74 |         undocumented_services_count += 1
 75 | print(f"Undocumented services: {undocumented_services_count}")
 76 | 
 77 | 
 78 | ######################################################################################
 79 | # Count undocumented versions of documented services
 80 | # Methodology: This will be slightly more complicated as we need to first check if a service 
 81 | # is documented and if it is, we then need to see if the version is undocumented.
 82 | botocore_services = []
 83 | for service_name, service in botocore.items():
 84 |     name = service['metadata']['uid']
 85 |     botocore_services.append(name)
 86 | 
 87 | extracted_services = []
 88 | for service_name, service in extracted.items():
 89 |     name = service['metadata']['uid']
 90 |     extracted_services.append(name)
 91 | 
 92 | #print("Finding undocumented service versions for documented services")
 93 | undocumented_service_versions_count = 0
 94 | for service in extracted_services:
 95 |     name = service.split("-20")[0].split("-19")[0]
 96 |     found = any(name in substring for substring in botocore_services)
 97 |     if not found:
 98 |         continue
 99 | 
100 |     if service not in botocore_services:
101 |         #print(service)
102 |         undocumented_service_versions_count += 1
103 | print(f"Undocumented service versions for documented services: {undocumented_service_versions_count}")
104 | 
105 | 
106 | 
107 | ######################################################################################
108 | # Count undocumented parameters for documented actions
109 | # Methodology: Substantially more complex than previous. We compare services by their UID 
110 | # and by their actions. If an extracted model has a parameter that the documented model does not have, 
111 | # we count that as an undocumented parameter. I know the code is rough below. When it comes to parameters 
112 | # the extracted model file format differs from the documented models. This means I can't use the same 
113 | # recursive function. I have taken steps to double check this output to make sure nothing is amiss.
114 | 
115 | # VERY IMPORTANT: This does NOT enumerate all sub-parameters. You cannot (realistically) do this with the current model format.
116 | # As an example lambda-2015-03-31:UpdateFunctionEventInvokeConfig has the parameter DestinationConfig, this has a 
117 | # sub-parameter OnSuccess, which itself has a member for "Destination".
118 | # https://github.com/Frichetten/aws-api-models/blob/4bc7b764593d2c2b78e3f81ff8c7027bd7048e50/models/lambda-2015-03-31-rest-json.json#L4358
119 | # In botocore all of this is still true, however it continues on. "Destination" has a sub-member for "DestinationArn"
120 | # https://github.com/boto/botocore/blob/0ac30565017f1486b2eebf9bd90b5411f0d7f1fb/botocore/data/lambda/2015-03-31/service-2.json#L4747
121 | # Because of these model differences we can never reconcile this.
122 | 
123 | # If you find a way to reliably (emphasis) do this, please let me know. I would love to hear about it.
124 | # For now, we are only comparing the top level parameters. This has the knock-on effect of reporting 
125 | # fewer undocumented parameters than there actually are.
126 | 
127 | # Below you will find find_shape and find_member. These are recursive functions left over from when I was trying to find 
128 | # all sub-parameters. They are not used in the final version of this script. I've kept them here for reference if 
129 | # someone (or future me) wants to try and tackle this problem again.
# Index both datasets by the uid stored in each model's metadata so the
# parameter comparison below can look services up directly.
botocore_data = {
    model['metadata']['uid']: model for model in botocore.values()
}
extracted_data = {
    model['metadata']['uid']: model for model in extracted.values()
}
139 | 
140 | # This function will be used recursively to find the parameter shapes
def find_shape(model, shape_name, previous_shape, _ancestors=()):
    """Recursively flatten a shape into the member names of its leaves.

    Structures are expanded member by member; any other shape type is a
    leaf and is reported under the name of the member that referenced it.

    Args:
        model: Service model dict with a top-level 'shapes' mapping in which
            every structure member refers to another shape via 'shape'.
        shape_name: Key in model['shapes'] to start from.
        previous_shape: Name of the member that pointed at `shape_name`;
            returned as-is when `shape_name` is not a structure.
        _ancestors: Internal. Shape names on the current recursion path;
            callers should leave the default.

    Returns:
        A list of member names in traversal order.
    """
    shape = model['shapes'][shape_name]
    if shape['type'] != "structure":
        return [previous_shape]

    # Track the whole chain of structures being expanded so that both direct
    # self-references and indirect cycles (A -> B -> A) are skipped instead of
    # recursing forever.
    # Direct example: https://github.com/boto/botocore/blob/bc89f1540e0cbb000561a72d20de9df0e92b9f4d/botocore/data/lexv2-runtime/2020-08-07/service-2.json#L532
    path = (*_ancestors, shape_name)

    flatlist = []
    for member, definition in shape['members'].items():
        target = definition['shape']
        if target in path:
            continue
        flatlist += find_shape(model, target, member, path)

    return flatlist
156 | 
157 | 
def find_member(model, shape_name, previous_shape, _ancestors=()):
    """Recursively flatten a shape of a crawled model into member names.

    Unlike `find_shape`, members here may either reference another shape
    through 'shape' or carry an inline definition with its own 'type'.

    Args:
        model: Model dict with a top-level 'shapes' mapping.
        shape_name: Key in model['shapes'] to start from.
        previous_shape: Name of the member that pointed at `shape_name`;
            returned as-is when `shape_name` is not a structure.
        _ancestors: Internal. Shape names on the current recursion path;
            callers should leave the default.

    Returns:
        A list of member names in traversal order.
    """
    shape = model['shapes'][shape_name]
    if shape['type'] != "structure":
        return [previous_shape]

    # Remember every structure on the current path so that both direct
    # self-references and indirect cycles (A -> B -> A) are skipped instead of
    # recursing forever.
    path = (*_ancestors, shape_name)

    flatlist = []
    for member, definition in shape['members'].items():
        if 'shape' in definition:
            # Reference to a named shape: expand it unless that would loop.
            if definition['shape'] in path:
                continue
            flatlist += find_member(model, definition['shape'], member, path)
        elif definition.get('type') == "structure":
            # Inline structure: report its direct members, one level only.
            flatlist.extend(definition['members'].keys())
        else:
            # Inline simple type: the member itself is the leaf.
            flatlist.append(member)

    return flatlist
179 | 
180 | #print("Finding undocumented parameters for documented actions")
undocumented_parameters_count = 0
# Walk the botocore side because only documented services are of interest.
for service_name, service in botocore_data.items():
    if service_name not in extracted_data:
        continue

    for operation_name, operation in service['operations'].items():
        # Both sides need an input definition for a comparison to make sense.
        if 'input' not in operation:
            continue

        crawled_operations = extracted_data[service_name]['operations']
        if operation_name not in crawled_operations:
            continue

        crawled_operation = crawled_operations[operation_name]
        if 'input' not in crawled_operation:
            continue
        if 'members' not in crawled_operation['input']:
            continue

        # Only top-level parameters are compared. botocore keeps them on the
        # input shape, the crawled model keeps them inline on the operation.
        # (A recursive comparison via find_shape/find_member was tried and
        # dropped because the two model formats cannot be reconciled.)
        documented_params = service['shapes'][operation['input']['shape']]['members']
        crawled_params = crawled_operation['input']['members']

        undocumented_parameters_count += sum(
            1 for param in crawled_params if param not in documented_params
        )
print(f"Undocumented parameters for documented actions: {undocumented_parameters_count}")
231 | 
232 | 
233 | 
234 | ######################################################################################
235 | # Count undocumented actions of documented services
236 | # Methodology: Slightly less complex than previous. We compare services by their UID. 
237 | # If an extracted model has an action that the documented model does not have, we count that as an undocumented action.
# Map each uid to its operations for both datasets.
botocore_actions = {
    model['metadata']['uid']: model['operations'] for model in botocore.values()
}
extracted_actions = {
    model['metadata']['uid']: model['operations'] for model in extracted.values()
}

undocumented_actions_count = 0
for service_name, operations in extracted_actions.items():
    # Only services (by exact uid) that botocore documents are counted here.
    if service_name in botocore_actions:
        documented_operations = botocore_actions[service_name]
        undocumented_actions_count += sum(
            1 for operation in operations.keys()
            if operation not in documented_operations
        )
print(f"Undocumented actions for documented services: {undocumented_actions_count}")
261 | 
262 | 
263 | 
264 | ######################################################################################
265 | # Count all undocumented actions for undocumented services
266 | # Methodology: This is the easiest. If the service itself is not documented, all of its actions are undocumented.
267 | # We simply iterate through all of our extracted services and count the actions.
# Map each uid to its operations for both datasets.
botocore_actions = {
    model['metadata']['uid']: model['operations'] for model in botocore.values()
}
extracted_actions = {
    model['metadata']['uid']: model['operations'] for model in extracted.values()
}

# Every action of a crawled model whose uid is unknown to botocore counts as
# undocumented, so simply add up the operation counts of those models.
undocumented_actions_count = sum(
    len(operations.keys())
    for service_name, operations in extracted_actions.items()
    if service_name not in botocore_actions
)

print(f"Undocumented actions for undocumented services: {undocumented_actions_count}")
289 | 
290 | 


--------------------------------------------------------------------------------