├── tests └── test_dummy.py ├── tox.ini ├── .travis.yml ├── .gitignore ├── MANIFEST.in ├── requirements.txt ├── README.md ├── LICENSE ├── setup.py └── aws_marketplace_ubuntu_scraper.py /tests/test_dummy.py: -------------------------------------------------------------------------------- 1 | def test_dummy(): 2 | assert True 3 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py3 3 | 4 | [testenv] 5 | commands = pytest 6 | deps = pytest 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3" 4 | # command to install dependencies 5 | install: 6 | - pip install -r requirements.txt 7 | # command to run tests 8 | script: 9 | - pytest tests 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | venv/ 4 | .cache/ 5 | /.eggs/ 6 | /.idea/ 7 | /.tox/ 8 | /__pycache__/ 9 | /aws_marketplace_ubuntu_scraper.egg-info/ 10 | /build/ 11 | /dist/ 12 | /geckodriver.log 13 | *-getQuickstartList.json -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | include requirements.txt 4 | include aws_marketplace_ubuntu_scraper.py 5 | 6 | 7 | recursive-exclude tests * 8 | recursive-exclude * __pycache__ 9 | recursive-exclude * *.py[co] 10 | 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Run pip install --requirement=requirements.txt to install all requirements 2 | 3 | boto3 4 | click 5 | pytest 6 | selenium==3.141.0 7 | selenium-wire==1.0.11 8 | requests 9 | beautifulsoup4 10 | joblib 11 | prettytable 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AWS Marketplace Ubuntu AMI Scraper 2 | 3 | CLI to return the Ubuntu AMIs in AWS marketplace 4 | 5 | ## Basic setup 6 | 7 | Install from [PyPi](https://pypi.org/project/aws-marketplace-ubuntu-scraper/) 8 | 9 | ``` 10 | $ pip install aws-marketplace-ubuntu-scraper 11 | ``` 12 | 13 | ... OR ... 14 | 15 | Install the requirements manually: 16 | ``` 17 | $ pip install -r requirements.txt 18 | ``` 19 | 20 | You will also need [Firefox](https://www.mozilla.org/en-US/firefox/new/) installed and [geckodriver](https://github.com/mozilla/geckodriver/releases) available in your PATH. 21 | 22 | For running the `quicklaunch-report`, you also need the `simplestreams` snap installed: 23 | 24 | ``` 25 | snap install simplestreams 26 | ``` 27 | 28 | I recommend you create a new IAM user with no permissions granted. 29 | Ensure that you have opted in to all the AWS regions that are opt in only and 30 | that you want quicklaunch listings for. 31 | 32 | You will also need to set up your aws credentials for use with [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html) 33 | 34 | Run the application: 35 | ``` 36 | $ python -m aws_marketplace_ubuntu_scraper --help 37 | 38 | # Print details of the Ubuntu quicklaunch entries for each region 39 | $ python -m aws_marketplace_ubuntu_scraper quicklaunch --iam-account-id="YOUR IAM ACCOUNT ID" --iam-username="YOUR IAM USERNAME" --iam-password="YOUR IAM PASSSWORD" 40 | 41 | # Print details of the Ubuntu marketplace listings 42 | $ python -m aws_marketplace_ubuntu_scraper marketplace 43 | 44 | ``` 45 | 46 | To run the tests: 47 | ``` 48 | $ pytest 49 | ``` 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | GNU GENERAL PUBLIC LICENSE 3 | Version 3, 29 June 2007 4 | 5 | CLI to return the Ubuntu AMIs in AWS marketplace 6 | Copyright (C) 2019 Philip Roche 7 | 8 | This program is free software: you can redistribute it and/or modify 9 | it under the terms of the GNU General Public License as published by 10 | the Free Software Foundation, either version 3 of the License, or 11 | (at your option) any later version. 12 | 13 | This program is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with this program. If not, see . 20 | 21 | Also add information on how to contact you by electronic and paper mail. 22 | 23 | You should also get your employer (if you work as a programmer) or school, 24 | if any, to sign a "copyright disclaimer" for the program, if necessary. 25 | For more information on this, and how to apply and follow the GNU GPL, see 26 | . 27 | 28 | The GNU General Public License does not permit incorporating your program 29 | into proprietary programs. If your program is a subroutine library, you 30 | may consider it more useful to permit linking proprietary applications with 31 | the library. If this is what you want to do, use the GNU Lesser General 32 | Public License instead of this License. But first, please read 33 | . 34 | 35 | 36 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import re 4 | 5 | from glob import glob 6 | from os.path import basename 7 | from os.path import splitext 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | 11 | 12 | def read(filename): 13 | filename = os.path.join(os.path.dirname(__file__), filename) 14 | text_type = type(u"") 15 | with io.open(filename, mode="r", encoding='utf-8') as fd: 16 | return re.sub(text_type(r':[a-z]+:`~?(.*?)`'), text_type(r'``\1``'), fd.read()) 17 | 18 | 19 | reqs_path = os.path.join(os.path.dirname(__file__), 'requirements.txt') 20 | 21 | with open(reqs_path, 'r') as req_file: 22 | dependencies = req_file.readlines() 23 | 24 | 25 | setup( 26 | name="aws_marketplace_ubuntu_scraper", 27 | version="0.0.8", 28 | url="https://github.com/CanonicalLtd/aws-marketplace-ubuntu-scraper", 29 | license='GPLv3', 30 | author="Philip Roche", 31 | author_email="phil.roche@canonical.com", 32 | description="CLI to return the Ubuntu AMIs in AWS marketplace", 33 | long_description=read("README.md"), 34 | long_description_content_type="text/markdown", 35 | packages=find_packages(exclude=('tests',)), 36 | py_modules=[splitext(basename(path))[0] for path in glob('*.py')], 37 | install_requires=dependencies, 38 | setup_requires=['wheel'], 39 | classifiers=[ 40 | 'Development Status :: 2 - Pre-Alpha', 41 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 42 | 'Programming Language :: Python', 43 | 'Programming Language :: Python :: 3', 44 | 'Programming Language :: Python :: 3.4', 45 | 'Programming Language :: Python :: 3.5', 46 | 'Programming Language :: Python :: 3.6', 47 | 'Programming Language :: Python :: 3.7', 48 | ], 49 | entry_points={ 50 | 'console_scripts': [ 51 | 'aws-marketplace-ubuntu-scraper = ' 52 | 'aws_marketplace_ubuntu_scraper:main', 53 | ], 54 | }, 55 | ) 56 | -------------------------------------------------------------------------------- /aws_marketplace_ubuntu_scraper.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import subprocess 4 | import sys 5 | import time 6 | 7 | import boto3 8 | import click 9 | import requests 10 | 11 | from botocore.exceptions import ClientError as botocoreClientError 12 | from bs4 import BeautifulSoup 13 | from joblib import Parallel, delayed 14 | from selenium.webdriver.common.by import By 15 | from selenium.webdriver.support.ui import WebDriverWait 16 | from selenium.webdriver.support import expected_conditions as EC 17 | from selenium.webdriver.firefox.options import Options 18 | from selenium.common.exceptions import TimeoutException as SeleniumTimeoutException 19 | from seleniumwire import webdriver 20 | 21 | CANONICAL_OWNER = "099720109477" 22 | AWS_UBUNTU_PRO_OWNER_ALIAS = "aws-marketplace" 23 | AWS_UBUNTU_DEEP_LEARNING_OWNER_ALIAS = "amazon" 24 | CANONICAL_MARKETPLACE_PROFILE = "565feec9-3d43-413e-9760-c651546613f2" 25 | 26 | 27 | def get_regions(account_id, username, password, headless, only_regions): 28 | # region_dict = {"name": "US East", "location": "N. Virginia", "id": "us-east-1" } 29 | # return [region_dict] 30 | # region_dict = {"name": "Asia Pacific", "location": "Seoul", "id": "ap-northeast-2"} 31 | # return [region_dict] 32 | # region_dict = {"name": "Europe", "location": "Ireland", 33 | # "id": "eu-west-1"} 34 | # return [region_dict] 35 | driver_options = Options() 36 | driver_options.headless = headless 37 | driver = webdriver.Firefox(options=driver_options) 38 | wait = WebDriverWait(driver, 10) 39 | driver.get("https://{}.signin.aws.amazon.com/console".format(account_id)) 40 | username_element = driver.find_element_by_id("username") 41 | username_element.send_keys(username) 42 | password_element = driver.find_element_by_id("password") 43 | password_element.send_keys(password) 44 | driver.find_element_by_id("signin_button").click() 45 | wait.until(lambda driver: driver.find_element_by_name("awsc-mezz-data")) 46 | region_list_element = driver.find_element_by_name("awsc-mezz-data") 47 | region_list_str = region_list_element.get_attribute("content") 48 | region_list = json.loads(region_list_str)["regions"] 49 | 50 | driver.delete_all_cookies() 51 | driver.close() 52 | driver.quit() 53 | if only_regions: 54 | return [reg for reg in region_list if reg['id'] in only_regions] 55 | return region_list 56 | 57 | 58 | @click.command() 59 | @click.option( 60 | "--iam-account-id", 61 | envvar="IAM_ACCOUNT_ID", 62 | required=True, 63 | help="IAM User account ID", 64 | ) 65 | @click.option( 66 | "--iam-username", envvar="IAM_USERNAME", required=True, help="IAM username" 67 | ) 68 | @click.option( 69 | "--iam-password", envvar="IAM_PASSWORD", required=True, help="IAM User account ID" 70 | ) 71 | @click.option( 72 | "--headless/--no-headless", 73 | default=True, 74 | help="Use selenium in headless mode to avoid Firefox browser opening", 75 | ) 76 | @click.option( 77 | "--parallel/--no-parallel", default=True, help="Query regions in parallel.", 78 | ) 79 | @click.option( 80 | "--only-regions", multiple=True, default=[] 81 | ) 82 | def quicklaunch(iam_account_id, iam_username, iam_password, headless, parallel, only_regions): 83 | region_dict_list = get_regions(iam_account_id, iam_username, iam_password, headless, only_regions) 84 | driver_options = Options() 85 | driver_options.headless = headless 86 | 87 | def scrape_quicklaunch_regions(region_dict): 88 | def get_ami_details(region_client, ami, quickstart_slot, ami_id): 89 | # Get the ami details 90 | resp = region_client.describe_images( 91 | Filters=[{"Name": "image-id", "Values": [ami_id]}], 92 | ) 93 | resp_len = len(resp.get("Images", [])) 94 | if resp_len: 95 | image = resp["Images"][0] 96 | image_owner = image.get("ImageOwnerAlias", image.get("OwnerId")) 97 | name_regex = None 98 | if image_owner == CANONICAL_OWNER: 99 | image_owner = "Canonical" 100 | # This is a Canonical AMI 101 | name_regex = ( 102 | r"ubuntu/images(-(?P[\w-]+))?/" 103 | r"((?P\w+(-\w+)?)/)?" 104 | r"ubuntu-(?P\w+)-" 105 | r"((?P\d\d\.\d\d)-)?" 106 | r"((?P\w+)-)?" 107 | r"(?P\w+)-server-" 108 | r"(?P\d+(\.\d{1,2})?)" 109 | r"(\-(?P\w+))?" 110 | ) 111 | elif image_owner == AWS_UBUNTU_PRO_OWNER_ALIAS: 112 | # This is an AWS Ubuntu AMI - used for Ubuntu Pro listings 113 | # trusty-ua-tools-20191128-d984c693-feaa-4be0-bc34-2099410bc9cc-ami-075ab031d5a3404c6.4 114 | name_regex = ( 115 | r".*?" 116 | r"(?P\d+(\.\d{1,2})?)" 117 | r"-.*?-" 118 | r"(?Pami-\w+).*?" 119 | ) 120 | elif image_owner == AWS_UBUNTU_DEEP_LEARNING_OWNER_ALIAS: 121 | # This is an AWS Ubuntu AMI - used for 122 | # Ubuntu Deep learning and SQL server listings 123 | # trusty-ua-tools-20191128-d984c693-feaa-4be0-bc34-2099410bc9cc-ami-075ab031d5a3404c6.4 124 | # ubuntu-xenial-16.04-amd64-server-20190212-SQL_2017_Standard-2019.04.02 125 | name_regex = ( 126 | r"ubuntu-(?P\w+)-" 127 | r"((?P\d\d\.\d\d)-)?" 128 | r"(?P\w+)-server-" 129 | r"(?P\d+(\.\d{1,2})?)" 130 | r"-.*?" 131 | ) 132 | if name_regex: 133 | ami["quickstart_slot"] = quickstart_slot 134 | ami["ami_id"] = ami_id 135 | ami["owner"] = image_owner 136 | name = image["Name"] 137 | match = re.match(name_regex, name) 138 | if match: 139 | attrs = match.groupdict() 140 | for key, value in attrs.items(): 141 | ami[key] = value 142 | return ami 143 | else: 144 | return None 145 | else: 146 | return None 147 | 148 | region_identifier = region_dict["id"] 149 | print("scraping {} ...".format(region_identifier)) 150 | region_session = boto3.Session(region_name=region_identifier) 151 | region_client = region_session.client("ec2") 152 | ubuntu_quick_start_listings = [] 153 | driver = webdriver.Firefox(options=driver_options) 154 | try: 155 | wait = WebDriverWait(driver, 20) 156 | driver.get( 157 | "https://{}.signin.aws.amazon.com/console?region={}".format(iam_account_id, region_identifier) 158 | ) 159 | wait.until(lambda driver: driver.find_element_by_id("username")) 160 | username_element = driver.find_element_by_id("username") 161 | username_element.send_keys(iam_username) 162 | password_element = driver.find_element_by_id("password") 163 | password_element.send_keys(iam_password) 164 | driver.find_element_by_id("signin_button").click() 165 | 166 | wait.until(EC.element_to_be_clickable((By.ID, 'EC2_LAUNCH_WIZARD'))) 167 | driver.find_element(By.ID, "EC2_LAUNCH_WIZARD").click() 168 | 169 | wait.until( 170 | lambda driver: driver.find_element_by_xpath( 171 | '//iframe[@id="instance-lx-gwt-frame"]' 172 | ) 173 | ) 174 | dashboard_iframe = driver.find_element_by_xpath( 175 | '//iframe[@id="instance-lx-gwt-frame"]' 176 | ) 177 | driver.switch_to.frame(dashboard_iframe) 178 | 179 | wait.until( 180 | lambda driver: driver.find_element_by_id( 181 | "gwt-debug-tab-QUICKSTART_AMIS" 182 | ) 183 | ) 184 | driver.find_element_by_id("gwt-debug-tab-QUICKSTART_AMIS").click() 185 | wait.until( 186 | lambda driver: driver.find_element_by_id( 187 | "gwt-debug-tab-QUICKSTART_AMIS" 188 | ) 189 | ) 190 | wait.until( 191 | lambda driver: driver.find_element_by_id("gwt-debug-paginatorLabel") 192 | ) 193 | # wait until JSON request is complete loads. 194 | # 3 seconds seems to be enough for all regions 195 | print("{} - Querying quickstart list".format(region_identifier)) 196 | time.sleep(3) 197 | for request in list(driver.requests): 198 | if "call=getQuickstartList" in request.path and request.response: 199 | region_quickstart_entries = json.loads(request.response.body) 200 | with open( 201 | "{}-getQuickstartList.json".format(region_identifier), "w" 202 | ) as outfile: 203 | json.dump(region_quickstart_entries, outfile, indent=4) 204 | 205 | quickstart_slot = 0 206 | for ami in region_quickstart_entries["amiList"]: 207 | quickstart_slot = quickstart_slot + 1 208 | if ami["platform"] == "ubuntu": 209 | if ami.get("imageId64", None): 210 | print( 211 | "{} - Querying ami details for AMD64 AMI {}".format( 212 | region_identifier, ami.get("imageId64") 213 | ) 214 | ) 215 | canonical_amd64_ami = get_ami_details( 216 | region_client, 217 | ami.copy(), 218 | quickstart_slot, 219 | ami.get("imageId64"), 220 | ) 221 | if canonical_amd64_ami: 222 | canonical_amd64_ami["listing_arch"] = "amd64" 223 | ubuntu_quick_start_listings.append( 224 | canonical_amd64_ami 225 | ) 226 | 227 | if ami.get("imageIdArm64", None): 228 | print( 229 | "{} - Querying ami details for ARM64 AMI {}".format( 230 | region_identifier, ami.get("imageIdArm64") 231 | ) 232 | ) 233 | canonical_arm64_ami = get_ami_details( 234 | region_client, 235 | ami.copy(), 236 | quickstart_slot, 237 | ami.get("imageIdArm64"), 238 | ) 239 | if canonical_arm64_ami: 240 | canonical_arm64_ami["listing_arch"] = "arm64" 241 | ubuntu_quick_start_listings.append( 242 | canonical_arm64_ami 243 | ) 244 | 245 | # We only need one list so we can break here 246 | break 247 | except SeleniumTimeoutException as ste: 248 | print( 249 | "SeleniumTimeoutException encountered when querying region {} ".format( 250 | region_identifier 251 | ) 252 | ) 253 | print(ste.msg) 254 | except botocoreClientError as bce: 255 | print( 256 | "botocoreClientError encountered when AMI for region {} ".format( 257 | region_identifier 258 | ) 259 | ) 260 | print(bce) 261 | finally: 262 | driver.delete_all_cookies() 263 | driver.close() 264 | driver.quit() 265 | return (region_identifier, ubuntu_quick_start_listings) 266 | 267 | n_jobs = -1 if parallel else 1 268 | parallel_quickstart_entries = Parallel(n_jobs=n_jobs)( 269 | delayed(scrape_quicklaunch_regions)(region_dict) 270 | for region_dict in region_dict_list 271 | ) 272 | 273 | sorted_parallel_quickstart_entries = sorted( 274 | parallel_quickstart_entries, key=lambda tup: tup[0] 275 | ) 276 | 277 | with open("quickstart_entries.json", "w") as quickstart_entries_json: 278 | json.dump(sorted_parallel_quickstart_entries, quickstart_entries_json, indent=4) 279 | 280 | issues = {} 281 | 282 | for region, ubuntu_quickstart_entries in sorted_parallel_quickstart_entries: 283 | print(region) 284 | region_amis = [] 285 | region_expected_listings = { 286 | "16.04": ["amd64", "arm64"], 287 | "18.04": ["amd64", "arm64"], 288 | "20.04": ["amd64", "arm64"], 289 | } 290 | for ubuntu_quickstart_entry in ubuntu_quickstart_entries: 291 | print( 292 | "{} {}\n\t{} {} {} {} {} \n\t\t(Slot: {} , Description: {})".format( 293 | ubuntu_quickstart_entry.get("title", ""), 294 | ubuntu_quickstart_entry.get("listing_arch", ""), 295 | ubuntu_quickstart_entry.get("release_version", ""), 296 | ubuntu_quickstart_entry.get("serial", ""), 297 | ubuntu_quickstart_entry.get("arch", ""), 298 | ubuntu_quickstart_entry.get("ami_id", ""), 299 | ubuntu_quickstart_entry.get("owner", ""), 300 | ubuntu_quickstart_entry.get("quickstart_slot", ""), 301 | ubuntu_quickstart_entry.get("description", ""), 302 | ) 303 | ) 304 | if ubuntu_quickstart_entry.get("owner", "") == "Canonical": 305 | release_version = ubuntu_quickstart_entry.get("release_version", "") 306 | if ( 307 | ubuntu_quickstart_entry.get("arch", "") 308 | in region_expected_listings[release_version] 309 | ): 310 | region_expected_listings[release_version].remove( 311 | ubuntu_quickstart_entry.get("arch", "") 312 | ) 313 | region_amis.append(ubuntu_quickstart_entry.get("ami_id", "")) 314 | 315 | if ubuntu_quickstart_entry.get( 316 | "owner", "" 317 | ) == "Canonical" and ubuntu_quickstart_entry.get( 318 | "arch", "" 319 | ) != ubuntu_quickstart_entry.get( 320 | "listing_arch", "" 321 | ): 322 | issues.setdefault(region, []).append( 323 | "'{}' listing arch {} and AMI ({}) arch {} are not equal ".format( 324 | ubuntu_quickstart_entry.get("title", ""), 325 | ubuntu_quickstart_entry.get("listing_arch", ""), 326 | ubuntu_quickstart_entry.get("ami_id", ""), 327 | ubuntu_quickstart_entry.get("arch", ""), 328 | ) 329 | ) 330 | if ( 331 | ubuntu_quickstart_entry.get("owner", "") == "Canonical" 332 | and int(ubuntu_quickstart_entry.get("quickstart_slot", "")) > 10 333 | ): 334 | issues.setdefault(region, []).append( 335 | "'{}' {} listing slot is greater than 10 - slot {}".format( 336 | ubuntu_quickstart_entry.get("title", ""), 337 | ubuntu_quickstart_entry.get("listing_arch", ""), 338 | ubuntu_quickstart_entry.get("quickstart_slot", ""), 339 | ) 340 | ) 341 | if ( 342 | ubuntu_quickstart_entry.get("owner", "") == "Canonical" 343 | and region_amis.count(ubuntu_quickstart_entry.get("ami_id", "")) > 1 344 | ): 345 | issues.setdefault(region, []).append( 346 | "'{}' {} listing AMI {} appears more than once ".format( 347 | ubuntu_quickstart_entry.get("title", ""), 348 | ubuntu_quickstart_entry.get("listing_arch", ""), 349 | ubuntu_quickstart_entry.get("ami_id", ""), 350 | ) 351 | ) 352 | print() 353 | 354 | for release_version, arches in region_expected_listings.items(): 355 | if len(arches) > 0: 356 | for arch in arches: 357 | issues.setdefault(region, []).append( 358 | "There are no listings for {} {} ".format( 359 | release_version, arch, 360 | ) 361 | ) 362 | for region, region_issues in issues.items(): 363 | print(region) 364 | for region_issue in region_issues: 365 | print("\t* {}".format(region_issue)) 366 | print() 367 | 368 | 369 | @click.command() 370 | def marketplace(): 371 | public_profile_url_base = "https://aws.amazon.com/marketplace/seller-profile" 372 | public_profile_url = "{}?id={}".format( 373 | public_profile_url_base, CANONICAL_MARKETPLACE_PROFILE 374 | ) 375 | response = requests.get(public_profile_url) 376 | page_content = response.content 377 | page_soup = BeautifulSoup(page_content, features="html.parser") 378 | page_link_elements = page_soup.select("div.pagination-bar ul.pagination li a") 379 | page_links = set() 380 | for page_link_element in page_link_elements: 381 | href = page_link_element.get("href", None) 382 | if href: 383 | page_links.add("{}{}".format(public_profile_url_base, href)) 384 | 385 | def scrape_marketplace(marketplace_url): 386 | page_count = "" 387 | page_count_regex = r".*?page=(?P\d?)" 388 | match = re.match(page_count_regex, marketplace_url) 389 | 390 | if match: 391 | attrs = match.groupdict() 392 | page_count = attrs.get("page_count", None) 393 | 394 | response = requests.get(marketplace_url) 395 | page_content = response.content 396 | page_soup = BeautifulSoup(page_content, features="html.parser") 397 | product_elements = page_soup.select( 398 | "div.vendor-products article.products div.col-xs-10" 399 | ) 400 | products = [] 401 | product_order = (int(page_count) * 10) - 10 402 | product_in_page_order = 0 403 | for product_element in product_elements: 404 | product_order = product_order + 1 405 | product_in_page_order = product_in_page_order + 1 406 | 407 | product_title_element = product_element.select_one("div.row h1") 408 | product_title = ( 409 | product_title_element.get_text().strip() 410 | if product_title_element 411 | else "" 412 | ) 413 | 414 | product_version_element = product_element.select_one( 415 | "ul.info li:nth-child(1)" 416 | ) 417 | product_version = ( 418 | product_version_element.get_text().strip() 419 | if product_version_element 420 | else "" 421 | ) 422 | 423 | product_pricing_element = product_element.select_one("p.pricing span.price") 424 | product_pricing = ( 425 | product_pricing_element.get_text().strip() 426 | if product_pricing_element 427 | else "" 428 | ) 429 | 430 | product_info_element = product_element.select_one("p.delivery") 431 | product_info = ( 432 | product_info_element.get_text().strip() if product_info_element else "" 433 | ) 434 | 435 | product_description_element = product_element.select_one("p.description") 436 | product_description = ( 437 | product_description_element.get_text().strip() 438 | if product_description_element 439 | else "" 440 | ) 441 | 442 | # Get more detailed information on this listing 443 | marketplace_url_element = product_title_element.select_one("a") 444 | marketplace_url = marketplace_url_element.get("href") 445 | listing_response = requests.get( 446 | "https://aws.amazon.com{}".format(marketplace_url) 447 | ) 448 | listing_page_content = listing_response.content 449 | listing_page_soup = BeautifulSoup( 450 | listing_page_content, features="html.parser" 451 | ) 452 | fullfillment_options_element = listing_page_soup.select_one( 453 | "div.pdp-attributes div.fulfillment-options ul li:nth-child(1)" 454 | ) 455 | fullfillment_options = ( 456 | fullfillment_options_element.get_text().strip() 457 | if fullfillment_options_element 458 | else "" 459 | ) 460 | 461 | release_version = "" 462 | serial = "" 463 | version_regex = ( 464 | r".*?(?P\d\d\.\d\d?)" 465 | r".*?(?P\d\d\d\d\d\d\d\d(\.\d{1,2})?).*?" 466 | ) 467 | 468 | match = re.match(version_regex, product_version) 469 | 470 | if match: 471 | attrs = match.groupdict() 472 | release_version = attrs.get("release_version", None) 473 | serial = attrs.get("serial", None) 474 | 475 | product_unique_identifier = "{} ({}) - {}".format( 476 | product_title, fullfillment_options, serial 477 | ) 478 | product = { 479 | "unique_identifier": product_unique_identifier, 480 | "version": product_version, 481 | "release_version": release_version, 482 | "title": product_title, 483 | "pricing": product_pricing, 484 | "info": product_info, 485 | "description": product_description, 486 | "product_in_page_order": product_in_page_order, 487 | "page_order": page_count, 488 | "product_order": product_order, 489 | "serial": serial, 490 | "marketplace_url": "https://aws.amazon.com{}".format(marketplace_url), 491 | "type": fullfillment_options, 492 | } 493 | products.append(product) 494 | return (page_count, products) 495 | 496 | parallel_products = Parallel(n_jobs=-1)( 497 | delayed(scrape_marketplace)(page_link) for page_link in page_links 498 | ) 499 | sorted_parallel_products = sorted(parallel_products, key=lambda tup: tup[0]) 500 | print("Public profile URL: {}".format(public_profile_url)) 501 | for page, products_per_page in sorted_parallel_products: 502 | for product in products_per_page: 503 | print( 504 | "\n{}\n\t\t" 505 | "Release: {}\n\t\t" 506 | "Serial: {}\n\t\t" 507 | "Version: {}\n\t\t" 508 | "Type: {}\n\t\t" 509 | "Page: {} \n\t\t" 510 | "Slot: {} \n\t\t" 511 | "Title: {}\n\t\t" 512 | "Description: \n\t\t\t\t{}\n\t\t" 513 | "URL: {}\n\t\t".format( 514 | product["unique_identifier"], 515 | product["release_version"], 516 | product["serial"], 517 | product["version"], 518 | product["type"], 519 | product["page_order"], 520 | product["product_order"], 521 | product["title"], 522 | product["description"].replace("\n", "\n\t\t\t\t"), 523 | product["marketplace_url"], 524 | ) 525 | ) 526 | 527 | 528 | def _streams_get_image(region, suite, arch): 529 | cmd = ['/snap/bin/simplestreams.sstream-query', '--max=1', 530 | 'http://cloud-images.ubuntu.com/releases/streams/v1/com.ubuntu.cloud:released:aws.sjson', 531 | f'crsn={region}', f'version={suite}', f'arch={arch}', 'virt=hvm', 532 | 'root_store=ssd', '--output-format=%(id)s'] 533 | return subprocess.check_output(cmd, encoding='utf-8', universal_newlines=True) 534 | 535 | 536 | @click.command(name='quicklaunch-report') 537 | @click.option( 538 | '--scraper-data', type=click.File('r'), required=True, 539 | show_default=True, default='quickstart_entries.json' 540 | ) 541 | @click.option('--needs-update-only/--no-needs-update-only', 542 | show_default=True, default=False) 543 | def quicklaunch_report(scraper_data, needs_update_only): 544 | """ 545 | Print a table with which shows if the quickstart entries are up-to-date. 546 | This is checked against streams. 547 | 548 | Returns 0 if everything is fine (no updates needed) 549 | 550 | Returns 2 if updates are needed 551 | 552 | All other return codes indicate a failure in the software 553 | """ 554 | from prettytable import PrettyTable 555 | t = PrettyTable() 556 | t.field_names = ['Region', 'Release', 'Arch', 'Position', 'Quickstart AMI', 'Streams AMI', 'Needs update'] 557 | data = json.loads(scraper_data.read()) 558 | needs_any_update = False 559 | for region in sorted(data[0:1]): 560 | print(f'Checking region {region[0]} ...') 561 | for ami in region[1]: 562 | if ami['owner'] != 'Canonical': 563 | # skip Amazon owned images for now in the report 564 | continue 565 | if ami['listing_arch'] == 'amd64': 566 | ami_id = ami['imageId64'] 567 | elif ami['listing_arch'] == 'arm64': 568 | ami_id = ami['imageIdArm64'] 569 | else: 570 | raise Exception('Unknown architecture {}'.format(ami['arch'])) 571 | streams_ami_id = _streams_get_image(region[0], ami['release_version'], ami['listing_arch']) 572 | needs_update = ami_id == streams_ami_id 573 | if True or needs_update: 574 | needs_any_update = True 575 | if not needs_update_only or needs_update: 576 | t.add_row([region[0], ami['release_version'], ami['listing_arch'], 577 | ami['quickstart_slot'], ami_id, streams_ami_id, needs_update]) 578 | if needs_any_update: 579 | print(t.get_string(sortby='Region', reversesort=True)) 580 | click.echo("There are some updates needed") 581 | # do return 2 which can then be checked in automation if updates are needed 582 | sys.exit(2) 583 | else: 584 | click.echo('No updates needed') 585 | 586 | @click.group() 587 | def main(): 588 | pass 589 | 590 | 591 | main.add_command(quicklaunch) 592 | main.add_command(marketplace) 593 | main.add_command(quicklaunch_report) 594 | 595 | if __name__ == "__main__": 596 | sys.exit(main()) 597 | --------------------------------------------------------------------------------