├── .gitignore ├── .idea ├── dictionaries │ └── mkennedy.xml ├── inspectionProfiles │ └── Project_Default.xml ├── misc.xml ├── modules.xml ├── vcs.xml └── wakeup.iml ├── LICENSE ├── README.md ├── run-wakeup.py ├── setup.py └── wakeup ├── __init__.py ├── __main__.py └── warmup_core.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | .idea/encodings.xml 106 | .idea/workspace.xml 107 | -------------------------------------------------------------------------------- /.idea/dictionaries/mkennedy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | aiohttp 5 | chardet 6 | colorama 7 | mikeckennedy 8 | multidict 9 | unsync 10 | urlset 11 | warmup 12 | yarl 13 | 14 | 15 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/wakeup.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Michael Kennedy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # site-warmup 2 | A little Python app to make sure your server is warmed up 3 | -------------------------------------------------------------------------------- /run-wakeup.py: -------------------------------------------------------------------------------- 1 | import wakeup 2 | 3 | wakeup.main() 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from setuptools import setup 4 | 5 | version = re.search( 6 | '^__version__\s*=\s*"(.*)"', 7 | open('wakeup/__init__.py').read(), 8 | re.M 9 | ).group(1) 10 | 11 | requires = [ 12 | 'aiohttp', 13 | 'unsync', 14 | 'colorama', 15 | ] 16 | 17 | setup( 18 | name='wakeup', 19 | version=version, 20 | packages=['wakeup'], 21 | install_requires=requires, 22 | entry_points={ 23 | "console_scripts": ['wakeup = wakeup:main'] 24 | }, 25 | url='https://github.com/mikeckennedy/wakeup', 26 | license='MIT', 27 | author='Michael Kennedy', 28 | author_email='michael@talkpython.fm', 29 | description='An app to exercise a website to warm up every page.' 30 | ) 31 | -------------------------------------------------------------------------------- /wakeup/__init__.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | from .warmup_core import main, RequestResult 3 | 4 | __version__ = "0.1.7" 5 | -------------------------------------------------------------------------------- /wakeup/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from .warmup_core import main 4 | 5 | 6 | def run(): 7 | results = main() 8 | sys.exit(0) 9 | 10 | 11 | if __name__ == '__main__': 12 | run() 13 | -------------------------------------------------------------------------------- /wakeup/warmup_core.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import statistics 3 | import sys 4 | import time 5 | from collections import defaultdict, namedtuple 6 | from typing import List, Final 7 | from xml.etree import ElementTree 8 | 9 | import aiohttp 10 | from colorama import Fore 11 | from unsync import unsync 12 | 13 | Args = namedtuple('Args', 'sitemap_url, workers, ignore_patterns') 14 | RequestResult = namedtuple('RequestResult', 'status, time_ms') 15 | 16 | 17 | def __get_platform(): 18 | platforms = { 19 | 'linux1': 'Linux', 20 | 'linux2': 'Linux', 21 | 'darwin': 'macOS', 22 | 'win32': 'Windows' 23 | } 24 | if sys.platform not in platforms: 25 | return sys.platform 26 | 27 | return platforms[sys.platform] 28 | 29 | 30 | VER: sys.version_info = sys.version_info 31 | PLATFORM: Final[str] = __get_platform() 32 | USER_AGENT: Final[str] = f'Warmup client; {PLATFORM}; Python {VER.major}.{VER.minor}.{VER.micro}' 33 | 34 | 35 | def main(): 36 | print(Fore.WHITE) 37 | 38 | args = get_params() 39 | print_header(args.sitemap_url, args.workers) 40 | 41 | # noinspection PyUnresolvedReferences 42 | sitemap = get_sitemap_text(args.sitemap_url).result() 43 | urls = get_site_mapped_urls(sitemap) 44 | 45 | filtered_urls = get_filtered_urls(urls, args.ignore_patterns) 46 | 47 | print(Fore.YELLOW + f"Testing {len(filtered_urls):,} total URLs.") 48 | print() 49 | print() 50 | print(Fore.WHITE + "*" * 50) 51 | print() 52 | print(Fore.LIGHTGREEN_EX + 'Running with one worker to wake systems...'.upper() + Fore.WHITE) 53 | print() 54 | run_url_requests(1, filtered_urls, Fore.LIGHTGREEN_EX + 'First request') 55 | print() 56 | print("*" * 50) 57 | print() 58 | print(Fore.WHITE + Fore.YELLOW + f'Running full power with {args.workers} workers...'.upper() + Fore.WHITE) 59 | print() 60 | run_url_requests(args.workers, filtered_urls, Fore.LIGHTRED_EX + 'Full power') 61 | 62 | 63 | def run_url_requests(workers: int, filtered_urls: List[str], prefix: str): 64 | all_results = {} 65 | total = len(filtered_urls) 66 | for idx, url in enumerate(filtered_urls, start=1): 67 | if prefix: 68 | print(prefix, end=': ') 69 | print(Fore.WHITE + f"{idx}/{total}: Testing url, {workers:,} workers: {url}...", flush=True) 70 | # noinspection PyUnresolvedReferences 71 | results = test_url(url, workers).result() 72 | summary_page_result(results) 73 | all_results[url] = results 74 | print(flush=True) 75 | 76 | # Give the server a little break to handle any requests that may have backed up. 77 | time.sleep(.05) 78 | 79 | 80 | def get_params(): 81 | parser = argparse.ArgumentParser(description='Site warmup -- preload all public pages') 82 | parser.add_argument('sitemap_url', type=str, help='Url for sitemap, e.g. https://site.com/sitemap.xml') 83 | parser.add_argument('workers', type=int, help='Number of workers (concurrent requests)') 84 | parser.add_argument("ignore_patterns", nargs='*', type=str, 85 | help="Substrings for URLs to only request once (zero or more args)", 86 | default=[]) 87 | 88 | args = parser.parse_args() 89 | 90 | return Args(args.sitemap_url, args.workers, args.ignore_patterns) 91 | 92 | 93 | def print_header(sitemap_url: str, workers: int): 94 | start = sitemap_url.index('://') + 3 95 | end = start + sitemap_url[start:].index('/') 96 | domain = sitemap_url[start:end] 97 | 98 | print() 99 | print(' ---------------------------------------------------------') 100 | print('| |') 101 | print('| SITE WARM-UP |') 102 | print('| github.com/mikeckennedy/wakeup |') 103 | print('| |') 104 | print(' ---------------------------------------------------------') 105 | print() 106 | print(f'Testing {domain} with {workers} workers.') 107 | print() 108 | 109 | 110 | def summary_page_result(results: List[RequestResult]): 111 | statuses = {r.status for r in results} 112 | times = [r.time_ms for r in results] 113 | min_time_ms = min(times) 114 | max_time_ms = max(times) 115 | med_time = statistics.median(times) 116 | 117 | bad_statuses = False 118 | for s in statuses: 119 | if 400 <= s <= 599: 120 | bad_statuses = True 121 | break 122 | 123 | if bad_statuses: 124 | print(Fore.RED, end='') 125 | print(f"Statuses: {statuses}") 126 | 127 | if med_time < .5: 128 | print(Fore.GREEN, end='') 129 | elif med_time < 1.5: 130 | print(Fore.YELLOW, end='') 131 | else: 132 | print(Fore.RED, end='') 133 | 134 | print("Times: min: {:,.2f}, median: {:,.2f}, max: {:,.2f}".format( 135 | min_time_ms, med_time, max_time_ms 136 | )) 137 | 138 | 139 | @unsync 140 | async def test_url(url: str, workers: int) -> List[RequestResult]: 141 | tasks = [ 142 | async_get(url) 143 | for _ in range(0, workers) 144 | ] 145 | 146 | # noinspection PyUnresolvedReferences 147 | return [ 148 | await t 149 | for t in tasks 150 | ] 151 | 152 | 153 | @unsync 154 | async def async_get(url) -> RequestResult: 155 | headers = {'User-Agent': USER_AGENT} 156 | 157 | t0 = time.time() 158 | async with aiohttp.ClientSession() as session: 159 | async with session.get(url, headers=headers) as resp: 160 | time_in_ms = time.time() - t0 161 | 162 | return RequestResult(resp.status, time_in_ms) 163 | 164 | 165 | def get_filtered_urls(urls: List[str], once_patterns: List[str]) -> List[str]: 166 | filtered = [] 167 | once_lookup = defaultdict(lambda: False) 168 | for u in urls: 169 | 170 | match_found = False 171 | matching_pattern = None 172 | for p in once_patterns: 173 | if p in u: 174 | match_found = True 175 | matching_pattern = p 176 | break 177 | 178 | if not match_found or not once_lookup[matching_pattern]: 179 | filtered.append(u) 180 | 181 | if matching_pattern: 182 | once_lookup[matching_pattern] = True 183 | 184 | return filtered 185 | 186 | 187 | @unsync 188 | async def get_sitemap_text(sitemap_url: str) -> str: 189 | # 190 | # 191 | # http://talkpython.fm/episodes/show/37/python-cybersecurity-and-penetration-testing 192 | # 2015-12-08 193 | # weekly 194 | # 1.0 195 | # 196 | # 197 | # ... 198 | # 199 | async with aiohttp.ClientSession() as session: 200 | async with session.get(sitemap_url) as resp: 201 | resp.raise_for_status() 202 | text = await resp.text() 203 | 204 | # namespaces, ugh. 205 | text = text.replace(' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"', '') 206 | return text 207 | 208 | 209 | def get_site_mapped_urls(sitemap_text: str) -> List[str]: 210 | x = ElementTree.fromstring(sitemap_text) 211 | urls = [ 212 | href.text.strip() 213 | for href in list(x.findall('url/loc')) 214 | ] 215 | 216 | return urls 217 | 218 | 219 | if __name__ == '__main__': 220 | main() 221 | --------------------------------------------------------------------------------