├── .gitignore
├── .idea
├── dictionaries
│ └── mkennedy.xml
├── inspectionProfiles
│ └── Project_Default.xml
├── misc.xml
├── modules.xml
├── vcs.xml
└── wakeup.iml
├── LICENSE
├── README.md
├── run-wakeup.py
├── setup.py
└── wakeup
├── __init__.py
├── __main__.py
└── warmup_core.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 | .idea/encodings.xml
106 | .idea/workspace.xml
107 |
--------------------------------------------------------------------------------
/.idea/dictionaries/mkennedy.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | aiohttp
5 | chardet
6 | colorama
7 | mikeckennedy
8 | multidict
9 | unsync
10 | urlset
11 | warmup
12 | yarl
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/wakeup.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Michael Kennedy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # site-warmup
2 | A little Python app to make sure your server is warmed up
3 |
--------------------------------------------------------------------------------
/run-wakeup.py:
--------------------------------------------------------------------------------
# Launcher script: imports the wakeup package and runs its main() entry point.
import wakeup

# main() parses its own arguments with argparse, so nothing is passed here.
wakeup.main()
4 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from setuptools import setup
4 |
# Read the version straight out of wakeup/__init__.py with a regex instead of
# importing the package (importing it would pull in aiohttp/unsync/colorama,
# which may not be installed yet at build time).
with open('wakeup/__init__.py', encoding='utf-8') as init_file:
    version_match = re.search(
        # Raw string: '\s' in a normal string literal is an invalid escape sequence.
        r'^__version__\s*=\s*"(.*)"',
        init_file.read(),
        re.M
    )

if version_match is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise RuntimeError('Unable to find __version__ = "..." in wakeup/__init__.py')

version = version_match.group(1)

# Runtime dependencies installed alongside the package.
requires = [
    'aiohttp',
    'unsync',
    'colorama',
]

setup(
    name='wakeup',
    version=version,
    packages=['wakeup'],
    install_requires=requires,
    entry_points={
        # Installs a `wakeup` command that calls wakeup.main().
        "console_scripts": ['wakeup = wakeup:main']
    },
    url='https://github.com/mikeckennedy/wakeup',
    license='MIT',
    author='Michael Kennedy',
    author_email='michael@talkpython.fm',
    description='An app to exercise a website to warm up every page.'
)
31 |
--------------------------------------------------------------------------------
/wakeup/__init__.py:
--------------------------------------------------------------------------------
1 | # noinspection PyUnresolvedReferences
2 | from .warmup_core import main, RequestResult
3 |
# Package version. setup.py extracts this value with a regex that expects the
# form `__version__ = "<value>"` at the start of a line, in double quotes.
__version__ = "0.1.7"
5 |
--------------------------------------------------------------------------------
/wakeup/__main__.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from .warmup_core import main
4 |
5 |
def run():
    """Run the warm-up via main() and then exit the process with status 0."""
    # The return value of main() is not used, so it is not kept in a local.
    main()
    sys.exit(0)
9 |
10 |
# Runs when this module is executed as the main program
# (for a package's __main__.py that is `python -m wakeup`).
if __name__ == '__main__':
    run()
13 |
--------------------------------------------------------------------------------
/wakeup/warmup_core.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import statistics
3 | import sys
4 | import time
5 | from collections import defaultdict, namedtuple
6 | from typing import List, Final
7 | from xml.etree import ElementTree
8 |
9 | import aiohttp
10 | from colorama import Fore
11 | from unsync import unsync
12 |
# Parsed command-line arguments, in the order get_params() fills them.
Args = namedtuple('Args', ['sitemap_url', 'workers', 'ignore_patterns'])

# Outcome of a single HTTP request. Note: async_get() fills `time_ms` with a
# difference of two clock readings, i.e. a value in seconds despite the name.
RequestResult = namedtuple('RequestResult', ['status', 'time_ms'])
15 |
16 |
def __get_platform():
    """Return a display name for the current operating system.

    Known ``sys.platform`` identifiers are mapped to a friendly name; any
    other identifier is returned unchanged.
    """
    platforms = {
        # Python 3.3+ reports plain 'linux'; the numbered forms are legacy
        # values kept for completeness.
        'linux': 'Linux',
        'linux1': 'Linux',
        'linux2': 'Linux',
        'darwin': 'macOS',
        'win32': 'Windows',
    }
    return platforms.get(sys.platform, sys.platform)
28 |
29 |
# Interpreter version, used below to build the User-Agent string.
VER: Final = sys.version_info
# Friendly OS name, or the raw sys.platform value when it is not recognised.
PLATFORM: Final[str] = __get_platform()
# Value sent as the 'User-Agent' header by async_get().
USER_AGENT: Final[str] = f'Warmup client; {PLATFORM}; Python {VER.major}.{VER.minor}.{VER.micro}'
33 |
34 |
def main():
    """Command-line entry point for the site warm-up.

    Parses the arguments, downloads the sitemap, filters its URLs, and then
    requests every remaining URL in two passes: first with a single worker,
    then with the number of workers given on the command line.
    """
    print(Fore.WHITE)

    cli = get_params()
    print_header(cli.sitemap_url, cli.workers)

    # noinspection PyUnresolvedReferences
    sitemap_xml = get_sitemap_text(cli.sitemap_url).result()
    all_urls = get_site_mapped_urls(sitemap_xml)
    targets = get_filtered_urls(all_urls, cli.ignore_patterns)

    # Pre-build the decorative pieces so the print sequence below reads linearly.
    divider = "*" * 50
    wake_banner = Fore.LIGHTGREEN_EX + 'Running with one worker to wake systems...'.upper() + Fore.WHITE
    power_banner = Fore.WHITE + Fore.YELLOW + f'Running full power with {cli.workers} workers...'.upper() + Fore.WHITE

    print(Fore.YELLOW + f"Testing {len(targets):,} total URLs.")
    print()
    print()
    print(Fore.WHITE + divider)
    print()
    print(wake_banner)
    print()
    # Pass 1: a single request per URL.
    run_url_requests(1, targets, Fore.LIGHTGREEN_EX + 'First request')
    print()
    print(divider)
    print()
    print(power_banner)
    print()
    # Pass 2: the requested number of concurrent requests per URL.
    run_url_requests(cli.workers, targets, Fore.LIGHTRED_EX + 'Full power')
61 |
62 |
def run_url_requests(workers: int, filtered_urls: List[str], prefix: str):
    """Request each URL in turn with ``workers`` requests and print a summary.

    :param workers: number of requests issued per URL (passed to test_url).
    :param filtered_urls: URLs to exercise, processed in order.
    :param prefix: label printed before each progress line; skipped when empty.
    :return: None; progress and per-URL summaries are written to stdout.
    """
    total = len(filtered_urls)
    for idx, url in enumerate(filtered_urls, start=1):
        if prefix:
            print(prefix, end=': ')
        print(Fore.WHITE + f"{idx}/{total}: Testing url, {workers:,} workers: {url}...", flush=True)
        # noinspection PyUnresolvedReferences
        results = test_url(url, workers).result()
        # Results are summarised immediately; nothing reads them afterwards,
        # so they are not accumulated across URLs.
        summary_page_result(results)
        print(flush=True)

        # Give the server a little break to handle any requests that may have backed up.
        time.sleep(.05)
78 |
79 |
def get_params():
    """Parse the command-line arguments into an ``Args`` tuple.

    Positional arguments: the sitemap URL, the worker count, and zero or more
    substrings identifying URL groups that should only be requested once.

    Exits via argparse (status 2, usage message on stderr) when the arguments
    are invalid, including a worker count below 1.
    """
    parser = argparse.ArgumentParser(description='Site warmup -- preload all public pages')
    parser.add_argument('sitemap_url', type=str, help='Url for sitemap, e.g. https://site.com/sitemap.xml')
    parser.add_argument('workers', type=int, help='Number of workers (concurrent requests)')
    parser.add_argument("ignore_patterns", nargs='*', type=str,
                        help="Substrings for URLs to only request once (zero or more args)",
                        default=[])

    args = parser.parse_args()

    # With fewer than one worker no request is made for a URL, and summarising
    # an empty result list fails later; reject it here with a normal usage error.
    if args.workers < 1:
        parser.error('workers must be at least 1')

    return Args(args.sitemap_url, args.workers, args.ignore_patterns)
91 |
92 |
def print_header(sitemap_url: str, workers: int):
    """Print the application banner, then the host being tested and the worker count.

    :param sitemap_url: sitemap address; only its host part is shown.
    :param workers: number of workers, echoed in the final line.
    """
    # Function-scope import: this is the only place URL parsing is needed.
    from urllib.parse import urlsplit

    # urlsplit copes with URLs that have no path after the host. netloc is
    # empty when there is no '//' authority part (e.g. no scheme given); show
    # the raw value in that case instead of failing.
    domain = urlsplit(sitemap_url).netloc or sitemap_url

    rule = ' ---------------------------------------------------------'
    # The rule's leading space sits above the '|' border, so the text area
    # between the two borders is one character narrower than the rule.
    inner_width = len(rule) - 1

    print()
    print(rule)
    for text in ('', 'SITE WARM-UP', 'github.com/mikeckennedy/wakeup', ''):
        print('|' + text.center(inner_width) + '|')
    print(rule)
    print()
    print(f'Testing {domain} with {workers} workers.')
    print()
108 |
109 |
def summary_page_result(results: List[RequestResult]):
    """Print a colour-coded summary of the requests made against one URL.

    Prints the set of status codes (in red when any is in the 400-599 range)
    and the minimum, median and maximum times. The times line is green when
    the median is below 0.5, yellow below 1.5 and red otherwise.

    :param results: one RequestResult per request; an empty list prints nothing.
    """
    if not results:
        # min(), max() and statistics.median() all raise on an empty sequence.
        return

    statuses = {r.status for r in results}
    times = [r.time_ms for r in results]
    min_time_ms = min(times)
    max_time_ms = max(times)
    med_time = statistics.median(times)

    # Any client (4xx) or server (5xx) error turns the status line red.
    if any(400 <= s <= 599 for s in statuses):
        print(Fore.RED, end='')
    print(f"Statuses: {statuses}")

    if med_time < .5:
        print(Fore.GREEN, end='')
    elif med_time < 1.5:
        print(Fore.YELLOW, end='')
    else:
        print(Fore.RED, end='')

    print("Times: min: {:,.2f}, median: {:,.2f}, max: {:,.2f}".format(
        min_time_ms, med_time, max_time_ms
    ))
137 |
138 |
@unsync
async def test_url(url: str, workers: int) -> List[RequestResult]:
    """Issue ``workers`` requests for ``url`` and return their results in order.

    :param url: address to request.
    :param workers: how many async_get() calls to make for this URL.
    :return: one RequestResult per call, in the order the calls were created.
    """
    # Create every request before awaiting any of them.
    # NOTE(review): presumably each call to the @unsync-decorated async_get
    # starts running as soon as it is called, which is what makes the requests
    # overlap -- confirm against the unsync documentation.
    pending = [async_get(url) for _ in range(workers)]

    collected = []
    for request in pending:
        # noinspection PyUnresolvedReferences
        collected.append(await request)
    return collected
151 |
152 |
@unsync
async def async_get(url) -> RequestResult:
    """GET ``url`` once and return its status code and elapsed time.

    The elapsed time is in seconds (a float), even though the RequestResult
    field that carries it is named ``time_ms``. It covers creating the client
    session and issuing the request; the response body is never read here.

    :param url: address to request.
    :return: RequestResult(status, elapsed_seconds).
    """
    headers = {'User-Agent': USER_AGENT}

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock is adjusted.
    started = time.perf_counter()
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            elapsed_seconds = time.perf_counter() - started

    return RequestResult(resp.status, elapsed_seconds)
163 |
164 |
def get_filtered_urls(urls: List[str], once_patterns: List[str]) -> List[str]:
    """Return ``urls`` with at most one URL kept per "once" pattern.

    Each URL is checked against ``once_patterns`` in order and assigned to the
    first pattern that occurs in it as a substring. URLs matching no pattern
    are always kept; for each pattern only the first matching URL is kept.
    The relative order of the kept URLs is unchanged.

    :param urls: candidate URLs.
    :param once_patterns: substrings identifying groups to request only once.
    :return: the filtered list of URLs.
    """
    kept = []
    already_used = set()
    for url in urls:
        # First pattern contained in this URL, or None when nothing matches.
        hit = next((pattern for pattern in once_patterns if pattern in url), None)

        if hit is None or hit not in already_used:
            kept.append(url)

        # Truthiness test on purpose: an empty-string pattern is never recorded.
        if hit:
            already_used.add(hit)

    return kept
185 |
186 |
@unsync
async def get_sitemap_text(sitemap_url: str) -> str:
    """Download the sitemap and return its XML with the sitemap namespace removed.

    The document is expected to follow the standard sitemaps.org layout, e.g.:

        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
            <url>
                <loc>http://talkpython.fm/episodes/show/37/python-cybersecurity-and-penetration-testing</loc>
                <lastmod>2015-12-08</lastmod>
                <changefreq>weekly</changefreq>
                <priority>1.0</priority>
            </url>
            ...
        </urlset>

    :param sitemap_url: address of the sitemap XML.
    :return: the response text without the default namespace declaration.
    :raises: whatever ``resp.raise_for_status()`` raises for an error response.
    """
    sitemap_ns = ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'

    async with aiohttp.ClientSession() as session:
        async with session.get(sitemap_url) as resp:
            resp.raise_for_status()
            raw_xml = await resp.text()

    # namespaces, ugh. Dropping the declaration lets the XML be queried with
    # plain tag names such as 'url/loc'.
    return raw_xml.replace(sitemap_ns, '')
207 |
208 |
def get_site_mapped_urls(sitemap_text: str) -> List[str]:
    """Extract the page URLs from sitemap XML.

    Reads the text of every ``url/loc`` element under the root, stripped of
    surrounding whitespace. Empty or whitespace-only ``<loc>`` elements are
    skipped.

    :param sitemap_text: sitemap XML whose elements carry no namespace.
    :return: the URLs in document order.
    :raises xml.etree.ElementTree.ParseError: if the text is not well-formed XML.
    """
    root = ElementTree.fromstring(sitemap_text)
    urls = []
    for loc in root.findall('url/loc'):
        # An empty element such as <loc/> has text None, so guard before stripping.
        address = (loc.text or '').strip()
        if address:
            urls.append(address)

    return urls
217 |
218 |
# Allow this module to be run directly as a script.
if __name__ == '__main__':
    main()
221 |
--------------------------------------------------------------------------------