├── tests ├── __init__.py ├── files │ ├── 2_ok_1_invalid.log │ └── small.log ├── conftest.py ├── test_utils.py ├── test_main.py ├── test_log_file.py ├── test_argparse.py ├── test_log_line.py ├── test_filters.py ├── test_regex.py └── test_commands.py ├── src └── haproxy │ ├── __init__.py │ ├── logfile.py │ ├── utils.py │ ├── filters.py │ ├── main.py │ ├── line.py │ └── commands.py ├── requirements.in ├── .coveragerc ├── docs ├── source │ ├── README_link.rst │ ├── changelog.rst │ ├── modules.rst │ ├── index.rst │ └── conf.py └── Makefile ├── setup.cfg ├── .gitignore ├── MANIFEST.in ├── requirements.txt ├── .flake8 ├── .github └── workflows │ ├── release.yml │ └── tests.yml ├── tox.ini ├── .pre-commit-config.yaml ├── pyproject.toml ├── README.rst ├── CHANGES.rst └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/haproxy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = 3 | */python?.?/* 4 | -------------------------------------------------------------------------------- /docs/source/README_link.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../README.rst 2 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../../CHANGES.rst 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [zest.releaser] 2 | create-wheel = yes 3 | 4 | [tool:pytest] 5 | testpaths = tests 6 | norecursedirs = .venv 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | *.pickle 3 | 4 | # C extensions 5 | *.so 6 | 7 | # Packages 8 | *.egg 9 | *.egg-info 10 | dist 11 | build 12 | eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | __pycache__ 22 | 23 | # Unit test / coverage reports 24 | coverage 25 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGES.rst 2 | exclude .flake8 3 | exclude *.in 4 | exclude *.txt 5 | exclude *.yaml 6 | exclude .coveragerc 7 | exclude tox.ini 8 | recursive-exclude docs *.py 9 | recursive-exclude docs *.rst 10 | recursive-exclude docs Makefile 11 | recursive-exclude tests *.log 12 | recursive-exclude tests *.py 13 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | Haproxy Modules 2 | =============== 3 | 4 | 5 | Log 6 | --- 7 | 8 | .. automodule:: haproxy.logfile 9 | 10 | .. autoclass:: Log 11 | :members: 12 | :private-members: 13 | 14 | 15 | Line 16 | ---- 17 | 18 | .. automodule:: haproxy.line 19 | 20 | .. autoclass:: Line 21 | :members: 22 | 23 | Filters 24 | ------- 25 | .. automodule:: haproxy.filters 26 | :members: 27 | 28 | Commands 29 | -------- 30 | .. 
automodule:: haproxy.commands 31 | :members: 32 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. HAProxy log analyzer documentation master file, created by 2 | sphinx-quickstart on Thu Dec 19 00:06:54 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ================================================ 7 | Welcome to HAProxy log analyzer's documentation! 8 | ================================================ 9 | 10 | Contents: 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | README_link 16 | modules 17 | changelog 18 | 19 | 20 | ================== 21 | Indices and tables 22 | ================== 23 | 24 | * :ref:`genindex` 25 | * :ref:`modindex` 26 | * :ref:`search` 27 | 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.8 3 | # by the following command: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | attrs==22.1.0 8 | # via pytest 9 | coverage[toml]==6.5.0 10 | # via 11 | # coverage 12 | # pytest-cov 13 | exceptiongroup==1.1.3 14 | # via pytest 15 | iniconfig==1.1.1 16 | # via pytest 17 | packaging==21.3 18 | # via pytest 19 | pluggy==1.0.0 20 | # via pytest 21 | pyparsing==3.0.9 22 | # via packaging 23 | pytest==7.2.0 24 | # via 25 | # -r requirements.in 26 | # pytest-cov 27 | pytest-cov==4.0.0 28 | # via -r requirements.in 29 | tomli==2.0.1 30 | # via 31 | # coverage 32 | # pytest 33 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | doctests = 1 3 | ignore = 4 | # coding magic comment not found 5 | 
C101, 6 | # missing docstring in public package 7 | D104, 8 | # missing docstring in magic method 9 | D105, 10 | # missing docstring in public nested class (e.g. Meta class) 11 | D106, 12 | # missing docstring in __init__ (against Google/NumPy guidelines) 13 | D107, 14 | # missing blank line after last section 15 | D413, 16 | # black takes care of whitespace before colons (:) 17 | E203, 18 | # black takes care of whitespace after commas 19 | E231, 20 | # black takes care of line length 21 | E501, 22 | # all-lowercase method names 23 | N802, 24 | # Change outer quotes to avoid escaping inner quotes 25 | Q003, 26 | # black takes care of where to break lines 27 | W503, 28 | -------------------------------------------------------------------------------- /tests/files/2_ok_1_invalid.log: -------------------------------------------------------------------------------- 1 | Dec 9 13:01:26 localhost haproxy[28029]: 127.0.0.1:38037 [09/Dec/2013:12:00:03.205] loadbalancer default/instance5 0/133/0/294/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "GET /VirtualHostBase/http/www.example.com:80/website/VirtualHostRoot/autoren/dummy/its-time-for-prostitution%231386586409135007 HTTP/1.1" 2 | Dec 9 13:01:26 localhost haproxy[28029]: 127.0.0.1:38401 [] loadbalancer default/instance6 0/0/0/155/156 302 15987 - - ---- 18/18/18/0/0 0/0 {123.123.123.123} "GET /VirtualHostBase/http/www.example.com:80/website/VirtualHostRoot/autoren/dummy/westliche-wertegemeinschft/view HTTP/1.1" 3 | Dec 9 13:01:26 localhost haproxy[28029]: 127.0.0.1:38414 [09/Dec/2013:12:00:11.476] loadbalancer default/instance9 0/0/0/200/202 200 19056 - - ---- 18/18/18/1/0 0/0 {123.123.123.123} "GET /VirtualHostBase/http/www.example.com:80/website/VirtualHostRoot/acl_users/credentials_cookie_auth/require_login?came_from=http%3A//www.example.com/autoren/dummy/westliche-wertegemeinschft/view HTTP/1.1" 4 | -------------------------------------------------------------------------------- 
/.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: GitHub 2 | on: 3 | push: 4 | tags: ["*"] 5 | 6 | jobs: 7 | release: 8 | permissions: write-all 9 | runs-on: "ubuntu-latest" 10 | name: Create a Release 11 | env: 12 | GH_TOKEN: ${{ github.token }} 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Set up Python 3.11 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: "3.11" 20 | - name: Create a new GitHub release 21 | run: | 22 | # get the lines where the changelog for the last release starts and finishes 23 | first_line=$(grep -n "\-\-\-\-" CHANGES.rst | cut -d":" -f1 |head -n1) 24 | last_line=$(grep -n "\-\-\-\-" CHANGES.rst | cut -d":" -f1 |head -n2 | tail -n1) 25 | 26 | # do some math to adjust the line numbers 27 | first=$((${first_line}+1)) 28 | last=$((${last_line}-2)) 29 | end=$((${last_line}-1)) 30 | 31 | # extract the changelog 32 | sed -n "${first},${last}p;${end}q" CHANGES.rst > body.txt 33 | 34 | cat body.txt 35 | 36 | gh release create ${{ github.ref_name }} -p -F body.txt 37 | 38 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | min_version = 4.4.0 3 | envlist = 4 | format 5 | lint 6 | coverage 7 | py38 8 | py39 9 | py310 10 | py311 11 | py312 12 | pypy3 13 | 14 | [gh-actions] 15 | python = 16 | 3.8: py38 17 | 3.9: py39 18 | 3.10: py310 19 | 3.11: py311 20 | 3.12: py312 21 | 22 | [testenv] 23 | description = run the distribution tests 24 | use_develop = true 25 | skip_install = false 26 | constrain_package_deps = true 27 | deps = 28 | -r requirements.txt 29 | commands = 30 | pytest 31 | 32 | [testenv:format] 33 | description = automatically reformat code 34 | skip_install = true 35 | deps = 36 | pre-commit 37 | commands = 38 | pre-commit run -a pyupgrade 39 | pre-commit run -a isort 40 | pre-commit run -a black 41 | 
42 | [testenv:lint] 43 | description = run linters that will help improve the code style 44 | skip_install = true 45 | deps = 46 | pre-commit 47 | commands = 48 | pre-commit run -a 49 | 50 | [testenv:coverage] 51 | description = get a test coverage report 52 | use_develop = true 53 | skip_install = false 54 | deps = 55 | -r requirements.txt 56 | commands = 57 | pytest --cov --cov-report term-missing 58 | 59 | [testenv:generate-constrains] 60 | description = update the requirements.txt file 61 | basepython = python3.8 62 | skip_install = true 63 | deps = 64 | pip-tools 65 | commands = 66 | pip-compile requirements.in 67 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_prs: false 3 | autoupdate_schedule: monthly 4 | 5 | repos: 6 | - repo: https://github.com/asottile/pyupgrade 7 | rev: v3.14.0 8 | hooks: 9 | - id: pyupgrade 10 | args: [--py38-plus] 11 | - repo: https://github.com/pycqa/isort 12 | rev: 5.12.0 13 | hooks: 14 | - id: isort 15 | - repo: https://github.com/psf/black 16 | rev: 23.9.1 17 | hooks: 18 | - id: black 19 | - repo: https://github.com/PyCQA/flake8 20 | rev: 6.1.0 21 | hooks: 22 | - id: flake8 23 | additional_dependencies: 24 | - flake8-bugbear 25 | - flake8-builtins 26 | - flake8-comprehensions 27 | - flake8-debugger 28 | - flake8-deprecated 29 | - flake8-isort 30 | - flake8-pep3101 31 | - flake8-quotes 32 | - flake8-pytest-style 33 | 34 | - repo: https://github.com/codespell-project/codespell 35 | rev: v2.2.6 36 | hooks: 37 | - id: codespell 38 | additional_dependencies: 39 | - tomli 40 | - repo: https://github.com/mgedmin/check-manifest 41 | rev: "0.49" 42 | hooks: 43 | - id: check-manifest 44 | - repo: https://github.com/regebro/pyroma 45 | rev: "4.2" 46 | hooks: 47 | - id: pyroma 48 | - repo: https://github.com/mgedmin/check-python-versions 49 | rev: "0.21.3" 50 | hooks: 51 | - id:
class Log:
    """Iterable wrapper around an HAProxy log file.

    Iterating an instance yields the parsed lines that are valid and fall
    within the optional time frame, while keeping count of how many valid
    and invalid lines were seen.
    """

    def __init__(self, logfile=None, start=None, delta=None, show_invalid=False):
        """Store the settings and work out the time frame.

        :param logfile: path of the log file; it is opened on iteration.
        :param start: textual start of the time frame, converted with
            ``date_str_to_datetime``. A falsy value means no start.
        :param delta: textual length of the time frame, converted with
            ``delta_str_to_timedelta``. ``end`` is only computed when a
            start was given as well.
        :param show_invalid: print the raw text of invalid lines while
            iterating.
        """
        self.logfile = logfile
        self.show_invalid = show_invalid

        self.start = date_str_to_datetime(start) if start else None
        self.end = None
        if delta:
            # The delta is converted even when there is no start, so a
            # malformed value raises regardless.
            time_span = delta_str_to_timedelta(delta)
            if isinstance(self.start, datetime):
                self.end = self.start + time_span

        self.invalid_lines = 0
        self.valid_lines = 0

    def __iter__(self):
        """Parse the file through a process pool and yield matching lines.

        The valid/invalid counters are updated as a side effect. They are
        only initialised in ``__init__``, so iterating the same instance
        again adds to the previous totals. The elapsed time is printed once
        the file is exhausted.
        """
        started_at = datetime.now()
        with open(self.logfile) as file_obj, Pool() as workers:
            parsed_lines = workers.imap(parse_line, file_obj)
            for position, parsed in enumerate(parsed_lines):
                if not parsed.is_valid:
                    if self.show_invalid:
                        print(parsed.raw_line)
                    self.invalid_lines += 1
                else:
                    self.valid_lines += 1
                    if parsed.is_within_time_frame(self.start, self.end):
                        yield parsed

                # Progress marker every 10000 lines, never for the first one.
                if position > 0 and position % 10000 == 0:  # pragma: no cover
                    print('.', end='', flush=True)

        finished_at = datetime.now()
        print(f'\nIt took {finished_at - started_at}')

    @property
    def total_lines(self):
        """Return the number of lines counted so far, valid plus invalid."""
        return self.invalid_lines + self.valid_lines
12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:10:01:04.205] loadbalancer default/instance2 0/133/0/2942/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.124} "HEAD /world HTTP/1.1" 3 | Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:12:03:06.205] loadbalancer default/instance3 0/133/0/94/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.123} "POST /hello HTTP/1.1" 4 | Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:10:01:04.205] loadbalancer default/instance2 0/133/0/1293/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.124} "GET /free HTTP/1.1" 5 | Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:11:02:05.205] loadbalancer default/instance3 0/133/0/20095/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "HEAD /fra HTTP/1.1" 6 | Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/2936/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.124} "GET /world HTTP/1.1" 7 | Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:12:03:06.205] loadbalancer default/instance1 0/133/0/4/437 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "POST /freitag HTTP/1.1" 8 | Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:12:03:06.205] loadbalancer default/instance2 0/133/0/29408/430 200 17610 - - ---- 21/21/21/1/0 0/1 "GET /free HTTP/1.1" 9 | Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/409/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "HEAD /hello HTTP/1.1" 10 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "haproxy_log_analysis" 7 | 
version = "6.0.0a5.dev0" 8 | authors = [ 9 | { name="Gil Forcada Codinachs", email="gil.gnome@gmail.com" }, 10 | ] 11 | description = "Analyze HAProxy log files" 12 | keywords = ["haproxy", "log", "sysadmin", "devops", "report" ] 13 | license = {file = "LICENSE"} 14 | readme = "README.rst" 15 | requires-python = ">=3.8" 16 | classifiers = [ 17 | "Development Status :: 5 - Production/Stable", 18 | "Environment :: Console", 19 | "Intended Audience :: System Administrators", 20 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 21 | "Operating System :: OS Independent", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3 :: Only", 25 | "Programming Language :: Python :: 3.8", 26 | "Programming Language :: Python :: 3.9", 27 | "Programming Language :: Python :: 3.10", 28 | "Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Programming Language :: Python :: Implementation :: CPython", 31 | "Programming Language :: Python :: Implementation :: PyPy", 32 | "Topic :: Internet :: Log Analysis", 33 | ] 34 | 35 | [project.urls] 36 | "Homepage" = "https://github.com/gforcada/haproxy_log_analysis" 37 | "Bug Tracker" = "https://github.com/gforcada/haproxy_log_analysis/issues" 38 | "Changelog" = "https://github.com/gforcada/haproxy_log_analysis/blob/main/CHANGES.rst" 39 | 40 | [project.scripts] 41 | haproxy_log_analysis = "haproxy.main:console_script" 42 | 43 | [tool.isort] 44 | profile = "plone" 45 | 46 | [tool.black] 47 | target-version = ["py38"] 48 | skip-string-normalization = true 49 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Testing 2 | on: 3 | push: 4 | branches: [main] 5 | pull_request: 6 | branches: [main] 7 | env: 8 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 9 | jobs: 10 |
test: 11 | name: Testing on 12 | runs-on: "ubuntu-latest" 13 | strategy: 14 | matrix: 15 | python-version: ["3.12", "3.11", "3.10", "3.9", "3.8", "pypy-3.9"] 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Cache packages 23 | uses: actions/cache@v3 24 | with: 25 | path: ~/.cache/pip 26 | key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('tox.ini') }} 27 | restore-keys: | 28 | ${{ runner.os }}-pip-${{ matrix.python-version }}- 29 | - name: Install dependencies 30 | run: python -m pip install tox tox-gh-actions 31 | - name: Test 32 | run: tox 33 | 34 | lint: 35 | name: Lint code 36 | runs-on: "ubuntu-latest" 37 | strategy: 38 | matrix: 39 | python-version: [3.8] 40 | steps: 41 | - uses: actions/checkout@v4 42 | - name: Set up Python 43 | uses: actions/setup-python@v4 44 | with: 45 | python-version: ${{ matrix.python-version }} 46 | - name: Cache packages 47 | uses: actions/cache@v3 48 | with: 49 | path: | 50 | ~/.cache/pre-commit 51 | ~/.cache/pip 52 | key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('tox.ini') }} 53 | restore-keys: | 54 | ${{ runner.os }}-pip-${{ matrix.python-version }}- 55 | - name: Install dependencies 56 | run: python -m pip install tox 57 | - name: Run linting 58 | run: tox -e lint 59 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from haproxy.line import Line 3 | 4 | import pytest 5 | 6 | 7 | DEFAULT_DATA = { 8 | 'syslog_date': 'Dec 9 13:01:26', 9 | 'process_name_and_pid': 'localhost haproxy[28029]:', 10 | 'client_ip': '127.0.0.1', 11 | 'client_port': 2345, 12 | 'accept_date': '09/Dec/2013:12:59:46.633', 13 | 'frontend_name': 'loadbalancer', 14 | 'backend_name': 'default', 15 | 'server_name': 'instance8', 
class LinesGenerator:
    """Callable that builds ``Line`` objects from a template and field values.

    The field values start as a deep copy of ``DEFAULT_DATA``. Every call may
    override some of them through keyword arguments; the overrides are stored
    on the instance, so they stay in effect for later calls.
    """

    def __init__(self, line_format):
        self.line_format = line_format
        self.data = deepcopy(DEFAULT_DATA)

    def __call__(self, *args, **kwargs):
        """Return a ``Line`` rendered with the stored values plus ``kwargs``.

        Positional arguments are accepted but ignored.
        """
        # Composite fields of the log line, each rebuilt from the single
        # values on every call so that overrides are picked up.
        composite_fields = (
            ('client_ip_and_port', '{client_ip}:{client_port}'),
            ('server_names', '{frontend_name} {backend_name}/{server_name}'),
            ('timers', '{tq}/{tw}/{tc}/{tr}/{tt}'),
            ('status_and_bytes', '{status} {bytes}'),
            ('connections_and_retries', '{act}/{fe}/{be}/{srv}/{retries}'),
            ('queues', '{queue_server}/{queue_backend}'),
        )

        self.data.update(kwargs)
        for field_name, template in composite_fields:
            self.data[field_name] = template.format(**self.data)

        return Line(self.line_format.format(**self.data))
69 | raw_line = ( 70 | '{syslog_date} {process_name_and_pid} {client_ip_and_port} ' 71 | '[{accept_date}] {server_names} {timers} {status_and_bytes} ' 72 | '- - ---- {connections_and_retries} {queues}{headers} ' 73 | '"{http_request}"' 74 | ) 75 | generator = LinesGenerator(raw_line) 76 | return generator 77 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from datetime import timedelta 3 | from haproxy.utils import date_str_to_datetime 4 | from haproxy.utils import delta_str_to_timedelta 5 | from haproxy.utils import VALID_COMMANDS 6 | from haproxy.utils import VALID_FILTERS 7 | from haproxy.utils import validate_arg_date 8 | from haproxy.utils import validate_arg_delta 9 | 10 | import pytest 11 | 12 | 13 | @pytest.mark.parametrize( 14 | ('text', 'expected'), 15 | [ 16 | ('45s', timedelta(seconds=45)), 17 | ('2m', timedelta(minutes=2)), 18 | ('13h', timedelta(hours=13)), 19 | ('2d', timedelta(days=2)), 20 | ], 21 | ) 22 | def test_str_to_timedelta(text, expected): 23 | """Check that deltas are converted to timedelta objects.""" 24 | assert delta_str_to_timedelta(text) == expected 25 | 26 | 27 | @pytest.mark.parametrize( 28 | ('text', 'expected'), 29 | [ 30 | ('04/Jan/2013', datetime(2013, 1, 4)), 31 | ('13/May/2015:13', datetime(2015, 5, 13, 13)), 32 | ('22/Jun/2017:12:11', datetime(2017, 6, 22, 12, 11)), 33 | ('29/Aug/2019:10:09:08', datetime(2019, 8, 29, 10, 9, 8)), 34 | ], 35 | ) 36 | def test_str_to_datetime(text, expected): 37 | """Check that start are converted to datetime objects.""" 38 | assert date_str_to_datetime(text) == expected 39 | 40 | 41 | @pytest.mark.parametrize('cmd_key', [*VALID_COMMANDS]) 42 | def test_valid_commands(cmd_key): 43 | """Check that the commands' information is complete.""" 44 | cmd_data = VALID_COMMANDS[cmd_key] 45 | assert cmd_data['klass'] 46 | assert 
cmd_data['klass'].command_line_name() == cmd_key 47 | assert cmd_data['description'] 48 | assert cmd_data['description'].startswith(f'{cmd_key}:\n\t') 49 | 50 | 51 | @pytest.mark.parametrize('filter_key', [*VALID_FILTERS]) 52 | def test_valid_filters(filter_key): 53 | """Check that the filters' information is complete.""" 54 | filter_data = VALID_FILTERS[filter_key] 55 | assert filter_data['obj'] 56 | assert filter_data['obj'].__name__ == f'filter_{filter_key}' 57 | assert filter_data['description'] 58 | assert filter_data['description'].startswith(f'{filter_key}:\n\t') 59 | 60 | 61 | @pytest.mark.parametrize(('value', 'expected'), [('', None), ('30/Dec/2019', True)]) 62 | def test_validate_date(value, expected): 63 | """Check that the date is validated or an exception raised.""" 64 | if expected is None: 65 | with pytest.raises(ValueError, match='--start argument is not valid'): 66 | validate_arg_date(value) 67 | else: 68 | assert validate_arg_date(value) is None 69 | 70 | 71 | @pytest.mark.parametrize(('value', 'expected'), [('', None), ('3d', True)]) 72 | def test_validate_delta(value, expected): 73 | """Check that the delta is validated or an exception raised.""" 74 | if expected is None: 75 | with pytest.raises(ValueError, match='--delta argument is not valid'): 76 | validate_arg_delta(value) 77 | 78 | else: 79 | assert validate_arg_delta(value) is None 80 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | from haproxy.main import create_parser 2 | from haproxy.main import main 3 | from haproxy.main import parse_arguments 4 | from haproxy.utils import VALID_COMMANDS 5 | from haproxy.utils import VALID_FILTERS 6 | 7 | import pytest 8 | import sys 9 | 10 | 11 | PY310_OR_HIGHER = sys.version_info[1] > 9 12 | 13 | 14 | @pytest.fixture() 15 | def default_arguments(): 16 | """Return all the expected arguments the main function 
expects.""" 17 | return { 18 | 'start': None, 19 | 'delta': None, 20 | 'log': 'tests/files/small.log', 21 | 'commands': ['counter'], 22 | 'negate_filter': None, 23 | 'filters': None, 24 | 'list_commands': False, 25 | 'list_filters': False, 26 | 'json': False, 27 | 'invalid_lines': False, 28 | } 29 | 30 | 31 | @pytest.mark.parametrize( 32 | ('switch', 'listing'), 33 | [('list-filters', VALID_FILTERS), ('list-commands', VALID_COMMANDS)], 34 | ) 35 | def test_list_filters_and_commands(capsys, switch, listing): 36 | """Test that one can request the filters/commands to be listed.""" 37 | parser = create_parser() 38 | data = parse_arguments(parser.parse_args([f'--{switch}'])) 39 | argument = switch.replace('-', '_') 40 | for key in data: 41 | expected = None 42 | if key == argument: 43 | expected = True 44 | assert data[key] is expected 45 | main(data) 46 | output_text = capsys.readouterr().out 47 | for name in listing: 48 | assert f'{name}:\n\t' in output_text 49 | 50 | 51 | def test_show_help(capsys): 52 | """Check that the help is shown if no arguments are given.""" 53 | parser = create_parser() 54 | data = parse_arguments(parser.parse_args([])) 55 | main(data) 56 | output_text = capsys.readouterr().out 57 | if PY310_OR_HIGHER: 58 | assert 'options:' in output_text 59 | else: 60 | assert 'optional arguments:' in output_text 61 | assert '--list-filters ' in output_text 62 | assert '--list-commands ' in output_text 63 | 64 | 65 | def test_main(capsys, default_arguments): 66 | """Check that the main function works as expected with default arguments.""" 67 | main(default_arguments) 68 | output_text = capsys.readouterr().out 69 | assert 'COUNTER\n=======\n9' in output_text 70 | 71 | 72 | def test_main_with_filter(capsys, default_arguments): 73 | """Check that the filters are applied as expected.""" 74 | default_arguments['filters'] = [ 75 | ('server', 'instance1'), 76 | ] 77 | main(default_arguments) 78 | output_text = capsys.readouterr().out 79 | assert 
# Delta argument: a number followed by a single unit letter, nothing else.
DELTA_REGEX = re.compile(r'\A(?P<value>\d+)(?P<time_unit>[smhd])\Z')

# Start argument: day/month/year with optional :hour, :minute and :second.
# It is only anchored at the beginning (used with ``match``), so any text
# after the last recognized part is ignored.
START_REGEX = re.compile(
    r'(?P<day>\d+)/(?P<month>\w+)/(?P<year>\d+)'
    r'(:(?P<hour>\d+)|)(:(?P<minute>\d+)|)(:(?P<second>\d+)|)'
)

# Maps the unit letter of a delta to the matching ``timedelta`` keyword.
DELTA_KEYS = {'s': 'seconds', 'm': 'minutes', 'h': 'hours', 'd': 'days'}


def date_str_to_datetime(date):
    """Convert a string to a datetime object.

    The format is `day/month/year[[[:hour]:minute]:second]` being:
    - day a number
    - month a three letter representation of the month (i.e. Dec, Jan, etc)
    - year as a 4 digits value
    - hour/minute/second as 2 digits value, each of them being optional

    :param date: the text to convert.
    :returns: the matching ``datetime``; parts not given default to zero.
    :raises AttributeError: if the text does not start with a
        day/month/year triplet (the regular expression does not match).
    :raises ValueError: if the matched values are not a valid date.
    """
    matches = START_REGEX.match(date)
    day, month, year = matches.group('day', 'month', 'year')
    raw_date_input = f'{day}/{month}/{year}'
    date_format = '%d/%b/%Y'
    # Extend both the value and the format with the optional parts present.
    for variable, percent in (('hour', ':%H'), ('minute', ':%M'), ('second', ':%S')):
        match = matches.group(variable)
        if match:
            date_format += percent
            raw_date_input = f'{raw_date_input}:{match}'

    return datetime.strptime(raw_date_input, date_format)


def delta_str_to_timedelta(delta):
    """Convert a string to a timedelta representation.

    Format is NUMBER followed by one of the following letters: `s`, `m`, `h`, `d`.
    Each of them meaning, second, minute, hour and day.

    :param delta: the text to convert.
    :returns: the matching ``timedelta``.
    :raises AttributeError: if the text does not follow the format.
    """
    matches = DELTA_REGEX.match(delta)
    value = int(matches.group('value'))
    time_unit = matches.group('time_unit')
    key = DELTA_KEYS[time_unit]
    return timedelta(**{key: value})


def validate_arg_date(start):
    """Check that date argument is valid.

    :returns: ``None`` when the value can be converted.
    :raises ValueError: with a message naming ``--start`` otherwise.
    """
    try:
        date_str_to_datetime(start)
    except (AttributeError, ValueError) as error:
        raise ValueError('--start argument is not valid') from error


def validate_arg_delta(delta):
    """Check that the delta argument is valid.

    :returns: ``None`` when the value can be converted.
    :raises ValueError: with a message naming ``--delta`` otherwise.
    """
    try:
        delta_str_to_timedelta(delta)
    except (AttributeError, ValueError) as error:
        raise ValueError('--delta argument is not valid') from error
70 | 71 | Data returned: 72 | - their names as the user is expected to use them from the command line 73 | - the object itself 74 | - its description 75 | """ 76 | from haproxy import filters 77 | 78 | data = {} 79 | for full_name in dir(filters): 80 | if not full_name.startswith('filter_'): 81 | continue 82 | name = full_name[7:] 83 | obj = getattr(filters, full_name) 84 | 85 | description = _strip_description(obj.__doc__) 86 | data[name] = {'obj': obj, 'description': f'{name}:\n\t{description}'} 87 | return data 88 | 89 | 90 | def list_commands(): 91 | """Return the information of existing commands. 92 | 93 | Data returned: 94 | - their names as the user is expected to use them from the command line 95 | - the object itself 96 | - its description 97 | """ 98 | from haproxy import commands 99 | 100 | data = {} 101 | for cmd in dir(commands): 102 | if cmd.endswith('Mixin'): 103 | continue 104 | klass = getattr(commands, cmd) 105 | try: 106 | name = klass.command_line_name() 107 | except AttributeError: 108 | continue 109 | 110 | description = _strip_description(klass.__doc__) 111 | data[name] = {'klass': klass, 'description': f'{name}:\n\t{description}'} 112 | return data 113 | 114 | 115 | def _strip_description(raw_text): 116 | if not raw_text: 117 | return '' 118 | text = '\n\t'.join([line.strip() for line in raw_text.split('\n') if line.strip()]) 119 | return text 120 | 121 | 122 | VALID_COMMANDS = list_commands() 123 | VALID_FILTERS = list_filters() 124 | -------------------------------------------------------------------------------- /tests/test_log_file.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from haproxy.logfile import Log 3 | 4 | import pytest 5 | 6 | 7 | def test_logfile_default_values(): 8 | """Check that the default values are set.""" 9 | log_file = Log('something') 10 | assert log_file.logfile == 'something' 11 | assert log_file.show_invalid is False 12 | assert 
@pytest.mark.parametrize('accept_date', ['09/Dec/2013:12:59:46.633', None])
def test_lines_validity(tmp_path, line_factory, accept_date):
    """Check that a single line is counted either as valid or as invalid.

    With an accept date a full log line is written to the file, otherwise
    an empty line is written, which can not be parsed.
    """
    line = line_factory(accept_date=accept_date).raw_line if accept_date else ''
    file_path = tmp_path / 'haproxy.log'
    file_path.write_text(f'{line}\n')

    log_file = Log(file_path)
    # Consume the iterator, the counters are updated while iterating.
    _ = list(log_file)

    expected_valid, expected_invalid = (1, 0) if accept_date else (0, 1)
    assert log_file.total_lines == 1
    assert log_file.valid_lines == expected_valid
    assert log_file.invalid_lines == expected_invalid
# NOTE: the docstrings of the ``filter_*`` functions are runtime data:
# ``haproxy.utils.list_filters`` builds each filter description from them,
# so they are written for the command line user. Implementation notes go in
# ``#`` comments instead.
#
# Every ``filter_*`` function is a factory: it receives the argument given
# by the user and returns a ``filter_func(log_line)`` callable that tells
# whether a log line passes the filter.


def _parse_size(value):
    # Convert a size given as a string to an integer.
    # A single leading ``+`` is dropped before the conversion.
    if value.startswith('+'):
        return int(value[1:])
    return int(value)


def filter_ip(ip):
    """Filter by IP.

    -f ip[192.168.1.2]  # will return only lines that have this IP.

    Either the client IP, or, if present, the first IP captured
    in the X-Forwarded-For header.
    """

    def filter_func(log_line):
        return log_line.ip == ip

    return filter_func


def filter_ip_range(ip_range):
    """Filter by an IP range.

    -f ip_range[192.168.1]

    Rather than proper IP ranges, is a string matching.
    See `ip` filter about which IP is being used.
    """

    def filter_func(log_line):
        ip = log_line.ip
        # Always answer with a boolean: a line without IP does not match
        # (previously ``None`` was returned implicitly in that case).
        return bool(ip) and ip.startswith(ip_range)

    return filter_func


def filter_path(path):
    """Filter by the request path.

    -f path[/one/two]

    It looks for the given path to be part of the requested path.
    """

    def filter_func(log_line):
        return path in log_line.http_request_path

    return filter_func


def filter_ssl(ignore=True):
    """Filter by SSL connection.

    -f ssl

    It checks that the request is made via the standard https port.
    """
    # ``ignore`` is not used; presumably it only exists so that this factory
    # accepts one argument like the others -- confirm against the caller.

    def filter_func(log_line):
        return log_line.is_https

    return filter_func


def filter_slow_requests(slowness):
    """Filter by response time.

    -f slow_requests[1000]  # get all lines that took more than a second to process

    Filters by the time it took the downstream server to process the request.
    Time is in milliseconds.
    """
    # Parse the threshold once, not on every log line. An invalid value now
    # fails when the filter is built, as ``filter_response_size`` does.
    slowness_int = int(slowness)

    def filter_func(log_line):
        # Inclusive: a request taking exactly ``slowness`` ms matches too.
        return slowness_int <= log_line.time_wait_response

    return filter_func


def filter_wait_on_queues(max_waiting):
    """Filter by queue time in HAProxy.

    -f wait_on_queues[1000]  # get all requests that waited more than a second in HAProxy

    Filters by the time a request had to wait in HAProxy
    prior to be sent to a downstream server to be processed.
    """
    # Parse the threshold once, not on every log line.
    waiting = int(max_waiting)

    def filter_func(log_line):
        # Inclusive: a request waiting exactly ``max_waiting`` matches too.
        return waiting <= log_line.time_wait_queues

    return filter_func


def filter_status_code(http_status):
    """Filter by a specific HTTP status code.

    -f status_code[404]
    """

    def filter_func(log_line):
        return log_line.status_code == http_status

    return filter_func


def filter_status_code_family(family_number):
    """Filter by a family of HTTP status code.

    -f status_code_family[5]  # get all 5xx status codes
    """

    def filter_func(log_line):
        return log_line.status_code.startswith(family_number)

    return filter_func


def filter_http_method(http_method):
    """Filter by HTTP method (GET, POST, PUT, HEAD...).

    -f http_method[GET]
    """

    def filter_func(log_line):
        return log_line.http_request_method == http_method

    return filter_func


def filter_backend(backend_name):
    """Filter by HAProxy backend.

    -f backend[specific_app]

    See HAProxy configuration, it can have multiple backends defined.
    """

    def filter_func(log_line):
        return log_line.backend_name == backend_name

    return filter_func


def filter_frontend(frontend_name):
    """Filter by which HAProxy frontend got the request.

    -f frontend[loadbalancer]

    See HAProxy configuration, it can have multiple frontends defined.
    """

    def filter_func(log_line):
        return log_line.frontend_name == frontend_name

    return filter_func


def filter_server(server_name):
    """Filter by downstream server.

    -f server[app01]
    """

    def filter_func(log_line):
        return log_line.server_name == server_name

    return filter_func


def filter_response_size(size):
    """Filter by how big (in bytes) the response was.

    -f response_size[50000]

    Specially useful when looking for big file downloads.
    """
    size_value = _parse_size(size)

    def filter_func(log_line):
        # ``bytes_read`` is a string that may carry a leading ``+``,
        # the same form accepted for ``size``.
        return _parse_size(log_line.bytes_read) >= size_value

    return filter_func
@pytest.mark.parametrize(
    ('start', 'delta'), [('30/Dec/2019', '3d'), ('20/Jun/2015', '2h')]
)
def test_arguments_dates(start, delta):
    """Well formed start and delta arguments are accepted.

    Both values have to come back untouched in the parsed data.
    """
    command_line = ['-s', start, '-d', delta]
    namespace = create_parser().parse_args(command_line)
    parsed = parse_arguments(namespace)
    assert parsed['start'] == start
    assert parsed['delta'] == delta
@pytest.mark.parametrize(
    ('filter_expression', 'expected'),
    [
        ('ip_range', [('ip_range', None)]),
        ('ip_rangelala]', None),
        ('ip_range[lala]', [('ip_range', 'lala')]),
    ],
)
def test_filters_with_arguments(filter_expression, expected):
    """Check that the arguments given to the filters are parsed properly.

    Or that an exception is raised otherwise: ``expected`` being ``None``
    marks the expressions that must be rejected.
    """
    if expected is None:
        with pytest.raises(ValueError, match='It is missing an opening square bracket'):
            parse_arg_filters(filter_expression)
    else:
        data = parse_arg_filters(filter_expression)
        assert data == expected
    usage: haproxy_log_analysis [-h] [-l LOG] [-s START] [-d DELTA] [-c COMMAND]
                                [-f FILTER] [-n] [--list-commands]
                                [--list-filters] [--json] [--invalid]
.. note::
   The ``-n`` command line argument allows reversing the output of the filters.
109 | 110 | - ``backend`` 111 | - ``frontend`` 112 | - ``http_method`` 113 | - ``ip`` 114 | - ``ip_range`` 115 | - ``path`` 116 | - ``response_size`` 117 | - ``server`` 118 | - ``slow_requests`` 119 | - ``ssl`` 120 | - ``status_code`` 121 | - ``status_code_family`` 122 | - ``wait_on_queues`` 123 | 124 | Installation 125 | ------------ 126 | After installation you will have a console script `haproxy_log_analysis`:: 127 | 128 | $ pip install haproxy_log_analysis 129 | 130 | TODO 131 | ---- 132 | - add more commands: *(help appreciated)* 133 | 134 | - reports on servers connection time 135 | - reports on termination state 136 | - reports around connections (active, frontend, backend, server) 137 | - *your ideas here* 138 | 139 | - think of a way to show the commands output in a meaningful way 140 | 141 | - be able to specify an output format. For any command that makes sense (slow 142 | requests for example) output the given fields for each log line (i.e. 143 | acceptance date, path, downstream server, load at that time...) 144 | 145 | - *your ideas* 146 | 147 | .. _HAProxy: http://haproxy.1wt.eu/ 148 | .. _HTTP log format: http://cbonte.github.io/haproxy-dconv/2.2/configuration.html#8.2.3 149 | .. _documentation and API: https://haproxy-log-analyzer.readthedocs.io/ 150 | .. _ReadTheDocs: http://readthedocs.org 151 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. 
# Print the list of available targets.
# The <target> placeholders are part of the message; they are safe inside
# the double quotes.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
# Build the Texinfo sources and run them through makeinfo.
# The sub-make is called through $(MAKE), as the latexpdf targets do, so
# that command line flags and the jobserver are passed down to it.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
def test_default_values(line_factory, default_line_data):
    """Check that a line built with the factory defaults exposes those values.

    Each attribute of the parsed line is compared against the matching
    entry of ``default_line_data``.
    """
    line = line_factory()

    assert line.client_ip == default_line_data['client_ip']
    assert line.client_port == default_line_data['client_port']

    # Substring check, not equality: presumably the default value carries
    # extra characters around the raw date -- confirm in conftest.
    assert line.raw_accept_date in default_line_data['accept_date']

    assert line.frontend_name == default_line_data['frontend_name']
    assert line.backend_name == default_line_data['backend_name']
    assert line.server_name == default_line_data['server_name']

    # Timers.
    assert line.time_wait_request == default_line_data['tq']
    assert line.time_wait_queues == default_line_data['tw']
    assert line.time_connect_server == default_line_data['tc']
    assert line.time_wait_response == default_line_data['tr']
    assert line.total_time == default_line_data['tt']

    assert line.status_code == default_line_data['status']
    assert line.bytes_read == default_line_data['bytes']

    # Connection counters.
    assert line.connections_active == default_line_data['act']
    assert line.connections_frontend == default_line_data['fe']
    assert line.connections_backend == default_line_data['be']
    assert line.connections_server == default_line_data['srv']
    assert line.retries == default_line_data['retries']

    assert line.queue_server == default_line_data['queue_server']
    assert line.queue_backend == default_line_data['queue_backend']

    # The parsed headers come without the surrounding whitespace and without
    # the first and last character of the raw value (the curly braces).
    assert line.captured_request_headers == default_line_data['headers'].strip()[1:-1]
    assert line.captured_response_headers is None

    assert line.raw_http_request == default_line_data['http_request']

    assert line.is_valid
def test_unparseable_http_request(line_factory):
    """Check that a request that can not be parsed is reported as invalid.

    Method, path and protocol are all expected to be the string 'invalid'.
    """
    line = line_factory(http_request='something')
    assert line.http_request_method == 'invalid'
    assert line.http_request_path == 'invalid'
    assert line.http_request_protocol == 'invalid'
142 | """ 143 | line = line_factory(http_request='GET /') 144 | assert line.http_request_method == 'GET' 145 | assert line.http_request_path == '/' 146 | assert line.http_request_protocol is None 147 | 148 | 149 | @pytest.mark.parametrize( 150 | 'syslog', 151 | [ 152 | # nixos format 153 | '2017-07-06T14:29:39+02:00', 154 | # regular format 155 | 'Dec 9 13:01:26', 156 | ], 157 | ) 158 | def test_syslog(line_factory, syslog): 159 | """Check that the timestamp at the beginning are parsed. 160 | 161 | We support different syslog formats, NixOS style and the one on other Linux. 162 | """ 163 | line = line_factory(syslog_date=syslog) 164 | assert line.is_valid is True 165 | 166 | 167 | def test_ip_from_headers(line_factory): 168 | """Check that the IP from the captured headers takes precedence.""" 169 | line = line_factory(headers=' {1.2.3.4}') 170 | assert line.ip == '1.2.3.4' 171 | 172 | 173 | @pytest.mark.parametrize( 174 | 'ip', 175 | ['1.2.3.4', '1.2.3.4, 2.3.4.5', '1.2.3.4,2.3.4.5,5.4.3.2'], 176 | ) 177 | def test_only_first_ip_from_headers(line_factory, ip): 178 | """Check that if there are multiple IPs, only the first one is used.""" 179 | line = line_factory(headers=f' {{{ip}}}') 180 | assert line.ip == '1.2.3.4' 181 | 182 | 183 | @pytest.mark.parametrize( 184 | 'ip', 185 | ['127.1.2.7', '1.127.230.47', 'fe80::9379:c29e:6701:cef8', 'fe80::9379:c29e::'], 186 | ) 187 | def test_ip_from_client_ip(line_factory, ip): 188 | """Check that if there is no IP on the captured headers, the client IP is used.""" 189 | line = line_factory(headers='', client_ip=ip) 190 | assert line.ip == ip 191 | 192 | 193 | @pytest.mark.parametrize( 194 | ('start', 'end', 'result'), 195 | [ 196 | (None, None, True), 197 | (TWO_DAYS_AGO, None, True), 198 | (IN_TWO_DAYS, None, False), 199 | (TWO_DAYS_AGO, IN_TWO_DAYS, True), 200 | (TWO_DAYS_AGO, TWO_DAYS_AGO, False), 201 | ], 202 | ) 203 | def test_is_within_timeframe(line_factory, start, end, result): 204 | """Check that a line is within a 
given time frame.""" 205 | line = line_factory(accept_date=NOW.strftime('%d/%b/%Y:%H:%M:%S.%f')) 206 | assert line.is_within_time_frame(start, end) is result 207 | -------------------------------------------------------------------------------- /tests/test_filters.py: -------------------------------------------------------------------------------- 1 | from haproxy import filters 2 | 3 | import pytest 4 | 5 | 6 | @pytest.mark.parametrize( 7 | ('to_filter', 'to_check', 'result'), 8 | [ 9 | ('1.2.3.4', '1.2.3.4', True), 10 | ('2.3.4.5', '5.3.5.4', False), 11 | ('2001:db8::8a2e:370:7334', '2001:db8::8a2e:370:7334', True), 12 | ('2001:db8::8a2e:370:7334', '2001:db8::8a2e:456:7321', False), 13 | ], 14 | ) 15 | def test_filter_ip(line_factory, to_filter, to_check, result): 16 | """Check that filter_ip filter works as expected.""" 17 | current_filter = filters.filter_ip(to_filter) 18 | headers = f' {{{to_check}}}' 19 | line = line_factory(headers=headers) 20 | assert current_filter(line) is result 21 | 22 | 23 | @pytest.mark.parametrize( 24 | ('to_filter', 'to_check', 'result'), 25 | [ 26 | ('1.2.3', '1.2.3.4', True), 27 | ('1.2.3', '1.2.3.78', True), 28 | ('2.3.4.5', '5.3.5.4', False), 29 | ('2001:db8', '2001:db8::8a2e:370:7334', True), 30 | ('2001:db8', '2001:db8::8a2e:456:7321', True), 31 | ('2134:db8', '2001:db8::8a2e:456:7321', False), 32 | ], 33 | ) 34 | def test_filter_ip_range(line_factory, to_filter, to_check, result): 35 | """Check that filter_ip_range filter works as expected.""" 36 | current_filter = filters.filter_ip_range(to_filter) 37 | headers = f' {{{to_check}}}' 38 | line = line_factory(headers=headers) 39 | assert current_filter(line) is result 40 | 41 | 42 | @pytest.mark.parametrize( 43 | ('path', 'result'), 44 | [ 45 | ('/path/to/image', True), 46 | ('/something/else', False), 47 | ('/another/image/here', True), 48 | ], 49 | ) 50 | def test_filter_path(line_factory, path, result): 51 | """Check that filter_path filter works as expected.""" 52 | 
@pytest.mark.parametrize(
    ('path', 'result'),
    [
        ('/ssl_path:443/image', True),
        ('/something/else', False),
        ('/another:443/ssl', True),
    ],
)
def test_filter_ssl(line_factory, path, result):
    """Check that filter_ssl filter works as expected."""
    # filter_ssl takes no argument; the paths containing ``:443`` are the
    # ones expected to pass the filter
    current_filter = filters.filter_ssl()
    http_request = f'GET {path} HTTP/1.1'
    line = line_factory(http_request=http_request)
    assert current_filter(line) is result
108 | [ 109 | ('2', '200', True), 110 | ('2', '230', True), 111 | ('2', '300', False), 112 | ('3', '300', True), 113 | ('3', '330', True), 114 | ('3', '400', False), 115 | ], 116 | ) 117 | def test_filter_status_code_family(line_factory, to_filter, to_check, result): 118 | """Test that the status_code_family filter works as expected.""" 119 | current_filter = filters.filter_status_code_family(to_filter) 120 | line = line_factory(status=to_check) 121 | assert current_filter(line) is result 122 | 123 | 124 | @pytest.mark.parametrize( 125 | ('to_filter', 'to_check', 'result'), 126 | [ 127 | ('GET', 'GET', True), 128 | ('GET', 'POST', False), 129 | ('GET', 'PUT', False), 130 | ('GET', 'PATCH', False), 131 | ('GET', 'DELETE', False), 132 | ('PATCH', 'PATCH', True), 133 | ('DELETE', 'DELETE', True), 134 | ], 135 | ) 136 | def test_filter_http_method(line_factory, to_filter, to_check, result): 137 | """Test that the http_method filter works as expected.""" 138 | current_filter = filters.filter_http_method(to_filter) 139 | line = line_factory(http_request=f'{to_check} /path HTTP/1.1') 140 | assert current_filter(line) is result 141 | 142 | 143 | @pytest.mark.parametrize( 144 | ('to_filter', 'to_check', 'result'), 145 | [ 146 | ('default', 'default', True), 147 | ('default', 'backend', False), 148 | ('backend', 'backend', True), 149 | ('backend', 'default', False), 150 | ], 151 | ) 152 | def test_filter_backend(line_factory, to_filter, to_check, result): 153 | """Test that the backend filter works as expected.""" 154 | current_filter = filters.filter_backend(to_filter) 155 | line = line_factory(backend_name=to_check) 156 | assert current_filter(line) is result 157 | 158 | 159 | @pytest.mark.parametrize( 160 | ('to_filter', 'to_check', 'result'), 161 | [ 162 | ('varnish', 'varnish', True), 163 | ('varnish', 'nginx', False), 164 | ('nginx', 'nginx', True), 165 | ('nginx', 'varnish', False), 166 | ], 167 | ) 168 | def test_filter_frontend(line_factory, to_filter, to_check, 
result): 169 | """Test that the frontend filter works as expected.""" 170 | current_filter = filters.filter_frontend(to_filter) 171 | line = line_factory(frontend_name=to_check) 172 | assert current_filter(line) is result 173 | 174 | 175 | @pytest.mark.parametrize( 176 | ('to_filter', 'to_check', 'result'), 177 | [ 178 | ('server1', 'server1', True), 179 | ('server1', 'backend23', False), 180 | ('backend23', 'backend23', True), 181 | ('backend23', 'server1', False), 182 | ], 183 | ) 184 | def test_filter_server(line_factory, to_filter, to_check, result): 185 | """Test that the server filter works as expected.""" 186 | current_filter = filters.filter_server(to_filter) 187 | line = line_factory(server_name=to_check) 188 | assert current_filter(line) is result 189 | 190 | 191 | @pytest.mark.parametrize( 192 | ('to_filter', 'to_check', 'result'), 193 | [ 194 | ('400', '500', True), 195 | ('400', '+500', True), 196 | ('+400', '500', True), 197 | ('+400', '+500', True), 198 | ('400', '300', False), 199 | ('400', '+300', False), 200 | ('+400', '300', False), 201 | ('+400', '+300', False), 202 | ], 203 | ) 204 | def test_filter_response_size(line_factory, to_filter, to_check, result): 205 | """Test that the size filter works as expected. 206 | 207 | Note that both filter and value can have a leading plus sign. 
208 | """ 209 | current_filter = filters.filter_response_size(to_filter) 210 | line = line_factory(bytes=to_check) 211 | assert current_filter(line) is result 212 | -------------------------------------------------------------------------------- /tests/test_regex.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from haproxy.line import HAPROXY_LINE_REGEX 3 | from haproxy.line import HTTP_REQUEST_REGEX 4 | 5 | import pytest 6 | import random 7 | 8 | 9 | def test_default_values(line_factory, default_line_data): 10 | """Check that the default line with default values is parsed.""" 11 | line = line_factory() 12 | matches = HAPROXY_LINE_REGEX.match(line.raw_line) 13 | assert matches.group('http_request') == default_line_data['http_request'] 14 | 15 | 16 | def test_client_ip_and_port(line_factory): 17 | """Check that the client IP and port are extracted correctly.""" 18 | ip = '192.168.0.250' 19 | port = '34' 20 | line = line_factory(client_ip=ip, client_port=port) 21 | matches = HAPROXY_LINE_REGEX.match(line.raw_line) 22 | 23 | assert matches.group('client_ip') == ip 24 | assert matches.group('client_port') == port 25 | 26 | 27 | def test_accept_date(line_factory): 28 | """Check that the accept date is extracted correctly.""" 29 | accept_date = datetime.now().strftime('%d/%b/%Y:%H:%M:%S.%f') 30 | line = line_factory(accept_date=accept_date) 31 | matches = HAPROXY_LINE_REGEX.match(line.raw_line) 32 | 33 | assert matches.group('accept_date') == accept_date 34 | 35 | 36 | def test_server_names(line_factory): 37 | """Check that the server names are extracted correctly.""" 38 | frontend_name = 'SomeThing4' 39 | backend_name = 'Another1' 40 | server_name = 'Cloud9' 41 | line = line_factory( 42 | frontend_name=frontend_name, backend_name=backend_name, server_name=server_name 43 | ) 44 | matches = HAPROXY_LINE_REGEX.match(line.raw_line) 45 | 46 | assert matches.group('frontend_name') == frontend_name 47 | 
assert matches.group('backend_name') == backend_name 48 | assert matches.group('server_name') == server_name 49 | 50 | 51 | @pytest.mark.parametrize( 52 | ('tq', 'tw', 'tc', 'tr', 'tt'), 53 | [ 54 | ('0', '0', '0', '0', '0'), 55 | ('23', '55', '3', '4', '5'), 56 | ('-23', '-33', '-3', '-4', '5'), 57 | ('23', '33', '3', '4', '+5'), 58 | ], 59 | ) 60 | def test_timers(line_factory, tq, tw, tc, tr, tt): 61 | """Check that the timers are extracted correctly. 62 | 63 | Note that all timers can be negative but `tt`, 64 | and that `tt` is the only one that can have a positive sign. 65 | """ 66 | line = line_factory(tq=tq, tw=tw, tc=tc, tr=tr, tt=tt) 67 | matches = HAPROXY_LINE_REGEX.match(line.raw_line) 68 | 69 | assert matches.group('tq') == tq 70 | assert matches.group('tw') == tw 71 | assert matches.group('tc') == tc 72 | assert matches.group('tr') == tr 73 | assert matches.group('tt') == tt 74 | 75 | 76 | @pytest.mark.parametrize( 77 | ('status', 'bytes_read'), [('200', '0'), ('-301', '543'), ('200', '+543')] 78 | ) 79 | def test_status_and_bytes(line_factory, status, bytes_read): 80 | """Check that the status code and bytes are extracted correctly. 81 | 82 | Note that `status` can be negative (for terminated requests), 83 | and `bytes` can be prefixed with a plus sign. 84 | """ 85 | line = line_factory(status=status, bytes=bytes_read) 86 | matches = HAPROXY_LINE_REGEX.match(line.raw_line) 87 | 88 | assert matches.group('status_code') == status 89 | assert matches.group('bytes_read') == bytes_read 90 | 91 | 92 | @pytest.mark.parametrize( 93 | ('act', 'fe', 'be', 'srv', 'retries'), 94 | [ 95 | ('0', '0', '0', '0', '0'), 96 | ('40', '10', '11', '12', '14'), 97 | ('40', '10', '11', '12', '+14'), 98 | ], 99 | ) 100 | def test_connections_and_retries(line_factory, act, fe, be, srv, retries): 101 | """Check that the connections and retries are extracted correctly. 102 | 103 | Note that `retries` might have a plus sign prefixed. 
104 | """ 105 | line = line_factory(act=act, fe=fe, be=be, srv=srv, retries=retries) 106 | matches = HAPROXY_LINE_REGEX.match(line.raw_line) 107 | 108 | assert matches.group('act') == act 109 | assert matches.group('fe') == fe 110 | assert matches.group('be') == be 111 | assert matches.group('srv') == srv 112 | assert matches.group('retries') == retries 113 | 114 | 115 | @pytest.mark.parametrize(('server', 'backend'), [('0', '0'), ('200', '200')]) 116 | def test_queues(line_factory, server, backend): 117 | """Check that the server and backend queues are extracted correctly.""" 118 | line = line_factory(queue_server=server, queue_backend=backend) 119 | matches = HAPROXY_LINE_REGEX.match(line.raw_line) 120 | 121 | assert matches.group('queue_server') == server 122 | assert matches.group('queue_backend') == backend 123 | 124 | 125 | @pytest.mark.parametrize( 126 | ('request_header', 'response_header'), 127 | [ 128 | ('', ''), 129 | ('something', None), 130 | ('something here', 'and there'), 131 | ('multiple | request | headers', 'and | multiple | response ones'), 132 | ], 133 | ) 134 | def test_captured_headers(line_factory, request_header, response_header): 135 | """Check that captured headers are extracted correctly.""" 136 | if response_header: 137 | headers = f' {{{request_header}}} {{{response_header}}}' 138 | else: 139 | headers = f' {{{request_header}}}' 140 | line = line_factory(headers=headers) 141 | matches = HAPROXY_LINE_REGEX.match(line.raw_line) 142 | 143 | if response_header: 144 | assert matches.group('request_headers') == request_header 145 | assert matches.group('response_headers') == response_header 146 | else: 147 | assert matches.group('headers') == request_header 148 | assert matches.group('request_headers') is None 149 | assert matches.group('response_headers') is None 150 | 151 | 152 | def test_http_request(line_factory): 153 | """Check that the HTTP request is extracted correctly.""" 154 | http_request = 'something in the air' 155 | line = 
@pytest.mark.parametrize(
    'path',
    [
        '/path/to/image',
        '/path/with/port:80',  # with port
        '/path/with/example.com',  # with domain
        '/path/to/article#section',  # with anchor
        '/article?hello=world&goodbye=lennin',  # with parameters
        '/article-with-dashes_and_underscores',  # dashes and underscores
        '/redirect_to?http://example.com',  # double slashes
        '/@@funny',  # at sign
        '/something%20encoded',  # percent sign
        '/++adding++is+always+fun',  # plus sign
        '/here_or|here',  # vertical bar
        '/here~~~e',  # tilde sign
        '/here_*or',  # asterisk sign
        '/something;or-not',  # semicolon
        '/something-important!probably',  # exclamation mark
        '/something$important',  # dollar sign
        # NOTE: the trailing comma matters, without it this literal is
        # implicitly concatenated with the next one and both cases are lost
        "/there's-one's-way-or-another's",  # single quote sign
        '/there?la=as,is',  # comma
        '/here_or(here)',  # parenthesis
        '/here_or[here]',  # square brackets
        '/georg}von{grote/\\',  # curly brackets
        '/here_or<',  # less than
        '/here_or>',  # more than
        '/georg-von-grote/\\',  # back slash
        '/georg`von´grote/\\',  # diacritics
        '/georg`von^grote/\\',  # caret
    ],
)
def test_http_request_regex(path):
    """Test that the method/path/protocol are extracted properly from the HTTP request.

    Each path exercises one character that is allowed in a request path.
    The method and protocol are picked at random for every case, as they
    should not influence how the path is matched.
    """
    verbs = ('GET', 'POST', 'DELETE', 'PATCH', 'PUT')
    protocols = (
        'HTTP/1.0',
        'HTTP/1.1',
        'HTTP/2.0',
    )
    method = random.choice(verbs)
    protocol = random.choice(protocols)
    matches = HTTP_REQUEST_REGEX.match(f'{method} {path} {protocol}')
    assert matches.group('method') == method
    assert matches.group('path') == path
    assert matches.group('protocol') == protocol
-------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | CHANGES 2 | ======= 3 | 4 | 6.0.0a5 (unreleased) 5 | -------------------- 6 | 7 | - Nothing changed yet. 8 | 9 | 10 | 6.0.0a4 (2023-11-25) 11 | -------------------- 12 | 13 | - More GHA automation fixes. 14 | [gforcada] 15 | 16 | 6.0.0a3 (2023-11-25) 17 | -------------------- 18 | 19 | - Play with gh command line tool. 20 | [gforcada] 21 | 22 | 6.0.0a2 (2023-11-12) 23 | -------------------- 24 | 25 | - Test again a release. 26 | [gforcada] 27 | 28 | 6.0.0a1 (2023-11-12) 29 | -------------------- 30 | 31 | - Make listing of commands and filters easier to read. 32 | [gforcada] 33 | 34 | - Improve the filters' and commands' descriptions, 35 | with ready to use examples. 36 | [gforcada] 37 | 38 | - Switch logic of `wait_on_queues` filter, 39 | count lines that are above the filter, 40 | e.g. the lines that took more than the specified time. 41 | [gforcada] 42 | 43 | - move code to a `src` folder 44 | [gforcada] 45 | 46 | - drop `pkg_resources` usage, default to native namespaces 47 | [gforcada] 48 | 49 | - switch to `pyproject.toml` and remove `setup.py` 50 | [gforcada] 51 | 52 | - use `tox` 53 | [gforcada] 54 | 55 | - use `pre-commit` 56 | [gforcada] 57 | 58 | - soft drop python 3.7 (it's EOL, and we stop testing against it) 59 | [gforcada] 60 | 61 | - Pin dependencies installed in `tox.ini` 62 | [gforcada] 63 | 64 | - Add support for Python 3.12 65 | [gforcada] 66 | 67 | - Automatically create GitHub releases with GitHub Actions. 68 | [gforcada] 69 | 70 | 5.1.0 (2022-12-03) 71 | ------------------ 72 | 73 | - Only get the first IP from `X-Forwarded-For` header. 74 | [gforcada] 75 | 76 | - Improve tests robustness. 77 | [gforcada] 78 | 79 | - Fix `top_ips` and `top_request_paths` commands output. 80 | They were showing all output, rather than only the top 10. 
81 | [gforcada] 82 | 83 | - Move `tests` folder to the top-level. 84 | [gforcada] 85 | 86 | 5.0.0 (2022-11-27) 87 | ------------------ 88 | 89 | - Drop testing on travis-ci. 90 | [gforcada] 91 | 92 | - Use GitHub Actions. 93 | [gforcada] 94 | 95 | - Format the code with `pyupgrade`, `black` and `isort`. 96 | [gforcada] 97 | 98 | - Use `pip-tools` to keep dependencies locked. 99 | [gforcada] 100 | 101 | - Bump python versions supported to 3.7-3.11 and pypy. 102 | [gforcada] 103 | 104 | - Drop python 3.6 (EOL). 105 | [gforcada] 106 | 107 | 4.1.0 (2020-01-06) 108 | ------------------ 109 | 110 | - **New command:** ``requests_per_hour``. 111 | Just like the ``requests_per_minute`` but with hour granularity. 112 | Idea and first implementation done by ``valleedelisle``. 113 | [gforcada] 114 | 115 | - Fix parsing truncated requests. 116 | Idea and first implementation by ``vixns``. 117 | [gforcada] 118 | 119 | 4.0.0 (2020-01-06) 120 | ------------------ 121 | 122 | **BREAKING CHANGES:** 123 | 124 | - Complete rewrite to use almost no memory usage even on huge files. 125 | [gforcada] 126 | 127 | - Add parallelization to make parsing faster by parsing multiple lines in parallel. 128 | [gforcada] 129 | 130 | - Rename command ``counter_slow_requests`` to ``slow_requests_counter``, 131 | so it is aligned with all other ``_counter`` commands. 132 | [gforcada] 133 | 134 | - Changed the ``counter_invalid`` command to a new command line switch ``--invalid``. 135 | [gforcada] 136 | 137 | **Regular changes:** 138 | 139 | - Drop Python 2 support, and test on Python 3.8. 140 | [gforcada] 141 | 142 | - Remove the pickling support. 143 | [gforcada] 144 | 145 | - Add `--json` output command line option. 146 | [valleedelisle] 147 | 148 | 3.0.0 (2019-06-10) 149 | ------------------ 150 | 151 | - Fix spelling. 152 | [EdwardBetts] 153 | 154 | - Make ip_counter use client_ip per default. 155 | [vixns] 156 | 157 | - Overhaul testing environment. Test on python 3.7 as well. 
Use black to format. 158 | [gforcada] 159 | 160 | 2.1 (2017-07-06) 161 | ---------------- 162 | - Enforce QA checks (flake8) on code. 163 | All code has been updated to follow it. 164 | [gforcada] 165 | 166 | - Support Python 3.6. 167 | [gforcada] 168 | 169 | - Support different syslog timestamps (at least NixOS). 170 | [gforcada] 171 | 172 | 2.0.2 (2016-11-17) 173 | ------------------ 174 | 175 | - Improve performance for ``cmd_print``. 176 | [kevinjqiu] 177 | 178 | 2.0.1 (2016-10-29) 179 | ------------------ 180 | 181 | - Allow hostnames to have a dot in it. 182 | [gforcada] 183 | 184 | 2.0 (2016-07-06) 185 | ---------------- 186 | - Handle unparsable HTTP requests. 187 | [gforcada] 188 | 189 | - Only test on python 2.7 and 3.5 190 | [gforcada] 191 | 192 | 2.0b0 (2016-04-18) 193 | ------------------ 194 | - Check the divisor before doing a division to not get ``ZeroDivisionError`` exceptions. 195 | [gforcada] 196 | 197 | 2.0a0 (2016-03-29) 198 | ------------------ 199 | - Major refactoring: 200 | 201 | # Rename modules and classes: 202 | 203 | - haproxy_logline -> line 204 | - haproxy_logfile -> logfile 205 | - HaproxyLogLine -> Line 206 | - HaproxyLogFile -> Log 207 | 208 | # Parse the log file on Log() creation (i.e. in its __init__) 209 | 210 | [gforcada] 211 | 212 | 1.3 (2016-03-29) 213 | ---------------- 214 | 215 | - New filter: ``filter_wait_on_queues``. 216 | Get all requests that waited at maximum X amount of milliseconds on HAProxy queues. 217 | [gforcada] 218 | 219 | - Code/docs cleanups and add code analysis. 220 | [gforcada] 221 | 222 | - Avoid using eval. 223 | [gforcada] 224 | 225 | 1.2.1 (2016-02-23) 226 | ------------------ 227 | 228 | - Support -1 as a status_code 229 | [Christopher Baines] 230 | 231 | 1.2 (2015-12-07) 232 | ---------------- 233 | 234 | - Allow a hostname on the syslog part (not only IPs) 235 | [danny crasto] 236 | 237 | 1.1 (2015-04-19) 238 | ---------------- 239 | 240 | - Make syslog optional. 
241 | Fixes issue https://github.com/gforcada/haproxy_log_analysis/issues/10. 242 | [gforcada] 243 | 244 | 1.0 (2015-03-24) 245 | ---------------- 246 | 247 | - Fix issue #9. 248 | log line on the syslog part was too strict, 249 | it was expecting the hostname to be a string and was 250 | failing if it was an IP. 251 | [gforcada] 252 | 253 | 0.0.3.post2 (2015-01-05) 254 | ------------------------ 255 | 256 | - Finally really fixed issue #7. 257 | ``namespace_packages`` was not meant to be on setup.py at all. 258 | Silly copy&paste mistake. 259 | [gforcada] 260 | 261 | 0.0.3.post (2015-01-04) 262 | ----------------------- 263 | 264 | - Fix release on PyPI. 265 | Solves GitHub issue #7. 266 | https://github.com/gforcada/haproxy_log_analysis/issues/7 267 | [gforcada] 268 | 269 | 0.0.3 (2014-07-09) 270 | ------------------ 271 | 272 | - Fix release on PyPI (again). 273 | [gforcada] 274 | 275 | 0.0.2 (2014-07-09) 276 | ------------------ 277 | 278 | - Fix release on PyPI. 279 | [gforcada] 280 | 281 | 0.0.1 (2014-07-09) 282 | ------------------ 283 | 284 | - Pickle :class:`.HaproxyLogFile` data for faster performance. 285 | [gforcada] 286 | 287 | - Add a way to negate the filters, so that instead of being able to filter by 288 | IP, it can output all but that IP information. 289 | [gforcada] 290 | 291 | - Add lots of filters: ip, path, ssl, backend, frontend, server, status_code 292 | and so on. See ``--list-filters`` for a complete list of them. 293 | [gforcada] 294 | 295 | - Add :meth:`.HaproxyLogFile.parse_data` method to get data from data stream. 296 | It allows you to use it as a library. 297 | [bogdangi] 298 | 299 | - Add ``--list-filters`` argument on the command line interface. 300 | [gforcada] 301 | 302 | - Add ``--filter`` argument on the command line interface, inspired by 303 | Bogdan's early design. 304 | [bogdangi] [gforcada] 305 | 306 | - Create a new module :mod:`haproxy.filters` that holds all available filters. 
307 | [gforcada] 308 | 309 | - Improve :meth:`.HaproxyLogFile.cmd_queue_peaks` output to not only show 310 | peaks but also when requests started to queue and when they finished and 311 | the amount of requests that had been queued. 312 | [gforcada] 313 | 314 | - Show help when no argument is given. 315 | [gforcada] 316 | 317 | - Polish documentation and docstrings here and there. 318 | [gforcada] 319 | 320 | - Add a ``--list-commands`` argument on the command line interface. 321 | [gforcada] 322 | 323 | - Generate an API doc for ``HaproxyLogLine`` and ``HaproxyLogFile``. 324 | [bogdangi] 325 | 326 | - Create a ``console_script`` `haproxy_log_analysis` for ease of use. 327 | [bogdangi] 328 | 329 | - Add Sphinx documentation system, still empty. 330 | [gforcada] 331 | 332 | - Keep valid log lines sorted so that the exact order of connections is kept. 333 | [gforcada] 334 | 335 | - Add quite a few commands, see `README.rst`_ for a complete list of them. 336 | [gforcada] 337 | 338 | - Run commands passed as arguments (with -c flag). 339 | [gforcada] 340 | 341 | - Add a requirements.txt file to keep track of dependencies and pin them. 342 | [gforcada] 343 | 344 | - Add travis_ and coveralls_ support. See their badges on `README.rst`_. 345 | [gforcada] 346 | 347 | - Add argument parsing and custom validation logic for all arguments. 348 | [gforcada] 349 | 350 | - Add regular expressions for haproxy log lines (HTTP format) and to 351 | parse HTTP requests path. 352 | Added tests to ensure they work as expected. 353 | [gforcada] 354 | 355 | - Create distribution. 356 | [gforcada] 357 | 358 | .. _travis: https://travis-ci.org/ 359 | .. _coveralls: https://coveralls.io/ 360 | .. 
_README.rst: http://github.com/gforcada/haproxy_log_analysis 361 | -------------------------------------------------------------------------------- /src/haproxy/main.py: -------------------------------------------------------------------------------- 1 | from haproxy.logfile import Log 2 | from haproxy.utils import VALID_COMMANDS 3 | from haproxy.utils import VALID_FILTERS 4 | from haproxy.utils import validate_arg_date 5 | from haproxy.utils import validate_arg_delta 6 | 7 | import argparse 8 | import os 9 | 10 | 11 | def create_parser(): 12 | desc = 'Analyze HAProxy log files and outputs statistics about it' 13 | parser = argparse.ArgumentParser(description=desc) 14 | 15 | parser.add_argument('-l', '--log', help='HAProxy log file to analyze') 16 | 17 | parser.add_argument( 18 | '-s', 19 | '--start', 20 | help='Process log entries starting at this time, in HAProxy date ' 21 | 'format (e.g. 11/Dec/2013 or 11/Dec/2013:19:31:41). ' 22 | 'At least provide the day/month/year. Values not specified will ' 23 | 'use their base value (e.g. 00 for hour). Use in conjunction ' 24 | 'with -d to limit the number of entries to process.', 25 | ) 26 | 27 | parser.add_argument( 28 | '-d', 29 | '--delta', 30 | help='Limit the number of entries to process. Express the time delta ' 31 | 'as a number and a time unit, e.g.: 1s, 10m, 3h or 4d (for 1 ' 32 | 'second, 10 minutes, 3 hours or 4 days). Use in conjunction with ' 33 | '-s to only analyze certain time delta. If no start time is ' 34 | 'given, the time on the first line will be used instead.', 35 | ) 36 | 37 | parser.add_argument( 38 | '-c', 39 | '--command', 40 | help='List of commands, comma separated, to run on the log file. See ' 41 | '--list-commands to get a full list of them.', 42 | ) 43 | 44 | parser.add_argument( 45 | '-f', 46 | '--filter', 47 | help='List of filters to apply on the log file. Passed as comma ' 48 | 'separated and parameters within square brackets, e.g ' 49 | 'ip[192.168.1.1],ssl,path[/some/path]. 
def parse_arguments(args):
    """Convert the parsed command line *args* into the options dictionary.

    *args* is the namespace produced by the parser built in
    ``create_parser``. The returned dictionary always has the keys
    ``start``, ``delta``, ``commands``, ``filters``, ``negate_filter``,
    ``log``, ``list_commands``, ``list_filters``, ``json`` and
    ``invalid_lines``; options that were not requested are left as ``None``.

    A ``ValueError`` is raised if a command, a filter or the log file given
    is not valid (the date and delta validators may raise as well).
    """
    data = {
        'start': None,
        'delta': None,
        'commands': None,
        'filters': None,
        'negate_filter': None,
        'log': None,
        'list_commands': None,
        'list_filters': None,
        'json': None,
        'invalid_lines': None,
    }

    if args.list_commands:
        data['list_commands'] = True
        # no need to further process any other input parameter
        return data

    if args.list_filters:
        data['list_filters'] = True
        # no need to further process any other input parameter
        return data

    if args.negate_filter:
        data['negate_filter'] = True

    if args.start is not None:
        validate_arg_date(args.start)
        data['start'] = args.start

    if args.delta is not None:
        validate_arg_delta(args.delta)
        data['delta'] = args.delta

    if args.command is not None:
        data['commands'] = parse_arg_commands(args.command)

    if args.filter is not None:
        data['filters'] = parse_arg_filters(args.filter)

    if args.log is not None:
        _validate_arg_logfile(args.log)
        data['log'] = args.log

    if args.json is not None:
        data['json'] = args.json

    # ``--invalid`` is declared with ``action='store_false'``, so the
    # attribute is ``False`` exactly when the user passed the switch.
    # The value must not be taken from ``args.json`` (copy&paste mistake),
    # otherwise ``--invalid`` never enables printing the invalid lines.
    if args.invalid is False:
        data['invalid_lines'] = True

    return data
def show_help(data):
    """Print the command line help when nothing actionable was requested.

    Options that only tune a run (log file, json output, filter negation and
    invalid lines) do not count as a request on their own.

    :param data: options dictionary, as built by ``parse_arguments``.
    :returns: ``True`` if the help was printed, ``False`` otherwise.
    """
    tuning_only = ('log', 'json', 'negate_filter', 'invalid_lines')
    something_requested = any(
        value is not None and name not in tuning_only
        for name, value in data.items()
    )
    if something_requested:
        return False

    # make sure that if no arguments are passed the help is shown
    create_parser().print_help()
    return True
def requested_commands(args):
    """Instantiate the commands requested on the command line.

    :param args: options dictionary; ``args['commands']`` holds the list of
        command names, or ``None`` when no command was requested.
    :returns: list with one new command instance per requested name, in the
        order they were given. Empty when no command was requested.
    """
    # ``commands`` stays as None when only filters (or other options) are
    # given: treat that as "no commands", the same way requested_filters
    # handles ``filters``, instead of failing with a TypeError on iteration.
    return [VALID_COMMANDS[name]['klass']() for name in args['commands'] or ()]
13 | 14 | # If extensions (or modules to document with autodoc) are in another directory, 15 | # add these directories to sys.path here. If the directory is relative to the 16 | # documentation root, use os.path.abspath to make it absolute, like shown here. 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | # -- General configuration ------------------------------------------------ 20 | 21 | # If your documentation needs a minimal Sphinx version, state it here. 22 | # needs_sphinx = '1.0' 23 | 24 | # Add any Sphinx extension module names here, as strings. They can be 25 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 26 | # ones. 27 | extensions = [ 28 | 'sphinx.ext.autodoc', 29 | ] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # The suffix of source filenames. 35 | source_suffix = '.rst' 36 | 37 | # The encoding of source files. 38 | # source_encoding = 'utf-8-sig' 39 | 40 | # The master toctree document. 41 | master_doc = 'index' 42 | 43 | # General information about the project. 44 | project = 'HAProxy log analyzer' 45 | copyright = '2013, Gil Forcada' # noqa: A001 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | # The short X.Y version. 52 | version = '0.1' 53 | # The full version, including alpha/beta/rc tags. 54 | release = '0.1' 55 | 56 | # The language for content autogenerated by Sphinx. Refer to documentation 57 | # for a list of supported languages. 58 | # language = None 59 | 60 | # There are two options for replacing |today|: either, you set today to some 61 | # non-false value, then it is used: 62 | # today = '' 63 | # Else, today_fmt is used as the format for a strftime call. 
64 | # today_fmt = '%B %d, %Y' 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | exclude_patterns = [] 69 | 70 | # The reST default role (used for this markup: `text`) to use for all 71 | # documents. 72 | # default_role = None 73 | 74 | # If true, '()' will be appended to :func: etc. cross-reference text. 75 | # add_function_parentheses = True 76 | 77 | # If true, the current module name will be prepended to all description 78 | # unit titles (such as .. function::). 79 | # add_module_names = True 80 | 81 | # If true, sectionauthor and moduleauthor directives will be shown in the 82 | # output. They are ignored by default. 83 | # show_authors = False 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = 'sphinx' 87 | 88 | # A list of ignored prefixes for module index sorting. 89 | # modindex_common_prefix = [] 90 | 91 | # If true, keep warnings as "system message" paragraphs in the built documents. 92 | # keep_warnings = False 93 | 94 | 95 | # -- Options for HTML output ---------------------------------------------- 96 | 97 | # The theme to use for HTML and HTML Help pages. See the documentation for 98 | # a list of builtin themes. 99 | html_theme = 'default' 100 | 101 | # Theme options are theme-specific and customize the look and feel of a theme 102 | # further. For a list of options available for each theme, see the 103 | # documentation. 104 | # html_theme_options = {} 105 | 106 | # Add any paths that contain custom themes here, relative to this directory. 107 | # html_theme_path = [] 108 | 109 | # The name for this set of Sphinx documents. If None, it defaults to 110 | # " v documentation". 111 | # html_title = None 112 | 113 | # A shorter title for the navigation bar. Default is the same as html_title. 
114 | # html_short_title = None 115 | 116 | # The name of an image file (relative to this directory) to place at the top 117 | # of the sidebar. 118 | # html_logo = None 119 | 120 | # The name of an image file (within the static path) to use as favicon of the 121 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 122 | # pixels large. 123 | # html_favicon = None 124 | 125 | # Add any paths that contain custom static files (such as style sheets) here, 126 | # relative to this directory. They are copied after the builtin static files, 127 | # so a file named "default.css" will overwrite the builtin "default.css". 128 | html_static_path = ['_static'] 129 | 130 | # Add any extra paths that contain custom files (such as robots.txt or 131 | # .htaccess) here, relative to this directory. These files are copied 132 | # directly to the root of the documentation. 133 | # html_extra_path = [] 134 | 135 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 136 | # using the given strftime format. 137 | # html_last_updated_fmt = '%b %d, %Y' 138 | 139 | # If true, SmartyPants will be used to convert quotes and dashes to 140 | # typographically correct entities. 141 | # html_use_smartypants = True 142 | 143 | # Custom sidebar templates, maps document names to template names. 144 | # html_sidebars = {} 145 | 146 | # Additional templates that should be rendered to pages, maps page names to 147 | # template names. 148 | # html_additional_pages = {} 149 | 150 | # If false, no module index is generated. 151 | # html_domain_indices = True 152 | 153 | # If false, no index is generated. 154 | # html_use_index = True 155 | 156 | # If true, the index is split into individual pages for each letter. 157 | # html_split_index = False 158 | 159 | # If true, links to the reST sources are added to the pages. 160 | # html_show_sourcelink = True 161 | 162 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 
163 | # html_show_sphinx = True 164 | 165 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 166 | # html_show_copyright = True 167 | 168 | # If true, an OpenSearch description file will be output, and all pages will 169 | # contain a tag referring to it. The value of this option must be the 170 | # base URL from which the finished HTML is served. 171 | # html_use_opensearch = '' 172 | 173 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 174 | # html_file_suffix = None 175 | 176 | # Output file base name for HTML help builder. 177 | htmlhelp_basename = 'HAProxyloganalyzerdoc' 178 | 179 | 180 | # -- Options for LaTeX output --------------------------------------------- 181 | 182 | latex_elements = { 183 | # The paper size ('letterpaper' or 'a4paper'). 184 | # 'papersize': 'letterpaper', 185 | # The font size ('10pt', '11pt' or '12pt'). 186 | # 'pointsize': '10pt', 187 | # Additional stuff for the LaTeX preamble. 188 | # 'preamble': '', 189 | } 190 | 191 | # Grouping the document tree into LaTeX files. List of tuples 192 | # (source start file, target name, title, 193 | # author, documentclass [howto, manual, or own class]). 194 | latex_documents = [ 195 | ( 196 | 'index', 197 | 'HAProxyloganalyzer.tex', 198 | 'HAProxy log analyzer Documentation', 199 | 'Gil Forcada', 200 | 'manual', 201 | ), 202 | ] 203 | 204 | # The name of an image file (relative to this directory) to place at the top of 205 | # the title page. 206 | # latex_logo = None 207 | 208 | # For "manual" documents, if this is true, then toplevel headings are parts, 209 | # not chapters. 210 | # latex_use_parts = False 211 | 212 | # If true, show page references after internal links. 213 | # latex_show_pagerefs = False 214 | 215 | # If true, show URL addresses after external links. 216 | # latex_show_urls = False 217 | 218 | # Documents to append as an appendix to all manuals. 219 | # latex_appendices = [] 220 | 221 | # If false, no module index is generated. 
222 | # latex_domain_indices = True 223 | 224 | 225 | # -- Options for manual page output --------------------------------------- 226 | 227 | # One entry per manual page. List of tuples 228 | # (source start file, name, description, authors, manual section). 229 | man_pages = [ 230 | ( 231 | 'index', 232 | 'haproxyloganalyzer', 233 | 'HAProxy log analyzer Documentation', 234 | ['Gil Forcada'], 235 | 1, 236 | ) 237 | ] 238 | 239 | # If true, show URL addresses after external links. 240 | # man_show_urls = False 241 | 242 | 243 | # -- Options for Texinfo output ------------------------------------------- 244 | 245 | # Grouping the document tree into Texinfo files. List of tuples 246 | # (source start file, target name, title, author, 247 | # dir menu entry, description, category) 248 | texinfo_documents = [ 249 | ( 250 | 'index', 251 | 'HAProxyloganalyzer', 252 | 'HAProxy log analyzer Documentation', 253 | 'Gil Forcada', 254 | 'HAProxyloganalyzer', 255 | 'One line description of project.', 256 | 'Miscellaneous', 257 | ), 258 | ] 259 | 260 | # Documents to append as an appendix to all manuals. 261 | # texinfo_appendices = [] 262 | 263 | # If false, no module index is generated. 264 | # texinfo_domain_indices = True 265 | 266 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 267 | # texinfo_show_urls = 'footnote' 268 | 269 | # If true, do not generate a @detailmenu in the "Top" node's menu. 
# Example log line, to understand the regex below (truncated to fit into
# 80 chars):
#
# Dec  9 13:01:26 localhost haproxy[28029]: 127.0.0.1:39759
# [09/Dec/2013:12:59:46.633] loadbalancer default/instance8
# 0/51536/1/48082/99627 200 83285 - - ---- 87/87/87/1/0 0/67
# {77.24.148.74} "GET /path/to/image HTTP/1.1"

# NOTE: the group names are the ones that ``Line._parse_line`` reads with
# ``matches.group(...)``, keep both in sync.
HAPROXY_LINE_REGEX = re.compile(
    # Dec  9 13:01:26 localhost haproxy[28029]:
    # ignore the syslog prefix
    r'\A.*\]:\s+'
    # 127.0.0.1:39759
    r'(?P<client_ip>[a-fA-F\d+\.:]+):(?P<client_port>\d+)\s+'
    # [09/Dec/2013:12:59:46.633]
    r'\[(?P<accept_date>.+)\]\s+'
    # loadbalancer default/instance8
    r'(?P<frontend_name>.*)\s+(?P<backend_name>.*)/(?P<server_name>.*)\s+'
    # 0/51536/1/48082/99627
    r'(?P<tq>-?\d+)/(?P<tw>-?\d+)/(?P<tc>-?\d+)/'
    r'(?P<tr>-?\d+)/(?P<tt>\+?\d+)\s+'
    # 200 83285
    r'(?P<status_code>-?\d+)\s+(?P<bytes_read>\+?\d+)\s+'
    # - - ----
    r'.*\s+'  # ignored by now, should capture cookies and termination state
    # 87/87/87/1/0
    r'(?P<act>\d+)/(?P<fe>\d+)/(?P<be>\d+)/'
    r'(?P<srv>\d+)/(?P<retries>\+?\d+)\s+'
    # 0/67
    r'(?P<queue_server>\d+)/(?P<queue_backend>\d+)\s+'
    # {77.24.148.74}
    r'({(?P<request_headers>.*)}\s+{(?P<response_headers>.*)}\s+|{(?P<headers>.*)}\s+|)'
    # "GET /path/to/image HTTP/1.1"
    r'"(?P<http_request>.*)"'
    r'\Z'  # end of line
)

# Splits the quoted HTTP request into method, path and (optional) protocol.
HTTP_REQUEST_REGEX = re.compile(
    r'(?P<method>\w+)\s+'
    r'(?P<path>(/[`´\\<>/\w:,;.#$!?=&@%_+\'*^~|()\[\]{\}-]*)+)'
    r'(\s+(?P<protocol>\w+/\d\.\d))?'
)


class Line:
    """A single HAProxy log line, parsed into its fields.

    The raw line is parsed on instantiation; ``is_valid`` tells if it matched
    the expected format. On lines that could not be parsed all fields keep
    their ``None`` default.

    For a precise and more detailed description of every field see:
    http://cbonte.github.io/haproxy-dconv/2.2/configuration.html#8.2.3
    """

    #: IP of the upstream server that made the connection to HAProxy.
    client_ip = None
    #: Port used by the upstream server that made the connection to HAProxy.
    client_port = None

    # raw string from log line and its python datetime version
    raw_accept_date = None
    #: datetime object with the exact date when the connection to HAProxy was
    #: made.
    accept_date = None

    #: HAProxy frontend that received the connection.
    frontend_name = None
    #: HAProxy backend that the connection was sent to.
    backend_name = None
    #: Downstream server that HAProxy send the connection to.
    server_name = None

    #: Time in milliseconds waiting the client to send the full HTTP request
    #: (``Tq`` in HAProxy documentation).
    time_wait_request = None
    #: Time in milliseconds that the request spend on HAProxy queues
    #: (``Tw`` in HAProxy documentation).
    time_wait_queues = None
    #: Time in milliseconds to connect to the final server
    #: (``Tc`` in HAProxy documentation).
    time_connect_server = None
    #: Time in milliseconds waiting the downstream server to send the full
    #: HTTP response (``Tr`` in HAProxy documentation).
    time_wait_response = None
    #: Total time in milliseconds between accepting the HTTP request and
    #: sending back the HTTP response (``Tt`` in HAProxy documentation).
    #: Kept as the string found on the log line (it can carry a ``+`` prefix).
    total_time = None

    #: HTTP status code returned to the client (kept as a string).
    status_code = None
    #: Total number of bytes send back to the client (kept as a string).
    bytes_read = None

    # not used by now
    captured_request_cookie = None
    captured_response_cookie = None

    # not used by now
    termination_state = None

    #: Total number of concurrent connections on the process when the
    #: session was logged (``actconn`` in HAProxy documentation).
    connections_active = None
    #: Total number of concurrent connections on the frontend when the
    #: session was logged (``feconn`` in HAProxy documentation).
    connections_frontend = None
    #: Total number of concurrent connections handled by the backend when
    #: the session was logged (``beconn`` in HAProxy documentation).
    connections_backend = None
    #: Total number of concurrent connections still active on the server
    #: when the session was logged (``srv_conn`` in HAProxy documentation).
    connections_server = None
    #: Number of connection retries experienced by this session when
    #: trying to connect to the server.
    retries = None

    #: Total number of requests which were processed before this one in
    #: the server queue (``srv_queue`` in HAProxy documentation).
    queue_server = None
    #: Total number of requests which were processed before this one in
    #: the backend's global queue (``backend_queue`` in HAProxy documentation).
    queue_backend = None

    # Headers captured in the request, as the raw string found between braces.
    captured_request_headers = None
    # Headers captured in the response, as the raw string found between braces.
    captured_response_headers = None

    raw_http_request = None
    #: HTTP method (GET, POST...) used on this request.
    http_request_method = None
    #: Requested HTTP path.
    http_request_path = None
    #: HTTP version used on this request.
    http_request_protocol = None

    raw_line = None

    def __init__(self, line):
        """Keep the raw line and parse it right away.

        :param line: the log line, as a string.
        """
        self.raw_line = line

        self.is_valid = self._parse_line(line)

    @property
    def is_https(self):
        """Returns True if the log line is a SSL connection. False otherwise."""
        path = self.http_request_path
        # the path is still None on lines that could not be parsed
        return path is not None and ':443' in path

    def is_within_time_frame(self, start, end):
        """Tell if the line was accepted between ``start`` and ``end``.

        :param start: datetime with the lower bound; when falsy there is no
            restriction at all (``end`` is not even looked at).
        :param end: datetime with the upper bound; when falsy only ``start``
            is enforced.
        :returns: ``True`` if ``accept_date`` is within the (inclusive) bounds.
        """
        if not start:
            return True
        elif start > self.accept_date:
            return False

        if not end:
            return True
        elif end < self.accept_date:
            return False

        return True

    @property
    def ip(self):
        """Returns the IP provided on the log line, or the client_ip if absent/empty."""
        if self.captured_request_headers is not None:
            ip = self.captured_request_headers.split('|')[0]
            if ip:
                # only get the first IP, if there are more usually
                # are the intermediate servers
                return ip.split(',')[0]
        return self.client_ip

    def _parse_line(self, line):
        """Fill all the fields out of the raw log line.

        :returns: ``True`` if the line matched the expected format,
            ``False`` otherwise (no field is touched in that case).
        """
        matches = HAPROXY_LINE_REGEX.match(line)
        if matches is None:
            return False

        self.client_ip = matches.group('client_ip')
        self.client_port = int(matches.group('client_port'))

        self.raw_accept_date = matches.group('accept_date')
        self.accept_date = self._parse_accept_date()

        self.frontend_name = matches.group('frontend_name')
        self.backend_name = matches.group('backend_name')
        self.server_name = matches.group('server_name')

        self.time_wait_request = int(matches.group('tq'))
        self.time_wait_queues = int(matches.group('tw'))
        self.time_connect_server = int(matches.group('tc'))
        self.time_wait_response = int(matches.group('tr'))
        self.total_time = matches.group('tt')

        self.status_code = matches.group('status_code')
        self.bytes_read = matches.group('bytes_read')

        self.connections_active = matches.group('act')
        self.connections_frontend = matches.group('fe')
        self.connections_backend = matches.group('be')
        self.connections_server = matches.group('srv')
        self.retries = matches.group('retries')

        self.queue_server = int(matches.group('queue_server'))
        self.queue_backend = int(matches.group('queue_backend'))

        self.captured_request_headers = matches.group('request_headers')
        self.captured_response_headers = matches.group('response_headers')
        # a single pair of braces is reported as request headers
        if matches.group('headers') is not None:
            self.captured_request_headers = matches.group('headers')

        self.raw_http_request = matches.group('http_request')
        self._parse_http_request()

        return True

    def _parse_accept_date(self):
        """Convert ``raw_accept_date`` into a datetime object."""
        return datetime.strptime(self.raw_accept_date, '%d/%b/%Y:%H:%M:%S.%f')

    def _parse_http_request(self):
        """Split ``raw_http_request`` into method, path and protocol."""
        matches = HTTP_REQUEST_REGEX.match(self.raw_http_request)
        if matches:
            self.http_request_method = matches.group('method')
            self.http_request_path = matches.group('path')
            self.http_request_protocol = matches.group('protocol')
        else:
            self.handle_bad_http_request()

    def handle_bad_http_request(self):
        """Mark the HTTP request fields as invalid and report non-empty requests."""
        self.http_request_method = 'invalid'
        self.http_request_path = 'invalid'
        self.http_request_protocol = 'invalid'

        if self.raw_http_request != '':
            print(f'Could not process HTTP request {self.raw_http_request}')


# it is not coverage covered as this is executed by the multiprocessor module,
# and setting it up on coverage just for two lines is not worth it
def parse_line(line):  # pragma: no cover
    """Build a ``Line`` out of a raw log line, without surrounding whitespace."""
    return Line(line.strip())
class AttributeCounterMixin:
    """Tally the lines per value of one of their attributes.

    Classes using it set ``attribute_name`` to the name of the line attribute
    to tally. They can override ``raw_results`` (e.g. to trim the data):
    both the text and the json output are built out of it.
    """

    attribute_name = None

    def __init__(self):
        # maps each attribute value to the amount of lines that had it
        self.stats = defaultdict(int)

    def __call__(self, line):
        self.stats[getattr(line, self.attribute_name)] += 1

    def raw_results(self):
        return self.stats

    def _sorted_results(self):
        """Return ``raw_results`` as (value, count) pairs, most frequent first."""
        data = self.raw_results()
        # raw_results can be overridden to return a list of pairs
        if isinstance(data, list):
            data = dict(data)
        return sorted(data.items(), key=lambda data_info: data_info[1], reverse=True)

    def print_data(self):
        """Return one ``- value: count`` text line per tallied value."""
        return ''.join(
            f'- {key}: {value}\n' for key, value in self._sorted_results()
        )

    def json_data(self):
        """Return a list with one ``{value: count}`` dictionary per tallied value."""
        # build it out of raw_results (not straight from self.stats) so that
        # the json output honors whatever trimming a subclass does, exactly
        # like the text output
        return [{key: value} for key, value in self._sorted_results()]
class Counter(BaseCommandMixin):
    """Count valid lines."""

    def __init__(self):
        # amount of lines this command has been called with so far
        self.counter = 0

    def __call__(self, line):
        # the line itself is not inspected, every call counts as one
        self.counter = self.counter + 1

    def raw_results(self):
        total = self.counter
        return total
class SlowRequests(BaseCommandMixin):
    """List all requests that are considered slow to process (1 second)."""

    # minimum response time, in milliseconds, for a request to be kept
    threshold = 1000

    def __init__(self):
        # response times of the requests that reached the threshold
        self.slow_requests = []

    def __call__(self, line):
        elapsed = line.time_wait_response
        is_slow = elapsed >= self.threshold
        if is_slow:
            self.slow_requests.append(elapsed)

    def raw_results(self):
        # sort a copy, the collected list is left as it is
        ordered = list(self.slow_requests)
        ordered.sort()
        return ordered
class QueuePeaks(BaseCommandMixin):
    """Give stats about queue peaks in HAProxy.

    When servers can not handle all incoming requests, they have to wait on HAProxy.
    On every log line there is an account for how many requests have been piled up.

    A queue peak is defined by the biggest value on the backend queue
    on a series of log lines that are between log lines with the queue empty.
    """

    def __init__(self):
        # maps a timestamp key to a (backend queue size, accept date) pair
        self.requests = {}
        # only peaks strictly bigger than this value are reported
        self.threshold = 1

    @staticmethod
    def _generate_key(date):
        """Create a suitable unique key out of a python datetime.datetime object."""
        # get the unix timestamp out of the date,
        # after removing the microseconds from it
        no_microseconds = date.replace(microsecond=0)
        time_parts = no_microseconds.timetuple()
        unixtime = time.mktime(time_parts)

        # add back the microseconds to the key, as decimals.
        # ``microsecond`` is always a count of millionths of a second, so the
        # divisor is fixed: scaling by its number of digits turned 5000
        # microseconds (0.005s) into 0.5s and misordered the requests.
        microseconds = date.microsecond / 1000000
        key = unixtime + microseconds
        return key

    def __call__(self, line):
        key = self._generate_key(line.accept_date)
        self.requests[key] = (line.queue_backend, line.accept_date)

    def raw_results(self):
        """Return the list of peaks found, in chronological order.

        Each peak is a dictionary with the keys ``peak`` (biggest queue size),
        ``span`` (amount of lines with requests on the queue), ``started``
        and ``finished`` (datetime objects).
        """
        peaks = []
        current_peak = 0
        requests_on_queue = 0
        timestamp = None

        current_span = 0
        first_with_queue = None

        # keys are unique, so sorting the items orders them by timestamp
        for _key, (requests_on_queue, timestamp) in sorted(self.requests.items()):
            # set the peak
            if requests_on_queue > current_peak:
                current_peak = requests_on_queue

            # set the span
            if requests_on_queue > 0:
                current_span += 1

                # set when the queue starts
                if first_with_queue is None:
                    first_with_queue = timestamp

            # if the queue is already flushed, record it and reset values
            if requests_on_queue == 0 and current_peak > self.threshold:
                data = {
                    'peak': current_peak,
                    'span': current_span,
                    'started': first_with_queue,
                    'finished': timestamp,
                }
                peaks.append(data)
                current_peak = 0
                current_span = 0
                first_with_queue = None

        # case of a series that does not end
        if requests_on_queue > 0 and current_peak > self.threshold:
            data = {
                'peak': current_peak,
                'span': current_span,
                'started': first_with_queue,
                'finished': timestamp,
            }
            peaks.append(data)

        return peaks

    def print_data(self):
        """Return one text line per peak, with its dates in ISO format."""
        data = ''
        for peak_info in self.raw_results():
            data += f'- peak: {peak_info.get("peak")} '  # noqa: Q000
            data += f'- span: {peak_info.get("span")} '  # noqa: Q000
            data += f'- started: {peak_info.get("started").isoformat()} '  # noqa: Q000
            data += (
                f'- finished: {peak_info.get("finished").isoformat()}\n'  # noqa: Q000
            )
        return data

    def json_data(self):
        """Return the peaks with their dates converted to ISO format strings."""
        data = self.raw_results()
        for peak_info in data:
            peak_info['started'] = peak_info['started'].isoformat()
            peak_info['finished'] = peak_info['finished'].isoformat()
        return data
342 | """ 343 | 344 | def __init__(self): 345 | self.requests = defaultdict(int) 346 | 347 | def generate_key(self, accept_date): 348 | date_with_minute_precision = accept_date.replace(second=0, microsecond=0) 349 | unixtime = time.mktime(date_with_minute_precision.timetuple()) 350 | return unixtime 351 | 352 | def __call__(self, line): 353 | key = self.generate_key(line.accept_date) 354 | self.requests[key] += 1 355 | 356 | def raw_results(self): 357 | """Return the list of requests sorted by the timestamp.""" 358 | data = sorted(self.requests.items(), key=lambda data_info: data_info[0]) 359 | return data 360 | 361 | def print_data(self): 362 | data = '' 363 | for date_info, count in self.raw_results(): 364 | date = datetime.fromtimestamp(date_info).isoformat() 365 | data += f'- {date}: {count}\n' 366 | return data 367 | 368 | def json_data(self): 369 | data = [] 370 | for date_info, count in self.raw_results(): 371 | date = datetime.fromtimestamp(date_info).isoformat() 372 | data.append({date: count}) 373 | return data 374 | 375 | 376 | class RequestsPerHour(RequestsPerMinute): 377 | """Report the count of requests per hour. 378 | 379 | Combine it with time constrains (`-s` and `-d`) otherwise the output will be long. 
380 | """ 381 | 382 | def generate_key(self, accept_date): 383 | date_with_hour_precision = accept_date.replace( 384 | minute=0, second=0, microsecond=0 385 | ) 386 | unixtime = time.mktime(date_with_hour_precision.timetuple()) 387 | return unixtime 388 | 389 | 390 | class Print(BaseCommandMixin): 391 | """Returns the raw lines to be printed.""" 392 | 393 | def __call__(self, line): 394 | print(line.raw_line) 395 | 396 | def raw_results(self): 397 | return 398 | 399 | def results(self, output=None): 400 | return 401 | -------------------------------------------------------------------------------- /tests/test_commands.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from datetime import timedelta 3 | from haproxy import commands 4 | 5 | import pytest 6 | 7 | 8 | def check_output(cmd, output, expected, capsys): 9 | """Validate the output of commands.""" 10 | name = cmd.command_line_name().upper() 11 | cmd.results(output=output) 12 | output_text = capsys.readouterr().out 13 | if output == 'json': 14 | assert f'{{"{name}": {expected}}}' in output_text 15 | else: 16 | assert f'{name}\n====' in output_text 17 | assert f'====\n{expected}\n' in output_text 18 | 19 | 20 | @pytest.mark.parametrize( 21 | ('klass', 'expected'), 22 | [ 23 | (commands.StatusCodesCounter, 'status_codes_counter'), 24 | (commands.AverageResponseTime, 'average_response_time'), 25 | (commands.Counter, 'counter'), 26 | (commands.IpCounter, 'ip_counter'), 27 | ], 28 | ) 29 | def test_commands_names(klass, expected): 30 | """Check that the command line name of command classes are generated correctly.""" 31 | assert klass.command_line_name() == expected 32 | 33 | 34 | def test_counter_results(): 35 | """Test the Counter command. 36 | 37 | It plain and simply counts all the lines passed to it. 
def test_http_methods_results(line_factory):
    """Check the per-verb tally built by the HttpMethods command.

    Nothing is reported before any line is processed; afterwards every HTTP
    verb maps to the amount of lines that used it.
    """
    usage = {'POST': 4, 'GET': 3, 'PUT': 2}
    cmd = commands.HttpMethods()
    assert cmd.raw_results() == {}

    for verb, times in usage.items():
        line = line_factory(http_request=f'{verb} /path/to/image HTTP/1.1')
        for _ in range(times):
            cmd(line)

    results = cmd.raw_results()
    assert len(results) == 3
    for verb, times in usage.items():
        assert results[verb] == times
@pytest.mark.parametrize(
    ('output', 'expected'),
    [
        (None, '- 172.4.3.2: 3\n- 8.7.6.5: 2'),
        ('json', '[{"172.4.3.2": 3}, {"8.7.6.5": 2}]'),
    ],
)
def test_ip_counter_output(line_factory, capsys, output, expected):
    """Check what the IpCounter command prints, both as text and as JSON.

    Two IPs are fed a different amount of times and the rendered breakdown is
    compared against the expected text.
    """
    hits_per_ip = [('172.4.3.2', 3), ('8.7.6.5', 2)]
    cmd = commands.IpCounter()
    for ip, hits in hits_per_ip:
        line = line_factory(headers=f' {{{ip}}}')
        for _ in range(hits):
            cmd(line)
    check_output(cmd, output, expected, capsys)
def test_top_ips_print_results(line_factory):
    """Check that TopIps prints ten entries at most.

    Fourteen IPs are generated (the one ending in ``.0`` is never fed to the
    command), each used as many times as its last octet, and only the ten
    most used ones are expected in the printed data.
    """
    cmd = commands.TopIps()
    for last_octet in range(14):
        ip = f'192.168.0.{last_octet}'
        line = line_factory(headers=f' {{{ip}}}')
        for _ in range(last_octet):
            cmd(line)

    # drop the empty chunks left by the split
    printed = [row for row in cmd.print_data().split('\n') if row]
    assert len(printed) == 10
    assert printed[0] == '- 192.168.0.13: 13'
    assert printed[-1] == '- 192.168.0.4: 4'
def test_status_codes_counter_results(line_factory):
    """Check the per-status tally built by the StatusCodesCounter command.

    Nothing is reported before any line is processed; afterwards every status
    code maps to the amount of lines that returned it.
    """
    usage = {'200': 4, '301': 3, '500': 2}
    cmd = commands.StatusCodesCounter()
    assert cmd.raw_results() == {}

    for status_code, times in usage.items():
        line = line_factory(status=status_code)
        for _ in range(times):
            cmd(line)

    results = cmd.raw_results()
    assert len(results) == 3
    for status_code, times in usage.items():
        assert results[status_code] == times
def test_slow_requests_results(line_factory):
    """Check which response times the SlowRequests command reports.

    Out of the given series, only the requests that took 1000 milliseconds
    or more are expected back, sorted from fastest to slowest.
    """
    timings = (1003, 987, 456, 2013, 45000, 1000, 3200, 999)
    cmd = commands.SlowRequests()
    assert cmd.raw_results() == []

    for elapsed in timings:
        line = line_factory(tr=elapsed)
        cmd(line)

    assert cmd.raw_results() == [1000, 1003, 2013, 3200, 45000]
def test_top_request_paths_results(line_factory):
    """Check the ranking built by the TopRequestPaths command.

    Paths ``/file/0`` to ``/file/10`` are each requested as many times as
    their number; the ten most requested ones are expected back as
    ``(path, count)`` pairs, the most requested first.
    """
    cmd = commands.TopRequestPaths()
    assert cmd.raw_results() == []

    for hits in range(11):
        path = f'/file/{hits}'
        line = line_factory(http_request=f'GET {path} HTTP/1.1')
        for _ in range(hits):
            cmd(line)

    results = cmd.raw_results()
    assert len(results) == 10
    # position 0 holds /file/10, position 9 holds /file/1
    for position, hits in enumerate(range(10, 0, -1)):
        assert results[position] == (f'/file/{hits}', hits)
def test_slow_requests_counter_results(line_factory):
    """Check the amount of requests the SlowRequestsCounter command reports.

    Out of the given series, the five requests that took 1000 milliseconds
    or more are the ones expected to be counted.
    """
    timings = (1003, 987, 456, 2013, 45000, 1000, 3200, 999)
    cmd = commands.SlowRequestsCounter()
    assert cmd.raw_results() == 0

    for elapsed in timings:
        line = line_factory(tr=elapsed)
        cmd(line)

    assert cmd.raw_results() == 5
@pytest.mark.parametrize(
    ('series', 'average'),
    [
        ((1003, 987, 456, 2013, 1000, 3200, 999), 1379.71),
        ((110, -1, 110), 110),  # aborted connections are ignored
        ((45, 30, 0), 25),  # responses that take 0 milliseconds are still counted
    ],
)
def test_average_response_time_results(line_factory, series, average):
    """Check the value computed by the AverageResponseTime command.

    It starts at ``0.0`` and, once the series of response times is processed,
    it has to match the expected average.
    """
    cmd = commands.AverageResponseTime()
    assert cmd.raw_results() == 0.0

    for elapsed in series:
        line = line_factory(tr=elapsed)
        cmd(line)

    assert cmd.raw_results() == average
def test_server_load_results(line_factory):
    """Check the per-server tally built by the ServerLoad command.

    Nothing is reported before any line is processed; afterwards every server
    name maps to the amount of requests it processed.
    """
    load = {'server4': 4, 'server3': 3, 'server5': 5}
    cmd = commands.ServerLoad()
    assert cmd.raw_results() == {}

    for name, times in load.items():
        line = line_factory(server_name=name)
        for _ in range(times):
            cmd(line)

    results = cmd.raw_results()
    assert len(results) == 3
    for name, times in load.items():
        assert results[name] == times
def test_queue_peaks_no_queues(line_factory):
    """Test the QueuePeaks command.

    Lines whose backend queue is always empty are all stored by the command,
    but no peak is reported for them.
    """
    cmd = commands.QueuePeaks()
    reference = datetime.now()
    for second in range(4):
        moment = reference.replace(second=second)
        line = line_factory(
            queue_backend=0, accept_date=f'{moment:%d/%b/%Y:%H:%M:%S.%f}'
        )
        cmd(line)
    assert len(cmd.requests) == 4
    assert cmd.raw_results() == []
def test_queue_peaks_multiple_sorted(line_factory):
    """Test the QueuePeaks command.

    Two separate peaks are fed and they are expected back sorted by date.
    The position of each queue size is used as the decimal part of the
    seconds, i.e. position 1 is parsed as 100000 microseconds.
    """
    queue_sizes = [0, 4, 0, 0, 19, 4, 0]
    cmd = commands.QueuePeaks()
    for decimals, queue in enumerate(queue_sizes):
        cmd(
            line_factory(
                queue_backend=queue,
                accept_date=f'15/Jan/2017:05:23:05.{decimals}',
            )
        )

    results = cmd.raw_results()
    assert len(results) == 2

    day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
    earlier, later = results
    assert earlier['peak'] == 4
    assert earlier['started'] == day.replace(microsecond=100000)
    assert later['peak'] == 19
    assert later['started'] == day.replace(microsecond=400000)
def test_queue_peaks_did_not_finish(line_factory):
    """Test the QueuePeaks command.

    Corner case: the queue is never flushed within the processed lines, the
    peak is expected to be reported anyway, finishing on the last line.
    """
    queue_sizes = [4, 19, 12]
    cmd = commands.QueuePeaks()
    for decimals, queue in enumerate(queue_sizes):
        cmd(
            line_factory(
                queue_backend=queue,
                accept_date=f'15/Jan/2017:05:23:05.{decimals}',
            )
        )

    results = cmd.raw_results()
    assert len(results) == 1

    day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
    (peak_info,) = results
    assert peak_info['peak'] == 19
    assert peak_info['span'] == 3
    assert peak_info['started'] == day
    assert peak_info['finished'] == day.replace(microsecond=200000)
def test_connection_type_results(line_factory):
    """Check the tallies kept by the ConnectionType command.

    Both counters start at zero; requests whose path contains the SSL port
    (443) add to the first one, any other request adds to the second one.
    """
    requests = [('/Virtual:443/something', 4), ('/something', 2)]
    cmd = commands.ConnectionType()
    assert cmd.raw_results() == (0, 0)

    for path, times in requests:
        line = line_factory(http_request=f'GET {path} HTTP/1.1')
        for _ in range(times):
            cmd(line)

    assert cmd.raw_results() == (4, 2)
685 | """ 686 | cmd = commands.RequestsPerMinute() 687 | assert cmd.raw_results() == [] 688 | now = datetime.now() 689 | # to avoid leaping into the next/previous minute with the timedeltas below 690 | now = now.replace(second=30) 691 | microseconds = timedelta(microseconds=200) 692 | seconds = timedelta(seconds=5) 693 | minutes = timedelta(minutes=5) 694 | hours = timedelta(hours=2) 695 | dates = [ 696 | now, 697 | now + microseconds, 698 | now - microseconds, 699 | now + seconds, 700 | now - seconds, 701 | now + minutes, 702 | now - minutes, 703 | now + hours, 704 | now - hours, 705 | ] 706 | for time in dates: 707 | cmd(line_factory(accept_date=f'{time:%d/%b/%Y:%H:%M:%S.%f}')) 708 | results = cmd.raw_results() 709 | assert len(results) == 5 710 | assert results[0][1] == 1 711 | assert results[1][1] == 1 712 | assert results[2][1] == 5 # now and the +- microseconds and +- seconds 713 | assert results[3][1] == 1 714 | assert results[4][1] == 1 715 | 716 | 717 | @pytest.mark.parametrize('output', [None, 'json']) 718 | def test_requests_per_minute_output(line_factory, capsys, output): 719 | """Test the RequestsPerMinute command. 720 | 721 | It counts how many requests have been made per minute. 
def test_requests_per_hour_results(line_factory):
    """Check how the RequestsPerHour command groups requests.

    Seven requests around a fixed date are fed: the ones a few minutes apart
    fall on the same hour, the ones hours apart get an entry of their own.
    """
    cmd = commands.RequestsPerHour()
    assert cmd.raw_results() == []

    reference = datetime(year=2022, month=12, day=3, hour=14, minute=10, second=30)
    offsets = [
        timedelta(0),
        timedelta(minutes=5),
        -timedelta(minutes=5),
        timedelta(hours=2),
        -timedelta(hours=2),
        timedelta(hours=4),
        -timedelta(hours=4),
    ]
    for offset in offsets:
        moment = reference + offset
        cmd(line_factory(accept_date=f'{moment:%d/%b/%Y:%H:%M:%S.%f}'))

    results = cmd.raw_results()
    assert len(results) == 5
    # the middle entry holds the reference date and the +- minutes ones
    assert [entry[1] for entry in results] == [1, 1, 3, 1, 1]
def test_print_results_and_output(line_factory, capsys):
    """Check that the Print command writes every line it gets to stdout.

    The command itself never reports any result: the lines are only found
    on the captured output, one per row.
    """
    paths = ('/first-thing-to-do', '/second/thing/to-do')
    cmd = commands.Print()
    assert cmd.raw_results() is None

    for path in paths:
        line = line_factory(http_request=f'GET {path} HTTP/1.1')
        cmd(line)
    assert cmd.raw_results() is None

    rows = capsys.readouterr().out.split('\n')
    assert len(rows) == 3
    first, second, trailing = rows
    assert '/first-thing-to-do' in first
    assert '/second/thing/to-do' in second
    # the output ends with a new line, thus the last chunk is empty
    assert trailing == ''
12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/philosophy/why-not-lgpl.html>. 675 | --------------------------------------------------------------------------------