├── tests
    ├── __init__.py
    ├── files
    │   ├── 2_ok_1_invalid.log
    │   └── small.log
    ├── conftest.py
    ├── test_utils.py
    ├── test_main.py
    ├── test_log_file.py
    ├── test_argparse.py
    ├── test_log_line.py
    ├── test_filters.py
    ├── test_regex.py
    └── test_commands.py
├── src
    └── haproxy
    │   ├── __init__.py
    │   ├── logfile.py
    │   ├── utils.py
    │   ├── filters.py
    │   ├── main.py
    │   ├── line.py
    │   └── commands.py
├── requirements.in
├── .coveragerc
├── docs
    ├── source
    │   ├── README_link.rst
    │   ├── changelog.rst
    │   ├── modules.rst
    │   ├── index.rst
    │   └── conf.py
    └── Makefile
├── setup.cfg
├── .gitignore
├── MANIFEST.in
├── requirements.txt
├── .flake8
├── .github
    └── workflows
    │   ├── release.yml
    │   └── tests.yml
├── tox.ini
├── .pre-commit-config.yaml
├── pyproject.toml
├── README.rst
├── CHANGES.rst
└── LICENSE


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/haproxy/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
3 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [report]
2 | omit =
3 |     */python?.?/*
4 | 


--------------------------------------------------------------------------------
/docs/source/README_link.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../README.rst
2 | 


--------------------------------------------------------------------------------
/docs/source/changelog.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../CHANGES.rst
2 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [zest.releaser]
2 | create-wheel = yes
3 | 
4 | [tool:pytest]
5 | testpaths = tests
6 | norecursedirs = .venv
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | *.pickle
 3 | 
 4 | # C extensions
 5 | *.so
 6 | 
 7 | # Packages
 8 | *.egg
 9 | *.egg-info
10 | dist
11 | build
12 | eggs
13 | parts
14 | bin
15 | var
16 | sdist
17 | develop-eggs
18 | .installed.cfg
19 | lib
20 | lib64
21 | __pycache__
22 | 
23 | # Unit test / coverage reports
24 | coverage
25 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include CHANGES.rst
 2 | exclude .flake8
 3 | exclude *.in
 4 | exclude *.txt
 5 | exclude *.yaml
 6 | exclude .coveragerc
 7 | exclude tox.ini
 8 | recursive-exclude docs *.py
 9 | recursive-exclude docs *.rst
10 | recursive-exclude docs Makefile
11 | recursive-exclude tests *.log
12 | recursive-exclude tests *.py
13 | 


--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
 1 | Haproxy Modules
 2 | ===============
 3 | 
 4 | 
 5 | Log
 6 | ---
 7 | 
 8 | .. automodule:: haproxy.logfile
 9 | 
10 | .. autoclass:: Log
11 |    :members:
12 |    :private-members:
13 | 
14 | 
15 | Line
16 | ----
17 | 
18 | .. automodule:: haproxy.line
19 | 
20 | .. autoclass:: Line
21 |     :members:
22 | 
23 | Filters
24 | -------
25 | .. automodule:: haproxy.filters
26 |    :members:
27 | 
28 | Commands
29 | --------
30 | .. automodule:: haproxy.commands
31 |    :members:
32 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. HAProxy log analyzer documentation master file, created by
 2 |    sphinx-quickstart on Thu Dec 19 00:06:54 2013.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | ================================================
 7 | Welcome to HAProxy log analyzer's documentation!
 8 | ================================================
 9 | 
10 | Contents:
11 | 
12 | .. toctree::
13 |    :maxdepth: 2
14 | 
15 |    README_link
16 |    modules
17 |    changelog
18 | 
19 | 
20 | ==================
21 | Indices and tables
22 | ==================
23 | 
24 | * :ref:`genindex`
25 | * :ref:`modindex`
26 | * :ref:`search`
27 | 
28 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is autogenerated by pip-compile with Python 3.8
 3 | # by the following command:
 4 | #
 5 | #    pip-compile requirements.in
 6 | #
 7 | attrs==22.1.0
 8 |     # via pytest
 9 | coverage[toml]==6.5.0
10 |     # via
11 |     #   coverage
12 |     #   pytest-cov
13 | exceptiongroup==1.1.3
14 |     # via pytest
15 | iniconfig==1.1.1
16 |     # via pytest
17 | packaging==21.3
18 |     # via pytest
19 | pluggy==1.0.0
20 |     # via pytest
21 | pyparsing==3.0.9
22 |     # via packaging
23 | pytest==7.2.0
24 |     # via
25 |     #   -r requirements.in
26 |     #   pytest-cov
27 | pytest-cov==4.0.0
28 |     # via -r requirements.in
29 | tomli==2.0.1
30 |     # via
31 |     #   coverage
32 |     #   pytest
33 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | doctests = 1
 3 | ignore =
 4 |     # coding magic comment not found
 5 |     C101,
 6 |     # missing docstring in public package
 7 |     D104,
 8 |     # missing docstring in magic method
 9 |     D105,
10 |     # missing docstring in public nested class (e.g. Meta class)
11 |     D106,
12 |     # missing docstring in __init__ (against Google/NumPy guidelines)
13 |     D107,
14 |     # missing blank line after last section
15 |     D413,
16 |     # black takes care of whitespace before colons (:)
17 |     E203,
18 |     # black takes care of whitespace after commas
19 |     E231,
20 |     # black takes care of line length
21 |     E501,
22 |     # all-lowercase method names
23 |     N802,
24 |     # Change outer quotes to avoid escaping inner quotes
25 |     Q003,
26 |     # black takes care of where to break lines
27 |     W503,
28 | 


--------------------------------------------------------------------------------
/tests/files/2_ok_1_invalid.log:
--------------------------------------------------------------------------------
1 | Dec  9 13:01:26 localhost haproxy[28029]: 127.0.0.1:38037 [09/Dec/2013:12:00:03.205] loadbalancer default/instance5 0/133/0/294/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "GET /VirtualHostBase/http/www.example.com:80/website/VirtualHostRoot/autoren/dummy/its-time-for-prostitution%231386586409135007 HTTP/1.1"
2 | Dec  9 13:01:26 localhost haproxy[28029]: 127.0.0.1:38401 [] loadbalancer default/instance6 0/0/0/155/156 302 15987 - - ---- 18/18/18/0/0 0/0 {123.123.123.123} "GET /VirtualHostBase/http/www.example.com:80/website/VirtualHostRoot/autoren/dummy/westliche-wertegemeinschft/view HTTP/1.1"
3 | Dec  9 13:01:26 localhost haproxy[28029]: 127.0.0.1:38414 [09/Dec/2013:12:00:11.476] loadbalancer default/instance9 0/0/0/200/202 200 19056 - - ---- 18/18/18/1/0 0/0 {123.123.123.123} "GET /VirtualHostBase/http/www.example.com:80/website/VirtualHostRoot/acl_users/credentials_cookie_auth/require_login?came_from=http%3A//www.example.com/autoren/dummy/westliche-wertegemeinschft/view HTTP/1.1"
4 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: GitHub
 2 | on:
 3 |   push:
 4 |     tags: ["*"]
 5 | 
 6 | jobs:
 7 |   release:
 8 |     permissions: write-all
 9 |     runs-on: "ubuntu-latest"
10 |     name: Create a Release
11 |     env:
12 |       GH_TOKEN: ${{ github.token }}
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v4
16 |       - name: Set up Python 3.11
17 |         uses: actions/setup-python@v4
18 |         with:
19 |           python-version: "3.11"
20 |       - name: Create a new GitHub release
21 |         run: |
22 |           # get the lines where the changelog for the last release starts and finishes
23 |           first_line=$(grep -n "\-\-\-\-" CHANGES.rst | cut -d":" -f1 |head -n1)
24 |           last_line=$(grep -n "\-\-\-\-" CHANGES.rst | cut -d":" -f1 |head -n2 | tail -n1)
25 | 
26 |           # do some math to adjust the line numbers
27 |           first=$((${first_line}+1))
28 |           last=$((${last_line}-2))
29 |           end=$((${last_line}-1))
30 | 
31 |           # extract the changelog
32 |           sed -n "${first},${last}p;${end}q" CHANGES.rst > body.txt
33 | 
34 |           cat body.txt
35 | 
36 |           gh release create ${{ github.ref_name }} -p -F body.txt
37 | 
38 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | min_version = 4.4.0
 3 | envlist =
 4 |     format
 5 |     lint
 6 |     coverage
 7 |     py38
 8 |     py39
 9 |     py310
10 |     py311
11 |     py312
12 |     pypy3
13 | 
14 | [gh-actions]
15 | python =
16 |     3.8: py38
17 |     3.9: py39
18 |     3.10: py310
19 |     3.11: py311
20 |     3.12: py312
21 | 
22 | [testenv]
23 | description = run the distribution tests
24 | use_develop = true
25 | skip_install = false
26 | constrain_package_deps = true
27 | deps =
28 |     -r requirements.txt
29 | commands =
30 |     pytest
31 | 
32 | [testenv:format]
33 | description = automatically reformat code
34 | skip_install = true
35 | deps =
36 |     pre-commit
37 | commands =
38 |     pre-commit run -a pyupgrade
39 |     pre-commit run -a isort
40 |     pre-commit run -a black
41 | 
42 | [testenv:lint]
43 | description = run linters that will help improve the code style
44 | skip_install = true
45 | deps =
46 |     pre-commit
47 | commands =
48 |     pre-commit run -a
49 | 
50 | [testenv:coverage]
51 | description = get a test coverage report
52 | use_develop = true
53 | skip_install = false
54 | deps =
55 |     -r requirements.txt
56 | commands =
57 |     pytest --cov --cov-report term-missing
58 | 
59 | [testenv:generate-constrains]
60 | description = update the pinned requirements.txt file
61 | basepython = python3.8
62 | skip_install = true
63 | deps =
64 |     pip-tools
65 | commands =
66 |     pip-compile requirements.in
67 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | ci:
 2 |     autofix_prs: false
 3 |     autoupdate_schedule: monthly
 4 | 
 5 | repos:
 6 | -   repo: https://github.com/asottile/pyupgrade
 7 |     rev: v3.14.0
 8 |     hooks:
 9 |     -   id: pyupgrade
10 |         args: [--py38-plus]
11 | -   repo: https://github.com/pycqa/isort
12 |     rev: 5.12.0
13 |     hooks:
14 |     -   id: isort
15 | -   repo: https://github.com/psf/black
16 |     rev: 23.9.1
17 |     hooks:
18 |     -   id: black
19 | -   repo: https://github.com/PyCQA/flake8
20 |     rev: 6.1.0
21 |     hooks:
22 |     -   id: flake8
23 |         additional_dependencies:
24 |           - flake8-bugbear
25 |           - flake8-builtins
26 |           - flake8-comprehensions
27 |           - flake8-debugger
28 |           - flake8-deprecated
29 |           - flake8-isort
30 |           - flake8-pep3101
31 |           - flake8-quotes
32 |           - flake8-pytest-style
33 | 
34 | -   repo: https://github.com/codespell-project/codespell
35 |     rev: v2.2.6
36 |     hooks:
37 |     -   id: codespell
38 |         additional_dependencies:
39 |           - tomli
40 | -   repo: https://github.com/mgedmin/check-manifest
41 |     rev: "0.49"
42 |     hooks:
43 |     -   id: check-manifest
44 | -   repo: https://github.com/regebro/pyroma
45 |     rev: "4.2"
46 |     hooks:
47 |     -   id: pyroma
48 | -   repo: https://github.com/mgedmin/check-python-versions
49 |     rev: "0.21.3"
50 |     hooks:
51 |     -   id: check-python-versions
52 | 


--------------------------------------------------------------------------------
/src/haproxy/logfile.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from haproxy.line import parse_line
 3 | from haproxy.utils import date_str_to_datetime
 4 | from haproxy.utils import delta_str_to_timedelta
 5 | from multiprocessing import Pool
 6 | 
 7 | 
 8 | class Log:
 9 |     def __init__(self, logfile=None, start=None, delta=None, show_invalid=False):
10 |         self.logfile = logfile
11 |         self.show_invalid = show_invalid
12 |         self.start = None
13 |         self.end = None
14 | 
15 |         if start:
16 |             self.start = date_str_to_datetime(start)
17 | 
18 |         if delta:
19 |             delta = delta_str_to_timedelta(delta)
20 | 
21 |             if isinstance(self.start, datetime):
22 |                 self.end = self.start + delta
23 | 
24 |         self.invalid_lines = 0
25 |         self.valid_lines = 0
26 | 
27 |     def __iter__(self):
28 |         start = datetime.now()
29 |         with open(self.logfile) as logfile, Pool() as pool:
30 |             for index, line in enumerate(pool.imap(parse_line, logfile)):
31 |                 if line.is_valid:
32 |                     self.valid_lines += 1
33 |                     if line.is_within_time_frame(self.start, self.end):
34 |                         yield line
35 |                 else:
36 |                     if self.show_invalid:
37 |                         print(line.raw_line)
38 |                     self.invalid_lines += 1
39 | 
40 |                 if index % 10000 == 0 and index > 0:  # pragma: no cover
41 |                     print('.', end='', flush=True)
42 | 
43 |         end = datetime.now()
44 |         print(f'\nIt took {end - start}')
45 | 
46 |     @property
47 |     def total_lines(self):
48 |         return self.valid_lines + self.invalid_lines
49 | 


--------------------------------------------------------------------------------
/tests/files/small.log:
--------------------------------------------------------------------------------
 1 | Dec  9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:10:01:04.205] loadbalancer default/instance1 0/133/0/201/430 200 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "GET /hello HTTP/1.1"
 2 | Dec  9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:10:01:04.205] loadbalancer default/instance2 0/133/0/2942/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.124} "HEAD /world HTTP/1.1"
 3 | Dec  9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:12:03:06.205] loadbalancer default/instance3 0/133/0/94/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.123} "POST /hello HTTP/1.1"
 4 | Dec  9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:10:01:04.205] loadbalancer default/instance2 0/133/0/1293/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.124} "GET /free HTTP/1.1"
 5 | Dec  9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:11:02:05.205] loadbalancer default/instance3 0/133/0/20095/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "HEAD /fra HTTP/1.1"
 6 | Dec  9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/2936/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.124} "GET /world HTTP/1.1"
 7 | Dec  9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:12:03:06.205] loadbalancer default/instance1 0/133/0/4/437 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "POST /freitag HTTP/1.1"
 8 | Dec  9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:12:03:06.205] loadbalancer default/instance2 0/133/0/29408/430 200 17610 - - ---- 21/21/21/1/0 0/1 "GET /free HTTP/1.1"
 9 | Dec  9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/409/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "HEAD /hello HTTP/1.1"
10 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "haproxy_log_analysis"
 7 | version = "6.0.0a5.dev0"
 8 | authors = [
 9 |   { name="Gil Forcada Codinachs", email="gil.gnome@gmail.com" },
10 | ]
11 | description = "Analyze HAProxy log files"
12 | keywords = ["haproxy", "log", "sysadmin", "devops", "report" ]
13 | license = {file = "LICENSE"}
14 | readme = "README.rst"
15 | requires-python = ">=3.8"
16 | classifiers = [
17 |         "Development Status :: 5 - Production/Stable",
18 |         "Environment :: Console",
19 |         "Intended Audience :: System Administrators",
20 |         "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
21 |         "Operating System :: OS Independent",
22 |         "Programming Language :: Python",
23 |         "Programming Language :: Python :: 3",
24 |         "Programming Language :: Python :: 3 :: Only",
25 |         "Programming Language :: Python :: 3.8",
26 |         "Programming Language :: Python :: 3.9",
27 |         "Programming Language :: Python :: 3.10",
28 |         "Programming Language :: Python :: 3.11",
29 |         "Programming Language :: Python :: 3.12",
30 |         "Programming Language :: Python :: Implementation :: CPython",
31 |         "Programming Language :: Python :: Implementation :: PyPy",
32 |         "Topic :: Internet :: Log Analysis",
33 | ]
34 | 
35 | [project.urls]
36 | "Homepage" = "https://github.com/gforcada/haproxy_log_analysis"
37 | "Bug Tracker" = "https://github.com/gforcada/haproxy_log_analysis/issues"
38 | "Changelog" = "https://github.com/gforcada/haproxy_log_analysis/blob/main/CHANGES.rst"
39 | 
40 | [project.scripts]
41 | haproxy_log_analysis = "haproxy.main:console_script"
42 | 
43 | [tool.isort]
44 | profile = "plone"
45 | 
46 | [tool.black]
47 | target-version = ["py38"]
48 | skip-string-normalization = true
49 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: Testing
 2 | on:
 3 |   push:
 4 |     branches: [main]
 5 |   pull_request:
 6 |     branches: [main]
 7 | env:
 8 |   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 9 | jobs:
10 |   test:
11 |     name: Testing on
12 |     runs-on: "ubuntu-latest"
13 |     strategy:
14 |       matrix:
15 |         python-version: ["3.12", "3.11", "3.10", "3.9", "3.8", "pypy-3.9"]
16 |     steps:
17 |       - uses: actions/checkout@v4
18 |       - name: Set up Python
19 |         uses: actions/setup-python@v4
20 |         with:
21 |           python-version: ${{ matrix.python-version }}
22 |       - name: Cache packages
23 |         uses: actions/cache@v3
24 |         with:
25 |           path: ~/.cache/pip
26 |           key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('tox.ini') }}
27 |           restore-keys: |
28 |             ${{ runner.os }}-pip-${{ matrix.python-version }}-
29 |       - name: Install dependencies
30 |         run: python -m pip install tox tox-gh-actions
31 |       - name: Test
32 |         run: tox
33 | 
34 |   lint:
35 |     name: Lint code
36 |     runs-on: "ubuntu-latest"
37 |     strategy:
38 |       matrix:
39 |         python-version: [3.8]
40 |     steps:
41 |       - uses: actions/checkout@v4
42 |       - name: Set up Python
43 |         uses: actions/setup-python@v4
44 |         with:
45 |           python-version: ${{ matrix.python-version }}
46 |       - name: Cache packages
47 |         uses: actions/cache@v3
48 |         with:
49 |           path: |
50 |             ~/.cache/pre-commit
51 |             ~/.cache/pip
52 |           key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('tox.ini') }}
53 |           restore-keys: |
54 |             ${{ runner.os }}-pip-${{ matrix.python-version }}-
55 |       - name: Install dependencies
56 |         run: python -m pip install tox
57 |       - name: Run linting
58 |         run: tox -e lint
59 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from copy import deepcopy
 2 | from haproxy.line import Line
 3 | 
 4 | import pytest
 5 | 
 6 | 
 7 | DEFAULT_DATA = {
 8 |     'syslog_date': 'Dec  9 13:01:26',
 9 |     'process_name_and_pid': 'localhost haproxy[28029]:',
10 |     'client_ip': '127.0.0.1',
11 |     'client_port': 2345,
12 |     'accept_date': '09/Dec/2013:12:59:46.633',
13 |     'frontend_name': 'loadbalancer',
14 |     'backend_name': 'default',
15 |     'server_name': 'instance8',
16 |     'tq': 0,
17 |     'tw': 51536,
18 |     'tc': 1,
19 |     'tr': 48082,
20 |     'tt': '99627',
21 |     'status': '200',
22 |     'bytes': '83285',
23 |     'act': '87',
24 |     'fe': '89',
25 |     'be': '98',
26 |     'srv': '1',
27 |     'retries': '20',
28 |     'queue_server': 2,
29 |     'queue_backend': 67,
30 |     'headers': ' {77.24.148.74}',
31 |     'http_request': 'GET /path/to/image HTTP/1.1',
32 | }
33 | 
34 | 
35 | class LinesGenerator:
36 |     def __init__(self, line_format):
37 |         self.data = deepcopy(DEFAULT_DATA)
38 |         self.line_format = line_format
39 | 
40 |     def __call__(self, *args, **kwargs):
41 |         self.data.update(**kwargs)
42 |         self.data['client_ip_and_port'] = '{client_ip}:{client_port}'.format(
43 |             **self.data
44 |         )
45 |         self.data[
46 |             'server_names'
47 |         ] = '{frontend_name} {backend_name}/{server_name}'.format(**self.data)
48 |         self.data['timers'] = '{tq}/{tw}/{tc}/{tr}/{tt}'.format(**self.data)
49 |         self.data['status_and_bytes'] = '{status} {bytes}'.format(**self.data)
50 |         self.data['connections_and_retries'] = '{act}/{fe}/{be}/{srv}/{retries}'.format(
51 |             **self.data
52 |         )
53 |         self.data['queues'] = '{queue_server}/{queue_backend}'.format(**self.data)
54 | 
55 |         log_line = self.line_format.format(**self.data)
56 |         return Line(log_line)
57 | 
58 | 
@pytest.fixture()
def default_line_data():
    """Return a fresh copy of the default values used to build log lines.

    A deep copy is returned instead of the module-level ``DEFAULT_DATA`` so
    that a test mutating the dictionary cannot affect other tests.
    """
    return deepcopy(DEFAULT_DATA)
62 | 
63 | 
@pytest.fixture()
def line_factory():
    """Return a ``LinesGenerator`` set up with the default log line layout."""
    # ``{queues}`` and ``{headers}`` share one chunk on purpose: when no
    # headers are captured that field is completely empty, so there must not
    # be a double space between the backend queue and the HTTP request.
    layout = ' '.join(
        (
            '{syslog_date}',
            '{process_name_and_pid}',
            '{client_ip_and_port}',
            '[{accept_date}]',
            '{server_names}',
            '{timers}',
            '{status_and_bytes}',
            '- - ----',
            '{connections_and_retries}',
            '{queues}{headers}',
            '"{http_request}"',
        )
    )
    return LinesGenerator(layout)
77 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from datetime import timedelta
 3 | from haproxy.utils import date_str_to_datetime
 4 | from haproxy.utils import delta_str_to_timedelta
 5 | from haproxy.utils import VALID_COMMANDS
 6 | from haproxy.utils import VALID_FILTERS
 7 | from haproxy.utils import validate_arg_date
 8 | from haproxy.utils import validate_arg_delta
 9 | 
10 | import pytest
11 | 
12 | 
@pytest.mark.parametrize(
    ('text', 'expected'),
    [
        ('45s', timedelta(seconds=45)),
        ('2m', timedelta(minutes=2)),
        ('13h', timedelta(hours=13)),
        ('2d', timedelta(days=2)),
    ],
)
def test_str_to_timedelta(text, expected):
    """Each supported time unit is converted to the matching timedelta."""
    converted = delta_str_to_timedelta(text)
    assert converted == expected
25 | 
26 | 
@pytest.mark.parametrize(
    ('text', 'expected'),
    [
        ('04/Jan/2013', datetime(2013, 1, 4)),
        ('13/May/2015:13', datetime(2015, 5, 13, 13)),
        ('22/Jun/2017:12:11', datetime(2017, 6, 22, 12, 11)),
        ('29/Aug/2019:10:09:08', datetime(2019, 8, 29, 10, 9, 8)),
    ],
)
def test_str_to_datetime(text, expected):
    """Start dates, with or without time parts, are converted to datetime objects."""
    converted = date_str_to_datetime(text)
    assert converted == expected
39 | 
40 | 
@pytest.mark.parametrize('cmd_key', [*VALID_COMMANDS])
def test_valid_commands(cmd_key):
    """Every registered command has a class and a properly formatted description."""
    entry = VALID_COMMANDS[cmd_key]

    klass = entry['klass']
    assert klass
    assert klass.command_line_name() == cmd_key

    description = entry['description']
    assert description
    assert description.startswith(f'{cmd_key}:\n\t')
49 | 
50 | 
@pytest.mark.parametrize('filter_key', [*VALID_FILTERS])
def test_valid_filters(filter_key):
    """Every registered filter has a callable and a properly formatted description."""
    entry = VALID_FILTERS[filter_key]

    filter_obj = entry['obj']
    assert filter_obj
    assert filter_obj.__name__ == f'filter_{filter_key}'

    description = entry['description']
    assert description
    assert description.startswith(f'{filter_key}:\n\t')
59 | 
60 | 
@pytest.mark.parametrize(('value', 'expected'), [('', None), ('30/Dec/2019', True)])
def test_validate_date(value, expected):
    """A valid date passes silently; an invalid one raises ValueError."""
    if expected is not None:
        assert validate_arg_date(value) is None
        return

    with pytest.raises(ValueError, match='--start argument is not valid'):
        validate_arg_date(value)
69 | 
70 | 
@pytest.mark.parametrize(('value', 'expected'), [('', None), ('3d', True)])
def test_validate_delta(value, expected):
    """A valid delta passes silently; an invalid one raises ValueError."""
    if expected is not None:
        assert validate_arg_delta(value) is None
        return

    with pytest.raises(ValueError, match='--delta argument is not valid'):
        validate_arg_delta(value)
80 | 


--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
  1 | from haproxy.main import create_parser
  2 | from haproxy.main import main
  3 | from haproxy.main import parse_arguments
  4 | from haproxy.utils import VALID_COMMANDS
  5 | from haproxy.utils import VALID_FILTERS
  6 | 
  7 | import pytest
  8 | import sys
  9 | 
 10 | 
 11 | PY310_OR_HIGHER = sys.version_info[1] > 9
 12 | 
 13 | 
 14 | @pytest.fixture()
 15 | def default_arguments():
 16 |     """Return all the expected arguments the main function expects."""
 17 |     return {
 18 |         'start': None,
 19 |         'delta': None,
 20 |         'log': 'tests/files/small.log',
 21 |         'commands': ['counter'],
 22 |         'negate_filter': None,
 23 |         'filters': None,
 24 |         'list_commands': False,
 25 |         'list_filters': False,
 26 |         'json': False,
 27 |         'invalid_lines': False,
 28 |     }
 29 | 
 30 | 
 31 | @pytest.mark.parametrize(
 32 |     ('switch', 'listing'),
 33 |     [('list-filters', VALID_FILTERS), ('list-commands', VALID_COMMANDS)],
 34 | )
 35 | def test_list_filters_and_commands(capsys, switch, listing):
 36 |     """Test that one can request the filters/commands to be listed."""
 37 |     parser = create_parser()
 38 |     data = parse_arguments(parser.parse_args([f'--{switch}']))
 39 |     argument = switch.replace('-', '_')
 40 |     for key in data:
 41 |         expected = None
 42 |         if key == argument:
 43 |             expected = True
 44 |         assert data[key] is expected
 45 |     main(data)
 46 |     output_text = capsys.readouterr().out
 47 |     for name in listing:
 48 |         assert f'{name}:\n\t' in output_text
 49 | 
 50 | 
 51 | def test_show_help(capsys):
 52 |     """Check that the help is shown if no arguments are given."""
 53 |     parser = create_parser()
 54 |     data = parse_arguments(parser.parse_args([]))
 55 |     main(data)
 56 |     output_text = capsys.readouterr().out
 57 |     if PY310_OR_HIGHER:
 58 |         assert 'options:' in output_text
 59 |     else:
 60 |         assert 'optional arguments:' in output_text
 61 |     assert '--list-filters ' in output_text
 62 |     assert '--list-commands ' in output_text
 63 | 
 64 | 
 65 | def test_main(capsys, default_arguments):
 66 |     """Check that the main function works as expected with default arguments."""
 67 |     main(default_arguments)
 68 |     output_text = capsys.readouterr().out
 69 |     assert 'COUNTER\n=======\n9' in output_text
 70 | 
 71 | 
 72 | def test_main_with_filter(capsys, default_arguments):
 73 |     """Check that the filters are applied as expected."""
 74 |     default_arguments['filters'] = [
 75 |         ('server', 'instance1'),
 76 |     ]
 77 |     main(default_arguments)
 78 |     output_text = capsys.readouterr().out
 79 |     assert 'COUNTER\n=======\n4' in output_text
 80 | 
 81 | 
 82 | def test_main_negate_filter(capsys, default_arguments):
 83 |     """Check that filters can be reversed."""
 84 |     default_arguments['filters'] = [
 85 |         ('server', 'instance1'),
 86 |     ]
 87 |     default_arguments['negate_filter'] = True
 88 |     main(default_arguments)
 89 |     output_text = capsys.readouterr().out
 90 |     assert 'COUNTER\n=======\n5' in output_text
 91 | 
 92 | 
 93 | def test_print_no_output(capsys, default_arguments):
 94 |     """Check that the print header is not shown."""
 95 |     default_arguments['commands'] = ['print']
 96 |     main(default_arguments)
 97 |     output_text = capsys.readouterr().out
 98 |     assert 'PRINT\n=====' not in output_text
 99 | 
100 | 
def test_json_output(capsys, default_arguments):
    """With the JSON switch the result is printed as JSON, not as plain text."""
    default_arguments['json'] = True
    main(default_arguments)
    printed = capsys.readouterr().out
    assert 'COUNTER\n=======\n9' not in printed
    assert '{"COUNTER": 9}' in printed
108 | 


--------------------------------------------------------------------------------
/src/haproxy/utils.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from datetime import timedelta
  3 | 
  4 | import re
  5 | 
  6 | 
# A time delta: a whole number immediately followed by exactly one unit
# letter (s, m, h or d); \A and \Z forbid anything before or after it.
DELTA_REGEX = re.compile(r'\A(?P<value>\d+)(?P<time_unit>[smhd])\Z')

# A start date: day/month/year, optionally followed by `:hour`, `:minute`
# and `:second`. Each optional group may also match the empty string.
START_REGEX = re.compile(
    r'(?P<day>\d+)/(?P<month>\w+)/(?P<year>\d+)'
    r'(:(?P<hour>\d+)|)(:(?P<minute>\d+)|)(:(?P<second>\d+)|)'
)

# Translates the unit letter captured by DELTA_REGEX into the keyword
# argument name that `timedelta` expects.
DELTA_KEYS = {'s': 'seconds', 'm': 'minutes', 'h': 'hours', 'd': 'days'}
 15 | 
 16 | 
 17 | def date_str_to_datetime(date):
 18 |     """Convert a string to a datetime object.
 19 | 
 20 |     The format is `day/month/year[[[:hour]:minute]:second]` being:
 21 |     - day a number
 22 |     - month a three letter representation of the month (i.e. Dec, Jan, etc)
 23 |     - year as a 4 digits value
 24 |     - hour/minute/second as 2 digits value, each of them being optional
 25 |     """
 26 |     matches = START_REGEX.match(date)
 27 |     data = matches.group('day'), matches.group('month'), matches.group('year')
 28 |     raw_date_input = f'{data[0]}/{data[1]}/{data[2]}'
 29 |     date_format = '%d/%b/%Y'
 30 |     for variable, percent in (('hour', ':%H'), ('minute', ':%M'), ('second', ':%S')):
 31 |         match = matches.group(variable)
 32 |         if match:
 33 |             date_format += percent
 34 |             raw_date_input = f'{raw_date_input}:{match}'
 35 | 
 36 |     return datetime.strptime(raw_date_input, date_format)
 37 | 
 38 | 
 39 | def delta_str_to_timedelta(delta):
 40 |     """Convert a string to a timedelta representation.
 41 | 
 42 |     Format is NUMBER followed by one of the following letters: `s`, `m`, `h`, `d`.
 43 |     Each of them meaning, second, minute, hour and day.
 44 |     """
 45 |     matches = DELTA_REGEX.match(delta)
 46 |     value = int(matches.group('value'))
 47 |     time_unit = matches.group('time_unit')
 48 |     key = DELTA_KEYS[time_unit]
 49 |     return timedelta(**{key: value})
 50 | 
 51 | 
 52 | def validate_arg_date(start):
 53 |     """Check that date argument is valid."""
 54 |     try:
 55 |         date_str_to_datetime(start)
 56 |     except (AttributeError, ValueError):
 57 |         raise ValueError('--start argument is not valid')
 58 | 
 59 | 
 60 | def validate_arg_delta(delta):
 61 |     """Check that the delta argument is valid."""
 62 |     try:
 63 |         delta_str_to_timedelta(delta)
 64 |     except (AttributeError, ValueError):
 65 |         raise ValueError('--delta argument is not valid')
 66 | 
 67 | 
 68 | def list_filters():
 69 |     """Return the information of existing filters.
 70 | 
 71 |     Data returned:
 72 |     - their names as the user is expected to use them from the command line
 73 |     - the object itself
 74 |     - its description
 75 |     """
 76 |     from haproxy import filters
 77 | 
 78 |     data = {}
 79 |     for full_name in dir(filters):
 80 |         if not full_name.startswith('filter_'):
 81 |             continue
 82 |         name = full_name[7:]
 83 |         obj = getattr(filters, full_name)
 84 | 
 85 |         description = _strip_description(obj.__doc__)
 86 |         data[name] = {'obj': obj, 'description': f'{name}:\n\t{description}'}
 87 |     return data
 88 | 
 89 | 
 90 | def list_commands():
 91 |     """Return the information of existing commands.
 92 | 
 93 |     Data returned:
 94 |     - their names as the user is expected to use them from the command line
 95 |     - the object itself
 96 |     - its description
 97 |     """
 98 |     from haproxy import commands
 99 | 
100 |     data = {}
101 |     for cmd in dir(commands):
102 |         if cmd.endswith('Mixin'):
103 |             continue
104 |         klass = getattr(commands, cmd)
105 |         try:
106 |             name = klass.command_line_name()
107 |         except AttributeError:
108 |             continue
109 | 
110 |         description = _strip_description(klass.__doc__)
111 |         data[name] = {'klass': klass, 'description': f'{name}:\n\t{description}'}
112 |     return data
113 | 
114 | 
115 | def _strip_description(raw_text):
116 |     if not raw_text:
117 |         return ''
118 |     text = '\n\t'.join([line.strip() for line in raw_text.split('\n') if line.strip()])
119 |     return text
120 | 
121 | 
# Both registries are built once, when this module is imported: the helper
# functions import `haproxy.commands` / `haproxy.filters` and inspect them.
VALID_COMMANDS = list_commands()
VALID_FILTERS = list_filters()
124 | 


--------------------------------------------------------------------------------
/tests/test_log_file.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from haproxy.logfile import Log
  3 | 
  4 | import pytest
  5 | 
  6 | 
  7 | def test_logfile_default_values():
  8 |     """Check that the default values are set."""
  9 |     log_file = Log('something')
 10 |     assert log_file.logfile == 'something'
 11 |     assert log_file.show_invalid is False
 12 |     assert log_file.invalid_lines == 0
 13 |     assert log_file.valid_lines == 0
 14 |     assert log_file.total_lines == 0
 15 |     assert log_file.start is None
 16 |     assert log_file.end is None
 17 | 
 18 | 
 19 | @pytest.mark.parametrize(
 20 |     ('start_str', 'start_obj', 'delta', 'end_obj'),
 21 |     [
 22 |         (None, None, None, None),
 23 |         (None, None, '3d', None),
 24 |         ('12/Dec/2019', datetime(2019, 12, 12), None, None),
 25 |         ('12/Dec/2019', datetime(2019, 12, 12), '3d', datetime(2019, 12, 15)),
 26 |     ],
 27 | )
 28 | def test_start_and_end_attributes(start_str, start_obj, delta, end_obj):
 29 |     """Check that the start and end of attributes of Log objects are set as expected."""
 30 |     log_file = Log('something', start=start_str, delta=delta)
 31 |     assert log_file.logfile == 'something'
 32 |     assert log_file.invalid_lines == 0
 33 |     assert log_file.start == start_obj
 34 |     assert log_file.end == end_obj
 35 | 
 36 | 
 37 | @pytest.mark.parametrize('accept_date', ['09/Dec/2013:12:59:46.633', None])
 38 | def test_lines_validity(tmp_path, line_factory, accept_date):
 39 |     """Check that lines are either counted as valid or invalid."""
 40 |     file_path = tmp_path / 'haproxy.log'
 41 |     line = ''
 42 |     if accept_date:
 43 |         line = line_factory(accept_date=accept_date).raw_line
 44 |     with open(file_path, 'w') as file_obj:
 45 |         file_obj.write(f'{line}\n')
 46 |     log_file = Log(file_path)
 47 |     _ = list(log_file)
 48 | 
 49 |     assert log_file.total_lines == 1
 50 |     if accept_date:
 51 |         assert log_file.valid_lines == 1
 52 |         assert log_file.invalid_lines == 0
 53 |     else:
 54 |         assert log_file.valid_lines == 0
 55 |         assert log_file.invalid_lines == 1
 56 | 
 57 | 
 58 | @pytest.mark.parametrize(
 59 |     ('accept_date', 'start', 'delta', 'is_valid'),
 60 |     [
 61 |         # valid line and no time frame, returned
 62 |         ('09/Dec/2013:12:59:46.633', None, None, True),
 63 |         # invalid line, not returned
 64 |         (None, None, None, False),
 65 |         # valid line before time frame, not returned
 66 |         ('09/Dec/2013:12:59:46.633', '09/Dec/2014', None, False),
 67 |         # valid line after time frame, not returned
 68 |         ('09/Dec/2013:12:59:46.633', '08/Dec/2012', '3d', False),
 69 |         # valid line within time frame, returned
 70 |         ('09/Dec/2013:12:59:46.633', '08/Dec/2013', '3d', True),
 71 |     ],
 72 | )
 73 | def test_returned_lines(tmp_path, line_factory, accept_date, start, delta, is_valid):
 74 |     """Check that lines are only returned if they are valid AND within the time frame."""
 75 |     file_path = tmp_path / 'haproxy.log'
 76 |     line = ''
 77 |     if accept_date:
 78 |         line = line_factory(accept_date=accept_date).raw_line
 79 |     with open(file_path, 'w') as file_obj:
 80 |         file_obj.write(f'{line}\n')
 81 |     log_file = Log(file_path, start=start, delta=delta)
 82 |     lines = list(log_file)
 83 |     assert bool(len(lines)) is is_valid
 84 | 
 85 | 
 86 | def test_total_lines():
 87 |     """Check that the total amount of lines are always counted."""
 88 |     log_file = Log(logfile='tests/files/2_ok_1_invalid.log')
 89 |     _ = list(log_file)
 90 |     assert log_file.total_lines == 3
 91 |     assert log_file.valid_lines == 2
 92 |     assert log_file.invalid_lines == 1
 93 | 
 94 | 
 95 | @pytest.mark.parametrize('headers', [' {1.2.3.4}', 'random-value-that-breaks'])
 96 | def test_print_invalid_lines(tmp_path, line_factory, headers, capsys):
 97 |     """Check that invalid lines are printed, if asked to do so."""
 98 |     file_path = tmp_path / 'haproxy.log'
 99 |     line = line_factory(headers=headers).raw_line
100 |     with open(file_path, 'w') as file_obj:
101 |         file_obj.write(f'{line}\n')
102 |     log_file = Log(file_path, show_invalid=True)
103 |     _ = list(log_file)
104 | 
105 |     output = capsys.readouterr().out
106 |     if log_file.valid_lines == 1:
107 |         assert headers not in output
108 |     else:
109 |         assert headers in output
110 | 


--------------------------------------------------------------------------------
/src/haproxy/filters.py:
--------------------------------------------------------------------------------
  1 | def filter_ip(ip):
  2 |     """Filter by IP.
  3 | 
  4 |     -f ip[192.168.1.2]  # will return only lines that have this IP.
  5 | 
  6 |     Either the client IP, or, if present, the first IP captured
  7 |     in the X-Forwarded-For header.
  8 |     """
  9 | 
 10 |     def filter_func(log_line):
 11 |         return log_line.ip == ip
 12 | 
 13 |     return filter_func
 14 | 
 15 | 
 16 | def filter_ip_range(ip_range):
 17 |     """Filter by an IP range.
 18 | 
 19 |     -f ip_range[192.168.1]
 20 | 
 21 |     Rather than proper IP ranges, is a string matching.
 22 |     See `ip` filter about which IP is being.
 23 |     """
 24 | 
 25 |     def filter_func(log_line):
 26 |         ip = log_line.ip
 27 |         if ip:
 28 |             return ip.startswith(ip_range)
 29 | 
 30 |     return filter_func
 31 | 
 32 | 
 33 | def filter_path(path):
 34 |     """Filter by the request path.
 35 | 
 36 |     -f path[/one/two]
 37 | 
 38 |     It looks for the given path to be part of the requested path.
 39 |     """
 40 | 
 41 |     def filter_func(log_line):
 42 |         return path in log_line.http_request_path
 43 | 
 44 |     return filter_func
 45 | 
 46 | 
 47 | def filter_ssl(ignore=True):
 48 |     """Filter by SSL connection.
 49 | 
 50 |     -f ssl
 51 | 
 52 |     It checks that the request is made via the standard https port.
 53 |     """
 54 | 
 55 |     def filter_func(log_line):
 56 |         return log_line.is_https
 57 | 
 58 |     return filter_func
 59 | 
 60 | 
 61 | def filter_slow_requests(slowness):
 62 |     """Filter by response time.
 63 | 
 64 |     -f slow_requests[1000]  # get all lines that took more than a second to process
 65 | 
 66 |     Filters by the time it took the downstream server to process the request.
 67 |     Time is in milliseconds.
 68 |     """
 69 | 
 70 |     def filter_func(log_line):
 71 |         slowness_int = int(slowness)
 72 |         return slowness_int <= log_line.time_wait_response
 73 | 
 74 |     return filter_func
 75 | 
 76 | 
 77 | def filter_wait_on_queues(max_waiting):
 78 |     """Filter by queue time in HAProxy.
 79 | 
 80 |     -f wait_on_queues[1000]  # get all requests that waited more than a second in HAProxy
 81 | 
 82 |     Filters by the time a request had to wait in HAProxy
 83 |     prior to be sent to a downstream server to be processed.
 84 |     """
 85 | 
 86 |     def filter_func(log_line):
 87 |         waiting = int(max_waiting)
 88 |         return waiting <= log_line.time_wait_queues
 89 | 
 90 |     return filter_func
 91 | 
 92 | 
 93 | def filter_status_code(http_status):
 94 |     """Filter by a specific HTTP status code.
 95 | 
 96 |     -f status_code[404]
 97 |     """
 98 | 
 99 |     def filter_func(log_line):
100 |         return log_line.status_code == http_status
101 | 
102 |     return filter_func
103 | 
104 | 
def filter_status_code_family(family_number):
    """Filter by a family of HTTP status code.

    -f status_code_family[5]  # get all 5xx status codes
    """

    def filter_func(log_line):
        # The family is whatever the status code of the line begins with.
        line_status = log_line.status_code
        return line_status.startswith(family_number)

    return filter_func
115 | 
116 | 
def filter_http_method(http_method):
    """Filter by HTTP method (GET, POST, PUT, HEAD...).

    -f http_method[GET]
    """

    def filter_func(log_line):
        # Exact, case sensitive comparison.
        line_method = log_line.http_request_method
        return line_method == http_method

    return filter_func
127 | 
128 | 
def filter_backend(backend_name):
    """Filter by HAProxy backend.

    -f backend[specific_app]

    See HAProxy configuration, it can have multiple backends defined.
    """

    def filter_func(log_line):
        # Exact comparison against the backend recorded on the line.
        line_backend = log_line.backend_name
        return line_backend == backend_name

    return filter_func
141 | 
142 | 
def filter_frontend(frontend_name):
    """Filter by which HAProxy frontend got the request.

    -f frontend[loadbalancer]

    See HAProxy configuration, it can have multiple frontends defined.
    """

    def filter_func(log_line):
        # Exact comparison against the frontend recorded on the line.
        line_frontend = log_line.frontend_name
        return line_frontend == frontend_name

    return filter_func
155 | 
156 | 
def filter_server(server_name):
    """Filter by downstream server.

    -f server[app01]
    """

    def filter_func(log_line):
        # Exact comparison against the server recorded on the line.
        line_server = log_line.server_name
        return line_server == server_name

    return filter_func
167 | 
168 | 
def filter_response_size(size):
    """Filter by how big (in bytes) the response was.

    -f response_size[50000]

    Specially useful when looking for big file downloads.
    """

    def as_number(raw):
        # Both the requested size and the value on the line may carry a
        # leading `+`, which is dropped before converting to an integer.
        digits = raw[1:] if raw.startswith('+') else raw
        return int(digits)

    size_value = as_number(size)

    def filter_func(log_line):
        # Inclusive: a response of exactly the requested size matches.
        return as_number(log_line.bytes_read) >= size_value

    return filter_func
191 | 


--------------------------------------------------------------------------------
/tests/test_argparse.py:
--------------------------------------------------------------------------------
  1 | from haproxy.main import create_parser
  2 | from haproxy.main import parse_arg_filters
  3 | from haproxy.main import parse_arguments
  4 | 
  5 | import pytest
  6 | 
  7 | 
  8 | def test_parser_arguments_defaults():
  9 |     """Test that the argument parsing defaults works."""
 10 |     parser = create_parser()
 11 |     data = parse_arguments(parser.parse_args([]))
 12 |     assert data == {
 13 |         'start': None,
 14 |         'delta': None,
 15 |         'commands': None,
 16 |         'filters': None,
 17 |         'negate_filter': None,
 18 |         'log': None,
 19 |         'list_commands': None,
 20 |         'list_filters': None,
 21 |         'json': False,
 22 |         'invalid_lines': False,
 23 |     }
 24 | 
 25 | 
 26 | @pytest.mark.parametrize(
 27 |     ('argument', 'option'),
 28 |     [
 29 |         ('--list-commands', 'list_commands'),
 30 |         ('--list-filters', 'list_filters'),
 31 |         ('--negate-filter', 'negate_filter'),
 32 |         ('-n', 'negate_filter'),
 33 |         ('--json', 'json'),
 34 |     ],
 35 | )
 36 | def test_parser_boolean_arguments(argument, option):
 37 |     """Test that the argument parsing defaults works."""
 38 |     parser = create_parser()
 39 |     data = parse_arguments(parser.parse_args([argument]))
 40 |     assert data[option] is True
 41 | 
 42 | 
 43 | @pytest.mark.parametrize(
 44 |     ('start', 'delta'), [('30/Dec/2019', '3d'), ('20/Jun/2015', '2h')]
 45 | )
 46 | def test_arguments_dates(start, delta):
 47 |     """Check that properly formatted start and delta arguments are processed fine.
 48 | 
 49 |     Thus they are extracted and stored for later use.
 50 |     """
 51 |     parser = create_parser()
 52 |     data = parse_arguments(parser.parse_args(['-s', start, '-d', delta]))
 53 |     assert data['start'] == start
 54 |     assert data['delta'] == delta
 55 | 
 56 | 
 57 | @pytest.mark.parametrize('start', ['33/Dec/2019', '5/Hallo/2019'])
 58 | def test_arguments_date_invalid(start):
 59 |     """Incorrectly formatted start argument raises an exception."""
 60 |     parser = create_parser()
 61 |     with pytest.raises(ValueError, match='--start argument is not valid'):
 62 |         parse_arguments(parser.parse_args(['-s', start]))
 63 | 
 64 | 
 65 | @pytest.mark.parametrize('delta', ['3P', '2323MM'])
 66 | def test_arguments_delta_invalid(delta):
 67 |     """Incorrectly formatted delta argument raises an exception."""
 68 |     parser = create_parser()
 69 |     with pytest.raises(ValueError, match='--delta argument is not valid'):
 70 |         parse_arguments(parser.parse_args(['-d', delta]))
 71 | 
 72 | 
 73 | @pytest.mark.parametrize(
 74 |     ('cmds', 'is_valid'),
 75 |     [
 76 |         ('counter', True),
 77 |         ('counter,ip_counter', True),
 78 |         ('ip_counter,count_data', False),
 79 |         ('count_data', False),
 80 |     ],
 81 | )
 82 | def test_commands_arguments(cmds, is_valid):
 83 |     """Test that the commands are parsed, and an exception raised otherwise."""
 84 |     parser = create_parser()
 85 |     if not is_valid:
 86 |         with pytest.raises(ValueError, match='is not available. Use --list-commands'):
 87 |             parse_arguments(parser.parse_args(['-c', cmds]))
 88 |     else:
 89 |         data = parse_arguments(parser.parse_args(['-c', cmds]))
 90 |         assert data['commands'] == cmds.split(',')
 91 | 
 92 | 
 93 | @pytest.mark.parametrize(
 94 |     ('filters_list', 'is_valid'),
 95 |     [
 96 |         ('ip_range', True),
 97 |         ('slow_requests,backend', True),
 98 |         ('tomatoes', False),
 99 |         ('slow_requests,potatoes', False),
100 |     ],
101 | )
102 | def test_filters_arguments(filters_list, is_valid):
103 |     """Test that the filters are parsed, and an exception raised otherwise."""
104 |     parser = create_parser()
105 |     if not is_valid:
106 |         with pytest.raises(ValueError, match='is not available. Use --list-filters'):
107 |             parse_arguments(parser.parse_args(['-f', filters_list]))
108 |     else:
109 |         data = parse_arguments(parser.parse_args(['-f', filters_list]))
110 |         assert data['filters'] == [(x, None) for x in filters_list.split(',')]
111 | 
112 | 
@pytest.mark.parametrize(
    ('filter_expression', 'expected'),
    [
        ('ip_range', [('ip_range', None)]),
        ('ip_rangelala]', None),
        ('ip_range[lala]', [('ip_range', 'lala')]),
    ],
)
def test_filters_with_arguments(filter_expression, expected):
    """Check how the argument of a filter is taken out of the square brackets.

    An expected value of None stands for an expression that has to be rejected.
    """
    if expected is not None:
        assert parse_arg_filters(filter_expression) == expected
        return

    with pytest.raises(ValueError, match='It is missing an opening square bracket'):
        parse_arg_filters(filter_expression)
132 | 
133 | 
@pytest.mark.parametrize(
    ('filename', 'is_valid'),
    [
        ('tests/conftest.py', True),
        ('tests/non-existing-file.py', False),
    ],
)
def test_log_argument(filename, is_valid):
    """Check that the log argument is only accepted if the file exists."""
    parser = create_parser()
    cli_args = ['-l', filename]

    if not is_valid:
        with pytest.raises(ValueError, match=f'{filename} does not exist'):
            parse_arguments(parser.parse_args(cli_args))
        return

    parsed = parse_arguments(parser.parse_args(cli_args))
    assert parsed['log'] == filename
150 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | .. -*- coding: utf-8 -*-
  2 | 
  3 | HAProxy log analyzer
  4 | ====================
  5 | This Python package is a `HAProxy`_ log parser.
  6 | It analyzes HAProxy log files in multiple ways (see commands section below).
  7 | 
  8 | .. note::
  9 |    Currently only the `HTTP log format`_ is supported.
 10 | 
 11 | Tests and coverage
 12 | ------------------
No project is trustworthy if it does not have tests and decent coverage!
 14 | 
 15 | .. image:: https://github.com/gforcada/haproxy_log_analysis/actions/workflows/tests.yml/badge.svg?branch=master
 16 |    :target: https://github.com/gforcada/haproxy_log_analysis/actions/workflows/tests.yml
 17 | 
 18 | .. image:: https://coveralls.io/repos/github/gforcada/haproxy_log_analysis/badge.svg?branch=master
 19 |    :target: https://coveralls.io/github/gforcada/haproxy_log_analysis?branch=master
 20 | 
 21 | 
 22 | Documentation
 23 | -------------
 24 | See the `documentation and API`_ at ReadTheDocs_.
 25 | 
 26 | Command-line interface
 27 | ----------------------
 28 | The current ``--help`` looks like this::
 29 | 
 30 |   usage: haproxy_log_analysis [-h] [-l LOG] [-s START] [-d DELTA] [-c COMMAND]
 31 |                               [-f FILTER] [-n] [--list-commands]
 32 |                               [--list-filters] [--json]
 33 | 
 34 |   Analyze HAProxy log files and outputs statistics about it
 35 | 
 36 |   optional arguments:
 37 |     -h, --help            show this help message and exit
 38 |     -l LOG, --log LOG     HAProxy log file to analyze
 39 |     -s START, --start START
 40 |                           Process log entries starting at this time, in HAProxy
 41 |                           date format (e.g. 11/Dec/2013 or
 42 |                           11/Dec/2013:19:31:41). At least provide the
 43 |                           day/month/year. Values not specified will use their
 44 |                           base value (e.g. 00 for hour). Use in conjunction with
 45 |                           -d to limit the number of entries to process.
 46 |     -d DELTA, --delta DELTA
 47 |                           Limit the number of entries to process. Express the
 48 |                           time delta as a number and a time unit, e.g.: 1s, 10m,
 49 |                           3h or 4d (for 1 second, 10 minutes, 3 hours or 4
 50 |                           days). Use in conjunction with -s to only analyze
 51 |                           certain time delta. If no start time is given, the
 52 |                           time on the first line will be used instead.
 53 |     -c COMMAND, --command COMMAND
 54 |                           List of commands, comma separated, to run on the log
 55 |                           file. See --list-commands to get a full list of them.
 56 |     -f FILTER, --filter FILTER
 57 |                           List of filters to apply on the log file. Passed as
 58 |                           comma separated and parameters within square brackets,
 59 |                           e.g ip[192.168.1.1],ssl,path[/some/path]. See --list-
 60 |                           filters to get a full list of them.
 61 |     -n, --negate-filter   Make filters passed with -f work the other way around,
 62 |                           i.e. if the ``ssl`` filter is passed instead of
 63 |                           showing only ssl requests it will show non-ssl
 64 |                           traffic. If the ``ip`` filter is used, then all but
 65 |                           that ip passed to the filter will be used.
 66 |     --list-commands       Lists all commands available.
 67 |     --list-filters        Lists all filters available.
 68 |     --json                Output results in json.
 69 |     --invalid             Print the lines that could not be parsed. Be aware
 70 |                           that mixing it with the print command will mix their
 71 |                           output.
 72 | 
 73 | 
 74 | Commands
 75 | --------
 76 | 
Commands are small, purpose-specific programs in themselves, each reporting specific statistics about the log file being analyzed.
 78 | See them all with ``--list-commands`` or online at https://haproxy-log-analyzer.readthedocs.io/modules.html#module-haproxy.commands.
 79 | 
 80 | - ``average_response_time``
 81 | - ``average_waiting_time``
 82 | - ``connection_type``
 83 | - ``counter``
 84 | - ``http_methods``
 85 | - ``ip_counter``
 86 | - ``print``
 87 | - ``queue_peaks``
 88 | - ``request_path_counter``
 89 | - ``requests_per_hour``
 90 | - ``requests_per_minute``
 91 | - ``server_load``
 92 | - ``slow_requests``
 93 | - ``slow_requests_counter``
 94 | - ``status_codes_counter``
 95 | - ``top_ips``
 96 | - ``top_request_paths``
 97 | 
 98 | Filters
 99 | -------
100 | Filters, contrary to commands,
101 | are a way to reduce the amount of log lines to be processed.
102 | 
103 | .. note::
   The ``-n`` command line argument allows reversing the filters' output.
105 | 
106 |    This helps when looking for specific traces, like a certain IP, a path...
107 | 
108 | See them all with ``--list-filters`` or online at https://haproxy-log-analyzer.readthedocs.io/modules.html#module-haproxy.filters.
109 | 
110 | - ``backend``
111 | - ``frontend``
112 | - ``http_method``
113 | - ``ip``
114 | - ``ip_range``
115 | - ``path``
116 | - ``response_size``
117 | - ``server``
118 | - ``slow_requests``
119 | - ``ssl``
120 | - ``status_code``
121 | - ``status_code_family``
122 | - ``wait_on_queues``
123 | 
124 | Installation
125 | ------------
126 | After installation you will have a console script `haproxy_log_analysis`::
127 | 
128 |     $ pip install haproxy_log_analysis
129 | 
130 | TODO
131 | ----
132 | - add more commands: *(help appreciated)*
133 | 
134 |   - reports on servers connection time
135 |   - reports on termination state
136 |   - reports around connections (active, frontend, backend, server)
137 |   - *your ideas here*
138 | 
139 | - think of a way to show the commands output in a meaningful way
140 | 
141 | - be able to specify an output format. For any command that makes sense (slow
142 |   requests for example) output the given fields for each log line (i.e.
143 |   acceptance date, path, downstream server, load at that time...)
144 | 
145 | - *your ideas*
146 | 
147 | .. _HAProxy: http://haproxy.1wt.eu/
148 | .. _HTTP log format: http://cbonte.github.io/haproxy-dconv/2.2/configuration.html#8.2.3
149 | .. _documentation and API: https://haproxy-log-analyzer.readthedocs.io/
150 | .. _ReadTheDocs: http://readthedocs.org
151 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 21 | 
 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
 23 | 
 24 | help:
 25 | 	@echo "Please use \`make <target>' where <target> is one of"
 26 | 	@echo "  html       to make standalone HTML files"
 27 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 28 | 	@echo "  singlehtml to make a single large HTML file"
 29 | 	@echo "  pickle     to make pickle files"
 30 | 	@echo "  json       to make JSON files"
 31 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 32 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 36 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 37 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 38 | 	@echo "  text       to make text files"
 39 | 	@echo "  man        to make manual pages"
 40 | 	@echo "  texinfo    to make Texinfo files"
 41 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 42 | 	@echo "  gettext    to make PO message catalogs"
 43 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 44 | 	@echo "  xml        to make Docutils-native XML files"
 45 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 46 | 	@echo "  linkcheck  to check all external links for integrity"
 47 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 48 | 
 49 | clean:
 50 | 	rm -rf $(BUILDDIR)/*
 51 | 
 52 | html:
 53 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 54 | 	@echo
 55 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 56 | 
 57 | dirhtml:
 58 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 59 | 	@echo
 60 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 61 | 
 62 | singlehtml:
 63 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 64 | 	@echo
 65 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 66 | 
 67 | pickle:
 68 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 69 | 	@echo
 70 | 	@echo "Build finished; now you can process the pickle files."
 71 | 
 72 | json:
 73 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 74 | 	@echo
 75 | 	@echo "Build finished; now you can process the JSON files."
 76 | 
 77 | htmlhelp:
 78 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 79 | 	@echo
 80 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 81 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 82 | 
 83 | qthelp:
 84 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 85 | 	@echo
 86 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 87 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 88 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/HAProxyloganalyzer.qhcp"
 89 | 	@echo "To view the help file:"
 90 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/HAProxyloganalyzer.qhc"
 91 | 
 92 | devhelp:
 93 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 94 | 	@echo
 95 | 	@echo "Build finished."
 96 | 	@echo "To view the help file:"
 97 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/HAProxyloganalyzer"
 98 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/HAProxyloganalyzer"
 99 | 	@echo "# devhelp"
100 | 
101 | epub:
102 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
103 | 	@echo
104 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 
# Run $(SPHINXBUILD) with the `latex` builder; output goes to $(BUILDDIR)/latex.
# Only the LaTeX sources are generated here; the printed hint points to the
# `latexpdf` target for also producing the PDF.
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."
112 | 
# Generate the LaTeX sources in $(BUILDDIR)/latex, then run the `all-pdf`
# target of the makefile found in that directory via a recursive $(MAKE).
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
118 | 
# Generate the LaTeX sources in $(BUILDDIR)/latex, then run the `all-pdf-ja`
# target of the makefile found in that directory via a recursive $(MAKE).
# The closing message names the tools announced by the "Running ..." line
# instead of pdflatex, which this target does not mention running.
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "platex and dvipdfmx finished; the PDF files are in $(BUILDDIR)/latex."
124 | 
# Run $(SPHINXBUILD) with the `text` builder; output goes to $(BUILDDIR)/text.
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."
129 | 
# Run $(SPHINXBUILD) with the `man` builder; output goes to $(BUILDDIR)/man.
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
134 | 
# Run $(SPHINXBUILD) with the `texinfo` builder; output goes to $(BUILDDIR)/texinfo.
# Only the Texinfo sources are generated here; the printed hint points to the
# `info` target for also running makeinfo.
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."
141 | 
# Generate the Texinfo sources in $(BUILDDIR)/texinfo, then run the `info`
# target of the makefile found in that directory.
# The recursive call goes through $(MAKE) (as the latexpdf targets do) so the
# same make program and its command-line flags are used for the sub-make.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
147 | 
# Run $(SPHINXBUILD) with the `gettext` builder; output goes to $(BUILDDIR)/locale.
# Unlike the other targets this one passes $(I18NSPHINXOPTS) rather than $(ALLSPHINXOPTS).
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
152 | 
# Run $(SPHINXBUILD) with the `changes` builder; output goes to $(BUILDDIR)/changes.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."
157 | 
# Run $(SPHINXBUILD) with the `linkcheck` builder; output goes to $(BUILDDIR)/linkcheck.
# The closing message points at $(BUILDDIR)/linkcheck/output.txt for the results.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."
163 | 
# Run $(SPHINXBUILD) with the `doctest` builder; output goes to $(BUILDDIR)/doctest.
# The closing message points at $(BUILDDIR)/doctest/output.txt for the results.
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
168 | 
# Run $(SPHINXBUILD) with the `xml` builder; output goes to $(BUILDDIR)/xml.
xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
173 | 
# Run $(SPHINXBUILD) with the `pseudoxml` builder; output goes to $(BUILDDIR)/pseudoxml.
pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
178 | 


--------------------------------------------------------------------------------
/tests/test_log_line.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from datetime import timedelta
  3 | 
  4 | import pytest
  5 | 
  6 | 
# Reference timestamps for the time-frame test below.
# They are computed once, at import time, so that all parametrized cases
# are evaluated against the very same instant.
NOW = datetime.now()
TWO_DAYS_AGO = NOW - timedelta(days=2)
IN_TWO_DAYS = NOW + timedelta(days=2)
 10 | 
 11 | 
 12 | def test_default_values(line_factory, default_line_data):
 13 |     line = line_factory()
 14 | 
 15 |     assert line.client_ip == default_line_data['client_ip']
 16 |     assert line.client_port == default_line_data['client_port']
 17 | 
 18 |     assert line.raw_accept_date in default_line_data['accept_date']
 19 | 
 20 |     assert line.frontend_name == default_line_data['frontend_name']
 21 |     assert line.backend_name == default_line_data['backend_name']
 22 |     assert line.server_name == default_line_data['server_name']
 23 | 
 24 |     assert line.time_wait_request == default_line_data['tq']
 25 |     assert line.time_wait_queues == default_line_data['tw']
 26 |     assert line.time_connect_server == default_line_data['tc']
 27 |     assert line.time_wait_response == default_line_data['tr']
 28 |     assert line.total_time == default_line_data['tt']
 29 | 
 30 |     assert line.status_code == default_line_data['status']
 31 |     assert line.bytes_read == default_line_data['bytes']
 32 | 
 33 |     assert line.connections_active == default_line_data['act']
 34 |     assert line.connections_frontend == default_line_data['fe']
 35 |     assert line.connections_backend == default_line_data['be']
 36 |     assert line.connections_server == default_line_data['srv']
 37 |     assert line.retries == default_line_data['retries']
 38 | 
 39 |     assert line.queue_server == default_line_data['queue_server']
 40 |     assert line.queue_backend == default_line_data['queue_backend']
 41 | 
 42 |     assert line.captured_request_headers == default_line_data['headers'].strip()[1:-1]
 43 |     assert line.captured_response_headers is None
 44 | 
 45 |     assert line.raw_http_request == default_line_data['http_request']
 46 | 
 47 |     assert line.is_valid
 48 | 
 49 | 
 50 | def test_unused_values(line_factory):
 51 |     line = line_factory()
 52 |     assert line.captured_request_cookie is None
 53 |     assert line.captured_response_cookie is None
 54 |     assert line.termination_state is None
 55 | 
 56 | 
 57 | def test_datetime_value(line_factory):
 58 |     line = line_factory()
 59 |     assert isinstance(line.accept_date, datetime)
 60 | 
 61 | 
 62 | def test_http_request_values(line_factory):
 63 |     method = 'PUT'
 64 |     path = '/path/to/my/image'
 65 |     protocol = 'HTTP/2.0'
 66 |     line = line_factory(http_request=f'{method} {path} {protocol}')
 67 |     assert line.http_request_method == method
 68 |     assert line.http_request_path == path
 69 |     assert line.http_request_protocol == protocol
 70 | 
 71 | 
 72 | def test_invalid_line(line_factory):
 73 |     line = line_factory(bytes='wroooong')
 74 |     assert not line.is_valid
 75 | 
 76 | 
 77 | def test_no_captured_headers(line_factory):
 78 |     """A log line without captured headers is still valid."""
 79 |     line = line_factory(headers='')
 80 |     assert line.is_valid
 81 | 
 82 | 
 83 | def test_request_and_response_captured_headers(line_factory):
 84 |     """Request and response headers captured are parsed correctly."""
 85 |     request_headers = '{something}'
 86 |     response_headers = '{something_else}'
 87 |     line = line_factory(headers=f' {request_headers} {response_headers}')
 88 |     assert line.is_valid
 89 |     assert f'{{{line.captured_request_headers}}}' == request_headers
 90 |     assert f'{{{line.captured_response_headers}}}' == response_headers
 91 | 
 92 | 
 93 | def test_request_is_https_valid(line_factory):
 94 |     """Check that if a log line contains the SSL port on it, is reported
 95 |     as a https connection.
 96 |     """
 97 |     line = line_factory(http_request='GET /domain:443/to/image HTTP/1.1')
 98 |     assert line.is_https
 99 | 
100 | 
def test_request_is_https_false(line_factory):
    """A request whose path lacks the SSL port is not reported as https."""
    request = 'GET /domain:80/to/image HTTP/1.1'
    parsed = line_factory(http_request=request)
    assert not parsed.is_https
107 | 
108 | 
def test_request_is_front_page(line_factory):
    """A request for the front page keeps ``/`` as its request path."""
    front_page = line_factory(http_request='GET / HTTP/1.1')
    assert front_page.http_request_path == '/'
115 | 
116 | 
@pytest.mark.parametrize(
    'process',
    [
        'ip-192-168-1-1 haproxy[28029]:',
        'dvd-ctrl1 haproxy[403100]:',
        'localhost.localdomain haproxy[2345]:',
    ],
)
def test_process_names(line_factory, process):
    """Host/process prefixes written in several styles all yield a valid line."""
    parsed = line_factory(process_name_and_pid=process)
    assert parsed.is_valid is True
129 | 
130 | 
def test_unparseable_http_request(line_factory):
    """An HTTP request that cannot be parsed reports ``'invalid'`` for each part."""
    parsed = line_factory(http_request='something')
    request_parts = (
        parsed.http_request_method,
        parsed.http_request_path,
        parsed.http_request_protocol,
    )
    assert request_parts == ('invalid', 'invalid', 'invalid')
136 | 
137 | 
def test_truncated_requests(line_factory):
    """Truncated requests, i.e. those missing the protocol part, still parse.

    Method and path are extracted as usual, and the protocol is ``None``.
    """
    truncated = line_factory(http_request='GET /')
    assert truncated.http_request_method == 'GET'
    assert truncated.http_request_path == '/'
    assert truncated.http_request_protocol is None
147 | 
148 | 
@pytest.mark.parametrize(
    'syslog',
    [
        # nixos format
        '2017-07-06T14:29:39+02:00',
        # regular format
        'Dec  9 13:01:26',
    ],
)
def test_syslog(line_factory, syslog):
    """The leading syslog timestamp is accepted in every supported format.

    Two flavours are covered: the NixOS style and the one used on other Linux
    systems.
    """
    parsed = line_factory(syslog_date=syslog)
    assert parsed.is_valid is True
165 | 
166 | 
def test_ip_from_headers(line_factory):
    """An IP found in the captured headers is the one reported by the line."""
    header_ip = '1.2.3.4'
    parsed = line_factory(headers=' {' + header_ip + '}')
    assert parsed.ip == header_ip
171 | 
172 | 
@pytest.mark.parametrize(
    'ip',
    ['1.2.3.4', '1.2.3.4, 2.3.4.5', '1.2.3.4,2.3.4.5,5.4.3.2'],
)
def test_only_first_ip_from_headers(line_factory, ip):
    """When the header lists several IPs, only the first one is reported."""
    captured = ' {' + ip + '}'
    parsed = line_factory(headers=captured)
    assert parsed.ip == '1.2.3.4'
181 | 
182 | 
@pytest.mark.parametrize(
    'ip',
    ['127.1.2.7', '1.127.230.47', 'fe80::9379:c29e:6701:cef8', 'fe80::9379:c29e::'],
)
def test_ip_from_client_ip(line_factory, ip):
    """Without captured headers, the line falls back to the client IP."""
    parsed = line_factory(headers='', client_ip=ip)
    assert parsed.ip == ip
191 | 
192 | 
@pytest.mark.parametrize(
    ('start', 'end', 'result'),
    [
        (None, None, True),
        (TWO_DAYS_AGO, None, True),
        (IN_TWO_DAYS, None, False),
        (TWO_DAYS_AGO, IN_TWO_DAYS, True),
        (TWO_DAYS_AGO, TWO_DAYS_AGO, False),
    ],
)
def test_is_within_timeframe(line_factory, start, end, result):
    """A line accepted at ``NOW`` is inside or outside the given time frame."""
    accepted_at = NOW.strftime('%d/%b/%Y:%H:%M:%S.%f')
    parsed = line_factory(accept_date=accepted_at)
    assert parsed.is_within_time_frame(start, end) is result
207 | 


--------------------------------------------------------------------------------
/tests/test_filters.py:
--------------------------------------------------------------------------------
  1 | from haproxy import filters
  2 | 
  3 | import pytest
  4 | 
  5 | 
  6 | @pytest.mark.parametrize(
  7 |     ('to_filter', 'to_check', 'result'),
  8 |     [
  9 |         ('1.2.3.4', '1.2.3.4', True),
 10 |         ('2.3.4.5', '5.3.5.4', False),
 11 |         ('2001:db8::8a2e:370:7334', '2001:db8::8a2e:370:7334', True),
 12 |         ('2001:db8::8a2e:370:7334', '2001:db8::8a2e:456:7321', False),
 13 |     ],
 14 | )
 15 | def test_filter_ip(line_factory, to_filter, to_check, result):
 16 |     """Check that filter_ip filter works as expected."""
 17 |     current_filter = filters.filter_ip(to_filter)
 18 |     headers = f' {{{to_check}}}'
 19 |     line = line_factory(headers=headers)
 20 |     assert current_filter(line) is result
 21 | 
 22 | 
 23 | @pytest.mark.parametrize(
 24 |     ('to_filter', 'to_check', 'result'),
 25 |     [
 26 |         ('1.2.3', '1.2.3.4', True),
 27 |         ('1.2.3', '1.2.3.78', True),
 28 |         ('2.3.4.5', '5.3.5.4', False),
 29 |         ('2001:db8', '2001:db8::8a2e:370:7334', True),
 30 |         ('2001:db8', '2001:db8::8a2e:456:7321', True),
 31 |         ('2134:db8', '2001:db8::8a2e:456:7321', False),
 32 |     ],
 33 | )
 34 | def test_filter_ip_range(line_factory, to_filter, to_check, result):
 35 |     """Check that filter_ip_range filter works as expected."""
 36 |     current_filter = filters.filter_ip_range(to_filter)
 37 |     headers = f' {{{to_check}}}'
 38 |     line = line_factory(headers=headers)
 39 |     assert current_filter(line) is result
 40 | 
 41 | 
 42 | @pytest.mark.parametrize(
 43 |     ('path', 'result'),
 44 |     [
 45 |         ('/path/to/image', True),
 46 |         ('/something/else', False),
 47 |         ('/another/image/here', True),
 48 |     ],
 49 | )
 50 | def test_filter_path(line_factory, path, result):
 51 |     """Check that filter_path filter works as expected."""
 52 |     current_filter = filters.filter_path('/image')
 53 |     http_request = f'GET {path} HTTP/1.1'
 54 |     line = line_factory(http_request=http_request)
 55 |     assert current_filter(line) is result
 56 | 
 57 | 
 58 | @pytest.mark.parametrize(
 59 |     ('path', 'result'),
 60 |     [
 61 |         ('/ssl_path:443/image', True),
 62 |         ('/something/else', False),
 63 |         ('/another:443/ssl', True),
 64 |     ],
 65 | )
 66 | def test_filter_ssl(line_factory, path, result):
 67 |     """Check that filter_path filter works as expected."""
 68 |     current_filter = filters.filter_ssl()
 69 |     http_request = f'GET {path} HTTP/1.1'
 70 |     line = line_factory(http_request=http_request)
 71 |     assert current_filter(line) is result
 72 | 
 73 | 
 74 | @pytest.mark.parametrize(('tr', 'result'), [(45, False), (13000, True), (4566, False)])
 75 | def test_filter_slow_requests(line_factory, tr, result):
 76 |     """Check that filter_slow_requests filter works as expected."""
 77 |     current_filter = filters.filter_slow_requests('10000')
 78 |     line = line_factory(tr=tr)
 79 |     assert current_filter(line) is result
 80 | 
 81 | 
 82 | @pytest.mark.parametrize(('tw', 'result'), [(45, False), (13000, True), (4566, True)])
 83 | def test_filter_wait_on_queues(line_factory, tw, result):
 84 |     """Check that filter_wait_on_queues filter works as expected"""
 85 |     current_filter = filters.filter_wait_on_queues('50')
 86 |     line = line_factory(tw=tw)
 87 |     assert current_filter(line) is result
 88 | 
 89 | 
 90 | @pytest.mark.parametrize(
 91 |     ('to_filter', 'to_check', 'result'),
 92 |     [
 93 |         ('200', '200', True),
 94 |         ('200', '230', False),
 95 |         ('300', '300', True),
 96 |         ('300', '400', False),
 97 |     ],
 98 | )
 99 | def test_filter_status_code(line_factory, to_filter, to_check, result):
100 |     """Test that the status_code filter works as expected."""
101 |     current_filter = filters.filter_status_code(to_filter)
102 |     line = line_factory(status=to_check)
103 |     assert current_filter(line) is result
104 | 
105 | 
@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('2', '200', True),
        ('2', '230', True),
        ('2', '300', False),
        ('3', '300', True),
        ('3', '330', True),
        ('3', '400', False),
    ],
)
def test_filter_status_code_family(line_factory, to_filter, to_check, result):
    """Verify ``filter_status_code_family`` against lines with a given status."""
    family_filter = filters.filter_status_code_family(to_filter)
    parsed = line_factory(status=to_check)
    assert family_filter(parsed) is result
122 | 
123 | 
@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('GET', 'GET', True),
        ('GET', 'POST', False),
        ('GET', 'PUT', False),
        ('GET', 'PATCH', False),
        ('GET', 'DELETE', False),
        ('PATCH', 'PATCH', True),
        ('DELETE', 'DELETE', True),
    ],
)
def test_filter_http_method(line_factory, to_filter, to_check, result):
    """Verify ``filter_http_method`` against requests using a given method."""
    method_filter = filters.filter_http_method(to_filter)
    parsed = line_factory(http_request=to_check + ' /path HTTP/1.1')
    assert method_filter(parsed) is result
141 | 
142 | 
@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('default', 'default', True),
        ('default', 'backend', False),
        ('backend', 'backend', True),
        ('backend', 'default', False),
    ],
)
def test_filter_backend(line_factory, to_filter, to_check, result):
    """Verify ``filter_backend`` against lines with a given backend name."""
    backend_filter = filters.filter_backend(to_filter)
    parsed = line_factory(backend_name=to_check)
    assert backend_filter(parsed) is result
157 | 
158 | 
@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('varnish', 'varnish', True),
        ('varnish', 'nginx', False),
        ('nginx', 'nginx', True),
        ('nginx', 'varnish', False),
    ],
)
def test_filter_frontend(line_factory, to_filter, to_check, result):
    """Verify ``filter_frontend`` against lines with a given frontend name."""
    frontend_filter = filters.filter_frontend(to_filter)
    parsed = line_factory(frontend_name=to_check)
    assert frontend_filter(parsed) is result
173 | 
174 | 
@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('server1', 'server1', True),
        ('server1', 'backend23', False),
        ('backend23', 'backend23', True),
        ('backend23', 'server1', False),
    ],
)
def test_filter_server(line_factory, to_filter, to_check, result):
    """Verify ``filter_server`` against lines with a given server name."""
    server_filter = filters.filter_server(to_filter)
    parsed = line_factory(server_name=to_check)
    assert server_filter(parsed) is result
189 | 
190 | 
@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('400', '500', True),
        ('400', '+500', True),
        ('+400', '500', True),
        ('+400', '+500', True),
        ('400', '300', False),
        ('400', '+300', False),
        ('+400', '300', False),
        ('+400', '+300', False),
    ],
)
def test_filter_response_size(line_factory, to_filter, to_check, result):
    """Verify ``filter_response_size`` against lines with a given bytes value.

    The cases cover a leading plus sign on the filter value, on the line's
    value, on both and on neither.
    """
    size_filter = filters.filter_response_size(to_filter)
    parsed = line_factory(bytes=to_check)
    assert size_filter(parsed) is result
212 | 


--------------------------------------------------------------------------------
/tests/test_regex.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from haproxy.line import HAPROXY_LINE_REGEX
  3 | from haproxy.line import HTTP_REQUEST_REGEX
  4 | 
  5 | import pytest
  6 | import random
  7 | 
  8 | 
  9 | def test_default_values(line_factory, default_line_data):
 10 |     """Check that the default line with default values is parsed."""
 11 |     line = line_factory()
 12 |     matches = HAPROXY_LINE_REGEX.match(line.raw_line)
 13 |     assert matches.group('http_request') == default_line_data['http_request']
 14 | 
 15 | 
 16 | def test_client_ip_and_port(line_factory):
 17 |     """Check that the client IP and port are extracted correctly."""
 18 |     ip = '192.168.0.250'
 19 |     port = '34'
 20 |     line = line_factory(client_ip=ip, client_port=port)
 21 |     matches = HAPROXY_LINE_REGEX.match(line.raw_line)
 22 | 
 23 |     assert matches.group('client_ip') == ip
 24 |     assert matches.group('client_port') == port
 25 | 
 26 | 
 27 | def test_accept_date(line_factory):
 28 |     """Check that the accept date is extracted correctly."""
 29 |     accept_date = datetime.now().strftime('%d/%b/%Y:%H:%M:%S.%f')
 30 |     line = line_factory(accept_date=accept_date)
 31 |     matches = HAPROXY_LINE_REGEX.match(line.raw_line)
 32 | 
 33 |     assert matches.group('accept_date') == accept_date
 34 | 
 35 | 
 36 | def test_server_names(line_factory):
 37 |     """Check that the server names are extracted correctly."""
 38 |     frontend_name = 'SomeThing4'
 39 |     backend_name = 'Another1'
 40 |     server_name = 'Cloud9'
 41 |     line = line_factory(
 42 |         frontend_name=frontend_name, backend_name=backend_name, server_name=server_name
 43 |     )
 44 |     matches = HAPROXY_LINE_REGEX.match(line.raw_line)
 45 | 
 46 |     assert matches.group('frontend_name') == frontend_name
 47 |     assert matches.group('backend_name') == backend_name
 48 |     assert matches.group('server_name') == server_name
 49 | 
 50 | 
 51 | @pytest.mark.parametrize(
 52 |     ('tq', 'tw', 'tc', 'tr', 'tt'),
 53 |     [
 54 |         ('0', '0', '0', '0', '0'),
 55 |         ('23', '55', '3', '4', '5'),
 56 |         ('-23', '-33', '-3', '-4', '5'),
 57 |         ('23', '33', '3', '4', '+5'),
 58 |     ],
 59 | )
 60 | def test_timers(line_factory, tq, tw, tc, tr, tt):
 61 |     """Check that the timers are extracted correctly.
 62 | 
 63 |     Note that all timers can be negative but `tt`,
 64 |     and that `tt` is the only one that can have a positive sign.
 65 |     """
 66 |     line = line_factory(tq=tq, tw=tw, tc=tc, tr=tr, tt=tt)
 67 |     matches = HAPROXY_LINE_REGEX.match(line.raw_line)
 68 | 
 69 |     assert matches.group('tq') == tq
 70 |     assert matches.group('tw') == tw
 71 |     assert matches.group('tc') == tc
 72 |     assert matches.group('tr') == tr
 73 |     assert matches.group('tt') == tt
 74 | 
 75 | 
 76 | @pytest.mark.parametrize(
 77 |     ('status', 'bytes_read'), [('200', '0'), ('-301', '543'), ('200', '+543')]
 78 | )
 79 | def test_status_and_bytes(line_factory, status, bytes_read):
 80 |     """Check that the status code and bytes are extracted correctly.
 81 | 
 82 |     Note that `status` can be negative (for terminated requests),
 83 |     and `bytes` can be prefixed with a plus sign.
 84 |     """
 85 |     line = line_factory(status=status, bytes=bytes_read)
 86 |     matches = HAPROXY_LINE_REGEX.match(line.raw_line)
 87 | 
 88 |     assert matches.group('status_code') == status
 89 |     assert matches.group('bytes_read') == bytes_read
 90 | 
 91 | 
 92 | @pytest.mark.parametrize(
 93 |     ('act', 'fe', 'be', 'srv', 'retries'),
 94 |     [
 95 |         ('0', '0', '0', '0', '0'),
 96 |         ('40', '10', '11', '12', '14'),
 97 |         ('40', '10', '11', '12', '+14'),
 98 |     ],
 99 | )
100 | def test_connections_and_retries(line_factory, act, fe, be, srv, retries):
101 |     """Check that the connections and retries are extracted correctly.
102 | 
103 |     Note that `retries` might have a plus sign prefixed.
104 |     """
105 |     line = line_factory(act=act, fe=fe, be=be, srv=srv, retries=retries)
106 |     matches = HAPROXY_LINE_REGEX.match(line.raw_line)
107 | 
108 |     assert matches.group('act') == act
109 |     assert matches.group('fe') == fe
110 |     assert matches.group('be') == be
111 |     assert matches.group('srv') == srv
112 |     assert matches.group('retries') == retries
113 | 
114 | 
@pytest.mark.parametrize(
    ('server', 'backend'),
    [
        ('0', '0'),
        ('200', '200'),
    ],
)
def test_queues(line_factory, server, backend):
    """The regex extracts the server and backend queues."""
    raw = line_factory(queue_server=server, queue_backend=backend).raw_line
    found = HAPROXY_LINE_REGEX.match(raw)

    assert found.group('queue_server') == server
    assert found.group('queue_backend') == backend
123 | 
124 | 
@pytest.mark.parametrize(
    ('request_header', 'response_header'),
    [
        ('', ''),
        ('something', None),
        ('something here', 'and there'),
        ('multiple | request | headers', 'and | multiple | response ones'),
    ],
)
def test_captured_headers(line_factory, request_header, response_header):
    """The regex extracts the captured headers.

    A falsy response header (``''`` or ``None``) means that only the request
    block is written to the line.
    """
    raw_headers = f' {{{request_header}}}'
    if response_header:
        raw_headers += f' {{{response_header}}}'

    parsed = line_factory(headers=raw_headers)
    found = HAPROXY_LINE_REGEX.match(parsed.raw_line)

    if not response_header:
        # a single block lands in the generic `headers` group
        assert found.group('headers') == request_header
        assert found.group('request_headers') is None
        assert found.group('response_headers') is None
        return

    assert found.group('request_headers') == request_header
    assert found.group('response_headers') == response_header
150 | 
151 | 
def test_http_request(line_factory):
    """The regex extracts the HTTP request verbatim."""
    request = 'something in the air'
    raw = line_factory(http_request=request).raw_line
    found = HAPROXY_LINE_REGEX.match(raw)

    assert found.group('http_request') == request
159 | 
160 | 
@pytest.mark.parametrize(
    'path',
    [
        '/path/to/image',
        '/path/with/port:80',  # with port
        '/path/with/example.com',  # with domain
        '/path/to/article#section',  # with anchor
        '/article?hello=world&goodbye=lennin',  # with parameters
        '/article-with-dashes_and_underscores',  # dashes and underscores
        '/redirect_to?http://example.com',  # double slashes
        '/@@funny',  # at sign
        '/something%20encoded',  # percent sign
        '/++adding++is+always+fun',  # plus sign
        '/here_or|here',  # vertical bar
        '/here~~~e',  # tilde sign
        '/here_*or',  # asterisk sign
        '/something;or-not',  # semicolon
        '/something-important!probably',  # exclamation mark
        '/something$important',  # dollar sign
        # NOTE: the trailing comma on the next entry is essential: without it
        # Python concatenates this literal with the following one and the two
        # paths are silently merged into a single test case.
        "/there's-one's-way-or-another's",  # single quote sign
        '/there?la=as,is',  # comma
        '/here_or(here)',  # parenthesis
        '/here_or[here]',  # square brackets
        '/georg}von{grote/\\',  # curly brackets
        '/here_or<',  # less than
        '/here_or>',  # more than
        '/georg-von-grote/\\',  # back slash
        '/georg`von´grote/\\',  # diacritics
        '/georg`von^grote/\\',  # caret
    ],
)
def test_http_request_regex(path):
    """Test that the method/path/protocol are extracted properly from the HTTP request.

    The method and the protocol are picked at random on every run; only the
    path is parametrized.
    """
    verbs = ('GET', 'POST', 'DELETE', 'PATCH', 'PUT')
    protocols = (
        'HTTP/1.0',
        'HTTP/1.1',
        'HTTP/2.0',
    )
    method = random.choice(verbs)
    protocol = random.choice(protocols)
    matches = HTTP_REQUEST_REGEX.match(f'{method} {path} {protocol}')
    assert matches.group('method') == method
    assert matches.group('path') == path
    assert matches.group('protocol') == protocol
206 | 


--------------------------------------------------------------------------------
/CHANGES.rst:
--------------------------------------------------------------------------------
  1 | CHANGES
  2 | =======
  3 | 
  4 | 6.0.0a5 (unreleased)
  5 | --------------------
  6 | 
  7 | - Nothing changed yet.
  8 | 
  9 | 
 10 | 6.0.0a4 (2023-11-25)
 11 | --------------------
 12 | 
 13 | - More GHA automation fixes.
 14 |   [gforcada]
 15 | 
 16 | 6.0.0a3 (2023-11-25)
 17 | --------------------
 18 | 
 19 | - Play with gh command line tool.
 20 |   [gforcada]
 21 | 
 22 | 6.0.0a2 (2023-11-12)
 23 | --------------------
 24 | 
 25 | - Test again a release.
 26 |   [gforcada]
 27 | 
 28 | 6.0.0a1 (2023-11-12)
 29 | --------------------
 30 | 
 31 | - Make listing of commands and filters easier to read.
 32 |   [gforcada]
 33 | 
 34 | - Improve the filters' and commands' descriptions,
 35 |   with ready to use examples.
 36 |   [gforcada]
 37 | 
 38 | - Switch logic of `wait_on_queues` filter,
 39 |   count lines that are above the filter,
 40 |   e.g. the lines that took more than the specified time.
 41 |   [gforcada]
 42 | 
 43 | - move code to a `src` folder
 44 |   [gforcada]
 45 | 
 46 | - drop `pkg_resources` usage, default to native namespaces
 47 |   [gforcada]
 48 | 
 49 | - switch to `pyproject.toml` and remove `setup.py`
 50 |   [gforcada]
 51 | 
 52 | - use `tox`
 53 |   [gforcada]
 54 | 
 55 | - use `pre-commit`
 56 |   [gforcada]
 57 | 
 58 | - soft drop python 3.7 (it's EOL, and we stop testing against it)
 59 |   [gforcada]
 60 | 
 61 | - Pin dependencies installed in `tox.ini`
 62 |   [gforcada]
 63 | 
 64 | - Add support for Python 3.12
 65 |   [gforcada]
 66 | 
 67 | - Automatically create GitHub releases with GitHub Actions.
 68 |   [gforcada]
 69 | 
 70 | 5.1.0 (2022-12-03)
 71 | ------------------
 72 | 
 73 | - Only get the first IP from `X-Forwarded-For` header.
 74 |   [gforcada]
 75 | 
 76 | - Improve tests robustness.
 77 |   [gforcada]
 78 | 
 79 | - Fix `top_ips` and `top_request_paths` commands output.
 80 |   They were showing all output, rather than only the top 10.
 81 |   [gforcada]
 82 | 
 83 | - Move `tests` folder to the top-level.
 84 |   [gforcada]
 85 | 
 86 | 5.0.0 (2022-11-27)
 87 | ------------------
 88 | 
 89 | - Drop testing on travis-ci.
 90 |   [gforcada]
 91 | 
 92 | - Use GitHub Actions.
 93 |   [gforcada]
 94 | 
 95 | - Format the code with `pyupgrade`, `black` and `isort`.
 96 |   [gforcada]
 97 | 
 98 | - Use `pip-tools` to keep dependencies locked.
 99 |   [gforcada]
100 | 
101 | - Bump python versions supported to 3.7-3.11 and pypy.
102 |   [gforcada]
103 | 
104 | - Drop python 3.6 (EOL).
105 |   [gforcada]
106 | 
107 | 4.1.0 (2020-01-06)
108 | ------------------
109 | 
110 | - **New command:** ``requests_per_hour``.
111 |   Just like the ``requests_per_minute`` but with hour granularity.
112 |   Idea and first implementation done by ``valleedelisle``.
113 |   [gforcada]
114 | 
115 | - Fix parsing truncated requests.
116 |   Idea and first implementation by ``vixns``.
117 |   [gforcada]
118 | 
119 | 4.0.0 (2020-01-06)
120 | ------------------
121 | 
122 | **BREAKING CHANGES:**
123 | 
- Complete rewrite to use almost no memory, even on huge files.
125 |   [gforcada]
126 | 
127 | - Add parallelization to make parsing faster by parsing multiple lines in parallel.
128 |   [gforcada]
129 | 
130 | - Rename command ``counter_slow_requests`` to ``slow_requests_counter``,
131 |   so it is aligned with all other ``_counter`` commands.
132 |   [gforcada]
133 | 
134 | - Changed the ``counter_invalid`` command to a new command line switch ``--invalid``.
135 |   [gforcada]
136 | 
137 | **Regular changes:**
138 | 
139 | - Drop Python 2 support, and test on Python 3.8.
140 |   [gforcada]
141 | 
142 | - Remove the pickling support.
143 |   [gforcada]
144 | 
145 | - Add `--json` output command line option.
146 |   [valleedelisle]
147 | 
148 | 3.0.0 (2019-06-10)
149 | ------------------
150 | 
151 | - Fix spelling.
152 |   [EdwardBetts]
153 | 
154 | - Make ip_counter use client_ip per default.
155 |   [vixns]
156 | 
157 | - Overhaul testing environment. Test on python 3.7 as well. Use black to format.
158 |   [gforcada]
159 | 
160 | 2.1 (2017-07-06)
161 | ----------------
162 | - Enforce QA checks (flake8) on code.
163 |   All code has been updated to follow it.
164 |   [gforcada]
165 | 
166 | - Support Python 3.6.
167 |   [gforcada]
168 | 
169 | - Support different syslog timestamps (at least NixOS).
170 |   [gforcada]
171 | 
172 | 2.0.2 (2016-11-17)
173 | ------------------
174 | 
175 | - Improve performance for ``cmd_print``.
176 |   [kevinjqiu]
177 | 
178 | 2.0.1 (2016-10-29)
179 | ------------------
180 | 
181 | - Allow hostnames to have dots in them.
182 |   [gforcada]
183 | 
184 | 2.0 (2016-07-06)
185 | ----------------
186 | - Handle unparsable HTTP requests.
187 |   [gforcada]
188 | 
189 | - Only test on python 2.7 and 3.5
190 |   [gforcada]
191 | 
192 | 2.0b0 (2016-04-18)
193 | ------------------
194 | - Check the divisor before doing a division to not get ``ZeroDivisionError`` exceptions.
195 |   [gforcada]
196 | 
197 | 2.0a0 (2016-03-29)
198 | ------------------
199 | - Major refactoring:
200 | 
201 |   # Rename modules and classes:
202 | 
203 |     - haproxy_logline -> line
204 |     - haproxy_logfile -> logfile
205 |     - HaproxyLogLine -> Line
206 |     - HaproxyLogFile -> Log
207 | 
208 |   # Parse the log file on Log() creation (i.e. in its __init__)
209 | 
210 |   [gforcada]
211 | 
212 | 1.3 (2016-03-29)
213 | ----------------
214 | 
215 | - New filter: ``filter_wait_on_queues``.
216 |   Get all requests that waited at most X milliseconds on HAProxy queues.
217 |   [gforcada]
218 | 
219 | - Code/docs cleanups and add code analysis.
220 |   [gforcada]
221 | 
222 | - Avoid using eval.
223 |   [gforcada]
224 | 
225 | 1.2.1 (2016-02-23)
226 | ------------------
227 | 
228 | - Support -1 as a status_code
229 |   [Christopher Baines]
230 | 
231 | 1.2 (2015-12-07)
232 | ----------------
233 | 
234 | - Allow a hostname on the syslog part (not only IPs)
235 |   [danny crasto]
236 | 
237 | 1.1 (2015-04-19)
238 | ----------------
239 | 
240 | - Make syslog optional.
241 |   Fixes issue https://github.com/gforcada/haproxy_log_analysis/issues/10.
242 |   [gforcada]
243 | 
244 | 1.0 (2015-03-24)
245 | ----------------
246 | 
247 | - Fix issue #9.
248 |   log line on the syslog part was too strict,
249 |   it was expecting the hostname to be a string and was
250 |   failing if it was an IP.
251 |   [gforcada]
252 | 
253 | 0.0.3.post2 (2015-01-05)
254 | ------------------------
255 | 
256 | - Finally really fixed issue #7.
257 |   ``namespace_packages`` was not meant to be on setup.py at all.
258 |   Silly copy&paste mistake.
259 |   [gforcada]
260 | 
261 | 0.0.3.post (2015-01-04)
262 | -----------------------
263 | 
264 | - Fix release on PyPI.
265 |   Solves GitHub issue #7.
266 |   https://github.com/gforcada/haproxy_log_analysis/issues/7
267 |   [gforcada]
268 | 
269 | 0.0.3 (2014-07-09)
270 | ------------------
271 | 
272 | - Fix release on PyPI (again).
273 |   [gforcada]
274 | 
275 | 0.0.2 (2014-07-09)
276 | ------------------
277 | 
278 | - Fix release on PyPI.
279 |   [gforcada]
280 | 
281 | 0.0.1 (2014-07-09)
282 | ------------------
283 | 
284 | - Pickle :class:`.HaproxyLogFile` data for faster performance.
285 |   [gforcada]
286 | 
287 | - Add a way to negate the filters, so that instead of being able to filter by
288 |   IP, it can output all but that IP information.
289 |   [gforcada]
290 | 
291 | - Add lots of filters: ip, path, ssl, backend, frontend, server, status_code
292 |   and so on. See ``--list-filters`` for a complete list of them.
293 |   [gforcada]
294 | 
295 | - Add :meth:`.HaproxyLogFile.parse_data` method to get data from data stream.
296 |   It allows you to use it as a library.
297 |   [bogdangi]
298 | 
299 | - Add ``--list-filters`` argument on the command line interface.
300 |   [gforcada]
301 | 
302 | - Add ``--filter`` argument on the command line interface, inspired by
303 |   Bogdan's early design.
304 |   [bogdangi] [gforcada]
305 | 
306 | - Create a new module :mod:`haproxy.filters` that holds all available filters.
307 |   [gforcada]
308 | 
309 | - Improve :meth:`.HaproxyLogFile.cmd_queue_peaks` output to not only show
310 |   peaks but also when requests started to queue and when they finished and
311 |   the amount of requests that had been queued.
312 |   [gforcada]
313 | 
314 | - Show help when no argument is given.
315 |   [gforcada]
316 | 
317 | - Polish documentation and docstrings here and there.
318 |   [gforcada]
319 | 
320 | - Add a ``--list-commands`` argument on the command line interface.
321 |   [gforcada]
322 | 
323 | - Generate an API doc for ``HaproxyLogLine`` and ``HaproxyLogFile``.
324 |   [bogdangi]
325 | 
326 | - Create a ``console_script`` `haproxy_log_analysis` for ease of use.
327 |   [bogdangi]
328 | 
329 | - Add Sphinx documentation system, still empty.
330 |   [gforcada]
331 | 
332 | - Keep valid log lines sorted so that the exact order of connections is kept.
333 |   [gforcada]
334 | 
335 | - Add quite a few commands, see `README.rst`_ for a complete list of them.
336 |   [gforcada]
337 | 
338 | - Run commands passed as arguments (with -c flag).
339 |   [gforcada]
340 | 
341 | - Add a requirements.txt file to keep track of dependencies and pin them.
342 |   [gforcada]
343 | 
344 | - Add travis_ and coveralls_ support. See its badges on `README.rst`_.
345 |   [gforcada]
346 | 
347 | - Add argument parsing and custom validation logic for all arguments.
348 |   [gforcada]
349 | 
350 | - Add regular expressions for haproxy log lines (HTTP format) and to
351 |   parse HTTP requests path.
352 |   Added tests to ensure they work as expected.
353 |   [gforcada]
354 | 
355 | - Create distribution.
356 |   [gforcada]
357 | 
358 | .. _travis: https://travis-ci.org/
359 | .. _coveralls: https://coveralls.io/
360 | .. _README.rst: http://github.com/gforcada/haproxy_log_analysis
361 | 


--------------------------------------------------------------------------------
/src/haproxy/main.py:
--------------------------------------------------------------------------------
  1 | from haproxy.logfile import Log
  2 | from haproxy.utils import VALID_COMMANDS
  3 | from haproxy.utils import VALID_FILTERS
  4 | from haproxy.utils import validate_arg_date
  5 | from haproxy.utils import validate_arg_delta
  6 | 
  7 | import argparse
  8 | import os
  9 | 
 10 | 
 11 | def create_parser():
 12 |     desc = 'Analyze HAProxy log files and outputs statistics about it'
 13 |     parser = argparse.ArgumentParser(description=desc)
 14 | 
 15 |     parser.add_argument('-l', '--log', help='HAProxy log file to analyze')
 16 | 
 17 |     parser.add_argument(
 18 |         '-s',
 19 |         '--start',
 20 |         help='Process log entries starting at this time, in HAProxy date '
 21 |         'format (e.g. 11/Dec/2013 or 11/Dec/2013:19:31:41). '
 22 |         'At least provide the day/month/year. Values not specified will '
 23 |         'use their base value (e.g. 00 for hour). Use in conjunction '
 24 |         'with -d to limit the number of entries to process.',
 25 |     )
 26 | 
 27 |     parser.add_argument(
 28 |         '-d',
 29 |         '--delta',
 30 |         help='Limit the number of entries to process. Express the time delta '
 31 |         'as a number and a time unit, e.g.: 1s, 10m, 3h or 4d (for 1 '
 32 |         'second, 10 minutes, 3 hours or 4 days). Use in conjunction with '
 33 |         '-s to only analyze certain time delta. If no start time is '
 34 |         'given, the time on the first line will be used instead.',
 35 |     )
 36 | 
 37 |     parser.add_argument(
 38 |         '-c',
 39 |         '--command',
 40 |         help='List of commands, comma separated, to run on the log file. See '
 41 |         '--list-commands to get a full list of them.',
 42 |     )
 43 | 
 44 |     parser.add_argument(
 45 |         '-f',
 46 |         '--filter',
 47 |         help='List of filters to apply on the log file. Passed as comma '
 48 |         'separated and parameters within square brackets, e.g '
 49 |         'ip[192.168.1.1],ssl,path[/some/path]. See '
 50 |         '--list-filters to get a full list of them.',
 51 |     )
 52 | 
 53 |     parser.add_argument(
 54 |         '-n',
 55 |         '--negate-filter',
 56 |         help='Make filters passed with -f work the other way around, i.e. if '
 57 |         'the ``ssl`` filter is passed instead of showing only ssl '
 58 |         'requests it will show non-ssl traffic. If the ``ip`` filter is '
 59 |         'used, then all but that ip passed to the filter will be used.',
 60 |         action='store_true',
 61 |     )
 62 | 
 63 |     parser.add_argument(
 64 |         '--list-commands', action='store_true', help='Lists all commands available.'
 65 |     )
 66 | 
 67 |     parser.add_argument(
 68 |         '--list-filters', action='store_true', help='Lists all filters available.'
 69 |     )
 70 | 
 71 |     parser.add_argument('--json', action='store_true', help='Output results in json.')
 72 |     parser.add_argument(
 73 |         '--invalid',
 74 |         action='store_false',
 75 |         help='Print the lines that could not be parsed. '
 76 |         'Be aware that mixing it with the print command will mix their output.',
 77 |     )
 78 | 
 79 |     return parser
 80 | 
 81 | 
 82 | def parse_arguments(args):
 83 |     data = {
 84 |         'start': None,
 85 |         'delta': None,
 86 |         'commands': None,
 87 |         'filters': None,
 88 |         'negate_filter': None,
 89 |         'log': None,
 90 |         'list_commands': None,
 91 |         'list_filters': None,
 92 |         'json': None,
 93 |         'invalid_lines': None,
 94 |     }
 95 | 
 96 |     if args.list_commands:
 97 |         data['list_commands'] = True
 98 |         # no need to further process any other input parameter
 99 |         return data
100 | 
101 |     if args.list_filters:
102 |         data['list_filters'] = True
103 |         # no need to further process any other input parameter
104 |         return data
105 | 
106 |     if args.negate_filter:
107 |         data['negate_filter'] = True
108 | 
109 |     if args.start is not None:
110 |         validate_arg_date(args.start)
111 |         data['start'] = args.start
112 | 
113 |     if args.delta is not None:
114 |         validate_arg_delta(args.delta)
115 |         data['delta'] = args.delta
116 | 
117 |     if args.command is not None:
118 |         data['commands'] = parse_arg_commands(args.command)
119 | 
120 |     if args.filter is not None:
121 |         data['filters'] = parse_arg_filters(args.filter)
122 | 
123 |     if args.log is not None:
124 |         _validate_arg_logfile(args.log)
125 |         data['log'] = args.log
126 | 
127 |     if args.json is not None:
128 |         data['json'] = args.json
129 | 
130 |     if args.invalid:
131 |         data['invalid_lines'] = args.json
132 | 
133 |     return data
134 | 
135 | 
def parse_arg_commands(commands_list):
    """Split a comma separated list of commands and check they all exist.

    Args:
        commands_list: string with command names separated by commas.

    Returns:
        list: the command names, in the order they were given.

    Raises:
        ValueError: for the first name that is not a key of
            ``VALID_COMMANDS``.
    """
    requested = commands_list.split(',')
    unknown = next((name for name in requested if name not in VALID_COMMANDS), None)
    if unknown is not None:
        raise ValueError(
            f'command "{unknown}" is not available. '
            'Use --list-commands to get a list of all available commands.'
        )
    return requested
145 | 
146 | 
def parse_arg_filters(filters_arg):
    """Parse the ``--filter`` value into ``(name, argument)`` pairs.

    Filters are separated by commas and may carry one argument between
    square brackets, e.g. ``ip[192.168.1.1],ssl,path[/some/path]``.

    Args:
        filters_arg: raw string given on the command line.

    Returns:
        list: one ``(filter_name, filter_arg)`` tuple per filter, in the
        order given; ``filter_arg`` is ``None`` for filters without brackets.

    Raises:
        ValueError: if an expression ends with ``]`` but has no ``[``, or if
            the filter name is not a key of ``VALID_FILTERS``.
    """
    return_data = []
    for filter_expression in filters_arg.split(','):
        filter_name = filter_expression
        filter_arg = None

        if filter_expression.endswith(']'):
            if '[' not in filter_expression:
                raise ValueError(
                    f'Error on filter "{filter_expression}". '
                    f'It is missing an opening square bracket.'
                )
            # Cut at the FIRST opening bracket only, so an argument that
            # contains ``[`` itself stays in one piece instead of failing
            # with an unrelated unpacking error.
            filter_name, _, filter_arg = filter_expression.partition('[')
            filter_arg = filter_arg[:-1]  # remove the closing square bracket

        if filter_name not in VALID_FILTERS:
            raise ValueError(
                f'filter "{filter_name}" is not available. '
                'Use --list-filters to get a list of all available filters.'
            )

        return_data.append((filter_name, filter_arg))

    return return_data
172 | 
173 | 
def _validate_arg_logfile(filename):
    """Make sure the log file exists.

    Args:
        filename: path to the log file; it is joined to the current working
            directory before being checked.

    Raises:
        ValueError: if nothing exists at the resulting path.
    """
    filepath = os.path.join(os.getcwd(), filename)
    if os.path.exists(filepath):
        return
    raise ValueError(f'filename {filepath} does not exist')
178 | 
179 | 
def print_commands():
    """Print the description of every available command, sorted by name."""
    for name in sorted(VALID_COMMANDS):
        entry = VALID_COMMANDS[name]
        print(entry['description'])
184 | 
185 | 
def print_filters():
    """Print the description of every available filter, sorted by name."""
    for name in sorted(VALID_FILTERS):
        entry = VALID_FILTERS[name]
        print(entry['description'])
190 | 
191 | 
def show_help(data):
    """Print the help text when nothing actionable was requested.

    Args:
        data: dictionary of settings as built by ``parse_arguments``.

    Returns:
        bool: ``True`` if the help was printed, ``False`` otherwise.
    """
    # these settings alone do not give the tool anything to do
    modifiers_only = ('log', 'json', 'negate_filter', 'invalid_lines')
    has_work = any(
        data[key] is not None and key not in modifiers_only for key in data
    )
    if has_work:
        return False

    # make sure that if no arguments are passed the help is shown
    create_parser().print_help()
    return True
206 | 
207 | 
def main(args):
    """Run the requested commands over the log file and print their results.

    Args:
        args: dictionary of settings as built by ``parse_arguments``.

    The help text or the command/filter listings take precedence: if any of
    them is shown nothing else happens. Otherwise every line yielded by the
    log file that satisfies the filters is handed to each command, and once
    the file is exhausted each command prints its results.
    """
    if show_help(args):
        return

    # the listings are exclusive: print them and stop
    if args['list_commands']:
        print_commands()
        return
    if args['list_filters']:
        print_filters()
        return

    log_file = Log(
        logfile=args['log'],
        start=args['start'],
        delta=args['delta'],
        show_invalid=args['invalid_lines'],
    )

    active_filters = requested_filters(args)
    active_commands = requested_commands(args)

    # ``all()`` tells whether a line satisfies every filter; when the user
    # negates the filters the lines to keep are the ones where that is False
    wanted_outcome = not args['negate_filter']
    for line in log_file:
        satisfies_filters = all(check(line) for check in active_filters)
        if satisfies_filters is not wanted_outcome:
            continue
        for command in active_commands:
            command(line)

    print('\nRESULTS\n')
    output_format = 'json' if args['json'] else None
    for command in active_commands:
        command.results(output=output_format)
256 | 
257 | 
def requested_filters(args):
    """Instantiate the filters selected on the command line.

    Args:
        args: dictionary of settings; ``args['filters']`` is either empty/None
            or a list of ``(filter_name, argument)`` pairs.

    Returns:
        list: the result of calling each filter's ``obj`` entry from
        ``VALID_FILTERS`` with its argument; empty when no filter was given.
    """
    selected = args['filters']
    if not selected:
        return []
    return [VALID_FILTERS[name]['obj'](param) for name, param in selected]
265 | 
266 | 
def requested_commands(args):
    """Instantiate the commands selected on the command line.

    Args:
        args: dictionary of settings; ``args['commands']`` is either ``None``
            (no ``--command`` given) or a list of command names.

    Returns:
        list: one instance of each command's ``klass`` entry from
        ``VALID_COMMANDS``, in the order given; empty when no command was
        requested.
    """
    cmds_list = []
    # ``commands`` stays None when only filters or a time frame were given;
    # treat that like "no commands" (as ``requested_filters`` does) instead
    # of failing with a TypeError while iterating None.
    if args['commands']:
        for command in args['commands']:
            cmd_klass = VALID_COMMANDS[command]['klass']
            cmds_list.append(cmd_klass())
    return cmds_list
273 | 
274 | 
def console_script():  # pragma: no cover
    """Parse the process command line arguments and run ``main`` with them."""
    cli_args = create_parser().parse_args()
    main(parse_arguments(cli_args))
279 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # HAProxy log analyzer documentation build configuration file, created by
  3 | # sphinx-quickstart on Thu Dec 19 00:06:54 2013.
  4 | #
  5 | # This file is execfile()d with the current directory set to its
  6 | # containing dir.
  7 | #
  8 | # Note that not all possible configuration values are present in this
  9 | # autogenerated file.
 10 | #
 11 | # All configuration values have a default; values that are commented out
 12 | # serve to show the default.
 13 | 
 14 | # If extensions (or modules to document with autodoc) are in another directory,
 15 | # add these directories to sys.path here. If the directory is relative to the
 16 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 17 | # sys.path.insert(0, os.path.abspath('.'))
 18 | 
# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
# Only autodoc is enabled.
extensions = [
    'sphinx.ext.autodoc',
]

# Add any paths that contain templates here, relative to this directory.
# NOTE(review): no ``_templates`` directory shows up in the repository tree
# next to this file; confirm it exists or drop the entry.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'HAProxy log analyzer'
copyright = '2013, Gil Forcada'  # noqa: A001

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# NOTE(review): both values are hard-coded to '0.1' while the changelog lists
# releases up to 5.0.0 and later; confirm whether they should track the
# package version.
#
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# Empty: nothing is excluded.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all
# documents.
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
 87 | 
 88 | # A list of ignored prefixes for module index sorting.
 89 | # modindex_common_prefix = []
 90 | 
 91 | # If true, keep warnings as "system message" paragraphs in the built documents.
 92 | # keep_warnings = False
 93 | 
 94 | 
# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
# NOTE(review): recent Sphinx releases appear to treat 'default' as a legacy
# alias of the 'classic' theme; confirm against the Sphinx version in use.
html_theme = 'default'
100 | 
101 | # Theme options are theme-specific and customize the look and feel of a theme
102 | # further.  For a list of options available for each theme, see the
103 | # documentation.
104 | # html_theme_options = {}
105 | 
106 | # Add any paths that contain custom themes here, relative to this directory.
107 | # html_theme_path = []
108 | 
109 | # The name for this set of Sphinx documents.  If None, it defaults to
110 | # "<project> v<release> documentation".
111 | # html_title = None
112 | 
113 | # A shorter title for the navigation bar.  Default is the same as html_title.
114 | # html_short_title = None
115 | 
116 | # The name of an image file (relative to this directory) to place at the top
117 | # of the sidebar.
118 | # html_logo = None
119 | 
120 | # The name of an image file (within the static path) to use as favicon of the
121 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
122 | # pixels large.
123 | # html_favicon = None
124 | 
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# NOTE(review): no ``_static`` directory shows up in the repository tree next
# to this file; confirm it exists or drop the entry.
html_static_path = ['_static']
129 | 
130 | # Add any extra paths that contain custom files (such as robots.txt or
131 | # .htaccess) here, relative to this directory. These files are copied
132 | # directly to the root of the documentation.
133 | # html_extra_path = []
134 | 
135 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
136 | # using the given strftime format.
137 | # html_last_updated_fmt = '%b %d, %Y'
138 | 
139 | # If true, SmartyPants will be used to convert quotes and dashes to
140 | # typographically correct entities.
141 | # html_use_smartypants = True
142 | 
143 | # Custom sidebar templates, maps document names to template names.
144 | # html_sidebars = {}
145 | 
146 | # Additional templates that should be rendered to pages, maps page names to
147 | # template names.
148 | # html_additional_pages = {}
149 | 
150 | # If false, no module index is generated.
151 | # html_domain_indices = True
152 | 
153 | # If false, no index is generated.
154 | # html_use_index = True
155 | 
156 | # If true, the index is split into individual pages for each letter.
157 | # html_split_index = False
158 | 
159 | # If true, links to the reST sources are added to the pages.
160 | # html_show_sourcelink = True
161 | 
162 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
163 | # html_show_sphinx = True
164 | 
165 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
166 | # html_show_copyright = True
167 | 
168 | # If true, an OpenSearch description file will be output, and all pages will
169 | # contain a <link> tag referring to it.  The value of this option must be the
170 | # base URL from which the finished HTML is served.
171 | # html_use_opensearch = ''
172 | 
173 | # This is the file name suffix for HTML files (e.g. ".xhtml").
174 | # html_file_suffix = None
175 | 
# Output file base name for HTML help builder.
# Shares the 'HAProxyloganalyzer' stem used by the LaTeX and Texinfo targets.
htmlhelp_basename = 'HAProxyloganalyzerdoc'
178 | 
179 | 
# -- Options for LaTeX output ---------------------------------------------

# Every entry below is commented out, so this dictionary is empty and no
# LaTeX element is overridden.
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    # 'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
# A single 'manual' document is built, starting at the 'index' source file.
latex_documents = [
    (
        'index',
        'HAProxyloganalyzer.tex',
        'HAProxy log analyzer Documentation',
        'Gil Forcada',
        'manual',
    ),
]
203 | 
204 | # The name of an image file (relative to this directory) to place at the top of
205 | # the title page.
206 | # latex_logo = None
207 | 
208 | # For "manual" documents, if this is true, then toplevel headings are parts,
209 | # not chapters.
210 | # latex_use_parts = False
211 | 
212 | # If true, show page references after internal links.
213 | # latex_show_pagerefs = False
214 | 
215 | # If true, show URL addresses after external links.
216 | # latex_show_urls = False
217 | 
218 | # Documents to append as an appendix to all manuals.
219 | # latex_appendices = []
220 | 
221 | # If false, no module index is generated.
222 | # latex_domain_indices = True
223 | 
224 | 
# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
# A single page named 'haproxyloganalyzer' is built from 'index' and placed
# in manual section 1.
man_pages = [
    (
        'index',
        'haproxyloganalyzer',
        'HAProxy log analyzer Documentation',
        ['Gil Forcada'],
        1,
    )
]
238 | 
239 | # If true, show URL addresses after external links.
240 | # man_show_urls = False
241 | 
242 | 
# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# NOTE(review): the description below is still the generic
# 'One line description of project.' placeholder; consider replacing it with
# a real summary of the project.
texinfo_documents = [
    (
        'index',
        'HAProxyloganalyzer',
        'HAProxy log analyzer Documentation',
        'Gil Forcada',
        'HAProxyloganalyzer',
        'One line description of project.',
        'Miscellaneous',
    ),
]
259 | 
260 | # Documents to append as an appendix to all manuals.
261 | # texinfo_appendices = []
262 | 
263 | # If false, no module index is generated.
264 | # texinfo_domain_indices = True
265 | 
266 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
267 | # texinfo_show_urls = 'footnote'
268 | 
269 | # If true, do not generate a @detailmenu in the "Top" node's menu.
270 | # texinfo_no_detailmenu = False
271 | 


--------------------------------------------------------------------------------
/src/haproxy/line.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | 
  3 | import re
  4 | 
  5 | 
# Example log line, to understand the regex below (truncated to fit into
# 80 chars):
#
# Dec  9 13:01:26 localhost haproxy[28029]: 127.0.0.1:39759
# [09/Dec/2013:12:59:46.633] loadbalancer default/instance8
# 0/51536/1/48082/99627 200 83285 - - ---- 87/87/87/1/0 0/67
# {77.24.148.74} "GET /path/to/image HTTP/1.1"

# Matches a whole log line (anchored with \A and \Z) and exposes its fields
# as named groups. The timer groups tq/tw/tc/tr and status_code accept a
# leading '-', while tt, bytes_read and retries accept a leading '+'.
# The captured headers part is optional and comes in three shapes: two brace
# groups (request_headers and response_headers), a single brace group
# (headers) or nothing at all; the groups of the shapes that did not match
# are None.
HAPROXY_LINE_REGEX = re.compile(
    # Dec  9 13:01:26 localhost haproxy[28029]:
    # ignore the syslog prefix
    r'\A.*\]:\s+'
    # 127.0.0.1:39759
    # NOTE(review): the character class also accepts a literal '+'; inside
    # brackets it is not a quantifier. Confirm whether that is intended.
    r'(?P<client_ip>[a-fA-F\d+\.:]+):(?P<client_port>\d+)\s+'
    # [09/Dec/2013:12:59:46.633]
    r'\[(?P<accept_date>.+)\]\s+'
    # loadbalancer default/instance8
    r'(?P<frontend_name>.*)\s+(?P<backend_name>.*)/(?P<server_name>.*)\s+'
    # 0/51536/1/48082/99627
    r'(?P<tq>-?\d+)/(?P<tw>-?\d+)/(?P<tc>-?\d+)/'
    r'(?P<tr>-?\d+)/(?P<tt>\+?\d+)\s+'
    # 200 83285
    r'(?P<status_code>-?\d+)\s+(?P<bytes_read>\+?\d+)\s+'
    # - - ----
    r'.*\s+'  # ignored by now, should capture cookies and termination state
    # 87/87/87/1/0
    r'(?P<act>\d+)/(?P<fe>\d+)/(?P<be>\d+)/'
    r'(?P<srv>\d+)/(?P<retries>\+?\d+)\s+'
    # 0/67
    r'(?P<queue_server>\d+)/(?P<queue_backend>\d+)\s+'
    # {77.24.148.74}
    r'({(?P<request_headers>.*)}\s+{(?P<response_headers>.*)}\s+|{(?P<headers>.*)}\s+|)'
    # "GET /path/to/image HTTP/1.1"
    r'"(?P<http_request>.*)"'
    r'\Z'  # end of line
)
 42 | 
# Splits an HTTP request such as ``GET /path/to/image HTTP/1.1`` into its
# ``method``, ``path`` and ``protocol`` named groups. The protocol part is
# optional, in which case its group is None.
HTTP_REQUEST_REGEX = re.compile(
    r'(?P<method>\w+)\s+'
    # the path has to start with a slash
    # NOTE(review): the character class already contains '/', so the outer
    # '(...)+' repetition looks redundant; confirm before simplifying.
    r'(?P<path>(/[`´\\<>/\w:,;.#$!?=&@%_+\'*^~|()\[\]{\}-]*)+)'
    r'(\s+(?P<protocol>\w+/\d\.\d))?'
)
 48 | 
 49 | 
 50 | class Line:
 51 |     """For a precise and more detailed description of every field see:
 52 |     http://cbonte.github.io/haproxy-dconv/2.2/configuration.html#8.2.3
 53 |     """
 54 | 
 55 |     #: IP of the upstream server that made the connection to HAProxy.
 56 |     client_ip = None
 57 |     #: Port used by the upstream server that made the connection to HAProxy.
 58 |     client_port = None
 59 | 
 60 |     # raw string from log line and its python datetime version
 61 |     raw_accept_date = None
 62 |     #: datetime object with the exact date when the connection to HAProxy was
 63 |     #: made.
 64 |     accept_date = None
 65 | 
 66 |     #: HAProxy frontend that received the connection.
 67 |     frontend_name = None
 68 |     #: HAProxy backend that the connection was sent to.
 69 |     backend_name = None
 70 |     #: Downstream server that HAProxy sends the connection to.
 71 |     server_name = None
 72 | 
 73 |     #: Time in milliseconds waiting the client to send the full HTTP request
 74 |     #: (``Tq`` in HAProxy documentation).
 75 |     time_wait_request = None
 76 |     #: Time in milliseconds that the request spent on HAProxy queues
 77 |     #: (``Tw`` in HAProxy documentation).
 78 |     time_wait_queues = None
 79 |     #: Time in milliseconds to connect to the final server
 80 |     #: (``Tc`` in HAProxy documentation).
 81 |     time_connect_server = None
 82 |     #: Time in milliseconds waiting the downstream server to send the full
 83 |     #: HTTP response (``Tr`` in HAProxy documentation).
 84 |     time_wait_response = None
 85 |     #: Total time in milliseconds between accepting the HTTP request and
 86 |     #: sending back the HTTP response (``Tt`` in HAProxy documentation).
 87 |     total_time = None
 88 | 
 89 |     #: HTTP status code returned to the client.
 90 |     status_code = None
 91 |     #: Total number of bytes sent back to the client.
 92 |     bytes_read = None
 93 | 
 94 |     # not used by now
 95 |     captured_request_cookie = None
 96 |     captured_response_cookie = None
 97 | 
 98 |     # not used by now
 99 |     termination_state = None
100 | 
101 |     #: Total number of concurrent connections on the process when the
102 |     #: session was logged (``actconn`` in HAProxy documentation).
103 |     connections_active = None
104 |     #: Total number of concurrent connections on the frontend when the
105 |     #: session was logged (``feconn`` in HAProxy documentation).
106 |     connections_frontend = None
107 |     #: Total number of concurrent connections handled by the backend when
108 |     #: the session was logged (``beconn`` in HAProxy documentation).
109 |     connections_backend = None
110 |     #: Total number of concurrent connections still active on the server
111 |     #: when the session was logged (``srv_conn`` in HAProxy documentation).
112 |     connections_server = None
113 |     #: Number of connection retries experienced by this session when
114 |     #: trying to connect to the server.
115 |     retries = None
116 | 
117 |     #: Total number of requests which were processed before this one in
118 |     #: the server queue (``srv_queue`` in HAProxy documentation).
119 |     queue_server = None
120 |     #: Total number of requests which were processed before this one in
121 |     #: the backend's global queue (``backend_queue`` in HAProxy documentation).
122 |     queue_backend = None
123 | 
124 |     # List of headers captured in the request.
125 |     captured_request_headers = None
126 |     # List of headers captured in the response.
127 |     captured_response_headers = None
128 | 
129 |     raw_http_request = None
130 |     #: HTTP method (GET, POST...) used on this request.
131 |     http_request_method = None
132 |     #: Requested HTTP path.
133 |     http_request_path = None
134 |     #: HTTP version used on this request.
135 |     http_request_protocol = None
136 | 
137 |     raw_line = None
138 | 
139 |     def __init__(self, line):
140 |         self.raw_line = line
141 | 
142 |         self.is_valid = self._parse_line(line)
143 | 
144 |     @property
145 |     def is_https(self):
146 |         """Returns True if the log line is a SSL connection. False otherwise."""
147 |         if ':443' in self.http_request_path:
148 |             return True
149 |         return False
150 | 
151 |     def is_within_time_frame(self, start, end):
152 |         if not start:
153 |             return True
154 |         elif start > self.accept_date:
155 |             return False
156 | 
157 |         if not end:
158 |             return True
159 |         elif end < self.accept_date:
160 |             return False
161 | 
162 |         return True
163 | 
164 |     @property
165 |     def ip(self):
166 |         """Returns the IP provided on the log line, or the client_ip if absent/empty."""
167 |         if self.captured_request_headers is not None:
168 |             ip = self.captured_request_headers.split('|')[0]
169 |             if ip:
170 |                 # only get the first IP, if there are more usually
171 |                 # are the intermediate servers
172 |                 return ip.split(',')[0]
173 |         return self.client_ip
174 | 
175 |     def _parse_line(self, line):
176 |         matches = HAPROXY_LINE_REGEX.match(line)
177 |         if matches is None:
178 |             return False
179 | 
180 |         self.client_ip = matches.group('client_ip')
181 |         self.client_port = int(matches.group('client_port'))
182 | 
183 |         self.raw_accept_date = matches.group('accept_date')
184 |         self.accept_date = self._parse_accept_date()
185 | 
186 |         self.frontend_name = matches.group('frontend_name')
187 |         self.backend_name = matches.group('backend_name')
188 |         self.server_name = matches.group('server_name')
189 | 
190 |         self.time_wait_request = int(matches.group('tq'))
191 |         self.time_wait_queues = int(matches.group('tw'))
192 |         self.time_connect_server = int(matches.group('tc'))
193 |         self.time_wait_response = int(matches.group('tr'))
194 |         self.total_time = matches.group('tt')
195 | 
196 |         self.status_code = matches.group('status_code')
197 |         self.bytes_read = matches.group('bytes_read')
198 | 
199 |         self.connections_active = matches.group('act')
200 |         self.connections_frontend = matches.group('fe')
201 |         self.connections_backend = matches.group('be')
202 |         self.connections_server = matches.group('srv')
203 |         self.retries = matches.group('retries')
204 | 
205 |         self.queue_server = int(matches.group('queue_server'))
206 |         self.queue_backend = int(matches.group('queue_backend'))
207 | 
208 |         self.captured_request_headers = matches.group('request_headers')
209 |         self.captured_response_headers = matches.group('response_headers')
210 |         if matches.group('headers') is not None:
211 |             self.captured_request_headers = matches.group('headers')
212 | 
213 |         self.raw_http_request = matches.group('http_request')
214 |         self._parse_http_request()
215 | 
216 |         return True
217 | 
218 |     def _parse_accept_date(self):
219 |         return datetime.strptime(self.raw_accept_date, '%d/%b/%Y:%H:%M:%S.%f')
220 | 
221 |     def _parse_http_request(self):
222 |         matches = HTTP_REQUEST_REGEX.match(self.raw_http_request)
223 |         if matches:
224 |             self.http_request_method = matches.group('method')
225 |             self.http_request_path = matches.group('path')
226 |             self.http_request_protocol = matches.group('protocol')
227 |         else:
228 |             self.handle_bad_http_request()
229 | 
230 |     def handle_bad_http_request(self):
231 |         self.http_request_method = 'invalid'
232 |         self.http_request_path = 'invalid'
233 |         self.http_request_protocol = 'invalid'
234 | 
235 |         if self.raw_http_request != '<BADREQ>':
236 |             print(f'Could not process HTTP request {self.raw_http_request}')
237 | 
238 | 
# excluded from coverage on purpose: this is executed by the multiprocessor
# module, and setting coverage up there just for two lines is not worth it
def parse_line(line):  # pragma: no cover
    """Build a ``Line`` out of ``line`` once leading/trailing whitespace is stripped."""
    stripped = line.strip()
    return Line(stripped)
243 | 


--------------------------------------------------------------------------------
/src/haproxy/commands.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | from collections import OrderedDict
  3 | from datetime import datetime
  4 | 
  5 | import json
  6 | import time
  7 | 
  8 | 
  9 | class BaseCommandMixin:
 10 |     @classmethod
 11 |     def command_line_name(cls):
 12 |         """Convert class name to lowercase with underscores.
 13 | 
 14 |         i.e. turn HttpMethods to http_methods.
 15 |         """
 16 |         final_string = cls.__name__[0].lower()
 17 |         for character in cls.__name__[1:]:
 18 |             if character.isupper():
 19 |                 final_string += f'_{character.lower()}'
 20 |             else:
 21 |                 final_string += character
 22 |         return final_string
 23 | 
 24 |     def raw_results(self):  # pragma: no cover
 25 |         raise NotImplementedError
 26 | 
 27 |     def json_data(self):
 28 |         return self.raw_results()
 29 | 
 30 |     def print_data(self):
 31 |         return self.raw_results()
 32 | 
 33 |     def results(self, output=None):
 34 |         command_name = self.command_line_name().upper()
 35 |         if output == 'json':
 36 |             results = self.json_data()
 37 |             print(json.dumps({command_name: results}))
 38 |         else:
 39 |             results = self.print_data()
 40 |             underline = '=' * len(command_name)
 41 |             print(f'{command_name}\n{underline}\n{results}\n')
 42 | 
 43 | 
 44 | class AttributeCounterMixin:
 45 |     attribute_name = None
 46 | 
 47 |     def __init__(self):
 48 |         self.stats = defaultdict(int)
 49 | 
 50 |     def __call__(self, line):
 51 |         self.stats[getattr(line, self.attribute_name)] += 1
 52 | 
 53 |     def raw_results(self):
 54 |         return self.stats
 55 | 
 56 |     def print_data(self):
 57 |         result = ''
 58 |         data = self.raw_results()
 59 |         if isinstance(data, list):
 60 |             data = dict(data)
 61 |         data = sorted(data.items(), key=lambda data_info: data_info[1], reverse=True)
 62 |         for key, value in data:
 63 |             result += f'- {key}: {value}\n'
 64 |         return result
 65 | 
 66 |     def json_data(self):
 67 |         result = []
 68 |         data = sorted(
 69 |             self.stats.items(), key=lambda data_info: data_info[1], reverse=True
 70 |         )
 71 |         for key, value in data:
 72 |             result.append({key: value})
 73 |         return result
 74 | 
 75 | 
 76 | class SortTrimMixin:
 77 |     @staticmethod
 78 |     def _sort_and_trim(data, reverse=False):
 79 |         """Sorts a dictionary with at least two fields on each of them sorting
 80 |         by the second element.
 81 | 
 82 |         .. warning::
 83 |           Right now is hardcoded to 10 elements, improve the command line
 84 |           interface to allow to send parameters to each command or globally.
 85 |         """
 86 |         threshold = 10
 87 |         data_list = data.items()
 88 |         data_list = sorted(
 89 |             data_list, key=lambda data_info: data_info[1], reverse=reverse
 90 |         )
 91 |         return data_list[:threshold]
 92 | 
 93 | 
 94 | class Counter(BaseCommandMixin):
 95 |     """Count valid lines."""
 96 | 
 97 |     def __init__(self):
 98 |         self.counter = 0
 99 | 
100 |     def __call__(self, line):
101 |         self.counter += 1
102 | 
103 |     def raw_results(self):
104 |         return self.counter
105 | 
106 | 
class HttpMethods(AttributeCounterMixin, BaseCommandMixin):
    """Tally all requests per HTTP method (GET/POST...)."""

    # line attribute whose values are tallied by AttributeCounterMixin
    attribute_name = 'http_request_method'
111 | 
112 | 
class IpCounter(AttributeCounterMixin, BaseCommandMixin):
    """Report a breakdown of how many requests have been made per IP.

    For this to work you need to configure HAProxy to capture
    the `X-Forwarded-For` header.
    """

    # line attribute whose values are tallied by AttributeCounterMixin
    attribute_name = 'ip'
121 | 
122 | 
class TopIps(IpCounter, SortTrimMixin):
    """Return the top most frequent IPs (10 items)."""

    def raw_results(self):
        """Return ``(ip, count)`` tuples, most frequent first, trimmed by the mixin."""
        most_frequent = self._sort_and_trim(self.stats, reverse=True)
        return most_frequent
128 | 
129 | 
class StatusCodesCounter(AttributeCounterMixin, BaseCommandMixin):
    """Tally requests per HTTP status (404, 500...)"""

    # line attribute whose values are tallied by AttributeCounterMixin
    attribute_name = 'status_code'
134 | 
135 | 
class RequestPathCounter(AttributeCounterMixin, BaseCommandMixin):
    """Tally requests per the request's path."""

    # line attribute whose values are tallied by AttributeCounterMixin
    attribute_name = 'http_request_path'
140 | 
141 | 
class TopRequestPaths(RequestPathCounter, SortTrimMixin):
    """Returns the top most frequent paths (10 items)."""

    def raw_results(self):
        """Return ``(path, count)`` tuples, most frequent first, trimmed by the mixin."""
        most_frequent = self._sort_and_trim(self.stats, reverse=True)
        return most_frequent
147 | 
148 | 
class SlowRequests(BaseCommandMixin):
    """List all requests that are considered slow to process (1 second)."""

    # minimum ``time_wait_response`` (inclusive) for a line to be recorded
    threshold = 1000

    def __init__(self):
        self.slow_requests = []

    def __call__(self, line):
        """Record the response time of ``line`` if it reaches the threshold."""
        if line.time_wait_response < self.threshold:
            return
        self.slow_requests.append(line.time_wait_response)

    def raw_results(self):
        """Return the recorded response times, in ascending order."""
        ordered = list(self.slow_requests)
        ordered.sort()
        return ordered
164 | 
165 | 
class SlowRequestsCounter(SlowRequests):
    """Counts requests that are considered slow (1 second)."""

    def raw_results(self):
        """Return how many response times were recorded."""
        recorded = self.slow_requests
        return len(recorded)
171 | 
172 | 
class AverageResponseTime(SlowRequests):
    """Global average response time it took downstream servers to answer requests."""

    # with a threshold of 0 only negative response times are left out
    threshold = 0

    def raw_results(self):
        """Return the mean of the recorded times, rounded to two decimals.

        ``0.0`` is returned if nothing was recorded.
        """
        if not self.slow_requests:
            return 0.0

        count = float(len(self.slow_requests))
        return round(sum(self.slow_requests) / count, 2)
184 | 
185 | 
class AverageWaitingTime(BaseCommandMixin):
    """Return the average time valid requests wait on HAProxy before being dispatched to a backend server."""

    def __init__(self):
        self.waiting_times = []

    def __call__(self, line):
        """Record ``time_wait_queues`` of ``line``; negative values are skipped."""
        if line.time_wait_queues < 0:
            return
        self.waiting_times.append(line.time_wait_queues)

    def raw_results(self):
        """Return the mean of the recorded times, rounded to two decimals.

        ``0.0`` is returned if nothing was recorded.
        """
        if not self.waiting_times:
            return 0.0

        count = float(len(self.waiting_times))
        return round(sum(self.waiting_times) / count, 2)
203 | 
204 | 
class ServerLoad(AttributeCounterMixin, BaseCommandMixin):
    """Tally requests per downstream server."""

    # line attribute whose values are tallied by AttributeCounterMixin
    attribute_name = 'server_name'
209 | 
210 | 
class QueuePeaks(BaseCommandMixin):
    """Give stats about queue peaks in HAProxy.

    When servers can not handle all incoming requests, they have to wait on HAProxy.
    On every log line there is an account for how many requests have been piled up.

    A queue peak is defined by the biggest value on the backend queue
    on a series of log lines that are between log lines with the queue empty.
    """

    def __init__(self):
        # key generated from the accept date -> (backend queue, accept date)
        self.requests = {}
        # only series whose peak is above this value are reported
        self.threshold = 1

    @staticmethod
    def _generate_key(date):
        """Create a suitable unique key out of a python datetime.datetime object.

        The key is the unix timestamp (local time, as computed by
        ``time.mktime``) of ``date`` with its microseconds as decimals.
        """
        # get the unix timestamp out of the date,
        # after removing the microseconds from it
        no_microseconds = date.replace(microsecond=0)
        unixtime = time.mktime(no_microseconds.timetuple())

        # add back the microseconds to the key, as decimals;
        # ``microsecond`` is always a count of millionths of a second, so the
        # divisor must not depend on how many digits the value happens to have
        # (otherwise 5 microseconds would turn into half a second)
        return unixtime + date.microsecond / 1_000_000

    def __call__(self, line):
        """Store the backend queue of ``line``, keyed by its accept date.

        Lines that generate the same key overwrite each other.
        """
        key = self._generate_key(line.accept_date)
        self.requests[key] = (line.queue_backend, line.accept_date)

    @staticmethod
    def _peak_data(peak, span, started, finished):
        """Build the dictionary that describes one queue peak."""
        return {
            'peak': peak,
            'span': span,
            'started': started,
            'finished': finished,
        }

    def raw_results(self):
        """Return the list of queue peaks, in chronological order.

        Each peak is a dictionary with the biggest queue seen (``peak``), how
        many log lines had requests queued (``span``), the date of the first
        of those lines (``started``) and the date of the line that found the
        queue empty again (``finished``). For a series that is still open on
        the last line, ``finished`` is the date of that last line.
        """
        peaks = []
        current_peak = 0
        current_span = 0
        first_with_queue = None
        requests_on_queue = 0
        timestamp = None

        # keys are unique, so sorting the items sorts by key only
        for _, (requests_on_queue, timestamp) in sorted(self.requests.items()):
            if requests_on_queue > 0:
                current_peak = max(current_peak, requests_on_queue)
                current_span += 1
                # set when the queue starts
                if first_with_queue is None:
                    first_with_queue = timestamp
                continue

            # the queue is flushed: record the series if it was big enough
            if current_peak > self.threshold:
                peaks.append(
                    self._peak_data(
                        current_peak, current_span, first_with_queue, timestamp
                    )
                )
            # always start over, so that a series below the threshold
            # does not leak its span/start into the next one
            current_peak = 0
            current_span = 0
            first_with_queue = None

        # case of a series that does not end
        if requests_on_queue > 0 and current_peak > self.threshold:
            peaks.append(
                self._peak_data(
                    current_peak, current_span, first_with_queue, timestamp
                )
            )

        return peaks

    def print_data(self):
        """Return one text line per peak, dates in ISO 8601 format."""
        lines = []
        for peak_info in self.raw_results():
            started = peak_info['started'].isoformat()
            finished = peak_info['finished'].isoformat()
            lines.append(
                f'- peak: {peak_info["peak"]} '  # noqa: Q000
                f'- span: {peak_info["span"]} '  # noqa: Q000
                f'- started: {started} '
                f'- finished: {finished}\n'
            )
        return ''.join(lines)

    def json_data(self):
        """Return the peaks with their dates turned into ISO 8601 strings."""
        data = self.raw_results()
        for peak_info in data:
            peak_info['started'] = peak_info['started'].isoformat()
            peak_info['finished'] = peak_info['finished'].isoformat()
        return data
308 | 
309 | 
class ConnectionType(BaseCommandMixin):
    """Tally requests per their SSL usage (either yes or no).

    This only works if the request path contains the default port for SSL (443).
    """

    def __init__(self):
        self.https = 0
        self.non_https = 0

    def __call__(self, line):
        """Add ``line`` to the SSL or to the non-SSL tally, as per ``is_https``."""
        if not line.is_https:
            self.non_https += 1
            return
        self.https += 1

    def raw_results(self):
        """Return a ``(https, non_https)`` tuple of counters."""
        return self.https, self.non_https

    def print_data(self):
        """Return both counters as a two-line text."""
        secure, plain = self.raw_results()
        return '\n'.join((f'- https: {secure}', f'- http: {plain}'))

    def json_data(self):
        """Return both counters as a list of single-item dictionaries."""
        secure, plain = self.raw_results()
        return [{'https': secure}, {'http': plain}]
336 | 
337 | 
class RequestsPerMinute(BaseCommandMixin):
    """Report the count of requests per minute.

    Combine it with time constrains (`-s` and `-d`) otherwise the output will be long.
    """

    def __init__(self):
        # unix timestamp of the time slot -> number of requests
        self.requests = defaultdict(int)

    def generate_key(self, accept_date):
        """Return the unix timestamp of ``accept_date`` truncated to the minute."""
        minute_start = accept_date.replace(second=0, microsecond=0)
        return time.mktime(minute_start.timetuple())

    def __call__(self, line):
        """Count ``line`` on the time slot of its accept date."""
        self.requests[self.generate_key(line.accept_date)] += 1

    def raw_results(self):
        """Return the list of requests sorted by the timestamp."""
        return sorted(self.requests.items(), key=lambda slot_info: slot_info[0])

    def print_data(self):
        """Return one ``- ISO date: count`` text line per time slot."""
        return ''.join(
            f'- {datetime.fromtimestamp(slot).isoformat()}: {count}\n'
            for slot, count in self.raw_results()
        )

    def json_data(self):
        """Return a list of single-item ``{ISO date: count}`` dictionaries."""
        return [
            {datetime.fromtimestamp(slot).isoformat(): count}
            for slot, count in self.raw_results()
        ]
374 | 
375 | 
class RequestsPerHour(RequestsPerMinute):
    """Report the count of requests per hour.

    Combine it with time constrains (`-s` and `-d`) otherwise the output will be long.
    """

    def generate_key(self, accept_date):
        """Return the unix timestamp of ``accept_date`` truncated to the hour."""
        hour_start = accept_date.replace(minute=0, second=0, microsecond=0)
        return time.mktime(hour_start.timetuple())
388 | 
389 | 
class Print(BaseCommandMixin):
    """Returns the raw lines to be printed."""

    def __call__(self, line):
        """Print the raw text of ``line`` as soon as it is received."""
        raw_text = line.raw_line
        print(raw_text)

    def raw_results(self):
        """Nothing is gathered: lines are printed right away."""
        return None

    def results(self, output=None):
        """Print nothing at the end, whatever the ``output`` format."""
        return None
401 | 


--------------------------------------------------------------------------------
/tests/test_commands.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from datetime import timedelta
  3 | from haproxy import commands
  4 | 
  5 | import pytest
  6 | 
  7 | 
  8 | def check_output(cmd, output, expected, capsys):
  9 |     """Validate the output of commands."""
 10 |     name = cmd.command_line_name().upper()
 11 |     cmd.results(output=output)
 12 |     output_text = capsys.readouterr().out
 13 |     if output == 'json':
 14 |         assert f'{{"{name}": {expected}}}' in output_text
 15 |     else:
 16 |         assert f'{name}\n====' in output_text
 17 |         assert f'====\n{expected}\n' in output_text
 18 | 
 19 | 
 20 | @pytest.mark.parametrize(
 21 |     ('klass', 'expected'),
 22 |     [
 23 |         (commands.StatusCodesCounter, 'status_codes_counter'),
 24 |         (commands.AverageResponseTime, 'average_response_time'),
 25 |         (commands.Counter, 'counter'),
 26 |         (commands.IpCounter, 'ip_counter'),
 27 |     ],
 28 | )
 29 | def test_commands_names(klass, expected):
 30 |     """Check that the command line name of command classes are generated correctly."""
 31 |     assert klass.command_line_name() == expected
 32 | 
 33 | 
 34 | def test_counter_results():
 35 |     """Test the Counter command.
 36 | 
 37 |     It plain and simply counts all the lines passed to it.
 38 |     """
 39 |     cmd = commands.Counter()
 40 |     assert cmd.raw_results() == 0
 41 |     for x in range(3):
 42 |         cmd(x)
 43 | 
 44 |     assert cmd.raw_results() == 3
 45 | 
 46 | 
 47 | @pytest.mark.parametrize('output', [None, 'json'])
 48 | def test_counter_output(capsys, output):
 49 |     """Test the Counter command.
 50 | 
 51 |     It plain and simply counts all the lines passed to it.
 52 |     """
 53 |     cmd = commands.Counter()
 54 |     for x in range(3):
 55 |         cmd(x)
 56 |     check_output(cmd, output, 3, capsys)
 57 | 
 58 | 
 59 | def test_http_methods_results(line_factory):
 60 |     """Test the HTTPMethods command.
 61 | 
 62 |     It creates a breakdown of how many times each HTTP verb has been used.
 63 |     """
 64 |     cmd = commands.HttpMethods()
 65 |     assert cmd.raw_results() == {}
 66 |     for verb, count in (('POST', 4), ('GET', 3), ('PUT', 2)):
 67 |         line = line_factory(http_request=f'{verb} /path/to/image HTTP/1.1')
 68 |         for _ in range(count):
 69 |             cmd(line)
 70 |     results = cmd.raw_results()
 71 |     assert len(results) == 3
 72 |     assert results['POST'] == 4
 73 |     assert results['GET'] == 3
 74 |     assert results['PUT'] == 2
 75 | 
 76 | 
 77 | @pytest.mark.parametrize(
 78 |     ('output', 'expected'),
 79 |     [(None, '- PUT: 2\n- GET: 1'), ('json', '[{"PUT": 2}, {"GET": 1}]')],
 80 | )
 81 | def test_http_methods_output(line_factory, capsys, output, expected):
 82 |     """Test the HTTPMethods command.
 83 | 
 84 |     It creates a breakdown of how many times each HTTP verb has been used.
 85 |     """
 86 |     cmd = commands.HttpMethods()
 87 |     for verb, count in (('GET', 1), ('PUT', 2)):
 88 |         line = line_factory(http_request=f'{verb} /path/to/image HTTP/1.1')
 89 |         for _ in range(count):
 90 |             cmd(line)
 91 |     check_output(cmd, output, expected, capsys)
 92 | 
 93 | 
 94 | def test_ip_counter_results(line_factory):
 95 |     """Test the IpCounter command.
 96 | 
 97 |     It creates a breakdown of how many times each IP has been used.
 98 |     """
 99 |     cmd = commands.IpCounter()
100 |     assert cmd.raw_results() == {}
101 |     for ip, count in (('192.168.0.1', 4), ('172.4.3.2', 3), ('8.7.6.5', 2)):
102 |         line = line_factory(headers=f' {{{ip}}}')
103 |         for _ in range(count):
104 |             cmd(line)
105 |     results = cmd.raw_results()
106 |     assert len(results) == 3
107 |     assert results['192.168.0.1'] == 4
108 |     assert results['172.4.3.2'] == 3
109 |     assert results['8.7.6.5'] == 2
110 | 
111 | 
@pytest.mark.parametrize(
    ('output', 'expected'),
    [
        (None, '- 172.4.3.2: 3\n- 8.7.6.5: 2'),
        ('json', '[{"172.4.3.2": 3}, {"8.7.6.5": 2}]'),
    ],
)
def test_ip_counter_output(line_factory, capsys, output, expected):
    """Test the output of the IpCounter command.

    The most seen IP is reported first.
    """
    cmd = commands.IpCounter()
    usage = {'172.4.3.2': 3, '8.7.6.5': 2}
    for ip, count in usage.items():
        line = line_factory(headers=f' {{{ip}}}')
        for _ in range(count):
            cmd(line)
    check_output(cmd, output, expected, capsys)
130 | 
131 | 
def test_top_ips_results(line_factory):
    """Test the TopIps command.

    It lists the 10 most used IPs, and how many times each of them was used.
    """
    cmd = commands.TopIps()
    assert cmd.raw_results() == []

    # the IP ending in ``x`` is used ``x`` times (so ``.0`` is never seen)
    for hits in range(11):
        line = line_factory(headers=f' {{192.168.0.{hits}}}')
        for _ in range(hits):
            cmd(line)

    results = cmd.raw_results()
    assert len(results) == 10
    for position, hits in enumerate(range(10, 0, -1)):
        assert results[position] == (f'192.168.0.{hits}', hits)
155 | 
156 | 
def test_top_ips_print_results(line_factory):
    """Test the TopIps command.

    Ensure that when they are printed, only 10 results are shown.
    """
    cmd = commands.TopIps()
    # the IP ending in ``x`` is used ``x`` times
    for hits in range(14):
        line = line_factory(headers=f' {{192.168.0.{hits}}}')
        for _ in range(hits):
            cmd(line)

    printed_lines = [text for text in cmd.print_data().split('\n') if text]
    assert len(printed_lines) == 10
    assert printed_lines[0] == '- 192.168.0.13: 13'
    assert printed_lines[-1] == '- 192.168.0.4: 4'
172 | 
173 | 
@pytest.mark.parametrize(
    ('output', 'expected'),
    [
        (None, '- 192.168.0.2: 2\n- 192.168.0.1: 1'),
        ('json', '[{"192.168.0.2": 2}, {"192.168.0.1": 1}]'),
    ],
)
def test_top_ips_output(line_factory, capsys, output, expected):
    """Test the output of the TopIps command.

    It lists the most used IPs, and how many times each of them was used.
    """
    cmd = commands.TopIps()
    assert cmd.raw_results() == []

    # the IP ending in ``x`` is used ``x`` times
    for hits in range(3):
        line = line_factory(headers=f' {{192.168.0.{hits}}}')
        for _ in range(hits):
            cmd(line)
    check_output(cmd, output, expected, capsys)
193 | 
194 | 
def test_status_codes_counter_results(line_factory):
    """Test the StatusCodesCounter command.

    It creates a breakdown of which status codes have been used and how many each.
    """
    cmd = commands.StatusCodesCounter()
    assert cmd.raw_results() == {}

    usage = (('200', 4), ('301', 3), ('500', 2))
    for status_code, count in usage:
        line = line_factory(status=status_code)
        for _ in range(count):
            cmd(line)

    results = cmd.raw_results()
    assert len(results) == 3
    for status_code, count in usage:
        assert results[status_code] == count
211 | 
212 | 
@pytest.mark.parametrize(
    ('output', 'expected'),
    [(None, '- 301: 3\n- 500: 2'), ('json', '[{"301": 3}, {"500": 2}]')],
)
def test_status_codes_counter_output(line_factory, capsys, output, expected):
    """Test the output of the StatusCodesCounter command.

    The most returned status code is reported first.
    """
    cmd = commands.StatusCodesCounter()
    usage = {'301': 3, '500': 2}
    for status_code, count in usage.items():
        line = line_factory(status=status_code)
        for _ in range(count):
            cmd(line)
    check_output(cmd, output, expected, capsys)
228 | 
229 | 
def test_request_path_counter_results(line_factory):
    """Test the RequestPathCounter command.

    It creates a breakdown of how many times each URL path has been used.
    """
    cmd = commands.RequestPathCounter()
    assert cmd.raw_results() == {}

    usage = (('/image/one', 4), ('/video/two', 3), ('/article/three', 2))
    for path, count in usage:
        line = line_factory(http_request=f'GET {path} HTTP/1.1')
        for _ in range(count):
            cmd(line)

    results = cmd.raw_results()
    assert len(results) == 3
    for path, count in usage:
        assert results[path] == count
246 | 
247 | 
@pytest.mark.parametrize(
    ('output', 'expected'),
    [
        (None, '- /video/two: 3\n- /article/three: 2'),
        ('json', '[{"/video/two": 3}, {"/article/three": 2}]'),
    ],
)
def test_request_path_counter_output(line_factory, capsys, output, expected):
    """Test the output of the RequestPathCounter command.

    The most requested URL path is reported first.
    """
    cmd = commands.RequestPathCounter()
    usage = {'/video/two': 3, '/article/three': 2}
    for path, count in usage.items():
        line = line_factory(http_request=f'GET {path} HTTP/1.1')
        for _ in range(count):
            cmd(line)
    check_output(cmd, output, expected, capsys)
266 | 
267 | 
def test_slow_requests_results(line_factory):
    """Test the SlowRequests command.

    It lists all requests that took 1000 milliseconds or more to respond,
    sorted from the fastest to the slowest.
    """
    cmd = commands.SlowRequests()
    assert cmd.raw_results() == []

    response_times = (1003, 987, 456, 2013, 45000, 1000, 3200, 999)
    for response_time in response_times:
        cmd(line_factory(tr=response_time))

    assert cmd.raw_results() == [1000, 1003, 2013, 3200, 45000]
279 | 
280 | 
@pytest.mark.parametrize(
    ('output', 'expected'),
    [
        (None, [1000, 1003, 2013, 3200, 45000]),
        ('json', '[1000, 1003, 2013, 3200, 45000]'),
    ],
)
def test_slow_requests_output(line_factory, capsys, output, expected):
    """Test the output of the SlowRequests command.

    It lists all requests that took 1000 milliseconds or more to respond.
    """
    cmd = commands.SlowRequests()
    response_times = (1003, 987, 456, 2013, 45000, 1000, 3200, 999)
    for response_time in response_times:
        cmd(line_factory(tr=response_time))
    check_output(cmd, output, expected, capsys)
297 | 
298 | 
def test_top_request_paths_results(line_factory):
    """Test the TopRequestPaths command.

    It lists the 10 most used URL paths, and how many times each of them was used.
    """
    cmd = commands.TopRequestPaths()
    assert cmd.raw_results() == []

    # the path ending in ``x`` is requested ``x`` times (so ``/file/0`` never is)
    for hits in range(11):
        line = line_factory(http_request=f'GET /file/{hits} HTTP/1.1')
        for _ in range(hits):
            cmd(line)

    results = cmd.raw_results()
    assert len(results) == 10
    for position, hits in enumerate(range(10, 0, -1)):
        assert results[position] == (f'/file/{hits}', hits)
322 | 
323 | 
def test_top_request_paths_print_results(line_factory):
    """Test the TopRequestPaths command.

    Ensure that when they are printed, only 10 results are shown.
    """
    cmd = commands.TopRequestPaths()
    # the path ending in ``x`` is requested ``x`` times
    for hits in range(14):
        line = line_factory(http_request=f'GET /file/{hits} HTTP/1.1')
        for _ in range(hits):
            cmd(line)

    printed_lines = [text for text in cmd.print_data().split('\n') if text]
    assert len(printed_lines) == 10
    assert printed_lines[0] == '- /file/13: 13'
    assert printed_lines[-1] == '- /file/4: 4'
339 | 
340 | 
341 | @pytest.mark.parametrize(
342 |     ('output', 'expected'),
343 |     [
344 |         (None, '- /file/2: 2\n- /file/1: 1'),
345 |         ('json', '[{"/file/2": 2}, {"/file/1": 1}]'),
346 |     ],
347 | )
348 | def test_top_request_paths_output(line_factory, capsys, output, expected):
349 |     """Test the TopRequestPaths command.
350 | 
351 |     It lists the 10 most used URL paths, and how much where they used.
352 |     """
353 |     cmd = commands.TopRequestPaths()
354 |     for path, count in ((f'/file/{x}', x) for x in range(3)):
355 |         line = line_factory(http_request=f'GET {path} HTTP/1.1')
356 |         for _ in range(count):
357 |             cmd(line)
358 |     check_output(cmd, output, expected, capsys)
359 | 
360 | 
361 | def test_slow_requests_counter_results(line_factory):
362 |     """Test the SlowRequestsCounter command.
363 | 
364 |     It counts how many requests took more than 1000 milliseconds to complete.
365 |     """
366 |     cmd = commands.SlowRequestsCounter()
367 |     assert cmd.raw_results() == 0
368 |     for total_time in (1003, 987, 456, 2013, 45000, 1000, 3200, 999):
369 |         cmd(line_factory(tr=total_time))
370 |     results = cmd.raw_results()
371 |     assert results == 5
372 | 
373 | 
374 | @pytest.mark.parametrize('output', [None, 'json'])
375 | def test_slow_requests_counter_output(line_factory, capsys, output):
376 |     """Test the SlowRequestsCounter command.
377 | 
378 |     It counts how many requests took more than 1000 milliseconds to complete.
379 |     """
380 |     cmd = commands.SlowRequestsCounter()
381 |     for total_time in (1003, 987, 456, 2013, 45000, 1000, 3200, 999):
382 |         cmd(line_factory(tr=total_time))
383 |     check_output(cmd, output, 5, capsys)
384 | 
385 | 
386 | @pytest.mark.parametrize(
387 |     ('series', 'average'),
388 |     [
389 |         ((1003, 987, 456, 2013, 1000, 3200, 999), 1379.71),
390 |         ((110, -1, 110), 110),  # aborted connections are ignored
391 |         ((45, 30, 0), 25),  # responses that take 0 milliseconds are still counted
392 |     ],
393 | )
394 | def test_average_response_time_results(line_factory, series, average):
395 |     """Test the AverageResponseTime command.
396 | 
397 |     Returns the average response time of all valid requests.
398 |     """
399 |     cmd = commands.AverageResponseTime()
400 |     assert cmd.raw_results() == 0.0
401 |     for total_time in series:
402 |         cmd(line_factory(tr=total_time))
403 |     results = cmd.raw_results()
404 |     assert results == average
405 | 
406 | 
407 | @pytest.mark.parametrize('output', [None, 'json'])
408 | def test_average_response_time_output(line_factory, capsys, output):
409 |     """Test the AverageResponseTime command.
410 | 
411 |     Returns the average response time of all valid requests.
412 |     """
413 |     cmd = commands.AverageResponseTime()
414 |     for total_time in (
415 |         40,
416 |         30,
417 |     ):
418 |         cmd(line_factory(tr=total_time))
419 |     check_output(cmd, output, 35.0, capsys)
420 | 
421 | 
422 | @pytest.mark.parametrize(
423 |     ('series', 'average'),
424 |     [
425 |         ((1003, 987, 456, 2013, 1000, 3200, 999), 1379.71),
426 |         ((110, -1, 110), 110),  # aborted connections are ignored
427 |         ((45, 30, 0), 25),  # requests that do not wait at all are still counted
428 |     ],
429 | )
430 | def test_average_waiting_time_results(line_factory, series, average):
431 |     """Test the AverageWaitingTime command.
432 | 
433 |     Returns the average time requests had to wait to get processed.
434 |     """
435 |     cmd = commands.AverageWaitingTime()
436 |     assert cmd.raw_results() == 0.0
437 |     for wait_time in series:
438 |         cmd(line_factory(tw=wait_time))
439 |     results = cmd.raw_results()
440 |     assert results == average
441 | 
442 | 
443 | @pytest.mark.parametrize('output', [None, 'json'])
444 | def test_average_waiting_time_output(line_factory, capsys, output):
445 |     """Test the AverageWaitingTime command.
446 | 
447 |     Returns the average time requests had to wait to get processed.
448 |     """
449 |     cmd = commands.AverageWaitingTime()
450 |     for wait_time in (40, 30):
451 |         cmd(line_factory(tw=wait_time))
452 |     check_output(cmd, output, 35.0, capsys)
453 | 
454 | 
455 | def test_server_load_results(line_factory):
456 |     """Test the ServerLoad command.
457 | 
458 |     It creates a breakdown of how many requests each server processed.
459 |     """
460 |     cmd = commands.ServerLoad()
461 |     assert cmd.raw_results() == {}
462 |     for name, count in (('server4', 4), ('server3', 3), ('server5', 5)):
463 |         line = line_factory(server_name=name)
464 |         for _ in range(count):
465 |             cmd(line)
466 |     results = cmd.raw_results()
467 |     assert len(results) == 3
468 |     assert results['server5'] == 5
469 |     assert results['server4'] == 4
470 |     assert results['server3'] == 3
471 | 
472 | 
473 | @pytest.mark.parametrize(
474 |     ('output', 'expected'),
475 |     [
476 |         (None, '- server5: 5\n- server3: 3'),
477 |         ('json', '[{"server5": 5}, {"server3": 3}]'),
478 |     ],
479 | )
480 | def test_server_load_output(line_factory, capsys, output, expected):
481 |     """Test the ServerLoad command.
482 | 
483 |     It creates a breakdown of how many requests each server processed.
484 |     """
485 |     cmd = commands.ServerLoad()
486 |     for name, count in (('server3', 3), ('server5', 5)):
487 |         line = line_factory(server_name=name)
488 |         for _ in range(count):
489 |             cmd(line)
490 |     check_output(cmd, output, expected, capsys)
491 | 
492 | 
493 | def test_queue_peaks_no_lines_results(line_factory):
494 |     """Test the QueuePeaks command.
495 | 
496 |     If there are no log lines processed, nothing should be returned.
497 |     """
498 |     cmd = commands.QueuePeaks()
499 |     assert cmd.raw_results() == []
500 | 
501 | 
502 | def test_queue_peaks_no_queues(line_factory):
503 |     """Test the QueuePeaks command.
504 | 
505 |     If there are no log lines processed, nothing should be returned.
506 |     """
507 |     cmd = commands.QueuePeaks()
508 |     now = datetime.now()
509 |     for second in range(4):
510 |         accept_date = now.replace(second=second).strftime('%d/%b/%Y:%H:%M:%S.%f')
511 |         cmd(line_factory(queue_backend=0, accept_date=accept_date))
512 |     assert len(cmd.requests) == 4
513 |     assert cmd.raw_results() == []
514 | 
515 | 
516 | @pytest.mark.parametrize(
517 |     ('date', 'expected_key'),
518 |     [
519 |         ('10/Dec/2019:15:40:12.12345', 1575988812.12345),
520 |         ('15/Jan/2017:05:23:05.456', 1484454185.456),
521 |         ('15/Jan/2017:05:23:05.0', 1484454185.0),
522 |     ],
523 | )
524 | def test_queue_peaks_generated_keys(line_factory, date, expected_key):
525 |     """Test the QueuePeaks command.
526 | 
527 |     Check how the keys for the requests dictionary are generated.
528 |     """
529 |     cmd = commands.QueuePeaks()
530 |     cmd(line_factory(queue_backend=0, accept_date=date))
531 |     keys = list(cmd.requests.keys())
532 |     # account for a 1h difference, if UTC is used (as in CI)
533 |     assert expected_key - 4000 <= keys[0] <= expected_key + 4000
534 |     # check that microseconds are exact though
535 |     assert expected_key - int(expected_key) == keys[0] - int(keys[0])
536 | 
537 | 
538 | def test_queue_peaks_details(line_factory):
539 |     """Test the QueuePeaks command.
540 | 
541 |     Check the information returned for each peak.
542 |     """
543 |     cmd = commands.QueuePeaks()
544 |     for microseconds, queue in enumerate([0, 4, 7, 8, 19, 4, 0]):
545 |         line = line_factory(
546 |             queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
547 |         )
548 |         cmd(line)
549 |     day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
550 |     results = cmd.raw_results()
551 |     assert len(results) == 1
552 |     peak_info = results[0]
553 |     assert peak_info['peak'] == 19
554 |     assert peak_info['span'] == 5
555 |     assert peak_info['started'] == day.replace(microsecond=100000)
556 |     assert peak_info['finished'] == day.replace(microsecond=600000)
557 | 
558 | 
559 | def test_queue_peaks_multiple_sorted(line_factory):
560 |     """Test the QueuePeaks command.
561 | 
562 |     Peaks information are returned sorted by date.
563 |     """
564 |     cmd = commands.QueuePeaks()
565 |     for microseconds, queue in enumerate([0, 4, 0, 0, 19, 4, 0]):
566 |         line = line_factory(
567 |             queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
568 |         )
569 |         cmd(line)
570 |     day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
571 |     results = cmd.raw_results()
572 |     assert len(results) == 2
573 |     assert results[0]['peak'] == 4
574 |     assert results[0]['started'] == day.replace(microsecond=100000)
575 |     assert results[1]['peak'] == 19
576 |     assert results[1]['started'] == day.replace(microsecond=400000)
577 | 
578 | 
579 | def test_queue_peaks_already_started(line_factory):
580 |     """Test the QueuePeaks command.
581 | 
582 |     Check that QueuePeaks handles the corner case of a peak that has already started.
583 |     """
584 |     cmd = commands.QueuePeaks()
585 |     for microseconds, queue in enumerate([4, 19, 0]):
586 |         line = line_factory(
587 |             queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
588 |         )
589 |         cmd(line)
590 |     day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
591 |     results = cmd.raw_results()
592 |     assert len(results) == 1
593 |     peak_info = results[0]
594 |     assert peak_info['peak'] == 19
595 |     assert peak_info['span'] == 2
596 |     assert peak_info['started'] == day
597 |     assert peak_info['finished'] == day.replace(microsecond=200000)
598 | 
599 | 
600 | def test_queue_peaks_did_not_finish(line_factory):
601 |     """Test the QueuePeaks command.
602 | 
603 |     Check that QueuePeaks handles the corner case of a peak that does not finish.
604 |     """
605 |     cmd = commands.QueuePeaks()
606 |     for microseconds, queue in enumerate([4, 19, 12]):
607 |         line = line_factory(
608 |             queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
609 |         )
610 |         cmd(line)
611 |     day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
612 |     results = cmd.raw_results()
613 |     assert len(results) == 1
614 |     peak_info = results[0]
615 |     assert peak_info['peak'] == 19
616 |     assert peak_info['span'] == 3
617 |     assert peak_info['started'] == day
618 |     assert peak_info['finished'] == day.replace(microsecond=200000)
619 | 
620 | 
621 | @pytest.mark.parametrize(
622 |     ('output', 'expected'),
623 |     [
624 |         (
625 |             None,
626 |             '- peak: 4 - span: 1 - started: 2017-01-15T05:23:05.100000 - finished: 2017-01-15T05:23:05.200000\n'
627 |             '- peak: 19 - span: 2 - started: 2017-01-15T05:23:05.400000 - finished: 2017-01-15T05:23:05.600000',
628 |         ),
629 |         (
630 |             'json',
631 |             '[{"peak": 4, "span": 1, "started": "2017-01-15T05:23:05.100000", "finished": "2017-01-15T05:23:05.200000"}, '
632 |             '{"peak": 19, "span": 2, "started": "2017-01-15T05:23:05.400000", "finished": "2017-01-15T05:23:05.600000"}]',
633 |         ),
634 |     ],
635 | )
636 | def test_queue_peaks_output(line_factory, capsys, output, expected):
637 |     """Test the QueuePeaks command.
638 | 
639 |     Peaks information are returned sorted by date.
640 |     """
641 |     cmd = commands.QueuePeaks()
642 |     for microseconds, queue in enumerate([0, 4, 0, 0, 19, 4, 0]):
643 |         line = line_factory(
644 |             queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
645 |         )
646 |         cmd(line)
647 |     check_output(cmd, output, expected, capsys)
648 | 
649 | 
650 | def test_connection_type_results(line_factory):
651 |     """Test the ConnectionType command.
652 | 
653 |     It counts how many requests have been made by SSL, and which ones not.
654 |     """
655 |     cmd = commands.ConnectionType()
656 |     assert cmd.raw_results() == (0, 0)
657 |     for path, count in (('/Virtual:443/something', 4), ('/something', 2)):
658 |         line = line_factory(http_request=f'GET {path} HTTP/1.1')
659 |         for _ in range(count):
660 |             cmd(line)
661 |     assert cmd.raw_results() == (4, 2)
662 | 
663 | 
664 | @pytest.mark.parametrize(
665 |     ('output', 'expected'),
666 |     [(None, '- https: 4\n- http: 2'), ('json', '[{"https": 4}, {"http": 2}]')],
667 | )
668 | def test_connection_type_output(line_factory, capsys, output, expected):
669 |     """Test the ConnectionType command.
670 | 
671 |     It counts how many requests have been made by SSL, and which ones not.
672 |     """
673 |     cmd = commands.ConnectionType()
674 |     for path, count in (('/Virtual:443/something', 4), ('/something', 2)):
675 |         line = line_factory(http_request=f'GET {path} HTTP/1.1')
676 |         for _ in range(count):
677 |             cmd(line)
678 |     check_output(cmd, output, expected, capsys)
679 | 
680 | 
681 | def test_requests_per_minute_results(line_factory):
682 |     """Test the RequestsPerMinute command.
683 | 
684 |     It counts how many requests have been made per minute.
685 |     """
686 |     cmd = commands.RequestsPerMinute()
687 |     assert cmd.raw_results() == []
688 |     now = datetime.now()
689 |     # to avoid leaping into the next/previous minute with the timedeltas below
690 |     now = now.replace(second=30)
691 |     microseconds = timedelta(microseconds=200)
692 |     seconds = timedelta(seconds=5)
693 |     minutes = timedelta(minutes=5)
694 |     hours = timedelta(hours=2)
695 |     dates = [
696 |         now,
697 |         now + microseconds,
698 |         now - microseconds,
699 |         now + seconds,
700 |         now - seconds,
701 |         now + minutes,
702 |         now - minutes,
703 |         now + hours,
704 |         now - hours,
705 |     ]
706 |     for time in dates:
707 |         cmd(line_factory(accept_date=f'{time:%d/%b/%Y:%H:%M:%S.%f}'))
708 |     results = cmd.raw_results()
709 |     assert len(results) == 5
710 |     assert results[0][1] == 1
711 |     assert results[1][1] == 1
712 |     assert results[2][1] == 5  # now and the +- microseconds and +- seconds
713 |     assert results[3][1] == 1
714 |     assert results[4][1] == 1
715 | 
716 | 
717 | @pytest.mark.parametrize('output', [None, 'json'])
718 | def test_requests_per_minute_output(line_factory, capsys, output):
719 |     """Test the RequestsPerMinute command.
720 | 
721 |     It counts how many requests have been made per minute.
722 |     """
723 |     cmd = commands.RequestsPerMinute()
724 |     now = datetime.now()
725 |     for time in (now, now + timedelta(hours=2)):
726 |         cmd(line_factory(accept_date=f'{time:%d/%b/%Y:%H:%M:%S.%f}'))
727 |     name = cmd.command_line_name().upper()
728 |     cmd.results(output=output)
729 |     output_text = capsys.readouterr().out
730 |     if output == 'json':
731 |         assert f'{{"{name}": ' in output_text
732 |         # this is quite fuzzy to not have to fiddle with the date formatting
733 |         # change it once we hit 2030 :)
734 |         assert ':00": 1}, {"202' in output_text
735 |     else:
736 |         assert f'{name}\n====' in output_text
737 |         # this is quite fuzzy to not have to fiddle with the date formatting
738 |         assert ':00: 1\n- ' in output_text
739 | 
740 | 
741 | def test_requests_per_hour_results(line_factory):
742 |     """Test the RequestsPerHour command.
743 | 
744 |     It counts how many requests have been made per hour.
745 |     """
746 |     cmd = commands.RequestsPerHour()
747 |     assert cmd.raw_results() == []
748 |     specific_date = datetime(year=2022, month=12, day=3, hour=14, minute=10, second=30)
749 |     minutes = timedelta(minutes=5)
750 |     hours = timedelta(hours=2)
751 |     dates = [
752 |         specific_date,
753 |         specific_date + minutes,
754 |         specific_date - minutes,
755 |         specific_date + hours,
756 |         specific_date - hours,
757 |         specific_date + hours * 2,
758 |         specific_date - hours * 2,
759 |     ]
760 |     for time in dates:
761 |         cmd(line_factory(accept_date=f'{time:%d/%b/%Y:%H:%M:%S.%f}'))
762 |     results = cmd.raw_results()
763 |     assert len(results) == 5
764 |     assert results[0][1] == 1
765 |     assert results[1][1] == 1
766 |     assert results[2][1] == 3  # now and the +- minutes
767 |     assert results[3][1] == 1
768 |     assert results[4][1] == 1
769 | 
770 | 
771 | @pytest.mark.parametrize('output', [None, 'json'])
772 | def test_requests_per_hour_output(line_factory, capsys, output):
773 |     """Test the RequestsPerHour command.
774 | 
775 |     It counts how many requests have been made per hour.
776 |     """
777 |     cmd = commands.RequestsPerHour()
778 |     now = datetime.now()
779 |     for time in (now, now + timedelta(hours=2)):
780 |         cmd(line_factory(accept_date=f'{time:%d/%b/%Y:%H:%M:%S.%f}'))
781 |     name = cmd.command_line_name().upper()
782 |     cmd.results(output=output)
783 |     output_text = capsys.readouterr().out
784 |     if output == 'json':
785 |         assert f'{{"{name}": ' in output_text
786 |         # this is quite fuzzy to not have to fiddle with the date formatting
787 |         # change it once we hit 2030 :)
788 |         assert ':00": 1}, {"202' in output_text
789 |     else:
790 |         assert f'{name}\n====' in output_text
791 |         # this is quite fuzzy to not have to fiddle with the date formatting
792 |         assert ':00: 1\n- ' in output_text
793 | 
794 | 
795 | def test_print_results_and_output(line_factory, capsys):
796 |     """Test the Print command.
797 | 
798 |     It simply prints the verbatim line.
799 |     """
800 |     cmd = commands.Print()
801 |     assert cmd.raw_results() is None
802 |     for path in ('/first-thing-to-do', '/second/thing/to-do'):
803 |         cmd(line_factory(http_request=f'GET {path} HTTP/1.1'))
804 |     assert cmd.raw_results() is None
805 |     output_text = capsys.readouterr().out
806 |     lines = output_text.split('\n')
807 |     assert len(lines) == 3
808 |     assert '/first-thing-to-do' in lines[0]
809 |     assert '/second/thing/to-do' in lines[1]
810 |     assert lines[2] == ''
811 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     {one line to give the program's name and a brief idea of what it does.}
635 |     Copyright (C) {year}  {name of author}
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     {project}  Copyright (C) {year}  {fullname}
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------