├── .dockerignore ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── pypi.yml │ └── test.yml ├── .gitignore ├── Dockerfile ├── Dockerfile.demo ├── LICENSE ├── Makefile ├── Pipfile ├── Pipfile.lock ├── README.md ├── ballcone.service ├── ballcone.spec ├── ballcone ├── __init__.py ├── __main__.py ├── core.py ├── dao.py ├── syslog_protocol.py ├── templates │ ├── chart_callback.js │ ├── layout.html │ ├── nginx.html │ ├── root.html │ ├── service.html │ └── sql.html ├── test_dao.py └── web_ballcone.py ├── demo ├── nginx.conf └── supervisord.conf ├── docker-compose.yml ├── pyproject.toml ├── setup.py └── tools ├── duck-upgrade.sh └── monet2duck.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .idea 3 | .mypy_cache 4 | __pycache__ 5 | db 6 | build 7 | dist 8 | .DS_Store 9 | monetdb 10 | balcone.db* 11 | venv 12 | *.swp 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | Dockerfile.* linguist-language=Dockerfile 2 | ballcone.service linguist-language=INI 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | - package-ecosystem: "pip" 8 | directory: "/" 9 | schedule: 10 | interval: "monthly" 11 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | id-token: write 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: 
Set up Python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: '3.11' 19 | - name: Install dependencies 20 | run: | 21 | python3 -m pip install --upgrade pip 22 | pip install pipenv 23 | pipenv install --python "3.11" --dev --system --deploy 24 | - name: Create release 25 | id: create_release 26 | uses: actions/create-release@v1.1.4 27 | env: 28 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 29 | with: 30 | tag_name: ${{ github.ref }} 31 | release_name: Ballcone ${{ github.ref }} 32 | draft: false 33 | prerelease: false 34 | - name: Build 35 | run: python3 -m build --sdist --wheel . 36 | - name: Publish 37 | uses: pypa/gh-action-pypi-publish@release/v1 38 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: '3.11' 19 | - name: Install dependencies 20 | run: | 21 | python3 -m pip install --upgrade pip 22 | pip install pipenv 23 | pipenv install --python "3.11" --dev --system --deploy 24 | - name: Lint with Mypy 25 | run: mypy ballcone tools 26 | - name: Lint with Ruff 27 | uses: chartboost/ruff-action@v1 28 | - name: Test with unittest 29 | run: python3 -m unittest discover 30 | - name: Build 31 | run: python3 -m build --sdist --wheel . 
32 | - name: Build with PyInstaller 33 | run: | 34 | pyinstaller ballcone.spec 35 | dist/ballcone --version 36 | docker: 37 | needs: build 38 | runs-on: ubuntu-latest 39 | steps: 40 | - name: Checkout 41 | uses: actions/checkout@v4 42 | - name: Set up Docker Buildx 43 | uses: docker/setup-buildx-action@v3 44 | - name: Login to DockerHub 45 | if: github.event_name != 'pull_request' 46 | uses: docker/login-action@v3 47 | with: 48 | username: ${{ secrets.DOCKERHUB_USERNAME }} 49 | password: ${{ secrets.DOCKERHUB_TOKEN }} 50 | - name: Update Docker Hub description 51 | if: github.event_name != 'pull_request' 52 | uses: peter-evans/dockerhub-description@v4 53 | with: 54 | username: ${{ secrets.DOCKERHUB_USERNAME }} 55 | password: ${{ secrets.DOCKERHUB_TOKEN }} 56 | repository: dustalov/ballcone 57 | short-description: ${{ github.event.repository.description }} 58 | - name: Login to GitHub Container Registry 59 | if: github.event_name != 'pull_request' 60 | uses: docker/login-action@v3 61 | with: 62 | registry: ghcr.io 63 | username: ${{ github.repository_owner }} 64 | password: ${{ secrets.GITHUB_TOKEN }} 65 | - name: Build and push Ballcone 66 | uses: docker/build-push-action@v5 67 | with: 68 | context: . 69 | push: ${{ github.event_name != 'pull_request' }} 70 | tags: | 71 | dustalov/ballcone:latest 72 | ghcr.io/dustalov/ballcone:latest 73 | - name: Build and push Ballcone Demo 74 | uses: docker/build-push-action@v5 75 | with: 76 | context: . 
77 | file: Dockerfile.demo 78 | push: ${{ github.event_name != 'pull_request' }} 79 | tags: | 80 | dustalov/ballcone:demo 81 | ghcr.io/dustalov/ballcone:demo 82 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # PyCharm 77 | .idea/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | .DS_Store 134 | Makefile.local 135 | .rsyncignore 136 | *.duckdb 137 | *.duckdb.wal 138 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | MAINTAINER Dmitry Ustalov 4 | 5 | EXPOSE 65140/udp 8080/tcp 6 | 7 | WORKDIR /usr/src/app 8 | 9 | COPY ballcone /usr/src/app/ballcone/ 10 | 11 | COPY pyproject.toml Pipfile Pipfile.lock setup.py README.md LICENSE /usr/src/app/ 12 | 13 | RUN \ 14 | apt-get update && \ 15 | apt-get install --no-install-recommends -y -o Dpkg::Options::="--force-confold" tini && \ 16 | apt-get clean && \ 17 | rm -rf /var/lib/apt/lists/* && \ 18 | python3 -m pip install --upgrade pip && \ 19 | pip install pipenv && \ 20 | pipenv install --system 21 | 22 | ENTRYPOINT ["/usr/bin/tini", "--"] 23 | 24 | CMD /usr/local/bin/ballcone 25 | -------------------------------------------------------------------------------- /Dockerfile.demo: -------------------------------------------------------------------------------- 1 | FROM 
dustalov/ballcone 2 | 3 | MAINTAINER Dmitry Ustalov 4 | 5 | EXPOSE 8080/tcp 8888/tcp 6 | 7 | RUN \ 8 | apt-get update && \ 9 | apt-get install --no-install-recommends -y -o Dpkg::Options::="--force-confold" nginx supervisor && \ 10 | apt-get clean && \ 11 | rm -rf /var/lib/apt/lists/* 12 | 13 | COPY demo/supervisord.conf /etc/supervisor/conf.d/supervisord-nginx.conf 14 | 15 | COPY demo/nginx.conf /etc/nginx/sites-available/default 16 | 17 | ENTRYPOINT [] 18 | 19 | CMD /usr/bin/supervisord 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2023 Dmitry Ustalov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export LANG := en_US.UTF-8 2 | 3 | MAKEFLAGS += --warn-undefined-variables 4 | MAKEFLAGS += --no-builtin-rules 5 | 6 | DOCKER := $(if $(shell which podman),podman,docker) 7 | 8 | PIPENV := nice pipenv run 9 | 10 | test: 11 | $(PIPENV) mypy ballcone tools 12 | $(PIPENV) ruff check . 13 | $(PIPENV) python3 -m unittest discover 14 | 15 | run: ballcone/__main__.py 16 | $(PIPENV) "$<" 17 | 18 | pyinstaller: ballcone.spec 19 | $(PIPENV) pyinstaller "$<" 20 | 21 | install-systemd: 22 | cp -Rvf dist/ballcone /usr/local/bin/ballcone 23 | mkdir -pv /var/lib/ballcone 24 | chown -Rv nobody:nobody /var/lib/ballcone 25 | cp -Rvf ballcone.service /etc/systemd/system/ 26 | systemctl daemon-reload 27 | systemctl enable ballcone 28 | systemctl restart ballcone 29 | 30 | docker: 31 | $(DOCKER) build -f Dockerfile --rm -t ballcone . 32 | 33 | pipenv: 34 | pipenv install --dev 35 | 36 | -include Makefile.local 37 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.python.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | ballcone = {editable = true, path = "."} 8 | 9 | [dev-packages] 10 | ballcone = {editable = true, path = ".", extras = ["dev"]} 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ballcone 2 | 3 | Ballcone is a fast and lightweight server-side Web analytics solution. It requires no JavaScript on your website. 
4 | 5 | [![GitHub Tests][github_tests_badge]][github_tests_link] [![Docker Hub][docker_hub_badge]][docker_hub_link] 6 | 7 | [github_tests_badge]: https://github.com/dustalov/ballcone/workflows/Unit%20Tests/badge.svg?branch=master 8 | [github_tests_link]: https://github.com/dustalov/ballcone/actions?query=workflow%3A%22Unit+Tests%22 9 | [docker_hub_badge]: https://img.shields.io/docker/pulls/dustalov/ballcone 10 | [docker_hub_link]: https://hub.docker.com/r/dustalov/ballcone 11 | 12 | ## Screenshots 13 | 14 | ![Ballcone](https://user-images.githubusercontent.com/40397/80874920-4c9b9f00-8cc3-11ea-9848-18384d826e9c.png) 15 | 16 | ![Ballcone: petrovich](https://user-images.githubusercontent.com/40397/80874963-4f968f80-8cc3-11ea-8342-666fe3be139c.png) 17 | 18 | ## Design Goals 19 | 20 | * **Simplicity.** Ballcone requires *almost* zero set-up as it prefers convention over configuration 21 | * **Efficiency.** Ballcone performs *lightning-fast analytic queries* over data thanks to the underlying columnar database 22 | * **Specificity.** Ballcone aims at providing visual insights on the HTTP access logs with *no bloat* 23 | 24 | ## Features 25 | 26 | * No JavaScript snippets required 27 | * GeoIP mapping with the [GeoLite2](https://dev.maxmind.com/geoip/geoip2/geolite2/) database 28 | * Extraction of platform and browser information from User-Agent 29 | 30 | ## Architecture 31 | 32 | Ballcone captures the `access_log` entries exported in JSON by nginx via the bundled [syslog logger](https://nginx.org/en/docs/syslog.html) (`65140/udp`). These entries are stored in the embedded DuckDB database. Ballcone uses it to perform data manipulation and analytic queries. Also, Ballcone provides a convenient Web interface (`8080/tcp`) for accessing and observing the gathered data. 
33 | 34 | ``` 35 | +-----------+ +------------+ 36 | HTTP | | syslog | | HTTP 37 | <-------->+ nginx +----------->+ Ballcone +<--------> 38 | | | JSON | | 39 | +-----------+ +------------+ 40 | | DuckDB | 41 | +------------+ 42 | ``` 43 | 44 | For better performance, Ballcone inserts data in batches, committing them to DuckDB every few seconds (five seconds by default). 45 | 46 | ## Requirements 47 | 48 | * [Python](https://www.python.org/) 3.9 49 | * [DuckDB](https://duckdb.org/) ≥ 0.4.0 50 | * [nginx](https://nginx.org/) ≥ 1.7.1 51 | 52 | ## Demo 53 | 54 | This repository contains an example configuration of nginx and Ballcone. Just run the container from Docker Hub or build it locally. nginx will be available at and Ballcone will be available at . 55 | 56 | ```shell 57 | docker-compose up 58 | # or 59 | docker run --rm -p '127.0.0.1:8888:80' -p '127.0.0.1:8080:8080' dustalov/ballcone:demo 60 | ``` 61 | 62 | ## Naming and Meaning 63 | 64 | **Ballcone** has two meanings. 65 | 66 | First, it is the romanization of the Russian word *балкон* that means a [balcony](https://en.wikipedia.org/wiki/Balcony). You go to the balcony to breath some fresh air and look down at the things outside. 67 | 68 | Second, if a *ball* is inscribed in a *cone*, it resembles the all-seeing eye (help wanted: [dustalov/ballcone#8](https://github.com/dustalov/ballcone/issues/8)). 69 | 70 | Regardless of the meaning you prefer, Ballcone helps you to watch your websites. 71 | 72 | ## Installation 73 | 74 | The simplest way to get started is to run `make pipenv` after cloning the repository. Just make sure [Pipenv](https://pipenv.pypa.io/en/latest/) is installed. 75 | 76 | ### Getting Ballcone 77 | 78 | Running the Docker image is the simplest way to get started. Docker Hub contains automated builds of the Ballcone source code from GitHub: . 
The following command runs Ballcone on `127.0.0.1`: the syslog protocol will be available via `65140/udp`, the Web interface will be available via `8080/tcp`, and the data will be stored in the `/var/lib/ballcone` directory on the host machine. 79 | 80 | ```shell 81 | docker run -p '127.0.0.1:8080:8080' -p '127.0.0.1:65140:65140/udp' -v '/var/lib/ballcone:/usr/src/app/duckdb' --restart=unless-stopped dustalov/ballcone ballcone -sh '0.0.0.0' -wh '0.0.0.0' -d 'duckdb/ballcone.duckdb' 82 | ``` 83 | 84 | However, Docker is not the only option. Alternatively, Ballcone can be packaged into a standalone executable using [PyInstaller](http://www.pyinstaller.org/) and runned as a [systemd](https://systemd.io/) service (see [ballcone.service](ballcone.service) as an example): 85 | 86 | ```shell 87 | make pyinstaller 88 | sudo make install-systemd 89 | sudo systemctl start ballcone 90 | ``` 91 | 92 | Finally, Ballcone can be installed directly on the host machine for manual runs: 93 | 94 | ```shell 95 | pip3 install -e git+https://github.com/dustalov/ballcone@master#egg=ballcone 96 | ``` 97 | 98 | Note that `ballcone` without arguments creates the `ballcone.duckdb` database file inside the current directory. 99 | 100 | ### Configuring nginx 101 | 102 | You need to define the JSON-compatible log format for your service in the nginx configuration file. Let us call it `ballcone_json_example`. This format is similar to the one used in Matomo (see [matomo-log-analytics](https://github.com/matomo-org/matomo-log-analytics)). It should be put *before* the `server` context. 
103 | 104 | ```Nginx 105 | log_format ballcone_json_example escape=json 106 | '{' 107 | '"service": "example", ' 108 | '"ip": "$remote_addr", ' 109 | '"host": "$host", ' 110 | '"path": "$request_uri", ' 111 | '"status": "$status", ' 112 | '"referrer": "$http_referer", ' 113 | '"user_agent": "$http_user_agent", ' 114 | '"length": $bytes_sent, ' 115 | '"generation_time_milli": $request_time, ' 116 | '"date": "$time_iso8601"' 117 | '}'; 118 | ``` 119 | 120 | Then, you should put this `access_log` directive *inside* the `server` context to transfer logs via the [syslog protocol](https://nginx.org/en/docs/syslog.html). 121 | 122 | ```Nginx 123 | access_log syslog:server=127.0.0.1:65140 ballcone_json_example; 124 | ``` 125 | 126 | Please look at the complete example of nginx configuration in [demo/nginx.conf](demo/nginx.conf). 127 | 128 | ## Roadmap 129 | 130 | Roadmap is available at . 131 | 132 | ## Alternatives 133 | 134 | * Web analytics solutions: [Matomo](https://matomo.org/), [Google Analytics](http://google.com/analytics/), [Yandex.Metrica](https://metrica.yandex.com/), etc. 135 | * Columnar data storages: [ClickHouse](https://clickhouse.tech/), [Citus](https://github.com/citusdata/citus), [MariaDB ColumnStore](https://mariadb.com/kb/en/mariadb-columnstore/), etc. 136 | * Log management: [Graylog](https://www.graylog.org/), [Fluentd](https://www.fluentd.org/), [Elasticsearch](https://github.com/elastic/elasticsearch), etc. 137 | 138 | ## Copyright 139 | 140 | Copyright © 2020–2023 Dmitry Ustalov. See [LICENSE](LICENSE) for details. 
141 | -------------------------------------------------------------------------------- /ballcone.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Ballcone 3 | After=network.target 4 | 5 | [Service] 6 | User=nobody 7 | WorkingDirectory=/var/lib/ballcone 8 | Environment="TMPDIR=/var/lib/ballcone/pyinstaller" 9 | ExecStart=/usr/local/bin/ballcone -d '/var/lib/ballcone/duckdb/ballcone.duckdb' 10 | ExecStartPre=-/bin/mkdir -p /var/lib/ballcone/pyinstaller /var/lib/ballcone/duckdb 11 | Restart=on-failure 12 | 13 | [Install] 14 | WantedBy=multi-user.target 15 | -------------------------------------------------------------------------------- /ballcone.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | import geolite2 4 | 5 | geolite2_city = geolite2.geolite2_database() 6 | 7 | block_cipher = None 8 | 9 | a = Analysis(['ballcone/__main__.py'], 10 | datas=[(geolite2_city, '_maxminddb_geolite2'), ('ballcone/templates', 'templates')], 11 | hiddenimports=['cmath', 'pkg_resources.py2_warn', 'numpy'], 12 | hookspath=[], 13 | runtime_hooks=[], 14 | excludes=[], 15 | win_no_prefer_redirects=False, 16 | win_private_assemblies=False, 17 | cipher=block_cipher, 18 | noarchive=False) 19 | 20 | pyz = PYZ(a.pure, a.zipped_data, 21 | cipher=block_cipher) 22 | 23 | exe = EXE(pyz, 24 | a.scripts, 25 | a.binaries, 26 | a.zipfiles, 27 | a.datas, 28 | [], 29 | name='ballcone', 30 | debug=False, 31 | bootloader_ignore_signals=False, 32 | strip=False, 33 | upx=True, 34 | upx_exclude=[], 35 | runtime_tmpdir=None, 36 | console=True) 37 | -------------------------------------------------------------------------------- /ballcone/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0' 2 | __author__ = 'Dmitry Ustalov' 3 | __license__ = 'MIT' 4 | 
-------------------------------------------------------------------------------- /ballcone/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | __author__ = 'Dmitry Ustalov' 4 | 5 | import argparse 6 | import asyncio 7 | import logging 8 | import os 9 | import sys 10 | from contextlib import suppress 11 | from pathlib import Path 12 | from typing import cast 13 | 14 | import aiohttp_jinja2 15 | import duckdb 16 | import jinja2 17 | from aiohttp import web 18 | from geolite2 import geolite2 19 | 20 | from ballcone import __version__ 21 | from ballcone.core import Ballcone 22 | from ballcone.dao import DAO 23 | from ballcone.syslog_protocol import SyslogProtocol 24 | from ballcone.web_ballcone import WebBallcone 25 | 26 | 27 | def main() -> None: 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('-v', '--version', action='version', 30 | version=f'Ballcone v{__version__} (DuckDB v{duckdb.__version__})') # type: ignore 31 | parser.add_argument('-sh', '--syslog-host', default='127.0.0.1', help='syslog host to bind') 32 | parser.add_argument('-sp', '--syslog-port', default=65140, type=int, help='syslog UDP port to bind') 33 | parser.add_argument('-wh', '--web-host', default='127.0.0.1', help='Web interface host to bind') 34 | parser.add_argument('-wp', '--web-port', default=8080, type=int, help='Web interface TCP port to bind') 35 | parser.add_argument('-d', '--database', default='ballcone.duckdb', help='Path to DuckDB database') 36 | parser.add_argument('-p', '--period', default=5, type=int, help='Persistence period, in seconds') 37 | parser.add_argument('-t', '--top-limit', default=5, type=int, help='Limit for top-n queries') 38 | parser.add_argument('--days', default=30, type=int, help='Default number of days in plots') 39 | args = parser.parse_args() 40 | 41 | logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG) 42 | 43 | if args.database == ':memory:': 44 
| connection = duckdb.connect(args.database) 45 | else: 46 | connection = duckdb.connect(str(Path(args.database).resolve())) 47 | 48 | dao = DAO(connection) 49 | 50 | geoip = geolite2.reader() 51 | 52 | ballcone = Ballcone(dao, geoip, args.top_limit, args.period) 53 | 54 | asyncio.ensure_future(ballcone.persist_timer()) 55 | 56 | loop = asyncio.get_event_loop() 57 | 58 | syslog = loop.create_datagram_endpoint(lambda: SyslogProtocol(ballcone), 59 | local_addr=(args.syslog_host, args.syslog_port)) 60 | 61 | # PyInstaller 62 | if getattr(sys, 'frozen', False): 63 | jinja2_loader = cast(jinja2.BaseLoader, jinja2.FileSystemLoader( 64 | os.path.join(getattr(sys, '_MEIPASS'), 'templates') 65 | )) 66 | else: 67 | jinja2_loader = cast(jinja2.BaseLoader, jinja2.PackageLoader('ballcone')) 68 | 69 | app = web.Application() 70 | aiohttp_jinja2.setup(app, loader=jinja2_loader) 71 | handler = WebBallcone(ballcone, args.days) 72 | app.router.add_get('/', handler.root, name='root') 73 | app.router.add_get('/services', handler.services, name='services') 74 | app.router.add_get('/services/{service}', handler.service, name='service') 75 | app.router.add_get('/services/{service}/average/{field}', handler.average_or_count, name='average') 76 | app.router.add_get('/services/{service}/count/{field}', handler.average_or_count, name='count') 77 | app.router.add_get('/services/{service}/count_group/{group}', handler.count_group, name='count_group') 78 | app.router.add_get('/sql', handler.sql, name='sql') 79 | app.router.add_post('/sql', handler.sql, name='sql') 80 | app.router.add_get('/nginx', handler.nginx, name='nginx') 81 | 82 | try: 83 | loop.run_until_complete(syslog) 84 | web.run_app(app, host=args.web_host, port=args.web_port, loop=loop) 85 | finally: 86 | with suppress(RuntimeError): 87 | for task in asyncio.all_tasks(): 88 | task.cancel() 89 | 90 | with suppress(asyncio.CancelledError): 91 | loop.run_until_complete(task) 92 | 93 | geoip.close() 94 | 95 | try: 96 | 
ballcone.persist() 97 | finally: 98 | connection.close() 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /ballcone/core.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Dmitry Ustalov' 2 | 3 | import asyncio 4 | import logging 5 | import re 6 | from datetime import datetime, date, timedelta, timezone 7 | from ipaddress import IPv4Address, IPv6Address 8 | from typing import Optional, Dict, Deque, Tuple, Any, cast 9 | 10 | import simplejson 11 | from geolite2 import maxminddb 12 | 13 | from .dao import DAO, Entry 14 | 15 | VALID_SERVICE = re.compile(r'\A[\w]+\Z') 16 | 17 | 18 | class BallconeJSONEncoder(simplejson.JSONEncoder): 19 | def default(self, obj: Any) -> str: # type: ignore 20 | if isinstance(obj, date): 21 | return obj.isoformat() 22 | 23 | if isinstance(obj, (IPv4Address, IPv6Address)): 24 | return str(obj) 25 | 26 | return cast(str, super().default(obj)) 27 | 28 | 29 | class Ballcone: 30 | def __init__(self, dao: DAO, geoip: maxminddb.reader.Reader, 31 | top_limit: int = 5, persist_period: int = 5) -> None: 32 | self.dao = dao 33 | self.geoip = geoip 34 | self.top_limit = top_limit 35 | self.persist_period = persist_period 36 | self.queue: Dict[str, Deque[Entry]] = {} 37 | self.json_dumps = BallconeJSONEncoder().encode 38 | 39 | async def persist_timer(self) -> None: 40 | while await asyncio.sleep(self.persist_period, result=True): 41 | self.persist() 42 | 43 | def persist(self) -> None: 44 | for service, queue in self.queue.items(): 45 | try: 46 | count = self.dao.batch_insert_into_from_deque(service, queue) 47 | 48 | if count: 49 | logging.debug(f'Inserted {count} entries for service {service}') 50 | except RuntimeError: 51 | logging.exception('Please check if the query is correct') 52 | 53 | def check_service(self, service: Optional[str], should_exist: bool = False) -> bool: 54 | return ( 55 | service is 
not None 56 | and VALID_SERVICE.match(service) is not None 57 | and (not should_exist or self.dao.table_exists(service)) 58 | ) 59 | 60 | @staticmethod 61 | def iso_code(geoip: maxminddb.reader.Reader, ip: str) -> Optional[str]: 62 | geo = geoip.get(ip) 63 | 64 | return geo['country'].get('iso_code', None) if geo and 'country' in geo else None 65 | 66 | @staticmethod 67 | def days_before(stop_date: Optional[date] = None, days: int = 30) -> Tuple[date, date]: 68 | stop = stop_date if stop_date else datetime.now(timezone.utc).date() 69 | 70 | start = stop - timedelta(days=days - 1) 71 | 72 | return start, stop 73 | -------------------------------------------------------------------------------- /ballcone/dao.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Dmitry Ustalov' 2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from datetime import datetime, date 6 | from ipaddress import ip_address, IPv4Address, IPv6Address 7 | from typing import Generator, NamedTuple, Optional, List, Sequence, Union, Any, NewType, Tuple, Deque, Set, cast 8 | 9 | import duckdb 10 | import pypika.enums 11 | from pypika import Query, Column, Field, Parameter, Table, Order, functions as fn, analytics as an 12 | from pypika.queries import QueryBuilder 13 | 14 | smallint = NewType('smallint', int) 15 | 16 | TYPES = { 17 | datetime: 'TIMESTAMP', 18 | str: 'VARCHAR', 19 | smallint: 'SMALLINT', 20 | int: 'INTEGER', 21 | float: 'DOUBLE', 22 | IPv4Address: 'VARCHAR', 23 | IPv6Address: 'VARCHAR', 24 | bool: 'BOOLEAN' 25 | } 26 | 27 | 28 | def is_empty(obj: Any) -> bool: 29 | if hasattr(obj, '__len__'): 30 | return not len(obj) 31 | 32 | return obj is None 33 | 34 | 35 | def optional_types(annotation: Any) -> Tuple[Set[Any], bool]: 36 | if hasattr(annotation, '__args__'): 37 | types = set(annotation.__args__) 38 | null = type(None) in types 39 | 40 | if null: 41 | types.remove(type(None)) 42 | 43 | return types, null 44 | else: 
45 | return {annotation}, False 46 | 47 | 48 | def python_type_to_sql(annotation: Any) -> str: 49 | types, null = optional_types(annotation) 50 | first_type = next(iter(types)) 51 | 52 | if null: 53 | return TYPES[first_type] 54 | else: 55 | return TYPES[first_type] + ' NOT NULL' 56 | 57 | 58 | def sql_value_to_python(name: str, annotation: Any, value: Any) -> Any: 59 | args, null = optional_types(annotation) 60 | first_type = next(iter(args)) 61 | 62 | if first_type == datetime: 63 | return value 64 | 65 | if first_type == smallint: 66 | return int(value) 67 | 68 | if first_type in (IPv4Address, IPv6Address): 69 | return ip_address(value) 70 | 71 | return None if is_empty(value) and null else first_type(value) 72 | 73 | 74 | class Entry(NamedTuple): 75 | datetime: datetime 76 | host: str 77 | path: str 78 | status: smallint 79 | length: int 80 | generation_time: float 81 | referer: Optional[str] 82 | # IP address and derivatives 83 | ip: Union[IPv4Address, IPv6Address] 84 | country_iso_code: Optional[str] 85 | # derivatives from User-Agent 86 | platform_name: Optional[str] 87 | platform_version: Optional[str] 88 | browser_name: Optional[str] 89 | browser_version: Optional[str] 90 | is_robot: Optional[bool] 91 | 92 | @staticmethod 93 | def from_values(entry: Sequence[Any]) -> 'Entry': 94 | return Entry(*(sql_value_to_python(name, annotation, value) 95 | for (name, annotation), value in zip(Entry.__annotations__.items(), entry))) 96 | 97 | @staticmethod 98 | def as_value(value: Any, annotation: Any = None) -> Any: 99 | if isinstance(value, (IPv4Address, IPv6Address)): 100 | return str(value) 101 | 102 | if annotation: 103 | _, null = optional_types(annotation) 104 | 105 | return None if is_empty(value) and null else value 106 | else: 107 | return value 108 | 109 | def as_values(self) -> Sequence[Any]: 110 | return tuple(self.as_value(getattr(self, name), annotation) 111 | for name, annotation in self.__annotations__.items()) 112 | 113 | 114 | class 
class Count(NamedTuple):
    """One per-day count, optionally attributed to a group value."""
    date: date
    group: Optional[str]
    count: Any  # mypy prints an error if this is an int


class CountResult(NamedTuple):
    """Result of a COUNT query: the query parameters plus its elements."""
    table: str
    field: Optional[str]
    distinct: bool
    ascending: Optional[bool]
    group: Optional[str]
    elements: List[Count]


class Average(NamedTuple):
    """One per-day average together with the underlying sum and count."""
    date: date
    avg: float
    sum: float
    count: Any  # mypy prints an error if this is an int


class AverageResult(NamedTuple):
    """Result of an AVG query: the query parameters plus its elements."""
    table: str
    field: str
    elements: List[Average]


class DAO:
    """Data-access layer over a DuckDB connection.

    Each service gets its own table whose columns mirror the ``Entry``
    fields; all SQL is built with PyPika and logged at DEBUG level.
    """

    def __init__(self, db: duckdb.DuckDBPyConnection) -> None:
        self.db = db
        # One '?' placeholder per Entry field, reused by every INSERT.
        self.placeholders = [Parameter('?') for _ in Entry._fields]

    def size(self) -> int:
        """Return the database size in bytes (0 for an empty database)."""
        return cast(int, self.run('SELECT COALESCE(total_blocks * block_size, 0) FROM pragma_database_size()')[0][0])

    def tables(self) -> Sequence[str]:
        """Return the names of all user tables, sorted alphabetically."""
        master = Table('sqlite_master')
        query = Query.from_('sqlite_master').select(master.name). \
            where(master.type == 'table').distinct(). \
            orderby(master.name)

        sql = str(query)

        logging.debug(sql)

        return [table for table, *_ in self.run(query)]

    def table_exists(self, table: str) -> bool:
        """Check whether *table* exists in the catalog."""
        master = Table('sqlite_master')

        query = Query.from_(master).select(master.name). \
            where((master.type == 'table') & (master.name == table))

        sql = str(query)

        logging.debug(sql)

        return len(self.run(query)) > 0

    def create_table(self, table: str) -> int:
        """Create *table* with one column per Entry field."""
        target = Table(table)

        columns = [Column(name, python_type_to_sql(annotation)) for name, annotation in Entry.__annotations__.items()]

        query = Query.create_table(target).columns(*columns)
        sql = str(query)

        logging.debug(sql)

        with self.transaction() as cursor:
            # NOTE(review): DuckDB's execute() returns the connection, not an
            # int — the declared return type looks aspirational; kept as-is
            # for caller compatibility.
            return cast(int, cursor.execute(sql))

    def drop_table(self, table: str) -> int:
        """Drop *table*."""
        sql = str(Query.drop_table(table))

        logging.debug(sql)

        with self.transaction() as cursor:
            return cast(int, cursor.execute(sql))

    def insert_into(self, table: str, entry: Entry, cursor: Optional[duckdb.DuckDBPyConnection] = None) -> None:
        """Insert a single entry; reuses *cursor* if one is supplied,
        otherwise opens (and commits) its own transaction."""
        target = Table(table)

        query = Query.into(target).insert(*self.placeholders)
        sql, values = str(query), entry.as_values()

        logging.debug(sql + ' -- ' + str(values))

        if cursor:
            cursor.execute(sql, values)
        else:
            with self.transaction() as cursor:
                cursor.execute(sql, values)

    def batch_insert_into(self, table: str, entries: Sequence[Entry]) -> int:
        """Insert *entries* inside one transaction; return how many were written."""
        if not entries:
            return 0

        with self.transaction() as cursor:
            count = 0

            for entry in entries:
                self.insert_into(table, entry, cursor=cursor)
                count += 1

            return count

    def batch_insert_into_from_deque(self, table: str, entries: Deque[Entry]) -> int:
        """Drain *entries* into *table* inside one transaction.

        The deque is consumed from the left as entries are written.
        """
        if not entries:
            return 0

        with self.transaction() as cursor:
            count = 0

            while entries:
                entry = entries.popleft()
                self.insert_into(table, entry, cursor=cursor)
                count += 1

            return count

    def select(self, table: str, start: Optional[date] = None, stop: Optional[date] = None,
               limit: Optional[int] = None) -> List[Entry]:
        """Fetch entries ordered by timestamp, optionally windowed by date."""
        target = Table(table)

        query = Query.from_(target).select('*').orderby(target.datetime).limit(limit)

        query = self.apply_dates(query, target, start, stop)

        rows = cast(List[Union[List[Any], Entry]], self.run(query))

        # Convert raw rows to Entry tuples in place.
        for i, current in enumerate(rows):
            rows[i] = Entry.from_values(cast(List[Any], current))

        return cast(List[Entry], rows)

    def select_average(self, table: str, field: str, start: Optional[date] = None,
                       stop: Optional[date] = None) -> AverageResult:
        """Per-day AVG/SUM/COUNT of *field*, optionally windowed by date."""
        target = Table(table)
        target_field = Field(field, table=target)
        date = fn.Cast(target.datetime, pypika.enums.SqlTypes.DATE, alias='date')

        # NOTE(review): the ORDER BY references target.date, i.e. the 'date'
        # alias of the CAST above — presumably resolved by DuckDB; confirm.
        query = Query.from_(target).select(date,
                                           fn.Avg(target_field, alias='average'),
                                           fn.Sum(target_field, alias='sum'),
                                           fn.Count(target_field, alias='count')). \
            groupby(date).orderby(target.date)

        query = self.apply_dates(query, target, start, stop)

        result = AverageResult(table=table, field=field, elements=[])

        for current in self.run(query):
            result.elements.append(Average(
                date=current[0],
                avg=float(current[1]),
                # SUM is NULL when COUNT is zero; report 0 instead.
                sum=float(current[2]) if current[3] else 0.,
                count=int(current[3])
            ))

        return result

    def select_count(self, table: str, field: Optional[str] = None, start: Optional[date] = None,
                     stop: Optional[date] = None) -> CountResult:
        """Per-day COUNT, counting DISTINCT *field* values when a field is given."""
        target = Table(table)
        date = fn.Cast(target.datetime, pypika.enums.SqlTypes.DATE, alias='date')

        count_field = fn.Count(Field(field, table=target) if field else date, alias='count')

        if field:
            count_field = count_field.distinct()

        query = Query.from_(target).select(date, count_field).groupby(date).orderby(date)

        query = self.apply_dates(query, target, start, stop)

        result = CountResult(table=table, field=field, distinct=field is not None, group=None, ascending=None,
                             elements=[])

        for current in self.run(query):
            result.elements.append(Count(
                date=current[0],
                group=None,
                count=int(current[1])
            ))

        return result

    def select_count_group(self, table: str, field: Optional[str], group: str, distinct: bool = False,
                           start: Optional[date] = None, stop: Optional[date] = None,
                           ascending: bool = True, limit: Optional[int] = None) -> CountResult:
        """Per-day counts broken down by *group*, optionally keeping only the
        top *limit* groups per day via a ROW_NUMBER() window."""
        target = Table(table)
        date = fn.Cast(target.datetime, pypika.enums.SqlTypes.DATE, alias='date')

        count_field = fn.Count(Field(field, table=target) if field else date, alias='count')
        order = Order.asc if ascending else Order.desc

        if distinct:
            count_field = count_field.distinct()

        group_field = Field(group, table=target)

        query = Query.from_(target).select(date, group_field.as_('group'), count_field). \
            groupby(date, group_field).orderby(date). \
            orderby(count_field, order=order).orderby(group_field)

        query = self.apply_dates(query, target, start, stop)

        if limit is not None:
            # Rank groups within each day, then keep only the first *limit*.
            window = Query.from_(query).select(query.date, query.group, query.count,
                                               an.RowNumber(alias='row_number').over(query.date).
                                               orderby(query.count, order=order).orderby(query.group))

            query = Query.from_(window).select(window.date, window.group, window.count). \
                where(window.row_number <= limit).orderby(window.date). \
                orderby(window.count, order=order).orderby(window.group)

        result = CountResult(table=table, field=field, distinct=distinct, group=group, ascending=ascending,
                             elements=[])

        for current in self.run(query):
            result.elements.append(Count(
                date=current[0],
                group=current[1],
                count=int(current[2])
            ))

        return result

    def run(self, query: Union[QueryBuilder, str]) -> List[List[Any]]:
        """Execute *query* (PyPika builder or raw SQL) and fetch all rows."""
        sql = str(query) if isinstance(query, QueryBuilder) else query

        logging.debug(sql)

        with self.cursor() as cursor:
            cursor.execute(sql)

            return cast(List[List[Any]], cursor.fetchall())

    @staticmethod
    def apply_dates(query: QueryBuilder, target: Table,
                    start: Optional[date] = None, stop: Optional[date] = None) -> QueryBuilder:
        """Restrict *query* to the inclusive [start, stop] date window."""
        date = fn.Cast(target.datetime, pypika.enums.SqlTypes.DATE, alias='date')

        if start and stop:
            if start == stop:
                return query.where(date == Entry.as_value(start))
            else:
                # PyPika slice syntax emits a BETWEEN clause.
                return query.where(date[Entry.as_value(start):Entry.as_value(stop)])
        elif start:
            return query.where(date >= Entry.as_value(start))
        elif stop:
            return query.where(date <= Entry.as_value(stop))

        return query

    @contextmanager
    def cursor(self) -> Generator[duckdb.DuckDBPyConnection, None, None]:
        """Yield a fresh cursor with a transaction begun; always close it.

        Exceptions propagate to the caller unchanged. (The previous
        ``except RuntimeError as e: raise e`` was a no-op and was removed.)
        """
        cursor = self.db.cursor()

        try:
            cursor.begin()
            yield cursor
        finally:
            cursor.close()

    @contextmanager
    def transaction(self) -> Generator[duckdb.DuckDBPyConnection, None, None]:
        """Like cursor(), but commits if the body finishes without raising."""
        with self.cursor() as cursor:
            yield cursor
            cursor.commit()
# nginx's output cannot be properly parsed by any parser I tried.
# The named group was reconstructed as <message> — the handler below
# reads it via match.group('message').
NGINX_SYSLOG = re.compile(r'\A<[0-9]{1,3}>.*?: (?P<message>.+)\Z')


class SyslogProtocol(asyncio.DatagramProtocol):
    """Receives nginx JSON access-log records over syslog/UDP and queues
    them per service for later batch insertion."""

    def __init__(self, ballcone: Ballcone) -> None:
        super().__init__()
        self.ballcone = ballcone
        self.transport: Optional[asyncio.BaseTransport] = None

    def connection_made(self, transport: asyncio.BaseTransport) -> None:
        self.transport = transport

    def datagram_received(self, data: Union[bytes, str], addr: Tuple[str, int]) -> None:
        """Parse one syslog datagram and append the entry to its service queue.

        Malformed datagrams (bad UTF-8, bad JSON, missing or invalid payload
        fields) are logged at INFO level and dropped instead of raising into
        the event loop.
        """
        try:
            message = data.decode('utf-8') if isinstance(data, bytes) else data
        except UnicodeDecodeError:
            logging.info(f'Malformed UTF-8 received from {addr}')
            return

        match = NGINX_SYSLOG.match(message)

        if not match or not match.group('message'):
            logging.info(f'Missing payload from {addr}: {message}')
            return

        try:
            content = simplejson.loads(match.group('message'))
        except simplejson.JSONDecodeError:
            logging.info(f'Malformed JSON received from {addr}: {message}')
            return

        if 'service' not in content or not content['service']:
            logging.info(f'Missing service field from {addr}: {message}')
            return

        service = content['service'].strip().lower()

        if not self.ballcone.check_service(service, should_exist=False):
            logging.info(f'Malformed service field from {addr}: {message}')
            return

        # Lazily create the table and the in-memory queue for new services.
        if service not in self.ballcone.queue:
            if not self.ballcone.dao.table_exists(service):
                self.ballcone.dao.create_table(service)

            self.ballcone.queue[service] = deque()

        try:
            entry = self._parse_entry(content)
        except (KeyError, TypeError, ValueError) as e:
            # A single bad record must not take down the datagram handler.
            logging.info(f'Malformed entry from {addr}: {message} ({e})')
            return

        self.ballcone.queue[service].append(entry)

    def _parse_entry(self, content: Any) -> Entry:
        """Convert one decoded JSON payload into an Entry.

        Raises KeyError/TypeError/ValueError on missing or invalid fields;
        the caller logs and drops such records.
        """
        current_datetime = dateutil.parser.isoparse(content['date']).astimezone(timezone.utc)

        path = urllib.parse.unquote(content['path'])

        user_agent = httpagentparser.detect(content['user_agent'])

        return Entry(
            datetime=current_datetime,
            host=content['host'],
            path=path,
            status=cast(smallint, int(content['status'])),
            length=int(content['length']),
            generation_time=float(content['generation_time_milli']),
            referer=content['referrer'],
            ip=ip_address(content['ip']),
            country_iso_code=Ballcone.iso_code(self.ballcone.geoip, content['ip']),
            platform_name=user_agent.get('platform', {}).get('name', None),
            platform_version=user_agent.get('platform', {}).get('version', None),
            browser_name=user_agent.get('browser', {}).get('name', None),
            browser_version=user_agent.get('browser', {}).get('version', None),
            is_robot=user_agent.get('bot', None)
        )
9 | 11 | 13 | 14 | 15 | 16 |
17 |
18 |
19 |
20 |

Ballcone

21 | 48 |
49 |
50 |
51 |
52 | {% block content %}{% endblock %} 53 |
54 |
55 |
56 |
57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /ballcone/templates/nginx.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 |
4 |

nginx Configuration

5 |
6 |
7 | 8 |
9 | 11 |
12 |
13 |
14 | 15 |
16 | 18 |
19 |
20 |
21 |
22 | 23 |
24 |
25 |
26 |
27 | {% if error %} 28 |
29 |
    30 | {% for message in error %} 31 |
  • {{ message }}
  • 32 | {% endfor %} 33 |
34 |
35 | {% else %} 36 |
37 | 38 |
39 | 57 |
58 |
59 |
60 |
61 | Copy 62 |
63 |
64 | 71 | {% endif %} 72 | {% endblock %} 73 | -------------------------------------------------------------------------------- /ballcone/templates/root.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 | {% for column in dashboard | batch(2) %} 4 |
5 | {% for service, count in column %} 6 |
7 |
8 |
9 |

{{ service }}

10 |

11 | {{ count }} unique visit{% if count != 1 %}s{% endif %} today 12 |

13 |
14 | 15 | 29 |
30 |
31 |
32 |
33 | {% endfor %} 34 |
35 | {% endfor %} 36 |
37 |

If you see nothing, start configuring your nginx.

38 |

DuckDB consumed {{ size }} byte{% if size != 1 %}s{% endif %} of disk space.

39 |
40 | {% endblock %} 41 | -------------------------------------------------------------------------------- /ballcone/templates/service.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 |
4 |

Overview

5 |
6 |
7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | {% for date, row in overview.items() | reverse %} 17 | 18 | 19 | 20 | 21 | 22 | {% endfor %} 23 | 24 |
DateVisitsUnique IPs
{{ date }}{{ row['visits'] }}{{ row['unique'] }}
25 |
26 |
27 | 28 |
29 |
30 |

Average Generation Time

31 |
32 |
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | {% for element in time.elements | reverse %} 42 | 43 | 44 | 45 | 46 | {% endfor %} 47 | 48 |
DateTime (s)
{{ element.date }}{{ element.avg | round(4) }}
49 |
50 |
51 | 52 |
53 |
54 |

Most Popular Paths

55 |
56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | {% for date, subelements in paths.elements | groupby('date') | reverse %} 66 | {% for element in subelements %} 67 | 68 | {% if loop.first %} 69 | 70 | {% endif %} 71 | 72 | 73 | 74 | {% endfor %} 75 | {% endfor %} 76 | 77 |
DatePathVisits
{{ date }}{{ element.group }}{{ element.count }}
78 |
79 |

Most Popular Browsers

80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | {% for date, subelements in browsers.elements | groupby('date') | reverse %} 90 | {% for element in subelements %} 91 | 92 | {% if loop.first %} 93 | 94 | {% endif %} 95 | 96 | 97 | 98 | {% endfor %} 99 | {% endfor %} 100 | 101 |
DateBrowserVisits
{{ date }}{{ element.group }}{{ element.count }}
102 |
103 | 127 | {% endblock %} 128 | -------------------------------------------------------------------------------- /ballcone/templates/sql.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block content %} 3 |
4 |

SQL Console

5 |
6 |
7 | 8 |
9 | 11 |
12 |
13 |
14 |
15 | 16 |
17 |
18 |
19 |
20 | {% if result %} 21 |
22 |

Result

23 |
24 | 25 | 26 | {% for row in result %} 27 | 28 | {% for column in row %} 29 | 30 | {% endfor %} 31 | 32 | {% endfor %} 33 | 34 |
{{ column }}
35 |
36 |
37 | {% endif %} 38 | {% if error %} 39 |
40 |

Error

41 |
{{ error }}
42 |
43 | {% endif %} 44 | {% endblock %} 45 | -------------------------------------------------------------------------------- /ballcone/test_dao.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from collections import deque 3 | from datetime import datetime, date 4 | from ipaddress import ip_address 5 | from typing import cast 6 | 7 | import duckdb 8 | 9 | from ballcone.dao import DAO, Entry, smallint 10 | 11 | 12 | class TestDAO(unittest.TestCase): 13 | ENTRIES_20200101 = [ 14 | Entry(datetime=datetime(2020, 1, 1, 12), host='example.com', path='/', 15 | status=cast(smallint, 200), length=1024, generation_time=0.1, referer=None, 16 | ip=ip_address('192.168.1.1'), country_iso_code='UNKNOWN', 17 | platform_name='Mac OS', platform_version='X 10.15', 18 | browser_name='Firefox', browser_version='75.0', is_robot=False), 19 | 20 | Entry(datetime=datetime(2020, 1, 1, 12, 15), host='example.com', path='/robots.txt', 21 | status=cast(smallint, 404), length=0, generation_time=0.01, referer=None, 22 | ip=ip_address('192.168.1.1'), country_iso_code='UNKNOWN', 23 | platform_name='Linux', platform_version=None, 24 | browser_name=None, browser_version=None, is_robot=True) 25 | ] 26 | 27 | ENTRIES_20200102 = [ 28 | Entry(datetime=datetime(2020, 1, 2, 23, 59), host='example.com', path='/', 29 | status=cast(smallint, 200), length=256, generation_time=0.01, referer='https://github.com/dustalov', 30 | ip=ip_address('192.168.1.2'), country_iso_code='UNKNOWN', 31 | platform_name='iOS', platform_version='13.3.1', 32 | browser_name='Safari', browser_version='13.0.5', is_robot=False), 33 | 34 | Entry(datetime=datetime(2020, 1, 2, 23, 59, 59), host='example.com', path='/post', 35 | status=cast(smallint, 200), length=512, generation_time=1, referer=None, 36 | ip=ip_address('192.168.1.2'), country_iso_code='UNKNOWN', 37 | platform_name='iOS', platform_version='13.3.1', 38 | browser_name='Safari', browser_version='13.0.5', 
is_robot=False), 39 | ] 40 | 41 | ENTRIES = [*ENTRIES_20200101, *ENTRIES_20200102] 42 | 43 | def setUp(self) -> None: 44 | self.db = duckdb.connect(':memory:') 45 | self.dao = DAO(self.db) 46 | 47 | def tearDown(self) -> None: 48 | self.db.close() 49 | 50 | def test_database_size(self) -> None: 51 | self.assertEqual(0, self.dao.size()) 52 | 53 | def test_create_and_drop_table(self) -> None: 54 | table1 = __name__ + '_1' 55 | table2 = __name__ + '_2' 56 | 57 | self.assertEqual([], self.dao.tables()) 58 | self.assertFalse(self.dao.table_exists(table1)) 59 | 60 | self.dao.create_table(table1) 61 | self.dao.create_table(table2) 62 | 63 | self.assertTrue(self.dao.table_exists(table1)) 64 | self.assertEqual([table1, table2], self.dao.tables()) 65 | 66 | self.dao.drop_table(table1) 67 | 68 | self.assertEqual([table2], self.dao.tables()) 69 | self.assertFalse(self.dao.table_exists(table1)) 70 | 71 | def test_insert_into(self) -> None: 72 | table = __name__ 73 | 74 | self.seed(table, insert_entries=False) 75 | self.assertEqual(0, len(self.dao.select(table))) 76 | 77 | self.dao.insert_into(table, self.ENTRIES[0]) 78 | self.assertEqual(1, len(self.dao.select(table))) 79 | 80 | def test_batch_insert_into_and_select(self) -> None: 81 | table = __name__ 82 | 83 | self.seed(table, insert_entries=False) 84 | self.assertEqual(0, len(self.dao.select(table))) 85 | 86 | self.seed(table, create_table=False) 87 | entries = self.dao.select(table) 88 | 89 | self.assertEqual(self.ENTRIES, entries) 90 | 91 | def test_batch_insert_into_from_deque_and_select(self) -> None: 92 | table = __name__ 93 | 94 | self.seed(table, insert_entries=False) 95 | self.assertEqual(0, len(self.dao.select(table))) 96 | 97 | entries_deque = deque(self.ENTRIES) 98 | self.assertEqual(len(self.ENTRIES), len(entries_deque)) 99 | 100 | count = self.dao.batch_insert_into_from_deque(table, entries_deque) 101 | self.assertEqual(len(self.ENTRIES), count) 102 | self.assertEqual(0, len(entries_deque)) 103 | 104 | entries = 
self.dao.select(table) 105 | self.assertEqual(self.ENTRIES, entries) 106 | 107 | def test_select(self) -> None: 108 | table = __name__ 109 | 110 | self.seed(table) 111 | 112 | before = self.dao.select(table, stop=date(2019, 12, 31)) 113 | self.assertEqual([], before) 114 | 115 | before_exact = self.dao.select(table, stop=date(2020, 1, 1)) 116 | self.assertEqual(self.ENTRIES_20200101, before_exact) 117 | 118 | exact = self.dao.select(table, start=date(2020, 1, 1), stop=date(2020, 1, 1)) 119 | self.assertEqual(self.ENTRIES_20200101, exact) 120 | 121 | after_exact = self.dao.select(table, start=date(2020, 1, 1)) 122 | self.assertEqual(self.ENTRIES, after_exact) 123 | 124 | after = self.dao.select(table, start=date(2020, 1, 2)) 125 | self.assertEqual(self.ENTRIES_20200102, after) 126 | 127 | def test_select_average(self) -> None: 128 | table = __name__ 129 | 130 | self.seed(table) 131 | 132 | before = self.dao.select_average(table, 'generation_time', stop=date(2019, 12, 31)) 133 | self.assertEqual(table, before.table) 134 | self.assertEqual('generation_time', before.field) 135 | self.assertEqual(0, len(before.elements)) 136 | 137 | before_exact = self.dao.select_average(table, 'generation_time', stop=date(2020, 1, 1)) 138 | self.assertEqual(table, before_exact.table) 139 | self.assertEqual('generation_time', before_exact.field) 140 | self.assertEqual(1, len(before_exact.elements)) 141 | self.assertEqual(date(2020, 1, 1), before_exact.elements[0].date) 142 | self.assertEqual(0.055, before_exact.elements[0].avg) 143 | self.assertEqual(len(self.ENTRIES_20200101), before_exact.elements[0].count) 144 | 145 | exact = self.dao.select_average(table, 'generation_time', start=date(2020, 1, 1), stop=date(2020, 1, 1)) 146 | self.assertEqual(table, exact.table) 147 | self.assertEqual('generation_time', exact.field) 148 | self.assertEqual(1, len(exact.elements)) 149 | self.assertEqual(date(2020, 1, 1), exact.elements[0].date) 150 | self.assertEqual(0.055, exact.elements[0].avg) 151 
| self.assertEqual(len(self.ENTRIES_20200101), exact.elements[0].count) 152 | 153 | after_exact = self.dao.select_average(table, 'generation_time', start=date(2020, 1, 1)) 154 | self.assertEqual(table, after_exact.table) 155 | self.assertEqual('generation_time', after_exact.field) 156 | self.assertEqual(2, len(after_exact.elements)) 157 | self.assertEqual(date(2020, 1, 1), after_exact.elements[0].date) 158 | self.assertEqual(0.055, after_exact.elements[0].avg) 159 | self.assertEqual(len(self.ENTRIES_20200101), after_exact.elements[0].count) 160 | self.assertEqual(date(2020, 1, 2), after_exact.elements[1].date) 161 | self.assertEqual(0.505, after_exact.elements[1].avg) 162 | self.assertEqual(len(self.ENTRIES_20200102), after_exact.elements[1].count) 163 | 164 | after = self.dao.select_average(table, 'generation_time', start=date(2020, 1, 2)) 165 | self.assertEqual(table, after.table) 166 | self.assertEqual('generation_time', after.field) 167 | self.assertEqual(1, len(after.elements)) 168 | self.assertEqual(date(2020, 1, 2), after.elements[0].date) 169 | self.assertEqual(0.505, after.elements[0].avg) 170 | self.assertEqual(len(self.ENTRIES_20200102), after.elements[0].count) 171 | 172 | def test_select_count(self) -> None: 173 | table = __name__ 174 | 175 | self.seed(table) 176 | 177 | before = self.dao.select_count(table, stop=date(2019, 12, 31)) 178 | self.assertEqual(table, before.table) 179 | self.assertIsNone(before.field) 180 | self.assertFalse(before.distinct) 181 | self.assertIsNone(before.ascending) 182 | self.assertIsNone(before.group) 183 | self.assertEqual(0, len(before.elements)) 184 | 185 | before_exact = self.dao.select_count(table, stop=date(2020, 1, 1)) 186 | self.assertEqual(table, before_exact.table) 187 | self.assertIsNone(before_exact.field) 188 | self.assertFalse(before_exact.distinct) 189 | self.assertIsNone(before_exact.ascending) 190 | self.assertIsNone(before_exact.group) 191 | self.assertEqual(1, len(before_exact.elements)) 192 | 
self.assertEqual(date(2020, 1, 1), before_exact.elements[0].date) 193 | self.assertEqual(2, before_exact.elements[0].count) 194 | 195 | exact = self.dao.select_count(table, start=date(2020, 1, 1), stop=date(2020, 1, 1)) 196 | self.assertEqual(table, exact.table) 197 | self.assertIsNone(exact.field) 198 | self.assertFalse(exact.distinct) 199 | self.assertIsNone(exact.ascending) 200 | self.assertIsNone(exact.group) 201 | self.assertEqual(1, len(exact.elements)) 202 | self.assertEqual(date(2020, 1, 1), exact.elements[0].date) 203 | self.assertEqual(2, before_exact.elements[0].count) 204 | 205 | after_exact = self.dao.select_count(table, start=date(2020, 1, 1)) 206 | self.assertEqual(table, after_exact.table) 207 | self.assertIsNone(after_exact.field) 208 | self.assertFalse(after_exact.distinct) 209 | self.assertIsNone(after_exact.ascending) 210 | self.assertIsNone(after_exact.group) 211 | self.assertEqual(2, len(after_exact.elements)) 212 | self.assertEqual(date(2020, 1, 1), after_exact.elements[0].date) 213 | self.assertEqual(2, after_exact.elements[0].count) 214 | self.assertEqual(date(2020, 1, 2), after_exact.elements[1].date) 215 | self.assertEqual(2, after_exact.elements[1].count) 216 | 217 | after = self.dao.select_count(table, start=date(2020, 1, 2)) 218 | self.assertEqual(table, after.table) 219 | self.assertIsNone(after.field) 220 | self.assertFalse(after.distinct) 221 | self.assertIsNone(after.ascending) 222 | self.assertIsNone(after.group) 223 | self.assertEqual(1, len(after.elements)) 224 | self.assertEqual(date(2020, 1, 2), after.elements[0].date) 225 | self.assertEqual(2, after.elements[0].count) 226 | 227 | def test_select_count_group(self) -> None: 228 | table = __name__ 229 | 230 | self.seed(table) 231 | 232 | before = self.dao.select_count_group(table, 'ip', 'platform_name', stop=date(2019, 12, 31)) 233 | self.assertEqual(table, before.table) 234 | self.assertEqual('ip', before.field) 235 | self.assertFalse(before.distinct) 236 | 
self.assertTrue(before.ascending) 237 | self.assertEqual('platform_name', before.group) 238 | self.assertEqual(0, len(before.elements)) 239 | 240 | before_exact = self.dao.select_count_group(table, 'ip', 'platform_name', stop=date(2020, 1, 1)) 241 | self.assertEqual(table, before_exact.table) 242 | self.assertEqual('ip', before_exact.field) 243 | self.assertFalse(before_exact.distinct) 244 | self.assertTrue(before_exact.ascending) 245 | self.assertEqual('platform_name', before_exact.group) 246 | self.assertEqual(2, len(before_exact.elements)) 247 | self.assertEqual(date(2020, 1, 1), before_exact.elements[0].date) 248 | self.assertEqual(1, before_exact.elements[0].count) 249 | self.assertEqual(date(2020, 1, 1), before_exact.elements[1].date) 250 | self.assertEqual(1, before_exact.elements[1].count) 251 | 252 | exact = self.dao.select_count_group(table, 'ip', 'platform_name', start=date(2020, 1, 1), stop=date(2020, 1, 1)) 253 | self.assertEqual(table, exact.table) 254 | self.assertEqual('ip', exact.field) 255 | self.assertFalse(exact.distinct) 256 | self.assertTrue(exact.ascending) 257 | self.assertEqual('platform_name', exact.group) 258 | self.assertEqual(2, len(exact.elements)) 259 | self.assertEqual(date(2020, 1, 1), exact.elements[0].date) 260 | self.assertEqual(1, exact.elements[0].count) 261 | self.assertEqual(date(2020, 1, 1), exact.elements[1].date) 262 | self.assertEqual(1, exact.elements[1].count) 263 | 264 | after_exact = self.dao.select_count_group(table, 'ip', 'platform_name', start=date(2020, 1, 1)) 265 | self.assertEqual(table, after_exact.table) 266 | self.assertEqual('ip', after_exact.field) 267 | self.assertFalse(after_exact.distinct) 268 | self.assertTrue(after_exact.ascending) 269 | self.assertEqual('platform_name', after_exact.group) 270 | self.assertEqual(3, len(after_exact.elements)) 271 | self.assertEqual(date(2020, 1, 1), after_exact.elements[0].date) 272 | self.assertEqual(1, after_exact.elements[0].count) 273 | self.assertEqual(date(2020, 
class WebBallcone:
    """aiohttp request handlers backing the Ballcone web UI and JSON API."""

    def __init__(self, ballcone: Ballcone, days: int = 7) -> None:
        self.ballcone = ballcone
        self.days = days  # default reporting window, in days

    @aiohttp_jinja2.template('root.html')
    async def root(self, _: web.Request) -> Dict[str, Any]:
        """Dashboard: today's unique-visitor count for every service."""
        today = datetime.now(timezone.utc).date()

        services = self.ballcone.dao.tables()

        dashboard = []

        for service in services:
            unique = self.ballcone.dao.select_count(service, 'ip', start=today, stop=today)

            dashboard.append((service, unique.elements[0].count if unique.elements else 0))

        # Busiest services first; ties broken alphabetically.
        dashboard.sort(key=lambda service_count: (-service_count[1], service_count[0]))

        return {
            'version': __version__,
            'size': self.database_size(get_ttl_hash()),
            'current_page': 'root',
            'services': services,
            'dashboard': dashboard
        }

    async def services(self, request: web.Request) -> web.Response:
        """Redirect the bare /services URL to the dashboard."""
        raise web.HTTPFound(request.app.router['root'].url_for())

    @aiohttp_jinja2.template('service.html')
    async def service(self, request: web.Request) -> Dict[str, Any]:
        """Per-service page: visits overview, generation time, top paths and browsers."""
        services = self.ballcone.dao.tables()
        service = request.match_info.get('service', None)

        if not self.ballcone.check_service(service):
            raise web.HTTPNotFound(text=f'No such service: {service}')

        service = cast(str, service)

        start, stop = self.ballcone.days_before(days=self.days)

        queries = {
            'visits': self.ballcone.dao.select_count(service, start=start, stop=stop),
            'unique': self.ballcone.dao.select_count(service, 'ip', start=start, stop=stop)
        }

        # date -> {'visits': n, 'unique': m}, in the order the dates appear.
        overview: Dict[date, Dict[str, int]] = OrderedDict()

        for query, result in queries.items():
            for element in result.elements:
                if element.date not in overview:
                    overview[element.date] = {}

                overview[element.date][query] = element.count

        limit = self.ballcone.top_limit

        # Renamed from `time` to avoid shadowing the imported time() function.
        generation_time = self.ballcone.dao.select_average(service, 'generation_time', start, stop)

        paths = self.ballcone.dao.select_count_group(service, 'ip', 'path', ascending=False, limit=limit,
                                                     start=start, stop=stop)

        browsers = self.ballcone.dao.select_count_group(service, 'ip', 'browser_name', ascending=False, limit=limit,
                                                        start=start, stop=stop)

        return {
            'version': __version__,
            'services': services,
            'current_page': 'service',
            'current_service': service,
            'overview': overview,
            'time': generation_time,
            'paths': paths,
            'browsers': browsers
        }

    async def average_or_count(self, request: web.Request) -> web.Response:
        """JSON endpoint dispatching on the route name: AVG or COUNT of a field."""
        service, field = request.match_info['service'], request.match_info['field']

        if not self.ballcone.check_service(service):
            raise web.HTTPNotFound(text=f'No such service: {service}')

        start, stop = self.ballcone.days_before(days=self.days)

        if request.match_info.route.name == 'average':
            average_response = self.ballcone.dao.select_average(service, field=field, start=start, stop=stop)
            return web.json_response(average_response, dumps=self.ballcone.json_dumps)
        else:
            count_response = self.ballcone.dao.select_count(service, field=field, start=start, stop=stop)
            return web.json_response(count_response, dumps=self.ballcone.json_dumps)

    async def count_group(self, request: web.Request) -> web.Response:
        """JSON endpoint: per-day counts grouped by a column.

        The ``distinct`` query parameter doubles as the field name to count
        distinct values of (e.g. ``?distinct=ip``); its mere presence turns
        the DISTINCT flag on.
        """
        service, group = request.match_info['service'], request.match_info['group']

        if not self.ballcone.check_service(service):
            raise web.HTTPNotFound(text=f'No such service: {service}')

        field = request.query.get('distinct', None)
        distinct = bool(field)  # single lookup; previously queried twice
        ascending = bool(request.query.get('ascending', None))
        limit = int(request.query['limit']) if 'limit' in request.query else None

        start, stop = self.ballcone.days_before(days=self.days)

        response = self.ballcone.dao.select_count_group(service, field=field, group=group,
                                                        distinct=distinct, ascending=ascending, limit=limit,
                                                        start=start, stop=stop)

        return web.json_response(response, dumps=self.ballcone.json_dumps)

    @aiohttp_jinja2.template('sql.html')
    async def sql(self, request: web.Request) -> Dict[str, Any]:
        """SQL console: run the posted statement and render rows or the error."""
        data = await request.post()

        sql = str(data.get('sql', 'SELECT * FROM pragma_database_size();'))

        result: List[List[Any]] = []
        error: Optional[str] = None

        if sql:
            try:
                result = self.ballcone.dao.run(sql)
            except duckdb.Error as e:
                error = str(e)

        services = self.ballcone.dao.tables()

        return {
            'version': __version__,
            'current_page': 'sql',
            'title': 'SQL Console',
            'services': services,
            'sql': sql,
            'result': result,
            'error': error
        }

    @aiohttp_jinja2.template('nginx.html')
    async def nginx(self, request: web.Request) -> Dict[str, Any]:
        """Render a ready-to-paste nginx logging configuration snippet."""
        services = self.ballcone.dao.tables()

        service = request.query.get('service')

        if not service:
            service = 'example'

        ip = request.query.get('ip')

        if not ip:
            ip = '127.0.0.1'

        error = []

        if not self.ballcone.check_service(service, should_exist=False):
            error.append(f'Invalid service name: {self.ballcone.json_dumps(service)}, '
                         f'must match /{VALID_SERVICE.pattern}/')

        try:
            ip_version = ip_address(ip).version
        except ValueError:
            error.append(f'Invalid Ballcone IP address: {self.ballcone.json_dumps(ip)}')
            ip_version = None

        return {
            'version': __version__,
            'current_page': 'nginx',
            'title': 'nginx Configuration',
            'services': services,
            'service': service,
            'ip': ip,
            'ip_version': ip_version,
            'error': error
        }

    # NOTE(review): lru_cache on an instance method keys on self and keeps the
    # instance alive (ruff B019); harmless with the single WebBallcone
    # instance this app creates, but worth confirming if that ever changes.
    @lru_cache()
    def database_size(self, ttl_hash: Optional[int] = None) -> Optional[int]:
        """Return the cached database size; *ttl_hash* busts the cache."""
        return self.ballcone.dao.size()


def get_ttl_hash(seconds: int = 300) -> int:
    """Return a value that changes every *seconds*, for TTL-style lru_cache keys."""
    return round(time() / seconds)
6 | dockerfile: Dockerfile.demo 7 | ports: 8 | - '8888:80' 9 | - '8080:8080' 10 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "ballcone" 7 | authors = [{name = "Dmitry Ustalov"}] 8 | classifiers = [ 9 | "Development Status :: 3 - Alpha", 10 | "Intended Audience :: Developers", 11 | "Intended Audience :: Information Technology", 12 | "License :: OSI Approved :: MIT License", 13 | "Operating System :: OS Independent", 14 | "Programming Language :: Python :: 3", 15 | "Topic :: Database", 16 | "Topic :: Internet :: Log Analysis", 17 | "Topic :: Internet :: WWW/HTTP", 18 | "Typing :: Typed", 19 | ] 20 | license = {text = "MIT"} 21 | description = "Ballcone is a fast and lightweight server-side Web analytics solution." 22 | keywords = ["Web analytics", "log analysis", "columnar storage", "syslog", "nginx"] 23 | urls = {Homepage = "https://github.com/dustalov/ballcone"} 24 | requires-python = "~=3.9" 25 | dependencies = [ 26 | "httpagentparser", 27 | "maxminddb-geolite2", 28 | "aiohttp", 29 | "aiohttp-jinja2", 30 | "duckdb", 31 | "pandas", 32 | "PyPika", 33 | "simplejson", 34 | "python-dateutil", 35 | ] 36 | dynamic = ["version"] 37 | 38 | [project.optional-dependencies] 39 | dev = [ 40 | "mypy", 41 | "typed-ast", 42 | "typing_extensions", 43 | "types-simplejson", 44 | "types-python-dateutil", 45 | "types-Jinja2", 46 | "ruff", 47 | "pyinstaller", 48 | "twine", 49 | "build" 50 | ] 51 | 52 | [project.readme] 53 | file = "README.md" 54 | content-type = "text/markdown" 55 | 56 | [project.scripts] 57 | ballcone = "ballcone.__main__:main" 58 | 59 | [tool.setuptools] 60 | zip-safe = true 61 | 62 | [tool.setuptools.packages.find] 63 | include = ["ballcone*"] 64 | 65 | [tool.setuptools.package-data] 66 | "*" = ["*.html", "*.js"] 
67 | 68 | [tool.setuptools.dynamic] 69 | version = {attr = "ballcone.__version__"} 70 | 71 | [tool.mypy] 72 | ignore_missing_imports = true 73 | allow_untyped_calls = true 74 | allow_untyped_decorators = true 75 | warn_unused_ignores = false 76 | strict = true 77 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import setuptools 4 | setuptools.setup() 5 | -------------------------------------------------------------------------------- /tools/duck-upgrade.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -eu 2 | 3 | if [ "$#" -ne 4 ] ; then 4 | echo "Usage: $(basename "$0") duckdb-cli-old duckdb-cli-new database-old database-new" 5 | exit 1 6 | fi 7 | 8 | OLD=$1 9 | NEW=$2 10 | OLDDB=$3 11 | NEWDB=$4 12 | 13 | $OLD --version >/dev/null 14 | $NEW --version >/dev/null 15 | 16 | TEMP=$(mktemp -d) || exit 2 17 | trap 'rm -rf "$TEMP"' EXIT 18 | 19 | set -x 20 | 21 | $OLD "$OLDDB" -c "EXPORT DATABASE '$TEMP' (FORMAT PARQUET);" 22 | $NEW "$NEWDB" -c "IMPORT DATABASE '$TEMP';" 23 | 24 | [ "$(uname -s)" = "Linux" ] && chmod --reference="$OLDDB" "$NEWDB" 25 | -------------------------------------------------------------------------------- /tools/monet2duck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import itertools 5 | from datetime import datetime 6 | from pathlib import Path 7 | from typing import Union, List, Optional, Any, cast 8 | 9 | import duckdb # pip install duckdb 10 | import monetdblite # pip install -e 'git+https://github.com/MonetDB/MonetDBLite-Python.git@v0.6.3#egg=monetdblite' 11 | 12 | try: 13 | from tqdm import trange # pip install tqdm 14 | except ModuleNotFoundError: 15 | def trange(*args, **kwargs): # type: ignore 16 | return range(*args) 17 | 18 | 19 | 
def execute(db: Union[monetdblite.Connection, duckdb.DuckDBPyConnection], sql: str,
            many: Optional[List[Any]] = None) -> List[Any]:
    """Run *sql* on either a MonetDBLite or a DuckDB connection.

    When *many* is given, the statement is executed once per parameter row
    (INSERT-style) and an empty list is returned; otherwise the fetched
    result rows are returned.
    """
    cursor = db.cursor()

    try:
        # DuckDB cursors need an explicit transaction; MonetDBLite opens one implicitly.
        if isinstance(db, duckdb.DuckDBPyConnection):
            cursor.begin()

        if many is None:
            cursor.execute(sql)
            result = cast(List[Any], cursor.fetchall())
        else:
            cursor.executemany(sql, many)
            result = []

        cursor.commit()
    finally:
        # Release the cursor even when the statement raises (was leaked before).
        cursor.close()

    return result


SQL_MONETDB_TABLES = '''
SELECT t.name
FROM sys.tables AS t
JOIN sys.schemas AS s ON t.schema_id = s.id
WHERE s.name = 'ballcone'
ORDER BY t.name
'''

SQL_MONETDB_COUNT = '''
SELECT COUNT(*) FROM ballcone.{table}
'''

SQL_MONETDB_DATA = '''
SELECT *
FROM ballcone.{table}
ORDER BY datetime
LIMIT {limit} OFFSET {offset}
'''

SQL_DUCKDB_TABLE = '''
CREATE OR REPLACE TABLE {table}(
    datetime TIMESTAMP NOT NULL,
    host VARCHAR NOT NULL,
    path VARCHAR NOT NULL,
    status SMALLINT NOT NULL,
    length INTEGER NOT NULL,
    generation_time DOUBLE NOT NULL,
    referer VARCHAR,
    ip VARCHAR NOT NULL,
    country_iso_code VARCHAR,
    platform_name VARCHAR,
    platform_version VARCHAR,
    browser_name VARCHAR,
    browser_version VARCHAR,
    is_robot BOOLEAN
)
'''

SQL_DUCKDB_INSERT = '''
INSERT INTO {table} VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
'''

SQL_DUCKDB_COUNT = '''
SELECT COUNT(*) FROM {table}
'''


def main() -> None:
    """Migrate every Ballcone table from a MonetDBLite database into DuckDB in batches."""
    from datetime import timezone  # local import: the file header imports only datetime

    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--batch', type=int, default=3072)
    parser.add_argument('monetdb', type=Path)
    parser.add_argument('duckdb', type=Path)
    args = parser.parse_args()

    db_monetdb = monetdblite.make_connection(str(args.monetdb.resolve()))
    db_duckdb = duckdb.connect(str(args.duckdb.resolve()))

    for table in itertools.chain.from_iterable(execute(db_monetdb, SQL_MONETDB_TABLES)):
        count = execute(db_monetdb, SQL_MONETDB_COUNT.format(table=table))[0][0]

        execute(db_duckdb, SQL_DUCKDB_TABLE.format(table=table))

        for offset in trange(0, 1 + count, args.batch, desc=table):
            data = execute(db_monetdb, SQL_MONETDB_DATA.format(table=table, limit=args.batch, offset=offset))

            for row in data:
                # The MonetDB schema carries 15 columns (the DuckDB one has 14:
                # the redundant `date` column at index 1 is dropped below).
                assert len(row) == 15, row

                # datetime: a Unix timestamp, converted to a naive UTC TIMESTAMP.
                # Equivalent to the deprecated datetime.utcfromtimestamp(row[0]).
                row[0] = datetime.fromtimestamp(row[0], tz=timezone.utc).replace(tzinfo=None)

                # status
                row[4] = int(row[4])

                # length
                row[5] = int(row[5])

                # generation_time
                row[6] = float(row[6])

                # is_robot
                row[14] = bool(row[14])

                # date: dropped, DuckDB derives it from `datetime`
                del row[1]

            execute(db_duckdb, SQL_DUCKDB_INSERT.format(table=table), data)

        # Sanity check: the migrated table must hold exactly as many rows as the source.
        assert count == execute(db_duckdb, SQL_DUCKDB_COUNT.format(table=table))[0][0], table

    db_duckdb.close()
    db_monetdb.close()


if __name__ == '__main__':
    main()