├── datanommer.commands ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── utils.py │ ├── test_extract_users.py │ └── test_commands.py ├── news │ ├── .gitignore │ └── 1434.feature ├── README.rst ├── tox.ini ├── config.toml.example ├── datanommer │ └── commands │ │ ├── utils.py │ │ ├── extract_users.py │ │ └── __init__.py ├── pyproject.toml └── NEWS.rst ├── docs ├── datanommer.models.NEWS.rst ├── datanommer.commands.NEWS.rst ├── datanommer.consumer.NEWS.rst ├── user.rst ├── requirements.txt ├── index.rst ├── sysadmin.rst ├── conf.py └── contributing.rst ├── .s2i ├── environment ├── datanommer-upgrade-db.sh ├── run-datanommer.sh └── bin │ └── assemble ├── datanommer.consumer ├── tests │ ├── conftest.py │ ├── __init__.py │ └── test_consumer.py ├── news │ └── .gitignore ├── README.rst ├── tox.ini ├── NEWS.rst ├── datanommer │ └── consumer │ │ └── __init__.py └── pyproject.toml ├── datanommer.models ├── tests │ ├── conftest.py │ ├── test_jsonencodeddict.py │ └── test_model.py ├── news │ ├── .gitignore │ └── 1434.feature ├── README.rst ├── datanommer │ └── models │ │ ├── alembic │ │ ├── versions │ │ │ ├── 5db25abc63be_init.py │ │ │ ├── f6918385051f_messages_headers_index.py │ │ │ ├── 429e6f2cba6f_message_agent_name.py │ │ │ ├── 951c40020acc_unique.py │ │ │ └── f4fdb5442d05_add_view_recent_topics.py │ │ ├── script.py.mako │ │ └── env.py │ │ ├── testing │ │ └── __init__.py │ │ ├── view.py │ │ └── __init__.py ├── tox.ini ├── alembic.ini ├── NEWS.rst └── pyproject.toml ├── devel └── ansible │ ├── datanommer.yml │ ├── ansible.cfg │ └── roles │ ├── datanommer │ ├── files │ │ ├── datanommer.service │ │ ├── .bashrc │ │ └── alembic.ini │ └── tasks │ │ └── main.yml │ └── postgresql │ └── tasks │ └── main.yml ├── .github ├── renovate.json └── workflows │ ├── label-when-deployed.yaml │ └── tests.yml ├── tools ├── timescaledb │ ├── migrate.toml │ └── migrate-to-timescaledb.py ├── install-models-as-editable.sh ├── towncrier │ ├── run-towncrier.sh │ ├── get-authors.py │ └── template.rst.j2 ├── run-liccheck.sh └── check-dep-versions.py ├── .gitleaks.toml ├── .bandit.cfg ├── .gitignore ├── .pre-commit-config.yaml ├── runtests.sh ├── README.md ├── .readthedocs.yaml ├── pyproject.toml ├── Vagrantfile └── tox.ini /datanommer.commands/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/datanommer.models.NEWS.rst: -------------------------------------------------------------------------------- 1 | ../datanommer.models/NEWS.rst -------------------------------------------------------------------------------- /docs/datanommer.commands.NEWS.rst: -------------------------------------------------------------------------------- 1 | ../datanommer.commands/NEWS.rst -------------------------------------------------------------------------------- /docs/datanommer.consumer.NEWS.rst: -------------------------------------------------------------------------------- 1 | ../datanommer.consumer/NEWS.rst -------------------------------------------------------------------------------- /.s2i/environment: -------------------------------------------------------------------------------- 1 | UPGRADE_PIP_TO_LATEST=true 2 | APP_SCRIPT=.s2i/run-datanommer.sh 3 | -------------------------------------------------------------------------------- /datanommer.consumer/tests/conftest.py: -------------------------------------------------------------------------------- 1 | pytest_plugins = 
"datanommer.models.testing" 2 | -------------------------------------------------------------------------------- /datanommer.models/tests/conftest.py: -------------------------------------------------------------------------------- 1 | pytest_plugins = "datanommer.models.testing" 2 | -------------------------------------------------------------------------------- /docs/user.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | User Guide 3 | ========== 4 | 5 | Write the user guide here. 6 | -------------------------------------------------------------------------------- /datanommer.commands/news/.gitignore: -------------------------------------------------------------------------------- 1 | # Dummy file because git won't add empty directories 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /datanommer.consumer/news/.gitignore: -------------------------------------------------------------------------------- 1 | # Dummy file because git won't add empty directories 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /datanommer.models/news/.gitignore: -------------------------------------------------------------------------------- 1 | # Dummy file because git won't add empty directories 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | myst-parser 3 | sphinx-click 4 | ./datanommer.models 5 | ./datanommer.commands 6 | -------------------------------------------------------------------------------- /devel/ansible/datanommer.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: datanommer 3 | become: true 4 | become_method: sudo 5 | roles: 6 | - postgresql 7 | - datanommer 8 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["local>fedora-infra/shared:renovate-config"] 4 | } 5 | -------------------------------------------------------------------------------- /tools/timescaledb/migrate.toml: -------------------------------------------------------------------------------- 1 | source_url = "postgresql://datanommer:datanommer@localhost/datanommer" 2 | dest_url = "postgresql://datanommer:datanommer@localhost/messages" 3 | -------------------------------------------------------------------------------- /.s2i/datanommer-upgrade-db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | exec /opt/app-root/src/.local/venvs/datanommer/bin/alembic \ 4 | -c /etc/fedora-messaging/alembic.ini \ 5 | upgrade head 6 | -------------------------------------------------------------------------------- /datanommer.models/news/1434.feature: -------------------------------------------------------------------------------- 1 | Add materialized view `recent_topics` that provides efficient querying of recent message topics with aggregated message counts and sorting capabilities -------------------------------------------------------------------------------- /.gitleaks.toml: -------------------------------------------------------------------------------- 1 | [allowlist] 2 | paths = 
[ 3 | "docs/sysadmin.rst", 4 | "tools/timescaledb/migrate.toml", 5 | "devel/ansible/roles/datanommer/templates/fedora-messaging.toml.j2", 6 | ] 7 | -------------------------------------------------------------------------------- /datanommer.commands/news/1434.feature: -------------------------------------------------------------------------------- 1 | Add `datanommer-refresh-view` command to refresh the `recent_topics` materialized view, intended to be run periodically via cron job to keep topic view up to date -------------------------------------------------------------------------------- /.bandit.cfg: -------------------------------------------------------------------------------- 1 | [bandit] 2 | targets: datanommer.commands,datanommer.consumer,datanommer.models 3 | # Can't do this now because of https://github.com/PyCQA/bandit/issues/693 4 | #exclude: .git,.tox,*/tests/*,*/.tox/* 5 | -------------------------------------------------------------------------------- /devel/ansible/ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | # Human-readable output 3 | callback_result_format = yaml 4 | # Defaults to /usr/bin/python3.12 and that's not the default python 5 | interpreter_python = /usr/bin/python3 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | ez_setup 4 | tw2* 5 | *.db* 6 | data 7 | build 8 | dist 9 | docs/_build 10 | docs/_source 11 | *.egg* 12 | README.pdf 13 | *.pid 14 | *.log 15 | *.swo 16 | .tox/ 17 | .vagrant 18 | 19 | # Coverage 20 | .coverage 21 | htmlcov 22 | coverage.xml 23 | -------------------------------------------------------------------------------- /.s2i/run-datanommer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # We install the app in a specific virtualenv: 6 | export PATH=/opt/app-root/src/.local/venvs/datanommer/bin:$PATH 7 | 8 | # Run the application 9 | fedora-messaging consume --callback datanommer.consumer:Nommer 10 | -------------------------------------------------------------------------------- /tools/install-models-as-editable.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Install datanommer.models in develop mode when run from a virtualenv such as 4 | # those tox creates. 5 | 6 | set -e 7 | 8 | CURDIR=`pwd` 9 | 10 | set -x 11 | 12 | cd ../datanommer.models 13 | poetry install --all-extras 14 | cd "$CURDIR" 15 | -------------------------------------------------------------------------------- /datanommer.models/README.rst: -------------------------------------------------------------------------------- 1 | datanommer.models 2 | ================= 3 | 4 | This package contains the SQLAlchemy data model for datanommer. 5 | 6 | Datanommer is a storage consumer for the Fedora Infrastructure Message Bus 7 | (fedmsg). It is comprised of a `fedmsg `_ consumer that 8 | stuffs every message into a sqlalchemy database. 9 | -------------------------------------------------------------------------------- /datanommer.consumer/README.rst: -------------------------------------------------------------------------------- 1 | datanommer.consumer 2 | =================== 3 | 4 | This package contains the fedmsg-hub consumer plugin for datanommer. 
5 | 6 | Datanommer is a storage consumer for the Fedora Infrastructure Message Bus 7 | (fedmsg). It is comprised of a `fedmsg `_ consumer that 8 | stuffs every message into a sqlalchemy database. 9 | -------------------------------------------------------------------------------- /tools/towncrier/run-towncrier.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "Building release notes for all packages" 6 | for package in datanommer.{models,consumer,commands}; do 7 | echo "[$package] Building release notes..." 8 | pushd $package 9 | poetry install --all-extras 10 | poetry run towncrier build --yes $@ 11 | popd 12 | echo "[$package] done." 13 | done 14 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/5db25abc63be_init.py: -------------------------------------------------------------------------------- 1 | """Initial revision 2 | 3 | Revision ID: 5db25abc63be 4 | Revises: None 5 | Create Date: 2021-09-15 16:15:37.188484 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = "5db25abc63be" 11 | down_revision = None 12 | 13 | 14 | def upgrade(): 15 | pass 16 | 17 | 18 | def downgrade(): 19 | pass 20 | -------------------------------------------------------------------------------- /devel/ansible/roles/datanommer/files/datanommer.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=datanommer 3 | 4 | [Service] 5 | User=vagrant 6 | Restart=on-failure 7 | RestartSec=5s 8 | WorkingDirectory=/home/vagrant/datanommer/datanommer.consumer 9 | ExecStart=/bin/sh -c 'source /srv/venv/bin/activate && poetry run fedora-messaging consume --callback datanommer.consumer:Nommer' 10 | 11 | [Install] 12 | WantedBy=multi-user.target 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/asottile/pyupgrade 3 | rev: v3.21.2 4 | hooks: 5 | - id: pyupgrade 6 | args: 7 | - --py311-plus 8 | 9 | - repo: https://github.com/psf/black 10 | rev: 25.12.0 11 | hooks: 12 | - id: black 13 | 14 | # Ruff 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | # Ruff version. 17 | rev: v0.14.9 18 | hooks: 19 | - id: ruff-check 20 | -------------------------------------------------------------------------------- /datanommer.commands/README.rst: -------------------------------------------------------------------------------- 1 | datanommer.commands 2 | =================== 3 | 4 | .. split here 5 | 6 | This package contains the console commands for datanommer, including:: 7 | 8 | - datanommer-create-db 9 | - datanommer-dump 10 | - datanommer-stats 11 | - datanommer-refresh-view 12 | 13 | Datanommer is a storage consumer for the Fedora Infrastructure Message Bus 14 | (fedmsg). It is comprised of a `fedmsg `_ consumer that 15 | stuffs every message into a sqlalchemy database. 
16 | -------------------------------------------------------------------------------- /devel/ansible/roles/datanommer/files/.bashrc: -------------------------------------------------------------------------------- 1 | # .bashrc 2 | source /srv/venv/bin/activate 3 | 4 | alias datanommer-consumer-start="sudo systemctl start datanommer.service && echo 'datanommer consumer is running'" 5 | alias datanommer-consumer-logs="sudo journalctl -u datanommer.service -e" 6 | alias datanommer-consumer-restart="sudo systemctl restart datanommer.service && echo 'datanommer consumer is running'" 7 | alias datanommer-consumer-stop="sudo systemctl stop datanommer.service && echo 'datanommer service stopped'" 8 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = ${repr(up_revision)} 11 | down_revision = ${repr(down_revision)} 12 | 13 | from alembic import op 14 | import sqlalchemy as sa 15 | ${imports if imports else ""} 16 | 17 | def upgrade(): 18 | ${upgrades if upgrades else "pass"} 19 | 20 | 21 | def downgrade(): 22 | ${downgrades if downgrades else "pass"} 23 | -------------------------------------------------------------------------------- /datanommer.models/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{311,312},licenses 3 | skipsdist = True 4 | isolated_build = true 5 | 6 | [testenv] 7 | passenv = HOME 8 | sitepackages = false 9 | skip_install = true 10 | allowlist_externals = 11 | poetry 12 | env = 13 | SQLALCHEMY_WARN_20=1 14 | commands_pre = 15 | poetry install --all-extras 16 | commands = 17 | poetry run pytest -c ../pyproject.toml {posargs} 18 | 19 | [testenv:licenses] 20 | allowlist_externals = 21 | {[testenv]allowlist_externals} 22 | {toxinidir}/../tools/run-liccheck.sh 23 | commands = 24 | {toxinidir}/../tools/run-liccheck.sh 25 | -------------------------------------------------------------------------------- /runtests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | which tox &>/dev/null || { 6 | echo "You need to install tox" >&2 7 | exit 2 8 | } 9 | which pre-commit &>/dev/null || { 10 | echo "You need to install pre-commit" >&2 11 | exit 2 12 | } 13 | which krb5-config &> /dev/null || { 14 | echo "You need to install krb5-devel" >&2 15 | exit 2 16 | } 17 | 18 | echo "Running checks for all packages" 19 | pre-commit run --all-files 20 | 21 | echo "Running unit tests for all packages" 22 | for package in datanommer.{models,consumer,commands}; do 23 | echo "[$package] Testing..." 24 | pushd $package 25 | tox $@ 26 | popd 27 | echo "[$package] done." 
28 | done 29 | -------------------------------------------------------------------------------- /datanommer.commands/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{311,312},licenses 3 | skipsdist = True 4 | isolated_build = true 5 | 6 | [testenv] 7 | passenv = HOME 8 | sitepackages = false 9 | skip_install = true 10 | allowlist_externals = 11 | poetry 12 | env = 13 | SQLALCHEMY_WARN_20=1 14 | commands_pre = 15 | poetry install --all-extras 16 | poetry run {toxinidir}/../tools/install-models-as-editable.sh 17 | commands = 18 | poetry run pytest -c ../pyproject.toml {posargs} 19 | 20 | [testenv:licenses] 21 | basepython = python3.11 22 | allowlist_externals = 23 | {[testenv]allowlist_externals} 24 | {toxinidir}/../tools/run-liccheck.sh 25 | commands = 26 | {toxinidir}/../tools/run-liccheck.sh 27 | -------------------------------------------------------------------------------- /datanommer.consumer/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{311,312},licenses 3 | skipsdist = True 4 | isolated_build = true 5 | 6 | [testenv] 7 | passenv = HOME 8 | sitepackages = false 9 | skip_install = true 10 | allowlist_externals = 11 | poetry 12 | env = 13 | SQLALCHEMY_WARN_20=1 14 | commands_pre = 15 | poetry install --all-extras 16 | poetry run {toxinidir}/../tools/install-models-as-editable.sh 17 | commands = 18 | poetry run pytest -c ../pyproject.toml {posargs} 19 | 20 | [testenv:licenses] 21 | basepython = python3.11 22 | allowlist_externals = 23 | {[testenv]allowlist_externals} 24 | {toxinidir}/../tools/run-liccheck.sh 25 | commands = 26 | {toxinidir}/../tools/run-liccheck.sh 27 | -------------------------------------------------------------------------------- /datanommer.commands/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import datanommer.commands 4 | 5 | 6 | pytest_plugins = "datanommer.models.testing" 7 | 8 | 9 | @pytest.fixture 10 | def mock_init(mocker): 11 | # This is actually not very useful because init() checks a private attribute on the 12 | # session object to avoid being called twice. It just prevents a warning log. 13 | mocker.patch("datanommer.commands.m.init") 14 | 15 | 16 | @pytest.fixture 17 | def mock_config(mocker): 18 | mocker.patch.dict( 19 | datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"], 20 | { 21 | "datanommer_sqlalchemy_url": "", 22 | "alembic_ini": None, 23 | }, 24 | ) 25 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/f6918385051f_messages_headers_index.py: -------------------------------------------------------------------------------- 1 | """Messages.headers index 2 | 3 | Revision ID: f6918385051f 4 | Revises: 951c40020acc 5 | Create Date: 2024-05-07 16:05:05.344863 6 | 7 | """ 8 | 9 | from alembic import op 10 | 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = "f6918385051f" 14 | down_revision = "951c40020acc" 15 | 16 | 17 | def upgrade(): 18 |     op.create_index( 19 |         "ix_messages_headers", 20 |         "messages", 21 |         ["headers"], 22 |         unique=False, 23 |         postgresql_using="gin", 24 |         postgresql_ops={"headers": "jsonb_path_ops"}, 25 |     ) 26 | 27 | 28 | def downgrade(): 29 |     op.drop_index("ix_messages_headers", table_name="messages", postgresql_using="gin") 30 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/429e6f2cba6f_message_agent_name.py: -------------------------------------------------------------------------------- 1 | """Message.username → Message.agent_name 2 | 3 | Revision ID: 429e6f2cba6f 4 | Revises: f6918385051f 5 | Create Date: 2024-06-07 09:12:33.393757 6 | 7 | """ 8 | 9 | from alembic import op 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "429e6f2cba6f" 14 | down_revision = "f6918385051f" 15 | 16 | 17 | def upgrade(): 18 |     op.alter_column("messages", "username", new_column_name="agent_name") 19 |     op.create_index(op.f("ix_messages_agent_name"), "messages", ["agent_name"], unique=False) 20 | 21 | 22 | def downgrade(): 23 |     op.drop_index(op.f("ix_messages_agent_name"), table_name="messages") 24 |     op.alter_column("messages", "agent_name", new_column_name="username") 25 | -------------------------------------------------------------------------------- /tools/run-liccheck.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | STRATEGY_URL=https://raw.githubusercontent.com/fedora-infra/shared/main/liccheck-strategy.ini 4 | 5 | trap 'rm -f "$TMPFILE $STRATEGY_TMPFILE"' EXIT 6 | 7 | set -e 8 | set -x 9 | 10 | TMPFILE=$(mktemp -t requirements-XXXXXX.txt) 11 | STRATEGY_TMPFILE=$(mktemp -t liccheck-strategy-XXXXXX.ini) 12 | 13 | curl -o $STRATEGY_TMPFILE $STRATEGY_URL 14 | 15 | poetry export --with dev --without-hashes -f requirements.txt -o $TMPFILE 16 | 17 | # liccheck requires pkg_resources 18 | # https://github.com/dhatim/python-license-check/issues/114 19 | poetry run pip install setuptools 20 | 21 | # Use pip freeze instead of poetry when it fails 22 | #pip freeze --exclude-editable --isolated > $TMPFILE 23 | 24 | poetry run liccheck -r $TMPFILE -s $STRATEGY_TMPFILE 25 | -------------------------------------------------------------------------------- /datanommer.consumer/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see <http://www.gnu.org/licenses/>.
16 | -------------------------------------------------------------------------------- /.github/workflows/label-when-deployed.yaml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Contributors to the Fedora Project 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | name: Apply labels when deployed 6 | 7 | on: 8 |   push: 9 |     branches: 10 |       - staging 11 |       - stable 12 | 13 | jobs: 14 |   label: 15 |     name: Apply labels 16 |     runs-on: ubuntu-latest 17 | 18 |     steps: 19 |       - name: Staging deployment 20 |         uses: fedora-infra/label-when-in-branch@v1 21 |         with: 22 |           token: ${{ secrets.GITHUB_TOKEN }} 23 |           branch: staging 24 |           label: deployed:staging 25 |       - name: Production deployment 26 |         uses: fedora-infra/label-when-in-branch@v1 27 |         with: 28 |           token: ${{ secrets.GITHUB_TOKEN }} 29 |           branch: stable 30 |           label: deployed:prod 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Datanommer 2 | 3 | Datanommer is an application consisting solely of a Fedora Messaging consumer that places every message into a Postgres / TimescaleDB database. 4 | 5 | It comprises three modules: 6 | 7 | * **datanommer.consumer**: the Fedora Messaging consumer that monitors the queue and places every message into the database 8 | * **datanommer.models**: the database models used by the consumer. These models are also used by [Datagrepper](https://github.com/fedora-infra/datagrepper), [FMN](https://github.com/fedora-infra/fmn), and [fedbadges](https://github.com/fedora-infra/fedbadges). Typically, to access the information stored in the database by datanommer, use the [Datagrepper](https://github.com/fedora-infra/datagrepper) JSON API. 9 | * **datanommer.commands**: a set of command-line tools for developers and sysadmins. 10 | 11 | Refer to the [online documentation](https://datanommer.readthedocs.io/) for details. 12 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.md 2 |    :parser: myst_parser.sphinx_ 3 | 4 | 5 | .. User Guide 6 | 7 | .. toctree:: 8 |    :maxdepth: 2 9 |    :caption: User Guide 10 | 11 |    user 12 |    _source/commands 13 | 14 | 15 | .. Sysadmin's Guide 16 | 17 | .. toctree:: 18 |    :maxdepth: 2 19 |    :caption: Sysadmin's Guide 20 | 21 |    sysadmin 22 | 23 | 24 | .. Contributor Guide 25 | 26 | .. toctree:: 27 |    :maxdepth: 2 28 |    :caption: Contributor Guide 29 | 30 |    contributing 31 | 32 | 33 | .. Release Notes 34 | 35 | .. toctree:: 36 |    :maxdepth: 1 37 |    :caption: Release Notes 38 | 39 |    datanommer.models 40 |    datanommer.commands 41 |    datanommer.consumer 42 | 43 | 44 | ..
toctree:: 45 | :maxdepth: 2 46 | :caption: Module Documentation 47 | 48 | _source/models/datanommer.models 49 | _source/commands/datanommer.commands 50 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.12" 13 | # You can also specify other tool versions: 14 | # nodejs: "19" 15 | # rust: "1.64" 16 | # golang: "1.19" 17 | 18 | # Build documentation in the "docs/" directory with Sphinx 19 | sphinx: 20 | configuration: docs/conf.py 21 | 22 | # Optionally build your docs in additional formats such as PDF and ePub 23 | # formats: 24 | # - pdf 25 | # - epub 26 | 27 | # Optional but recommended, declare the Python requirements required 28 | # to build your documentation 29 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 30 | python: 31 | install: 32 | - requirements: docs/requirements.txt 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | 4 | [tool.ruff] 5 | line-length = 100 6 | 7 | [tool.ruff.lint] 8 | select = ["E", "F", "W", "I", "UP", "S", "B", "RUF"] 9 | ignore = ["RUF012"] 10 | 11 | [tool.ruff.lint.isort] 12 | lines-after-imports = 2 13 | order-by-type = false 14 | known-first-party = ["datanommer"] 15 | 16 | [tool.ruff.lint.per-file-ignores] 17 | "*/tests/*" = ["S101", "E501"] 18 | "tools/towncrier/get-authors.py" = ["S602", "S603", "S607"] 19 | 20 | [tool.pytest.ini_options] 21 | addopts = "-v --cov-config ../pyproject.toml --cov --cov-report term-missing --cov-report html --cov-report xml" 22 | 23 | [tool.coverage.run] 24 | branch = true 25 | source = ["datanommer"] 26 | 27 | [tool.coverage.paths] 28 | source = ["datanommer"] 29 | 30 | [tool.coverage.report] 31 | fail_under = 98 32 | exclude_lines = [ 33 | "pragma: no cover", 34 | "if __name__ == .__main__.:", 35 | ] 36 | omit = [ 37 | "datanommer/models/testing/*", 38 | ] 39 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/951c40020acc_unique.py: -------------------------------------------------------------------------------- 1 | """Add a unique index on packages and users 2 | 3 | Revision ID: 951c40020acc 4 | Revises: 5db25abc63be 5 | Create Date: 2021-09-22 15:38:57.339646 6 | """ 7 | 8 | from alembic import op 9 | 10 | 11 | # revision identifiers, used by Alembic. 
12 | revision = "951c40020acc" 13 | down_revision = "5db25abc63be" 14 | 15 | 16 | def upgrade(): 17 | op.drop_index("ix_packages_name", table_name="packages") 18 | op.create_index(op.f("ix_packages_name"), "packages", ["name"], unique=True) 19 | op.drop_index("ix_users_name", table_name="users") 20 | op.create_index(op.f("ix_users_name"), "users", ["name"], unique=True) 21 | 22 | 23 | def downgrade(): 24 | op.drop_index(op.f("ix_users_name"), table_name="users") 25 | op.create_index("ix_users_name", "users", ["name"], unique=False) 26 | op.drop_index(op.f("ix_packages_name"), table_name="packages") 27 | op.create_index("ix_packages_name", "packages", ["name"], unique=False) 28 | -------------------------------------------------------------------------------- /tools/check-dep-versions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | from collections import defaultdict 5 | 6 | import toml 7 | 8 | 9 | SUBPROJECTS = ["commands", "consumer", "models"] 10 | SUPPORTED_LOCK_VERSION = "1.1" 11 | 12 | 13 | deps_by_package = defaultdict(dict) 14 | 15 | for project in SUBPROJECTS: 16 | lock_path = os.path.join(f"datanommer.{project}", "poetry.lock") 17 | with open(lock_path) as f: 18 | lockfile = toml.load(f) 19 | lock_version = lockfile["metadata"]["lock-version"] 20 | if lock_version != SUPPORTED_LOCK_VERSION: 21 | print(f"Unsupported lockfile version in {lock_path}: {lock_version}. Skipping.") 22 | continue 23 | deps = {} 24 | for dep in lockfile["package"]: 25 | deps_by_package[dep["name"]][project] = dep["version"] 26 | 27 | 28 | for name, deps in deps_by_package.items(): 29 | if len(set(deps.values())) == 1: 30 | continue 31 | dep_list = [f"{project}:{version}" for project, version in deps.items()] 32 | print(f"Incoherent dep for {name}: {' '.join(dep_list)}") 33 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | Vagrant.configure(2) do |config| 5 | config.hostmanager.enabled = true 6 | config.hostmanager.manage_host = true 7 | config.hostmanager.manage_guest = true 8 | 9 | config.vm.define "datanommer" do |datanommer| 10 | datanommer.vm.box_url = "https://download.fedoraproject.org/pub/fedora/linux/releases/38/Cloud/x86_64/images/Fedora-Cloud-Base-Vagrant-38-1.6.x86_64.vagrant-libvirt.box" 11 | datanommer.vm.box = "f38-cloud-libvirt" 12 | datanommer.vm.hostname = "datanommer.test" 13 | 14 | datanommer.vm.synced_folder '.', '/vagrant', disabled: true 15 | datanommer.vm.synced_folder ".", "/home/vagrant/datanommer", type: "sshfs" 16 | 17 | datanommer.vm.provider :libvirt do |libvirt| 18 | libvirt.cpus = 2 19 | libvirt.memory = 2048 20 | end 21 | 22 | datanommer.vm.provision "ansible" do |ansible| 23 | ansible.playbook = "devel/ansible/datanommer.yml" 24 | ansible.config_file = "devel/ansible/ansible.cfg" 25 | ansible.verbose = true 26 | end 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /datanommer.models/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 
2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = datanommer.models:alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # set to 'true' to run the environment during 11 | # the 'revision' command, regardless of autogenerate 12 | # revision_environment = false 13 | 14 | sqlalchemy.url = postgresql://datanommer:datanommer@localhost/messages 15 | 16 | # Logging configuration 17 | [loggers] 18 | keys = root,sqlalchemy,alembic 19 | 20 | [handlers] 21 | keys = console 22 | 23 | [formatters] 24 | keys = generic 25 | 26 | [logger_root] 27 | level = WARN 28 | handlers = console 29 | qualname = 30 | 31 | [logger_sqlalchemy] 32 | level = WARN 33 | handlers = 34 | qualname = sqlalchemy.engine 35 | 36 | [logger_alembic] 37 | level = INFO 38 | handlers = 39 | qualname = alembic 40 | 41 | [handler_console] 42 | class = StreamHandler 43 | args = (sys.stderr,) 44 | level = NOTSET 45 | formatter = generic 46 | 47 | [formatter_generic] 48 | format = %(levelname)-5.5s [%(name)s] %(message)s 49 | datefmt = %H:%M:%S 50 | -------------------------------------------------------------------------------- /devel/ansible/roles/datanommer/files/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = datanommer.models:alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # set to 'true' to run the environment during 11 | # the 'revision' command, regardless of autogenerate 12 | # revision_environment = false 13 | 14 | sqlalchemy.url = postgresql://datanommer:datanommer@localhost/messages 15 | 16 | # Logging configuration 17 | [loggers] 18 | keys = root,sqlalchemy,alembic 19 | 20 | [handlers] 21 | keys = console 22 | 23 | [formatters] 24 | keys = generic 25 | 26 | [logger_root] 27 | level = WARN 28 | handlers = console 29 | qualname = 30 | 31 | [logger_sqlalchemy] 32 | level = WARN 33 | handlers = 34 | qualname = sqlalchemy.engine 35 | 36 | [logger_alembic] 37 | level = INFO 38 | handlers = 39 | qualname = alembic 40 | 41 | [handler_console] 42 | class = StreamHandler 43 | args = (sys.stderr,) 44 | level = NOTSET 45 | formatter = generic 46 | 47 | [formatter_generic] 48 | format = %(levelname)-5.5s [%(name)s] %(message)s 49 | datefmt = %H:%M:%S 50 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/f4fdb5442d05_add_view_recent_topics.py: -------------------------------------------------------------------------------- 1 | """Add view recent_topics 2 | 3 | Revision ID: f4fdb5442d05 4 | Revises: 429e6f2cba6f 5 | Create Date: 2025-05-30 13:52:11.648140 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 
10 | revision = "f4fdb5442d05" 11 | down_revision = "429e6f2cba6f" 12 | 13 | from alembic import op # noqa: E402 14 | 15 | from datanommer.models.view import CreateMaterializedView, get_selectable # noqa: E402 16 | 17 | 18 | def upgrade(): 19 | 20 | # Create the materialized view using the factored selectable 21 | selectable = get_selectable() 22 | op.execute(CreateMaterializedView("recent_topics", selectable)) 23 | 24 | # Create unique index on topic 25 | op.create_index( 26 | "uq_recent_topics_topic", "recent_topics", ["topic"], unique=True, if_not_exists=True 27 | ) 28 | 29 | # Create index on message_count for sorting 30 | op.create_index( 31 | "ix_recent_topics_message_count", "recent_topics", ["message_count"], if_not_exists=True 32 | ) 33 | 34 | 35 | def downgrade(): 36 | op.execute("DROP MATERIALIZED VIEW IF EXISTS recent_topics") 37 | -------------------------------------------------------------------------------- /.s2i/bin/assemble: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # The assemble script builds the application artifacts from a source and 4 | # places them into appropriate directories inside the image. 5 | 6 | # Execute the default S2I script 7 | . /usr/libexec/s2i/assemble 8 | 9 | set -e 10 | 11 | # We need to run micropipenv manually because the projects are in 12 | # subdirectories. 13 | 14 | install_tool "micropipenv" "[toml]" 15 | 16 | # Poetry 1.5.0 breaks micropipenv, generate the requirements instead. 17 | pip install poetry poetry-plugin-export 18 | 19 | 20 | for subpackage in datanommer.models datanommer.commands datanommer.consumer; do 21 | pushd $subpackage 22 | echo "---> Generating requirements in ${subpackage}..." 23 | if [ "$subpackage" == "datanommer.models" ]; then 24 | poetry export -o requirements.txt --without-hashes --extras schemas 25 | else 26 | poetry export -o requirements.txt --without-hashes 27 | fi 28 | echo "---> Installing dependencies in ${subpackage}..." 29 | pip install -r requirements.txt 30 | # Now install the root project too. 31 | pip install . 
--no-deps 32 | popd 33 | done 34 | 35 | # set permissions for any installed artifacts 36 | fix-permissions /opt/app-root -P 37 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = checks,{models,consumer,commands}-{py311,py312,licenses},docs 3 | skipsdist = True 4 | isolated_build = true 5 | 6 | [testenv] 7 | passenv = HOME 8 | sitepackages = false 9 | skip_install = true 10 | allowlist_externals = 11 | poetry 12 | cd 13 | {toxinidir}/tools/run-liccheck.sh 14 | env = 15 | SQLALCHEMY_WARN_20=1 16 | change_dir = 17 | models: datanommer.models 18 | consumer: datanommer.consumer 19 | commands: datanommer.commands 20 | commands_pre = 21 | poetry install --all-extras 22 | commands = 23 | py: poetry run pytest {posargs} 24 | licenses: {toxinidir}/tools/run-liccheck.sh 25 | 26 | [testenv:checks] 27 | allowlist_externals = 28 | {[testenv]allowlist_externals} 29 | pre-commit 30 | git 31 | commands_pre = 32 | commands = pre-commit run --all-files 33 | 34 | [testenv:docs] 35 | commands_pre = 36 | poetry -C datanommer.models install --all-extras 37 | poetry -C datanommer.commands install --all-extras 38 | allowlist_externals = 39 | {[testenv]allowlist_externals} 40 | mkdir 41 | rm 42 | deps = 43 | sphinx 44 | myst-parser 45 | sphinx-click 46 | commands= 47 | mkdir -p docs/_static 48 | rm -rf docs/_build 49 | rm -rf docs/_source 50 | sphinx-build -b html -d {envtmpdir}/doctrees docs docs/_build/html 51 | -------------------------------------------------------------------------------- /datanommer.commands/tests/utils.py: -------------------------------------------------------------------------------- 1 | from bodhi.messages.schemas.update import UpdateCommentV1 2 | from fedora_messaging import message as fedora_message 3 | 4 | 5 | def generate_message( 6 | topic="org.fedoraproject.test.a.nice.message", 7 | body=None, 8 | headers=None, 9 | ): 10 | body = body or {"encouragement": "You're doing great!"} 11 | return fedora_message.Message(topic=topic, body=body, headers=headers) 12 | 13 | 14 | def generate_bodhi_update_complete_message(): 15 | msg = UpdateCommentV1( 16 | body={ 17 | "comment": { 18 | "karma": -1, 19 | "text": "text", 20 | "timestamp": "2019-03-18 16:54:48", 21 | "update": { 22 | "alias": "FEDORA-EPEL-2021-f2d195dada", 23 | "builds": [ 24 | {"nvr": "abrt-addon-python3-2.1.11-50.el7"}, 25 | {"nvr": "kernel-10.4.0-2.el7"}, 26 | ], 27 | "status": "pending", 28 | "release": {"name": "F35"}, 29 | "request": "testing", 30 | "user": {"name": "ryanlerch"}, 31 | }, 32 | "user": {"name": "dudemcpants"}, 33 | } 34 | } 35 | ) 36 | msg.topic = f"org.fedoraproject.stg.{msg.topic}" 37 | return msg 38 | -------------------------------------------------------------------------------- /datanommer.consumer/NEWS.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Release Notes 3 | ============= 4 | 5 | For ``datanommer.consumer`` 6 | 7 | .. towncrier release notes start 8 | 9 | v1.4.4 10 | ====== 11 | 12 | Released on 2025-06-19. 13 | 14 | No significant changes. 15 | 16 | v1.4.3 17 | ====== 18 | 19 | Released on 2025-06-10. 20 | 21 | No significant changes. 22 | 23 | v1.4.2 24 | ====== 25 | 26 | Released on 2025-06-07. 27 | 28 | No significant changes. 29 | 30 | v1.4.1 31 | ====== 32 | 33 | Released on 2025-05-30. 
34 | 35 | Dependency Changes 36 | ^^^^^^^^^^^^^^^^^^ 37 | 38 | * Add support for Python 3.9 (for RHEL9) (`#8d63e86 `_) 39 | 40 | 41 | v1.4.0 42 | ====== 43 | 44 | Released on 2024-06-12. 45 | 46 | No significant changes. 47 | 48 | 49 | v1.2.0 50 | ====== 51 | 52 | Released on 2024-04-15. This is a minor release. 53 | 54 | Development Improvements 55 | ^^^^^^^^^^^^^^^^^^^^^^^^ 56 | 57 | * Use Ruff instead of flake8 and isort and bandit (`4f7ffaa 58 | `_). 59 | 60 | 61 | v1.1.0 62 | ====== 63 | 64 | Released on 2023-09-22. 65 | This is a feature release that updates Python support. 66 | 67 | Dependency Changes 68 | ^^^^^^^^^^^^^^^^^^ 69 | 70 | * Drop support for python 3.7, add support for python 3.10 (`PR#890 71 | `_). 72 | 73 | 74 | v1.0.3 75 | ====== 76 | 77 | Released on 2022-03-18. This is a minor release: 78 | 79 | - support fedora-messaging 3.0+ 80 | - update dependencies 81 | -------------------------------------------------------------------------------- /devel/ansible/roles/postgresql/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Install RPM packages 3 | dnf: 4 | name: 5 | - python3-psycopg2 6 | - postgresql-server 7 | - timescaledb 8 | - acl 9 | state: present 10 | 11 | - name: Setup the postgresql DB 12 | command: postgresql-setup --initdb 13 | args: 14 | creates: /var/lib/pgsql/data/postgresql.conf 15 | 16 | - name: Add timescaledb to postgresql config 17 | lineinfile: 18 | path: /var/lib/pgsql/data/postgresql.conf 19 | regexp: ^shared_preload_libraries = 20 | line: "shared_preload_libraries = 'timescaledb'" 21 | 22 | - name: Configure access to postgresql 23 | postgresql_pg_hba: 24 | dest: /var/lib/pgsql/data/pg_hba.conf 25 | contype: host 26 | databases: all 27 | users: all 28 | address: "{{item}}" 29 | method: md5 30 | loop: 31 | - 127.0.0.1/32 32 | - ::1/128 33 | 34 | - name: Start postgresql 35 | service: 36 | name: postgresql 37 | enabled: yes 38 | state: started 39 | 40 | 41 | - block: 42 | - name: Create the user 43 | postgresql_user: 44 | name: datanommer 45 | password: datanommer 46 | 47 | - name: Create the database 48 | postgresql_db: 49 | name: messages 50 | owner: datanommer 51 | 52 | - name: Activate timescaledb 53 | postgresql_ext: 54 | name: timescaledb 55 | db: messages 56 | become: yes 57 | become_user: postgres 58 | become_method: sudo 59 | 60 | 61 | - name: Make connection easier 62 | copy: 63 | dest: /home/vagrant/.pgpass 64 | content: "*:*:messages:datanommer:datanommer\n" 65 | owner: vagrant 66 | group: vagrant 67 | mode: 0600 68 | -------------------------------------------------------------------------------- /datanommer.consumer/datanommer/consumer/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. 
If not, see . 16 | import importlib.metadata 17 | import logging 18 | 19 | from fedora_messaging import config 20 | 21 | import datanommer.models as m 22 | 23 | 24 | __version__ = importlib.metadata.version("datanommer-consumer") 25 | 26 | 27 | def get_datanommer_sqlalchemy_url(): 28 | try: 29 | return config.conf["consumer_config"]["datanommer_sqlalchemy_url"] 30 | except KeyError as e: 31 | raise ValueError( 32 | "datanommer_sqlalchemy_url not defined in the fedora-messaging config" 33 | ) from e 34 | 35 | 36 | log = logging.getLogger("datanommer-consumer") 37 | 38 | 39 | class Nommer: 40 | def __init__(self): 41 | m.init(get_datanommer_sqlalchemy_url()) 42 | 43 | def __call__(self, message): 44 | log.info("Nomming %r", message) 45 | try: 46 | m.add(message) 47 | except Exception: 48 | m.session.rollback() 49 | raise 50 | -------------------------------------------------------------------------------- /docs/sysadmin.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Sysadmin Guide 3 | ============== 4 | 5 | Write the sysadmin guide here (installation, maintenance, known issues, etc). 6 | 7 | Migration with Alembic 8 | ---------------------- 9 | 10 | When the database models are changed, we use alembic to retain the data. Alembic is located in the models:: 11 | 12 | (datanommer)$ cd datanommer.models 13 | 14 | To check the current models version:: 15 | 16 | (datanommer)$ alembic current 17 | 18 | If your models are up to date, you should see:: 19 | 20 | INFO [alembic.migration] Context impl SQLiteImpl. 21 | INFO [alembic.migration] Will assume transactional DDL. 22 | Current revision for postgresql://datanommer:datanommer@localhost/messages: 198447250956 -> ae2801c4cd9 (head), add category column 23 | 24 | If your result is:: 25 | 26 | INFO [alembic.migration] Context impl SQLiteImpl. 27 | INFO [alembic.migration] Will assume transactional DDL. 28 | Current revision for postgresql://datanommer:datanommer@localhost/messages: None 29 | 30 | then migrate to the most recent version with:: 31 | 32 | (datanommer)$ alembic upgrade head 33 | 34 | You should see:: 35 | 36 | INFO [alembic.migration] Context impl SQLiteImpl. 37 | INFO [alembic.migration] Will assume transactional DDL. 38 | INFO [alembic.migration] Running upgrade None -> 198447250956 39 | INFO [alembic.migration] Running upgrade 198447250956 -> ae2801c4cd9 40 | 41 | Refreshing materialized view 42 | ---------------------------- 43 | 44 | The ``recent_topics`` materialized view needs to be refreshed periodically to keep the data current. 45 | 46 | To refresh manually:: 47 | 48 | (datanommer)$ datanommer-refresh-view 49 | 50 | To set up automatic refresh (say every 5 minutes) via cron job, add this to your crontab:: 51 | 52 | */5 * * * * datanommer-refresh-view 53 | -------------------------------------------------------------------------------- /tools/towncrier/get-authors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | This script browses through git commit history (starting at latest tag), collects all authors of 5 | commits and creates fragment for `towncrier`_ tool. 6 | It's meant to be run during the release process, before generating the release notes. 7 | Example:: 8 | $ python get_authors.py 9 | .. 
_towncrier: https://github.com/hawkowl/towncrier/ 10 | Authors: 11 | Aurelien Bompard 12 | Michal Konecny 13 | """ 14 | 15 | import os 16 | from argparse import ArgumentParser 17 | from subprocess import check_output 18 | 19 | 20 | EXCLUDE = ["Weblate (bot)", "dependabot[bot]", "renovate[bot]"] 21 | 22 | last_tag = check_output("git tag | sort -n | tail -n 1", shell=True, text=True).strip() 23 | 24 | args_parser = ArgumentParser() 25 | args_parser.add_argument( 26 | "until", 27 | nargs="?", 28 | default="HEAD", 29 | help="Consider all commits until this one (default: %(default)s).", 30 | ) 31 | args_parser.add_argument( 32 | "since", 33 | nargs="?", 34 | default=last_tag, 35 | help="Consider all commits since this one (default: %(default)s).", 36 | ) 37 | args = args_parser.parse_args() 38 | 39 | authors = {} 40 | log_range = args.since + ".." + args.until 41 | output = check_output(["git", "log", log_range, "--format=%ae\t%an"], text=True) 42 | for line in output.splitlines(): 43 | email, fullname = line.split("\t") 44 | email = email.split("@")[0].replace(".", "") 45 | if email in authors: 46 | continue 47 | authors[email] = fullname 48 | 49 | for nick, fullname in authors.items(): 50 | if fullname in EXCLUDE or fullname.endswith("[bot]"): 51 | continue 52 | filename = f"{nick}.author" 53 | if os.path.exists(filename): 54 | continue 55 | print(f"Adding author {fullname} ({nick})") 56 | with open(filename, "w") as f: 57 | f.write(fullname) 58 | f.write("\n") 59 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/testing/__init__.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sqlalchemy as sa 3 | from pytest_postgresql import factories 4 | from pytest_postgresql.janitor import DatabaseJanitor 5 | from sqlalchemy.orm import scoped_session 6 | 7 | import datanommer.models as dm 8 | 9 | 10 | postgresql_proc = factories.postgresql_proc( 11 | postgres_options="-c shared_preload_libraries=timescaledb -c timescaledb.telemetry_level=off", 12 | ) 13 | 14 | 15 | @pytest.fixture(scope="session") 16 | def datanommer_db_url(postgresql_proc): 17 | return ( 18 | f"postgresql+psycopg2://{postgresql_proc.user}:@" 19 | f"{postgresql_proc.host}:{postgresql_proc.port}" 20 | f"/{postgresql_proc.dbname}" 21 | ) 22 | 23 | 24 | @pytest.fixture(scope="session") 25 | def datanommer_db_engine(postgresql_proc, datanommer_db_url): 26 | with DatabaseJanitor( 27 | user=postgresql_proc.user, 28 | host=postgresql_proc.host, 29 | port=postgresql_proc.port, 30 | dbname=postgresql_proc.dbname, 31 | # Don't use a template database 32 | # template_dbname=postgresql_proc.template_dbname, 33 | version=postgresql_proc.version, 34 | ): 35 | engine = sa.create_engine(datanommer_db_url, future=True) 36 | # Renew the global object, dm.init checks a custom attribute 37 | dm.session = scoped_session(dm.maker) 38 | dm.init(engine=engine, create=True) 39 | yield engine 40 | engine.dispose() 41 | 42 | 43 | @pytest.fixture() 44 | def datanommer_db(datanommer_db_url, datanommer_db_engine): 45 | for table in reversed(dm.DeclarativeBase.metadata.sorted_tables): 46 | dm.session.execute(table.delete()) 47 | dm.session.commit() 48 | yield datanommer_db_engine 49 | 50 | 51 | @pytest.fixture() 52 | def datanommer_models(datanommer_db): 53 | dm.User.clear_cache() 54 | dm.Package.clear_cache() 55 | yield dm.session 56 | dm.session.rollback() 57 | 
-------------------------------------------------------------------------------- /datanommer.commands/config.toml.example: -------------------------------------------------------------------------------- 1 | # A sample fedora-messaging configuration for datanommer. This file is in the TOML format. 2 | 3 | amqp_url = "amqps://datanommer:@rabbitmq.fedoraproject.org/%2Fpublic_pubsub" 4 | callback = "datanommer.consumer:Nommer" 5 | passive_declares = true 6 | 7 | [tls] 8 | ca_cert = "/etc/fedora-messaging/cacert.pem" 9 | keyfile = "/etc/fedora-messaging/fedora-key.pem" 10 | certfile = "/etc/fedora-messaging/fedora-cert.pem" 11 | 12 | [client_properties] 13 | app = "datanommer" 14 | app_url = "https://github.com/fedora-infra/datanommer" 15 | 16 | [queues.datanommer] 17 | durable = false 18 | auto_delete = true 19 | exclusive = true 20 | arguments = {} 21 | 22 | [[bindings]] 23 | queue = "datanommer" 24 | exchange = "amq.topic" 25 | routing_keys = ["#"] 26 | 27 | [consumer_config] 28 | datanommer_sqlalchemy_url = 'postgresql://datanommer:datanommer@localhost/datanommer' 29 | alembic_ini = "../datanommer.models/alembic.ini" 30 | 31 | [log_config] 32 | version = 1 33 | disable_existing_loggers = true 34 | 35 | [log_config.formatters.simple] 36 | format = "[%(levelname)s %(name)s] %(message)s" 37 | 38 | [log_config.handlers.console] 39 | class = "logging.StreamHandler" 40 | formatter = "simple" 41 | stream = "ext://sys.stdout" 42 | 43 | [log_config.loggers.fedora_messaging] 44 | level = "INFO" 45 | propagate = false 46 | handlers = ["console"] 47 | 48 | # Twisted is the asynchronous framework that manages the TCP/TLS connection, as well 49 | # as the consumer event loop. When debugging you may want to lower this log level. 50 | [log_config.loggers.twisted] 51 | level = "INFO" 52 | propagate = false 53 | handlers = ["console"] 54 | 55 | # Pika is the underlying AMQP client library. When debugging you may want to 56 | # lower this log level. 
57 | [log_config.loggers.pika] 58 | level = "WARNING" 59 | propagate = false 60 | handlers = ["console"] 61 | 62 | [log_config.loggers.datanommer] 63 | level = "INFO" 64 | propagate = false 65 | handlers = ["console"] 66 | 67 | [log_config.root] 68 | level = "ERROR" 69 | handlers = ["console"] 70 | -------------------------------------------------------------------------------- /datanommer.models/tests/test_jsonencodeddict.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sqlalchemy import Column, create_engine, Integer, MetaData, select, Table, text 3 | 4 | from datanommer.models import _JSONEncodedDict 5 | 6 | 7 | @pytest.fixture 8 | def connection(): 9 | engine = create_engine("sqlite:///:memory:", future=True) 10 | with engine.connect() as connection: 11 | yield connection 12 | 13 | 14 | @pytest.fixture 15 | def table(connection): 16 | metadata = MetaData() 17 | table = Table( 18 | "test_table", 19 | metadata, 20 | Column("id", Integer, primary_key=True), 21 | Column("data", _JSONEncodedDict), 22 | ) 23 | metadata.create_all(connection) 24 | yield table 25 | metadata.drop_all(connection) 26 | 27 | 28 | def test_jsonencodeddict(connection, table): 29 | connection.execute(table.insert().values(data={"foo": "bar"})) 30 | # Check that it's stored as a string 31 | for row in connection.execute(text("SELECT data FROM test_table")): 32 | assert row.data == '{"foo": "bar"}' 33 | # Check that SQLAlchemy retrieves it as a dict 34 | for row in connection.execute(select(table.c.data)): 35 | assert row.data == {"foo": "bar"} 36 | 37 | 38 | def test_jsonencodeddict_null(connection, table): 39 | # Make sure NULL values are supported 40 | connection.execute(table.insert().values(data=None)) 41 | for row in connection.execute(select(table.c.data)): 42 | assert row.data is None 43 | 44 | 45 | def test_jsonencodeddict_compare(connection, table): 46 | # Make sure NULL values are supported 47 | connection.execute(table.insert().values(data={"foo": "bar"})) 48 | for row in connection.execute(select(table.c.data).where(table.c.data == {"foo": "bar"})): 49 | assert row.data == {"foo": "bar"} 50 | 51 | 52 | def test_jsonencodeddict_compare_like(connection, table): 53 | # Make sure NULL values are supported 54 | connection.execute(table.insert().values(data={"foo": "bar"})) 55 | for row in connection.execute(select(table.c.data).where(table.c.data.like("%foo%"))): 56 | assert row.data == {"foo": "bar"} 57 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/view.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import DDL, func, select, text 2 | from sqlalchemy.ext import compiler 3 | from sqlalchemy.schema import DDLElement 4 | 5 | 6 | TIME_INTERVAL = "1 year" 7 | 8 | 9 | class CreateMaterializedView(DDLElement): 10 | def __init__(self, name, selectable): 11 | self.name = name 12 | self.selectable = selectable 13 | 14 | 15 | @compiler.compiles(CreateMaterializedView) 16 | def _create_view(element, compiler, **kw): 17 | selectable = compiler.sql_compiler.process(element.selectable, literal_binds=True) 18 | return f"CREATE MATERIALIZED VIEW IF NOT EXISTS {element.name} AS {selectable}" 19 | 20 | 21 | def get_selectable(): 22 | """Factory function to create the selectable query for materialized view.""" 23 | from . 
import Message 24 | 25 | return ( 26 | select( 27 | Message.topic, 28 | func.count().label("message_count"), 29 | func.min(Message.timestamp).label("earliest"), 30 | func.max(Message.timestamp).label("latest"), 31 | ) 32 | .where(Message.timestamp >= text(f"NOW() - INTERVAL '{TIME_INTERVAL}'")) 33 | .group_by(Message.topic) 34 | ) 35 | 36 | 37 | def refresh_recent_topics(connection): 38 | """Standalone refresh function that can be called from cron. 39 | 40 | Args: 41 | connection: SQLAlchemy connection object 42 | """ 43 | connection.execute(text("REFRESH MATERIALIZED VIEW CONCURRENTLY recent_topics")) 44 | 45 | 46 | def create_view(connection): 47 | """Create the recent_topics materialized view with proper indexes.""" 48 | 49 | selectable = get_selectable() 50 | 51 | # Create the materialized view 52 | connection.execute(CreateMaterializedView("recent_topics", selectable)) 53 | 54 | # Create unique index on topic 55 | connection.execute( 56 | DDL("CREATE UNIQUE INDEX IF NOT EXISTS uq_recent_topics_topic " "ON recent_topics (topic)"), 57 | ) 58 | 59 | # Create index on message_count for sorting 60 | connection.execute( 61 | DDL( 62 | "CREATE INDEX IF NOT EXISTS ix_recent_topics_message_count " 63 | "ON recent_topics (message_count)" 64 | ) 65 | ) 66 | -------------------------------------------------------------------------------- /tools/towncrier/template.rst.j2: -------------------------------------------------------------------------------- 1 | {% macro reference(value) -%} 2 | {%- if value.startswith("PR") -%} 3 | `PR#{{ value[2:] }} `_ 4 | {%- elif value.startswith("C") -%} 5 | `{{ value[1:] }} `_ 6 | {%- else -%} 7 | `#{{ value }} `_ 8 | {%- endif -%} 9 | {%- endmacro -%} 10 | 11 | {{- top_line }} 12 | {{ top_underline * ((top_line)|length) -}} 13 | 14 | Released on {{ versiondata.date }}. 15 | 16 | {% for section, _ in sections.items() -%} 17 | {%- set underline = underlines[0] -%} 18 | {%- if section -%} 19 | {{section}} 20 | {{ underline * section|length }} 21 | {%- set underline = underlines[1] -%} 22 | {%- endif -%} 23 | 24 | {%- if sections[section] -%} 25 | {%- for category, val in definitions.items() if category in sections[section] and category != "author" -%} 26 | {{ definitions[category]['name'] }} 27 | {{ underline * definitions[category]['name']|length }} 28 | 29 | {% if definitions[category]['showcontent'] -%} 30 | {%- for text, values in sections[section][category].items() %} 31 | * {{ text }} 32 | {%- if values %} 33 | {% if "\n - " in text or '\n * ' in text %} 34 | 35 | 36 | ( 37 | {%- else %} 38 | ( 39 | {%- endif -%} 40 | {%- for issue in values %} 41 | {{ reference(issue) }}{% if not loop.last %}, {% endif %} 42 | {%- endfor %} 43 | ) 44 | {% else %} 45 | 46 | {% endif %} 47 | {% endfor -%} 48 | {%- else -%} 49 | * {{ sections[section][category]['']|sort|join(', ') }} 50 | 51 | {% endif -%} 52 | {%- if sections[section][category]|length == 0 %} 53 | No significant changes. 54 | 55 | {% else -%} 56 | {%- endif %} 57 | 58 | {% endfor -%} 59 | {% if sections[section]["author"] -%} 60 | {{definitions['author']["name"]}} 61 | {{ underline * definitions['author']['name']|length }} 62 | 63 | Many thanks to the contributors of bug reports, pull requests, and pull request 64 | reviews for this release: 65 | 66 | {% for text, values in sections[section]["author"].items() -%} 67 | * {{ text }} 68 | {% endfor -%} 69 | {%- endif %} 70 | 71 | {% else -%} 72 | No significant changes. 
73 | 74 | {% endif %} 75 | {%- endfor +%} 76 | -------------------------------------------------------------------------------- /datanommer.consumer/tests/test_consumer.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 16 | 17 | import pytest 18 | from fedora_messaging import message 19 | from sqlalchemy import func, select 20 | 21 | import datanommer.consumer 22 | import datanommer.models as dm 23 | 24 | 25 | @pytest.fixture 26 | def consumer(mocker): 27 | mock_get_url = mocker.patch("datanommer.consumer.get_datanommer_sqlalchemy_url") 28 | mock_get_url.return_value = "sqlite:///fake.db" 29 | return datanommer.consumer.Nommer() 30 | 31 | 32 | def test_consume(datanommer_models, consumer): 33 | example_message = message.Message( 34 | topic="nice.message", body={"encouragement": "You're doing great!"} 35 | ) 36 | 37 | consumer = datanommer.consumer.Nommer() 38 | 39 | consumer(example_message) 40 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 41 | 42 | 43 | def test_add_exception(datanommer_models, consumer, mocker): 44 | example_message = message.Message( 45 | topic="nice.message", body={"encouragement": "You're doing great!"} 46 | ) 47 | 48 | dm.add = mocker.Mock(side_effect=RuntimeError("an exception")) 49 | consumer = datanommer.consumer.Nommer() 50 | with pytest.raises(RuntimeError): 51 | consumer(example_message) 52 | 53 | 54 | def test_get_datanommer_sqlalchemy_url_keyerror(mocker): 55 | mocker.patch.dict( 56 | datanommer.consumer.config.conf["consumer_config"], 57 | {}, 58 | clear=True, 59 | ) 60 | with pytest.raises(ValueError): 61 | datanommer.consumer.get_datanommer_sqlalchemy_url() 62 | -------------------------------------------------------------------------------- /datanommer.consumer/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datanommer.consumer" 3 | version = "1.4.4" 4 | description = "Consumer for datanommer" 5 | authors = [ 6 | "Fedora Infrastructure " 7 | ] 8 | license = "GPL-3.0-or-later" 9 | readme = "README.rst" 10 | repository = "https://github.com/fedora-infra/datanommer" 11 | homepage = "https://github.com/fedora-infra/datanommer" 12 | packages = [ 13 | { include = "datanommer" }, 14 | ] 15 | include = [ 16 | { path = "*.ini", format = "sdist" }, 17 | { path = "tests/*", format = "sdist" }, 18 | ] 19 | 20 | [tool.poetry.dependencies] 21 | python = "^3.11" 22 | "datanommer.models" = "^1.0.0" 23 | fedora-messaging = ">=2.1.0" 24 | psycopg2 = "^2.9.1" 25 | 26 | [tool.poetry.group.dev.dependencies] 27 | pre-commit = "*" 28 | "datanommer.models" = {path = "../datanommer.models", develop = true} 29 | black = "*" 30 | ruff = "*" 31 | pytest = "*" 32 | psutil = "*" 33 | liccheck = 
"*" 34 | pytest-cov = "*" 35 | pytest-mock = "*" 36 | pytest-postgresql = "*" 37 | towncrier = "*" 38 | poetry-plugin-export = "^1.9.0" 39 | 40 | 41 | [build-system] 42 | requires = ["poetry-core>=1.0.0"] 43 | build-backend = "poetry.core.masonry.api" 44 | 45 | 46 | [tool.towncrier] 47 | package = "datanommer.consumer" 48 | directory = "news/" 49 | title_format = "v{version}" 50 | issue_format = "{issue}" 51 | template = "../tools/towncrier/template.rst.j2" 52 | underlines = "=^-" 53 | wrap = false 54 | all_bullets = true 55 | 56 | [[tool.towncrier.type]] 57 | directory = "bic" 58 | name = "Backwards Incompatible Changes" 59 | showcontent = true 60 | 61 | [[tool.towncrier.type]] 62 | directory = "dependency" 63 | name = "Dependency Changes" 64 | showcontent = true 65 | 66 | [[tool.towncrier.type]] 67 | directory = "feature" 68 | name = "Features" 69 | showcontent = true 70 | 71 | [[tool.towncrier.type]] 72 | directory = "bug" 73 | name = "Bug Fixes" 74 | showcontent = true 75 | 76 | [[tool.towncrier.type]] 77 | directory = "dev" 78 | name = "Development Improvements" 79 | showcontent = true 80 | 81 | [[tool.towncrier.type]] 82 | directory = "docs" 83 | name = "Documentation Improvements" 84 | showcontent = true 85 | 86 | [[tool.towncrier.type]] 87 | directory = "other" 88 | name = "Other Changes" 89 | showcontent = true 90 | 91 | [[tool.towncrier.type]] 92 | directory = "author" 93 | name = "Contributors" 94 | showcontent = true 95 | -------------------------------------------------------------------------------- /datanommer.commands/datanommer/commands/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | from fedora_messaging import config as fedora_messaging_config 5 | from fedora_messaging.message import load_message as load_message 6 | from sqlalchemy import func 7 | 8 | import datanommer.models as m 9 | 10 | 11 | # Go trough messages these many at a time 12 | CHUNK_SIZE = 10000 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | def get_config(config_path=None): 17 | if config_path: 18 | fedora_messaging_config.conf.load_config(config_path) 19 | conf = fedora_messaging_config.conf["consumer_config"] 20 | for key in ("datanommer_sqlalchemy_url", "alembic_ini"): 21 | if key not in conf: 22 | raise click.ClickException(f"{key} not defined in the fedora-messaging config") 23 | return conf 24 | 25 | 26 | config_option = click.option( 27 | "-c", 28 | "--config", 29 | "config_path", 30 | help="Load this Fedora Messaging config file", 31 | type=click.Path(exists=True, readable=True), 32 | ) 33 | 34 | 35 | def iterate_over_messages(query, start, chunk_size): 36 | click.echo("Counting messages...") 37 | 38 | total = m.session.scalar(query.with_only_columns(func.count(m.Message.id))) 39 | if not total: 40 | click.echo("No messages matched.") 41 | return 42 | 43 | click.echo(f"Considering {total} message{'s' if total > 1 else ''}") 44 | 45 | query = query.order_by(m.Message.timestamp) 46 | with click.progressbar(length=total) as bar: 47 | has_messages = True 48 | chunk_start = start 49 | first_run = True 50 | while has_messages: 51 | # click < 8.2 (Python < 3.10): use bar.is_hidden 52 | # click >= 8.2 (Python >= 3.10): use bar.hidden and the TTY check 53 | if (hasattr(bar, "is_hidden") and bar.is_hidden) or ( 54 | hasattr(bar, "hidden") and (bar.hidden or not bar.file.isatty()) 55 | ): 56 | click.echo(f"Working on {chunk_size} messages sent after {chunk_start}") 57 | chunk_query = query.where(m.Message.timestamp 
>= chunk_start).limit(chunk_size) 58 | if not first_run: 59 | chunk_query = chunk_query.offset(1) 60 | has_messages = False 61 | for message in m.session.scalars(chunk_query): 62 | bar.update(1) 63 | has_messages = True 64 | yield message 65 | if has_messages: 66 | chunk_start = message.timestamp 67 | first_run = False 68 | m.session.commit() 69 | m.session.expunge_all() 70 | -------------------------------------------------------------------------------- /devel/ansible/roles/datanommer/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Install RPM packages 3 | dnf: 4 | name: 5 | - fedora-messaging 6 | - gcc 7 | - git 8 | - poetry 9 | - python3-devel 10 | - python3-pip 11 | - vim 12 | - krb5-devel 13 | - libpq-devel 14 | - tox 15 | - python-psycopg2 16 | state: present 17 | 18 | - name: Install the .bashrc 19 | copy: 20 | src: .bashrc 21 | dest: /home/vagrant/.bashrc 22 | mode: 0644 23 | owner: vagrant 24 | group: vagrant 25 | 26 | - name: Create a directory for the virtualenv 27 | file: 28 | name: /srv/venv 29 | state: directory 30 | mode: 0755 31 | owner: vagrant 32 | group: vagrant 33 | 34 | - name: Create the virtualenv and install poetry 35 | pip: 36 | name: poetry 37 | virtualenv: /srv/venv 38 | become: yes 39 | become_user: vagrant 40 | 41 | - name: Install datanommer.models with poetry 42 | shell: /srv/venv/bin/poetry install 43 | args: 44 | chdir: /home/vagrant/datanommer/datanommer.models/ 45 | become: yes 46 | become_user: vagrant 47 | 48 | - name: Install datanommer.commands with poetry 49 | shell: /srv/venv/bin/poetry install 50 | args: 51 | chdir: /home/vagrant/datanommer/datanommer.commands/ 52 | become: yes 53 | become_user: vagrant 54 | 55 | - name: Install datanommer.consumer with poetry 56 | shell: /srv/venv/bin/poetry install 57 | args: 58 | chdir: /home/vagrant/datanommer/datanommer.consumer/ 59 | become: yes 60 | become_user: vagrant 61 | 62 | - name: Use the stage fedora-messaging queue to consume 63 | copy: 64 | remote_src: True 65 | src: /etc/fedora-messaging/fedora.stg.toml 66 | dest: /etc/fedora-messaging/config.toml 67 | 68 | - name: configure datanommer_sqlalchemy_url in fedora-messaging config 69 | lineinfile: 70 | path: /etc/fedora-messaging/config.toml 71 | regexp: 'example_key = "for my consumer"' 72 | line: "datanommer_sqlalchemy_url = \"postgresql://datanommer:datanommer@localhost/messages\"\nalembic_ini = \"/etc/datanommer-alembic.ini\"" 73 | 74 | - name: Create the Alembic config file 75 | copy: 76 | src: alembic.ini 77 | dest: /etc/datanommer-alembic.ini 78 | mode: 0600 79 | owner: vagrant 80 | group: vagrant 81 | 82 | - name: Create datanommer db 83 | shell: /srv/venv/bin/poetry run datanommer-create-db 84 | args: 85 | chdir: /home/vagrant/datanommer/datanommer.commands 86 | become: yes 87 | become_user: vagrant 88 | 89 | - name: Install the systemd unit files for datanommer consumer 90 | copy: 91 | src: "datanommer.service" 92 | dest: /etc/systemd/system/datanommer.service 93 | mode: 0644 94 | 95 | - name: Start datanommer service using systemd 96 | systemd: 97 | state: started 98 | name: datanommer 99 | daemon_reload: yes 100 | enabled: yes 101 | -------------------------------------------------------------------------------- /datanommer.commands/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datanommer.commands" 3 | version = "1.4.4" 4 | description = "Console commands for datanommer" 5 | authors = [ 6 | 
"Fedora Infrastructure " 7 | ] 8 | license = "GPL-3.0-or-later" 9 | readme = "README.rst" 10 | repository = "https://github.com/fedora-infra/datanommer" 11 | homepage = "https://github.com/fedora-infra/datanommer" 12 | packages = [ 13 | { include = "datanommer" }, 14 | ] 15 | include = [ 16 | { path = "*.ini", format = "sdist" }, 17 | { path = "*.example", format = "sdist" }, 18 | { path = "tests/*", format = "sdist" }, 19 | ] 20 | 21 | [tool.poetry.dependencies] 22 | python = "^3.11" 23 | "datanommer.models" = {version = "^1.0.0"} 24 | fedora-messaging = ">=2.1.0" 25 | 26 | [tool.poetry.group.dev.dependencies] 27 | pre-commit = "*" 28 | "datanommer.models" = {path = "../datanommer.models", develop = true} 29 | black = "*" 30 | ruff = "*" 31 | pytest = "*" 32 | liccheck = "*" 33 | pytest-cov = "*" 34 | pytest-mock = "*" 35 | pytest-postgresql = "*" 36 | psycopg2 = "*" 37 | towncrier = "*" 38 | bodhi-messages = "*" 39 | poetry-plugin-export = "^1.9.0" 40 | 41 | [tool.poetry.scripts] 42 | datanommer-create-db = "datanommer.commands:create" 43 | datanommer-dump = "datanommer.commands:dump" 44 | datanommer-stats = "datanommer.commands:stats" 45 | datanommer-latest = "datanommer.commands:latest" 46 | datanommer-extract-users = "datanommer.commands.extract_users:main" 47 | datanommer-refresh-view = "datanommer.commands:refresh_view" 48 | 49 | 50 | [build-system] 51 | requires = ["poetry-core>=1.0.0"] 52 | build-backend = "poetry.core.masonry.api" 53 | 54 | 55 | [tool.towncrier] 56 | package = "datanommer.commands" 57 | directory = "news/" 58 | title_format = "v{version}" 59 | issue_format = "{issue}" 60 | template = "../tools/towncrier/template.rst.j2" 61 | underlines = "=^-" 62 | wrap = false 63 | all_bullets = true 64 | 65 | [[tool.towncrier.type]] 66 | directory = "bic" 67 | name = "Backwards Incompatible Changes" 68 | showcontent = true 69 | 70 | [[tool.towncrier.type]] 71 | directory = "dependency" 72 | name = "Dependency Changes" 73 | showcontent = true 74 | 75 | [[tool.towncrier.type]] 76 | directory = "feature" 77 | name = "Features" 78 | showcontent = true 79 | 80 | [[tool.towncrier.type]] 81 | directory = "bug" 82 | name = "Bug Fixes" 83 | showcontent = true 84 | 85 | [[tool.towncrier.type]] 86 | directory = "dev" 87 | name = "Development Improvements" 88 | showcontent = true 89 | 90 | [[tool.towncrier.type]] 91 | directory = "docs" 92 | name = "Documentation Improvements" 93 | showcontent = true 94 | 95 | [[tool.towncrier.type]] 96 | directory = "other" 97 | name = "Other Changes" 98 | showcontent = true 99 | 100 | [[tool.towncrier.type]] 101 | directory = "author" 102 | name = "Contributors" 103 | showcontent = true 104 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/env.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 
13 | #
14 | # You should have received a copy of the GNU General Public License along
15 | # with this program. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | from logging.config import fileConfig
18 | 
19 | from alembic import context
20 | from sqlalchemy import engine_from_config, pool
21 | 
22 | # add your model's MetaData object here
23 | # for 'autogenerate' support
24 | # from myapp import mymodel
25 | # target_metadata = mymodel.Base.metadata
26 | from datanommer.models import DeclarativeBase
27 | 
28 | 
29 | target_metadata = DeclarativeBase.metadata
30 | 
31 | 
32 | # this is the Alembic Config object, which provides
33 | # access to the values within the .ini file in use.
34 | config = context.config
35 | 
36 | # Interpret the config file for Python logging.
37 | # This sets up the loggers.
38 | fileConfig(config.config_file_name)
39 | 
40 | # other values from the config, defined by the needs of env.py,
41 | # can be acquired:
42 | # my_important_option = config.get_main_option("my_important_option")
43 | # ... etc.
44 | 
45 | 
46 | def run_migrations_offline():
47 | """Run migrations in 'offline' mode.
48 | 
49 | This configures the context with just a URL
50 | and not an Engine, though an Engine is acceptable
51 | here as well. By skipping the Engine creation
52 | we don't even need a DBAPI to be available.
53 | 
54 | Calls to context.execute() here emit the given string to the
55 | script output.
56 | 
57 | """
58 | # TODO: Pull this from datanommer's fedmsg.d config instead of using
59 | # the alembic.ini
60 | url = config.get_main_option("sqlalchemy.url")
61 | context.configure(url=url)
62 | 
63 | with context.begin_transaction():
64 | context.run_migrations()
65 | 
66 | 
67 | def run_migrations_online():
68 | """Run migrations in 'online' mode.
69 | 
70 | In this scenario we need to create an Engine
71 | and associate a connection with the context.
72 | 
73 | """
74 | engine = engine_from_config(
75 | config.get_section(config.config_ini_section),
76 | prefix="sqlalchemy.",
77 | poolclass=pool.NullPool,
78 | )
79 | 
80 | connection = engine.connect()
81 | context.configure(connection=connection, target_metadata=target_metadata)
82 | 
83 | try:
84 | with context.begin_transaction():
85 | context.run_migrations()
86 | finally:
87 | connection.close()
88 | 
89 | 
90 | if context.is_offline_mode():
91 | run_migrations_offline()
92 | else:
93 | run_migrations_online()
94 | 
-------------------------------------------------------------------------------- /datanommer.commands/NEWS.rst: --------------------------------------------------------------------------------
1 | =============
2 | Release Notes
3 | =============
4 | 
5 | For ``datanommer.commands``
6 | 
7 | .. towncrier release notes start
8 | 
9 | v1.4.4
10 | ======
11 | 
12 | Released on 2025-06-19.
13 | 
14 | No significant changes.
15 | 
16 | v1.4.3
17 | ======
18 | 
19 | Released on 2025-06-10.
20 | 
21 | Dependency Changes
22 | ^^^^^^^^^^^^^^^^^^
23 | 
24 | * Fix the schema dep that was started in 127b1dd
25 | 
26 | 
27 | v1.4.2
28 | ======
29 | 
30 | Released on 2025-06-07.
31 | 
32 | Dependency Changes
33 | ^^^^^^^^^^^^^^^^^^
34 | 
35 | * Don't require all the message schemas
36 | 
37 | 
38 | v1.4.1
39 | ======
40 | 
41 | Released on 2025-05-30. 
42 | 43 | Dependency Changes 44 | ^^^^^^^^^^^^^^^^^^ 45 | 46 | * Add support for Python 3.9 (for RHEL9) (`#8d63e86 `_) 47 | 48 | Development Improvements 49 | ^^^^^^^^^^^^^^^^^^^^^^^^ 50 | 51 | * Test the command output on a TTY (`#ef1f572 `_) 52 | 53 | Other Changes 54 | ^^^^^^^^^^^^^ 55 | 56 | * Slighly clearer command message (`#940c642 `_) 57 | * Slight output improvement in the progressbar-based commands (`#1dc0db8 `_) 58 | 59 | 60 | v1.4.0 61 | ====== 62 | 63 | Released on 2024-06-12. 64 | 65 | No significant changes. 66 | 67 | 68 | v1.3.0 69 | ====== 70 | 71 | Released on 2024-05-22. 72 | 73 | Features 74 | ^^^^^^^^ 75 | 76 | * Improve the extract-users script ( 77 | `dbf28ff `_, 78 | `ac7394e `_, 79 | `ec2e581 `_, 80 | `2fd0175 `_ 81 | ). 82 | 83 | Other Changes 84 | ^^^^^^^^^^^^^ 85 | 86 | * Update dependencies 87 | 88 | 89 | v1.2.0 90 | ====== 91 | 92 | Released on 2024-04-15. 93 | This is a feature release that adds the datanommer-extract-users script. 94 | 95 | Features 96 | ^^^^^^^^ 97 | 98 | * Add the datanommer-extract-users script to fill the usernames table with data 99 | from recently-added message schemas (`320a466 100 | `_). 101 | 102 | Development Improvements 103 | ^^^^^^^^^^^^^^^^^^^^^^^^ 104 | 105 | * Use Ruff instead of flake8 and isort and bandit (`4f7ffaa 106 | `_). 107 | 108 | 109 | v1.1.0 110 | ====== 111 | 112 | Released on 2023-09-22. 113 | This is a feature release that adds support for Python 3.10, drops support for 114 | Python 3.7, and improves the database creation for Alembic integration. 115 | 116 | Dependency Changes 117 | ^^^^^^^^^^^^^^^^^^ 118 | 119 | * Drop support for python 3.7, add support for python 3.10 (`PR#890 120 | `_). 121 | 122 | Features 123 | ^^^^^^^^ 124 | 125 | * Use Alembic to stamp the database when creating it. This requires adding a 126 | config variable ``alembic_ini`` in the fedora-messaging configuration file 127 | that points to the ``alembic.ini`` file. (`PR#815 128 | `_). 129 | 130 | 131 | v1.0.3 132 | ====== 133 | 134 | Released on 2022-03-18. This is a minor release: 135 | 136 | - support fedora-messaging 3.0+ 137 | - update dependencies 138 | -------------------------------------------------------------------------------- /datanommer.models/NEWS.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Release Notes 3 | ============= 4 | 5 | For ``datanommer.models`` 6 | 7 | .. towncrier release notes start 8 | 9 | v1.4.4 10 | ====== 11 | 12 | Released on 2025-06-19. 13 | 14 | Dependency Changes 15 | ^^^^^^^^^^^^^^^^^^ 16 | 17 | * Relax dependency on psycopg2 (`3ad7e7b `_) 18 | 19 | 20 | v1.4.3 21 | ====== 22 | 23 | Released on 2025-06-10. 24 | 25 | No significant changes. 26 | 27 | v1.4.2 28 | ====== 29 | 30 | Released on 2025-06-07. 31 | 32 | No significant changes. 33 | 34 | v1.4.1 35 | ====== 36 | 37 | Released on 2025-05-30. 
38 | 39 | Dependency Changes 40 | ^^^^^^^^^^^^^^^^^^ 41 | 42 | * Add schema package mailman3-fedmsg-plugin-schemas (`#8ad6c47 `_) 43 | * Add schema package webhook-to-fedora-messaging-messages (`#865855c `_) 44 | * Update koji-fedoramessaging-messages (`#c64cb31 `_) 45 | * Add support for Python 3.9 (for RHEL9) (`#8d63e86 `_) 46 | * Add the schema package journal-to-fedora-messaging-messages (`#3d9bc35 `_) 47 | * Add the `fedora-image-uploader-messages` schema package (`#7da3074 `_) 48 | 49 | Bug Fixes 50 | ^^^^^^^^^ 51 | 52 | * Fix unit tests (`#085f5c4 `_) 53 | 54 | Other Changes 55 | ^^^^^^^^^^^^^ 56 | 57 | * Remove unneccessary int call (`#487341f `_) 58 | 59 | 60 | v1.4.0 61 | ====== 62 | 63 | Released on 2024-06-12. 64 | 65 | Features 66 | ^^^^^^^^ 67 | 68 | * Rename the unused `username` column to `agent_name` and use it to store the agent name (`#1309 `_) 69 | * Add a JSON index on the message headers 70 | 71 | Bug Fixes 72 | ^^^^^^^^^ 73 | 74 | * Fix the `get_first()` query to actually return only one message 75 | 76 | 77 | v1.3.0 78 | ====== 79 | 80 | Released on 2024-05-22. 81 | 82 | Features 83 | ^^^^^^^^ 84 | 85 | * Add a ``get_first()`` method on ``Message`` to get the first message matching 86 | a grep-like query (`99fb739 `_). 87 | 88 | Bug Fixes 89 | ^^^^^^^^^ 90 | 91 | * Don't compute the total when not necessary (`99fb739 `_). 92 | 93 | Documentation Improvements 94 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 95 | 96 | * Add online documentation with Sphinx, see https://datanommer.readthedocs.io 97 | (`2631885 `_). 98 | 99 | Other Changes 100 | ^^^^^^^^^^^^^ 101 | 102 | * Improve the unit tests (`610067f `_, `075052c `_). 103 | * Update dependencies 104 | 105 | 106 | v1.2.0 107 | ====== 108 | 109 | Released on 2024-04-15. 110 | This is a feature release that adds schema packages and upgrades the SQLAlchemy 111 | API to the 2.0 style. 112 | 113 | Features 114 | ^^^^^^^^ 115 | 116 | * Upgrade to the SQLAlchemy 2.0 API (`981e2a4 117 | `_). 118 | * Add a few schema packages to the dependencies. 119 | 120 | Development Improvements 121 | ^^^^^^^^^^^^^^^^^^^^^^^^ 122 | 123 | * Use Ruff instead of flake8 and isort and bandit (`4f7ffaa 124 | `_). 125 | 126 | 127 | v1.1.0 128 | ====== 129 | 130 | Released on 2023-09-22. 131 | This is a feature release that adds ``koji-fedoramessaging-messages`` as a 132 | dependency to interpret koji messages, and updates a lot of our other 133 | dependencies. 134 | 135 | Dependency Changes 136 | ^^^^^^^^^^^^^^^^^^ 137 | 138 | * Drop support for python 3.7, add support for python 3.10 (`PR#890 139 | `_). 140 | * Add the ``koji-fedoramessaging-messages`` package (`#1257 141 | `_). 142 | 143 | 144 | v1.0.4 145 | ====== 146 | 147 | Released on 2022-05-31. 148 | This is a minor release: 149 | 150 | - adds fedora-messaging schema packages 151 | - doesn't require a version of bodhi-messages in the dev deps 152 | - adjusts pyproject for spec needs 153 | - fixes integration of Alembic 154 | 155 | 156 | v1.0.3 157 | ====== 158 | 159 | Released on 2022-03-18. This is a minor release: 160 | 161 | - support fedora-messaging 3.0+ 162 | - update dependencies 163 | 164 | 165 | v1.0.0 166 | ====== 167 | 168 | Released on 2022-01-17. 169 | 170 | This is a major release that uses TimescaleDB to store the data. 171 | The list of changes is too big to list here. 
172 | -------------------------------------------------------------------------------- /datanommer.models/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datanommer.models" 3 | version = "1.4.4" 4 | description = "SQLAlchemy models for datanommer" 5 | authors = [ 6 | "Fedora Infrastructure " 7 | ] 8 | license = "GPL-3.0-or-later" 9 | readme = "README.rst" 10 | repository = "https://github.com/fedora-infra/datanommer" 11 | homepage = "https://github.com/fedora-infra/datanommer" 12 | packages = [ 13 | { include = "datanommer/models" }, 14 | { include = "datanommer/models/testing"}, 15 | { include = "datanommer/models/alembic" }, 16 | ] 17 | include = [ 18 | { path = "*.ini", format = "sdist" }, 19 | { path = "*.txt", format = "sdist" }, 20 | { path = "*.cfg", format = "sdist" }, 21 | { path = "*.rst", format = "sdist" }, 22 | { path = "*.xml", format = "sdist" }, 23 | { path = "tests/*", format = "sdist" }, 24 | ] 25 | 26 | [tool.poetry.dependencies] 27 | python = "^3.11" 28 | SQLAlchemy = "^1.3.24 || ^2.0.0" 29 | alembic = "^1.6.5" 30 | psycopg2 = [ 31 | {version = "<2.9", python = "<3.11"}, 32 | {version = "^2.9.0", python = ">=3.11"} 33 | ] 34 | 35 | fedora-messaging = ">=2.1.0" 36 | 37 | # Message schemas. The reference list of all message schemas is in 38 | # https://github.com/fedora-infra/fedora-messaging/blob/develop/docs/schema-packages.txt 39 | anitya-schema = {version = "*", optional = true} 40 | bodhi-messages = {version = "*", optional = true} 41 | bugzilla2fedmsg-schema = {version = "*", optional = true} 42 | ci-messages = {version = "*", optional = true} 43 | copr-messaging = {version = "*", optional = true} 44 | discourse2fedmsg-messages = {version = "*", optional = true} 45 | fedocal-messages = {version = "*", optional = true} 46 | fedorainfra-ansible-messages = {version = "*", optional = true} 47 | fedora-elections-messages = {version = "*", optional = true} 48 | fedora-image-uploader-messages = {version = "*", optional = true} 49 | fedora-messaging-git-hook-messages = {version = "*", optional = true} 50 | fedora-messaging-the-new-hotness-schema = {version = "*", optional = true} 51 | fedora-planet-messages = {version = "*", optional = true} 52 | fmn-messages = {version = "*", optional = true} 53 | journal-to-fedora-messaging-messages = {version = "*", optional = true} 54 | kerneltest-messages = {version = "^1.0.0", optional = true} 55 | koji-fedoramessaging-messages = {version = "^1.2.6", optional = true} 56 | koschei-messages = {version = "*", optional = true} 57 | mailman3-fedmsg-plugin-schemas = {version = "*", optional = true} 58 | maubot-fedora-messages = {version = "*", optional = true} 59 | mediawiki-messages = {version = "*", optional = true} 60 | meetbot-messages = {version = "*", optional = true} 61 | mdapi-messages = {version = "*", optional = true} 62 | noggin-messages = {version = "*", optional = true} 63 | nuancier-messages = {version = "*", optional = true} 64 | pagure-messages = {version = "*", optional = true} 65 | tahrir-messages = {version = "*", optional = true} 66 | webhook-to-fedora-messaging-messages = {version = "*", optional = true} 67 | 68 | [tool.poetry.group.dev.dependencies] 69 | pre-commit = "*" 70 | black = "*" 71 | ruff = "*" 72 | pytest = "*" 73 | liccheck = "*" 74 | pytest-cov = "*" 75 | pytest-postgresql = "*" 76 | pytest-mock = "*" 77 | bodhi-messages = "*" 78 | towncrier = "*" 79 | poetry-plugin-export = "^1.9.0" 80 | 81 | [tool.poetry.extras] 82 | schemas = 
[ 83 | "anitya-schema", 84 | "bodhi-messages", 85 | "bugzilla2fedmsg-schema", 86 | "ci-messages", 87 | "copr-messaging", 88 | "discourse2fedmsg-messages", 89 | "fedocal-messages", 90 | "fedorainfra-ansible-messages", 91 | "fedora-elections-messages", 92 | "fedora-image-uploader-messages", 93 | "fedora-messaging-git-hook-messages", 94 | "fedora-messaging-the-new-hotness-schema", 95 | "fedora-planet-messages", 96 | "fmn-messages", 97 | "journal-to-fedora-messaging-messages", 98 | "kerneltest-messages", 99 | "koji-fedoramessaging-messages", 100 | "koschei-messages", 101 | "mailman3-fedmsg-plugin-schemas", 102 | "maubot-fedora-messages", 103 | "mediawiki-messages", 104 | "meetbot-messages", 105 | "mdapi-messages", 106 | "noggin-messages", 107 | "nuancier-messages", 108 | "pagure-messages", 109 | "tahrir-messages", 110 | "webhook-to-fedora-messaging-messages", 111 | ] 112 | 113 | 114 | [build-system] 115 | requires = ["poetry-core>=1.0.0"] 116 | build-backend = "poetry.core.masonry.api" 117 | 118 | 119 | [tool.towncrier] 120 | package = "datanommer.models" 121 | directory = "news/" 122 | title_format = "v{version}" 123 | issue_format = "{issue}" 124 | template = "../tools/towncrier/template.rst.j2" 125 | underlines = "=^-" 126 | wrap = false 127 | all_bullets = true 128 | 129 | [[tool.towncrier.type]] 130 | directory = "bic" 131 | name = "Backwards Incompatible Changes" 132 | showcontent = true 133 | 134 | [[tool.towncrier.type]] 135 | directory = "dependency" 136 | name = "Dependency Changes" 137 | showcontent = true 138 | 139 | [[tool.towncrier.type]] 140 | directory = "feature" 141 | name = "Features" 142 | showcontent = true 143 | 144 | [[tool.towncrier.type]] 145 | directory = "bug" 146 | name = "Bug Fixes" 147 | showcontent = true 148 | 149 | [[tool.towncrier.type]] 150 | directory = "dev" 151 | name = "Development Improvements" 152 | showcontent = true 153 | 154 | [[tool.towncrier.type]] 155 | directory = "docs" 156 | name = "Documentation Improvements" 157 | showcontent = true 158 | 159 | [[tool.towncrier.type]] 160 | directory = "other" 161 | name = "Other Changes" 162 | showcontent = true 163 | 164 | [[tool.towncrier.type]] 165 | directory = "author" 166 | name = "Contributors" 167 | showcontent = true 168 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | #
13 | 
14 | import importlib.metadata
15 | import os
16 | import sys
17 | 
18 | 
19 | SUBMODULES = ("models", "commands")
20 | 
21 | topdir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
22 | 
23 | for submodule in SUBMODULES:
24 | sys.path.insert(0, os.path.join(topdir, f"datanommer.{submodule}"))
25 | 
26 | 
27 | # -- Project information -----------------------------------------------------
28 | 
29 | project = "Datanommer"
30 | copyright = "2013, Contributors to the Fedora Project"
31 | author = "Fedora Infrastructure"
32 | 
33 | # The full version, including alpha/beta/rc tags
34 | release = importlib.metadata.version("datanommer.models")
35 | 
36 | # The short X.Y version
37 | version = ".".join(release.split(".")[:2])
38 | 
39 | 
40 | # -- General configuration ---------------------------------------------------
41 | 
42 | # Add any Sphinx extension module names here, as strings. They can be
43 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
44 | # ones.
45 | extensions = [
46 | "sphinx.ext.autodoc",
47 | "sphinx.ext.intersphinx",
48 | "sphinx.ext.extlinks",
49 | "sphinx.ext.viewcode",
50 | "sphinx.ext.napoleon",
51 | "myst_parser",
52 | "sphinx_click",
53 | ]
54 | 
55 | # Add any paths that contain templates here, relative to this directory.
56 | templates_path = ["_templates"]
57 | 
58 | # List of patterns, relative to source directory, that match files and
59 | # directories to ignore when looking for source files.
60 | # This pattern also affects html_static_path and html_extra_path.
61 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
62 | 
63 | # Explicitly set the master doc
64 | # https://github.com/readthedocs/readthedocs.org/issues/2569
65 | master_doc = "index"
66 | 
67 | 
68 | # -- Options for HTML output -------------------------------------------------
69 | 
70 | # The theme to use for HTML and HTML Help pages. See the documentation for
71 | # a list of builtin themes.
72 | #
73 | html_theme = "alabaster"
74 | 
75 | 
76 | # Theme options are theme-specific and customize the look and feel of a theme
77 | # further. For a list of options available for each theme, see the
78 | # documentation.
79 | html_theme_options = {
80 | "github_user": "fedora-infra",
81 | "github_repo": "datanommer",
82 | "page_width": "1040px",
83 | "show_related": True,
84 | "sidebar_collapse": True,
85 | "caption_font_size": "140%",
86 | }
87 | 
88 | # Add any paths that contain custom static files (such as style sheets) here,
89 | # relative to this directory. They are copied after the builtin static files,
90 | # so a file named "default.css" will overwrite the builtin "default.css".
91 | html_static_path = ["_static"]
92 | 
93 | 
94 | # -- Extension configuration -------------------------------------------------
95 | 
96 | source_suffix = {
97 | ".rst": "restructuredtext",
98 | ".md": "markdown",
99 | }
100 | 
101 | myst_enable_extensions = [
102 | "colon_fence",
103 | ]
104 | myst_heading_anchors = 3
105 | 
106 | 
107 | # -- Options for intersphinx extension ---------------------------------------
108 | # https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#configuration
109 | 
110 | # Example configuration for intersphinx: refer to the Python standard library. 
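# For instance, with the "python" entry below, a cross-reference such as
# :func:`json.loads` in a docstring resolves to the matching page on
# docs.python.org instead of ending up as a broken link.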
111 | intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} 112 | 113 | 114 | # -- Misc ----- 115 | 116 | 117 | def run_apidoc(_): 118 | from sphinx.ext import apidoc 119 | 120 | for submodule in SUBMODULES: 121 | print( 122 | " ".join( 123 | [ 124 | "sphinx-apidoc", 125 | "-f", 126 | "-o", 127 | os.path.join(topdir, "docs", "_source", submodule), 128 | "-T", 129 | "-e", 130 | "-M", 131 | "--implicit-namespaces", 132 | os.path.join(topdir, f"datanommer.{submodule}", "datanommer"), 133 | # exclude patterns: 134 | os.path.join(topdir, f"datanommer.{submodule}", "tests"), 135 | os.path.join( 136 | topdir, f"datanommer.{submodule}", "datanommer", submodule, "alembic" 137 | ), 138 | ] 139 | ) 140 | ) 141 | apidoc.main( 142 | [ 143 | "-f", 144 | "-o", 145 | os.path.join(topdir, "docs", "_source", submodule), 146 | "-T", 147 | "-e", 148 | "-M", 149 | "--implicit-namespaces", 150 | os.path.join(topdir, f"datanommer.{submodule}", "datanommer"), 151 | # exclude patterns: 152 | os.path.join(topdir, f"datanommer.{submodule}", "tests"), 153 | os.path.join(topdir, f"datanommer.{submodule}", "datanommer", submodule, "alembic"), 154 | ] 155 | ) 156 | # This file is going to cause duplicate references 157 | os.remove(os.path.join(topdir, "docs", "_source", submodule, "datanommer.rst")) 158 | generate_click_commands( 159 | os.path.join(topdir, "docs", "_source", "commands.rst"), 160 | "datanommer.commands", 161 | nested="full", 162 | ) 163 | 164 | 165 | def setup(app): 166 | app.connect("builder-inited", run_apidoc) 167 | 168 | 169 | def generate_click_commands(output, module, *, with_header=True, nested=None): 170 | commands = [] 171 | for ep in importlib.metadata.entry_points(group="console_scripts"): 172 | ep_module = ep.value.partition(":")[0] 173 | if not ep_module.startswith(f"{module}.") and ep_module != module: 174 | continue 175 | commands.append((ep.name, ep.value)) 176 | if not commands: 177 | return 178 | with open(output, "w") as fh: 179 | if with_header: 180 | fh.write("Commands\n") 181 | fh.write("========\n") 182 | fh.write("\n") 183 | for name, module in commands: 184 | fh.write(f".. 
click:: {module}\n") 185 | fh.write(f" :prog: {name}\n") 186 | if nested: 187 | fh.write(f" :nested: {nested}\n") 188 | fh.write("\n") 189 | -------------------------------------------------------------------------------- /datanommer.commands/datanommer/commands/extract_users.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | 4 | import click 5 | from fedora_messaging.exceptions import ValidationError 6 | from fedora_messaging.message import load_message as load_message 7 | from sqlalchemy import and_, not_, select 8 | 9 | import datanommer.models as m 10 | 11 | from .utils import CHUNK_SIZE, config_option, get_config, iterate_over_messages 12 | 13 | 14 | log = logging.getLogger(__name__) 15 | 16 | USERNAMES_SKIP_TOPICS = [ 17 | "%.anitya.%", 18 | "%.discourse.%", 19 | "%.hotness.update.bug.file", 20 | "%.hotness.update.drop", 21 | "%.koschei.%", 22 | "%.mdapi.%", 23 | ] 24 | AGENT_SKIP_TOPICS = [ 25 | "%.hotness.update.bug.file", 26 | "%.hotness.update.drop", 27 | ] 28 | 29 | 30 | @click.group() 31 | @config_option 32 | @click.option("--topic", default=None, help="Only extract users for messages of a specific topic.") 33 | @click.option( 34 | "--category", 35 | default=None, 36 | help="Only extract users for messages of a specific category.", 37 | ) 38 | @click.option( 39 | "--start", 40 | default=None, 41 | type=click.DateTime(), 42 | help="Only extract users for messages after a specific timestamp.", 43 | ) 44 | @click.option( 45 | "--end", 46 | default=None, 47 | type=click.DateTime(), 48 | help="Only extract users for messages before a specific timestamp.", 49 | ) 50 | @click.option( 51 | "--force-schema", 52 | default=None, 53 | help=( 54 | "Force usage of this schema name to extract usernames. 
This is the key in the " 55 | "exposed entry point / plugin, for example: wiki.article.edit.v1" 56 | ), 57 | ) 58 | @click.option( 59 | "--chunk-size", 60 | default=CHUNK_SIZE, 61 | type=int, 62 | show_default=True, 63 | help="Go through messages these many at a time (lower is slower but saves memory).", 64 | ) 65 | @click.option( 66 | "--debug", 67 | is_flag=True, 68 | help="Show more information.", 69 | ) 70 | @click.pass_context 71 | def main(ctx, config_path, topic, category, start, end, force_schema, chunk_size, debug): 72 | ctx.ensure_object(dict) 73 | ctx.obj["options"] = ctx.params 74 | ctx.obj["config"] = config = get_config(config_path) 75 | m.init( 76 | config["datanommer_sqlalchemy_url"], 77 | alembic_ini=config["alembic_ini"], 78 | ) 79 | if topic and category: 80 | raise click.UsageError("can't use both --topic and --category, choose one.") 81 | 82 | if not start: 83 | ctx.obj["options"]["start"] = m.session.execute( 84 | select(m.Message.timestamp).order_by(m.Message.timestamp).limit(1) 85 | ).scalar_one() 86 | 87 | query = select(m.Message) 88 | if topic: 89 | query = query.where(m.Message.topic == topic) 90 | elif category: 91 | query = query.where(m.Message.category == category) 92 | 93 | query = query.where(m.Message.timestamp >= ctx.obj["options"]["start"]) 94 | if end: 95 | query = query.where(m.Message.timestamp < end) 96 | else: 97 | end = datetime.datetime.now() 98 | 99 | if force_schema is None: 100 | query = query.where( 101 | m.Message.headers.has_key("fedora_messaging_schema"), 102 | m.Message.headers["fedora_messaging_schema"].astext != "base.message", 103 | ) 104 | ctx.obj["query"] = query 105 | 106 | 107 | @main.command("usernames") 108 | @click.pass_context 109 | def extract_usernames(ctx): 110 | """Go over old messages, extract users and store them. 111 | 112 | This is useful when a message schema has been added and we want to populate the users table 113 | with the new information. 
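A typical invocation might look like this (the config path and topic are
illustrative examples, not defaults):

    datanommer-extract-users -c /etc/fedora-messaging/config.toml \
        --topic org.fedoraproject.prod.git.receive usernames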
114 | """ 115 | debug = ctx.obj["options"]["debug"] 116 | query = ctx.obj["query"] 117 | query = query.where( 118 | and_(*[not_(m.Message.topic.like(skipped)) for skipped in USERNAMES_SKIP_TOPICS]) 119 | ) 120 | query = query.join( 121 | m.users_assoc_table, 122 | and_( 123 | m.Message.id == m.users_assoc_table.c.msg_id, 124 | m.Message.timestamp == m.users_assoc_table.c.msg_timestamp, 125 | ), 126 | isouter=True, 127 | ).where(m.users_assoc_table.c.msg_id.is_(None)) 128 | 129 | for message in iterate_over_messages( 130 | query, ctx.obj["options"]["start"], ctx.obj["options"]["chunk_size"] 131 | ): 132 | fm_message = get_fedora_message(message, force_schema=ctx.obj["options"]["force_schema"]) 133 | if fm_message is None or not fm_message.usernames: 134 | m.session.expunge(message) 135 | continue 136 | message._insert_list(m.User, m.users_assoc_table, fm_message.usernames) 137 | if debug: 138 | click.echo( 139 | f"Usernames for message {message.msg_id} of topic {message.topic}" 140 | f": {', '.join(fm_message.usernames)}" 141 | ) 142 | 143 | 144 | def get_fedora_message(db_message, force_schema): 145 | headers = db_message.headers 146 | if force_schema and headers is not None: 147 | headers["fedora_messaging_schema"] = force_schema 148 | try: 149 | fm_message = load_message( 150 | { 151 | "topic": db_message.topic, 152 | "headers": headers, 153 | "id": db_message.msg_id, 154 | "body": db_message.msg, 155 | } 156 | ) 157 | except ValidationError as e: 158 | try: 159 | # Remove this block after fedora-messaging 3.6.0 and use e.summary 160 | error_msg = e.args[0].summary 161 | except AttributeError: 162 | error_msg = str(e).split("\n")[0] 163 | click.echo( 164 | f"Could not load message {db_message.msg_id} on topic {db_message.topic}: {error_msg}", 165 | err=True, 166 | ) 167 | return None 168 | 169 | return fm_message 170 | 171 | 172 | @main.command("agent") 173 | @click.pass_context 174 | def extract_agent(ctx): 175 | """Go over old messages, extract the agent_name and store it. 176 | 177 | This is useful when a message schema has been added and we want to populate the agent_name 178 | column with the new information. 
179 | """ 180 | debug = ctx.obj["options"]["debug"] 181 | query = ctx.obj["query"] 182 | query = query.where( 183 | and_(*[not_(m.Message.topic.like(skipped)) for skipped in AGENT_SKIP_TOPICS]) 184 | ) 185 | query = query.where(m.Message.agent_name.is_(None)) 186 | 187 | for message in iterate_over_messages( 188 | query, ctx.obj["options"]["start"], ctx.obj["options"]["chunk_size"] 189 | ): 190 | fm_message = get_fedora_message(message, force_schema=ctx.obj["options"]["force_schema"]) 191 | if fm_message is None or not fm_message.agent_name: 192 | m.session.expunge(message) 193 | continue 194 | message.agent_name = fm_message.agent_name 195 | if debug: 196 | click.echo( 197 | f"Agent for message {message.msg_id} of topic {message.topic}" 198 | f": {fm_message.agent_name}" 199 | ) 200 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Test & Build 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | - stable 8 | - staging 9 | tags: 10 | - "*" 11 | pull_request: 12 | branches: 13 | - develop 14 | - stable 15 | - staging 16 | 17 | jobs: 18 | 19 | checks: 20 | name: Checks 21 | runs-on: ubuntu-latest 22 | container: fedorapython/fedora-python-tox:latest 23 | steps: 24 | - uses: actions/checkout@v6 25 | 26 | - name: Install pre-commit 27 | run: | 28 | dnf install -y pre-commit git krb5-devel libpq-devel poetry python3-poetry-plugin-export 29 | 30 | - name: Mark the working directory as safe for Git 31 | run: git config --global --add safe.directory $PWD 32 | 33 | - name: Run pre-commit checks 34 | run: pre-commit run -v --all-files 35 | 36 | 37 | licenses: 38 | name: Licenses 39 | runs-on: ubuntu-latest 40 | container: fedorapython/fedora-python-tox:latest 41 | steps: 42 | - uses: actions/checkout@v6 43 | 44 | - name: Install RPM dependencies 45 | run: | 46 | dnf install -y pre-commit git krb5-devel libpq-devel poetry python3-poetry-plugin-export 47 | 48 | - name: Check licenses for datanommer.${{ matrix.package }} 49 | run: tox -e licenses 50 | working-directory: datanommer.${{ matrix.package }} 51 | 52 | strategy: 53 | matrix: 54 | package: 55 | - models 56 | - consumer 57 | - commands 58 | 59 | 60 | unit_tests: 61 | name: Unit tests 62 | runs-on: ubuntu-latest 63 | container: fedorapython/fedora-python-tox:latest 64 | steps: 65 | - uses: actions/checkout@v6 66 | 67 | - name: Install RPM dependencies 68 | run: | 69 | dnf install -y timescaledb postgresql-server pre-commit git krb5-devel libpq-devel poetry python3-poetry-plugin-export 70 | 71 | - name: Run unit tests for datanommer.${{ matrix.package }} 72 | # Don't run the tests as root or pg_ctl will refuse to start 73 | run: | 74 | chown postgres:postgres . 
75 | sudo -u postgres tox -e ${{ matrix.pyver }} -- -vv 76 | working-directory: datanommer.${{ matrix.package }} 77 | 78 | strategy: 79 | matrix: 80 | pyver: 81 | - py311 82 | - py312 83 | package: 84 | - models 85 | - consumer 86 | - commands 87 | 88 | 89 | # https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ 90 | build: 91 | name: Build distributions 📦 92 | runs-on: ubuntu-latest 93 | needs: 94 | - checks 95 | - licenses 96 | - unit_tests 97 | # outputs: 98 | # release-notes-models: ${{ steps.release-notes.outputs.models }} 99 | # release-notes-consumer: ${{ steps.release-notes.outputs.consumer }} 100 | # release-notes-commands: ${{ steps.release-notes.outputs.commands }} 101 | 102 | steps: 103 | 104 | - uses: actions/checkout@v6 105 | - name: Set up Python 106 | uses: actions/setup-python@v6 107 | with: 108 | python-version: "3.x" 109 | 110 | - name: Install pypa/build 111 | run: python3 -m pip install build --user 112 | - name: Build a binary wheel and a source tarball 113 | run: | 114 | cd datanommer.${{ matrix.package }} 115 | python3 -m build 116 | 117 | - name: Store the distribution packages 118 | uses: actions/upload-artifact@v6 119 | with: 120 | name: python-package-distributions-${{ matrix.package }} 121 | path: datanommer.${{ matrix.package }}/dist/ 122 | if-no-files-found: error 123 | 124 | # - name: Extract changelog section 125 | #  id: extract-changelog 126 | #  uses: sean0x42/markdown-extract@v2 127 | #  with: 128 | #  file: datanommer.${{ matrix.package }}/NEWS.md 129 | #  # pattern: 'Version\s+\[${{ steps.extract-version.outputs.ESCAPED_VERSION }}\]\(.*\)' 130 | #  pattern: 'Version\s+\[[[:word:].-]+\]\(.*\)' 131 | #  no-print-matched-heading: true 132 | # - name: Store the release notes in the output 133 | #  id: release-notes 134 | #  run: | 135 | #  echo '${{ matrix.package }}<> "$GITHUB_OUTPUT" 136 | #  echo "${{ steps.extract-changelog.outputs.markdown }}" >> "$GITHUB_OUTPUT" 137 | #  echo "EOF" >> "$GITHUB_OUTPUT" 138 | # - name: Show the changelog 139 | #  env: 140 | #  CHANGELOG: ${{ steps.extract-changelog.outputs.markdown }} 141 | #  run: echo "$CHANGELOG" 142 | 143 | strategy: 144 | matrix: 145 | package: 146 | - models 147 | - consumer 148 | - commands 149 | 150 | 151 | publish-to-pypi: 152 | name: Publish to PyPI 🚀 153 | if: startsWith(github.ref, 'refs/tags/') && !contains(github.ref, 'rc') # only publish to PyPI on final tag pushes 154 | needs: 155 | - build 156 | runs-on: ubuntu-latest 157 | environment: 158 | name: pypi 159 | url: https://pypi.org/p/datanommer.${{ matrix.package }} 160 | permissions: 161 | id-token: write # IMPORTANT: mandatory for trusted publishing 162 | 163 | steps: 164 | - name: Download all the dists 165 | uses: actions/download-artifact@v7 166 | with: 167 | name: python-package-distributions-${{ matrix.package }} 168 | path: dist-${{ matrix.package }}/ 169 | 170 | - name: Publish distribution to PyPI 171 | uses: pypa/gh-action-pypi-publish@release/v1 172 | with: 173 | packages-dir: dist-${{ matrix.package }}/ 174 | 175 | strategy: 176 | matrix: 177 | package: 178 | - models 179 | - consumer 180 | - commands 181 | 182 | 183 | github-release: 184 | name: Create a GitHub Release 📢 185 | needs: 186 | - publish-to-pypi 187 | - build 188 | runs-on: ubuntu-latest 189 | permissions: 190 | contents: write # IMPORTANT: mandatory for making GitHub Releases 191 | id-token: write # IMPORTANT: mandatory for sigstore 192 | 193 | steps: 194 | - name: Download all the dists 
195 | uses: actions/download-artifact@v7 196 | with: 197 | pattern: python-package-distributions-* 198 | path: dist/ 199 | merge-multiple: true 200 | 201 | - name: Sign the dists with Sigstore 202 | uses: sigstore/gh-action-sigstore-python@v3.2.0 203 | with: 204 | inputs: >- 205 | ./dist/*.tar.gz 206 | ./dist/*.whl 207 | 208 | - name: Release 209 | uses: softprops/action-gh-release@v2 210 | with: 211 | draft: true 212 | files: dist/* 213 | fail_on_unmatched_files: true 214 | generate_release_notes: true 215 | # body: | 216 | # ## Models (datanommer.models) 217 | # ${{ needs.build.outputs.release-notes-models }} 218 | 219 | # ## Consumer (datanommer.consumer) 220 | # ${{ needs.build.outputs.release-notes-consumer }} 221 | 222 | # ## Commands (datanommer.commands) 223 | # ${{ needs.build.outputs.release-notes-commands }} 224 | -------------------------------------------------------------------------------- /tools/timescaledb/migrate-to-timescaledb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Migrate the datanommer database from the pre-2021 format to the TimescaleDB-based format. 5 | """ 6 | 7 | from json import dumps, JSONDecodeError, loads 8 | 9 | import click 10 | import toml 11 | from sqlalchemy import ( 12 | Column, 13 | create_engine, 14 | DateTime, 15 | ForeignKey, 16 | func, 17 | Integer, 18 | select, 19 | Table, 20 | UnicodeText, 21 | ) 22 | from sqlalchemy.exc import NoResultFound 23 | from sqlalchemy.orm import declarative_base, relationship, Session 24 | 25 | import datanommer.models as dm 26 | 27 | 28 | CHUNK_SIZE = 5000 29 | 30 | OldBase = declarative_base() 31 | 32 | user_assoc_table = Table( 33 | "user_messages", 34 | OldBase.metadata, 35 | Column("username", UnicodeText, ForeignKey("user.name")), 36 | Column("msg", Integer, ForeignKey("messages.id")), 37 | ) 38 | 39 | pack_assoc_table = Table( 40 | "package_messages", 41 | OldBase.metadata, 42 | Column("package", UnicodeText, ForeignKey("package.name")), 43 | Column("msg", Integer, ForeignKey("messages.id")), 44 | ) 45 | 46 | 47 | class OldMessage(OldBase): 48 | __tablename__ = "messages" 49 | id = Column(Integer, primary_key=True) 50 | msg_id = Column(UnicodeText) 51 | i = Column(Integer) 52 | topic = Column(UnicodeText) 53 | timestamp = Column(DateTime) 54 | certificate = Column(UnicodeText) 55 | signature = Column(UnicodeText) 56 | category = Column(UnicodeText) 57 | username = Column(UnicodeText) 58 | crypto = Column(UnicodeText) 59 | source_name = Column(UnicodeText) 60 | source_version = Column(UnicodeText) 61 | _msg = Column(UnicodeText) 62 | _headers = Column(UnicodeText) 63 | 64 | users = relationship("User", secondary=user_assoc_table, lazy="selectin") 65 | packages = relationship("Package", secondary=pack_assoc_table, lazy="selectin") 66 | 67 | 68 | class User(OldBase): 69 | __tablename__ = "user" 70 | 71 | name = Column(UnicodeText, primary_key=True) 72 | 73 | 74 | class Package(OldBase): 75 | __tablename__ = "package" 76 | 77 | name = Column(UnicodeText, primary_key=True) 78 | 79 | 80 | def import_message(message): 81 | msg = message._msg.replace("\\u0000", "") 82 | try: 83 | msg = loads(msg) 84 | except JSONDecodeError: 85 | click.echo(f"Can't decode json in message {message.msg_id} ({message.timestamp})") 86 | with open("failed.log", "a") as failedlog: 87 | failedlog.write( 88 | dumps( 89 | { 90 | "id": message.id, 91 | "msg_id": message.msg_id, 92 | "timestamp": message.timestamp.isoformat(), 93 | "topic": message.topic, 94 | 
"msg": message._msg, 95 | } 96 | ) 97 | ) 98 | failedlog.write("\n") 99 | return 100 | if not msg: 101 | with open("failed.log", "a") as failedlog: 102 | failedlog.write( 103 | dumps( 104 | { 105 | "id": message.id, 106 | "msg_id": message.msg_id, 107 | "timestamp": message.timestamp.isoformat(), 108 | "topic": message.topic, 109 | "msg": repr(message._msg), 110 | } 111 | ) 112 | ) 113 | failedlog.write("\n") 114 | return 115 | headers = message._headers 116 | if headers is not None: 117 | headers = headers.replace("\\u0000", "") 118 | headers = loads(headers) 119 | dm.Message.create( 120 | i=message.i, 121 | msg_id=message.msg_id, 122 | topic=message.topic, 123 | timestamp=message.timestamp, 124 | username=message.username, 125 | crypto=message.crypto, 126 | certificate=message.certificate, 127 | signature=message.signature, 128 | msg=msg, 129 | headers=headers, 130 | users=[u.name for u in message.users], 131 | packages=[p.name for p in message.packages], 132 | ) 133 | 134 | 135 | # https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery 136 | def windowed_query(q, column, windowsize): 137 | """Break a Query into chunks on a given column.""" 138 | 139 | single_entity = q.is_single_entity 140 | q = q.add_columns(column).order_by(column) 141 | last_id = None 142 | 143 | while True: 144 | subq = q 145 | if last_id is not None: 146 | subq = subq.where(column > last_id) 147 | chunk = subq.limit(windowsize).all() 148 | if not chunk: 149 | break 150 | last_id = chunk[-1][-1] 151 | for row in chunk: 152 | if single_entity: 153 | yield row[0] 154 | else: 155 | yield row[0:-1] 156 | 157 | 158 | @click.command() 159 | @click.option( 160 | "config_path", 161 | "-c", 162 | "--config", 163 | type=click.Path(), 164 | default="migrate.toml", 165 | show_default=True, 166 | ) 167 | @click.option( 168 | "since", 169 | "-s", 170 | "--since", 171 | type=click.DateTime(), 172 | ) 173 | def main(config_path, since): 174 | config = toml.load(config_path) 175 | dm.init(config["dest_url"], create=True) 176 | src_engine = create_engine(config["source_url"], future=True) 177 | 178 | with Session(src_engine) as src_db: 179 | click.echo("Querying messages...") 180 | old_messages = src_db.scalars(select(OldMessage).order_by(OldMessage.id)) 181 | latest = dm.session.scalars( 182 | select(dm.Message).order_by(dm.Message.id.desc()).limit(1) 183 | ).first() 184 | if latest: 185 | try: 186 | latest_in_src = src_db.execute( 187 | select(OldMessage).where(OldMessage.msg_id == latest.msg_id) 188 | ).scalar_one() 189 | except NoResultFound: 190 | latest_in_src = src_db.execute( 191 | select(OldMessage) 192 | .where(OldMessage.timestamp == latest.timestamp) 193 | .where(OldMessage.topic == latest.topic) 194 | ).scalar_one() 195 | old_messages = old_messages.where(OldMessage.id > latest_in_src.id) 196 | click.echo(f"Resuming from message {latest.msg_id}") 197 | if since: 198 | old_messages = old_messages.where(OldMessage.timestamp > since) 199 | click.echo(f"Only importing messages after {since}") 200 | total = old_messages.count() 201 | with click.progressbar( 202 | length=total, 203 | label=f"Importing {total} messages", 204 | item_show_func=lambda m: m.timestamp.strftime("%Y-%m") if m else "", 205 | # item_show_func=lambda m: m.msg_id if m else "", 206 | ) as bar: 207 | for old_message in windowed_query(old_messages, OldMessage.id, CHUNK_SIZE): 208 | import_message(old_message) 209 | # Commit periodically 210 | if bar._completed_intervals % 1000 == 0: 211 | dm.session.commit() 212 | else: 213 | 
dm.session.flush() 214 | bar.update(1, old_message) 215 | dm.session.commit() 216 | # Verify counts 217 | click.echo(f"Messages in the old DB: {src_db.scalar(select(func.count(OldMessage.id)))}") 218 | click.echo( 219 | f"Messages in the new DB: {dm.session.scalar(select(func.count(dm.Message.id)))}" 220 | ) 221 | 222 | 223 | if __name__ == "__main__": 224 | main() 225 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Thanks for considering contributing to datanommer, we really appreciate it! 6 | 7 | Quickstart: 8 | 9 | 1. Look for an `existing issue 10 | `_ about the bug or 11 | feature you're interested in. If you can't find an existing issue, create a 12 | `new one `_. 13 | 14 | 2. Fork the `repository on GitHub 15 | `_. 16 | 17 | 3. Fix the bug or add the feature, and then write one or more tests which show 18 | the bug is fixed or the feature works. 19 | 20 | 4. Submit a pull request and wait for a maintainer to review it. 21 | 22 | More detailed guidelines to help ensure your submission goes smoothly are 23 | below. 24 | 25 | .. note:: If you do not wish to use GitHub, please send patches to 26 | infrastructure@lists.fedoraproject.org. 27 | 28 | 29 | Development Environment 30 | ======================= 31 | 32 | Vagrant allows contributors to get quickly up and running with a datanommer 33 | development environment by automatically configuring a virtual machine. This 34 | virtual machine also includes a running datanommer service to make it easy to 35 | test your changes. 36 | 37 | The datanommer Vagrant environment is configured to be empty when first 38 | provisioned, but to consume messages from the stage Fedora Messaging queue. 39 | 40 | To get started, first install the Vagrant and Virtualization 41 | packages needed, and start the libvirt service:: 42 | 43 | $ sudo dnf install ansible libvirt vagrant-libvirt vagrant-sshfs vagrant-hostmanager 44 | $ sudo systemctl enable libvirtd 45 | $ sudo systemctl start libvirtd 46 | 47 | Check out the code and run ``vagrant up``:: 48 | 49 | $ git clone https://github.com/fedora-infra/datanommer 50 | $ cd datanommer 51 | $ vagrant up 52 | 53 | Next, SSH into your newly provisioned development environment:: 54 | 55 | $ vagrant ssh 56 | 57 | The vagrant setup also defines 4 handy commands to interact with the datanommer 58 | consumer:: 59 | 60 | $ datanommer-consumer-start 61 | $ datanommer-consumer-stop 62 | $ datanommer-consumer-restart 63 | $ datanommer-consumer-logs 64 | 65 | Note also, that the commands provided by datanommer.commands are also available 66 | to interact with the datanommer database:: 67 | 68 | $ datanommer-dump 69 | $ datanommer-latest 70 | $ datanommer-stats 71 | $ datanommer-create-db 72 | $ datanommer-refresh-view 73 | 74 | 75 | Guidelines 76 | ========== 77 | 78 | Python Support 79 | -------------- 80 | datanommer supports Python 3.7 or greater. This is automatically enforced by the 81 | continuous integration (CI) suite. 82 | 83 | 84 | Code Style 85 | ---------- 86 | We follow the `PEP8 `_ style guide 87 | for Python. This is automatically enforced by the CI suite. 88 | 89 | We are using `Black ` to automatically format 90 | the source code. It is also checked in CI. The Black webpage contains 91 | instructions to configure your editor to run it on the files you edit. 
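For example, to apply the formatting to the whole tree locally (one possible
invocation, assuming the hook is named ``black`` in the pre-commit config)::

    $ pre-commit run black --all-files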
92 |
93 |
94 | Tests
95 | -----
96 | Datanommer comprises 3 separate modules in this single repository. There
97 | is a top-level `Tox <https://tox.wiki/>`_ file to run the tests on all 3
98 | modules::
99 |
100 | $ tox
101 |
102 | However, tests can also be run on a single module by invoking tox in that
103 | module's directory. For example::
104 |
105 | $ cd datanommer.models/
106 | $ tox
107 |
108 | Note that the tests use virtual environments that are not created from scratch
109 | with every subsequent run of the tests. Therefore, *when changes happen to
110 | dependencies, the tests may fail to run correctly*. To recreate the virtual
111 | environments, run the test commands with the ``-r`` flag, for example::
112 |
113 | $ tox -r
114 |
115 | or::
116 |
117 | $ cd datanommer.models/
118 | $ tox -r
119 |
120 | All code must have test coverage or be explicitly marked as not covered using
121 | the ``# pragma: no cover`` comment. This should only be done if there is a good
122 | reason to not write tests.
123 |
124 | Your pull request should contain tests for your new feature or bug fix. If
125 | you're not certain how to write tests, we will be happy to help you.
126 |
127 |
128 | Pre-commit
129 | ----------
130 | We use the `pre-commit <https://pre-commit.com/>`_ framework to run the checks defined in
131 | ``.pre-commit-config.yaml``, to ensure that the code is up to standard prior to submitting a pull request.
132 |
133 | Pre-commit can be installed as a git hook by running ``pre-commit install``.
134 |
135 |
136 | Release Notes
137 | -------------
138 |
139 | To add entries to the release notes, create a file in the ``news`` directory in the
140 | ``source.type`` name format, where the ``source`` part of the filename is:
141 |
142 | * ``42`` when the change is described in issue ``42``
143 | * ``PR42`` when the change has been implemented in pull request ``42``, and
144 | there is no associated issue
145 | * ``Cabcdef`` when the change has been implemented in changeset ``abcdef``, and
146 | there is no associated issue or pull request.
147 |
148 | And where the extension ``type`` is one of:
149 |
150 | * ``bic``: for backwards incompatible changes
151 | * ``dependency``: for dependency changes
152 | * ``feature``: for new features
153 | * ``bug``: for bug fixes
154 | * ``dev``: for development improvements
155 | * ``docs``: for documentation improvements
156 | * ``other``: for other changes
157 |
158 | The content of the file will end up in the release notes. It should not end with a ``.``
159 | (full stop).
160 |
161 | If it is not present already, add a file in the ``news`` directory named ``username.author``,
162 | where ``username`` is the first part of your commit's email address, containing the name
163 | you want to be credited as. There is a script to generate a list of authors that we run
164 | before releasing, but creating the file manually allows you to set a custom name.
165 |
166 | A preview of the release notes can be generated with
167 | ``towncrier build --draft``.
168 |
169 |
170 | Licensing
171 | ---------
172 |
173 | Your commit messages must include a Signed-off-by tag with your name and e-mail
174 | address, indicating that you agree to the `Developer Certificate of Origin
175 | <https://developercertificate.org/>`_ version 1.1::
176 |
177 | Developer Certificate of Origin
178 | Version 1.1
179 |
180 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
181 | 1 Letterman Drive
182 | Suite D4700
183 | San Francisco, CA, 94129
184 |
185 | Everyone is permitted to copy and distribute verbatim copies of this
186 | license document, but changing it is not allowed.
187 |
188 |
189 | Developer's Certificate of Origin 1.1
190 |
191 | By making a contribution to this project, I certify that:
192 |
193 | (a) The contribution was created in whole or in part by me and I
194 | have the right to submit it under the open source license
195 | indicated in the file; or
196 |
197 | (b) The contribution is based upon previous work that, to the best
198 | of my knowledge, is covered under an appropriate open source
199 | license and I have the right under that license to submit that
200 | work with modifications, whether created in whole or in part
201 | by me, under the same open source license (unless I am
202 | permitted to submit under a different license), as indicated
203 | in the file; or
204 |
205 | (c) The contribution was provided directly to me by some other
206 | person who certified (a), (b) or (c) and I have not modified
207 | it.
208 |
209 | (d) I understand and agree that this project and the contribution
210 | are public and that a record of the contribution (including all
211 | personal information I submit with it, including my sign-off) is
212 | maintained indefinitely and may be redistributed consistent with
213 | this project or the open source license(s) involved.
214 |
215 | Use ``git commit -s`` to add the Signed-off-by tag.
216 |
217 |
218 | Releasing
219 | ---------
220 |
221 | When cutting a new release, follow these steps:
222 |
223 | #. Update the version in ``pyproject.toml``
224 | #. Run ``poetry install`` to update the version in the metadata
225 | #. Add missing authors to the release notes fragments by changing to the ``news`` directory and
226 | running the ``../tools/towncrier/get-authors.py`` script, but check for duplicates and errors
227 | #. Generate the release notes by running ``poetry run towncrier`` (in the base directory)
228 | #. Adjust the release notes in ``NEWS.rst``
229 | #. Commit the changes
230 | #. Push the commit to the upstream GitHub repository (via a PR or not)
231 | #. Change to the stable branch and cherry-pick the commit (or merge if appropriate)
232 | #. Run the checks one last time to be sure: ``tox``
233 | #. Tag the commit with ``-s`` to generate a signed tag
234 | #. Push the commit to the upstream GitHub repository with ``git push``,
235 | and the new tag with ``git push --tags``
236 | #. Generate a tarball and push to PyPI with the command ``poetry publish --build``
237 | #. Create `the release on GitHub <https://github.com/fedora-infra/datanommer/releases>`_ and copy the
238 | release notes in there
239 | #. Deploy and announce (a sketch of the command sequence is shown below).
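As an illustration only, the tagging-and-publishing half of this checklist roughly
corresponds to the following commands; the ``1.2.0`` version number is a placeholder,
and the stable-branch cherry-pick and GitHub release steps are left out::

    $ poetry run towncrier
    $ git commit -s -m "Release 1.2.0"
    $ tox
    $ git tag -s 1.2.0
    $ git push
    $ git push --tags
    $ poetry publish --build

This is a sketch, not a substitute for the checklist above.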
240 | -------------------------------------------------------------------------------- /datanommer.commands/tests/test_extract_users.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import io 3 | from unittest.mock import Mock 4 | 5 | import pytest 6 | import sqlalchemy as sa 7 | from click import progressbar 8 | from click.testing import CliRunner 9 | 10 | import datanommer.models as m 11 | from datanommer.commands.extract_users import main as extract_users 12 | 13 | from .utils import generate_bodhi_update_complete_message, generate_message 14 | 15 | 16 | @pytest.fixture 17 | def bodhi_message_db(datanommer_models): 18 | msg = generate_bodhi_update_complete_message() 19 | m.add(msg) 20 | m.session.execute(m.users_assoc_table.delete()) 21 | msg_in_db = m.Message.from_msg_id(msg.id) 22 | msg_in_db.agent_name = None 23 | m.session.commit() 24 | 25 | m.session.refresh(msg_in_db) 26 | assert len(msg_in_db.users) == 0 27 | assert msg_in_db.agent_name is None 28 | return msg_in_db 29 | 30 | 31 | @pytest.fixture(autouse=True) 32 | def no_expunge(datanommer_models, monkeypatch): 33 | monkeypatch.setattr(m.session, "expunge_all", Mock(name="expunge_all")) 34 | monkeypatch.setattr(m.session, "expunge", Mock(name="expunge")) 35 | 36 | 37 | def test_extract_users(bodhi_message_db, mock_config, mock_init): 38 | runner = CliRunner() 39 | result = runner.invoke(extract_users, ["--debug", "usernames"]) 40 | 41 | assert result.exit_code == 0, result.output 42 | expected_output = ( 43 | "Counting messages...\n" 44 | "Considering 1 message\n\n" 45 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 46 | f"Usernames for message {bodhi_message_db.msg_id} of topic {bodhi_message_db.topic}: " 47 | "dudemcpants, ryanlerch\n" 48 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 49 | ) 50 | assert result.output == expected_output 51 | 52 | m.session.refresh(bodhi_message_db) 53 | assert len(bodhi_message_db.users) > 0 54 | assert {u.name for u in bodhi_message_db.users} == {"dudemcpants", "ryanlerch"} 55 | 56 | 57 | def test_extract_users_topic(bodhi_message_db, mock_config, mock_init): 58 | runner = CliRunner() 59 | result = runner.invoke( 60 | extract_users, ["--topic", "org.fedoraproject.stg.bodhi.update.comment", "usernames"] 61 | ) 62 | 63 | assert result.exit_code == 0, result.output 64 | 65 | m.session.refresh(bodhi_message_db) 66 | assert len(bodhi_message_db.users) > 0 67 | assert {u.name for u in bodhi_message_db.users} == {"dudemcpants", "ryanlerch"} 68 | 69 | 70 | def test_extract_users_wrong_topic(bodhi_message_db, mock_config, mock_init): 71 | runner = CliRunner() 72 | result = runner.invoke(extract_users, ["--topic", "something.else", "usernames"]) 73 | 74 | assert result.exit_code == 0, result.output 75 | 76 | m.session.refresh(bodhi_message_db) 77 | assert len(bodhi_message_db.users) == 0 78 | 79 | 80 | def test_extract_users_category(bodhi_message_db, mock_config, mock_init): 81 | runner = CliRunner() 82 | result = runner.invoke(extract_users, ["--category", "bodhi", "usernames"]) 83 | 84 | assert result.exit_code == 0, result.output 85 | 86 | m.session.refresh(bodhi_message_db) 87 | assert len(bodhi_message_db.users) > 0 88 | assert {u.name for u in bodhi_message_db.users} == {"dudemcpants", "ryanlerch"} 89 | 90 | 91 | def test_extract_users_wrong_category(bodhi_message_db, mock_config, mock_init): 92 | runner = CliRunner() 93 | result = runner.invoke(extract_users, ["--category", "git", 
"usernames"]) 94 | 95 | assert result.exit_code == 0, result.output 96 | 97 | m.session.refresh(bodhi_message_db) 98 | assert len(bodhi_message_db.users) == 0 99 | 100 | 101 | def test_extract_users_topic_and_category(mock_config, mock_init): 102 | runner = CliRunner() 103 | result = runner.invoke( 104 | extract_users, ["--category", "bodhi", "--topic", "some.topic", "usernames"] 105 | ) 106 | assert result.exit_code != 0, result.output 107 | assert "Error: can't use both --topic and --category, choose one." in result.output 108 | 109 | 110 | def test_extract_users_skipped_topic(bodhi_message_db, mock_config, mock_init): 111 | bodhi_message_db.topic = "org.release-monitoring.prod.anitya.project.version.update" 112 | m.session.commit() 113 | 114 | runner = CliRunner() 115 | result = runner.invoke(extract_users, ["usernames"]) 116 | 117 | assert result.exit_code == 0, result.output 118 | 119 | m.session.refresh(bodhi_message_db) 120 | assert len(bodhi_message_db.users) == 0 121 | 122 | 123 | def test_extract_users_no_users(datanommer_models, mock_config, mock_init): 124 | msg = generate_message() 125 | # change the schema header or the script won't pick it up 126 | msg._headers["fedora_messaging_schema"] = "testing" 127 | m.add(msg) 128 | runner = CliRunner() 129 | result = runner.invoke(extract_users, ["usernames"]) 130 | 131 | assert result.exit_code == 0, result.output 132 | users_count = m.session.scalar(sa.select(sa.func.count(m.users_assoc_table.c.msg_id))) 133 | assert users_count == 0 134 | start = datetime.datetime.fromisoformat(msg._headers["sent-at"]).astimezone() 135 | start = str(start).split("+")[0] 136 | assert result.output == ( 137 | "Counting messages...\n" 138 | "Considering 1 message\n\n" 139 | f"Working on 10000 messages sent after {start}\n" 140 | f"Working on 10000 messages sent after {start}\n" 141 | ) 142 | 143 | 144 | def test_extract_start(datanommer_models, mock_config, mock_init): 145 | now = datetime.datetime.now(tz=datetime.UTC) 146 | msg = generate_bodhi_update_complete_message() 147 | # Set the message to have happenned 3 days ago 148 | msg._properties.headers["sent-at"] = (now - datetime.timedelta(days=3)).isoformat() 149 | m.add(msg) 150 | m.session.execute(m.users_assoc_table.delete()) 151 | m.session.commit() 152 | 153 | runner = CliRunner() 154 | # Only look at messages from yesterday on 155 | result = runner.invoke( 156 | extract_users, 157 | ["--start", (now - datetime.timedelta(days=1)).strftime(r"%Y-%m-%d"), "usernames"], 158 | ) 159 | 160 | assert result.exit_code == 0, result.output 161 | # Message must not have had users set 162 | users_count = m.session.scalar(sa.select(sa.func.count(m.users_assoc_table.c.msg_id))) 163 | assert users_count == 0 164 | assert result.output == "Counting messages...\nNo messages matched.\n" 165 | 166 | 167 | def test_extract_end(bodhi_message_db, mock_config, mock_init): 168 | now = datetime.datetime.now() 169 | runner = CliRunner() 170 | # Only look at messages from yesterday on 171 | result = runner.invoke( 172 | extract_users, 173 | ["--end", (now - datetime.timedelta(days=1)).strftime(r"%Y-%m-%d"), "usernames"], 174 | ) 175 | 176 | assert result.exit_code == 0, result.output 177 | # Message must not have had users set 178 | users_count = m.session.scalar(sa.select(sa.func.count(m.users_assoc_table.c.msg_id))) 179 | assert users_count == 0 180 | assert result.output == "Counting messages...\nNo messages matched.\n" 181 | 182 | 183 | def test_extract_force_schema(bodhi_message_db, mock_config, mock_init): 184 | 
runner = CliRunner() 185 | result = runner.invoke(extract_users, ["--force-schema", "base.message", "usernames"]) 186 | 187 | assert result.exit_code == 0, result.output 188 | 189 | m.session.refresh(bodhi_message_db) 190 | assert len(bodhi_message_db.users) == 0 191 | 192 | 193 | def test_extract_invalid_message(bodhi_message_db, mock_config, mock_init): 194 | bodhi_message_db.msg = "this is invalid" 195 | m.session.commit() 196 | 197 | runner = CliRunner() 198 | result = runner.invoke(extract_users, ["usernames"]) 199 | 200 | assert result.exit_code == 0, result.output 201 | assert result.output == ( 202 | "Counting messages...\n" 203 | "Considering 1 message\n\n" 204 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 205 | f"Could not load message {bodhi_message_db.msg_id} on topic " 206 | f"{bodhi_message_db.topic}: 'this is invalid' is not of type 'object'\n" 207 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 208 | ) 209 | 210 | m.session.refresh(bodhi_message_db) 211 | assert len(bodhi_message_db.users) == 0 212 | 213 | 214 | def test_extract_agent(bodhi_message_db, mock_config, mock_init): 215 | runner = CliRunner() 216 | result = runner.invoke(extract_users, ["agent"]) 217 | 218 | assert result.exit_code == 0, result.output 219 | assert result.output == ( 220 | "Counting messages...\n" 221 | "Considering 1 message\n\n" 222 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 223 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 224 | ) 225 | m.session.refresh(bodhi_message_db) 226 | assert bodhi_message_db.agent_name == "dudemcpants" 227 | 228 | 229 | def test_extract_agent_with(bodhi_message_db, mock_config, mock_init): 230 | runner = CliRunner() 231 | result = runner.invoke(extract_users, ["--debug", "agent"]) 232 | 233 | assert result.exit_code == 0, result.output 234 | expected_output = ( 235 | "Counting messages...\n" 236 | "Considering 1 message\n\n" 237 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 238 | f"Agent for message {bodhi_message_db.msg_id} of topic {bodhi_message_db.topic}: " 239 | "dudemcpants\n" 240 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 241 | ) 242 | assert result.output == expected_output 243 | 244 | 245 | def test_extract_agent_no_users(datanommer_models, mock_config, mock_init): 246 | msg = generate_message() 247 | # change the schema header or the script won't pick it up 248 | msg._headers["fedora_messaging_schema"] = "testing" 249 | m.add(msg) 250 | runner = CliRunner() 251 | result = runner.invoke(extract_users, ["agent"]) 252 | 253 | assert result.exit_code == 0, result.output 254 | msg_in_db = m.Message.from_msg_id(msg.id) 255 | assert msg_in_db.agent_name is None 256 | assert result.output == ( 257 | "Counting messages...\n" 258 | "Considering 1 message\n\n" 259 | f"Working on 10000 messages sent after {msg_in_db.timestamp}\n" 260 | f"Working on 10000 messages sent after {msg_in_db.timestamp}\n" 261 | ) 262 | 263 | 264 | def test_extract_is_tty(bodhi_message_db, mock_config, mock_init, mocker): 265 | output = io.StringIO() 266 | mocker.patch.object(output, "isatty", lambda: True) 267 | mocker.patch( 268 | "datanommer.commands.utils.click.progressbar", lambda **kw: progressbar(file=output, **kw) 269 | ) 270 | runner = CliRunner() 271 | result = runner.invoke(extract_users, ["--debug", "usernames"]) 272 | 273 | assert result.exit_code == 0, result.output 274 | expected_output = ( 275 | "Counting 
messages...\n" 276 | "Considering 1 message\n" 277 | f"Usernames for message {bodhi_message_db.msg_id} of topic {bodhi_message_db.topic}: " 278 | "dudemcpants, ryanlerch\n" 279 | ) 280 | assert result.output == expected_output 281 | -------------------------------------------------------------------------------- /datanommer.commands/datanommer/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 16 | import importlib.metadata 17 | import itertools 18 | import json 19 | import logging 20 | import time 21 | from datetime import datetime, timedelta, timezone 22 | 23 | import click 24 | from sqlalchemy import func, select 25 | 26 | import datanommer.models as m 27 | from datanommer.models.view import refresh_recent_topics 28 | 29 | from .utils import config_option, get_config 30 | 31 | 32 | __version__ = importlib.metadata.version("datanommer-commands") 33 | 34 | log = logging.getLogger("datanommer") 35 | 36 | 37 | @click.command() 38 | @config_option 39 | def create(config_path): 40 | """Create a database and tables for 'datanommer.sqlalchemy.url'""" 41 | config = get_config(config_path) 42 | click.echo("Creating Datanommer database and tables") 43 | m.init( 44 | config["datanommer_sqlalchemy_url"], 45 | alembic_ini=config["alembic_ini"], 46 | create=True, 47 | ) 48 | 49 | 50 | @click.command() 51 | @config_option 52 | @click.option("--since", default=None, help="Only after datetime, ex 2013-02-14T08:05:59.87") 53 | @click.option("--before", default=None, help="Only before datetime, ex 2013-02-14T08:05:59.87") 54 | def dump(config_path, since, before): 55 | """Dump the contents of the datanommer database as JSON. 
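The messages are printed to stdout as a single JSON list.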
56 |
57 | You can also specify a timespan with the --since and --before arguments:
58 |
59 | $ datanommer-dump --before 2013-02-15 --since 2013-02-11T08:00:00 > datanommer-dump.json
60 | """
61 | config = get_config(config_path)
62 | m.init(
63 | config["datanommer_sqlalchemy_url"],
64 | alembic_ini=config["alembic_ini"],
65 | )
66 |
67 | query = select(m.Message)
68 | if before:
69 | try:
70 | before = datetime.fromisoformat(before)
71 | except ValueError as e:
72 | raise click.ClickException("Invalid date format") from e
73 |
74 | query = query.where(m.Message.timestamp <= before)
75 |
76 | if since:
77 | try:
78 | since = datetime.fromisoformat(since)
79 | except ValueError as e:
80 | raise click.ClickException("Invalid date format") from e
81 |
82 | query = query.where(m.Message.timestamp >= since)
83 |
84 | results = [json.dumps(msg.as_fedora_message_dict()) for msg in m.session.scalars(query)]
85 | click.echo(f"[{','.join(results)}]")
86 |
87 |
88 | @click.command()
89 | @config_option
90 | @click.option("--topic", is_flag=True, help="Shows the stats per topic")
91 | @click.option(
92 | "--category",
93 | default=None,
94 | help="Shows the stats within only the specified category",
95 | )
96 | def stats(config_path, topic, category):
97 | """Produce stats on the contents of the datanommer database.
98 |
99 | The default is to display the stats per category. You can also display
100 | the stats per topic with the --topic argument:
101 |
102 | $ datanommer-stats --topic
103 | org.fedoraproject.stg.fas.group.member.remove has 10 entries
104 | org.fedoraproject.stg.logger.log has 76 entries
105 | org.fedoraproject.stg.bodhi.update.comment has 5 entries
106 | org.fedoraproject.stg.busmon.colorized-messages has 10 entries
107 | org.fedoraproject.stg.fas.user.update has 10 entries
108 | org.fedoraproject.stg.wiki.article.edit has 106 entries
109 | org.fedoraproject.stg.fas.user.create has 3 entries
110 | org.fedoraproject.stg.bodhitest.testing has 4 entries
111 | org.fedoraproject.stg.fedoratagger.tag.create has 9 entries
112 | org.fedoraproject.stg.fedoratagger.user.rank.update has 5 entries
113 | org.fedoraproject.stg.wiki.upload.complete has 1 entries
114 | org.fedoraproject.stg.fas.group.member.sponsor has 6 entries
115 | org.fedoraproject.stg.fedoratagger.tag.update has 1 entries
116 | org.fedoraproject.stg.fas.group.member.apply has 17 entries
117 | org.fedoraproject.stg.__main__.testing has 1 entries
118 |
119 | The --category argument can be combined with --topic to show stats of the
120 | topics with a specific category or can be used alone to show the stats for
121 | only that category:
122 |
123 | $ datanommer-stats --topic --category fas
124 | org.fedoraproject.stg.fas.group.member.remove has 10 entries
125 | org.fedoraproject.stg.fas.user.update has 10 entries
126 | org.fedoraproject.stg.fas.user.create has 3 entries
127 | org.fedoraproject.stg.fas.group.member.sponsor has 6 entries
128 | org.fedoraproject.stg.fas.group.member.apply has 17 entries
129 |
130 | $ datanommer-stats --category fas
131 | fas has 46 entries
132 |
133 | """
134 | config = get_config(config_path)
135 | m.init(
136 | config["datanommer_sqlalchemy_url"],
137 | alembic_ini=config["alembic_ini"],
138 | )
139 |
140 | if topic:
141 | query = select(m.Message.topic, func.count(m.Message.topic))
142 | if category:
143 | query = query.where(m.Message.category == category)
144 | query = query.group_by(m.Message.topic)
145 | else:
146 | query = select(m.Message.category, func.count(m.Message.category))
147 |
if category: 148 | query = query.where(m.Message.category == category) 149 | query = query.group_by(m.Message.category) 150 | 151 | results = m.session.execute(query).all() 152 | 153 | if topic: 154 | for topic, count in results: 155 | click.echo(f"{topic} has {count} entries") 156 | else: 157 | for category, count in results: 158 | click.echo(f"{category} has {count} entries") 159 | 160 | 161 | @click.command() 162 | @config_option 163 | @click.option("--topic", default=None, help="Show the latest for only a specific topic.") 164 | @click.option("--category", default=None, help="Show the latest for only a specific category.") 165 | @click.option( 166 | "--overall", 167 | is_flag=True, 168 | help="Show only the latest message out of all message types.", 169 | ) 170 | @click.option("--timestamp", is_flag=True, help="Show only the timestamp of the message(s).") 171 | @click.option( 172 | "--timesince", 173 | is_flag=True, 174 | help="Show the number of seconds since the last message", 175 | ) 176 | @click.option( 177 | "--human", 178 | is_flag=True, 179 | help="When combined with --timestamp or --timesince,show a human readable date.", 180 | ) 181 | def latest(config_path, topic, category, overall, timestamp, timesince, human): 182 | """Print the latest message(s) ingested by datanommer. 183 | 184 | The default is to display the latest message in each message category. The 185 | latest in only a specified category or topic can also be returned:: 186 | 187 | $ datanommer-latest --category bodhi 188 | [{"bodhi": { 189 | "topic": "org.fedoraproject.stg.bodhi.update.comment", 190 | "msg": { 191 | "comment": { 192 | "group": null, 193 | "author": "ralph", 194 | "text": "Testing for latest datanommer.", 195 | "karma": 0, 196 | "anonymous": false, 197 | "timestamp": 1360349639.0, 198 | "update_title": "xmonad-0.10-10.fc17" 199 | }, 200 | "agent": "ralph" 201 | }, 202 | }}] 203 | 204 | $ datanommer-latest --topic org.fedoraproject.stg.bodhi.update.comment 205 | [{"bodhi": { 206 | "topic": "org.fedoraproject.stg.bodhi.update.comment", 207 | "msg": { 208 | "comment": { 209 | "group": null, 210 | "author": "ralph", 211 | "text": "Testing for latest datanommer.", 212 | "karma": 0, 213 | "anonymous": false, 214 | "timestamp": 1360349639.0, 215 | "update_title": "xmonad-0.10-10.fc17" 216 | }, 217 | "agent": "ralph" 218 | }, 219 | }}] 220 | 221 | Or to display the latest, regardless of the topic or category:: 222 | 223 | $ datanommer-latest --overall 224 | [{"bodhi": { 225 | "topic": "org.fedoraproject.stg.bodhi.update.comment", 226 | "msg": { 227 | "comment": { 228 | "group": null, 229 | "author": "ralph", 230 | "text": "Testing for latest datanommer.", 231 | "karma": 0, 232 | "anonymous": false, 233 | "timestamp": 1360349639.0, 234 | "update_title": "xmonad-0.10-10.fc17" 235 | }, 236 | "agent": "ralph" 237 | }, 238 | }}] 239 | 240 | You can combine either a --topic, --category or --overall argument while 241 | requesting information about the timestamp of the latest:: 242 | 243 | $ datanommer-latest --category wiki --timestamp 244 | [1361166918.0] 245 | 246 | # February 18, 2013 at 5:55AM 247 | $ datanommer-latest --category wiki --timestamp --human 248 | ["2013-02-18 05:55:18"] 249 | 250 | Or how recent that timestamp is:: 251 | 252 | # 49250 seconds ago 253 | $ datanommer-latest --category wiki --timesince 254 | [49250] 255 | 256 | # 13 hours, 40 minutes, 59.52 seconds ago 257 | $ datanommer-latest --category wiki --timesince --human 258 | [13:40:59.519447] 259 | """ 260 | config = 
get_config(config_path) 261 | m.init( 262 | config["datanommer_sqlalchemy_url"], 263 | alembic_ini=config["alembic_ini"], 264 | ) 265 | 266 | if topic: 267 | queries = [select(m.Message).where(m.Message.topic == topic)] 268 | 269 | elif category: 270 | queries = [select(m.Message).where(m.Message.category == category)] 271 | elif not overall: 272 | # If no args.. 273 | categories_query = select(m.Message.category).distinct().order_by(m.Message.category) 274 | categories = m.session.scalars(categories_query) 275 | queries = [ 276 | select(m.Message).where(m.Message.category == category) for category in categories 277 | ] 278 | else: 279 | # Show only the single latest message, regardless of type. 280 | queries = [select(m.Message)] 281 | 282 | # Only check messages from the last year to speed up queries 283 | a_year = timedelta(days=365) 284 | earliest = datetime.now(tz=timezone.utc) - a_year 285 | queries = [q.where(m.Message.timestamp > earliest) for q in queries] 286 | 287 | # Order and limit to the latest. 288 | queries = [q.order_by(m.Message.timestamp.desc()).limit(1) for q in queries] 289 | 290 | def formatter(key, val): 291 | if timestamp and human: 292 | return json.dumps(str(val.timestamp)) 293 | elif timestamp: 294 | return json.dumps(time.mktime(val.timestamp.timetuple())) 295 | elif timesince and human: 296 | return json.dumps(str(datetime.now() - val.timestamp)) 297 | elif timesince: 298 | timedelta = datetime.now() - val.timestamp 299 | return json.dumps(str((timedelta.days * 86400) + timedelta.seconds)) 300 | else: 301 | return f'{{"{key}": {json.dumps(val.as_fedora_message_dict())}}}' 302 | 303 | results = [] 304 | for result in itertools.chain.from_iterable(m.session.scalars(query) for query in queries): 305 | results.append(formatter(result.category, result)) 306 | 307 | click.echo(f"[{','.join(results)}]") 308 | 309 | 310 | @click.command() 311 | @config_option 312 | def refresh_view(config_path): 313 | """Refresh the materialized view `recent_topics`. 314 | 315 | This command should be run periodically via cron job to keep 316 | the materialized view `recent_topics` up to date. 317 | """ 318 | config = get_config(config_path) 319 | m.init( 320 | config["datanommer_sqlalchemy_url"], 321 | alembic_ini=config["alembic_ini"], 322 | ) 323 | 324 | refresh_recent_topics(m.session) 325 | 326 | click.echo("Recent topics materialized view refreshed successfully") 327 | -------------------------------------------------------------------------------- /datanommer.commands/tests/test_commands.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 
16 | import json 17 | import time 18 | from datetime import datetime, timedelta 19 | 20 | import pytest 21 | from click import ClickException 22 | from click.testing import CliRunner 23 | 24 | import datanommer.commands 25 | import datanommer.models as m 26 | 27 | from .utils import generate_bodhi_update_complete_message, generate_message 28 | 29 | 30 | def test_get_datanommer_sqlalchemy_url_keyerror(mocker): 31 | mocker.patch.dict( 32 | datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"], 33 | {}, 34 | clear=True, 35 | ) 36 | with pytest.raises(ClickException): 37 | datanommer.commands.get_config() 38 | 39 | 40 | def test_get_datanommer_sqlalchemy_url_config(mocker): 41 | conf = { 42 | "datanommer_sqlalchemy_url": "", 43 | "alembic_ini": "/some/where", 44 | } 45 | mocker.patch.dict( 46 | datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"], conf 47 | ) 48 | load_config = mocker.patch( 49 | "datanommer.commands.utils.fedora_messaging_config.conf.load_config", 50 | ) 51 | datanommer.commands.get_config("some-path") 52 | load_config.assert_called_with("some-path") 53 | 54 | 55 | def test_create(mocker): 56 | mock_model_init = mocker.patch("datanommer.commands.m.init") 57 | mocker.patch.dict( 58 | datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"], 59 | { 60 | "datanommer_sqlalchemy_url": "TESTURL", 61 | "alembic_ini": "/some/where", 62 | }, 63 | ) 64 | 65 | runner = CliRunner() 66 | result = runner.invoke(datanommer.commands.create, []) 67 | assert result.exit_code == 0, result.output 68 | 69 | assert result.output == "Creating Datanommer database and tables\n" 70 | mock_model_init.assert_called_once_with("TESTURL", alembic_ini="/some/where", create=True) 71 | 72 | 73 | def test_stats(datanommer_models, mock_config, mock_init): 74 | msg1 = generate_message( 75 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 76 | body={"Message 1": "Message 1"}, 77 | ) 78 | m.add(msg1) 79 | 80 | msg2 = generate_message( 81 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 82 | ) 83 | m.add(msg2) 84 | 85 | msg3 = generate_message( 86 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 87 | body={"Message 3": "Message 3"}, 88 | ) 89 | m.add(msg3) 90 | 91 | runner = CliRunner() 92 | result = runner.invoke(datanommer.commands.stats, []) 93 | assert result.exit_code == 0, result.output 94 | 95 | assert "git has 2 entries" in result.output 96 | assert "fas has 1 entries" in result.output 97 | 98 | 99 | def test_stats_topics(datanommer_models, mock_config, mock_init): 100 | msg1 = generate_message( 101 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 102 | body={"Message 1": "Message 1"}, 103 | ) 104 | m.add(msg1) 105 | 106 | msg2 = generate_message( 107 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 108 | ) 109 | m.add(msg2) 110 | 111 | msg3 = generate_message( 112 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 113 | body={"Message 3": "Message 3"}, 114 | ) 115 | m.add(msg3) 116 | 117 | runner = CliRunner() 118 | result = runner.invoke(datanommer.commands.stats, ["--topic"]) 119 | assert result.exit_code == 0, result.output 120 | 121 | assert "org.fedoraproject.prod.git.receive.valgrind.master has 1 entries" in result.output 122 | assert "org.fedoraproject.stg.fas.user.create has 1 entries" in result.output 123 | assert "org.fedoraproject.prod.git.branch.valgrind.master has 1 entries" in result.output 124 | 125 | 126 | def 
test_stats_category_topics(datanommer_models, mock_config, mock_init): 127 | msg1 = generate_message( 128 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 129 | body={"Message 1": "Message 1"}, 130 | ) 131 | m.add(msg1) 132 | 133 | msg2 = generate_message( 134 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 135 | ) 136 | m.add(msg2) 137 | 138 | msg3 = generate_message( 139 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 140 | body={"Message 3": "Message 3"}, 141 | ) 142 | m.add(msg3) 143 | 144 | runner = CliRunner() 145 | result = runner.invoke(datanommer.commands.stats, ["--topic", "--category", "git"]) 146 | assert result.exit_code == 0, result.output 147 | 148 | assert "org.fedoraproject.prod.git.receive.valgrind.master has 1 entries" in result.output 149 | assert "org.fedoraproject.stg.fas.user.create has 1 entries" not in result.output 150 | assert "org.fedoraproject.prod.git.branch.valgrind.master has 1 entries" in result.output 151 | 152 | 153 | def test_stats_category(datanommer_models, mock_config, mock_init): 154 | msg1 = generate_message( 155 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 156 | body={"Message 1": "Message 1"}, 157 | ) 158 | m.add(msg1) 159 | 160 | msg2 = generate_message( 161 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 162 | ) 163 | m.add(msg2) 164 | 165 | msg3 = generate_message( 166 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 167 | body={"Message 3": "Message 3"}, 168 | ) 169 | m.add(msg3) 170 | 171 | runner = CliRunner() 172 | result = runner.invoke(datanommer.commands.stats, ["--category", "git"]) 173 | assert result.exit_code == 0, result.output 174 | 175 | assert result.output == "git has 2 entries\n" 176 | 177 | 178 | def test_dump(datanommer_models, mock_config, mock_init): 179 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 180 | m.add(msg1) 181 | 182 | msg2 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 183 | m.add(msg2) 184 | 185 | msg3 = generate_bodhi_update_complete_message() 186 | m.add(msg3) 187 | 188 | runner = CliRunner() 189 | result = runner.invoke(datanommer.commands.dump, []) 190 | assert result.exit_code == 0, result.output 191 | 192 | json_object = json.loads(result.output) 193 | 194 | assert json_object[0]["topic"] == "org.fedoraproject.prod.git.branch.valgrind.master" 195 | 196 | 197 | def test_dump_before(datanommer_models, mock_config, mock_init): 198 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 199 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 200 | m.add(msg1) 201 | 202 | msg2 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 203 | msg2._properties.headers["sent-at"] = datetime(2013, 2, 15).isoformat() 204 | m.add(msg2) 205 | 206 | msg3 = generate_message(topic="org.fedoraproject.prod.log.receive.valgrind.master") 207 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16, 8).isoformat() 208 | m.add(msg3) 209 | 210 | runner = CliRunner() 211 | result = runner.invoke(datanommer.commands.dump, ["--before", "2013-02-16"]) 212 | assert result.exit_code == 0, result.output 213 | 214 | json_object = json.loads(result.output) 215 | 216 | assert json_object[0]["topic"] == "org.fedoraproject.prod.git.branch.valgrind.master" 217 | assert json_object[1]["topic"] == "org.fedoraproject.prod.git.receive.valgrind.master" 218 | assert len(json_object) == 
2 219 | 220 | 221 | def test_dump_since(datanommer_models, mock_config, mock_init): 222 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 223 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 224 | m.add(msg1) 225 | 226 | msg2 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 227 | msg2._properties.headers["sent-at"] = datetime(2013, 2, 15).isoformat() 228 | m.add(msg2) 229 | 230 | msg3 = generate_message(topic="org.fedoraproject.prod.log.receive.valgrind.master") 231 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16, 8).isoformat() 232 | m.add(msg3) 233 | 234 | runner = CliRunner() 235 | result = runner.invoke(datanommer.commands.dump, ["--since", "2013-02-14T08:00:00"]) 236 | assert result.exit_code == 0, result.output 237 | 238 | json_object = json.loads(result.output) 239 | 240 | assert json_object[0]["topic"] == "org.fedoraproject.prod.git.receive.valgrind.master" 241 | assert json_object[1]["topic"] == "org.fedoraproject.prod.log.receive.valgrind.master" 242 | assert len(json_object) == 2 243 | 244 | 245 | def test_dump_timespan(datanommer_models, mock_config, mock_init): 246 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 247 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 248 | m.add(msg1) 249 | 250 | msg2 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 251 | msg2._properties.headers["sent-at"] = datetime(2013, 2, 15).isoformat() 252 | m.add(msg2) 253 | 254 | msg3 = generate_message(topic="org.fedoraproject.prod.log.receive.valgrind.master") 255 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16, 8).isoformat() 256 | m.add(msg3) 257 | 258 | runner = CliRunner() 259 | result = runner.invoke( 260 | datanommer.commands.dump, 261 | ["--before", "2013-02-16", "--since", "2013-02-14T08:00:00"], 262 | ) 263 | assert result.exit_code == 0, result.output 264 | 265 | json_object = json.loads(result.output) 266 | 267 | assert json_object[0]["topic"] == "org.fedoraproject.prod.git.receive.valgrind.master" 268 | assert len(json_object) == 1 269 | 270 | 271 | def test_dump_invalid_dates(datanommer_models, mock_config, mock_init): 272 | runner = CliRunner() 273 | result = runner.invoke(datanommer.commands.dump, ["--before", "2013-02-16asdasd"]) 274 | assert result.exit_code > 0, result.output 275 | assert result.output == "Error: Invalid date format\n" 276 | 277 | result = runner.invoke(datanommer.commands.dump, ["--since", "2013-02-16asdasd"]) 278 | assert result.exit_code > 0, result.output 279 | assert result.output == "Error: Invalid date format\n" 280 | 281 | 282 | def test_latest_overall(datanommer_models, mock_config, mock_init): 283 | msg1 = generate_message( 284 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 285 | body={"Message 1": "Message 1"}, 286 | ) 287 | m.add(msg1) 288 | 289 | msg2 = generate_message( 290 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 291 | ) 292 | m.add(msg2) 293 | 294 | msg3 = generate_message( 295 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 296 | body={"Message 3": "Message 3"}, 297 | ) 298 | m.add(msg3) 299 | 300 | runner = CliRunner() 301 | result = runner.invoke(datanommer.commands.latest, ["--overall"]) 302 | assert result.exit_code == 0, result.output 303 | 304 | json_object = json.loads(result.output) 305 | 306 | assert json_object[0]["git"]["body"] == {"Message 3": "Message 3"} 307 | assert 
len(json_object) == 1 308 | 309 | 310 | def test_latest_topic(datanommer_models, mock_config, mock_init): 311 | msg1 = generate_message( 312 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 313 | body={"Message 1": "Message 1"}, 314 | ) 315 | m.add(msg1) 316 | 317 | msg2 = generate_message( 318 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 319 | ) 320 | m.add(msg2) 321 | 322 | msg3 = generate_message( 323 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 324 | body={"Message 3": "Message 3"}, 325 | ) 326 | m.add(msg3) 327 | 328 | runner = CliRunner() 329 | result = runner.invoke( 330 | datanommer.commands.latest, ["--topic", "org.fedoraproject.stg.fas.user.create"] 331 | ) 332 | assert result.exit_code == 0, result.output 333 | 334 | json_object = json.loads(result.output) 335 | 336 | assert json_object[0]["fas"]["body"] == {"Message 2": "Message 2"} 337 | assert len(json_object) == 1 338 | 339 | 340 | def test_latest_category(datanommer_models, mock_config, mock_init): 341 | msg1 = generate_message( 342 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 343 | body={"Message 1": "Message 1"}, 344 | ) 345 | m.add(msg1) 346 | 347 | msg2 = generate_message( 348 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 349 | ) 350 | m.add(msg2) 351 | 352 | msg3 = generate_message( 353 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 354 | body={"Message 3": "Message 3"}, 355 | ) 356 | m.add(msg3) 357 | 358 | runner = CliRunner() 359 | result = runner.invoke(datanommer.commands.latest, ["--category", "fas"]) 360 | assert result.exit_code == 0, result.output 361 | 362 | json_object = json.loads(result.output) 363 | 364 | assert json_object[0]["fas"]["body"] == {"Message 2": "Message 2"} 365 | assert len(json_object) == 1 366 | 367 | 368 | def test_latest_timestamp_human(datanommer_models, mocker, mock_config, mock_init): 369 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 370 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 371 | m.add(msg1) 372 | 373 | msg2 = generate_message(topic="org.fedoraproject.stg.fas.user.create") 374 | msg2._properties.headers["sent-at"] = datetime(2013, 2, 15, 15, 15, 15, 15).isoformat() 375 | m.add(msg2) 376 | 377 | msg3 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 378 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16, 16, 16, 16, 16).isoformat() 379 | m.add(msg3) 380 | 381 | # datanommer-latest defaults to the last year, so mock the 382 | # datetime calls to go back to 2013 383 | mock_dt = mocker.patch("datanommer.commands.datetime") 384 | mock_dt.now.return_value = datetime(2013, 3, 1) 385 | 386 | runner = CliRunner() 387 | result = runner.invoke(datanommer.commands.latest, ["--timestamp", "--human"]) 388 | assert result.exit_code == 0, result.output 389 | 390 | json_object = json.loads(result.output) 391 | 392 | assert json_object[1] == "2013-02-16 16:16:16.000016" 393 | assert json_object[0] == "2013-02-15 15:15:15.000015" 394 | assert len(json_object) == 2 395 | 396 | 397 | def test_latest_timestamp(datanommer_models, mocker, mock_config, mock_init): 398 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 399 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 400 | m.add(msg1) 401 | 402 | msg2 = generate_message(topic="org.fedoraproject.stg.fas.user.create") 403 | msg2._properties.headers["sent-at"] 
= datetime(2013, 2, 15).isoformat() 404 | m.add(msg2) 405 | 406 | msg3 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 407 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16).isoformat() 408 | m.add(msg3) 409 | 410 | # datanommer-latest defaults to the last year, so mock the 411 | # datetime calls to go back to 2013 412 | mock_dt = mocker.patch("datanommer.commands.datetime") 413 | mock_dt.now.return_value = datetime(2013, 3, 1) 414 | 415 | runner = CliRunner() 416 | result = runner.invoke(datanommer.commands.latest, ["--timestamp"]) 417 | assert result.exit_code == 0, result.output 418 | 419 | json_object = json.loads(result.output) 420 | 421 | assert json_object[1] == time.mktime(datetime(2013, 2, 16).timetuple()) 422 | assert json_object[0] == time.mktime(datetime(2013, 2, 15).timetuple()) 423 | assert len(json_object) == 2 424 | 425 | 426 | def test_latest_timesince(datanommer_models, mocker, mock_config, mock_init): 427 | now = datetime(2013, 3, 1) 428 | 429 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 430 | time1 = now - timedelta(days=1) 431 | msg1._properties.headers["sent-at"] = time1.isoformat() 432 | m.add(msg1) 433 | 434 | msg2 = generate_message(topic="org.fedoraproject.stg.fas.user.create") 435 | time2 = now - timedelta(seconds=60) 436 | msg2._properties.headers["sent-at"] = time2.isoformat() 437 | m.add(msg2) 438 | 439 | msg3 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 440 | time3 = now - timedelta(seconds=1) 441 | msg3._properties.headers["sent-at"] = time3.isoformat() 442 | m.add(msg3) 443 | 444 | # datanommer-latest defaults to the last year, so mock the 445 | # datetime calls to go back to 2013 446 | mock_dt = mocker.patch("datanommer.commands.datetime") 447 | mock_dt.now.return_value = now 448 | 449 | runner = CliRunner() 450 | result = runner.invoke(datanommer.commands.latest, ["--timesince"]) 451 | assert result.exit_code == 0, result.output 452 | 453 | json_object = json.loads(result.output) 454 | 455 | # allow .1 second to run test 456 | assert int(json_object[1]) <= 1.1 457 | assert int(json_object[1]) >= 1 458 | assert int(json_object[0]) <= 60.1 459 | assert int(json_object[0]) >= 60 460 | assert len(json_object) == 2 461 | 462 | 463 | def test_latest_timesince_human(datanommer_models, mock_config, mock_init, mocker): 464 | now = datetime.now() 465 | # mocker.patch.object(datanommer.commands.datetime, "now", return_value=now) 466 | patched_datetime = mocker.patch("datanommer.commands.datetime", mocker.Mock(wraps=datetime)) 467 | patched_datetime.now.return_value = now 468 | 469 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 470 | time1 = now - timedelta(days=2) 471 | msg1._properties.headers["sent-at"] = time1.isoformat() 472 | m.add(msg1) 473 | 474 | msg2 = generate_message(topic="org.fedoraproject.stg.fas.user.create") 475 | time2 = now - timedelta(days=1) 476 | msg2._properties.headers["sent-at"] = time2.isoformat() 477 | m.add(msg2) 478 | 479 | msg3 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 480 | time3 = now - timedelta(seconds=1) 481 | msg3._properties.headers["sent-at"] = time3.isoformat() 482 | m.add(msg3) 483 | 484 | runner = CliRunner() 485 | result = runner.invoke(datanommer.commands.latest, ["--timesince", "--human"]) 486 | assert result.exit_code == 0, result.output 487 | 488 | assert json.loads(result.output) == ["1 day, 0:00:00", "0:00:01"] 489 | 490 | 491 | def 
test_latest(datanommer_models, mock_config, mock_init): 492 | msg1 = generate_message( 493 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 494 | body={"Message 1": "Message 1"}, 495 | ) 496 | time1 = datetime.now() - timedelta(days=2) 497 | msg1._properties.headers["sent-at"] = time1.isoformat() 498 | m.add(msg1) 499 | 500 | msg2 = generate_message( 501 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 502 | ) 503 | m.add(msg2) 504 | 505 | msg3 = generate_message( 506 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 507 | body={"Message 3": "Message 3"}, 508 | ) 509 | m.add(msg3) 510 | 511 | runner = CliRunner() 512 | result = runner.invoke(datanommer.commands.latest, []) 513 | assert result.exit_code == 0, result.output 514 | 515 | json_object = json.loads(result.output) 516 | 517 | assert json_object[1]["git"]["body"] == {"Message 3": "Message 3"} 518 | assert json_object[0]["fas"]["body"] == {"Message 2": "Message 2"} 519 | assert len(json_object) == 2 520 | 521 | 522 | def test_refresh_view(datanommer_models, mock_config, mocker): 523 | """Test the refresh_view command.""" 524 | 525 | mock_refresh = mocker.patch("datanommer.commands.refresh_recent_topics") 526 | 527 | runner = CliRunner() 528 | result = runner.invoke(datanommer.commands.refresh_view, []) 529 | 530 | assert result.exit_code == 0, result.output 531 | mock_refresh.assert_called_once_with(m.session) 532 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 
16 | import datetime
17 | import importlib.metadata
18 | import json
19 | import logging
20 | import math
21 | import traceback
22 | import uuid
23 | from warnings import warn
24 |
25 | from sqlalchemy import (
26 | and_,
27 | between,
28 | Column,
29 | create_engine,
30 | DateTime,
31 | DDL,
32 | event,
33 | ForeignKey,
34 | func,
35 | Index,
36 | Integer,
37 | not_,
38 | or_,
39 | select,
40 | String,
41 | Table,
42 | text,
43 | TypeDecorator,
44 | Unicode,
45 | UnicodeText,
46 | UniqueConstraint,
47 | )
48 | from sqlalchemy.dialects import postgresql
49 | from sqlalchemy.exc import IntegrityError
50 | from sqlalchemy.orm import (
51 | declarative_base,
52 | relationship,
53 | scoped_session,
54 | sessionmaker,
55 | validates,
56 | )
57 | from sqlalchemy.sql import operators
58 |
59 | from .view import create_view
60 |
61 |
62 | try:
63 | from psycopg2.errors import UniqueViolation
64 | except ImportError: # pragma: no cover
65 | from psycopg2.errorcodes import lookup as lookup_error
66 |
67 | UniqueViolation = lookup_error("23505")
68 |
69 |
70 | __version__ = importlib.metadata.version("datanommer-models")
71 |
72 |
73 | log = logging.getLogger("datanommer")
74 |
75 | maker = sessionmaker()
76 | session = scoped_session(maker)
77 |
78 | DeclarativeBase = declarative_base()
79 | DeclarativeBase.query = session.query_property()
80 |
81 |
82 | def init(uri=None, alembic_ini=None, engine=None, create=False):
83 | """Initialize a connection. Create tables if requested."""
84 |
85 | if uri and engine:
86 | raise ValueError("uri and engine cannot both be specified")
87 |
88 | if uri is None and not engine:
89 | raise ValueError("One of uri or engine must be specified")
90 |
91 | if uri and not engine:
92 | engine = create_engine(uri, future=True)
93 |
94 | # We need to hang our own attribute on the sqlalchemy session to stop
95 | # ourselves from initializing twice. That is only a problem if the code
96 | # calling us isn't consistent.
97 | if getattr(session, "_datanommer_initialized", None):
98 | log.warning("Session already initialized. Bailing")
99 | return
100 | session._datanommer_initialized = True
101 |
102 | maker.configure(bind=engine)
103 | DeclarativeBase.query = session.query_property()
104 |
105 | if create:
106 | with engine.begin() as connection:
107 | connection.execute(text("CREATE EXTENSION IF NOT EXISTS timescaledb"))
108 | DeclarativeBase.metadata.create_all(engine)
109 | with engine.begin() as connection:
110 | create_view(connection)
111 | # Loads the alembic configuration and generates the version table, with
112 | # the most recent revision stamped as head
113 | if alembic_ini is not None: # pragma: no cover
114 | from alembic import command
115 | from alembic.config import Config
116 |
117 | alembic_cfg = Config(alembic_ini)
118 | command.stamp(alembic_cfg, "head")
119 |
120 |
121 | def add(message):
122 | """Take the fedora-messaging Message and store it in the message
123 | table.
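The timestamp is taken from the message's "sent-at" header when present, falling back to the current time.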
124 | """ 125 | headers = message._properties.headers 126 | sent_at = headers.get("sent-at", None) 127 | 128 | if sent_at: 129 | # fromisoformat doesn't parse Z suffix (yet) see: 130 | # https://discuss.python.org/t/parse-z-timezone-suffix-in-datetime/2220 131 | try: 132 | sent_at = datetime.datetime.fromisoformat(sent_at.replace("Z", "+00:00")) 133 | except ValueError: 134 | log.exception("Failed to parse sent-at timestamp value") 135 | return 136 | else: 137 | sent_at = datetime.datetime.now(tz=datetime.UTC) 138 | 139 | # Workaround schemas misbehaving 140 | try: 141 | usernames = message.usernames 142 | except Exception: 143 | log.exception( 144 | "Could not get the list of users from a message on %s with id %s", 145 | message.topic, 146 | message.id, 147 | ) 148 | usernames = [] 149 | try: 150 | packages = message.packages 151 | except Exception: 152 | log.exception( 153 | "Could not get the list of packages from a message on %s with id %s", 154 | message.topic, 155 | message.id, 156 | ) 157 | packages = [] 158 | 159 | Message.create( 160 | i=0, 161 | msg_id=message.id, 162 | topic=message.topic, 163 | timestamp=sent_at, 164 | msg=message.body, 165 | headers=headers, 166 | agent_name=getattr(message, "agent_name", None), 167 | users=usernames, 168 | packages=packages, 169 | ) 170 | 171 | session.commit() 172 | 173 | 174 | # https://docs.sqlalchemy.org/en/14/core/custom_types.html#marshal-json-strings 175 | 176 | 177 | class _JSONEncodedDict(TypeDecorator): 178 | """Represents an immutable structure as a json-encoded string.""" 179 | 180 | impl = UnicodeText 181 | 182 | cache_ok = True 183 | 184 | def process_bind_param(self, value, dialect): 185 | if value is not None: 186 | value = json.dumps(value) 187 | 188 | return value 189 | 190 | def process_result_value(self, value, dialect): 191 | if value is not None: 192 | value = json.loads(value) 193 | return value 194 | 195 | def coerce_compared_value(self, op, value): 196 | # https://docs.sqlalchemy.org/en/14/core/custom_types.html#dealing-with-comparison-operations 197 | if op in (operators.like_op, operators.not_like_op): 198 | return String() 199 | else: 200 | return self 201 | 202 | 203 | users_assoc_table = Table( 204 | "users_messages", 205 | DeclarativeBase.metadata, 206 | Column("user_id", ForeignKey("users.id"), primary_key=True), 207 | Column("msg_id", Integer, primary_key=True, index=True), 208 | Column("msg_timestamp", DateTime, primary_key=True, index=True), 209 | ) 210 | 211 | packages_assoc_table = Table( 212 | "packages_messages", 213 | DeclarativeBase.metadata, 214 | Column("package_id", ForeignKey("packages.id"), primary_key=True), 215 | Column("msg_id", Integer, primary_key=True, index=True), 216 | Column("msg_timestamp", DateTime, primary_key=True, index=True), 217 | ) 218 | 219 | 220 | class Message(DeclarativeBase): 221 | __tablename__ = "messages" 222 | __table_args__ = ( 223 | UniqueConstraint("msg_id", "timestamp"), 224 | Index( 225 | "ix_messages_headers", 226 | "headers", 227 | postgresql_using="gin", 228 | postgresql_ops={"headers": "jsonb_path_ops"}, 229 | ), 230 | ) 231 | 232 | id = Column(Integer, primary_key=True, autoincrement=True) 233 | msg_id = Column(Unicode, nullable=True, default=None, index=True) 234 | i = Column(Integer, nullable=False) 235 | topic = Column(Unicode, nullable=False, index=True) 236 | timestamp = Column(DateTime, nullable=False, index=True, primary_key=True) 237 | certificate = Column(UnicodeText) 238 | signature = Column(UnicodeText) 239 | category = Column(Unicode, nullable=False, 
index=True) 240 | agent_name = Column(Unicode, index=True) 241 | crypto = Column(UnicodeText) 242 | source_name = Column(Unicode, default="datanommer") 243 | source_version = Column(Unicode, default=lambda context: __version__) 244 | msg = Column(_JSONEncodedDict, nullable=False) 245 | headers = Column(postgresql.JSONB(none_as_null=True)) 246 | users = relationship( 247 | "User", 248 | secondary=users_assoc_table, 249 | backref="messages", 250 | primaryjoin=lambda: and_( 251 | Message.id == users_assoc_table.c.msg_id, 252 | Message.timestamp == users_assoc_table.c.msg_timestamp, 253 | ), 254 | ) 255 | packages = relationship( 256 | "Package", 257 | secondary=packages_assoc_table, 258 | backref="messages", 259 | primaryjoin=lambda: and_( 260 | Message.id == packages_assoc_table.c.msg_id, 261 | Message.timestamp == packages_assoc_table.c.msg_timestamp, 262 | ), 263 | ) 264 | 265 | @validates("topic") 266 | def get_category(self, key, topic): 267 | """Update the category when the topic is set. 268 | 269 | The method seems... unnatural. But even zzzeek says it's OK to do it: 270 | https://stackoverflow.com/a/6442201 271 | """ 272 | index = 2 if "VirtualTopic" in topic else 3 273 | try: 274 | self.category = topic.split(".")[index] 275 | except Exception: 276 | traceback.print_exc() 277 | self.category = "Unclassified" 278 | return topic 279 | 280 | @classmethod 281 | def create(cls, **kwargs): 282 | users = kwargs.pop("users") 283 | packages = kwargs.pop("packages") 284 | if not kwargs.get("msg_id"): 285 | log.info("Message on %s was received without a msg_id", kwargs["topic"]) 286 | kwargs["msg_id"] = str(uuid.uuid4()) 287 | obj = cls(**kwargs) 288 | 289 | try: 290 | session.add(obj) 291 | session.flush() 292 | except IntegrityError as e: 293 | if isinstance(e.orig, UniqueViolation): 294 | log.warning( 295 | "Skipping message from %s with duplicate id: %s", 296 | kwargs["topic"], 297 | kwargs["msg_id"], 298 | ) 299 | else: 300 | log.exception( 301 | "Unknown Integrity Error: message %s with id %s", 302 | kwargs["topic"], 303 | kwargs["msg_id"], 304 | ) 305 | session.rollback() 306 | return 307 | 308 | obj._insert_list(User, users_assoc_table, users) 309 | obj._insert_list(Package, packages_assoc_table, packages) 310 | 311 | def _insert_list(self, rel_class, assoc_table, values): 312 | if not values: 313 | return 314 | assoc_col_name = assoc_table.c[0].name 315 | insert_values = [] 316 | for name in set(values): 317 | attr_obj = rel_class.get_or_create(name) 318 | # This would normally be a simple "obj.[users|packages].append(name)" kind 319 | # of statement, but here we drop down out of sqlalchemy's ORM and into the 320 | # sql abstraction in order to gain a little performance boost. 
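# Build one bulk INSERT over the association table instead of appending
# related objects one at a time through the ORM relationship.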
321 | insert_values.append( 322 | { 323 | assoc_col_name: attr_obj.id, 324 | "msg_id": self.id, 325 | "msg_timestamp": self.timestamp, 326 | } 327 | ) 328 | session.execute(assoc_table.insert(), insert_values) 329 | session.flush() 330 | 331 | @classmethod 332 | def from_msg_id(cls, msg_id): 333 | return session.execute(select(cls).where(cls.msg_id == msg_id)).scalar_one_or_none() 334 | 335 | def as_dict(self, request=None): 336 | return dict( 337 | i=self.i, 338 | msg_id=self.msg_id, 339 | topic=self.topic, 340 | timestamp=self.timestamp, 341 | certificate=self.certificate, 342 | signature=self.signature, 343 | agent_name=self.agent_name, 344 | username=self.agent_name, # DEPRECATED 345 | crypto=self.crypto, 346 | msg=self.msg, 347 | headers=self.headers, 348 | source_name=self.source_name, 349 | source_version=self.source_version, 350 | users=list(sorted(u.name for u in self.users)), 351 | packages=list(sorted(p.name for p in self.packages)), 352 | ) 353 | 354 | def as_fedora_message_dict(self): 355 | headers = self.headers or {} 356 | if "sent-at" not in headers: 357 | headers["sent-at"] = self.timestamp.astimezone(datetime.UTC).isoformat() 358 | return dict( 359 | body=self.msg, 360 | headers=headers, 361 | id=self.msg_id, 362 | priority=headers.get("priority", 0), 363 | queue=None, 364 | topic=self.topic, 365 | ) 366 | 367 | def __json__(self, request=None): 368 | warn( 369 | "The __json__() method has been renamed to as_dict(), and will be removed " 370 | "in the next major version", 371 | DeprecationWarning, 372 | stacklevel=2, 373 | ) 374 | return self.as_dict(request) 375 | 376 | @property 377 | def username(self): 378 | warn( 379 | "The username attribute has been renamed to agent_name, and will be removed " 380 | "in the next major version", 381 | DeprecationWarning, 382 | stacklevel=2, 383 | ) 384 | return self.agent_name 385 | 386 | @classmethod 387 | def make_query( 388 | cls, 389 | start=None, 390 | end=None, 391 | msg_id=None, 392 | users=None, 393 | not_users=None, 394 | packages=None, 395 | not_packages=None, 396 | categories=None, 397 | not_categories=None, 398 | topics=None, 399 | not_topics=None, 400 | agents=None, 401 | not_agents=None, 402 | contains=None, 403 | ): 404 | """Flexible query interface for messages. 405 | 406 | Arguments are filters. start and end should be :mod:`datetime` objs. 407 | 408 | Other filters should be lists of strings. They are applied in a 409 | conjunctive-normal-form (CNF) kind of way 410 | 411 | for example, the following:: 412 | 413 | users = ['ralph', 'lmacken'] 414 | categories = ['bodhi', 'wiki'] 415 | 416 | should return messages where 417 | 418 | (user=='ralph' OR user=='lmacken') AND 419 | (category=='bodhi' OR category=='wiki') 420 | 421 | Furthermore, you can use a negative version of each argument. 422 | 423 | users = ['ralph'] 424 | not_categories = ['bodhi', 'wiki'] 425 | 426 | should return messages where 427 | 428 | (user == 'ralph') AND 429 | NOT (category == 'bodhi' OR category == 'wiki') 430 | 431 | """ 432 | 433 | users = users or [] 434 | not_users = not_users or [] 435 | packages = packages or [] 436 | not_packs = not_packages or [] 437 | categories = categories or [] 438 | not_cats = not_categories or [] 439 | topics = topics or [] 440 | not_topics = not_topics or [] 441 | agents = agents or [] 442 | not_agents = not_agents or [] 443 | contains = contains or [] 444 | 445 | Message = cls 446 | query = select(Message) 447 | 448 | # A little argument validation. 
We could provide some defaults in 449 | # these mixed cases.. but instead we'll just leave it up to our caller. 450 | if (start is not None and end is None) or (end is not None and start is None): 451 | raise ValueError( 452 | "Either both start and end must be specified or neither must be specified" 453 | ) 454 | 455 | if start and end: 456 | query = query.where(between(Message.timestamp, start, end)) 457 | 458 | if msg_id: 459 | query = query.where(Message.msg_id == msg_id) 460 | 461 | # Add the four positive filters as necessary 462 | if users: 463 | query = query.where(or_(*(Message.users.any(User.name == u) for u in users))) 464 | 465 | if packages: 466 | query = query.where(or_(*(Message.packages.any(Package.name == p) for p in packages))) 467 | 468 | if categories: 469 | query = query.where(or_(*(Message.category == category for category in categories))) 470 | 471 | if topics: 472 | query = query.where(or_(*(Message.topic == topic for topic in topics))) 473 | 474 | if agents: 475 | query = query.where(or_(*(Message.agent_name == agent for agent in agents))) 476 | 477 | if contains: 478 | query = query.where(or_(*(Message.msg.like(f"%{contain}%") for contain in contains))) 479 | 480 | # And then the four negative filters as necessary 481 | if not_users: 482 | query = query.where(not_(or_(*(Message.users.any(User.name == u) for u in not_users)))) 483 | 484 | if not_packs: 485 | query = query.where( 486 | not_(or_(*(Message.packages.any(Package.name == p) for p in not_packs))) 487 | ) 488 | 489 | if not_cats: 490 | query = query.where(not_(or_(*(Message.category == category for category in not_cats)))) 491 | 492 | if not_topics: 493 | query = query.where(not_(or_(*(Message.topic == topic for topic in not_topics)))) 494 | 495 | if not_agents: 496 | query = query.where(not_(or_(*(Message.agent_name == agent for agent in not_agents)))) 497 | 498 | return query 499 | 500 | @classmethod 501 | def grep( 502 | cls, 503 | *, 504 | page=1, 505 | rows_per_page=100, 506 | order="asc", 507 | defer=False, 508 | **kwargs, 509 | ): 510 | """Flexible query interface for messages. 511 | 512 | Arguments are filters. start and end should be :mod:`datetime` objs. 513 | 514 | Other filters should be lists of strings. They are applied in a 515 | conjunctive-normal-form (CNF) kind of way 516 | 517 | for example, the following:: 518 | 519 | users = ['ralph', 'lmacken'] 520 | categories = ['bodhi', 'wiki'] 521 | 522 | should return messages where 523 | 524 | (user=='ralph' OR user=='lmacken') AND 525 | (category=='bodhi' OR category=='wiki') 526 | 527 | Furthermore, you can use a negative version of each argument. 528 | 529 | users = ['ralph'] 530 | not_categories = ['bodhi', 'wiki'] 531 | 532 | should return messages where 533 | 534 | (user == 'ralph') AND 535 | NOT (category == 'bodhi' OR category == 'wiki') 536 | 537 | ---- 538 | 539 | If the `defer` argument evaluates to True, the query won't actually 540 | be executed, but a SQLAlchemy query object returned instead. 
541 | """ 542 | query = cls.make_query(**kwargs) 543 | # Finally, tag on our pagination arguments 544 | Message = cls 545 | 546 | query_total = query.with_only_columns(func.count(Message.id)) 547 | total = None 548 | query = query.order_by(getattr(Message.timestamp, order)()) 549 | 550 | if not rows_per_page: 551 | pages = 1 552 | else: 553 | total = session.scalar(query_total) 554 | pages = math.ceil(total / float(rows_per_page)) 555 | query = query.offset(rows_per_page * (page - 1)).limit(rows_per_page) 556 | 557 | if defer: 558 | if total is None: 559 | total = session.scalar(query_total) 560 | return total, pages, query 561 | else: 562 | # Execute! 563 | messages = session.scalars(query).all() 564 | if pages == 1: 565 | total = len(messages) 566 | return total, pages, messages 567 | 568 | @classmethod 569 | def get_first(cls, *, order="asc", **kwargs): 570 | """Get the first message matching the regular grep filters.""" 571 | query = cls.make_query(**kwargs) 572 | query = query.order_by(getattr(Message.timestamp, order)()).limit(1) 573 | return session.scalars(query).first() 574 | 575 | 576 | class NamedSingleton: 577 | id = Column(Integer, primary_key=True, autoincrement=True) 578 | name = Column(UnicodeText, index=True, unique=True) 579 | 580 | @classmethod 581 | def get_or_create(cls, name): 582 | """ 583 | Return the instance of the class with the specified name. If it doesn't 584 | already exist, create it. 585 | """ 586 | # Use an in-memory cache to speed things up. 587 | if name in cls._cache: 588 | # If we cache the instance, SQLAlchemy will run this query anyway because the instance 589 | # will be from a different transaction. So just cache the id. 590 | return session.get(cls, cls._cache[name]) 591 | obj = session.execute(select(cls).where(cls.name == name)).scalar_one_or_none() 592 | if obj is None: 593 | obj = cls(name=name) 594 | session.add(obj) 595 | session.flush() 596 | cls._cache[name] = obj.id 597 | return obj 598 | 599 | @classmethod 600 | def clear_cache(cls): 601 | cls._cache.clear() 602 | 603 | 604 | class User(DeclarativeBase, NamedSingleton): 605 | __tablename__ = "users" 606 | _cache = {} 607 | 608 | 609 | class Package(DeclarativeBase, NamedSingleton): 610 | __tablename__ = "packages" 611 | _cache = {} 612 | 613 | 614 | def _setup_hypertable(table_class): 615 | event.listen( 616 | table_class.__table__, 617 | "after_create", 618 | DDL(f"SELECT create_hypertable('{table_class.__tablename__}', 'timestamp');"), 619 | ) 620 | 621 | 622 | _setup_hypertable(Message) 623 | -------------------------------------------------------------------------------- /datanommer.models/tests/test_model.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 
16 | import datetime 17 | import json 18 | import logging 19 | 20 | import pytest 21 | from bodhi.messages.schemas.update import UpdateCommentV1 22 | from fedora_messaging import message as fedora_message 23 | from sqlalchemy import create_engine, func, select 24 | from sqlalchemy.exc import IntegrityError 25 | from sqlalchemy.sql.selectable import Select 26 | 27 | import datanommer.models as dm 28 | 29 | 30 | def generate_message( 31 | topic="org.fedoraproject.test.a.nice.message", 32 | body=None, 33 | headers=None, 34 | ): 35 | body = body or {"encouragement": "You're doing great!"} 36 | return fedora_message.Message(topic=topic, body=body, headers=headers) 37 | 38 | 39 | def generate_bodhi_update_complete_message(text="testing testing"): 40 | msg = UpdateCommentV1( 41 | body={ 42 | "comment": { 43 | "karma": -1, 44 | "text": text, 45 | "timestamp": "2019-03-18 16:54:48", 46 | "update": { 47 | "alias": "FEDORA-EPEL-2021-f2d195dada", 48 | "builds": [ 49 | {"nvr": "abrt-addon-python3-2.1.11-50.el7"}, 50 | {"nvr": "kernel-10.4.0-2.el7"}, 51 | ], 52 | "status": "pending", 53 | "release": {"name": "F35"}, 54 | "request": "testing", 55 | "user": {"name": "ryanlerch"}, 56 | }, 57 | "user": {"name": "dudemcpants"}, 58 | } 59 | } 60 | ) 61 | msg.topic = f"org.fedoraproject.stg.{msg.topic}" 62 | return msg 63 | 64 | 65 | @pytest.fixture 66 | def add_200_messages(datanommer_models): 67 | for x in range(0, 200): 68 | example_message = generate_message() 69 | example_message.id = f"{x}" 70 | dm.add(example_message) 71 | dm.session.flush() 72 | 73 | 74 | def test_init_uri_and_engine(): 75 | uri = "sqlite:///db.db" 76 | engine = create_engine(uri, future=True) 77 | 78 | with pytest.raises(ValueError, match="uri and engine cannot both be specified"): 79 | dm.init(uri, engine=engine) 80 | 81 | 82 | def test_init_no_uri_and_no_engine(): 83 | with pytest.raises(ValueError, match="One of uri or engine must be specified"): 84 | dm.init() 85 | 86 | 87 | def test_init_with_engine(caplog): 88 | uri = "sqlite:///db.db" 89 | engine = create_engine(uri, future=True) 90 | 91 | dm.init(engine=engine) 92 | 93 | assert not caplog.records 94 | 95 | # if the init with just the engine worked, trying it again will fail 96 | dm.init(engine=engine) 97 | assert caplog.records[0].message == "Session already initialized. Bailing" 98 | 99 | 100 | def test_init_no_init_twice(datanommer_models, mocker, caplog): 101 | dm.init("sqlite:///db.db") 102 | assert caplog.records[0].message == "Session already initialized. 
Bailing" 103 | 104 | 105 | def test_unclassified_category(datanommer_models): 106 | example_message = generate_message(topic="too.short") 107 | dm.add(example_message) 108 | dbmsg = dm.session.scalar(select(dm.Message)) 109 | 110 | assert dbmsg.category == "Unclassified" 111 | 112 | 113 | def test_from_msg_id(datanommer_models): 114 | example_message = generate_message() 115 | example_message.id = "ACUSTOMMESSAGEID" 116 | dm.add(example_message) 117 | dbmsg = dm.Message.from_msg_id("ACUSTOMMESSAGEID") 118 | 119 | assert dbmsg.msg_id == "ACUSTOMMESSAGEID" 120 | 121 | 122 | def test_add_missing_msg_id(datanommer_models, caplog): 123 | caplog.set_level(logging.INFO) 124 | example_message = generate_message() 125 | example_message._properties.message_id = None 126 | dm.add(example_message) 127 | dbmsg = dm.session.scalar(select(dm.Message)) 128 | assert ( 129 | "Message on org.fedoraproject.test.a.nice.message was received without a msg_id" 130 | in caplog.records[-1].message 131 | ) 132 | assert dbmsg.msg_id is not None 133 | 134 | 135 | def test_add_missing_timestamp(datanommer_models): 136 | example_message = generate_message() 137 | example_message._properties.headers["sent-at"] = None 138 | 139 | dm.add(example_message) 140 | 141 | dbmsg = dm.session.scalar(select(dm.Message)) 142 | timediff = datetime.datetime.now() - dbmsg.timestamp 143 | # 60 seconds between adding the message and checking 144 | # the timestamp should be more than enough. 145 | assert timediff < datetime.timedelta(seconds=60) 146 | 147 | 148 | def test_add_timestamp_with_Z(datanommer_models): 149 | example_message = generate_message() 150 | example_message._properties.headers["sent-at"] = "2021-07-27T04:22:42Z" 151 | 152 | dm.add(example_message) 153 | 154 | dbmsg = dm.session.scalar(select(dm.Message)) 155 | assert dbmsg.timestamp.astimezone(datetime.UTC) == datetime.datetime( 156 | 2021, 7, 27, 4, 22, 42, tzinfo=datetime.UTC 157 | ) 158 | 159 | 160 | def test_add_timestamp_with_junk(datanommer_models, caplog): 161 | example_message = generate_message() 162 | example_message._properties.headers["sent-at"] = "2021-07-27T04:22:42JUNK" 163 | 164 | dm.add(example_message) 165 | 166 | assert "Failed to parse sent-at timestamp value" in caplog.records[0].message 167 | 168 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 0 169 | 170 | 171 | def test_add_and_check_for_others(datanommer_models): 172 | # There are no users or packages at the start 173 | assert dm.session.scalar(select(func.count(dm.User.id))) == 0 174 | assert dm.session.scalar(select(func.count(dm.Package.id))) == 0 175 | 176 | # Then add a message 177 | dm.add(generate_bodhi_update_complete_message()) 178 | 179 | # There should now be two of each 180 | assert dm.session.scalar(select(func.count(dm.User.id))) == 2 181 | assert dm.session.scalar(select(func.count(dm.Package.id))) == 2 182 | 183 | # If we add it again, there should be no duplicates 184 | dm.add(generate_bodhi_update_complete_message()) 185 | assert dm.session.scalar(select(func.count(dm.User.id))) == 2 186 | assert dm.session.scalar(select(func.count(dm.Package.id))) == 2 187 | 188 | # Add a new username 189 | dm.add(generate_bodhi_update_complete_message(text="this is @abompard in a comment")) 190 | assert dm.session.scalar(select(func.count(dm.User.id))) == 3 191 | assert dm.session.scalar(select(func.count(dm.Package.id))) == 2 192 | 193 | 194 | def test_add_nothing(datanommer_models): 195 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 0 196 | 197 | 198 | def 
test_add_and_check(datanommer_models): 199 | dm.add(generate_message()) 200 | dm.session.flush() 201 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 202 | 203 | 204 | def test_categories(datanommer_models): 205 | dm.add(generate_bodhi_update_complete_message()) 206 | dm.session.flush() 207 | obj = dm.session.scalar(select(dm.Message)) 208 | assert obj.category == "bodhi" 209 | 210 | 211 | def test_categories_with_umb(datanommer_models): 212 | dm.add(generate_message(topic="/topic/VirtualTopic.eng.brew.task.closed")) 213 | dm.session.flush() 214 | obj = dm.session.scalar(select(dm.Message)) 215 | assert obj.category == "brew" 216 | 217 | 218 | def test_grep_all(datanommer_models): 219 | example_message = generate_message() 220 | print("example message:", repr(example_message)) 221 | print(repr(example_message.body)) 222 | dm.add(example_message) 223 | dm.session.flush() 224 | t, p, r = dm.Message.grep() 225 | assert t == 1 226 | assert p == 1 227 | assert len(r) == 1 228 | print(repr(r)) 229 | assert r[0].msg == example_message.body 230 | 231 | 232 | def test_grep_category(datanommer_models): 233 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 234 | dm.add(example_message) 235 | dm.session.flush() 236 | t, p, r = dm.Message.grep(categories=["bodhi"]) 237 | assert t == 1 238 | assert p == 1 239 | assert len(r) == 1 240 | assert r[0].msg == example_message.body 241 | 242 | 243 | def test_grep_not_category(datanommer_models): 244 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 245 | dm.add(example_message) 246 | dm.session.flush() 247 | t, p, r = dm.Message.grep(not_categories=["bodhi"]) 248 | assert t == 0 249 | assert p == 0 250 | assert len(r) == 0 251 | 252 | 253 | def test_add_headers(datanommer_models): 254 | example_headers = {"foo": "bar", "baz": 1, "wibble": ["zork", "zap"]} 255 | example_message = generate_message( 256 | topic="org.fedoraproject.prod.bodhi.newupdate", headers=example_headers 257 | ) 258 | dm.add(example_message) 259 | dbmsg = dm.session.scalar(select(dm.Message)) 260 | assert dbmsg.headers["foo"] == "bar" 261 | assert dbmsg.headers["baz"] == 1 262 | assert dbmsg.headers["wibble"] == ["zork", "zap"] 263 | 264 | 265 | def test_grep_topics(datanommer_models): 266 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 267 | dm.add(example_message) 268 | dm.session.flush() 269 | t, p, r = dm.Message.grep(topics=["org.fedoraproject.prod.bodhi.newupdate"]) 270 | assert t == 1 271 | assert p == 1 272 | assert len(r) == 1 273 | assert r[0].msg == example_message.body 274 | 275 | 276 | def test_grep_not_topics(datanommer_models): 277 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 278 | dm.add(example_message) 279 | dm.session.flush() 280 | t, p, r = dm.Message.grep(not_topics=["org.fedoraproject.prod.bodhi.newupdate"]) 281 | assert t == 0 282 | assert p == 0 283 | assert len(r) == 0 284 | 285 | 286 | def test_grep_start_end_validation(datanommer_models): 287 | with pytest.raises( 288 | ValueError, 289 | match="Either both start and end must be specified or neither must be specified", 290 | ): 291 | dm.Message.grep(start="2020-03-26") 292 | with pytest.raises( 293 | ValueError, 294 | match="Either both start and end must be specified or neither must be specified", 295 | ): 296 | dm.Message.grep(end="2020-03-26") 297 | 298 | 299 | def test_grep_start_end(datanommer_models): 300 | example_message = generate_message() 301 
| example_message._properties.headers["sent-at"] = "2021-04-01T00:00:01" 302 | dm.add(example_message) 303 | 304 | bodhi_example_message = generate_bodhi_update_complete_message() 305 | bodhi_example_message._properties.headers["sent-at"] = "2021-06-01T00:00:01" 306 | dm.add(bodhi_example_message) 307 | 308 | dm.session.flush() 309 | total, pages, messages = dm.Message.grep(start="2021-04-01", end="2021-05-01") 310 | assert total == 1 311 | assert pages == 1 312 | assert len(messages) == 1 313 | assert messages[0].msg == example_message.body 314 | 315 | total, pages, messages = dm.Message.grep(start="2021-06-01", end="2021-07-01") 316 | assert total == 1 317 | assert pages == 1 318 | assert len(messages) == 1 319 | assert messages[0].msg == bodhi_example_message.body 320 | 321 | 322 | def test_grep_msg_id(datanommer_models): 323 | example_message = generate_message() 324 | dm.add(example_message) 325 | 326 | bodhi_example_message = generate_bodhi_update_complete_message() 327 | dm.add(bodhi_example_message) 328 | 329 | dm.session.flush() 330 | total, pages, messages = dm.Message.grep(msg_id=example_message.id) 331 | assert total == 1 332 | assert pages == 1 333 | assert len(messages) == 1 334 | assert messages[0].msg == example_message.body 335 | 336 | total, pages, messages = dm.Message.grep(msg_id=bodhi_example_message.id) 337 | assert total == 1 338 | assert pages == 1 339 | assert len(messages) == 1 340 | assert messages[0].msg == bodhi_example_message.body 341 | 342 | total, pages, messages = dm.Message.grep(msg_id="NOTAMESSAGEID") 343 | assert total == 0 344 | assert pages == 0 345 | assert len(messages) == 0 346 | 347 | 348 | def test_grep_agents(datanommer_models): 349 | example_message = generate_message() 350 | dm.add(example_message) 351 | 352 | bodhi_example_message = generate_bodhi_update_complete_message() 353 | dm.add(bodhi_example_message) 354 | 355 | dm.session.flush() 356 | 357 | total, pages, messages = dm.Message.grep(agents=["dudemcpants"]) 358 | 359 | assert total == 1 360 | assert pages == 1 361 | assert len(messages) == 1 362 | 363 | assert messages[0].msg == bodhi_example_message.body 364 | 365 | 366 | def test_grep_not_agents(datanommer_models, mocker): 367 | example_message = generate_message() # has agent_name == None 368 | dm.add(example_message) 369 | 370 | bodhi_example_message = generate_bodhi_update_complete_message() 371 | dm.add(bodhi_example_message) # has agent_name == "dudemcpants" 372 | 373 | class MessageWithAgent(fedora_message.Message): 374 | topic = "org.fedoraproject.test.a.message.with.agent" 375 | agent_name = "dummy-agent-name" 376 | 377 | fedora_message._schema_name_to_class["MessageWithAgent"] = MessageWithAgent 378 | fedora_message._class_to_schema_name[MessageWithAgent] = "MessageWithAgent" 379 | 380 | example_message_with_agent = MessageWithAgent( 381 | body={"subject": "this is a message with an agent"} 382 | ) 383 | dm.add(example_message_with_agent) 384 | 385 | dm.session.flush() 386 | 387 | total, pages, messages = dm.Message.grep(not_agents=["dudemcpants"]) 388 | 389 | # Messages with agent_name == None are not returned 390 | assert total == 1 391 | assert pages == 1 392 | assert len(messages) == 1 393 | 394 | assert messages[0].msg == example_message_with_agent.body 395 | 396 | 397 | def test_grep_users(datanommer_models): 398 | example_message = generate_message() 399 | dm.add(example_message) 400 | 401 | bodhi_example_message = generate_bodhi_update_complete_message() 402 | dm.add(bodhi_example_message) 403 | 404 | 
dm.session.flush() 405 | 406 | total, pages, messages = dm.Message.grep(users=["dudemcpants"]) 407 | 408 | assert total == 1 409 | assert pages == 1 410 | assert len(messages) == 1 411 | 412 | assert messages[0].msg == bodhi_example_message.body 413 | 414 | 415 | def test_grep_not_users(datanommer_models): 416 | example_message = generate_message() 417 | dm.add(example_message) 418 | 419 | bodhi_example_message = generate_bodhi_update_complete_message() 420 | dm.add(bodhi_example_message) 421 | 422 | dm.session.flush() 423 | 424 | total, pages, messages = dm.Message.grep(not_users=["dudemcpants"]) 425 | 426 | assert total == 1 427 | assert pages == 1 428 | assert len(messages) == 1 429 | 430 | assert messages[0].msg == example_message.body 431 | 432 | 433 | def test_grep_packages(datanommer_models): 434 | example_message = generate_message() 435 | dm.add(example_message) 436 | 437 | bodhi_example_message = generate_bodhi_update_complete_message() 438 | dm.add(bodhi_example_message) 439 | 440 | dm.session.flush() 441 | 442 | total, pages, messages = dm.Message.grep(packages=["kernel"]) 443 | 444 | assert total == 1 445 | assert pages == 1 446 | assert len(messages) == 1 447 | 448 | assert messages[0].msg == bodhi_example_message.body 449 | 450 | 451 | def test_grep_not_packages(datanommer_models): 452 | example_message = generate_message() 453 | dm.add(example_message) 454 | 455 | bodhi_example_message = generate_bodhi_update_complete_message() 456 | dm.add(bodhi_example_message) 457 | 458 | dm.session.flush() 459 | 460 | total, pages, messages = dm.Message.grep(not_packages=["kernel"]) 461 | 462 | assert total == 1 463 | assert pages == 1 464 | assert len(messages) == 1 465 | 466 | assert messages[0].msg == example_message.body 467 | 468 | 469 | def test_grep_contains(datanommer_models): 470 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 471 | dm.add(example_message) 472 | dm.session.flush() 473 | t, p, r = dm.Message.grep(contains=["doing"]) 474 | assert t == 1 475 | assert p == 1 476 | assert len(r) == 1 477 | assert r[0].msg == example_message.body 478 | 479 | 480 | def test_grep_rows_per_page(datanommer_models, add_200_messages): 481 | total, pages, messages = dm.Message.grep() 482 | assert total == 200 483 | assert pages == 2 484 | assert len(messages) == 100 485 | 486 | for rows_per_page in (None, 0): 487 | try: 488 | total, pages, messages = dm.Message.grep(rows_per_page=rows_per_page) 489 | except ZeroDivisionError as e: 490 | pytest.fail(e) 491 | assert total == 200 492 | assert pages == 1 493 | assert len(messages) == 200 494 | 495 | 496 | def test_grep_defer(datanommer_models): 497 | example_message = generate_message() 498 | dm.add(example_message) 499 | 500 | dm.session.flush() 501 | 502 | _total, _pages, query = dm.Message.grep(defer=True) 503 | assert isinstance(query, Select) 504 | 505 | assert dm.session.scalars(query).all() == dm.Message.grep()[2] 506 | 507 | 508 | def test_grep_no_paging_and_defer(datanommer_models, add_200_messages): 509 | total, pages, _messages = dm.Message.grep(rows_per_page=0, defer=True) 510 | assert total == 200 511 | assert pages == 1 512 | 513 | 514 | def test_grep_no_total_if_single_page(datanommer_models, add_200_messages, mocker): 515 | # Assert we don't query the total of messages if we're getting them all anyway 516 | scalar_spy = mocker.spy(dm.session, "scalar") 517 | total, _pages, _messages = dm.Message.grep(rows_per_page=0) 518 | assert total == 200 519 | scalar_spy.assert_not_called() 520 | 521 | 
522 | def test_get_first(datanommer_models): 523 | messages = [] 524 | for x in range(0, 200): 525 | example_message = generate_message() 526 | example_message.id = f"{x}" 527 | dm.add(example_message) 528 | messages.append(example_message) 529 | dm.session.flush() 530 | msg = dm.Message.get_first() 531 | assert msg.msg_id == "0" 532 | assert msg.msg == messages[0].body 533 | 534 | 535 | def test_add_duplicate(datanommer_models, caplog): 536 | example_message = generate_message() 537 | dm.add(example_message) 538 | dm.add(example_message) 539 | # if no exception was thrown, then we successfully ignored the 540 | # duplicate message 541 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 542 | assert ( 543 | "Skipping message from org.fedoraproject.test.a.nice.message" in caplog.records[0].message 544 | ) 545 | 546 | 547 | def test_add_integrity_error(datanommer_models, mocker, caplog): 548 | mock_session_add = mocker.patch("datanommer.models.session.add") 549 | mock_session_add.side_effect = IntegrityError("asdf", "asd", "asdas") 550 | example_message = generate_message() 551 | dm.add(example_message) 552 | assert "Unknown Integrity Error: message" in caplog.records[0].message 553 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 0 554 | 555 | 556 | def test_add_duplicate_package(datanommer_models): 557 | # Define a special message schema and register it 558 | class MessageWithPackages(fedora_message.Message): 559 | @property 560 | def packages(self): 561 | return ["pkg", "pkg"] 562 | 563 | fedora_message._schema_name_to_class["MessageWithPackages"] = MessageWithPackages 564 | fedora_message._class_to_schema_name[MessageWithPackages] = "MessageWithPackages" 565 | example_message = MessageWithPackages( 566 | topic="org.fedoraproject.test.a.nice.message", 567 | body={"encouragement": "You're doing great!"}, 568 | headers=None, 569 | ) 570 | try: 571 | dm.add(example_message) 572 | except IntegrityError as e: 573 | pytest.fail(e) 574 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 575 | dbmsg = dm.session.scalar(select(dm.Message)) 576 | assert len(dbmsg.packages) == 1 577 | assert dbmsg.packages[0].name == "pkg" 578 | 579 | 580 | @pytest.mark.parametrize( 581 | "property_name,name_in_msg", [("usernames", "users"), ("packages", "packages")] 582 | ) 583 | def test_add_message_with_error_on_property(datanommer_models, caplog, property_name, name_in_msg): 584 | # Define a special message schema and register it 585 | class CustomMessage(fedora_message.Message): 586 | @property 587 | def packages(self): 588 | raise KeyError 589 | 590 | def _filter_headers(self): 591 | return {} 592 | 593 | def _crash(self): 594 | raise KeyError 595 | 596 | setattr(CustomMessage, property_name, property(_crash)) 597 | 598 | fedora_message._schema_name_to_class["CustomMessage"] = CustomMessage 599 | fedora_message._class_to_schema_name[CustomMessage] = "CustomMessage" 600 | example_message = CustomMessage( 601 | topic="org.fedoraproject.test.a.nice.message", 602 | body={"encouragement": "You're doing great!"}, 603 | headers=None, 604 | ) 605 | try: 606 | dm.add(example_message) 607 | except KeyError as e: 608 | pytest.fail(e) 609 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 610 | assert caplog.records[0].message == ( 611 | f"Could not get the list of {name_in_msg} from a message on " 612 | f"org.fedoraproject.test.a.nice.message with id {example_message.id}" 613 | ) 614 | 615 | 616 | def test_as_fedora_message_dict(datanommer_models): 617 | 
example_message = generate_message() 618 | dm.add(example_message) 619 | 620 | dbmsg = dm.session.scalar(select(dm.Message)) 621 | 622 | message_json = json.dumps(dbmsg.as_fedora_message_dict()) 623 | 624 | # this should be the same as if we use the fedora_messaging dump function 625 | assert json.loads(fedora_message.dumps(example_message)) == json.loads(message_json) 626 | 627 | 628 | def test_as_fedora_message_dict_old_headers(datanommer_models): 629 | # Messages received with fedmsg don't have the sent-at header 630 | example_message = generate_message() 631 | dm.add(example_message) 632 | 633 | dbmsg = dm.session.scalar(select(dm.Message)) 634 | del dbmsg.headers["sent-at"] 635 | 636 | message_dict = dbmsg.as_fedora_message_dict() 637 | print(message_dict) 638 | print(json.loads(fedora_message.dumps(example_message))) 639 | 640 | # this should be the same as if we use the fedora_messaging dump function 641 | assert json.loads(fedora_message.dumps(example_message)) == message_dict 642 | 643 | 644 | def test_as_fedora_message_dict_no_headers(datanommer_models): 645 | # Messages can have no headers 646 | example_message = generate_message() 647 | dm.add(example_message) 648 | 649 | dbmsg = dm.session.scalar(select(dm.Message)) 650 | assert len(dbmsg.headers.keys()) == 5 651 | 652 | # Clear the headers 653 | dbmsg.headers = None 654 | 655 | try: 656 | message_dict = dbmsg.as_fedora_message_dict() 657 | except TypeError as e: 658 | pytest.fail(e) 659 | 660 | assert list(message_dict["headers"].keys()) == ["sent-at"] 661 | 662 | 663 | def test_as_dict(datanommer_models): 664 | dm.add(generate_message()) 665 | dbmsg = dm.session.scalar(select(dm.Message)) 666 | message_dict = dbmsg.as_dict() 667 | 668 | # we should have 14 keys in this dict 669 | assert len(message_dict) == 15 670 | assert message_dict["msg"] == {"encouragement": "You're doing great!"} 671 | assert message_dict["topic"] == "org.fedoraproject.test.a.nice.message" 672 | 673 | 674 | def test_as_dict_with_users_and_packages(datanommer_models): 675 | dm.add(generate_bodhi_update_complete_message()) 676 | dbmsg = dm.session.scalar(select(dm.Message)) 677 | message_dict = dbmsg.as_dict() 678 | 679 | assert message_dict["users"] == ["dudemcpants", "ryanlerch"] 680 | assert message_dict["packages"] == ["abrt-addon-python3", "kernel"] 681 | 682 | 683 | def test___json__deprecated(datanommer_models, caplog, mocker): 684 | mock_as_dict = mocker.patch("datanommer.models.Message.as_dict") 685 | 686 | dm.add(generate_message()) 687 | 688 | with pytest.warns(DeprecationWarning): 689 | dbmsg = dm.session.scalar(select(dm.Message)) 690 | dbmsg.__json__() 691 | 692 | mock_as_dict.assert_called_once() 693 | 694 | 695 | def test_username_deprecated(datanommer_models, caplog, mocker): 696 | dm.add(generate_message()) 697 | dbmsg = dm.session.scalar(select(dm.Message)) 698 | dbmsg.agent_name = "dummy" 699 | 700 | with pytest.warns(DeprecationWarning): 701 | assert dbmsg.username == "dummy" 702 | 703 | 704 | def test_singleton_create(datanommer_models): 705 | dm.Package.get_or_create("foobar") 706 | assert [p.name for p in dm.session.scalars(select(dm.Package))] == ["foobar"] 707 | 708 | 709 | def test_singleton_get_existing(datanommer_models): 710 | p1 = dm.Package.get_or_create("foobar") 711 | # Clear the in-memory cache 712 | dm.Package._cache.clear() 713 | p2 = dm.Package.get_or_create("foobar") 714 | assert p1.id == p2.id 715 | --------------------------------------------------------------------------------