├── datanommer.commands ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── utils.py │ ├── test_extract_users.py │ └── test_commands.py ├── news │ ├── .gitignore │ └── 1434.feature ├── README.rst ├── tox.ini ├── config.toml.example ├── datanommer │ └── commands │ │ ├── utils.py │ │ ├── extract_users.py │ │ └── __init__.py ├── pyproject.toml └── NEWS.rst ├── docs ├── datanommer.models.NEWS.rst ├── datanommer.commands.NEWS.rst ├── datanommer.consumer.NEWS.rst ├── user.rst ├── requirements.txt ├── index.rst ├── sysadmin.rst ├── conf.py └── contributing.rst ├── .s2i ├── environment ├── datanommer-upgrade-db.sh ├── run-datanommer.sh └── bin │ └── assemble ├── datanommer.consumer ├── tests │ ├── conftest.py │ ├── __init__.py │ └── test_consumer.py ├── news │ └── .gitignore ├── README.rst ├── tox.ini ├── NEWS.rst ├── datanommer │ └── consumer │ │ └── __init__.py └── pyproject.toml ├── datanommer.models ├── tests │ ├── conftest.py │ ├── test_jsonencodeddict.py │ └── test_model.py ├── news │ ├── .gitignore │ └── 1434.feature ├── README.rst ├── datanommer │ └── models │ │ ├── alembic │ │ ├── versions │ │ │ ├── 5db25abc63be_init.py │ │ │ ├── f6918385051f_messages_headers_index.py │ │ │ ├── 429e6f2cba6f_message_agent_name.py │ │ │ ├── 951c40020acc_unique.py │ │ │ └── f4fdb5442d05_add_view_recent_topics.py │ │ ├── script.py.mako │ │ └── env.py │ │ ├── testing │ │ └── __init__.py │ │ ├── view.py │ │ └── __init__.py ├── tox.ini ├── alembic.ini ├── NEWS.rst └── pyproject.toml ├── devel └── ansible │ ├── datanommer.yml │ ├── ansible.cfg │ └── roles │ ├── datanommer │ ├── files │ │ ├── datanommer.service │ │ ├── .bashrc │ │ └── alembic.ini │ └── tasks │ │ └── main.yml │ └── postgresql │ └── tasks │ └── main.yml ├── .github ├── renovate.json └── workflows │ ├── label-when-deployed.yaml │ └── tests.yml ├── tools ├── timescaledb │ ├── migrate.toml │ └── migrate-to-timescaledb.py ├── install-models-as-editable.sh ├── towncrier │ ├── run-towncrier.sh │ ├── get-authors.py │ └── template.rst.j2 ├── run-liccheck.sh └── check-dep-versions.py ├── .gitleaks.toml ├── .bandit.cfg ├── .gitignore ├── .pre-commit-config.yaml ├── runtests.sh ├── README.md ├── .readthedocs.yaml ├── pyproject.toml ├── Vagrantfile └── tox.ini /datanommer.commands/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/datanommer.models.NEWS.rst: -------------------------------------------------------------------------------- 1 | ../datanommer.models/NEWS.rst -------------------------------------------------------------------------------- /docs/datanommer.commands.NEWS.rst: -------------------------------------------------------------------------------- 1 | ../datanommer.commands/NEWS.rst -------------------------------------------------------------------------------- /docs/datanommer.consumer.NEWS.rst: -------------------------------------------------------------------------------- 1 | ../datanommer.consumer/NEWS.rst -------------------------------------------------------------------------------- /.s2i/environment: -------------------------------------------------------------------------------- 1 | UPGRADE_PIP_TO_LATEST=true 2 | APP_SCRIPT=.s2i/run-datanommer.sh 3 | -------------------------------------------------------------------------------- /datanommer.consumer/tests/conftest.py: -------------------------------------------------------------------------------- 1 | pytest_plugins = 
"datanommer.models.testing" 2 | -------------------------------------------------------------------------------- /datanommer.models/tests/conftest.py: -------------------------------------------------------------------------------- 1 | pytest_plugins = "datanommer.models.testing" 2 | -------------------------------------------------------------------------------- /docs/user.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | User Guide 3 | ========== 4 | 5 | Write the user guide here. 6 | -------------------------------------------------------------------------------- /datanommer.commands/news/.gitignore: -------------------------------------------------------------------------------- 1 | # Dummy file because git won't add empty directories 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /datanommer.consumer/news/.gitignore: -------------------------------------------------------------------------------- 1 | # Dummy file because git won't add empty directories 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /datanommer.models/news/.gitignore: -------------------------------------------------------------------------------- 1 | # Dummy file because git won't add empty directories 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | myst-parser 3 | sphinx-click 4 | ./datanommer.models 5 | ./datanommer.commands 6 | -------------------------------------------------------------------------------- /devel/ansible/datanommer.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: datanommer 3 | become: true 4 | become_method: sudo 5 | roles: 6 | - postgresql 7 | - datanommer 8 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["local>fedora-infra/shared:renovate-config"] 4 | } 5 | -------------------------------------------------------------------------------- /tools/timescaledb/migrate.toml: -------------------------------------------------------------------------------- 1 | source_url = "postgresql://datanommer:datanommer@localhost/datanommer" 2 | dest_url = "postgresql://datanommer:datanommer@localhost/messages" 3 | -------------------------------------------------------------------------------- /.s2i/datanommer-upgrade-db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | exec /opt/app-root/src/.local/venvs/datanommer/bin/alembic \ 4 | -c /etc/fedora-messaging/alembic.ini \ 5 | upgrade head 6 | -------------------------------------------------------------------------------- /datanommer.models/news/1434.feature: -------------------------------------------------------------------------------- 1 | Add materialized view `recent_topics` that provides efficient querying of recent message topics with aggregated message counts and sorting capabilities -------------------------------------------------------------------------------- /.gitleaks.toml: -------------------------------------------------------------------------------- 1 | [allowlist] 2 | paths = 
[ 3 | "docs/sysadmin.rst", 4 | "tools/timescaledb/migrate.toml", 5 | "devel/ansible/roles/datanommer/templates/fedora-messaging.toml.j2", 6 | ] 7 | -------------------------------------------------------------------------------- /datanommer.commands/news/1434.feature: -------------------------------------------------------------------------------- 1 | Add `datanommer-refresh-view` command to refresh the `recent_topics` materialized view, intended to be run periodically via cron job to keep topic view up to date -------------------------------------------------------------------------------- /.bandit.cfg: -------------------------------------------------------------------------------- 1 | [bandit] 2 | targets: datanommer.commands,datanommer.consumer,datanommer.models 3 | # Can't do this now because of https://github.com/PyCQA/bandit/issues/693 4 | #exclude: .git,.tox,*/tests/*,*/.tox/* 5 | -------------------------------------------------------------------------------- /devel/ansible/ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | # Human-readable output 3 | callback_result_format = yaml 4 | # Defaults to /usr/bin/python3.12 and that's not the default python 5 | interpreter_python = /usr/bin/python3 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | ez_setup 4 | tw2* 5 | *.db* 6 | data 7 | build 8 | dist 9 | docs/_build 10 | docs/_source 11 | *.egg* 12 | README.pdf 13 | *.pid 14 | *.log 15 | *.swo 16 | .tox/ 17 | .vagrant 18 | 19 | # Coverage 20 | .coverage 21 | htmlcov 22 | coverage.xml 23 | -------------------------------------------------------------------------------- /.s2i/run-datanommer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # We install the app in a specific virtualenv: 6 | export PATH=/opt/app-root/src/.local/venvs/datanommer/bin:$PATH 7 | 8 | # Run the application 9 | fedora-messaging consume --callback datanommer.consumer:Nommer 10 | -------------------------------------------------------------------------------- /tools/install-models-as-editable.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Install datanommer.models in develop mode when run from a virtualenv such as 4 | # those tox creates. 5 | 6 | set -e 7 | 8 | CURDIR=`pwd` 9 | 10 | set -x 11 | 12 | cd ../datanommer.models 13 | poetry install --all-extras 14 | cd "$CURDIR" 15 | -------------------------------------------------------------------------------- /datanommer.models/README.rst: -------------------------------------------------------------------------------- 1 | datanommer.models 2 | ================= 3 | 4 | This package contains the SQLAlchemy data model for datanommer. 5 | 6 | Datanommer is a storage consumer for the Fedora Infrastructure Message Bus 7 | (fedmsg). It is comprised of a `fedmsg `_ consumer that 8 | stuffs every message into a sqlalchemy database. 9 | -------------------------------------------------------------------------------- /datanommer.consumer/README.rst: -------------------------------------------------------------------------------- 1 | datanommer.consumer 2 | =================== 3 | 4 | This package contains the fedmsg-hub consumer plugin for datanommer. 
5 | 6 | Datanommer is a storage consumer for the Fedora Infrastructure Message Bus 7 | (fedmsg). It is comprised of a `fedmsg `_ consumer that 8 | stuffs every message into a sqlalchemy database. 9 | -------------------------------------------------------------------------------- /tools/towncrier/run-towncrier.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "Building release notes for all packages" 6 | for package in datanommer.{models,consumer,commands}; do 7 | echo "[$package] Building release notes..." 8 | pushd $package 9 | poetry install --all-extras 10 | poetry run towncrier build --yes $@ 11 | popd 12 | echo "[$package] done." 13 | done 14 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/5db25abc63be_init.py: -------------------------------------------------------------------------------- 1 | """Initial revision 2 | 3 | Revision ID: 5db25abc63be 4 | Revises: None 5 | Create Date: 2021-09-15 16:15:37.188484 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = "5db25abc63be" 11 | down_revision = None 12 | 13 | 14 | def upgrade(): 15 | pass 16 | 17 | 18 | def downgrade(): 19 | pass 20 | -------------------------------------------------------------------------------- /devel/ansible/roles/datanommer/files/datanommer.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=datanommer 3 | 4 | [Service] 5 | User=vagrant 6 | Restart=on-failure 7 | RestartSec=5s 8 | WorkingDirectory=/home/vagrant/datanommer/datanommer.consumer 9 | ExecStart=/bin/sh -c 'source /srv/venv/bin/activate && poetry run fedora-messaging consume --callback datanommer.consumer:Nommer' 10 | 11 | [Install] 12 | WantedBy=multi-user.target 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/asottile/pyupgrade 3 | rev: v3.21.2 4 | hooks: 5 | - id: pyupgrade 6 | args: 7 | - --py311-plus 8 | 9 | - repo: https://github.com/psf/black 10 | rev: 25.12.0 11 | hooks: 12 | - id: black 13 | 14 | # Ruff 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | # Ruff version. 17 | rev: v0.14.9 18 | hooks: 19 | - id: ruff-check 20 | -------------------------------------------------------------------------------- /datanommer.commands/README.rst: -------------------------------------------------------------------------------- 1 | datanommer.commands 2 | =================== 3 | 4 | .. split here 5 | 6 | This package contains the console commands for datanommer, including:: 7 | 8 | - datanommer-create-db 9 | - datanommer-dump 10 | - datanommer-stats 11 | - datanommer-refresh-view 12 | 13 | Datanommer is a storage consumer for the Fedora Infrastructure Message Bus 14 | (fedmsg). It is comprised of a `fedmsg `_ consumer that 15 | stuffs every message into a sqlalchemy database. 
16 | -------------------------------------------------------------------------------- /devel/ansible/roles/datanommer/files/.bashrc: -------------------------------------------------------------------------------- 1 | # .bashrc 2 | source /srv/venv/bin/activate 3 | 4 | alias datanommer-consumer-start="sudo systemctl start datanommer.service && echo 'datanommer consumer is running'" 5 | alias datanommer-consumer-logs="sudo journalctl -u datanommer.service -e" 6 | alias datanommer-consumer-restart="sudo systemctl restart datanommer.service && echo 'datanommer consumer is running'" 7 | alias datanommer-consumer-stop="sudo systemctl stop datanommer.service && echo 'datanommer service stopped'" 8 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = ${repr(up_revision)} 11 | down_revision = ${repr(down_revision)} 12 | 13 | from alembic import op 14 | import sqlalchemy as sa 15 | ${imports if imports else ""} 16 | 17 | def upgrade(): 18 | ${upgrades if upgrades else "pass"} 19 | 20 | 21 | def downgrade(): 22 | ${downgrades if downgrades else "pass"} 23 | -------------------------------------------------------------------------------- /datanommer.models/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{311,312},licenses 3 | skipsdist = True 4 | isolated_build = true 5 | 6 | [testenv] 7 | passenv = HOME 8 | sitepackages = false 9 | skip_install = true 10 | allowlist_externals = 11 | poetry 12 | env = 13 | SQLALCHEMY_WARN_20=1 14 | commands_pre = 15 | poetry install --all-extras 16 | commands = 17 | poetry run pytest -c ../pyproject.toml {posargs} 18 | 19 | [testenv:licenses] 20 | allowlist_externals = 21 | {[testenv]allowlist_externals} 22 | {toxinidir}/../tools/run-liccheck.sh 23 | commands = 24 | {toxinidir}/../tools/run-liccheck.sh 25 | -------------------------------------------------------------------------------- /runtests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | which tox &>/dev/null || { 6 | echo "You need to install tox" >&2 7 | exit 2 8 | } 9 | which pre-commit &>/dev/null || { 10 | echo "You need to install pre-commit" >&2 11 | exit 2 12 | } 13 | which krb5-config &> /dev/null || { 14 | echo "You need to install krb5-devel" >&2 15 | exit 2 16 | } 17 | 18 | echo "Running checks for all packages" 19 | pre-commit run --all-files 20 | 21 | echo "Running unit tests for all packages" 22 | for package in datanommer.{models,consumer,commands}; do 23 | echo "[$package] Testing..." 24 | pushd $package 25 | tox $@ 26 | popd 27 | echo "[$package] done." 
28 | done 29 | -------------------------------------------------------------------------------- /datanommer.commands/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{311,312},licenses 3 | skipsdist = True 4 | isolated_build = true 5 | 6 | [testenv] 7 | passenv = HOME 8 | sitepackages = false 9 | skip_install = true 10 | allowlist_externals = 11 | poetry 12 | env = 13 | SQLALCHEMY_WARN_20=1 14 | commands_pre = 15 | poetry install --all-extras 16 | poetry run {toxinidir}/../tools/install-models-as-editable.sh 17 | commands = 18 | poetry run pytest -c ../pyproject.toml {posargs} 19 | 20 | [testenv:licenses] 21 | basepython = python3.11 22 | allowlist_externals = 23 | {[testenv]allowlist_externals} 24 | {toxinidir}/../tools/run-liccheck.sh 25 | commands = 26 | {toxinidir}/../tools/run-liccheck.sh 27 | -------------------------------------------------------------------------------- /datanommer.consumer/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{311,312},licenses 3 | skipsdist = True 4 | isolated_build = true 5 | 6 | [testenv] 7 | passenv = HOME 8 | sitepackages = false 9 | skip_install = true 10 | allowlist_externals = 11 | poetry 12 | env = 13 | SQLALCHEMY_WARN_20=1 14 | commands_pre = 15 | poetry install --all-extras 16 | poetry run {toxinidir}/../tools/install-models-as-editable.sh 17 | commands = 18 | poetry run pytest -c ../pyproject.toml {posargs} 19 | 20 | [testenv:licenses] 21 | basepython = python3.11 22 | allowlist_externals = 23 | {[testenv]allowlist_externals} 24 | {toxinidir}/../tools/run-liccheck.sh 25 | commands = 26 | {toxinidir}/../tools/run-liccheck.sh 27 | -------------------------------------------------------------------------------- /datanommer.commands/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import datanommer.commands 4 | 5 | 6 | pytest_plugins = "datanommer.models.testing" 7 | 8 | 9 | @pytest.fixture 10 | def mock_init(mocker): 11 | # This is actually not very useful because init() checks a private attribute on the 12 | # session object to avoid being called twice. It just prevents a warning log. 13 | mocker.patch("datanommer.commands.m.init") 14 | 15 | 16 | @pytest.fixture 17 | def mock_config(mocker): 18 | mocker.patch.dict( 19 | datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"], 20 | { 21 | "datanommer_sqlalchemy_url": "", 22 | "alembic_ini": None, 23 | }, 24 | ) 25 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/f6918385051f_messages_headers_index.py: -------------------------------------------------------------------------------- 1 | """Messages.headers index 2 | 3 | Revision ID: f6918385051f 4 | Revises: 951c40020acc 5 | Create Date: 2024-05-07 16:05:05.344863 6 | 7 | """ 8 | 9 | from alembic import op 10 | 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = "f6918385051f" 14 | down_revision = "951c40020acc" 15 | 16 | 17 | def upgrade(): 18 |     op.create_index( 19 |         "ix_messages_headers", 20 |         "messages", 21 |         ["headers"], 22 |         unique=False, 23 |         postgresql_using="gin", 24 |         postgresql_ops={"headers": "jsonb_path_ops"}, 25 |     ) 26 | 27 | 28 | def downgrade(): 29 |     op.drop_index("ix_messages_headers", table_name="messages", postgresql_using="gin") 30 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/429e6f2cba6f_message_agent_name.py: -------------------------------------------------------------------------------- 1 | """Message.username → Message.agent_name 2 | 3 | Revision ID: 429e6f2cba6f 4 | Revises: f6918385051f 5 | Create Date: 2024-06-07 09:12:33.393757 6 | 7 | """ 8 | 9 | from alembic import op 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "429e6f2cba6f" 14 | down_revision = "f6918385051f" 15 | 16 | 17 | def upgrade(): 18 |     op.alter_column("messages", "username", new_column_name="agent_name") 19 |     op.create_index(op.f("ix_messages_agent_name"), "messages", ["agent_name"], unique=False) 20 | 21 | 22 | def downgrade(): 23 |     op.drop_index(op.f("ix_messages_agent_name"), table_name="messages") 24 |     op.alter_column("messages", "agent_name", new_column_name="username") 25 | -------------------------------------------------------------------------------- /tools/run-liccheck.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | STRATEGY_URL=https://raw.githubusercontent.com/fedora-infra/shared/main/liccheck-strategy.ini 4 | 5 | trap 'rm -f "$TMPFILE $STRATEGY_TMPFILE"' EXIT 6 | 7 | set -e 8 | set -x 9 | 10 | TMPFILE=$(mktemp -t requirements-XXXXXX.txt) 11 | STRATEGY_TMPFILE=$(mktemp -t liccheck-strategy-XXXXXX.ini) 12 | 13 | curl -o $STRATEGY_TMPFILE $STRATEGY_URL 14 | 15 | poetry export --with dev --without-hashes -f requirements.txt -o $TMPFILE 16 | 17 | # liccheck requires pkg_resources 18 | # https://github.com/dhatim/python-license-check/issues/114 19 | poetry run pip install setuptools 20 | 21 | # Use pip freeze instead of poetry when it fails 22 | #pip freeze --exclude-editable --isolated > $TMPFILE 23 | 24 | poetry run liccheck -r $TMPFILE -s $STRATEGY_TMPFILE 25 | -------------------------------------------------------------------------------- /datanommer.consumer/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see <http://www.gnu.org/licenses/>.
16 | -------------------------------------------------------------------------------- /.github/workflows/label-when-deployed.yaml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Contributors to the Fedora Project 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | name: Apply labels when deployed 6 | 7 | on: 8 |   push: 9 |     branches: 10 |       - staging 11 |       - stable 12 | 13 | jobs: 14 |   label: 15 |     name: Apply labels 16 |     runs-on: ubuntu-latest 17 | 18 |     steps: 19 |       - name: Staging deployment 20 |         uses: fedora-infra/label-when-in-branch@v1 21 |         with: 22 |           token: ${{ secrets.GITHUB_TOKEN }} 23 |           branch: staging 24 |           label: deployed:staging 25 |       - name: Production deployment 26 |         uses: fedora-infra/label-when-in-branch@v1 27 |         with: 28 |           token: ${{ secrets.GITHUB_TOKEN }} 29 |           branch: stable 30 |           label: deployed:prod 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Datanommer 2 | 3 | Datanommer is an application consisting solely of a Fedora Messaging consumer that places every message into a Postgres / TimescaleDB database. 4 | 5 | It comprises three modules: 6 | 7 | * **datanommer.consumer**: the Fedora Messaging consumer that monitors the queue and places every message into the database 8 | * **datanommer.models**: the database models used by the consumer. These models are also used by [Datagrepper](https://github.com/fedora-infra/datagrepper), [FMN](https://github.com/fedora-infra/fmn), and [fedbadges](https://github.com/fedora-infra/fedbadges). Typically, to access the information stored in the database by datanommer, use the [Datagrepper](https://github.com/fedora-infra/datagrepper) JSON API. 9 | * **datanommer.commands**: a set of command-line tools for developers and sysadmins. 10 | 11 | Refer to the [online documentation](https://datanommer.readthedocs.io/) for details. 12 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.md 2 |    :parser: myst_parser.sphinx_ 3 | 4 | 5 | .. User Guide 6 | 7 | .. toctree:: 8 |    :maxdepth: 2 9 |    :caption: User Guide 10 | 11 |    user 12 |    _source/commands 13 | 14 | 15 | .. Sysadmin's Guide 16 | 17 | .. toctree:: 18 |    :maxdepth: 2 19 |    :caption: Sysadmin's Guide 20 | 21 |    sysadmin 22 | 23 | 24 | .. Contributor Guide 25 | 26 | .. toctree:: 27 |    :maxdepth: 2 28 |    :caption: Contributor Guide 29 | 30 |    contributing 31 | 32 | 33 | .. Release Notes 34 | 35 | .. toctree:: 36 |    :maxdepth: 1 37 |    :caption: Release Notes 38 | 39 |    datanommer.models 40 |    datanommer.commands 41 |    datanommer.consumer 42 | 43 | 44 | ..
toctree:: 45 | :maxdepth: 2 46 | :caption: Module Documentation 47 | 48 | _source/models/datanommer.models 49 | _source/commands/datanommer.commands 50 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.12" 13 | # You can also specify other tool versions: 14 | # nodejs: "19" 15 | # rust: "1.64" 16 | # golang: "1.19" 17 | 18 | # Build documentation in the "docs/" directory with Sphinx 19 | sphinx: 20 | configuration: docs/conf.py 21 | 22 | # Optionally build your docs in additional formats such as PDF and ePub 23 | # formats: 24 | # - pdf 25 | # - epub 26 | 27 | # Optional but recommended, declare the Python requirements required 28 | # to build your documentation 29 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 30 | python: 31 | install: 32 | - requirements: docs/requirements.txt 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | 4 | [tool.ruff] 5 | line-length = 100 6 | 7 | [tool.ruff.lint] 8 | select = ["E", "F", "W", "I", "UP", "S", "B", "RUF"] 9 | ignore = ["RUF012"] 10 | 11 | [tool.ruff.lint.isort] 12 | lines-after-imports = 2 13 | order-by-type = false 14 | known-first-party = ["datanommer"] 15 | 16 | [tool.ruff.lint.per-file-ignores] 17 | "*/tests/*" = ["S101", "E501"] 18 | "tools/towncrier/get-authors.py" = ["S602", "S603", "S607"] 19 | 20 | [tool.pytest.ini_options] 21 | addopts = "-v --cov-config ../pyproject.toml --cov --cov-report term-missing --cov-report html --cov-report xml" 22 | 23 | [tool.coverage.run] 24 | branch = true 25 | source = ["datanommer"] 26 | 27 | [tool.coverage.paths] 28 | source = ["datanommer"] 29 | 30 | [tool.coverage.report] 31 | fail_under = 98 32 | exclude_lines = [ 33 | "pragma: no cover", 34 | "if __name__ == .__main__.:", 35 | ] 36 | omit = [ 37 | "datanommer/models/testing/*", 38 | ] 39 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/951c40020acc_unique.py: -------------------------------------------------------------------------------- 1 | """Add a unique index on packages and users 2 | 3 | Revision ID: 951c40020acc 4 | Revises: 5db25abc63be 5 | Create Date: 2021-09-22 15:38:57.339646 6 | """ 7 | 8 | from alembic import op 9 | 10 | 11 | # revision identifiers, used by Alembic. 
12 | revision = "951c40020acc" 13 | down_revision = "5db25abc63be" 14 | 15 | 16 | def upgrade(): 17 | op.drop_index("ix_packages_name", table_name="packages") 18 | op.create_index(op.f("ix_packages_name"), "packages", ["name"], unique=True) 19 | op.drop_index("ix_users_name", table_name="users") 20 | op.create_index(op.f("ix_users_name"), "users", ["name"], unique=True) 21 | 22 | 23 | def downgrade(): 24 | op.drop_index(op.f("ix_users_name"), table_name="users") 25 | op.create_index("ix_users_name", "users", ["name"], unique=False) 26 | op.drop_index(op.f("ix_packages_name"), table_name="packages") 27 | op.create_index("ix_packages_name", "packages", ["name"], unique=False) 28 | -------------------------------------------------------------------------------- /tools/check-dep-versions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | from collections import defaultdict 5 | 6 | import toml 7 | 8 | 9 | SUBPROJECTS = ["commands", "consumer", "models"] 10 | SUPPORTED_LOCK_VERSION = "1.1" 11 | 12 | 13 | deps_by_package = defaultdict(dict) 14 | 15 | for project in SUBPROJECTS: 16 | lock_path = os.path.join(f"datanommer.{project}", "poetry.lock") 17 | with open(lock_path) as f: 18 | lockfile = toml.load(f) 19 | lock_version = lockfile["metadata"]["lock-version"] 20 | if lock_version != SUPPORTED_LOCK_VERSION: 21 | print(f"Unsupported lockfile version in {lock_path}: {lock_version}. Skipping.") 22 | continue 23 | deps = {} 24 | for dep in lockfile["package"]: 25 | deps_by_package[dep["name"]][project] = dep["version"] 26 | 27 | 28 | for name, deps in deps_by_package.items(): 29 | if len(set(deps.values())) == 1: 30 | continue 31 | dep_list = [f"{project}:{version}" for project, version in deps.items()] 32 | print(f"Incoherent dep for {name}: {' '.join(dep_list)}") 33 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | Vagrant.configure(2) do |config| 5 | config.hostmanager.enabled = true 6 | config.hostmanager.manage_host = true 7 | config.hostmanager.manage_guest = true 8 | 9 | config.vm.define "datanommer" do |datanommer| 10 | datanommer.vm.box_url = "https://download.fedoraproject.org/pub/fedora/linux/releases/38/Cloud/x86_64/images/Fedora-Cloud-Base-Vagrant-38-1.6.x86_64.vagrant-libvirt.box" 11 | datanommer.vm.box = "f38-cloud-libvirt" 12 | datanommer.vm.hostname = "datanommer.test" 13 | 14 | datanommer.vm.synced_folder '.', '/vagrant', disabled: true 15 | datanommer.vm.synced_folder ".", "/home/vagrant/datanommer", type: "sshfs" 16 | 17 | datanommer.vm.provider :libvirt do |libvirt| 18 | libvirt.cpus = 2 19 | libvirt.memory = 2048 20 | end 21 | 22 | datanommer.vm.provision "ansible" do |ansible| 23 | ansible.playbook = "devel/ansible/datanommer.yml" 24 | ansible.config_file = "devel/ansible/ansible.cfg" 25 | ansible.verbose = true 26 | end 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /datanommer.models/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 
2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = datanommer.models:alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # set to 'true' to run the environment during 11 | # the 'revision' command, regardless of autogenerate 12 | # revision_environment = false 13 | 14 | sqlalchemy.url = postgresql://datanommer:datanommer@localhost/messages 15 | 16 | # Logging configuration 17 | [loggers] 18 | keys = root,sqlalchemy,alembic 19 | 20 | [handlers] 21 | keys = console 22 | 23 | [formatters] 24 | keys = generic 25 | 26 | [logger_root] 27 | level = WARN 28 | handlers = console 29 | qualname = 30 | 31 | [logger_sqlalchemy] 32 | level = WARN 33 | handlers = 34 | qualname = sqlalchemy.engine 35 | 36 | [logger_alembic] 37 | level = INFO 38 | handlers = 39 | qualname = alembic 40 | 41 | [handler_console] 42 | class = StreamHandler 43 | args = (sys.stderr,) 44 | level = NOTSET 45 | formatter = generic 46 | 47 | [formatter_generic] 48 | format = %(levelname)-5.5s [%(name)s] %(message)s 49 | datefmt = %H:%M:%S 50 | -------------------------------------------------------------------------------- /devel/ansible/roles/datanommer/files/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = datanommer.models:alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # set to 'true' to run the environment during 11 | # the 'revision' command, regardless of autogenerate 12 | # revision_environment = false 13 | 14 | sqlalchemy.url = postgresql://datanommer:datanommer@localhost/messages 15 | 16 | # Logging configuration 17 | [loggers] 18 | keys = root,sqlalchemy,alembic 19 | 20 | [handlers] 21 | keys = console 22 | 23 | [formatters] 24 | keys = generic 25 | 26 | [logger_root] 27 | level = WARN 28 | handlers = console 29 | qualname = 30 | 31 | [logger_sqlalchemy] 32 | level = WARN 33 | handlers = 34 | qualname = sqlalchemy.engine 35 | 36 | [logger_alembic] 37 | level = INFO 38 | handlers = 39 | qualname = alembic 40 | 41 | [handler_console] 42 | class = StreamHandler 43 | args = (sys.stderr,) 44 | level = NOTSET 45 | formatter = generic 46 | 47 | [formatter_generic] 48 | format = %(levelname)-5.5s [%(name)s] %(message)s 49 | datefmt = %H:%M:%S 50 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/versions/f4fdb5442d05_add_view_recent_topics.py: -------------------------------------------------------------------------------- 1 | """Add view recent_topics 2 | 3 | Revision ID: f4fdb5442d05 4 | Revises: 429e6f2cba6f 5 | Create Date: 2025-05-30 13:52:11.648140 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 
10 | revision = "f4fdb5442d05" 11 | down_revision = "429e6f2cba6f" 12 | 13 | from alembic import op # noqa: E402 14 | 15 | from datanommer.models.view import CreateMaterializedView, get_selectable # noqa: E402 16 | 17 | 18 | def upgrade(): 19 | 20 | # Create the materialized view using the factored selectable 21 | selectable = get_selectable() 22 | op.execute(CreateMaterializedView("recent_topics", selectable)) 23 | 24 | # Create unique index on topic 25 | op.create_index( 26 | "uq_recent_topics_topic", "recent_topics", ["topic"], unique=True, if_not_exists=True 27 | ) 28 | 29 | # Create index on message_count for sorting 30 | op.create_index( 31 | "ix_recent_topics_message_count", "recent_topics", ["message_count"], if_not_exists=True 32 | ) 33 | 34 | 35 | def downgrade(): 36 | op.execute("DROP MATERIALIZED VIEW IF EXISTS recent_topics") 37 | -------------------------------------------------------------------------------- /.s2i/bin/assemble: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # The assemble script builds the application artifacts from a source and 4 | # places them into appropriate directories inside the image. 5 | 6 | # Execute the default S2I script 7 | . /usr/libexec/s2i/assemble 8 | 9 | set -e 10 | 11 | # We need to run micropipenv manually because the projects are in 12 | # subdirectories. 13 | 14 | install_tool "micropipenv" "[toml]" 15 | 16 | # Poetry 1.5.0 breaks micropipenv, generate the requirements instead. 17 | pip install poetry poetry-plugin-export 18 | 19 | 20 | for subpackage in datanommer.models datanommer.commands datanommer.consumer; do 21 | pushd $subpackage 22 | echo "---> Generating requirements in ${subpackage}..." 23 | if [ "$subpackage" == "datanommer.models" ]; then 24 | poetry export -o requirements.txt --without-hashes --extras schemas 25 | else 26 | poetry export -o requirements.txt --without-hashes 27 | fi 28 | echo "---> Installing dependencies in ${subpackage}..." 29 | pip install -r requirements.txt 30 | # Now install the root project too. 31 | pip install . 
--no-deps 32 | popd 33 | done 34 | 35 | # set permissions for any installed artifacts 36 | fix-permissions /opt/app-root -P 37 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = checks,{models,consumer,commands}-{py311,py312,licenses},docs 3 | skipsdist = True 4 | isolated_build = true 5 | 6 | [testenv] 7 | passenv = HOME 8 | sitepackages = false 9 | skip_install = true 10 | allowlist_externals = 11 | poetry 12 | cd 13 | {toxinidir}/tools/run-liccheck.sh 14 | env = 15 | SQLALCHEMY_WARN_20=1 16 | change_dir = 17 | models: datanommer.models 18 | consumer: datanommer.consumer 19 | commands: datanommer.commands 20 | commands_pre = 21 | poetry install --all-extras 22 | commands = 23 | py: poetry run pytest {posargs} 24 | licenses: {toxinidir}/tools/run-liccheck.sh 25 | 26 | [testenv:checks] 27 | allowlist_externals = 28 | {[testenv]allowlist_externals} 29 | pre-commit 30 | git 31 | commands_pre = 32 | commands = pre-commit run --all-files 33 | 34 | [testenv:docs] 35 | commands_pre = 36 | poetry -C datanommer.models install --all-extras 37 | poetry -C datanommer.commands install --all-extras 38 | allowlist_externals = 39 | {[testenv]allowlist_externals} 40 | mkdir 41 | rm 42 | deps = 43 | sphinx 44 | myst-parser 45 | sphinx-click 46 | commands= 47 | mkdir -p docs/_static 48 | rm -rf docs/_build 49 | rm -rf docs/_source 50 | sphinx-build -b html -d {envtmpdir}/doctrees docs docs/_build/html 51 | -------------------------------------------------------------------------------- /datanommer.commands/tests/utils.py: -------------------------------------------------------------------------------- 1 | from bodhi.messages.schemas.update import UpdateCommentV1 2 | from fedora_messaging import message as fedora_message 3 | 4 | 5 | def generate_message( 6 | topic="org.fedoraproject.test.a.nice.message", 7 | body=None, 8 | headers=None, 9 | ): 10 | body = body or {"encouragement": "You're doing great!"} 11 | return fedora_message.Message(topic=topic, body=body, headers=headers) 12 | 13 | 14 | def generate_bodhi_update_complete_message(): 15 | msg = UpdateCommentV1( 16 | body={ 17 | "comment": { 18 | "karma": -1, 19 | "text": "text", 20 | "timestamp": "2019-03-18 16:54:48", 21 | "update": { 22 | "alias": "FEDORA-EPEL-2021-f2d195dada", 23 | "builds": [ 24 | {"nvr": "abrt-addon-python3-2.1.11-50.el7"}, 25 | {"nvr": "kernel-10.4.0-2.el7"}, 26 | ], 27 | "status": "pending", 28 | "release": {"name": "F35"}, 29 | "request": "testing", 30 | "user": {"name": "ryanlerch"}, 31 | }, 32 | "user": {"name": "dudemcpants"}, 33 | } 34 | } 35 | ) 36 | msg.topic = f"org.fedoraproject.stg.{msg.topic}" 37 | return msg 38 | -------------------------------------------------------------------------------- /datanommer.consumer/NEWS.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Release Notes 3 | ============= 4 | 5 | For ``datanommer.consumer`` 6 | 7 | .. towncrier release notes start 8 | 9 | v1.4.4 10 | ====== 11 | 12 | Released on 2025-06-19. 13 | 14 | No significant changes. 15 | 16 | v1.4.3 17 | ====== 18 | 19 | Released on 2025-06-10. 20 | 21 | No significant changes. 22 | 23 | v1.4.2 24 | ====== 25 | 26 | Released on 2025-06-07. 27 | 28 | No significant changes. 29 | 30 | v1.4.1 31 | ====== 32 | 33 | Released on 2025-05-30. 
34 | 35 | Dependency Changes 36 | ^^^^^^^^^^^^^^^^^^ 37 | 38 | * Add support for Python 3.9 (for RHEL9) (`#8d63e86 `_) 39 | 40 | 41 | v1.4.0 42 | ====== 43 | 44 | Released on 2024-06-12. 45 | 46 | No significant changes. 47 | 48 | 49 | v1.2.0 50 | ====== 51 | 52 | Released on 2024-04-15. This is a minor release. 53 | 54 | Development Improvements 55 | ^^^^^^^^^^^^^^^^^^^^^^^^ 56 | 57 | * Use Ruff instead of flake8 and isort and bandit (`4f7ffaa 58 | `_). 59 | 60 | 61 | v1.1.0 62 | ====== 63 | 64 | Released on 2023-09-22. 65 | This is a feature release that updates Python support. 66 | 67 | Dependency Changes 68 | ^^^^^^^^^^^^^^^^^^ 69 | 70 | * Drop support for python 3.7, add support for python 3.10 (`PR#890 71 | `_). 72 | 73 | 74 | v1.0.3 75 | ====== 76 | 77 | Released on 2022-03-18. This is a minor release: 78 | 79 | - support fedora-messaging 3.0+ 80 | - update dependencies 81 | -------------------------------------------------------------------------------- /devel/ansible/roles/postgresql/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Install RPM packages 3 | dnf: 4 | name: 5 | - python3-psycopg2 6 | - postgresql-server 7 | - timescaledb 8 | - acl 9 | state: present 10 | 11 | - name: Setup the postgresql DB 12 | command: postgresql-setup --initdb 13 | args: 14 | creates: /var/lib/pgsql/data/postgresql.conf 15 | 16 | - name: Add timescaledb to postgresql config 17 | lineinfile: 18 | path: /var/lib/pgsql/data/postgresql.conf 19 | regexp: ^shared_preload_libraries = 20 | line: "shared_preload_libraries = 'timescaledb'" 21 | 22 | - name: Configure access to postgresql 23 | postgresql_pg_hba: 24 | dest: /var/lib/pgsql/data/pg_hba.conf 25 | contype: host 26 | databases: all 27 | users: all 28 | address: "{{item}}" 29 | method: md5 30 | loop: 31 | - 127.0.0.1/32 32 | - ::1/128 33 | 34 | - name: Start postgresql 35 | service: 36 | name: postgresql 37 | enabled: yes 38 | state: started 39 | 40 | 41 | - block: 42 | - name: Create the user 43 | postgresql_user: 44 | name: datanommer 45 | password: datanommer 46 | 47 | - name: Create the database 48 | postgresql_db: 49 | name: messages 50 | owner: datanommer 51 | 52 | - name: Activate timescaledb 53 | postgresql_ext: 54 | name: timescaledb 55 | db: messages 56 | become: yes 57 | become_user: postgres 58 | become_method: sudo 59 | 60 | 61 | - name: Make connection easier 62 | copy: 63 | dest: /home/vagrant/.pgpass 64 | content: "*:*:messages:datanommer:datanommer\n" 65 | owner: vagrant 66 | group: vagrant 67 | mode: 0600 68 | -------------------------------------------------------------------------------- /datanommer.consumer/datanommer/consumer/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. 
If not, see . 16 | import importlib.metadata 17 | import logging 18 | 19 | from fedora_messaging import config 20 | 21 | import datanommer.models as m 22 | 23 | 24 | __version__ = importlib.metadata.version("datanommer-consumer") 25 | 26 | 27 | def get_datanommer_sqlalchemy_url(): 28 | try: 29 | return config.conf["consumer_config"]["datanommer_sqlalchemy_url"] 30 | except KeyError as e: 31 | raise ValueError( 32 | "datanommer_sqlalchemy_url not defined in the fedora-messaging config" 33 | ) from e 34 | 35 | 36 | log = logging.getLogger("datanommer-consumer") 37 | 38 | 39 | class Nommer: 40 | def __init__(self): 41 | m.init(get_datanommer_sqlalchemy_url()) 42 | 43 | def __call__(self, message): 44 | log.info("Nomming %r", message) 45 | try: 46 | m.add(message) 47 | except Exception: 48 | m.session.rollback() 49 | raise 50 | -------------------------------------------------------------------------------- /docs/sysadmin.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Sysadmin Guide 3 | ============== 4 | 5 | Write the sysadmin guide here (installation, maintenance, known issues, etc). 6 | 7 | Migration with Alembic 8 | ---------------------- 9 | 10 | When the database models are changed, we use alembic to retain the data. Alembic is located in the models:: 11 | 12 | (datanommer)$ cd datanommer.models 13 | 14 | To check the current models version:: 15 | 16 | (datanommer)$ alembic current 17 | 18 | If your models are up to date, you should see:: 19 | 20 | INFO [alembic.migration] Context impl SQLiteImpl. 21 | INFO [alembic.migration] Will assume transactional DDL. 22 | Current revision for postgresql://datanommer:datanommer@localhost/messages: 198447250956 -> ae2801c4cd9 (head), add category column 23 | 24 | If your result is:: 25 | 26 | INFO [alembic.migration] Context impl SQLiteImpl. 27 | INFO [alembic.migration] Will assume transactional DDL. 28 | Current revision for postgresql://datanommer:datanommer@localhost/messages: None 29 | 30 | then migrate to the most recent version with:: 31 | 32 | (datanommer)$ alembic upgrade head 33 | 34 | You should see:: 35 | 36 | INFO [alembic.migration] Context impl SQLiteImpl. 37 | INFO [alembic.migration] Will assume transactional DDL. 38 | INFO [alembic.migration] Running upgrade None -> 198447250956 39 | INFO [alembic.migration] Running upgrade 198447250956 -> ae2801c4cd9 40 | 41 | Refreshing materialized view 42 | ---------------------------- 43 | 44 | The ``recent_topics`` materialized view needs to be refreshed periodically to keep the data current. 45 | 46 | To refresh manually:: 47 | 48 | (datanommer)$ datanommer-refresh-view 49 | 50 | To set up automatic refresh (say every 5 minutes) via cron job, add this to your crontab:: 51 | 52 | */5 * * * * datanommer-refresh-view 53 | -------------------------------------------------------------------------------- /tools/towncrier/get-authors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | This script browses through git commit history (starting at latest tag), collects all authors of 5 | commits and creates fragment for `towncrier`_ tool. 6 | It's meant to be run during the release process, before generating the release notes. 7 | Example:: 8 | $ python get_authors.py 9 | .. 
_towncrier: https://github.com/hawkowl/towncrier/ 10 | Authors: 11 | Aurelien Bompard 12 | Michal Konecny 13 | """ 14 | 15 | import os 16 | from argparse import ArgumentParser 17 | from subprocess import check_output 18 | 19 | 20 | EXCLUDE = ["Weblate (bot)", "dependabot[bot]", "renovate[bot]"] 21 | 22 | last_tag = check_output("git tag | sort -n | tail -n 1", shell=True, text=True).strip() 23 | 24 | args_parser = ArgumentParser() 25 | args_parser.add_argument( 26 | "until", 27 | nargs="?", 28 | default="HEAD", 29 | help="Consider all commits until this one (default: %(default)s).", 30 | ) 31 | args_parser.add_argument( 32 | "since", 33 | nargs="?", 34 | default=last_tag, 35 | help="Consider all commits since this one (default: %(default)s).", 36 | ) 37 | args = args_parser.parse_args() 38 | 39 | authors = {} 40 | log_range = args.since + ".." + args.until 41 | output = check_output(["git", "log", log_range, "--format=%ae\t%an"], text=True) 42 | for line in output.splitlines(): 43 | email, fullname = line.split("\t") 44 | email = email.split("@")[0].replace(".", "") 45 | if email in authors: 46 | continue 47 | authors[email] = fullname 48 | 49 | for nick, fullname in authors.items(): 50 | if fullname in EXCLUDE or fullname.endswith("[bot]"): 51 | continue 52 | filename = f"{nick}.author" 53 | if os.path.exists(filename): 54 | continue 55 | print(f"Adding author {fullname} ({nick})") 56 | with open(filename, "w") as f: 57 | f.write(fullname) 58 | f.write("\n") 59 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/testing/__init__.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sqlalchemy as sa 3 | from pytest_postgresql import factories 4 | from pytest_postgresql.janitor import DatabaseJanitor 5 | from sqlalchemy.orm import scoped_session 6 | 7 | import datanommer.models as dm 8 | 9 | 10 | postgresql_proc = factories.postgresql_proc( 11 | postgres_options="-c shared_preload_libraries=timescaledb -c timescaledb.telemetry_level=off", 12 | ) 13 | 14 | 15 | @pytest.fixture(scope="session") 16 | def datanommer_db_url(postgresql_proc): 17 | return ( 18 | f"postgresql+psycopg2://{postgresql_proc.user}:@" 19 | f"{postgresql_proc.host}:{postgresql_proc.port}" 20 | f"/{postgresql_proc.dbname}" 21 | ) 22 | 23 | 24 | @pytest.fixture(scope="session") 25 | def datanommer_db_engine(postgresql_proc, datanommer_db_url): 26 | with DatabaseJanitor( 27 | user=postgresql_proc.user, 28 | host=postgresql_proc.host, 29 | port=postgresql_proc.port, 30 | dbname=postgresql_proc.dbname, 31 | # Don't use a template database 32 | # template_dbname=postgresql_proc.template_dbname, 33 | version=postgresql_proc.version, 34 | ): 35 | engine = sa.create_engine(datanommer_db_url, future=True) 36 | # Renew the global object, dm.init checks a custom attribute 37 | dm.session = scoped_session(dm.maker) 38 | dm.init(engine=engine, create=True) 39 | yield engine 40 | engine.dispose() 41 | 42 | 43 | @pytest.fixture() 44 | def datanommer_db(datanommer_db_url, datanommer_db_engine): 45 | for table in reversed(dm.DeclarativeBase.metadata.sorted_tables): 46 | dm.session.execute(table.delete()) 47 | dm.session.commit() 48 | yield datanommer_db_engine 49 | 50 | 51 | @pytest.fixture() 52 | def datanommer_models(datanommer_db): 53 | dm.User.clear_cache() 54 | dm.Package.clear_cache() 55 | yield dm.session 56 | dm.session.rollback() 57 | 
-------------------------------------------------------------------------------- /datanommer.commands/config.toml.example: -------------------------------------------------------------------------------- 1 | # A sample fedora-messaging configuration for datanommer. This file is in the TOML format. 2 | 3 | amqp_url = "amqps://datanommer:@rabbitmq.fedoraproject.org/%2Fpublic_pubsub" 4 | callback = "datanommer.consumer:Nommer" 5 | passive_declares = true 6 | 7 | [tls] 8 | ca_cert = "/etc/fedora-messaging/cacert.pem" 9 | keyfile = "/etc/fedora-messaging/fedora-key.pem" 10 | certfile = "/etc/fedora-messaging/fedora-cert.pem" 11 | 12 | [client_properties] 13 | app = "datanommer" 14 | app_url = "https://github.com/fedora-infra/datanommer" 15 | 16 | [queues.datanommer] 17 | durable = false 18 | auto_delete = true 19 | exclusive = true 20 | arguments = {} 21 | 22 | [[bindings]] 23 | queue = "datanommer" 24 | exchange = "amq.topic" 25 | routing_keys = ["#"] 26 | 27 | [consumer_config] 28 | datanommer_sqlalchemy_url = 'postgresql://datanommer:datanommer@localhost/datanommer' 29 | alembic_ini = "../datanommer.models/alembic.ini" 30 | 31 | [log_config] 32 | version = 1 33 | disable_existing_loggers = true 34 | 35 | [log_config.formatters.simple] 36 | format = "[%(levelname)s %(name)s] %(message)s" 37 | 38 | [log_config.handlers.console] 39 | class = "logging.StreamHandler" 40 | formatter = "simple" 41 | stream = "ext://sys.stdout" 42 | 43 | [log_config.loggers.fedora_messaging] 44 | level = "INFO" 45 | propagate = false 46 | handlers = ["console"] 47 | 48 | # Twisted is the asynchronous framework that manages the TCP/TLS connection, as well 49 | # as the consumer event loop. When debugging you may want to lower this log level. 50 | [log_config.loggers.twisted] 51 | level = "INFO" 52 | propagate = false 53 | handlers = ["console"] 54 | 55 | # Pika is the underlying AMQP client library. When debugging you may want to 56 | # lower this log level. 
57 | [log_config.loggers.pika] 58 | level = "WARNING" 59 | propagate = false 60 | handlers = ["console"] 61 | 62 | [log_config.loggers.datanommer] 63 | level = "INFO" 64 | propagate = false 65 | handlers = ["console"] 66 | 67 | [log_config.root] 68 | level = "ERROR" 69 | handlers = ["console"] 70 | -------------------------------------------------------------------------------- /datanommer.models/tests/test_jsonencodeddict.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sqlalchemy import Column, create_engine, Integer, MetaData, select, Table, text 3 | 4 | from datanommer.models import _JSONEncodedDict 5 | 6 | 7 | @pytest.fixture 8 | def connection(): 9 | engine = create_engine("sqlite:///:memory:", future=True) 10 | with engine.connect() as connection: 11 | yield connection 12 | 13 | 14 | @pytest.fixture 15 | def table(connection): 16 | metadata = MetaData() 17 | table = Table( 18 | "test_table", 19 | metadata, 20 | Column("id", Integer, primary_key=True), 21 | Column("data", _JSONEncodedDict), 22 | ) 23 | metadata.create_all(connection) 24 | yield table 25 | metadata.drop_all(connection) 26 | 27 | 28 | def test_jsonencodeddict(connection, table): 29 | connection.execute(table.insert().values(data={"foo": "bar"})) 30 | # Check that it's stored as a string 31 | for row in connection.execute(text("SELECT data FROM test_table")): 32 | assert row.data == '{"foo": "bar"}' 33 | # Check that SQLAlchemy retrieves it as a dict 34 | for row in connection.execute(select(table.c.data)): 35 | assert row.data == {"foo": "bar"} 36 | 37 | 38 | def test_jsonencodeddict_null(connection, table): 39 | # Make sure NULL values are supported 40 | connection.execute(table.insert().values(data=None)) 41 | for row in connection.execute(select(table.c.data)): 42 | assert row.data is None 43 | 44 | 45 | def test_jsonencodeddict_compare(connection, table): 46 | # Make sure NULL values are supported 47 | connection.execute(table.insert().values(data={"foo": "bar"})) 48 | for row in connection.execute(select(table.c.data).where(table.c.data == {"foo": "bar"})): 49 | assert row.data == {"foo": "bar"} 50 | 51 | 52 | def test_jsonencodeddict_compare_like(connection, table): 53 | # Make sure NULL values are supported 54 | connection.execute(table.insert().values(data={"foo": "bar"})) 55 | for row in connection.execute(select(table.c.data).where(table.c.data.like("%foo%"))): 56 | assert row.data == {"foo": "bar"} 57 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/view.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import DDL, func, select, text 2 | from sqlalchemy.ext import compiler 3 | from sqlalchemy.schema import DDLElement 4 | 5 | 6 | TIME_INTERVAL = "1 year" 7 | 8 | 9 | class CreateMaterializedView(DDLElement): 10 | def __init__(self, name, selectable): 11 | self.name = name 12 | self.selectable = selectable 13 | 14 | 15 | @compiler.compiles(CreateMaterializedView) 16 | def _create_view(element, compiler, **kw): 17 | selectable = compiler.sql_compiler.process(element.selectable, literal_binds=True) 18 | return f"CREATE MATERIALIZED VIEW IF NOT EXISTS {element.name} AS {selectable}" 19 | 20 | 21 | def get_selectable(): 22 | """Factory function to create the selectable query for materialized view.""" 23 | from . 
import Message 24 | 25 | return ( 26 | select( 27 | Message.topic, 28 | func.count().label("message_count"), 29 | func.min(Message.timestamp).label("earliest"), 30 | func.max(Message.timestamp).label("latest"), 31 | ) 32 | .where(Message.timestamp >= text(f"NOW() - INTERVAL '{TIME_INTERVAL}'")) 33 | .group_by(Message.topic) 34 | ) 35 | 36 | 37 | def refresh_recent_topics(connection): 38 | """Standalone refresh function that can be called from cron. 39 | 40 | Args: 41 | connection: SQLAlchemy connection object 42 | """ 43 | connection.execute(text("REFRESH MATERIALIZED VIEW CONCURRENTLY recent_topics")) 44 | 45 | 46 | def create_view(connection): 47 | """Create the recent_topics materialized view with proper indexes.""" 48 | 49 | selectable = get_selectable() 50 | 51 | # Create the materialized view 52 | connection.execute(CreateMaterializedView("recent_topics", selectable)) 53 | 54 | # Create unique index on topic 55 | connection.execute( 56 | DDL("CREATE UNIQUE INDEX IF NOT EXISTS uq_recent_topics_topic " "ON recent_topics (topic)"), 57 | ) 58 | 59 | # Create index on message_count for sorting 60 | connection.execute( 61 | DDL( 62 | "CREATE INDEX IF NOT EXISTS ix_recent_topics_message_count " 63 | "ON recent_topics (message_count)" 64 | ) 65 | ) 66 | -------------------------------------------------------------------------------- /tools/towncrier/template.rst.j2: -------------------------------------------------------------------------------- 1 | {% macro reference(value) -%} 2 | {%- if value.startswith("PR") -%} 3 | `PR#{{ value[2:] }} `_ 4 | {%- elif value.startswith("C") -%} 5 | `{{ value[1:] }} `_ 6 | {%- else -%} 7 | `#{{ value }} `_ 8 | {%- endif -%} 9 | {%- endmacro -%} 10 | 11 | {{- top_line }} 12 | {{ top_underline * ((top_line)|length) -}} 13 | 14 | Released on {{ versiondata.date }}. 15 | 16 | {% for section, _ in sections.items() -%} 17 | {%- set underline = underlines[0] -%} 18 | {%- if section -%} 19 | {{section}} 20 | {{ underline * section|length }} 21 | {%- set underline = underlines[1] -%} 22 | {%- endif -%} 23 | 24 | {%- if sections[section] -%} 25 | {%- for category, val in definitions.items() if category in sections[section] and category != "author" -%} 26 | {{ definitions[category]['name'] }} 27 | {{ underline * definitions[category]['name']|length }} 28 | 29 | {% if definitions[category]['showcontent'] -%} 30 | {%- for text, values in sections[section][category].items() %} 31 | * {{ text }} 32 | {%- if values %} 33 | {% if "\n - " in text or '\n * ' in text %} 34 | 35 | 36 | ( 37 | {%- else %} 38 | ( 39 | {%- endif -%} 40 | {%- for issue in values %} 41 | {{ reference(issue) }}{% if not loop.last %}, {% endif %} 42 | {%- endfor %} 43 | ) 44 | {% else %} 45 | 46 | {% endif %} 47 | {% endfor -%} 48 | {%- else -%} 49 | * {{ sections[section][category]['']|sort|join(', ') }} 50 | 51 | {% endif -%} 52 | {%- if sections[section][category]|length == 0 %} 53 | No significant changes. 54 | 55 | {% else -%} 56 | {%- endif %} 57 | 58 | {% endfor -%} 59 | {% if sections[section]["author"] -%} 60 | {{definitions['author']["name"]}} 61 | {{ underline * definitions['author']['name']|length }} 62 | 63 | Many thanks to the contributors of bug reports, pull requests, and pull request 64 | reviews for this release: 65 | 66 | {% for text, values in sections[section]["author"].items() -%} 67 | * {{ text }} 68 | {% endfor -%} 69 | {%- endif %} 70 | 71 | {% else -%} 72 | No significant changes. 
73 | 74 | {% endif %} 75 | {%- endfor +%} 76 | -------------------------------------------------------------------------------- /datanommer.consumer/tests/test_consumer.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 16 | 17 | import pytest 18 | from fedora_messaging import message 19 | from sqlalchemy import func, select 20 | 21 | import datanommer.consumer 22 | import datanommer.models as dm 23 | 24 | 25 | @pytest.fixture 26 | def consumer(mocker): 27 | mock_get_url = mocker.patch("datanommer.consumer.get_datanommer_sqlalchemy_url") 28 | mock_get_url.return_value = "sqlite:///fake.db" 29 | return datanommer.consumer.Nommer() 30 | 31 | 32 | def test_consume(datanommer_models, consumer): 33 | example_message = message.Message( 34 | topic="nice.message", body={"encouragement": "You're doing great!"} 35 | ) 36 | 37 | consumer = datanommer.consumer.Nommer() 38 | 39 | consumer(example_message) 40 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 41 | 42 | 43 | def test_add_exception(datanommer_models, consumer, mocker): 44 | example_message = message.Message( 45 | topic="nice.message", body={"encouragement": "You're doing great!"} 46 | ) 47 | 48 | dm.add = mocker.Mock(side_effect=RuntimeError("an exception")) 49 | consumer = datanommer.consumer.Nommer() 50 | with pytest.raises(RuntimeError): 51 | consumer(example_message) 52 | 53 | 54 | def test_get_datanommer_sqlalchemy_url_keyerror(mocker): 55 | mocker.patch.dict( 56 | datanommer.consumer.config.conf["consumer_config"], 57 | {}, 58 | clear=True, 59 | ) 60 | with pytest.raises(ValueError): 61 | datanommer.consumer.get_datanommer_sqlalchemy_url() 62 | -------------------------------------------------------------------------------- /datanommer.consumer/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datanommer.consumer" 3 | version = "1.4.4" 4 | description = "Consumer for datanommer" 5 | authors = [ 6 | "Fedora Infrastructure " 7 | ] 8 | license = "GPL-3.0-or-later" 9 | readme = "README.rst" 10 | repository = "https://github.com/fedora-infra/datanommer" 11 | homepage = "https://github.com/fedora-infra/datanommer" 12 | packages = [ 13 | { include = "datanommer" }, 14 | ] 15 | include = [ 16 | { path = "*.ini", format = "sdist" }, 17 | { path = "tests/*", format = "sdist" }, 18 | ] 19 | 20 | [tool.poetry.dependencies] 21 | python = "^3.11" 22 | "datanommer.models" = "^1.0.0" 23 | fedora-messaging = ">=2.1.0" 24 | psycopg2 = "^2.9.1" 25 | 26 | [tool.poetry.group.dev.dependencies] 27 | pre-commit = "*" 28 | "datanommer.models" = {path = "../datanommer.models", develop = true} 29 | black = "*" 30 | ruff = "*" 31 | pytest = "*" 32 | psutil = "*" 33 | liccheck = 
"*" 34 | pytest-cov = "*" 35 | pytest-mock = "*" 36 | pytest-postgresql = "*" 37 | towncrier = "*" 38 | poetry-plugin-export = "^1.9.0" 39 | 40 | 41 | [build-system] 42 | requires = ["poetry-core>=1.0.0"] 43 | build-backend = "poetry.core.masonry.api" 44 | 45 | 46 | [tool.towncrier] 47 | package = "datanommer.consumer" 48 | directory = "news/" 49 | title_format = "v{version}" 50 | issue_format = "{issue}" 51 | template = "../tools/towncrier/template.rst.j2" 52 | underlines = "=^-" 53 | wrap = false 54 | all_bullets = true 55 | 56 | [[tool.towncrier.type]] 57 | directory = "bic" 58 | name = "Backwards Incompatible Changes" 59 | showcontent = true 60 | 61 | [[tool.towncrier.type]] 62 | directory = "dependency" 63 | name = "Dependency Changes" 64 | showcontent = true 65 | 66 | [[tool.towncrier.type]] 67 | directory = "feature" 68 | name = "Features" 69 | showcontent = true 70 | 71 | [[tool.towncrier.type]] 72 | directory = "bug" 73 | name = "Bug Fixes" 74 | showcontent = true 75 | 76 | [[tool.towncrier.type]] 77 | directory = "dev" 78 | name = "Development Improvements" 79 | showcontent = true 80 | 81 | [[tool.towncrier.type]] 82 | directory = "docs" 83 | name = "Documentation Improvements" 84 | showcontent = true 85 | 86 | [[tool.towncrier.type]] 87 | directory = "other" 88 | name = "Other Changes" 89 | showcontent = true 90 | 91 | [[tool.towncrier.type]] 92 | directory = "author" 93 | name = "Contributors" 94 | showcontent = true 95 | -------------------------------------------------------------------------------- /datanommer.commands/datanommer/commands/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | from fedora_messaging import config as fedora_messaging_config 5 | from fedora_messaging.message import load_message as load_message 6 | from sqlalchemy import func 7 | 8 | import datanommer.models as m 9 | 10 | 11 | # Go trough messages these many at a time 12 | CHUNK_SIZE = 10000 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | def get_config(config_path=None): 17 | if config_path: 18 | fedora_messaging_config.conf.load_config(config_path) 19 | conf = fedora_messaging_config.conf["consumer_config"] 20 | for key in ("datanommer_sqlalchemy_url", "alembic_ini"): 21 | if key not in conf: 22 | raise click.ClickException(f"{key} not defined in the fedora-messaging config") 23 | return conf 24 | 25 | 26 | config_option = click.option( 27 | "-c", 28 | "--config", 29 | "config_path", 30 | help="Load this Fedora Messaging config file", 31 | type=click.Path(exists=True, readable=True), 32 | ) 33 | 34 | 35 | def iterate_over_messages(query, start, chunk_size): 36 | click.echo("Counting messages...") 37 | 38 | total = m.session.scalar(query.with_only_columns(func.count(m.Message.id))) 39 | if not total: 40 | click.echo("No messages matched.") 41 | return 42 | 43 | click.echo(f"Considering {total} message{'s' if total > 1 else ''}") 44 | 45 | query = query.order_by(m.Message.timestamp) 46 | with click.progressbar(length=total) as bar: 47 | has_messages = True 48 | chunk_start = start 49 | first_run = True 50 | while has_messages: 51 | # click < 8.2 (Python < 3.10): use bar.is_hidden 52 | # click >= 8.2 (Python >= 3.10): use bar.hidden and the TTY check 53 | if (hasattr(bar, "is_hidden") and bar.is_hidden) or ( 54 | hasattr(bar, "hidden") and (bar.hidden or not bar.file.isatty()) 55 | ): 56 | click.echo(f"Working on {chunk_size} messages sent after {chunk_start}") 57 | chunk_query = query.where(m.Message.timestamp 
>= chunk_start).limit(chunk_size) 58 | if not first_run: 59 | chunk_query = chunk_query.offset(1) 60 | has_messages = False 61 | for message in m.session.scalars(chunk_query): 62 | bar.update(1) 63 | has_messages = True 64 | yield message 65 | if has_messages: 66 | chunk_start = message.timestamp 67 | first_run = False 68 | m.session.commit() 69 | m.session.expunge_all() 70 | -------------------------------------------------------------------------------- /devel/ansible/roles/datanommer/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Install RPM packages 3 | dnf: 4 | name: 5 | - fedora-messaging 6 | - gcc 7 | - git 8 | - poetry 9 | - python3-devel 10 | - python3-pip 11 | - vim 12 | - krb5-devel 13 | - libpq-devel 14 | - tox 15 | - python-psycopg2 16 | state: present 17 | 18 | - name: Install the .bashrc 19 | copy: 20 | src: .bashrc 21 | dest: /home/vagrant/.bashrc 22 | mode: 0644 23 | owner: vagrant 24 | group: vagrant 25 | 26 | - name: Create a directory for the virtualenv 27 | file: 28 | name: /srv/venv 29 | state: directory 30 | mode: 0755 31 | owner: vagrant 32 | group: vagrant 33 | 34 | - name: Create the virtualenv and install poetry 35 | pip: 36 | name: poetry 37 | virtualenv: /srv/venv 38 | become: yes 39 | become_user: vagrant 40 | 41 | - name: Install datanommer.models with poetry 42 | shell: /srv/venv/bin/poetry install 43 | args: 44 | chdir: /home/vagrant/datanommer/datanommer.models/ 45 | become: yes 46 | become_user: vagrant 47 | 48 | - name: Install datanommer.commands with poetry 49 | shell: /srv/venv/bin/poetry install 50 | args: 51 | chdir: /home/vagrant/datanommer/datanommer.commands/ 52 | become: yes 53 | become_user: vagrant 54 | 55 | - name: Install datanommer.consumer with poetry 56 | shell: /srv/venv/bin/poetry install 57 | args: 58 | chdir: /home/vagrant/datanommer/datanommer.consumer/ 59 | become: yes 60 | become_user: vagrant 61 | 62 | - name: Use the stage fedora-messaging queue to consume 63 | copy: 64 | remote_src: True 65 | src: /etc/fedora-messaging/fedora.stg.toml 66 | dest: /etc/fedora-messaging/config.toml 67 | 68 | - name: configure datanommer_sqlalchemy_url in fedora-messaging config 69 | lineinfile: 70 | path: /etc/fedora-messaging/config.toml 71 | regexp: 'example_key = "for my consumer"' 72 | line: "datanommer_sqlalchemy_url = \"postgresql://datanommer:datanommer@localhost/messages\"\nalembic_ini = \"/etc/datanommer-alembic.ini\"" 73 | 74 | - name: Create the Alembic config file 75 | copy: 76 | src: alembic.ini 77 | dest: /etc/datanommer-alembic.ini 78 | mode: 0600 79 | owner: vagrant 80 | group: vagrant 81 | 82 | - name: Create datanommer db 83 | shell: /srv/venv/bin/poetry run datanommer-create-db 84 | args: 85 | chdir: /home/vagrant/datanommer/datanommer.commands 86 | become: yes 87 | become_user: vagrant 88 | 89 | - name: Install the systemd unit files for datanommer consumer 90 | copy: 91 | src: "datanommer.service" 92 | dest: /etc/systemd/system/datanommer.service 93 | mode: 0644 94 | 95 | - name: Start datanommer service using systemd 96 | systemd: 97 | state: started 98 | name: datanommer 99 | daemon_reload: yes 100 | enabled: yes 101 | -------------------------------------------------------------------------------- /datanommer.commands/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datanommer.commands" 3 | version = "1.4.4" 4 | description = "Console commands for datanommer" 5 | authors = [ 6 | 
"Fedora Infrastructure " 7 | ] 8 | license = "GPL-3.0-or-later" 9 | readme = "README.rst" 10 | repository = "https://github.com/fedora-infra/datanommer" 11 | homepage = "https://github.com/fedora-infra/datanommer" 12 | packages = [ 13 | { include = "datanommer" }, 14 | ] 15 | include = [ 16 | { path = "*.ini", format = "sdist" }, 17 | { path = "*.example", format = "sdist" }, 18 | { path = "tests/*", format = "sdist" }, 19 | ] 20 | 21 | [tool.poetry.dependencies] 22 | python = "^3.11" 23 | "datanommer.models" = {version = "^1.0.0"} 24 | fedora-messaging = ">=2.1.0" 25 | 26 | [tool.poetry.group.dev.dependencies] 27 | pre-commit = "*" 28 | "datanommer.models" = {path = "../datanommer.models", develop = true} 29 | black = "*" 30 | ruff = "*" 31 | pytest = "*" 32 | liccheck = "*" 33 | pytest-cov = "*" 34 | pytest-mock = "*" 35 | pytest-postgresql = "*" 36 | psycopg2 = "*" 37 | towncrier = "*" 38 | bodhi-messages = "*" 39 | poetry-plugin-export = "^1.9.0" 40 | 41 | [tool.poetry.scripts] 42 | datanommer-create-db = "datanommer.commands:create" 43 | datanommer-dump = "datanommer.commands:dump" 44 | datanommer-stats = "datanommer.commands:stats" 45 | datanommer-latest = "datanommer.commands:latest" 46 | datanommer-extract-users = "datanommer.commands.extract_users:main" 47 | datanommer-refresh-view = "datanommer.commands:refresh_view" 48 | 49 | 50 | [build-system] 51 | requires = ["poetry-core>=1.0.0"] 52 | build-backend = "poetry.core.masonry.api" 53 | 54 | 55 | [tool.towncrier] 56 | package = "datanommer.commands" 57 | directory = "news/" 58 | title_format = "v{version}" 59 | issue_format = "{issue}" 60 | template = "../tools/towncrier/template.rst.j2" 61 | underlines = "=^-" 62 | wrap = false 63 | all_bullets = true 64 | 65 | [[tool.towncrier.type]] 66 | directory = "bic" 67 | name = "Backwards Incompatible Changes" 68 | showcontent = true 69 | 70 | [[tool.towncrier.type]] 71 | directory = "dependency" 72 | name = "Dependency Changes" 73 | showcontent = true 74 | 75 | [[tool.towncrier.type]] 76 | directory = "feature" 77 | name = "Features" 78 | showcontent = true 79 | 80 | [[tool.towncrier.type]] 81 | directory = "bug" 82 | name = "Bug Fixes" 83 | showcontent = true 84 | 85 | [[tool.towncrier.type]] 86 | directory = "dev" 87 | name = "Development Improvements" 88 | showcontent = true 89 | 90 | [[tool.towncrier.type]] 91 | directory = "docs" 92 | name = "Documentation Improvements" 93 | showcontent = true 94 | 95 | [[tool.towncrier.type]] 96 | directory = "other" 97 | name = "Other Changes" 98 | showcontent = true 99 | 100 | [[tool.towncrier.type]] 101 | directory = "author" 102 | name = "Contributors" 103 | showcontent = true 104 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/alembic/env.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 
13 | #
14 | # You should have received a copy of the GNU General Public License along
15 | # with this program. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | from logging.config import fileConfig
18 | 
19 | from alembic import context
20 | from sqlalchemy import engine_from_config, pool
21 | 
22 | # add your model's MetaData object here
23 | # for 'autogenerate' support
24 | # from myapp import mymodel
25 | # target_metadata = mymodel.Base.metadata
26 | from datanommer.models import DeclarativeBase
27 | 
28 | 
29 | target_metadata = DeclarativeBase.metadata
30 | 
31 | 
32 | # this is the Alembic Config object, which provides
33 | # access to the values within the .ini file in use.
34 | config = context.config
35 | 
36 | # Interpret the config file for Python logging.
37 | # This sets up the loggers.
38 | fileConfig(config.config_file_name)
39 | 
40 | # other values from the config, defined by the needs of env.py,
41 | # can be acquired:
42 | # my_important_option = config.get_main_option("my_important_option")
43 | # ... etc.
44 | 
45 | 
46 | def run_migrations_offline():
47 | """Run migrations in 'offline' mode.
48 | 
49 | This configures the context with just a URL
50 | and not an Engine, though an Engine is acceptable
51 | here as well. By skipping the Engine creation
52 | we don't even need a DBAPI to be available.
53 | 
54 | Calls to context.execute() here emit the given string to the
55 | script output.
56 | 
57 | """
58 | # TODO: Pull this from datanommer's fedmsg.d config instead of using
59 | # the alembic.ini
60 | url = config.get_main_option("sqlalchemy.url")
61 | context.configure(url=url)
62 | 
63 | with context.begin_transaction():
64 | context.run_migrations()
65 | 
66 | 
67 | def run_migrations_online():
68 | """Run migrations in 'online' mode.
69 | 
70 | In this scenario we need to create an Engine
71 | and associate a connection with the context.
72 | 
73 | """
74 | engine = engine_from_config(
75 | config.get_section(config.config_ini_section),
76 | prefix="sqlalchemy.",
77 | poolclass=pool.NullPool,
78 | )
79 | 
80 | connection = engine.connect()
81 | context.configure(connection=connection, target_metadata=target_metadata)
82 | 
83 | try:
84 | with context.begin_transaction():
85 | context.run_migrations()
86 | finally:
87 | connection.close()
88 | 
89 | 
90 | if context.is_offline_mode():
91 | run_migrations_offline()
92 | else:
93 | run_migrations_online()
94 | 
-------------------------------------------------------------------------------- /datanommer.commands/NEWS.rst: --------------------------------------------------------------------------------
1 | =============
2 | Release Notes
3 | =============
4 | 
5 | For ``datanommer.commands``
6 | 
7 | .. towncrier release notes start
8 | 
9 | v1.4.4
10 | ======
11 | 
12 | Released on 2025-06-19.
13 | 
14 | No significant changes.
15 | 
16 | v1.4.3
17 | ======
18 | 
19 | Released on 2025-06-10.
20 | 
21 | Dependency Changes
22 | ^^^^^^^^^^^^^^^^^^
23 | 
24 | * Fix the schema dep that was started in 127b1dd
25 | 
26 | 
27 | v1.4.2
28 | ======
29 | 
30 | Released on 2025-06-07.
31 | 
32 | Dependency Changes
33 | ^^^^^^^^^^^^^^^^^^
34 | 
35 | * Don't require all the message schemas
36 | 
37 | 
38 | v1.4.1
39 | ======
40 | 
41 | Released on 2025-05-30. 
42 | 43 | Dependency Changes 44 | ^^^^^^^^^^^^^^^^^^ 45 | 46 | * Add support for Python 3.9 (for RHEL9) (`#8d63e86 `_) 47 | 48 | Development Improvements 49 | ^^^^^^^^^^^^^^^^^^^^^^^^ 50 | 51 | * Test the command output on a TTY (`#ef1f572 `_) 52 | 53 | Other Changes 54 | ^^^^^^^^^^^^^ 55 | 56 | * Slighly clearer command message (`#940c642 `_) 57 | * Slight output improvement in the progressbar-based commands (`#1dc0db8 `_) 58 | 59 | 60 | v1.4.0 61 | ====== 62 | 63 | Released on 2024-06-12. 64 | 65 | No significant changes. 66 | 67 | 68 | v1.3.0 69 | ====== 70 | 71 | Released on 2024-05-22. 72 | 73 | Features 74 | ^^^^^^^^ 75 | 76 | * Improve the extract-users script ( 77 | `dbf28ff `_, 78 | `ac7394e `_, 79 | `ec2e581 `_, 80 | `2fd0175 `_ 81 | ). 82 | 83 | Other Changes 84 | ^^^^^^^^^^^^^ 85 | 86 | * Update dependencies 87 | 88 | 89 | v1.2.0 90 | ====== 91 | 92 | Released on 2024-04-15. 93 | This is a feature release that adds the datanommer-extract-users script. 94 | 95 | Features 96 | ^^^^^^^^ 97 | 98 | * Add the datanommer-extract-users script to fill the usernames table with data 99 | from recently-added message schemas (`320a466 100 | `_). 101 | 102 | Development Improvements 103 | ^^^^^^^^^^^^^^^^^^^^^^^^ 104 | 105 | * Use Ruff instead of flake8 and isort and bandit (`4f7ffaa 106 | `_). 107 | 108 | 109 | v1.1.0 110 | ====== 111 | 112 | Released on 2023-09-22. 113 | This is a feature release that adds support for Python 3.10, drops support for 114 | Python 3.7, and improves the database creation for Alembic integration. 115 | 116 | Dependency Changes 117 | ^^^^^^^^^^^^^^^^^^ 118 | 119 | * Drop support for python 3.7, add support for python 3.10 (`PR#890 120 | `_). 121 | 122 | Features 123 | ^^^^^^^^ 124 | 125 | * Use Alembic to stamp the database when creating it. This requires adding a 126 | config variable ``alembic_ini`` in the fedora-messaging configuration file 127 | that points to the ``alembic.ini`` file. (`PR#815 128 | `_). 129 | 130 | 131 | v1.0.3 132 | ====== 133 | 134 | Released on 2022-03-18. This is a minor release: 135 | 136 | - support fedora-messaging 3.0+ 137 | - update dependencies 138 | -------------------------------------------------------------------------------- /datanommer.models/NEWS.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Release Notes 3 | ============= 4 | 5 | For ``datanommer.models`` 6 | 7 | .. towncrier release notes start 8 | 9 | v1.4.4 10 | ====== 11 | 12 | Released on 2025-06-19. 13 | 14 | Dependency Changes 15 | ^^^^^^^^^^^^^^^^^^ 16 | 17 | * Relax dependency on psycopg2 (`3ad7e7b `_) 18 | 19 | 20 | v1.4.3 21 | ====== 22 | 23 | Released on 2025-06-10. 24 | 25 | No significant changes. 26 | 27 | v1.4.2 28 | ====== 29 | 30 | Released on 2025-06-07. 31 | 32 | No significant changes. 33 | 34 | v1.4.1 35 | ====== 36 | 37 | Released on 2025-05-30. 
38 | 39 | Dependency Changes 40 | ^^^^^^^^^^^^^^^^^^ 41 | 42 | * Add schema package mailman3-fedmsg-plugin-schemas (`#8ad6c47 `_) 43 | * Add schema package webhook-to-fedora-messaging-messages (`#865855c `_) 44 | * Update koji-fedoramessaging-messages (`#c64cb31 `_) 45 | * Add support for Python 3.9 (for RHEL9) (`#8d63e86 `_) 46 | * Add the schema package journal-to-fedora-messaging-messages (`#3d9bc35 `_) 47 | * Add the `fedora-image-uploader-messages` schema package (`#7da3074 `_) 48 | 49 | Bug Fixes 50 | ^^^^^^^^^ 51 | 52 | * Fix unit tests (`#085f5c4 `_) 53 | 54 | Other Changes 55 | ^^^^^^^^^^^^^ 56 | 57 | * Remove unneccessary int call (`#487341f `_) 58 | 59 | 60 | v1.4.0 61 | ====== 62 | 63 | Released on 2024-06-12. 64 | 65 | Features 66 | ^^^^^^^^ 67 | 68 | * Rename the unused `username` column to `agent_name` and use it to store the agent name (`#1309 `_) 69 | * Add a JSON index on the message headers 70 | 71 | Bug Fixes 72 | ^^^^^^^^^ 73 | 74 | * Fix the `get_first()` query to actually return only one message 75 | 76 | 77 | v1.3.0 78 | ====== 79 | 80 | Released on 2024-05-22. 81 | 82 | Features 83 | ^^^^^^^^ 84 | 85 | * Add a ``get_first()`` method on ``Message`` to get the first message matching 86 | a grep-like query (`99fb739 `_). 87 | 88 | Bug Fixes 89 | ^^^^^^^^^ 90 | 91 | * Don't compute the total when not necessary (`99fb739 `_). 92 | 93 | Documentation Improvements 94 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 95 | 96 | * Add online documentation with Sphinx, see https://datanommer.readthedocs.io 97 | (`2631885 `_). 98 | 99 | Other Changes 100 | ^^^^^^^^^^^^^ 101 | 102 | * Improve the unit tests (`610067f `_, `075052c `_). 103 | * Update dependencies 104 | 105 | 106 | v1.2.0 107 | ====== 108 | 109 | Released on 2024-04-15. 110 | This is a feature release that adds schema packages and upgrades the SQLAlchemy 111 | API to the 2.0 style. 112 | 113 | Features 114 | ^^^^^^^^ 115 | 116 | * Upgrade to the SQLAlchemy 2.0 API (`981e2a4 117 | `_). 118 | * Add a few schema packages to the dependencies. 119 | 120 | Development Improvements 121 | ^^^^^^^^^^^^^^^^^^^^^^^^ 122 | 123 | * Use Ruff instead of flake8 and isort and bandit (`4f7ffaa 124 | `_). 125 | 126 | 127 | v1.1.0 128 | ====== 129 | 130 | Released on 2023-09-22. 131 | This is a feature release that adds ``koji-fedoramessaging-messages`` as a 132 | dependency to interpret koji messages, and updates a lot of our other 133 | dependencies. 134 | 135 | Dependency Changes 136 | ^^^^^^^^^^^^^^^^^^ 137 | 138 | * Drop support for python 3.7, add support for python 3.10 (`PR#890 139 | `_). 140 | * Add the ``koji-fedoramessaging-messages`` package (`#1257 141 | `_). 142 | 143 | 144 | v1.0.4 145 | ====== 146 | 147 | Released on 2022-05-31. 148 | This is a minor release: 149 | 150 | - adds fedora-messaging schema packages 151 | - doesn't require a version of bodhi-messages in the dev deps 152 | - adjusts pyproject for spec needs 153 | - fixes integration of Alembic 154 | 155 | 156 | v1.0.3 157 | ====== 158 | 159 | Released on 2022-03-18. This is a minor release: 160 | 161 | - support fedora-messaging 3.0+ 162 | - update dependencies 163 | 164 | 165 | v1.0.0 166 | ====== 167 | 168 | Released on 2022-01-17. 169 | 170 | This is a major release that uses TimescaleDB to store the data. 171 | The list of changes is too big to list here. 
172 | -------------------------------------------------------------------------------- /datanommer.models/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "datanommer.models" 3 | version = "1.4.4" 4 | description = "SQLAlchemy models for datanommer" 5 | authors = [ 6 | "Fedora Infrastructure " 7 | ] 8 | license = "GPL-3.0-or-later" 9 | readme = "README.rst" 10 | repository = "https://github.com/fedora-infra/datanommer" 11 | homepage = "https://github.com/fedora-infra/datanommer" 12 | packages = [ 13 | { include = "datanommer/models" }, 14 | { include = "datanommer/models/testing"}, 15 | { include = "datanommer/models/alembic" }, 16 | ] 17 | include = [ 18 | { path = "*.ini", format = "sdist" }, 19 | { path = "*.txt", format = "sdist" }, 20 | { path = "*.cfg", format = "sdist" }, 21 | { path = "*.rst", format = "sdist" }, 22 | { path = "*.xml", format = "sdist" }, 23 | { path = "tests/*", format = "sdist" }, 24 | ] 25 | 26 | [tool.poetry.dependencies] 27 | python = "^3.11" 28 | SQLAlchemy = "^1.3.24 || ^2.0.0" 29 | alembic = "^1.6.5" 30 | psycopg2 = [ 31 | {version = "<2.9", python = "<3.11"}, 32 | {version = "^2.9.0", python = ">=3.11"} 33 | ] 34 | 35 | fedora-messaging = ">=2.1.0" 36 | 37 | # Message schemas. The reference list of all message schemas is in 38 | # https://github.com/fedora-infra/fedora-messaging/blob/develop/docs/schema-packages.txt 39 | anitya-schema = {version = "*", optional = true} 40 | bodhi-messages = {version = "*", optional = true} 41 | bugzilla2fedmsg-schema = {version = "*", optional = true} 42 | ci-messages = {version = "*", optional = true} 43 | copr-messaging = {version = "*", optional = true} 44 | discourse2fedmsg-messages = {version = "*", optional = true} 45 | fedocal-messages = {version = "*", optional = true} 46 | fedorainfra-ansible-messages = {version = "*", optional = true} 47 | fedora-elections-messages = {version = "*", optional = true} 48 | fedora-image-uploader-messages = {version = "*", optional = true} 49 | fedora-messaging-git-hook-messages = {version = "*", optional = true} 50 | fedora-messaging-the-new-hotness-schema = {version = "*", optional = true} 51 | fedora-planet-messages = {version = "*", optional = true} 52 | fmn-messages = {version = "*", optional = true} 53 | journal-to-fedora-messaging-messages = {version = "*", optional = true} 54 | kerneltest-messages = {version = "^1.0.0", optional = true} 55 | koji-fedoramessaging-messages = {version = "^1.2.6", optional = true} 56 | koschei-messages = {version = "*", optional = true} 57 | mailman3-fedmsg-plugin-schemas = {version = "*", optional = true} 58 | maubot-fedora-messages = {version = "*", optional = true} 59 | mediawiki-messages = {version = "*", optional = true} 60 | meetbot-messages = {version = "*", optional = true} 61 | mdapi-messages = {version = "*", optional = true} 62 | noggin-messages = {version = "*", optional = true} 63 | nuancier-messages = {version = "*", optional = true} 64 | pagure-messages = {version = "*", optional = true} 65 | tahrir-messages = {version = "*", optional = true} 66 | webhook-to-fedora-messaging-messages = {version = "*", optional = true} 67 | 68 | [tool.poetry.group.dev.dependencies] 69 | pre-commit = "*" 70 | black = "*" 71 | ruff = "*" 72 | pytest = "*" 73 | liccheck = "*" 74 | pytest-cov = "*" 75 | pytest-postgresql = "*" 76 | pytest-mock = "*" 77 | bodhi-messages = "*" 78 | towncrier = "*" 79 | poetry-plugin-export = "^1.9.0" 80 | 81 | [tool.poetry.extras] 82 | schemas = 
[ 83 | "anitya-schema", 84 | "bodhi-messages", 85 | "bugzilla2fedmsg-schema", 86 | "ci-messages", 87 | "copr-messaging", 88 | "discourse2fedmsg-messages", 89 | "fedocal-messages", 90 | "fedorainfra-ansible-messages", 91 | "fedora-elections-messages", 92 | "fedora-image-uploader-messages", 93 | "fedora-messaging-git-hook-messages", 94 | "fedora-messaging-the-new-hotness-schema", 95 | "fedora-planet-messages", 96 | "fmn-messages", 97 | "journal-to-fedora-messaging-messages", 98 | "kerneltest-messages", 99 | "koji-fedoramessaging-messages", 100 | "koschei-messages", 101 | "mailman3-fedmsg-plugin-schemas", 102 | "maubot-fedora-messages", 103 | "mediawiki-messages", 104 | "meetbot-messages", 105 | "mdapi-messages", 106 | "noggin-messages", 107 | "nuancier-messages", 108 | "pagure-messages", 109 | "tahrir-messages", 110 | "webhook-to-fedora-messaging-messages", 111 | ] 112 | 113 | 114 | [build-system] 115 | requires = ["poetry-core>=1.0.0"] 116 | build-backend = "poetry.core.masonry.api" 117 | 118 | 119 | [tool.towncrier] 120 | package = "datanommer.models" 121 | directory = "news/" 122 | title_format = "v{version}" 123 | issue_format = "{issue}" 124 | template = "../tools/towncrier/template.rst.j2" 125 | underlines = "=^-" 126 | wrap = false 127 | all_bullets = true 128 | 129 | [[tool.towncrier.type]] 130 | directory = "bic" 131 | name = "Backwards Incompatible Changes" 132 | showcontent = true 133 | 134 | [[tool.towncrier.type]] 135 | directory = "dependency" 136 | name = "Dependency Changes" 137 | showcontent = true 138 | 139 | [[tool.towncrier.type]] 140 | directory = "feature" 141 | name = "Features" 142 | showcontent = true 143 | 144 | [[tool.towncrier.type]] 145 | directory = "bug" 146 | name = "Bug Fixes" 147 | showcontent = true 148 | 149 | [[tool.towncrier.type]] 150 | directory = "dev" 151 | name = "Development Improvements" 152 | showcontent = true 153 | 154 | [[tool.towncrier.type]] 155 | directory = "docs" 156 | name = "Documentation Improvements" 157 | showcontent = true 158 | 159 | [[tool.towncrier.type]] 160 | directory = "other" 161 | name = "Other Changes" 162 | showcontent = true 163 | 164 | [[tool.towncrier.type]] 165 | directory = "author" 166 | name = "Contributors" 167 | showcontent = true 168 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | #
13 | 
14 | import importlib.metadata
15 | import os
16 | import sys
17 | 
18 | 
19 | SUBMODULES = ("models", "commands")
20 | 
21 | topdir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
22 | 
23 | for submodule in SUBMODULES:
24 | sys.path.insert(0, os.path.join(topdir, f"datanommer.{submodule}"))
25 | 
26 | 
27 | # -- Project information -----------------------------------------------------
28 | 
29 | project = "Datanommer"
30 | copyright = "2013, Contributors to the Fedora Project"
31 | author = "Fedora Infrastructure"
32 | 
33 | # The full version, including alpha/beta/rc tags
34 | release = importlib.metadata.version("datanommer.models")
35 | 
36 | # The short X.Y version
37 | version = ".".join(release.split(".")[:2])
38 | 
39 | 
40 | # -- General configuration ---------------------------------------------------
41 | 
42 | # Add any Sphinx extension module names here, as strings. They can be
43 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
44 | # ones.
45 | extensions = [
46 | "sphinx.ext.autodoc",
47 | "sphinx.ext.intersphinx",
48 | "sphinx.ext.extlinks",
49 | "sphinx.ext.viewcode",
50 | "sphinx.ext.napoleon",
51 | "myst_parser",
52 | "sphinx_click",
53 | ]
54 | 
55 | # Add any paths that contain templates here, relative to this directory.
56 | templates_path = ["_templates"]
57 | 
58 | # List of patterns, relative to source directory, that match files and
59 | # directories to ignore when looking for source files.
60 | # This pattern also affects html_static_path and html_extra_path.
61 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
62 | 
63 | # Explicitly set the master doc
64 | # https://github.com/readthedocs/readthedocs.org/issues/2569
65 | master_doc = "index"
66 | 
67 | 
68 | # -- Options for HTML output -------------------------------------------------
69 | 
70 | # The theme to use for HTML and HTML Help pages. See the documentation for
71 | # a list of builtin themes.
72 | #
73 | html_theme = "alabaster"
74 | 
75 | 
76 | # Theme options are theme-specific and customize the look and feel of a theme
77 | # further. For a list of options available for each theme, see the
78 | # documentation.
79 | html_theme_options = {
80 | "github_user": "fedora-infra",
81 | "github_repo": "datanommer",
82 | "page_width": "1040px",
83 | "show_related": True,
84 | "sidebar_collapse": True,
85 | "caption_font_size": "140%",
86 | }
87 | 
88 | # Add any paths that contain custom static files (such as style sheets) here,
89 | # relative to this directory. They are copied after the builtin static files,
90 | # so a file named "default.css" will overwrite the builtin "default.css".
91 | html_static_path = ["_static"]
92 | 
93 | 
94 | # -- Extension configuration -------------------------------------------------
95 | 
96 | source_suffix = {
97 | ".rst": "restructuredtext",
98 | ".md": "markdown",
99 | }
100 | 
101 | myst_enable_extensions = [
102 | "colon_fence",
103 | ]
104 | myst_heading_anchors = 3
105 | 
106 | 
107 | # -- Options for intersphinx extension ---------------------------------------
108 | # https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#configuration
109 | 
110 | # Example configuration for intersphinx: refer to the Python standard library. 
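# For instance, with the "python" entry below, a cross-reference such as
# :func:`json.loads` in a docstring resolves to the matching page on
# docs.python.org instead of ending up as a broken link.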
111 | intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} 112 | 113 | 114 | # -- Misc ----- 115 | 116 | 117 | def run_apidoc(_): 118 | from sphinx.ext import apidoc 119 | 120 | for submodule in SUBMODULES: 121 | print( 122 | " ".join( 123 | [ 124 | "sphinx-apidoc", 125 | "-f", 126 | "-o", 127 | os.path.join(topdir, "docs", "_source", submodule), 128 | "-T", 129 | "-e", 130 | "-M", 131 | "--implicit-namespaces", 132 | os.path.join(topdir, f"datanommer.{submodule}", "datanommer"), 133 | # exclude patterns: 134 | os.path.join(topdir, f"datanommer.{submodule}", "tests"), 135 | os.path.join( 136 | topdir, f"datanommer.{submodule}", "datanommer", submodule, "alembic" 137 | ), 138 | ] 139 | ) 140 | ) 141 | apidoc.main( 142 | [ 143 | "-f", 144 | "-o", 145 | os.path.join(topdir, "docs", "_source", submodule), 146 | "-T", 147 | "-e", 148 | "-M", 149 | "--implicit-namespaces", 150 | os.path.join(topdir, f"datanommer.{submodule}", "datanommer"), 151 | # exclude patterns: 152 | os.path.join(topdir, f"datanommer.{submodule}", "tests"), 153 | os.path.join(topdir, f"datanommer.{submodule}", "datanommer", submodule, "alembic"), 154 | ] 155 | ) 156 | # This file is going to cause duplicate references 157 | os.remove(os.path.join(topdir, "docs", "_source", submodule, "datanommer.rst")) 158 | generate_click_commands( 159 | os.path.join(topdir, "docs", "_source", "commands.rst"), 160 | "datanommer.commands", 161 | nested="full", 162 | ) 163 | 164 | 165 | def setup(app): 166 | app.connect("builder-inited", run_apidoc) 167 | 168 | 169 | def generate_click_commands(output, module, *, with_header=True, nested=None): 170 | commands = [] 171 | for ep in importlib.metadata.entry_points(group="console_scripts"): 172 | ep_module = ep.value.partition(":")[0] 173 | if not ep_module.startswith(f"{module}.") and ep_module != module: 174 | continue 175 | commands.append((ep.name, ep.value)) 176 | if not commands: 177 | return 178 | with open(output, "w") as fh: 179 | if with_header: 180 | fh.write("Commands\n") 181 | fh.write("========\n") 182 | fh.write("\n") 183 | for name, module in commands: 184 | fh.write(f".. 
click:: {module}\n") 185 | fh.write(f" :prog: {name}\n") 186 | if nested: 187 | fh.write(f" :nested: {nested}\n") 188 | fh.write("\n") 189 | -------------------------------------------------------------------------------- /datanommer.commands/datanommer/commands/extract_users.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | 4 | import click 5 | from fedora_messaging.exceptions import ValidationError 6 | from fedora_messaging.message import load_message as load_message 7 | from sqlalchemy import and_, not_, select 8 | 9 | import datanommer.models as m 10 | 11 | from .utils import CHUNK_SIZE, config_option, get_config, iterate_over_messages 12 | 13 | 14 | log = logging.getLogger(__name__) 15 | 16 | USERNAMES_SKIP_TOPICS = [ 17 | "%.anitya.%", 18 | "%.discourse.%", 19 | "%.hotness.update.bug.file", 20 | "%.hotness.update.drop", 21 | "%.koschei.%", 22 | "%.mdapi.%", 23 | ] 24 | AGENT_SKIP_TOPICS = [ 25 | "%.hotness.update.bug.file", 26 | "%.hotness.update.drop", 27 | ] 28 | 29 | 30 | @click.group() 31 | @config_option 32 | @click.option("--topic", default=None, help="Only extract users for messages of a specific topic.") 33 | @click.option( 34 | "--category", 35 | default=None, 36 | help="Only extract users for messages of a specific category.", 37 | ) 38 | @click.option( 39 | "--start", 40 | default=None, 41 | type=click.DateTime(), 42 | help="Only extract users for messages after a specific timestamp.", 43 | ) 44 | @click.option( 45 | "--end", 46 | default=None, 47 | type=click.DateTime(), 48 | help="Only extract users for messages before a specific timestamp.", 49 | ) 50 | @click.option( 51 | "--force-schema", 52 | default=None, 53 | help=( 54 | "Force usage of this schema name to extract usernames. 
This is the key in the " 55 | "exposed entry point / plugin, for example: wiki.article.edit.v1" 56 | ), 57 | ) 58 | @click.option( 59 | "--chunk-size", 60 | default=CHUNK_SIZE, 61 | type=int, 62 | show_default=True, 63 | help="Go through messages these many at a time (lower is slower but saves memory).", 64 | ) 65 | @click.option( 66 | "--debug", 67 | is_flag=True, 68 | help="Show more information.", 69 | ) 70 | @click.pass_context 71 | def main(ctx, config_path, topic, category, start, end, force_schema, chunk_size, debug): 72 | ctx.ensure_object(dict) 73 | ctx.obj["options"] = ctx.params 74 | ctx.obj["config"] = config = get_config(config_path) 75 | m.init( 76 | config["datanommer_sqlalchemy_url"], 77 | alembic_ini=config["alembic_ini"], 78 | ) 79 | if topic and category: 80 | raise click.UsageError("can't use both --topic and --category, choose one.") 81 | 82 | if not start: 83 | ctx.obj["options"]["start"] = m.session.execute( 84 | select(m.Message.timestamp).order_by(m.Message.timestamp).limit(1) 85 | ).scalar_one() 86 | 87 | query = select(m.Message) 88 | if topic: 89 | query = query.where(m.Message.topic == topic) 90 | elif category: 91 | query = query.where(m.Message.category == category) 92 | 93 | query = query.where(m.Message.timestamp >= ctx.obj["options"]["start"]) 94 | if end: 95 | query = query.where(m.Message.timestamp < end) 96 | else: 97 | end = datetime.datetime.now() 98 | 99 | if force_schema is None: 100 | query = query.where( 101 | m.Message.headers.has_key("fedora_messaging_schema"), 102 | m.Message.headers["fedora_messaging_schema"].astext != "base.message", 103 | ) 104 | ctx.obj["query"] = query 105 | 106 | 107 | @main.command("usernames") 108 | @click.pass_context 109 | def extract_usernames(ctx): 110 | """Go over old messages, extract users and store them. 111 | 112 | This is useful when a message schema has been added and we want to populate the users table 113 | with the new information. 
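A typical invocation might look like this (the config path and topic are
illustrative examples, not defaults):

    datanommer-extract-users -c /etc/fedora-messaging/config.toml \
        --topic org.fedoraproject.prod.git.receive usernames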
114 | """ 115 | debug = ctx.obj["options"]["debug"] 116 | query = ctx.obj["query"] 117 | query = query.where( 118 | and_(*[not_(m.Message.topic.like(skipped)) for skipped in USERNAMES_SKIP_TOPICS]) 119 | ) 120 | query = query.join( 121 | m.users_assoc_table, 122 | and_( 123 | m.Message.id == m.users_assoc_table.c.msg_id, 124 | m.Message.timestamp == m.users_assoc_table.c.msg_timestamp, 125 | ), 126 | isouter=True, 127 | ).where(m.users_assoc_table.c.msg_id.is_(None)) 128 | 129 | for message in iterate_over_messages( 130 | query, ctx.obj["options"]["start"], ctx.obj["options"]["chunk_size"] 131 | ): 132 | fm_message = get_fedora_message(message, force_schema=ctx.obj["options"]["force_schema"]) 133 | if fm_message is None or not fm_message.usernames: 134 | m.session.expunge(message) 135 | continue 136 | message._insert_list(m.User, m.users_assoc_table, fm_message.usernames) 137 | if debug: 138 | click.echo( 139 | f"Usernames for message {message.msg_id} of topic {message.topic}" 140 | f": {', '.join(fm_message.usernames)}" 141 | ) 142 | 143 | 144 | def get_fedora_message(db_message, force_schema): 145 | headers = db_message.headers 146 | if force_schema and headers is not None: 147 | headers["fedora_messaging_schema"] = force_schema 148 | try: 149 | fm_message = load_message( 150 | { 151 | "topic": db_message.topic, 152 | "headers": headers, 153 | "id": db_message.msg_id, 154 | "body": db_message.msg, 155 | } 156 | ) 157 | except ValidationError as e: 158 | try: 159 | # Remove this block after fedora-messaging 3.6.0 and use e.summary 160 | error_msg = e.args[0].summary 161 | except AttributeError: 162 | error_msg = str(e).split("\n")[0] 163 | click.echo( 164 | f"Could not load message {db_message.msg_id} on topic {db_message.topic}: {error_msg}", 165 | err=True, 166 | ) 167 | return None 168 | 169 | return fm_message 170 | 171 | 172 | @main.command("agent") 173 | @click.pass_context 174 | def extract_agent(ctx): 175 | """Go over old messages, extract the agent_name and store it. 176 | 177 | This is useful when a message schema has been added and we want to populate the agent_name 178 | column with the new information. 
179 | """ 180 | debug = ctx.obj["options"]["debug"] 181 | query = ctx.obj["query"] 182 | query = query.where( 183 | and_(*[not_(m.Message.topic.like(skipped)) for skipped in AGENT_SKIP_TOPICS]) 184 | ) 185 | query = query.where(m.Message.agent_name.is_(None)) 186 | 187 | for message in iterate_over_messages( 188 | query, ctx.obj["options"]["start"], ctx.obj["options"]["chunk_size"] 189 | ): 190 | fm_message = get_fedora_message(message, force_schema=ctx.obj["options"]["force_schema"]) 191 | if fm_message is None or not fm_message.agent_name: 192 | m.session.expunge(message) 193 | continue 194 | message.agent_name = fm_message.agent_name 195 | if debug: 196 | click.echo( 197 | f"Agent for message {message.msg_id} of topic {message.topic}" 198 | f": {fm_message.agent_name}" 199 | ) 200 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Test & Build 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | - stable 8 | - staging 9 | tags: 10 | - "*" 11 | pull_request: 12 | branches: 13 | - develop 14 | - stable 15 | - staging 16 | 17 | jobs: 18 | 19 | checks: 20 | name: Checks 21 | runs-on: ubuntu-latest 22 | container: fedorapython/fedora-python-tox:latest 23 | steps: 24 | - uses: actions/checkout@v6 25 | 26 | - name: Install pre-commit 27 | run: | 28 | dnf install -y pre-commit git krb5-devel libpq-devel poetry python3-poetry-plugin-export 29 | 30 | - name: Mark the working directory as safe for Git 31 | run: git config --global --add safe.directory $PWD 32 | 33 | - name: Run pre-commit checks 34 | run: pre-commit run -v --all-files 35 | 36 | 37 | licenses: 38 | name: Licenses 39 | runs-on: ubuntu-latest 40 | container: fedorapython/fedora-python-tox:latest 41 | steps: 42 | - uses: actions/checkout@v6 43 | 44 | - name: Install RPM dependencies 45 | run: | 46 | dnf install -y pre-commit git krb5-devel libpq-devel poetry python3-poetry-plugin-export 47 | 48 | - name: Check licenses for datanommer.${{ matrix.package }} 49 | run: tox -e licenses 50 | working-directory: datanommer.${{ matrix.package }} 51 | 52 | strategy: 53 | matrix: 54 | package: 55 | - models 56 | - consumer 57 | - commands 58 | 59 | 60 | unit_tests: 61 | name: Unit tests 62 | runs-on: ubuntu-latest 63 | container: fedorapython/fedora-python-tox:latest 64 | steps: 65 | - uses: actions/checkout@v6 66 | 67 | - name: Install RPM dependencies 68 | run: | 69 | dnf install -y timescaledb postgresql-server pre-commit git krb5-devel libpq-devel poetry python3-poetry-plugin-export 70 | 71 | - name: Run unit tests for datanommer.${{ matrix.package }} 72 | # Don't run the tests as root or pg_ctl will refuse to start 73 | run: | 74 | chown postgres:postgres . 
75 | sudo -u postgres tox -e ${{ matrix.pyver }} -- -vv 76 | working-directory: datanommer.${{ matrix.package }} 77 | 78 | strategy: 79 | matrix: 80 | pyver: 81 | - py311 82 | - py312 83 | package: 84 | - models 85 | - consumer 86 | - commands 87 | 88 | 89 | # https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ 90 | build: 91 | name: Build distributions 📦 92 | runs-on: ubuntu-latest 93 | needs: 94 | - checks 95 | - licenses 96 | - unit_tests 97 | # outputs: 98 | # release-notes-models: ${{ steps.release-notes.outputs.models }} 99 | # release-notes-consumer: ${{ steps.release-notes.outputs.consumer }} 100 | # release-notes-commands: ${{ steps.release-notes.outputs.commands }} 101 | 102 | steps: 103 | 104 | - uses: actions/checkout@v6 105 | - name: Set up Python 106 | uses: actions/setup-python@v6 107 | with: 108 | python-version: "3.x" 109 | 110 | - name: Install pypa/build 111 | run: python3 -m pip install build --user 112 | - name: Build a binary wheel and a source tarball 113 | run: | 114 | cd datanommer.${{ matrix.package }} 115 | python3 -m build 116 | 117 | - name: Store the distribution packages 118 | uses: actions/upload-artifact@v6 119 | with: 120 | name: python-package-distributions-${{ matrix.package }} 121 | path: datanommer.${{ matrix.package }}/dist/ 122 | if-no-files-found: error 123 | 124 | # - name: Extract changelog section 125 | #  id: extract-changelog 126 | #  uses: sean0x42/markdown-extract@v2 127 | #  with: 128 | #  file: datanommer.${{ matrix.package }}/NEWS.md 129 | #  # pattern: 'Version\s+\[${{ steps.extract-version.outputs.ESCAPED_VERSION }}\]\(.*\)' 130 | #  pattern: 'Version\s+\[[[:word:].-]+\]\(.*\)' 131 | #  no-print-matched-heading: true 132 | # - name: Store the release notes in the output 133 | #  id: release-notes 134 | #  run: | 135 | #  echo '${{ matrix.package }}<> "$GITHUB_OUTPUT" 136 | #  echo "${{ steps.extract-changelog.outputs.markdown }}" >> "$GITHUB_OUTPUT" 137 | #  echo "EOF" >> "$GITHUB_OUTPUT" 138 | # - name: Show the changelog 139 | #  env: 140 | #  CHANGELOG: ${{ steps.extract-changelog.outputs.markdown }} 141 | #  run: echo "$CHANGELOG" 142 | 143 | strategy: 144 | matrix: 145 | package: 146 | - models 147 | - consumer 148 | - commands 149 | 150 | 151 | publish-to-pypi: 152 | name: Publish to PyPI 🚀 153 | if: startsWith(github.ref, 'refs/tags/') && !contains(github.ref, 'rc') # only publish to PyPI on final tag pushes 154 | needs: 155 | - build 156 | runs-on: ubuntu-latest 157 | environment: 158 | name: pypi 159 | url: https://pypi.org/p/datanommer.${{ matrix.package }} 160 | permissions: 161 | id-token: write # IMPORTANT: mandatory for trusted publishing 162 | 163 | steps: 164 | - name: Download all the dists 165 | uses: actions/download-artifact@v7 166 | with: 167 | name: python-package-distributions-${{ matrix.package }} 168 | path: dist-${{ matrix.package }}/ 169 | 170 | - name: Publish distribution to PyPI 171 | uses: pypa/gh-action-pypi-publish@release/v1 172 | with: 173 | packages-dir: dist-${{ matrix.package }}/ 174 | 175 | strategy: 176 | matrix: 177 | package: 178 | - models 179 | - consumer 180 | - commands 181 | 182 | 183 | github-release: 184 | name: Create a GitHub Release 📢 185 | needs: 186 | - publish-to-pypi 187 | - build 188 | runs-on: ubuntu-latest 189 | permissions: 190 | contents: write # IMPORTANT: mandatory for making GitHub Releases 191 | id-token: write # IMPORTANT: mandatory for sigstore 192 | 193 | steps: 194 | - name: Download all the dists 
195 | uses: actions/download-artifact@v7 196 | with: 197 | pattern: python-package-distributions-* 198 | path: dist/ 199 | merge-multiple: true 200 | 201 | - name: Sign the dists with Sigstore 202 | uses: sigstore/gh-action-sigstore-python@v3.2.0 203 | with: 204 | inputs: >- 205 | ./dist/*.tar.gz 206 | ./dist/*.whl 207 | 208 | - name: Release 209 | uses: softprops/action-gh-release@v2 210 | with: 211 | draft: true 212 | files: dist/* 213 | fail_on_unmatched_files: true 214 | generate_release_notes: true 215 | # body: | 216 | # ## Models (datanommer.models) 217 | # ${{ needs.build.outputs.release-notes-models }} 218 | 219 | # ## Consumer (datanommer.consumer) 220 | # ${{ needs.build.outputs.release-notes-consumer }} 221 | 222 | # ## Commands (datanommer.commands) 223 | # ${{ needs.build.outputs.release-notes-commands }} 224 | -------------------------------------------------------------------------------- /tools/timescaledb/migrate-to-timescaledb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Migrate the datanommer database from the pre-2021 format to the TimescaleDB-based format. 5 | """ 6 | 7 | from json import dumps, JSONDecodeError, loads 8 | 9 | import click 10 | import toml 11 | from sqlalchemy import ( 12 | Column, 13 | create_engine, 14 | DateTime, 15 | ForeignKey, 16 | func, 17 | Integer, 18 | select, 19 | Table, 20 | UnicodeText, 21 | ) 22 | from sqlalchemy.exc import NoResultFound 23 | from sqlalchemy.orm import declarative_base, relationship, Session 24 | 25 | import datanommer.models as dm 26 | 27 | 28 | CHUNK_SIZE = 5000 29 | 30 | OldBase = declarative_base() 31 | 32 | user_assoc_table = Table( 33 | "user_messages", 34 | OldBase.metadata, 35 | Column("username", UnicodeText, ForeignKey("user.name")), 36 | Column("msg", Integer, ForeignKey("messages.id")), 37 | ) 38 | 39 | pack_assoc_table = Table( 40 | "package_messages", 41 | OldBase.metadata, 42 | Column("package", UnicodeText, ForeignKey("package.name")), 43 | Column("msg", Integer, ForeignKey("messages.id")), 44 | ) 45 | 46 | 47 | class OldMessage(OldBase): 48 | __tablename__ = "messages" 49 | id = Column(Integer, primary_key=True) 50 | msg_id = Column(UnicodeText) 51 | i = Column(Integer) 52 | topic = Column(UnicodeText) 53 | timestamp = Column(DateTime) 54 | certificate = Column(UnicodeText) 55 | signature = Column(UnicodeText) 56 | category = Column(UnicodeText) 57 | username = Column(UnicodeText) 58 | crypto = Column(UnicodeText) 59 | source_name = Column(UnicodeText) 60 | source_version = Column(UnicodeText) 61 | _msg = Column(UnicodeText) 62 | _headers = Column(UnicodeText) 63 | 64 | users = relationship("User", secondary=user_assoc_table, lazy="selectin") 65 | packages = relationship("Package", secondary=pack_assoc_table, lazy="selectin") 66 | 67 | 68 | class User(OldBase): 69 | __tablename__ = "user" 70 | 71 | name = Column(UnicodeText, primary_key=True) 72 | 73 | 74 | class Package(OldBase): 75 | __tablename__ = "package" 76 | 77 | name = Column(UnicodeText, primary_key=True) 78 | 79 | 80 | def import_message(message): 81 | msg = message._msg.replace("\\u0000", "") 82 | try: 83 | msg = loads(msg) 84 | except JSONDecodeError: 85 | click.echo(f"Can't decode json in message {message.msg_id} ({message.timestamp})") 86 | with open("failed.log", "a") as failedlog: 87 | failedlog.write( 88 | dumps( 89 | { 90 | "id": message.id, 91 | "msg_id": message.msg_id, 92 | "timestamp": message.timestamp.isoformat(), 93 | "topic": message.topic, 94 | 
"msg": message._msg, 95 | } 96 | ) 97 | ) 98 | failedlog.write("\n") 99 | return 100 | if not msg: 101 | with open("failed.log", "a") as failedlog: 102 | failedlog.write( 103 | dumps( 104 | { 105 | "id": message.id, 106 | "msg_id": message.msg_id, 107 | "timestamp": message.timestamp.isoformat(), 108 | "topic": message.topic, 109 | "msg": repr(message._msg), 110 | } 111 | ) 112 | ) 113 | failedlog.write("\n") 114 | return 115 | headers = message._headers 116 | if headers is not None: 117 | headers = headers.replace("\\u0000", "") 118 | headers = loads(headers) 119 | dm.Message.create( 120 | i=message.i, 121 | msg_id=message.msg_id, 122 | topic=message.topic, 123 | timestamp=message.timestamp, 124 | username=message.username, 125 | crypto=message.crypto, 126 | certificate=message.certificate, 127 | signature=message.signature, 128 | msg=msg, 129 | headers=headers, 130 | users=[u.name for u in message.users], 131 | packages=[p.name for p in message.packages], 132 | ) 133 | 134 | 135 | # https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery 136 | def windowed_query(q, column, windowsize): 137 | """Break a Query into chunks on a given column.""" 138 | 139 | single_entity = q.is_single_entity 140 | q = q.add_columns(column).order_by(column) 141 | last_id = None 142 | 143 | while True: 144 | subq = q 145 | if last_id is not None: 146 | subq = subq.where(column > last_id) 147 | chunk = subq.limit(windowsize).all() 148 | if not chunk: 149 | break 150 | last_id = chunk[-1][-1] 151 | for row in chunk: 152 | if single_entity: 153 | yield row[0] 154 | else: 155 | yield row[0:-1] 156 | 157 | 158 | @click.command() 159 | @click.option( 160 | "config_path", 161 | "-c", 162 | "--config", 163 | type=click.Path(), 164 | default="migrate.toml", 165 | show_default=True, 166 | ) 167 | @click.option( 168 | "since", 169 | "-s", 170 | "--since", 171 | type=click.DateTime(), 172 | ) 173 | def main(config_path, since): 174 | config = toml.load(config_path) 175 | dm.init(config["dest_url"], create=True) 176 | src_engine = create_engine(config["source_url"], future=True) 177 | 178 | with Session(src_engine) as src_db: 179 | click.echo("Querying messages...") 180 | old_messages = src_db.scalars(select(OldMessage).order_by(OldMessage.id)) 181 | latest = dm.session.scalars( 182 | select(dm.Message).order_by(dm.Message.id.desc()).limit(1) 183 | ).first() 184 | if latest: 185 | try: 186 | latest_in_src = src_db.execute( 187 | select(OldMessage).where(OldMessage.msg_id == latest.msg_id) 188 | ).scalar_one() 189 | except NoResultFound: 190 | latest_in_src = src_db.execute( 191 | select(OldMessage) 192 | .where(OldMessage.timestamp == latest.timestamp) 193 | .where(OldMessage.topic == latest.topic) 194 | ).scalar_one() 195 | old_messages = old_messages.where(OldMessage.id > latest_in_src.id) 196 | click.echo(f"Resuming from message {latest.msg_id}") 197 | if since: 198 | old_messages = old_messages.where(OldMessage.timestamp > since) 199 | click.echo(f"Only importing messages after {since}") 200 | total = old_messages.count() 201 | with click.progressbar( 202 | length=total, 203 | label=f"Importing {total} messages", 204 | item_show_func=lambda m: m.timestamp.strftime("%Y-%m") if m else "", 205 | # item_show_func=lambda m: m.msg_id if m else "", 206 | ) as bar: 207 | for old_message in windowed_query(old_messages, OldMessage.id, CHUNK_SIZE): 208 | import_message(old_message) 209 | # Commit periodically 210 | if bar._completed_intervals % 1000 == 0: 211 | dm.session.commit() 212 | else: 213 | 
dm.session.flush() 214 | bar.update(1, old_message) 215 | dm.session.commit() 216 | # Verify counts 217 | click.echo(f"Messages in the old DB: {src_db.scalar(select(func.count(OldMessage.id)))}") 218 | click.echo( 219 | f"Messages in the new DB: {dm.session.scalar(select(func.count(dm.Message.id)))}" 220 | ) 221 | 222 | 223 | if __name__ == "__main__": 224 | main() 225 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Thanks for considering contributing to datanommer, we really appreciate it! 6 | 7 | Quickstart: 8 | 9 | 1. Look for an `existing issue 10 | `_ about the bug or 11 | feature you're interested in. If you can't find an existing issue, create a 12 | `new one `_. 13 | 14 | 2. Fork the `repository on GitHub 15 | `_. 16 | 17 | 3. Fix the bug or add the feature, and then write one or more tests which show 18 | the bug is fixed or the feature works. 19 | 20 | 4. Submit a pull request and wait for a maintainer to review it. 21 | 22 | More detailed guidelines to help ensure your submission goes smoothly are 23 | below. 24 | 25 | .. note:: If you do not wish to use GitHub, please send patches to 26 | infrastructure@lists.fedoraproject.org. 27 | 28 | 29 | Development Environment 30 | ======================= 31 | 32 | Vagrant allows contributors to get quickly up and running with a datanommer 33 | development environment by automatically configuring a virtual machine. This 34 | virtual machine also includes a running datanommer service to make it easy to 35 | test your changes. 36 | 37 | The datanommer Vagrant environment is configured to be empty when first 38 | provisioned, but to consume messages from the stage Fedora Messaging queue. 39 | 40 | To get started, first install the Vagrant and Virtualization 41 | packages needed, and start the libvirt service:: 42 | 43 | $ sudo dnf install ansible libvirt vagrant-libvirt vagrant-sshfs vagrant-hostmanager 44 | $ sudo systemctl enable libvirtd 45 | $ sudo systemctl start libvirtd 46 | 47 | Check out the code and run ``vagrant up``:: 48 | 49 | $ git clone https://github.com/fedora-infra/datanommer 50 | $ cd datanommer 51 | $ vagrant up 52 | 53 | Next, SSH into your newly provisioned development environment:: 54 | 55 | $ vagrant ssh 56 | 57 | The vagrant setup also defines 4 handy commands to interact with the datanommer 58 | consumer:: 59 | 60 | $ datanommer-consumer-start 61 | $ datanommer-consumer-stop 62 | $ datanommer-consumer-restart 63 | $ datanommer-consumer-logs 64 | 65 | Note also, that the commands provided by datanommer.commands are also available 66 | to interact with the datanommer database:: 67 | 68 | $ datanommer-dump 69 | $ datanommer-latest 70 | $ datanommer-stats 71 | $ datanommer-create-db 72 | $ datanommer-refresh-view 73 | 74 | 75 | Guidelines 76 | ========== 77 | 78 | Python Support 79 | -------------- 80 | datanommer supports Python 3.7 or greater. This is automatically enforced by the 81 | continuous integration (CI) suite. 82 | 83 | 84 | Code Style 85 | ---------- 86 | We follow the `PEP8 `_ style guide 87 | for Python. This is automatically enforced by the CI suite. 88 | 89 | We are using `Black ` to automatically format 90 | the source code. It is also checked in CI. The Black webpage contains 91 | instructions to configure your editor to run it on the files you edit. 
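For example, to apply the formatting to the whole tree locally (one possible
invocation, assuming the hook is named ``black`` in the pre-commit config)::

    $ pre-commit run black --all-files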
92 |
93 |
94 | Tests
95 | -----
96 | Datanommer comprises 3 separate modules in this single repository. There
97 | is a top-level `Tox <https://tox.wiki/>`_ file to run the tests on all 3
98 | modules::
99 |
100 | $ tox
101 |
102 | However, tests can also be run on a single module by invoking tox in that
103 | module's directory. For example::
104 |
105 | $ cd datanommer.models/
106 | $ tox
107 |
108 | Note that the tests use virtual environments that are not created from scratch
109 | with every subsequent run of the tests. Therefore, *when changes happen to
110 | dependencies, the tests may fail to run correctly*. To recreate the virtual
111 | environments, run the test commands with the ``-r`` flag, for example::
112 |
113 | $ tox -r
114 |
115 | or::
116 |
117 | $ cd datanommer.models/
118 | $ tox -r
119 |
120 | All code must have test coverage or be explicitly marked as not covered using
121 | the ``# pragma: no cover`` comment. This should only be done if there is a good
122 | reason to not write tests.
123 |
124 | Your pull request should contain tests for your new feature or bug fix. If
125 | you're not certain how to write tests, we will be happy to help you.
126 |
127 |
128 | Pre-commit
129 | ----------
130 | We use the `pre-commit <https://pre-commit.com/>`_ framework to run the checks defined in
131 | ``.pre-commit-config.yaml``, to ensure that the code is up to standard prior to submitting a pull request.
132 |
133 | Pre-commit can be installed as a git hook by running ``pre-commit install``.
134 |
135 |
136 | Release Notes
137 | -------------
138 |
139 | To add entries to the release notes, create a file in the ``news`` directory in the
140 | ``source.type`` name format, where the ``source`` part of the filename is:
141 |
142 | * ``42`` when the change is described in issue ``42``
143 | * ``PR42`` when the change has been implemented in pull request ``42``, and
144 | there is no associated issue
145 | * ``Cabcdef`` when the change has been implemented in changeset ``abcdef``, and
146 | there is no associated issue or pull request.
147 |
148 | And where the extension ``type`` is one of:
149 |
150 | * ``bic``: for backwards incompatible changes
151 | * ``dependency``: for dependency changes
152 | * ``feature``: for new features
153 | * ``bug``: for bug fixes
154 | * ``dev``: for development improvements
155 | * ``docs``: for documentation improvements
156 | * ``other``: for other changes
157 |
158 | The content of the file will end up in the release notes. It should not end with a ``.``
159 | (full stop).
160 |
161 | If it is not present already, add a file in the ``news`` directory named ``username.author``,
162 | where ``username`` is the first part of your commit's email address, containing the name
163 | you want to be credited as. There is a script to generate a list of authors that we run
164 | before releasing, but creating the file manually allows you to set a custom name.
165 |
166 | A preview of the release notes can be generated with
167 | ``towncrier build --draft``.
168 |
169 |
170 | Licensing
171 | ---------
172 |
173 | Your commit messages must include a Signed-off-by tag with your name and e-mail
174 | address, indicating that you agree to the `Developer Certificate of Origin
175 | <https://developercertificate.org/>`_ version 1.1::
176 |
177 | Developer Certificate of Origin
178 | Version 1.1
179 |
180 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
181 | 1 Letterman Drive
182 | Suite D4700
183 | San Francisco, CA, 94129
184 |
185 | Everyone is permitted to copy and distribute verbatim copies of this
186 | license document, but changing it is not allowed.
187 |
188 |
189 | Developer's Certificate of Origin 1.1
190 |
191 | By making a contribution to this project, I certify that:
192 |
193 | (a) The contribution was created in whole or in part by me and I
194 | have the right to submit it under the open source license
195 | indicated in the file; or
196 |
197 | (b) The contribution is based upon previous work that, to the best
198 | of my knowledge, is covered under an appropriate open source
199 | license and I have the right under that license to submit that
200 | work with modifications, whether created in whole or in part
201 | by me, under the same open source license (unless I am
202 | permitted to submit under a different license), as indicated
203 | in the file; or
204 |
205 | (c) The contribution was provided directly to me by some other
206 | person who certified (a), (b) or (c) and I have not modified
207 | it.
208 |
209 | (d) I understand and agree that this project and the contribution
210 | are public and that a record of the contribution (including all
211 | personal information I submit with it, including my sign-off) is
212 | maintained indefinitely and may be redistributed consistent with
213 | this project or the open source license(s) involved.
214 |
215 | Use ``git commit -s`` to add the Signed-off-by tag.
216 |
217 |
218 | Releasing
219 | ---------
220 |
221 | When cutting a new release, follow these steps:
222 |
223 | #. Update the version in ``pyproject.toml``
224 | #. Run ``poetry install`` to update the version in the metadata
225 | #. Add missing authors to the release notes fragments by changing to the ``news`` directory and
226 | running the ``../tools/towncrier/get-authors.py`` script, but check for duplicates and errors
227 | #. Generate the release notes by running ``poetry run towncrier`` (in the base directory)
228 | #. Adjust the release notes in ``NEWS.rst``
229 | #. Commit the changes
230 | #. Push the commit to the upstream GitHub repository (via a PR or not)
231 | #. Change to the stable branch and cherry-pick the commit (or merge if appropriate)
232 | #. Run the checks one last time to be sure: ``tox``
233 | #. Tag the commit with ``-s`` to generate a signed tag
234 | #. Push the commit to the upstream GitHub repository with ``git push``,
235 | and the new tag with ``git push --tags``
236 | #. Generate a tarball and push to PyPI with the command ``poetry publish --build``
237 | #. Create `the release on GitHub <https://github.com/fedora-infra/datanommer/releases>`_ and copy the
238 | release notes in there
239 | #. Deploy and announce (a sketch of the command sequence is shown below).
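As an illustration only, the tagging-and-publishing half of this checklist roughly
corresponds to the following commands; the ``1.2.0`` version number is a placeholder,
and the stable-branch cherry-pick and GitHub release steps are left out::

    $ poetry run towncrier
    $ git commit -s -m "Release 1.2.0"
    $ tox
    $ git tag -s 1.2.0
    $ git push
    $ git push --tags
    $ poetry publish --build

This is a sketch, not a substitute for the checklist above.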
240 | -------------------------------------------------------------------------------- /datanommer.commands/tests/test_extract_users.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import io 3 | from unittest.mock import Mock 4 | 5 | import pytest 6 | import sqlalchemy as sa 7 | from click import progressbar 8 | from click.testing import CliRunner 9 | 10 | import datanommer.models as m 11 | from datanommer.commands.extract_users import main as extract_users 12 | 13 | from .utils import generate_bodhi_update_complete_message, generate_message 14 | 15 | 16 | @pytest.fixture 17 | def bodhi_message_db(datanommer_models): 18 | msg = generate_bodhi_update_complete_message() 19 | m.add(msg) 20 | m.session.execute(m.users_assoc_table.delete()) 21 | msg_in_db = m.Message.from_msg_id(msg.id) 22 | msg_in_db.agent_name = None 23 | m.session.commit() 24 | 25 | m.session.refresh(msg_in_db) 26 | assert len(msg_in_db.users) == 0 27 | assert msg_in_db.agent_name is None 28 | return msg_in_db 29 | 30 | 31 | @pytest.fixture(autouse=True) 32 | def no_expunge(datanommer_models, monkeypatch): 33 | monkeypatch.setattr(m.session, "expunge_all", Mock(name="expunge_all")) 34 | monkeypatch.setattr(m.session, "expunge", Mock(name="expunge")) 35 | 36 | 37 | def test_extract_users(bodhi_message_db, mock_config, mock_init): 38 | runner = CliRunner() 39 | result = runner.invoke(extract_users, ["--debug", "usernames"]) 40 | 41 | assert result.exit_code == 0, result.output 42 | expected_output = ( 43 | "Counting messages...\n" 44 | "Considering 1 message\n\n" 45 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 46 | f"Usernames for message {bodhi_message_db.msg_id} of topic {bodhi_message_db.topic}: " 47 | "dudemcpants, ryanlerch\n" 48 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 49 | ) 50 | assert result.output == expected_output 51 | 52 | m.session.refresh(bodhi_message_db) 53 | assert len(bodhi_message_db.users) > 0 54 | assert {u.name for u in bodhi_message_db.users} == {"dudemcpants", "ryanlerch"} 55 | 56 | 57 | def test_extract_users_topic(bodhi_message_db, mock_config, mock_init): 58 | runner = CliRunner() 59 | result = runner.invoke( 60 | extract_users, ["--topic", "org.fedoraproject.stg.bodhi.update.comment", "usernames"] 61 | ) 62 | 63 | assert result.exit_code == 0, result.output 64 | 65 | m.session.refresh(bodhi_message_db) 66 | assert len(bodhi_message_db.users) > 0 67 | assert {u.name for u in bodhi_message_db.users} == {"dudemcpants", "ryanlerch"} 68 | 69 | 70 | def test_extract_users_wrong_topic(bodhi_message_db, mock_config, mock_init): 71 | runner = CliRunner() 72 | result = runner.invoke(extract_users, ["--topic", "something.else", "usernames"]) 73 | 74 | assert result.exit_code == 0, result.output 75 | 76 | m.session.refresh(bodhi_message_db) 77 | assert len(bodhi_message_db.users) == 0 78 | 79 | 80 | def test_extract_users_category(bodhi_message_db, mock_config, mock_init): 81 | runner = CliRunner() 82 | result = runner.invoke(extract_users, ["--category", "bodhi", "usernames"]) 83 | 84 | assert result.exit_code == 0, result.output 85 | 86 | m.session.refresh(bodhi_message_db) 87 | assert len(bodhi_message_db.users) > 0 88 | assert {u.name for u in bodhi_message_db.users} == {"dudemcpants", "ryanlerch"} 89 | 90 | 91 | def test_extract_users_wrong_category(bodhi_message_db, mock_config, mock_init): 92 | runner = CliRunner() 93 | result = runner.invoke(extract_users, ["--category", "git", 
"usernames"]) 94 | 95 | assert result.exit_code == 0, result.output 96 | 97 | m.session.refresh(bodhi_message_db) 98 | assert len(bodhi_message_db.users) == 0 99 | 100 | 101 | def test_extract_users_topic_and_category(mock_config, mock_init): 102 | runner = CliRunner() 103 | result = runner.invoke( 104 | extract_users, ["--category", "bodhi", "--topic", "some.topic", "usernames"] 105 | ) 106 | assert result.exit_code != 0, result.output 107 | assert "Error: can't use both --topic and --category, choose one." in result.output 108 | 109 | 110 | def test_extract_users_skipped_topic(bodhi_message_db, mock_config, mock_init): 111 | bodhi_message_db.topic = "org.release-monitoring.prod.anitya.project.version.update" 112 | m.session.commit() 113 | 114 | runner = CliRunner() 115 | result = runner.invoke(extract_users, ["usernames"]) 116 | 117 | assert result.exit_code == 0, result.output 118 | 119 | m.session.refresh(bodhi_message_db) 120 | assert len(bodhi_message_db.users) == 0 121 | 122 | 123 | def test_extract_users_no_users(datanommer_models, mock_config, mock_init): 124 | msg = generate_message() 125 | # change the schema header or the script won't pick it up 126 | msg._headers["fedora_messaging_schema"] = "testing" 127 | m.add(msg) 128 | runner = CliRunner() 129 | result = runner.invoke(extract_users, ["usernames"]) 130 | 131 | assert result.exit_code == 0, result.output 132 | users_count = m.session.scalar(sa.select(sa.func.count(m.users_assoc_table.c.msg_id))) 133 | assert users_count == 0 134 | start = datetime.datetime.fromisoformat(msg._headers["sent-at"]).astimezone() 135 | start = str(start).split("+")[0] 136 | assert result.output == ( 137 | "Counting messages...\n" 138 | "Considering 1 message\n\n" 139 | f"Working on 10000 messages sent after {start}\n" 140 | f"Working on 10000 messages sent after {start}\n" 141 | ) 142 | 143 | 144 | def test_extract_start(datanommer_models, mock_config, mock_init): 145 | now = datetime.datetime.now(tz=datetime.UTC) 146 | msg = generate_bodhi_update_complete_message() 147 | # Set the message to have happenned 3 days ago 148 | msg._properties.headers["sent-at"] = (now - datetime.timedelta(days=3)).isoformat() 149 | m.add(msg) 150 | m.session.execute(m.users_assoc_table.delete()) 151 | m.session.commit() 152 | 153 | runner = CliRunner() 154 | # Only look at messages from yesterday on 155 | result = runner.invoke( 156 | extract_users, 157 | ["--start", (now - datetime.timedelta(days=1)).strftime(r"%Y-%m-%d"), "usernames"], 158 | ) 159 | 160 | assert result.exit_code == 0, result.output 161 | # Message must not have had users set 162 | users_count = m.session.scalar(sa.select(sa.func.count(m.users_assoc_table.c.msg_id))) 163 | assert users_count == 0 164 | assert result.output == "Counting messages...\nNo messages matched.\n" 165 | 166 | 167 | def test_extract_end(bodhi_message_db, mock_config, mock_init): 168 | now = datetime.datetime.now() 169 | runner = CliRunner() 170 | # Only look at messages from yesterday on 171 | result = runner.invoke( 172 | extract_users, 173 | ["--end", (now - datetime.timedelta(days=1)).strftime(r"%Y-%m-%d"), "usernames"], 174 | ) 175 | 176 | assert result.exit_code == 0, result.output 177 | # Message must not have had users set 178 | users_count = m.session.scalar(sa.select(sa.func.count(m.users_assoc_table.c.msg_id))) 179 | assert users_count == 0 180 | assert result.output == "Counting messages...\nNo messages matched.\n" 181 | 182 | 183 | def test_extract_force_schema(bodhi_message_db, mock_config, mock_init): 184 | 
runner = CliRunner() 185 | result = runner.invoke(extract_users, ["--force-schema", "base.message", "usernames"]) 186 | 187 | assert result.exit_code == 0, result.output 188 | 189 | m.session.refresh(bodhi_message_db) 190 | assert len(bodhi_message_db.users) == 0 191 | 192 | 193 | def test_extract_invalid_message(bodhi_message_db, mock_config, mock_init): 194 | bodhi_message_db.msg = "this is invalid" 195 | m.session.commit() 196 | 197 | runner = CliRunner() 198 | result = runner.invoke(extract_users, ["usernames"]) 199 | 200 | assert result.exit_code == 0, result.output 201 | assert result.output == ( 202 | "Counting messages...\n" 203 | "Considering 1 message\n\n" 204 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 205 | f"Could not load message {bodhi_message_db.msg_id} on topic " 206 | f"{bodhi_message_db.topic}: 'this is invalid' is not of type 'object'\n" 207 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 208 | ) 209 | 210 | m.session.refresh(bodhi_message_db) 211 | assert len(bodhi_message_db.users) == 0 212 | 213 | 214 | def test_extract_agent(bodhi_message_db, mock_config, mock_init): 215 | runner = CliRunner() 216 | result = runner.invoke(extract_users, ["agent"]) 217 | 218 | assert result.exit_code == 0, result.output 219 | assert result.output == ( 220 | "Counting messages...\n" 221 | "Considering 1 message\n\n" 222 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 223 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 224 | ) 225 | m.session.refresh(bodhi_message_db) 226 | assert bodhi_message_db.agent_name == "dudemcpants" 227 | 228 | 229 | def test_extract_agent_with(bodhi_message_db, mock_config, mock_init): 230 | runner = CliRunner() 231 | result = runner.invoke(extract_users, ["--debug", "agent"]) 232 | 233 | assert result.exit_code == 0, result.output 234 | expected_output = ( 235 | "Counting messages...\n" 236 | "Considering 1 message\n\n" 237 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 238 | f"Agent for message {bodhi_message_db.msg_id} of topic {bodhi_message_db.topic}: " 239 | "dudemcpants\n" 240 | f"Working on 10000 messages sent after {bodhi_message_db.timestamp}\n" 241 | ) 242 | assert result.output == expected_output 243 | 244 | 245 | def test_extract_agent_no_users(datanommer_models, mock_config, mock_init): 246 | msg = generate_message() 247 | # change the schema header or the script won't pick it up 248 | msg._headers["fedora_messaging_schema"] = "testing" 249 | m.add(msg) 250 | runner = CliRunner() 251 | result = runner.invoke(extract_users, ["agent"]) 252 | 253 | assert result.exit_code == 0, result.output 254 | msg_in_db = m.Message.from_msg_id(msg.id) 255 | assert msg_in_db.agent_name is None 256 | assert result.output == ( 257 | "Counting messages...\n" 258 | "Considering 1 message\n\n" 259 | f"Working on 10000 messages sent after {msg_in_db.timestamp}\n" 260 | f"Working on 10000 messages sent after {msg_in_db.timestamp}\n" 261 | ) 262 | 263 | 264 | def test_extract_is_tty(bodhi_message_db, mock_config, mock_init, mocker): 265 | output = io.StringIO() 266 | mocker.patch.object(output, "isatty", lambda: True) 267 | mocker.patch( 268 | "datanommer.commands.utils.click.progressbar", lambda **kw: progressbar(file=output, **kw) 269 | ) 270 | runner = CliRunner() 271 | result = runner.invoke(extract_users, ["--debug", "usernames"]) 272 | 273 | assert result.exit_code == 0, result.output 274 | expected_output = ( 275 | "Counting 
messages...\n" 276 | "Considering 1 message\n" 277 | f"Usernames for message {bodhi_message_db.msg_id} of topic {bodhi_message_db.topic}: " 278 | "dudemcpants, ryanlerch\n" 279 | ) 280 | assert result.output == expected_output 281 | -------------------------------------------------------------------------------- /datanommer.commands/datanommer/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 16 | import importlib.metadata 17 | import itertools 18 | import json 19 | import logging 20 | import time 21 | from datetime import datetime, timedelta, timezone 22 | 23 | import click 24 | from sqlalchemy import func, select 25 | 26 | import datanommer.models as m 27 | from datanommer.models.view import refresh_recent_topics 28 | 29 | from .utils import config_option, get_config 30 | 31 | 32 | __version__ = importlib.metadata.version("datanommer-commands") 33 | 34 | log = logging.getLogger("datanommer") 35 | 36 | 37 | @click.command() 38 | @config_option 39 | def create(config_path): 40 | """Create a database and tables for 'datanommer.sqlalchemy.url'""" 41 | config = get_config(config_path) 42 | click.echo("Creating Datanommer database and tables") 43 | m.init( 44 | config["datanommer_sqlalchemy_url"], 45 | alembic_ini=config["alembic_ini"], 46 | create=True, 47 | ) 48 | 49 | 50 | @click.command() 51 | @config_option 52 | @click.option("--since", default=None, help="Only after datetime, ex 2013-02-14T08:05:59.87") 53 | @click.option("--before", default=None, help="Only before datetime, ex 2013-02-14T08:05:59.87") 54 | def dump(config_path, since, before): 55 | """Dump the contents of the datanommer database as JSON. 
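The messages are printed to stdout as a single JSON list.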
56 |
57 | You can also specify a timespan with the --since and --before arguments:
58 |
59 | $ datanommer-dump --before 2013-02-15 --since 2013-02-11T08:00:00 > datanommer-dump.json
60 | """
61 | config = get_config(config_path)
62 | m.init(
63 | config["datanommer_sqlalchemy_url"],
64 | alembic_ini=config["alembic_ini"],
65 | )
66 |
67 | query = select(m.Message)
68 | if before:
69 | try:
70 | before = datetime.fromisoformat(before)
71 | except ValueError as e:
72 | raise click.ClickException("Invalid date format") from e
73 |
74 | query = query.where(m.Message.timestamp <= before)
75 |
76 | if since:
77 | try:
78 | since = datetime.fromisoformat(since)
79 | except ValueError as e:
80 | raise click.ClickException("Invalid date format") from e
81 |
82 | query = query.where(m.Message.timestamp >= since)
83 |
84 | results = [json.dumps(msg.as_fedora_message_dict()) for msg in m.session.scalars(query)]
85 | click.echo(f"[{','.join(results)}]")
86 |
87 |
88 | @click.command()
89 | @config_option
90 | @click.option("--topic", is_flag=True, help="Shows the stats per topic")
91 | @click.option(
92 | "--category",
93 | default=None,
94 | help="Shows the stats within only the specified category",
95 | )
96 | def stats(config_path, topic, category):
97 | """Produce stats on the contents of the datanommer database.
98 |
99 | The default is to display the stats per category. You can also display
100 | the stats per topic with the --topic argument:
101 |
102 | $ datanommer-stats --topic
103 | org.fedoraproject.stg.fas.group.member.remove has 10 entries
104 | org.fedoraproject.stg.logger.log has 76 entries
105 | org.fedoraproject.stg.bodhi.update.comment has 5 entries
106 | org.fedoraproject.stg.busmon.colorized-messages has 10 entries
107 | org.fedoraproject.stg.fas.user.update has 10 entries
108 | org.fedoraproject.stg.wiki.article.edit has 106 entries
109 | org.fedoraproject.stg.fas.user.create has 3 entries
110 | org.fedoraproject.stg.bodhitest.testing has 4 entries
111 | org.fedoraproject.stg.fedoratagger.tag.create has 9 entries
112 | org.fedoraproject.stg.fedoratagger.user.rank.update has 5 entries
113 | org.fedoraproject.stg.wiki.upload.complete has 1 entries
114 | org.fedoraproject.stg.fas.group.member.sponsor has 6 entries
115 | org.fedoraproject.stg.fedoratagger.tag.update has 1 entries
116 | org.fedoraproject.stg.fas.group.member.apply has 17 entries
117 | org.fedoraproject.stg.__main__.testing has 1 entries
118 |
119 | The --category argument can be combined with --topic to show stats of the
120 | topics with a specific category or can be used alone to show the stats for
121 | only that category:
122 |
123 | $ datanommer-stats --topic --category fas
124 | org.fedoraproject.stg.fas.group.member.remove has 10 entries
125 | org.fedoraproject.stg.fas.user.update has 10 entries
126 | org.fedoraproject.stg.fas.user.create has 3 entries
127 | org.fedoraproject.stg.fas.group.member.sponsor has 6 entries
128 | org.fedoraproject.stg.fas.group.member.apply has 17 entries
129 |
130 | $ datanommer-stats --category fas
131 | fas has 46 entries
132 |
133 | """
134 | config = get_config(config_path)
135 | m.init(
136 | config["datanommer_sqlalchemy_url"],
137 | alembic_ini=config["alembic_ini"],
138 | )
139 |
140 | if topic:
141 | query = select(m.Message.topic, func.count(m.Message.topic))
142 | if category:
143 | query = query.where(m.Message.category == category)
144 | query = query.group_by(m.Message.topic)
145 | else:
146 | query = select(m.Message.category, func.count(m.Message.category))
147 |
if category: 148 | query = query.where(m.Message.category == category) 149 | query = query.group_by(m.Message.category) 150 | 151 | results = m.session.execute(query).all() 152 | 153 | if topic: 154 | for topic, count in results: 155 | click.echo(f"{topic} has {count} entries") 156 | else: 157 | for category, count in results: 158 | click.echo(f"{category} has {count} entries") 159 | 160 | 161 | @click.command() 162 | @config_option 163 | @click.option("--topic", default=None, help="Show the latest for only a specific topic.") 164 | @click.option("--category", default=None, help="Show the latest for only a specific category.") 165 | @click.option( 166 | "--overall", 167 | is_flag=True, 168 | help="Show only the latest message out of all message types.", 169 | ) 170 | @click.option("--timestamp", is_flag=True, help="Show only the timestamp of the message(s).") 171 | @click.option( 172 | "--timesince", 173 | is_flag=True, 174 | help="Show the number of seconds since the last message", 175 | ) 176 | @click.option( 177 | "--human", 178 | is_flag=True, 179 | help="When combined with --timestamp or --timesince,show a human readable date.", 180 | ) 181 | def latest(config_path, topic, category, overall, timestamp, timesince, human): 182 | """Print the latest message(s) ingested by datanommer. 183 | 184 | The default is to display the latest message in each message category. The 185 | latest in only a specified category or topic can also be returned:: 186 | 187 | $ datanommer-latest --category bodhi 188 | [{"bodhi": { 189 | "topic": "org.fedoraproject.stg.bodhi.update.comment", 190 | "msg": { 191 | "comment": { 192 | "group": null, 193 | "author": "ralph", 194 | "text": "Testing for latest datanommer.", 195 | "karma": 0, 196 | "anonymous": false, 197 | "timestamp": 1360349639.0, 198 | "update_title": "xmonad-0.10-10.fc17" 199 | }, 200 | "agent": "ralph" 201 | }, 202 | }}] 203 | 204 | $ datanommer-latest --topic org.fedoraproject.stg.bodhi.update.comment 205 | [{"bodhi": { 206 | "topic": "org.fedoraproject.stg.bodhi.update.comment", 207 | "msg": { 208 | "comment": { 209 | "group": null, 210 | "author": "ralph", 211 | "text": "Testing for latest datanommer.", 212 | "karma": 0, 213 | "anonymous": false, 214 | "timestamp": 1360349639.0, 215 | "update_title": "xmonad-0.10-10.fc17" 216 | }, 217 | "agent": "ralph" 218 | }, 219 | }}] 220 | 221 | Or to display the latest, regardless of the topic or category:: 222 | 223 | $ datanommer-latest --overall 224 | [{"bodhi": { 225 | "topic": "org.fedoraproject.stg.bodhi.update.comment", 226 | "msg": { 227 | "comment": { 228 | "group": null, 229 | "author": "ralph", 230 | "text": "Testing for latest datanommer.", 231 | "karma": 0, 232 | "anonymous": false, 233 | "timestamp": 1360349639.0, 234 | "update_title": "xmonad-0.10-10.fc17" 235 | }, 236 | "agent": "ralph" 237 | }, 238 | }}] 239 | 240 | You can combine either a --topic, --category or --overall argument while 241 | requesting information about the timestamp of the latest:: 242 | 243 | $ datanommer-latest --category wiki --timestamp 244 | [1361166918.0] 245 | 246 | # February 18, 2013 at 5:55AM 247 | $ datanommer-latest --category wiki --timestamp --human 248 | ["2013-02-18 05:55:18"] 249 | 250 | Or how recent that timestamp is:: 251 | 252 | # 49250 seconds ago 253 | $ datanommer-latest --category wiki --timesince 254 | [49250] 255 | 256 | # 13 hours, 40 minutes, 59.52 seconds ago 257 | $ datanommer-latest --category wiki --timesince --human 258 | [13:40:59.519447] 259 | """ 260 | config = 
get_config(config_path) 261 | m.init( 262 | config["datanommer_sqlalchemy_url"], 263 | alembic_ini=config["alembic_ini"], 264 | ) 265 | 266 | if topic: 267 | queries = [select(m.Message).where(m.Message.topic == topic)] 268 | 269 | elif category: 270 | queries = [select(m.Message).where(m.Message.category == category)] 271 | elif not overall: 272 | # If no args.. 273 | categories_query = select(m.Message.category).distinct().order_by(m.Message.category) 274 | categories = m.session.scalars(categories_query) 275 | queries = [ 276 | select(m.Message).where(m.Message.category == category) for category in categories 277 | ] 278 | else: 279 | # Show only the single latest message, regardless of type. 280 | queries = [select(m.Message)] 281 | 282 | # Only check messages from the last year to speed up queries 283 | a_year = timedelta(days=365) 284 | earliest = datetime.now(tz=timezone.utc) - a_year 285 | queries = [q.where(m.Message.timestamp > earliest) for q in queries] 286 | 287 | # Order and limit to the latest. 288 | queries = [q.order_by(m.Message.timestamp.desc()).limit(1) for q in queries] 289 | 290 | def formatter(key, val): 291 | if timestamp and human: 292 | return json.dumps(str(val.timestamp)) 293 | elif timestamp: 294 | return json.dumps(time.mktime(val.timestamp.timetuple())) 295 | elif timesince and human: 296 | return json.dumps(str(datetime.now() - val.timestamp)) 297 | elif timesince: 298 | timedelta = datetime.now() - val.timestamp 299 | return json.dumps(str((timedelta.days * 86400) + timedelta.seconds)) 300 | else: 301 | return f'{{"{key}": {json.dumps(val.as_fedora_message_dict())}}}' 302 | 303 | results = [] 304 | for result in itertools.chain.from_iterable(m.session.scalars(query) for query in queries): 305 | results.append(formatter(result.category, result)) 306 | 307 | click.echo(f"[{','.join(results)}]") 308 | 309 | 310 | @click.command() 311 | @config_option 312 | def refresh_view(config_path): 313 | """Refresh the materialized view `recent_topics`. 314 | 315 | This command should be run periodically via cron job to keep 316 | the materialized view `recent_topics` up to date. 317 | """ 318 | config = get_config(config_path) 319 | m.init( 320 | config["datanommer_sqlalchemy_url"], 321 | alembic_ini=config["alembic_ini"], 322 | ) 323 | 324 | refresh_recent_topics(m.session) 325 | 326 | click.echo("Recent topics materialized view refreshed successfully") 327 | -------------------------------------------------------------------------------- /datanommer.commands/tests/test_commands.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 
16 | import json 17 | import time 18 | from datetime import datetime, timedelta 19 | 20 | import pytest 21 | from click import ClickException 22 | from click.testing import CliRunner 23 | 24 | import datanommer.commands 25 | import datanommer.models as m 26 | 27 | from .utils import generate_bodhi_update_complete_message, generate_message 28 | 29 | 30 | def test_get_datanommer_sqlalchemy_url_keyerror(mocker): 31 | mocker.patch.dict( 32 | datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"], 33 | {}, 34 | clear=True, 35 | ) 36 | with pytest.raises(ClickException): 37 | datanommer.commands.get_config() 38 | 39 | 40 | def test_get_datanommer_sqlalchemy_url_config(mocker): 41 | conf = { 42 | "datanommer_sqlalchemy_url": "", 43 | "alembic_ini": "/some/where", 44 | } 45 | mocker.patch.dict( 46 | datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"], conf 47 | ) 48 | load_config = mocker.patch( 49 | "datanommer.commands.utils.fedora_messaging_config.conf.load_config", 50 | ) 51 | datanommer.commands.get_config("some-path") 52 | load_config.assert_called_with("some-path") 53 | 54 | 55 | def test_create(mocker): 56 | mock_model_init = mocker.patch("datanommer.commands.m.init") 57 | mocker.patch.dict( 58 | datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"], 59 | { 60 | "datanommer_sqlalchemy_url": "TESTURL", 61 | "alembic_ini": "/some/where", 62 | }, 63 | ) 64 | 65 | runner = CliRunner() 66 | result = runner.invoke(datanommer.commands.create, []) 67 | assert result.exit_code == 0, result.output 68 | 69 | assert result.output == "Creating Datanommer database and tables\n" 70 | mock_model_init.assert_called_once_with("TESTURL", alembic_ini="/some/where", create=True) 71 | 72 | 73 | def test_stats(datanommer_models, mock_config, mock_init): 74 | msg1 = generate_message( 75 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 76 | body={"Message 1": "Message 1"}, 77 | ) 78 | m.add(msg1) 79 | 80 | msg2 = generate_message( 81 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 82 | ) 83 | m.add(msg2) 84 | 85 | msg3 = generate_message( 86 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 87 | body={"Message 3": "Message 3"}, 88 | ) 89 | m.add(msg3) 90 | 91 | runner = CliRunner() 92 | result = runner.invoke(datanommer.commands.stats, []) 93 | assert result.exit_code == 0, result.output 94 | 95 | assert "git has 2 entries" in result.output 96 | assert "fas has 1 entries" in result.output 97 | 98 | 99 | def test_stats_topics(datanommer_models, mock_config, mock_init): 100 | msg1 = generate_message( 101 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 102 | body={"Message 1": "Message 1"}, 103 | ) 104 | m.add(msg1) 105 | 106 | msg2 = generate_message( 107 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 108 | ) 109 | m.add(msg2) 110 | 111 | msg3 = generate_message( 112 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 113 | body={"Message 3": "Message 3"}, 114 | ) 115 | m.add(msg3) 116 | 117 | runner = CliRunner() 118 | result = runner.invoke(datanommer.commands.stats, ["--topic"]) 119 | assert result.exit_code == 0, result.output 120 | 121 | assert "org.fedoraproject.prod.git.receive.valgrind.master has 1 entries" in result.output 122 | assert "org.fedoraproject.stg.fas.user.create has 1 entries" in result.output 123 | assert "org.fedoraproject.prod.git.branch.valgrind.master has 1 entries" in result.output 124 | 125 | 126 | def 
test_stats_category_topics(datanommer_models, mock_config, mock_init): 127 | msg1 = generate_message( 128 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 129 | body={"Message 1": "Message 1"}, 130 | ) 131 | m.add(msg1) 132 | 133 | msg2 = generate_message( 134 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 135 | ) 136 | m.add(msg2) 137 | 138 | msg3 = generate_message( 139 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 140 | body={"Message 3": "Message 3"}, 141 | ) 142 | m.add(msg3) 143 | 144 | runner = CliRunner() 145 | result = runner.invoke(datanommer.commands.stats, ["--topic", "--category", "git"]) 146 | assert result.exit_code == 0, result.output 147 | 148 | assert "org.fedoraproject.prod.git.receive.valgrind.master has 1 entries" in result.output 149 | assert "org.fedoraproject.stg.fas.user.create has 1 entries" not in result.output 150 | assert "org.fedoraproject.prod.git.branch.valgrind.master has 1 entries" in result.output 151 | 152 | 153 | def test_stats_category(datanommer_models, mock_config, mock_init): 154 | msg1 = generate_message( 155 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 156 | body={"Message 1": "Message 1"}, 157 | ) 158 | m.add(msg1) 159 | 160 | msg2 = generate_message( 161 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 162 | ) 163 | m.add(msg2) 164 | 165 | msg3 = generate_message( 166 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 167 | body={"Message 3": "Message 3"}, 168 | ) 169 | m.add(msg3) 170 | 171 | runner = CliRunner() 172 | result = runner.invoke(datanommer.commands.stats, ["--category", "git"]) 173 | assert result.exit_code == 0, result.output 174 | 175 | assert result.output == "git has 2 entries\n" 176 | 177 | 178 | def test_dump(datanommer_models, mock_config, mock_init): 179 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 180 | m.add(msg1) 181 | 182 | msg2 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 183 | m.add(msg2) 184 | 185 | msg3 = generate_bodhi_update_complete_message() 186 | m.add(msg3) 187 | 188 | runner = CliRunner() 189 | result = runner.invoke(datanommer.commands.dump, []) 190 | assert result.exit_code == 0, result.output 191 | 192 | json_object = json.loads(result.output) 193 | 194 | assert json_object[0]["topic"] == "org.fedoraproject.prod.git.branch.valgrind.master" 195 | 196 | 197 | def test_dump_before(datanommer_models, mock_config, mock_init): 198 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 199 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 200 | m.add(msg1) 201 | 202 | msg2 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 203 | msg2._properties.headers["sent-at"] = datetime(2013, 2, 15).isoformat() 204 | m.add(msg2) 205 | 206 | msg3 = generate_message(topic="org.fedoraproject.prod.log.receive.valgrind.master") 207 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16, 8).isoformat() 208 | m.add(msg3) 209 | 210 | runner = CliRunner() 211 | result = runner.invoke(datanommer.commands.dump, ["--before", "2013-02-16"]) 212 | assert result.exit_code == 0, result.output 213 | 214 | json_object = json.loads(result.output) 215 | 216 | assert json_object[0]["topic"] == "org.fedoraproject.prod.git.branch.valgrind.master" 217 | assert json_object[1]["topic"] == "org.fedoraproject.prod.git.receive.valgrind.master" 218 | assert len(json_object) == 
2 219 | 220 | 221 | def test_dump_since(datanommer_models, mock_config, mock_init): 222 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 223 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 224 | m.add(msg1) 225 | 226 | msg2 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 227 | msg2._properties.headers["sent-at"] = datetime(2013, 2, 15).isoformat() 228 | m.add(msg2) 229 | 230 | msg3 = generate_message(topic="org.fedoraproject.prod.log.receive.valgrind.master") 231 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16, 8).isoformat() 232 | m.add(msg3) 233 | 234 | runner = CliRunner() 235 | result = runner.invoke(datanommer.commands.dump, ["--since", "2013-02-14T08:00:00"]) 236 | assert result.exit_code == 0, result.output 237 | 238 | json_object = json.loads(result.output) 239 | 240 | assert json_object[0]["topic"] == "org.fedoraproject.prod.git.receive.valgrind.master" 241 | assert json_object[1]["topic"] == "org.fedoraproject.prod.log.receive.valgrind.master" 242 | assert len(json_object) == 2 243 | 244 | 245 | def test_dump_timespan(datanommer_models, mock_config, mock_init): 246 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 247 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 248 | m.add(msg1) 249 | 250 | msg2 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 251 | msg2._properties.headers["sent-at"] = datetime(2013, 2, 15).isoformat() 252 | m.add(msg2) 253 | 254 | msg3 = generate_message(topic="org.fedoraproject.prod.log.receive.valgrind.master") 255 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16, 8).isoformat() 256 | m.add(msg3) 257 | 258 | runner = CliRunner() 259 | result = runner.invoke( 260 | datanommer.commands.dump, 261 | ["--before", "2013-02-16", "--since", "2013-02-14T08:00:00"], 262 | ) 263 | assert result.exit_code == 0, result.output 264 | 265 | json_object = json.loads(result.output) 266 | 267 | assert json_object[0]["topic"] == "org.fedoraproject.prod.git.receive.valgrind.master" 268 | assert len(json_object) == 1 269 | 270 | 271 | def test_dump_invalid_dates(datanommer_models, mock_config, mock_init): 272 | runner = CliRunner() 273 | result = runner.invoke(datanommer.commands.dump, ["--before", "2013-02-16asdasd"]) 274 | assert result.exit_code > 0, result.output 275 | assert result.output == "Error: Invalid date format\n" 276 | 277 | result = runner.invoke(datanommer.commands.dump, ["--since", "2013-02-16asdasd"]) 278 | assert result.exit_code > 0, result.output 279 | assert result.output == "Error: Invalid date format\n" 280 | 281 | 282 | def test_latest_overall(datanommer_models, mock_config, mock_init): 283 | msg1 = generate_message( 284 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 285 | body={"Message 1": "Message 1"}, 286 | ) 287 | m.add(msg1) 288 | 289 | msg2 = generate_message( 290 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 291 | ) 292 | m.add(msg2) 293 | 294 | msg3 = generate_message( 295 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 296 | body={"Message 3": "Message 3"}, 297 | ) 298 | m.add(msg3) 299 | 300 | runner = CliRunner() 301 | result = runner.invoke(datanommer.commands.latest, ["--overall"]) 302 | assert result.exit_code == 0, result.output 303 | 304 | json_object = json.loads(result.output) 305 | 306 | assert json_object[0]["git"]["body"] == {"Message 3": "Message 3"} 307 | assert 
len(json_object) == 1 308 | 309 | 310 | def test_latest_topic(datanommer_models, mock_config, mock_init): 311 | msg1 = generate_message( 312 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 313 | body={"Message 1": "Message 1"}, 314 | ) 315 | m.add(msg1) 316 | 317 | msg2 = generate_message( 318 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 319 | ) 320 | m.add(msg2) 321 | 322 | msg3 = generate_message( 323 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 324 | body={"Message 3": "Message 3"}, 325 | ) 326 | m.add(msg3) 327 | 328 | runner = CliRunner() 329 | result = runner.invoke( 330 | datanommer.commands.latest, ["--topic", "org.fedoraproject.stg.fas.user.create"] 331 | ) 332 | assert result.exit_code == 0, result.output 333 | 334 | json_object = json.loads(result.output) 335 | 336 | assert json_object[0]["fas"]["body"] == {"Message 2": "Message 2"} 337 | assert len(json_object) == 1 338 | 339 | 340 | def test_latest_category(datanommer_models, mock_config, mock_init): 341 | msg1 = generate_message( 342 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 343 | body={"Message 1": "Message 1"}, 344 | ) 345 | m.add(msg1) 346 | 347 | msg2 = generate_message( 348 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 349 | ) 350 | m.add(msg2) 351 | 352 | msg3 = generate_message( 353 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 354 | body={"Message 3": "Message 3"}, 355 | ) 356 | m.add(msg3) 357 | 358 | runner = CliRunner() 359 | result = runner.invoke(datanommer.commands.latest, ["--category", "fas"]) 360 | assert result.exit_code == 0, result.output 361 | 362 | json_object = json.loads(result.output) 363 | 364 | assert json_object[0]["fas"]["body"] == {"Message 2": "Message 2"} 365 | assert len(json_object) == 1 366 | 367 | 368 | def test_latest_timestamp_human(datanommer_models, mocker, mock_config, mock_init): 369 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 370 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 371 | m.add(msg1) 372 | 373 | msg2 = generate_message(topic="org.fedoraproject.stg.fas.user.create") 374 | msg2._properties.headers["sent-at"] = datetime(2013, 2, 15, 15, 15, 15, 15).isoformat() 375 | m.add(msg2) 376 | 377 | msg3 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 378 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16, 16, 16, 16, 16).isoformat() 379 | m.add(msg3) 380 | 381 | # datanommer-latest defaults to the last year, so mock the 382 | # datetime calls to go back to 2013 383 | mock_dt = mocker.patch("datanommer.commands.datetime") 384 | mock_dt.now.return_value = datetime(2013, 3, 1) 385 | 386 | runner = CliRunner() 387 | result = runner.invoke(datanommer.commands.latest, ["--timestamp", "--human"]) 388 | assert result.exit_code == 0, result.output 389 | 390 | json_object = json.loads(result.output) 391 | 392 | assert json_object[1] == "2013-02-16 16:16:16.000016" 393 | assert json_object[0] == "2013-02-15 15:15:15.000015" 394 | assert len(json_object) == 2 395 | 396 | 397 | def test_latest_timestamp(datanommer_models, mocker, mock_config, mock_init): 398 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 399 | msg1._properties.headers["sent-at"] = datetime(2013, 2, 14).isoformat() 400 | m.add(msg1) 401 | 402 | msg2 = generate_message(topic="org.fedoraproject.stg.fas.user.create") 403 | msg2._properties.headers["sent-at"] 
= datetime(2013, 2, 15).isoformat() 404 | m.add(msg2) 405 | 406 | msg3 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 407 | msg3._properties.headers["sent-at"] = datetime(2013, 2, 16).isoformat() 408 | m.add(msg3) 409 | 410 | # datanommer-latest defaults to the last year, so mock the 411 | # datetime calls to go back to 2013 412 | mock_dt = mocker.patch("datanommer.commands.datetime") 413 | mock_dt.now.return_value = datetime(2013, 3, 1) 414 | 415 | runner = CliRunner() 416 | result = runner.invoke(datanommer.commands.latest, ["--timestamp"]) 417 | assert result.exit_code == 0, result.output 418 | 419 | json_object = json.loads(result.output) 420 | 421 | assert json_object[1] == time.mktime(datetime(2013, 2, 16).timetuple()) 422 | assert json_object[0] == time.mktime(datetime(2013, 2, 15).timetuple()) 423 | assert len(json_object) == 2 424 | 425 | 426 | def test_latest_timesince(datanommer_models, mocker, mock_config, mock_init): 427 | now = datetime(2013, 3, 1) 428 | 429 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 430 | time1 = now - timedelta(days=1) 431 | msg1._properties.headers["sent-at"] = time1.isoformat() 432 | m.add(msg1) 433 | 434 | msg2 = generate_message(topic="org.fedoraproject.stg.fas.user.create") 435 | time2 = now - timedelta(seconds=60) 436 | msg2._properties.headers["sent-at"] = time2.isoformat() 437 | m.add(msg2) 438 | 439 | msg3 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 440 | time3 = now - timedelta(seconds=1) 441 | msg3._properties.headers["sent-at"] = time3.isoformat() 442 | m.add(msg3) 443 | 444 | # datanommer-latest defaults to the last year, so mock the 445 | # datetime calls to go back to 2013 446 | mock_dt = mocker.patch("datanommer.commands.datetime") 447 | mock_dt.now.return_value = now 448 | 449 | runner = CliRunner() 450 | result = runner.invoke(datanommer.commands.latest, ["--timesince"]) 451 | assert result.exit_code == 0, result.output 452 | 453 | json_object = json.loads(result.output) 454 | 455 | # allow .1 second to run test 456 | assert int(json_object[1]) <= 1.1 457 | assert int(json_object[1]) >= 1 458 | assert int(json_object[0]) <= 60.1 459 | assert int(json_object[0]) >= 60 460 | assert len(json_object) == 2 461 | 462 | 463 | def test_latest_timesince_human(datanommer_models, mock_config, mock_init, mocker): 464 | now = datetime.now() 465 | # mocker.patch.object(datanommer.commands.datetime, "now", return_value=now) 466 | patched_datetime = mocker.patch("datanommer.commands.datetime", mocker.Mock(wraps=datetime)) 467 | patched_datetime.now.return_value = now 468 | 469 | msg1 = generate_message(topic="org.fedoraproject.prod.git.branch.valgrind.master") 470 | time1 = now - timedelta(days=2) 471 | msg1._properties.headers["sent-at"] = time1.isoformat() 472 | m.add(msg1) 473 | 474 | msg2 = generate_message(topic="org.fedoraproject.stg.fas.user.create") 475 | time2 = now - timedelta(days=1) 476 | msg2._properties.headers["sent-at"] = time2.isoformat() 477 | m.add(msg2) 478 | 479 | msg3 = generate_message(topic="org.fedoraproject.prod.git.receive.valgrind.master") 480 | time3 = now - timedelta(seconds=1) 481 | msg3._properties.headers["sent-at"] = time3.isoformat() 482 | m.add(msg3) 483 | 484 | runner = CliRunner() 485 | result = runner.invoke(datanommer.commands.latest, ["--timesince", "--human"]) 486 | assert result.exit_code == 0, result.output 487 | 488 | assert json.loads(result.output) == ["1 day, 0:00:00", "0:00:01"] 489 | 490 | 491 | def 
test_latest(datanommer_models, mock_config, mock_init): 492 | msg1 = generate_message( 493 | topic="org.fedoraproject.prod.git.branch.valgrind.master", 494 | body={"Message 1": "Message 1"}, 495 | ) 496 | time1 = datetime.now() - timedelta(days=2) 497 | msg1._properties.headers["sent-at"] = time1.isoformat() 498 | m.add(msg1) 499 | 500 | msg2 = generate_message( 501 | topic="org.fedoraproject.stg.fas.user.create", body={"Message 2": "Message 2"} 502 | ) 503 | m.add(msg2) 504 | 505 | msg3 = generate_message( 506 | topic="org.fedoraproject.prod.git.receive.valgrind.master", 507 | body={"Message 3": "Message 3"}, 508 | ) 509 | m.add(msg3) 510 | 511 | runner = CliRunner() 512 | result = runner.invoke(datanommer.commands.latest, []) 513 | assert result.exit_code == 0, result.output 514 | 515 | json_object = json.loads(result.output) 516 | 517 | assert json_object[1]["git"]["body"] == {"Message 3": "Message 3"} 518 | assert json_object[0]["fas"]["body"] == {"Message 2": "Message 2"} 519 | assert len(json_object) == 2 520 | 521 | 522 | def test_refresh_view(datanommer_models, mock_config, mocker): 523 | """Test the refresh_view command.""" 524 | 525 | mock_refresh = mocker.patch("datanommer.commands.refresh_recent_topics") 526 | 527 | runner = CliRunner() 528 | result = runner.invoke(datanommer.commands.refresh_view, []) 529 | 530 | assert result.exit_code == 0, result.output 531 | mock_refresh.assert_called_once_with(m.session) 532 | -------------------------------------------------------------------------------- /datanommer.models/datanommer/models/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 
16 | import datetime
17 | import importlib.metadata
18 | import json
19 | import logging
20 | import math
21 | import traceback
22 | import uuid
23 | from warnings import warn
24 |
25 | from sqlalchemy import (
26 | and_,
27 | between,
28 | Column,
29 | create_engine,
30 | DateTime,
31 | DDL,
32 | event,
33 | ForeignKey,
34 | func,
35 | Index,
36 | Integer,
37 | not_,
38 | or_,
39 | select,
40 | String,
41 | Table,
42 | text,
43 | TypeDecorator,
44 | Unicode,
45 | UnicodeText,
46 | UniqueConstraint,
47 | )
48 | from sqlalchemy.dialects import postgresql
49 | from sqlalchemy.exc import IntegrityError
50 | from sqlalchemy.orm import (
51 | declarative_base,
52 | relationship,
53 | scoped_session,
54 | sessionmaker,
55 | validates,
56 | )
57 | from sqlalchemy.sql import operators
58 |
59 | from .view import create_view
60 |
61 |
62 | try:
63 | from psycopg2.errors import UniqueViolation
64 | except ImportError: # pragma: no cover
65 | from psycopg2.errorcodes import lookup as lookup_error
66 |
67 | UniqueViolation = lookup_error("23505")
68 |
69 |
70 | __version__ = importlib.metadata.version("datanommer-models")
71 |
72 |
73 | log = logging.getLogger("datanommer")
74 |
75 | maker = sessionmaker()
76 | session = scoped_session(maker)
77 |
78 | DeclarativeBase = declarative_base()
79 | DeclarativeBase.query = session.query_property()
80 |
81 |
82 | def init(uri=None, alembic_ini=None, engine=None, create=False):
83 | """Initialize a connection. Create tables if requested."""
84 |
85 | if uri and engine:
86 | raise ValueError("uri and engine cannot both be specified")
87 |
88 | if uri is None and not engine:
89 | raise ValueError("One of uri or engine must be specified")
90 |
91 | if uri and not engine:
92 | engine = create_engine(uri, future=True)
93 |
94 | # We need to hang our own attribute on the sqlalchemy session to stop
95 | # ourselves from initializing twice. That is only a problem if the code
96 | # calling us isn't consistent.
97 | if getattr(session, "_datanommer_initialized", None):
98 | log.warning("Session already initialized. Bailing")
99 | return
100 | session._datanommer_initialized = True
101 |
102 | maker.configure(bind=engine)
103 | DeclarativeBase.query = session.query_property()
104 |
105 | if create:
106 | with engine.begin() as connection:
107 | connection.execute(text("CREATE EXTENSION IF NOT EXISTS timescaledb"))
108 | DeclarativeBase.metadata.create_all(engine)
109 | with engine.begin() as connection:
110 | create_view(connection)
111 | # Loads the alembic configuration and generates the version table, with
112 | # the most recent revision stamped as head
113 | if alembic_ini is not None: # pragma: no cover
114 | from alembic import command
115 | from alembic.config import Config
116 |
117 | alembic_cfg = Config(alembic_ini)
118 | command.stamp(alembic_cfg, "head")
119 |
120 |
121 | def add(message):
122 | """Take the fedora-messaging Message and store it in the message
123 | table.
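The timestamp is taken from the message's "sent-at" header when present, falling back to the current time.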
124 | """ 125 | headers = message._properties.headers 126 | sent_at = headers.get("sent-at", None) 127 | 128 | if sent_at: 129 | # fromisoformat doesn't parse Z suffix (yet) see: 130 | # https://discuss.python.org/t/parse-z-timezone-suffix-in-datetime/2220 131 | try: 132 | sent_at = datetime.datetime.fromisoformat(sent_at.replace("Z", "+00:00")) 133 | except ValueError: 134 | log.exception("Failed to parse sent-at timestamp value") 135 | return 136 | else: 137 | sent_at = datetime.datetime.now(tz=datetime.UTC) 138 | 139 | # Workaround schemas misbehaving 140 | try: 141 | usernames = message.usernames 142 | except Exception: 143 | log.exception( 144 | "Could not get the list of users from a message on %s with id %s", 145 | message.topic, 146 | message.id, 147 | ) 148 | usernames = [] 149 | try: 150 | packages = message.packages 151 | except Exception: 152 | log.exception( 153 | "Could not get the list of packages from a message on %s with id %s", 154 | message.topic, 155 | message.id, 156 | ) 157 | packages = [] 158 | 159 | Message.create( 160 | i=0, 161 | msg_id=message.id, 162 | topic=message.topic, 163 | timestamp=sent_at, 164 | msg=message.body, 165 | headers=headers, 166 | agent_name=getattr(message, "agent_name", None), 167 | users=usernames, 168 | packages=packages, 169 | ) 170 | 171 | session.commit() 172 | 173 | 174 | # https://docs.sqlalchemy.org/en/14/core/custom_types.html#marshal-json-strings 175 | 176 | 177 | class _JSONEncodedDict(TypeDecorator): 178 | """Represents an immutable structure as a json-encoded string.""" 179 | 180 | impl = UnicodeText 181 | 182 | cache_ok = True 183 | 184 | def process_bind_param(self, value, dialect): 185 | if value is not None: 186 | value = json.dumps(value) 187 | 188 | return value 189 | 190 | def process_result_value(self, value, dialect): 191 | if value is not None: 192 | value = json.loads(value) 193 | return value 194 | 195 | def coerce_compared_value(self, op, value): 196 | # https://docs.sqlalchemy.org/en/14/core/custom_types.html#dealing-with-comparison-operations 197 | if op in (operators.like_op, operators.not_like_op): 198 | return String() 199 | else: 200 | return self 201 | 202 | 203 | users_assoc_table = Table( 204 | "users_messages", 205 | DeclarativeBase.metadata, 206 | Column("user_id", ForeignKey("users.id"), primary_key=True), 207 | Column("msg_id", Integer, primary_key=True, index=True), 208 | Column("msg_timestamp", DateTime, primary_key=True, index=True), 209 | ) 210 | 211 | packages_assoc_table = Table( 212 | "packages_messages", 213 | DeclarativeBase.metadata, 214 | Column("package_id", ForeignKey("packages.id"), primary_key=True), 215 | Column("msg_id", Integer, primary_key=True, index=True), 216 | Column("msg_timestamp", DateTime, primary_key=True, index=True), 217 | ) 218 | 219 | 220 | class Message(DeclarativeBase): 221 | __tablename__ = "messages" 222 | __table_args__ = ( 223 | UniqueConstraint("msg_id", "timestamp"), 224 | Index( 225 | "ix_messages_headers", 226 | "headers", 227 | postgresql_using="gin", 228 | postgresql_ops={"headers": "jsonb_path_ops"}, 229 | ), 230 | ) 231 | 232 | id = Column(Integer, primary_key=True, autoincrement=True) 233 | msg_id = Column(Unicode, nullable=True, default=None, index=True) 234 | i = Column(Integer, nullable=False) 235 | topic = Column(Unicode, nullable=False, index=True) 236 | timestamp = Column(DateTime, nullable=False, index=True, primary_key=True) 237 | certificate = Column(UnicodeText) 238 | signature = Column(UnicodeText) 239 | category = Column(Unicode, nullable=False, 
index=True) 240 | agent_name = Column(Unicode, index=True) 241 | crypto = Column(UnicodeText) 242 | source_name = Column(Unicode, default="datanommer") 243 | source_version = Column(Unicode, default=lambda context: __version__) 244 | msg = Column(_JSONEncodedDict, nullable=False) 245 | headers = Column(postgresql.JSONB(none_as_null=True)) 246 | users = relationship( 247 | "User", 248 | secondary=users_assoc_table, 249 | backref="messages", 250 | primaryjoin=lambda: and_( 251 | Message.id == users_assoc_table.c.msg_id, 252 | Message.timestamp == users_assoc_table.c.msg_timestamp, 253 | ), 254 | ) 255 | packages = relationship( 256 | "Package", 257 | secondary=packages_assoc_table, 258 | backref="messages", 259 | primaryjoin=lambda: and_( 260 | Message.id == packages_assoc_table.c.msg_id, 261 | Message.timestamp == packages_assoc_table.c.msg_timestamp, 262 | ), 263 | ) 264 | 265 | @validates("topic") 266 | def get_category(self, key, topic): 267 | """Update the category when the topic is set. 268 | 269 | The method seems... unnatural. But even zzzeek says it's OK to do it: 270 | https://stackoverflow.com/a/6442201 271 | """ 272 | index = 2 if "VirtualTopic" in topic else 3 273 | try: 274 | self.category = topic.split(".")[index] 275 | except Exception: 276 | traceback.print_exc() 277 | self.category = "Unclassified" 278 | return topic 279 | 280 | @classmethod 281 | def create(cls, **kwargs): 282 | users = kwargs.pop("users") 283 | packages = kwargs.pop("packages") 284 | if not kwargs.get("msg_id"): 285 | log.info("Message on %s was received without a msg_id", kwargs["topic"]) 286 | kwargs["msg_id"] = str(uuid.uuid4()) 287 | obj = cls(**kwargs) 288 | 289 | try: 290 | session.add(obj) 291 | session.flush() 292 | except IntegrityError as e: 293 | if isinstance(e.orig, UniqueViolation): 294 | log.warning( 295 | "Skipping message from %s with duplicate id: %s", 296 | kwargs["topic"], 297 | kwargs["msg_id"], 298 | ) 299 | else: 300 | log.exception( 301 | "Unknown Integrity Error: message %s with id %s", 302 | kwargs["topic"], 303 | kwargs["msg_id"], 304 | ) 305 | session.rollback() 306 | return 307 | 308 | obj._insert_list(User, users_assoc_table, users) 309 | obj._insert_list(Package, packages_assoc_table, packages) 310 | 311 | def _insert_list(self, rel_class, assoc_table, values): 312 | if not values: 313 | return 314 | assoc_col_name = assoc_table.c[0].name 315 | insert_values = [] 316 | for name in set(values): 317 | attr_obj = rel_class.get_or_create(name) 318 | # This would normally be a simple "obj.[users|packages].append(name)" kind 319 | # of statement, but here we drop down out of sqlalchemy's ORM and into the 320 | # sql abstraction in order to gain a little performance boost. 
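# Build one bulk INSERT over the association table instead of appending
# related objects one at a time through the ORM relationship.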
321 | insert_values.append( 322 | { 323 | assoc_col_name: attr_obj.id, 324 | "msg_id": self.id, 325 | "msg_timestamp": self.timestamp, 326 | } 327 | ) 328 | session.execute(assoc_table.insert(), insert_values) 329 | session.flush() 330 | 331 | @classmethod 332 | def from_msg_id(cls, msg_id): 333 | return session.execute(select(cls).where(cls.msg_id == msg_id)).scalar_one_or_none() 334 | 335 | def as_dict(self, request=None): 336 | return dict( 337 | i=self.i, 338 | msg_id=self.msg_id, 339 | topic=self.topic, 340 | timestamp=self.timestamp, 341 | certificate=self.certificate, 342 | signature=self.signature, 343 | agent_name=self.agent_name, 344 | username=self.agent_name, # DEPRECATED 345 | crypto=self.crypto, 346 | msg=self.msg, 347 | headers=self.headers, 348 | source_name=self.source_name, 349 | source_version=self.source_version, 350 | users=list(sorted(u.name for u in self.users)), 351 | packages=list(sorted(p.name for p in self.packages)), 352 | ) 353 | 354 | def as_fedora_message_dict(self): 355 | headers = self.headers or {} 356 | if "sent-at" not in headers: 357 | headers["sent-at"] = self.timestamp.astimezone(datetime.UTC).isoformat() 358 | return dict( 359 | body=self.msg, 360 | headers=headers, 361 | id=self.msg_id, 362 | priority=headers.get("priority", 0), 363 | queue=None, 364 | topic=self.topic, 365 | ) 366 | 367 | def __json__(self, request=None): 368 | warn( 369 | "The __json__() method has been renamed to as_dict(), and will be removed " 370 | "in the next major version", 371 | DeprecationWarning, 372 | stacklevel=2, 373 | ) 374 | return self.as_dict(request) 375 | 376 | @property 377 | def username(self): 378 | warn( 379 | "The username attribute has been renamed to agent_name, and will be removed " 380 | "in the next major version", 381 | DeprecationWarning, 382 | stacklevel=2, 383 | ) 384 | return self.agent_name 385 | 386 | @classmethod 387 | def make_query( 388 | cls, 389 | start=None, 390 | end=None, 391 | msg_id=None, 392 | users=None, 393 | not_users=None, 394 | packages=None, 395 | not_packages=None, 396 | categories=None, 397 | not_categories=None, 398 | topics=None, 399 | not_topics=None, 400 | agents=None, 401 | not_agents=None, 402 | contains=None, 403 | ): 404 | """Flexible query interface for messages. 405 | 406 | Arguments are filters. start and end should be :mod:`datetime` objs. 407 | 408 | Other filters should be lists of strings. They are applied in a 409 | conjunctive-normal-form (CNF) kind of way 410 | 411 | for example, the following:: 412 | 413 | users = ['ralph', 'lmacken'] 414 | categories = ['bodhi', 'wiki'] 415 | 416 | should return messages where 417 | 418 | (user=='ralph' OR user=='lmacken') AND 419 | (category=='bodhi' OR category=='wiki') 420 | 421 | Furthermore, you can use a negative version of each argument. 422 | 423 | users = ['ralph'] 424 | not_categories = ['bodhi', 'wiki'] 425 | 426 | should return messages where 427 | 428 | (user == 'ralph') AND 429 | NOT (category == 'bodhi' OR category == 'wiki') 430 | 431 | """ 432 | 433 | users = users or [] 434 | not_users = not_users or [] 435 | packages = packages or [] 436 | not_packs = not_packages or [] 437 | categories = categories or [] 438 | not_cats = not_categories or [] 439 | topics = topics or [] 440 | not_topics = not_topics or [] 441 | agents = agents or [] 442 | not_agents = not_agents or [] 443 | contains = contains or [] 444 | 445 | Message = cls 446 | query = select(Message) 447 | 448 | # A little argument validation. 
We could provide some defaults in 449 | # these mixed cases.. but instead we'll just leave it up to our caller. 450 | if (start is not None and end is None) or (end is not None and start is None): 451 | raise ValueError( 452 | "Either both start and end must be specified or neither must be specified" 453 | ) 454 | 455 | if start and end: 456 | query = query.where(between(Message.timestamp, start, end)) 457 | 458 | if msg_id: 459 | query = query.where(Message.msg_id == msg_id) 460 | 461 | # Add the four positive filters as necessary 462 | if users: 463 | query = query.where(or_(*(Message.users.any(User.name == u) for u in users))) 464 | 465 | if packages: 466 | query = query.where(or_(*(Message.packages.any(Package.name == p) for p in packages))) 467 | 468 | if categories: 469 | query = query.where(or_(*(Message.category == category for category in categories))) 470 | 471 | if topics: 472 | query = query.where(or_(*(Message.topic == topic for topic in topics))) 473 | 474 | if agents: 475 | query = query.where(or_(*(Message.agent_name == agent for agent in agents))) 476 | 477 | if contains: 478 | query = query.where(or_(*(Message.msg.like(f"%{contain}%") for contain in contains))) 479 | 480 | # And then the four negative filters as necessary 481 | if not_users: 482 | query = query.where(not_(or_(*(Message.users.any(User.name == u) for u in not_users)))) 483 | 484 | if not_packs: 485 | query = query.where( 486 | not_(or_(*(Message.packages.any(Package.name == p) for p in not_packs))) 487 | ) 488 | 489 | if not_cats: 490 | query = query.where(not_(or_(*(Message.category == category for category in not_cats)))) 491 | 492 | if not_topics: 493 | query = query.where(not_(or_(*(Message.topic == topic for topic in not_topics)))) 494 | 495 | if not_agents: 496 | query = query.where(not_(or_(*(Message.agent_name == agent for agent in not_agents)))) 497 | 498 | return query 499 | 500 | @classmethod 501 | def grep( 502 | cls, 503 | *, 504 | page=1, 505 | rows_per_page=100, 506 | order="asc", 507 | defer=False, 508 | **kwargs, 509 | ): 510 | """Flexible query interface for messages. 511 | 512 | Arguments are filters. start and end should be :mod:`datetime` objs. 513 | 514 | Other filters should be lists of strings. They are applied in a 515 | conjunctive-normal-form (CNF) kind of way 516 | 517 | for example, the following:: 518 | 519 | users = ['ralph', 'lmacken'] 520 | categories = ['bodhi', 'wiki'] 521 | 522 | should return messages where 523 | 524 | (user=='ralph' OR user=='lmacken') AND 525 | (category=='bodhi' OR category=='wiki') 526 | 527 | Furthermore, you can use a negative version of each argument. 528 | 529 | users = ['ralph'] 530 | not_categories = ['bodhi', 'wiki'] 531 | 532 | should return messages where 533 | 534 | (user == 'ralph') AND 535 | NOT (category == 'bodhi' OR category == 'wiki') 536 | 537 | ---- 538 | 539 | If the `defer` argument evaluates to True, the query won't actually 540 | be executed, but a SQLAlchemy query object returned instead. 
541 | """ 542 | query = cls.make_query(**kwargs) 543 | # Finally, tag on our pagination arguments 544 | Message = cls 545 | 546 | query_total = query.with_only_columns(func.count(Message.id)) 547 | total = None 548 | query = query.order_by(getattr(Message.timestamp, order)()) 549 | 550 | if not rows_per_page: 551 | pages = 1 552 | else: 553 | total = session.scalar(query_total) 554 | pages = math.ceil(total / float(rows_per_page)) 555 | query = query.offset(rows_per_page * (page - 1)).limit(rows_per_page) 556 | 557 | if defer: 558 | if total is None: 559 | total = session.scalar(query_total) 560 | return total, pages, query 561 | else: 562 | # Execute! 563 | messages = session.scalars(query).all() 564 | if pages == 1: 565 | total = len(messages) 566 | return total, pages, messages 567 | 568 | @classmethod 569 | def get_first(cls, *, order="asc", **kwargs): 570 | """Get the first message matching the regular grep filters.""" 571 | query = cls.make_query(**kwargs) 572 | query = query.order_by(getattr(Message.timestamp, order)()).limit(1) 573 | return session.scalars(query).first() 574 | 575 | 576 | class NamedSingleton: 577 | id = Column(Integer, primary_key=True, autoincrement=True) 578 | name = Column(UnicodeText, index=True, unique=True) 579 | 580 | @classmethod 581 | def get_or_create(cls, name): 582 | """ 583 | Return the instance of the class with the specified name. If it doesn't 584 | already exist, create it. 585 | """ 586 | # Use an in-memory cache to speed things up. 587 | if name in cls._cache: 588 | # If we cache the instance, SQLAlchemy will run this query anyway because the instance 589 | # will be from a different transaction. So just cache the id. 590 | return session.get(cls, cls._cache[name]) 591 | obj = session.execute(select(cls).where(cls.name == name)).scalar_one_or_none() 592 | if obj is None: 593 | obj = cls(name=name) 594 | session.add(obj) 595 | session.flush() 596 | cls._cache[name] = obj.id 597 | return obj 598 | 599 | @classmethod 600 | def clear_cache(cls): 601 | cls._cache.clear() 602 | 603 | 604 | class User(DeclarativeBase, NamedSingleton): 605 | __tablename__ = "users" 606 | _cache = {} 607 | 608 | 609 | class Package(DeclarativeBase, NamedSingleton): 610 | __tablename__ = "packages" 611 | _cache = {} 612 | 613 | 614 | def _setup_hypertable(table_class): 615 | event.listen( 616 | table_class.__table__, 617 | "after_create", 618 | DDL(f"SELECT create_hypertable('{table_class.__tablename__}', 'timestamp');"), 619 | ) 620 | 621 | 622 | _setup_hypertable(Message) 623 | -------------------------------------------------------------------------------- /datanommer.models/tests/test_model.py: -------------------------------------------------------------------------------- 1 | # This file is a part of datanommer, a message sink for fedmsg. 2 | # Copyright (C) 2014, Red Hat, Inc. 3 | # 4 | # This program is free software: you can redistribute it and/or modify it under 5 | # the terms of the GNU General Public License as published by the Free Software 6 | # Foundation, either version 3 of the License, or (at your option) any later 7 | # version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 12 | # details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program. If not, see . 
16 | import datetime 17 | import json 18 | import logging 19 | 20 | import pytest 21 | from bodhi.messages.schemas.update import UpdateCommentV1 22 | from fedora_messaging import message as fedora_message 23 | from sqlalchemy import create_engine, func, select 24 | from sqlalchemy.exc import IntegrityError 25 | from sqlalchemy.sql.selectable import Select 26 | 27 | import datanommer.models as dm 28 | 29 | 30 | def generate_message( 31 | topic="org.fedoraproject.test.a.nice.message", 32 | body=None, 33 | headers=None, 34 | ): 35 | body = body or {"encouragement": "You're doing great!"} 36 | return fedora_message.Message(topic=topic, body=body, headers=headers) 37 | 38 | 39 | def generate_bodhi_update_complete_message(text="testing testing"): 40 | msg = UpdateCommentV1( 41 | body={ 42 | "comment": { 43 | "karma": -1, 44 | "text": text, 45 | "timestamp": "2019-03-18 16:54:48", 46 | "update": { 47 | "alias": "FEDORA-EPEL-2021-f2d195dada", 48 | "builds": [ 49 | {"nvr": "abrt-addon-python3-2.1.11-50.el7"}, 50 | {"nvr": "kernel-10.4.0-2.el7"}, 51 | ], 52 | "status": "pending", 53 | "release": {"name": "F35"}, 54 | "request": "testing", 55 | "user": {"name": "ryanlerch"}, 56 | }, 57 | "user": {"name": "dudemcpants"}, 58 | } 59 | } 60 | ) 61 | msg.topic = f"org.fedoraproject.stg.{msg.topic}" 62 | return msg 63 | 64 | 65 | @pytest.fixture 66 | def add_200_messages(datanommer_models): 67 | for x in range(0, 200): 68 | example_message = generate_message() 69 | example_message.id = f"{x}" 70 | dm.add(example_message) 71 | dm.session.flush() 72 | 73 | 74 | def test_init_uri_and_engine(): 75 | uri = "sqlite:///db.db" 76 | engine = create_engine(uri, future=True) 77 | 78 | with pytest.raises(ValueError, match="uri and engine cannot both be specified"): 79 | dm.init(uri, engine=engine) 80 | 81 | 82 | def test_init_no_uri_and_no_engine(): 83 | with pytest.raises(ValueError, match="One of uri or engine must be specified"): 84 | dm.init() 85 | 86 | 87 | def test_init_with_engine(caplog): 88 | uri = "sqlite:///db.db" 89 | engine = create_engine(uri, future=True) 90 | 91 | dm.init(engine=engine) 92 | 93 | assert not caplog.records 94 | 95 | # if the init with just the engine worked, trying it again will fail 96 | dm.init(engine=engine) 97 | assert caplog.records[0].message == "Session already initialized. Bailing" 98 | 99 | 100 | def test_init_no_init_twice(datanommer_models, mocker, caplog): 101 | dm.init("sqlite:///db.db") 102 | assert caplog.records[0].message == "Session already initialized. 
Bailing" 103 | 104 | 105 | def test_unclassified_category(datanommer_models): 106 | example_message = generate_message(topic="too.short") 107 | dm.add(example_message) 108 | dbmsg = dm.session.scalar(select(dm.Message)) 109 | 110 | assert dbmsg.category == "Unclassified" 111 | 112 | 113 | def test_from_msg_id(datanommer_models): 114 | example_message = generate_message() 115 | example_message.id = "ACUSTOMMESSAGEID" 116 | dm.add(example_message) 117 | dbmsg = dm.Message.from_msg_id("ACUSTOMMESSAGEID") 118 | 119 | assert dbmsg.msg_id == "ACUSTOMMESSAGEID" 120 | 121 | 122 | def test_add_missing_msg_id(datanommer_models, caplog): 123 | caplog.set_level(logging.INFO) 124 | example_message = generate_message() 125 | example_message._properties.message_id = None 126 | dm.add(example_message) 127 | dbmsg = dm.session.scalar(select(dm.Message)) 128 | assert ( 129 | "Message on org.fedoraproject.test.a.nice.message was received without a msg_id" 130 | in caplog.records[-1].message 131 | ) 132 | assert dbmsg.msg_id is not None 133 | 134 | 135 | def test_add_missing_timestamp(datanommer_models): 136 | example_message = generate_message() 137 | example_message._properties.headers["sent-at"] = None 138 | 139 | dm.add(example_message) 140 | 141 | dbmsg = dm.session.scalar(select(dm.Message)) 142 | timediff = datetime.datetime.now() - dbmsg.timestamp 143 | # 60 seconds between adding the message and checking 144 | # the timestamp should be more than enough. 145 | assert timediff < datetime.timedelta(seconds=60) 146 | 147 | 148 | def test_add_timestamp_with_Z(datanommer_models): 149 | example_message = generate_message() 150 | example_message._properties.headers["sent-at"] = "2021-07-27T04:22:42Z" 151 | 152 | dm.add(example_message) 153 | 154 | dbmsg = dm.session.scalar(select(dm.Message)) 155 | assert dbmsg.timestamp.astimezone(datetime.UTC) == datetime.datetime( 156 | 2021, 7, 27, 4, 22, 42, tzinfo=datetime.UTC 157 | ) 158 | 159 | 160 | def test_add_timestamp_with_junk(datanommer_models, caplog): 161 | example_message = generate_message() 162 | example_message._properties.headers["sent-at"] = "2021-07-27T04:22:42JUNK" 163 | 164 | dm.add(example_message) 165 | 166 | assert "Failed to parse sent-at timestamp value" in caplog.records[0].message 167 | 168 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 0 169 | 170 | 171 | def test_add_and_check_for_others(datanommer_models): 172 | # There are no users or packages at the start 173 | assert dm.session.scalar(select(func.count(dm.User.id))) == 0 174 | assert dm.session.scalar(select(func.count(dm.Package.id))) == 0 175 | 176 | # Then add a message 177 | dm.add(generate_bodhi_update_complete_message()) 178 | 179 | # There should now be two of each 180 | assert dm.session.scalar(select(func.count(dm.User.id))) == 2 181 | assert dm.session.scalar(select(func.count(dm.Package.id))) == 2 182 | 183 | # If we add it again, there should be no duplicates 184 | dm.add(generate_bodhi_update_complete_message()) 185 | assert dm.session.scalar(select(func.count(dm.User.id))) == 2 186 | assert dm.session.scalar(select(func.count(dm.Package.id))) == 2 187 | 188 | # Add a new username 189 | dm.add(generate_bodhi_update_complete_message(text="this is @abompard in a comment")) 190 | assert dm.session.scalar(select(func.count(dm.User.id))) == 3 191 | assert dm.session.scalar(select(func.count(dm.Package.id))) == 2 192 | 193 | 194 | def test_add_nothing(datanommer_models): 195 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 0 196 | 197 | 198 | def 
test_add_and_check(datanommer_models): 199 | dm.add(generate_message()) 200 | dm.session.flush() 201 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 202 | 203 | 204 | def test_categories(datanommer_models): 205 | dm.add(generate_bodhi_update_complete_message()) 206 | dm.session.flush() 207 | obj = dm.session.scalar(select(dm.Message)) 208 | assert obj.category == "bodhi" 209 | 210 | 211 | def test_categories_with_umb(datanommer_models): 212 | dm.add(generate_message(topic="/topic/VirtualTopic.eng.brew.task.closed")) 213 | dm.session.flush() 214 | obj = dm.session.scalar(select(dm.Message)) 215 | assert obj.category == "brew" 216 | 217 | 218 | def test_grep_all(datanommer_models): 219 | example_message = generate_message() 220 | print("example message:", repr(example_message)) 221 | print(repr(example_message.body)) 222 | dm.add(example_message) 223 | dm.session.flush() 224 | t, p, r = dm.Message.grep() 225 | assert t == 1 226 | assert p == 1 227 | assert len(r) == 1 228 | print(repr(r)) 229 | assert r[0].msg == example_message.body 230 | 231 | 232 | def test_grep_category(datanommer_models): 233 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 234 | dm.add(example_message) 235 | dm.session.flush() 236 | t, p, r = dm.Message.grep(categories=["bodhi"]) 237 | assert t == 1 238 | assert p == 1 239 | assert len(r) == 1 240 | assert r[0].msg == example_message.body 241 | 242 | 243 | def test_grep_not_category(datanommer_models): 244 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 245 | dm.add(example_message) 246 | dm.session.flush() 247 | t, p, r = dm.Message.grep(not_categories=["bodhi"]) 248 | assert t == 0 249 | assert p == 0 250 | assert len(r) == 0 251 | 252 | 253 | def test_add_headers(datanommer_models): 254 | example_headers = {"foo": "bar", "baz": 1, "wibble": ["zork", "zap"]} 255 | example_message = generate_message( 256 | topic="org.fedoraproject.prod.bodhi.newupdate", headers=example_headers 257 | ) 258 | dm.add(example_message) 259 | dbmsg = dm.session.scalar(select(dm.Message)) 260 | assert dbmsg.headers["foo"] == "bar" 261 | assert dbmsg.headers["baz"] == 1 262 | assert dbmsg.headers["wibble"] == ["zork", "zap"] 263 | 264 | 265 | def test_grep_topics(datanommer_models): 266 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 267 | dm.add(example_message) 268 | dm.session.flush() 269 | t, p, r = dm.Message.grep(topics=["org.fedoraproject.prod.bodhi.newupdate"]) 270 | assert t == 1 271 | assert p == 1 272 | assert len(r) == 1 273 | assert r[0].msg == example_message.body 274 | 275 | 276 | def test_grep_not_topics(datanommer_models): 277 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 278 | dm.add(example_message) 279 | dm.session.flush() 280 | t, p, r = dm.Message.grep(not_topics=["org.fedoraproject.prod.bodhi.newupdate"]) 281 | assert t == 0 282 | assert p == 0 283 | assert len(r) == 0 284 | 285 | 286 | def test_grep_start_end_validation(datanommer_models): 287 | with pytest.raises( 288 | ValueError, 289 | match="Either both start and end must be specified or neither must be specified", 290 | ): 291 | dm.Message.grep(start="2020-03-26") 292 | with pytest.raises( 293 | ValueError, 294 | match="Either both start and end must be specified or neither must be specified", 295 | ): 296 | dm.Message.grep(end="2020-03-26") 297 | 298 | 299 | def test_grep_start_end(datanommer_models): 300 | example_message = generate_message() 301 
| example_message._properties.headers["sent-at"] = "2021-04-01T00:00:01" 302 | dm.add(example_message) 303 | 304 | bodhi_example_message = generate_bodhi_update_complete_message() 305 | bodhi_example_message._properties.headers["sent-at"] = "2021-06-01T00:00:01" 306 | dm.add(bodhi_example_message) 307 | 308 | dm.session.flush() 309 | total, pages, messages = dm.Message.grep(start="2021-04-01", end="2021-05-01") 310 | assert total == 1 311 | assert pages == 1 312 | assert len(messages) == 1 313 | assert messages[0].msg == example_message.body 314 | 315 | total, pages, messages = dm.Message.grep(start="2021-06-01", end="2021-07-01") 316 | assert total == 1 317 | assert pages == 1 318 | assert len(messages) == 1 319 | assert messages[0].msg == bodhi_example_message.body 320 | 321 | 322 | def test_grep_msg_id(datanommer_models): 323 | example_message = generate_message() 324 | dm.add(example_message) 325 | 326 | bodhi_example_message = generate_bodhi_update_complete_message() 327 | dm.add(bodhi_example_message) 328 | 329 | dm.session.flush() 330 | total, pages, messages = dm.Message.grep(msg_id=example_message.id) 331 | assert total == 1 332 | assert pages == 1 333 | assert len(messages) == 1 334 | assert messages[0].msg == example_message.body 335 | 336 | total, pages, messages = dm.Message.grep(msg_id=bodhi_example_message.id) 337 | assert total == 1 338 | assert pages == 1 339 | assert len(messages) == 1 340 | assert messages[0].msg == bodhi_example_message.body 341 | 342 | total, pages, messages = dm.Message.grep(msg_id="NOTAMESSAGEID") 343 | assert total == 0 344 | assert pages == 0 345 | assert len(messages) == 0 346 | 347 | 348 | def test_grep_agents(datanommer_models): 349 | example_message = generate_message() 350 | dm.add(example_message) 351 | 352 | bodhi_example_message = generate_bodhi_update_complete_message() 353 | dm.add(bodhi_example_message) 354 | 355 | dm.session.flush() 356 | 357 | total, pages, messages = dm.Message.grep(agents=["dudemcpants"]) 358 | 359 | assert total == 1 360 | assert pages == 1 361 | assert len(messages) == 1 362 | 363 | assert messages[0].msg == bodhi_example_message.body 364 | 365 | 366 | def test_grep_not_agents(datanommer_models, mocker): 367 | example_message = generate_message() # has agent_name == None 368 | dm.add(example_message) 369 | 370 | bodhi_example_message = generate_bodhi_update_complete_message() 371 | dm.add(bodhi_example_message) # has agent_name == "dudemcpants" 372 | 373 | class MessageWithAgent(fedora_message.Message): 374 | topic = "org.fedoraproject.test.a.message.with.agent" 375 | agent_name = "dummy-agent-name" 376 | 377 | fedora_message._schema_name_to_class["MessageWithAgent"] = MessageWithAgent 378 | fedora_message._class_to_schema_name[MessageWithAgent] = "MessageWithAgent" 379 | 380 | example_message_with_agent = MessageWithAgent( 381 | body={"subject": "this is a message with an agent"} 382 | ) 383 | dm.add(example_message_with_agent) 384 | 385 | dm.session.flush() 386 | 387 | total, pages, messages = dm.Message.grep(not_agents=["dudemcpants"]) 388 | 389 | # Messages with agent_name == None are not returned 390 | assert total == 1 391 | assert pages == 1 392 | assert len(messages) == 1 393 | 394 | assert messages[0].msg == example_message_with_agent.body 395 | 396 | 397 | def test_grep_users(datanommer_models): 398 | example_message = generate_message() 399 | dm.add(example_message) 400 | 401 | bodhi_example_message = generate_bodhi_update_complete_message() 402 | dm.add(bodhi_example_message) 403 | 404 | 
dm.session.flush() 405 | 406 | total, pages, messages = dm.Message.grep(users=["dudemcpants"]) 407 | 408 | assert total == 1 409 | assert pages == 1 410 | assert len(messages) == 1 411 | 412 | assert messages[0].msg == bodhi_example_message.body 413 | 414 | 415 | def test_grep_not_users(datanommer_models): 416 | example_message = generate_message() 417 | dm.add(example_message) 418 | 419 | bodhi_example_message = generate_bodhi_update_complete_message() 420 | dm.add(bodhi_example_message) 421 | 422 | dm.session.flush() 423 | 424 | total, pages, messages = dm.Message.grep(not_users=["dudemcpants"]) 425 | 426 | assert total == 1 427 | assert pages == 1 428 | assert len(messages) == 1 429 | 430 | assert messages[0].msg == example_message.body 431 | 432 | 433 | def test_grep_packages(datanommer_models): 434 | example_message = generate_message() 435 | dm.add(example_message) 436 | 437 | bodhi_example_message = generate_bodhi_update_complete_message() 438 | dm.add(bodhi_example_message) 439 | 440 | dm.session.flush() 441 | 442 | total, pages, messages = dm.Message.grep(packages=["kernel"]) 443 | 444 | assert total == 1 445 | assert pages == 1 446 | assert len(messages) == 1 447 | 448 | assert messages[0].msg == bodhi_example_message.body 449 | 450 | 451 | def test_grep_not_packages(datanommer_models): 452 | example_message = generate_message() 453 | dm.add(example_message) 454 | 455 | bodhi_example_message = generate_bodhi_update_complete_message() 456 | dm.add(bodhi_example_message) 457 | 458 | dm.session.flush() 459 | 460 | total, pages, messages = dm.Message.grep(not_packages=["kernel"]) 461 | 462 | assert total == 1 463 | assert pages == 1 464 | assert len(messages) == 1 465 | 466 | assert messages[0].msg == example_message.body 467 | 468 | 469 | def test_grep_contains(datanommer_models): 470 | example_message = generate_message(topic="org.fedoraproject.prod.bodhi.newupdate") 471 | dm.add(example_message) 472 | dm.session.flush() 473 | t, p, r = dm.Message.grep(contains=["doing"]) 474 | assert t == 1 475 | assert p == 1 476 | assert len(r) == 1 477 | assert r[0].msg == example_message.body 478 | 479 | 480 | def test_grep_rows_per_page(datanommer_models, add_200_messages): 481 | total, pages, messages = dm.Message.grep() 482 | assert total == 200 483 | assert pages == 2 484 | assert len(messages) == 100 485 | 486 | for rows_per_page in (None, 0): 487 | try: 488 | total, pages, messages = dm.Message.grep(rows_per_page=rows_per_page) 489 | except ZeroDivisionError as e: 490 | pytest.fail(e) 491 | assert total == 200 492 | assert pages == 1 493 | assert len(messages) == 200 494 | 495 | 496 | def test_grep_defer(datanommer_models): 497 | example_message = generate_message() 498 | dm.add(example_message) 499 | 500 | dm.session.flush() 501 | 502 | _total, _pages, query = dm.Message.grep(defer=True) 503 | assert isinstance(query, Select) 504 | 505 | assert dm.session.scalars(query).all() == dm.Message.grep()[2] 506 | 507 | 508 | def test_grep_no_paging_and_defer(datanommer_models, add_200_messages): 509 | total, pages, _messages = dm.Message.grep(rows_per_page=0, defer=True) 510 | assert total == 200 511 | assert pages == 1 512 | 513 | 514 | def test_grep_no_total_if_single_page(datanommer_models, add_200_messages, mocker): 515 | # Assert we don't query the total of messages if we're getting them all anyway 516 | scalar_spy = mocker.spy(dm.session, "scalar") 517 | total, _pages, _messages = dm.Message.grep(rows_per_page=0) 518 | assert total == 200 519 | scalar_spy.assert_not_called() 520 | 521 | 
522 | def test_get_first(datanommer_models): 523 | messages = [] 524 | for x in range(0, 200): 525 | example_message = generate_message() 526 | example_message.id = f"{x}" 527 | dm.add(example_message) 528 | messages.append(example_message) 529 | dm.session.flush() 530 | msg = dm.Message.get_first() 531 | assert msg.msg_id == "0" 532 | assert msg.msg == messages[0].body 533 | 534 | 535 | def test_add_duplicate(datanommer_models, caplog): 536 | example_message = generate_message() 537 | dm.add(example_message) 538 | dm.add(example_message) 539 | # if no exception was thrown, then we successfully ignored the 540 | # duplicate message 541 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 542 | assert ( 543 | "Skipping message from org.fedoraproject.test.a.nice.message" in caplog.records[0].message 544 | ) 545 | 546 | 547 | def test_add_integrity_error(datanommer_models, mocker, caplog): 548 | mock_session_add = mocker.patch("datanommer.models.session.add") 549 | mock_session_add.side_effect = IntegrityError("asdf", "asd", "asdas") 550 | example_message = generate_message() 551 | dm.add(example_message) 552 | assert "Unknown Integrity Error: message" in caplog.records[0].message 553 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 0 554 | 555 | 556 | def test_add_duplicate_package(datanommer_models): 557 | # Define a special message schema and register it 558 | class MessageWithPackages(fedora_message.Message): 559 | @property 560 | def packages(self): 561 | return ["pkg", "pkg"] 562 | 563 | fedora_message._schema_name_to_class["MessageWithPackages"] = MessageWithPackages 564 | fedora_message._class_to_schema_name[MessageWithPackages] = "MessageWithPackages" 565 | example_message = MessageWithPackages( 566 | topic="org.fedoraproject.test.a.nice.message", 567 | body={"encouragement": "You're doing great!"}, 568 | headers=None, 569 | ) 570 | try: 571 | dm.add(example_message) 572 | except IntegrityError as e: 573 | pytest.fail(e) 574 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 575 | dbmsg = dm.session.scalar(select(dm.Message)) 576 | assert len(dbmsg.packages) == 1 577 | assert dbmsg.packages[0].name == "pkg" 578 | 579 | 580 | @pytest.mark.parametrize( 581 | "property_name,name_in_msg", [("usernames", "users"), ("packages", "packages")] 582 | ) 583 | def test_add_message_with_error_on_property(datanommer_models, caplog, property_name, name_in_msg): 584 | # Define a special message schema and register it 585 | class CustomMessage(fedora_message.Message): 586 | @property 587 | def packages(self): 588 | raise KeyError 589 | 590 | def _filter_headers(self): 591 | return {} 592 | 593 | def _crash(self): 594 | raise KeyError 595 | 596 | setattr(CustomMessage, property_name, property(_crash)) 597 | 598 | fedora_message._schema_name_to_class["CustomMessage"] = CustomMessage 599 | fedora_message._class_to_schema_name[CustomMessage] = "CustomMessage" 600 | example_message = CustomMessage( 601 | topic="org.fedoraproject.test.a.nice.message", 602 | body={"encouragement": "You're doing great!"}, 603 | headers=None, 604 | ) 605 | try: 606 | dm.add(example_message) 607 | except KeyError as e: 608 | pytest.fail(e) 609 | assert dm.session.scalar(select(func.count(dm.Message.id))) == 1 610 | assert caplog.records[0].message == ( 611 | f"Could not get the list of {name_in_msg} from a message on " 612 | f"org.fedoraproject.test.a.nice.message with id {example_message.id}" 613 | ) 614 | 615 | 616 | def test_as_fedora_message_dict(datanommer_models): 617 | 
example_message = generate_message() 618 | dm.add(example_message) 619 | 620 | dbmsg = dm.session.scalar(select(dm.Message)) 621 | 622 | message_json = json.dumps(dbmsg.as_fedora_message_dict()) 623 | 624 | # this should be the same as if we use the fedora_messaging dump function 625 | assert json.loads(fedora_message.dumps(example_message)) == json.loads(message_json) 626 | 627 | 628 | def test_as_fedora_message_dict_old_headers(datanommer_models): 629 | # Messages received with fedmsg don't have the sent-at header 630 | example_message = generate_message() 631 | dm.add(example_message) 632 | 633 | dbmsg = dm.session.scalar(select(dm.Message)) 634 | del dbmsg.headers["sent-at"] 635 | 636 | message_dict = dbmsg.as_fedora_message_dict() 637 | print(message_dict) 638 | print(json.loads(fedora_message.dumps(example_message))) 639 | 640 | # this should be the same as if we use the fedora_messaging dump function 641 | assert json.loads(fedora_message.dumps(example_message)) == message_dict 642 | 643 | 644 | def test_as_fedora_message_dict_no_headers(datanommer_models): 645 | # Messages can have no headers 646 | example_message = generate_message() 647 | dm.add(example_message) 648 | 649 | dbmsg = dm.session.scalar(select(dm.Message)) 650 | assert len(dbmsg.headers.keys()) == 5 651 | 652 | # Clear the headers 653 | dbmsg.headers = None 654 | 655 | try: 656 | message_dict = dbmsg.as_fedora_message_dict() 657 | except TypeError as e: 658 | pytest.fail(e) 659 | 660 | assert list(message_dict["headers"].keys()) == ["sent-at"] 661 | 662 | 663 | def test_as_dict(datanommer_models): 664 | dm.add(generate_message()) 665 | dbmsg = dm.session.scalar(select(dm.Message)) 666 | message_dict = dbmsg.as_dict() 667 | 668 | # we should have 14 keys in this dict 669 | assert len(message_dict) == 15 670 | assert message_dict["msg"] == {"encouragement": "You're doing great!"} 671 | assert message_dict["topic"] == "org.fedoraproject.test.a.nice.message" 672 | 673 | 674 | def test_as_dict_with_users_and_packages(datanommer_models): 675 | dm.add(generate_bodhi_update_complete_message()) 676 | dbmsg = dm.session.scalar(select(dm.Message)) 677 | message_dict = dbmsg.as_dict() 678 | 679 | assert message_dict["users"] == ["dudemcpants", "ryanlerch"] 680 | assert message_dict["packages"] == ["abrt-addon-python3", "kernel"] 681 | 682 | 683 | def test___json__deprecated(datanommer_models, caplog, mocker): 684 | mock_as_dict = mocker.patch("datanommer.models.Message.as_dict") 685 | 686 | dm.add(generate_message()) 687 | 688 | with pytest.warns(DeprecationWarning): 689 | dbmsg = dm.session.scalar(select(dm.Message)) 690 | dbmsg.__json__() 691 | 692 | mock_as_dict.assert_called_once() 693 | 694 | 695 | def test_username_deprecated(datanommer_models, caplog, mocker): 696 | dm.add(generate_message()) 697 | dbmsg = dm.session.scalar(select(dm.Message)) 698 | dbmsg.agent_name = "dummy" 699 | 700 | with pytest.warns(DeprecationWarning): 701 | assert dbmsg.username == "dummy" 702 | 703 | 704 | def test_singleton_create(datanommer_models): 705 | dm.Package.get_or_create("foobar") 706 | assert [p.name for p in dm.session.scalars(select(dm.Package))] == ["foobar"] 707 | 708 | 709 | def test_singleton_get_existing(datanommer_models): 710 | p1 = dm.Package.get_or_create("foobar") 711 | # Clear the in-memory cache 712 | dm.Package._cache.clear() 713 | p2 = dm.Package.get_or_create("foobar") 714 | assert p1.id == p2.id 715 | --------------------------------------------------------------------------------