├── .python-version
├── .idea
├── .gitignore
├── vcs.xml
├── modules.xml
├── misc.xml
├── runConfigurations
│ ├── Make_Lint_Fix.xml
│ ├── All_Quality_Checks.xml
│ ├── Black_Format.xml
│ ├── Ruff_Lint.xml
│ ├── Mypy_Type_Check.xml
│ └── Ruff_Lint_Fix.xml
├── platform-problem-monitoring-core.iml
└── inspectionProfiles
│ └── Project_Default.xml
├── src
├── tests
│ ├── fixtures
│ │ ├── current_date_time.txt
│ │ ├── __init__.py
│ │ ├── lucene_query.json
│ │ └── previous_normalization_results.json
│ ├── __init__.py
│ ├── test_step6_extract_fields.py
│ └── test_step7_normalize_messages.py
└── platform_problem_monitoring_core
│ ├── __init__.py
│ ├── step1_prepare.py
│ ├── step6_extract_fields.py
│ ├── step12_cleanup.py
│ ├── step11_store_new_state.py
│ ├── utils.py
│ ├── step2_download_previous_state.py
│ ├── step10_send_email_report.py
│ ├── step3_retrieve_hourly_problem_numbers.py
│ ├── step4_generate_trend_chart.py
│ ├── step8_compare_normalizations.py
│ ├── step5_download_logstash_documents.py
│ └── step7_normalize_messages.py
├── assets
├── sample-trend-and-report-input-data
│ ├── start_date_time.txt
│ ├── trend_chart.png
│ ├── lucene_query.json
│ ├── email_body.txt
│ ├── hourly_problem_numbers.json
│ ├── norm_results.json
│ ├── norm_results_prev.json
│ └── comparison_results.json
└── readme-hero-image.png
├── etc
├── main.conf.dist
└── lucene_query.json.dist
├── .gitignore
├── .github
└── workflows
│ ├── tests.yml
│ ├── code-quality.yml
│ └── release.yml
├── docs
├── NOTES.md
├── DEVELOPMENT.md
├── QUALITY.md
├── JETBRAINS_SETUP.md
└── RELEASE_MANAGEMENT.md
├── LICENSE.txt
├── .vscode
└── settings.json
├── .pre-commit-config.yaml
├── pyproject.toml
├── Makefile
├── README.md
└── bin
└── ppmc
/.python-version:
--------------------------------------------------------------------------------
1 | 3.10.16
2 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/src/tests/fixtures/current_date_time.txt:
--------------------------------------------------------------------------------
1 | 2025-03-05T19:23:10.832778+00:00
2 |
--------------------------------------------------------------------------------
/src/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Test package for platform_problem_monitoring_core."""
2 |
--------------------------------------------------------------------------------
/assets/sample-trend-and-report-input-data/start_date_time.txt:
--------------------------------------------------------------------------------
1 | 2025-03-06T00:00:00Z
2 |
--------------------------------------------------------------------------------
/src/tests/fixtures/__init__.py:
--------------------------------------------------------------------------------
1 | """Test fixtures for platform_problem_monitoring_core tests."""
2 |
--------------------------------------------------------------------------------
/assets/readme-hero-image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dx-tooling/platform-problem-monitoring-core/HEAD/assets/readme-hero-image.png
--------------------------------------------------------------------------------
/assets/sample-trend-and-report-input-data/trend_chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dx-tooling/platform-problem-monitoring-core/HEAD/assets/sample-trend-and-report-input-data/trend_chart.png
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/src/platform_problem_monitoring_core/__init__.py:
--------------------------------------------------------------------------------
1 | """Platform Problem Monitoring Core.
2 |
3 | A tool for monitoring platform problems using Elasticsearch logs.
4 | """
5 |
from importlib.metadata import PackageNotFoundError, version

try:
    # Resolve the installed distribution's version from package metadata.
    __version__ = version("platform_problem_monitoring_core")
except PackageNotFoundError:
    # Fallback for source checkouts where no package metadata exists.
    __version__ = "0.1.0"
12 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/etc/main.conf.dist:
--------------------------------------------------------------------------------
1 | REMOTE_STATE_S3_BUCKET_NAME=""
2 | REMOTE_STATE_S3_FOLDER_NAME=""
3 |
4 | ELASTICSEARCH_SERVER_BASE_URL=""
5 | ELASTICSEARCH_LUCENE_QUERY_FILE_PATH=""
6 |
7 | KIBANA_DISCOVER_BASE_URL=""
8 | KIBANA_DOCUMENT_DEEPLINK_URL_STRUCTURE="https://example.com/kibana/_plugin/kibana/app/discover#/doc/logstash-*/{{index}}?id={{id}}"
9 |
10 | SMTP_SERVER_HOSTNAME=""
11 | SMTP_SERVER_PORT=""
12 | SMTP_SERVER_USERNAME=""
13 | SMTP_SERVER_PASSWORD=""
14 | SMTP_SENDER_ADDRESS=""
15 | SMTP_RECEIVER_ADDRESS=""
16 |
17 | # Number of hours to look back for problem trends
18 | TREND_HOURS_BACK="24"
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/workspace.xml
2 | .idea/tasks.xml
3 | .idea/dictionaries/
4 | .idea/shelf/
5 | .idea/usage.statistics.xml
6 | .idea/contentModel.xml
7 | .idea/dataSources/
8 | .idea/dataSources.local.xml
9 | .idea/httpRequests/
10 | .idea/caches/
11 | # Keep .idea/runConfigurations
12 | # Keep .idea/inspectionProfiles
13 | # Keep .idea/misc.xml
14 | # Keep .idea/modules.xml
15 |
16 | venv/
17 | main.conf
18 | etc/lucene_query.json
19 |
20 | # Python package build artifacts
21 | *.egg-info/
22 | *.egg
23 | dist/
24 | build/
25 | __pycache__/
26 | *.py[cod]
27 | *$py.class
28 | references/
29 | coverage.xml
30 |
31 | # Cache directories
32 | .mypy_cache/
33 | .pytest_cache/
34 | .ruff_cache/
35 | .coverage
36 | htmlcov/
37 |
--------------------------------------------------------------------------------
/etc/lucene_query.json.dist:
--------------------------------------------------------------------------------
1 | {
2 | "query": {
3 | "bool": {
4 | "should": [
5 | { "match": { "message": "error" } },
6 | { "match": { "message": "failure" } },
7 | { "match": { "message": "critical" } },
8 | { "match": { "message": "alert" } },
9 | { "match": { "message": "exception" } }
10 | ],
11 | "must_not": [
12 | { "match": { "message": "User Deprecated" } },
13 | { "match": { "message": "logstash" } },
14 | { "term": { "syslog_program": "dd.collector" } },
15 | { "term": { "syslog_program": "dd.forwarder" } },
16 | { "term": { "syslog_program": "dd.dogstatsd" } }
17 | ],
18 | "minimum_should_match": 1
19 | }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/tests/fixtures/lucene_query.json:
--------------------------------------------------------------------------------
1 | {
2 | "query": {
3 | "bool": {
4 | "should": [
5 | { "match": { "message": "error" } },
6 | { "match": { "message": "failure" } },
7 | { "match": { "message": "critical" } },
8 | { "match": { "message": "alert" } },
9 | { "match": { "message": "exception" } }
10 | ],
11 | "must_not": [
12 | { "match": { "message": "User Deprecated" } },
13 | { "match": { "message": "logstash" } },
14 | { "term": { "syslog_program": "dd.collector" } },
15 | { "term": { "syslog_program": "dd.forwarder" } },
16 | { "term": { "syslog_program": "dd.dogstatsd" } }
17 | ],
18 | "minimum_should_match": 1
19 | }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | test:
11 | runs-on: ubuntu-latest
12 | env:
13 | CI: true
14 | strategy:
15 | matrix:
16 | python-version: ["3.10", "3.11", "3.12", "3.13"]
17 |
18 | steps:
19 | - uses: actions/checkout@v3
20 |
21 | - name: Set up Python ${{ matrix.python-version }}
22 | uses: actions/setup-python@v4
23 | with:
24 | python-version: ${{ matrix.python-version }}
25 | cache: 'pip'
26 |
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | pip install -e ".[dev]"
31 |
32 | - name: Run tests
33 | run: |
34 | # Run tests with coverage
35 | make test-coverage
36 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/Make_Lint_Fix.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.github/workflows/code-quality.yml:
--------------------------------------------------------------------------------
1 | name: Code Quality
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | quality:
11 | runs-on: ubuntu-latest
12 | env:
13 | CI: true
14 | strategy:
15 | matrix:
16 | python-version: ["3.10", "3.11", "3.12", "3.13"]
17 |
18 | steps:
19 | - uses: actions/checkout@v3
20 |
21 | - name: Set up Python ${{ matrix.python-version }}
22 | uses: actions/setup-python@v4
23 | with:
24 | python-version: ${{ matrix.python-version }}
25 | cache: 'pip'
26 |
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | pip install -e ".[dev]"
31 |
32 | - name: Run code quality checks
33 | run: |
34 | # Run all code quality checks via the Makefile
35 | make ci-quality
36 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/All_Quality_Checks.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/Black_Format.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/Ruff_Lint.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/Mypy_Type_Check.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/Ruff_Lint_Fix.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/docs/NOTES.md:
--------------------------------------------------------------------------------
1 | # Notes
2 |
3 | ## Runbook
4 |
5 | python3 -m platform_problem_monitoring_core.step5_download_logstash_documents \
6 | --elasticsearch-url "http://127.0.0.1:9201" \
7 | --query-file "/Users/manuel/git/github/dx-tooling/platform-problem-monitoring-core/src/lucene_query.json" \
8 | --start-date-time-file "/tmp/latest-date-time.txt" \
9 | --output-file "/tmp/docs.json" \
10 | --current-date-time-file "/tmp/cur-date-time.txt"
11 |
12 | curl -s -X GET "http://127.0.0.1:9201/_search?pretty" -H 'Content-Type: application/json' -d'
13 | {
14 | "query": {
15 | "query_string" : {
16 | "query" : "@timestamp: ['2025-03-04T00:00:00.000' TO '2025-03-04T01:00:00.000'] AND type: \"symfony-errors\""
17 | }
18 | }
19 | }
20 | '
21 |
22 | ## TODOs & Ideas
23 |
24 | - add step 12 (cleanup) to ppmc
25 | - add ppmc option to disable cleanup step 12
26 | - allow the local filesystem as a state storage alternative
27 |
--------------------------------------------------------------------------------
/.idea/platform-problem-monitoring-core.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Manuel Kießling
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.linting.enabled": true,
3 | "python.linting.mypyEnabled": true,
4 | "python.linting.flake8Enabled": false,
5 | "python.linting.banditEnabled": true,
6 | "python.formatting.provider": "black",
7 | "python.formatting.blackArgs": ["--line-length", "100"],
8 | "editor.formatOnSave": true,
9 | "editor.codeActionsOnSave": {
10 | "source.organizeImports": "explicit",
11 | "source.fixAll": "explicit"
12 | },
13 | "python.linting.ignorePatterns": [
14 | ".vscode/*.py",
15 | "**/site-packages/**/*.py",
16 | "venv/**/*.py"
17 | ],
18 | "python.linting.mypyArgs": [
19 | "--config-file=pyproject.toml"
20 | ],
21 | "[python]": {
22 | "editor.rulers": [100],
23 | "editor.tabSize": 4,
24 | "editor.insertSpaces": true,
25 | "editor.detectIndentation": false
26 | },
27 | "files.exclude": {
28 | "**/__pycache__": true,
29 | "**/.mypy_cache": true,
30 | "**/.pytest_cache": true,
31 | "**/.ruff_cache": true,
32 | "**/*.egg-info": true
33 | },
34 | "python.analysis.typeCheckingMode": "strict",
35 | "python.analysis.extraPaths": ["src"]
36 | }
37 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.4.0
4 | hooks:
5 | - id: trailing-whitespace
6 | - id: end-of-file-fixer
7 | - id: check-yaml
8 | - id: check-added-large-files
9 | - id: check-json
10 | - id: check-toml
11 | - id: detect-private-key
12 |
13 | - repo: local
14 | hooks:
15 | - id: black
16 | name: black
17 | entry: make format-check-files
18 | language: system
19 | types: [python]
20 | pass_filenames: true
21 |
22 | - id: isort
23 | name: isort
24 | entry: make isort-check-files
25 | language: system
26 | types: [python]
27 | pass_filenames: true
28 |
29 | - id: ruff
30 | name: ruff
31 | entry: make lint-files
32 | language: system
33 | types: [python]
34 | pass_filenames: true
35 |
36 | - id: mypy
37 | name: mypy
38 | entry: make type-check-files
39 | language: system
40 | types: [python]
41 | pass_filenames: true
42 |
43 | - id: bandit
44 | name: bandit
45 | entry: make security-check-files
46 | language: system
47 | types: [python]
48 | exclude: ^src/tests/
49 | pass_filenames: true
50 |
--------------------------------------------------------------------------------
/assets/sample-trend-and-report-input-data/lucene_query.json:
--------------------------------------------------------------------------------
1 | {
2 | "query": {
3 | "bool": {
4 | "should": [
5 | {
6 | "match": {
7 | "message": "error"
8 | }
9 | },
10 | {
11 | "match": {
12 | "message": "failure"
13 | }
14 | },
15 | {
16 | "match": {
17 | "message": "exception"
18 | }
19 | },
20 | {
21 | "match": {
22 | "message": "warning"
23 | }
24 | },
25 | {
26 | "match": {
27 | "message": "critical"
28 | }
29 | }
30 | ],
31 | "must_not": [
32 | {
33 | "match": {
34 | "message": "User Deprecated"
35 | }
36 | },
37 | {
38 | "match": {
39 | "message": "debug"
40 | }
41 | }
42 | ],
43 | "minimum_should_match": 1
44 | }
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | push:
5 | tags:
6 | - 'v*.*.*'
7 |
8 | permissions:
9 | contents: write # Needed for creating releases and uploading assets
10 |
11 | jobs:
12 | release:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v3
16 | with:
17 | fetch-depth: 0
18 |
19 | - name: Set up Python
20 | uses: actions/setup-python@v4
21 | with:
22 | python-version: '3.10'
23 | cache: 'pip'
24 |
25 | - name: Install dependencies
26 | run: |
27 | python -m pip install --upgrade pip
28 | pip install build twine wheel
29 | pip install -e ".[dev]"
30 |
31 | - name: Run tests
32 | run: make test-coverage
33 |
34 | - name: Build package
35 | run: python -m build
36 |
37 | - name: Get version from tag
38 | id: get_version
39 | run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
40 |
41 | - name: Create Release
42 | uses: softprops/action-gh-release@v1
43 | with:
44 | name: Release v${{ steps.get_version.outputs.VERSION }}
45 | draft: false
46 | prerelease: false
47 | generate_release_notes: true
48 | files: |
49 | dist/*.whl
50 | dist/*.tar.gz
51 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
12 |
13 |
14 |
19 |
20 |
21 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/docs/DEVELOPMENT.md:
--------------------------------------------------------------------------------
1 | # Development
2 |
3 | ## Development Setup
4 |
5 | 1. **Clone the repository:**
6 | ```bash
7 | git clone https://github.com/dx-tooling/platform-problem-monitoring-core.git
8 | cd platform-problem-monitoring-core
9 | ```
10 |
11 | 2. **Install development dependencies:**
12 | ```bash
13 | make install
14 | ```
15 | This creates a virtual environment, installs the package and all development dependencies, and sets up pre-commit hooks.
16 |
17 | 3. **Activate the virtual environment:**
18 | ```bash
19 | source venv/bin/activate # On Windows: venv\Scripts\activate
20 | ```
21 |
22 | ## Code Quality Tools
23 |
24 | This project uses a unified approach to code quality with all tools configured in `pyproject.toml` and executed via:
25 |
26 | 1. **Pre-commit hooks** - Run automatically before each commit
27 | 2. **Make commands** - Run manually or in CI
28 |
29 | Available make commands:
30 |
31 | ```bash
32 | make install Install package and development dependencies
33 | make activate-venv Instructions to activate the virtual environment
34 | make format Format code with black and isort
35 | make format-check Check if code is properly formatted without modifying files
36 | make lint Run linters (ruff)
37 | make lint-fix Run linters and auto-fix issues where possible
38 | make type-check Run mypy type checking
39 | make security-check Run bandit security checks
40 | make quality Run all code quality checks (with formatting)
41 | make ci-quality Run all code quality checks (without modifying files)
42 | make test Run tests
43 | make test-verbose Run tests with verbose output
44 | make test-coverage Run tests with coverage report
45 | make test-file Run tests for a specific file (usage: make test-file file=path/to/test_file.py)
46 | make update-deps Update all dependencies to their latest semver-compatible versions
47 | make bump-version Update the version number in pyproject.toml
48 | make release Create a new release tag (after running quality checks and tests)
49 | make clean Remove build artifacts and cache directories
50 | ```
51 |
52 | The pre-commit hooks are configured to use the same Makefile targets, ensuring consistency between local development and CI environments.
53 |
--------------------------------------------------------------------------------
/src/platform_problem_monitoring_core/step1_prepare.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Prepare environment for a process run."""
3 |
4 | import argparse
5 | import os
6 | import sys
7 | import tempfile
8 | from pathlib import Path
9 |
10 | from platform_problem_monitoring_core.utils import ensure_dir_exists, logger
11 |
12 |
def prepare_environment() -> str:
    """
    Prepare environment for a process run.

    Creates a temporary work directory for storing intermediate files and
    verifies that it exists, is writable, and can hold subdirectories.

    Returns:
        Path to the temporary work folder

    Raises:
        FileNotFoundError: If the temporary directory could not be created
        PermissionError: If unable to write to the temporary directory
        OSError: If any other OS-level error occurs
    """
    logger.info("Preparing environment for process run")

    try:
        # Create temporary work directory
        work_dir = tempfile.mkdtemp(prefix="platform_problem_monitoring_")
        logger.info(f"Created temporary work directory: {work_dir}")

        # Check if directory exists and is writable
        work_path = Path(work_dir)
        if not work_path.exists():
            error_msg = f"Failed to create temporary directory: {work_dir}"
            raise FileNotFoundError(error_msg)

        if not os.access(work_dir, os.W_OK):
            error_msg = f"No write access to temporary directory: {work_dir}"
            raise PermissionError(error_msg)

        # Probe that subdirectories can be created inside the work directory;
        # the probe directory is removed immediately — only the check matters.
        test_subdir = work_path / "test"
        ensure_dir_exists(str(test_subdir))
        test_subdir.rmdir()

        logger.info("Environment preparation complete")
        return work_dir
    except OSError as e:
        # PermissionError and FileNotFoundError are OSError subclasses, so a
        # single OSError clause covers every failure mode raised above.
        logger.error(f"Failed to prepare environment: {e}")
        raise
54 |
55 |
def main() -> None:
    """Execute the script when run directly."""
    # No options are supported; parsing still provides a --help message and
    # rejects unexpected arguments.
    argparse.ArgumentParser(description="Prepare environment for a process run").parse_args()

    try:
        # Emit the work directory path on stdout for the next step to consume.
        print(prepare_environment())
    except Exception as e:
        logger.error(f"Error preparing environment: {str(e)}")
        sys.exit(1)
    sys.exit(0)
74 |
--------------------------------------------------------------------------------
/assets/sample-trend-and-report-input-data/email_body.txt:
--------------------------------------------------------------------------------
1 |
2 | PLATFORM PROBLEM MONITORING REPORT
3 | =================================
4 | Generated: 2025-03-09 15:59:20 UTC
5 |
6 | SUMMARY
7 | -------
8 | Current problem patterns: 0
9 | Previous problem patterns: 0
10 | New problem patterns: 3
11 | Disappeared problem patterns: 5
12 |
13 | NEW PROBLEM PATTERNS
14 | ===================
15 | These patterns have appeared since the last report.
16 |
17 | 1. [32] SSL certificate for <*> is expiring in <*> days
18 | Sample documents:
19 |
20 | 2. [21] Disk usage warning: <*> is at <*>% capacity
21 | Sample documents:
22 |
23 | 3. [18] Connection reset by peer while sending request to <*>
24 | Sample documents:
25 |
26 |
27 |
28 | DISAPPEARED PROBLEM PATTERNS
29 | ==========================
30 | These patterns were present in the previous report but are no longer occurring.
31 |
32 | 1. [245] Error connecting to database at <*>: Connection timed out
33 | Sample documents:
34 |
35 | 2. [124] Exception in thread "main" java.lang.OutOfMemoryError: <*>
36 | Sample documents:
37 |
38 | 3. [89] Kubernetes pod <*> in namespace <*> failed health check
39 | Sample documents:
40 |
41 | 4. [54] Failed to process message from queue <*>: <*>
42 | Sample documents:
43 |
44 | 5. [42] Cache invalidation failed for key <*>
45 | Sample documents:
46 |
47 |
48 |
49 | INCREASED PROBLEM PATTERNS
50 | ========================
51 | These patterns have increased in occurrence count since the last report.
52 |
53 | 1. [14] (+0, +0.0%) Failed to process job <*> - timeout after <*> seconds
54 | Sample documents:
55 |
56 |
57 |
58 | DECREASED PROBLEM PATTERNS
59 | ========================
60 | These patterns have decreased in occurrence count since the last report.
61 |
62 | 1. [72] (-0, -0.0%) Failed to authenticate user <*> - invalid credentials
63 | Sample documents:
64 |
65 | 2. [58] (-0, -0.0%) API rate limit exceeded for user ID <*>
66 | Sample documents:
67 |
68 | 3. [12] (-0, -0.0%) HTTP request failed: <*> <*> returned status code <*>
69 | Sample documents:
70 |
71 |
72 |
73 | TOP 25 CURRENT PROBLEM PATTERNS
74 | ==============================
75 | The most frequent problem patterns in the current report.
76 |
77 | 1. [72] Failed to authenticate user <*> - invalid credentials
78 | Sample documents:
79 |
80 | 2. [58] API rate limit exceeded for user ID <*>
81 | Sample documents:
82 |
83 | 3. [32] SSL certificate for <*> is expiring in <*> days
84 | Sample documents:
85 |
86 | 4. [21] Disk usage warning: <*> is at <*>% capacity
87 | Sample documents:
88 |
89 | 5. [18] Connection reset by peer while sending request to <*>
90 | Sample documents:
91 |
92 | 6. [14] Failed to process job <*> - timeout after <*> seconds
93 | Sample documents:
94 |
95 | 7. [12] AWS S3 access denied: <*>
96 | Sample documents:
97 |
98 |
99 |
100 | This is an automated report from the Platform Problem Monitoring system.
101 |
--------------------------------------------------------------------------------
/docs/QUALITY.md:
--------------------------------------------------------------------------------
1 | # Code Quality Guidelines
2 |
3 | This document describes the code quality tools and practices used in this project.
4 |
5 | ## Code Quality Tools
6 |
7 | We use the following tools to maintain high code quality:
8 |
9 | ### Ruff
10 |
11 | [Ruff](https://github.com/astral-sh/ruff) is an extremely fast Python linter, written in Rust. It includes many checks from tools like flake8, isort, pycodestyle, and many plugins.
12 |
13 | ```bash
14 | # Run Ruff
15 | make lint
16 | ```
17 |
18 | ### Black
19 |
20 | [Black](https://github.com/psf/black) is an uncompromising code formatter for Python. It applies a consistent style by reformatting your code.
21 |
22 | ```bash
23 | # Format code with Black
24 | make format
25 | ```
26 |
27 | ### isort
28 |
29 | [isort](https://github.com/PyCQA/isort) sorts your imports alphabetically, and automatically separates them into sections and by type.
30 |
31 | ```bash
32 | # Run isort (included in format command)
33 | make format
34 | ```
35 |
36 | ### mypy
37 |
38 | [mypy](https://github.com/python/mypy) is an optional static type checker for Python. It helps catch common errors before runtime.
39 |
40 | ```bash
41 | # Run mypy
42 | make type-check
43 | ```
44 |
45 | ### Bandit
46 |
47 | [Bandit](https://github.com/PyCQA/bandit) is a tool designed to find common security issues in Python code.
48 |
49 | ```bash
50 | # Run security checks
51 | make security-check
52 | ```
53 |
54 | ### pre-commit
55 |
56 | [pre-commit](https://pre-commit.com/) runs these checks automatically before each commit, ensuring that only quality code enters the repository.
57 |
58 | ```bash
59 | # Install pre-commit hooks
60 | pre-commit install
61 | ```
62 |
63 | ## Running All Checks
64 |
65 | You can run all quality checks at once:
66 |
67 | ```bash
68 | make quality
69 | ```
70 |
71 | ## VS Code Integration
72 |
73 | This project includes VS Code settings that integrate all these tools into your editor. With the proper extensions installed, you'll get:
74 |
75 | - Real-time type checking
76 | - Automatic formatting on save
77 | - Inline error highlighting
78 | - Code actions to fix issues
79 |
80 | ## Recommended VS Code Extensions
81 |
82 | - Python (Microsoft)
83 | - Pylance (Microsoft)
84 | - Ruff (Astral Software)
85 | - Even Better TOML (tamasfe)
86 | - YAML (Red Hat)
87 |
88 | ## Code Style Guidelines
89 |
90 | 1. **Type Annotations**: All functions should have complete type annotations.
91 | 2. **Docstrings**: All public methods and functions should have Google-style docstrings.
92 | 3. **Line Length**: Maximum line length is 100 characters.
93 | 4. **Imports**: Imports should be sorted by isort with the Black profile.
94 | 5. **Naming**: Follow PEP8 naming conventions:
95 | - Classes: `PascalCase`
96 | - Functions, methods, variables: `snake_case`
97 | - Constants: `UPPER_SNAKE_CASE`
98 | - Private members: start with underscore `_private_method()`
99 |
100 | ## Continuous Integration
101 |
102 | These quality checks are also run in CI to ensure that all code entering the main branch maintains the expected level of quality.
103 |
--------------------------------------------------------------------------------
/assets/sample-trend-and-report-input-data/hourly_problem_numbers.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "start_time": "2025-03-07T00:00:00Z",
4 | "end_time": "2025-03-07T01:00:00Z",
5 | "count": 352
6 | },
7 | {
8 | "start_time": "2025-03-07T01:00:00Z",
9 | "end_time": "2025-03-07T02:00:00Z",
10 | "count": 378
11 | },
12 | {
13 | "start_time": "2025-03-07T02:00:00Z",
14 | "end_time": "2025-03-07T03:00:00Z",
15 | "count": 365
16 | },
17 | {
18 | "start_time": "2025-03-07T03:00:00Z",
19 | "end_time": "2025-03-07T04:00:00Z",
20 | "count": 342
21 | },
22 | {
23 | "start_time": "2025-03-07T04:00:00Z",
24 | "end_time": "2025-03-07T05:00:00Z",
25 | "count": 320
26 | },
27 | {
28 | "start_time": "2025-03-07T05:00:00Z",
29 | "end_time": "2025-03-07T06:00:00Z",
30 | "count": 298
31 | },
32 | {
33 | "start_time": "2025-03-07T06:00:00Z",
34 | "end_time": "2025-03-07T07:00:00Z",
35 | "count": 274
36 | },
37 | {
38 | "start_time": "2025-03-07T07:00:00Z",
39 | "end_time": "2025-03-07T08:00:00Z",
40 | "count": 286
41 | },
42 | {
43 | "start_time": "2025-03-07T08:00:00Z",
44 | "end_time": "2025-03-07T09:00:00Z",
45 | "count": 310
46 | },
47 | {
48 | "start_time": "2025-03-07T09:00:00Z",
49 | "end_time": "2025-03-07T10:00:00Z",
50 | "count": 267
51 | },
52 | {
53 | "start_time": "2025-03-07T10:00:00Z",
54 | "end_time": "2025-03-07T11:00:00Z",
55 | "count": 243
56 | },
57 | {
58 | "start_time": "2025-03-07T11:00:00Z",
59 | "end_time": "2025-03-07T12:00:00Z",
60 | "count": 218
61 | },
62 | {
63 | "start_time": "2025-03-07T12:00:00Z",
64 | "end_time": "2025-03-07T13:00:00Z",
65 | "count": 203
66 | },
67 | {
68 | "start_time": "2025-03-07T13:00:00Z",
69 | "end_time": "2025-03-07T14:00:00Z",
70 | "count": 185
71 | },
72 | {
73 | "start_time": "2025-03-07T14:00:00Z",
74 | "end_time": "2025-03-07T15:00:00Z",
75 | "count": 176
76 | },
77 | {
78 | "start_time": "2025-03-07T15:00:00Z",
79 | "end_time": "2025-03-07T16:00:00Z",
80 | "count": 162
81 | },
82 | {
83 | "start_time": "2025-03-07T16:00:00Z",
84 | "end_time": "2025-03-07T17:00:00Z",
85 | "count": 143
86 | },
87 | {
88 | "start_time": "2025-03-07T17:00:00Z",
89 | "end_time": "2025-03-07T18:00:00Z",
90 | "count": 132
91 | },
92 | {
93 | "start_time": "2025-03-07T18:00:00Z",
94 | "end_time": "2025-03-07T19:00:00Z",
95 | "count": 124
96 | },
97 | {
98 | "start_time": "2025-03-07T19:00:00Z",
99 | "end_time": "2025-03-07T20:00:00Z",
100 | "count": 115
101 | },
102 | {
103 | "start_time": "2025-03-07T20:00:00Z",
104 | "end_time": "2025-03-07T21:00:00Z",
105 | "count": 108
106 | },
107 | {
108 | "start_time": "2025-03-07T21:00:00Z",
109 | "end_time": "2025-03-07T22:00:00Z",
110 | "count": 93
111 | },
112 | {
113 | "start_time": "2025-03-07T22:00:00Z",
114 | "end_time": "2025-03-07T23:00:00Z",
115 | "count": 84
116 | },
117 | {
118 | "start_time": "2025-03-07T23:00:00Z",
119 | "end_time": "2025-03-08T00:00:00Z",
120 | "count": 72
121 | }
122 | ]
123 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=42", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "platform_problem_monitoring_core"
7 | version = "0.1.11"
8 | description = "A tool for monitoring platform problems using Elasticsearch logs"
9 | authors = [
10 | {name = "Platform Team"}
11 | ]
12 | readme = "README.md"
13 | requires-python = ">=3.10"
14 | license = {text = "Proprietary"}
15 | dependencies = [
16 | "boto3>=1.28.0",
17 | "drain3>=0.9.6",
18 | "jinja2>=3.0.0",
19 |     # NOTE: "argparse" removed — it is part of the Python standard library since 3.2;
20 | "requests>=2.25.0",
21 | "matplotlib>=3.7.0",
22 | "seaborn>=0.12.0",
23 | "typing-extensions>=4.0.0",
24 | ]
25 |
26 | [project.optional-dependencies]
27 | dev = [
28 | "pytest>=7.0.0",
29 | "pytest-cov>=4.1.0",
30 | "black>=23.0.0",
31 | "flake8>=6.0.0",
32 | "mypy>=1.0.0",
33 | "isort>=5.12.0",
34 | "ruff>=0.1.9",
35 | "pre-commit>=3.3.2",
36 | "bandit>=1.7.5",
37 | "types-requests>=2.0.0",
38 | "types-boto3>=1.0.0",
39 | "types-seaborn>=0.12.0",
40 | ]
41 |
42 | [tool.setuptools]
43 | packages = ["platform_problem_monitoring_core"]
44 | package-dir = {"" = "src"}
45 |
46 | # Add package data to include resources in the wheel
47 | [tool.setuptools.package-data]
48 | platform_problem_monitoring_core = ["resources/*.html"]
49 |
50 | # Add data files to include configuration templates
51 | [tool.setuptools.data-files]
52 | "etc/platform_problem_monitoring_core" = ["etc/*.dist"]
53 | "bin" = ["bin/*"]
54 |
55 | [tool.black]
56 | line-length = 120
57 | target-version = ["py310", "py311", "py312", "py313"]
58 |
59 | [tool.mypy]
60 | python_version = "3.13"
61 | warn_return_any = true
62 | warn_unused_configs = true
63 | disallow_untyped_defs = true
64 | disallow_incomplete_defs = true
65 | check_untyped_defs = true
66 | disallow_untyped_decorators = true
67 | no_implicit_optional = true
68 | strict_optional = true
69 | warn_redundant_casts = true
70 | warn_unused_ignores = true
71 | warn_no_return = true
72 | warn_unreachable = true
73 |
74 | [[tool.mypy.overrides]]
75 | module = "drain3.*"
76 | ignore_missing_imports = true
77 |
78 | [tool.isort]
79 | profile = "black"
80 | line_length = 120
81 | multi_line_output = 3
82 |
83 | # pytest only reads configuration from [tool.pytest.ini_options]; a bare
84 | # [tool.pytest] table is silently ignored, so all keys live here.
85 | [tool.pytest.ini_options]
86 | minversion = "7.0"
87 | addopts = "--strict-markers"
88 | testpaths = ["src/tests"]
89 | python_files = "test_*.py"
90 | python_classes = "Test*"
91 | python_functions = "test_*"
92 | filterwarnings = [
93 |     "ignore::DeprecationWarning",
94 |     "ignore::PendingDeprecationWarning",
95 | ]
96 | markers = [
97 |     "slow: marks tests as slow (deselect with '-m \"not slow\"')",
98 |     "integration: marks tests as integration tests (deselect with '-m \"not integration\"')",
99 | ]
100 |
101 | [tool.coverage.run]
102 | source = ["platform_problem_monitoring_core"]
103 | omit = ["*/tests/*", "*/venv/*"]
104 |
105 | [tool.coverage.report]
106 | exclude_lines = [
107 | "pragma: no cover",
108 | "def __repr__",
109 | "raise NotImplementedError",
110 | "if __name__ == .__main__.:",
111 | "pass",
112 | "raise ImportError",
113 | ]
114 |
115 | [tool.ruff]
116 | # General configuration
117 | line-length = 120
118 | target-version = "py313"
119 |
120 | [tool.ruff.lint]
121 | # Enable pycodestyle (E), Pyflakes (F), McCabe complexity (C90), isort (I),
122 | # pep8-naming (N), flake8-builtins (A), flake8-bugbear (B), flake8-comprehensions (C4),
123 | # flake8-docstrings (D), flake8-errmsg (EM), flake8-logging-format (G), flake8-simplify (SIM),
124 | # flake8-unused-arguments (ARG), flake8-pytest-style (PT), flake8-use-pathlib (PTH)
125 | select = ["E", "F", "C90", "I", "N", "A", "B", "C4", "D", "EM", "G", "SIM", "ARG", "PT", "PTH"]
126 | ignore = ["D203", "D212"]
127 |
128 | [tool.ruff.lint.pydocstyle]
129 | convention = "google"
130 |
131 | [tool.bandit]
132 | exclude_dirs = ["venv"]
133 | skips = ["B101"] # Skip assert warning as we use it in tests
134 |
--------------------------------------------------------------------------------
/docs/JETBRAINS_SETUP.md:
--------------------------------------------------------------------------------
1 | # JetBrains IDE Setup Guide
2 |
3 | This guide explains how to set up and use JetBrains IDEs (PyCharm, IntelliJ IDEA, etc.) with this project, particularly focusing on the code quality tools.
4 |
5 | ## Initial Setup
6 |
7 | 1. Open the project in your JetBrains IDE
8 | 2. Ensure you've installed the project dependencies:
9 | ```bash
10 | make install
11 | ```
12 | 3. The IDE should automatically detect the project structure and Python interpreter from the `.idea` directory settings
13 |
14 | ## Python SDK Setup
15 |
16 | If the Python interpreter isn't automatically detected:
17 |
18 | 1. Go to `File > Project Structure`
19 | 2. Under Project Settings > Project, select the Python interpreter from your virtual environment
20 | 3. Make sure it's pointing to the `venv/bin/python` interpreter in your project directory
21 |
22 | ## Run Configurations
23 |
24 | We've included several predefined run configurations to help you verify code quality:
25 |
26 | - **Black Format**: Formats your code according to Black style
27 | - **Ruff Lint**: Runs the Ruff linter to check for code issues
28 | - **Ruff Lint Fix**: Runs the Ruff linter and automatically fixes issues where possible
29 | - **Mypy Type Check**: Verifies type annotations
30 | - **All Quality Checks**: Runs all quality checks at once
31 | - **Make Lint Fix**: Runs make lint-fix to automatically fix linting issues
32 |
33 | To run any of these:
34 |
35 | 1. Click on the run configuration dropdown in the top-right toolbar
36 | 2. Select the desired configuration
37 | 3. Click the run button (green triangle)
38 |
39 | ## Code Inspection
40 |
41 | We've configured the IDE's inspection profiles to match our quality standards:
42 |
43 | 1. Type checking is enabled with strict mode
44 | 2. PEP 8 style checking is enabled
45 | 3. Python version compatibility checks are enabled
46 |
47 | ## External Tools Integration
48 |
49 | ### Black
50 |
51 | Black auto-formatting is enabled in the editor:
52 |
53 | 1. The code will be auto-formatted on save
54 | 2. You can also press `Ctrl+Alt+L` (or `Cmd+Alt+L` on macOS) to format the current file
55 |
56 | ### Ruff
57 |
58 | Ruff can both check for issues and fix them:
59 |
60 | 1. Run "Ruff Lint" to check for issues
61 | 2. Run "Ruff Lint Fix" to automatically fix issues where possible
62 | 3. From the terminal: `make lint` to check, `make lint-fix` to check and fix
63 |
64 | ### Keyboard Shortcuts
65 |
66 | - **Reformat Code**: `Ctrl+Alt+L` (Windows/Linux) or `Cmd+Alt+L` (macOS)
67 | - **Run Current Configuration**: `Shift+F10` (Windows/Linux) or `Ctrl+R` (macOS)
68 | - **Debug Current Configuration**: `Shift+F9` (Windows/Linux) or `Ctrl+D` (macOS)
69 |
70 | ## Using the Terminal Tool Window
71 |
72 | You can also run the Makefile commands directly from the Terminal tool window:
73 |
74 | 1. Open the Terminal tool window (`Alt+F12` or `View > Tool Windows > Terminal`)
75 | 2. Run commands like:
76 | ```bash
77 | make quality
78 | make lint
79 | make lint-fix
80 | make format
81 | ```
82 |
83 | ## Code Commits
84 |
85 | When committing code, the pre-commit hooks will run automatically if you've installed them with:
86 |
87 | ```bash
88 | pre-commit install
89 | ```
90 |
91 | This helps catch issues before they're committed to the repository.
92 |
93 | ## Best Practices
94 |
95 | 1. **Enable Auto Import**: Under Settings > Editor > General > Auto Import, enable "Add unambiguous imports on the fly"
96 | 2. **Use Type Hints**: The IDE will show type hint errors as you type
97 | 3. **Run Type Checking Often**: Use the Mypy run configuration frequently to catch type issues
98 | 4. **Fix Linting Issues Automatically**: Use `make lint-fix` to automatically fix many common issues
99 |
100 | ## Troubleshooting
101 |
102 | If you experience issues with the IDE:
103 |
104 | 1. **Invalidate Caches**: Try `File > Invalidate Caches and Restart`
105 | 2. **Sync Project with pyproject.toml**: Ensure the IDE settings match the `pyproject.toml` settings
106 | 3. **Check the Terminal**: Run commands directly in the terminal to see if errors are IDE-specific
107 |
--------------------------------------------------------------------------------
/src/platform_problem_monitoring_core/step6_extract_fields.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Extract relevant fields from logstash documents."""
3 |
4 | import argparse
5 | import json
6 | import sys
7 | from pathlib import Path
8 |
9 | from platform_problem_monitoring_core.utils import load_json, logger
10 |
11 |
def extract_fields(logstash_file: str, output_file: str) -> None:
    """
    Extract relevant fields from logstash documents.

    Reads an array of Elasticsearch hit documents and writes one JSON line per
    document containing its index name, document id, and message. Documents
    without a message, or that fail to process, are counted and skipped.

    Args:
        logstash_file: Path to the logstash documents file
        output_file: Path to store the extracted fields (JSON Lines format)

    Raises:
        FileNotFoundError: If the logstash file doesn't exist
        json.JSONDecodeError: If the file contains invalid JSON
        OSError: If the output cannot be written
    """
    logger.info("Extracting fields from logstash documents")
    logger.info(f"Logstash file: {logstash_file}")
    logger.info(f"Output file: {output_file}")

    # Load logstash documents
    documents = load_json(logstash_file)
    logger.info(f"Loaded {len(documents)} logstash documents")

    # Ensure the output directory exists
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Process the documents and write them to the output file
    processed_count = 0
    skipped_count = 0

    try:
        # Reuse output_path (previously the Path object was rebuilt here) and
        # pin the encoding so output is identical across platforms.
        with output_path.open("w", encoding="utf-8") as f:
            for doc in documents:
                try:
                    # Extract required fields: index name, document id, and message
                    index_name = doc.get("_index", "unknown")
                    doc_id = doc.get("_id", "unknown")

                    # The message lives in the document's _source payload
                    source = doc.get("_source", {})
                    message = source.get("message", "")

                    # A document without a message is useless downstream; skip it
                    if not message:
                        skipped_count += 1
                        continue

                    # Write extracted fields to output file as one JSON line
                    extracted = {"index": index_name, "id": doc_id, "message": message}
                    f.write(json.dumps(extracted) + "\n")
                    processed_count += 1

                    # Log progress for large document sets
                    if processed_count % 10000 == 0:
                        logger.info(f"Processed {processed_count} documents so far")
                except (KeyError, TypeError) as e:
                    skipped_count += 1
                    logger.warning(f"Error processing document: {e}")
                    continue
                except Exception as e:
                    # Deliberate best-effort: one malformed document must not
                    # abort extraction of the remaining ones.
                    logger.warning(f"Unexpected error processing document: {e}")
                    skipped_count += 1
                    continue

        logger.info(f"Extracted fields from {processed_count} documents")
        if skipped_count > 0:
            logger.warning(f"Skipped {skipped_count} documents due to errors or missing fields")
    except OSError as e:
        logger.error(f"Error writing to output file: {e}")
        error_msg = f"Failed to write to output file {output_file}: {e}"
        raise OSError(error_msg) from e

    logger.info("Field extraction completed")
85 |
86 |
def main() -> None:
    """Parse command line arguments and extract fields from logstash documents."""
    arg_parser = argparse.ArgumentParser(description="Extract fields from logstash documents")
    arg_parser.add_argument("--logstash-file", required=True, help="Path to the logstash documents file")
    arg_parser.add_argument("--output-file", required=True, help="Path to store the extracted fields")
    parsed = arg_parser.parse_args()

    try:
        extract_fields(parsed.logstash_file, parsed.output_file)
    except Exception as e:
        # Any failure is reported and mapped to a non-zero exit code.
        logger.error(f"Error extracting fields: {e}")
        sys.exit(1)
    # SystemExit is not an Exception subclass, so exiting here is
    # equivalent to exiting inside the try block.
    sys.exit(0)
105 |
--------------------------------------------------------------------------------
/src/platform_problem_monitoring_core/step12_cleanup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Clean up work environment."""
3 |
4 | import argparse
5 | import logging
6 | import os
7 | import shutil
8 | import sys
9 | from pathlib import Path
10 | from typing import List
11 |
12 | from platform_problem_monitoring_core.utils import logger
13 |
14 |
15 | def _verify_safe_path(work_dir: Path) -> None:
16 | """
17 | Verify that the path is safe to remove.
18 |
19 | Args:
20 | work_dir: Path to verify
21 |
22 | Raises:
23 | ValueError: If the path is not a directory, doesn't exist, or doesn't look like a temporary work directory
24 | """
25 | # Check if the directory exists
26 | if not work_dir.exists():
27 | error_msg = f"Directory does not exist: {work_dir}"
28 | raise ValueError(error_msg)
29 |
30 | # Check if the path is a directory
31 | if not work_dir.is_dir():
32 | error_msg = f"Path is not a directory: {work_dir}"
33 | raise ValueError(error_msg)
34 |
35 | # Verify that the directory looks like a temporary work directory
36 | # This is a safety check to avoid accidentally deleting important directories
37 | if not work_dir.name.startswith("platform_problem_monitoring_"):
38 | error_msg = f"Directory does not appear to be a temporary work directory: {work_dir}"
39 | raise ValueError(error_msg)
40 |
41 |
42 | def _list_remaining_files(work_dir: Path) -> List[str]:
43 | """
44 | List files remaining in the directory.
45 |
46 | Args:
47 | work_dir: Path to the directory
48 |
49 | Returns:
50 | List of files found in the directory
51 | """
52 | files = []
53 | try:
54 | for root, _dirs, filenames in os.walk(work_dir):
55 | for filename in filenames:
56 | file_path = Path(root) / filename
57 | files.append(str(file_path.relative_to(work_dir)))
58 | return files
59 | except (OSError, ValueError) as e:
60 | logger.warning(f"Error listing files in {work_dir}: {e}")
61 | return []
62 |
63 |
def cleanup_environment(work_dir: str) -> None:
    """
    Clean up the work environment by removing the temporary work directory.

    A path that fails the safety checks (missing, not a directory, or not one
    of our temporary work directories) is logged as a warning and skipped
    rather than raised.

    Args:
        work_dir: Path to the temporary work folder to remove

    Raises:
        OSError: If the directory passes the safety checks but cannot be removed
    """
    logger.info("Cleaning up work environment")
    logger.info(f"Removing temporary work directory: {work_dir}")

    target = Path(work_dir)

    try:
        # Refuse to touch anything that doesn't look like one of our own
        # temporary directories.
        _verify_safe_path(target)

        # When debug logging is enabled, record what is about to be deleted.
        if logger.isEnabledFor(logging.DEBUG):
            remaining = _list_remaining_files(target)
            if remaining:
                logger.debug(f"Files to be removed: {', '.join(remaining)}")

        # Remove the directory tree and everything inside it.
        shutil.rmtree(work_dir)
        logger.info(f"Successfully removed directory: {work_dir}")

    except ValueError as e:
        # Non-fatal: nothing to clean up, or not a directory we own.
        logger.warning(f"Skipping cleanup: {str(e)}")
    except OSError as e:
        error_msg = f"Error removing directory {work_dir}: {str(e)}"
        logger.error(error_msg)
        raise OSError(error_msg) from e

    logger.info("Cleanup complete")
104 |
105 |
def main() -> None:
    """Execute the script when run directly."""
    arg_parser = argparse.ArgumentParser(description="Clean up work environment")
    arg_parser.add_argument("--work-dir", required=True, help="Path to the temporary work folder to remove")
    parsed = arg_parser.parse_args()

    try:
        cleanup_environment(parsed.work_dir)
    except Exception as e:
        # Any failure is reported and mapped to a non-zero exit code.
        logger.error(f"Error cleaning up environment: {str(e)}")
        sys.exit(1)
    # SystemExit is not an Exception subclass, so exiting here is
    # equivalent to exiting inside the try block.
    sys.exit(0)
123 |
--------------------------------------------------------------------------------
/docs/RELEASE_MANAGEMENT.md:
--------------------------------------------------------------------------------
1 | # Platform Problem Monitoring Core - Release Management
2 |
3 | This document outlines the release process for the Platform Problem Monitoring Core package, including version management, artifact creation, and publishing.
4 |
5 | ## Release Artifacts
6 |
7 | Each release includes the following artifacts:
8 |
9 | - **Source Distribution (.tar.gz)** - Contains the raw source code of the package
10 | - **Wheel Distribution (.whl)** - A pre-built package that's ready to install
11 |
12 | ## Release Workflow
13 |
14 | ### Automated GitHub Actions Workflow
15 |
16 | The release process uses a GitHub Actions workflow (`.github/workflows/release.yml`) that:
17 |
18 | 1. Builds Python packages (wheel and source distribution)
19 | 2. Creates a configuration templates archive
20 | 3. Creates a GitHub Release with auto-generated release notes
21 | 4. Attaches all artifacts to the release
22 |
23 | The workflow is triggered whenever a tag with the format `v*.*.*` is pushed to the repository.
24 |
25 | ## Step-by-Step Release Process
26 |
27 | ### 1. Prepare for Release
28 |
29 | Ensure all changes are committed, CI passes, and the code is ready for release:
30 |
31 | ```bash
32 | # Pull latest changes
33 | git checkout main
34 | git pull origin main
35 |
36 | # Run quality checks and tests
37 | make ci-quality
38 | make test-coverage
39 | ```
40 |
41 | ### 2. Update Version Number
42 |
43 | Update the version in `pyproject.toml`:
44 |
45 | ```bash
46 | # Option 1: Manual edit
47 | # Edit pyproject.toml and change version = "x.y.z"
48 |
49 | # Option 2: Using make command
50 | make bump-version
51 | ```
52 |
53 | The `bump-version` make command will:
54 | 1. Show current version
55 | 2. Prompt for new version
56 | 3. Update `pyproject.toml`
57 |
58 | ### 3. Commit Version Change
59 |
60 | ```bash
61 | git add pyproject.toml
62 | git commit -m "Bump version to x.y.z"
63 | git push origin main
64 | ```
65 |
66 | ### 4. Create Release Tag
67 |
68 | ```bash
69 | # Option 1: Manual tagging
70 | git tag -a "vx.y.z" -m "Release vx.y.z"
71 |
72 | # Option 2: Using make command
73 | make release
74 | ```
75 |
76 | The `release` make command will:
77 | 1. Run quality checks and tests
78 | 2. Create a new annotated git tag based on the version in pyproject.toml
79 |
80 | ### 5. Push Tag to Trigger Release
81 |
82 | ```bash
83 | git push origin vx.y.z
84 | ```
85 |
86 | This will trigger the GitHub Actions release workflow.
87 |
88 | ### 6. Verify Release
89 |
90 | 1. Go to the GitHub repository's Actions tab
91 | 2. Check that the release workflow completed successfully
92 | 3. Go to the Releases page to verify that the release was created with all artifacts
93 |
94 | ## Installation from Release Artifacts
95 |
96 | The released package can be installed in two ways:
97 |
98 | ### 1. Using pip directly from GitHub (for applications)
99 |
100 | ```bash
101 | pip install https://github.com/dx-tooling/platform-problem-monitoring-core/releases/download/vX.Y.Z/platform_problem_monitoring_core-X.Y.Z-py3-none-any.whl
102 | ```
103 |
104 | ### 2. For development or customization
105 |
106 | 1. Download both the wheel file and `additional_assets.zip` from the releases page
107 | 2. Extract the configuration templates
108 | 3. Follow the setup instructions in the README
109 |
110 | ## Versioning Scheme
111 |
112 | This project follows [Semantic Versioning](https://semver.org/):
113 |
114 | * **MAJOR version** (x.0.0) - Incompatible API changes
115 | * **MINOR version** (0.x.0) - Add functionality in a backward compatible manner
116 | * **PATCH version** (0.0.x) - Backward compatible bug fixes
117 |
118 | ## Release Notes Guidelines
119 |
120 | When creating a new release:
121 |
122 | 1. Provide a summary of key changes
123 | 2. List new features
124 | 3. Document any breaking changes
125 | 4. Include any migration instructions
126 | 5. Acknowledge contributors
127 |
128 | ## Troubleshooting Release Issues
129 |
130 | ### Common Problems and Solutions
131 |
132 | 1. **Release workflow fails**
133 | - Check that all test dependencies are properly installed
134 | - Verify that tests pass locally
135 |
136 | 2. **Missing configuration files in the release**
137 | - Check the paths in the "Create configuration archive" step
138 | - Ensure all required files exist in the repository
139 |
140 | 3. **Wrong version number**
141 | - Check that the version in `pyproject.toml` matches the git tag
142 | - Ensure the tag follows the format `vX.Y.Z`
143 |
--------------------------------------------------------------------------------
/src/tests/fixtures/previous_normalization_results.json:
--------------------------------------------------------------------------------
1 | {
2 | "patterns": [
3 | {
4 | "cluster_id": 3,
5 | "count": 713,
6 | "pattern": "[TIMESTAMP] request.ERROR: Uncaught PHP Exception Symfony\\Component\\HttpKernel\\Exception\\NotFoundHttpException: \"Component \"LinkedIcon\" not found.\" at /opt/website/prod/backend-app/vendor/symfony/ux-live-component/src/EventListener/LiveComponentSubscriber.php line {\"exception\": \"[object] (Symfony\\\\Component\\\\HttpKernel\\\\Exception\\\\NotFoundHttpException(code: ): Component \\\"LinkedIcon\\\" not found. at /opt/website/prod/backend-app/vendor/symfony/ux-live-component/src/EventListener/LiveComponentSubscriber.php:)\\n[previous exception] [object] (InvalidArgumentException(code: ): Unknown component \\\"LinkedIcon\\\". And no matching anonymous component template was found. at /opt/website/prod/backend-app/vendor/symfony/ux-twig-component/src/ComponentFactory.php:)\"} []",
7 | "first_seen": "logstash-symfony-errors-2025.03.05:dLi0ZpUBJdUWaJfyqGCm",
8 | "last_seen": "logstash-symfony-errors-2025.03.05:0Pu0ZpUBDnR7VQTqsAl9",
9 | "sample_log_lines": [],
10 | "sample_doc_references": [
11 | "logstash-symfony-errors-2025.03.05:dLi0ZpUBJdUWaJfyqGCm",
12 | "logstash-symfony-errors-2025.03.05:9Pu0ZpUBDnR7VQTqqQi2",
13 | "logstash-symfony-errors-2025.03.05:ebi0ZpUBJdUWaJfyqmAj",
14 | "logstash-symfony-errors-2025.03.05:c7i0ZpUBJdUWaJfyrWGb",
15 | "logstash-symfony-errors-2025.03.05:0Pu0ZpUBDnR7VQTqsAl9"
16 | ]
17 | },
18 | {
19 | "cluster_id": 4,
20 | "count": 713,
21 | "pattern": "[TIMESTAMP] application_events.INFO: {\"applicationEvent\": {\"id\": \"\", \"eventCategory\": \"\", \"eventCategoryTitle\": \"error\", \"eventType\": \"\", \"eventTypeTitle\": \"exception\", \"occuredAt\": \"TIMESTAMP\", \"affectedUserId\": <*> \"affectedUserIsJobofferer\": <*> \"affectedUserIsJobseeker\": <*> \"affectedUserRegisteredAt\": <*> \"metric\": null, \"errorMessage\": \"Component \\\"LinkedIcon\\\" not found.\", \"additionalData\": {\"throwableClass\": \"Symfony\\\\Component\\\\HttpKernel\\\\Exception\\\\NotFoundHttpException\", \"file\": \"/opt/website/prod/backend-app/vendor/symfony/ux-live-component/src/EventListener/LiveComponentSubscriber.php\", \"line\": \"\"}, \"requestId\": \"\", \"sessionId\": <*> \"clientId\": \"\", \"isProbablyBotRequest\": \"\"}} [] []",
22 | "first_seen": "logstash-symfony-application-events-2025.03.05:8Pu0ZpUBDnR7VQTqqAis",
23 | "last_seen": "logstash-symfony-application-events-2025.03.05:0fu0ZpUBDnR7VQTqsAl-",
24 | "sample_log_lines": [],
25 | "sample_doc_references": [
26 | "logstash-symfony-application-events-2025.03.05:8Pu0ZpUBDnR7VQTqqAis",
27 | "logstash-symfony-application-events-2025.03.05:8fu0ZpUBDnR7VQTqqAjK",
28 | "logstash-symfony-application-events-2025.03.05:dri0ZpUBJdUWaJfyqWBl",
29 | "logstash-symfony-application-events-2025.03.05:cbi0ZpUBJdUWaJfyrGG3",
30 | "logstash-symfony-application-events-2025.03.05:0fu0ZpUBDnR7VQTqsAl-"
31 | ]
32 | },
33 | {
34 | "cluster_id": 1,
35 | "count": 1,
36 | "pattern": "[TIMESTAMP] app.INFO: Application Appointment Scheduling API request for handling superchat error: No mapping found for superchat message id 'ms_nVVaoWgbvdQk5lIFyYWTK'. This will not affect any schedulings. [] []",
37 | "first_seen": "logstash-symfony-main-2025.03.05:9_qzZpUBDnR7VQTqSpIZ",
38 | "last_seen": "logstash-symfony-main-2025.03.05:9_qzZpUBDnR7VQTqSpIZ",
39 | "sample_log_lines": [],
40 | "sample_doc_references": [
41 | "logstash-symfony-main-2025.03.05:9_qzZpUBDnR7VQTqSpIZ"
42 | ]
43 | },
44 | {
45 | "cluster_id": 2,
46 | "count": 1,
47 | "pattern": "remote_addr=\"\" - x_forwarded_for=\", \" - cf_connecting_ip=\"\" - - [05/Mar/2025: