├── .python-version ├── .idea ├── .gitignore ├── vcs.xml ├── modules.xml ├── misc.xml ├── runConfigurations │ ├── Make_Lint_Fix.xml │ ├── All_Quality_Checks.xml │ ├── Black_Format.xml │ ├── Ruff_Lint.xml │ ├── Mypy_Type_Check.xml │ └── Ruff_Lint_Fix.xml ├── platform-problem-monitoring-core.iml └── inspectionProfiles │ └── Project_Default.xml ├── src ├── tests │ ├── fixtures │ │ ├── current_date_time.txt │ │ ├── __init__.py │ │ ├── lucene_query.json │ │ └── previous_normalization_results.json │ ├── __init__.py │ ├── test_step6_extract_fields.py │ └── test_step7_normalize_messages.py └── platform_problem_monitoring_core │ ├── __init__.py │ ├── step1_prepare.py │ ├── step6_extract_fields.py │ ├── step12_cleanup.py │ ├── step11_store_new_state.py │ ├── utils.py │ ├── step2_download_previous_state.py │ ├── step10_send_email_report.py │ ├── step3_retrieve_hourly_problem_numbers.py │ ├── step4_generate_trend_chart.py │ ├── step8_compare_normalizations.py │ ├── step5_download_logstash_documents.py │ └── step7_normalize_messages.py ├── assets ├── sample-trend-and-report-input-data │ ├── start_date_time.txt │ ├── trend_chart.png │ ├── lucene_query.json │ ├── email_body.txt │ ├── hourly_problem_numbers.json │ ├── norm_results.json │ ├── norm_results_prev.json │ └── comparison_results.json └── readme-hero-image.png ├── etc ├── main.conf.dist └── lucene_query.json.dist ├── .gitignore ├── .github └── workflows │ ├── tests.yml │ ├── code-quality.yml │ └── release.yml ├── docs ├── NOTES.md ├── DEVELOPMENT.md ├── QUALITY.md ├── JETBRAINS_SETUP.md └── RELEASE_MANAGEMENT.md ├── LICENSE.txt ├── .vscode └── settings.json ├── .pre-commit-config.yaml ├── pyproject.toml ├── Makefile ├── README.md └── bin └── ppmc /.python-version: -------------------------------------------------------------------------------- 1 | 3.10.16 2 | -------------------------------------------------------------------------------- /.idea/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /src/tests/fixtures/current_date_time.txt: -------------------------------------------------------------------------------- 1 | 2025-03-05T19:23:10.832778+00:00 2 | -------------------------------------------------------------------------------- /src/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test package for platform_problem_monitoring_core.""" 2 | -------------------------------------------------------------------------------- /assets/sample-trend-and-report-input-data/start_date_time.txt: -------------------------------------------------------------------------------- 1 | 2025-03-06T00:00:00Z 2 | -------------------------------------------------------------------------------- /src/tests/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | """Test fixtures for platform_problem_monitoring_core tests.""" 2 | -------------------------------------------------------------------------------- /assets/readme-hero-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dx-tooling/platform-problem-monitoring-core/HEAD/assets/readme-hero-image.png -------------------------------------------------------------------------------- /assets/sample-trend-and-report-input-data/trend_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dx-tooling/platform-problem-monitoring-core/HEAD/assets/sample-trend-and-report-input-data/trend_chart.png -------------------------------------------------------------------------------- /.idea/vcs.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/platform_problem_monitoring_core/__init__.py: -------------------------------------------------------------------------------- 1 | """Platform Problem Monitoring Core. 2 | 3 | A tool for monitoring platform problems using Elasticsearch logs. 4 | """ 5 | 6 | from importlib.metadata import PackageNotFoundError, version 7 | 8 | try: 9 | __version__ = version("platform_problem_monitoring_core") 10 | except PackageNotFoundError: 11 | __version__ = "0.1.0" # Default version if package is not installed 12 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /etc/main.conf.dist: -------------------------------------------------------------------------------- 1 | REMOTE_STATE_S3_BUCKET_NAME="" 2 | REMOTE_STATE_S3_FOLDER_NAME="" 3 | 4 | ELASTICSEARCH_SERVER_BASE_URL="" 5 | ELASTICSEARCH_LUCENE_QUERY_FILE_PATH="" 6 | 7 | KIBANA_DISCOVER_BASE_URL="" 8 | KIBANA_DOCUMENT_DEEPLINK_URL_STRUCTURE="https://example.com/kibana/_plugin/kibana/app/discover#/doc/logstash-*/{{index}}?id={{id}}" 9 | 10 | SMTP_SERVER_HOSTNAME="" 11 | SMTP_SERVER_PORT="" 12 | SMTP_SERVER_USERNAME="" 13 | SMTP_SERVER_PASSWORD="" 14 | SMTP_SENDER_ADDRESS="" 15 | SMTP_RECEIVER_ADDRESS="" 16 | 17 | # Number of hours to look back for problem trends 18 | TREND_HOURS_BACK="24" 19 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .idea/workspace.xml 2 | .idea/tasks.xml 3 | .idea/dictionaries/ 4 | .idea/shelf/ 5 | .idea/usage.statistics.xml 6 | .idea/contentModel.xml 7 | .idea/dataSources/ 8 | .idea/dataSources.local.xml 9 | .idea/httpRequests/ 10 | .idea/caches/ 11 | # Keep .idea/runConfigurations 12 | # Keep .idea/inspectionProfiles 13 | # Keep .idea/misc.xml 14 | # Keep .idea/modules.xml 15 | 16 | venv/ 17 | main.conf 18 | etc/lucene_query.json 19 | 20 | # Python package build artifacts 21 | *.egg-info/ 22 | *.egg 23 | dist/ 24 | build/ 25 | __pycache__/ 26 | *.py[cod] 27 | *$py.class 28 | references/ 29 | coverage.xml 30 | 31 | # Cache directories 32 | .mypy_cache/ 33 | .pytest_cache/ 34 | .ruff_cache/ 35 | .coverage 36 | htmlcov/ 37 | -------------------------------------------------------------------------------- /etc/lucene_query.json.dist: -------------------------------------------------------------------------------- 1 | { 2 | "query": { 3 | "bool": { 4 | "should": [ 5 | { "match": { "message": "error" } }, 6 | { "match": { "message": "failure" } }, 7 | { "match": { "message": "critical" } }, 8 | { "match": { "message": "alert" } }, 9 | { "match": { "message": "exception" } } 10 | ], 11 | "must_not": [ 12 | { "match": { "message": "User Deprecated" } }, 13 | { "match": { "message": "logstash" } }, 14 | { "term": { "syslog_program": "dd.collector" } }, 15 | { "term": { "syslog_program": "dd.forwarder" } }, 16 | { "term": { "syslog_program": "dd.dogstatsd" } } 17 | ], 18 | "minimum_should_match": 1 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/tests/fixtures/lucene_query.json: -------------------------------------------------------------------------------- 1 | { 2 | "query": { 3 | "bool": { 4 | "should": [ 5 | { "match": { "message": "error" } }, 6 | { "match": { "message": "failure" } }, 7 | { "match": { "message": "critical" } }, 8 | 
{ "match": { "message": "alert" } }, 9 | { "match": { "message": "exception" } } 10 | ], 11 | "must_not": [ 12 | { "match": { "message": "User Deprecated" } }, 13 | { "match": { "message": "logstash" } }, 14 | { "term": { "syslog_program": "dd.collector" } }, 15 | { "term": { "syslog_program": "dd.forwarder" } }, 16 | { "term": { "syslog_program": "dd.dogstatsd" } } 17 | ], 18 | "minimum_should_match": 1 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | env: 13 | CI: true 14 | strategy: 15 | matrix: 16 | python-version: ["3.10", "3.11", "3.12", "3.13"] 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | cache: 'pip' 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install -e ".[dev]" 31 | 32 | - name: Run tests 33 | run: | 34 | # Run tests with coverage 35 | make test-coverage 36 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Make_Lint_Fix.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | 18 | -------------------------------------------------------------------------------- /.github/workflows/code-quality.yml: -------------------------------------------------------------------------------- 1 | name: Code Quality 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | quality: 11 | runs-on: ubuntu-latest 12 | env: 13 | CI: true 14 | strategy: 15 | matrix: 16 | python-version: 
["3.10", "3.11", "3.12", "3.13"] 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | cache: 'pip' 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install -e ".[dev]" 31 | 32 | - name: Run code quality checks 33 | run: | 34 | # Run all code quality checks via the Makefile 35 | make ci-quality 36 | -------------------------------------------------------------------------------- /.idea/runConfigurations/All_Quality_Checks.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | 18 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Black_Format.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | 18 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Ruff_Lint.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | 18 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Mypy_Type_Check.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | 18 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Ruff_Lint_Fix.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | 18 | -------------------------------------------------------------------------------- /docs/NOTES.md: -------------------------------------------------------------------------------- 1 | # Notes 2 | 3 | ## Runbook 4 | 5 | python3 -m platform_problem_monitoring_core.step5_download_logstash_documents \ 6 | --elasticsearch-url "http://127.0.0.1:9201" \ 
7 | --query-file "/Users/manuel/git/github/dx-tooling/platform-problem-monitoring-core/src/lucene_query.json" \ 8 | --start-date-time-file "/tmp/latest-date-time.txt" \ 9 | --output-file "/tmp/docs.json" \ 10 | --current-date-time-file "/tmp/cur-date-time.txt" 11 | 12 | curl -s -X GET "http://127.0.0.1:9201/_search?pretty" -H 'Content-Type: application/json' -d' 13 | { 14 | "query": { 15 | "query_string" : { 16 | "query" : "@timestamp: ['2025-03-04T00:00:00.000' TO '2025-03-04T01:00:00.000'] AND type: \"symfony-errors\"" 17 | } 18 | } 19 | } 20 | ' 21 | 22 | ## TODOs & Ideas 23 | 24 | - add step 12 (cleanup) to ppmc 25 | - add ppmc option to disable cleanup step 12 26 | - allow the local filesystem as a state storage alternative 27 | -------------------------------------------------------------------------------- /.idea/platform-problem-monitoring-core.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 20 | 21 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Manuel Kießling 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.enabled": true, 3 | "python.linting.mypyEnabled": true, 4 | "python.linting.flake8Enabled": false, 5 | "python.linting.banditEnabled": true, 6 | "python.formatting.provider": "black", 7 | "python.formatting.blackArgs": ["--line-length", "100"], 8 | "editor.formatOnSave": true, 9 | "editor.codeActionsOnSave": { 10 | "source.organizeImports": "explicit", 11 | "source.fixAll": "explicit" 12 | }, 13 | "python.linting.ignorePatterns": [ 14 | ".vscode/*.py", 15 | "**/site-packages/**/*.py", 16 | "venv/**/*.py" 17 | ], 18 | "python.linting.mypyArgs": [ 19 | "--config-file=pyproject.toml" 20 | ], 21 | "[python]": { 22 | "editor.rulers": [100], 23 | "editor.tabSize": 4, 24 | "editor.insertSpaces": true, 25 | "editor.detectIndentation": false 26 | }, 27 | "files.exclude": { 28 | "**/__pycache__": true, 29 | "**/.mypy_cache": true, 30 | "**/.pytest_cache": true, 31 | "**/.ruff_cache": true, 32 | "**/*.egg-info": true 33 | }, 34 | "python.analysis.typeCheckingMode": "strict", 35 | "python.analysis.extraPaths": ["src"] 36 | } 37 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: 
https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | - id: check-added-large-files 9 | - id: check-json 10 | - id: check-toml 11 | - id: detect-private-key 12 | 13 | - repo: local 14 | hooks: 15 | - id: black 16 | name: black 17 | entry: make format-check-files 18 | language: system 19 | types: [python] 20 | pass_filenames: true 21 | 22 | - id: isort 23 | name: isort 24 | entry: make isort-check-files 25 | language: system 26 | types: [python] 27 | pass_filenames: true 28 | 29 | - id: ruff 30 | name: ruff 31 | entry: make lint-files 32 | language: system 33 | types: [python] 34 | pass_filenames: true 35 | 36 | - id: mypy 37 | name: mypy 38 | entry: make type-check-files 39 | language: system 40 | types: [python] 41 | pass_filenames: true 42 | 43 | - id: bandit 44 | name: bandit 45 | entry: make security-check-files 46 | language: system 47 | types: [python] 48 | exclude: ^src/tests/ 49 | pass_filenames: true 50 | -------------------------------------------------------------------------------- /assets/sample-trend-and-report-input-data/lucene_query.json: -------------------------------------------------------------------------------- 1 | { 2 | "query": { 3 | "bool": { 4 | "should": [ 5 | { 6 | "match": { 7 | "message": "error" 8 | } 9 | }, 10 | { 11 | "match": { 12 | "message": "failure" 13 | } 14 | }, 15 | { 16 | "match": { 17 | "message": "exception" 18 | } 19 | }, 20 | { 21 | "match": { 22 | "message": "warning" 23 | } 24 | }, 25 | { 26 | "match": { 27 | "message": "critical" 28 | } 29 | } 30 | ], 31 | "must_not": [ 32 | { 33 | "match": { 34 | "message": "User Deprecated" 35 | } 36 | }, 37 | { 38 | "match": { 39 | "message": "debug" 40 | } 41 | } 42 | ], 43 | "minimum_should_match": 1 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: 
-------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | permissions: 9 | contents: write # Needed for creating releases and uploading assets 10 | 11 | jobs: 12 | release: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Set up Python 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: '3.10' 23 | cache: 'pip' 24 | 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install build twine wheel 29 | pip install -e ".[dev]" 30 | 31 | - name: Run tests 32 | run: make test-coverage 33 | 34 | - name: Build package 35 | run: python -m build 36 | 37 | - name: Get version from tag 38 | id: get_version 39 | run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT 40 | 41 | - name: Create Release 42 | uses: softprops/action-gh-release@v1 43 | with: 44 | name: Release v${{ steps.get_version.outputs.VERSION }} 45 | draft: false 46 | prerelease: false 47 | generate_release_notes: true 48 | files: | 49 | dist/*.whl 50 | dist/*.tar.gz 51 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 34 | 35 | -------------------------------------------------------------------------------- /docs/DEVELOPMENT.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | ## Development Setup 4 | 5 | 1. **Clone the repository:** 6 | ```bash 7 | git clone https://github.com/dx-tooling/platform-problem-monitoring-core.git 8 | cd platform-problem-monitoring-core 9 | ``` 10 | 11 | 2. 
**Install development dependencies:** 12 | ```bash 13 | make install 14 | ``` 15 | This creates a virtual environment, installs the package and all development dependencies, and sets up pre-commit hooks. 16 | 17 | 3. **Activate the virtual environment:** 18 | ```bash 19 | source venv/bin/activate # On Windows: venv\Scripts\activate 20 | ``` 21 | 22 | ## Code Quality Tools 23 | 24 | This project uses a unified approach to code quality with all tools configured in `pyproject.toml` and executed via: 25 | 26 | 1. **Pre-commit hooks** - Run automatically before each commit 27 | 2. **Make commands** - Run manually or in CI 28 | 29 | Available make commands: 30 | 31 | ```bash 32 | make install Install package and development dependencies 33 | make activate-venv Instructions to activate the virtual environment 34 | make format Format code with black and isort 35 | make format-check Check if code is properly formatted without modifying files 36 | make lint Run linters (ruff) 37 | make lint-fix Run linters and auto-fix issues where possible 38 | make type-check Run mypy type checking 39 | make security-check Run bandit security checks 40 | make quality Run all code quality checks (with formatting) 41 | make ci-quality Run all code quality checks (without modifying files) 42 | make test Run tests 43 | make test-verbose Run tests with verbose output 44 | make test-coverage Run tests with coverage report 45 | make test-file Run tests for a specific file (usage: make test-file file=path/to/test_file.py) 46 | make update-deps Update all dependencies to their latest semver-compatible versions 47 | make bump-version Update the version number in pyproject.toml 48 | make release Create a new release tag (after running quality checks and tests) 49 | make clean Remove build artifacts and cache directories 50 | ``` 51 | 52 | The pre-commit hooks are configured to use the same Makefile targets, ensuring consistency between local development and CI environments. 
def prepare_environment() -> str:
    """
    Prepare environment for a process run.

    Creates a temporary work directory (prefix ``platform_problem_monitoring_``)
    for storing intermediate files, then verifies that it exists and is writable
    by calling ``ensure_dir_exists`` on a probe subdirectory and removing it again.

    If any step fails after the directory has been created, the directory is
    removed before the error is re-raised, so a failed run does not leave an
    orphaned work directory behind.

    Returns:
        Path to the temporary work folder

    Raises:
        PermissionError: If unable to create or write to the temporary directory
        OSError: If any other OS-level error occurs (this includes the
            FileNotFoundError raised when the directory is missing right after creation)
    """
    # Local import: only needed on the failure path below.
    import shutil

    logger.info("Preparing environment for process run")

    # Stays None until mkdtemp succeeds, so the handler knows whether there is anything to clean up.
    work_dir: str | None = None
    try:
        # Create temporary work directory
        work_dir = tempfile.mkdtemp(prefix="platform_problem_monitoring_")
        logger.info(f"Created temporary work directory: {work_dir}")

        # Check if directory exists and is writable
        work_path = Path(work_dir)
        if not work_path.exists():
            error_msg = f"Failed to create temporary directory: {work_dir}"
            raise FileNotFoundError(error_msg)

        if not os.access(work_dir, os.W_OK):
            error_msg = f"No write access to temporary directory: {work_dir}"
            raise PermissionError(error_msg)

        # Probe subdirectory: not strictly necessary, but demonstrates the directory is writable
        test_subdir = work_path / "test"
        ensure_dir_exists(str(test_subdir))
        test_subdir.rmdir()  # Clean up the probe directory

        logger.info("Environment preparation complete")
        return work_dir
    except OSError as e:
        # PermissionError and FileNotFoundError are OSError subclasses, so this covers both.
        logger.error(f"Failed to prepare environment: {str(e)}")
        if work_dir is not None:
            # Best-effort cleanup; ignore_errors keeps it from masking the original exception.
            shutil.rmtree(work_dir, ignore_errors=True)
        raise
def main() -> None:
    """Run the preparation step as a command-line script.

    Prints the path of the created work directory on stdout and exits with
    status 0. If preparing the environment (or printing the path) raises,
    the error is logged and the process exits with status 1.
    """
    # No options are defined and the parsed namespace is not used, so it is not kept.
    argparse.ArgumentParser(description="Prepare environment for a process run").parse_args()

    try:
        # The work directory path on stdout is what the next step picks up.
        print(prepare_environment())
    except Exception as e:
        logger.error(f"Error preparing environment: {str(e)}")
        sys.exit(1)

    # Only reached on success: the handler above always exits.
    sys.exit(0)


if __name__ == "__main__":
    main()
[54] Failed to process message from queue <*>: <*> 42 | Sample documents: 43 | 44 | 5. [42] Cache invalidation failed for key <*> 45 | Sample documents: 46 | 47 | 48 | 49 | INCREASED PROBLEM PATTERNS 50 | ======================== 51 | These patterns have increased in occurrence count since the last report. 52 | 53 | 1. [14] (+0, +0.0%) Failed to process job <*> - timeout after <*> seconds 54 | Sample documents: 55 | 56 | 57 | 58 | DECREASED PROBLEM PATTERNS 59 | ======================== 60 | These patterns have decreased in occurrence count since the last report. 61 | 62 | 1. [72] (-0, -0.0%) Failed to authenticate user <*> - invalid credentials 63 | Sample documents: 64 | 65 | 2. [58] (-0, -0.0%) API rate limit exceeded for user ID <*> 66 | Sample documents: 67 | 68 | 3. [12] (-0, -0.0%) HTTP request failed: <*> <*> returned status code <*> 69 | Sample documents: 70 | 71 | 72 | 73 | TOP 25 CURRENT PROBLEM PATTERNS 74 | ============================== 75 | The most frequent problem patterns in the current report. 76 | 77 | 1. [72] Failed to authenticate user <*> - invalid credentials 78 | Sample documents: 79 | 80 | 2. [58] API rate limit exceeded for user ID <*> 81 | Sample documents: 82 | 83 | 3. [32] SSL certificate for <*> is expiring in <*> days 84 | Sample documents: 85 | 86 | 4. [21] Disk usage warning: <*> is at <*>% capacity 87 | Sample documents: 88 | 89 | 5. [18] Connection reset by peer while sending request to <*> 90 | Sample documents: 91 | 92 | 6. [14] Failed to process job <*> - timeout after <*> seconds 93 | Sample documents: 94 | 95 | 7. [12] AWS S3 access denied: <*> 96 | Sample documents: 97 | 98 | 99 | 100 | This is an automated report from the Platform Problem Monitoring system. 
101 | -------------------------------------------------------------------------------- /docs/QUALITY.md: -------------------------------------------------------------------------------- 1 | # Code Quality Guidelines 2 | 3 | This document describes the code quality tools and practices used in this project. 4 | 5 | ## Code Quality Tools 6 | 7 | We use the following tools to maintain high code quality: 8 | 9 | ### Ruff 10 | 11 | [Ruff](https://github.com/charliermarsh/ruff) is an extremely fast Python linter, written in Rust. It includes many checks from tools like flake8, isort, pycodestyle, and many plugins. 12 | 13 | ```bash 14 | # Run Ruff 15 | make lint 16 | ``` 17 | 18 | ### Black 19 | 20 | [Black](https://github.com/psf/black) is an uncompromising code formatter for Python. It applies a consistent style by reformatting your code. 21 | 22 | ```bash 23 | # Format code with Black 24 | make format 25 | ``` 26 | 27 | ### isort 28 | 29 | [isort](https://github.com/PyCQA/isort) sorts your imports alphabetically and automatically separates them into sections and by type. 30 | 31 | ```bash 32 | # Run isort (included in format command) 33 | make format 34 | ``` 35 | 36 | ### mypy 37 | 38 | [mypy](https://github.com/python/mypy) is an optional static type checker for Python. It helps catch common errors before runtime. 39 | 40 | ```bash 41 | # Run mypy 42 | make type-check 43 | ``` 44 | 45 | ### Bandit 46 | 47 | [Bandit](https://github.com/PyCQA/bandit) is a tool designed to find common security issues in Python code. 48 | 49 | ```bash 50 | # Run security checks 51 | make security-check 52 | ``` 53 | 54 | ### pre-commit 55 | 56 | [pre-commit](https://pre-commit.com/) runs these checks automatically before each commit, ensuring that only quality code enters the repository. 
57 | 58 | ```bash 59 | # Install pre-commit hooks 60 | pre-commit install 61 | ``` 62 | 63 | ## Running All Checks 64 | 65 | You can run all quality checks at once: 66 | 67 | ```bash 68 | make quality 69 | ``` 70 | 71 | ## VS Code Integration 72 | 73 | This project includes VS Code settings that integrate all these tools into your editor. With the proper extensions installed, you'll get: 74 | 75 | - Real-time type checking 76 | - Automatic formatting on save 77 | - Inline error highlighting 78 | - Code actions to fix issues 79 | 80 | ## Recommended VS Code Extensions 81 | 82 | - Python (Microsoft) 83 | - Pylance (Microsoft) 84 | - Ruff (Astral Software) 85 | - Even Better TOML (tamasfe) 86 | - YAML (Red Hat) 87 | 88 | ## Code Style Guidelines 89 | 90 | 1. **Type Annotations**: All functions should have complete type annotations. 91 | 2. **Docstrings**: All public methods and functions should have Google-style docstrings. 92 | 3. **Line Length**: Maximum line length is 120 characters (the `line-length` configured for Black and isort in `pyproject.toml`). 93 | 4. **Imports**: Imports should be sorted by isort with the Black profile. 94 | 5. **Naming**: Follow PEP 8 naming conventions: 95 | - Classes: `PascalCase` 96 | - Functions, methods, variables: `snake_case` 97 | - Constants: `UPPER_SNAKE_CASE` 98 | - Private members: start with underscore `_private_method()` 99 | 100 | ## Continuous Integration 101 | 102 | These quality checks are also run in CI to ensure that all code entering the main branch maintains the expected level of quality. 
103 | -------------------------------------------------------------------------------- /assets/sample-trend-and-report-input-data/hourly_problem_numbers.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "start_time": "2025-03-07T00:00:00Z", 4 | "end_time": "2025-03-07T01:00:00Z", 5 | "count": 352 6 | }, 7 | { 8 | "start_time": "2025-03-07T01:00:00Z", 9 | "end_time": "2025-03-07T02:00:00Z", 10 | "count": 378 11 | }, 12 | { 13 | "start_time": "2025-03-07T02:00:00Z", 14 | "end_time": "2025-03-07T03:00:00Z", 15 | "count": 365 16 | }, 17 | { 18 | "start_time": "2025-03-07T03:00:00Z", 19 | "end_time": "2025-03-07T04:00:00Z", 20 | "count": 342 21 | }, 22 | { 23 | "start_time": "2025-03-07T04:00:00Z", 24 | "end_time": "2025-03-07T05:00:00Z", 25 | "count": 320 26 | }, 27 | { 28 | "start_time": "2025-03-07T05:00:00Z", 29 | "end_time": "2025-03-07T06:00:00Z", 30 | "count": 298 31 | }, 32 | { 33 | "start_time": "2025-03-07T06:00:00Z", 34 | "end_time": "2025-03-07T07:00:00Z", 35 | "count": 274 36 | }, 37 | { 38 | "start_time": "2025-03-07T07:00:00Z", 39 | "end_time": "2025-03-07T08:00:00Z", 40 | "count": 286 41 | }, 42 | { 43 | "start_time": "2025-03-07T08:00:00Z", 44 | "end_time": "2025-03-07T09:00:00Z", 45 | "count": 310 46 | }, 47 | { 48 | "start_time": "2025-03-07T09:00:00Z", 49 | "end_time": "2025-03-07T10:00:00Z", 50 | "count": 267 51 | }, 52 | { 53 | "start_time": "2025-03-07T10:00:00Z", 54 | "end_time": "2025-03-07T11:00:00Z", 55 | "count": 243 56 | }, 57 | { 58 | "start_time": "2025-03-07T11:00:00Z", 59 | "end_time": "2025-03-07T12:00:00Z", 60 | "count": 218 61 | }, 62 | { 63 | "start_time": "2025-03-07T12:00:00Z", 64 | "end_time": "2025-03-07T13:00:00Z", 65 | "count": 203 66 | }, 67 | { 68 | "start_time": "2025-03-07T13:00:00Z", 69 | "end_time": "2025-03-07T14:00:00Z", 70 | "count": 185 71 | }, 72 | { 73 | "start_time": "2025-03-07T14:00:00Z", 74 | "end_time": "2025-03-07T15:00:00Z", 75 | "count": 176 76 | }, 77 | { 
78 | "start_time": "2025-03-07T15:00:00Z", 79 | "end_time": "2025-03-07T16:00:00Z", 80 | "count": 162 81 | }, 82 | { 83 | "start_time": "2025-03-07T16:00:00Z", 84 | "end_time": "2025-03-07T17:00:00Z", 85 | "count": 143 86 | }, 87 | { 88 | "start_time": "2025-03-07T17:00:00Z", 89 | "end_time": "2025-03-07T18:00:00Z", 90 | "count": 132 91 | }, 92 | { 93 | "start_time": "2025-03-07T18:00:00Z", 94 | "end_time": "2025-03-07T19:00:00Z", 95 | "count": 124 96 | }, 97 | { 98 | "start_time": "2025-03-07T19:00:00Z", 99 | "end_time": "2025-03-07T20:00:00Z", 100 | "count": 115 101 | }, 102 | { 103 | "start_time": "2025-03-07T20:00:00Z", 104 | "end_time": "2025-03-07T21:00:00Z", 105 | "count": 108 106 | }, 107 | { 108 | "start_time": "2025-03-07T21:00:00Z", 109 | "end_time": "2025-03-07T22:00:00Z", 110 | "count": 93 111 | }, 112 | { 113 | "start_time": "2025-03-07T22:00:00Z", 114 | "end_time": "2025-03-07T23:00:00Z", 115 | "count": 84 116 | }, 117 | { 118 | "start_time": "2025-03-07T23:00:00Z", 119 | "end_time": "2025-03-08T00:00:00Z", 120 | "count": 72 121 | } 122 | ] 123 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "platform_problem_monitoring_core" 7 | version = "0.1.11" 8 | description = "A tool for monitoring platform problems using Elasticsearch logs" 9 | authors = [ 10 | {name = "Platform Team"} 11 | ] 12 | readme = "README.md" 13 | requires-python = ">=3.10" 14 | license = {text = "Proprietary"} 15 | dependencies = [ 16 | "boto3>=1.28.0", 17 | "drain3>=0.9.6", 18 | "jinja2>=3.0.0", 19 | "argparse>=1.4.0", 20 | "requests>=2.25.0", 21 | "matplotlib>=3.7.0", 22 | "seaborn>=0.12.0", 23 | "typing-extensions>=4.0.0", 24 | ] 25 | 26 | [project.optional-dependencies] 27 | dev = [ 28 | "pytest>=7.0.0", 29 | 
"pytest-cov>=4.1.0", 30 | "black>=23.0.0", 31 | "flake8>=6.0.0", 32 | "mypy>=1.0.0", 33 | "isort>=5.12.0", 34 | "ruff>=0.1.9", 35 | "pre-commit>=3.3.2", 36 | "bandit>=1.7.5", 37 | "types-requests>=2.0.0", 38 | "types-boto3>=1.0.0", 39 | "types-seaborn>=0.12.0", 40 | ] 41 | 42 | [tool.setuptools] 43 | packages = ["platform_problem_monitoring_core"] 44 | package-dir = {"" = "src"} 45 | 46 | # Add package data to include resources in the wheel 47 | [tool.setuptools.package-data] 48 | platform_problem_monitoring_core = ["resources/*.html"] 49 | 50 | # Add data files to include configuration templates 51 | [tool.setuptools.data-files] 52 | "etc/platform_problem_monitoring_core" = ["etc/*.dist"] 53 | "bin" = ["bin/*"] 54 | 55 | [tool.black] 56 | line-length = 120 57 | target-version = ["py310", "py311", "py312", "py313"] 58 | 59 | [tool.mypy] 60 | python_version = "3.13" 61 | warn_return_any = true 62 | warn_unused_configs = true 63 | disallow_untyped_defs = true 64 | disallow_incomplete_defs = true 65 | check_untyped_defs = true 66 | disallow_untyped_decorators = true 67 | no_implicit_optional = true 68 | strict_optional = true 69 | warn_redundant_casts = true 70 | warn_unused_ignores = true 71 | warn_no_return = true 72 | warn_unreachable = true 73 | 74 | [[tool.mypy.overrides]] 75 | module = "drain3.*" 76 | ignore_missing_imports = true 77 | 78 | [tool.isort] 79 | profile = "black" 80 | line_length = 120 81 | multi_line_output = 3 82 | 83 | [tool.pytest] 84 | testpaths = ["src/tests"] 85 | python_files = "test_*.py" 86 | python_classes = "Test*" 87 | python_functions = "test_*" 88 | filterwarnings = [ 89 | "ignore::DeprecationWarning", 90 | "ignore::PendingDeprecationWarning", 91 | ] 92 | 93 | [tool.pytest.ini_options] 94 | minversion = "7.0" 95 | addopts = "--strict-markers" 96 | markers = [ 97 | "slow: marks tests as slow (deselect with '-m \"not slow\"')", 98 | "integration: marks tests as integration tests (deselect with '-m \"not integration\"')", 99 | ] 100 | 
101 | [tool.coverage.run] 102 | source = ["platform_problem_monitoring_core"] 103 | omit = ["*/tests/*", "*/venv/*"] 104 | 105 | [tool.coverage.report] 106 | exclude_lines = [ 107 | "pragma: no cover", 108 | "def __repr__", 109 | "raise NotImplementedError", 110 | "if __name__ == .__main__.:", 111 | "pass", 112 | "raise ImportError", 113 | ] 114 | 115 | [tool.ruff] 116 | # General configuration 117 | line-length = 120 118 | target-version = "py313" 119 | 120 | [tool.ruff.lint] 121 | # Enable pycodestyle (E), Pyflakes (F), McCabe complexity (C90), isort (I), 122 | # pep8-naming (N), flake8-builtins (A), flake8-bugbear (B), flake8-comprehensions (C4), 123 | # flake8-docstrings (D), flake8-errmsg (EM), flake8-logging-format (G), flake8-simplify (SIM), 124 | # flake8-unused-arguments (ARG), flake8-pytest-style (PT), flake8-use-pathlib (PTH) 125 | select = ["E", "F", "C90", "I", "N", "A", "B", "C4", "D", "EM", "G", "SIM", "ARG", "PT", "PTH"] 126 | ignore = ["D203", "D212"] 127 | 128 | [tool.ruff.lint.pydocstyle] 129 | convention = "google" 130 | 131 | [tool.bandit] 132 | exclude_dirs = ["venv"] 133 | skips = ["B101"] # Skip assert warning as we use it in tests 134 | -------------------------------------------------------------------------------- /docs/JETBRAINS_SETUP.md: -------------------------------------------------------------------------------- 1 | # JetBrains IDE Setup Guide 2 | 3 | This guide explains how to set up and use JetBrains IDEs (PyCharm, IntelliJ IDEA, etc.) with this project, particularly focusing on the code quality tools. 4 | 5 | ## Initial Setup 6 | 7 | 1. Open the project in your JetBrains IDE 8 | 2. Ensure you've installed the project dependencies: 9 | ```bash 10 | make install 11 | ``` 12 | 3. The IDE should automatically detect the project structure and Python interpreter from the `.idea` directory settings 13 | 14 | ## Python SDK Setup 15 | 16 | If the Python interpreter isn't automatically detected: 17 | 18 | 1. 
Go to `File > Project Structure` 19 | 2. Under Project Settings > Project, select the Python interpreter from your virtual environment 20 | 3. Make sure it's pointing to the `venv/bin/python` interpreter in your project directory 21 | 22 | ## Run Configurations 23 | 24 | We've included several predefined run configurations to help you verify code quality: 25 | 26 | - **Black Format**: Formats your code according to Black style 27 | - **Ruff Lint**: Runs the Ruff linter to check for code issues 28 | - **Ruff Lint Fix**: Runs the Ruff linter and automatically fixes issues where possible 29 | - **Mypy Type Check**: Verifies type annotations 30 | - **All Quality Checks**: Runs all quality checks at once 31 | - **Make Lint Fix**: Runs make lint-fix to automatically fix linting issues 32 | 33 | To run any of these: 34 | 35 | 1. Click on the run configuration dropdown in the top-right toolbar 36 | 2. Select the desired configuration 37 | 3. Click the run button (green triangle) 38 | 39 | ## Code Inspection 40 | 41 | We've configured the IDE's inspection profiles to match our quality standards: 42 | 43 | 1. Type checking is enabled with strict mode 44 | 2. PEP 8 style checking is enabled 45 | 3. Python version compatibility checks are enabled 46 | 47 | ## External Tools Integration 48 | 49 | ### Black 50 | 51 | Black auto-formatting is enabled in the editor: 52 | 53 | 1. The code will be auto-formatted on save 54 | 2. You can also press `Ctrl+Alt+L` (or `Cmd+Alt+L` on macOS) to format the current file 55 | 56 | ### Ruff 57 | 58 | Ruff can both check for issues and fix them: 59 | 60 | 1. Run "Ruff Lint" to check for issues 61 | 2. Run "Ruff Lint Fix" to automatically fix issues where possible 62 | 3. 
From the terminal: `make lint` to check, `make lint-fix` to check and fix 63 | 64 | ### Keyboard Shortcuts 65 | 66 | - **Reformat Code**: `Ctrl+Alt+L` (Windows/Linux) or `Cmd+Alt+L` (macOS) 67 | - **Run Current Configuration**: `Shift+F10` (Windows/Linux) or `Ctrl+R` (macOS) 68 | - **Debug Current Configuration**: `Shift+F9` (Windows/Linux) or `Ctrl+D` (macOS) 69 | 70 | ## Using the Terminal Tool Window 71 | 72 | You can also run the Makefile commands directly from the Terminal tool window: 73 | 74 | 1. Open the Terminal tool window (`Alt+F12` or `View > Tool Windows > Terminal`) 75 | 2. Run commands like: 76 | ```bash 77 | make quality 78 | make lint 79 | make lint-fix 80 | make format 81 | ``` 82 | 83 | ## Code Commits 84 | 85 | When committing code, the pre-commit hooks will run automatically if you've installed them with: 86 | 87 | ```bash 88 | pre-commit install 89 | ``` 90 | 91 | This helps catch issues before they're committed to the repository. 92 | 93 | ## Best Practices 94 | 95 | 1. **Enable Auto Import**: Under Settings > Editor > General > Auto Import, enable "Add unambiguous imports on the fly" 96 | 2. **Use Type Hints**: The IDE will show type hint errors as you type 97 | 3. **Run Type Checking Often**: Use the Mypy run configuration frequently to catch type issues 98 | 4. **Fix Linting Issues Automatically**: Use `make lint-fix` to automatically fix many common issues 99 | 100 | ## Troubleshooting 101 | 102 | If you experience issues with the IDE: 103 | 104 | 1. **Invalidate Caches**: Try `File > Invalidate Caches and Restart` 105 | 2. **Sync Project with pyproject.toml**: Ensure the IDE settings match the `pyproject.toml` settings 106 | 3. 
def extract_fields(logstash_file: str, output_file: str) -> None:
    """
    Extract relevant fields from logstash documents.

    Loads the documents via ``load_json`` and writes one JSON object per line
    to ``output_file``. Each object carries the keys ``index``, ``id`` and
    ``message``. Documents whose ``_source.message`` is missing or empty, and
    documents that cannot be processed, are counted as skipped and do not
    abort the run. Failures while writing the output are never treated as a
    skipped document; they abort the run with ``OSError``.

    Args:
        logstash_file: Path to the logstash documents file
        output_file: Path to store the extracted fields

    Raises:
        FileNotFoundError: If the logstash file doesn't exist
            (NOTE(review): raised by ``load_json``; confirm against utils)
        json.JSONDecodeError: If the file contains invalid JSON
            (NOTE(review): raised by ``load_json``; confirm against utils)
        OSError: If the output cannot be written
    """
    logger.info("Extracting fields from logstash documents")
    logger.info(f"Logstash file: {logstash_file}")
    logger.info(f"Output file: {output_file}")

    # Load logstash documents
    documents = load_json(logstash_file)
    logger.info(f"Loaded {len(documents)} logstash documents")

    # Ensure the output directory exists
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    processed_count = 0
    skipped_count = 0

    try:
        # json.dumps escapes non-ASCII by default, so the explicit encoding only
        # makes the file independent of the platform's locale settings.
        with output_path.open("w", encoding="utf-8") as f:
            for doc in documents:
                # Only extraction and serialization are guarded per document:
                # a malformed document is skipped, the rest are still processed.
                try:
                    # Extract required fields: index name, document id, and message
                    index_name = doc.get("_index", "unknown")
                    doc_id = doc.get("_id", "unknown")

                    # Extract message from _source
                    source = doc.get("_source", {})
                    message = source.get("message", "")

                    if not message:
                        skipped_count += 1
                        continue

                    extracted = {"index": index_name, "id": doc_id, "message": message}
                    line = json.dumps(extracted) + "\n"
                except (KeyError, TypeError) as e:
                    skipped_count += 1
                    logger.warning(f"Error processing document: {e}")
                    continue
                except Exception as e:
                    logger.warning(f"Unexpected error processing document: {e}")
                    skipped_count += 1
                    continue

                # The write deliberately happens outside the per-document try:
                # OSError is a subclass of Exception, so writing inside it would
                # turn a full disk or I/O failure into a "skipped document" and
                # leave a silently truncated output file.
                f.write(line)
                processed_count += 1

                # Log progress for large document sets
                if processed_count % 10000 == 0:
                    logger.info(f"Processed {processed_count} documents so far")

        logger.info(f"Extracted fields from {processed_count} documents")
        if skipped_count > 0:
            logger.warning(f"Skipped {skipped_count} documents due to errors or missing fields")
    except OSError as e:
        logger.error(f"Error writing to output file: {e}")
        error_msg = f"Failed to write to output file {output_file}: {e}"
        raise OSError(error_msg) from e

    logger.info("Field extraction completed")


def main() -> None:
    """
    Parse command line arguments and extract fields from logstash documents.

    Exits with status 0 on success and status 1 if extraction raised any
    exception (the error is logged before exiting).
    """
    parser = argparse.ArgumentParser(description="Extract fields from logstash documents")
    parser.add_argument("--logstash-file", required=True, help="Path to the logstash documents file")
    parser.add_argument("--output-file", required=True, help="Path to store the extracted fields")

    args = parser.parse_args()

    try:
        extract_fields(args.logstash_file, args.output_file)
    except Exception as e:
        # Top-level boundary: report the failure through the exit code.
        logger.error(f"Error extracting fields: {e}")
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()
def _verify_safe_path(work_dir: Path) -> None:
    """
    Refuse any path that must not be handed to the recursive delete.

    The checks run in this order: the path exists, it is a directory, and its
    final component starts with ``platform_problem_monitoring_``.

    Args:
        work_dir: Candidate directory

    Raises:
        ValueError: If any of the checks above fails
    """
    if not work_dir.exists():
        error_msg = f"Directory does not exist: {work_dir}"
    elif not work_dir.is_dir():
        error_msg = f"Path is not a directory: {work_dir}"
    elif not work_dir.name.startswith("platform_problem_monitoring_"):
        # Name-prefix guard against deleting an unrelated directory by accident
        error_msg = f"Directory does not appear to be a temporary work directory: {work_dir}"
    else:
        return
    raise ValueError(error_msg)


def _list_remaining_files(work_dir: Path) -> List[str]:
    """
    Collect every file below the directory as a path relative to it.

    Args:
        work_dir: Directory to walk

    Returns:
        Relative file paths as strings; an empty list if walking the tree
        raised ``OSError`` or ``ValueError`` (a warning is logged in that case)
    """
    try:
        return [
            str((Path(parent) / leaf).relative_to(work_dir))
            for parent, _subdirs, leaves in os.walk(work_dir)
            for leaf in leaves
        ]
    except (OSError, ValueError) as e:
        logger.warning(f"Error listing files in {work_dir}: {e}")
        return []


def cleanup_environment(work_dir: str) -> None:
    """
    Remove the temporary work directory and everything inside it.

    A directory that fails the safety verification is left untouched and only
    produces a warning; that case is not an error for the caller.

    Args:
        work_dir: Path to the temporary work folder to remove

    Raises:
        OSError: If there's an error removing the directory
    """
    logger.info("Cleaning up work environment")
    logger.info(f"Removing temporary work directory: {work_dir}")

    target = Path(work_dir)

    try:
        _verify_safe_path(target)

        # The tree is only walked when debug output is actually enabled
        if logger.isEnabledFor(logging.DEBUG) and (doomed := _list_remaining_files(target)):
            logger.debug(f"Files to be removed: {', '.join(doomed)}")

        shutil.rmtree(work_dir)
        logger.info(f"Successfully removed directory: {work_dir}")
    except ValueError as e:
        # Non-fatal: missing directory or not a recognised temp directory
        logger.warning(f"Skipping cleanup: {str(e)}")
    except OSError as e:
        error_msg = f"Error removing directory {work_dir}: {str(e)}"
        logger.error(error_msg)
        raise OSError(error_msg) from e

    logger.info("Cleanup complete")


def main() -> None:
    """
    Command line entry point: remove the directory given via ``--work-dir``.

    Exits with status 0 on success and status 1 if cleanup raised any
    exception (the error is logged before exiting).
    """
    cli = argparse.ArgumentParser(description="Clean up work environment")
    cli.add_argument("--work-dir", required=True, help="Path to the temporary work folder to remove")
    options = cli.parse_args()

    try:
        cleanup_environment(options.work_dir)
    except Exception as e:
        logger.error(f"Error cleaning up environment: {str(e)}")
        sys.exit(1)
    else:
        sys.exit(0)


if __name__ == "__main__":
    main()
Prepare for Release 28 | 29 | Ensure all changes are committed, CI passes, and the code is ready for release: 30 | 31 | ```bash 32 | # Pull latest changes 33 | git checkout main 34 | git pull origin main 35 | 36 | # Run quality checks and tests 37 | make ci-quality 38 | make test-coverage 39 | ``` 40 | 41 | ### 2. Update Version Number 42 | 43 | Update the version in `pyproject.toml`: 44 | 45 | ```bash 46 | # Option 1: Manual edit 47 | # Edit pyproject.toml and change version = "x.y.z" 48 | 49 | # Option 2: Using make command 50 | make bump-version 51 | ``` 52 | 53 | The `bump-version` make command will: 54 | 1. Show current version 55 | 2. Prompt for new version 56 | 3. Update `pyproject.toml` 57 | 58 | ### 3. Commit Version Change 59 | 60 | ```bash 61 | git add pyproject.toml 62 | git commit -m "Bump version to x.y.z" 63 | git push origin main 64 | ``` 65 | 66 | ### 4. Create Release Tag 67 | 68 | ```bash 69 | # Option 1: Manual tagging 70 | git tag -a "vx.y.z" -m "Release vx.y.z" 71 | 72 | # Option 2: Using make command 73 | make release 74 | ``` 75 | 76 | The `release` make command will: 77 | 1. Run quality checks and tests 78 | 2. Create a new annotated git tag based on the version in pyproject.toml 79 | 80 | ### 5. Push Tag to Trigger Release 81 | 82 | ```bash 83 | git push origin vx.y.z 84 | ``` 85 | 86 | This will trigger the GitHub Actions release workflow. 87 | 88 | ### 6. Verify Release 89 | 90 | 1. Go to the GitHub repository's Actions tab 91 | 2. Check that the release workflow completed successfully 92 | 3. Go to the Releases page to verify that the release was created with all artifacts 93 | 94 | ## Installation from Release Artifacts 95 | 96 | The released package can be installed in two ways: 97 | 98 | ### 1. 
Using pip directly from GitHub (for applications) 99 | 100 | ```bash 101 | pip install https://github.com/dx-tooling/platform-problem-monitoring-core/releases/download/vX.Y.Z/platform_problem_monitoring_core-X.Y.Z-py3-none-any.whl 102 | ``` 103 | 104 | ### 2. For development or customization 105 | 106 | 1. Download both the wheel file and `additional_assets.zip` from the releases page 107 | 2. Extract the configuration templates 108 | 3. Follow the setup instructions in the README 109 | 110 | ## Versioning Scheme 111 | 112 | This project follows [Semantic Versioning](https://semver.org/): 113 | 114 | * **MAJOR version** (x.0.0) - Incompatible API changes 115 | * **MINOR version** (0.x.0) - Add functionality in a backward compatible manner 116 | * **PATCH version** (0.0.x) - Backward compatible bug fixes 117 | 118 | ## Release Notes Guidelines 119 | 120 | When creating a new release: 121 | 122 | 1. Provide a summary of key changes 123 | 2. List new features 124 | 3. Document any breaking changes 125 | 4. Include any migration instructions 126 | 5. Acknowledge contributors 127 | 128 | ## Troubleshooting Release Issues 129 | 130 | ### Common Problems and Solutions 131 | 132 | 1. **Release workflow fails** 133 | - Check that all test dependencies are properly installed 134 | - Verify that tests pass locally 135 | 136 | 2. **Missing configuration files in the release** 137 | - Check the paths in the "Create configuration archive" step 138 | - Ensure all required files exist in the repository 139 | 140 | 3. 
**Wrong version number** 141 | - Check that the version in `pyproject.toml` matches the git tag 142 | - Ensure the tag follows the format `vX.Y.Z` 143 | -------------------------------------------------------------------------------- /src/tests/fixtures/previous_normalization_results.json: -------------------------------------------------------------------------------- 1 | { 2 | "patterns": [ 3 | { 4 | "cluster_id": 3, 5 | "count": 713, 6 | "pattern": "[TIMESTAMP] request.ERROR: Uncaught PHP Exception Symfony\\Component\\HttpKernel\\Exception\\NotFoundHttpException: \"Component \"LinkedIcon\" not found.\" at /opt/website/prod/backend-app/vendor/symfony/ux-live-component/src/EventListener/LiveComponentSubscriber.php line {\"exception\": \"[object] (Symfony\\\\Component\\\\HttpKernel\\\\Exception\\\\NotFoundHttpException(code: ): Component \\\"LinkedIcon\\\" not found. at /opt/website/prod/backend-app/vendor/symfony/ux-live-component/src/EventListener/LiveComponentSubscriber.php:)\\n[previous exception] [object] (InvalidArgumentException(code: ): Unknown component \\\"LinkedIcon\\\". And no matching anonymous component template was found. 
at /opt/website/prod/backend-app/vendor/symfony/ux-twig-component/src/ComponentFactory.php:)\"} []", 7 | "first_seen": "logstash-symfony-errors-2025.03.05:dLi0ZpUBJdUWaJfyqGCm", 8 | "last_seen": "logstash-symfony-errors-2025.03.05:0Pu0ZpUBDnR7VQTqsAl9", 9 | "sample_log_lines": [], 10 | "sample_doc_references": [ 11 | "logstash-symfony-errors-2025.03.05:dLi0ZpUBJdUWaJfyqGCm", 12 | "logstash-symfony-errors-2025.03.05:9Pu0ZpUBDnR7VQTqqQi2", 13 | "logstash-symfony-errors-2025.03.05:ebi0ZpUBJdUWaJfyqmAj", 14 | "logstash-symfony-errors-2025.03.05:c7i0ZpUBJdUWaJfyrWGb", 15 | "logstash-symfony-errors-2025.03.05:0Pu0ZpUBDnR7VQTqsAl9" 16 | ] 17 | }, 18 | { 19 | "cluster_id": 4, 20 | "count": 713, 21 | "pattern": "[TIMESTAMP] application_events.INFO: {\"applicationEvent\": {\"id\": \"\", \"eventCategory\": \"\", \"eventCategoryTitle\": \"error\", \"eventType\": \"\", \"eventTypeTitle\": \"exception\", \"occuredAt\": \"TIMESTAMP\", \"affectedUserId\": <*> \"affectedUserIsJobofferer\": <*> \"affectedUserIsJobseeker\": <*> \"affectedUserRegisteredAt\": <*> \"metric\": null, \"errorMessage\": \"Component \\\"LinkedIcon\\\" not found.\", \"additionalData\": {\"throwableClass\": \"Symfony\\\\Component\\\\HttpKernel\\\\Exception\\\\NotFoundHttpException\", \"file\": \"/opt/website/prod/backend-app/vendor/symfony/ux-live-component/src/EventListener/LiveComponentSubscriber.php\", \"line\": \"\"}, \"requestId\": \"\", \"sessionId\": <*> \"clientId\": \"\", \"isProbablyBotRequest\": \"\"}} [] []", 22 | "first_seen": "logstash-symfony-application-events-2025.03.05:8Pu0ZpUBDnR7VQTqqAis", 23 | "last_seen": "logstash-symfony-application-events-2025.03.05:0fu0ZpUBDnR7VQTqsAl-", 24 | "sample_log_lines": [], 25 | "sample_doc_references": [ 26 | "logstash-symfony-application-events-2025.03.05:8Pu0ZpUBDnR7VQTqqAis", 27 | "logstash-symfony-application-events-2025.03.05:8fu0ZpUBDnR7VQTqqAjK", 28 | "logstash-symfony-application-events-2025.03.05:dri0ZpUBJdUWaJfyqWBl", 29 | 
"logstash-symfony-application-events-2025.03.05:cbi0ZpUBJdUWaJfyrGG3", 30 | "logstash-symfony-application-events-2025.03.05:0fu0ZpUBDnR7VQTqsAl-" 31 | ] 32 | }, 33 | { 34 | "cluster_id": 1, 35 | "count": 1, 36 | "pattern": "[TIMESTAMP] app.INFO: Application Appointment Scheduling API request for handling superchat error: No mapping found for superchat message id 'ms_nVVaoWgbvdQk5lIFyYWTK'. This will not affect any schedulings. [] []", 37 | "first_seen": "logstash-symfony-main-2025.03.05:9_qzZpUBDnR7VQTqSpIZ", 38 | "last_seen": "logstash-symfony-main-2025.03.05:9_qzZpUBDnR7VQTqSpIZ", 39 | "sample_log_lines": [], 40 | "sample_doc_references": [ 41 | "logstash-symfony-main-2025.03.05:9_qzZpUBDnR7VQTqSpIZ" 42 | ] 43 | }, 44 | { 45 | "cluster_id": 2, 46 | "count": 1, 47 | "pattern": "remote_addr=\"\" - x_forwarded_for=\", \" - cf_connecting_ip=\"\" - - [05/Mar/2025: