├── reproduce.sh
├── tests
│   ├── __init__.py
│   ├── unit
│   │   ├── __init__.py
│   │   ├── test_config.py
│   │   └── test_cli.py
│   ├── integration
│   │   └── __init__.py
│   └── conftest.py
├── Anonymous_ Reproduction_ Package
│   └── README_REVIEWERS.md
├── image.png
├── pydhis2
│   ├── cli
│   │   └── __init__.py
│   ├── observe
│   │   ├── __init__.py
│   │   └── logging.py
│   ├── __main__.py
│   ├── templates
│   │   ├── {{cookiecutter.project_slug}}
│   │   │   ├── env.example
│   │   │   ├── requirements.txt
│   │   │   ├── configs
│   │   │   │   ├── dhis2.yml
│   │   │   │   └── dqr.yml
│   │   │   ├── pipelines
│   │   │   │   └── example.yml
│   │   │   ├── Makefile.cmd
│   │   │   ├── README.md
│   │   │   └── scripts
│   │   │       └── run_pipeline.py
│   │   └── cookiecutter.json
│   ├── dqr
│   │   ├── __init__.py
│   │   └── config.yml
│   ├── endpoints
│   │   ├── __init__.py
│   │   ├── analytics.py
│   │   └── metadata.py
│   ├── pipeline
│   │   ├── __init__.py
│   │   ├── executor.py
│   │   └── config.py
│   ├── io
│   │   ├── __init__.py
│   │   └── arrow.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── auth.py
│   │   ├── types.py
│   │   └── errors.py
│   ├── testing
│   │   ├── __init__.py
│   │   ├── demo_test.py
│   │   ├── data_generator.py
│   │   ├── mock_server.py
│   │   └── network_simulator.py
│   └── __init__.py
├── docs
│   ├── changelog.md
│   ├── contributing.md
│   ├── requirements.txt
│   ├── api
│   │   ├── client.rst
│   │   ├── io.rst
│   │   ├── types.rst
│   │   └── endpoints.rst
│   ├── installation.rst
│   ├── quickstart.rst
│   ├── metadata.rst
│   ├── cli.rst
│   ├── configuration.rst
│   ├── conf.py
│   ├── tracker.rst
│   ├── datavaluesets.rst
│   ├── analytics.rst
│   ├── dqr.rst
│   └── index.rst
├── pytest.ini
├── .readthedocs.yml
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── documentation.md
│   │   ├── feature_request.md
│   │   └── bug_report.md
│   ├── workflows
│   │   ├── docs.yml
│   │   └── ci.yml
│   └── PULL_REQUEST_TEMPLATE.md
├── CITATION.cff
├── dhis2_probe_summary.json
├── experiment
│   └── Methods.md
├── CONTRIBUTING.md
├── CHANGELOG.md
├── .gitignore
├── pyproject.toml
└── CODE_OF_CONDUCT.md
/reproduce.sh:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # Tests package
2 |
--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
1 | # Unit tests
2 |
--------------------------------------------------------------------------------
/Anonymous_ Reproduction_ Package/README_REVIEWERS.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/integration/__init__.py:
--------------------------------------------------------------------------------
1 | """Integration tests package"""
2 |
--------------------------------------------------------------------------------
/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HzaCode/pyDHIS2/HEAD/image.png
--------------------------------------------------------------------------------
/pydhis2/cli/__init__.py:
--------------------------------------------------------------------------------
1 | """CLI module - Command line tools"""
2 |
3 | from pydhis2.cli.main import app
4 |
5 | __all__ = ["app"]
6 |
--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | See [CHANGELOG.md](https://github.com/HzaCode/pyDHIS2/blob/main/CHANGELOG.md) in the repository for version history.
4 |
5 |
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | We welcome contributions! Please see [CONTRIBUTING.md](https://github.com/HzaCode/pyDHIS2/blob/main/CONTRIBUTING.md) in the repository.
4 |
5 |
--------------------------------------------------------------------------------
/pydhis2/observe/__init__.py:
--------------------------------------------------------------------------------
1 | """Observability module - Logging and metrics"""
2 |
3 | from pydhis2.observe.logging import get_logger, setup_logging
4 |
5 | __all__ = [
6 | "setup_logging",
7 | "get_logger",
8 | ]
9 |
--------------------------------------------------------------------------------
/pydhis2/__main__.py:
--------------------------------------------------------------------------------
1 | """
2 | Main entry point for pydhis2 CLI
3 | Allows running: python -m pydhis2 [command]
4 | """
5 |
6 | from pydhis2.cli.main import app
7 |
8 |
9 | def main():
10 | """Main entry point for CLI"""
11 | app()
12 |
13 |
14 | if __name__ == "__main__":
15 | main()
16 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | # Documentation build requirements
2 | sphinx>=7.2.0,<8.0.0
3 | sphinx-rtd-theme>=2.0.0,<3.0.0
4 | sphinx-autodoc-typehints>=1.25.0,<2.0.0
5 | myst-parser>=2.0.0,<3.0.0
6 | sphinx-copybutton>=0.5.2,<1.0.0
7 | sphinxcontrib-napoleon>=0.7,<1.0.0
8 | linkify-it-py>=2.0.0,<3.0.0
9 |
10 |
--------------------------------------------------------------------------------
/pydhis2/templates/{{cookiecutter.project_slug}}/env.example:
--------------------------------------------------------------------------------
1 | # DHIS2 Connection Configuration
2 | DHIS2_URL={{ cookiecutter.dhis2_url }}
3 | DHIS2_USERNAME=your_username
4 | DHIS2_PASSWORD=your_password
5 |
6 | # Optional: Rate Limiting Configuration
7 | DHIS2_RPS=8
8 | DHIS2_CONCURRENCY=8
9 |
10 | # Optional: Retry Configuration
11 | DHIS2_MAX_RETRIES=5
12 |
--------------------------------------------------------------------------------
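The template's requirements.txt pins python-dotenv, so a generated project can load these variables before constructing a client. A minimal sketch, assuming a `.env` file copied from env.example (which names a project then passes these on to `DHIS2Config` is left open here):

```python
# Sketch: loading the env.example variables with python-dotenv.
import os

from dotenv import load_dotenv

load_dotenv()  # populate os.environ from .env in the working directory

base_url = os.environ["DHIS2_URL"]
auth = (os.environ["DHIS2_USERNAME"], os.environ["DHIS2_PASSWORD"])
rps = float(os.getenv("DHIS2_RPS", "8"))                # optional overrides;
concurrency = int(os.getenv("DHIS2_CONCURRENCY", "8"))  # defaults mirror
max_retries = int(os.getenv("DHIS2_MAX_RETRIES", "5"))  # env.example
```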
/pydhis2/dqr/__init__.py:
--------------------------------------------------------------------------------
1 | """Data Quality Review (DQR) module - WHO-DQR metrics implementation"""
2 |
3 | from pydhis2.dqr.metrics import (
4 | CompletenessMetrics,
5 | ConsistencyMetrics,
6 | MetricResult,
7 | TimelinessMetrics,
8 | )
9 |
10 | __all__ = [
11 | "CompletenessMetrics",
12 | "ConsistencyMetrics",
13 | "TimelinessMetrics",
14 | "MetricResult",
15 | ]
16 |
--------------------------------------------------------------------------------
/pydhis2/templates/{{cookiecutter.project_slug}}/requirements.txt:
--------------------------------------------------------------------------------
1 | # Base requirements for the project
2 | pydhis2>=0.2.0
3 | pandas
4 | pyarrow
5 | openpyxl
6 | # Add other dependencies here
7 |
8 | {% if cookiecutter.use_notebooks == "yes" -%}
9 | # Jupyter notebooks
10 | jupyter>=1.0.0
11 | jupyterlab>=3.0.0
12 | matplotlib>=3.5.0
13 | seaborn>=0.11.0
14 | plotly>=5.0.0
15 | {%- endif %}
16 |
17 | # Configuration management
18 | python-dotenv>=0.19.0
19 |
--------------------------------------------------------------------------------
/docs/api/client.rst:
--------------------------------------------------------------------------------
1 | Client
2 | ======
3 |
4 | AsyncDHIS2Client
5 | ----------------
6 |
7 | .. autoclass:: pydhis2.core.client.AsyncDHIS2Client
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | SyncDHIS2Client
13 | ---------------
14 |
15 | .. autoclass:: pydhis2.core.client.SyncDHIS2Client
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | Helper Functions
21 | ----------------
22 |
23 | .. autofunction:: pydhis2.get_client
24 |
25 |
--------------------------------------------------------------------------------
/pydhis2/endpoints/__init__.py:
--------------------------------------------------------------------------------
1 | """Endpoints module - Wrappers for various DHIS2 API endpoints"""
2 |
3 | from pydhis2.endpoints.analytics import AnalyticsEndpoint
4 | from pydhis2.endpoints.datavaluesets import DataValueSetsEndpoint
5 | from pydhis2.endpoints.metadata import MetadataEndpoint
6 | from pydhis2.endpoints.tracker import TrackerEndpoint
7 |
8 | __all__ = [
9 | "AnalyticsEndpoint",
10 | "DataValueSetsEndpoint",
11 | "TrackerEndpoint",
12 | "MetadataEndpoint",
13 | ]
14 |
--------------------------------------------------------------------------------
/pydhis2/pipeline/__init__.py:
--------------------------------------------------------------------------------
1 | """Pipeline configuration and execution module"""
2 |
3 | from .config import PipelineConfig, StepConfig
4 | from .executor import PipelineExecutor
5 | from .steps import AnalyticsStep, DataValueSetsStep, DQRStep, StepRegistry, TrackerStep
6 |
7 | __all__ = [
8 | 'PipelineConfig',
9 | 'StepConfig',
10 | 'PipelineExecutor',
11 | 'AnalyticsStep',
12 | 'TrackerStep',
13 | 'DataValueSetsStep',
14 | 'DQRStep',
15 | 'StepRegistry'
16 | ]
17 |
--------------------------------------------------------------------------------
/pydhis2/io/__init__.py:
--------------------------------------------------------------------------------
1 | """I/O module - Data format conversion and serialization"""
2 |
3 | from pydhis2.io.arrow import ArrowConverter
4 | from pydhis2.io.schema import SchemaManager
5 | from pydhis2.io.to_pandas import (
6 | AnalyticsDataFrameConverter,
7 | DataValueSetsConverter,
8 | TrackerConverter,
9 | )
10 |
11 | __all__ = [
12 | "AnalyticsDataFrameConverter",
13 | "DataValueSetsConverter",
14 | "TrackerConverter",
15 | "ArrowConverter",
16 | "SchemaManager",
17 | ]
18 |
--------------------------------------------------------------------------------
/docs/api/io.rst:
--------------------------------------------------------------------------------
1 | I/O Utilities
2 | =============
3 |
4 | Pandas Integration
5 | ------------------
6 |
7 | .. automodule:: pydhis2.io.to_pandas
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | Arrow Integration
13 | -----------------
14 |
15 | .. automodule:: pydhis2.io.arrow
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | Schema Utilities
21 | ----------------
22 |
23 | .. automodule:: pydhis2.io.schema
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
28 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = -v --tb=short --strict-markers
3 | asyncio_mode = auto
4 | asyncio_default_fixture_loop_scope = function
5 | testpaths = tests
6 | python_files = test_*.py
7 | python_classes = Test*
8 | python_functions = test_*
9 | markers =
10 | integration: marks tests as integration tests (deselect with '-m "not integration"')
11 | slow: marks tests as slow (deselect with '-m "not slow"')
12 | unit: marks tests as unit tests
13 | filterwarnings =
14 | ignore::DeprecationWarning
15 | ignore::FutureWarning
16 |
--------------------------------------------------------------------------------
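For reference, a sketch of how the markers registered above are applied in a test module; because `asyncio_mode = auto`, bare `async def` tests are collected without an explicit decorator:

```python
import pytest


@pytest.mark.unit
def test_fast_path():
    assert 1 + 1 == 2


@pytest.mark.integration
@pytest.mark.slow
async def test_against_live_server():
    # Deselect with: pytest -m "not integration"
    ...
```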
/docs/api/types.rst:
--------------------------------------------------------------------------------
1 | Types
2 | =====
3 |
4 | Configuration Types
5 | -------------------
6 |
7 | DHIS2Config
8 | ~~~~~~~~~~~
9 |
10 | .. autoclass:: pydhis2.core.types.DHIS2Config
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | Query Types
16 | -----------
17 |
18 | AnalyticsQuery
19 | ~~~~~~~~~~~~~~
20 |
21 | .. autoclass:: pydhis2.core.types.AnalyticsQuery
22 | :members:
23 | :undoc-members:
24 | :show-inheritance:
25 |
26 | Error Types
27 | -----------
28 |
29 | .. automodule:: pydhis2.core.errors
30 | :members:
31 | :undoc-members:
32 | :show-inheritance:
33 |
34 |
--------------------------------------------------------------------------------
/pydhis2/core/__init__.py:
--------------------------------------------------------------------------------
1 | """Core module - HTTP client, rate limiting, retry, authentication, and other infrastructure"""
2 |
3 | # Export only base types and errors to avoid circular dependencies
4 | from pydhis2.core.errors import (
5 | DHIS2Error,
6 | DHIS2HTTPError,
7 | ImportConflictError,
8 | RateLimitExceeded,
9 | RetryExhausted,
10 | )
11 | from pydhis2.core.types import DHIS2Config
12 |
13 | __all__ = [
14 | "DHIS2Config",
15 | "DHIS2Error",
16 | "DHIS2HTTPError",
17 | "RateLimitExceeded",
18 | "RetryExhausted",
19 | "ImportConflictError",
20 | ]
21 |
--------------------------------------------------------------------------------
/pydhis2/templates/cookiecutter.json:
--------------------------------------------------------------------------------
1 | {
2 | "project_name": "My DHIS2 Analysis Project",
3 | "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_').replace('-', '_') }}",
4 | "project_description": "A data analysis project using pydhis2",
5 | "author_name": "Your Name",
6 | "author_email": "your.email@example.com",
7 | "dhis2_url": "https://play.dhis2.org/2.41",
8 | "python_version": "3.9",
9 | "use_notebooks": "yes",
10 | "use_dqr": "yes",
11 | "use_pipeline": "yes",
12 | "license": ["Apache-2.0", "MIT", "BSD-3-Clause"],
13 | "_extensions": ["jinja2_time.TimeExtension"]
14 | }
15 |
--------------------------------------------------------------------------------
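A sketch of rendering this template programmatically; the `pydhis2/templates` path is taken from the tree above, and the CLI form `cookiecutter pydhis2/templates` is equivalent. The assertion reproduces the Jinja expression that derives `project_slug`:

```python
from cookiecutter.main import cookiecutter

project_name = "My DHIS2 Analysis Project"
# Same derivation as the Jinja expression in cookiecutter.json:
slug = project_name.lower().replace(" ", "_").replace("-", "_")
assert slug == "my_dhis2_analysis_project"

cookiecutter(
    "pydhis2/templates",  # directory containing cookiecutter.json
    no_input=True,        # accept defaults instead of prompting
    extra_context={"project_name": project_name, "use_notebooks": "no"},
)
```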
/pydhis2/testing/__init__.py:
--------------------------------------------------------------------------------
1 | """Testing utilities module - Mock servers, data generators, and test helpers"""
2 |
3 | from pydhis2.testing.benchmark_utils import BenchmarkRunner, PerformanceProfiler
4 | from pydhis2.testing.data_generator import TestDataGenerator
5 | from pydhis2.testing.mock_server import MockDHIS2Server
6 | from pydhis2.testing.network_simulator import (
7 | BenchmarkDataGenerator,
8 | NetworkCondition,
9 | NetworkSimulator,
10 | )
11 |
12 | __all__ = [
13 | "MockDHIS2Server",
14 | "TestDataGenerator",
15 | "BenchmarkDataGenerator",
16 | "NetworkSimulator",
17 | "NetworkCondition",
18 | "BenchmarkRunner",
19 | "PerformanceProfiler",
20 | ]
21 |
--------------------------------------------------------------------------------
/docs/api/endpoints.rst:
--------------------------------------------------------------------------------
1 | Endpoints
2 | =========
3 |
4 | Analytics Endpoint
5 | ------------------
6 |
7 | .. autoclass:: pydhis2.endpoints.analytics.AnalyticsEndpoint
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | DataValueSets Endpoint
13 | ----------------------
14 |
15 | .. autoclass:: pydhis2.endpoints.datavaluesets.DataValueSetsEndpoint
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | Tracker Endpoint
21 | ----------------
22 |
23 | .. autoclass:: pydhis2.endpoints.tracker.TrackerEndpoint
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
28 | Metadata Endpoint
29 | -----------------
30 |
31 | .. autoclass:: pydhis2.endpoints.metadata.MetadataEndpoint
32 | :members:
33 | :undoc-members:
34 | :show-inheritance:
35 |
36 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | version: 2
5 |
6 | # Build documentation with Sphinx
7 | sphinx:
8 | configuration: docs/conf.py
9 | fail_on_warning: false
10 |
11 | # Build formats
12 | formats:
13 | - pdf
14 | - epub
15 |
16 | # Python environment
17 | build:
18 | os: ubuntu-22.04
19 | tools:
20 | python: "3.11"
21 | jobs:
22 | post_checkout:
23 | # For pull request builds, refresh the PR ref before building
24 | - |
25 | if [ "$READTHEDOCS_VERSION_TYPE" = "external" ] && git show-ref --verify --quiet "refs/remotes/origin/$READTHEDOCS_VERSION"; then
26 | # Fetch the latest commits for this ref from origin
27 | git fetch origin $READTHEDOCS_GIT_IDENTIFIER
28 | fi
29 |
30 | # Python requirements
31 | python:
32 | install:
33 | - requirements: docs/requirements.txt
34 | - method: pip
35 | path: .
36 | extra_requirements:
37 | - dev
38 |
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Documentation Issue
3 | about: Report missing, incorrect, or unclear documentation
4 | title: '[DOCS] '
5 | labels: documentation
6 | assignees: ''
7 | ---
8 |
9 | ## Documentation Issue Type
10 | - [ ] Missing documentation
11 | - [ ] Incorrect documentation
12 | - [ ] Unclear/confusing documentation
13 | - [ ] Typo or formatting issue
14 | - [ ] Example code doesn't work
15 |
16 | ## Location
17 | Where is the documentation issue?
18 | - Page/Section: [e.g. README.md, API reference for analytics]
19 | - URL (if online): [e.g. https://hzacode.github.io/pydhis2/...]
20 |
21 | ## Description
22 | A clear description of the documentation issue.
23 |
24 | ## Current Documentation
25 | Quote or screenshot the current documentation (if applicable):
26 | ```
27 | Current text...
28 | ```
29 |
30 | ## Suggested Improvement
31 | What should the documentation say instead?
32 | ```
33 | Suggested text...
34 | ```
35 |
36 | ## Additional context
37 | Add any other context, examples, or suggestions here.
38 |
39 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | Requirements
5 | ------------
6 |
7 | * Python ≥ 3.9
8 | * pip or conda
9 |
10 | From PyPI
11 | ---------
12 |
13 | The easiest way to install pydhis2 is from PyPI:
14 |
15 | .. code-block:: bash
16 |
17 | pip install pydhis2
18 |
19 | From Source
20 | -----------
21 |
22 | To install from source (for development or latest features):
23 |
24 | .. code-block:: bash
25 |
26 | git clone https://github.com/HzaCode/pyDHIS2.git
27 | cd pyDHIS2
28 | pip install -e .
29 |
30 | Development Installation
31 | ------------------------
32 |
33 | For development with testing and documentation tools:
34 |
35 | .. code-block:: bash
36 |
37 | git clone https://github.com/HzaCode/pyDHIS2.git
38 | cd pyDHIS2
39 | pip install -e ".[dev]"
40 |
41 | Verify Installation
42 | -------------------
43 |
44 | Check that pydhis2 is installed correctly:
45 |
46 | .. code-block:: bash
47 |
48 | pydhis2 version
49 |
50 | Run the quick demo to test connectivity:
51 |
52 | .. code-block:: bash
53 |
54 | pydhis2 demo quick
55 |
56 |
--------------------------------------------------------------------------------
/pydhis2/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | pydhis2 - Reproducible DHIS2 Python SDK for LMIC scenarios
3 |
4 | An async-first DHIS2 Python SDK with built-in rate limiting and retry mechanisms, featuring:
5 | - One-click conversion to Pandas/Arrow formats
6 | - Built-in WHO-DQR data quality metrics
7 | - CLI + Cookiecutter template support
8 | - Optimized for weak network environments
9 | """
10 |
11 | # Core types can be imported directly
12 | from pydhis2.core.errors import (
13 | DHIS2Error,
14 | DHIS2HTTPError,
15 | ImportConflictError,
16 | RateLimitExceeded,
17 | RetryExhausted,
18 | )
19 | from pydhis2.core.types import DHIS2Config
20 |
21 |
22 | # Lazy import to avoid circular dependencies
23 | def get_client():
24 | from pydhis2.core.client import AsyncDHIS2Client, SyncDHIS2Client
25 | return AsyncDHIS2Client, SyncDHIS2Client
26 |
27 | __version__ = "0.2.0"
28 | __author__ = "pydhis2 contributors"
29 |
30 | __all__ = [
31 | "get_client",
32 | "DHIS2Config",
33 | "DHIS2Error",
34 | "DHIS2HTTPError",
35 | "RateLimitExceeded",
36 | "RetryExhausted",
37 | "ImportConflictError",
38 | ]
39 |
--------------------------------------------------------------------------------
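Usage of the lazy accessor above, mirroring docs/quickstart.rst: the client classes are imported only when `get_client()` is called, which is what sidesteps the circular imports noted in the comment.

```python
from pydhis2 import DHIS2Config, get_client

AsyncDHIS2Client, SyncDHIS2Client = get_client()

config = DHIS2Config(
    base_url="https://demos.dhis2.org/dq",
    auth=("demo", "District1#"),
)

with SyncDHIS2Client(config) as client:
    ...  # see docs/quickstart.rst for full analytics examples
```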
/pydhis2/templates/{{cookiecutter.project_slug}}/configs/dhis2.yml:
--------------------------------------------------------------------------------
1 | # DHIS2 client configuration
2 |
3 | # Connection settings
4 | connection:
5 | base_url: "{{ cookiecutter.dhis2_url }}"
6 | rps: 8 # Requests per second
7 | concurrency: 8 # Concurrent connections
8 | timeouts: [10, 60, 120] # Connect/read/total timeout (seconds)
9 | compression: true # Enable gzip compression
10 |
11 | # Retry configuration
12 | retry:
13 | max_attempts: 5 # Maximum retry attempts
14 | base_delay: 0.5 # Base delay (seconds)
15 | max_delay: 60.0 # Maximum delay (seconds)
16 | backoff_factor: 2.0 # Backoff factor
17 | jitter: true # Enable jitter
18 | retry_on_status: # HTTP status codes to retry on
19 | - 429
20 | - 500
21 | - 502
22 | - 503
23 | - 504
24 |
25 | # Cache configuration
26 | cache:
27 | enable: true # Enable caching
28 | ttl: 3600 # Cache TTL (seconds)
29 | directory: ".cache" # Cache directory
30 |
31 | # Authentication configuration (environment variables are preferred)
32 | auth:
33 | method: "basic" # Auth method: basic/token/pat
34 |
--------------------------------------------------------------------------------
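The retry block above describes a capped exponential backoff with jitter. A sketch of the delay schedule those four fields suggest (illustrative only; pydhis2's internal retry code may differ in detail):

```python
import random

base_delay, max_delay, backoff_factor, jitter = 0.5, 60.0, 2.0, True


def retry_delay(attempt: int) -> float:
    """Delay before retry `attempt` (1-based), per the YAML above."""
    delay = min(base_delay * backoff_factor ** (attempt - 1), max_delay)
    if jitter:
        delay = random.uniform(0, delay)  # "full jitter" de-synchronizes clients
    return delay


# max_attempts: 5 -> nominal caps of 0.5, 1.0, 2.0, 4.0, 8.0 seconds
for attempt in range(1, 6):
    print(f"attempt {attempt}: sleeping {retry_delay(attempt):.2f}s")
```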
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature Request
3 | about: Suggest an idea for this project
4 | title: '[FEATURE] '
5 | labels: enhancement
6 | assignees: ''
7 | ---
8 |
9 | ## Is your feature request related to a problem? Please describe.
10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
11 |
12 | ## Describe the solution you'd like
13 | A clear and concise description of what you want to happen.
14 |
15 | ## Describe alternatives you've considered
16 | A clear and concise description of any alternative solutions or features you've considered.
17 |
18 | ## Use case
19 | Describe the use case and how this feature would benefit users:
20 | - Who would use this feature?
21 | - What problem does it solve?
22 | - How often would it be used?
23 |
24 | ## Proposed API (if applicable)
25 | ```python
26 | # Example of how the feature might be used
27 | from pydhis2 import get_client, DHIS2Config
28 |
29 | # Your proposed API usage
30 | ```
31 |
32 | ## Additional context
33 | Add any other context, screenshots, or examples about the feature request here.
34 |
35 | ## Would you be willing to contribute this feature?
36 | - [ ] Yes, I'd like to work on this
37 | - [ ] I can help test it
38 | - [ ] I need help implementing this
39 |
40 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | type: software
4 | title: "pydhis2: A Modern Python SDK for DHIS2"
5 | version: 0.2.0
6 | date-released: "2025-01-01"
7 | abstract: "A next-generation Python library for interacting with DHIS2, the world's largest health information management system. It provides a clean, modern, and efficient API for data extraction, analysis, and management, with a strong emphasis on creating reproducible workflows for scientific research and public health analysis in Low- and Middle-Income Country (LMIC) contexts."
8 |
9 | authors:
10 | - family-names: "pydhis2 contributors"
11 | affiliation: "pydhis2 Project"
12 |
13 | repository-code: "https://github.com/HzaCode/pyDHIS2"
14 | url: "https://github.com/HzaCode/pyDHIS2"
15 | license: Apache-2.0
16 |
17 | keywords:
18 | - DHIS2
19 | - health information systems
20 | - public health
21 | - data analysis
22 | - reproducible research
23 | - LMIC
24 | - Python SDK
25 | - epidemiology
26 | - health data
27 |
28 | preferred-citation:
29 | type: software
30 | title: "pydhis2: A Modern Python SDK for DHIS2"
31 | authors:
32 | - family-names: "pydhis2 contributors"
33 | affiliation: "pydhis2 Project"
34 | version: 0.2.0
35 | year: 2025
36 | url: "https://github.com/HzaCode/pyDHIS2"
37 | license: Apache-2.0
38 |
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug Report
3 | about: Create a report to help us improve
4 | title: '[BUG] '
5 | labels: bug
6 | assignees: ''
7 | ---
8 |
9 | ## Describe the bug
10 | A clear and concise description of what the bug is.
11 |
12 | ## To Reproduce
13 | Steps to reproduce the behavior:
14 | 1. Import '...'
15 | 2. Call function '....'
16 | 3. Pass parameters '....'
17 | 4. See error
18 |
19 | ## Expected behavior
20 | A clear and concise description of what you expected to happen.
21 |
22 | ## Code example
23 | ```python
24 | # Paste your minimal reproducible code example here
25 | from pydhis2 import get_client, DHIS2Config
26 |
27 | # Your code that produces the error
28 | ```
29 |
30 | ## Error message
31 | ```
32 | Paste the complete error message/traceback here
33 | ```
34 |
35 | ## Environment
36 | - **OS**: [e.g. Windows 10, Ubuntu 22.04, macOS 13]
37 | - **Python version**: [e.g. 3.9, 3.10, 3.11]
38 | - **pydhis2 version**: [e.g. 0.2.0]
39 | - **DHIS2 version**: [e.g. 2.38, 2.39, 2.40]
40 | - **Installation method**: [e.g. pip, conda, from source]
41 |
42 | ## Additional context
43 | Add any other context about the problem here, such as:
44 | - Network conditions (if relevant)
45 | - Data volume
46 | - Specific DHIS2 configuration
47 | - Related issues or PRs
48 |
49 | ## Possible solution
50 | If you have ideas on how to fix this, please share them here.
51 |
52 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: Deploy Documentation
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 | workflow_dispatch:
9 |
10 | permissions:
11 | contents: write
12 |
13 | jobs:
14 | build-and-deploy:
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - name: 📥 Checkout main branch
19 | uses: actions/checkout@v4
20 | with:
21 | ref: main
22 |
23 | - name: 🐍 Set up Python
24 | uses: actions/setup-python@v4
25 | with:
26 | python-version: '3.11'
27 |
28 | - name: 📦 Install dependencies
29 | run: |
30 | python -m pip install --upgrade pip
31 | pip install -r docs/requirements.txt
32 | pip install -e .
33 |
34 | - name: 📚 Build documentation with Sphinx
35 | run: |
36 | cd docs
37 | sphinx-build -b html . _build/html
38 | cd ..
39 |
40 | - name: 🚀 Deploy to gh-pages branch
41 | if: github.event_name == 'push' && github.ref == 'refs/heads/main'
42 | uses: peaceiris/actions-gh-pages@v3
43 | with:
44 | github_token: ${{ secrets.GITHUB_TOKEN }}
45 | publish_dir: ./docs/_build/html
46 | publish_branch: gh-pages
47 | force_orphan: true
48 | user_name: 'github-actions[bot]'
49 | user_email: 'github-actions[bot]@users.noreply.github.com'
50 | commit_message: 'Deploy Sphinx docs from main branch'
51 |
52 |
--------------------------------------------------------------------------------
/pydhis2/templates/{{cookiecutter.project_slug}}/configs/dqr.yml:
--------------------------------------------------------------------------------
1 | # Data Quality Review configuration
2 |
3 | # Completeness metrics configuration
4 | completeness:
5 | thresholds:
6 | reporting_completeness_pass: 0.90 # Reporting completeness pass threshold
7 | reporting_completeness_warn: 0.70 # Reporting completeness warning threshold
8 | data_element_completeness_pass: 0.90 # Data element completeness pass threshold
9 | data_element_completeness_warn: 0.70 # Data element completeness warning threshold
10 |
11 | # Consistency metrics configuration
12 | consistency:
13 | thresholds:
14 | outlier_threshold: 3.0 # Outlier Z-score threshold
15 | variance_threshold: 0.5 # Coefficient of variation threshold
16 | trend_consistency_pass: 0.80 # Trend consistency pass threshold
17 | trend_consistency_warn: 0.60 # Trend consistency warning threshold
18 |
19 | # Timeliness metrics configuration
20 | timeliness:
21 | thresholds:
22 | submission_timeliness_pass: 0.80 # Submission timeliness pass threshold
23 | submission_timeliness_warn: 0.60 # Submission timeliness warning threshold
24 | max_delay_days: 30 # Maximum acceptable delay in days
25 |
26 | # Report configuration
27 | report:
28 | title: "{{ cookiecutter.project_name }} - Data Quality Review Report"
29 | include_charts: true # Include charts
30 | include_details: true # Include details
31 | language: "en-US" # Report language
32 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches: [ main, gh ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | test:
11 | runs-on: ${{ matrix.os }}
12 | strategy:
13 | fail-fast: false
14 | matrix:
15 | os: [ubuntu-latest, windows-latest, macos-latest]
16 | python-version: ['3.9', '3.10', '3.11']
17 |
18 | steps:
19 | - name: 📥 Checkout code
20 | uses: actions/checkout@v4
21 |
22 | - name: 🐍 Set up Python ${{ matrix.python-version }}
23 | uses: actions/setup-python@v4
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 |
27 | - name: 📦 Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | pip install "pytest-asyncio>=0.23.0"
31 | pip install -e ".[dev]"
32 |
33 | - name: 🧪 Run tests
34 | run: |
35 | pytest tests/unit/ tests/integration/ -v --tb=short -x
36 |
37 | - name: ✅ Summary
38 | run: |
39 | echo "✅ Tests completed for Python ${{ matrix.python-version }} on ${{ matrix.os }}"
40 |
41 | lint:
42 | runs-on: ubuntu-latest
43 |
44 | steps:
45 | - name: 📥 Checkout code
46 | uses: actions/checkout@v4
47 |
48 | - name: 🐍 Set up Python
49 | uses: actions/setup-python@v4
50 | with:
51 | python-version: '3.11'
52 |
53 | - name: 📦 Install ruff
54 | run: |
55 | pip install ruff
56 |
57 | - name: 🔍 Run ruff
58 | run: |
59 | ruff check pydhis2/ || echo "⚠️ Linting found issues (non-blocking)"
60 | echo "✅ Linting completed"
61 |
--------------------------------------------------------------------------------
/pydhis2/templates/{{cookiecutter.project_slug}}/pipelines/example.yml:
--------------------------------------------------------------------------------
1 | # Example data analysis pipeline
2 |
3 | name: "{{ cookiecutter.project_name }} - Example Pipeline"
4 | description: "A complete pipeline for data pulling, quality assessment, and report generation"
5 |
6 | # Global configuration
7 | rps: 8 # Requests per second
8 | concurrency: 8 # Concurrency level
9 |
10 | # Pipeline steps
11 | steps:
12 | # Step 1: Pull Analytics data
13 | - type: analytics_pull
14 | name: "Pull Immunization Analytics Data"
15 | dx: "immunization.indicators" # Please replace with actual indicator IDs
16 | ou: "LEVEL-3" # Please replace with actual organization units
17 | pe: "2023Q1:2023Q4" # Time range
18 | output: "analytics.parquet"
19 | format: "parquet"
20 |
21 | # Step 2: Pull Tracker events (optional)
22 | # - type: tracker_pull
23 | # name: "Pull Immunization Tracker Events"
24 | # program: "program_id" # Please replace with actual program ID
25 | # status: "COMPLETED"
26 | # since: "2023-01-01"
27 | # output: "tracker_events.parquet"
28 | # format: "parquet"
29 |
30 | # Step 3: Data Quality Review
31 | - type: dqr
32 | name: "Analytics Data Quality Review"
33 | input: "analytics.parquet"
34 | html_output: "analytics_dqr_report.html"
35 | json_output: "analytics_dqr_summary.json"
36 | config:
37 | completeness:
38 | thresholds:
39 | reporting_completeness_pass: 0.85
40 | data_element_completeness_pass: 0.80
41 | consistency:
42 | thresholds:
43 | outlier_threshold: 2.5
44 | timeliness:
45 | thresholds:
46 | submission_timeliness_pass: 0.75
47 |
48 | # Pipeline metadata
49 | metadata:
50 | author: "{{ cookiecutter.author_name }}"
51 | version: "1.0.0"
52 | created: "{% now 'utc', '%Y-%m-%d' %}"
53 | tags:
54 | - "analytics"
55 | - "dqr"
56 | - "immunization"
57 |
--------------------------------------------------------------------------------
/dhis2_probe_summary.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "server": "EMIS Demo",
4 | "base": "https://emis.dhis2.org/demo",
5 | "ok": true,
6 | "version": "2.40.4.1",
7 | "systemId": "ca37253e-3727-4f73-838c-475ea3c7a432",
8 | "contextPath": "https://emis.dhis2.org/demo",
9 | "results": {
10 | "dataElements": {
11 | "ok": true,
12 | "observed_count": 530,
13 | "pager_total": 530,
14 | "duplicates": 0,
15 | "missing_pages": [],
16 | "checksum": "dd0f27ecbe35f9f0f19be4041e48dd03b76ddae6"
17 | },
18 | "organisationUnits": {
19 | "ok": true,
20 | "observed_count": 3596,
21 | "pager_total": 3596,
22 | "duplicates": 0,
23 | "missing_pages": [],
24 | "checksum": "86792351124542ebaa5e85f60ea51e065bc4e5e1"
25 | }
26 | }
27 | },
28 | {
29 | "server": "Play (android-current)",
30 | "base": "https://play.dhis2.org/android-current",
31 | "ok": false,
32 | "stage": "system/info",
33 | "http_status": 401,
34 | "sample": "{\"httpStatus\":\"Unauthorized\",\"httpStatusCode\":401,\"status\":\"ERROR\",\"message\":\"Unauthorized\"}"
35 | },
36 | {
37 | "server": "HMIS v41 Demo",
38 | "base": "https://demos.dhis2.org/hmis_v41",
39 | "ok": false,
40 | "stage": "system/info",
41 | "http_status": 401,
42 | "sample": "{\"httpStatus\":\"Unauthorized\",\"httpStatusCode\":401,\"status\":\"ERROR\",\"message\":\"Account disabled\"}"
43 | },
44 | {
45 | "server": "Data Quality Demo",
46 | "base": "https://demos.dhis2.org/dq",
47 | "ok": true,
48 | "version": "2.38.4.3",
49 | "systemId": null,
50 | "contextPath": "https://demos.dhis2.org/dq",
51 | "results": {
52 | "dataElements": {
53 | "ok": true,
54 | "observed_count": 301,
55 | "pager_total": 301,
56 | "duplicates": 0,
57 | "missing_pages": [],
58 | "checksum": "c7f6444929ac9596565fcba8f67bad5a551877ae"
59 | },
60 | "organisationUnits": {
61 | "ok": true,
62 | "observed_count": 654,
63 | "pager_total": 654,
64 | "duplicates": 0,
65 | "missing_pages": [],
66 | "checksum": "3431b2bdcb9fba431c5a727ce08b505c1e3edbf3"
67 | }
68 | }
69 | }
70 | ]
--------------------------------------------------------------------------------
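A sketch of tallying these probe entries: each either succeeded (`"ok": true`, with per-endpoint counts and checksums) or failed at a named stage, here HTTP 401 at `system/info`:

```python
import json
from pathlib import Path

probes = json.loads(Path("dhis2_probe_summary.json").read_text())

for probe in probes:
    if probe["ok"]:
        counts = {name: r["observed_count"] for name, r in probe["results"].items()}
        print(f"{probe['server']}: v{probe['version']} {counts}")
    else:
        print(f"{probe['server']}: failed at {probe['stage']} (HTTP {probe['http_status']})")
```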
/pydhis2/templates/{{cookiecutter.project_slug}}/Makefile.cmd:
--------------------------------------------------------------------------------
1 | @echo off
2 | REM {{ cookiecutter.project_name }} - Windows Batch Commands
3 |
4 | if "%1"=="help" goto help
5 | if "%1"=="setup" goto setup
6 | if "%1"=="run-pipeline" goto run_pipeline
7 | if "%1"=="dqr" goto dqr
8 | if "%1"=="clean" goto clean
9 | if "%1"=="" goto help
10 |
11 | :help
12 | echo.
13 | echo {{ cookiecutter.project_name }} - Available Commands:
14 | echo.
15 | echo make setup - Install dependencies and set up the environment
16 | echo make run-pipeline - Run the example data analysis pipeline
17 | echo make dqr - Run data quality review
18 | echo make clean - Clean up temporary files
19 | echo make help - Display this help message
20 | echo.
21 | goto end
22 |
23 | :setup
24 | echo Setting up project environment...
25 | if not exist venv (
26 | echo Creating virtual environment...
27 | py -m venv venv
28 | )
29 | echo Activating virtual environment and installing dependencies...
30 | call venv\Scripts\activate.bat && pip install -r requirements.txt
31 | echo Copying environment configuration file...
32 | if not exist .env (
33 | copy env.example .env
34 | echo Please edit the .env file with your DHIS2 connection details
35 | )
36 | echo ✅ Environment setup complete!
37 | goto end
38 |
39 | :run_pipeline
40 | echo Running data analysis pipeline...
41 | if not exist venv (
42 | echo ❌ Please run 'make setup' first
43 | goto end
44 | )
45 | call venv\Scripts\activate.bat && py scripts/run_pipeline.py
46 | goto end
47 |
48 | :dqr
49 | echo Running data quality review...
50 | if not exist data\analytics_data.parquet (
51 | echo ❌ Data file not found. Please run 'make run-pipeline' first
52 | goto end
53 | )
54 | call venv\Scripts\activate.bat && pydhis2 dqr run --input data\analytics_data.parquet --html reports\dqr_report.html --json reports\dqr_summary.json
55 | echo ✅ DQR report generated in the 'reports\' directory
56 | goto end
57 |
58 | :clean
59 | echo Cleaning up temporary files...
60 | if exist .pydhis2_cache rmdir /s /q .pydhis2_cache
61 | if exist __pycache__ rmdir /s /q __pycache__
62 | if exist .pytest_cache rmdir /s /q .pytest_cache
63 | echo ✅ Cleanup complete
64 | goto end
65 |
66 | :end
67 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # Pull Request
2 |
3 | ## Description
4 |
5 |
6 | ## Type of Change
7 |
8 | - [ ] Bug fix (non-breaking change which fixes an issue)
9 | - [ ] New feature (non-breaking change which adds functionality)
10 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
11 | - [ ] Documentation update
12 | - [ ] Code refactoring
13 | - [ ] Performance improvement
14 | - [ ] Test addition or modification
15 | - [ ] CI/CD improvement
16 |
17 | ## Related Issues
18 |
19 | Fixes #
20 | Relates to #
21 |
22 | ## Changes Made
23 |
24 | -
25 | -
26 | -
27 |
28 | ## Testing
29 |
30 | - [ ] Existing tests pass (`pytest tests/`)
31 | - [ ] Added new tests for new functionality
32 | - [ ] Manually tested on:
33 | - [ ] Python 3.9
34 | - [ ] Python 3.10
35 | - [ ] Python 3.11
36 | - [ ] Windows
37 | - [ ] Linux
38 | - [ ] macOS
39 |
40 | ## Code Quality
41 | - [ ] Code follows the project's style guidelines (`ruff check` passes)
42 | - [ ] Code is properly formatted (`ruff format` applied)
43 | - [ ] Added docstrings for new functions/classes
44 | - [ ] Added type hints where applicable
45 | - [ ] Updated relevant documentation
46 | - [ ] Updated CHANGELOG.md (if applicable)
47 |
48 | ## Screenshots (if applicable)
49 |
50 |
51 | ## Breaking Changes
52 |
53 |
54 | ## Additional Notes
55 |
56 |
57 | ## Checklist
58 | - [ ] My code follows the style guidelines of this project
59 | - [ ] I have performed a self-review of my own code
60 | - [ ] I have commented my code, particularly in hard-to-understand areas
61 | - [ ] I have made corresponding changes to the documentation
62 | - [ ] My changes generate no new warnings
63 | - [ ] I have added tests that prove my fix is effective or that my feature works
64 | - [ ] New and existing unit tests pass locally with my changes
65 | - [ ] Any dependent changes have been merged and published
66 |
67 |
--------------------------------------------------------------------------------
/tests/unit/test_config.py:
--------------------------------------------------------------------------------
1 | """Tests for configuration classes"""
2 |
3 | import pytest
4 | from pydantic import ValidationError
5 |
6 | from pydhis2.core.types import DHIS2Config, AuthMethod
7 |
8 |
9 | def test_dhis2_config_valid():
10 | """Test valid configuration"""
11 | config = DHIS2Config(
12 | base_url="https://play.dhis2.org/2.41",
13 | auth=("user", "pass"),
14 | rps=10.0,
15 | concurrency=5,
16 | )
17 |
18 | assert config.base_url == "https://play.dhis2.org/2.41"
19 | assert config.auth == ("user", "pass")
20 | assert config.auth_method == AuthMethod.BASIC
21 | assert config.rps == 10.0
22 | assert config.concurrency == 5
23 |
24 |
25 | def test_dhis2_config_url_validation():
26 | """Test URL validation"""
27 | with pytest.raises(ValidationError):
28 | DHIS2Config(
29 | base_url="invalid-url",
30 | auth=("user", "pass")
31 | )
32 |
33 |
34 | def test_dhis2_config_url_trailing_slash():
35 | """Test handling of trailing slash in URL"""
36 | config = DHIS2Config(
37 | base_url="https://play.dhis2.org/2.41/",
38 | auth=("user", "pass")
39 | )
40 |
41 | assert config.base_url == "https://play.dhis2.org/2.41"
42 |
43 |
44 | def test_dhis2_config_timeout_validation():
45 | """Test timeout validation"""
46 | with pytest.raises(ValidationError):
47 | DHIS2Config(
48 | base_url="https://play.dhis2.org/2.41",
49 | auth=("user", "pass"),
50 | timeout=-1 # negative timeout
51 | )
52 |
53 |
54 | def test_dhis2_config_auth_validation():
55 | """Test authentication validation"""
56 | # Valid tuple
57 | config1 = DHIS2Config(
58 | base_url="https://play.dhis2.org/2.41",
59 | auth=("user", "pass")
60 | )
61 | assert config1.auth == ("user", "pass")
62 |
63 | # Valid token
64 | config2 = DHIS2Config(
65 | base_url="https://play.dhis2.org/2.41",
66 | auth="token123"
67 | )
68 | assert config2.auth == "token123"
69 |
70 | # Invalid tuple
71 | with pytest.raises(ValidationError):
72 | DHIS2Config(
73 | base_url="https://play.dhis2.org/2.41",
74 | auth=("user",) # Only one element
75 | )
76 |
--------------------------------------------------------------------------------
/experiment/Methods.md:
--------------------------------------------------------------------------------
1 | # Methods
2 |
3 | ## Methodology Overview
4 |
5 | This study adheres to the principles of reproducibility in computational science. To ensure the reliability and repeatability of the results, all tests were conducted in a fully documented environment with fixed parameters.
6 |
7 | ## Statistical Analysis Methods
8 |
9 | ### Confidence Interval Estimation
10 |
11 | - **Method**: Bootstrap (Efron, 1979).
12 | - **Samples**: 1,000 bootstrap resamples.
13 | - **Confidence Level**: 95% (two-sided).
14 | - **Correction**: Bias-corrected and accelerated (BCa) method.
15 |
16 | ### Effect Size Calculation
17 |
18 | - **Execution Time**: Cliff's delta (a non-parametric effect size).
19 | - **Data Integrity**: Cohen's d (a parametric effect size).
20 | - **Threshold Interpretation**: An effect is considered medium when |δ| > 0.33.
21 |
22 | ## Quality Control
23 |
24 | ### Reproducibility Assurance
25 |
26 | - **Random Seed**: Fixed to `20250904`.
27 | - **Environment Snapshot**: Complete version records of all dependencies.
28 | - **Configuration Backup**: All test parameters were serialized into JSON format for backup.
29 |
30 | ### Validation Mechanisms
31 |
32 | - **Baseline Consistency**: The accuracy of the baseline data was re-validated before each run.
33 | - **Result Verification**: Multi-dimensional validation was performed on the results returned by the clients.
34 | - **Exception Handling**: Comprehensive capture and logging of all potential errors.
35 |
36 | ## Data Management
37 |
38 | ### Output Files
39 |
40 | - `manifest.json`: Records the test environment and dependency versions.
41 | - `experiment_config.json`: Contains the complete experiment configuration.
42 | - `raw_experiment_results.csv`: Includes all raw experimental data.
43 | - `requests_trace.jsonl`: Logs detailed request traces.
44 | - `comprehensive_comparison_report.md`: The final comprehensive comparison report.
45 |
46 | ### Data Source
47 |
48 | - **DHIS2 Instance**: `https://play.im.dhis2.org/stable-2-42-1`
49 | - **API Version**: Auto-detected.
50 | - **Timestamps**: All time records use the UTC ISO format.
51 |
52 | ## Ethics and Transparency
53 |
54 | This research adheres to the principles of open science; all code, data, and methods are publicly available. The study does not involve human subjects and therefore requires no ethics review. The research data can be made available to other researchers upon reasonable request.
55 |
--------------------------------------------------------------------------------
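A compact sketch of the interval and effect-size machinery described in Methods, using SciPy's `bootstrap` (which implements the BCa correction) and a direct pairwise Cliff's delta; the arrays are illustrative placeholders, not study data:

```python
import numpy as np
from scipy.stats import bootstrap

rng = np.random.default_rng(20250904)  # the fixed seed from Methods
a = rng.normal(1.0, 0.2, 50)  # e.g. execution times, client A
b = rng.normal(1.2, 0.2, 50)  # e.g. execution times, client B

# 95% two-sided BCa bootstrap CI for the mean, 1,000 resamples
res = bootstrap((a,), np.mean, n_resamples=1000, confidence_level=0.95,
                method="BCa", random_state=np.random.default_rng(20250904))
print(res.confidence_interval)

# Cliff's delta: P(A > B) - P(A < B) over all pairs; |delta| > 0.33 ~ medium
diff = a[:, None] - b[None, :]
delta = ((diff > 0).sum() - (diff < 0).sum()) / diff.size
print(f"Cliff's delta = {delta:+.3f}")
```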
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to pydhis2
2 |
3 | First off, thank you for considering contributing to `pydhis2`. It's people like you that make `pydhis2` such a great tool.
4 |
5 | Following these guidelines helps to communicate that you respect the time of the developers managing and developing this open source project. In return, they should reciprocate that respect in addressing your issue or assessing patches and features.
6 |
7 | ## Code of Conduct
8 |
9 | This project and everyone participating in it is governed by the [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code.
10 |
11 | ## How Can I Contribute?
12 |
13 | ### Reporting Bugs
14 |
15 | This is one of the simplest ways to contribute. If you find a bug, please ensure the bug was not already reported by searching on GitHub under [Issues](https://github.com/HzaCode/pyDHIS2/issues).
16 |
17 | If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring.
18 |
19 | ### Suggesting Enhancements
20 |
21 | If you have an idea for an enhancement, please open an issue to discuss it. This allows us to coordinate our efforts and prevent duplication of work.
22 |
23 | ### Your First Code Contribution
24 |
25 | Unsure where to begin contributing to `pydhis2`? You can start by looking through these `good-first-issue` and `help-wanted` issues:
26 |
27 | - [Good first issues](https://github.com/HzaCode/pyDHIS2/labels/good%20first%20issue) - issues which should only require a few lines of code, and a test or two.
28 | - [Help wanted issues](https://github.com/HzaCode/pyDHIS2/labels/help%20wanted) - issues which should be a bit more involved than `good-first-issue` issues.
29 |
30 | ### Pull Requests
31 |
32 | 1. Fork the repo and create your branch from `main`.
33 | 2. If you've added code that should be tested, add tests.
34 | 3. If you've changed APIs, update the documentation.
35 | 4. Ensure the test suite passes.
36 | 5. Make sure your code lints.
37 | 6. Issue that pull request!
38 |
39 | ## Styleguides
40 |
41 | We use `ruff` to format our code. Please run `ruff format .` before committing your changes.
42 |
43 | We also use `ruff` for linting. Please run `ruff check .` to check for any linting errors.
44 |
45 | ## License
46 |
47 | By contributing, you agree that your contributions will be licensed under the project's Apache License 2.0.
48 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7 |
8 | ## [0.2.0] - 2025-10-22
9 |
10 | ### Added
11 | - Modern async-first architecture with `AsyncDHIS2Client` and `SyncDHIS2Client`
12 | - Comprehensive analytics endpoint with DataFrame conversion (`.to_pandas()`)
13 | - DataValueSets endpoint with read/write capabilities
14 | - Tracker events endpoint with pagination and streaming
15 | - Metadata endpoint with import/export functionality
16 | - Built-in rate limiting with adaptive strategies
17 | - Robust retry mechanism with exponential backoff
18 | - HTTP caching with ETag and Last-Modified support
19 | - Data Quality Review (DQR) metrics based on WHO standards
20 | - Command-line interface (CLI) with typer
21 | - Project template system using Cookiecutter
22 | - OpenTelemetry instrumentation for observability
23 | - Comprehensive test suite (348 tests)
24 | - Multi-platform CI/CD (Ubuntu, Windows, macOS)
25 | - Support for Python 3.9, 3.10, 3.11
26 |
27 | ### Features
28 | - **Analytics**: Query, pagination, streaming, export to multiple formats
29 | - **DataValueSets**: Pull, push, chunking, conflict resolution
30 | - **Tracker**: Events and tracked entities with full CRUD operations
31 | - **Metadata**: Export, import, validation, schema inspection
32 | - **DQR**: Completeness, consistency, and timeliness metrics
33 | - **I/O**: Native Pandas, Arrow, and Parquet support
34 | - **Resilience**: Rate limiting, retries, caching, compression
35 | - **Developer Experience**: Type hints, clear error messages, extensive examples
36 |
37 | ### Documentation
38 | - Comprehensive README with quick start guide
39 | - Example scripts for common use cases
40 | - Contributing guidelines
41 | - Code of Conduct
42 | - API documentation in docstrings
43 |
44 | ### Infrastructure
45 | - GitHub Actions CI pipeline
46 | - Ruff for linting and formatting
47 | - pytest with asyncio support
48 | - Modern packaging with pyproject.toml
49 |
50 | ---
51 |
52 | ## Unreleased
53 |
54 | ### Planned
55 | - Enhanced CLI functionality for data operations
56 | - ReadTheDocs documentation site
57 | - Additional DQR metrics and visualizations
58 | - Performance benchmarking tools
59 | - More example notebooks and tutorials
60 | - Integration with additional data formats (Polars, DuckDB)
61 |
62 | ---
63 |
64 | [0.2.0]: https://github.com/HzaCode/pyDHIS2/releases/tag/v0.2.0
65 |
66 |
--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
1 | Quick Start Guide
2 | =================
3 |
4 | This guide will help you get started with pydhis2 in minutes.
5 |
6 | Basic Example
7 | -------------
8 |
9 | Here's a complete example of fetching analytics data:
10 |
11 | .. code-block:: python
12 |
13 | import asyncio
14 | import sys
15 | from pydhis2 import get_client, DHIS2Config
16 | from pydhis2.core.types import AnalyticsQuery
17 |
18 | AsyncDHIS2Client, _ = get_client()
19 |
20 | async def main():
21 | config = DHIS2Config(
22 | base_url="https://demos.dhis2.org/dq",
23 | auth=("demo", "District1#")
24 | )
25 |
26 | async with AsyncDHIS2Client(config) as client:
27 | query = AnalyticsQuery(
28 | dx=["b6mCG9sphIT"],
29 | ou="qzGX4XdWufs",
30 | pe="2023"
31 | )
32 | df = await client.analytics.to_pandas(query)
33 | print(df.head())
34 |
35 | if __name__ == "__main__":
36 | if sys.platform == 'win32':
37 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
38 | asyncio.run(main())
39 |
40 | Synchronous Client
41 | ------------------
42 |
43 | If you prefer synchronous code:
44 |
45 | .. code-block:: python
46 |
47 | from pydhis2 import get_client, DHIS2Config
48 | from pydhis2.core.types import AnalyticsQuery
49 |
50 | _, SyncDHIS2Client = get_client()
51 |
52 | config = DHIS2Config(
53 | base_url="https://demos.dhis2.org/dq",
54 | auth=("demo", "District1#")
55 | )
56 |
57 | with SyncDHIS2Client(config) as client:
58 | query = AnalyticsQuery(
59 | dx=["b6mCG9sphIT"],
60 | ou="qzGX4XdWufs",
61 | pe="2023"
62 | )
63 | df = client.analytics.to_pandas(query)
64 | print(df.head())
65 |
66 | Using Environment Variables
67 | ----------------------------
68 |
69 | For production, use environment variables:
70 |
71 | .. code-block:: bash
72 |
73 | export DHIS2_URL="https://your-server.com"
74 | export DHIS2_USERNAME="your_username"
75 | export DHIS2_PASSWORD="your_password"
76 |
77 | Then in your code:
78 |
79 | .. code-block:: python
80 |
81 | from pydhis2 import get_client, DHIS2Config
82 |
83 | config = DHIS2Config() # Automatically loads from environment
84 | AsyncDHIS2Client, _ = get_client()
85 |
86 | async with AsyncDHIS2Client(config) as client:
87 | # Your code here
88 | pass
89 |
90 | Next Steps
91 | ----------
92 |
93 | * Learn about :doc:`configuration` options
94 | * Explore :doc:`analytics` queries
95 | * See :doc:`cli` commands
96 |
97 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 |
132 | # IDE files
133 | .vscode/
134 | .idea/
135 | *.swp
136 | *.swo
137 | *~
138 |
139 | # OS files
140 | .DS_Store
141 | Thumbs.db
142 |
143 | # Test environments
144 | test_pypi_env/
--------------------------------------------------------------------------------
/pydhis2/dqr/config.yml:
--------------------------------------------------------------------------------
1 | # WHO DQR-aligned quality configuration
2 | # Version: 1.0 | Scope: Desk review metrics for DHIS2 workloads
3 |
4 | metadata:
5 | standard: "WHO Data Quality Review (DQR), Module 1/2 aligned"
6 | notes: "Thresholds are recommended starting points; countries/projects may adjust per SOP"
7 |
8 | # Dimension 1: Completeness (separating "reporting completeness" and "indicator completeness"; and separating "missing/zero values")
9 | completeness:
10 | reporting:
11 | thresholds:
12 | pass: 0.90 # Consistent with WHO DQ App common baseline
13 | warn: 0.80 # Recommended ≥0.75; <0.80 commonly used as warning
14 | levels: ["facility", "district"] # Output two levels (can be modified as needed)
15 |
16 | indicator:
17 | nonmissing:
18 | thresholds:
19 | pass: 0.90
20 | warn: 0.80
21 | nonzero:
22 | thresholds:
23 | pass: 0.90
24 | warn: 0.80
25 | rules:
26 | treat_zero_as_valid: true # "true zero" ≠ missing
27 | expected_reports_source: "orgunit_period_matrix" # Expected reports source definition
28 |
29 | # Dimension 2: Internal consistency (outliers/temporal consistency/related indicators consistency)
30 | consistency:
31 | outliers:
32 | zscore:
33 | moderate: 2.0 # 2-3 SD is flagged as a suggestion only; no hard exclusion
34 | extreme: 3.0 # ≥3 SD marked red, triggers review
35 | modified_z:
36 | enabled: true
37 | extreme: 3.5 # Robust method threshold (more stable when highly volatile)
38 | handle:
39 | moderate: "flag_only"
40 | extreme: "flag_and_review"
41 |
42 | trend:
43 | method: "mean_of_prior_3yrs" # or "forecast_from_prior_3yrs"
44 | baseline_window_years: 3
45 | unit_level_ratio_threshold: 0.33 # ±33%: current_year_value/3yr_baseline ∉ [0.67, 1.33] considered abnormal
46 | aggregate_pass_units_within_threshold: 0.90 # ≥90% units within threshold → pass
47 | aggregate_warn_units_within_threshold: 0.75 # 75-90% → warning, <75% → fail
48 |
49 | related_indicators:
50 | enabled: true
51 | pairs:
52 | - name: "DTP1_vs_DTP3"
53 | metric: "dropout_rate"
54 | warn_threshold: 0.10 # ≥10% dropout warning
55 | pass_threshold: 0.05 # ≤5% considered good
56 | - name: "ANC1_vs_FirstVisit"
57 | metric: "ratio"
58 | warn_lower: 0.80 # Ratio deviation outside 0.8-1.2 → warning
59 | warn_upper: 1.20
60 |
61 | # Dimension 3: Timeliness (note denominator = "reports received", not expected reports)
62 | timeliness:
63 | thresholds:
64 | timely_pass: 0.90
65 | ci_method: "clopper_pearson" # For confidence interval estimation of proportion metrics
66 |
67 |
68 |
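69 | # Worked example for the trend rule above (illustrative numbers): a unit whose
70 | # 3-year baseline mean is 900 and whose current-year value is 1250 has a ratio
71 | # of ~1.39, outside [0.67, 1.33], so it is flagged. If 82% of units stay within
72 | # the band, the aggregate result is "warn" (between 0.75 and 0.90).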
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | """Test configuration and fixtures"""
2 |
3 | import pytest
4 | import asyncio
5 | import sys
6 | from typing import AsyncGenerator
7 | from unittest.mock import AsyncMock
8 |
9 | from pydhis2.core.types import DHIS2Config
10 | from pydhis2.core.client import AsyncDHIS2Client
11 |
12 | # Set event loop policy for Windows
13 | if sys.platform == 'win32':
14 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
15 |
16 |
17 | @pytest.fixture
18 | def mock_config() -> DHIS2Config:
19 | """Mock configuration"""
20 | return DHIS2Config(
21 | base_url="https://test.dhis2.org",
22 | auth=("test_user", "test_pass"),
23 | rps=10.0,
24 | concurrency=5,
25 | max_retries=3,
26 | )
27 |
28 |
29 | @pytest.fixture
30 | async def mock_client(mock_config: DHIS2Config) -> AsyncGenerator[AsyncDHIS2Client, None]:
31 | """Mock client"""
32 | client = AsyncDHIS2Client(mock_config)
33 |
34 | # Mock session
35 | client._session = AsyncMock()
36 |
37 | yield client
38 |
39 | await client.close()
40 |
41 |
42 | @pytest.fixture
43 | def sample_analytics_response() -> dict:
44 | """Sample Analytics response"""
45 | return {
46 | "headers": [
47 | {"name": "dx", "column": "Data", "type": "TEXT"},
48 | {"name": "pe", "column": "Period", "type": "TEXT"},
49 | {"name": "ou", "column": "Organisation unit", "type": "TEXT"},
50 | {"name": "value", "column": "Value", "type": "NUMBER"}
51 | ],
52 | "metaData": {
53 | "items": {},
54 | "dimensions": {}
55 | },
56 | "rows": [
57 | ["Abc123", "2023Q1", "Def456", "100"],
58 | ["Abc123", "2023Q2", "Def456", "150"],
59 | ["Abc123", "2023Q3", "Def456", "200"]
60 | ],
61 | "width": 4,
62 | "height": 3
63 | }
64 |
65 |
66 | @pytest.fixture
67 | def sample_datavaluesets_response() -> dict:
68 | """Sample DataValueSets response"""
69 | return {
70 | "dataValues": [
71 | {
72 | "dataElement": "Abc123",
73 | "period": "202301",
74 | "orgUnit": "Def456",
75 | "value": "100",
76 | "lastUpdated": "2023-01-15T10:30:00.000"
77 | },
78 | {
79 | "dataElement": "Abc123",
80 | "period": "202302",
81 | "orgUnit": "Def456",
82 | "value": "150",
83 | "lastUpdated": "2023-02-15T10:30:00.000"
84 | }
85 | ]
86 | }
87 |
88 |
89 | @pytest.fixture(scope="session")
90 | def event_loop():
91 | """Create event loop"""
92 | loop = asyncio.new_event_loop()
93 | yield loop
94 | loop.close()
95 |
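96 |
97 | # Usage sketch (in a test module under tests/, not in this conftest): fixtures
98 | # are injected by name, e.g.
99 | #
100 | #     async def test_client_has_session(mock_client):
101 | #         assert mock_client._session is not None
102 | #
103 | # With asyncio_mode = "auto" (set in pyproject.toml), async tests need no
104 | # explicit pytest.mark.asyncio marker.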
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=45", "wheel", "setuptools-scm[toml]>=6.2"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "pydhis2"
7 | version = "0.2.0"
8 | description = "Reproducible DHIS2 Python SDK for LMIC scenarios"
9 | readme = "README.md"
10 | license = {text = "Apache-2.0"}
11 | authors = [
12 | {name = "pydhis2 contributors", email = "pydhis2@github.com"}
13 | ]
14 | maintainers = [
15 | {name = "pydhis2 contributors", email = "pydhis2@github.com"}
16 | ]
17 | classifiers = [
18 | "Development Status :: 4 - Beta",
19 | "Intended Audience :: Developers",
20 | "Intended Audience :: Healthcare Industry",
21 | "License :: OSI Approved :: Apache Software License",
22 | "Programming Language :: Python :: 3",
23 | "Programming Language :: Python :: 3.9",
24 | "Programming Language :: Python :: 3.10",
25 | "Programming Language :: Python :: 3.11",
26 | "Topic :: Software Development :: Libraries :: Python Modules",
27 | "Topic :: Scientific/Engineering :: Medical Science Apps.",
28 | ]
29 | requires-python = ">=3.9"
30 | dependencies = [
31 | "aiohttp>=3.8.0,<4.0.0",
32 | "aiofiles>=23.0.0,<26.0.0",
33 | "aiolimiter>=1.0.0,<2.0.0",
34 | "tenacity>=8.0.0,<10.0.0",
35 | "pandas>=1.5.0,<3.0.0",
36 | "pyarrow>=10.0.0,<22.0.0",
37 | "numpy>=1.20.0,<3.0.0",
38 | "pydantic>=2.0.0,<3.0.0",
39 | "pyyaml>=6.0,<7.0",
40 | "click>=8.0.0,<9.0.0",
41 | "typer>=0.9.0,<1.0.0",
42 | "cookiecutter>=2.1.0,<3.0.0",
43 | "opentelemetry-api>=1.15.0,<2.0.0",
44 | "opentelemetry-sdk>=1.15.0,<2.0.0",
45 | "opentelemetry-exporter-jaeger-thrift>=1.15.0,<2.0.0",
46 | "opentelemetry-exporter-prometheus>=0.36b0,<1.0.0",
47 | "opentelemetry-instrumentation-aiohttp-client>=0.36b0,<1.0.0",
48 | ]
49 |
50 | [project.optional-dependencies]
51 | dev = [
52 | "pytest>=7.0.0,<9.0.0",
53 | "pytest-asyncio>=0.23.0,<2.0.0",
54 | "pytest-mock>=3.10.0,<4.0.0",
55 | "pytest-cov>=4.0.0,<6.0.0",
56 | "ruff>=0.1.0,<1.0.0",
57 | ]
58 |
59 | [project.urls]
60 | Homepage = "https://github.com/HzaCode/pyDHIS2"
61 | Documentation = "https://hzacode.github.io/pyDHIS2"
62 | Repository = "https://github.com/HzaCode/pyDHIS2"
63 | Issues = "https://github.com/HzaCode/pyDHIS2/issues"
64 | Changelog = "https://github.com/HzaCode/pyDHIS2/blob/main/CHANGELOG.md"
65 | Discussions = "https://github.com/HzaCode/pyDHIS2/discussions"
66 |
67 | [project.scripts]
68 | pydhis2 = "pydhis2.__main__:main"
69 |
70 | [tool.setuptools.packages.find]
71 | include = ["pydhis2*"]
72 |
73 | [tool.pytest.ini_options]
74 | testpaths = ["tests"]
75 | python_files = ["test_*.py"]
76 | python_classes = ["Test*"]
77 | python_functions = ["test_*"]
78 | addopts = "-v --tb=short"
79 | asyncio_mode = "auto"
80 |
81 | [tool.ruff]
82 | line-length = 88
83 | target-version = "py39"
84 |
85 | [tool.ruff.lint]
86 | select = ["E", "F", "W", "I", "N", "UP", "B", "A", "C4", "PT"]
87 | ignore = ["E501", "N806", "N803"]
88 |
89 | [tool.ruff.lint.per-file-ignores]
90 | "tests/*" = ["A002", "A003"]
91 |
--------------------------------------------------------------------------------
/docs/metadata.rst:
--------------------------------------------------------------------------------
1 | Metadata
2 | ========
3 |
4 | The Metadata endpoint provides access to DHIS2 metadata (indicators, data elements, org units, etc.).
5 |
6 | Fetching Specific Metadata Types
7 | ---------------------------------
8 |
9 | .. code-block:: python
10 |
11 | from pydhis2 import get_client, DHIS2Config
12 |
13 | AsyncDHIS2Client, _ = get_client()
14 | config = DHIS2Config()
15 |
16 | async with AsyncDHIS2Client(config) as client:
17 | # Get data elements
18 | data_elements = await client.metadata.get_data_elements(
19 | fields="id,name,code,valueType",
20 | paging=False
21 | )
22 | print(data_elements)
23 |
24 | # Get indicators
25 | indicators = await client.metadata.get_indicators(
26 | fields="id,name,code,numerator,denominator"
27 | )
28 |
29 | # Get organisation units at specific level
30 | org_units = await client.metadata.get_organisation_units(
31 | fields="id,name,code,level,path",
32 | filter={"level": "3"}
33 | )
34 |
35 | Exporting Metadata
36 | ------------------
37 |
38 | Export metadata to JSON:
39 |
40 | .. code-block:: python
41 |
42 | import json
43 |
44 | async with AsyncDHIS2Client(config) as client:
45 | # Export with filters
46 | metadata = await client.metadata.export(
47 | fields=":owner",
48 | filter={"dataElements": "name:like:ANC"}
49 | )
50 |
51 | with open("metadata.json", "w") as f:
52 | json.dump(metadata, f, indent=2)
53 |
54 | # Or use the helper method
55 | await client.metadata.export_to_file(
56 | "metadata.json",
57 | filter={"indicators": "name:like:Malaria"}
58 | )
59 |
60 | Importing Metadata
61 | ------------------
62 |
63 | Import metadata from JSON:
64 |
65 | .. code-block:: python
66 |
67 | import json
68 | from pydhis2.core.types import ExportFormat
69 |
70 | async with AsyncDHIS2Client(config) as client:
71 | # Direct import
72 | with open("metadata.json") as f:
73 | metadata = json.load(f)
74 |
75 | summary = await client.metadata.import_(
76 | metadata,
77 | strategy="CREATE_AND_UPDATE",
78 | atomic=True
79 | )
80 | print(f"Imported: {summary.imported}")
81 | print(f"Updated: {summary.updated}")
82 |
83 | # Or use the helper method
84 | summary = await client.metadata.import_from_file("metadata.json")
85 |
86 | # Check for errors
87 | if summary.has_errors:
88 | conflicts_df = summary.get_conflicts_df()
89 | print(conflicts_df)
90 |
91 | Common Metadata Types
92 | ---------------------
93 |
94 | * ``dataElements`` - Data elements
95 | * ``indicators`` - Indicators
96 | * ``organisationUnits`` - Organisation units
97 | * ``dataSets`` - Data sets
98 | * ``programs`` - Programs
99 | * ``programStages`` - Program stages
100 | * ``trackedEntityTypes`` - Tracked entity types
101 | * ``optionSets`` - Option sets
102 |
103 |
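104 | Any of these collections can also be fetched with the client's generic HTTP
105 | helper. A minimal sketch (``fields`` and ``pageSize`` follow DHIS2 Web API
106 | conventions; the response nests items under the collection name):
107 |
108 | .. code-block:: python
109 |
110 |     async with AsyncDHIS2Client(config) as client:
111 |         response = await client.get(
112 |             "/api/optionSets",
113 |             params={"fields": "id,name", "pageSize": 50}
114 |         )
115 |         for option_set in response.get("optionSets", []):
116 |             print(option_set["id"], option_set["name"])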
--------------------------------------------------------------------------------
/docs/cli.rst:
--------------------------------------------------------------------------------
1 | Command Line Interface
2 | ======================
3 |
4 | pydhis2 provides a powerful CLI for common data operations.
5 |
6 | Installation Verification
7 | --------------------------
8 |
9 | Check version:
10 |
11 | .. code-block:: bash
12 |
13 | pydhis2 version
14 |
15 | Run quick demo:
16 |
17 | .. code-block:: bash
18 |
19 | pydhis2 demo quick
20 |
21 | Configuration
22 | -------------
23 |
24 | Set up DHIS2 connection:
25 |
26 | .. code-block:: bash
27 |
28 | pydhis2 config --url "https://your-server.com" --username "user"
29 |
30 | Analytics Commands
31 | ------------------
32 |
33 | Pull Analytics Data
34 | ~~~~~~~~~~~~~~~~~~~
35 |
36 | .. code-block:: bash
37 |
38 | pydhis2 analytics pull \
39 | --dx "indicator_id" \
40 | --ou "org_unit_id" \
41 | --pe "2023Q1:2023Q4" \
42 | --out analytics.parquet
43 |
44 | Query with Multiple Dimensions
45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
46 |
47 | .. code-block:: bash
48 |
49 | pydhis2 analytics pull \
50 | --dx "ind1,ind2,ind3" \
51 | --ou "LEVEL-3" \
52 | --pe "LAST_12_MONTHS" \
53 | --format csv \
54 | --out data.csv
55 |
56 | Tracker Commands
57 | ----------------
58 |
59 | Pull Events
60 | ~~~~~~~~~~~
61 |
62 | .. code-block:: bash
63 |
64 | pydhis2 tracker events \
65 | --program "program_id" \
66 | --status COMPLETED \
67 | --start-date "2023-01-01" \
68 | --end-date "2023-12-31" \
69 | --out events.parquet
70 |
71 | Pull Tracked Entities
72 | ~~~~~~~~~~~~~~~~~~~~~~
73 |
74 | .. code-block:: bash
75 |
76 | pydhis2 tracker entities \
77 | --type "person" \
78 | --ou "org_unit_id" \
79 | --out entities.parquet
80 |
81 | Data Quality Commands
82 | ---------------------
83 |
84 | Run DQR Analysis
85 | ~~~~~~~~~~~~~~~~
86 |
87 | .. code-block:: bash
88 |
89 | pydhis2 dqr analyze \
90 | --input analytics.parquet \
91 | --html dqr_report.html \
92 | --json dqr_summary.json
93 |
94 | Generate DQR Report
95 | ~~~~~~~~~~~~~~~~~~~
96 |
97 | .. code-block:: bash
98 |
99 | pydhis2 dqr report \
100 | --input analytics.parquet \
101 | --output report.html \
102 | --template custom_template.html
103 |
104 | Pipeline Commands
105 | -----------------
106 |
107 | Run Analysis Pipeline
108 | ~~~~~~~~~~~~~~~~~~~~~
109 |
110 | .. code-block:: bash
111 |
112 | pydhis2 pipeline run --recipe pipelines/analysis.yml
113 |
114 | Validate Pipeline
115 | ~~~~~~~~~~~~~~~~~
116 |
117 | .. code-block:: bash
118 |
119 | pydhis2 pipeline validate --recipe pipelines/analysis.yml
120 |
121 | Project Template
122 | ----------------
123 |
124 | Create New Project
125 | ~~~~~~~~~~~~~~~~~~
126 |
127 | .. code-block:: bash
128 |
129 | cookiecutter gh:HzaCode/pyDHIS2 --directory pydhis2/templates
130 |
131 | This will prompt you for project details and create a complete project structure.
132 |
133 | Help
134 | ----
135 |
136 | Get help for any command:
137 |
138 | .. code-block:: bash
139 |
140 | pydhis2 --help
141 | pydhis2 analytics --help
142 | pydhis2 tracker --help
143 |
144 |
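145 | Typical Workflow
146 | ----------------
147 |
148 | A typical session chains the commands above: configure the connection once,
149 | pull data, then run a quality review on the result:
150 |
151 | .. code-block:: bash
152 |
153 |     pydhis2 config --url "https://your-server.com" --username "user"
154 |     pydhis2 analytics pull \
155 |         --dx "indicator_id" \
156 |         --ou "org_unit_id" \
157 |         --pe "LAST_12_MONTHS" \
158 |         --out analytics.parquet
159 |     pydhis2 dqr analyze \
160 |         --input analytics.parquet \
161 |         --html dqr_report.html \
162 |         --json dqr_summary.json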
--------------------------------------------------------------------------------
/docs/configuration.rst:
--------------------------------------------------------------------------------
1 | Configuration
2 | =============
3 |
4 | pydhis2 can be configured through multiple methods: environment variables, configuration files, or directly in code.
5 |
6 | Environment Variables
7 | ---------------------
8 |
9 | The recommended approach for production:
10 |
11 | .. code-block:: bash
12 |
13 | export DHIS2_URL="https://your-dhis2-server.com"
14 | export DHIS2_USERNAME="your_username"
15 | export DHIS2_PASSWORD="your_password"
16 |
17 | Then use in code:
18 |
19 | .. code-block:: python
20 |
21 | from pydhis2 import DHIS2Config
22 |
23 | config = DHIS2Config() # Loads from environment
24 |
25 | Direct Configuration
26 | --------------------
27 |
28 | For development or scripts:
29 |
30 | .. code-block:: python
31 |
32 | from pydhis2 import DHIS2Config
33 |
34 | config = DHIS2Config(
35 | base_url="https://your-server.com",
36 | auth=("username", "password"),
37 | rps=10, # Requests per second
38 | concurrency=10, # Concurrent connections
39 | timeout=60, # Request timeout
40 | cache_enabled=True, # Enable HTTP caching
41 | )
42 |
43 | Advanced Options
44 | ----------------
45 |
46 | Rate Limiting
47 | ~~~~~~~~~~~~~
48 |
49 | Control request rates to avoid overwhelming the server:
50 |
51 | .. code-block:: python
52 |
53 | config = DHIS2Config(
54 | base_url="https://your-server.com",
55 | auth=("username", "password"),
56 | rps=5, # 5 requests per second
57 | )
58 |
59 | Retry Configuration
60 | ~~~~~~~~~~~~~~~~~~~
61 |
62 | Customize retry behavior:
63 |
64 | .. code-block:: python
65 |
66 | config = DHIS2Config(
67 | base_url="https://your-server.com",
68 | auth=("username", "password"),
69 | max_retries=5,
70 | retry_backoff=2.0,
71 | )
72 |
73 | Caching
74 | ~~~~~~~
75 |
76 | Enable HTTP caching for repeated requests:
77 |
78 | .. code-block:: python
79 |
80 | config = DHIS2Config(
81 | base_url="https://your-server.com",
82 | auth=("username", "password"),
83 | cache_enabled=True,
84 | cache_dir=".cache/dhis2",
85 | )
86 |
87 | Timeouts
88 | ~~~~~~~~
89 |
90 | Set connection and read timeouts:
91 |
92 | .. code-block:: python
93 |
94 | config = DHIS2Config(
95 | base_url="https://your-server.com",
96 | auth=("username", "password"),
97 | timeout=120, # Total request timeout in seconds
98 | )
99 |
100 | Using Configuration Files
101 | --------------------------
102 |
103 | You can also use YAML configuration files:
104 |
105 | .. code-block:: yaml
106 |
107 | # config.yml
108 | connection:
109 | base_url: "https://your-server.com"
110 | username: "your_username"
111 | password: "your_password"
112 | rps: 10
113 | concurrency: 10
114 |
115 | retry:
116 | max_attempts: 5
117 | backoff: 2.0
118 |
119 | cache:
120 | enabled: true
121 | directory: ".cache"
122 |
123 | Load it in code:
124 |
125 | .. code-block:: python
126 |
127 | import yaml
128 | from pydhis2 import DHIS2Config
129 |
130 | with open('config.yml') as f:
131 | config_dict = yaml.safe_load(f)
132 |
133 | config = DHIS2Config(**config_dict['connection'])
134 |
135 |
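136 | Only the ``connection`` block maps directly onto ``DHIS2Config`` arguments; the
137 | other blocks must be translated explicitly. A minimal sketch, mapping the YAML's
138 | ``retry.max_attempts`` onto ``max_retries`` and reusing the option names shown
139 | in the sections above:
140 |
141 | .. code-block:: python
142 |
143 |     config = DHIS2Config(
144 |         **config_dict['connection'],
145 |         max_retries=config_dict['retry']['max_attempts'],
146 |         retry_backoff=config_dict['retry']['backoff'],
147 |         cache_enabled=config_dict['cache']['enabled'],
148 |         cache_dir=config_dict['cache']['directory'],
149 |     )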
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | # For the full list of built-in configuration values, see:
3 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
4 |
5 | import os
6 | import sys
7 | sys.path.insert(0, os.path.abspath('..'))
8 |
9 | # -- Project information -----------------------------------------------------
10 | project = 'pydhis2'
11 | copyright = '2025, pydhis2 contributors'
12 | author = 'pydhis2 contributors'
13 | release = '0.2.0'
14 | version = '0.2.0'
15 |
16 | # -- General configuration ---------------------------------------------------
17 | extensions = [
18 | 'sphinx.ext.autodoc',
19 | 'sphinx.ext.napoleon',
20 | 'sphinx.ext.viewcode',
21 | 'sphinx.ext.intersphinx',
22 | 'sphinx.ext.autosummary',
23 | 'sphinx_autodoc_typehints',
24 | 'myst_parser',
25 | 'sphinx_copybutton',
26 | ]
27 |
28 | templates_path = ['_templates']
29 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
30 |
31 | # Napoleon settings for Google-style docstrings
32 | napoleon_google_docstring = True
33 | napoleon_numpy_docstring = False
34 | napoleon_include_init_with_doc = True
35 | napoleon_include_private_with_doc = False
36 | napoleon_include_special_with_doc = True
37 | napoleon_use_admonition_for_examples = False
38 | napoleon_use_admonition_for_notes = False
39 | napoleon_use_admonition_for_references = False
40 | napoleon_use_ivar = False
41 | napoleon_use_param = True
42 | napoleon_use_rtype = True
43 | napoleon_preprocess_types = False
44 | napoleon_type_aliases = None
45 | napoleon_attr_annotations = True
46 |
47 | # Autodoc settings
48 | autodoc_default_options = {
49 | 'members': True,
50 | 'member-order': 'bysource',
51 | 'special-members': '__init__',
52 | 'undoc-members': True,
53 | 'exclude-members': '__weakref__'
54 | }
55 |
56 | autosummary_generate = True
57 |
58 | # MyST Parser settings (for Markdown support)
59 | myst_enable_extensions = [
60 | "colon_fence",
61 | "deflist",
62 | "dollarmath",
63 | "fieldlist",
64 | "html_admonition",
65 | "html_image",
66 | "linkify",
67 | "replacements",
68 | "smartquotes",
69 | "strikethrough",
70 | "substitution",
71 | "tasklist",
72 | ]
73 |
74 | # Intersphinx mapping
75 | intersphinx_mapping = {
76 | 'python': ('https://docs.python.org/3', None),
77 | 'pandas': ('https://pandas.pydata.org/docs/', None),
78 | 'aiohttp': ('https://docs.aiohttp.org/en/stable/', None),
79 | }
80 |
81 | # -- Options for HTML output -------------------------------------------------
82 | html_theme = 'sphinx_rtd_theme'
83 | html_static_path = ['_static']
84 | html_logo = '../image.png'
85 | html_favicon = '../image.png'
86 |
87 | html_theme_options = {
88 | 'logo_only': False,
89 | 'display_version': True,
90 | 'prev_next_buttons_location': 'bottom',
91 | 'style_external_links': True,
92 | 'vcs_pageview_mode': '',
93 | 'style_nav_header_background': '#2980B9',
94 | # Toc options
95 | 'collapse_navigation': False,
96 | 'sticky_navigation': True,
97 | 'navigation_depth': 4,
98 | 'includehidden': True,
99 | 'titles_only': False
100 | }
101 |
102 | html_context = {
103 | "display_github": True,
104 | "github_user": "HzaCode",
105 | "github_repo": "pyDHIS2",
106 | "github_version": "main",
107 | "conf_py_path": "/docs/",
108 | }
109 |
110 | # -- Options for LaTeX output ------------------------------------------------
111 | latex_elements = {
112 | 'papersize': 'a4paper',
113 | 'pointsize': '10pt',
114 | }
115 |
116 | # -- Extension configuration -------------------------------------------------
117 | copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: "
118 | copybutton_prompt_is_regexp = True
119 |
120 |
--------------------------------------------------------------------------------
/docs/tracker.rst:
--------------------------------------------------------------------------------
1 | Tracker
2 | =======
3 |
4 | The Tracker endpoint provides access to DHIS2 individual-level data (events and tracked entities).
5 |
6 | Fetching Events as Raw JSON
7 | ----------------------------
8 |
9 | .. code-block:: python
10 |
11 | from pydhis2 import get_client, DHIS2Config
12 |
13 | AsyncDHIS2Client, _ = get_client()
14 | config = DHIS2Config()
15 |
16 | async with AsyncDHIS2Client(config) as client:
17 | # Get raw JSON response
18 | events = await client.tracker.events(
19 | program="programId",
20 | org_unit="orgUnitId",
21 | start_date="2023-01-01",
22 | end_date="2023-12-31",
23 | page_size=100
24 | )
25 | print(events)
26 |
27 | Fetching Events as DataFrame
28 | -----------------------------
29 |
30 | .. code-block:: python
31 |
32 | # Get events directly as DataFrame
33 | async with AsyncDHIS2Client(config) as client:
34 | df = await client.tracker.events_to_pandas(
35 | program="programId",
36 | org_unit="orgUnitId",
37 | status="COMPLETED",
38 | paging_size=200
39 | )
40 | print(df)
41 |
42 | Streaming Events
43 | ----------------
44 |
45 | For large datasets, stream events in pages:
46 |
47 | .. code-block:: python
48 |
49 | async with AsyncDHIS2Client(config) as client:
50 | async for page_df in client.tracker.stream_events(
51 | program="programId",
52 | org_unit="orgUnitId",
53 | page_size=200
54 | ):
55 | print(f"Processing {len(page_df)} events")
56 | # Process each page DataFrame
57 | # page_df is a pandas DataFrame
58 |
59 | Creating Events
60 | ---------------
61 |
62 | .. code-block:: python
63 |
64 | async with AsyncDHIS2Client(config) as client:
65 | event = {
66 | "program": "programId",
67 | "orgUnit": "orgUnitId",
68 | "occurredAt": "2023-01-15T10:00:00",
69 | "status": "COMPLETED",
70 | "dataValues": [
71 | {"dataElement": "dataElementId", "value": "100"}
72 | ]
73 | }
74 |
75 | response = await client.tracker.create_event(event)
76 | print(response)
77 |
78 | Tracked Entities (Raw JSON)
79 | ----------------------------
80 |
81 | Query tracked entities as raw JSON:
82 |
83 | .. code-block:: python
84 |
85 | async with AsyncDHIS2Client(config) as client:
86 | entities = await client.tracker.tracked_entities(
87 | tracked_entity_type="personId",
88 | org_unit="orgUnitId",
89 | page_size=50
90 | )
91 | print(entities)
92 |
93 | Tracked Entities (DataFrame)
94 | -----------------------------
95 |
96 | Query tracked entities and convert to DataFrame:
97 |
98 | .. code-block:: python
99 |
100 | async with AsyncDHIS2Client(config) as client:
101 | df = await client.tracker.tracked_entities_to_pandas(
102 | org_unit="orgUnitId",
103 | program="programId",
104 | paging_size=200
105 | )
106 | print(df)
107 |
108 | Export to File
109 | --------------
110 |
111 | .. code-block:: python
112 |
113 | from pydhis2.core.types import ExportFormat
114 |
115 | async with AsyncDHIS2Client(config) as client:
116 | # Export events to Parquet
117 | await client.tracker.export_events_to_file(
118 | "events.parquet",
119 | format=ExportFormat.PARQUET,
120 | program="programId",
121 | org_unit="orgUnitId"
122 | )
123 |
124 | # Export tracked entities to CSV
125 | await client.tracker.export_tracked_entities_to_file(
126 | "entities.csv",
127 | format=ExportFormat.CSV,
128 | org_unit="orgUnitId"
129 | )
130 |
131 |
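132 | Aggregating Streamed Events
133 | ---------------------------
134 |
135 | Pages yielded by ``stream_events`` can be reduced on the fly instead of being
136 | accumulated in memory. A minimal sketch building a per-status tally, assuming
137 | the events frame keeps the ``status`` column used in the examples above:
138 |
139 | .. code-block:: python
140 |
141 |     from collections import Counter
142 |
143 |     status_counts = Counter()
144 |     async with AsyncDHIS2Client(config) as client:
145 |         async for page_df in client.tracker.stream_events(
146 |             program="programId",
147 |             org_unit="orgUnitId",
148 |             page_size=200
149 |         ):
150 |             status_counts.update(page_df["status"].dropna())
151 |     print(dict(status_counts))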
--------------------------------------------------------------------------------
/docs/datavaluesets.rst:
--------------------------------------------------------------------------------
1 | DataValueSets
2 | =============
3 |
4 | The DataValueSets endpoint allows you to read and write individual data values.
5 |
6 | Pulling (Reading) Data Values
7 | ------------------------------
8 |
9 | .. code-block:: python
10 |
11 | from pydhis2 import get_client, DHIS2Config
12 |
13 | AsyncDHIS2Client, _ = get_client()
14 | config = DHIS2Config()
15 |
16 | async with AsyncDHIS2Client(config) as client:
17 | # Pull data values - returns DataFrame directly
18 | df = await client.datavaluesets.pull(
19 | data_set="dataSetId",
20 | org_unit="orgUnitId",
21 | period="202301"
22 | )
23 | print(df)
24 |
25 | # Pull with date range
26 | df = await client.datavaluesets.pull(
27 | data_set="dataSetId",
28 | org_unit="orgUnitId",
29 | start_date="2023-01-01",
30 | end_date="2023-12-31",
31 | children=True # Include child org units
32 | )
33 |
34 | Pushing (Writing) Data Values
35 | ------------------------------
36 |
37 | .. code-block:: python
38 |
39 | from pydhis2.core.types import ImportConfig, ImportStrategy
40 |
41 | async with AsyncDHIS2Client(config) as client:
42 | # Prepare data values
43 | data_values = {
44 | "dataSet": "dataSetId",
45 | "completeDate": "2023-01-31",
46 | "period": "202301",
47 | "orgUnit": "orgUnitId",
48 | "dataValues": [
49 | {
50 | "dataElement": "dataElementId",
51 | "value": "100"
52 | }
53 | ]
54 | }
55 |
56 | # Push data
57 | summary = await client.datavaluesets.push(
58 | data_values,
59 | config=ImportConfig(
60 | strategy=ImportStrategy.CREATE_AND_UPDATE,
61 | dry_run=False
62 | )
63 | )
64 |
65 | print(f"Imported: {summary.imported}")
66 | print(f"Updated: {summary.updated}")
67 | print(f"Conflicts: {len(summary.conflicts)}")
68 |
69 | # Check conflicts
70 | if summary.has_conflicts:
71 | conflicts_df = summary.conflicts_df
72 | print(conflicts_df)
73 |
74 | Bulk Import with Chunking
75 | -------------------------
76 |
77 | Import large datasets efficiently with automatic chunking:
78 |
79 | .. code-block:: python
80 |
81 | import pandas as pd
82 | from pydhis2.core.types import ImportConfig
83 |
84 | async with AsyncDHIS2Client(config) as client:
85 | # Read DataFrame
86 | df = pd.read_csv("data.csv")
87 |
88 | # Push with automatic chunking
89 | summary = await client.datavaluesets.push(
90 | df,
91 | chunk_size=5000, # Process 5000 records per chunk
92 | config=ImportConfig(atomic=False)
93 | )
94 |
95 | print(f"Total imported: {summary.imported}")
96 | print(f"Total updated: {summary.updated}")
97 |
98 | Streaming Large Datasets
99 | -------------------------
100 |
101 | For very large datasets, stream in pages:
102 |
103 | .. code-block:: python
104 |
105 | async with AsyncDHIS2Client(config) as client:
106 | async for page_df in client.datavaluesets.pull_paginated(
107 | data_set="dataSetId",
108 | org_unit="orgUnitId",
109 | page_size=5000
110 | ):
111 | print(f"Processing {len(page_df)} records")
112 | # Process each page
113 |
114 | Export to File
115 | --------------
116 |
117 | .. code-block:: python
118 |
119 | from pydhis2.core.types import ExportFormat
120 |
121 | async with AsyncDHIS2Client(config) as client:
122 | await client.datavaluesets.export_to_file(
123 | "datavalues.parquet",
124 | format=ExportFormat.PARQUET,
125 | data_set="dataSetId",
126 | org_unit="orgUnitId",
127 | period="202301"
128 | )
129 |
130 |
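131 | Round-Trip: Pull, Transform, Push
132 | ---------------------------------
133 |
134 | A common pattern is to pull values, adjust them in pandas, and push the result
135 | back. A minimal sketch reusing the calls above (and assuming the pulled frame
136 | keeps the ``value`` column from the API):
137 |
138 | .. code-block:: python
139 |
140 |     import pandas as pd
141 |
142 |     async with AsyncDHIS2Client(config) as client:
143 |         df = await client.datavaluesets.pull(
144 |             data_set="dataSetId",
145 |             org_unit="orgUnitId",
146 |             period="202301"
147 |         )
148 |
149 |         # Example transformation: coerce values to numeric and drop blanks
150 |         df["value"] = pd.to_numeric(df["value"], errors="coerce")
151 |         df = df.dropna(subset=["value"])
152 |
153 |         summary = await client.datavaluesets.push(df, chunk_size=5000)
154 |         print(f"Imported: {summary.imported}, updated: {summary.updated}")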
--------------------------------------------------------------------------------
/docs/analytics.rst:
--------------------------------------------------------------------------------
1 | Analytics
2 | =========
3 |
4 | The Analytics endpoint provides access to DHIS2 aggregated analytics data.
5 |
6 | Basic Query
7 | -----------
8 |
9 | .. code-block:: python
10 |
11 | from pydhis2 import get_client, DHIS2Config
12 | from pydhis2.core.types import AnalyticsQuery
13 |
14 | AsyncDHIS2Client, _ = get_client()
15 | config = DHIS2Config()
16 |
17 | async with AsyncDHIS2Client(config) as client:
18 | query = AnalyticsQuery(
19 | dx=["indicator_id"], # Data dimension
20 | ou=["org_unit_id"], # Organisation unit
21 | pe="2023" # Period
22 | )
23 | df = await client.analytics.to_pandas(query)
24 | print(df)
25 |
26 | Query Parameters
27 | ----------------
28 |
29 | dx (Data Dimension)
30 | ~~~~~~~~~~~~~~~~~~~
31 |
32 | Indicators, data elements, or data sets:
33 |
34 | .. code-block:: python
35 |
36 | query = AnalyticsQuery(
37 | dx=["b6mCG9sphIT", "fbfJHSPpUQD"], # Multiple data elements
38 | ou="qzGX4XdWufs",
39 | pe="2023"
40 | )
41 |
42 | ou (Organisation Units)
43 | ~~~~~~~~~~~~~~~~~~~~~~~
44 |
45 | Specific org units or levels:
46 |
47 | .. code-block:: python
48 |
49 | query = AnalyticsQuery(
50 | dx=["b6mCG9sphIT"],
51 | ou=["LEVEL-3", "OU_GROUP-abc123"], # Level or group
52 | pe="2023"
53 | )
54 |
55 | pe (Periods)
56 | ~~~~~~~~~~~~
57 |
58 | Various period formats:
59 |
60 | .. code-block:: python
61 |
62 | # Single year
63 | pe="2023"
64 |
65 | # Multiple periods
66 | pe=["2022", "2023"]
67 |
68 | # Quarterly
69 | pe="2023Q1;2023Q2;2023Q3;2023Q4"
70 |
71 | # Monthly
72 | pe="202301;202302;202303"
73 |
74 | # Relative periods
75 | pe="LAST_12_MONTHS"
76 |
77 | DataFrame Conversion
78 | --------------------
79 |
80 | Convert directly to pandas DataFrame:
81 |
82 | .. code-block:: python
83 |
84 | df = await client.analytics.to_pandas(query)
85 | print(df.columns)
86 | # ['dx', 'ou', 'pe', 'value']
87 |
88 | Export Formats
89 | --------------
90 |
91 | Parquet
92 | ~~~~~~~
93 |
94 | .. code-block:: python
95 |
96 | from pydhis2.core.types import ExportFormat
97 |
98 | await client.analytics.export_to_file(
99 | query,
100 | "output.parquet",
101 | format=ExportFormat.PARQUET
102 | )
103 |
104 | CSV
105 | ~~~
106 |
107 | .. code-block:: python
108 |
109 | from pydhis2.core.types import ExportFormat
110 |
111 | await client.analytics.export_to_file(
112 | query,
113 | "output.csv",
114 | format=ExportFormat.CSV
115 | )
116 |
117 | Arrow
118 | ~~~~~
119 |
120 | .. code-block:: python
121 |
122 | table = await client.analytics.to_arrow(query)
123 | print(table.schema)
124 |
125 | Pagination and Streaming
126 | -------------------------
127 |
128 | For large datasets:
129 |
130 | .. code-block:: python
131 |
132 | async with AsyncDHIS2Client(config) as client:
133 | async for page_df in client.analytics.stream_paginated(
134 | query,
135 | page_size=1000,
136 | max_pages=10
137 | ):
138 | print(f"Processing {len(page_df)} records")
139 | # Process each page DataFrame
140 | # page_df is a pandas DataFrame
141 |
142 | Filters
143 | -------
144 |
145 | Add filters to your query:
146 |
147 | .. code-block:: python
148 |
149 | query = AnalyticsQuery(
150 | dx=["b6mCG9sphIT"],
151 | ou="qzGX4XdWufs",
152 | pe="2023",
153 | filters={"age": "AGE_0_4", "sex": "FEMALE"}
154 | )
155 |
156 | Advanced Options
157 | ----------------
158 |
159 | Skip Metadata
160 | ~~~~~~~~~~~~~
161 |
162 | .. code-block:: python
163 |
164 | query = AnalyticsQuery(
165 | dx=["b6mCG9sphIT"],
166 | ou="qzGX4XdWufs",
167 | pe="2023",
168 | skip_meta=True # Don't include metadata
169 | )
170 |
171 | Hierarchy Meta
172 | ~~~~~~~~~~~~~~
173 |
174 | .. code-block:: python
175 |
176 | query = AnalyticsQuery(
177 | dx=["b6mCG9sphIT"],
178 | ou="qzGX4XdWufs",
179 | pe="2023",
180 | hierarchy_meta=True # Include org unit hierarchy
181 | )
182 |
183 |
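184 | Combining Streamed Pages
185 | ------------------------
186 |
187 | As a sketch, pages yielded by ``stream_paginated`` can be collected and
188 | concatenated once streaming completes, trading memory for convenience:
189 |
190 | .. code-block:: python
191 |
192 |     import pandas as pd
193 |
194 |     pages = []
195 |     async with AsyncDHIS2Client(config) as client:
196 |         async for page_df in client.analytics.stream_paginated(
197 |             query,
198 |             page_size=1000
199 |         ):
200 |             pages.append(page_df)
201 |
202 |     df = pd.concat(pages, ignore_index=True) if pages else pd.DataFrame()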
--------------------------------------------------------------------------------
/pydhis2/templates/{{cookiecutter.project_slug}}/README.md:
--------------------------------------------------------------------------------
1 | # {{ cookiecutter.project_name }}
2 |
3 | {{ cookiecutter.project_description }}
4 |
5 | ## Project Structure
6 |
7 | ```
8 | {{ cookiecutter.project_slug }}/
9 | ├── configs/ # Configuration files
10 | ├── data/ # Data files
11 | ├── notebooks/ # Jupyter notebooks
12 | ├── pipelines/ # Data pipeline configurations
13 | ├── reports/ # Generated reports
14 | ├── scripts/ # Script files
15 | └── requirements.txt # Python dependencies
16 | ```
17 |
18 | ## Quick Start
19 |
20 | ### 1. Install Dependencies
21 |
22 | ```bash
23 | pip install -r requirements.txt
24 | ```
25 |
26 | ### 2. Configure DHIS2 Connection
27 |
28 | Copy `env.example` to `.env` and fill in your DHIS2 connection details:
29 |
30 | ```bash
31 | cp env.example .env
32 | ```
33 |
34 | Edit the `.env` file:
35 |
36 | ```
37 | DHIS2_URL={{ cookiecutter.dhis2_url }}
38 | DHIS2_USERNAME=your_username
39 | DHIS2_PASSWORD=your_password
40 | ```
41 |
42 | ### 3. Test Connection
43 |
44 | ```bash
45 | pydhis2 login
46 | ```
47 |
48 | ### 4. Run Example Pipeline
49 |
50 | ```bash
51 | pydhis2 pipeline run --recipe pipelines/example.yml
52 | ```
53 |
54 | ## Usage Guide
55 |
56 | ### Data Pulling
57 |
58 | Pull Analytics data:
59 | ```bash
60 | pydhis2 analytics pull --dx "indicator_id" --ou "org_unit_id" --pe "2023Q1:2023Q4" --out data/analytics.parquet
61 | ```
62 |
63 | Pull Tracker events:
64 | ```bash
65 | pydhis2 tracker pull --program "program_id" --status COMPLETED --out data/events.parquet
66 | ```
67 |
68 | ### Data Quality Review
69 |
70 | Run DQR analysis:
71 | ```bash
72 | pydhis2 dqr run --input data/analytics.parquet --html reports/dqr_report.html --json reports/dqr_summary.json
73 | ```
74 |
75 | ### Jupyter Notebooks
76 |
77 | {% if cookiecutter.use_notebooks == "yes" -%}
78 | Start Jupyter Lab:
79 | ```bash
80 | jupyter lab
81 | ```
82 |
83 | Example notebooks:
84 | - `01_data_exploration.ipynb` - Data Exploration
85 | - `02_quality_assessment.ipynb` - Data Quality Assessment
86 | - `03_analysis_and_visualization.ipynb` - Analysis and Visualization
87 | {%- endif %}
88 |
89 | ## Configuration Details
90 |
91 | ### DHIS2 Configuration (`configs/dhis2.yml`)
92 |
93 | ```yaml
94 | # DHIS2 connection configuration
95 | connection:
96 | base_url: "{{ cookiecutter.dhis2_url }}"
97 | rps: 8 # Requests per second
98 | concurrency: 8 # Concurrent connections
99 | timeouts: [10, 60, 120] # Connect/read/total timeout
100 |
101 | # Retry configuration
102 | retry:
103 | max_attempts: 5
104 | base_delay: 0.5
105 | max_delay: 60.0
106 | ```
107 |
108 | {% if cookiecutter.use_dqr == "yes" -%}
109 | ### DQR Configuration (`configs/dqr.yml`)
110 |
111 | ```yaml
112 | # Data quality rules
113 | completeness:
114 | thresholds:
115 | reporting_completeness_pass: 0.90
116 | reporting_completeness_warn: 0.70
117 |
118 | consistency:
119 | thresholds:
120 | outlier_threshold: 3.0
121 | variance_threshold: 0.5
122 |
123 | timeliness:
124 | thresholds:
125 | submission_timeliness_pass: 0.80
126 | max_delay_days: 30
127 | ```
128 | {%- endif %}
129 |
130 | {% if cookiecutter.use_pipeline == "yes" -%}
131 | ### Pipeline Configuration (`pipelines/example.yml`)
132 |
133 | ```yaml
134 | name: "Example Data Analysis Pipeline"
135 | description: "Pull data, assess quality, generate reports"
136 |
137 | steps:
138 | - type: analytics_pull
139 | name: "Pull Analytics Data"
140 | dx: "indicator_id"
141 | ou: "org_unit_id"
142 | pe: "2023Q1:2023Q4"
143 | output: "analytics.parquet"
144 |
145 | - type: dqr
146 | name: "Data Quality Review"
147 | input: "analytics.parquet"
148 | html_output: "dqr_report.html"
149 | json_output: "dqr_summary.json"
150 | ```
151 | {%- endif %}
152 |
153 | ## License
154 |
155 | {% if cookiecutter.license == "Apache-2.0" -%}
156 | Apache License 2.0
157 | {%- elif cookiecutter.license == "MIT" -%}
158 | MIT License
159 | {%- else -%}
160 | BSD 3-Clause License
161 | {%- endif %}
162 |
163 | ## Author
164 |
165 | {{ cookiecutter.author_name }} ({{ cookiecutter.author_email }})
166 |
--------------------------------------------------------------------------------
/pydhis2/endpoints/analytics.py:
--------------------------------------------------------------------------------
1 | """Analytics endpoint - Analysis data queries and DataFrame conversion"""
2 |
3 | from collections.abc import AsyncIterator
4 | from typing import Any, Dict, Optional
5 |
6 | import pandas as pd
7 | import pyarrow as pa
8 |
9 | from pydhis2.core.types import AnalyticsQuery, ExportFormat
10 | from pydhis2.io.arrow import ArrowConverter
11 | from pydhis2.io.to_pandas import AnalyticsDataFrameConverter
12 |
13 |
14 | class AnalyticsEndpoint:
15 | """Analytics API endpoint"""
16 |
17 | def __init__(self, client):
18 | self.client = client
19 | self.converter = AnalyticsDataFrameConverter()
20 | self.arrow_converter = ArrowConverter()
21 |
22 | async def raw(
23 | self,
24 | query: AnalyticsQuery,
25 | output_format: str = "json"
26 | ) -> Dict[str, Any]:
27 | """Get raw JSON data"""
28 | params = query.to_params()
29 | if output_format != "json":
30 | params['format'] = output_format
31 |
32 | return await self.client.get('/api/analytics', params=params)
33 |
34 | async def to_pandas(
35 | self,
36 | query: AnalyticsQuery,
37 | long_format: bool = True
38 | ) -> pd.DataFrame:
39 | """Convert to Pandas DataFrame"""
40 | data = await self.raw(query)
41 | return self.converter.to_dataframe(data, long_format=long_format)
42 |
43 | async def to_arrow(
44 | self,
45 | query: AnalyticsQuery,
46 | long_format: bool = True
47 | ) -> pa.Table:
48 | """Convert to Arrow Table"""
49 | df = await self.to_pandas(query, long_format=long_format)
50 | return self.arrow_converter.from_pandas(df)
51 |
52 | async def stream_paginated(
53 | self,
54 | query: AnalyticsQuery,
55 | page_size: int = 1000,
56 | max_pages: Optional[int] = None
57 | ) -> AsyncIterator[pd.DataFrame]:
58 | """Stream paginated data"""
59 | page = 1
60 |
61 | while True:
62 | # Modify query parameters to add paging
63 | page_params = query.to_params()
64 | page_params.update({
65 | 'page': page,
66 | 'pageSize': page_size,
67 | 'paging': 'true'
68 | })
69 |
70 | response = await self.client.get('/api/analytics', params=page_params)
71 |
72 | # Convert to DataFrame
73 | df = self.converter.to_dataframe(response, long_format=True)
74 | if not df.empty:
75 | yield df
76 |
77 | # Check pagination information
78 | pager = response.get('pager', {})
79 | total_pages = pager.get('pageCount', 1)
80 |
81 | if page >= total_pages:
82 | break
83 |
84 | if max_pages and page >= max_pages:
85 | break
86 |
87 | page += 1
88 |
89 | async def export_to_file(
90 | self,
91 | query: AnalyticsQuery,
92 | file_path: str,
93 | format: ExportFormat = ExportFormat.PARQUET,
94 | **kwargs
95 | ) -> str:
96 | """Export to file"""
97 | df = await self.to_pandas(query)
98 |
99 | if format == ExportFormat.PARQUET:
100 | df.to_parquet(file_path, **kwargs)
101 | elif format == ExportFormat.CSV:
102 | df.to_csv(file_path, **kwargs)
103 | elif format == ExportFormat.EXCEL:
104 | df.to_excel(file_path, **kwargs)
105 | elif format == ExportFormat.FEATHER:
106 | df.to_feather(file_path, **kwargs)
107 | elif format == ExportFormat.JSON:
108 | df.to_json(file_path, **kwargs)
109 | else:
110 | raise ValueError(f"Unsupported export format: {format}")
111 |
112 | return file_path
113 |
114 | async def get_dimensions(self) -> Dict[str, Any]:
115 | """Get available dimensions"""
116 | return await self.client.get('/api/analytics/dimensions')
117 |
118 | async def get_dimension_items(self, dimension: str) -> Dict[str, Any]:
119 | """Get items for a specific dimension"""
120 | return await self.client.get(f'/api/analytics/dimensions/{dimension}')
121 |
122 | async def validate_query(self, query: AnalyticsQuery) -> Dict[str, Any]:
123 | """Validate query (dry run)"""
124 | params = query.to_params()
125 | params['dryRun'] = 'true'
126 | return await self.client.get('/api/analytics', params=params)
127 |
--------------------------------------------------------------------------------
/docs/dqr.rst:
--------------------------------------------------------------------------------
1 | Data Quality Review (DQR)
2 | =========================
3 |
4 | pydhis2 includes built-in Data Quality Review metrics based on WHO standards.
5 |
6 | Overview
7 | --------
8 |
9 | The DQR module helps you assess data quality across three dimensions:
10 |
11 | * **Completeness**: Are all expected data values present?
12 | * **Consistency**: Are the data values reasonable and consistent?
13 | * **Timeliness**: Are the data submitted on time?
14 |
15 | Basic Usage
16 | -----------
17 |
18 | .. code-block:: python
19 |
20 | from pydhis2 import get_client, DHIS2Config
21 | from pydhis2.core.types import AnalyticsQuery
22 | from pydhis2.dqr.metrics import CompletenessMetrics, ConsistencyMetrics, TimelinessMetrics
23 |
24 | AsyncDHIS2Client, _ = get_client()
25 | config = DHIS2Config()
26 |
27 | async with AsyncDHIS2Client(config) as client:
28 | # Fetch analytics data
29 | query = AnalyticsQuery(dx=["indicator_id"], ou="org_unit_id", pe="2023")
30 | df = await client.analytics.to_pandas(query)
31 |
32 | # Run DQR analysis
33 | completeness = CompletenessMetrics()
34 | consistency = ConsistencyMetrics()
35 | timeliness = TimelinessMetrics()
36 |
37 | completeness_results = completeness.calculate(df)
38 | consistency_results = consistency.calculate(df)
39 | timeliness_results = timeliness.calculate(df)
40 |
41 | for result in completeness_results + consistency_results + timeliness_results:
42 | print(f"{result.metric_name}: {result.value:.2%} ({result.status})")
43 |
44 | Completeness Metrics
45 | --------------------
46 |
47 | .. code-block:: python
48 |
49 | from pydhis2.dqr.metrics import CompletenessMetrics
50 |
51 | completeness = CompletenessMetrics()
52 | results = completeness.calculate(df)
53 |
54 | for result in results:
55 | print(f"{result.metric_name}: {result.value:.2%}")
56 | print(f"Status: {result.status}")
57 | print(f"Message: {result.message}")
58 | print(f"Details: {result.details}")
59 |
60 | Consistency Metrics
61 | -------------------
62 |
63 | .. code-block:: python
64 |
65 | from pydhis2.dqr.metrics import ConsistencyMetrics
66 |
67 | consistency = ConsistencyMetrics()
68 | results = consistency.calculate(df)
69 |
70 | for result in results:
71 | print(f"{result.metric_name}: {result.value:.2%}")
72 | if result.metric_name == "outlier_detection":
73 | print(f"Outliers detected: {result.details.get('outlier_count')}")
74 |
75 | Timeliness Metrics
76 | ------------------
77 |
78 | .. code-block:: python
79 |
80 | from pydhis2.dqr.metrics import TimelinessMetrics
81 |
82 | timeliness = TimelinessMetrics()
83 | results = timeliness.calculate(df)
84 |
85 | for result in results:
86 | print(f"{result.metric_name}: {result.value:.2%}")
87 | print(f"Timely records: {result.details.get('timely_records')}/{result.details.get('total_records')}")
88 |
89 | Generating Reports
90 | ------------------
91 |
92 | Collect All Results
93 | ~~~~~~~~~~~~~~~~~~~
94 |
95 | .. code-block:: python
96 |
97 | import json
98 | from pydhis2.dqr.metrics import CompletenessMetrics, ConsistencyMetrics, TimelinessMetrics
99 |
100 | # Calculate all metrics
101 | completeness = CompletenessMetrics()
102 | consistency = ConsistencyMetrics()
103 | timeliness = TimelinessMetrics()
104 |
105 | all_results = (
106 | completeness.calculate(df) +
107 | consistency.calculate(df) +
108 | timeliness.calculate(df)
109 | )
110 |
111 | # Convert to summary dict
112 | summary = {
113 | "metrics": [
114 | {
115 | "name": r.metric_name,
116 | "value": r.value,
117 | "status": r.status,
118 | "message": r.message,
119 | "details": r.details
120 | }
121 | for r in all_results
122 | ]
123 | }
124 |
125 | # Save to JSON
126 | with open("dqr_summary.json", "w") as f:
127 | json.dump(summary, f, indent=2)
128 |
129 | Configuration
130 | -------------
131 |
132 | Customize DQR thresholds in ``configs/dqr.yml``:
133 |
134 | .. code-block:: yaml
135 |
136 | completeness:
137 | thresholds:
138 | pass: 0.90
139 | warn: 0.70
140 |
141 | consistency:
142 | thresholds:
143 | outlier: 3.0
144 | variance: 0.5
145 |
146 | timeliness:
147 | thresholds:
148 | pass: 0.80
149 | max_delay_days: 30
150 |
151 |
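152 | These thresholds can be loaded and applied to metric results by hand. A minimal
153 | sketch, assuming the YAML layout above and the ``completeness_results`` from the
154 | basic usage example:
155 |
156 | .. code-block:: python
157 |
158 |     import yaml
159 |
160 |     with open("configs/dqr.yml") as f:
161 |         dqr_config = yaml.safe_load(f)
162 |
163 |     thresholds = dqr_config["completeness"]["thresholds"]
164 |     for result in completeness_results:
165 |         if result.value >= thresholds["pass"]:
166 |             print(f"{result.metric_name}: pass")
167 |         elif result.value >= thresholds["warn"]:
168 |             print(f"{result.metric_name}: warn")
169 |         else:
170 |             print(f"{result.metric_name}: fail")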
--------------------------------------------------------------------------------
/pydhis2/testing/demo_test.py:
--------------------------------------------------------------------------------
1 | """Demo test showing how to use pydhis2 testing utilities"""
2 |
3 | import asyncio
4 | import logging
5 | import random
6 | from pydhis2.core.client import AsyncDHIS2Client
7 | from pydhis2.core.types import DHIS2Config
8 | from pydhis2.testing import (
9 | BenchmarkRunner,
10 | MockDHIS2Server,
11 | NetworkSimulator,
12 | TestDataGenerator,
13 | )
14 |
15 | logging.basicConfig(level=logging.INFO)
16 | logger = logging.getLogger(__name__)
17 |
18 |
19 | async def demo_mock_server():
20 | """Demonstrate mock server usage"""
21 | print("\n=== Mock Server Demo ===")
22 |
23 | # Create test data
24 | generator = TestDataGenerator()
25 | org_units = generator.generate_org_units(5)
26 | data_elements = generator.generate_data_elements(3)
27 | periods = generator.generate_periods(months=6)
28 |
29 | # Start mock server
30 | mock_server = MockDHIS2Server(port=8081)
31 |
32 | # Configure responses
33 | analytics_response = generator.generate_analytics_response(
34 | data_elements, org_units, periods
35 | )
36 | mock_server.configure_analytics_response(
37 | analytics_response["headers"],
38 | analytics_response["rows"]
39 | )
40 |
41 | async with mock_server as base_url:
42 | # Create client pointing to mock server
43 | config = DHIS2Config(
44 | base_url=base_url,
45 | auth=("test_user", "test_pass"),
46 | rps=10.0
47 | )
48 |
49 | async with AsyncDHIS2Client(config) as client:
50 | # Test basic connectivity
51 | me_data = await client.get("/api/me")
52 | print(f"✅ Connected as: {me_data.get('name')}")
53 |
54 | # Test Analytics
55 | analytics_data = await client.get("/api/analytics", params={
56 | "dimension": ["dx:test", "pe:2023Q1", "ou:test"]
57 | })
58 | print(f"✅ Analytics: {len(analytics_data.get('rows', []))} rows")
59 |
60 | # Check request log
61 | requests = mock_server.get_request_log()
62 | print(f"📊 Server received {len(requests)} requests")
63 |
64 |
65 | async def demo_network_simulation():
66 | """Demonstrate network condition simulation"""
67 | print("\n=== Network Simulation Demo ===")
68 |
69 | # Test different network conditions
70 | conditions = [
71 | NetworkSimulator.NORMAL,
72 | NetworkSimulator.SLOW_3G,
73 | NetworkSimulator.WEAK_NETWORK
74 | ]
75 |
76 | for condition in conditions:
77 | print(f"\n🌐 Testing {condition.name} network...")
78 | print(f" Latency: {condition.latency_ms}ms")
79 | print(f" Packet loss: {condition.packet_loss_rate:.1%}")
80 |
81 | # Simulate some network operations
82 | simulator = NetworkSimulator(condition)
83 |
84 | start_time = asyncio.get_event_loop().time()
85 | for _ in range(3):
86 | await simulator.simulate_latency()
87 | if simulator.should_drop_packet():
88 | print(" 📉 Packet dropped!")
89 |
90 | elapsed = asyncio.get_event_loop().time() - start_time
91 | print(f" ⏱️ Total time: {elapsed:.3f}s")
92 |
93 |
94 | async def demo_benchmark_runner():
95 | """Demonstrate benchmark runner usage"""
96 | print("\n=== Benchmark Runner Demo ===")
97 |
98 | runner = BenchmarkRunner("pydhis2_demo")
99 |
100 | async def sample_operation():
101 | """Sample async operation to benchmark"""
102 | await asyncio.sleep(random.uniform(0.01, 0.05)) # Simulate work
103 | if random.random() < 0.1: # 10% failure rate
104 | raise Exception("Simulated error")
105 |
106 | # Run repeated test
107 | await runner.run_repeated_test(
108 | sample_operation,
109 | "sample_async_operation",
110 | repetitions=20
111 | )
112 |
113 | # Run concurrent test
114 | await runner.run_concurrent_test(
115 | sample_operation,
116 | "concurrent_sample_operation",
117 | concurrency=5,
118 | total_requests=50
119 | )
120 |
121 | # Print results
122 | runner.print_summary()
123 |
124 |
125 | async def main():
126 | """Run all demos"""
127 | print("🚀 pydhis2 Testing Utilities Demo")
128 | print("=" * 50)
129 |
130 | try:
131 | await demo_mock_server()
132 | await demo_network_simulation()
133 | await demo_benchmark_runner()
134 |
135 | print("\n🎉 All demos completed successfully!")
136 |
137 | except Exception as e:
138 | print(f"❌ Demo failed: {e}")
139 | import traceback
140 | traceback.print_exc()
141 |
142 |
143 | if __name__ == "__main__":
144 |     asyncio.run(main())
145 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
8 |
9 | ## Our Standards
10 |
11 | Examples of behavior that contributes to a positive environment for our community include:
12 |
13 | * Demonstrating empathy and kindness toward other people
14 | * Being respectful of differing opinions, viewpoints, and experiences
15 | * Giving and gracefully accepting constructive feedback
16 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
17 | * Focusing on what is best not just for us as individuals, but for the overall community
18 |
19 | Examples of unacceptable behavior include:
20 |
21 | * The use of sexualized language or imagery, and sexual attention or advances of any kind
22 | * Trolling, insulting or derogatory comments, and personal or political attacks
23 | * Public or private harassment
24 | * Publishing others' private information, such as a physical or email address, without their explicit permission
25 | * Other conduct which could reasonably be considered inappropriate in a professional setting
26 |
27 | ## Enforcement Responsibilities
28 |
29 | Community leaders are responsible for clarifying and enforcing our standards and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.
32 |
33 | ## Scope
34 |
35 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
36 |
37 | ## Enforcement
38 |
39 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at the project's [GitHub Issues](https://github.com/HzaCode/pyDHIS2/issues) or [Discussions](https://github.com/HzaCode/pyDHIS2/discussions). All complaints will be reviewed and investigated promptly and fairly.
40 |
41 | All community leaders are obligated to respect the privacy and security of the reporter of any incident.
42 |
43 | ## Enforcement Guidelines
44 |
45 | Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:
46 |
47 | ### 1. Correction
48 |
49 | **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
50 |
51 | **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.
52 |
53 | ### 2. Warning
54 |
55 | **Community Impact**: A violation through a single incident or series of actions.
56 |
57 | **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interaction in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.
58 |
59 | ### 3. Temporary Ban
60 |
61 | **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.
62 |
63 | **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
64 |
65 | ### 4. Permanent Ban
66 |
67 | **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
68 |
69 | **Consequence**: A permanent ban from any sort of public interaction within the community.
70 |
71 | ## Attribution
72 |
73 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].
74 |
75 | [homepage]: https://www.contributor-covenant.org
76 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html
77 |
--------------------------------------------------------------------------------
/pydhis2/observe/logging.py:
--------------------------------------------------------------------------------
1 | """Structured logging configuration"""
2 |
3 | import json
4 | import logging
5 | import sys
6 | from datetime import datetime, timezone
7 | from typing import Optional
8 |
9 |
10 | class StructuredFormatter(logging.Formatter):
11 | """Structured log formatter"""
12 |
13 | def format(self, record: logging.LogRecord) -> str:
14 | """Format log record as JSON"""
15 | log_data = {
16 |             'timestamp': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
17 | 'level': record.levelname,
18 | 'logger': record.name,
19 | 'message': record.getMessage(),
20 | 'module': record.module,
21 | 'function': record.funcName,
22 | 'line': record.lineno,
23 | }
24 |
25 | # Add exception information
26 | if record.exc_info:
27 | log_data['exception'] = self.formatException(record.exc_info)
28 |
29 | # Add extra fields
30 | if hasattr(record, 'extra_fields'):
31 | log_data.update(record.extra_fields)
32 |
33 | return json.dumps(log_data, ensure_ascii=False)
34 |
35 |
36 | class SensitiveDataFilter(logging.Filter):
37 | """Sensitive data filter"""
38 |
39 | SENSITIVE_PATTERNS = [
40 | 'password', 'token', 'key', 'secret', 'auth', 'credential'
41 | ]
42 |
43 |     def filter(self, record: logging.LogRecord) -> bool:
44 |         """Redact log arguments when the message looks sensitive"""
45 |         message = record.getMessage().lower()
46 |
47 |         for pattern in self.SENSITIVE_PATTERNS:
48 |             if pattern in message:
49 |                 # Redact the interpolated arguments - they, not the
50 |                 # static format string, carry the secret values
51 |                 if isinstance(record.args, tuple):
52 |                     record.args = tuple('[REDACTED]' for _ in record.args)
53 |                 elif record.args:  # mapping-style %(name)s args
54 |                     record.args = {key: '[REDACTED]' for key in record.args}
55 |                 break
56 |
57 |         return True
58 |
59 |
60 | def setup_logging(
61 | level: str = "INFO",
62 | structured: bool = True,
63 | filter_sensitive: bool = True,
64 | log_file: Optional[str] = None
65 | ) -> None:
66 | """Setup logging configuration"""
67 |
68 | # Set log level
69 | log_level = getattr(logging, level.upper(), logging.INFO)
70 |
71 | # Create root logger
72 | root_logger = logging.getLogger()
73 | root_logger.setLevel(log_level)
74 |
75 | # Clear existing handlers
76 | root_logger.handlers.clear()
77 |
78 | # Console handler
79 | console_handler = logging.StreamHandler(sys.stdout)
80 | console_handler.setLevel(log_level)
81 |
82 | if structured:
83 | console_handler.setFormatter(StructuredFormatter())
84 | else:
85 | console_handler.setFormatter(
86 | logging.Formatter(
87 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
88 | )
89 | )
90 |
91 | if filter_sensitive:
92 | console_handler.addFilter(SensitiveDataFilter())
93 |
94 | root_logger.addHandler(console_handler)
95 |
96 | # File handler (if specified)
97 | if log_file:
98 | file_handler = logging.FileHandler(log_file, encoding='utf-8')
99 | file_handler.setLevel(log_level)
100 | file_handler.setFormatter(StructuredFormatter())
101 |
102 | if filter_sensitive:
103 | file_handler.addFilter(SensitiveDataFilter())
104 |
105 | root_logger.addHandler(file_handler)
106 |
107 | # Third-party library log levels
108 | logging.getLogger('aiohttp').setLevel(logging.WARNING)
109 | logging.getLogger('urllib3').setLevel(logging.WARNING)
110 |
111 |
112 | def get_logger(name: str, **extra_fields) -> logging.LoggerAdapter:
113 | """Get logger with extra fields"""
114 | logger = logging.getLogger(name)
115 |
116 | # Create adapter to add extra fields
117 | class ExtraFieldsAdapter(logging.LoggerAdapter):
118 | def process(self, msg, kwargs):
119 | # Merge extra fields
120 | if 'extra' not in kwargs:
121 | kwargs['extra'] = {}
122 | kwargs['extra']['extra_fields'] = {**extra_fields, **kwargs['extra'].get('extra_fields', {})}
123 | return msg, kwargs
124 |
125 | return ExtraFieldsAdapter(logger, extra_fields)
126 |
127 |
128 | # Convenience functions
129 | def log_request(logger: logging.Logger, method: str, url: str, status: Optional[int] = None, **kwargs):
130 | """Log HTTP request"""
131 | extra_fields = {
132 | 'http_method': method,
133 | 'http_url': url,
134 | 'event_type': 'http_request'
135 | }
136 |
137 | if status:
138 | extra_fields['http_status'] = status
139 |
140 | extra_fields.update(kwargs)
141 |
142 | logger.info(
143 | f"{method} {url}" + (f" -> {status}" if status else ""),
144 | extra={'extra_fields': extra_fields}
145 | )
146 |
147 |
148 | def log_retry(logger: logging.Logger, attempt: int, max_attempts: int, delay: float, **kwargs):
149 | """Log retry attempt"""
150 | extra_fields = {
151 | 'retry_attempt': attempt,
152 | 'retry_max_attempts': max_attempts,
153 | 'retry_delay': delay,
154 | 'event_type': 'retry'
155 | }
156 |
157 | extra_fields.update(kwargs)
158 |
159 | logger.warning(
160 | f"Retry attempt {attempt}/{max_attempts}, waiting {delay}s",
161 | extra={'extra_fields': extra_fields}
162 | )
163 |
164 |
165 | def log_rate_limit(logger: logging.Logger, current_rate: float, limit: float, wait_time: float, **kwargs):
166 | """Log rate limiting"""
167 | extra_fields = {
168 | 'rate_current': current_rate,
169 | 'rate_limit': limit,
170 | 'rate_wait_time': wait_time,
171 | 'event_type': 'rate_limit'
172 | }
173 |
174 | extra_fields.update(kwargs)
175 |
176 | logger.info(
177 | f"Rate limited: {current_rate:.2f}/{limit:.2f} rps, waiting {wait_time:.2f}s",
178 | extra={'extra_fields': extra_fields}
179 | )
180 |
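# Usage sketch (illustrative, using only the functions defined above):
# configure structured JSON logging and emit one request event. The module
# path follows this package's layout (pydhis2/observe/logging.py).
#
#   from pydhis2.observe.logging import setup_logging, get_logger, log_request
#
#   setup_logging(level="DEBUG", structured=True, log_file="run.log")
#   logger = get_logger("my_pipeline", component="analytics")
#   log_request(logger, "GET", "https://play.dhis2.org/api/analytics", status=200)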
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | pydhis2 Documentation
2 | =====================
3 |
4 | .. image:: https://img.shields.io/pypi/v/pydhis2?style=flat&color=blue
5 | :target: https://pypi.org/project/pydhis2
6 | :alt: PyPI version
7 |
8 | .. image:: https://img.shields.io/pypi/pyversions/pydhis2?style=flat&color=blue
9 | :target: https://pypi.org/project/pydhis2/
10 | :alt: Python versions
11 |
12 | .. image:: https://img.shields.io/pepy/dt/pydhis2?style=flat&color=blue
13 | :target: https://pepy.tech/project/pydhis2
14 | :alt: Downloads
15 |
16 | .. image:: https://img.shields.io/badge/tests-passing-brightgreen?style=flat
17 | :target: https://github.com/HzaCode/pyDHIS2/actions/workflows/ci.yml
18 | :alt: Tests
19 |
20 | .. image:: https://img.shields.io/badge/license-Apache%202.0-green?style=flat
21 | :target: https://opensource.org/licenses/Apache-2.0
22 | :alt: License
23 |
24 | **pydhis2** is a next-generation Python library for interacting with `DHIS2 <https://dhis2.org/>`_,
25 | the world's largest health information management system. It provides a clean, modern, and efficient API
26 | for data extraction, analysis, and management, with a strong emphasis on reproducible workflows, a
27 | critical need in scientific research and public health analysis, especially in low- and middle-income
28 | country (LMIC) contexts.
29 |
30 | Features
31 | --------
32 |
33 | 🚀 **Modern & Asynchronous**
34 | Built with ``asyncio`` for high-performance, non-blocking I/O, making it ideal for large-scale data operations.
35 | A synchronous client is also provided for simplicity in smaller scripts.
36 |
37 | 📊 **Reproducible by Design**
38 | From project templates to a powerful CLI, pydhis2 is built to support standardized, shareable, and verifiable
39 | data analysis pipelines.
40 |
41 | 🐼 **Seamless DataFrame Integration**
42 | Natively convert DHIS2 analytics data into Pandas DataFrames with a single method call (``.to_pandas()``),
43 | connecting you instantly to the PyData ecosystem.
44 |
45 | 🔧 **Powerful Command Line Interface**
46 | Automate common tasks like data pulling and configuration directly from your terminal.
47 |
48 | Quick Start
49 | -----------
50 |
51 | Installation
52 | ~~~~~~~~~~~~
53 |
54 | Install pydhis2 directly from PyPI:
55 |
56 | .. code-block:: bash
57 |
58 | pip install pydhis2
59 |
60 | Verify Installation
61 | ~~~~~~~~~~~~~~~~~~~
62 |
63 | Use the built-in CLI to run a quick demo:
64 |
65 | .. code-block:: bash
66 |
67 | # Check the installed version
68 | pydhis2 version
69 |
70 | # Run the quick demo
71 | pydhis2 demo quick
72 |
73 | Basic Usage Example
74 | ~~~~~~~~~~~~~~~~~~~
75 |
76 | .. code-block:: python
77 |
78 | import asyncio
79 | import sys
80 | from pydhis2 import get_client, DHIS2Config
81 | from pydhis2.core.types import AnalyticsQuery
82 |
83 | # pydhis2 provides both an async and a sync client
84 | AsyncDHIS2Client, _ = get_client()
85 |
86 | async def main():
87 | # 1. Configure the connection to a DHIS2 server
88 | config = DHIS2Config(
89 | base_url="https://demos.dhis2.org/dq",
90 | auth=("demo", "District1#")
91 | )
92 |
93 | async with AsyncDHIS2Client(config) as client:
94 | # 2. Define the query parameters
95 | query = AnalyticsQuery(
96 | dx=["b6mCG9sphIT"], # Data element
97 | ou="qzGX4XdWufs", # Org unit
98 | pe="2023" # Period
99 | )
100 |
101 | # 3. Fetch data and convert directly to DataFrame
102 | df = await client.analytics.to_pandas(query)
103 |
104 | # 4. Analyze and display
105 | print("✅ Data fetched successfully!")
106 | print(f"Retrieved {len(df)} records.")
107 | print(df.head())
108 |
109 | if __name__ == "__main__":
110 | if sys.platform == 'win32':
111 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
112 | asyncio.run(main())
113 |
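Synchronous Usage (Sketch)
~~~~~~~~~~~~~~~~~~~~~~~~~~

A minimal synchronous sketch, assuming the second element returned by
``get_client()`` is the synchronous client and that it mirrors the async API
shown above (the names and call pattern here are illustrative, not a
documented contract):

.. code-block:: python

    from pydhis2 import get_client, DHIS2Config
    from pydhis2.core.types import AnalyticsQuery

    _, SyncDHIS2Client = get_client()

    config = DHIS2Config(
        base_url="https://demos.dhis2.org/dq",
        auth=("demo", "District1#")
    )

    with SyncDHIS2Client(config) as client:
        query = AnalyticsQuery(dx=["b6mCG9sphIT"], ou="qzGX4XdWufs", pe="2023")
        df = client.analytics.to_pandas(query)
        print(df.head())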
114 | Table of Contents
115 | -----------------
116 |
117 | .. toctree::
118 | :maxdepth: 2
119 | :caption: User Guide
120 |
121 | installation
122 | quickstart
123 | configuration
124 | analytics
125 | datavaluesets
126 | tracker
127 | metadata
128 | dqr
129 | cli
130 |
131 | .. toctree::
132 | :maxdepth: 2
133 | :caption: API Reference
134 |
135 | api/client
136 | api/endpoints
137 | api/types
138 | api/io
139 |
140 | .. toctree::
141 | :maxdepth: 1
142 | :caption: Developer Guide
143 |
144 | contributing
145 | changelog
146 |
147 | Supported Endpoints
148 | -------------------
149 |
150 | +-------------------+------+-------+-----------+------------+-----------+
151 | | Endpoint | Read | Write | DataFrame | Pagination | Streaming |
152 | +===================+======+=======+===========+============+===========+
153 | | **Analytics** | ✅ | \- | ✅ | ✅ | ✅ |
154 | +-------------------+------+-------+-----------+------------+-----------+
155 | | **DataValueSets** | ✅ | ✅ | ✅ | ✅ | ✅ |
156 | +-------------------+------+-------+-----------+------------+-----------+
157 | | **Tracker Events**| ✅ | ✅ | ✅ | ✅ | ✅ |
158 | +-------------------+------+-------+-----------+------------+-----------+
159 | | **Metadata** | ✅ | ✅ | ✅ | \- | \- |
160 | +-------------------+------+-------+-----------+------------+-----------+
161 |
162 | Compatibility
163 | -------------
164 |
165 | * **Python**: ≥ 3.9
166 | * **DHIS2**: ≥ 2.36
167 | * **Platforms**: Windows, Linux, macOS
168 |
169 | Community & Support
170 | -------------------
171 |
172 | * 📖 `Documentation <https://pydhis2.readthedocs.io/>`_
173 | * 🐛 `GitHub Issues <https://github.com/HzaCode/pyDHIS2/issues>`_
174 | * 💬 `GitHub Discussions <https://github.com/HzaCode/pyDHIS2/discussions>`_
175 | * 📝 `Changelog <https://github.com/HzaCode/pyDHIS2/blob/main/CHANGELOG.md>`_
176 |
177 | License
178 | -------
179 |
180 | This project is licensed under the **Apache License 2.0**. See the `LICENSE <https://github.com/HzaCode/pyDHIS2/blob/main/LICENSE>`_ file for details.
181 |
182 | Indices and tables
183 | ==================
184 |
185 | * :ref:`genindex`
186 | * :ref:`modindex`
187 | * :ref:`search`
188 |
189 |
--------------------------------------------------------------------------------
/tests/unit/test_cli.py:
--------------------------------------------------------------------------------
1 | """Tests for CLI module"""
2 |
3 | from typer.testing import CliRunner
4 | from pydhis2.cli.main import app
5 | from unittest.mock import patch
6 |
7 | runner = CliRunner()
8 |
9 |
10 | class TestVersionCommand:
11 | """Test version command"""
12 |
13 | def test_version_command(self):
14 | """Test version command output"""
15 | result = runner.invoke(app, ["version"])
16 | assert result.exit_code == 0
17 | assert "pydhis2 version" in result.stdout
18 |
19 |
20 | class TestConfigCommand:
21 | """Test config command"""
22 |
23 | def test_config_with_all_params(self):
24 | """Test config command with all parameters"""
25 | result = runner.invoke(
26 | app,
27 | ["config", "--url", "https://test.dhis2.org", "--username", "admin", "--password", "district"],
28 | )
29 | assert result.exit_code == 0
30 | assert "Configured connection" in result.stdout
31 |
32 | def test_config_with_env_vars(self):
33 | """Test config command using environment variables"""
34 | with patch.dict('os.environ', {
35 | 'DHIS2_USERNAME': 'test_user',
36 | 'DHIS2_PASSWORD': 'test_pass'
37 | }):
38 | result = runner.invoke(
39 | app,
40 | ["config", "--url", "https://test.dhis2.org"],
41 | )
42 | assert result.exit_code == 0
43 |
44 |
45 | class TestAnalyticsCommands:
46 | """Test analytics commands"""
47 |
48 | def test_analytics_pull_command(self):
49 | """Test analytics pull command"""
50 | result = runner.invoke(
51 | app,
52 | [
53 | "analytics", "pull",
54 | "--url", "https://test.dhis2.org",
55 | "--dx", "test_dx",
56 | "--ou", "test_ou",
57 | "--pe", "2023",
58 | "--out", "test.parquet"
59 | ],
60 | )
61 | assert result.exit_code == 0
62 | assert "Would pull data" in result.stdout
63 | assert "test_dx" in result.stdout
64 |
65 | def test_analytics_pull_with_format(self):
66 | """Test analytics pull with custom format"""
67 | result = runner.invoke(
68 | app,
69 | [
70 | "analytics", "pull",
71 | "--url", "https://test.dhis2.org",
72 | "--dx", "dx1",
73 | "--ou", "ou1",
74 | "--pe", "2023",
75 | "--format", "csv"
76 | ],
77 | )
78 | assert result.exit_code == 0
79 |
80 |
81 | class TestDataValueSetsCommands:
82 | """Test datavaluesets commands"""
83 |
84 | def test_datavaluesets_pull_command(self):
85 | """Test datavaluesets pull command"""
86 | result = runner.invoke(
87 | app,
88 | [
89 | "datavaluesets", "pull",
90 | "--url", "https://test.dhis2.org",
91 | "--data-set", "ds1",
92 | "--org-unit", "ou1",
93 | "--period", "202301"
94 | ],
95 | )
96 | assert result.exit_code == 0
97 | assert "Would pull data" in result.stdout
98 |
99 | def test_datavaluesets_push_command(self):
100 | """Test datavaluesets push command"""
101 | result = runner.invoke(
102 | app,
103 | [
104 | "datavaluesets", "push",
105 | "--url", "https://test.dhis2.org",
106 | "--input", "test.parquet"
107 | ],
108 | )
109 | assert result.exit_code == 0
110 | assert "Implementation in progress" in result.stdout
111 |
112 |
113 | class TestTrackerCommands:
114 | """Test tracker commands"""
115 |
116 | def test_tracker_events_command(self):
117 | """Test tracker events command"""
118 | result = runner.invoke(
119 | app,
120 | [
121 | "tracker", "events",
122 | "--url", "https://test.dhis2.org",
123 | "--program", "prog1",
124 | "--out", "events.parquet"
125 | ],
126 | )
127 | assert result.exit_code == 0
128 |
129 |
130 | class TestDQRCommands:
131 | """Test DQR commands"""
132 |
133 | def test_dqr_analyze_command(self):
134 | """Test DQR analyze command"""
135 | result = runner.invoke(
136 | app,
137 | [
138 | "dqr", "analyze",
139 | "--input", "test.parquet",
140 | "--html", "report.html"
141 | ],
142 | )
143 | assert result.exit_code == 0
144 | assert "Implementation in progress" in result.stdout
145 |
146 |
147 | class TestDemoCommand:
148 | """Test demo command"""
149 |
150 | def test_demo_quick_command(self):
151 | """Test demo quick command"""
152 | result = runner.invoke(app, ["demo", "quick"])
153 | # Command should execute without error or show message
154 | # Exit code 2 means missing required arguments, which is expected
155 | assert result.exit_code in [0, 2] or "demo" in result.stdout.lower()
156 |
157 |
158 | class TestPipelineCommands:
159 | """Test pipeline commands"""
160 |
161 | def test_pipeline_run_command(self):
162 | """Test pipeline run command"""
163 | result = runner.invoke(
164 | app,
165 | [
166 | "pipeline", "run",
167 | "--config", "test.yml"
168 | ],
169 | )
170 | # Exit code 2 means command not found or missing required arguments
171 | assert result.exit_code in [0, 2]
172 |
173 |
174 | class TestMetadataCommands:
175 | """Test metadata commands"""
176 |
177 | def test_metadata_export_command(self):
178 | """Test metadata export command"""
179 | result = runner.invoke(
180 | app,
181 | [
182 | "metadata", "export",
183 | "--url", "https://test.dhis2.org",
184 | "--type", "dataElements",
185 | "--out", "metadata.json"
186 | ],
187 | )
188 | # Exit code 2 means missing required arguments or command not found
189 | assert result.exit_code in [0, 2]
190 |
191 | def test_metadata_import_command(self):
192 | """Test metadata import command"""
193 | result = runner.invoke(
194 | app,
195 | [
196 | "metadata", "import",
197 | "--url", "https://test.dhis2.org",
198 | "--input", "metadata.json"
199 | ],
200 | )
201 | # Exit code 2 means missing required arguments or command not found
202 | assert result.exit_code in [0, 2]
203 |
204 |
--------------------------------------------------------------------------------
/pydhis2/pipeline/executor.py:
--------------------------------------------------------------------------------
1 | """Pipeline executor"""
2 |
3 | import asyncio
4 | import logging
5 | from datetime import datetime
6 | from pathlib import Path
7 | from typing import Any, Dict, Optional
8 |
9 | from pydhis2.core.client import AsyncDHIS2Client
10 |
11 | from .config import PipelineConfig, PipelineResult, StepConfig
12 | from .steps import StepRegistry
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 |
17 | class PipelineExecutor:
18 | """Pipeline executor"""
19 |
20 | def __init__(
21 | self,
22 | client: AsyncDHIS2Client,
23 | output_dir: Optional[Path] = None
24 | ):
25 | self.client = client
26 | self.output_dir = output_dir or Path("pipeline_output")
27 | self.context: Dict[str, Any] = {}
28 |
29 | async def execute(
30 | self,
31 | config: PipelineConfig,
32 | context: Optional[Dict[str, Any]] = None
33 | ) -> PipelineResult:
34 | """Execute a pipeline"""
35 | logger.info(f"Starting pipeline execution: {config.name}")
36 |
37 | # Validate configuration
38 | validation_errors = config.validate_dependencies()
39 | if validation_errors:
40 | raise ValueError(f"Pipeline configuration validation failed: {validation_errors}")
41 |
42 | # Create execution result
43 | result = PipelineResult(
44 | pipeline_name=config.name,
45 | start_time=datetime.now(),
46 | total_steps=len([step for step in config.steps if step.enabled])
47 | )
48 |
49 | # Create output directory
50 | run_timestamp = result.start_time.strftime("%Y%m%d_%H%M%S")
51 | run_output_dir = self.output_dir / f"{config.name}_{run_timestamp}"
52 | run_output_dir.mkdir(parents=True, exist_ok=True)
53 |
54 | # Set up context
55 | execution_context = {
56 | 'output_dir': run_output_dir,
57 | 'pipeline_config': config,
58 | 'start_time': result.start_time,
59 | **(context or {})
60 | }
61 |
62 | try:
63 | # Get execution order
64 | ordered_steps = config.get_execution_order()
65 |
66 | # Execute steps
67 | for step_config in ordered_steps:
68 | if not step_config.enabled:
69 | result.skipped_steps += 1
70 | continue
71 |
72 | await self._execute_step(step_config, execution_context, result)
73 |
74 | # Mark as completed
75 | result.status = "completed"
76 | result.end_time = datetime.now()
77 |
78 | logger.info(f"Pipeline execution completed: {config.name}")
79 | logger.info(f"Total duration: {result.duration:.1f}s")
80 | logger.info(f"Success rate: {result.success_rate:.1%}")
81 |
82 | except Exception as e:
83 | result.status = "failed"
84 | result.end_time = datetime.now()
85 | result.errors.append(f"Pipeline execution failed: {str(e)}")
86 | logger.error(f"Pipeline execution failed: {e}")
87 | raise
88 |
89 | finally:
90 | # Save the result
91 | await self._save_result(result, run_output_dir)
92 |
93 | return result
94 |
95 | async def _execute_step(
96 | self,
97 | step_config: StepConfig,
98 | context: Dict[str, Any],
99 | result: PipelineResult
100 | ) -> None:
101 | """Execute a single step"""
102 | step_name = step_config.name
103 | logger.info(f"Executing step: {step_name} ({step_config.type})")
104 |
105 | step_start_time = datetime.now()
106 |
107 | try:
108 | # Create step instance
109 | step = StepRegistry.create_step(step_config)
110 |
111 | # Execute step
112 | if step_config.timeout:
113 | step_output = await asyncio.wait_for(
114 | step.execute(self.client, context),
115 | timeout=step_config.timeout
116 | )
117 | else:
118 | step_output = await step.execute(self.client, context)
119 |
120 | step_end_time = datetime.now()
121 |
122 | # Record success result
123 | result.add_step_result(
124 | step_name=step_name,
125 | status="completed",
126 | start_time=step_start_time,
127 | end_time=step_end_time,
128 | output_data=step_output
129 | )
130 |
131 | # Update context (step output is available to subsequent steps)
132 | context[f"step_{step_name}_output"] = step_output
133 |
134 | duration = (step_end_time - step_start_time).total_seconds()
135 | logger.info(f"Step {step_name} completed, duration: {duration:.1f}s")
136 |
137 | except asyncio.TimeoutError:
138 | step_end_time = datetime.now()
139 | error_msg = f"Step timed out (>{step_config.timeout}s)"
140 |
141 | result.add_step_result(
142 | step_name=step_name,
143 | status="failed",
144 | start_time=step_start_time,
145 | end_time=step_end_time,
146 | error=error_msg
147 | )
148 |
149 | logger.error(f"Step {step_name} timed out")
150 |
151 | # If retry is configured, retry logic can be implemented here
152 | if step_config.retry_count > 0:
153 | logger.info(f"Step {step_name} will be retried...")
154 |                 # TODO: Implement retry logic; for now the timeout is recorded as a failure and execution continues
155 | else:
156 | raise
157 |
158 | except Exception as e:
159 | step_end_time = datetime.now()
160 | error_msg = str(e)
161 |
162 | result.add_step_result(
163 | step_name=step_name,
164 | status="failed",
165 | start_time=step_start_time,
166 | end_time=step_end_time,
167 | error=error_msg
168 | )
169 |
170 | logger.error(f"Step {step_name} failed: {e}")
171 | raise
172 |
173 | async def _save_result(
174 | self,
175 | result: PipelineResult,
176 | output_dir: Path
177 | ) -> None:
178 | """Save the execution result"""
179 | import json
180 |
181 | result_file = output_dir / "pipeline_result.json"
182 |
183 | try:
184 | with open(result_file, 'w', encoding='utf-8') as f:
185 | json.dump(result.to_dict(), f, indent=2, ensure_ascii=False)
186 |
187 | logger.info(f"Pipeline result saved to: {result_file}")
188 |
189 | except Exception as e:
190 | logger.warning(f"Failed to save pipeline result: {e}")
191 |
192 | def set_context(self, key: str, value: Any) -> None:
193 | """Set execution context"""
194 | self.context[key] = value
195 |
196 | def get_context(self, key: str, default: Any = None) -> Any:
197 | """Get execution context"""
198 | return self.context.get(key, default)
199 |
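# Usage sketch (illustrative): run a pipeline defined in YAML. Assumes the
# AsyncDHIS2Client and PipelineConfig APIs shown in this package; the step
# types referenced by the config must be registered in StepRegistry.
#
#   config = PipelineConfig.from_file("pipelines/example.yml")
#   executor = PipelineExecutor(client, output_dir=Path("runs"))
#   result = await executor.execute(config)
#   print(result.status, f"success rate: {result.success_rate:.1%}")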
--------------------------------------------------------------------------------
/pydhis2/templates/{{cookiecutter.project_slug}}/scripts/run_pipeline.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | {{ cookiecutter.project_name }} - Pipeline Runner Script
4 |
5 | Usage examples:
6 | python scripts/run_pipeline.py
7 | python scripts/run_pipeline.py --config configs/custom.yml
8 | """
9 |
10 | import argparse
11 | import asyncio
12 | import logging
13 | import os
14 | import sys
15 | from pathlib import Path
16 |
17 | import pandas as pd
18 | import yaml
19 | from dotenv import load_dotenv
20 |
21 | # Add project root to Python path
22 | project_root = Path(__file__).parent.parent
23 | sys.path.insert(0, str(project_root))
24 |
25 | # Import pydhis2
26 | try:
27 | from pydhis2.core.client import AsyncDHIS2Client
28 | from pydhis2.core.types import AnalyticsQuery, DHIS2Config
29 | from pydhis2.dqr.metrics import (
30 | CompletenessMetrics,
31 | ConsistencyMetrics,
32 | TimelinessMetrics,
33 | )
34 | except ImportError as e:
35 | print(f"Error: Failed to import pydhis2 module: {e}")
36 | print("Please ensure pydhis2 is installed: pip install pydhis2")
37 | sys.exit(1)
38 |
39 | # Configure logging
40 | logging.basicConfig(
41 | level=logging.INFO,
42 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
43 | )
44 | logger = logging.getLogger(__name__)
45 |
46 |
47 | def load_config(config_path: str = "configs/dhis2.yml") -> dict:
48 | """Load configuration file"""
49 | config_file = project_root / config_path
50 |
51 | if not config_file.exists():
52 | logger.warning(f"Configuration file not found: {config_file}")
53 | return {}
54 |
55 | with open(config_file, encoding='utf-8') as f:
56 | return yaml.safe_load(f)
57 |
58 |
59 | async def fetch_analytics_data(client: AsyncDHIS2Client, config: dict) -> pd.DataFrame:
60 | """Fetch Analytics data"""
61 | logger.info("Fetching Analytics data...")
62 |
63 | # Get query parameters from config or environment variables
64 | dx = config.get('dx', os.getenv('DHIS2_DX', 'your_indicator_id'))
65 | ou = config.get('ou', os.getenv('DHIS2_OU', 'your_org_unit_id'))
66 | pe = config.get('pe', os.getenv('DHIS2_PE', '2023Q1:2023Q4'))
67 |
68 | query = AnalyticsQuery(dx=dx, ou=ou, pe=pe)
69 |
70 | try:
71 | df = await client.analytics.to_pandas(query)
72 | logger.info(f"Successfully fetched {len(df)} records")
73 | return df
74 | except Exception as e:
75 | logger.error(f"Failed to fetch data: {e}")
76 | raise
77 |
78 |
79 | def run_dqr_analysis(df: pd.DataFrame, config: dict) -> dict:
80 | """Run data quality review"""
81 | logger.info("Running data quality review...")
82 |
83 | dqr_config = config.get('dqr', {})
84 |
85 | # Run various metrics
86 | completeness_metrics = CompletenessMetrics(dqr_config.get('completeness', {}))
87 | completeness_results = completeness_metrics.calculate(df)
88 |
89 | consistency_metrics = ConsistencyMetrics(dqr_config.get('consistency', {}))
90 | consistency_results = consistency_metrics.calculate(df)
91 |
92 | timeliness_metrics = TimelinessMetrics(dqr_config.get('timeliness', {}))
93 | timeliness_results = timeliness_metrics.calculate(df)
94 |
95 | all_results = completeness_results + consistency_results + timeliness_results
96 |
97 | # Calculate overall score
98 | pass_count = sum(1 for r in all_results if r.status == "pass")
99 | total_count = len(all_results)
100 | overall_score = pass_count / total_count if total_count > 0 else 0
101 |
102 | logger.info(f"Data quality review finished: {pass_count}/{total_count} metrics passed ({overall_score:.1%})")
103 |
104 | return {
105 | 'results': all_results,
106 | 'overall_score': overall_score,
107 | 'pass_count': pass_count,
108 | 'total_count': total_count
109 | }
110 |
111 |
112 | def save_results(df: pd.DataFrame, dqr_results: dict, output_dir: Path):
113 | """Save results"""
114 | logger.info(f"Saving results to: {output_dir}")
115 |
116 | # Create output directory
117 | output_dir.mkdir(parents=True, exist_ok=True)
118 |
119 | # Save raw data
120 | data_file = output_dir / "analytics_data.parquet"
121 | df.to_parquet(data_file, index=False)
122 | logger.info(f"Data saved: {data_file}")
123 |
124 | # Save DQR results
125 | dqr_summary = {
126 | 'overall_score': dqr_results['overall_score'],
127 | 'pass_count': dqr_results['pass_count'],
128 | 'total_count': dqr_results['total_count'],
129 | 'metrics': [
130 | {
131 | 'name': r.metric_name,
132 | 'value': r.value,
133 | 'status': r.status,
134 | 'message': r.message
135 | }
136 | for r in dqr_results['results']
137 | ]
138 | }
139 |
140 | import json
141 | dqr_file = output_dir / "dqr_summary.json"
142 | with open(dqr_file, 'w', encoding='utf-8') as f:
143 | json.dump(dqr_summary, f, indent=2, ensure_ascii=False)
144 | logger.info(f"DQR results saved: {dqr_file}")
145 |
146 |
147 | async def main():
148 | """Main function"""
149 |     parser = argparse.ArgumentParser(description="Run {{ cookiecutter.project_name }} data analysis pipeline")
150 | parser.add_argument('--config', default='configs/dhis2.yml', help='Path to configuration file')
151 | parser.add_argument('--output', default='data/results', help='Output directory')
152 | args = parser.parse_args()
153 |
154 | # Load environment variables
155 | env_file = project_root / '.env'
156 | if env_file.exists():
157 | load_dotenv(env_file)
158 | else:
159 | logger.warning("No .env file found, please ensure DHIS2 environment variables are set")
160 |
161 | # Load configuration
162 | config = load_config(args.config)
163 |
164 | # Validate required environment variables
165 | required_vars = ['DHIS2_URL', 'DHIS2_USERNAME', 'DHIS2_PASSWORD']
166 | missing_vars = [var for var in required_vars if not os.getenv(var)]
167 |
168 | if missing_vars:
169 | logger.error(f"Missing required environment variables: {', '.join(missing_vars)}")
170 | logger.error("Please set these variables or create a .env file")
171 | return 1
172 |
173 | # Create DHIS2 client configuration
174 | client_config = DHIS2Config(
175 | base_url=os.getenv('DHIS2_URL'),
176 | auth=(os.getenv('DHIS2_USERNAME'), os.getenv('DHIS2_PASSWORD')),
177 | rps=config.get('connection', {}).get('rps', 5),
178 | concurrency=config.get('connection', {}).get('concurrency', 3)
179 | )
180 |
181 | try:
182 | # Execute pipeline
183 | async with AsyncDHIS2Client(client_config) as client:
184 | # 1. Fetch data
185 | df = await fetch_analytics_data(client, config)
186 |
187 | # 2. Data quality review
188 | dqr_results = run_dqr_analysis(df, config)
189 |
190 | # 3. Save results
191 | output_dir = project_root / args.output
192 | save_results(df, dqr_results, output_dir)
193 |
194 | logger.info("✅ Pipeline executed successfully!")
195 | logger.info(f"📊 Data records: {len(df):,}")
196 | logger.info(f"🎯 Quality score: {dqr_results['overall_score']:.1%}")
197 |
198 | return 0
199 |
200 | except Exception as e:
201 | logger.error(f"❌ Pipeline execution failed: {e}")
202 | return 1
203 |
204 |
205 | if __name__ == "__main__":
206 | exit_code = asyncio.run(main())
207 | sys.exit(exit_code)
208 |
--------------------------------------------------------------------------------
/pydhis2/pipeline/config.py:
--------------------------------------------------------------------------------
1 | """Pipeline configuration models"""
2 |
3 | from datetime import datetime
4 | from typing import Any, Dict, List, Optional
5 |
6 | from pydantic import BaseModel, Field
7 |
8 |
9 | class StepConfig(BaseModel):
10 | """Pipeline step configuration"""
11 |
12 | type: str = Field(..., description="Step type")
13 | name: str = Field(..., description="Step name")
14 | depends_on: Optional[List[str]] = Field(None, description="Dependent steps")
15 | enabled: bool = Field(True, description="Whether the step is enabled")
16 | timeout: Optional[int] = Field(None, description="Timeout in seconds")
17 | retry_count: int = Field(0, description="Number of retries")
18 |
19 | # Step-specific parameters
20 | params: Dict[str, Any] = Field(default_factory=dict, description="Step parameters")
21 |
22 | # Input/Output
23 | input: Optional[str] = Field(None, description="Input file or data")
24 | output: Optional[str] = Field(None, description="Output file")
25 |
26 | class Config:
27 | extra = "allow" # Allow extra fields
28 |
29 |
30 | class PipelineConfig(BaseModel):
31 | """Pipeline configuration"""
32 |
33 | # Basic information
34 | name: str = Field(..., description="Pipeline name")
35 | description: Optional[str] = Field(None, description="Pipeline description")
36 | version: str = Field("1.0.0", description="Version number")
37 |
38 | # Global configuration
39 | rps: float = Field(8.0, description="Requests per second")
40 | concurrency: int = Field(8, description="Number of concurrent connections")
41 | timeout: int = Field(300, description="Default timeout in seconds")
42 |
43 | # Step configuration
44 | steps: List[StepConfig] = Field(..., description="Pipeline steps")
45 |
46 | # Metadata
47 | metadata: Optional[Dict[str, Any]] = Field(None, description="Pipeline metadata")
48 |
49 | def validate_dependencies(self) -> List[str]:
50 | """Validate step dependencies"""
51 | errors = []
52 | step_names = {step.name for step in self.steps}
53 |
54 | for step in self.steps:
55 | if step.depends_on:
56 | for dep in step.depends_on:
57 | if dep not in step_names:
58 | errors.append(f"Step '{step.name}' depends on a non-existent step '{dep}'")
59 |
60 | return errors
61 |
62 | def get_execution_order(self) -> List[StepConfig]:
63 | """Get the execution order of steps (topological sort)"""
64 | # Simplified topological sort implementation
65 | executed = set()
66 | ordered_steps = []
67 | remaining_steps = [step for step in self.steps if step.enabled]
68 |
69 | while remaining_steps:
70 | # Find steps with no unmet dependencies
71 | ready_steps = []
72 | for step in remaining_steps:
73 | if not step.depends_on or all(dep in executed for dep in step.depends_on):
74 | ready_steps.append(step)
75 |
76 | if not ready_steps:
77 | # Circular dependency or unmet dependency
78 | remaining_names = [step.name for step in remaining_steps]
79 | raise ValueError(f"Detected circular dependency or unmet dependency: {remaining_names}")
80 |
81 | # Add ready steps
82 | for step in ready_steps:
83 | ordered_steps.append(step)
84 | executed.add(step.name)
85 | remaining_steps.remove(step)
86 |
87 | return ordered_steps
88 |
89 | @classmethod
90 | def from_yaml(cls, yaml_content: str) -> 'PipelineConfig':
91 | """Create configuration from YAML"""
92 | import yaml
93 | data = yaml.safe_load(yaml_content)
94 | return cls(**data)
95 |
96 | @classmethod
97 | def from_file(cls, file_path: str) -> 'PipelineConfig':
98 | """Create configuration from a file"""
99 | import yaml
100 | with open(file_path, encoding='utf-8') as f:
101 | data = yaml.safe_load(f)
102 | return cls(**data)
103 |
104 | def to_yaml(self) -> str:
105 | """Convert to YAML format"""
106 | import yaml
107 | return yaml.dump(self.dict(), allow_unicode=True, default_flow_style=False)
108 |
109 | def save_to_file(self, file_path: str) -> None:
110 | """Save to a file"""
111 | with open(file_path, 'w', encoding='utf-8') as f:
112 | f.write(self.to_yaml())
113 |
114 |
115 | class PipelineResult(BaseModel):
116 | """Pipeline execution result"""
117 |
118 | pipeline_name: str
119 | start_time: datetime
120 | end_time: Optional[datetime] = None
121 | status: str = "running" # running, completed, failed, cancelled
122 |
123 | # Step results
124 | step_results: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
125 |
126 | # Error messages
127 | errors: List[str] = Field(default_factory=list)
128 |
129 | # Statistics
130 | total_steps: int = 0
131 | completed_steps: int = 0
132 | failed_steps: int = 0
133 | skipped_steps: int = 0
134 |
135 | @property
136 | def duration(self) -> Optional[float]:
137 | """Execution duration in seconds"""
138 | if self.end_time:
139 | return (self.end_time - self.start_time).total_seconds()
140 | return None
141 |
142 | @property
143 | def success_rate(self) -> float:
144 | """Success rate"""
145 | if self.total_steps == 0:
146 | return 0.0
147 | return self.completed_steps / self.total_steps
148 |
149 | def add_step_result(
150 | self,
151 | step_name: str,
152 | status: str,
153 | start_time: datetime,
154 | end_time: datetime,
155 | output_data: Optional[Dict[str, Any]] = None,
156 | error: Optional[str] = None
157 | ) -> None:
158 | """Add a step result"""
159 | duration = (end_time - start_time).total_seconds()
160 |
161 | self.step_results[step_name] = {
162 | 'status': status,
163 | 'start_time': start_time.isoformat(),
164 | 'end_time': end_time.isoformat(),
165 | 'duration': duration,
166 | 'output_data': output_data or {},
167 | 'error': error
168 | }
169 |
170 | # Update statistics
171 | if status == 'completed':
172 | self.completed_steps += 1
173 | elif status == 'failed':
174 | self.failed_steps += 1
175 | if error:
176 | self.errors.append(f"Step '{step_name}': {error}")
177 | elif status == 'skipped':
178 | self.skipped_steps += 1
179 |
180 | def to_dict(self) -> Dict[str, Any]:
181 | """Convert to a dictionary"""
182 | return {
183 | 'pipeline_name': self.pipeline_name,
184 | 'start_time': self.start_time.isoformat(),
185 | 'end_time': self.end_time.isoformat() if self.end_time else None,
186 | 'status': self.status,
187 | 'duration': self.duration,
188 | 'success_rate': self.success_rate,
189 | 'total_steps': self.total_steps,
190 | 'completed_steps': self.completed_steps,
191 | 'failed_steps': self.failed_steps,
192 | 'skipped_steps': self.skipped_steps,
193 | 'step_results': self.step_results,
194 | 'errors': self.errors
195 | }
196 |
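# Example (illustrative): a YAML document accepted by PipelineConfig, loadable
# via PipelineConfig.from_file(). The step "type" values are placeholders and
# must match step types registered in the step registry.
#
#   name: immunization_pull
#   description: Pull analytics data, then run a data quality review
#   rps: 8.0
#   concurrency: 8
#   steps:
#     - type: analytics_pull
#       name: pull
#       params:
#         dx: b6mCG9sphIT
#         ou: qzGX4XdWufs
#         pe: "2023"
#       output: analytics.parquet
#     - type: dqr
#       name: quality_review
#       depends_on: [pull]
#       input: analytics.parquet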
--------------------------------------------------------------------------------
/pydhis2/core/auth.py:
--------------------------------------------------------------------------------
1 | """Authentication module - Support for Basic, Token, PAT and other auth methods"""
2 |
3 | import base64
4 | from abc import ABC, abstractmethod
5 | from typing import Dict, Optional, Tuple, Union
6 |
7 | import aiohttp
8 |
9 | from pydhis2.core.errors import AuthenticationError
10 | from pydhis2.core.types import AuthMethod
11 |
12 |
13 | class AuthProvider(ABC):
14 | """Authentication provider abstract base class"""
15 |
16 | @abstractmethod
17 | async def get_headers(self) -> Dict[str, str]:
18 | """Get authentication headers"""
19 | pass
20 |
21 | @abstractmethod
22 | async def refresh_if_needed(self) -> bool:
23 | """If needed, refresh the authentication. Returns whether it was refreshed"""
24 | pass
25 |
26 | @abstractmethod
27 | async def is_valid(self) -> bool:
28 | """Check if the authentication is valid"""
29 | pass
30 |
31 |
32 | class BasicAuthProvider(AuthProvider):
33 | """Basic authentication provider"""
34 |
35 | def __init__(self, username: str, password: str):
36 | self.username = username
37 | self.password = password
38 | self._auth_header = self._encode_basic_auth(username, password)
39 |
40 | @staticmethod
41 | def _encode_basic_auth(username: str, password: str) -> str:
42 | """Encode Basic authentication"""
43 | credentials = f"{username}:{password}"
44 | encoded = base64.b64encode(credentials.encode('utf-8')).decode('ascii')
45 | return f"Basic {encoded}"
46 |
47 | async def get_headers(self) -> Dict[str, str]:
48 | """Get authentication headers"""
49 | return {"Authorization": self._auth_header}
50 |
51 | async def refresh_if_needed(self) -> bool:
52 | """Basic auth does not need to be refreshed"""
53 | return False
54 |
55 | async def is_valid(self) -> bool:
56 | """Basic auth is always valid (assuming credentials are correct)"""
57 | return True
58 |
59 |
60 | class TokenAuthProvider(AuthProvider):
61 | """Token authentication provider"""
62 |
63 | def __init__(self, token: str, token_type: str = "Bearer"):
64 | self.token = token
65 | self.token_type = token_type
66 | self._auth_header = f"{token_type} {token}"
67 |
68 | async def get_headers(self) -> Dict[str, str]:
69 | """Get authentication headers"""
70 | return {"Authorization": self._auth_header}
71 |
72 | async def refresh_if_needed(self) -> bool:
73 | """Token auth does not need to be refreshed (simple implementation)"""
74 | return False
75 |
76 | async def is_valid(self) -> bool:
77 | """Token auth is always valid (assuming token is correct)"""
78 | return True
79 |
80 |
81 | class PATAuthProvider(AuthProvider):
82 | """Personal Access Token authentication provider"""
83 |
84 | def __init__(self, pat_token: str):
85 | self.pat_token = pat_token
86 | self._auth_header = f"Bearer {pat_token}"
87 |
88 | async def get_headers(self) -> Dict[str, str]:
89 | """Get authentication headers"""
90 | return {"Authorization": self._auth_header}
91 |
92 | async def refresh_if_needed(self) -> bool:
93 | """PAT auth does not need to be refreshed"""
94 | return False
95 |
96 | async def is_valid(self) -> bool:
97 | """PAT auth is always valid (assuming token is correct)"""
98 | return True
99 |
100 |
101 | class SessionAuthProvider(AuthProvider):
102 | """Session authentication provider (supports JSESSIONID, etc.)"""
103 |
104 | def __init__(self, session: aiohttp.ClientSession, base_url: str):
105 | self.session = session
106 | self.base_url = base_url
107 | self._authenticated = False
108 |
109 | async def login(self, username: str, password: str) -> None:
110 | """Login to get a session"""
111 | login_url = f"{self.base_url}/dhis-web-commons-security/login.action"
112 |
113 | async with self.session.post(
114 | login_url,
115 | data={
116 | 'j_username': username,
117 | 'j_password': password
118 | }
119 | ) as response:
120 | if response.status == 200:
121 | self._authenticated = True
122 | else:
123 | raise AuthenticationError(f"Login failed with status {response.status}")
124 |
125 | async def get_headers(self) -> Dict[str, str]:
126 | """Get authentication headers (session auth relies on cookies)"""
127 | return {}
128 |
129 | async def refresh_if_needed(self) -> bool:
130 |         """Probe /api/me to check whether the session is still active"""
131 | # Simple implementation: check the /api/me endpoint
132 | try:
133 | async with self.session.get(f"{self.base_url}/api/me") as response:
134 | if response.status == 401:
135 | self._authenticated = False
136 | return False
137 | return True
138 | except Exception:
139 | self._authenticated = False
140 | return False
141 |
142 | async def is_valid(self) -> bool:
143 | """Check if the session is valid"""
144 | return self._authenticated
145 |
146 |
147 | def create_auth_provider(
148 | auth: Union[Tuple[str, str], str],
149 | auth_method: AuthMethod = AuthMethod.BASIC,
150 | session: Optional[aiohttp.ClientSession] = None,
151 | base_url: Optional[str] = None
152 | ) -> AuthProvider:
153 | """Factory function: create an authentication provider based on configuration"""
154 |
155 | if auth_method == AuthMethod.BASIC:
156 | if not isinstance(auth, tuple) or len(auth) != 2:
157 | raise ValueError("Basic authentication requires a (username, password) tuple")
158 | return BasicAuthProvider(auth[0], auth[1])
159 |
160 | elif auth_method == AuthMethod.TOKEN:
161 | if not isinstance(auth, str):
162 | raise ValueError("Token authentication requires a string token")
163 | return TokenAuthProvider(auth)
164 |
165 | elif auth_method == AuthMethod.PAT:
166 | if not isinstance(auth, str):
167 | raise ValueError("PAT authentication requires a string token")
168 | return PATAuthProvider(auth)
169 |
170 | else:
171 | raise ValueError(f"Unsupported authentication method: {auth_method}")
172 |
173 |
174 | class AuthManager:
175 | """Authentication manager - manages authentication providers and refresh logic"""
176 |
177 | def __init__(self, auth_provider: AuthProvider):
178 | self.auth_provider = auth_provider
179 | self._last_refresh_check = 0
180 | self._refresh_interval = 300 # Check every 5 minutes
181 |
182 | async def get_auth_headers(self) -> Dict[str, str]:
183 | """Get authentication headers, refreshing if necessary"""
184 | import time
185 |
186 | current_time = time.time()
187 | if current_time - self._last_refresh_check > self._refresh_interval:
188 | await self.auth_provider.refresh_if_needed()
189 | self._last_refresh_check = current_time
190 |
191 | return await self.auth_provider.get_headers()
192 |
193 | async def validate_auth(self) -> bool:
194 | """Validate if the authentication is valid"""
195 | return await self.auth_provider.is_valid()
196 |
197 | async def force_refresh(self) -> bool:
198 | """Force a refresh of the authentication"""
199 | return await self.auth_provider.refresh_if_needed()
200 |
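# Usage sketch (illustrative): create a provider via the factory and fetch the
# headers through AuthManager, using only the APIs defined above.
#
#   provider = create_auth_provider(("admin", "district"), AuthMethod.BASIC)
#   manager = AuthManager(provider)
#   headers = await manager.get_auth_headers()
#   # headers == {'Authorization': 'Basic YWRtaW46ZGlzdHJpY3Q='}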
--------------------------------------------------------------------------------
/pydhis2/core/types.py:
--------------------------------------------------------------------------------
1 | """Type definitions and configuration models"""
2 |
3 | from enum import Enum
4 | from typing import Any, Dict, List, Optional, Tuple, Union
5 |
6 | from pydantic import BaseModel, Field, validator
7 |
8 |
9 | class AuthMethod(str, Enum):
10 | """Authentication method enumeration"""
11 | BASIC = "basic"
12 | TOKEN = "token"
13 | PAT = "pat" # Personal Access Token
14 |
15 |
16 | class RetryStrategy(str, Enum):
17 | """Retry strategy enumeration"""
18 | EXPONENTIAL = "exponential"
19 | LINEAR = "linear"
20 | FIXED = "fixed"
21 |
22 |
23 | class DHIS2Config(BaseModel):
24 | """
25 | Configuration model for the DHIS2 client.
26 | """
27 | base_url: str = Field(..., description="Base URL of the DHIS2 instance")
28 | auth: Optional[Union[Tuple[str, str], str]] = Field(None, description="Authentication: tuple for basic auth or string for token")
29 | api_version: Optional[Union[int, str]] = Field(None, description="DHIS2 API version")
30 | user_agent: str = Field("pydhis2/0.2.0", description="User-Agent for requests")
31 |
32 | # Timeout settings (total) - Increased default for more resilience
33 | timeout: float = Field(60.0, description="Total request timeout in seconds")
34 |
35 | # Concurrency and rate limiting
36 | rps: float = Field(10.0, description="Requests per second limit", gt=0)
37 | concurrency: int = Field(10, description="Maximum concurrent connections", gt=0)
38 |
39 | # Compression and caching
40 | compression: bool = Field(True, description="Whether to enable gzip compression")
41 | enable_cache: bool = Field(True, description="Whether to enable caching")
42 | cache_ttl: int = Field(3600, description="Cache TTL in seconds", gt=0)
43 |
44 | # Retry configuration - Increased defaults for more resilience
45 | max_retries: int = Field(5, description="Maximum retry attempts", ge=0)
46 | retry_strategy: RetryStrategy = Field(RetryStrategy.EXPONENTIAL, description="Retry strategy")
47 | retry_base_delay: float = Field(1.5, description="Base retry delay in seconds", gt=0)
48 | retry_backoff_factor: float = Field(2.0, description="Backoff factor", gt=1.0)
49 | retry_on_status: List[int] = Field(
50 | [429, 500, 502, 503, 504], description="HTTP status codes that trigger a retry"
51 | )
52 |
53 | @validator('base_url')
54 | def validate_base_url(cls, v):
55 | """Validate and normalize base URL"""
56 | if not v.startswith(('http://', 'https://')):
57 | raise ValueError('Base URL must start with http:// or https://')
58 | # Remove trailing slash
59 | return v.rstrip('/')
60 |
61 | @validator('auth')
62 | def validate_auth(cls, v):
63 | """Validate authentication"""
64 | if v is None:
65 | return v
66 | if isinstance(v, tuple):
67 | if len(v) != 2:
68 | raise ValueError('Authentication tuple must have exactly 2 elements (username, password)')
69 | return v
70 | if isinstance(v, str):
71 | return v
72 | raise ValueError('Authentication must be a tuple or string')
73 |
74 | @validator('timeout')
75 | def validate_timeout(cls, v):
76 | """Validate timeout"""
77 | if v <= 0:
78 | raise ValueError('Timeout must be positive')
79 | return v
80 |
81 | @property
82 | def auth_method(self) -> AuthMethod:
83 | """Get authentication method"""
84 | if self.auth is None:
85 | return AuthMethod.BASIC # Default fallback
86 | if isinstance(self.auth, tuple):
87 | return AuthMethod.BASIC
88 | return AuthMethod.TOKEN
89 |
90 | class Config:
91 | frozen = True
92 | use_enum_values = True
93 |
94 |
95 | class PaginationConfig(BaseModel):
96 | """Pagination configuration"""
97 |
98 | page_size: int = Field(200, description="Default page size", gt=0, le=10000)
99 | max_pages: Optional[int] = Field(None, description="Maximum page limit")
100 | use_paging: bool = Field(True, description="Whether to enable paging")
101 |
102 |
103 | class AnalyticsQuery(BaseModel):
104 | """Analytics query configuration"""
105 |
106 | dx: Union[str, List[str]] = Field(..., description="Data dimension (indicators/data elements)")
107 | ou: Union[str, List[str]] = Field(..., description="Organization units")
108 | pe: Union[str, List[str]] = Field(..., description="Period dimension")
109 | co: Optional[Union[str, List[str]]] = Field(None, description="Category option combinations")
110 | ao: Optional[Union[str, List[str]]] = Field(None, description="Attribute option combinations")
111 |
112 | output_id_scheme: str = Field("UID", description="Output ID scheme")
113 | display_property: str = Field("NAME", description="Display property")
114 | skip_meta: bool = Field(False, description="Skip metadata")
115 | skip_data: bool = Field(False, description="Skip data")
116 | skip_rounding: bool = Field(False, description="Skip rounding")
117 |
118 | def to_params(self) -> Dict[str, Any]:
119 | """Convert to request parameters"""
120 | params = {}
121 | dimensions = []
122 |
123 | # Process dimensions - use correct DHIS2 Analytics API format
124 | for dim in ['dx', 'ou', 'pe', 'co', 'ao']:
125 | value = getattr(self, dim)
126 | if value is not None:
127 | if isinstance(value, list):
128 | dimensions.append(f'{dim}:{";".join(value)}')
129 | else:
130 | dimensions.append(f'{dim}:{value}')
131 |
132 | # Add dimensions as multiple dimension parameters
133 | if dimensions:
134 | params['dimension'] = dimensions
135 |
136 | # Other parameters
137 | params.update({
138 | 'outputIdScheme': self.output_id_scheme,
139 | 'displayProperty': self.display_property,
140 | 'skipMeta': str(self.skip_meta).lower(),
141 | 'skipData': str(self.skip_data).lower(),
142 | 'skipRounding': str(self.skip_rounding).lower(),
143 | })
144 |
145 | return params
146 |
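# Example (illustrative UIDs): AnalyticsQuery serializes to DHIS2 Analytics
# request parameters as follows.
#
#   q = AnalyticsQuery(dx=["Uvn6LCg7dVU", "OdiHJayrsKo"], ou="ImspTQPwCqd", pe="2023")
#   q.to_params()
#   # {'dimension': ['dx:Uvn6LCg7dVU;OdiHJayrsKo', 'ou:ImspTQPwCqd', 'pe:2023'],
#   #  'outputIdScheme': 'UID', 'displayProperty': 'NAME',
#   #  'skipMeta': 'false', 'skipData': 'false', 'skipRounding': 'false'}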
147 |
148 | class ImportStrategy(str, Enum):
149 | """Import strategy enumeration"""
150 | CREATE = "CREATE"
151 | UPDATE = "UPDATE"
152 | CREATE_AND_UPDATE = "CREATE_AND_UPDATE"
153 | DELETE = "DELETE"
154 |
155 |
156 | class ImportMode(str, Enum):
157 | """Import mode enumeration"""
158 | COMMIT = "COMMIT"
159 | VALIDATE = "VALIDATE"
160 |
161 |
162 | class ImportConfig(BaseModel):
163 | """Import configuration"""
164 |
165 | strategy: ImportStrategy = Field(
166 | ImportStrategy.CREATE_AND_UPDATE, description="Import strategy"
167 | )
168 | import_mode: ImportMode = Field(ImportMode.COMMIT, description="Import mode")
169 | atomic: bool = Field(True, description="Whether to perform atomic import")
170 | dry_run: bool = Field(False, description="Whether this is a dry run")
171 | chunk_size: int = Field(5000, description="Chunk size", gt=0)
172 | max_chunks: Optional[int] = Field(None, description="Maximum number of chunks")
173 |
174 | # Conflict handling
175 | skip_existing_check: bool = Field(False, description="Skip existing check")
176 | skip_audit: bool = Field(False, description="Skip audit")
177 |
178 | # Performance options
179 | async_import: bool = Field(False, description="Whether to perform async import")
180 | force: bool = Field(False, description="Force import")
181 |
182 |
183 | class DataFrameFormat(str, Enum):
184 | """DataFrame output format"""
185 | PANDAS = "pandas"
186 | ARROW = "arrow"
187 | POLARS = "polars"
188 |
189 |
190 | class ExportFormat(str, Enum):
191 | """Export format"""
192 | JSON = "json"
193 | CSV = "csv"
194 | PARQUET = "parquet"
195 | EXCEL = "excel"
196 | FEATHER = "feather"
197 |
--------------------------------------------------------------------------------
/pydhis2/core/errors.py:
--------------------------------------------------------------------------------
1 | """Exception definitions"""
2 |
3 | import json
4 | from typing import Any, Dict, List, Optional
5 |
6 |
7 | class DHIS2Error(Exception):
8 | """DHIS2 SDK base exception"""
9 |
10 | def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
11 | super().__init__(message)
12 | self.message = message
13 | self.details = details or {}
14 |
15 |
16 | class DHIS2HTTPError(DHIS2Error):
17 | """HTTP request exception"""
18 |
19 | def __init__(
20 | self,
21 | status: int,
22 | url: str,
23 | message: Optional[str] = None,
24 | response_data: Optional[Dict[str, Any]] = None,
25 | ):
26 | self.status = status
27 | self.url = url
28 | self.response_data = response_data or {}
29 |
30 | if message is None:
31 | message = f"HTTP {status} error for {url}"
32 |
33 | super().__init__(message, {
34 | 'status': status,
35 | 'url': url,
36 | 'response_data': response_data
37 | })
38 |
39 |
40 | class AllPagesFetchError(DHIS2HTTPError):
41 | """Raised when not all pages could be fetched in an atomic paginated request"""
42 | pass
43 |
44 |
45 | class RateLimitExceeded(DHIS2Error):
46 | """Rate limit exceeded exception"""
47 |
48 | def __init__(
49 | self,
50 | retry_after: Optional[float] = None,
51 | current_rate: Optional[float] = None,
52 | limit: Optional[float] = None
53 | ):
54 | self.retry_after = retry_after
55 | self.current_rate = current_rate
56 | self.limit = limit
57 |
58 | message = "Rate limit exceeded"
59 | if retry_after:
60 | message += f", retry after {retry_after}s"
61 | if current_rate and limit:
62 | message += f" (current: {current_rate:.2f}, limit: {limit:.2f})"
63 |
64 | super().__init__(message, {
65 | 'retry_after': retry_after,
66 | 'current_rate': current_rate,
67 | 'limit': limit
68 | })
69 |
70 |
71 | class RetryExhausted(DHIS2Error):
72 | """Retry attempts exhausted exception"""
73 |
74 | def __init__(
75 | self,
76 | max_retries: int,
77 | last_error: Optional[Exception] = None,
78 | attempt_details: Optional[List[Dict[str, Any]]] = None
79 | ):
80 | self.max_retries = max_retries
81 | self.last_error = last_error
82 | self.attempt_details = attempt_details or []
83 |
84 | message = f"Retry exhausted after {max_retries} attempts"
85 | if last_error:
86 | message += f", last error: {last_error}"
87 |
88 | super().__init__(message, {
89 | 'max_retries': max_retries,
90 | 'last_error': str(last_error) if last_error else None,
91 | 'attempt_details': attempt_details
92 | })
93 |
94 |
95 | class ImportConflictError(DHIS2Error):
96 | """Import conflict exception"""
97 |
98 | def __init__(
99 | self,
100 | conflicts: List[Dict[str, Any]],
101 | import_summary: Optional[Dict[str, Any]] = None
102 | ):
103 | self.conflicts = conflicts
104 | self.import_summary = import_summary or {}
105 |
106 | conflict_count = len(conflicts)
107 | message = f"Import failed with {conflict_count} conflict(s)"
108 |
109 | super().__init__(message, {
110 | 'conflicts': conflicts,
111 | 'import_summary': import_summary,
112 | 'conflict_count': conflict_count
113 | })
114 |
115 |
116 | class AuthenticationError(DHIS2Error):
117 | """Authentication failed exception"""
118 |
119 | def __init__(self, message: str = "Authentication failed"):
120 | super().__init__(message)
121 |
122 |
123 | class AuthorizationError(DHIS2Error):
124 | """Authorization failed exception"""
125 |
126 | def __init__(self, message: str = "Authorization failed", required_permission: Optional[str] = None):
127 | self.required_permission = required_permission
128 |
129 | if required_permission:
130 | message += f", required permission: {required_permission}"
131 |
132 | super().__init__(message, {'required_permission': required_permission})
133 |
134 |
135 | class ValidationError(DHIS2Error):
136 | """Data validation exception"""
137 |
138 | def __init__(
139 | self,
140 | message: str,
141 | field: Optional[str] = None,
142 | value: Optional[Any] = None,
143 | validation_errors: Optional[List[Dict[str, Any]]] = None
144 | ):
145 | self.field = field
146 | self.value = value
147 | self.validation_errors = validation_errors or []
148 |
149 | super().__init__(message, {
150 | 'field': field,
151 | 'value': value,
152 | 'validation_errors': validation_errors
153 | })
154 |
155 |
156 | class TimeoutError(DHIS2HTTPError):
157 | """Raised on request timeout"""
158 |
159 | def __init__(
160 | self,
161 | timeout_type: str,
162 | timeout_value: float,
163 | url: str = "unknown",
164 | status: int = 408
165 | ):
166 | self.timeout_type = timeout_type
167 | self.timeout_value = timeout_value
168 | message = f"{timeout_type} timeout after {timeout_value} seconds"
169 | super().__init__(status, url, message)
170 |
171 |
172 | class DataFormatError(DHIS2Error):
173 | """Data format exception"""
174 |
175 | def __init__(
176 | self,
177 | message: str,
178 | expected_format: Optional[str] = None,
179 | actual_format: Optional[str] = None,
180 | data_sample: Optional[Any] = None
181 | ):
182 | self.expected_format = expected_format
183 | self.actual_format = actual_format
184 | self.data_sample = data_sample
185 |
186 | super().__init__(message, {
187 | 'expected_format': expected_format,
188 | 'actual_format': actual_format,
189 | 'data_sample': str(data_sample)[:200] if data_sample else None
190 | })
191 |
192 |
193 | class MetadataError(DHIS2Error):
194 | """Metadata related exception"""
195 |
196 | def __init__(
197 | self,
198 | message: str,
199 | object_type: Optional[str] = None,
200 | object_id: Optional[str] = None
201 | ):
202 | self.object_type = object_type
203 | self.object_id = object_id
204 |
205 | super().__init__(message, {
206 | 'object_type': object_type,
207 | 'object_id': object_id
208 | })
209 |
210 |
211 | def format_dhis2_error(error_data: Dict[str, Any]) -> str:
212 | """Format DHIS2 server error message"""
213 | if not error_data:
214 | return "Unknown DHIS2 error"
215 |
216 | # Try to extract standard error format
217 | if 'message' in error_data:
218 | return error_data['message']
219 |
220 | if 'error' in error_data:
221 | error_info = error_data['error']
222 | if isinstance(error_info, dict):
223 | return error_info.get('message', str(error_info))
224 | return str(error_info)
225 |
226 | # Try to extract conflict information
227 | if 'conflicts' in error_data:
228 | conflicts = error_data['conflicts']
229 | if conflicts and isinstance(conflicts, list):
230 | first_conflict = conflicts[0]
231 | if isinstance(first_conflict, dict):
232 | return first_conflict.get('object', str(first_conflict))
233 |
234 | # Fallback to JSON string
235 | try:
236 | return json.dumps(error_data, indent=2)[:500]
237 | except (TypeError, ValueError):
238 | return str(error_data)[:500]
239 |
--------------------------------------------------------------------------------
/pydhis2/testing/data_generator.py:
--------------------------------------------------------------------------------
1 | """Test data generator for DHIS2 API responses"""
2 |
3 | import random
4 | import uuid
5 | from datetime import datetime, timedelta
6 | from typing import Any, Dict, List, Optional
7 |
8 |
9 | class TestDataGenerator:
10 | """Generate test data for DHIS2 API responses"""
11 |
12 | def __init__(self, seed: int = 42):
13 | """Initialize with a random seed for reproducible data"""
14 | random.seed(seed)
15 | self.seed = seed
16 |
17 | def generate_org_units(self, count: int = 10) -> List[Dict[str, str]]:
18 | """Generate organization unit test data"""
19 | org_units = []
20 |
21 | for i in range(count):
22 | org_units.append({
23 | "id": f"OU{i:03d}{uuid.uuid4().hex[:8]}",
24 | "name": f"Test Health Facility {i+1}",
25 | "code": f"HF_{i+1:03d}",
26 | "level": str(random.randint(3, 5)),
27 | "path": f"/ROOT/DISTRICT{random.randint(1,5)}/HF_{i+1:03d}"
28 | })
29 |
30 | return org_units
31 |
32 | def generate_data_elements(self, count: int = 5) -> List[Dict[str, str]]:
33 | """Generate data element test data"""
34 | element_names = [
35 | "BCG doses given",
36 | "DPT-HepB-Hib 1 doses given",
37 | "DPT-HepB-Hib 3 doses given",
38 | "Measles doses given",
39 | "Polio 3 doses given"
40 | ]
41 |
42 | data_elements = []
43 | for i in range(min(count, len(element_names))):
44 | data_elements.append({
45 | "id": f"DE{i:03d}{uuid.uuid4().hex[:8]}",
46 | "name": element_names[i],
47 | "code": f"DE_{i+1:03d}",
48 | "valueType": "INTEGER"
49 | })
50 |
51 | return data_elements
52 |
53 | def generate_periods(self, start_year: int = 2023, months: int = 12) -> List[str]:
54 | """Generate period test data"""
55 | periods = []
56 |
57 | for month in range(1, months + 1):
58 | periods.append(f"{start_year}{month:02d}")
59 |
60 | return periods
61 |
62 | def generate_analytics_response(
63 | self,
64 | data_elements: List[Dict[str, str]],
65 | org_units: List[Dict[str, str]],
66 | periods: List[str],
67 | include_nulls: bool = True,
68 | null_rate: float = 0.1
69 | ) -> Dict[str, Any]:
70 | """Generate Analytics API response"""
71 | headers = [
72 | {"name": "dx", "column": "Data", "type": "TEXT"},
73 | {"name": "pe", "column": "Period", "type": "TEXT"},
74 | {"name": "ou", "column": "Organisation unit", "type": "TEXT"},
75 | {"name": "value", "column": "Value", "type": "NUMBER"}
76 | ]
77 |
78 | rows = []
79 |
80 | for de in data_elements:
81 | for period in periods:
82 | for ou in org_units:
83 | # Generate realistic values
84 | if include_nulls and random.random() < null_rate:
85 | continue # Skip this combination (null value)
86 |
87 | # Generate values based on data element type
88 | if "BCG" in de["name"]:
89 | value = str(random.randint(80, 120))
90 | elif "DPT" in de["name"]:
91 | value = str(random.randint(70, 110))
92 | elif "Measles" in de["name"]:
93 | value = str(random.randint(60, 100))
94 | else:
95 | value = str(random.randint(50, 150))
96 |
97 | rows.append([de["id"], period, ou["id"], value])
98 |
99 | return {
100 | "headers": headers,
101 | "rows": rows,
102 | "metaData": {
103 | "items": {},
104 | "dimensions": {}
105 | },
106 | "width": len(headers),
107 | "height": len(rows)
108 | }
109 |
110 | def generate_datavaluesets_response(
111 | self,
112 | data_elements: List[Dict[str, str]],
113 | org_units: List[Dict[str, str]],
114 | periods: List[str],
115 | include_conflicts: bool = False,
116 | conflict_rate: float = 0.05
117 | ) -> Dict[str, Any]:
118 | """Generate DataValueSets API response"""
119 | data_values = []
120 |
121 | for de in data_elements:
122 | for period in periods:
123 | for ou in org_units:
124 |                     if include_conflicts and random.random() < conflict_rate:
125 |                         value = "INVALID_VALUE"  # Deliberately malformed to exercise conflict handling
126 |                     elif "BCG" in de["name"]:
127 |                         value = str(random.randint(80, 120))
128 |                     elif "DPT" in de["name"]:
129 |                         value = str(random.randint(70, 110))
130 |                     else:
131 |                         value = str(random.randint(50, 150))
132 | data_value = {
133 | "dataElement": de["id"],
134 | "period": period,
135 | "orgUnit": ou["id"],
136 | "value": value,
137 | "lastUpdated": datetime.now().isoformat(),
138 | "created": (datetime.now() - timedelta(days=random.randint(1, 30))).isoformat(),
139 | "storedBy": "test_user"
140 | }
141 |
142 | data_values.append(data_value)
143 |
144 | return {"dataValues": data_values}
145 |
146 | def generate_tracker_events(
147 | self,
148 | program_id: str,
149 | program_stage_id: str,
150 | org_units: List[Dict[str, str]],
151 | event_count: int = 100
152 | ) -> Dict[str, Any]:
153 | """Generate Tracker events response"""
154 | events = []
155 |
156 | for i in range(event_count):
157 | org_unit = random.choice(org_units)
158 | event_date = datetime.now() - timedelta(days=random.randint(0, 365))
159 |
160 | event = {
161 | "event": f"EVENT{i:03d}{uuid.uuid4().hex[:8]}",
162 | "program": program_id,
163 | "programStage": program_stage_id,
164 | "orgUnit": org_unit["id"],
165 | "orgUnitName": org_unit["name"],
166 | "status": random.choice(["ACTIVE", "COMPLETED", "SCHEDULE"]),
167 | "occurredAt": event_date.isoformat(),
168 | "createdAt": event_date.isoformat(),
169 | "updatedAt": event_date.isoformat(),
170 | "dataValues": [
171 | {
172 | "dataElement": f"DE{j:03d}{uuid.uuid4().hex[:4]}",
173 | "value": str(random.randint(1, 100))
174 | }
175 | for j in range(random.randint(1, 5))
176 | ]
177 | }
178 |
179 | events.append(event)
180 |
181 | return {
182 | "instances": events,
183 | "page": {
184 | "page": 1,
185 | "pageSize": event_count,
186 | "pageCount": 1,
187 | "total": event_count
188 | }
189 | }
190 |
191 | def generate_import_summary(
192 | self,
193 | total: int,
194 | imported: Optional[int] = None,
195 | updated: Optional[int] = None,
196 | ignored: Optional[int] = None,
197 | conflict_count: int = 0
198 | ) -> Dict[str, Any]:
199 | """Generate import summary response"""
200 | if imported is None:
201 | imported = int(total * 0.7)
202 | if updated is None:
203 | updated = int(total * 0.2)
204 | if ignored is None:
205 |             ignored = max(0, total - imported - updated - conflict_count)  # clamp to avoid negatives
206 |
207 | conflicts = []
208 | for i in range(conflict_count):
209 | conflicts.append({
210 | "object": f"CONFLICT{i:03d}",
211 | "property": "value",
212 | "value": "invalid_value",
213 | "message": f"Test conflict {i+1}",
214 | "errorCode": "E1234"
215 | })
216 |
217 | return {
218 | "status": "SUCCESS" if conflict_count == 0 else "WARNING",
219 | "imported": imported,
220 | "updated": updated,
221 | "ignored": ignored,
222 | "total": total,
223 | "conflicts": conflicts
224 | }
225 |
226 |
--------------------------------------------------------------------------------
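
A minimal usage sketch for TestDataGenerator (illustrative, not part of the module above): the seed makes payloads reproducible, and null_rate controls how many dx/pe/ou combinations are skipped.

    from pydhis2.testing.data_generator import TestDataGenerator

    gen = TestDataGenerator(seed=42)
    org_units = gen.generate_org_units(count=3)
    data_elements = gen.generate_data_elements(count=2)
    periods = gen.generate_periods(start_year=2023, months=6)

    # 2 elements x 6 periods x 3 org units = 36 combinations, ~10% dropped as nulls
    response = gen.generate_analytics_response(
        data_elements, org_units, periods, include_nulls=True, null_rate=0.1
    )
    print(response["width"], response["height"])  # 4 columns, <= 36 rows
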
/pydhis2/io/arrow.py:
--------------------------------------------------------------------------------
1 | """Arrow format converter"""
2 |
3 | from pathlib import Path
4 | from typing import Any, Dict, List, Optional, Union
5 |
6 | import pandas as pd
7 | import pyarrow as pa
8 | import pyarrow.parquet as pq
9 |
10 |
11 | class ArrowConverter:
12 | """Arrow format converter"""
13 |
14 | def __init__(self):
15 | self.compression = 'snappy' # Default compression format
16 |
17 | def from_pandas(self, df: pd.DataFrame, schema: Optional[pa.Schema] = None) -> pa.Table:
18 | """Convert from Pandas DataFrame to Arrow Table"""
19 | if df.empty:
20 | return pa.table({})
21 |
22 |         if schema is not None:
23 |             try:
24 |                 return pa.Table.from_pandas(df, schema=schema, preserve_index=False)
25 |             except (pa.ArrowInvalid, pa.ArrowTypeError):
26 |                 # The supplied schema does not fit the frame; fall back to
27 |                 # letting Arrow infer one instead of failing outright
28 |                 pass
29 |         # Schema-less conversion (also the fallback path)
30 |         return pa.Table.from_pandas(df, preserve_index=False)
31 |
32 | def to_pandas(self, table: pa.Table) -> pd.DataFrame:
33 | """Convert from Arrow Table to Pandas DataFrame"""
34 | return table.to_pandas()
35 |
36 | def save_parquet(
37 | self,
38 | table: pa.Table,
39 | file_path: Union[str, Path],
40 |         compression: Optional[str] = None,
41 | partition_cols: Optional[List[str]] = None,
42 | **kwargs
43 | ) -> str:
44 | """Save as Parquet file"""
45 | file_path = Path(file_path)
46 |
47 | # Ensure directory exists
48 | file_path.parent.mkdir(parents=True, exist_ok=True)
49 |
50 | compression = compression or self.compression
51 |
52 | if partition_cols:
53 | # Partitioned write
54 | pq.write_to_dataset(
55 | table,
56 | root_path=str(file_path.parent),
57 | partition_cols=partition_cols,
58 | compression=compression,
59 | **kwargs
60 | )
61 | else:
62 | # Single file write
63 | pq.write_table(
64 | table,
65 | str(file_path),
66 | compression=compression,
67 | **kwargs
68 | )
69 |
70 | return str(file_path)
71 |
72 | def load_parquet(self, file_path: Union[str, Path]) -> pa.Table:
73 | """Load from Parquet file"""
74 | return pq.read_table(str(file_path))
75 |
76 | def save_feather(self, table: pa.Table, file_path: Union[str, Path]) -> str:
77 | """Save as Feather file"""
78 | file_path = Path(file_path)
79 | file_path.parent.mkdir(parents=True, exist_ok=True)
80 |
81 | # Convert to pandas then save (Feather v2 format)
82 | df = self.to_pandas(table)
83 | df.to_feather(str(file_path))
84 |
85 | return str(file_path)
86 |
87 | def load_feather(self, file_path: Union[str, Path]) -> pa.Table:
88 | """Load from Feather file"""
89 | df = pd.read_feather(str(file_path))
90 | return self.from_pandas(df)
91 |
92 | def get_schema_info(self, table: pa.Table) -> Dict[str, Any]:
93 | """Get Schema information"""
94 | schema = table.schema
95 |
96 | info = {
97 | 'num_columns': len(schema),
98 | 'num_rows': len(table),
99 | 'columns': []
100 | }
101 |
102 | for field in schema:
103 | column_info = {
104 | 'name': field.name,
105 | 'type': str(field.type),
106 | 'nullable': field.nullable,
107 | 'metadata': dict(field.metadata) if field.metadata else {}
108 | }
109 | info['columns'].append(column_info)
110 |
111 | return info
112 |
113 | def optimize_schema(self, df: pd.DataFrame) -> pa.Schema:
114 | """Optimize Schema to reduce storage space"""
115 | fields = []
116 |
117 | for column in df.columns:
118 | dtype = df[column].dtype
119 | field_type = None
120 |
121 | if pd.api.types.is_integer_dtype(dtype):
122 | # Choose the smallest integer type
123 | min_val = df[column].min()
124 | max_val = df[column].max()
125 |
126 | if pd.isna(min_val) or pd.isna(max_val):
127 | field_type = pa.int64()
128 | elif min_val >= 0:
129 | # Unsigned integer
130 | if max_val <= 255:
131 | field_type = pa.uint8()
132 | elif max_val <= 65535:
133 | field_type = pa.uint16()
134 | elif max_val <= 4294967295:
135 | field_type = pa.uint32()
136 | else:
137 | field_type = pa.uint64()
138 | else:
139 | # Signed integer
140 | if min_val >= -128 and max_val <= 127:
141 | field_type = pa.int8()
142 | elif min_val >= -32768 and max_val <= 32767:
143 | field_type = pa.int16()
144 | elif min_val >= -2147483648 and max_val <= 2147483647:
145 | field_type = pa.int32()
146 | else:
147 | field_type = pa.int64()
148 |
149 |             elif pd.api.types.is_float_dtype(dtype):
150 |                 # Keep float32; downcast float64 only when all values fit the
151 |                 # float32 range (~3.4e38) -- an all-NaN max compares False here
152 |                 if dtype == 'float32' or df[column].abs().max() <= 3.4e38:
153 |                     field_type = pa.float32()
154 |                 else:
155 |                     field_type = pa.float64()
156 |
157 | elif pd.api.types.is_datetime64_any_dtype(dtype):
158 | field_type = pa.timestamp('ns')
159 |
160 | elif pd.api.types.is_bool_dtype(dtype):
161 | field_type = pa.bool_()
162 |
163 | else:
164 | # String type - check if suitable for dictionary encoding
165 |                 unique_ratio = df[column].nunique() / max(len(df), 1)  # guard against empty frames
166 | if unique_ratio < 0.5: # If unique value ratio is low, use dictionary encoding
167 | field_type = pa.dictionary(pa.int32(), pa.string())
168 | else:
169 | field_type = pa.string()
170 |
171 | # Check for missing values
172 | nullable = df[column].isna().any()
173 |
174 | fields.append(pa.field(column, field_type, nullable=nullable))
175 |
176 | return pa.schema(fields)
177 |
178 | def compress_table(self, table: pa.Table) -> pa.Table:
179 | """Compress table (dictionary encoding, etc.)"""
180 | columns = []
181 |
182 | for i in range(table.num_columns):
183 | column = table.column(i)
184 |
185 | # Apply dictionary encoding to string columns
186 | if pa.types.is_string(column.type):
187 | # Calculate unique value ratio
188 | unique_count = pa.compute.count_distinct(column).as_py()
189 | total_count = len(column)
190 |
191 |                 if total_count and unique_count / total_count < 0.5:  # Low unique value ratio
192 | try:
193 | # Apply dictionary encoding
194 | encoded_column = pa.compute.dictionary_encode(column)
195 | columns.append(encoded_column)
196 | continue
197 | except Exception:
198 | pass
199 |
200 | columns.append(column)
201 |
202 | # Build new schema
203 | fields = []
204 | for i, column in enumerate(columns):
205 | field_name = table.schema.field(i).name
206 | fields.append(pa.field(field_name, column.type))
207 |
208 | new_schema = pa.schema(fields)
209 |
210 | return pa.table(columns, schema=new_schema)
211 |
212 | def estimate_size(self, table: pa.Table) -> Dict[str, Any]:
213 | """Estimate table size"""
214 | # Get memory usage
215 | memory_size = table.nbytes
216 |
217 | # Estimate compressed size (based on empirical values)
218 | estimated_parquet_size = memory_size * 0.2 # Parquet usually compresses to 20%
219 | estimated_feather_size = memory_size * 0.8 # Feather compresses to 80%
220 |
221 | return {
222 | 'memory_bytes': memory_size,
223 | 'memory_mb': memory_size / 1024 / 1024,
224 | 'estimated_parquet_mb': estimated_parquet_size / 1024 / 1024,
225 | 'estimated_feather_mb': estimated_feather_size / 1024 / 1024,
226 | 'num_rows': len(table),
227 | 'num_columns': table.num_columns,
228 | }
229 |
--------------------------------------------------------------------------------
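
A round-trip sketch for ArrowConverter (the output path is hypothetical): optimize_schema picks narrow integer types and dictionary-encodes low-cardinality strings, which is where most of the Parquet size savings come from.

    import pandas as pd

    from pydhis2.io.arrow import ArrowConverter

    converter = ArrowConverter()
    df = pd.DataFrame({
        "orgUnit": ["OU1", "OU1", "OU1", "OU2", "OU2"],  # 2/5 unique -> dictionary-encoded
        "value": [10, 20, 30, 40, 50],                   # fits 0..255 -> uint8
    })
    table = converter.from_pandas(df, schema=converter.optimize_schema(df))
    print(converter.estimate_size(table))

    path = converter.save_parquet(table, "out/values.parquet")  # hypothetical path
    print(converter.get_schema_info(converter.load_parquet(path)))
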
/pydhis2/testing/mock_server.py:
--------------------------------------------------------------------------------
1 | """Mock DHIS2 server for testing"""
2 |
3 | import asyncio
4 | import json
5 | import logging
6 | from dataclasses import dataclass
7 | from typing import Any, Dict, List, Optional
8 |
9 | from aiohttp import web
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | @dataclass
15 | class MockResponse:
16 | """Mock response configuration"""
17 | status: int = 200
18 | data: Optional[Dict[str, Any]] = None
19 | headers: Optional[Dict[str, str]] = None
20 | delay: float = 0.0 # Simulated response delay in seconds
21 | fail_count: int = 0 # Number of times to fail before succeeding
22 |
23 |
24 | class MockDHIS2Server:
25 | """Mock DHIS2 server for testing client behavior"""
26 |
27 | def __init__(self, host: str = "localhost", port: int = 8080):
28 | self.host = host
29 | self.port = port
30 | self.app = web.Application()
31 | self.runner: Optional[web.AppRunner] = None
32 | self.site: Optional[web.TCPSite] = None
33 |
34 | # Response configurations
35 | self.responses: Dict[str, MockResponse] = {}
36 | self.request_log: List[Dict[str, Any]] = []
37 |
38 | # Setup default routes
39 | self._setup_routes()
40 |
41 |     def _setup_routes(self) -> None:
42 |         """Setup default API routes; specific paths are registered before the catch-all so they take precedence"""
43 |         self.app.router.add_route("GET", "/api/me", self._handle_me)
44 |         self.app.router.add_route("GET", "/api/system/info", self._handle_system_info)
45 |         self.app.router.add_route("*", "/api/{path:.*}", self._handle_api_request)
46 |
47 | async def _handle_api_request(self, request: web.Request) -> web.Response:
48 | """Handle generic API requests"""
49 | path = request.match_info.get('path', '')
50 | method = request.method
51 | full_path = f"/{method.lower()}/api/{path}"
52 |
53 | # Log the request
54 | self.request_log.append({
55 | 'method': method,
56 | 'path': f"/api/{path}",
57 | 'query': dict(request.query),
58 | 'headers': dict(request.headers),
59 |             'timestamp': asyncio.get_running_loop().time()
60 | })
61 |
62 | # Check if we have a configured response
63 | mock_response = self.responses.get(full_path)
64 | if not mock_response:
65 | # Default response
66 | mock_response = MockResponse(
67 | status=200,
68 | data={"message": f"Mock response for {full_path}"}
69 | )
70 |
71 | # Simulate delay
72 | if mock_response.delay > 0:
73 | await asyncio.sleep(mock_response.delay)
74 |
75 | # Handle failure simulation
76 | if mock_response.fail_count > 0:
77 | mock_response.fail_count -= 1
78 | return web.Response(
79 | status=500,
80 | text=json.dumps({"error": "Simulated server error"}),
81 | headers={'Content-Type': 'application/json'}
82 | )
83 |
84 | # Return configured response
85 | headers = mock_response.headers or {'Content-Type': 'application/json'}
86 | response_data = mock_response.data or {}
87 |
88 | return web.Response(
89 | status=mock_response.status,
90 | text=json.dumps(response_data, ensure_ascii=False),
91 | headers=headers
92 | )
93 |
94 | async def _handle_me(self, request: web.Request) -> web.Response:
95 | """Handle /api/me endpoint"""
96 | return web.json_response({
97 | "id": "test_user_id",
98 | "name": "Test User",
99 | "username": "test_user",
100 | "email": "test@example.com",
101 | "authorities": ["F_DATAVALUE_ADD", "F_ANALYTICS_READ"]
102 | })
103 |
104 | async def _handle_system_info(self, request: web.Request) -> web.Response:
105 | """Handle /api/system/info endpoint"""
106 | return web.json_response({
107 | "version": "2.41.0",
108 | "buildTime": "2024-01-01T00:00:00.000",
109 | "serverTimeZoneId": "UTC",
110 | "contextPath": ""
111 | })
112 |
113 | def configure_response(
114 | self,
115 | method: str,
116 | path: str,
117 | status: int = 200,
118 | data: Optional[Dict[str, Any]] = None,
119 | headers: Optional[Dict[str, str]] = None,
120 | delay: float = 0.0,
121 | fail_count: int = 0
122 | ) -> None:
123 | """Configure a mock response for a specific endpoint"""
124 | full_path = f"/{method.lower()}{path}"
125 | self.responses[full_path] = MockResponse(
126 | status=status,
127 | data=data,
128 | headers=headers,
129 | delay=delay,
130 | fail_count=fail_count
131 | )
132 |
133 | def configure_endpoint(
134 | self,
135 | method: str,
136 | path: str,
137 | data: Dict[str, Any],
138 | status: int = 200,
139 | delay: float = 0.0,
140 | fail_count: int = 0
141 | ) -> None:
142 | """Configure endpoint response (alias for configure_response)"""
143 | self.configure_response(method, path, status, data, delay=delay, fail_count=fail_count)
144 |
145 | def configure_analytics_response(
146 | self,
147 | headers: List[Dict[str, str]],
148 | rows: List[List[str]],
149 | delay: float = 0.0
150 | ) -> None:
151 | """Configure Analytics endpoint response"""
152 | self.configure_response(
153 | "GET",
154 | "/api/analytics",
155 | data={
156 | "headers": headers,
157 | "rows": rows,
158 | "metaData": {"items": {}, "dimensions": {}},
159 | "width": len(headers),
160 | "height": len(rows)
161 | },
162 | delay=delay
163 | )
164 |
165 | def configure_datavaluesets_response(
166 | self,
167 | data_values: List[Dict[str, str]],
168 | delay: float = 0.0
169 | ) -> None:
170 | """Configure DataValueSets endpoint response"""
171 | self.configure_response(
172 | "GET",
173 | "/api/dataValueSets",
174 | data={"dataValues": data_values},
175 | delay=delay
176 | )
177 |
178 | def configure_import_response(
179 | self,
180 | imported: int = 0,
181 | updated: int = 0,
182 | ignored: int = 0,
183 | conflicts: Optional[List[Dict[str, Any]]] = None,
184 | delay: float = 0.0
185 | ) -> None:
186 | """Configure import response"""
187 | conflicts = conflicts or []
188 | total = imported + updated + ignored + len(conflicts)
189 |
190 | self.configure_response(
191 | "POST",
192 | "/api/dataValueSets",
193 | data={
194 | "status": "SUCCESS" if not conflicts else "WARNING",
195 | "imported": imported,
196 | "updated": updated,
197 | "ignored": ignored,
198 | "total": total,
199 | "conflicts": conflicts
200 | },
201 | delay=delay
202 | )
203 |
204 | async def start(self) -> str:
205 | """Start the mock server"""
206 | self.runner = web.AppRunner(self.app)
207 | await self.runner.setup()
208 |
209 | self.site = web.TCPSite(self.runner, self.host, self.port)
210 | await self.site.start()
211 |
212 | base_url = f"http://{self.host}:{self.port}"
213 | logger.info(f"Mock DHIS2 server started at {base_url}")
214 | return base_url
215 |
216 | async def stop(self) -> None:
217 | """Stop the mock server"""
218 | if self.site:
219 | await self.site.stop()
220 | if self.runner:
221 | await self.runner.cleanup()
222 | logger.info("Mock DHIS2 server stopped")
223 |
224 | async def __aenter__(self):
225 | """Async context manager entry"""
226 | return await self.start()
227 |
228 | async def __aexit__(self, exc_type, exc_val, exc_tb):
229 | """Async context manager exit"""
230 | await self.stop()
231 |
232 | def get_request_log(self) -> List[Dict[str, Any]]:
233 | """Get logged requests"""
234 | return self.request_log.copy()
235 |
236 | def clear_request_log(self) -> None:
237 | """Clear request log"""
238 | self.request_log.clear()
239 |
240 |     def get_request_count(self, method: Optional[str] = None, path: Optional[str] = None) -> int:
241 | """Get count of requests matching criteria"""
242 | filtered_requests = self.request_log
243 |
244 | if method:
245 | filtered_requests = [r for r in filtered_requests if r['method'].upper() == method.upper()]
246 |
247 | if path:
248 | filtered_requests = [r for r in filtered_requests if r['path'] == path]
249 |
250 | return len(filtered_requests)
251 |
--------------------------------------------------------------------------------
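
A self-contained sketch of the mock server in use (port and payload are illustrative): configure a canned Analytics response, hit it with a plain aiohttp session, and read the request log.

    import asyncio

    import aiohttp

    from pydhis2.testing.mock_server import MockDHIS2Server

    async def main() -> None:
        server = MockDHIS2Server(port=8089)  # illustrative port
        base_url = await server.start()
        try:
            server.configure_analytics_response(
                headers=[{"name": "dx", "column": "Data", "type": "TEXT"}],
                rows=[["DE1"]],
            )
            async with aiohttp.ClientSession() as session:
                async with session.get(f"{base_url}/api/analytics") as resp:
                    print(resp.status, await resp.json())
            print("requests seen:", server.get_request_count(method="GET"))
        finally:
            await server.stop()

    asyncio.run(main())
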
/pydhis2/testing/network_simulator.py:
--------------------------------------------------------------------------------
1 | """Network condition simulator for testing weak network scenarios"""
2 |
3 | import asyncio
4 | import random
5 | from dataclasses import dataclass
6 | from datetime import datetime, timedelta
7 | from typing import Any, Dict, List, Optional
8 |
9 | import aiohttp
10 | import pandas as pd
11 |
12 | from pydhis2.testing.data_generator import TestDataGenerator
13 |
14 |
15 | @dataclass
16 | class NetworkCondition:
17 | """Network condition configuration"""
18 | name: str
19 | latency_ms: int = 50 # Average latency in milliseconds
20 | jitter_ms: int = 10 # Latency jitter
21 | packet_loss_rate: float = 0.0 # Packet loss rate (0.0 - 1.0)
22 | bandwidth_kbps: Optional[int] = None # Bandwidth limit in kbps
23 | timeout_rate: float = 0.0 # Rate of request timeouts (0.0 - 1.0)
24 |
25 |
26 | class NetworkSimulator:
27 | """Simulate various network conditions for testing"""
28 |
29 | # Predefined network conditions
30 | NORMAL = NetworkCondition(
31 | name="normal",
32 | latency_ms=20,
33 | jitter_ms=5,
34 | packet_loss_rate=0.0,
35 | timeout_rate=0.0
36 | )
37 |
38 | SLOW_3G = NetworkCondition(
39 | name="slow_3g",
40 | latency_ms=200,
41 | jitter_ms=50,
42 | packet_loss_rate=0.01,
43 | bandwidth_kbps=400,
44 | timeout_rate=0.02
45 | )
46 |
47 | WEAK_NETWORK = NetworkCondition(
48 | name="weak_network",
49 | latency_ms=400,
50 | jitter_ms=100,
51 | packet_loss_rate=0.03,
52 | bandwidth_kbps=200,
53 | timeout_rate=0.05
54 | )
55 |
56 | VERY_WEAK = NetworkCondition(
57 | name="very_weak",
58 | latency_ms=800,
59 | jitter_ms=200,
60 | packet_loss_rate=0.08,
61 | bandwidth_kbps=100,
62 | timeout_rate=0.10
63 | )
64 |
65 |     def __init__(self, condition: Optional[NetworkCondition] = None):
66 | self.condition = condition or self.NORMAL
67 | self.original_connector_init = None
68 | self.original_request = None
69 |
70 | async def simulate_latency(self) -> None:
71 | """Simulate network latency"""
72 | if self.condition.latency_ms > 0:
73 | # Add base latency plus jitter
74 | base_latency = self.condition.latency_ms / 1000.0
75 | jitter = random.uniform(
76 | -self.condition.jitter_ms / 1000.0,
77 | self.condition.jitter_ms / 1000.0
78 | )
79 | total_latency = max(0, base_latency + jitter)
80 |
81 | if total_latency > 0:
82 | await asyncio.sleep(total_latency)
83 |
84 | def should_drop_packet(self) -> bool:
85 | """Determine if packet should be dropped (simulating packet loss)"""
86 | return random.random() < self.condition.packet_loss_rate
87 |
88 | def should_timeout(self) -> bool:
89 | """Determine if request should timeout"""
90 | return random.random() < self.condition.timeout_rate
91 |
92 | async def simulate_bandwidth_limit(self, data_size: int) -> None:
93 | """Simulate bandwidth limitations"""
94 | if self.condition.bandwidth_kbps and data_size > 0:
95 | # Calculate transfer time based on bandwidth
96 | transfer_time = (data_size * 8) / (self.condition.bandwidth_kbps * 1000)
97 | if transfer_time > 0:
98 | await asyncio.sleep(transfer_time)
99 |
100 | def wrap_session(self, session: aiohttp.ClientSession) -> 'SimulatedSession':
101 | """Wrap an aiohttp session with network simulation"""
102 | return SimulatedSession(session, self)
103 |
104 |
105 | class SimulatedSession:
106 | """Wrapper for aiohttp.ClientSession that simulates network conditions"""
107 |
108 | def __init__(self, session: aiohttp.ClientSession, simulator: NetworkSimulator):
109 | self.session = session
110 | self.simulator = simulator
111 |
112 | async def request(self, method: str, url: str, **kwargs) -> aiohttp.ClientResponse:
113 | """Make a request with network simulation"""
114 | # Simulate latency before request
115 | await self.simulator.simulate_latency()
116 |
117 | # Check for packet loss
118 | if self.simulator.should_drop_packet():
119 | raise aiohttp.ClientConnectionError("Simulated packet loss")
120 |
121 | # Check for timeout
122 | if self.simulator.should_timeout():
123 | raise asyncio.TimeoutError("Simulated network timeout")
124 |
125 |         # Make the actual request (latency was injected above; bandwidth
126 |         # throttling is applied below based on the response size)
127 |         response = await self.session.request(method, url, **kwargs)
128 |
129 | # Simulate bandwidth limitations based on response size
130 | if hasattr(response, 'content_length') and response.content_length:
131 | await self.simulator.simulate_bandwidth_limit(response.content_length)
132 |
133 | return response
134 |
135 | async def get(self, url: str, **kwargs) -> aiohttp.ClientResponse:
136 | """GET request with simulation"""
137 | return await self.request('GET', url, **kwargs)
138 |
139 | async def post(self, url: str, **kwargs) -> aiohttp.ClientResponse:
140 | """POST request with simulation"""
141 | return await self.request('POST', url, **kwargs)
142 |
143 | async def put(self, url: str, **kwargs) -> aiohttp.ClientResponse:
144 | """PUT request with simulation"""
145 | return await self.request('PUT', url, **kwargs)
146 |
147 | async def delete(self, url: str, **kwargs) -> aiohttp.ClientResponse:
148 | """DELETE request with simulation"""
149 | return await self.request('DELETE', url, **kwargs)
150 |
151 | async def close(self) -> None:
152 | """Close the underlying session"""
153 | await self.session.close()
154 |
155 |
156 | class BenchmarkDataGenerator:
157 | """Generate data specifically for benchmark testing"""
158 |
159 | def __init__(self, seed: int = 42):
160 | self.generator = TestDataGenerator(seed)
161 |
162 | def generate_large_dataset(
163 | self,
164 | org_unit_count: int = 100,
165 | data_element_count: int = 20,
166 | period_count: int = 12,
167 | records_per_combination: int = 1
168 | ) -> pd.DataFrame:
169 | """Generate a large dataset for performance testing"""
170 | org_units = self.generator.generate_org_units(org_unit_count)
171 | data_elements = self.generator.generate_data_elements(data_element_count)
172 | periods = self.generator.generate_periods(months=period_count)
173 |
174 | data_values = []
175 |
176 | for de in data_elements:
177 | for period in periods:
178 | for ou in org_units:
179 | for _ in range(records_per_combination):
180 | value = random.randint(1, 1000)
181 |
182 | data_values.append({
183 | 'dataElement': de['id'],
184 | 'period': period,
185 | 'orgUnit': ou['id'],
186 | 'value': value,
187 | 'lastUpdated': datetime.now().isoformat(),
188 | 'created': (datetime.now() - timedelta(days=random.randint(1, 30))).isoformat()
189 | })
190 |
191 | return pd.DataFrame(data_values)
192 |
193 | def generate_conflicted_dataset(
194 | self,
195 | base_data: pd.DataFrame,
196 | conflict_rate: float = 0.05
197 | ) -> pd.DataFrame:
198 | """Generate a dataset with intentional conflicts for testing"""
199 | conflicted_data = base_data.copy()
200 |
201 | # Randomly select records to make conflicting
202 | conflict_count = int(len(conflicted_data) * conflict_rate)
203 | conflict_indices = random.sample(range(len(conflicted_data)), conflict_count)
204 |
205 | for idx in conflict_indices:
206 | # Create conflicts by duplicating records with different values
207 | conflicted_row = conflicted_data.iloc[idx].copy()
208 | conflicted_row['value'] = 'INVALID_VALUE' # This will cause conflicts
209 | conflicted_data = pd.concat([conflicted_data, conflicted_row.to_frame().T], ignore_index=True)
210 |
211 | return conflicted_data
212 |
213 | def generate_performance_test_scenarios(self) -> List[Dict[str, Any]]:
214 | """Generate different scenarios for performance testing"""
215 | scenarios = [
216 | {
217 | "name": "small_dataset",
218 | "description": "Small dataset for basic functionality",
219 | "org_units": 5,
220 | "data_elements": 3,
221 | "periods": 6,
222 | "expected_records": 5 * 3 * 6
223 | },
224 | {
225 | "name": "medium_dataset",
226 | "description": "Medium dataset for typical workload",
227 | "org_units": 50,
228 | "data_elements": 10,
229 | "periods": 12,
230 | "expected_records": 50 * 10 * 12
231 | },
232 | {
233 | "name": "large_dataset",
234 | "description": "Large dataset for stress testing",
235 | "org_units": 200,
236 | "data_elements": 25,
237 | "periods": 24,
238 | "expected_records": 200 * 25 * 24
239 | }
240 | ]
241 |
242 | return scenarios
243 |
--------------------------------------------------------------------------------
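
Because SimulatedSession only wraps an existing aiohttp session, it composes with the mock server above or any live instance. A sketch (URL illustrative) under the predefined WEAK_NETWORK profile; injected packet loss and timeouts surface as ordinary aiohttp/asyncio exceptions:

    import asyncio

    import aiohttp

    from pydhis2.testing.network_simulator import NetworkSimulator

    async def main() -> None:
        simulator = NetworkSimulator(NetworkSimulator.WEAK_NETWORK)
        async with aiohttp.ClientSession() as session:
            sim_session = simulator.wrap_session(session)
            try:
                resp = await sim_session.get("http://localhost:8089/api/me")  # illustrative URL
                async with resp:
                    print(resp.status)
            except (aiohttp.ClientConnectionError, asyncio.TimeoutError) as exc:
                print(f"simulated (or real) network failure: {exc}")

    asyncio.run(main())
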
/pydhis2/endpoints/metadata.py:
--------------------------------------------------------------------------------
1 | """Metadata endpoint - Metadata import, export, and management"""
2 |
3 | import json
4 | from typing import Any, Dict, Optional, Union
5 |
6 | import pandas as pd
7 |
8 | from pydhis2.core.errors import ImportConflictError
9 | from pydhis2.core.types import ExportFormat
10 |
11 |
12 | class MetadataImportSummary:
13 | """Metadata import summary"""
14 |
15 | def __init__(self, summary_data: Dict[str, Any]):
16 | self.raw_data = summary_data
17 | self.status = summary_data.get('status', 'UNKNOWN')
18 | self.stats = summary_data.get('stats', {})
19 | self.type_reports = summary_data.get('typeReports', [])
20 |
21 |         # Aggregate overall statistics. Prefer the top-level stats block that
22 |         # DHIS2 returns for metadata imports; it is authoritative and avoids
23 |         # guessing outcomes from individual object reports.
24 |         self.imported = self.stats.get('created', 0)
25 |         self.updated = self.stats.get('updated', 0)
26 |         self.deleted = self.stats.get('deleted', 0)
27 |         self.ignored = self.stats.get('ignored', 0)
28 |         self.total = self.stats.get('total', 0)
29 |
30 |         # Fall back to summing per-type stats when no top-level block exists
31 |         if not self.stats:
32 |             for type_report in self.type_reports:
33 |                 type_stats = type_report.get('stats', {})
34 |                 self.imported += type_stats.get('created', 0)
35 |                 self.updated += type_stats.get('updated', 0)
36 |                 self.deleted += type_stats.get('deleted', 0)
37 |                 self.ignored += type_stats.get('ignored', 0)
38 |                 self.total += type_stats.get('total', 0)
41 |
42 | @property
43 | def success_rate(self) -> float:
44 | """Success rate"""
45 | if self.total == 0:
46 | return 0.0
47 | return (self.imported + self.updated) / self.total
48 |
49 | @property
50 | def has_errors(self) -> bool:
51 | """Check if there are errors"""
52 | return self.status in ['ERROR', 'WARNING']
53 |
54 | def get_conflicts_df(self) -> pd.DataFrame:
55 | """Get conflicts as a DataFrame"""
56 | conflicts = []
57 |
58 | for type_report in self.type_reports:
59 | object_type = type_report.get('klass', 'Unknown')
60 | object_reports = type_report.get('objectReports', [])
61 |
62 | for report in object_reports:
63 | error_reports = report.get('errorReports', [])
64 | for error in error_reports:
65 | conflicts.append({
66 | 'object_type': object_type,
67 | 'uid': report.get('uid', ''),
68 | 'index': report.get('index', ''),
69 | 'error_code': error.get('errorCode', ''),
70 | 'message': error.get('message', ''),
71 | 'property': error.get('property', ''),
72 | 'value': error.get('value', ''),
73 | })
74 |
75 | return pd.DataFrame(conflicts)
76 |
77 |
78 | class MetadataEndpoint:
79 | """Metadata API endpoint"""
80 |
81 | def __init__(self, client):
82 | self.client = client
83 |
84 | async def export(
85 | self,
86 | filter: Optional[Dict[str, str]] = None,
87 | fields: str = ":owner",
88 | defaults: str = "INCLUDE",
89 | download: bool = False,
90 | **kwargs
91 | ) -> Dict[str, Any]:
92 | """Export metadata"""
93 | params = {
94 | 'fields': fields,
95 | 'defaults': defaults,
96 | 'download': str(download).lower(),
97 | }
98 |
99 | # Add filters
100 | if filter:
101 | for key, value in filter.items():
102 | params[f'{key}:filter'] = value
103 |
104 | # Add other parameters
105 | params.update(kwargs)
106 |
107 | return await self.client.get('/api/metadata', params=params)
108 |
109 | async def import_(
110 | self,
111 | metadata: Union[Dict[str, Any], str],
112 | atomic: bool = True,
113 | dry_run: bool = False,
114 | strategy: str = "CREATE_AND_UPDATE",
115 | merge_mode: str = "REPLACE",
116 | flush_mode: str = "AUTO",
117 | skip_sharing: bool = False,
118 | skip_validation: bool = False,
119 | **kwargs
120 | ) -> MetadataImportSummary:
121 | """Import metadata"""
122 | params = {
123 | 'atomic': str(atomic).lower(),
124 | 'dryRun': str(dry_run).lower(),
125 | 'importStrategy': strategy,
126 | 'mergeMode': merge_mode,
127 | 'flushMode': flush_mode,
128 | 'skipSharing': str(skip_sharing).lower(),
129 | 'skipValidation': str(skip_validation).lower(),
130 | }
131 |
132 | # Add other parameters
133 | params.update(kwargs)
134 |
135 | # Prepare data
136 | if isinstance(metadata, str):
137 | metadata_dict = json.loads(metadata)
138 | else:
139 | metadata_dict = metadata
140 |
141 | response = await self.client.post(
142 | '/api/metadata',
143 | data=metadata_dict,
144 | params=params
145 | )
146 |
147 | summary = MetadataImportSummary(response)
148 |
149 | # Check for errors
150 | if summary.has_errors and not dry_run:
151 | conflicts_df = summary.get_conflicts_df()
152 | if not conflicts_df.empty:
153 | conflicts = conflicts_df.to_dict('records')
154 | raise ImportConflictError(
155 | conflicts=conflicts,
156 | import_summary=summary.raw_data
157 | )
158 |
159 | return summary
160 |
161 | async def get_schemas(self) -> Dict[str, Any]:
162 | """Get all schemas"""
163 | return await self.client.get('/api/schemas')
164 |
165 | async def get_schema(self, schema_name: str) -> Dict[str, Any]:
166 | """Get a specific schema"""
167 | return await self.client.get(f'/api/schemas/{schema_name}')
168 |
169 | async def get_data_elements(
170 | self,
171 | fields: str = "id,name,code,valueType",
172 | filter: Optional[Dict[str, str]] = None,
173 | paging: bool = False,
174 | **kwargs
175 | ) -> Dict[str, Any]:
176 | """Get data elements"""
177 | params = {
178 | 'fields': fields,
179 | 'paging': str(paging).lower(),
180 | }
181 |
182 |         if filter:  # NOTE: a dict keeps only the last condition; DHIS2 expects one `filter` param per condition
183 |             for key, value in filter.items():
184 |                 params['filter'] = f'{key}:eq:{value}'
185 |
186 | params.update(kwargs)
187 |
188 | return await self.client.get('/api/dataElements', params=params)
189 |
190 | async def get_indicators(
191 | self,
192 | fields: str = "id,name,code,numerator,denominator",
193 | filter: Optional[Dict[str, str]] = None,
194 | paging: bool = False,
195 | **kwargs
196 | ) -> Dict[str, Any]:
197 | """Get indicators"""
198 | params = {
199 | 'fields': fields,
200 | 'paging': str(paging).lower(),
201 | }
202 |
203 |         if filter:  # NOTE: a dict keeps only the last condition; DHIS2 expects one `filter` param per condition
204 |             for key, value in filter.items():
205 |                 params['filter'] = f'{key}:eq:{value}'
206 |
207 | params.update(kwargs)
208 |
209 | return await self.client.get('/api/indicators', params=params)
210 |
211 | async def get_organisation_units(
212 | self,
213 | fields: str = "id,name,code,level,path",
214 | filter: Optional[Dict[str, str]] = None,
215 | paging: bool = False,
216 | **kwargs
217 | ) -> Dict[str, Any]:
218 | """Get organisation units"""
219 | params = {
220 | 'fields': fields,
221 | 'paging': str(paging).lower(),
222 | }
223 |
224 |         if filter:  # NOTE: a dict keeps only the last condition; DHIS2 expects one `filter` param per condition
225 |             for key, value in filter.items():
226 |                 params['filter'] = f'{key}:eq:{value}'
227 |
228 | params.update(kwargs)
229 |
230 | return await self.client.get('/api/organisationUnits', params=params)
231 |
232 | async def get_option_sets(
233 | self,
234 | fields: str = "id,name,code,options[id,name,code]",
235 | filter: Optional[Dict[str, str]] = None,
236 | paging: bool = False,
237 | **kwargs
238 | ) -> Dict[str, Any]:
239 | """Get option sets"""
240 | params = {
241 | 'fields': fields,
242 | 'paging': str(paging).lower(),
243 | }
244 |
245 |         if filter:  # NOTE: a dict keeps only the last condition; DHIS2 expects one `filter` param per condition
246 |             for key, value in filter.items():
247 |                 params['filter'] = f'{key}:eq:{value}'
248 |
249 | params.update(kwargs)
250 |
251 | return await self.client.get('/api/optionSets', params=params)
252 |
253 |     async def validate_metadata(
254 |         self,
255 |         metadata: Union[Dict[str, Any], str]
256 |     ) -> MetadataImportSummary:
257 |         """Validate metadata via a dry-run import (nothing is persisted)"""
258 |         return await self.import_(metadata, dry_run=True)
259 |
260 | async def export_to_file(
261 | self,
262 | file_path: str,
263 | format: ExportFormat = ExportFormat.JSON,
264 | **export_kwargs
265 | ) -> str:
266 | """Export metadata to file"""
267 | metadata = await self.export(**export_kwargs)
268 |
269 | if format == ExportFormat.JSON:
270 | with open(file_path, 'w', encoding='utf-8') as f:
271 | json.dump(metadata, f, indent=2, ensure_ascii=False)
272 | else:
273 | raise ValueError(f"Metadata export only supports JSON format, got: {format}")
274 |
275 | return file_path
276 |
277 | async def import_from_file(
278 | self,
279 | file_path: str,
280 | **import_kwargs
281 | ) -> MetadataImportSummary:
282 | """Import metadata from file"""
283 | with open(file_path, encoding='utf-8') as f:
284 | metadata = json.load(f)
285 |
286 | return await self.import_(metadata, **import_kwargs)
287 |
--------------------------------------------------------------------------------
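
A usage sketch for MetadataEndpoint (the client object is assumed: anything exposing async get/post, such as pyDHIS2's own client): validate with a dry run, inspect conflicts, then import for real.

    from pydhis2.endpoints.metadata import MetadataEndpoint

    async def sync_metadata(client) -> None:
        """client: any object with async get/post (e.g. pyDHIS2's client)."""
        metadata = MetadataEndpoint(client)

        payload = {
            "dataElements": [{
                "name": "ANC 1st visit",  # illustrative element
                "shortName": "ANC 1",
                "valueType": "INTEGER",
                "domainType": "AGGREGATE",
                "aggregationType": "SUM",
            }]
        }

        summary = await metadata.validate_metadata(payload)  # dry run, nothing persisted
        print(summary.status, f"{summary.success_rate:.0%} ok")
        if summary.has_errors:
            print(summary.get_conflicts_df())
        else:
            await metadata.import_(payload, atomic=True)
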