├── reproduce.sh ├── tests ├── __init__.py ├── unit │ ├── __init__.py │ ├── test_config.py │ └── test_cli.py ├── integration │ └── __init__.py └── conftest.py ├── Anonymous_ Reproduction_ Package └── README_REVIEWERS.md ├── image.png ├── pydhis2 ├── cli │ └── __init__.py ├── observe │ ├── __init__.py │ └── logging.py ├── __main__.py ├── templates │ ├── {{cookiecutter.project_slug}} │ │ ├── env.example │ │ ├── requirements.txt │ │ ├── configs │ │ │ ├── dhis2.yml │ │ │ └── dqr.yml │ │ ├── pipelines │ │ │ └── example.yml │ │ ├── Makefile.cmd │ │ ├── README.md │ │ └── scripts │ │ │ └── run_pipeline.py │ └── cookiecutter.json ├── dqr │ ├── __init__.py │ └── config.yml ├── endpoints │ ├── __init__.py │ ├── analytics.py │ └── metadata.py ├── pipeline │ ├── __init__.py │ ├── executor.py │ └── config.py ├── io │ ├── __init__.py │ └── arrow.py ├── core │ ├── __init__.py │ ├── auth.py │ ├── types.py │ └── errors.py ├── testing │ ├── __init__.py │ ├── demo_test.py │ ├── data_generator.py │ ├── mock_server.py │ └── network_simulator.py └── __init__.py ├── docs ├── changelog.md ├── contributing.md ├── requirements.txt ├── api │ ├── client.rst │ ├── io.rst │ ├── types.rst │ └── endpoints.rst ├── installation.rst ├── quickstart.rst ├── metadata.rst ├── cli.rst ├── configuration.rst ├── conf.py ├── tracker.rst ├── datavaluesets.rst ├── analytics.rst ├── dqr.rst └── index.rst ├── pytest.ini ├── .readthedocs.yml ├── .github ├── ISSUE_TEMPLATE │ ├── documentation.md │ ├── feature_request.md │ └── bug_report.md ├── workflows │ ├── docs.yml │ └── ci.yml └── PULL_REQUEST_TEMPLATE.md ├── CITATION.cff ├── dhis2_probe_summary.json ├── experiment └── Methods.md ├── CONTRIBUTING.md ├── CHANGELOG.md ├── .gitignore ├── pyproject.toml └── CODE_OF_CONDUCT.md /reproduce.sh: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Tests package 2 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # Unit tests 2 | -------------------------------------------------------------------------------- /Anonymous_ Reproduction_ Package/README_REVIEWERS.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | """Integration tests package""" 2 | -------------------------------------------------------------------------------- /image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HzaCode/pyDHIS2/HEAD/image.png -------------------------------------------------------------------------------- /pydhis2/cli/__init__.py: -------------------------------------------------------------------------------- 1 | """CLI module - Command line tools""" 2 | 3 | from pydhis2.cli.main import app 4 | 5 | __all__ = ["app"] 6 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | See [CHANGELOG.md](https://github.com/HzaCode/pyDHIS2/blob/main/CHANGELOG.md) in the repository for version history. 4 | 5 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | We welcome contributions! Please see [CONTRIBUTING.md](https://github.com/HzaCode/pyDHIS2/blob/main/CONTRIBUTING.md) in the repository. 4 | 5 | -------------------------------------------------------------------------------- /pydhis2/observe/__init__.py: -------------------------------------------------------------------------------- 1 | """Observability module - Logging and metrics""" 2 | 3 | from pydhis2.observe.logging import get_logger, setup_logging 4 | 5 | __all__ = [ 6 | "setup_logging", 7 | "get_logger", 8 | ] 9 | -------------------------------------------------------------------------------- /pydhis2/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main entry point for pydhis2 CLI 3 | Allows running: python -m pydhis2 [command] 4 | """ 5 | 6 | from pydhis2.cli.main import app 7 | 8 | 9 | def main(): 10 | """Main entry point for CLI""" 11 | app() 12 | 13 | 14 | if __name__ == "__main__": 15 | main() 16 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # Documentation build requirements 2 | sphinx>=7.2.0,<8.0.0 3 | sphinx-rtd-theme>=2.0.0,<3.0.0 4 | sphinx-autodoc-typehints>=1.25.0,<2.0.0 5 | myst-parser>=2.0.0,<3.0.0 6 | sphinx-copybutton>=0.5.2,<1.0.0 7 | sphinxcontrib-napoleon>=0.7,<1.0.0 8 | linkify-it-py>=2.0.0,<3.0.0 9 | 10 | -------------------------------------------------------------------------------- /pydhis2/templates/{{cookiecutter.project_slug}}/env.example: -------------------------------------------------------------------------------- 1 | # DHIS2 Connection Configuration 2 | DHIS2_URL={{ cookiecutter.dhis2_url }} 3 | DHIS2_USERNAME=your_username 4 | DHIS2_PASSWORD=your_password 5 | 6 | # Optional: Rate Limiting Configuration 7 | DHIS2_RPS=8 8 | DHIS2_CONCURRENCY=8 9 | 10 | # Optional: Retry Configuration 11 | DHIS2_MAX_RETRIES=5 12 | -------------------------------------------------------------------------------- /pydhis2/dqr/__init__.py: -------------------------------------------------------------------------------- 1 | """Data Quality Review (DQR) module - WHO-DQR metrics implementation""" 2 | 3 | from pydhis2.dqr.metrics import ( 4 | CompletenessMetrics, 5 | ConsistencyMetrics, 6 | MetricResult, 7 | TimelinessMetrics, 8 | ) 9 | 10 | __all__ = [ 11 | "CompletenessMetrics", 12 | "ConsistencyMetrics", 13 | "TimelinessMetrics", 14 | "MetricResult", 15 | ] 16 | -------------------------------------------------------------------------------- /pydhis2/templates/{{cookiecutter.project_slug}}/requirements.txt: -------------------------------------------------------------------------------- 1 | # Base requirements for the project 2 | pydhis2>=0.2.0 3 | pandas 4 | pyarrow 5 | openpyxl 6 | # Add other dependencies here 7 | 8 | {% if cookiecutter.use_notebooks == "yes" -%} 9 | # Jupyter notebooks 10 | jupyter>=1.0.0 11 | jupyterlab>=3.0.0 12 | matplotlib>=3.5.0 13 | seaborn>=0.11.0 14 | plotly>=5.0.0 15 | {%- endif %} 16 | 17 | # Configuration management 18 | python-dotenv>=0.19.0 19 | -------------------------------------------------------------------------------- /docs/api/client.rst: -------------------------------------------------------------------------------- 1 | Client 2 | ====== 3 | 4 | AsyncDHIS2Client 5 | ---------------- 6 | 7 | .. autoclass:: pydhis2.core.client.AsyncDHIS2Client 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | SyncDHIS2Client 13 | --------------- 14 | 15 | .. autoclass:: pydhis2.core.client.SyncDHIS2Client 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | Helper Functions 21 | ---------------- 22 | 23 | .. autofunction:: pydhis2.get_client 24 | 25 | -------------------------------------------------------------------------------- /pydhis2/endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | """Endpoints module - Wrappers for various DHIS2 API endpoints""" 2 | 3 | from pydhis2.endpoints.analytics import AnalyticsEndpoint 4 | from pydhis2.endpoints.datavaluesets import DataValueSetsEndpoint 5 | from pydhis2.endpoints.metadata import MetadataEndpoint 6 | from pydhis2.endpoints.tracker import TrackerEndpoint 7 | 8 | __all__ = [ 9 | "AnalyticsEndpoint", 10 | "DataValueSetsEndpoint", 11 | "TrackerEndpoint", 12 | "MetadataEndpoint", 13 | ] 14 | -------------------------------------------------------------------------------- /pydhis2/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | """Pipeline configuration and execution module""" 2 | 3 | from .config import PipelineConfig, StepConfig 4 | from .executor import PipelineExecutor 5 | from .steps import AnalyticsStep, DataValueSetsStep, DQRStep, StepRegistry, TrackerStep 6 | 7 | __all__ = [ 8 | 'PipelineConfig', 9 | 'StepConfig', 10 | 'PipelineExecutor', 11 | 'AnalyticsStep', 12 | 'TrackerStep', 13 | 'DataValueSetsStep', 14 | 'DQRStep', 15 | 'StepRegistry' 16 | ] 17 | -------------------------------------------------------------------------------- /pydhis2/io/__init__.py: -------------------------------------------------------------------------------- 1 | """I/O module - Data format conversion and serialization""" 2 | 3 | from pydhis2.io.arrow import ArrowConverter 4 | from pydhis2.io.schema import SchemaManager 5 | from pydhis2.io.to_pandas import ( 6 | AnalyticsDataFrameConverter, 7 | DataValueSetsConverter, 8 | TrackerConverter, 9 | ) 10 | 11 | __all__ = [ 12 | "AnalyticsDataFrameConverter", 13 | "DataValueSetsConverter", 14 | "TrackerConverter", 15 | "ArrowConverter", 16 | "SchemaManager", 17 | ] 18 | -------------------------------------------------------------------------------- /docs/api/io.rst: -------------------------------------------------------------------------------- 1 | I/O Utilities 2 | ============= 3 | 4 | Pandas Integration 5 | ------------------ 6 | 7 | .. automodule:: pydhis2.io.to_pandas 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | Arrow Integration 13 | ----------------- 14 | 15 | .. automodule:: pydhis2.io.arrow 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | Schema Utilities 21 | ---------------- 22 | 23 | .. automodule:: pydhis2.io.schema 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -v --tb=short --strict-markers 3 | asyncio_mode = auto 4 | asyncio_default_fixture_loop_scope = function 5 | testpaths = tests 6 | python_files = test_*.py 7 | python_classes = Test* 8 | python_functions = test_* 9 | markers = 10 | integration: marks tests as integration tests (deselect with '-m "not integration"') 11 | slow: marks tests as slow (deselect with '-m "not slow"') 12 | unit: marks tests as unit tests 13 | filterwarnings = 14 | ignore::DeprecationWarning 15 | ignore::FutureWarning 16 | -------------------------------------------------------------------------------- /docs/api/types.rst: -------------------------------------------------------------------------------- 1 | Types 2 | ===== 3 | 4 | Configuration Types 5 | ------------------- 6 | 7 | DHIS2Config 8 | ~~~~~~~~~~~ 9 | 10 | .. autoclass:: pydhis2.core.types.DHIS2Config 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Query Types 16 | ----------- 17 | 18 | AnalyticsQuery 19 | ~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: pydhis2.core.types.AnalyticsQuery 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | Error Types 27 | ----------- 28 | 29 | .. automodule:: pydhis2.core.errors 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | -------------------------------------------------------------------------------- /pydhis2/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core module - HTTP client, rate limiting, retry, authentication, and other infrastructure""" 2 | 3 | # Export only base types and errors to avoid circular dependencies 4 | from pydhis2.core.errors import ( 5 | DHIS2Error, 6 | DHIS2HTTPError, 7 | ImportConflictError, 8 | RateLimitExceeded, 9 | RetryExhausted, 10 | ) 11 | from pydhis2.core.types import DHIS2Config 12 | 13 | __all__ = [ 14 | "DHIS2Config", 15 | "DHIS2Error", 16 | "DHIS2HTTPError", 17 | "RateLimitExceeded", 18 | "RetryExhausted", 19 | "ImportConflictError", 20 | ] 21 | -------------------------------------------------------------------------------- /pydhis2/templates/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "project_name": "My DHIS2 Analysis Project", 3 | "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_').replace('-', '_') }}", 4 | "project_description": "A data analysis project using pydhis2", 5 | "author_name": "Your Name", 6 | "author_email": "your.email@example.com", 7 | "dhis2_url": "https://play.dhis2.org/2.41", 8 | "python_version": "3.9", 9 | "use_notebooks": "yes", 10 | "use_dqr": "yes", 11 | "use_pipeline": "yes", 12 | "license": ["Apache-2.0", "MIT", "BSD-3-Clause"], 13 | "_extensions": ["jinja2_time.TimeExtension"] 14 | } 15 | -------------------------------------------------------------------------------- /pydhis2/testing/__init__.py: -------------------------------------------------------------------------------- 1 | """Testing utilities module - Mock servers, data generators, and test helpers""" 2 | 3 | from pydhis2.testing.benchmark_utils import BenchmarkRunner, PerformanceProfiler 4 | from pydhis2.testing.data_generator import TestDataGenerator 5 | from pydhis2.testing.mock_server import MockDHIS2Server 6 | from pydhis2.testing.network_simulator import ( 7 | BenchmarkDataGenerator, 8 | NetworkCondition, 9 | NetworkSimulator, 10 | ) 11 | 12 | __all__ = [ 13 | "MockDHIS2Server", 14 | "TestDataGenerator", 15 | "BenchmarkDataGenerator", 16 | "NetworkSimulator", 17 | "NetworkCondition", 18 | "BenchmarkRunner", 19 | "PerformanceProfiler", 20 | ] 21 | -------------------------------------------------------------------------------- /docs/api/endpoints.rst: -------------------------------------------------------------------------------- 1 | Endpoints 2 | ========= 3 | 4 | Analytics Endpoint 5 | ------------------ 6 | 7 | .. autoclass:: pydhis2.endpoints.analytics.AnalyticsEndpoint 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | DataValueSets Endpoint 13 | ---------------------- 14 | 15 | .. autoclass:: pydhis2.endpoints.datavaluesets.DataValueSetsEndpoint 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | Tracker Endpoint 21 | ---------------- 22 | 23 | .. autoclass:: pydhis2.endpoints.tracker.TrackerEndpoint 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | Metadata Endpoint 29 | ----------------- 30 | 31 | .. autoclass:: pydhis2.endpoints.metadata.MetadataEndpoint 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | # Build documentation with Sphinx 7 | sphinx: 8 | configuration: docs/conf.py 9 | fail_on_warning: false 10 | 11 | # Build formats 12 | formats: 13 | - pdf 14 | - epub 15 | 16 | # Python environment 17 | build: 18 | os: ubuntu-22.04 19 | tools: 20 | python: "3.11" 21 | jobs: 22 | post_checkout: 23 | # Cancel building pull requests when new commits are pushed 24 | - | 25 | if [ "$READTHEDOCS_VERSION_TYPE" = "external" ] && git show-ref --verify --quiet "refs/remotes/origin/$READTHEDOCS_VERSION"; then 26 | # Rebase on the target branch if possible 27 | git fetch origin $READTHEDOCS_GIT_IDENTIFIER 28 | fi 29 | 30 | # Python requirements 31 | python: 32 | install: 33 | - requirements: docs/requirements.txt 34 | - method: pip 35 | path: . 36 | extra_requirements: 37 | - dev 38 | 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation Issue 3 | about: Report missing, incorrect, or unclear documentation 4 | title: '[DOCS] ' 5 | labels: documentation 6 | assignees: '' 7 | --- 8 | 9 | ## Documentation Issue Type 10 | - [ ] Missing documentation 11 | - [ ] Incorrect documentation 12 | - [ ] Unclear/confusing documentation 13 | - [ ] Typo or formatting issue 14 | - [ ] Example code doesn't work 15 | 16 | ## Location 17 | Where is the documentation issue? 18 | - Page/Section: [e.g. README.md, API reference for analytics] 19 | - URL (if online): [e.g. https://hzacode.github.io/pydhis2/...] 20 | 21 | ## Description 22 | A clear description of the documentation issue. 23 | 24 | ## Current Documentation 25 | Quote or screenshot the current documentation (if applicable): 26 | ``` 27 | Current text... 28 | ``` 29 | 30 | ## Suggested Improvement 31 | What should the documentation say instead? 32 | ``` 33 | Suggested text... 34 | ``` 35 | 36 | ## Additional context 37 | Add any other context, examples, or suggestions here. 38 | 39 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Requirements 5 | ------------ 6 | 7 | * Python ≥ 3.9 8 | * pip or conda 9 | 10 | From PyPI 11 | --------- 12 | 13 | The easiest way to install pydhis2 is from PyPI: 14 | 15 | .. code-block:: bash 16 | 17 | pip install pydhis2 18 | 19 | From Source 20 | ----------- 21 | 22 | To install from source (for development or latest features): 23 | 24 | .. code-block:: bash 25 | 26 | git clone https://github.com/HzaCode/pyDHIS2.git 27 | cd pyDHIS2 28 | pip install -e . 29 | 30 | Development Installation 31 | ------------------------ 32 | 33 | For development with testing and documentation tools: 34 | 35 | .. code-block:: bash 36 | 37 | git clone https://github.com/HzaCode/pyDHIS2.git 38 | cd pyDHIS2 39 | pip install -e ".[dev]" 40 | 41 | Verify Installation 42 | ------------------- 43 | 44 | Check that pydhis2 is installed correctly: 45 | 46 | .. code-block:: bash 47 | 48 | pydhis2 version 49 | 50 | Run the quick demo to test connectivity: 51 | 52 | .. code-block:: bash 53 | 54 | pydhis2 demo quick 55 | 56 | -------------------------------------------------------------------------------- /pydhis2/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | pydhis2 - Reproducible DHIS2 Python SDK for LMIC scenarios 3 | 4 | An async-first DHIS2 Python SDK with built-in rate limiting and retry mechanisms, featuring: 5 | - One-click conversion to Pandas/Arrow formats 6 | - Built-in WHO-DQR data quality metrics 7 | - CLI + Cookiecutter template support 8 | - Optimized for weak network environments 9 | """ 10 | 11 | # Core types can be imported directly 12 | from pydhis2.core.errors import ( 13 | DHIS2Error, 14 | DHIS2HTTPError, 15 | ImportConflictError, 16 | RateLimitExceeded, 17 | RetryExhausted, 18 | ) 19 | from pydhis2.core.types import DHIS2Config 20 | 21 | 22 | # Lazy import to avoid circular dependencies 23 | def get_client(): 24 | from pydhis2.core.client import AsyncDHIS2Client, SyncDHIS2Client 25 | return AsyncDHIS2Client, SyncDHIS2Client 26 | 27 | __version__ = "0.2.0" 28 | __author__ = "pydhis2 contributors" 29 | 30 | __all__ = [ 31 | "get_client", 32 | "DHIS2Config", 33 | "DHIS2Error", 34 | "DHIS2HTTPError", 35 | "RateLimitExceeded", 36 | "RetryExhausted", 37 | "ImportConflictError", 38 | ] 39 | -------------------------------------------------------------------------------- /pydhis2/templates/{{cookiecutter.project_slug}}/configs/dhis2.yml: -------------------------------------------------------------------------------- 1 | # DHIS2 client configuration 2 | 3 | # Connection settings 4 | connection: 5 | base_url: "{{ cookiecutter.dhis2_url }}" 6 | rps: 8 # Requests per second 7 | concurrency: 8 # Concurrent connections 8 | timeouts: [10, 60, 120] # Connect/read/total timeout (seconds) 9 | compression: true # Enable gzip compression 10 | 11 | # Retry configuration 12 | retry: 13 | max_attempts: 5 # Maximum retry attempts 14 | base_delay: 0.5 # Base delay (seconds) 15 | max_delay: 60.0 # Maximum delay (seconds) 16 | backoff_factor: 2.0 # Backoff factor 17 | jitter: true # Enable jitter 18 | retry_on_status: # HTTP status codes to retry on 19 | - 429 20 | - 500 21 | - 502 22 | - 503 23 | - 504 24 | 25 | # Cache configuration 26 | cache: 27 | enable: true # Enable caching 28 | ttl: 3600 # Cache TTL (seconds) 29 | directory: ".cache" # Cache directory 30 | 31 | # Authentication configuration (environment variables are preferred) 32 | auth: 33 | method: "basic" # Auth method: basic/token/pat 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Suggest an idea for this project 4 | title: '[FEATURE] ' 5 | labels: enhancement 6 | assignees: '' 7 | --- 8 | 9 | ## Is your feature request related to a problem? Please describe. 10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 11 | 12 | ## Describe the solution you'd like 13 | A clear and concise description of what you want to happen. 14 | 15 | ## Describe alternatives you've considered 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | ## Use case 19 | Describe the use case and how this feature would benefit users: 20 | - Who would use this feature? 21 | - What problem does it solve? 22 | - How often would it be used? 23 | 24 | ## Proposed API (if applicable) 25 | ```python 26 | # Example of how the feature might be used 27 | from pydhis2 import get_client, DHIS2Config 28 | 29 | # Your proposed API usage 30 | ``` 31 | 32 | ## Additional context 33 | Add any other context, screenshots, or examples about the feature request here. 34 | 35 | ## Would you be willing to contribute this feature? 36 | - [ ] Yes, I'd like to work on this 37 | - [ ] I can help test it 38 | - [ ] I need help implementing this 39 | 40 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | type: software 4 | title: "pydhis2: A Modern Python SDK for DHIS2" 5 | version: 0.2.0 6 | date-released: "2025-01-01" 7 | abstract: "A next-generation Python library for interacting with DHIS2, the world's largest health information management system. It provides a clean, modern, and efficient API for data extraction, analysis, and management, with a strong emphasis on creating reproducible workflows for scientific research and public health analysis in Low and Middle-Income Country (LMIC) contexts." 8 | 9 | authors: 10 | - family-names: "pydhis2 contributors" 11 | affiliation: "pydhis2 Project" 12 | 13 | repository-code: "https://github.com/HzaCode/pyDHIS2" 14 | url: "https://github.com/HzaCode/pyDHIS2" 15 | license: Apache-2.0 16 | 17 | keywords: 18 | - DHIS2 19 | - health information systems 20 | - public health 21 | - data analysis 22 | - reproducible research 23 | - LMIC 24 | - Python SDK 25 | - epidemiology 26 | - health data 27 | 28 | preferred-citation: 29 | type: software 30 | title: "pydhis2: A Modern Python SDK for DHIS2" 31 | authors: 32 | - family-names: "pydhis2 contributors" 33 | affiliation: "pydhis2 Project" 34 | version: 0.2.0 35 | year: 2025 36 | url: "https://github.com/HzaCode/pyDHIS2" 37 | license: Apache-2.0 38 | 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Create a report to help us improve 4 | title: '[BUG] ' 5 | labels: bug 6 | assignees: '' 7 | --- 8 | 9 | ## Describe the bug 10 | A clear and concise description of what the bug is. 11 | 12 | ## To Reproduce 13 | Steps to reproduce the behavior: 14 | 1. Import '...' 15 | 2. Call function '....' 16 | 3. Pass parameters '....' 17 | 4. See error 18 | 19 | ## Expected behavior 20 | A clear and concise description of what you expected to happen. 21 | 22 | ## Code example 23 | ```python 24 | # Paste your minimal reproducible code example here 25 | from pydhis2 import get_client, DHIS2Config 26 | 27 | # Your code that produces the error 28 | ``` 29 | 30 | ## Error message 31 | ``` 32 | Paste the complete error message/traceback here 33 | ``` 34 | 35 | ## Environment 36 | - **OS**: [e.g. Windows 10, Ubuntu 22.04, macOS 13] 37 | - **Python version**: [e.g. 3.9, 3.10, 3.11] 38 | - **pydhis2 version**: [e.g. 0.2.0] 39 | - **DHIS2 version**: [e.g. 2.38, 2.39, 2.40] 40 | - **Installation method**: [e.g. pip, conda, from source] 41 | 42 | ## Additional context 43 | Add any other context about the problem here, such as: 44 | - Network conditions (if relevant) 45 | - Data volume 46 | - Specific DHIS2 configuration 47 | - Related issues or PRs 48 | 49 | ## Possible solution 50 | If you have ideas on how to fix this, please share them here. 51 | 52 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Documentation 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | workflow_dispatch: 9 | 10 | permissions: 11 | contents: write 12 | 13 | jobs: 14 | build-and-deploy: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: 📥 Checkout main branch 19 | uses: actions/checkout@v4 20 | with: 21 | ref: main 22 | 23 | - name: 🐍 Set up Python 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: '3.11' 27 | 28 | - name: 📦 Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install -r docs/requirements.txt 32 | pip install -e . 33 | 34 | - name: 📚 Build documentation with Sphinx 35 | run: | 36 | cd docs 37 | sphinx-build -b html . _build/html 38 | cd .. 39 | 40 | - name: 🚀 Deploy to gh-pages branch 41 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 42 | uses: peaceiris/actions-gh-pages@v3 43 | with: 44 | github_token: ${{ secrets.GITHUB_TOKEN }} 45 | publish_dir: ./docs/_build/html 46 | publish_branch: gh-pages 47 | force_orphan: true 48 | user_name: 'github-actions[bot]' 49 | user_email: 'github-actions[bot]@users.noreply.github.com' 50 | commit_message: 'Deploy Sphinx docs from main branch' 51 | 52 | -------------------------------------------------------------------------------- /pydhis2/templates/{{cookiecutter.project_slug}}/configs/dqr.yml: -------------------------------------------------------------------------------- 1 | # Data Quality Review configuration 2 | 3 | # Completeness metrics configuration 4 | completeness: 5 | thresholds: 6 | reporting_completeness_pass: 0.90 # Reporting completeness pass threshold 7 | reporting_completeness_warn: 0.70 # Reporting completeness warning threshold 8 | data_element_completeness_pass: 0.90 # Data element completeness pass threshold 9 | data_element_completeness_warn: 0.70 # Data element completeness warning threshold 10 | 11 | # Consistency metrics configuration 12 | consistency: 13 | thresholds: 14 | outlier_threshold: 3.0 # Outlier Z-score threshold 15 | variance_threshold: 0.5 # Coefficient of variation threshold 16 | trend_consistency_pass: 0.80 # Trend consistency pass threshold 17 | trend_consistency_warn: 0.60 # Trend consistency warning threshold 18 | 19 | # Timeliness metrics configuration 20 | timeliness: 21 | thresholds: 22 | submission_timeliness_pass: 0.80 # Submission timeliness pass threshold 23 | submission_timeliness_warn: 0.60 # Submission timeliness warning threshold 24 | max_delay_days: 30 # Maximum acceptable delay in days 25 | 26 | # Report configuration 27 | report: 28 | title: "{{ cookiecutter.project_name }} - Data Quality Review Report" 29 | include_charts: true # Include charts 30 | include_details: true # Include details 31 | language: "en-US" # Report language 32 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main, gh ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | os: [ubuntu-latest, windows-latest, macos-latest] 16 | python-version: ['3.9', '3.10', '3.11'] 17 | 18 | steps: 19 | - name: 📥 Checkout code 20 | uses: actions/checkout@v4 21 | 22 | - name: 🐍 Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: 📦 Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install "pytest-asyncio>=0.23.0" 31 | pip install -e ".[dev]" 32 | 33 | - name: 🧪 Run tests 34 | run: | 35 | pytest tests/unit/ tests/integration/ -v --tb=short -x 36 | 37 | - name: ✅ Summary 38 | run: | 39 | echo "✅ Tests completed for Python ${{ matrix.python-version }} on ${{ matrix.os }}" 40 | 41 | lint: 42 | runs-on: ubuntu-latest 43 | 44 | steps: 45 | - name: 📥 Checkout code 46 | uses: actions/checkout@v4 47 | 48 | - name: 🐍 Set up Python 49 | uses: actions/setup-python@v4 50 | with: 51 | python-version: '3.11' 52 | 53 | - name: 📦 Install ruff 54 | run: | 55 | pip install ruff 56 | 57 | - name: 🔍 Run ruff 58 | run: | 59 | ruff check pydhis2/ || echo "⚠️ Linting found issues (non-blocking)" 60 | echo "✅ Linting completed" 61 | -------------------------------------------------------------------------------- /pydhis2/templates/{{cookiecutter.project_slug}}/pipelines/example.yml: -------------------------------------------------------------------------------- 1 | # Example data analysis pipeline 2 | 3 | name: "{{ cookiecutter.project_name }} - Example Pipeline" 4 | description: "A complete pipeline for data pulling, quality assessment, and report generation" 5 | 6 | # Global configuration 7 | rps: 8 # Requests per second 8 | concurrency: 8 # Concurrency level 9 | 10 | # Pipeline steps 11 | steps: 12 | # Step 1: Pull Analytics data 13 | - type: analytics_pull 14 | name: "Pull Immunization Analytics Data" 15 | dx: "immunization.indicators" # Please replace with actual indicator IDs 16 | ou: "LEVEL-3" # Please replace with actual organization units 17 | pe: "2023Q1:2023Q4" # Time range 18 | output: "analytics.parquet" 19 | format: "parquet" 20 | 21 | # Step 2: Pull Tracker events (optional) 22 | # - type: tracker_pull 23 | # name: "Pull Immunization Tracker Events" 24 | # program: "program_id" # Please replace with actual program ID 25 | # status: "COMPLETED" 26 | # since: "2023-01-01" 27 | # output: "tracker_events.parquet" 28 | # format: "parquet" 29 | 30 | # Step 3: Data Quality Review 31 | - type: dqr 32 | name: "Analytics Data Quality Review" 33 | input: "analytics.parquet" 34 | html_output: "analytics_dqr_report.html" 35 | json_output: "analytics_dqr_summary.json" 36 | config: 37 | completeness: 38 | thresholds: 39 | reporting_completeness_pass: 0.85 40 | data_element_completeness_pass: 0.80 41 | consistency: 42 | thresholds: 43 | outlier_threshold: 2.5 44 | timeliness: 45 | thresholds: 46 | submission_timeliness_pass: 0.75 47 | 48 | # Pipeline metadata 49 | metadata: 50 | author: "{{ cookiecutter.author_name }}" 51 | version: "1.0.0" 52 | created: "{% now 'utc', '%Y-%m-%d' %}" 53 | tags: 54 | - "analytics" 55 | - "dqr" 56 | - "immunization" 57 | -------------------------------------------------------------------------------- /dhis2_probe_summary.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "server": "EMIS Demo", 4 | "base": "https://emis.dhis2.org/demo", 5 | "ok": true, 6 | "version": "2.40.4.1", 7 | "systemId": "ca37253e-3727-4f73-838c-475ea3c7a432", 8 | "contextPath": "https://emis.dhis2.org/demo", 9 | "results": { 10 | "dataElements": { 11 | "ok": true, 12 | "observed_count": 530, 13 | "pager_total": 530, 14 | "duplicates": 0, 15 | "missing_pages": [], 16 | "checksum": "dd0f27ecbe35f9f0f19be4041e48dd03b76ddae6" 17 | }, 18 | "organisationUnits": { 19 | "ok": true, 20 | "observed_count": 3596, 21 | "pager_total": 3596, 22 | "duplicates": 0, 23 | "missing_pages": [], 24 | "checksum": "86792351124542ebaa5e85f60ea51e065bc4e5e1" 25 | } 26 | } 27 | }, 28 | { 29 | "server": "Play (android-current)", 30 | "base": "https://play.dhis2.org/android-current", 31 | "ok": false, 32 | "stage": "system/info", 33 | "http_status": 401, 34 | "sample": "{\"httpStatus\":\"Unauthorized\",\"httpStatusCode\":401,\"status\":\"ERROR\",\"message\":\"Unauthorized\"}" 35 | }, 36 | { 37 | "server": "HMIS v41 Demo", 38 | "base": "https://demos.dhis2.org/hmis_v41", 39 | "ok": false, 40 | "stage": "system/info", 41 | "http_status": 401, 42 | "sample": "{\"httpStatus\":\"Unauthorized\",\"httpStatusCode\":401,\"status\":\"ERROR\",\"message\":\"Account disabled\"}" 43 | }, 44 | { 45 | "server": "Data Quality Demo", 46 | "base": "https://demos.dhis2.org/dq", 47 | "ok": true, 48 | "version": "2.38.4.3", 49 | "systemId": null, 50 | "contextPath": "https://demos.dhis2.org/dq", 51 | "results": { 52 | "dataElements": { 53 | "ok": true, 54 | "observed_count": 301, 55 | "pager_total": 301, 56 | "duplicates": 0, 57 | "missing_pages": [], 58 | "checksum": "c7f6444929ac9596565fcba8f67bad5a551877ae" 59 | }, 60 | "organisationUnits": { 61 | "ok": true, 62 | "observed_count": 654, 63 | "pager_total": 654, 64 | "duplicates": 0, 65 | "missing_pages": [], 66 | "checksum": "3431b2bdcb9fba431c5a727ce08b505c1e3edbf3" 67 | } 68 | } 69 | } 70 | ] -------------------------------------------------------------------------------- /pydhis2/templates/{{cookiecutter.project_slug}}/Makefile.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM {{ cookiecutter.project_name }} - Windows Batch Commands 3 | 4 | if "%1"=="help" goto help 5 | if "%1"=="setup" goto setup 6 | if "%1"=="run-pipeline" goto run_pipeline 7 | if "%1"=="dqr" goto dqr 8 | if "%1"=="clean" goto clean 9 | if "%1"=="" goto help 10 | 11 | :help 12 | echo. 13 | echo {{ cookiecutter.project_name }} - Available Commands: 14 | echo. 15 | echo make setup - Install dependencies and set up the environment 16 | echo make run-pipeline - Run the example data analysis pipeline 17 | echo make dqr - Run data quality review 18 | echo make clean - Clean up temporary files 19 | echo make help - Display this help message 20 | echo. 21 | goto end 22 | 23 | :setup 24 | echo Setting up project environment... 25 | if not exist venv ( 26 | echo Creating virtual environment... 27 | py -m venv venv 28 | ) 29 | echo Activating virtual environment and installing dependencies... 30 | call venv\Scripts\activate.bat && pip install -r requirements.txt 31 | echo Copying environment configuration file... 32 | if not exist .env ( 33 | copy env.example .env 34 | echo Please edit the .env file with your DHIS2 connection details 35 | ) 36 | echo ✅ Environment setup complete! 37 | goto end 38 | 39 | :run_pipeline 40 | echo Running data analysis pipeline... 41 | if not exist venv ( 42 | echo ❌ Please run 'make setup' first 43 | goto end 44 | ) 45 | call venv\Scripts\activate.bat && py scripts/run_pipeline.py 46 | goto end 47 | 48 | :dqr 49 | echo Running data quality review... 50 | if not exist data\analytics_data.parquet ( 51 | echo ❌ Data file not found. Please run 'make run-pipeline' first 52 | goto end 53 | ) 54 | call venv\Scripts\activate.bat && pydhis2 dqr run --input data\analytics_data.parquet --html reports\dqr_report.html --json reports\dqr_summary.json 55 | echo ✅ DQR report generated in the 'reports\' directory 56 | goto end 57 | 58 | :clean 59 | echo Cleaning up temporary files... 60 | if exist .pydhis2_cache rmdir /s /q .pydhis2_cache 61 | if exist __pycache__ rmdir /s /q __pycache__ 62 | if exist .pytest_cache rmdir /s /q .pytest_cache 63 | echo ✅ Cleanup complete 64 | goto end 65 | 66 | :end 67 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Pull Request 2 | 3 | ## Description 4 | 5 | 6 | ## Type of Change 7 | 8 | - [ ] Bug fix (non-breaking change which fixes an issue) 9 | - [ ] New feature (non-breaking change which adds functionality) 10 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 11 | - [ ] Documentation update 12 | - [ ] Code refactoring 13 | - [ ] Performance improvement 14 | - [ ] Test addition or modification 15 | - [ ] CI/CD improvement 16 | 17 | ## Related Issues 18 | 19 | Fixes # 20 | Relates to # 21 | 22 | ## Changes Made 23 | 24 | - 25 | - 26 | - 27 | 28 | ## Testing 29 | 30 | - [ ] Existing tests pass (`pytest tests/`) 31 | - [ ] Added new tests for new functionality 32 | - [ ] Manually tested on: 33 | - [ ] Python 3.9 34 | - [ ] Python 3.10 35 | - [ ] Python 3.11 36 | - [ ] Windows 37 | - [ ] Linux 38 | - [ ] macOS 39 | 40 | ## Code Quality 41 | - [ ] Code follows the project's style guidelines (`ruff check` passes) 42 | - [ ] Code is properly formatted (`ruff format` applied) 43 | - [ ] Added docstrings for new functions/classes 44 | - [ ] Added type hints where applicable 45 | - [ ] Updated relevant documentation 46 | - [ ] Updated CHANGELOG.md (if applicable) 47 | 48 | ## Screenshots (if applicable) 49 | 50 | 51 | ## Breaking Changes 52 | 53 | 54 | ## Additional Notes 55 | 56 | 57 | ## Checklist 58 | - [ ] My code follows the style guidelines of this project 59 | - [ ] I have performed a self-review of my own code 60 | - [ ] I have commented my code, particularly in hard-to-understand areas 61 | - [ ] I have made corresponding changes to the documentation 62 | - [ ] My changes generate no new warnings 63 | - [ ] I have added tests that prove my fix is effective or that my feature works 64 | - [ ] New and existing unit tests pass locally with my changes 65 | - [ ] Any dependent changes have been merged and published 66 | 67 | -------------------------------------------------------------------------------- /tests/unit/test_config.py: -------------------------------------------------------------------------------- 1 | """Tests for configuration classes""" 2 | 3 | import pytest 4 | from pydantic import ValidationError 5 | 6 | from pydhis2.core.types import DHIS2Config, AuthMethod 7 | 8 | 9 | def test_dhis2_config_valid(): 10 | """Test valid configuration""" 11 | config = DHIS2Config( 12 | base_url="https://play.dhis2.org/2.41", 13 | auth=("user", "pass"), 14 | rps=10.0, 15 | concurrency=5, 16 | ) 17 | 18 | assert config.base_url == "https://play.dhis2.org/2.41" 19 | assert config.auth == ("user", "pass") 20 | assert config.auth_method == AuthMethod.BASIC 21 | assert config.rps == 10.0 22 | assert config.concurrency == 5 23 | 24 | 25 | def test_dhis2_config_url_validation(): 26 | """Test URL validation""" 27 | with pytest.raises(ValidationError): 28 | DHIS2Config( 29 | base_url="invalid-url", 30 | auth=("user", "pass") 31 | ) 32 | 33 | 34 | def test_dhis2_config_url_trailing_slash(): 35 | """Test handling of trailing slash in URL""" 36 | config = DHIS2Config( 37 | base_url="https://play.dhis2.org/2.41/", 38 | auth=("user", "pass") 39 | ) 40 | 41 | assert config.base_url == "https://play.dhis2.org/2.41" 42 | 43 | 44 | def test_dhis2_config_timeout_validation(): 45 | """Test timeout validation""" 46 | with pytest.raises(ValidationError): 47 | DHIS2Config( 48 | base_url="https://play.dhis2.org/2.41", 49 | auth=("user", "pass"), 50 | timeout=-1 # negative timeout 51 | ) 52 | 53 | 54 | def test_dhis2_config_auth_validation(): 55 | """Test authentication validation""" 56 | # Valid tuple 57 | config1 = DHIS2Config( 58 | base_url="https://play.dhis2.org/2.41", 59 | auth=("user", "pass") 60 | ) 61 | assert config1.auth == ("user", "pass") 62 | 63 | # Valid token 64 | config2 = DHIS2Config( 65 | base_url="https://play.dhis2.org/2.41", 66 | auth="token123" 67 | ) 68 | assert config2.auth == "token123" 69 | 70 | # Invalid tuple 71 | with pytest.raises(ValidationError): 72 | DHIS2Config( 73 | base_url="https://play.dhis2.org/2.41", 74 | auth=("user",) # Only one element 75 | ) 76 | -------------------------------------------------------------------------------- /experiment/Methods.md: -------------------------------------------------------------------------------- 1 | # Methods 2 | 3 | ## Methodology Overview 4 | 5 | This study adheres to the principles of reproducibility in computational science. To ensure the reliability and repeatability of the results, all tests were conducted in a fully documented environment with fixed parameters. 6 | 7 | ## Statistical Analysis Methods 8 | 9 | ### Confidence Interval Estimation 10 | 11 | - **Method**: Bootstrap (Efron, 1979). 12 | - **Samples**: 1,000 bootstrap resamples. 13 | - **Confidence Level**: 95% (two-sided). 14 | - **Correction**: Bias-corrected and accelerated (BCa) method. 15 | 16 | ### Effect Size Calculation 17 | 18 | - **Execution Time**: Cliff's delta (a non-parametric effect size). 19 | - **Data Integrity**: Cohen's d (a parametric effect size). 20 | - **Threshold Interpretation**: An effect is considered medium when |δ| > 0.33. 21 | 22 | ## Quality Control 23 | 24 | ### Reproducibility Assurance 25 | 26 | - **Random Seed**: Fixed to `20250904` (in accordance with the ISO 80000-2 standard). 27 | - **Environment Snapshot**: Complete version records of all dependencies. 28 | - **Configuration Backup**: All test parameters were serialized into JSON format for backup. 29 | 30 | ### Validation Mechanisms 31 | 32 | - **Baseline Consistency**: The accuracy of the baseline data was re-validated before each run. 33 | - **Result Verification**: Multi-dimensional validation was performed on the results returned by the clients. 34 | - **Exception Handling**: Comprehensive capture and logging of all potential errors. 35 | 36 | ## Data Management 37 | 38 | ### Output Files 39 | 40 | - `manifest.json`: Records the test environment and dependency versions. 41 | - `experiment_config.json`: Contains the complete experiment configuration. 42 | - `raw_experiment_results.csv`: Includes all raw experimental data. 43 | - `requests_trace.jsonl`: Logs detailed request traces. 44 | - `comprehensive_comparison_report.md`: The final comprehensive comparison report. 45 | 46 | ### Data Source 47 | 48 | - **DHIS2 Instance**: `https://play.im.dhis2.org/stable-2-42-1` 49 | - **API Version**: Auto-detected. 50 | - **Timestamps**: All time records use the UTC ISO format. 51 | 52 | ## Ethics and Transparency 53 | 54 | This research adheres to the principles of open science; all code, data, and methods are publicly available. The study does not involve human subjects and therefore requires no ethics review. The research data can be made available to other researchers upon reasonable request. 55 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to pydhis2 2 | 3 | First off, thank you for considering contributing to `pydhis2`. It's people like you that make `pydhis2` such a great tool. 4 | 5 | Following these guidelines helps to communicate that you respect the time of the developers managing and developing this open source project. In return, they should reciprocate that respect in addressing your issue or assessing patches and features. 6 | 7 | ## Code of Conduct 8 | 9 | This project and everyone participating in it is governed by the [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. 10 | 11 | ## How Can I Contribute? 12 | 13 | ### Reporting Bugs 14 | 15 | This is one of the simplest ways to contribute. If you find a bug, please ensure the bug was not already reported by searching on GitHub under [Issues](https://github.com/HzaCode/pyDHIS2/issues). 16 | 17 | If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring. 18 | 19 | ### Suggesting Enhancements 20 | 21 | If you have an idea for an enhancement, please open an issue to discuss it. This allows us to coordinate our efforts and prevent duplication of work. 22 | 23 | ### Your First Code Contribution 24 | 25 | Unsure where to begin contributing to `pydhis2`? You can start by looking through these `good-first-issue` and `help-wanted` issues: 26 | 27 | - [Good first issues](https://github.com/HzaCode/pyDHIS2/labels/good%20first%20issue) - issues which should only require a few lines of code, and a test or two. 28 | - [Help wanted issues](https://github.com/HzaCode/pyDHIS2/labels/help%20wanted) - issues which should be a bit more involved than `good-first-issue` issues. 29 | 30 | ### Pull Requests 31 | 32 | 1. Fork the repo and create your branch from `main`. 33 | 2. If you've added code that should be tested, add tests. 34 | 3. If you've changed APIs, update the documentation. 35 | 4. Ensure the test suite passes. 36 | 5. Make sure your code lints. 37 | 6. Issue that pull request! 38 | 39 | ## Styleguides 40 | 41 | We use `ruff` to format our code. Please run `ruff format .` before committing your changes. 42 | 43 | We also use `ruff` for linting. Please run `ruff check .` to check for any linting errors. 44 | 45 | ## License 46 | 47 | By contributing, you agree that your contributions will be licensed under its Apache License 2.0. 48 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.2.0] - 2025-10-22 9 | 10 | ### Added 11 | - Modern async-first architecture with `AsyncDHIS2Client` and `SyncDHIS2Client` 12 | - Comprehensive analytics endpoint with DataFrame conversion (`.to_pandas()`) 13 | - DataValueSets endpoint with read/write capabilities 14 | - Tracker events endpoint with pagination and streaming 15 | - Metadata endpoint with import/export functionality 16 | - Built-in rate limiting with adaptive strategies 17 | - Robust retry mechanism with exponential backoff 18 | - HTTP caching with ETag and Last-Modified support 19 | - Data Quality Review (DQR) metrics based on WHO standards 20 | - Command-line interface (CLI) with typer 21 | - Project template system using Cookiecutter 22 | - OpenTelemetry instrumentation for observability 23 | - Comprehensive test suite (348 tests) 24 | - Multi-platform CI/CD (Ubuntu, Windows, macOS) 25 | - Support for Python 3.9, 3.10, 3.11 26 | 27 | ### Features 28 | - **Analytics**: Query, pagination, streaming, export to multiple formats 29 | - **DataValueSets**: Pull, push, chunking, conflict resolution 30 | - **Tracker**: Events and tracked entities with full CRUD operations 31 | - **Metadata**: Export, import, validation, schema inspection 32 | - **DQR**: Completeness, consistency, and timeliness metrics 33 | - **I/O**: Native Pandas, Arrow, and Parquet support 34 | - **Resilience**: Rate limiting, retries, caching, compression 35 | - **Developer Experience**: Type hints, clear error messages, extensive examples 36 | 37 | ### Documentation 38 | - Comprehensive README with quick start guide 39 | - Example scripts for common use cases 40 | - Contributing guidelines 41 | - Code of Conduct 42 | - API documentation in docstrings 43 | 44 | ### Infrastructure 45 | - GitHub Actions CI pipeline 46 | - Ruff for linting and formatting 47 | - pytest with asyncio support 48 | - Modern packaging with pyproject.toml 49 | 50 | --- 51 | 52 | ## Unreleased 53 | 54 | ### Planned 55 | - Enhanced CLI functionality for data operations 56 | - ReadTheDocs documentation site 57 | - Additional DQR metrics and visualizations 58 | - Performance benchmarking tools 59 | - More example notebooks and tutorials 60 | - Integration with additional data formats (Polars, DuckDB) 61 | 62 | --- 63 | 64 | [0.2.0]: https://github.com/HzaCode/pyDHIS2/releases/tag/v0.2.0 65 | 66 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quick Start Guide 2 | ================= 3 | 4 | This guide will help you get started with pydhis2 in minutes. 5 | 6 | Basic Example 7 | ------------- 8 | 9 | Here's a complete example of fetching analytics data: 10 | 11 | .. code-block:: python 12 | 13 | import asyncio 14 | import sys 15 | from pydhis2 import get_client, DHIS2Config 16 | from pydhis2.core.types import AnalyticsQuery 17 | 18 | AsyncDHIS2Client, _ = get_client() 19 | 20 | async def main(): 21 | config = DHIS2Config( 22 | base_url="https://demos.dhis2.org/dq", 23 | auth=("demo", "District1#") 24 | ) 25 | 26 | async with AsyncDHIS2Client(config) as client: 27 | query = AnalyticsQuery( 28 | dx=["b6mCG9sphIT"], 29 | ou="qzGX4XdWufs", 30 | pe="2023" 31 | ) 32 | df = await client.analytics.to_pandas(query) 33 | print(df.head()) 34 | 35 | if __name__ == "__main__": 36 | if sys.platform == 'win32': 37 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) 38 | asyncio.run(main()) 39 | 40 | Synchronous Client 41 | ------------------ 42 | 43 | If you prefer synchronous code: 44 | 45 | .. code-block:: python 46 | 47 | from pydhis2 import get_client, DHIS2Config 48 | from pydhis2.core.types import AnalyticsQuery 49 | 50 | _, SyncDHIS2Client = get_client() 51 | 52 | config = DHIS2Config( 53 | base_url="https://demos.dhis2.org/dq", 54 | auth=("demo", "District1#") 55 | ) 56 | 57 | with SyncDHIS2Client(config) as client: 58 | query = AnalyticsQuery( 59 | dx=["b6mCG9sphIT"], 60 | ou="qzGX4XdWufs", 61 | pe="2023" 62 | ) 63 | df = client.analytics.to_pandas(query) 64 | print(df.head()) 65 | 66 | Using Environment Variables 67 | ---------------------------- 68 | 69 | For production, use environment variables: 70 | 71 | .. code-block:: bash 72 | 73 | export DHIS2_URL="https://your-server.com" 74 | export DHIS2_USERNAME="your_username" 75 | export DHIS2_PASSWORD="your_password" 76 | 77 | Then in your code: 78 | 79 | .. code-block:: python 80 | 81 | from pydhis2 import get_client, DHIS2Config 82 | 83 | config = DHIS2Config() # Automatically loads from environment 84 | AsyncDHIS2Client, _ = get_client() 85 | 86 | async with AsyncDHIS2Client(config) as client: 87 | # Your code here 88 | pass 89 | 90 | Next Steps 91 | ---------- 92 | 93 | * Learn about :doc:`configuration` options 94 | * Explore :doc:`analytics` queries 95 | * See :doc:`cli` commands 96 | 97 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageML stuff 102 | .sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | # IDE files 133 | .vscode/ 134 | .idea/ 135 | *.swp 136 | *.swo 137 | *~ 138 | 139 | # OS files 140 | .DS_Store 141 | Thumbs.db 142 | 143 | # Test environments 144 | test_pypi_env/ -------------------------------------------------------------------------------- /pydhis2/dqr/config.yml: -------------------------------------------------------------------------------- 1 | # WHO DQR-aligned quality configuration 2 | # Version: 1.0 | Scope: Desk review metrics for DHIS2 workloads 3 | 4 | metadata: 5 | standard: "WHO Data Quality Review (DQR), Module 1/2 aligned" 6 | notes: "Thresholds are recommended starting points; countries/projects may adjust per SOP" 7 | 8 | # Dimension 1: Completeness (separating "reporting completeness" and "indicator completeness"; and separating "missing/zero values") 9 | completeness: 10 | reporting: 11 | thresholds: 12 | pass: 0.90 # Consistent with WHO DQ App common baseline 13 | warn: 0.80 # Recommended ≥0.75; <0.80 commonly used as warning 14 | levels: ["facility", "district"] # Output two levels (can be modified as needed) 15 | 16 | indicator: 17 | nonmissing: 18 | thresholds: 19 | pass: 0.90 20 | warn: 0.80 21 | nonzero: 22 | thresholds: 23 | pass: 0.90 24 | warn: 0.80 25 | rules: 26 | treat_zero_as_valid: true # "true zero" ≠ missing 27 | expected_reports_source: "orgunit_period_matrix" # Expected reports source definition 28 | 29 | # Dimension 2: Internal consistency (outliers/temporal consistency/related indicators consistency) 30 | consistency: 31 | outliers: 32 | zscore: 33 | moderate: 2.0 # 2-3 SD only "suggest", no hard exclusion 34 | extreme: 3.0 # ≥3 SD marked red, triggers review 35 | modified_z: 36 | enabled: true 37 | extreme: 3.5 # Robust method threshold (more stable when highly volatile) 38 | handle: 39 | moderate: "flag_only" 40 | extreme: "flag_and_review" 41 | 42 | trend: 43 | method: "mean_of_prior_3yrs" # or "forecast_from_prior_3yrs" 44 | baseline_window_years: 3 45 | unit_level_ratio_threshold: 0.33 # ±33%: current_year_value/3yr_baseline ∉ [0.67, 1.33] considered abnormal 46 | aggregate_pass_units_within_threshold: 0.90 # ≥90% units within threshold → pass 47 | aggregate_warn_units_within_threshold: 0.75 # 75-90% → warning, <75% → fail 48 | 49 | related_indicators: 50 | enabled: true 51 | pairs: 52 | - name: "DTP1_vs_DTP3" 53 | metric: "dropout_rate" 54 | warn_threshold: 0.10 # ≥10% dropout warning 55 | pass_threshold: 0.05 # ≤5% considered good 56 | - name: "ANC1_vs_FirstVisit" 57 | metric: "ratio" 58 | warn_lower: 0.80 # Ratio deviation outside 0.8-1.2 → warning 59 | warn_upper: 1.20 60 | 61 | # Dimension 3: Timeliness (note denominator = "reports received", not expected reports) 62 | timeliness: 63 | thresholds: 64 | timely_pass: 0.90 65 | ci_method: "clopper_pearson" # For confidence interval estimation of proportion metrics 66 | 67 | 68 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Test configuration and fixtures""" 2 | 3 | import pytest 4 | import asyncio 5 | import sys 6 | from typing import AsyncGenerator 7 | from unittest.mock import AsyncMock 8 | 9 | from pydhis2.core.types import DHIS2Config 10 | from pydhis2.core.client import AsyncDHIS2Client 11 | 12 | # Set event loop policy for Windows 13 | if sys.platform == 'win32': 14 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) 15 | 16 | 17 | @pytest.fixture 18 | def mock_config() -> DHIS2Config: 19 | """Mock configuration""" 20 | return DHIS2Config( 21 | base_url="https://test.dhis2.org", 22 | auth=("test_user", "test_pass"), 23 | rps=10.0, 24 | concurrency=5, 25 | max_retries=3, 26 | ) 27 | 28 | 29 | @pytest.fixture 30 | async def mock_client(mock_config: DHIS2Config) -> AsyncGenerator[AsyncDHIS2Client, None]: 31 | """Mock client""" 32 | client = AsyncDHIS2Client(mock_config) 33 | 34 | # Mock session 35 | client._session = AsyncMock() 36 | 37 | yield client 38 | 39 | await client.close() 40 | 41 | 42 | @pytest.fixture 43 | def sample_analytics_response() -> dict: 44 | """Sample Analytics response""" 45 | return { 46 | "headers": [ 47 | {"name": "dx", "column": "Data", "type": "TEXT"}, 48 | {"name": "pe", "column": "Period", "type": "TEXT"}, 49 | {"name": "ou", "column": "Organisation unit", "type": "TEXT"}, 50 | {"name": "value", "column": "Value", "type": "NUMBER"} 51 | ], 52 | "metaData": { 53 | "items": {}, 54 | "dimensions": {} 55 | }, 56 | "rows": [ 57 | ["Abc123", "2023Q1", "Def456", "100"], 58 | ["Abc123", "2023Q2", "Def456", "150"], 59 | ["Abc123", "2023Q3", "Def456", "200"] 60 | ], 61 | "width": 4, 62 | "height": 3 63 | } 64 | 65 | 66 | @pytest.fixture 67 | def sample_datavaluesets_response() -> dict: 68 | """Sample DataValueSets response""" 69 | return { 70 | "dataValues": [ 71 | { 72 | "dataElement": "Abc123", 73 | "period": "202301", 74 | "orgUnit": "Def456", 75 | "value": "100", 76 | "lastUpdated": "2023-01-15T10:30:00.000" 77 | }, 78 | { 79 | "dataElement": "Abc123", 80 | "period": "202302", 81 | "orgUnit": "Def456", 82 | "value": "150", 83 | "lastUpdated": "2023-02-15T10:30:00.000" 84 | } 85 | ] 86 | } 87 | 88 | 89 | @pytest.fixture(scope="session") 90 | def event_loop(): 91 | """Create event loop""" 92 | loop = asyncio.new_event_loop() 93 | yield loop 94 | loop.close() 95 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "wheel", "setuptools-scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "pydhis2" 7 | version = "0.2.0" 8 | description = "Reproducible DHIS2 Python SDK for LMIC scenarios" 9 | readme = "README.md" 10 | license = {text = "Apache-2.0"} 11 | authors = [ 12 | {name = "pydhis2 contributors", email = "pydhis2@github.com"} 13 | ] 14 | maintainers = [ 15 | {name = "pydhis2 contributors", email = "pydhis2@github.com"} 16 | ] 17 | classifiers = [ 18 | "Development Status :: 4 - Beta", 19 | "Intended Audience :: Developers", 20 | "Intended Audience :: Healthcare Industry", 21 | "License :: OSI Approved :: Apache Software License", 22 | "Programming Language :: Python :: 3", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Topic :: Software Development :: Libraries :: Python Modules", 27 | "Topic :: Scientific/Engineering :: Medical Science Apps.", 28 | ] 29 | requires-python = ">=3.9" 30 | dependencies = [ 31 | "aiohttp>=3.8.0,<4.0.0", 32 | "aiofiles>=23.0.0,<26.0.0", 33 | "aiolimiter>=1.0.0,<2.0.0", 34 | "tenacity>=8.0.0,<10.0.0", 35 | "pandas>=1.5.0,<3.0.0", 36 | "pyarrow>=10.0.0,<22.0.0", 37 | "numpy>=1.20.0,<3.0.0", 38 | "pydantic>=2.0.0,<3.0.0", 39 | "pyyaml>=6.0,<7.0", 40 | "click>=8.0.0,<9.0.0", 41 | "typer>=0.9.0,<1.0.0", 42 | "cookiecutter>=2.1.0,<3.0.0", 43 | "opentelemetry-api>=1.15.0,<2.0.0", 44 | "opentelemetry-sdk>=1.15.0,<2.0.0", 45 | "opentelemetry-exporter-jaeger-thrift>=1.15.0,<2.0.0", 46 | "opentelemetry-exporter-prometheus>=0.36b0,<1.0.0", 47 | "opentelemetry-instrumentation-aiohttp-client>=0.36b0,<1.0.0", 48 | ] 49 | 50 | [project.optional-dependencies] 51 | dev = [ 52 | "pytest>=7.0.0,<9.0.0", 53 | "pytest-asyncio>=0.23.0,<2.0.0", 54 | "pytest-mock>=3.10.0,<4.0.0", 55 | "pytest-cov>=4.0.0,<6.0.0", 56 | "ruff>=0.1.0,<1.0.0", 57 | ] 58 | 59 | [project.urls] 60 | Homepage = "https://github.com/HzaCode/pyDHIS2" 61 | Documentation = "https://hzacode.github.io/pyDHIS2" 62 | Repository = "https://github.com/HzaCode/pyDHIS2" 63 | Issues = "https://github.com/HzaCode/pyDHIS2/issues" 64 | Changelog = "https://github.com/HzaCode/pyDHIS2/blob/main/CHANGELOG.md" 65 | Discussions = "https://github.com/HzaCode/pyDHIS2/discussions" 66 | 67 | [project.scripts] 68 | pydhis2 = "pydhis2.__main__:main" 69 | 70 | [tool.setuptools.packages.find] 71 | include = ["pydhis2*"] 72 | 73 | [tool.pytest.ini_options] 74 | testpaths = ["tests"] 75 | python_files = ["test_*.py"] 76 | python_classes = ["Test*"] 77 | python_functions = ["test_*"] 78 | addopts = "-v --tb=short" 79 | asyncio_mode = "auto" 80 | 81 | [tool.ruff] 82 | line-length = 88 83 | target-version = "py39" 84 | 85 | [tool.ruff.lint] 86 | select = ["E", "F", "W", "I", "N", "UP", "B", "A", "C4", "PT"] 87 | ignore = ["E501", "N806", "N803"] 88 | 89 | [tool.ruff.lint.per-file-ignores] 90 | "tests/*" = ["A002", "A003"] 91 | -------------------------------------------------------------------------------- /docs/metadata.rst: -------------------------------------------------------------------------------- 1 | Metadata 2 | ======== 3 | 4 | The Metadata endpoint provides access to DHIS2 metadata (indicators, data elements, org units, etc.). 5 | 6 | Fetching Specific Metadata Types 7 | --------------------------------- 8 | 9 | .. code-block:: python 10 | 11 | from pydhis2 import get_client, DHIS2Config 12 | 13 | AsyncDHIS2Client, _ = get_client() 14 | config = DHIS2Config() 15 | 16 | async with AsyncDHIS2Client(config) as client: 17 | # Get data elements 18 | data_elements = await client.metadata.get_data_elements( 19 | fields="id,name,code,valueType", 20 | paging=False 21 | ) 22 | print(data_elements) 23 | 24 | # Get indicators 25 | indicators = await client.metadata.get_indicators( 26 | fields="id,name,code,numerator,denominator" 27 | ) 28 | 29 | # Get organisation units at specific level 30 | org_units = await client.metadata.get_organisation_units( 31 | fields="id,name,code,level,path", 32 | filter={"level": "3"} 33 | ) 34 | 35 | Exporting Metadata 36 | ------------------ 37 | 38 | Export metadata to JSON: 39 | 40 | .. code-block:: python 41 | 42 | import json 43 | 44 | async with AsyncDHIS2Client(config) as client: 45 | # Export with filters 46 | metadata = await client.metadata.export( 47 | fields=":owner", 48 | filter={"dataElements": "name:like:ANC"} 49 | ) 50 | 51 | with open("metadata.json", "w") as f: 52 | json.dump(metadata, f, indent=2) 53 | 54 | # Or use the helper method 55 | await client.metadata.export_to_file( 56 | "metadata.json", 57 | filter={"indicators": "name:like:Malaria"} 58 | ) 59 | 60 | Importing Metadata 61 | ------------------ 62 | 63 | Import metadata from JSON: 64 | 65 | .. code-block:: python 66 | 67 | import json 68 | from pydhis2.core.types import ExportFormat 69 | 70 | async with AsyncDHIS2Client(config) as client: 71 | # Direct import 72 | with open("metadata.json") as f: 73 | metadata = json.load(f) 74 | 75 | summary = await client.metadata.import_( 76 | metadata, 77 | strategy="CREATE_AND_UPDATE", 78 | atomic=True 79 | ) 80 | print(f"Imported: {summary.imported}") 81 | print(f"Updated: {summary.updated}") 82 | 83 | # Or use the helper method 84 | summary = await client.metadata.import_from_file("metadata.json") 85 | 86 | # Check for errors 87 | if summary.has_errors: 88 | conflicts_df = summary.get_conflicts_df() 89 | print(conflicts_df) 90 | 91 | Common Metadata Types 92 | --------------------- 93 | 94 | * ``dataElements`` - Data elements 95 | * ``indicators`` - Indicators 96 | * ``organisationUnits`` - Organisation units 97 | * ``dataSets`` - Data sets 98 | * ``programs`` - Programs 99 | * ``programStages`` - Program stages 100 | * ``trackedEntityTypes`` - Tracked entity types 101 | * ``optionSets`` - Option sets 102 | 103 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | Command Line Interface 2 | ====================== 3 | 4 | pydhis2 provides a powerful CLI for common data operations. 5 | 6 | Installation Verification 7 | -------------------------- 8 | 9 | Check version: 10 | 11 | .. code-block:: bash 12 | 13 | pydhis2 version 14 | 15 | Run quick demo: 16 | 17 | .. code-block:: bash 18 | 19 | pydhis2 demo quick 20 | 21 | Configuration 22 | ------------- 23 | 24 | Set up DHIS2 connection: 25 | 26 | .. code-block:: bash 27 | 28 | pydhis2 config --url "https://your-server.com" --username "user" 29 | 30 | Analytics Commands 31 | ------------------ 32 | 33 | Pull Analytics Data 34 | ~~~~~~~~~~~~~~~~~~~ 35 | 36 | .. code-block:: bash 37 | 38 | pydhis2 analytics pull \ 39 | --dx "indicator_id" \ 40 | --ou "org_unit_id" \ 41 | --pe "2023Q1:2023Q4" \ 42 | --out analytics.parquet 43 | 44 | Query with Multiple Dimensions 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | .. code-block:: bash 48 | 49 | pydhis2 analytics pull \ 50 | --dx "ind1,ind2,ind3" \ 51 | --ou "LEVEL-3" \ 52 | --pe "LAST_12_MONTHS" \ 53 | --format csv \ 54 | --out data.csv 55 | 56 | Tracker Commands 57 | ---------------- 58 | 59 | Pull Events 60 | ~~~~~~~~~~~ 61 | 62 | .. code-block:: bash 63 | 64 | pydhis2 tracker events \ 65 | --program "program_id" \ 66 | --status COMPLETED \ 67 | --start-date "2023-01-01" \ 68 | --end-date "2023-12-31" \ 69 | --out events.parquet 70 | 71 | Pull Tracked Entities 72 | ~~~~~~~~~~~~~~~~~~~~~~ 73 | 74 | .. code-block:: bash 75 | 76 | pydhis2 tracker entities \ 77 | --type "person" \ 78 | --ou "org_unit_id" \ 79 | --out entities.parquet 80 | 81 | Data Quality Commands 82 | --------------------- 83 | 84 | Run DQR Analysis 85 | ~~~~~~~~~~~~~~~~ 86 | 87 | .. code-block:: bash 88 | 89 | pydhis2 dqr analyze \ 90 | --input analytics.parquet \ 91 | --html dqr_report.html \ 92 | --json dqr_summary.json 93 | 94 | Generate DQR Report 95 | ~~~~~~~~~~~~~~~~~~~ 96 | 97 | .. code-block:: bash 98 | 99 | pydhis2 dqr report \ 100 | --input analytics.parquet \ 101 | --output report.html \ 102 | --template custom_template.html 103 | 104 | Pipeline Commands 105 | ----------------- 106 | 107 | Run Analysis Pipeline 108 | ~~~~~~~~~~~~~~~~~~~~~ 109 | 110 | .. code-block:: bash 111 | 112 | pydhis2 pipeline run --recipe pipelines/analysis.yml 113 | 114 | Validate Pipeline 115 | ~~~~~~~~~~~~~~~~~ 116 | 117 | .. code-block:: bash 118 | 119 | pydhis2 pipeline validate --recipe pipelines/analysis.yml 120 | 121 | Project Template 122 | ---------------- 123 | 124 | Create New Project 125 | ~~~~~~~~~~~~~~~~~~ 126 | 127 | .. code-block:: bash 128 | 129 | cookiecutter gh:HzaCode/pyDHIS2 --directory pydhis2/templates 130 | 131 | This will prompt you for project details and create a complete project structure. 132 | 133 | Help 134 | ---- 135 | 136 | Get help for any command: 137 | 138 | .. code-block:: bash 139 | 140 | pydhis2 --help 141 | pydhis2 analytics --help 142 | pydhis2 tracker --help 143 | 144 | -------------------------------------------------------------------------------- /docs/configuration.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | ============= 3 | 4 | pydhis2 can be configured through multiple methods: environment variables, configuration files, or directly in code. 5 | 6 | Environment Variables 7 | --------------------- 8 | 9 | The recommended approach for production: 10 | 11 | .. code-block:: bash 12 | 13 | export DHIS2_URL="https://your-dhis2-server.com" 14 | export DHIS2_USERNAME="your_username" 15 | export DHIS2_PASSWORD="your_password" 16 | 17 | Then use in code: 18 | 19 | .. code-block:: python 20 | 21 | from pydhis2 import DHIS2Config 22 | 23 | config = DHIS2Config() # Loads from environment 24 | 25 | Direct Configuration 26 | -------------------- 27 | 28 | For development or scripts: 29 | 30 | .. code-block:: python 31 | 32 | from pydhis2 import DHIS2Config 33 | 34 | config = DHIS2Config( 35 | base_url="https://your-server.com", 36 | auth=("username", "password"), 37 | rps=10, # Requests per second 38 | concurrency=10, # Concurrent connections 39 | timeout=60, # Request timeout 40 | cache_enabled=True, # Enable HTTP caching 41 | ) 42 | 43 | Advanced Options 44 | ---------------- 45 | 46 | Rate Limiting 47 | ~~~~~~~~~~~~~ 48 | 49 | Control request rates to avoid overwhelming the server: 50 | 51 | .. code-block:: python 52 | 53 | config = DHIS2Config( 54 | base_url="https://your-server.com", 55 | auth=("username", "password"), 56 | rps=5, # 5 requests per second 57 | ) 58 | 59 | Retry Configuration 60 | ~~~~~~~~~~~~~~~~~~~ 61 | 62 | Customize retry behavior: 63 | 64 | .. code-block:: python 65 | 66 | config = DHIS2Config( 67 | base_url="https://your-server.com", 68 | auth=("username", "password"), 69 | max_retries=5, 70 | retry_backoff=2.0, 71 | ) 72 | 73 | Caching 74 | ~~~~~~~ 75 | 76 | Enable HTTP caching for repeated requests: 77 | 78 | .. code-block:: python 79 | 80 | config = DHIS2Config( 81 | base_url="https://your-server.com", 82 | auth=("username", "password"), 83 | cache_enabled=True, 84 | cache_dir=".cache/dhis2", 85 | ) 86 | 87 | Timeouts 88 | ~~~~~~~~ 89 | 90 | Set connection and read timeouts: 91 | 92 | .. code-block:: python 93 | 94 | config = DHIS2Config( 95 | base_url="https://your-server.com", 96 | auth=("username", "password"), 97 | timeout=120, # Total request timeout in seconds 98 | ) 99 | 100 | Using Configuration Files 101 | -------------------------- 102 | 103 | You can also use YAML configuration files: 104 | 105 | .. code-block:: yaml 106 | 107 | # config.yml 108 | connection: 109 | base_url: "https://your-server.com" 110 | username: "your_username" 111 | password: "your_password" 112 | rps: 10 113 | concurrency: 10 114 | 115 | retry: 116 | max_attempts: 5 117 | backoff: 2.0 118 | 119 | cache: 120 | enabled: true 121 | directory: ".cache" 122 | 123 | Load it in code: 124 | 125 | .. code-block:: python 126 | 127 | import yaml 128 | from pydhis2 import DHIS2Config 129 | 130 | with open('config.yml') as f: 131 | config_dict = yaml.safe_load(f) 132 | 133 | config = DHIS2Config(**config_dict['connection']) 134 | 135 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # For the full list of built-in configuration values, see: 3 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 4 | 5 | import os 6 | import sys 7 | sys.path.insert(0, os.path.abspath('..')) 8 | 9 | # -- Project information ----------------------------------------------------- 10 | project = 'pydhis2' 11 | copyright = '2025, pydhis2 contributors' 12 | author = 'pydhis2 contributors' 13 | release = '0.2.0' 14 | version = '0.2.0' 15 | 16 | # -- General configuration --------------------------------------------------- 17 | extensions = [ 18 | 'sphinx.ext.autodoc', 19 | 'sphinx.ext.napoleon', 20 | 'sphinx.ext.viewcode', 21 | 'sphinx.ext.intersphinx', 22 | 'sphinx.ext.autosummary', 23 | 'sphinx_autodoc_typehints', 24 | 'myst_parser', 25 | 'sphinx_copybutton', 26 | ] 27 | 28 | templates_path = ['_templates'] 29 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 30 | 31 | # Napoleon settings for Google-style docstrings 32 | napoleon_google_docstring = True 33 | napoleon_numpy_docstring = False 34 | napoleon_include_init_with_doc = True 35 | napoleon_include_private_with_doc = False 36 | napoleon_include_special_with_doc = True 37 | napoleon_use_admonition_for_examples = False 38 | napoleon_use_admonition_for_notes = False 39 | napoleon_use_admonition_for_references = False 40 | napoleon_use_ivar = False 41 | napoleon_use_param = True 42 | napoleon_use_rtype = True 43 | napoleon_preprocess_types = False 44 | napoleon_type_aliases = None 45 | napoleon_attr_annotations = True 46 | 47 | # Autodoc settings 48 | autodoc_default_options = { 49 | 'members': True, 50 | 'member-order': 'bysource', 51 | 'special-members': '__init__', 52 | 'undoc-members': True, 53 | 'exclude-members': '__weakref__' 54 | } 55 | 56 | autosummary_generate = True 57 | 58 | # MyST Parser settings (for Markdown support) 59 | myst_enable_extensions = [ 60 | "colon_fence", 61 | "deflist", 62 | "dollarmath", 63 | "fieldlist", 64 | "html_admonition", 65 | "html_image", 66 | "linkify", 67 | "replacements", 68 | "smartquotes", 69 | "strikethrough", 70 | "substitution", 71 | "tasklist", 72 | ] 73 | 74 | # Intersphinx mapping 75 | intersphinx_mapping = { 76 | 'python': ('https://docs.python.org/3', None), 77 | 'pandas': ('https://pandas.pydata.org/docs/', None), 78 | 'aiohttp': ('https://docs.aiohttp.org/en/stable/', None), 79 | } 80 | 81 | # -- Options for HTML output ------------------------------------------------- 82 | html_theme = 'sphinx_rtd_theme' 83 | html_static_path = ['_static'] 84 | html_logo = '../image.png' 85 | html_favicon = '../image.png' 86 | 87 | html_theme_options = { 88 | 'logo_only': False, 89 | 'display_version': True, 90 | 'prev_next_buttons_location': 'bottom', 91 | 'style_external_links': True, 92 | 'vcs_pageview_mode': '', 93 | 'style_nav_header_background': '#2980B9', 94 | # Toc options 95 | 'collapse_navigation': False, 96 | 'sticky_navigation': True, 97 | 'navigation_depth': 4, 98 | 'includehidden': True, 99 | 'titles_only': False 100 | } 101 | 102 | html_context = { 103 | "display_github": True, 104 | "github_user": "HzaCode", 105 | "github_repo": "pyDHIS2", 106 | "github_version": "main", 107 | "conf_py_path": "/docs/", 108 | } 109 | 110 | # -- Options for LaTeX output ------------------------------------------------ 111 | latex_elements = { 112 | 'papersize': 'a4paper', 113 | 'pointsize': '10pt', 114 | } 115 | 116 | # -- Extension configuration ------------------------------------------------- 117 | copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " 118 | copybutton_prompt_is_regexp = True 119 | 120 | -------------------------------------------------------------------------------- /docs/tracker.rst: -------------------------------------------------------------------------------- 1 | Tracker 2 | ======= 3 | 4 | The Tracker endpoint provides access to DHIS2 individual-level data (events and tracked entities). 5 | 6 | Fetching Events as Raw JSON 7 | ---------------------------- 8 | 9 | .. code-block:: python 10 | 11 | from pydhis2 import get_client, DHIS2Config 12 | 13 | AsyncDHIS2Client, _ = get_client() 14 | config = DHIS2Config() 15 | 16 | async with AsyncDHIS2Client(config) as client: 17 | # Get raw JSON response 18 | events = await client.tracker.events( 19 | program="programId", 20 | org_unit="orgUnitId", 21 | start_date="2023-01-01", 22 | end_date="2023-12-31", 23 | page_size=100 24 | ) 25 | print(events) 26 | 27 | Fetching Events as DataFrame 28 | ----------------------------- 29 | 30 | .. code-block:: python 31 | 32 | # Get events directly as DataFrame 33 | async with AsyncDHIS2Client(config) as client: 34 | df = await client.tracker.events_to_pandas( 35 | program="programId", 36 | org_unit="orgUnitId", 37 | status="COMPLETED", 38 | paging_size=200 39 | ) 40 | print(df) 41 | 42 | Streaming Events 43 | ---------------- 44 | 45 | For large datasets, stream events in pages: 46 | 47 | .. code-block:: python 48 | 49 | async with AsyncDHIS2Client(config) as client: 50 | async for page_df in client.tracker.stream_events( 51 | program="programId", 52 | org_unit="orgUnitId", 53 | page_size=200 54 | ): 55 | print(f"Processing {len(page_df)} events") 56 | # Process each page DataFrame 57 | # page_df is a pandas DataFrame 58 | 59 | Creating Events 60 | --------------- 61 | 62 | .. code-block:: python 63 | 64 | async with AsyncDHIS2Client(config) as client: 65 | event = { 66 | "program": "programId", 67 | "orgUnit": "orgUnitId", 68 | "occurredAt": "2023-01-15T10:00:00", 69 | "status": "COMPLETED", 70 | "dataValues": [ 71 | {"dataElement": "dataElementId", "value": "100"} 72 | ] 73 | } 74 | 75 | response = await client.tracker.create_event(event) 76 | print(response) 77 | 78 | Tracked Entities (Raw JSON) 79 | ---------------------------- 80 | 81 | Query tracked entities as raw JSON: 82 | 83 | .. code-block:: python 84 | 85 | async with AsyncDHIS2Client(config) as client: 86 | entities = await client.tracker.tracked_entities( 87 | tracked_entity_type="personId", 88 | org_unit="orgUnitId", 89 | page_size=50 90 | ) 91 | print(entities) 92 | 93 | Tracked Entities (DataFrame) 94 | ----------------------------- 95 | 96 | Query tracked entities and convert to DataFrame: 97 | 98 | .. code-block:: python 99 | 100 | async with AsyncDHIS2Client(config) as client: 101 | df = await client.tracker.tracked_entities_to_pandas( 102 | org_unit="orgUnitId", 103 | program="programId", 104 | paging_size=200 105 | ) 106 | print(df) 107 | 108 | Export to File 109 | -------------- 110 | 111 | .. code-block:: python 112 | 113 | from pydhis2.core.types import ExportFormat 114 | 115 | async with AsyncDHIS2Client(config) as client: 116 | # Export events to Parquet 117 | await client.tracker.export_events_to_file( 118 | "events.parquet", 119 | format=ExportFormat.PARQUET, 120 | program="programId", 121 | org_unit="orgUnitId" 122 | ) 123 | 124 | # Export tracked entities to CSV 125 | await client.tracker.export_tracked_entities_to_file( 126 | "entities.csv", 127 | format=ExportFormat.CSV, 128 | org_unit="orgUnitId" 129 | ) 130 | 131 | -------------------------------------------------------------------------------- /docs/datavaluesets.rst: -------------------------------------------------------------------------------- 1 | DataValueSets 2 | ============= 3 | 4 | The DataValueSets endpoint allows you to read and write individual data values. 5 | 6 | Pulling (Reading) Data Values 7 | ------------------------------ 8 | 9 | .. code-block:: python 10 | 11 | from pydhis2 import get_client, DHIS2Config 12 | 13 | AsyncDHIS2Client, _ = get_client() 14 | config = DHIS2Config() 15 | 16 | async with AsyncDHIS2Client(config) as client: 17 | # Pull data values - returns DataFrame directly 18 | df = await client.datavaluesets.pull( 19 | data_set="dataSetId", 20 | org_unit="orgUnitId", 21 | period="202301" 22 | ) 23 | print(df) 24 | 25 | # Pull with date range 26 | df = await client.datavaluesets.pull( 27 | data_set="dataSetId", 28 | org_unit="orgUnitId", 29 | start_date="2023-01-01", 30 | end_date="2023-12-31", 31 | children=True # Include child org units 32 | ) 33 | 34 | Pushing (Writing) Data Values 35 | ------------------------------ 36 | 37 | .. code-block:: python 38 | 39 | from pydhis2.core.types import ImportConfig, ImportStrategy 40 | 41 | async with AsyncDHIS2Client(config) as client: 42 | # Prepare data values 43 | data_values = { 44 | "dataSet": "dataSetId", 45 | "completeDate": "2023-01-31", 46 | "period": "202301", 47 | "orgUnit": "orgUnitId", 48 | "dataValues": [ 49 | { 50 | "dataElement": "dataElementId", 51 | "value": "100" 52 | } 53 | ] 54 | } 55 | 56 | # Push data 57 | summary = await client.datavaluesets.push( 58 | data_values, 59 | config=ImportConfig( 60 | strategy=ImportStrategy.CREATE_AND_UPDATE, 61 | dry_run=False 62 | ) 63 | ) 64 | 65 | print(f"Imported: {summary.imported}") 66 | print(f"Updated: {summary.updated}") 67 | print(f"Conflicts: {len(summary.conflicts)}") 68 | 69 | # Check conflicts 70 | if summary.has_conflicts: 71 | conflicts_df = summary.conflicts_df 72 | print(conflicts_df) 73 | 74 | Bulk Import with Chunking 75 | ------------------------- 76 | 77 | Import large datasets efficiently with automatic chunking: 78 | 79 | .. code-block:: python 80 | 81 | import pandas as pd 82 | from pydhis2.core.types import ImportConfig 83 | 84 | async with AsyncDHIS2Client(config) as client: 85 | # Read DataFrame 86 | df = pd.read_csv("data.csv") 87 | 88 | # Push with automatic chunking 89 | summary = await client.datavaluesets.push( 90 | df, 91 | chunk_size=5000, # Process 5000 records per chunk 92 | config=ImportConfig(atomic=False) 93 | ) 94 | 95 | print(f"Total imported: {summary.imported}") 96 | print(f"Total updated: {summary.updated}") 97 | 98 | Streaming Large Datasets 99 | ------------------------- 100 | 101 | For very large datasets, stream in pages: 102 | 103 | .. code-block:: python 104 | 105 | async with AsyncDHIS2Client(config) as client: 106 | async for page_df in client.datavaluesets.pull_paginated( 107 | data_set="dataSetId", 108 | org_unit="orgUnitId", 109 | page_size=5000 110 | ): 111 | print(f"Processing {len(page_df)} records") 112 | # Process each page 113 | 114 | Export to File 115 | -------------- 116 | 117 | .. code-block:: python 118 | 119 | from pydhis2.core.types import ExportFormat 120 | 121 | async with AsyncDHIS2Client(config) as client: 122 | await client.datavaluesets.export_to_file( 123 | "datavalues.parquet", 124 | format=ExportFormat.PARQUET, 125 | data_set="dataSetId", 126 | org_unit="orgUnitId", 127 | period="202301" 128 | ) 129 | 130 | -------------------------------------------------------------------------------- /docs/analytics.rst: -------------------------------------------------------------------------------- 1 | Analytics 2 | ========= 3 | 4 | The Analytics endpoint provides access to DHIS2 aggregated analytics data. 5 | 6 | Basic Query 7 | ----------- 8 | 9 | .. code-block:: python 10 | 11 | from pydhis2 import get_client, DHIS2Config 12 | from pydhis2.core.types import AnalyticsQuery 13 | 14 | AsyncDHIS2Client, _ = get_client() 15 | config = DHIS2Config() 16 | 17 | async with AsyncDHIS2Client(config) as client: 18 | query = AnalyticsQuery( 19 | dx=["indicator_id"], # Data dimension 20 | ou=["org_unit_id"], # Organisation unit 21 | pe="2023" # Period 22 | ) 23 | df = await client.analytics.to_pandas(query) 24 | print(df) 25 | 26 | Query Parameters 27 | ---------------- 28 | 29 | dx (Data Dimension) 30 | ~~~~~~~~~~~~~~~~~~~ 31 | 32 | Indicators, data elements, or data sets: 33 | 34 | .. code-block:: python 35 | 36 | query = AnalyticsQuery( 37 | dx=["b6mCG9sphIT", "fbfJHSPpUQD"], # Multiple data elements 38 | ou="qzGX4XdWufs", 39 | pe="2023" 40 | ) 41 | 42 | ou (Organisation Units) 43 | ~~~~~~~~~~~~~~~~~~~~~~~ 44 | 45 | Specific org units or levels: 46 | 47 | .. code-block:: python 48 | 49 | query = AnalyticsQuery( 50 | dx=["b6mCG9sphIT"], 51 | ou=["LEVEL-3", "OU_GROUP-abc123"], # Level or group 52 | pe="2023" 53 | ) 54 | 55 | pe (Periods) 56 | ~~~~~~~~~~~~ 57 | 58 | Various period formats: 59 | 60 | .. code-block:: python 61 | 62 | # Single year 63 | pe="2023" 64 | 65 | # Multiple periods 66 | pe=["2022", "2023"] 67 | 68 | # Quarterly 69 | pe="2023Q1;2023Q2;2023Q3;2023Q4" 70 | 71 | # Monthly 72 | pe="202301;202302;202303" 73 | 74 | # Relative periods 75 | pe="LAST_12_MONTHS" 76 | 77 | DataFrame Conversion 78 | -------------------- 79 | 80 | Convert directly to pandas DataFrame: 81 | 82 | .. code-block:: python 83 | 84 | df = await client.analytics.to_pandas(query) 85 | print(df.columns) 86 | # ['dx', 'ou', 'pe', 'value'] 87 | 88 | Export Formats 89 | -------------- 90 | 91 | Parquet 92 | ~~~~~~~ 93 | 94 | .. code-block:: python 95 | 96 | from pydhis2.core.types import ExportFormat 97 | 98 | await client.analytics.export_to_file( 99 | query, 100 | "output.parquet", 101 | format=ExportFormat.PARQUET 102 | ) 103 | 104 | CSV 105 | ~~~ 106 | 107 | .. code-block:: python 108 | 109 | from pydhis2.core.types import ExportFormat 110 | 111 | await client.analytics.export_to_file( 112 | query, 113 | "output.csv", 114 | format=ExportFormat.CSV 115 | ) 116 | 117 | Arrow 118 | ~~~~~ 119 | 120 | .. code-block:: python 121 | 122 | table = await client.analytics.to_arrow(query) 123 | print(table.schema) 124 | 125 | Pagination and Streaming 126 | ------------------------- 127 | 128 | For large datasets: 129 | 130 | .. code-block:: python 131 | 132 | async with AsyncDHIS2Client(config) as client: 133 | async for page_df in client.analytics.stream_paginated( 134 | query, 135 | page_size=1000, 136 | max_pages=10 137 | ): 138 | print(f"Processing {len(page_df)} records") 139 | # Process each page DataFrame 140 | # page_df is a pandas DataFrame 141 | 142 | Filters 143 | ------- 144 | 145 | Add filters to your query: 146 | 147 | .. code-block:: python 148 | 149 | query = AnalyticsQuery( 150 | dx=["b6mCG9sphIT"], 151 | ou="qzGX4XdWufs", 152 | pe="2023", 153 | filters={"age": "AGE_0_4", "sex": "FEMALE"} 154 | ) 155 | 156 | Advanced Options 157 | ---------------- 158 | 159 | Skip Metadata 160 | ~~~~~~~~~~~~~ 161 | 162 | .. code-block:: python 163 | 164 | query = AnalyticsQuery( 165 | dx=["b6mCG9sphIT"], 166 | ou="qzGX4XdWufs", 167 | pe="2023", 168 | skip_meta=True # Don't include metadata 169 | ) 170 | 171 | Hierarchy Meta 172 | ~~~~~~~~~~~~~~ 173 | 174 | .. code-block:: python 175 | 176 | query = AnalyticsQuery( 177 | dx=["b6mCG9sphIT"], 178 | ou="qzGX4XdWufs", 179 | pe="2023", 180 | hierarchy_meta=True # Include org unit hierarchy 181 | ) 182 | 183 | -------------------------------------------------------------------------------- /pydhis2/templates/{{cookiecutter.project_slug}}/README.md: -------------------------------------------------------------------------------- 1 | # {{ cookiecutter.project_name }} 2 | 3 | {{ cookiecutter.project_description }} 4 | 5 | ## Project Structure 6 | 7 | ``` 8 | {{ cookiecutter.project_slug }}/ 9 | ├── configs/ # Configuration files 10 | ├── data/ # Data files 11 | ├── notebooks/ # Jupyter notebooks 12 | ├── pipelines/ # Data pipeline configurations 13 | ├── reports/ # Generated reports 14 | ├── scripts/ # Script files 15 | └── requirements.txt # Python dependencies 16 | ``` 17 | 18 | ## Quick Start 19 | 20 | ### 1. Install Dependencies 21 | 22 | ```bash 23 | pip install -r requirements.txt 24 | ``` 25 | 26 | ### 2. Configure DHIS2 Connection 27 | 28 | Copy `.env.example` to `.env` and fill in your DHIS2 connection details: 29 | 30 | ```bash 31 | cp .env.example .env 32 | ``` 33 | 34 | Edit the `.env` file: 35 | 36 | ``` 37 | DHIS2_URL={{ cookiecutter.dhis2_url }} 38 | DHIS2_USERNAME=your_username 39 | DHIS2_PASSWORD=your_password 40 | ``` 41 | 42 | ### 3. Test Connection 43 | 44 | ```bash 45 | pydhis2 login 46 | ``` 47 | 48 | ### 4. Run Example Pipeline 49 | 50 | ```bash 51 | pydhis2 pipeline run --recipe pipelines/example.yml 52 | ``` 53 | 54 | ## Usage Guide 55 | 56 | ### Data Pulling 57 | 58 | Pull Analytics data: 59 | ```bash 60 | pydhis2 analytics pull --dx "indicator_id" --ou "org_unit_id" --pe "2023Q1:2023Q4" --out data/analytics.parquet 61 | ``` 62 | 63 | Pull Tracker events: 64 | ```bash 65 | pydhis2 tracker pull --program "program_id" --status COMPLETED --out data/events.parquet 66 | ``` 67 | 68 | ### Data Quality Review 69 | 70 | Run DQR analysis: 71 | ```bash 72 | pydhis2 dqr run --input data/analytics.parquet --html reports/dqr_report.html --json reports/dqr_summary.json 73 | ``` 74 | 75 | ### Jupyter Notebooks 76 | 77 | {% if cookiecutter.use_notebooks == "yes" -%} 78 | Start Jupyter Lab: 79 | ```bash 80 | jupyter lab 81 | ``` 82 | 83 | Example notebooks: 84 | - `01_data_exploration.ipynb` - Data Exploration 85 | - `02_quality_assessment.ipynb` - Data Quality Assessment 86 | - `03_analysis_and_visualization.ipynb` - Analysis and Visualization 87 | {%- endif %} 88 | 89 | ## Configuration Details 90 | 91 | ### DHIS2 Configuration (`configs/dhis2.yml`) 92 | 93 | ```yaml 94 | # DHIS2 connection configuration 95 | connection: 96 | base_url: "{{ cookiecutter.dhis2_url }}" 97 | rps: 8 # Requests per second 98 | concurrency: 8 # Concurrent connections 99 | timeouts: [10, 60, 120] # Connect/read/total timeout 100 | 101 | # Retry configuration 102 | retry: 103 | max_attempts: 5 104 | base_delay: 0.5 105 | max_delay: 60.0 106 | ``` 107 | 108 | {% if cookiecutter.use_dqr == "yes" -%} 109 | ### DQR Configuration (`configs/dqr.yml`) 110 | 111 | ```yaml 112 | # Data quality rules 113 | completeness: 114 | thresholds: 115 | reporting_completeness_pass: 0.90 116 | reporting_completeness_warn: 0.70 117 | 118 | consistency: 119 | thresholds: 120 | outlier_threshold: 3.0 121 | variance_threshold: 0.5 122 | 123 | timeliness: 124 | thresholds: 125 | submission_timeliness_pass: 0.80 126 | max_delay_days: 30 127 | ``` 128 | {%- endif %} 129 | 130 | {% if cookiecutter.use_pipeline == "yes" -%} 131 | ### Pipeline Configuration (`pipelines/example.yml`) 132 | 133 | ```yaml 134 | name: "Example Data Analysis Pipeline" 135 | description: "Pull data, assess quality, generate reports" 136 | 137 | steps: 138 | - type: analytics_pull 139 | name: "Pull Analytics Data" 140 | dx: "indicator_id" 141 | ou: "org_unit_id" 142 | pe: "2023Q1:2023Q4" 143 | output: "analytics.parquet" 144 | 145 | - type: dqr 146 | name: "Data Quality Review" 147 | input: "analytics.parquet" 148 | html_output: "dqr_report.html" 149 | json_output: "dqr_summary.json" 150 | ``` 151 | {%- endif %} 152 | 153 | ## License 154 | 155 | {% if cookiecutter.license == "Apache-2.0" -%} 156 | Apache License 2.0 157 | {%- elif cookiecutter.license == "MIT" -%} 158 | MIT License 159 | {%- else -%} 160 | BSD 3-Clause License 161 | {%- endif %} 162 | 163 | ## Author 164 | 165 | {{ cookiecutter.author_name }} ({{ cookiecutter.author_email }}) 166 | -------------------------------------------------------------------------------- /pydhis2/endpoints/analytics.py: -------------------------------------------------------------------------------- 1 | """Analytics endpoint - Analysis data queries and DataFrame conversion""" 2 | 3 | from collections.abc import AsyncIterator 4 | from typing import Any, Dict, Optional 5 | 6 | import pandas as pd 7 | import pyarrow as pa 8 | 9 | from pydhis2.core.types import AnalyticsQuery, ExportFormat 10 | from pydhis2.io.arrow import ArrowConverter 11 | from pydhis2.io.to_pandas import AnalyticsDataFrameConverter 12 | 13 | 14 | class AnalyticsEndpoint: 15 | """Analytics API endpoint""" 16 | 17 | def __init__(self, client): 18 | self.client = client 19 | self.converter = AnalyticsDataFrameConverter() 20 | self.arrow_converter = ArrowConverter() 21 | 22 | async def raw( 23 | self, 24 | query: AnalyticsQuery, 25 | output_format: str = "json" 26 | ) -> Dict[str, Any]: 27 | """Get raw JSON data""" 28 | params = query.to_params() 29 | if output_format != "json": 30 | params['format'] = output_format 31 | 32 | return await self.client.get('/api/analytics', params=params) 33 | 34 | async def to_pandas( 35 | self, 36 | query: AnalyticsQuery, 37 | long_format: bool = True 38 | ) -> pd.DataFrame: 39 | """Convert to Pandas DataFrame""" 40 | data = await self.raw(query) 41 | return self.converter.to_dataframe(data, long_format=long_format) 42 | 43 | async def to_arrow( 44 | self, 45 | query: AnalyticsQuery, 46 | long_format: bool = True 47 | ) -> pa.Table: 48 | """Convert to Arrow Table""" 49 | df = await self.to_pandas(query, long_format=long_format) 50 | return self.arrow_converter.from_pandas(df) 51 | 52 | async def stream_paginated( 53 | self, 54 | query: AnalyticsQuery, 55 | page_size: int = 1000, 56 | max_pages: Optional[int] = None 57 | ) -> AsyncIterator[pd.DataFrame]: 58 | """Stream paginated data""" 59 | page = 1 60 | 61 | while True: 62 | # Modify query parameters to add paging 63 | page_params = query.to_params() 64 | page_params.update({ 65 | 'page': page, 66 | 'pageSize': page_size, 67 | 'paging': 'true' 68 | }) 69 | 70 | response = await self.client.get('/api/analytics', params=page_params) 71 | 72 | # Convert to DataFrame 73 | df = self.converter.to_dataframe(response, long_format=True) 74 | if not df.empty: 75 | yield df 76 | 77 | # Check pagination information 78 | pager = response.get('pager', {}) 79 | total_pages = pager.get('pageCount', 1) 80 | 81 | if page >= total_pages: 82 | break 83 | 84 | if max_pages and page >= max_pages: 85 | break 86 | 87 | page += 1 88 | 89 | async def export_to_file( 90 | self, 91 | query: AnalyticsQuery, 92 | file_path: str, 93 | format: ExportFormat = ExportFormat.PARQUET, 94 | **kwargs 95 | ) -> str: 96 | """Export to file""" 97 | df = await self.to_pandas(query) 98 | 99 | if format == ExportFormat.PARQUET: 100 | df.to_parquet(file_path, **kwargs) 101 | elif format == ExportFormat.CSV: 102 | df.to_csv(file_path, **kwargs) 103 | elif format == ExportFormat.EXCEL: 104 | df.to_excel(file_path, **kwargs) 105 | elif format == ExportFormat.FEATHER: 106 | df.to_feather(file_path, **kwargs) 107 | elif format == ExportFormat.JSON: 108 | df.to_json(file_path, **kwargs) 109 | else: 110 | raise ValueError(f"Unsupported export format: {format}") 111 | 112 | return file_path 113 | 114 | async def get_dimensions(self) -> Dict[str, Any]: 115 | """Get available dimensions""" 116 | return await self.client.get('/api/analytics/dimensions') 117 | 118 | async def get_dimension_items(self, dimension: str) -> Dict[str, Any]: 119 | """Get items for a specific dimension""" 120 | return await self.client.get(f'/api/analytics/dimensions/{dimension}') 121 | 122 | async def validate_query(self, query: AnalyticsQuery) -> Dict[str, Any]: 123 | """Validate query (dry run)""" 124 | params = query.to_params() 125 | params['dryRun'] = 'true' 126 | return await self.client.get('/api/analytics', params=params) 127 | -------------------------------------------------------------------------------- /docs/dqr.rst: -------------------------------------------------------------------------------- 1 | Data Quality Review (DQR) 2 | ========================= 3 | 4 | pydhis2 includes built-in Data Quality Review metrics based on WHO standards. 5 | 6 | Overview 7 | -------- 8 | 9 | The DQR module helps you assess data quality across three dimensions: 10 | 11 | * **Completeness**: Are all expected data values present? 12 | * **Consistency**: Are the data values reasonable and consistent? 13 | * **Timeliness**: Are the data submitted on time? 14 | 15 | Basic Usage 16 | ----------- 17 | 18 | .. code-block:: python 19 | 20 | from pydhis2 import get_client, DHIS2Config 21 | from pydhis2.core.types import AnalyticsQuery 22 | from pydhis2.dqr.metrics import CompletenessMetrics, ConsistencyMetrics, TimelinessMetrics 23 | 24 | AsyncDHIS2Client, _ = get_client() 25 | config = DHIS2Config() 26 | 27 | async with AsyncDHIS2Client(config) as client: 28 | # Fetch analytics data 29 | query = AnalyticsQuery(dx=["indicator_id"], ou="org_unit_id", pe="2023") 30 | df = await client.analytics.to_pandas(query) 31 | 32 | # Run DQR analysis 33 | completeness = CompletenessMetrics() 34 | consistency = ConsistencyMetrics() 35 | timeliness = TimelinessMetrics() 36 | 37 | completeness_results = completeness.calculate(df) 38 | consistency_results = consistency.calculate(df) 39 | timeliness_results = timeliness.calculate(df) 40 | 41 | for result in completeness_results + consistency_results + timeliness_results: 42 | print(f"{result.metric_name}: {result.value:.2%} ({result.status})") 43 | 44 | Completeness Metrics 45 | -------------------- 46 | 47 | .. code-block:: python 48 | 49 | from pydhis2.dqr.metrics import CompletenessMetrics 50 | 51 | completeness = CompletenessMetrics() 52 | results = completeness.calculate(df) 53 | 54 | for result in results: 55 | print(f"{result.metric_name}: {result.value:.2%}") 56 | print(f"Status: {result.status}") 57 | print(f"Message: {result.message}") 58 | print(f"Details: {result.details}") 59 | 60 | Consistency Metrics 61 | ------------------- 62 | 63 | .. code-block:: python 64 | 65 | from pydhis2.dqr.metrics import ConsistencyMetrics 66 | 67 | consistency = ConsistencyMetrics() 68 | results = consistency.calculate(df) 69 | 70 | for result in results: 71 | print(f"{result.metric_name}: {result.value:.2%}") 72 | if result.metric_name == "outlier_detection": 73 | print(f"Outliers detected: {result.details.get('outlier_count')}") 74 | 75 | Timeliness Metrics 76 | ------------------ 77 | 78 | .. code-block:: python 79 | 80 | from pydhis2.dqr.metrics import TimelinessMetrics 81 | 82 | timeliness = TimelinessMetrics() 83 | results = timeliness.calculate(df) 84 | 85 | for result in results: 86 | print(f"{result.metric_name}: {result.value:.2%}") 87 | print(f"Timely records: {result.details.get('timely_records')}/{result.details.get('total_records')}") 88 | 89 | Generating Reports 90 | ------------------ 91 | 92 | Collect All Results 93 | ~~~~~~~~~~~~~~~~~~~ 94 | 95 | .. code-block:: python 96 | 97 | import json 98 | from pydhis2.dqr.metrics import CompletenessMetrics, ConsistencyMetrics, TimelinessMetrics 99 | 100 | # Calculate all metrics 101 | completeness = CompletenessMetrics() 102 | consistency = ConsistencyMetrics() 103 | timeliness = TimelinessMetrics() 104 | 105 | all_results = ( 106 | completeness.calculate(df) + 107 | consistency.calculate(df) + 108 | timeliness.calculate(df) 109 | ) 110 | 111 | # Convert to summary dict 112 | summary = { 113 | "metrics": [ 114 | { 115 | "name": r.metric_name, 116 | "value": r.value, 117 | "status": r.status, 118 | "message": r.message, 119 | "details": r.details 120 | } 121 | for r in all_results 122 | ] 123 | } 124 | 125 | # Save to JSON 126 | with open("dqr_summary.json", "w") as f: 127 | json.dump(summary, f, indent=2) 128 | 129 | Configuration 130 | ------------- 131 | 132 | Customize DQR thresholds in ``configs/dqr.yml``: 133 | 134 | .. code-block:: yaml 135 | 136 | completeness: 137 | thresholds: 138 | pass: 0.90 139 | warn: 0.70 140 | 141 | consistency: 142 | thresholds: 143 | outlier: 3.0 144 | variance: 0.5 145 | 146 | timeliness: 147 | thresholds: 148 | pass: 0.80 149 | max_delay_days: 30 150 | 151 | -------------------------------------------------------------------------------- /pydhis2/testing/demo_test.py: -------------------------------------------------------------------------------- 1 | """Demo test showing how to use pydhis2 testing utilities""" 2 | 3 | import asyncio 4 | import logging 5 | 6 | from pydhis2.core.client import AsyncDHIS2Client 7 | from pydhis2.core.types import DHIS2Config 8 | from pydhis2.testing import ( 9 | BenchmarkRunner, 10 | MockDHIS2Server, 11 | NetworkSimulator, 12 | TestDataGenerator, 13 | ) 14 | 15 | logging.basicConfig(level=logging.INFO) 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | async def demo_mock_server(): 20 | """Demonstrate mock server usage""" 21 | print("\n=== Mock Server Demo ===") 22 | 23 | # Create test data 24 | generator = TestDataGenerator() 25 | org_units = generator.generate_org_units(5) 26 | data_elements = generator.generate_data_elements(3) 27 | periods = generator.generate_periods(months=6) 28 | 29 | # Start mock server 30 | mock_server = MockDHIS2Server(port=8081) 31 | 32 | # Configure responses 33 | analytics_response = generator.generate_analytics_response( 34 | data_elements, org_units, periods 35 | ) 36 | mock_server.configure_analytics_response( 37 | analytics_response["headers"], 38 | analytics_response["rows"] 39 | ) 40 | 41 | async with mock_server as base_url: 42 | # Create client pointing to mock server 43 | config = DHIS2Config( 44 | base_url=base_url, 45 | auth=("test_user", "test_pass"), 46 | rps=10.0 47 | ) 48 | 49 | async with AsyncDHIS2Client(config) as client: 50 | # Test basic connectivity 51 | me_data = await client.get("/api/me") 52 | print(f"✅ Connected as: {me_data.get('name')}") 53 | 54 | # Test Analytics 55 | analytics_data = await client.get("/api/analytics", params={ 56 | "dimension": ["dx:test", "pe:2023Q1", "ou:test"] 57 | }) 58 | print(f"✅ Analytics: {len(analytics_data.get('rows', []))} rows") 59 | 60 | # Check request log 61 | requests = mock_server.get_request_log() 62 | print(f"📊 Server received {len(requests)} requests") 63 | 64 | 65 | async def demo_network_simulation(): 66 | """Demonstrate network condition simulation""" 67 | print("\n=== Network Simulation Demo ===") 68 | 69 | # Test different network conditions 70 | conditions = [ 71 | NetworkSimulator.NORMAL, 72 | NetworkSimulator.SLOW_3G, 73 | NetworkSimulator.WEAK_NETWORK 74 | ] 75 | 76 | for condition in conditions: 77 | print(f"\n🌐 Testing {condition.name} network...") 78 | print(f" Latency: {condition.latency_ms}ms") 79 | print(f" Packet loss: {condition.packet_loss_rate:.1%}") 80 | 81 | # Simulate some network operations 82 | simulator = NetworkSimulator(condition) 83 | 84 | start_time = asyncio.get_event_loop().time() 85 | for _ in range(3): 86 | await simulator.simulate_latency() 87 | if simulator.should_drop_packet(): 88 | print(" 📉 Packet dropped!") 89 | 90 | elapsed = asyncio.get_event_loop().time() - start_time 91 | print(f" ⏱️ Total time: {elapsed:.3f}s") 92 | 93 | 94 | async def demo_benchmark_runner(): 95 | """Demonstrate benchmark runner usage""" 96 | print("\n=== Benchmark Runner Demo ===") 97 | 98 | runner = BenchmarkRunner("pydhis2_demo") 99 | 100 | async def sample_operation(): 101 | """Sample async operation to benchmark""" 102 | await asyncio.sleep(random.uniform(0.01, 0.05)) # Simulate work 103 | if random.random() < 0.1: # 10% failure rate 104 | raise Exception("Simulated error") 105 | 106 | # Run repeated test 107 | await runner.run_repeated_test( 108 | sample_operation, 109 | "sample_async_operation", 110 | repetitions=20 111 | ) 112 | 113 | # Run concurrent test 114 | await runner.run_concurrent_test( 115 | sample_operation, 116 | "concurrent_sample_operation", 117 | concurrency=5, 118 | total_requests=50 119 | ) 120 | 121 | # Print results 122 | runner.print_summary() 123 | 124 | 125 | async def main(): 126 | """Run all demos""" 127 | print("🚀 pydhis2 Testing Utilities Demo") 128 | print("=" * 50) 129 | 130 | try: 131 | await demo_mock_server() 132 | await demo_network_simulation() 133 | await demo_benchmark_runner() 134 | 135 | print("\n🎉 All demos completed successfully!") 136 | 137 | except Exception as e: 138 | print(f"❌ Demo failed: {e}") 139 | import traceback 140 | traceback.print_exc() 141 | 142 | 143 | if __name__ == "__main__": 144 | import random 145 | asyncio.run(main()) 146 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 8 | 9 | ## Our Standards 10 | 11 | Examples of behavior that contributes to a positive environment for our community include: 12 | 13 | * Demonstrating empathy and kindness toward other people 14 | * Being respectful of differing opinions, viewpoints, and experiences 15 | * Giving and gracefully accepting constructive feedback 16 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience 17 | * Focusing on what is best not just for us as individuals, but for the overall community 18 | 19 | Examples of unacceptable behavior include: 20 | 21 | * The use of sexualized language or imagery, and sexual attention or advances of any kind 22 | * Trolling, insulting or derogatory comments, and personal or political attacks 23 | * Public or private harassment 24 | * Publishing others' private information, such as a physical or email address, without their explicit permission 25 | * Other conduct which could reasonably be considered inappropriate in a professional setting 26 | 27 | ## Enforcement Responsibilities 28 | 29 | Community leaders are responsible for clarifying and enforcing our standards and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. 32 | 33 | ## Scope 34 | 35 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 36 | 37 | ## Enforcement 38 | 39 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at the project's [GitHub Issues](https://github.com/HzaCode/pyDHIS2/issues) or [Discussions](https://github.com/HzaCode/pyDHIS2/discussions). All complaints will be reviewed and investigated promptly and fairly. 40 | 41 | All community leaders are obligated to respect the privacy and security of the reporter of any incident. 42 | 43 | ## Enforcement Guidelines 44 | 45 | Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: 46 | 47 | ### 1. Correction 48 | 49 | **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. 50 | 51 | **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. 52 | 53 | ### 2. Warning 54 | 55 | **Community Impact**: A violation through a single incident or series of actions. 56 | 57 | **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interaction in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. 58 | 59 | ### 3. Temporary Ban 60 | 61 | **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. 62 | 63 | **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. 64 | 65 | ### 4. Permanent Ban 66 | 67 | **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. 68 | 69 | **Consequence**: A permanent ban from any sort of public interaction within the community. 70 | 71 | ## Attribution 72 | 73 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 74 | 75 | [homepage]: https://www.contributor-covenant.org 76 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 77 | -------------------------------------------------------------------------------- /pydhis2/observe/logging.py: -------------------------------------------------------------------------------- 1 | """Structured logging configuration""" 2 | 3 | import json 4 | import logging 5 | import sys 6 | from datetime import datetime 7 | from typing import Optional 8 | 9 | 10 | class StructuredFormatter(logging.Formatter): 11 | """Structured log formatter""" 12 | 13 | def format(self, record: logging.LogRecord) -> str: 14 | """Format log record as JSON""" 15 | log_data = { 16 | 'timestamp': datetime.utcnow().isoformat() + 'Z', 17 | 'level': record.levelname, 18 | 'logger': record.name, 19 | 'message': record.getMessage(), 20 | 'module': record.module, 21 | 'function': record.funcName, 22 | 'line': record.lineno, 23 | } 24 | 25 | # Add exception information 26 | if record.exc_info: 27 | log_data['exception'] = self.formatException(record.exc_info) 28 | 29 | # Add extra fields 30 | if hasattr(record, 'extra_fields'): 31 | log_data.update(record.extra_fields) 32 | 33 | return json.dumps(log_data, ensure_ascii=False) 34 | 35 | 36 | class SensitiveDataFilter(logging.Filter): 37 | """Sensitive data filter""" 38 | 39 | SENSITIVE_PATTERNS = [ 40 | 'password', 'token', 'key', 'secret', 'auth', 'credential' 41 | ] 42 | 43 | def filter(self, record: logging.LogRecord) -> bool: 44 | """Filter sensitive data""" 45 | message = record.getMessage().lower() 46 | 47 | # Check if contains sensitive keywords 48 | for pattern in self.SENSITIVE_PATTERNS: 49 | if pattern in message: 50 | # Replace sensitive information 51 | record.msg = record.msg.replace( 52 | str(record.args) if record.args else '', 53 | '[REDACTED]' 54 | ) 55 | break 56 | 57 | return True 58 | 59 | 60 | def setup_logging( 61 | level: str = "INFO", 62 | structured: bool = True, 63 | filter_sensitive: bool = True, 64 | log_file: Optional[str] = None 65 | ) -> None: 66 | """Setup logging configuration""" 67 | 68 | # Set log level 69 | log_level = getattr(logging, level.upper(), logging.INFO) 70 | 71 | # Create root logger 72 | root_logger = logging.getLogger() 73 | root_logger.setLevel(log_level) 74 | 75 | # Clear existing handlers 76 | root_logger.handlers.clear() 77 | 78 | # Console handler 79 | console_handler = logging.StreamHandler(sys.stdout) 80 | console_handler.setLevel(log_level) 81 | 82 | if structured: 83 | console_handler.setFormatter(StructuredFormatter()) 84 | else: 85 | console_handler.setFormatter( 86 | logging.Formatter( 87 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 88 | ) 89 | ) 90 | 91 | if filter_sensitive: 92 | console_handler.addFilter(SensitiveDataFilter()) 93 | 94 | root_logger.addHandler(console_handler) 95 | 96 | # File handler (if specified) 97 | if log_file: 98 | file_handler = logging.FileHandler(log_file, encoding='utf-8') 99 | file_handler.setLevel(log_level) 100 | file_handler.setFormatter(StructuredFormatter()) 101 | 102 | if filter_sensitive: 103 | file_handler.addFilter(SensitiveDataFilter()) 104 | 105 | root_logger.addHandler(file_handler) 106 | 107 | # Third-party library log levels 108 | logging.getLogger('aiohttp').setLevel(logging.WARNING) 109 | logging.getLogger('urllib3').setLevel(logging.WARNING) 110 | 111 | 112 | def get_logger(name: str, **extra_fields) -> logging.Logger: 113 | """Get logger with extra fields""" 114 | logger = logging.getLogger(name) 115 | 116 | # Create adapter to add extra fields 117 | class ExtraFieldsAdapter(logging.LoggerAdapter): 118 | def process(self, msg, kwargs): 119 | # Merge extra fields 120 | if 'extra' not in kwargs: 121 | kwargs['extra'] = {} 122 | kwargs['extra']['extra_fields'] = {**extra_fields, **kwargs['extra'].get('extra_fields', {})} 123 | return msg, kwargs 124 | 125 | return ExtraFieldsAdapter(logger, extra_fields) 126 | 127 | 128 | # Convenience functions 129 | def log_request(logger: logging.Logger, method: str, url: str, status: Optional[int] = None, **kwargs): 130 | """Log HTTP request""" 131 | extra_fields = { 132 | 'http_method': method, 133 | 'http_url': url, 134 | 'event_type': 'http_request' 135 | } 136 | 137 | if status: 138 | extra_fields['http_status'] = status 139 | 140 | extra_fields.update(kwargs) 141 | 142 | logger.info( 143 | f"{method} {url}" + (f" -> {status}" if status else ""), 144 | extra={'extra_fields': extra_fields} 145 | ) 146 | 147 | 148 | def log_retry(logger: logging.Logger, attempt: int, max_attempts: int, delay: float, **kwargs): 149 | """Log retry attempt""" 150 | extra_fields = { 151 | 'retry_attempt': attempt, 152 | 'retry_max_attempts': max_attempts, 153 | 'retry_delay': delay, 154 | 'event_type': 'retry' 155 | } 156 | 157 | extra_fields.update(kwargs) 158 | 159 | logger.warning( 160 | f"Retry attempt {attempt}/{max_attempts}, waiting {delay}s", 161 | extra={'extra_fields': extra_fields} 162 | ) 163 | 164 | 165 | def log_rate_limit(logger: logging.Logger, current_rate: float, limit: float, wait_time: float, **kwargs): 166 | """Log rate limiting""" 167 | extra_fields = { 168 | 'rate_current': current_rate, 169 | 'rate_limit': limit, 170 | 'rate_wait_time': wait_time, 171 | 'event_type': 'rate_limit' 172 | } 173 | 174 | extra_fields.update(kwargs) 175 | 176 | logger.info( 177 | f"Rate limited: {current_rate:.2f}/{limit:.2f} rps, waiting {wait_time:.2f}s", 178 | extra={'extra_fields': extra_fields} 179 | ) 180 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | pydhis2 Documentation 2 | ===================== 3 | 4 | .. image:: https://img.shields.io/pypi/v/pydhis2?style=flat&color=blue 5 | :target: https://pypi.org/project/pydhis2 6 | :alt: PyPI version 7 | 8 | .. image:: https://img.shields.io/pypi/pyversions/pydhis2?style=flat&color=blue 9 | :target: https://pypi.org/project/pydhis2/ 10 | :alt: Python versions 11 | 12 | .. image:: https://img.shields.io/pepy/dt/pydhis2?style=flat&color=blue 13 | :target: https://pepy.tech/project/pydhis2 14 | :alt: Downloads 15 | 16 | .. image:: https://img.shields.io/badge/tests-passing-brightgreen?style=flat 17 | :target: https://github.com/HzaCode/pyDHIS2/actions/workflows/ci.yml 18 | :alt: Tests 19 | 20 | .. image:: https://img.shields.io/badge/license-Apache%202.0-green?style=flat 21 | :target: https://opensource.org/licenses/Apache-2.0 22 | :alt: License 23 | 24 | **pydhis2** is a next-generation Python library for interacting with `DHIS2 `_, 25 | the world's largest health information management system. It provides a clean, modern, and efficient API 26 | for data extraction, analysis, and management, with a strong emphasis on creating reproducible workflows—a 27 | critical need in scientific research and public health analysis, especially in Low and Middle-Income Country 28 | (LMIC) contexts. 29 | 30 | Features 31 | -------- 32 | 33 | 🚀 **Modern & Asynchronous** 34 | Built with ``asyncio`` for high-performance, non-blocking I/O, making it ideal for large-scale data operations. 35 | A synchronous client is also provided for simplicity in smaller scripts. 36 | 37 | 📊 **Reproducible by Design** 38 | From project templates to a powerful CLI, pydhis2 is built to support standardized, shareable, and verifiable 39 | data analysis pipelines. 40 | 41 | 🐼 **Seamless DataFrame Integration** 42 | Natively convert DHIS2 analytics data into Pandas DataFrames with a single method call (``.to_pandas()``), 43 | connecting you instantly to the PyData ecosystem. 44 | 45 | 🔧 **Powerful Command Line Interface** 46 | Automate common tasks like data pulling and configuration directly from your terminal. 47 | 48 | Quick Start 49 | ----------- 50 | 51 | Installation 52 | ~~~~~~~~~~~~ 53 | 54 | Install pydhis2 directly from PyPI: 55 | 56 | .. code-block:: bash 57 | 58 | pip install pydhis2 59 | 60 | Verify Installation 61 | ~~~~~~~~~~~~~~~~~~~ 62 | 63 | Use the built-in CLI to run a quick demo: 64 | 65 | .. code-block:: bash 66 | 67 | # Check the installed version 68 | pydhis2 version 69 | 70 | # Run the quick demo 71 | pydhis2 demo quick 72 | 73 | Basic Usage Example 74 | ~~~~~~~~~~~~~~~~~~~ 75 | 76 | .. code-block:: python 77 | 78 | import asyncio 79 | import sys 80 | from pydhis2 import get_client, DHIS2Config 81 | from pydhis2.core.types import AnalyticsQuery 82 | 83 | # pydhis2 provides both an async and a sync client 84 | AsyncDHIS2Client, _ = get_client() 85 | 86 | async def main(): 87 | # 1. Configure the connection to a DHIS2 server 88 | config = DHIS2Config( 89 | base_url="https://demos.dhis2.org/dq", 90 | auth=("demo", "District1#") 91 | ) 92 | 93 | async with AsyncDHIS2Client(config) as client: 94 | # 2. Define the query parameters 95 | query = AnalyticsQuery( 96 | dx=["b6mCG9sphIT"], # Data element 97 | ou="qzGX4XdWufs", # Org unit 98 | pe="2023" # Period 99 | ) 100 | 101 | # 3. Fetch data and convert directly to DataFrame 102 | df = await client.analytics.to_pandas(query) 103 | 104 | # 4. Analyze and display 105 | print("✅ Data fetched successfully!") 106 | print(f"Retrieved {len(df)} records.") 107 | print(df.head()) 108 | 109 | if __name__ == "__main__": 110 | if sys.platform == 'win32': 111 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) 112 | asyncio.run(main()) 113 | 114 | Table of Contents 115 | ----------------- 116 | 117 | .. toctree:: 118 | :maxdepth: 2 119 | :caption: User Guide 120 | 121 | installation 122 | quickstart 123 | configuration 124 | analytics 125 | datavaluesets 126 | tracker 127 | metadata 128 | dqr 129 | cli 130 | 131 | .. toctree:: 132 | :maxdepth: 2 133 | :caption: API Reference 134 | 135 | api/client 136 | api/endpoints 137 | api/types 138 | api/io 139 | 140 | .. toctree:: 141 | :maxdepth: 1 142 | :caption: Developer Guide 143 | 144 | contributing 145 | changelog 146 | 147 | Supported Endpoints 148 | ------------------- 149 | 150 | +-------------------+------+-------+-----------+------------+-----------+ 151 | | Endpoint | Read | Write | DataFrame | Pagination | Streaming | 152 | +===================+======+=======+===========+============+===========+ 153 | | **Analytics** | ✅ | \- | ✅ | ✅ | ✅ | 154 | +-------------------+------+-------+-----------+------------+-----------+ 155 | | **DataValueSets** | ✅ | ✅ | ✅ | ✅ | ✅ | 156 | +-------------------+------+-------+-----------+------------+-----------+ 157 | | **Tracker Events**| ✅ | ✅ | ✅ | ✅ | ✅ | 158 | +-------------------+------+-------+-----------+------------+-----------+ 159 | | **Metadata** | ✅ | ✅ | ✅ | \- | \- | 160 | +-------------------+------+-------+-----------+------------+-----------+ 161 | 162 | Compatibility 163 | ------------- 164 | 165 | * **Python**: ≥ 3.9 166 | * **DHIS2**: ≥ 2.36 167 | * **Platforms**: Windows, Linux, macOS 168 | 169 | Community & Support 170 | ------------------- 171 | 172 | * 📖 `Documentation `_ 173 | * 🐛 `GitHub Issues `_ 174 | * 💬 `GitHub Discussions `_ 175 | * 📝 `Changelog `_ 176 | 177 | License 178 | ------- 179 | 180 | This project is licensed under the **Apache License 2.0**. See the `LICENSE `_ file for details. 181 | 182 | Indices and tables 183 | ================== 184 | 185 | * :ref:`genindex` 186 | * :ref:`modindex` 187 | * :ref:`search` 188 | 189 | -------------------------------------------------------------------------------- /tests/unit/test_cli.py: -------------------------------------------------------------------------------- 1 | """Tests for CLI module""" 2 | 3 | from typer.testing import CliRunner 4 | from pydhis2.cli.main import app 5 | from unittest.mock import patch 6 | 7 | runner = CliRunner() 8 | 9 | 10 | class TestVersionCommand: 11 | """Test version command""" 12 | 13 | def test_version_command(self): 14 | """Test version command output""" 15 | result = runner.invoke(app, ["version"]) 16 | assert result.exit_code == 0 17 | assert "pydhis2 version" in result.stdout 18 | 19 | 20 | class TestConfigCommand: 21 | """Test config command""" 22 | 23 | def test_config_with_all_params(self): 24 | """Test config command with all parameters""" 25 | result = runner.invoke( 26 | app, 27 | ["config", "--url", "https://test.dhis2.org", "--username", "admin", "--password", "district"], 28 | ) 29 | assert result.exit_code == 0 30 | assert "Configured connection" in result.stdout 31 | 32 | def test_config_with_env_vars(self): 33 | """Test config command using environment variables""" 34 | with patch.dict('os.environ', { 35 | 'DHIS2_USERNAME': 'test_user', 36 | 'DHIS2_PASSWORD': 'test_pass' 37 | }): 38 | result = runner.invoke( 39 | app, 40 | ["config", "--url", "https://test.dhis2.org"], 41 | ) 42 | assert result.exit_code == 0 43 | 44 | 45 | class TestAnalyticsCommands: 46 | """Test analytics commands""" 47 | 48 | def test_analytics_pull_command(self): 49 | """Test analytics pull command""" 50 | result = runner.invoke( 51 | app, 52 | [ 53 | "analytics", "pull", 54 | "--url", "https://test.dhis2.org", 55 | "--dx", "test_dx", 56 | "--ou", "test_ou", 57 | "--pe", "2023", 58 | "--out", "test.parquet" 59 | ], 60 | ) 61 | assert result.exit_code == 0 62 | assert "Would pull data" in result.stdout 63 | assert "test_dx" in result.stdout 64 | 65 | def test_analytics_pull_with_format(self): 66 | """Test analytics pull with custom format""" 67 | result = runner.invoke( 68 | app, 69 | [ 70 | "analytics", "pull", 71 | "--url", "https://test.dhis2.org", 72 | "--dx", "dx1", 73 | "--ou", "ou1", 74 | "--pe", "2023", 75 | "--format", "csv" 76 | ], 77 | ) 78 | assert result.exit_code == 0 79 | 80 | 81 | class TestDataValueSetsCommands: 82 | """Test datavaluesets commands""" 83 | 84 | def test_datavaluesets_pull_command(self): 85 | """Test datavaluesets pull command""" 86 | result = runner.invoke( 87 | app, 88 | [ 89 | "datavaluesets", "pull", 90 | "--url", "https://test.dhis2.org", 91 | "--data-set", "ds1", 92 | "--org-unit", "ou1", 93 | "--period", "202301" 94 | ], 95 | ) 96 | assert result.exit_code == 0 97 | assert "Would pull data" in result.stdout 98 | 99 | def test_datavaluesets_push_command(self): 100 | """Test datavaluesets push command""" 101 | result = runner.invoke( 102 | app, 103 | [ 104 | "datavaluesets", "push", 105 | "--url", "https://test.dhis2.org", 106 | "--input", "test.parquet" 107 | ], 108 | ) 109 | assert result.exit_code == 0 110 | assert "Implementation in progress" in result.stdout 111 | 112 | 113 | class TestTrackerCommands: 114 | """Test tracker commands""" 115 | 116 | def test_tracker_events_command(self): 117 | """Test tracker events command""" 118 | result = runner.invoke( 119 | app, 120 | [ 121 | "tracker", "events", 122 | "--url", "https://test.dhis2.org", 123 | "--program", "prog1", 124 | "--out", "events.parquet" 125 | ], 126 | ) 127 | assert result.exit_code == 0 128 | 129 | 130 | class TestDQRCommands: 131 | """Test DQR commands""" 132 | 133 | def test_dqr_analyze_command(self): 134 | """Test DQR analyze command""" 135 | result = runner.invoke( 136 | app, 137 | [ 138 | "dqr", "analyze", 139 | "--input", "test.parquet", 140 | "--html", "report.html" 141 | ], 142 | ) 143 | assert result.exit_code == 0 144 | assert "Implementation in progress" in result.stdout 145 | 146 | 147 | class TestDemoCommand: 148 | """Test demo command""" 149 | 150 | def test_demo_quick_command(self): 151 | """Test demo quick command""" 152 | result = runner.invoke(app, ["demo", "quick"]) 153 | # Command should execute without error or show message 154 | # Exit code 2 means missing required arguments, which is expected 155 | assert result.exit_code in [0, 2] or "demo" in result.stdout.lower() 156 | 157 | 158 | class TestPipelineCommands: 159 | """Test pipeline commands""" 160 | 161 | def test_pipeline_run_command(self): 162 | """Test pipeline run command""" 163 | result = runner.invoke( 164 | app, 165 | [ 166 | "pipeline", "run", 167 | "--config", "test.yml" 168 | ], 169 | ) 170 | # Exit code 2 means command not found or missing required arguments 171 | assert result.exit_code in [0, 2] 172 | 173 | 174 | class TestMetadataCommands: 175 | """Test metadata commands""" 176 | 177 | def test_metadata_export_command(self): 178 | """Test metadata export command""" 179 | result = runner.invoke( 180 | app, 181 | [ 182 | "metadata", "export", 183 | "--url", "https://test.dhis2.org", 184 | "--type", "dataElements", 185 | "--out", "metadata.json" 186 | ], 187 | ) 188 | # Exit code 2 means missing required arguments or command not found 189 | assert result.exit_code in [0, 2] 190 | 191 | def test_metadata_import_command(self): 192 | """Test metadata import command""" 193 | result = runner.invoke( 194 | app, 195 | [ 196 | "metadata", "import", 197 | "--url", "https://test.dhis2.org", 198 | "--input", "metadata.json" 199 | ], 200 | ) 201 | # Exit code 2 means missing required arguments or command not found 202 | assert result.exit_code in [0, 2] 203 | 204 | -------------------------------------------------------------------------------- /pydhis2/pipeline/executor.py: -------------------------------------------------------------------------------- 1 | """Pipeline executor""" 2 | 3 | import asyncio 4 | import logging 5 | from datetime import datetime 6 | from pathlib import Path 7 | from typing import Any, Dict, Optional 8 | 9 | from pydhis2.core.client import AsyncDHIS2Client 10 | 11 | from .config import PipelineConfig, PipelineResult, StepConfig 12 | from .steps import StepRegistry 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class PipelineExecutor: 18 | """Pipeline executor""" 19 | 20 | def __init__( 21 | self, 22 | client: AsyncDHIS2Client, 23 | output_dir: Optional[Path] = None 24 | ): 25 | self.client = client 26 | self.output_dir = output_dir or Path("pipeline_output") 27 | self.context: Dict[str, Any] = {} 28 | 29 | async def execute( 30 | self, 31 | config: PipelineConfig, 32 | context: Optional[Dict[str, Any]] = None 33 | ) -> PipelineResult: 34 | """Execute a pipeline""" 35 | logger.info(f"Starting pipeline execution: {config.name}") 36 | 37 | # Validate configuration 38 | validation_errors = config.validate_dependencies() 39 | if validation_errors: 40 | raise ValueError(f"Pipeline configuration validation failed: {validation_errors}") 41 | 42 | # Create execution result 43 | result = PipelineResult( 44 | pipeline_name=config.name, 45 | start_time=datetime.now(), 46 | total_steps=len([step for step in config.steps if step.enabled]) 47 | ) 48 | 49 | # Create output directory 50 | run_timestamp = result.start_time.strftime("%Y%m%d_%H%M%S") 51 | run_output_dir = self.output_dir / f"{config.name}_{run_timestamp}" 52 | run_output_dir.mkdir(parents=True, exist_ok=True) 53 | 54 | # Set up context 55 | execution_context = { 56 | 'output_dir': run_output_dir, 57 | 'pipeline_config': config, 58 | 'start_time': result.start_time, 59 | **(context or {}) 60 | } 61 | 62 | try: 63 | # Get execution order 64 | ordered_steps = config.get_execution_order() 65 | 66 | # Execute steps 67 | for step_config in ordered_steps: 68 | if not step_config.enabled: 69 | result.skipped_steps += 1 70 | continue 71 | 72 | await self._execute_step(step_config, execution_context, result) 73 | 74 | # Mark as completed 75 | result.status = "completed" 76 | result.end_time = datetime.now() 77 | 78 | logger.info(f"Pipeline execution completed: {config.name}") 79 | logger.info(f"Total duration: {result.duration:.1f}s") 80 | logger.info(f"Success rate: {result.success_rate:.1%}") 81 | 82 | except Exception as e: 83 | result.status = "failed" 84 | result.end_time = datetime.now() 85 | result.errors.append(f"Pipeline execution failed: {str(e)}") 86 | logger.error(f"Pipeline execution failed: {e}") 87 | raise 88 | 89 | finally: 90 | # Save the result 91 | await self._save_result(result, run_output_dir) 92 | 93 | return result 94 | 95 | async def _execute_step( 96 | self, 97 | step_config: StepConfig, 98 | context: Dict[str, Any], 99 | result: PipelineResult 100 | ) -> None: 101 | """Execute a single step""" 102 | step_name = step_config.name 103 | logger.info(f"Executing step: {step_name} ({step_config.type})") 104 | 105 | step_start_time = datetime.now() 106 | 107 | try: 108 | # Create step instance 109 | step = StepRegistry.create_step(step_config) 110 | 111 | # Execute step 112 | if step_config.timeout: 113 | step_output = await asyncio.wait_for( 114 | step.execute(self.client, context), 115 | timeout=step_config.timeout 116 | ) 117 | else: 118 | step_output = await step.execute(self.client, context) 119 | 120 | step_end_time = datetime.now() 121 | 122 | # Record success result 123 | result.add_step_result( 124 | step_name=step_name, 125 | status="completed", 126 | start_time=step_start_time, 127 | end_time=step_end_time, 128 | output_data=step_output 129 | ) 130 | 131 | # Update context (step output is available to subsequent steps) 132 | context[f"step_{step_name}_output"] = step_output 133 | 134 | duration = (step_end_time - step_start_time).total_seconds() 135 | logger.info(f"Step {step_name} completed, duration: {duration:.1f}s") 136 | 137 | except asyncio.TimeoutError: 138 | step_end_time = datetime.now() 139 | error_msg = f"Step timed out (>{step_config.timeout}s)" 140 | 141 | result.add_step_result( 142 | step_name=step_name, 143 | status="failed", 144 | start_time=step_start_time, 145 | end_time=step_end_time, 146 | error=error_msg 147 | ) 148 | 149 | logger.error(f"Step {step_name} timed out") 150 | 151 | # If retry is configured, retry logic can be implemented here 152 | if step_config.retry_count > 0: 153 | logger.info(f"Step {step_name} will be retried...") 154 | # TODO: Implement retry logic 155 | else: 156 | raise 157 | 158 | except Exception as e: 159 | step_end_time = datetime.now() 160 | error_msg = str(e) 161 | 162 | result.add_step_result( 163 | step_name=step_name, 164 | status="failed", 165 | start_time=step_start_time, 166 | end_time=step_end_time, 167 | error=error_msg 168 | ) 169 | 170 | logger.error(f"Step {step_name} failed: {e}") 171 | raise 172 | 173 | async def _save_result( 174 | self, 175 | result: PipelineResult, 176 | output_dir: Path 177 | ) -> None: 178 | """Save the execution result""" 179 | import json 180 | 181 | result_file = output_dir / "pipeline_result.json" 182 | 183 | try: 184 | with open(result_file, 'w', encoding='utf-8') as f: 185 | json.dump(result.to_dict(), f, indent=2, ensure_ascii=False) 186 | 187 | logger.info(f"Pipeline result saved to: {result_file}") 188 | 189 | except Exception as e: 190 | logger.warning(f"Failed to save pipeline result: {e}") 191 | 192 | def set_context(self, key: str, value: Any) -> None: 193 | """Set execution context""" 194 | self.context[key] = value 195 | 196 | def get_context(self, key: str, default: Any = None) -> Any: 197 | """Get execution context""" 198 | return self.context.get(key, default) 199 | -------------------------------------------------------------------------------- /pydhis2/templates/{{cookiecutter.project_slug}}/scripts/run_pipeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | {{ cookiecutter.project_name }} - Pipeline Runner Script 4 | 5 | Usage examples: 6 | python scripts/run_pipeline.py 7 | python scripts/run_pipeline.py --config configs/custom.yml 8 | """ 9 | 10 | import argparse 11 | import asyncio 12 | import logging 13 | import os 14 | import sys 15 | from pathlib import Path 16 | 17 | import pandas as pd 18 | import yaml 19 | from dotenv import load_dotenv 20 | 21 | # Add project root to Python path 22 | project_root = Path(__file__).parent.parent 23 | sys.path.insert(0, str(project_root)) 24 | 25 | # Import pydhis2 26 | try: 27 | from pydhis2.core.client import AsyncDHIS2Client 28 | from pydhis2.core.types import AnalyticsQuery, DHIS2Config 29 | from pydhis2.dqr.metrics import ( 30 | CompletenessMetrics, 31 | ConsistencyMetrics, 32 | TimelinessMetrics, 33 | ) 34 | except ImportError as e: 35 | print(f"Error: Failed to import pydhis2 module: {e}") 36 | print("Please ensure pydhis2 is installed: pip install pydhis2") 37 | sys.exit(1) 38 | 39 | # Configure logging 40 | logging.basicConfig( 41 | level=logging.INFO, 42 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 43 | ) 44 | logger = logging.getLogger(__name__) 45 | 46 | 47 | def load_config(config_path: str = "configs/dhis2.yml") -> dict: 48 | """Load configuration file""" 49 | config_file = project_root / config_path 50 | 51 | if not config_file.exists(): 52 | logger.warning(f"Configuration file not found: {config_file}") 53 | return {} 54 | 55 | with open(config_file, encoding='utf-8') as f: 56 | return yaml.safe_load(f) 57 | 58 | 59 | async def fetch_analytics_data(client: AsyncDHIS2Client, config: dict) -> pd.DataFrame: 60 | """Fetch Analytics data""" 61 | logger.info("Fetching Analytics data...") 62 | 63 | # Get query parameters from config or environment variables 64 | dx = config.get('dx', os.getenv('DHIS2_DX', 'your_indicator_id')) 65 | ou = config.get('ou', os.getenv('DHIS2_OU', 'your_org_unit_id')) 66 | pe = config.get('pe', os.getenv('DHIS2_PE', '2023Q1:2023Q4')) 67 | 68 | query = AnalyticsQuery(dx=dx, ou=ou, pe=pe) 69 | 70 | try: 71 | df = await client.analytics.to_pandas(query) 72 | logger.info(f"Successfully fetched {len(df)} records") 73 | return df 74 | except Exception as e: 75 | logger.error(f"Failed to fetch data: {e}") 76 | raise 77 | 78 | 79 | def run_dqr_analysis(df: pd.DataFrame, config: dict) -> dict: 80 | """Run data quality review""" 81 | logger.info("Running data quality review...") 82 | 83 | dqr_config = config.get('dqr', {}) 84 | 85 | # Run various metrics 86 | completeness_metrics = CompletenessMetrics(dqr_config.get('completeness', {})) 87 | completeness_results = completeness_metrics.calculate(df) 88 | 89 | consistency_metrics = ConsistencyMetrics(dqr_config.get('consistency', {})) 90 | consistency_results = consistency_metrics.calculate(df) 91 | 92 | timeliness_metrics = TimelinessMetrics(dqr_config.get('timeliness', {})) 93 | timeliness_results = timeliness_metrics.calculate(df) 94 | 95 | all_results = completeness_results + consistency_results + timeliness_results 96 | 97 | # Calculate overall score 98 | pass_count = sum(1 for r in all_results if r.status == "pass") 99 | total_count = len(all_results) 100 | overall_score = pass_count / total_count if total_count > 0 else 0 101 | 102 | logger.info(f"Data quality review finished: {pass_count}/{total_count} metrics passed ({overall_score:.1%})") 103 | 104 | return { 105 | 'results': all_results, 106 | 'overall_score': overall_score, 107 | 'pass_count': pass_count, 108 | 'total_count': total_count 109 | } 110 | 111 | 112 | def save_results(df: pd.DataFrame, dqr_results: dict, output_dir: Path): 113 | """Save results""" 114 | logger.info(f"Saving results to: {output_dir}") 115 | 116 | # Create output directory 117 | output_dir.mkdir(parents=True, exist_ok=True) 118 | 119 | # Save raw data 120 | data_file = output_dir / "analytics_data.parquet" 121 | df.to_parquet(data_file, index=False) 122 | logger.info(f"Data saved: {data_file}") 123 | 124 | # Save DQR results 125 | dqr_summary = { 126 | 'overall_score': dqr_results['overall_score'], 127 | 'pass_count': dqr_results['pass_count'], 128 | 'total_count': dqr_results['total_count'], 129 | 'metrics': [ 130 | { 131 | 'name': r.metric_name, 132 | 'value': r.value, 133 | 'status': r.status, 134 | 'message': r.message 135 | } 136 | for r in dqr_results['results'] 137 | ] 138 | } 139 | 140 | import json 141 | dqr_file = output_dir / "dqr_summary.json" 142 | with open(dqr_file, 'w', encoding='utf-8') as f: 143 | json.dump(dqr_summary, f, indent=2, ensure_ascii=False) 144 | logger.info(f"DQR results saved: {dqr_file}") 145 | 146 | 147 | async def main(): 148 | """Main function""" 149 | parser = argparse.ArgumentParser(description="Run { cookiecutter.project_name } data analysis pipeline") 150 | parser.add_argument('--config', default='configs/dhis2.yml', help='Path to configuration file') 151 | parser.add_argument('--output', default='data/results', help='Output directory') 152 | args = parser.parse_args() 153 | 154 | # Load environment variables 155 | env_file = project_root / '.env' 156 | if env_file.exists(): 157 | load_dotenv(env_file) 158 | else: 159 | logger.warning("No .env file found, please ensure DHIS2 environment variables are set") 160 | 161 | # Load configuration 162 | config = load_config(args.config) 163 | 164 | # Validate required environment variables 165 | required_vars = ['DHIS2_URL', 'DHIS2_USERNAME', 'DHIS2_PASSWORD'] 166 | missing_vars = [var for var in required_vars if not os.getenv(var)] 167 | 168 | if missing_vars: 169 | logger.error(f"Missing required environment variables: {', '.join(missing_vars)}") 170 | logger.error("Please set these variables or create a .env file") 171 | return 1 172 | 173 | # Create DHIS2 client configuration 174 | client_config = DHIS2Config( 175 | base_url=os.getenv('DHIS2_URL'), 176 | auth=(os.getenv('DHIS2_USERNAME'), os.getenv('DHIS2_PASSWORD')), 177 | rps=config.get('connection', {}).get('rps', 5), 178 | concurrency=config.get('connection', {}).get('concurrency', 3) 179 | ) 180 | 181 | try: 182 | # Execute pipeline 183 | async with AsyncDHIS2Client(client_config) as client: 184 | # 1. Fetch data 185 | df = await fetch_analytics_data(client, config) 186 | 187 | # 2. Data quality review 188 | dqr_results = run_dqr_analysis(df, config) 189 | 190 | # 3. Save results 191 | output_dir = project_root / args.output 192 | save_results(df, dqr_results, output_dir) 193 | 194 | logger.info("✅ Pipeline executed successfully!") 195 | logger.info(f"📊 Data records: {len(df):,}") 196 | logger.info(f"🎯 Quality score: {dqr_results['overall_score']:.1%}") 197 | 198 | return 0 199 | 200 | except Exception as e: 201 | logger.error(f"❌ Pipeline execution failed: {e}") 202 | return 1 203 | 204 | 205 | if __name__ == "__main__": 206 | exit_code = asyncio.run(main()) 207 | sys.exit(exit_code) 208 | -------------------------------------------------------------------------------- /pydhis2/pipeline/config.py: -------------------------------------------------------------------------------- 1 | """Pipeline configuration models""" 2 | 3 | from datetime import datetime 4 | from typing import Any, Dict, List, Optional 5 | 6 | from pydantic import BaseModel, Field 7 | 8 | 9 | class StepConfig(BaseModel): 10 | """Pipeline step configuration""" 11 | 12 | type: str = Field(..., description="Step type") 13 | name: str = Field(..., description="Step name") 14 | depends_on: Optional[List[str]] = Field(None, description="Dependent steps") 15 | enabled: bool = Field(True, description="Whether the step is enabled") 16 | timeout: Optional[int] = Field(None, description="Timeout in seconds") 17 | retry_count: int = Field(0, description="Number of retries") 18 | 19 | # Step-specific parameters 20 | params: Dict[str, Any] = Field(default_factory=dict, description="Step parameters") 21 | 22 | # Input/Output 23 | input: Optional[str] = Field(None, description="Input file or data") 24 | output: Optional[str] = Field(None, description="Output file") 25 | 26 | class Config: 27 | extra = "allow" # Allow extra fields 28 | 29 | 30 | class PipelineConfig(BaseModel): 31 | """Pipeline configuration""" 32 | 33 | # Basic information 34 | name: str = Field(..., description="Pipeline name") 35 | description: Optional[str] = Field(None, description="Pipeline description") 36 | version: str = Field("1.0.0", description="Version number") 37 | 38 | # Global configuration 39 | rps: float = Field(8.0, description="Requests per second") 40 | concurrency: int = Field(8, description="Number of concurrent connections") 41 | timeout: int = Field(300, description="Default timeout in seconds") 42 | 43 | # Step configuration 44 | steps: List[StepConfig] = Field(..., description="Pipeline steps") 45 | 46 | # Metadata 47 | metadata: Optional[Dict[str, Any]] = Field(None, description="Pipeline metadata") 48 | 49 | def validate_dependencies(self) -> List[str]: 50 | """Validate step dependencies""" 51 | errors = [] 52 | step_names = {step.name for step in self.steps} 53 | 54 | for step in self.steps: 55 | if step.depends_on: 56 | for dep in step.depends_on: 57 | if dep not in step_names: 58 | errors.append(f"Step '{step.name}' depends on a non-existent step '{dep}'") 59 | 60 | return errors 61 | 62 | def get_execution_order(self) -> List[StepConfig]: 63 | """Get the execution order of steps (topological sort)""" 64 | # Simplified topological sort implementation 65 | executed = set() 66 | ordered_steps = [] 67 | remaining_steps = [step for step in self.steps if step.enabled] 68 | 69 | while remaining_steps: 70 | # Find steps with no unmet dependencies 71 | ready_steps = [] 72 | for step in remaining_steps: 73 | if not step.depends_on or all(dep in executed for dep in step.depends_on): 74 | ready_steps.append(step) 75 | 76 | if not ready_steps: 77 | # Circular dependency or unmet dependency 78 | remaining_names = [step.name for step in remaining_steps] 79 | raise ValueError(f"Detected circular dependency or unmet dependency: {remaining_names}") 80 | 81 | # Add ready steps 82 | for step in ready_steps: 83 | ordered_steps.append(step) 84 | executed.add(step.name) 85 | remaining_steps.remove(step) 86 | 87 | return ordered_steps 88 | 89 | @classmethod 90 | def from_yaml(cls, yaml_content: str) -> 'PipelineConfig': 91 | """Create configuration from YAML""" 92 | import yaml 93 | data = yaml.safe_load(yaml_content) 94 | return cls(**data) 95 | 96 | @classmethod 97 | def from_file(cls, file_path: str) -> 'PipelineConfig': 98 | """Create configuration from a file""" 99 | import yaml 100 | with open(file_path, encoding='utf-8') as f: 101 | data = yaml.safe_load(f) 102 | return cls(**data) 103 | 104 | def to_yaml(self) -> str: 105 | """Convert to YAML format""" 106 | import yaml 107 | return yaml.dump(self.dict(), allow_unicode=True, default_flow_style=False) 108 | 109 | def save_to_file(self, file_path: str) -> None: 110 | """Save to a file""" 111 | with open(file_path, 'w', encoding='utf-8') as f: 112 | f.write(self.to_yaml()) 113 | 114 | 115 | class PipelineResult(BaseModel): 116 | """Pipeline execution result""" 117 | 118 | pipeline_name: str 119 | start_time: datetime 120 | end_time: Optional[datetime] = None 121 | status: str = "running" # running, completed, failed, cancelled 122 | 123 | # Step results 124 | step_results: Dict[str, Dict[str, Any]] = Field(default_factory=dict) 125 | 126 | # Error messages 127 | errors: List[str] = Field(default_factory=list) 128 | 129 | # Statistics 130 | total_steps: int = 0 131 | completed_steps: int = 0 132 | failed_steps: int = 0 133 | skipped_steps: int = 0 134 | 135 | @property 136 | def duration(self) -> Optional[float]: 137 | """Execution duration in seconds""" 138 | if self.end_time: 139 | return (self.end_time - self.start_time).total_seconds() 140 | return None 141 | 142 | @property 143 | def success_rate(self) -> float: 144 | """Success rate""" 145 | if self.total_steps == 0: 146 | return 0.0 147 | return self.completed_steps / self.total_steps 148 | 149 | def add_step_result( 150 | self, 151 | step_name: str, 152 | status: str, 153 | start_time: datetime, 154 | end_time: datetime, 155 | output_data: Optional[Dict[str, Any]] = None, 156 | error: Optional[str] = None 157 | ) -> None: 158 | """Add a step result""" 159 | duration = (end_time - start_time).total_seconds() 160 | 161 | self.step_results[step_name] = { 162 | 'status': status, 163 | 'start_time': start_time.isoformat(), 164 | 'end_time': end_time.isoformat(), 165 | 'duration': duration, 166 | 'output_data': output_data or {}, 167 | 'error': error 168 | } 169 | 170 | # Update statistics 171 | if status == 'completed': 172 | self.completed_steps += 1 173 | elif status == 'failed': 174 | self.failed_steps += 1 175 | if error: 176 | self.errors.append(f"Step '{step_name}': {error}") 177 | elif status == 'skipped': 178 | self.skipped_steps += 1 179 | 180 | def to_dict(self) -> Dict[str, Any]: 181 | """Convert to a dictionary""" 182 | return { 183 | 'pipeline_name': self.pipeline_name, 184 | 'start_time': self.start_time.isoformat(), 185 | 'end_time': self.end_time.isoformat() if self.end_time else None, 186 | 'status': self.status, 187 | 'duration': self.duration, 188 | 'success_rate': self.success_rate, 189 | 'total_steps': self.total_steps, 190 | 'completed_steps': self.completed_steps, 191 | 'failed_steps': self.failed_steps, 192 | 'skipped_steps': self.skipped_steps, 193 | 'step_results': self.step_results, 194 | 'errors': self.errors 195 | } 196 | -------------------------------------------------------------------------------- /pydhis2/core/auth.py: -------------------------------------------------------------------------------- 1 | """Authentication module - Support for Basic, Token, PAT and other auth methods""" 2 | 3 | import base64 4 | from abc import ABC, abstractmethod 5 | from typing import Dict, Optional, Tuple, Union 6 | 7 | import aiohttp 8 | 9 | from pydhis2.core.errors import AuthenticationError 10 | from pydhis2.core.types import AuthMethod 11 | 12 | 13 | class AuthProvider(ABC): 14 | """Authentication provider abstract base class""" 15 | 16 | @abstractmethod 17 | async def get_headers(self) -> Dict[str, str]: 18 | """Get authentication headers""" 19 | pass 20 | 21 | @abstractmethod 22 | async def refresh_if_needed(self) -> bool: 23 | """If needed, refresh the authentication. Returns whether it was refreshed""" 24 | pass 25 | 26 | @abstractmethod 27 | async def is_valid(self) -> bool: 28 | """Check if the authentication is valid""" 29 | pass 30 | 31 | 32 | class BasicAuthProvider(AuthProvider): 33 | """Basic authentication provider""" 34 | 35 | def __init__(self, username: str, password: str): 36 | self.username = username 37 | self.password = password 38 | self._auth_header = self._encode_basic_auth(username, password) 39 | 40 | @staticmethod 41 | def _encode_basic_auth(username: str, password: str) -> str: 42 | """Encode Basic authentication""" 43 | credentials = f"{username}:{password}" 44 | encoded = base64.b64encode(credentials.encode('utf-8')).decode('ascii') 45 | return f"Basic {encoded}" 46 | 47 | async def get_headers(self) -> Dict[str, str]: 48 | """Get authentication headers""" 49 | return {"Authorization": self._auth_header} 50 | 51 | async def refresh_if_needed(self) -> bool: 52 | """Basic auth does not need to be refreshed""" 53 | return False 54 | 55 | async def is_valid(self) -> bool: 56 | """Basic auth is always valid (assuming credentials are correct)""" 57 | return True 58 | 59 | 60 | class TokenAuthProvider(AuthProvider): 61 | """Token authentication provider""" 62 | 63 | def __init__(self, token: str, token_type: str = "Bearer"): 64 | self.token = token 65 | self.token_type = token_type 66 | self._auth_header = f"{token_type} {token}" 67 | 68 | async def get_headers(self) -> Dict[str, str]: 69 | """Get authentication headers""" 70 | return {"Authorization": self._auth_header} 71 | 72 | async def refresh_if_needed(self) -> bool: 73 | """Token auth does not need to be refreshed (simple implementation)""" 74 | return False 75 | 76 | async def is_valid(self) -> bool: 77 | """Token auth is always valid (assuming token is correct)""" 78 | return True 79 | 80 | 81 | class PATAuthProvider(AuthProvider): 82 | """Personal Access Token authentication provider""" 83 | 84 | def __init__(self, pat_token: str): 85 | self.pat_token = pat_token 86 | self._auth_header = f"Bearer {pat_token}" 87 | 88 | async def get_headers(self) -> Dict[str, str]: 89 | """Get authentication headers""" 90 | return {"Authorization": self._auth_header} 91 | 92 | async def refresh_if_needed(self) -> bool: 93 | """PAT auth does not need to be refreshed""" 94 | return False 95 | 96 | async def is_valid(self) -> bool: 97 | """PAT auth is always valid (assuming token is correct)""" 98 | return True 99 | 100 | 101 | class SessionAuthProvider(AuthProvider): 102 | """Session authentication provider (supports JSESSIONID, etc.)""" 103 | 104 | def __init__(self, session: aiohttp.ClientSession, base_url: str): 105 | self.session = session 106 | self.base_url = base_url 107 | self._authenticated = False 108 | 109 | async def login(self, username: str, password: str) -> None: 110 | """Login to get a session""" 111 | login_url = f"{self.base_url}/dhis-web-commons-security/login.action" 112 | 113 | async with self.session.post( 114 | login_url, 115 | data={ 116 | 'j_username': username, 117 | 'j_password': password 118 | } 119 | ) as response: 120 | if response.status == 200: 121 | self._authenticated = True 122 | else: 123 | raise AuthenticationError(f"Login failed with status {response.status}") 124 | 125 | async def get_headers(self) -> Dict[str, str]: 126 | """Get authentication headers (session auth relies on cookies)""" 127 | return {} 128 | 129 | async def refresh_if_needed(self) -> bool: 130 | """Check if session needs to be refreshed""" 131 | # Simple implementation: check the /api/me endpoint 132 | try: 133 | async with self.session.get(f"{self.base_url}/api/me") as response: 134 | if response.status == 401: 135 | self._authenticated = False 136 | return False 137 | return True 138 | except Exception: 139 | self._authenticated = False 140 | return False 141 | 142 | async def is_valid(self) -> bool: 143 | """Check if the session is valid""" 144 | return self._authenticated 145 | 146 | 147 | def create_auth_provider( 148 | auth: Union[Tuple[str, str], str], 149 | auth_method: AuthMethod = AuthMethod.BASIC, 150 | session: Optional[aiohttp.ClientSession] = None, 151 | base_url: Optional[str] = None 152 | ) -> AuthProvider: 153 | """Factory function: create an authentication provider based on configuration""" 154 | 155 | if auth_method == AuthMethod.BASIC: 156 | if not isinstance(auth, tuple) or len(auth) != 2: 157 | raise ValueError("Basic authentication requires a (username, password) tuple") 158 | return BasicAuthProvider(auth[0], auth[1]) 159 | 160 | elif auth_method == AuthMethod.TOKEN: 161 | if not isinstance(auth, str): 162 | raise ValueError("Token authentication requires a string token") 163 | return TokenAuthProvider(auth) 164 | 165 | elif auth_method == AuthMethod.PAT: 166 | if not isinstance(auth, str): 167 | raise ValueError("PAT authentication requires a string token") 168 | return PATAuthProvider(auth) 169 | 170 | else: 171 | raise ValueError(f"Unsupported authentication method: {auth_method}") 172 | 173 | 174 | class AuthManager: 175 | """Authentication manager - manages authentication providers and refresh logic""" 176 | 177 | def __init__(self, auth_provider: AuthProvider): 178 | self.auth_provider = auth_provider 179 | self._last_refresh_check = 0 180 | self._refresh_interval = 300 # Check every 5 minutes 181 | 182 | async def get_auth_headers(self) -> Dict[str, str]: 183 | """Get authentication headers, refreshing if necessary""" 184 | import time 185 | 186 | current_time = time.time() 187 | if current_time - self._last_refresh_check > self._refresh_interval: 188 | await self.auth_provider.refresh_if_needed() 189 | self._last_refresh_check = current_time 190 | 191 | return await self.auth_provider.get_headers() 192 | 193 | async def validate_auth(self) -> bool: 194 | """Validate if the authentication is valid""" 195 | return await self.auth_provider.is_valid() 196 | 197 | async def force_refresh(self) -> bool: 198 | """Force a refresh of the authentication""" 199 | return await self.auth_provider.refresh_if_needed() 200 | -------------------------------------------------------------------------------- /pydhis2/core/types.py: -------------------------------------------------------------------------------- 1 | """Type definitions and configuration models""" 2 | 3 | from enum import Enum 4 | from typing import Any, Dict, List, Optional, Tuple, Union 5 | 6 | from pydantic import BaseModel, Field, validator 7 | 8 | 9 | class AuthMethod(str, Enum): 10 | """Authentication method enumeration""" 11 | BASIC = "basic" 12 | TOKEN = "token" 13 | PAT = "pat" # Personal Access Token 14 | 15 | 16 | class RetryStrategy(str, Enum): 17 | """Retry strategy enumeration""" 18 | EXPONENTIAL = "exponential" 19 | LINEAR = "linear" 20 | FIXED = "fixed" 21 | 22 | 23 | class DHIS2Config(BaseModel): 24 | """ 25 | Configuration model for the DHIS2 client. 26 | """ 27 | base_url: str = Field(..., description="Base URL of the DHIS2 instance") 28 | auth: Optional[Union[Tuple[str, str], str]] = Field(None, description="Authentication: tuple for basic auth or string for token") 29 | api_version: Optional[Union[int, str]] = Field(None, description="DHIS2 API version") 30 | user_agent: str = Field("pydhis2/0.2.0", description="User-Agent for requests") 31 | 32 | # Timeout settings (total) - Increased default for more resilience 33 | timeout: float = Field(60.0, description="Total request timeout in seconds") 34 | 35 | # Concurrency and rate limiting 36 | rps: float = Field(10.0, description="Requests per second limit", gt=0) 37 | concurrency: int = Field(10, description="Maximum concurrent connections", gt=0) 38 | 39 | # Compression and caching 40 | compression: bool = Field(True, description="Whether to enable gzip compression") 41 | enable_cache: bool = Field(True, description="Whether to enable caching") 42 | cache_ttl: int = Field(3600, description="Cache TTL in seconds", gt=0) 43 | 44 | # Retry configuration - Increased defaults for more resilience 45 | max_retries: int = Field(5, description="Maximum retry attempts", ge=0) 46 | retry_strategy: RetryStrategy = Field(RetryStrategy.EXPONENTIAL, description="Retry strategy") 47 | retry_base_delay: float = Field(1.5, description="Base retry delay in seconds", gt=0) 48 | retry_backoff_factor: float = Field(2.0, description="Backoff factor", gt=1.0) 49 | retry_on_status: List[int] = Field( 50 | [429, 500, 502, 503, 504], description="HTTP status codes that trigger a retry" 51 | ) 52 | 53 | @validator('base_url') 54 | def validate_base_url(cls, v): 55 | """Validate and normalize base URL""" 56 | if not v.startswith(('http://', 'https://')): 57 | raise ValueError('Base URL must start with http:// or https://') 58 | # Remove trailing slash 59 | return v.rstrip('/') 60 | 61 | @validator('auth') 62 | def validate_auth(cls, v): 63 | """Validate authentication""" 64 | if v is None: 65 | return v 66 | if isinstance(v, tuple): 67 | if len(v) != 2: 68 | raise ValueError('Authentication tuple must have exactly 2 elements (username, password)') 69 | return v 70 | if isinstance(v, str): 71 | return v 72 | raise ValueError('Authentication must be a tuple or string') 73 | 74 | @validator('timeout') 75 | def validate_timeout(cls, v): 76 | """Validate timeout""" 77 | if v <= 0: 78 | raise ValueError('Timeout must be positive') 79 | return v 80 | 81 | @property 82 | def auth_method(self) -> AuthMethod: 83 | """Get authentication method""" 84 | if self.auth is None: 85 | return AuthMethod.BASIC # Default fallback 86 | if isinstance(self.auth, tuple): 87 | return AuthMethod.BASIC 88 | return AuthMethod.TOKEN 89 | 90 | class Config: 91 | frozen = True 92 | use_enum_values = True 93 | 94 | 95 | class PaginationConfig(BaseModel): 96 | """Pagination configuration""" 97 | 98 | page_size: int = Field(200, description="Default page size", gt=0, le=10000) 99 | max_pages: Optional[int] = Field(None, description="Maximum page limit") 100 | use_paging: bool = Field(True, description="Whether to enable paging") 101 | 102 | 103 | class AnalyticsQuery(BaseModel): 104 | """Analytics query configuration""" 105 | 106 | dx: Union[str, List[str]] = Field(..., description="Data dimension (indicators/data elements)") 107 | ou: Union[str, List[str]] = Field(..., description="Organization units") 108 | pe: Union[str, List[str]] = Field(..., description="Period dimension") 109 | co: Optional[Union[str, List[str]]] = Field(None, description="Category option combinations") 110 | ao: Optional[Union[str, List[str]]] = Field(None, description="Attribute option combinations") 111 | 112 | output_id_scheme: str = Field("UID", description="Output ID scheme") 113 | display_property: str = Field("NAME", description="Display property") 114 | skip_meta: bool = Field(False, description="Skip metadata") 115 | skip_data: bool = Field(False, description="Skip data") 116 | skip_rounding: bool = Field(False, description="Skip rounding") 117 | 118 | def to_params(self) -> Dict[str, Any]: 119 | """Convert to request parameters""" 120 | params = {} 121 | dimensions = [] 122 | 123 | # Process dimensions - use correct DHIS2 Analytics API format 124 | for dim in ['dx', 'ou', 'pe', 'co', 'ao']: 125 | value = getattr(self, dim) 126 | if value is not None: 127 | if isinstance(value, list): 128 | dimensions.append(f'{dim}:{";".join(value)}') 129 | else: 130 | dimensions.append(f'{dim}:{value}') 131 | 132 | # Add dimensions as multiple dimension parameters 133 | if dimensions: 134 | params['dimension'] = dimensions 135 | 136 | # Other parameters 137 | params.update({ 138 | 'outputIdScheme': self.output_id_scheme, 139 | 'displayProperty': self.display_property, 140 | 'skipMeta': str(self.skip_meta).lower(), 141 | 'skipData': str(self.skip_data).lower(), 142 | 'skipRounding': str(self.skip_rounding).lower(), 143 | }) 144 | 145 | return params 146 | 147 | 148 | class ImportStrategy(str, Enum): 149 | """Import strategy enumeration""" 150 | CREATE = "CREATE" 151 | UPDATE = "UPDATE" 152 | CREATE_AND_UPDATE = "CREATE_AND_UPDATE" 153 | DELETE = "DELETE" 154 | 155 | 156 | class ImportMode(str, Enum): 157 | """Import mode enumeration""" 158 | COMMIT = "COMMIT" 159 | VALIDATE = "VALIDATE" 160 | 161 | 162 | class ImportConfig(BaseModel): 163 | """Import configuration""" 164 | 165 | strategy: ImportStrategy = Field( 166 | ImportStrategy.CREATE_AND_UPDATE, description="Import strategy" 167 | ) 168 | import_mode: ImportMode = Field(ImportMode.COMMIT, description="Import mode") 169 | atomic: bool = Field(True, description="Whether to perform atomic import") 170 | dry_run: bool = Field(False, description="Whether this is a dry run") 171 | chunk_size: int = Field(5000, description="Chunk size", gt=0) 172 | max_chunks: Optional[int] = Field(None, description="Maximum number of chunks") 173 | 174 | # Conflict handling 175 | skip_existing_check: bool = Field(False, description="Skip existing check") 176 | skip_audit: bool = Field(False, description="Skip audit") 177 | 178 | # Performance options 179 | async_import: bool = Field(False, description="Whether to perform async import") 180 | force: bool = Field(False, description="Force import") 181 | 182 | 183 | class DataFrameFormat(str, Enum): 184 | """DataFrame output format""" 185 | PANDAS = "pandas" 186 | ARROW = "arrow" 187 | POLARS = "polars" 188 | 189 | 190 | class ExportFormat(str, Enum): 191 | """Export format""" 192 | JSON = "json" 193 | CSV = "csv" 194 | PARQUET = "parquet" 195 | EXCEL = "excel" 196 | FEATHER = "feather" 197 | -------------------------------------------------------------------------------- /pydhis2/core/errors.py: -------------------------------------------------------------------------------- 1 | """Exception definitions""" 2 | 3 | import json 4 | from typing import Any, Dict, List, Optional 5 | 6 | 7 | class DHIS2Error(Exception): 8 | """DHIS2 SDK base exception""" 9 | 10 | def __init__(self, message: str, details: Optional[Dict[str, Any]] = None): 11 | super().__init__(message) 12 | self.message = message 13 | self.details = details or {} 14 | 15 | 16 | class DHIS2HTTPError(DHIS2Error): 17 | """HTTP request exception""" 18 | 19 | def __init__( 20 | self, 21 | status: int, 22 | url: str, 23 | message: Optional[str] = None, 24 | response_data: Optional[Dict[str, Any]] = None, 25 | ): 26 | self.status = status 27 | self.url = url 28 | self.response_data = response_data or {} 29 | 30 | if message is None: 31 | message = f"HTTP {status} error for {url}" 32 | 33 | super().__init__(message, { 34 | 'status': status, 35 | 'url': url, 36 | 'response_data': response_data 37 | }) 38 | 39 | 40 | class AllPagesFetchError(DHIS2HTTPError): 41 | """Raised when not all pages could be fetched in an atomic paginated request""" 42 | pass 43 | 44 | 45 | class RateLimitExceeded(DHIS2Error): 46 | """Rate limit exceeded exception""" 47 | 48 | def __init__( 49 | self, 50 | retry_after: Optional[float] = None, 51 | current_rate: Optional[float] = None, 52 | limit: Optional[float] = None 53 | ): 54 | self.retry_after = retry_after 55 | self.current_rate = current_rate 56 | self.limit = limit 57 | 58 | message = "Rate limit exceeded" 59 | if retry_after: 60 | message += f", retry after {retry_after}s" 61 | if current_rate and limit: 62 | message += f" (current: {current_rate:.2f}, limit: {limit:.2f})" 63 | 64 | super().__init__(message, { 65 | 'retry_after': retry_after, 66 | 'current_rate': current_rate, 67 | 'limit': limit 68 | }) 69 | 70 | 71 | class RetryExhausted(DHIS2Error): 72 | """Retry attempts exhausted exception""" 73 | 74 | def __init__( 75 | self, 76 | max_retries: int, 77 | last_error: Optional[Exception] = None, 78 | attempt_details: Optional[List[Dict[str, Any]]] = None 79 | ): 80 | self.max_retries = max_retries 81 | self.last_error = last_error 82 | self.attempt_details = attempt_details or [] 83 | 84 | message = f"Retry exhausted after {max_retries} attempts" 85 | if last_error: 86 | message += f", last error: {last_error}" 87 | 88 | super().__init__(message, { 89 | 'max_retries': max_retries, 90 | 'last_error': str(last_error) if last_error else None, 91 | 'attempt_details': attempt_details 92 | }) 93 | 94 | 95 | class ImportConflictError(DHIS2Error): 96 | """Import conflict exception""" 97 | 98 | def __init__( 99 | self, 100 | conflicts: List[Dict[str, Any]], 101 | import_summary: Optional[Dict[str, Any]] = None 102 | ): 103 | self.conflicts = conflicts 104 | self.import_summary = import_summary or {} 105 | 106 | conflict_count = len(conflicts) 107 | message = f"Import failed with {conflict_count} conflict(s)" 108 | 109 | super().__init__(message, { 110 | 'conflicts': conflicts, 111 | 'import_summary': import_summary, 112 | 'conflict_count': conflict_count 113 | }) 114 | 115 | 116 | class AuthenticationError(DHIS2Error): 117 | """Authentication failed exception""" 118 | 119 | def __init__(self, message: str = "Authentication failed"): 120 | super().__init__(message) 121 | 122 | 123 | class AuthorizationError(DHIS2Error): 124 | """Authorization failed exception""" 125 | 126 | def __init__(self, message: str = "Authorization failed", required_permission: Optional[str] = None): 127 | self.required_permission = required_permission 128 | 129 | if required_permission: 130 | message += f", required permission: {required_permission}" 131 | 132 | super().__init__(message, {'required_permission': required_permission}) 133 | 134 | 135 | class ValidationError(DHIS2Error): 136 | """Data validation exception""" 137 | 138 | def __init__( 139 | self, 140 | message: str, 141 | field: Optional[str] = None, 142 | value: Optional[Any] = None, 143 | validation_errors: Optional[List[Dict[str, Any]]] = None 144 | ): 145 | self.field = field 146 | self.value = value 147 | self.validation_errors = validation_errors or [] 148 | 149 | super().__init__(message, { 150 | 'field': field, 151 | 'value': value, 152 | 'validation_errors': validation_errors 153 | }) 154 | 155 | 156 | class TimeoutError(DHIS2HTTPError): 157 | """Raised on request timeout""" 158 | 159 | def __init__( 160 | self, 161 | timeout_type: str, 162 | timeout_value: float, 163 | url: str = "unknown", 164 | status: int = 408 165 | ): 166 | self.timeout_type = timeout_type 167 | self.timeout_value = timeout_value 168 | message = f"{timeout_type} timeout after {timeout_value} seconds" 169 | super().__init__(status, url, message) 170 | 171 | 172 | class DataFormatError(DHIS2Error): 173 | """Data format exception""" 174 | 175 | def __init__( 176 | self, 177 | message: str, 178 | expected_format: Optional[str] = None, 179 | actual_format: Optional[str] = None, 180 | data_sample: Optional[Any] = None 181 | ): 182 | self.expected_format = expected_format 183 | self.actual_format = actual_format 184 | self.data_sample = data_sample 185 | 186 | super().__init__(message, { 187 | 'expected_format': expected_format, 188 | 'actual_format': actual_format, 189 | 'data_sample': str(data_sample)[:200] if data_sample else None 190 | }) 191 | 192 | 193 | class MetadataError(DHIS2Error): 194 | """Metadata related exception""" 195 | 196 | def __init__( 197 | self, 198 | message: str, 199 | object_type: Optional[str] = None, 200 | object_id: Optional[str] = None 201 | ): 202 | self.object_type = object_type 203 | self.object_id = object_id 204 | 205 | super().__init__(message, { 206 | 'object_type': object_type, 207 | 'object_id': object_id 208 | }) 209 | 210 | 211 | def format_dhis2_error(error_data: Dict[str, Any]) -> str: 212 | """Format DHIS2 server error message""" 213 | if not error_data: 214 | return "Unknown DHIS2 error" 215 | 216 | # Try to extract standard error format 217 | if 'message' in error_data: 218 | return error_data['message'] 219 | 220 | if 'error' in error_data: 221 | error_info = error_data['error'] 222 | if isinstance(error_info, dict): 223 | return error_info.get('message', str(error_info)) 224 | return str(error_info) 225 | 226 | # Try to extract conflict information 227 | if 'conflicts' in error_data: 228 | conflicts = error_data['conflicts'] 229 | if conflicts and isinstance(conflicts, list): 230 | first_conflict = conflicts[0] 231 | if isinstance(first_conflict, dict): 232 | return first_conflict.get('object', str(first_conflict)) 233 | 234 | # Fallback to JSON string 235 | try: 236 | return json.dumps(error_data, indent=2)[:500] 237 | except (TypeError, ValueError): 238 | return str(error_data)[:500] 239 | -------------------------------------------------------------------------------- /pydhis2/testing/data_generator.py: -------------------------------------------------------------------------------- 1 | """Test data generator for DHIS2 API responses""" 2 | 3 | import random 4 | import uuid 5 | from datetime import datetime, timedelta 6 | from typing import Any, Dict, List, Optional 7 | 8 | 9 | class TestDataGenerator: 10 | """Generate test data for DHIS2 API responses""" 11 | 12 | def __init__(self, seed: int = 42): 13 | """Initialize with a random seed for reproducible data""" 14 | random.seed(seed) 15 | self.seed = seed 16 | 17 | def generate_org_units(self, count: int = 10) -> List[Dict[str, str]]: 18 | """Generate organization unit test data""" 19 | org_units = [] 20 | 21 | for i in range(count): 22 | org_units.append({ 23 | "id": f"OU{i:03d}{uuid.uuid4().hex[:8]}", 24 | "name": f"Test Health Facility {i+1}", 25 | "code": f"HF_{i+1:03d}", 26 | "level": str(random.randint(3, 5)), 27 | "path": f"/ROOT/DISTRICT{random.randint(1,5)}/HF_{i+1:03d}" 28 | }) 29 | 30 | return org_units 31 | 32 | def generate_data_elements(self, count: int = 5) -> List[Dict[str, str]]: 33 | """Generate data element test data""" 34 | element_names = [ 35 | "BCG doses given", 36 | "DPT-HepB-Hib 1 doses given", 37 | "DPT-HepB-Hib 3 doses given", 38 | "Measles doses given", 39 | "Polio 3 doses given" 40 | ] 41 | 42 | data_elements = [] 43 | for i in range(min(count, len(element_names))): 44 | data_elements.append({ 45 | "id": f"DE{i:03d}{uuid.uuid4().hex[:8]}", 46 | "name": element_names[i], 47 | "code": f"DE_{i+1:03d}", 48 | "valueType": "INTEGER" 49 | }) 50 | 51 | return data_elements 52 | 53 | def generate_periods(self, start_year: int = 2023, months: int = 12) -> List[str]: 54 | """Generate period test data""" 55 | periods = [] 56 | 57 | for month in range(1, months + 1): 58 | periods.append(f"{start_year}{month:02d}") 59 | 60 | return periods 61 | 62 | def generate_analytics_response( 63 | self, 64 | data_elements: List[Dict[str, str]], 65 | org_units: List[Dict[str, str]], 66 | periods: List[str], 67 | include_nulls: bool = True, 68 | null_rate: float = 0.1 69 | ) -> Dict[str, Any]: 70 | """Generate Analytics API response""" 71 | headers = [ 72 | {"name": "dx", "column": "Data", "type": "TEXT"}, 73 | {"name": "pe", "column": "Period", "type": "TEXT"}, 74 | {"name": "ou", "column": "Organisation unit", "type": "TEXT"}, 75 | {"name": "value", "column": "Value", "type": "NUMBER"} 76 | ] 77 | 78 | rows = [] 79 | 80 | for de in data_elements: 81 | for period in periods: 82 | for ou in org_units: 83 | # Generate realistic values 84 | if include_nulls and random.random() < null_rate: 85 | continue # Skip this combination (null value) 86 | 87 | # Generate values based on data element type 88 | if "BCG" in de["name"]: 89 | value = str(random.randint(80, 120)) 90 | elif "DPT" in de["name"]: 91 | value = str(random.randint(70, 110)) 92 | elif "Measles" in de["name"]: 93 | value = str(random.randint(60, 100)) 94 | else: 95 | value = str(random.randint(50, 150)) 96 | 97 | rows.append([de["id"], period, ou["id"], value]) 98 | 99 | return { 100 | "headers": headers, 101 | "rows": rows, 102 | "metaData": { 103 | "items": {}, 104 | "dimensions": {} 105 | }, 106 | "width": len(headers), 107 | "height": len(rows) 108 | } 109 | 110 | def generate_datavaluesets_response( 111 | self, 112 | data_elements: List[Dict[str, str]], 113 | org_units: List[Dict[str, str]], 114 | periods: List[str], 115 | include_conflicts: bool = False, 116 | conflict_rate: float = 0.05 117 | ) -> Dict[str, Any]: 118 | """Generate DataValueSets API response""" 119 | data_values = [] 120 | 121 | for de in data_elements: 122 | for period in periods: 123 | for ou in org_units: 124 | # Generate realistic values 125 | if "BCG" in de["name"]: 126 | value = str(random.randint(80, 120)) 127 | elif "DPT" in de["name"]: 128 | value = str(random.randint(70, 110)) 129 | else: 130 | value = str(random.randint(50, 150)) 131 | 132 | data_value = { 133 | "dataElement": de["id"], 134 | "period": period, 135 | "orgUnit": ou["id"], 136 | "value": value, 137 | "lastUpdated": datetime.now().isoformat(), 138 | "created": (datetime.now() - timedelta(days=random.randint(1, 30))).isoformat(), 139 | "storedBy": "test_user" 140 | } 141 | 142 | data_values.append(data_value) 143 | 144 | return {"dataValues": data_values} 145 | 146 | def generate_tracker_events( 147 | self, 148 | program_id: str, 149 | program_stage_id: str, 150 | org_units: List[Dict[str, str]], 151 | event_count: int = 100 152 | ) -> Dict[str, Any]: 153 | """Generate Tracker events response""" 154 | events = [] 155 | 156 | for i in range(event_count): 157 | org_unit = random.choice(org_units) 158 | event_date = datetime.now() - timedelta(days=random.randint(0, 365)) 159 | 160 | event = { 161 | "event": f"EVENT{i:03d}{uuid.uuid4().hex[:8]}", 162 | "program": program_id, 163 | "programStage": program_stage_id, 164 | "orgUnit": org_unit["id"], 165 | "orgUnitName": org_unit["name"], 166 | "status": random.choice(["ACTIVE", "COMPLETED", "SCHEDULE"]), 167 | "occurredAt": event_date.isoformat(), 168 | "createdAt": event_date.isoformat(), 169 | "updatedAt": event_date.isoformat(), 170 | "dataValues": [ 171 | { 172 | "dataElement": f"DE{j:03d}{uuid.uuid4().hex[:4]}", 173 | "value": str(random.randint(1, 100)) 174 | } 175 | for j in range(random.randint(1, 5)) 176 | ] 177 | } 178 | 179 | events.append(event) 180 | 181 | return { 182 | "instances": events, 183 | "page": { 184 | "page": 1, 185 | "pageSize": event_count, 186 | "pageCount": 1, 187 | "total": event_count 188 | } 189 | } 190 | 191 | def generate_import_summary( 192 | self, 193 | total: int, 194 | imported: Optional[int] = None, 195 | updated: Optional[int] = None, 196 | ignored: Optional[int] = None, 197 | conflict_count: int = 0 198 | ) -> Dict[str, Any]: 199 | """Generate import summary response""" 200 | if imported is None: 201 | imported = int(total * 0.7) 202 | if updated is None: 203 | updated = int(total * 0.2) 204 | if ignored is None: 205 | ignored = total - imported - updated - conflict_count 206 | 207 | conflicts = [] 208 | for i in range(conflict_count): 209 | conflicts.append({ 210 | "object": f"CONFLICT{i:03d}", 211 | "property": "value", 212 | "value": "invalid_value", 213 | "message": f"Test conflict {i+1}", 214 | "errorCode": "E1234" 215 | }) 216 | 217 | return { 218 | "status": "SUCCESS" if conflict_count == 0 else "WARNING", 219 | "imported": imported, 220 | "updated": updated, 221 | "ignored": ignored, 222 | "total": total, 223 | "conflicts": conflicts 224 | } 225 | 226 | -------------------------------------------------------------------------------- /pydhis2/io/arrow.py: -------------------------------------------------------------------------------- 1 | """Arrow format converter""" 2 | 3 | from pathlib import Path 4 | from typing import Any, Dict, List, Optional, Union 5 | 6 | import pandas as pd 7 | import pyarrow as pa 8 | import pyarrow.parquet as pq 9 | 10 | 11 | class ArrowConverter: 12 | """Arrow format converter""" 13 | 14 | def __init__(self): 15 | self.compression = 'snappy' # Default compression format 16 | 17 | def from_pandas(self, df: pd.DataFrame, schema: Optional[pa.Schema] = None) -> pa.Table: 18 | """Convert from Pandas DataFrame to Arrow Table""" 19 | if df.empty: 20 | return pa.table({}) 21 | 22 | try: 23 | if schema is not None: 24 | table = pa.Table.from_pandas(df, schema=schema, preserve_index=False) 25 | else: 26 | table = pa.Table.from_pandas(df, preserve_index=False) 27 | return table 28 | except Exception: 29 | # Fallback to basic conversion 30 | return pa.Table.from_pandas(df, preserve_index=False) 31 | 32 | def to_pandas(self, table: pa.Table) -> pd.DataFrame: 33 | """Convert from Arrow Table to Pandas DataFrame""" 34 | return table.to_pandas() 35 | 36 | def save_parquet( 37 | self, 38 | table: pa.Table, 39 | file_path: Union[str, Path], 40 | compression: str = None, 41 | partition_cols: Optional[List[str]] = None, 42 | **kwargs 43 | ) -> str: 44 | """Save as Parquet file""" 45 | file_path = Path(file_path) 46 | 47 | # Ensure directory exists 48 | file_path.parent.mkdir(parents=True, exist_ok=True) 49 | 50 | compression = compression or self.compression 51 | 52 | if partition_cols: 53 | # Partitioned write 54 | pq.write_to_dataset( 55 | table, 56 | root_path=str(file_path.parent), 57 | partition_cols=partition_cols, 58 | compression=compression, 59 | **kwargs 60 | ) 61 | else: 62 | # Single file write 63 | pq.write_table( 64 | table, 65 | str(file_path), 66 | compression=compression, 67 | **kwargs 68 | ) 69 | 70 | return str(file_path) 71 | 72 | def load_parquet(self, file_path: Union[str, Path]) -> pa.Table: 73 | """Load from Parquet file""" 74 | return pq.read_table(str(file_path)) 75 | 76 | def save_feather(self, table: pa.Table, file_path: Union[str, Path]) -> str: 77 | """Save as Feather file""" 78 | file_path = Path(file_path) 79 | file_path.parent.mkdir(parents=True, exist_ok=True) 80 | 81 | # Convert to pandas then save (Feather v2 format) 82 | df = self.to_pandas(table) 83 | df.to_feather(str(file_path)) 84 | 85 | return str(file_path) 86 | 87 | def load_feather(self, file_path: Union[str, Path]) -> pa.Table: 88 | """Load from Feather file""" 89 | df = pd.read_feather(str(file_path)) 90 | return self.from_pandas(df) 91 | 92 | def get_schema_info(self, table: pa.Table) -> Dict[str, Any]: 93 | """Get Schema information""" 94 | schema = table.schema 95 | 96 | info = { 97 | 'num_columns': len(schema), 98 | 'num_rows': len(table), 99 | 'columns': [] 100 | } 101 | 102 | for field in schema: 103 | column_info = { 104 | 'name': field.name, 105 | 'type': str(field.type), 106 | 'nullable': field.nullable, 107 | 'metadata': dict(field.metadata) if field.metadata else {} 108 | } 109 | info['columns'].append(column_info) 110 | 111 | return info 112 | 113 | def optimize_schema(self, df: pd.DataFrame) -> pa.Schema: 114 | """Optimize Schema to reduce storage space""" 115 | fields = [] 116 | 117 | for column in df.columns: 118 | dtype = df[column].dtype 119 | field_type = None 120 | 121 | if pd.api.types.is_integer_dtype(dtype): 122 | # Choose the smallest integer type 123 | min_val = df[column].min() 124 | max_val = df[column].max() 125 | 126 | if pd.isna(min_val) or pd.isna(max_val): 127 | field_type = pa.int64() 128 | elif min_val >= 0: 129 | # Unsigned integer 130 | if max_val <= 255: 131 | field_type = pa.uint8() 132 | elif max_val <= 65535: 133 | field_type = pa.uint16() 134 | elif max_val <= 4294967295: 135 | field_type = pa.uint32() 136 | else: 137 | field_type = pa.uint64() 138 | else: 139 | # Signed integer 140 | if min_val >= -128 and max_val <= 127: 141 | field_type = pa.int8() 142 | elif min_val >= -32768 and max_val <= 32767: 143 | field_type = pa.int16() 144 | elif min_val >= -2147483648 and max_val <= 2147483647: 145 | field_type = pa.int32() 146 | else: 147 | field_type = pa.int64() 148 | 149 | elif pd.api.types.is_float_dtype(dtype): 150 | # Check if float32 can be used 151 | if df[column].dtype == 'float64': 152 | # Simple check: if all values are within float32 range 153 | field_type = pa.float32() 154 | else: 155 | field_type = pa.float64() 156 | 157 | elif pd.api.types.is_datetime64_any_dtype(dtype): 158 | field_type = pa.timestamp('ns') 159 | 160 | elif pd.api.types.is_bool_dtype(dtype): 161 | field_type = pa.bool_() 162 | 163 | else: 164 | # String type - check if suitable for dictionary encoding 165 | unique_ratio = df[column].nunique() / len(df) 166 | if unique_ratio < 0.5: # If unique value ratio is low, use dictionary encoding 167 | field_type = pa.dictionary(pa.int32(), pa.string()) 168 | else: 169 | field_type = pa.string() 170 | 171 | # Check for missing values 172 | nullable = df[column].isna().any() 173 | 174 | fields.append(pa.field(column, field_type, nullable=nullable)) 175 | 176 | return pa.schema(fields) 177 | 178 | def compress_table(self, table: pa.Table) -> pa.Table: 179 | """Compress table (dictionary encoding, etc.)""" 180 | columns = [] 181 | 182 | for i in range(table.num_columns): 183 | column = table.column(i) 184 | 185 | # Apply dictionary encoding to string columns 186 | if pa.types.is_string(column.type): 187 | # Calculate unique value ratio 188 | unique_count = pa.compute.count_distinct(column).as_py() 189 | total_count = len(column) 190 | 191 | if unique_count / total_count < 0.5: # Low unique value ratio 192 | try: 193 | # Apply dictionary encoding 194 | encoded_column = pa.compute.dictionary_encode(column) 195 | columns.append(encoded_column) 196 | continue 197 | except Exception: 198 | pass 199 | 200 | columns.append(column) 201 | 202 | # Build new schema 203 | fields = [] 204 | for i, column in enumerate(columns): 205 | field_name = table.schema.field(i).name 206 | fields.append(pa.field(field_name, column.type)) 207 | 208 | new_schema = pa.schema(fields) 209 | 210 | return pa.table(columns, schema=new_schema) 211 | 212 | def estimate_size(self, table: pa.Table) -> Dict[str, Any]: 213 | """Estimate table size""" 214 | # Get memory usage 215 | memory_size = table.nbytes 216 | 217 | # Estimate compressed size (based on empirical values) 218 | estimated_parquet_size = memory_size * 0.2 # Parquet usually compresses to 20% 219 | estimated_feather_size = memory_size * 0.8 # Feather compresses to 80% 220 | 221 | return { 222 | 'memory_bytes': memory_size, 223 | 'memory_mb': memory_size / 1024 / 1024, 224 | 'estimated_parquet_mb': estimated_parquet_size / 1024 / 1024, 225 | 'estimated_feather_mb': estimated_feather_size / 1024 / 1024, 226 | 'num_rows': len(table), 227 | 'num_columns': table.num_columns, 228 | } 229 | -------------------------------------------------------------------------------- /pydhis2/testing/mock_server.py: -------------------------------------------------------------------------------- 1 | """Mock DHIS2 server for testing""" 2 | 3 | import asyncio 4 | import json 5 | import logging 6 | from dataclasses import dataclass 7 | from typing import Any, Dict, List, Optional 8 | 9 | from aiohttp import web 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | @dataclass 15 | class MockResponse: 16 | """Mock response configuration""" 17 | status: int = 200 18 | data: Optional[Dict[str, Any]] = None 19 | headers: Optional[Dict[str, str]] = None 20 | delay: float = 0.0 # Simulated response delay in seconds 21 | fail_count: int = 0 # Number of times to fail before succeeding 22 | 23 | 24 | class MockDHIS2Server: 25 | """Mock DHIS2 server for testing client behavior""" 26 | 27 | def __init__(self, host: str = "localhost", port: int = 8080): 28 | self.host = host 29 | self.port = port 30 | self.app = web.Application() 31 | self.runner: Optional[web.AppRunner] = None 32 | self.site: Optional[web.TCPSite] = None 33 | 34 | # Response configurations 35 | self.responses: Dict[str, MockResponse] = {} 36 | self.request_log: List[Dict[str, Any]] = [] 37 | 38 | # Setup default routes 39 | self._setup_routes() 40 | 41 | def _setup_routes(self) -> None: 42 | """Setup default API routes""" 43 | self.app.router.add_route("*", "/api/{path:.*}", self._handle_api_request) 44 | self.app.router.add_route("GET", "/api/me", self._handle_me) 45 | self.app.router.add_route("GET", "/api/system/info", self._handle_system_info) 46 | 47 | async def _handle_api_request(self, request: web.Request) -> web.Response: 48 | """Handle generic API requests""" 49 | path = request.match_info.get('path', '') 50 | method = request.method 51 | full_path = f"/{method.lower()}/api/{path}" 52 | 53 | # Log the request 54 | self.request_log.append({ 55 | 'method': method, 56 | 'path': f"/api/{path}", 57 | 'query': dict(request.query), 58 | 'headers': dict(request.headers), 59 | 'timestamp': asyncio.get_event_loop().time() 60 | }) 61 | 62 | # Check if we have a configured response 63 | mock_response = self.responses.get(full_path) 64 | if not mock_response: 65 | # Default response 66 | mock_response = MockResponse( 67 | status=200, 68 | data={"message": f"Mock response for {full_path}"} 69 | ) 70 | 71 | # Simulate delay 72 | if mock_response.delay > 0: 73 | await asyncio.sleep(mock_response.delay) 74 | 75 | # Handle failure simulation 76 | if mock_response.fail_count > 0: 77 | mock_response.fail_count -= 1 78 | return web.Response( 79 | status=500, 80 | text=json.dumps({"error": "Simulated server error"}), 81 | headers={'Content-Type': 'application/json'} 82 | ) 83 | 84 | # Return configured response 85 | headers = mock_response.headers or {'Content-Type': 'application/json'} 86 | response_data = mock_response.data or {} 87 | 88 | return web.Response( 89 | status=mock_response.status, 90 | text=json.dumps(response_data, ensure_ascii=False), 91 | headers=headers 92 | ) 93 | 94 | async def _handle_me(self, request: web.Request) -> web.Response: 95 | """Handle /api/me endpoint""" 96 | return web.json_response({ 97 | "id": "test_user_id", 98 | "name": "Test User", 99 | "username": "test_user", 100 | "email": "test@example.com", 101 | "authorities": ["F_DATAVALUE_ADD", "F_ANALYTICS_READ"] 102 | }) 103 | 104 | async def _handle_system_info(self, request: web.Request) -> web.Response: 105 | """Handle /api/system/info endpoint""" 106 | return web.json_response({ 107 | "version": "2.41.0", 108 | "buildTime": "2024-01-01T00:00:00.000", 109 | "serverTimeZoneId": "UTC", 110 | "contextPath": "" 111 | }) 112 | 113 | def configure_response( 114 | self, 115 | method: str, 116 | path: str, 117 | status: int = 200, 118 | data: Optional[Dict[str, Any]] = None, 119 | headers: Optional[Dict[str, str]] = None, 120 | delay: float = 0.0, 121 | fail_count: int = 0 122 | ) -> None: 123 | """Configure a mock response for a specific endpoint""" 124 | full_path = f"/{method.lower()}{path}" 125 | self.responses[full_path] = MockResponse( 126 | status=status, 127 | data=data, 128 | headers=headers, 129 | delay=delay, 130 | fail_count=fail_count 131 | ) 132 | 133 | def configure_endpoint( 134 | self, 135 | method: str, 136 | path: str, 137 | data: Dict[str, Any], 138 | status: int = 200, 139 | delay: float = 0.0, 140 | fail_count: int = 0 141 | ) -> None: 142 | """Configure endpoint response (alias for configure_response)""" 143 | self.configure_response(method, path, status, data, delay=delay, fail_count=fail_count) 144 | 145 | def configure_analytics_response( 146 | self, 147 | headers: List[Dict[str, str]], 148 | rows: List[List[str]], 149 | delay: float = 0.0 150 | ) -> None: 151 | """Configure Analytics endpoint response""" 152 | self.configure_response( 153 | "GET", 154 | "/api/analytics", 155 | data={ 156 | "headers": headers, 157 | "rows": rows, 158 | "metaData": {"items": {}, "dimensions": {}}, 159 | "width": len(headers), 160 | "height": len(rows) 161 | }, 162 | delay=delay 163 | ) 164 | 165 | def configure_datavaluesets_response( 166 | self, 167 | data_values: List[Dict[str, str]], 168 | delay: float = 0.0 169 | ) -> None: 170 | """Configure DataValueSets endpoint response""" 171 | self.configure_response( 172 | "GET", 173 | "/api/dataValueSets", 174 | data={"dataValues": data_values}, 175 | delay=delay 176 | ) 177 | 178 | def configure_import_response( 179 | self, 180 | imported: int = 0, 181 | updated: int = 0, 182 | ignored: int = 0, 183 | conflicts: Optional[List[Dict[str, Any]]] = None, 184 | delay: float = 0.0 185 | ) -> None: 186 | """Configure import response""" 187 | conflicts = conflicts or [] 188 | total = imported + updated + ignored + len(conflicts) 189 | 190 | self.configure_response( 191 | "POST", 192 | "/api/dataValueSets", 193 | data={ 194 | "status": "SUCCESS" if not conflicts else "WARNING", 195 | "imported": imported, 196 | "updated": updated, 197 | "ignored": ignored, 198 | "total": total, 199 | "conflicts": conflicts 200 | }, 201 | delay=delay 202 | ) 203 | 204 | async def start(self) -> str: 205 | """Start the mock server""" 206 | self.runner = web.AppRunner(self.app) 207 | await self.runner.setup() 208 | 209 | self.site = web.TCPSite(self.runner, self.host, self.port) 210 | await self.site.start() 211 | 212 | base_url = f"http://{self.host}:{self.port}" 213 | logger.info(f"Mock DHIS2 server started at {base_url}") 214 | return base_url 215 | 216 | async def stop(self) -> None: 217 | """Stop the mock server""" 218 | if self.site: 219 | await self.site.stop() 220 | if self.runner: 221 | await self.runner.cleanup() 222 | logger.info("Mock DHIS2 server stopped") 223 | 224 | async def __aenter__(self): 225 | """Async context manager entry""" 226 | return await self.start() 227 | 228 | async def __aexit__(self, exc_type, exc_val, exc_tb): 229 | """Async context manager exit""" 230 | await self.stop() 231 | 232 | def get_request_log(self) -> List[Dict[str, Any]]: 233 | """Get logged requests""" 234 | return self.request_log.copy() 235 | 236 | def clear_request_log(self) -> None: 237 | """Clear request log""" 238 | self.request_log.clear() 239 | 240 | def get_request_count(self, method: str = None, path: str = None) -> int: 241 | """Get count of requests matching criteria""" 242 | filtered_requests = self.request_log 243 | 244 | if method: 245 | filtered_requests = [r for r in filtered_requests if r['method'].upper() == method.upper()] 246 | 247 | if path: 248 | filtered_requests = [r for r in filtered_requests if r['path'] == path] 249 | 250 | return len(filtered_requests) 251 | -------------------------------------------------------------------------------- /pydhis2/testing/network_simulator.py: -------------------------------------------------------------------------------- 1 | """Network condition simulator for testing weak network scenarios""" 2 | 3 | import asyncio 4 | import random 5 | from dataclasses import dataclass 6 | from datetime import datetime, timedelta 7 | from typing import Any, Dict, List, Optional 8 | 9 | import aiohttp 10 | import pandas as pd 11 | 12 | from pydhis2.testing.data_generator import TestDataGenerator 13 | 14 | 15 | @dataclass 16 | class NetworkCondition: 17 | """Network condition configuration""" 18 | name: str 19 | latency_ms: int = 50 # Average latency in milliseconds 20 | jitter_ms: int = 10 # Latency jitter 21 | packet_loss_rate: float = 0.0 # Packet loss rate (0.0 - 1.0) 22 | bandwidth_kbps: Optional[int] = None # Bandwidth limit in kbps 23 | timeout_rate: float = 0.0 # Rate of request timeouts (0.0 - 1.0) 24 | 25 | 26 | class NetworkSimulator: 27 | """Simulate various network conditions for testing""" 28 | 29 | # Predefined network conditions 30 | NORMAL = NetworkCondition( 31 | name="normal", 32 | latency_ms=20, 33 | jitter_ms=5, 34 | packet_loss_rate=0.0, 35 | timeout_rate=0.0 36 | ) 37 | 38 | SLOW_3G = NetworkCondition( 39 | name="slow_3g", 40 | latency_ms=200, 41 | jitter_ms=50, 42 | packet_loss_rate=0.01, 43 | bandwidth_kbps=400, 44 | timeout_rate=0.02 45 | ) 46 | 47 | WEAK_NETWORK = NetworkCondition( 48 | name="weak_network", 49 | latency_ms=400, 50 | jitter_ms=100, 51 | packet_loss_rate=0.03, 52 | bandwidth_kbps=200, 53 | timeout_rate=0.05 54 | ) 55 | 56 | VERY_WEAK = NetworkCondition( 57 | name="very_weak", 58 | latency_ms=800, 59 | jitter_ms=200, 60 | packet_loss_rate=0.08, 61 | bandwidth_kbps=100, 62 | timeout_rate=0.10 63 | ) 64 | 65 | def __init__(self, condition: NetworkCondition = None): 66 | self.condition = condition or self.NORMAL 67 | self.original_connector_init = None 68 | self.original_request = None 69 | 70 | async def simulate_latency(self) -> None: 71 | """Simulate network latency""" 72 | if self.condition.latency_ms > 0: 73 | # Add base latency plus jitter 74 | base_latency = self.condition.latency_ms / 1000.0 75 | jitter = random.uniform( 76 | -self.condition.jitter_ms / 1000.0, 77 | self.condition.jitter_ms / 1000.0 78 | ) 79 | total_latency = max(0, base_latency + jitter) 80 | 81 | if total_latency > 0: 82 | await asyncio.sleep(total_latency) 83 | 84 | def should_drop_packet(self) -> bool: 85 | """Determine if packet should be dropped (simulating packet loss)""" 86 | return random.random() < self.condition.packet_loss_rate 87 | 88 | def should_timeout(self) -> bool: 89 | """Determine if request should timeout""" 90 | return random.random() < self.condition.timeout_rate 91 | 92 | async def simulate_bandwidth_limit(self, data_size: int) -> None: 93 | """Simulate bandwidth limitations""" 94 | if self.condition.bandwidth_kbps and data_size > 0: 95 | # Calculate transfer time based on bandwidth 96 | transfer_time = (data_size * 8) / (self.condition.bandwidth_kbps * 1000) 97 | if transfer_time > 0: 98 | await asyncio.sleep(transfer_time) 99 | 100 | def wrap_session(self, session: aiohttp.ClientSession) -> 'SimulatedSession': 101 | """Wrap an aiohttp session with network simulation""" 102 | return SimulatedSession(session, self) 103 | 104 | 105 | class SimulatedSession: 106 | """Wrapper for aiohttp.ClientSession that simulates network conditions""" 107 | 108 | def __init__(self, session: aiohttp.ClientSession, simulator: NetworkSimulator): 109 | self.session = session 110 | self.simulator = simulator 111 | 112 | async def request(self, method: str, url: str, **kwargs) -> aiohttp.ClientResponse: 113 | """Make a request with network simulation""" 114 | # Simulate latency before request 115 | await self.simulator.simulate_latency() 116 | 117 | # Check for packet loss 118 | if self.simulator.should_drop_packet(): 119 | raise aiohttp.ClientConnectionError("Simulated packet loss") 120 | 121 | # Check for timeout 122 | if self.simulator.should_timeout(): 123 | raise asyncio.TimeoutError("Simulated network timeout") 124 | 125 | # Make the actual request 126 | # start_time = time.time() # For future latency simulation 127 | response = await self.session.request(method, url, **kwargs) 128 | 129 | # Simulate bandwidth limitations based on response size 130 | if hasattr(response, 'content_length') and response.content_length: 131 | await self.simulator.simulate_bandwidth_limit(response.content_length) 132 | 133 | return response 134 | 135 | async def get(self, url: str, **kwargs) -> aiohttp.ClientResponse: 136 | """GET request with simulation""" 137 | return await self.request('GET', url, **kwargs) 138 | 139 | async def post(self, url: str, **kwargs) -> aiohttp.ClientResponse: 140 | """POST request with simulation""" 141 | return await self.request('POST', url, **kwargs) 142 | 143 | async def put(self, url: str, **kwargs) -> aiohttp.ClientResponse: 144 | """PUT request with simulation""" 145 | return await self.request('PUT', url, **kwargs) 146 | 147 | async def delete(self, url: str, **kwargs) -> aiohttp.ClientResponse: 148 | """DELETE request with simulation""" 149 | return await self.request('DELETE', url, **kwargs) 150 | 151 | async def close(self) -> None: 152 | """Close the underlying session""" 153 | await self.session.close() 154 | 155 | 156 | class BenchmarkDataGenerator: 157 | """Generate data specifically for benchmark testing""" 158 | 159 | def __init__(self, seed: int = 42): 160 | self.generator = TestDataGenerator(seed) 161 | 162 | def generate_large_dataset( 163 | self, 164 | org_unit_count: int = 100, 165 | data_element_count: int = 20, 166 | period_count: int = 12, 167 | records_per_combination: int = 1 168 | ) -> pd.DataFrame: 169 | """Generate a large dataset for performance testing""" 170 | org_units = self.generator.generate_org_units(org_unit_count) 171 | data_elements = self.generator.generate_data_elements(data_element_count) 172 | periods = self.generator.generate_periods(months=period_count) 173 | 174 | data_values = [] 175 | 176 | for de in data_elements: 177 | for period in periods: 178 | for ou in org_units: 179 | for _ in range(records_per_combination): 180 | value = random.randint(1, 1000) 181 | 182 | data_values.append({ 183 | 'dataElement': de['id'], 184 | 'period': period, 185 | 'orgUnit': ou['id'], 186 | 'value': value, 187 | 'lastUpdated': datetime.now().isoformat(), 188 | 'created': (datetime.now() - timedelta(days=random.randint(1, 30))).isoformat() 189 | }) 190 | 191 | return pd.DataFrame(data_values) 192 | 193 | def generate_conflicted_dataset( 194 | self, 195 | base_data: pd.DataFrame, 196 | conflict_rate: float = 0.05 197 | ) -> pd.DataFrame: 198 | """Generate a dataset with intentional conflicts for testing""" 199 | conflicted_data = base_data.copy() 200 | 201 | # Randomly select records to make conflicting 202 | conflict_count = int(len(conflicted_data) * conflict_rate) 203 | conflict_indices = random.sample(range(len(conflicted_data)), conflict_count) 204 | 205 | for idx in conflict_indices: 206 | # Create conflicts by duplicating records with different values 207 | conflicted_row = conflicted_data.iloc[idx].copy() 208 | conflicted_row['value'] = 'INVALID_VALUE' # This will cause conflicts 209 | conflicted_data = pd.concat([conflicted_data, conflicted_row.to_frame().T], ignore_index=True) 210 | 211 | return conflicted_data 212 | 213 | def generate_performance_test_scenarios(self) -> List[Dict[str, Any]]: 214 | """Generate different scenarios for performance testing""" 215 | scenarios = [ 216 | { 217 | "name": "small_dataset", 218 | "description": "Small dataset for basic functionality", 219 | "org_units": 5, 220 | "data_elements": 3, 221 | "periods": 6, 222 | "expected_records": 5 * 3 * 6 223 | }, 224 | { 225 | "name": "medium_dataset", 226 | "description": "Medium dataset for typical workload", 227 | "org_units": 50, 228 | "data_elements": 10, 229 | "periods": 12, 230 | "expected_records": 50 * 10 * 12 231 | }, 232 | { 233 | "name": "large_dataset", 234 | "description": "Large dataset for stress testing", 235 | "org_units": 200, 236 | "data_elements": 25, 237 | "periods": 24, 238 | "expected_records": 200 * 25 * 24 239 | } 240 | ] 241 | 242 | return scenarios 243 | -------------------------------------------------------------------------------- /pydhis2/endpoints/metadata.py: -------------------------------------------------------------------------------- 1 | """Metadata endpoint - Metadata import, export, and management""" 2 | 3 | import json 4 | from typing import Any, Dict, Optional, Union 5 | 6 | import pandas as pd 7 | 8 | from pydhis2.core.errors import ImportConflictError 9 | from pydhis2.core.types import ExportFormat 10 | 11 | 12 | class MetadataImportSummary: 13 | """Metadata import summary""" 14 | 15 | def __init__(self, summary_data: Dict[str, Any]): 16 | self.raw_data = summary_data 17 | self.status = summary_data.get('status', 'UNKNOWN') 18 | self.stats = summary_data.get('stats', {}) 19 | self.type_reports = summary_data.get('typeReports', []) 20 | 21 | # Calculate overall statistics 22 | self.total = 0 23 | self.imported = 0 24 | self.updated = 0 25 | self.deleted = 0 26 | self.ignored = 0 27 | 28 | for type_report in self.type_reports: 29 | object_reports = type_report.get('objectReports', []) 30 | for report in object_reports: 31 | self.total += 1 32 | if report.get('index') is not None: 33 | if 'created' in str(report).lower(): 34 | self.imported += 1 35 | elif 'updated' in str(report).lower(): 36 | self.updated += 1 37 | elif 'deleted' in str(report).lower(): 38 | self.deleted += 1 39 | else: 40 | self.ignored += 1 41 | 42 | @property 43 | def success_rate(self) -> float: 44 | """Success rate""" 45 | if self.total == 0: 46 | return 0.0 47 | return (self.imported + self.updated) / self.total 48 | 49 | @property 50 | def has_errors(self) -> bool: 51 | """Check if there are errors""" 52 | return self.status in ['ERROR', 'WARNING'] 53 | 54 | def get_conflicts_df(self) -> pd.DataFrame: 55 | """Get conflicts as a DataFrame""" 56 | conflicts = [] 57 | 58 | for type_report in self.type_reports: 59 | object_type = type_report.get('klass', 'Unknown') 60 | object_reports = type_report.get('objectReports', []) 61 | 62 | for report in object_reports: 63 | error_reports = report.get('errorReports', []) 64 | for error in error_reports: 65 | conflicts.append({ 66 | 'object_type': object_type, 67 | 'uid': report.get('uid', ''), 68 | 'index': report.get('index', ''), 69 | 'error_code': error.get('errorCode', ''), 70 | 'message': error.get('message', ''), 71 | 'property': error.get('property', ''), 72 | 'value': error.get('value', ''), 73 | }) 74 | 75 | return pd.DataFrame(conflicts) 76 | 77 | 78 | class MetadataEndpoint: 79 | """Metadata API endpoint""" 80 | 81 | def __init__(self, client): 82 | self.client = client 83 | 84 | async def export( 85 | self, 86 | filter: Optional[Dict[str, str]] = None, 87 | fields: str = ":owner", 88 | defaults: str = "INCLUDE", 89 | download: bool = False, 90 | **kwargs 91 | ) -> Dict[str, Any]: 92 | """Export metadata""" 93 | params = { 94 | 'fields': fields, 95 | 'defaults': defaults, 96 | 'download': str(download).lower(), 97 | } 98 | 99 | # Add filters 100 | if filter: 101 | for key, value in filter.items(): 102 | params[f'{key}:filter'] = value 103 | 104 | # Add other parameters 105 | params.update(kwargs) 106 | 107 | return await self.client.get('/api/metadata', params=params) 108 | 109 | async def import_( 110 | self, 111 | metadata: Union[Dict[str, Any], str], 112 | atomic: bool = True, 113 | dry_run: bool = False, 114 | strategy: str = "CREATE_AND_UPDATE", 115 | merge_mode: str = "REPLACE", 116 | flush_mode: str = "AUTO", 117 | skip_sharing: bool = False, 118 | skip_validation: bool = False, 119 | **kwargs 120 | ) -> MetadataImportSummary: 121 | """Import metadata""" 122 | params = { 123 | 'atomic': str(atomic).lower(), 124 | 'dryRun': str(dry_run).lower(), 125 | 'importStrategy': strategy, 126 | 'mergeMode': merge_mode, 127 | 'flushMode': flush_mode, 128 | 'skipSharing': str(skip_sharing).lower(), 129 | 'skipValidation': str(skip_validation).lower(), 130 | } 131 | 132 | # Add other parameters 133 | params.update(kwargs) 134 | 135 | # Prepare data 136 | if isinstance(metadata, str): 137 | metadata_dict = json.loads(metadata) 138 | else: 139 | metadata_dict = metadata 140 | 141 | response = await self.client.post( 142 | '/api/metadata', 143 | data=metadata_dict, 144 | params=params 145 | ) 146 | 147 | summary = MetadataImportSummary(response) 148 | 149 | # Check for errors 150 | if summary.has_errors and not dry_run: 151 | conflicts_df = summary.get_conflicts_df() 152 | if not conflicts_df.empty: 153 | conflicts = conflicts_df.to_dict('records') 154 | raise ImportConflictError( 155 | conflicts=conflicts, 156 | import_summary=summary.raw_data 157 | ) 158 | 159 | return summary 160 | 161 | async def get_schemas(self) -> Dict[str, Any]: 162 | """Get all schemas""" 163 | return await self.client.get('/api/schemas') 164 | 165 | async def get_schema(self, schema_name: str) -> Dict[str, Any]: 166 | """Get a specific schema""" 167 | return await self.client.get(f'/api/schemas/{schema_name}') 168 | 169 | async def get_data_elements( 170 | self, 171 | fields: str = "id,name,code,valueType", 172 | filter: Optional[Dict[str, str]] = None, 173 | paging: bool = False, 174 | **kwargs 175 | ) -> Dict[str, Any]: 176 | """Get data elements""" 177 | params = { 178 | 'fields': fields, 179 | 'paging': str(paging).lower(), 180 | } 181 | 182 | if filter: 183 | for key, value in filter.items(): 184 | params['filter'] = f'{key}:eq:{value}' 185 | 186 | params.update(kwargs) 187 | 188 | return await self.client.get('/api/dataElements', params=params) 189 | 190 | async def get_indicators( 191 | self, 192 | fields: str = "id,name,code,numerator,denominator", 193 | filter: Optional[Dict[str, str]] = None, 194 | paging: bool = False, 195 | **kwargs 196 | ) -> Dict[str, Any]: 197 | """Get indicators""" 198 | params = { 199 | 'fields': fields, 200 | 'paging': str(paging).lower(), 201 | } 202 | 203 | if filter: 204 | for key, value in filter.items(): 205 | params['filter'] = f'{key}:eq:{value}' 206 | 207 | params.update(kwargs) 208 | 209 | return await self.client.get('/api/indicators', params=params) 210 | 211 | async def get_organisation_units( 212 | self, 213 | fields: str = "id,name,code,level,path", 214 | filter: Optional[Dict[str, str]] = None, 215 | paging: bool = False, 216 | **kwargs 217 | ) -> Dict[str, Any]: 218 | """Get organisation units""" 219 | params = { 220 | 'fields': fields, 221 | 'paging': str(paging).lower(), 222 | } 223 | 224 | if filter: 225 | for key, value in filter.items(): 226 | params['filter'] = f'{key}:eq:{value}' 227 | 228 | params.update(kwargs) 229 | 230 | return await self.client.get('/api/organisationUnits', params=params) 231 | 232 | async def get_option_sets( 233 | self, 234 | fields: str = "id,name,code,options[id,name,code]", 235 | filter: Optional[Dict[str, str]] = None, 236 | paging: bool = False, 237 | **kwargs 238 | ) -> Dict[str, Any]: 239 | """Get option sets""" 240 | params = { 241 | 'fields': fields, 242 | 'paging': str(paging).lower(), 243 | } 244 | 245 | if filter: 246 | for key, value in filter.items(): 247 | params['filter'] = f'{key}:eq:{value}' 248 | 249 | params.update(kwargs) 250 | 251 | return await self.client.get('/api/optionSets', params=params) 252 | 253 | async def validate_metadata( 254 | self, 255 | metadata: Union[Dict[str, Any], str] 256 | ) -> Dict[str, Any]: 257 | """Validate metadata (dry run import)""" 258 | return await self.import_(metadata, dry_run=True) 259 | 260 | async def export_to_file( 261 | self, 262 | file_path: str, 263 | format: ExportFormat = ExportFormat.JSON, 264 | **export_kwargs 265 | ) -> str: 266 | """Export metadata to file""" 267 | metadata = await self.export(**export_kwargs) 268 | 269 | if format == ExportFormat.JSON: 270 | with open(file_path, 'w', encoding='utf-8') as f: 271 | json.dump(metadata, f, indent=2, ensure_ascii=False) 272 | else: 273 | raise ValueError(f"Metadata export only supports JSON format, got: {format}") 274 | 275 | return file_path 276 | 277 | async def import_from_file( 278 | self, 279 | file_path: str, 280 | **import_kwargs 281 | ) -> MetadataImportSummary: 282 | """Import metadata from file""" 283 | with open(file_path, encoding='utf-8') as f: 284 | metadata = json.load(f) 285 | 286 | return await self.import_(metadata, **import_kwargs) 287 | --------------------------------------------------------------------------------