├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── build.yml │ ├── docs.yaml │ ├── lint.yml │ ├── on-pr-to-main.yml │ ├── on-push-any-branch.yml │ ├── on-push-main-branch.yml │ ├── publish.yml │ ├── release-please.yml │ └── snyk.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .pyup.yml ├── .snyk ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── azure-pipelines.yml ├── docs └── manual.md ├── documentation ├── .gitignore ├── .prettierrc.js ├── README.md ├── babel.config.js ├── docs │ ├── about │ │ ├── introduction.md │ │ └── usage │ │ │ ├── _category_.json │ │ │ ├── basic-usage.md │ │ │ ├── caching.md │ │ │ ├── data-source.md │ │ │ ├── fetching-metadata.md │ │ │ └── time-zone.md │ └── contribute │ │ ├── _category_.yaml │ │ ├── development-guide │ │ ├── _category_.json │ │ ├── publishing.md │ │ ├── setup.md │ │ ├── testing.md │ │ └── upgrading.md │ │ ├── documentation.md │ │ ├── ground-rules.md │ │ ├── how-to-start-contributing.md │ │ └── overview.md ├── docusaurus.config.js ├── package.json ├── sidebars.js ├── src │ ├── css │ │ └── custom.css │ └── pages │ │ └── index.tsx ├── static │ ├── .nojekyll │ └── img │ │ ├── favicon.png │ │ └── logo.svg ├── tsconfig.json └── yarn.lock ├── examples └── quickstart.ipynb ├── mypy.ini ├── poetry.lock ├── pyproject.toml ├── pytest.ini ├── tagreader ├── __init__.py ├── __version__.py ├── cache.py ├── clients.py ├── logger.py ├── utils.py └── web_handlers.py └── tests ├── conftest.py ├── test_AspenHandlerREST.py ├── test_AspenHandlerREST_connect.py ├── test_PIHandlerREST.py ├── test_PIHandlerREST_connect.py ├── test_bucketcache.py ├── test_cache.py ├── test_clients.py └── test_utils.py /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Ref. https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners 2 | * @Asgmel03 @lawoEq 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" 9 | directory: "/" 10 | schedule: 11 | interval: "weekly" 12 | commit-message: 13 | prefix: "chore(deps): " 14 | - package-ecosystem: 'github-actions' 15 | directory: '/' 16 | schedule: 17 | interval: "weekly" 18 | commit-message: 19 | prefix: "chore(deps): " 20 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: "🧪 Build & test code" 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | build: 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | python-version: ["3.9", "3.10", "3.11"] 13 | os: [windows-latest, ubuntu-latest, macos-latest] 14 | runs-on: ${{ matrix.os }} 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | 19 | - name: Install required Linux library for pykerberos 20 | if: matrix.os == 'ubuntu-latest' 21 | run: | 22 | sudo apt-get update && sudo apt-get install libkrb5-dev 23 | 24 | - name: Install Poetry 25 | run: | 26 | pipx install poetry 27 | 28 | - name: Setup Python 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: ${{matrix.python-version}} 32 | cache: "poetry" 33 | 34 | - name: Check pyproject.toml validity 35 | run: poetry check --no-interaction 36 | 37 | - name: Install deps 38 | run: poetry install --no-interaction 39 | 40 | - name: Run tests 41 | run: poetry run pytest 42 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: "📚 Publish Docs" 2 | 3 | on: 4 | # Workflow dispatch is used for manual triggers 5 | workflow_dispatch: 6 | # Workflow call is used for called from another workflow 7 | workflow_call: 8 | 9 | env: 10 | GITHUB_PAGES_BRANCH: gh-pages 11 | 12 | jobs: 13 | publish-docs: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@v4 19 | 20 | - name: Setup node 21 | uses: actions/setup-node@v4 22 | with: 23 | node-version: 20 24 | cache: yarn 25 | cache-dependency-path: documentation/yarn.lock 26 | 27 | - name: Install dependencies and build website 28 | run: | 29 | cd documentation 30 | yarn install --frozen-lockfile 31 | yarn build 32 | 33 | - name: Push static files to Github Pages branch 34 | run: | 35 | cd documentation/build 36 | CREATED_FROM_REF=$(git rev-parse --short HEAD) 37 | git init 38 | git config user.name "GitHub Actions Bot" 39 | git config user.email "<>" 40 | git checkout -b $GITHUB_PAGES_BRANCH 41 | git remote add $GITHUB_PAGES_BRANCH https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/equinor/tagreader-python 42 | git add . 
43 | git commit -m "Built from commit '$CREATED_FROM_REF'" 44 | git push -f --set-upstream gh-pages gh-pages 45 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: '💎 Code quality' 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | lint: 9 | name: Lint 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.11' 20 | 21 | - name: Install pre-commit 22 | run: pip install pre-commit 23 | 24 | - name: Run pre-commit 25 | run: pre-commit run --all-files 26 | -------------------------------------------------------------------------------- /.github/workflows/on-pr-to-main.yml: -------------------------------------------------------------------------------- 1 | name: '➕ Pull Request' 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - opened 7 | - edited 8 | - reopened 9 | 10 | env: 11 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 12 | PR_LABEL: 'invalid_PR_title' 13 | jobs: 14 | lint-pr: 15 | name: Lint pull request title 16 | if: ${{ github.actor != 'dependabot[bot]' }} 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Checkout current PR 21 | run: gh pr checkout $GITHUB_HEAD_REF 22 | 23 | - name: Create Label 24 | continue-on-error: true 25 | run: gh label create ${{ env.PR_LABEL }} 26 | 27 | - name: Lint pull request title 28 | uses: jef/conventional-commits-pr-action@v1 29 | with: 30 | token: ${{ secrets.GITHUB_TOKEN }} 31 | 32 | - name: Remove label 33 | run: gh pr edit --remove-label ${{ env.PR_LABEL }} 34 | 35 | - name: Add label 36 | if: ${{ failure() }} 37 | run: gh pr edit --add-label ${{ env.PR_LABEL }} 38 | -------------------------------------------------------------------------------- /.github/workflows/on-push-any-branch.yml: -------------------------------------------------------------------------------- 1 | name: '✨ On push to any branch' 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | tags-ignore: 8 | - '**' 9 | 10 | 11 | jobs: 12 | lint: 13 | name: '💎 Code Quality' 14 | uses: ./.github/workflows/lint.yml 15 | 16 | snyk: 17 | name: '️‍🕵️‍♀️ Snyk vulnerability scan' 18 | uses: ./.github/workflows/snyk.yml 19 | secrets: inherit 20 | 21 | test: 22 | name: '🧪 Build & test Code' 23 | uses: ./.github/workflows/build.yml 24 | secrets: inherit 25 | -------------------------------------------------------------------------------- /.github/workflows/on-push-main-branch.yml: -------------------------------------------------------------------------------- 1 | name: "✨ On push to main branch" 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | publish-docs: 10 | name: '️‍📚️ Publish Docs' 11 | uses: ./.github/workflows/docs.yaml -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Upload Python Package 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | deploy: 9 | runs-on: windows-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up Python 13 | uses: actions/setup-python@v4 14 | with: 15 | python-version: '3.11' 16 | 17 | - name: Install Poetry 18 | run: | 19 | pipx install poetry 20 | 21 | - name: Setup Python 22 | uses: 
actions/setup-python@v4 23 | with: 24 | python-version: ${{matrix.python-version}} 25 | cache: 'poetry' 26 | 27 | - name: Check pyproject.toml validity 28 | run: poetry check --no-interaction 29 | 30 | - name: Install deps 31 | run: poetry install --no-interaction 32 | 33 | - name: Publish to PyPI 34 | env: 35 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }} 36 | run: | 37 | poetry publish --build 38 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | name: 🎉 Release Please 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | release_please: 9 | name: 🔖 Release Please 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: PyPI Release 13 | id: release 14 | uses: google-github-actions/release-please-action@v3 15 | with: 16 | release-type: python 17 | bump-minor-pre-major: true 18 | changelog-types: > 19 | [{"type":"build", "section":"📦 Build system","hidden":false}, 20 | {"type":"chore", "section":"🧹 Chores","hidden":false}, 21 | {"type":"ci", "section":"👷 CI/CD","hidden":false}, 22 | {"type":"docs","section":"📚 Documentation","hidden":false}, 23 | {"type":"feat","section":"✨ Features","hidden":false}, 24 | {"type":"fix","section":"🐛 Bug Fixes","hidden":false}, 25 | {"type":"perf", "section":"🏎️ Performance","hidden":false}, 26 | {"type":"refactor", "section":"🔨 Refactor","hidden":false}, 27 | {"type":"revert", "section":"⏪️ Revert","hidden":false}, 28 | {"type":"style","section":"💎 Style","hidden":false}, 29 | {"type":"test", "section":"🧪 Tests","hidden":false}] 30 | outputs: 31 | release_created: ${{ steps.release.outputs.release_created }} 32 | tag_name: ${{ steps.release.outputs.tag_name }} 33 | 34 | build: 35 | name: '🛠️ Build' 36 | if: ${{ needs.release_please.outputs.release_created }} 37 | needs: [release_please] 38 | uses: ./.github/workflows/build.yml 39 | 40 | deploy: 41 | name: '🚀 Publish' 42 | if: ${{ needs.release_please.outputs.release_created }} 43 | needs: [build, release_please] 44 | uses: ./.github/workflows/publish.yml 45 | secrets: inherit 46 | -------------------------------------------------------------------------------- /.github/workflows/snyk.yml: -------------------------------------------------------------------------------- 1 | name: '🕵️‍♀️ Snyk vulnerability scan' 2 | on: 3 | # Workflow dispatch is used for manual triggers 4 | workflow_dispatch: 5 | # Workflow call is used for called from another workflow 6 | workflow_call: 7 | 8 | 9 | jobs: 10 | snyk: 11 | name: Snyk vulnerability scan 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v4 16 | 17 | - name: Setup Snyk 18 | uses: snyk/actions/setup@master 19 | 20 | - name: Scan third-party dependencies 21 | env: 22 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} 23 | run: snyk test --file=poetry.lock --package-manager=poetry --policy-path=.snyk --severity-threshold=medium 24 | 25 | - name: Scan code for vulnerabilities 26 | continue-on-error: true 27 | env: 28 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} 29 | run: snyk code test 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | 
downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | .cache* 48 | *cache* 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv*/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | # pycharm 134 | .idea/ 135 | 136 | # version handled by setuptools_scm 137 | **/version.py 138 | 139 | # Sometimes symlink, so treat as both dir and "file" 140 | # extratests 141 | # extratests/* 142 | 143 | # tagreader cache file 144 | *.h5 145 | 146 | # vscode 147 | .vscode/ 148 | 149 | test_scripts/* 150 | 151 | # Certificate files 152 | *.pem 153 | *.cer 154 | 155 | adhoc/ 156 | 157 | tests/test_adhoc.py 158 | 159 | # SNYK 160 | .dccache 161 | 162 | # Apple 163 | .DS_Store 164 | 165 | # Other 166 | .tmp/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "extratests"] 2 | path = tests/extratests 3 | url = https://github.com/equinor/tagreader-python-extra-tests.git 4 | [submodules "extratests"] 5 | branch = master 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-yaml 6 | - id: check-json 7 | - id: trailing-whitespace 8 | - id: check-merge-conflict 9 | - repo: https://github.com/psf/black 
10 | rev: 24.2.0 11 | hooks: 12 | - id: black 13 | language_version: python3 14 | 15 | - repo: https://github.com/PyCQA/flake8 16 | rev: '7.0.0' 17 | hooks: 18 | - id: flake8 19 | args: ['--ignore=E501,W503,E231,E503,E203,F401,E702,E704'] 20 | 21 | - repo: https://github.com/timothycrosley/isort 22 | rev: '5.13.2' 23 | hooks: 24 | - id: isort 25 | args: 26 | [ 27 | '--line-length=88', 28 | '--use-parentheses', 29 | '--trailing-comma', 30 | '--multi-line=3', 31 | ] 32 | 33 | - repo: https://github.com/pre-commit/mirrors-mypy 34 | rev: v1.9.0 35 | hooks: 36 | - id: mypy 37 | args: [--strict, --ignore-missing-imports] 38 | additional_dependencies: 39 | - types-requests 40 | - pandas-stubs 41 | -------------------------------------------------------------------------------- /.pyup.yml: -------------------------------------------------------------------------------- 1 | # configure updates globally 2 | # default: all 3 | # allowed: all, insecure, False 4 | update: insecure 5 | 6 | # configure dependency pinning globally 7 | # default: True 8 | # allowed: True, False 9 | pin: True 10 | 11 | # set the default branch 12 | # default: empty, the default branch on GitHub 13 | #branch: dev 14 | 15 | # update schedule 16 | # default: empty 17 | # allowed: "every day", "every week", .. 18 | schedule: "every week" 19 | 20 | # search for requirement files 21 | # default: True 22 | # allowed: True, False 23 | search: True 24 | -------------------------------------------------------------------------------- /.snyk: -------------------------------------------------------------------------------- 1 | # Snyk (https://snyk.io) policy file, patches or ignores known vulnerabilities. 2 | # ignores vulnerabilities until expiry date; change duration by modifying expiry date 3 | ignore: 4 | SNYK-JS-INFLIGHT-6095116: 5 | - '*': 6 | reason: 'No patch available' 7 | expires: 2025-04-01T00:00:00.000Z 8 | created: 2023-12-04T09:05:00.000Z 9 | SNYK-JS-KATEX-8647963: 10 | - '*': 11 | reason: 'No patch available' 12 | expires: 2025-04-01T00:00:00.000Z 13 | created: 2025-02-04T15:29:00.000Z 14 | SNYK-PYTHON-PANDAS-8549481: 15 | - '*': 16 | reason: 'No patch available' 17 | expires: 2025-04-01T00:00:00.000Z 18 | created: 2025-01-02T10:30:00.000Z 19 | SNYK-PYTHON-JINJA2-8548181: 20 | - '*': 21 | reason: 'Will patch later' 22 | expires: 2025-04-01T00:00:00.000Z 23 | created: 2025-02-04T15:28:00.000Z 24 | SNYK-PYTHON-JINJA2-8548987: 25 | - '*': 26 | reason: 'Will patch later' 27 | expires: 2025-04-01T00:00:00.000Z 28 | created: 2025-02-04T15:28:00.000Z 29 | SNYK-PYTHON-TORNADO-8400708: 30 | - '*': 31 | reason: '' 32 | expires: 2025-04-01T00:00:00.000Z 33 | created: 2025-02-04T15:28:00.000Z 34 | patch: {} 35 | version: v1.25.0 -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribute 2 | As Tagreader is an open source project, all contributions are welcome. This includes code, bug reports, issues, 3 | feature requests, and documentation. The preferred way of submitting a contribution is to either create an issue on 4 | GitHub or to fork the project and make a pull request. 
5 | 6 | To starting contributing, please see [Tagreader Docs - Contribute](https://equinor.github.io/tagreader-python/docs/contribute/overview) 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Equinor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | exclude .* 2 | exclude requirements.txt test-requirements.txt environment.yml 3 | exclude azure-pipelines.yml 4 | recursive-exclude tests * 5 | recursive-exclude docs * 6 | recursive-exclude .github * 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tagreader-python 2 | 3 | ![GitHub Build Status](https://github.com/equinor/tagreader-python/workflows/Test/badge.svg) 4 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/tagreader) 5 | ![PyPI](https://img.shields.io/pypi/v/tagreader) 6 | [![Downloads](https://pepy.tech/badge/tagreader)](https://pepy.tech/project/tagreader) 7 | 8 | Tagreader is a Python package for reading timeseries data from the OSIsoft PI and Aspen Infoplus.21 9 | Information Management Systems (IMS). It is intended to be easy to use, and present as similar interfaces 10 | as possible to the backend plant historians. 11 | 12 | ## Installation 13 | You can install tagreader directly into your project from pypi by using pip 14 | or another package manager. Supports Python version 3.9.2 and above. 15 | 16 | ```shell 17 | pip install tagreader 18 | ``` 19 | 20 | ## Usage 21 | Tagreader is easy to use for both Equinor internal IMS services, and non-internal usage. For non-internal usage 22 | you simply need to provide the corresponding IMS service URLs and IMSType. 23 | See [data source](https://equinor.github.io/tagreader-python/docs/about/usage/data-source) for details. 
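As a sketch of the non-internal case, creating a client against a self-hosted endpoint might look like the following. The URL below is a placeholder, not a real server, and `piwebapi` can be swapped for `aspenone` depending on the backend:

```python
import tagreader

# Placeholder URL - substitute the Web API root of your own PI or IP.21 server
c = tagreader.IMSClient(
    datasource="myplant",
    imstype="piwebapi",
    url="https://piwebapi.mycompany.com/piwebapi",
)
c.connect()
```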
24 | 25 | ### Usage example 26 | ```python 27 | import tagreader 28 | c = tagreader.IMSClient("mysource", "aspenone") 29 | print(c.search("tag*")) 30 | df = c.read_tags(["tag1", "tag2"], "18.06.2020 08:00:00", "18.06.2020 09:00:00", 60) 31 | ``` 32 | 33 | Note, you can add a timeout argument to the search method in order to avoid long-running search queries. 34 | 35 | ### Jupyter Notebook Quickstart 36 | Jupyter Notebook examples can be found in /examples. In order to run these examples, you need to install the 37 | optional dependencies. 38 | 39 | ```shell 40 | pip install tagreader[notebooks] 41 | ``` 42 | 43 | The quickstart Jupyter Notebook can be found [here](https://github.com/equinor/tagreader-python/blob/main/examples/quickstart.ipynb) 44 | 45 | For more details, see the [Tagreader Docs](https://equinor.github.io/tagreader-python/). 46 | 47 | ## Documentation 48 | The full documentation can be found in [Tagreader Docs](https://equinor.github.io/tagreader-python/) 49 | 50 | ## Contribute 51 | To starting contributing, please see [Tagreader Docs - Contribute](https://equinor.github.io/tagreader-python/docs/contribute/overview) 52 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security 2 | 3 | If you discover a security vulnerability in this project, please follow these steps to responsibly disclose it: 4 | 5 | 1. **Do not** create a public GitHub issue for the vulnerability. 6 | 2. Follow our guideline for Responsible Disclosure Policy at [https://www.equinor.com/about-us/csirt](https://www.equinor.com/about-us/csirt) to report the issue 7 | 8 | The following information will help us triage your report more quickly: 9 | 10 | - Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 11 | - Full paths of source file(s) related to the manifestation of the issue 12 | - The location of the affected source code (tag/branch/commit or direct URL) 13 | - Any special configuration required to reproduce the issue 14 | - Step-by-step instructions to reproduce the issue 15 | - Proof-of-concept or exploit code (if possible) 16 | - Impact of the issue, including how an attacker might exploit the issue 17 | 18 | We prefer all communications to be in English. 
19 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | - master 3 | 4 | pool: 5 | name: default 6 | 7 | strategy: 8 | matrix: 9 | Python38: 10 | python.version: '3.8' 11 | Python39: 12 | python.version: '3.9' 13 | Python310: 14 | python.version: '3.10' 15 | Python311: 16 | python.version: '3.11' 17 | 18 | steps: 19 | - task: UsePythonVersion@0 20 | inputs: 21 | versionSpec: '$(python.version)' 22 | displayName: 'Use Python $(python.version)' 23 | 24 | - script: | 25 | python -m pip install --upgrade pip pipx 26 | pipx install poetry 27 | poetry export -f requirements.txt --output requirements.txt 28 | pip install -r requirements.txt 29 | pip install pytest pytest-azurepipelines pytest-cov 30 | displayName: 'Install dependencies' 31 | env: 32 | HTTPS_PROXY: $(var_http_proxy) 33 | 34 | - script: | 35 | pytest --junitxml=junit/test-results.xml --cov=tagreader --cov-report=xml 36 | displayName: 'Run tests' 37 | 38 | - task: PublishTestResults@2 39 | condition: succeededOrFailed() 40 | inputs: 41 | testResultsFiles: '**/test-*.xml' 42 | testRunTitle: 'Publish test results for Python $(python.version)' 43 | 44 | - task: PublishCodeCoverageResults@1 45 | inputs: 46 | codeCoverageTool: Cobertura 47 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' 48 | -------------------------------------------------------------------------------- /docs/manual.md: -------------------------------------------------------------------------------- 1 | # Tagreader-python 2 | 3 | Tagreader is a Python package for reading trend data from the OSIsoft PI and AspenTech InfoPlus.21 IMS systems. It can communicate with PI Web API, and with IP.21 using Process Data REST Web API. 4 | 5 | Tagreader is intended to be easy to use, and present the same interface to the user regardless of IMS system and connection method. 6 | 7 | # Index 8 | 9 | - [Requirements](#requirements) 10 | - [Before getting started](#before-getting-started) 11 | - [Installation](#installation) 12 | - [Adding host certificates](#adding-host-certificates) 13 | - [For Equinor users](#for-equinor-users) 14 | - [For non-Equinor users](#for-non-equinor-users) 15 | - [Importing the module](#importing-the-module) 16 | - [IMS types](#ims-types) 17 | - [Listing available data sources](#listing-available-data-sources) 18 | - [The Client](#the-client) 19 | - [Creating a client](#creating-a-client) 20 | - [Connecting to data source](#connecting-to-data-source) 21 | - [Searching for tags](#searching-for-tags) 22 | - [Reading data](#reading-data) 23 | - [Selecting what to read](#selecting-what-to-read) 24 | - [Status information](#status-information) 25 | - [Caching results](#caching-results) 26 | - [Time zones](#time-zones) 27 | - [Fetching metadata](#fetching-metadata) 28 | - [get_units()](#getunits) 29 | - [get_description()](#getdescription) 30 | - [Performing raw queries](#performing-raw-queries) 31 | 32 | # Requirements 33 | 34 | Python >= 3.8 with the following packages: 35 | 36 | + pandas >= 1.0.0 37 | + diskcache 38 | + requests 39 | + requests_kerberos 40 | 41 | # Before getting started 42 | 43 | It is highly recommended to go through the [quickstart](../examples/quickstart.ipynb) example. It contains references to relevant sections in this manual. 
44 | 45 | # Installation 46 | 47 | To install and/or upgrade: 48 | 49 | ``` 50 | pip install --upgrade tagreader 51 | ``` 52 | 53 | ## Adding host certificates 54 | 55 | ### For Equinor users 56 | 57 | ***Note**: Since v2.7.0 the procedure described below will be automatically performed on Equinor hosts when importing the tagreader module. It should therefore no longer be necessary to perform this step manually.* 58 | 59 | The Web APIs are queried with the `requests` package. `requests` does not utilize the system certificate store, but instead relies on the `certifi` bundle. In order to avoid SSL verification errors, we need to either turn off SSL verification (optional input argument `verifySSL=False` for relevant function calls) or, preferably, add the certificate to the `certifi` bundle. To do this, simply activate the virtual environment where you installed `tagreader`, and run the following snippet: 60 | 61 | ``` python 62 | from tagreader.utils import add_equinor_root_certificate 63 | add_equinor_root_certificate() 64 | ``` 65 | 66 | The output should inform you that the certificate was successfully added. This needs to be repeated whenever certifi is upgraded in your python virtual environment. It is safe to run more than once: If the function detects that the certificate has already been added to your current certifi installation, the certificate will not be duplicated. 67 | 68 | ### For non-Equinor users 69 | 70 | If you run info SSL verification errors and prefer to not set `verifySSL=False` , you can try the procedure outlined [here](https://incognitjoe.github.io/adding-certs-to-requests.html). 71 | 72 | # Importing the module 73 | 74 | The module is imported with 75 | 76 | ``` python 77 | import tagreader 78 | ``` 79 | 80 | # IMS types 81 | 82 | Tagreader supports connecting to PI and IP.21 servers using Web API interfaces. When calling certain methods, the user will need to tell tagreader which system and which connection method to use. This input argument is called `imstype` , and can be one of the following case-insensitive strings: 83 | 84 | * `piwebapi` : For connecting to OSISoft PI Web API 85 | * `aspenone` : For connecting to AspenTech Process Data REST Web API 86 | 87 | # Listing available data sources 88 | 89 | The method `tagreader.list_sources()` can query for available PI and IP.21 servers available through Web API. Input arguments: 90 | 91 | * `imstype` (optional) : The name of the [IMS type](#ims-types) to query. Valid values: `piwebapi` and `aspenone`. 92 | 93 | The following input arguments are only relevant when calling `list_sources()` with a Web API `imstype` ( `piwebapi` or `aspenone` ): 94 | 95 | * `url` (optional): Path to server root, e.g. _"https://aspenone/ProcessData/AtProcessDataREST.dll"_ or _"https://piwebapi/piwebapi"_. **Default**: Path to Equinor server corresponding to selected `imstype` if `imstype` is `piwebapi` or `aspenone` . 96 | * `verifySSL` (optional): Whether to verify SSL certificate sent from server. **Default**: `True`. 97 | * `auth` (optional): Auth object to pass to the server for authentication. **Default**: Kerberos-based auth objects that work with Equinor servers. If not connecting to an Equinor server, you may have to create your own auth. 
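For a non-Equinor server, such a call might look like the sketch below. The server root and credentials are placeholders, and the NTLM auth object is only one example of an auth that can be passed in:

``` python
from requests_ntlm import HttpNtlmAuth
from tagreader import list_sources

# Placeholder domain, user and password - replace with your own credentials
auth = HttpNtlmAuth("mydomain\\myuser", "mypassword")
# Placeholder server root - replace with the Web API root of your own server
sources = list_sources(
    imstype="piwebapi",
    url="https://piwebapi.mycompany.com/piwebapi",
    auth=auth,
    verifySSL=True,
)
print(sources)
```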
98 | 99 | **Example:** 100 | 101 | ``` python 102 | from tagreader import list_sources 103 | list_sources("aspenone") 104 | list_sources("piwebapi") 105 | ``` 106 | 107 | When called with `imstype` set to `piwebapi` or `aspenone`, `list_sources()` will connect to the web server URL and query for the available list of data sources. This list is normally the complete set of data sources available on the server, and does not indicate whether the user is authorized to query the source or not. 108 | 109 | When querying Equinor Web API for data sources, `list_sources()` should require no input argument except `imstype="piwebapi"` or `imstype="aspenone"`. For non-Equinor servers, `url` will need to be specified, as may `auth` and `verifySSL` . 110 | 111 | # The Client 112 | 113 | The client presents the interface for communicating with the data source to the user. The interface shall be as unified as possible, regardless of the IMS type that is used. A handler object specifically designed for each IMS type is attached to the client when the client is created. The handler is responsible for handling the communication and data interpretation between the server and the client object. 114 | 115 | ## Creating a client 116 | 117 | A connection to a data source is prepared by creating an instance of `tagreader.IMSClient` with the following input arguments: 118 | 119 | * `datasource` : Name of data source 120 | * `imstype` (optional): The name of the [IMS type](#ims-types) to query. Indicates the type of data source that is requested, and therefore determines which handler type to use. Valid values are `piwebapi` and `aspenone`. If not provided it will search the available sources and find the type. 121 | * `tz` (optional): Time zone naive time stamps will be interpreted as belonging to this time zone. Similarly, the returned data points will be localized to this time zone. **Default**: _"Europe/Oslo"_. 122 | 123 | The following input arguments can be used when connecting to either `piwebapi` or to `aspenone`. None of these should be necessary to supply when connecting to Equinor servers. 124 | 125 | * `url` (optional): Path to server root, e.g. _"https://aspenone/ProcessData/AtProcessDataREST.dll"_ or _"https://piwebapi/piwebapi"_. **Default**: Path to Equinor server corresponding to selected `imstype` . 126 | * `verifySSL` (optional): Whether to verify SSL certificate sent from server. **Default**: `True`. 127 | * `auth` (optional): Auth object to pass to the server for authentication. **Default**: Kerberos-based auth object that works with Equinor servers. 128 | 129 | ## Connecting to data source 130 | 131 | After creating the client as described above, connect to the server with the `connect()` method. 
132 | 133 | **Example** 134 | 135 | Connecting to the PINO PI data source using PI webapi: 136 | 137 | ``` python 138 | c = tagreader.IMSClient("PINO") 139 | ``` 140 | 141 | Connecting to the Peregrino IP.21 data source using AspenTech Process Data REST Web API, specifying that all naive time stamps as well as the returned data shall use Rio local time, and using the local endpoint in Brazil: 142 | 143 | ``` python 144 | c = tagreader.IMSClient(datasource="PER", 145 | imstype="aspenone", 146 | tz="Brazil/East", 147 | url="https://aspenone-per.equinor.com/ProcessExplorer/ProcessData/AtProcessDataREST.dll") 148 | c.connect() 149 | ``` 150 | 151 | Connecting to some other AspenTech Web API URL using NTLM authentication instead of default Kerberos and ignoring the server's host certificate: 152 | 153 | ``` python 154 | import getpass 155 | from requests_ntlm import HttpNtlmAuth 156 | user = "mydomain\\" + getpass.getuser() 157 | pwd = getpass.getpass() 158 | auth = HttpNtlmAuth(user, pwd) 159 | c = tagreader.IMSClient(datasource="myplant", 160 | url="https://api.mycompany.com/aspenone", 161 | imstype="aspenone", 162 | auth=auth, 163 | verifySSL=False) 164 | c.connect() 165 | ``` 166 | 167 | # Searching for tags 168 | 169 | The client method `search()` can be used to search for tags using either tag name, tag description or both. 170 | 171 | Supply at least one of the following arguments: 172 | 173 | * `tag` : Name of tag 174 | * `desc` : Description of tag 175 | 176 | If both arguments are provided, the both must match. 177 | 178 | `*` can be used as wildcard. 179 | 180 | **Examples** 181 | 182 | ``` python 183 | c = tagreader.IMSClient("PINO") 184 | c.connect() 185 | c.search("cd*158") 186 | c.search(desc="*reactor*") 187 | c.search(tag="BA:*", desc="*Temperature*") 188 | ``` 189 | 190 | # Reading data 191 | 192 | Data is read by calling the client method `read()` with the following input arguments: 193 | 194 | * `tags` : List of tagnames. Wildcards are not allowed. 195 | 196 | Tags with maps (relevant for some InfoPlus.21 servers) can be on the form `'tag;map'` , e.g. `'109-HIC005;CS A_AUTO'` . 197 | 198 | * `start_time` : Start of time period. 199 | * `end_time` : End of time period. 200 | 201 | Both `start_time` and `end_time` can be either datetime object or string. Strings are interpreted by the [Timestamp](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html) method from Pandas. Both timestamps can be left out when `read_type = ReaderType.SNAPSHOT` . However, when using either of the Web APIs, `end_time` provides the time at which the snapshot is taken. 202 | 203 | * `ts` : The interval between samples when querying interpolated or aggregated data. Ignored and can be left out when `read_type = ReaderType.SNAPSHOT` . **Default** 60 seconds. 204 | * `read_type` (optional): What kind of data to read. More info immediately below. **Default** Interpolated. 205 | * `get_status` (optonal): When set to `True` will fetch status information in addition to values. **Default** `False`. 206 | 207 | ## Selecting what to read 208 | 209 | By specifying the optional parameter `read_type` to `read()` , it is possible to specify what kind of data should be returned. The default query method is interpolated. All valid values for `read_type` are defined in the `utils.ReaderType` class (mirrored for convenience as `tagreader.ReaderType` ), although not all are currently implemented. Below is the list of implemented read types. 
210 | 211 | * `INT` : The raw data points are interpolated so that one new data point is generated at each step of length `ts` starting at `start_time` and ending at or less than `ts` seconds before `end_time` . 212 | * The following aggregated read types perform a weighted calculation of the raw data within each interval, using time-weighted calculations where applicable. Returned timestamps are anchored at the beginning of each interval. For example, for a 60-second interval from 08:11:00 to 08:12:00, the timestamp will be 08:11:00. 213 | + `MIN` : The minimum value. 214 | + `MAX` : The maximum value. 215 | + `AVG` : The average value. 216 | + `VAR` : The variance. 217 | + `STD` : The standard deviation. 218 | + `RNG` : The range (max-min). 219 | * `RAW` : Returns actual data points stored in the database. 220 | * `SNAPSHOT` : Returns the last recorded value. Only one tag can be read at a time. When using either of the Web API based handlers, providing `end_time` is possible in which case a snapshot at the specific time is returned. 221 | 222 | **Examples** 223 | 224 | Read interpolated data for the provided tag with 3-minute intervals between the two time stamps: 225 | 226 | ``` python 227 | import tagreader 228 | c = tagreader.IMSClient("PINO") 229 | c.connect() 230 | df = c.read(['BA:ACTIVE.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180) 231 | 232 | ``` 233 | 234 | Read the average value for the two provided tags within each 3-minute interval between the two time stamps: 235 | 236 | ``` python 237 | df = c.read(['BA:CONC.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180, read_type=tagreader.ReaderType.AVG) 238 | ``` 239 | 240 | ## Status information 241 | 242 | The optional parameter `get_status` was added to `IMSClient.read()` in release 2.6.0. If set to `True`, the resulting dataframe will be expanded with one additional column per tag. The column contains integer numbers that indicate the status, or quality, of the returned values. 243 | 244 | In an effort to unify the status value for all IMS types, the following schema based on AspenTech was selected: 245 | 246 | 0: Good 247 | 1: Suspect 248 | 2: Bad 249 | 4: Good/Modified 250 | 5: Suspect/Modified 251 | 6: Bad/Modified 252 | 253 | The status value is obtained differently for the four IMS types: 254 | * Aspen Web API: Read directly from the `l` ("Level") field in the json output. 255 | * Aspen ODBC: Read directly from the `status` field in the table. 256 | * PI Web API: Calculated as `Questionable` + 2 * (1 - `Good`) + 4 * `Substituted`. 257 | * PI ODBC: Calculated as `questionable` + 2 * (`status` != 0) + 4 * `substituted`. `status` is 0 for good, positive or negative for various reasons for being bad. 258 | 259 | For the two PI IMS types, it is assumed that `Questionable` is never `True` if `Good` is `False` or `status != 0`. This may be an incorrect assumption with resulting erroneous status value. 260 | 261 | In summary, here is the resulting status value from tagreader for different combinations of status field values from the IMS types: 262 |
263 | | tagreader | Aspen Web API | Aspen ODBC | PI Web API | PI ODBC |
264 | | :-------: | :-----------: | :--------: | ---------- | ---------- |
265 | | 0 | l = 0 | status = 0 | Good = True<br>Questionable = False<br>Substituted = False | status = 0<br>questionable = False<br>substituted = False |
266 | | 1 | l = 1 | status = 1 | Good = True<br>Questionable = True<br>Substituted = False | status = 0<br>questionable = True<br>substituted = False |
267 | | 2 | l = 2 | status = 2 | Good = False<br>Questionable = False<br>Substituted = False | status != 0<br>questionable = False<br>substituted = False |
268 | | 4 | l = 4 | status = 4 | Good = True<br>Questionable = False<br>Substituted = True | status = 0<br>questionable = False<br>substituted = True |
269 | | 5 | l = 5 | status = 5 | Good = True<br>Questionable = True<br>Substituted = True | status = 0<br>questionable = True<br>substituted = True |
270 | | 6 | l = 6 | status = 6 | Good = False<br>Questionable = False<br>Substituted = True | status != 0<br>questionable = False<br>substituted = True |
271 | 272 | Please keep in mind when using `get_status`: 273 | * This is an experimental feature. It may work as intended, or it may result in erroneous status values in some cases. If that happens, please create an issue. 274 | * Both how fetching status is activated and how it is returned may be changed at a later time. 275 | 276 | ## Caching results 277 | 278 | By default, a cache-file using the SQLite file format will be attached to the client upon client creation. Whenever `IMSClient.read()` is called, the cache is queried for existing data. Any data that is not already in the cache will be queried from the data source. The cache can significantly speed up queries, and it is therefore recommended to always keep it enabled. The cache file will be created on use. 279 | 280 | Data in the cache never expires. If the data for some reason becomes invalid, then the cache and data source will no longer produce the same data set. An existing cache file can safely be deleted at any time, at least as long as there is no ongoing query. 281 | 282 | If, for any reason, you want to disable the cache, simply set it to `None` . This can be done at any time, but is normally done before connecting to the server, like this: 283 | 284 | ``` python 285 | c = tagreader.IMSClient("PINO") 286 | c.cache = None 287 | c.connect() 288 | ``` 289 | 290 | Snapshots ( `read_type = ReaderType.SNAPSHOT` ) are of course never cached. 291 | 292 | **Note**: Raw `read_type = ReaderType.RAW` data values are currently not cached pending a rewrite of the caching mechanisms. 293 | **Note**: Cache will be default off from version 5. 294 | 295 | ## Time zones 296 | 297 | It is important to understand how Tagreader uses and interprets time zones. Queries to the backend servers are always performed in UTC time, and return data is also always in UTC. However, it is usually not convenient to ensure all time stamps are in UTC time. The client and handlers therefore have functionality for converting between UTC and user-specified time zones. 298 | 299 | There are two levels of determining which time zone input arguments should be interpreted as, and which time zone return data should be converted to: 300 | 301 | 1. Time zone aware input arguments will use their corresponding time zone. 302 | 2. Time zone naive input arguments are assumed to have time zone as provided by the client. 303 | 304 | The client-provided time zone can be specified with the optional `tz` argument (string, e.g. "*US/Central*") during client creation. If it is not specified, then the default value *Europe/Oslo* is used. Note that for the most common use case where Equinor employees want to fetch data from Norwegian assets and display them with Norwegian time stamps, nothing needs to be done. 305 | 306 | *Note:* It is a good idea to update the `pytz` package rather frequently (at least twice per year) to ensure that time zone information is up to date. `pip install --upgrade pytz` . 307 | 308 | **Example (advanced usage)** 309 | 310 | An employee in Houston is contacted by her colleague in Brazil about an event that she needs to investigate. The colleague identified the time of the event at July 20th 2020 at 15:05:00 Rio time. The Houston employee wishes to extract interpolated data with 60-second intervals and display the data in her local time zone. She also wishes to send the data to her Norwegian colleague with datestamps in Norwegian time.
One way of doing this is : 311 | 312 | ``` python 313 | import tagreader 314 | from datetime import datetime, timedelta 315 | from dateutil import tz 316 | c = tagreader.IMSClient("PINO", "pi", tz="US/Central") # Force output data to Houston time 317 | c.connect() 318 | tzinfo = tz.gettz("Brazil/East") # Generate timezone object for Rio local time 319 | event_time = datetime(2020, 7, 20, 15, 5, 0, tzinfo=tzinfo) 320 | start_time = event_time - timedelta(minutes=30) 321 | end_time = event_time + timedelta(minutes=10) 322 | df = c.read(["BA:CONC.1"], start_time, end_time, ts=60) 323 | df_to_Norway = df.tz_convert("Europe/Oslo") # Create a copy of the dataframe with Norwegian time stamps 324 | ``` 325 | 326 | # Fetching metadata 327 | 328 | Two client methods have been created to fetch basic metadata for one or more tags. 329 | 330 | ## get_units() 331 | 332 | Fetches the engineering unit(s) for the tag(s) provided. The argument `tags` can be either a single tagname as string, or a list of tagnames. 333 | 334 | ## get_description() 335 | 336 | Fetches the description(s) for the tag(s) provided. The argument `tags` can be either a single tagname as string, or a list of tagnames. 337 | 338 | **Example**: 339 | 340 | ``` python 341 | tags = ["BA:ACTIVE.1", "BA:LEVEL.1", "BA:CONC.1"] 342 | units = c.get_units(tags) 343 | desc = c.get_descriptions(tags) 344 | tag = "BA:CONC.1" 345 | df[tag].plot(grid=True, title=desc[tag]).set_ylabel(units[tag]) 346 | ``` 347 | -------------------------------------------------------------------------------- /documentation/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | -------------------------------------------------------------------------------- /documentation/.prettierrc.js: -------------------------------------------------------------------------------- 1 | // default config 2 | module.exports = { 3 | trailingComma: 'es5', 4 | tabWidth: 2, 5 | semi: false, 6 | singleQuote: true, 7 | } 8 | -------------------------------------------------------------------------------- /documentation/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus 2](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Installation 6 | 7 | ``` 8 | $ yarn 9 | ``` 10 | 11 | ### Local Development 12 | 13 | ``` 14 | $ yarn start 15 | ``` 16 | 17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 18 | 19 | ### Build 20 | 21 | ``` 22 | $ yarn build 23 | ``` 24 | 25 | This command generates static content into the `build` directory and can be served using any static contents hosting service. 26 | 27 | ### Deployment 28 | 29 | Using SSH: 30 | 31 | ``` 32 | $ USE_SSH=true yarn deploy 33 | ``` 34 | 35 | Not using SSH: 36 | 37 | ``` 38 | $ GIT_USER= yarn deploy 39 | ``` 40 | 41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. 
42 | -------------------------------------------------------------------------------- /documentation/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /documentation/docs/about/introduction.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | # Introduction 5 | 6 | Tagreader is a Python package for reading timeseries data from the OSIsoft PI and Aspen Infoplus.21 7 | Information Management Systems (IMS). It is intended to be easy to use, and present as similar interfaces 8 | as possible to the backend historians. 9 | 10 | ## System requirements 11 | The only requirements are Python >= 3.8, with Windows, Linux or macOS. 12 | 13 | ## Installation 14 | You can install tagreader directly into your project from pypi by using pip 15 | or another package manager. 16 | 17 | ```shell 18 | pip install tagreader 19 | ``` 20 | 21 | The following are required and will be installed: 22 | 23 | * pandas 24 | * requests 25 | * requests-kerberos 26 | * certifi 27 | * diskcache 28 | 29 | ## Usage 30 | Tagreader is easy to use for both Equinor internal IMS services, and non-internal usage. For non-internal usage 31 | you simply need to provide the corresponding IMS service URLs and IMSType. See [data source](usage/data-source.md) for details. 32 | 33 | ### Usage example 34 | ```python 35 | import tagreader 36 | c = tagreader.IMSClient("mysource", "aspenone") 37 | print(c.search("tag*")) 38 | df = c.read_tags(["tag1", "tag2"], "18.06.2020 08:00:00", "18.06.2020 09:00:00", 60) 39 | ``` 40 | 41 | ### Jupyter Notebook Quickstart 42 | Jupyter Notebook examples can be found in /examples. In order to run these examples, you need to install the 43 | optional dependencies. 44 | 45 | ```shell 46 | pip install tagreader[notebooks] 47 | ``` 48 | 49 | The quickstart Jupyter Notebook can be found [here](https://github.com/equinor/tagreader-python/blob/main/examples/quickstart.ipynb) 50 | 51 | For more details, see the [Usage section](/docs/about/usage/basic-usage). 52 | 53 | ## Contribute 54 | As Tagreader is an open source project, all contributions are welcome. This includes code, bug reports, issues, 55 | feature requests, and documentation. The preferred way of submitting a contribution is to either create an issue on 56 | GitHub or to fork the project and make a pull request. 57 | 58 | To start contributing, see the [contribute section](../contribute/how-to-start-contributing.md) 59 | 60 | -------------------------------------------------------------------------------- /documentation/docs/about/usage/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Usage", 3 | "position": 4, 4 | "collapsed": false, 5 | "link": { 6 | "type": "generated-index", 7 | "description": "Examples of tagreader usage."
8 | } 9 | } 10 | -------------------------------------------------------------------------------- /documentation/docs/about/usage/basic-usage.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | # Basic usage 5 | 6 | The module is imported with 7 | 8 | ``` python 9 | import tagreader 10 | ``` 11 | 12 | ## The Client 13 | 14 | The client presents the interface for communicating with the data source to the user. The interface shall be as unified 15 | as possible, regardless of the IMS type that is used. A handler object specifically designed for each IMS type is 16 | attached to the client when the client is created. The handler is responsible for handling the communication and data 17 | interpretation between the server and the client object. 18 | 19 | :::info SSL verification 20 | 21 | Equinor root certificates are automatically added when using an Equinor Managed computer, which allow SSL verification. 22 | 23 | For non-Equinor users: If you run info SSL verification errors and prefer to not set `verifySSL=False` , 24 | you can try the procedure outlined [here](https://incognitjoe.github.io/adding-certs-to-requests.html). 25 | ::: 26 | 27 | :::info ODBC support 28 | Tagreader as of version 5 does no longer support ODBC clients, which has been deprecated in favor of REST services. 29 | To use ODBC, please refer to [Tagreader v4 on PyPI](https://pypi.org/project/tagreader/#history). 30 | Versioned documentation is available in the source code on [GitHub Releases](https://github.com/equinor/tagreader-python/releases). 31 | 32 | Use at your own discretion. 33 | ::: 34 | 35 | ## Creating a client 36 | 37 | A connection to a data source is prepared by creating an instance of `tagreader.IMSClient` with the following input 38 | arguments: 39 | 40 | * `datasource` : Name of data source 41 | * `imstype` : The name of the [IMS type](/docs/about/usage/data-source) to query. Indicates the type of data source 42 | that is requested, and therefore determines which handler type to use. Valid values are 43 | `piwebapi` and `aspenone`. 44 | 45 | * `tz` (optional): Time zone naive time stamps will be interpreted as belonging to this time zone. Similarly, 46 | the returned data points will be localized to this time zone. **Default**: _"Europe/Oslo"_. 47 | 48 | The following input arguments can be used when connecting to either `piwebapi` or to `aspenone`. None of these 49 | should be necessary to supply when connecting to Equinor servers. 50 | 51 | * `url` (optional): Path to server root, e.g. _"https://aspenone/ProcessData/AtProcessDataREST.dll"_ 52 | or _"https://piwebapi/piwebapi"_. **Default**: Path to Equinor server corresponding to selected `imstype`. 53 | * `verifySSL` (optional): Whether to verify SSL certificate sent from server. **Default**: `True`. 54 | * `auth` (optional): Auth object to pass to the server for authentication. **Default**: Kerberos-based auth object 55 | that works with Equinor servers. 56 | * `cache` (optional): [Cache](caching.md) data locally in order to avoid re-reading the same data multiple times. 57 | 58 | ## Connecting to data source 59 | 60 | After creating the client as described above, connect to the server with the `connect()` method. 
61 | 62 | **Example** 63 | 64 | Connecting to the PINO PI data source using REST Web API: 65 | 66 | ``` python 67 | c = tagreader.IMSClient("PINO", "piwebapi") 68 | c.connect() 69 | ``` 70 | 71 | Connecting to the Peregrino IP.21 data source using AspenTech Process Data REST Web API, specifying that all naive time 72 | stamps as well as the returned data shall use Rio local time, and using the local endpoint in Brazil: 73 | 74 | ``` python 75 | c = tagreader.IMSClient(datasource="PER", 76 | imstype="aspenone", 77 | tz="Brazil/East", 78 | url="https://aspenone-per.equinor.com/ProcessExplorer/ProcessData/AtProcessDataREST.dll") 79 | c.connect() 80 | ``` 81 | 82 | Connecting to some other AspenTech Web API URL using NTLM authentication instead of default Kerberos and ignoring the 83 | server's host certificate: 84 | 85 | ``` python 86 | import getpass 87 | from requests_ntlm import HttpNtlmAuth 88 | user = "mydomain\\" + getpass.getuser() 89 | pwd = getpass.getpass() 90 | auth = HttpNtlmAuth(user, pwd) 91 | c = tagreader.IMSClient(datasource="myplant", 92 | url="https://api.mycompany.com/aspenone", 93 | imstype="aspenone", 94 | auth=auth, 95 | verifySSL=False) 96 | c.connect() 97 | ``` 98 | 99 | ## Searching for tags 100 | 101 | The client method `search()` can be used to search for tags using either tag name, tag description or both. 102 | 103 | Supply at least one of the following arguments: 104 | 105 | * `tag` : Name of tag 106 | * `desc` : Description of tag 107 | 108 | If both arguments are provided, the both must match. 109 | 110 | `*` can be used as wildcard. 111 | 112 | **Examples** 113 | 114 | ``` python 115 | c = tagreader.IMSClient("PINO", "piwebapi") 116 | c.connect() 117 | c.search("cd*158") 118 | c.search(desc="*reactor*") 119 | c.search(tag="BA:*", desc="*Temperature*") 120 | ``` 121 | 122 | ## Reading data 123 | 124 | Data is read by calling the client method `read()` with the following input arguments: 125 | 126 | * `tags` : List of tagnames. Wildcards are not allowed. 127 | 128 | Tags with maps (relevant for some InfoPlus.21 servers) can be on the form `'tag;map'` , e.g. `'109-HIC005;CS A_AUTO'`. 129 | 130 | * `start_time` : Start of time period. 131 | * `end_time` : End of time period. 132 | 133 | Both `start_time` and `end_time` can be either datetime object or string. Strings are interpreted by the 134 | * [Timestamp](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html) method from Pandas. 135 | Both timestamps can be left out when `read_type = ReaderType.SNAPSHOT` . However, when using either of the Web APIs, `end_time` provides the time at which the snapshot is taken. 136 | 137 | * `ts` : The interval between samples when querying interpolated or aggregated data. Ignored and can be left out when 138 | `read_type = ReaderType.SNAPSHOT` . **Default** 60 seconds. 139 | * `read_type` (optional): What kind of data to read. More info immediately below. **Default** Interpolated. 140 | * `get_status` (optonal): When set to `True` will fetch status information in addition to values. **Default** `False`. 141 | 142 | ## Selecting what to read 143 | 144 | By specifying the optional parameter `read_type` to `read()` , it is possible to specify what kind of data should be 145 | returned. The default query method is interpolated. All valid values for `read_type` are defined in the 146 | `utils.ReaderType` class (mirrored for convenience as `tagreader.ReaderType` ), although not all are currently 147 | implemented. 
142 | ## Selecting what to read
143 |
144 | By specifying the optional parameter `read_type` to `read()` , it is possible to specify what kind of data should be
145 | returned. The default query method is interpolated. All valid values for `read_type` are defined in the
146 | `utils.ReaderType` class (mirrored for convenience as `tagreader.ReaderType` ), although not all are currently
147 | implemented. Below is the list of implemented read types.
148 |
149 | * `INT` : The raw data points are interpolated so that one new data point is generated at each step of length `ts`
150 |   starting at `start_time` and ending at or less than `ts` seconds before `end_time` .
151 | * The following aggregated read types perform a weighted calculation of the raw data within each interval.
152 |   Where relevant, time-weighted calculations are used. Returned time stamps are anchored at the beginning of each
153 |   interval. So for the 60 seconds long interval between 08:11:00 and 08:12:00, the time stamp will be 08:11:00.
154 |   + `MIN` : The minimum value.
155 |   + `MAX` : The maximum value.
156 |   + `AVG` : The average value.
157 |   + `VAR` : The variance.
158 |   + `STD` : The standard deviation.
159 |   + `RNG` : The range (max-min).
160 | * `RAW` : Returns actual data points stored in the database.
161 | * `SNAPSHOT` : Returns the last recorded value. Only one tag can be read at a time. When using either of the Web API
162 |   based handlers, providing `end_time` is possible in which case a snapshot at the specific time is returned.
163 |
164 | **Examples**
165 |
166 | Read interpolated data for the provided tag with 3-minute intervals between the two time stamps:
167 |
168 | ``` python
169 | c = tagreader.IMSClient("PINO", "piwebapi")
170 | c.connect()
171 | df = c.read(['BA:ACTIVE.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180)
172 |
173 | ```
174 |
175 | Read the average value for the provided tag within each 3-minute interval between the two time stamps:
176 |
177 | ``` python
178 | df = c.read(['BA:CONC.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180, read_type=tagreader.ReaderType.AVG)
179 | ```
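A `SNAPSHOT` read differs from the examples above in that the time arguments are optional. A minimal sketch:

``` python
# Latest recorded value for a single tag. With the Web API based handlers, an
# end_time may be supplied to get the snapshot at that point in time instead.
df = c.read(['BA:ACTIVE.1'], read_type=tagreader.ReaderType.SNAPSHOT)
```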
180 |
181 | ## Status information
182 |
183 | The optional parameter `get_status` was added to `IMSClient.read()` in release 2.6.0. If set to `True`, the resulting
184 | dataframe will be expanded with one additional column per tag. The column contains integer numbers that indicate the
185 | status, or quality, of the returned values.
186 |
187 | In an effort to unify the status value for all IMS types, the following schema based on AspenTech was selected:
188 |
189 | 0: Good
190 | 1: Suspect
191 | 2: Bad
192 | 4: Good/Modified
193 | 5: Suspect/Modified
194 | 6: Bad/Modified
195 |
196 | The status value is obtained differently for the two IMS types:
197 | * Aspen Web API: Read directly from the `l` ("Level") field in the json output.
198 | * PI Web API: Calculated as `Questionable` + 2 * (1 - `Good`) + 4 * `Substituted`.
199 |
200 |
201 | For PI Web API, it is assumed that `Questionable` is never `True` if `Good` is `False` or `status != 0`.
202 | This may be an incorrect assumption with resulting erroneous status values.
203 |
204 | In summary, here is the resulting status value from tagreader for different combinations of status field values from
205 | the IMS types:
206 |
207 | | tagreader | Aspen Web API | PI Web API |
208 | |:---------:|:-------------:|:--------------------------------------------------------------------|
209 | | 0 | l = 0 | Good = True<br />Questionable = False<br />Substituted = False |
210 | | 1 | l = 1 | Good = True<br />Questionable = True<br />Substituted = False |
211 | | 2 | l = 2 | Good = False<br />Questionable = False<br />Substituted = False |
212 | | 4 | l = 4 | Good = True<br />Questionable = False<br />Substituted = True |
213 | | 5 | l = 5 | Good = True<br />Questionable = True<br />Substituted = True |
214 | | 6 | l = 6 | Good = False<br />Questionable = False<br />Substituted = True |
215 |
216 | Please keep in mind when using `get_status`:
217 | * This is an experimental feature. It may work as intended, or it may result in erroneous status values in some cases.
218 |   If that happens, please create an issue.
219 | * Both how fetching status is activated and how it is returned may be changed at a later time.
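For illustration, a minimal sketch of requesting status information (inspect the returned dataframe for the exact column naming, which may change):

``` python
df = c.read(['BA:CONC.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180, get_status=True)
print(df.columns)  # one value column plus one additional status column for the tag
```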
--------------------------------------------------------------------------------
/documentation/docs/about/usage/caching.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 4
3 | ---
4 |
5 | # Caching results
6 |
7 | It is possible to cache data locally using SQLite files that will be attached to the client upon client creation. Whenever
8 | `IMSClient.read()` is called, the cache is queried for existing data. Any data that is not already in the cache will be
9 | queried from the data source. The cache can significantly speed up queries, and it is therefore recommended to always
10 | have it enabled. The cache file will be created on use.
11 |
12 | Data in the cache never expires. If the data for some reason becomes invalid, then the cache and data source will no
13 | longer produce the same data set. An existing cache file can safely be deleted at any time, at least as long as there
14 | is no ongoing query.
15 |
16 | If, for any reason, you want to disable the cache, simply set it to the default value `None`.
17 |
18 | ``` python
19 | c = tagreader.IMSClient("PINO", "piwebapi", cache=None)
20 | c.connect()
21 | ```
22 |
23 | If you want to cache data, we recommend using the provided SmartCache like this:
24 |
25 | ``` python
26 | from pathlib import Path
27 | from tagreader.cache import SmartCache
28 |
29 | c = tagreader.IMSClient("PINO", "piwebapi", cache=SmartCache(directory=Path(".cache")))
30 | c.connect()
31 | ```
32 |
33 | Snapshots ( `read_type = ReaderType.SNAPSHOT` ) are of course never cached.
34 |
35 | **Note**: Raw `read_type = ReaderType.RAW` data values are currently not cached pending a rewrite of the caching
36 | mechanisms.
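`tagreader.cache` also provides a `BucketCache`, which keeps track of the exact time intervals it has already stored so that only missing intervals are fetched from the server. A minimal sketch, assuming the same `directory` argument as `SmartCache`:

``` python
from pathlib import Path
from tagreader.cache import BucketCache

c = tagreader.IMSClient("PINO", "piwebapi", cache=BucketCache(directory=Path(".cache")))
c.connect()
```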
--------------------------------------------------------------------------------
/documentation/docs/about/usage/data-source.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 2
3 | ---
4 |
5 | # Data sources
6 |
7 | Tagreader supports connecting to PI and IP.21 servers using Web API interfaces. When calling certain
8 | methods, the user will need to tell tagreader which system and which connection method to use. This input argument is
9 | called `imstype` , and can be one of the following case-insensitive strings:
10 |
11 | * `piwebapi` : For connecting to OSISoft PI Web API
12 | * `aspenone` : For connecting to AspenTech Process Data REST Web API
13 |
14 | ## Listing available data sources
15 |
16 | The method `tagreader.list_sources()` can be used to query for available PI and IP.21 servers. Input arguments:
17 |
18 | * `imstype` : The name of the IMS type to query. Valid values: `piwebapi` and `aspenone`.
19 |
20 | The following input arguments are only relevant when calling `list_sources()` with a Web API `imstype` ( `piwebapi` or
21 | `aspenone` ):
22 |
23 | * `url` (optional): Path to server root, e.g. _"https://aspenone/ProcessData/AtProcessDataREST.dll"_ or
24 |   _"https://piwebapi/piwebapi"_. **Default**: Path to Equinor server corresponding to selected `imstype` if
25 |   `imstype` is `piwebapi` or `aspenone` .
26 | * `verifySSL` (optional): Whether to verify SSL certificate sent from server. **Default**: `True`.
27 | * `auth` (optional): Auth object to pass to the server for authentication. **Default**: Kerberos-based auth objects
28 |   that work with Equinor servers. If not connecting to an Equinor server, you may have to create your own auth.
29 |
30 | In tagreader version 4 and earlier, which also supported the now-removed ODBC `imstype` values, calling `list_sources()`
31 | with `imstype` set to `ip21` would search the registry at *HKEY_CURRENT_USER\Software\AspenTech\ADSA\Caches\AspenADSA\{username}*
32 | for available IP.21 servers. Similarly, calling it with `imstype` set to `pi` would search
33 | *HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\PISystem\PI-SDK* for available PI servers. Servers found through the registry
34 | are normally servers to which the user is authorized, and do not necessarily include all available data sources in the organization.
35 |
36 | **Example:**
37 |
38 | ``` python
39 | from tagreader import list_sources
40 | list_sources("aspenone")
41 | list_sources("piwebapi")
42 | ```
43 |
44 | When called with `imstype` set to `piwebapi` or `aspenone` , `list_sources()` will connect to the web server URL and
45 | query for the available list of data sources. This list is normally the complete set of data sources available on the
46 | server, and does not indicate whether the user is authorized to query the source or not.
47 |
48 | When querying Equinor Web API for data sources, `list_sources()` should require no input argument except
49 | `imstype="piwebapi"` or `imstype="aspenone"`. For non-Equinor servers, `url` will need to be specified, as may `auth`
50 | and `verifySSL` .
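As a sketch of the non-Equinor case (the URL below is a placeholder for your own server root; pass an `auth` object if the default Kerberos authentication does not apply):

``` python
from tagreader import list_sources

sources = list_sources("piwebapi", url="https://piwebapi.example.com/piwebapi", verifySSL=True)
```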
--------------------------------------------------------------------------------
/documentation/docs/about/usage/fetching-metadata.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 3
3 | ---
4 |
5 | # Fetching metadata
6 |
7 | Two client methods have been created to fetch basic metadata for one or more tags.
8 |
9 | ### get_units()
10 |
11 | Fetches the engineering unit(s) for the tag(s) provided. The argument `tags` can be either a single tagname as string,
12 | or a list of tagnames.
13 |
14 | ### get_descriptions()
15 |
16 | Fetches the description(s) for the tag(s) provided. The argument `tags` can be either a single tagname as string,
17 | or a list of tagnames.
18 |
19 | **Example**:
20 |
21 | ``` python
22 | tags = ["BA:ACTIVE.1", "BA:LEVEL.1", "BA:CONC.1"]
23 | units = c.get_units(tags)
24 | desc = c.get_descriptions(tags)
25 | tag = "BA:CONC.1"
26 | df[tag].plot(grid=True, title=desc[tag]).set_ylabel(units[tag])
27 | ```
28 |
--------------------------------------------------------------------------------
/documentation/docs/about/usage/time-zone.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 6
3 | ---
4 |
5 | # Time zones
6 |
7 | It is important to understand how Tagreader uses and interprets time zones. Queries to the backend servers are always
8 | performed in UTC time, and returned data is also always in UTC. However, it is usually not convenient to ensure all time
9 | stamps are in UTC time. The client and handlers therefore have functionality for converting between UTC and
10 | user-specified time zones.
11 |
12 | There are two levels of determining which time zone input arguments should be interpreted as, and which time zone the
13 | returned data should be converted to:
14 |
15 | 1. Time zone aware input arguments will use their corresponding time zone.
16 | 2. Time zone naive input arguments are assumed to have time zone as provided by the client.
17 |
18 | The client-provided time zone can be specified with the optional `tz` argument (string, e.g. "*US/Central*") during
19 | client creation. If it is not specified, then the default value *Europe/Oslo* is used. Note that for the most common
20 | use case where Equinor employees want to fetch data from Norwegian assets and display them with Norwegian time stamps,
21 | nothing needs to be done.
22 |
23 | *Note:* It is a good idea to update the `pytz` package rather frequently (at least twice per year) to ensure that time
24 | zone information is up-to-date. `pip install --upgrade pytz` .
25 |
26 | **Example (advanced usage)**
27 |
28 | An employee in Houston is contacted by her colleague in Brazil about an event that she needs to investigate.
29 | The colleague identified the time of the event as July 20th 2020 at 15:05:00 Rio time. The Houston employee wishes to
30 | extract interpolated data with 60-second intervals and display the data in her local time zone. She also wishes to send
31 | the data to her Norwegian colleague with time stamps in Norwegian time. One way of doing this is:
32 |
33 | ``` python
34 | import tagreader
35 | from datetime import datetime, timedelta
36 | from dateutil import tz
37 | c = tagreader.IMSClient("PINO", "piwebapi", tz="US/Central")  # Force output data to Houston time
38 | c.connect()
39 | tzinfo = tz.gettz("Brazil/East")  # Generate timezone object for Rio local time
40 | event_time = datetime(2020, 7, 20, 15, 5, 0, tzinfo=tzinfo)
41 | start_time = event_time - timedelta(minutes=30)
42 | end_time = event_time + timedelta(minutes=10)
43 | df = c.read(["BA:CONC.1"], start_time, end_time, ts=60)
44 | df_to_Norway = df.tz_convert("Europe/Oslo")  # Create a copy of the dataframe with Norwegian time stamps
45 | ```
--------------------------------------------------------------------------------
/documentation/docs/contribute/_category_.yaml:
--------------------------------------------------------------------------------
1 | position: 5
2 | collapsed: true
3 |
--------------------------------------------------------------------------------
/documentation/docs/contribute/development-guide/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Developer guide",
3 |   "position": 4,
4 |   "link": {
5 |     "type": "generated-index",
6 |     "description": "This section of the documentation lists instructions and guidelines on how to start developing"
7 |   }
8 | }
9 |
--------------------------------------------------------------------------------
/documentation/docs/contribute/development-guide/publishing.md:
--------------------------------------------------------------------------------
1 | # Publishing
2 | TBD
--------------------------------------------------------------------------------
/documentation/docs/contribute/development-guide/setup.md:
--------------------------------------------------------------------------------
1 | # Setup
2 |
3 | ```mdx-code-block
4 | import TabItem from '@theme/TabItem';
5 | import Tabs from '@theme/Tabs';
6 | ```
7 |
8 | ## Prerequisites
9 |
10 | To work with the tagreader code you'll need to install:
11 |
12 | Python >=3.8 with the following packages:
13 |
14 | * pandas >= 1.0.0
15 | * requests
16 | * requests-kerberos
17 | * certifi >= 2023.5.7
18 | * diskcache
19 | * pyodbc (If using ODBC connection)
20 |
21 |
:::info ODBC Connection 22 | If using ODBC connections, you must also install proprietary drivers for PI ODBC and/or Aspen IP.21 SQLPlus. These 23 | drivers are only available for Microsoft Windows. Pyodbc will therefore not be installed for non-Windows systems. 24 | ::: 25 | 26 | ## Pre-commit 27 | 28 | When contributing to this project, pre-commits are necessary, as they run certain tests, sanitisers, and formatters. 29 | 30 | The project provides a `.pre-commit-config.yaml` file that is used to set up git _pre-commit hooks_. 31 | 32 | On commit locally, code is automatically formatted and checked for security vulnerabilities using pre-commit git hooks. 33 | 34 | ### Installation 35 | 36 | To initialize pre-commit in your local repository, run 37 | 38 | ```shell 39 | pre-commit install 40 | ``` 41 | 42 | This tells pre-commit to run for this repository on every commit. 43 | 44 | ### Usage 45 | 46 | Pre-commit will run on every commit, but can also be run manually on all files: 47 | 48 | ```shell 49 | pre-commit run --all-files 50 | ``` 51 | 52 | Pre-commit tests can be skipped on commits with `git commit --no-verify`. 53 | 54 | :::caution 55 | If you have to skip the pre-commit tests, you're probably doing something you're not supposed to, and whoever commits after you might have to fix your "mistakes". Consider updating the pre-commit hooks if your code is non-compliant. 56 | ::: 57 | 58 | ### Install Poetry 59 | 60 | Poetry is used to manage Python package dependencies. 61 | 62 | ```shell 63 | $ pip install poetry 64 | ``` 65 | 66 | The installation instructions can be found [here](https://python-poetry.org/docs/#installation). 67 | 68 | ### Install packages 69 | 70 | ```shell 71 | $ poetry install 72 | ``` 73 | -------------------------------------------------------------------------------- /documentation/docs/contribute/development-guide/testing.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | TBD -------------------------------------------------------------------------------- /documentation/docs/contribute/development-guide/upgrading.md: -------------------------------------------------------------------------------- 1 | # Upgrading 2 | 3 | TBD -------------------------------------------------------------------------------- /documentation/docs/contribute/documentation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Documentation 3 | sidebar_position: 5 4 | --- 5 | # Documentation 6 | 7 | This site was generated from the contents of your `documentation` folder using [Docusaurus,](https://docusaurus.io/) and we host it on GitHub Pages. 8 | 9 | ## How it works 10 | 11 | From Docusaurus own documentation: 12 | > Docusaurus is a static-site generator. It builds a single-page application with fast client-side navigation, leveraging the full power of React to make your site interactive. It provides out-of-the-box documentation features but can be used to create any kind of site (personal website, product, blog, marketing landing pages, etc). 13 | 14 | While Docusaurus is rich on features, we use it mostly to host markdown pages. The main bulk of the documentation is located in `documentation/docs`. 15 | 16 | ## Publishing 17 | 18 | We are using the GitHub Action [`publish-docs.yaml`](https://github.com/equinor/awt/blob/main/.github/workflows/publish-docs.yaml) to build and publish the documentation website. This action is run every time someone pushes to the `main` branch. 
19 | 20 | This will check out the code, download the changelog from the `generate-changelog.yaml` action, and build the documentation. Then it will deploy the documentation (placed in the documentation/build/ folder) to GitHub Pages. 21 | 22 | ## Initial settings 23 | 24 | When deployed to GitHub Pages, you do need to configure your site under the settings. Pick the gh-pages branch and select either a private url or a public one. It will show you the site’s url, which should now contain your generated documentation site. 25 | 26 | ## Assets 27 | 28 | All assets files are places under `documentation/static` 29 | -------------------------------------------------------------------------------- /documentation/docs/contribute/ground-rules.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Ground rules 3 | sidebar_position: 3 4 | --- 5 | # Ground rules 6 | 7 | * Always make sure that the main branch is ready to deploy. If something break, we fix it as soon as possible. 8 | 9 | ## Pull Requests 10 | 11 | Please try to make your pull request easy to review for us. 12 | 13 | * Make small pull requests. The smaller, the faster to review and the more likely it will be merged soon. 14 | 15 | :::tip GitHub Tips 16 | When creating a Pull Request on GitHub, you can add Closes #<Issue number>. When you merge the PR, the issue 17 | will automatically be closed. 18 | ::: 19 | ## Git commit format 20 | 21 | We use [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/#summary) 22 | -------------------------------------------------------------------------------- /documentation/docs/contribute/how-to-start-contributing.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Get started 3 | sidebar_position: 2 4 | --- 5 | 6 | # How to start contributing 7 | 8 | Welcome! We are glad that you want to contribute to our project! 💖 9 | 10 | This project accepts contributions via Github pull requests. 11 | 12 | This document outlines the process to help get your contribution accepted. 13 | 14 | There are many ways to contribute: 15 | 16 | * Suggest [Features](https://github.com/equinor/awt/issues/new?assignees=&labels=type%3A+%3Abulb%3A+feature+request&template=feature-request.md&title=) 17 | * Suggest [Changes](https://github.com/equinor/awt/issues/new?assignees=&labels=type%3A+%3Awrench%3A+maintenance&template=code-maintenance.md&title=) 18 | * Report [Bugs](https://github.com/equinor/awt/issues/new?assignees=&labels=type%3A+%3Abug+bug&template=bug-report.md&title=) 19 | 20 | You can start by looking through the current [Issues](https://github.com/equinor/awt/issues). 21 | 22 | ## Workflow 23 | 24 | 1. Create a new branch 25 | 2. Do work 26 | 3. Create a Pull Request 27 | 4. Review Pull Request until accepted 28 | 5. Rebase and merge on main 29 | 30 | ## Code review process 31 | 32 | The core team looks at pull requests on a regular basis. After feedback has been given we expect responses within three weeks. After three weeks we may close the pull request if it isn't showing any activity. 
-------------------------------------------------------------------------------- /documentation/docs/contribute/overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Overview 3 | sidebar_position: 1 4 | --- 5 | 6 | # Overview 7 | 8 | ## Getting started 9 | 10 | For setting up a development environment, see the [development guide](development-guide/setup.md) 11 | 12 | For starting contributing, see the [contribute section](how-to-start-contributing.md). 13 | 14 | ## Project structure 15 | 16 | Here’s how the app is organized. 17 | 18 | ``` 19 | ├── .github/ - GitHub Actions and more 20 | ├── tagreader/ - The source code 21 | ├── tests/ - Tests 22 | │── documentation/ - Documentation 23 | └── ... 24 | ``` -------------------------------------------------------------------------------- /documentation/docusaurus.config.js: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | // Note: type annotations allow type checking and IDEs autocompletion 3 | 4 | import { themes } from 'prism-react-renderer'; 5 | import simplePlantUML from '@akebifiky/remark-simple-plantuml' 6 | import math from 'remark-math'; 7 | import katex from 'rehype-katex'; 8 | 9 | export default { 10 | title: 'Tagreader', 11 | tagline: 'Tagreader.', 12 | url: 'https://awt.app.radix.equinor.com/', 13 | baseUrl: '/tagreader-python/', 14 | onBrokenLinks: 'throw', 15 | onBrokenMarkdownLinks: 'warn', 16 | favicon: 'img/favicon.png', 17 | markdown: { 18 | mermaid: true 19 | }, 20 | themes: [ 21 | '@docusaurus/theme-mermaid' 22 | ], 23 | 24 | // GitHub pages deployment config. 25 | // If you aren't using GitHub pages, you don't need these. 26 | organizationName: 'equinor', // Usually your GitHub org/username. 27 | projectName: 'tagreader', // Usually your repo name. 28 | deploymentBranch: 'gh-pages', 29 | 30 | // Even if you don't use internalization, you can use this field to set useful 31 | // metadata like html lang. For example, if your site is Chinese, you may want 32 | // to replace "en" with "zh-Hans". 
33 | i18n: { 34 | defaultLocale: 'en', 35 | locales: ['en'], 36 | }, 37 | 38 | plugins: [ 39 | 40 | ], 41 | 42 | presets: [ 43 | [ 44 | 'classic', 45 | /** @type {import('@docusaurus/preset-classic').Options} */ 46 | ({ 47 | docs: { 48 | sidebarPath: require.resolve('./sidebars.js'), 49 | editUrl: 50 | 'https://github.com/equinor/tagreader-python/tree/main/documentation/', 51 | remarkPlugins: [simplePlantUML, math], 52 | rehypePlugins: [katex], 53 | }, 54 | blog: false, 55 | theme: { 56 | customCss: require.resolve('./src/css/custom.css'), 57 | }, 58 | }), 59 | ], 60 | ], 61 | 62 | themeConfig: 63 | /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ 64 | ({ 65 | navbar: { 66 | title: 'Tagreader', 67 | logo: { 68 | alt: 'Equinor Logo', 69 | src: 'img/logo.svg', 70 | }, 71 | items: [ 72 | { 73 | type: 'docSidebar', 74 | sidebarId: 'about', 75 | position: 'left', 76 | label: 'Docs', 77 | }, 78 | { 79 | type: 'docSidebar', 80 | sidebarId: 'contribute', 81 | position: 'left', 82 | label: 'Contribute', 83 | }, 84 | { 85 | href: 'https://github.com/equinor/tagreader-python', 86 | label: 'GitHub', 87 | position: 'right', 88 | }, 89 | ], 90 | }, 91 | footer: { 92 | style: 'dark', 93 | links: [ 94 | { 95 | title: 'Docs', 96 | items: [ 97 | { 98 | label: 'Docs', 99 | to: '/docs/about/introduction', 100 | }, 101 | { 102 | label: 'Contribute', 103 | to: '/docs/contribute/how-to-start-contributing', 104 | }, 105 | ], 106 | }, 107 | { 108 | title: 'More', 109 | items: [ 110 | { 111 | label: 'GitHub', 112 | href: 'https://github.com/equinor/tagreader', 113 | }, 114 | { 115 | label: 'PyPi', 116 | href: 'https://pypi.org/project/tagreader/', 117 | }, 118 | ], 119 | }, 120 | ], 121 | copyright: `Built with Docusaurus.`, 122 | }, 123 | prism: { 124 | theme: themes.github, 125 | darkTheme: themes.dracula, 126 | }, 127 | colorMode: { 128 | defaultMode: 'dark', 129 | disableSwitch: false, 130 | respectPrefersColorScheme: true, 131 | } 132 | }), 133 | stylesheets: [ 134 | { 135 | href: 'https://cdn.jsdelivr.net/npm/katex@0.13.24/dist/katex.min.css', 136 | type: 'text/css', 137 | integrity: 138 | 'sha384-odtC+0UGzzFL/6PNoE8rX/SPcQDXBJ+uRepguP4QkPCm2LBxH3FA3y+fKSiJ+AmM', 139 | crossorigin: 'anonymous', 140 | }, 141 | ], 142 | } 143 | -------------------------------------------------------------------------------- /documentation/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "documentation", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids", 15 | "typecheck": "tsc" 16 | }, 17 | "dependencies": { 18 | "@akebifiky/remark-simple-plantuml": "^1.0.2", 19 | "@docusaurus/core": "^3.6.3", 20 | "@docusaurus/preset-classic": "^3.6.3", 21 | "@docusaurus/theme-mermaid": "^3.6.3", 22 | "@mdx-js/react": "^3.1.0", 23 | "prism-react-renderer": "^2.4.1", 24 | "react": "^18.3.1", 25 | "react-dom": "^18.3.1", 26 | "rehype-katex": "7.0.1", 27 | "remark-math": "6.0.0" 28 | }, 29 | "devDependencies": { 30 | "@tsconfig/docusaurus": "^2.0.3", 31 | "typescript": "^5.6.2" 32 | }, 33 | "resolutions": { 34 | "cookie": "0.7.0", 35 | "path-to-regexp": "0.1.12", 36 | "cross-spawn": "7.0.5", 37 | "nanoid": 
"3.3.8", 38 | "body-parser": "1.20.3", 39 | "express": "4.20.0", 40 | "micromatch": "4.0.8" 41 | }, 42 | 43 | "browserslist": { 44 | "production": [ 45 | ">0.5%", 46 | "not dead", 47 | "not op_mini all" 48 | ], 49 | "development": [ 50 | "last 1 chrome version", 51 | "last 1 firefox version", 52 | "last 1 safari version" 53 | ] 54 | }, 55 | "engines": { 56 | "node": ">=20.0" 57 | }, 58 | "packageManager": "yarn@1.22.19+sha1.4ba7fc5c6e704fce2066ecbfb0b0d8976fe62447" 59 | } 60 | -------------------------------------------------------------------------------- /documentation/sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 8 | 9 | Create as many sidebars as you want. 10 | */ 11 | 12 | // @ts-check 13 | 14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ 15 | const sidebars = { 16 | // By default, Docusaurus generates a sidebar from the docs folder structure 17 | about: [{type: 'autogenerated', dirName: 'about'}], 18 | contribute: [{type: 'autogenerated', dirName: 'contribute'}], 19 | 20 | // But you can create a sidebar manually 21 | /* 22 | tutorialSidebar: [ 23 | 'intro', 24 | 'hello', 25 | { 26 | type: 'category', 27 | label: 'Tutorial', 28 | items: ['tutorial-basics/create-a-document'], 29 | }, 30 | ], 31 | */ 32 | }; 33 | 34 | module.exports = sidebars; 35 | -------------------------------------------------------------------------------- /documentation/src/css/custom.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Any CSS included here will be global. The classic template 3 | * bundles Infima by default. Infima is a CSS framework designed to 4 | * work well for content-centric websites. 5 | */ 6 | 7 | /* You can override the default Infima variables here. */ 8 | :root { 9 | --ifm-color-primary: #2e8555; 10 | --ifm-color-primary-dark: #29784c; 11 | --ifm-color-primary-darker: #277148; 12 | --ifm-color-primary-darkest: #205d3b; 13 | --ifm-color-primary-light: #33925d; 14 | --ifm-color-primary-lighter: #359962; 15 | --ifm-color-primary-lightest: #3cad6e; 16 | --ifm-code-font-size: 95%; 17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); 18 | } 19 | 20 | /* For readability concerns, you should choose a lighter palette in dark mode. 
*/ 21 | [data-theme='dark'] { 22 | --ifm-color-primary: #25c2a0; 23 | --ifm-color-primary-dark: #21af90; 24 | --ifm-color-primary-darker: #1fa588; 25 | --ifm-color-primary-darkest: #1a8870; 26 | --ifm-color-primary-light: #29d5b0; 27 | --ifm-color-primary-lighter: #32d8b4; 28 | --ifm-color-primary-lightest: #4fddbf; 29 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); 30 | } 31 | -------------------------------------------------------------------------------- /documentation/src/pages/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | import { Redirect } from 'react-router-dom' 3 | 4 | export default function Home(): JSX.Element { 5 | return 6 | } 7 | -------------------------------------------------------------------------------- /documentation/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/tagreader-python/9a735c05559657206abfe44993dcc9f70f12a8ff/documentation/static/.nojekyll -------------------------------------------------------------------------------- /documentation/static/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/tagreader-python/9a735c05559657206abfe44993dcc9f70f12a8ff/documentation/static/img/favicon.png -------------------------------------------------------------------------------- /documentation/static/img/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | -------------------------------------------------------------------------------- /documentation/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@tsconfig/docusaurus/tsconfig.json", 3 | "compilerOptions": { 4 | "baseUrl": "." 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /examples/quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Quickstart\n", 8 | "This document provides a quick demonstration of the basic usage of tagreader. It will show you the steps from importing the package to fetching data and making a plot. Some cells contain links to more details that can be found in the [manual](../docs/manual.md).\n", 9 | "\n", 10 | "### Prerequisite\n", 11 | "In order to run this notebook, you need to install tagreader using \"pip install tagreader[notebooks]\". Please refer to the [README](../README.md) or the [docs](../docs/manual.md) for more details." 
12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "Start by importing the package:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import tagreader" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "If we don't know the name of the data source, we can check which PI and IP.21 servers we have access to via Web API ([more details](https://equinor.github.io/tagreader-python/docs/about/usage/data-source)):" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "is_executing": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "tagreader.list_sources(\"piwebapi\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "By default, a cache-file using SQLite will be created to store previously read data values [more details](https://equinor.github.io/tagreader-python/docs/about/usage/caching). The cache can significantly speed up rereading of data, and it is recommended to always keep it enabled. If, for any reason, you want to disable the cache, set it to `None` when you are making the connection object." 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Let's establish a web API connection to PINO. We need to specify that PINO is a PI server." 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "from tagreader.cache import SmartCache" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "c = tagreader.IMSClient(datasource=\"PINO\", imstype=\"piwebapi\", cache=SmartCache())" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "We can now establish a connection to the server:" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "c.connect()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "After connecting, we can search for a tag ([more details](../docs/manual.md#searching-for-tags)):" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "c.search('BA:*')" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Selecting three of the tags found above, we can read values for a duration of 3.5 hours starting January 5th at 8 in the morning with 3-minute (180-seconds) intervals. The default query method is interpolated, but several other methods are available by providing the `read_type` argument. Timestamps are parsed using [pandas.Timestamp](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html), and can therefore be provided in many different string formats. 
[More details](../docs/manual.md#reading_data)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "df = c.read(['BA:ACTIVE.1', 'BA:CONC.1', 'BA:LEVEL.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "*Note*: Tags with maps (relevant for some InfoPlus.21 servers) can be specified on the form `'tag;map'`, e.g. `'17B-HIC192;CS A_AUTO'`.\n", 133 | "\n", 134 | "The result from the last call is a Pandas dataframe, and can be manipulated as such:" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "df.tail()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "df['BA:LEVEL.1'].size" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "df['BA:CONC.1'].loc['2020-01-05 11:24:00']" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "max(df['BA:LEVEL.1'])" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "etc...\n", 178 | "\n", 179 | "Sometimes it can be handy to obtain the unit and description for the three tags:" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "tags = ['BA:ACTIVE.1', 'BA:LEVEL.1', 'BA:CONC.1']\n", 189 | "units = c.get_units(tags)\n", 190 | "desc = c.get_descriptions(tags)\n", 191 | "print(units)\n", 192 | "print(desc)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": { 199 | "scrolled": true 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "tag = 'BA:CONC.1'\n", 204 | "df[tag].plot(grid=True, title=desc[tag]).set_ylabel(units[tag])" 205 | ] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | "display_name": "Python 3", 211 | "language": "python", 212 | "name": "python3" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.6.7" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 4 229 | } 230 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | files = tagreader 3 | check_untyped_defs = True 4 | disallow_any_generics = True 5 | # disallow_untyped_calls = True 6 | disallow_untyped_defs = True 7 | ignore_missing_imports = True 8 | # Temporary fix for https://github.com/python/mypy/issues/10709: 9 | ignore_missing_imports_per_module = True 10 | implicit_reexport = False 11 | local_partial_types = True 12 | # no_implicit_optional = True 13 | strict_equality = True 14 | warn_unused_ignores = True 15 | warn_redundant_casts = True 16 | warn_return_any = True 17 | warn_unreachable = True 18 | warn_unused_configs = True 19 | 20 | [mypy-tagreader.clients] 21 | ignore_errors = True 22 | 23 | 
[mypy-tagreader.odbc_handlers] 24 | ignore_errors = True 25 | 26 | [mypy-tagreader.utils] 27 | ignore_errors = True 28 | 29 | [mypy-tagreader.web_handlers] 30 | ignore_errors = True -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "tagreader" 3 | version = "6.0.4" 4 | description = "Tagreader is a Python package for reading trend data from the OSIsoft PI and Aspen Infoplus.21 IMS systems." 5 | authors = ["Einar S. Idsø ", "Morten Dæhli Aslesen str: 14 | tagname = tagname.replace(".", "_") 15 | tagname = "".join(c for c in tagname if c.isalnum() or c == "_").strip() 16 | if tagname[0].isnumeric(): 17 | tagname = "_" + tagname # Conform to NaturalName 18 | return tagname 19 | 20 | 21 | def timestamp_to_epoch(timestamp: datetime) -> int: 22 | origin = datetime(1970, 1, 1) 23 | if timestamp.tzinfo is not None: 24 | timestamp = timestamp.astimezone(pytz.utc).replace(tzinfo=None) 25 | return (timestamp - origin) // timedelta(seconds=1) 26 | 27 | 28 | def _infer_pandas_index_freq(df: pd.DataFrame) -> pd.DataFrame: 29 | try: 30 | if pd.infer_freq(df.index): # type: ignore[arg-type] 31 | df = df.asfreq(pd.infer_freq(df.index)) # type: ignore[arg-type] 32 | except (TypeError, ValueError) as e: 33 | logger.warning(f"Could not infer frequency of timeseries in Cache. {e}") 34 | return df 35 | 36 | 37 | def _drop_duplicates_and_sort_index(df: pd.DataFrame) -> pd.DataFrame: 38 | return df[~df.index.duplicated(keep="first")].sort_index() 39 | 40 | 41 | def clean_dataframe(df: pd.DataFrame) -> pd.DataFrame: 42 | return _infer_pandas_index_freq(_drop_duplicates_and_sort_index(df)) 43 | 44 | 45 | class BaseCache(Cache): # type: ignore[misc] 46 | """ 47 | Cache works as a Python dictionary with persistence. It is simple to use, and only requires a directory for 48 | the cache. 
The default directory is /.cache/ 49 | """ 50 | 51 | def __init__( # type: ignore[no-untyped-def] 52 | self, 53 | directory: Path = Path(".") / ".cache", 54 | enable_stats: bool = False, 55 | *args, 56 | **kwargs, 57 | ) -> None: 58 | super().__init__(directory=directory.as_posix(), *args, **kwargs) 59 | 60 | if enable_stats: 61 | self.enable_cache_statistics() 62 | 63 | def enable_cache_statistics(self) -> None: 64 | self.stats(enable=True) 65 | 66 | def put(self, key: str, value: pd.DataFrame, expire: Optional[int] = None) -> None: 67 | self.add(key=key, value=value, expire=expire) 68 | 69 | def get_metadata( 70 | self, key: str, properties: Optional[Union[str, List[str]]] 71 | ) -> Optional[Dict[str, Union[str, int, float]]]: 72 | if isinstance(properties, str): 73 | properties = [properties] 74 | _key = f"$metadata${key}" 75 | metadata = cast(Optional[Dict[str, Union[str, int, float]]], self.get(_key)) 76 | if metadata: 77 | if properties: 78 | return {k: v for (k, v) in metadata.items() if k in properties} 79 | return metadata 80 | else: 81 | return None 82 | 83 | def put_metadata( 84 | self, 85 | key: str, 86 | value: Dict[str, Union[str, int, float]], 87 | expire: Optional[int] = None, 88 | ) -> Dict[str, Union[str, int, float]]: 89 | _key = f"$metadata${key}" 90 | combined_value = value 91 | if _key in self: 92 | existing = self.get(_key) 93 | if existing: 94 | existing.update(value) 95 | combined_value = existing 96 | else: 97 | combined_value = value 98 | self.delete(_key) 99 | 100 | self.add(_key, combined_value, expire=expire) 101 | return combined_value 102 | 103 | def delete_metadata(self, key: str) -> None: 104 | _key = f"$metadata${key}" 105 | self.delete(_key) 106 | 107 | 108 | class BucketCache(BaseCache): 109 | @staticmethod 110 | def _key_path( 111 | tagname: str, 112 | read_type: ReaderType, 113 | ts: timedelta, 114 | stepped: bool, 115 | get_status: bool, 116 | start: Optional[datetime], 117 | end: Optional[datetime], 118 | ) -> str: 119 | """Return a string on the form 120 | $tagname$read_type[$sample_time][$stepped][$get_status]$_start_end 121 | tagname: safe tagname 122 | sample_time: integer value. Empty for RAW. 123 | stepped: "stepped" if value was read as stepped. Empty if not. 124 | get_status: "status" if value contains status. Empty if not. 125 | start: The start of the query that created the bucket. 126 | end: The end of the query that created the bucket. 
127 | """ 128 | tagname = safe_tagname(tagname) 129 | timespan = "" 130 | if start is not None: 131 | start_epoch = timestamp_to_epoch(start) 132 | end_epoch = timestamp_to_epoch(end) if end else end 133 | timespan = f"$_{start_epoch}_{end_epoch}" 134 | 135 | keyval = ( 136 | f"${tagname}" 137 | f"${read_type.name}" 138 | f"{(read_type != ReaderType.RAW) * f'$s{str(int(ts.total_seconds()))}'}" 139 | f"{stepped * '$stepped'}" 140 | f"{get_status * '$status'}" 141 | f"{timespan}" 142 | ) 143 | return keyval 144 | 145 | def store( 146 | self, 147 | *, 148 | df: pd.DataFrame, 149 | tagname: str, 150 | read_type: ReaderType, 151 | ts: timedelta, 152 | stepped: bool, 153 | get_status: bool, 154 | start: datetime, 155 | end: datetime, 156 | ) -> None: 157 | if df.empty: 158 | return 159 | 160 | intersecting = self.get_intersecting_datasets( 161 | tagname=tagname, 162 | read_type=read_type, 163 | ts=ts, 164 | stepped=stepped, 165 | get_status=get_status, 166 | start=start, 167 | end=end, 168 | ) 169 | if len(intersecting) > 0: 170 | for dataset in intersecting: 171 | this_start, this_end = self._get_intervals_from_dataset_name(dataset) 172 | start = min(start, this_start if this_start else start) 173 | end = max(end, this_end if this_end else end) 174 | df2 = self.get(dataset) 175 | if df2 is not None: 176 | df = pd.concat([df, df2], axis=0) 177 | self.delete(dataset) 178 | key = self._key_path( 179 | tagname=tagname, 180 | read_type=read_type, 181 | ts=ts, 182 | stepped=stepped, 183 | get_status=get_status, 184 | start=start, 185 | end=end, 186 | ) 187 | self.put(key=key, value=clean_dataframe(df)) 188 | 189 | @staticmethod 190 | def _get_intervals_from_dataset_name( 191 | name: str, 192 | ) -> Tuple[datetime, datetime]: 193 | name_with_times = name.split("$")[-1] 194 | if not name_with_times.count("_") == 2: 195 | return None, None # type: ignore[return-value] 196 | _, start_epoch, end_epoch = name_with_times.split("_") 197 | start = pd.to_datetime(int(start_epoch), unit="s").tz_localize("UTC") 198 | end = pd.to_datetime(int(end_epoch), unit="s").tz_localize("UTC") 199 | return start, end 200 | 201 | def get_intersecting_datasets( 202 | self, 203 | tagname: str, 204 | read_type: ReaderType, 205 | ts: timedelta, 206 | stepped: bool, 207 | get_status: bool, 208 | start: datetime, 209 | end: datetime, 210 | ) -> List[str]: 211 | if not len(self) > 0: 212 | return [] 213 | intersecting_datasets = [] 214 | for dataset in self.iterkeys(): 215 | target_key = self._key_path( 216 | tagname=tagname, 217 | read_type=read_type, 218 | start=None, 219 | end=None, 220 | ts=ts, 221 | stepped=stepped, 222 | get_status=get_status, 223 | ) 224 | if target_key in dataset: 225 | start_ds, end_ds = self._get_intervals_from_dataset_name(dataset) 226 | if end_ds >= start and end >= start_ds: 227 | intersecting_datasets.append(dataset) 228 | return intersecting_datasets 229 | 230 | def get_missing_intervals( 231 | self, 232 | tagname: str, 233 | read_type: ReaderType, 234 | ts: timedelta, 235 | stepped: bool, 236 | get_status: bool, 237 | start: datetime, 238 | end: datetime, 239 | ) -> List[Tuple[datetime, datetime]]: 240 | datasets = self.get_intersecting_datasets( 241 | tagname=tagname, 242 | read_type=read_type, 243 | ts=ts, 244 | stepped=stepped, 245 | get_status=get_status, 246 | start=start, 247 | end=end, 248 | ) 249 | missing_intervals = [(start, end)] 250 | for dataset in datasets: 251 | b = self._get_intervals_from_dataset_name(dataset) 252 | for _ in range(0, len(missing_intervals)): 253 | r = 
missing_intervals.pop(0) 254 | if b[1] < r[0] or b[0] > r[1]: 255 | # No overlap 256 | missing_intervals.append(r) 257 | elif b[0] <= r[0] and b[1] >= r[1]: 258 | # The bucket covers the entire interval 259 | continue 260 | elif b[0] > r[0] and b[1] < r[1]: 261 | # The bucket splits the interval in two 262 | missing_intervals.append((r[0], b[0])) 263 | missing_intervals.append((b[1], r[1])) 264 | elif b[0] <= r[0] and r[0] <= b[1] < r[1]: 265 | # The bucket chomps the start of the interval 266 | missing_intervals.append((b[1], r[1])) 267 | elif r[0] < b[0] <= r[1] and b[1] >= r[1]: 268 | # The bucket chomps the end of the interval 269 | missing_intervals.append((r[0], b[0])) 270 | return missing_intervals 271 | 272 | def fetch( 273 | self, 274 | *, 275 | tagname: str, 276 | read_type: ReaderType, 277 | ts: timedelta, 278 | stepped: bool, 279 | get_status: bool, 280 | start: datetime, 281 | end: datetime, 282 | ) -> pd.DataFrame: 283 | df = pd.DataFrame() 284 | if not len(self) > 0: 285 | return df 286 | 287 | datasets = self.get_intersecting_datasets( 288 | tagname=tagname, 289 | read_type=read_type, 290 | ts=ts, 291 | stepped=stepped, 292 | get_status=get_status, 293 | start=start, 294 | end=end, 295 | ) 296 | 297 | for dataset in datasets: 298 | df2 = self.get(dataset) 299 | if df2 is not None: 300 | df = pd.concat([df, df2.loc[start:end]], axis=0) # type: ignore[call-overload, misc] 301 | 302 | return clean_dataframe(df) 303 | 304 | 305 | class SmartCache(BaseCache): 306 | @staticmethod 307 | def _key_path( 308 | *, 309 | tagname: str, 310 | read_type: ReaderType, 311 | ts: timedelta, 312 | get_status: bool, 313 | ) -> str: 314 | name = safe_tagname(tagname) 315 | status = get_status * "$status" 316 | if read_type != ReaderType.RAW: 317 | interval = int(ts.total_seconds()) 318 | return f"{read_type.name}$s{interval}${name}{status}" 319 | else: 320 | return f"{read_type.name}${name}{status}" 321 | 322 | def store( 323 | self, 324 | *, 325 | df: pd.DataFrame, 326 | tagname: str, 327 | read_type: ReaderType, 328 | ts: timedelta, 329 | get_status: bool, 330 | ) -> None: 331 | key = self._key_path( 332 | tagname=tagname, read_type=read_type, ts=ts, get_status=get_status 333 | ) 334 | if df.empty: 335 | return # Weirdness ensues when using empty df in select statement below 336 | if key in self: 337 | df2 = self.get(key) 338 | if df2 is not None: 339 | df = pd.concat([df, df2], axis=0) 340 | self.delete(key=key) 341 | self.put( 342 | key=key, 343 | value=clean_dataframe(df), 344 | ) 345 | else: 346 | self.put(key, df) 347 | 348 | def fetch( 349 | self, 350 | *, 351 | tagname: str, 352 | read_type: ReaderType, 353 | ts: timedelta, 354 | start: Optional[datetime], 355 | end: Optional[datetime], 356 | get_status: bool, 357 | ) -> pd.DataFrame: 358 | key = self._key_path( 359 | tagname=tagname, read_type=read_type, ts=ts, get_status=get_status 360 | ) 361 | df = cast(Optional[pd.DataFrame], self.get(key=key)) 362 | if df is None: 363 | return pd.DataFrame() 364 | if start is not None: 365 | df = df.loc[df.index >= start] 366 | if end is not None: 367 | df = df.loc[df.index <= end] 368 | return df 369 | -------------------------------------------------------------------------------- /tagreader/clients.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta, timezone, tzinfo 2 | from itertools import groupby 3 | from operator import itemgetter 4 | from typing import Any, Dict, List, Optional, Tuple, Union 5 | from 
urllib.error import HTTPError 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import pytz 10 | 11 | from tagreader.cache import BucketCache, SmartCache 12 | from tagreader.logger import logger 13 | from tagreader.utils import ( 14 | IMSType, 15 | ReaderType, 16 | convert_to_pydatetime, 17 | ensure_datetime_with_tz, 18 | ) 19 | from tagreader.web_handlers import ( 20 | AspenHandlerWeb, 21 | PIHandlerWeb, 22 | get_auth_aspen, 23 | get_auth_pi, 24 | list_aspenone_sources, 25 | list_piwebapi_sources, 26 | ) 27 | 28 | NONE_START_TIME = datetime(1970, 1, 1, tzinfo=pytz.UTC) 29 | 30 | 31 | def list_sources( 32 | imstype: Union[IMSType, str], 33 | url: Optional[str] = None, 34 | auth: Optional[Any] = None, 35 | verifySSL: bool = True, 36 | ) -> List[str]: 37 | if isinstance(imstype, str): 38 | try: 39 | imstype = getattr(IMSType, imstype.upper()) 40 | except AttributeError: 41 | raise ValueError( 42 | f"imstype needs to be one of {', '.join([v for v in IMSType.__members__.values() if v not in [IMSType.PI, IMSType.ASPEN, IMSType.IP21]])}." # noqa 43 | f" We suggest to use the tagreader.IMSType enumerator when initiating a client." 44 | ) 45 | accepted_values = [IMSType.PIWEBAPI, IMSType.ASPENONE] 46 | 47 | if imstype == IMSType.PIWEBAPI: 48 | if auth is None: 49 | auth = get_auth_pi() 50 | return list_piwebapi_sources(url=url, auth=auth, verify_ssl=verifySSL) 51 | elif imstype == IMSType.ASPENONE: 52 | if auth is None: 53 | auth = get_auth_aspen() 54 | return list_aspenone_sources(url=url, auth=auth, verify_ssl=verifySSL) 55 | elif imstype in [IMSType.PI, IMSType.ASPEN, IMSType.IP21]: 56 | raise ValueError( 57 | f"ODBC clients are no longer supported. Given ims client type: {imstype}." 58 | " Please use tagreader version <= 4 for deprecated ODBC clients." 59 | ) 60 | else: 61 | raise NotImplementedError( 62 | f"imstype: {imstype} has not been implemented. Accepted values are: {accepted_values}" 63 | ) 64 | 65 | 66 | def get_missing_intervals( 67 | df: pd.DataFrame, 68 | start: datetime, 69 | end: datetime, 70 | ts: Optional[timedelta], 71 | read_type: ReaderType, 72 | ): 73 | if ( 74 | read_type == ReaderType.RAW 75 | ): # Fixme: How to check for completeness for RAW data? 76 | return [[start, end]] 77 | seconds = int(ts.total_seconds()) 78 | tvec = pd.date_range(start=start, end=end, freq=f"{seconds}s") 79 | if len(df) == len(tvec): # Short-circuit if dataset is complete 80 | return [] 81 | values_in_df = tvec.isin(df.index) 82 | missing_intervals = [] 83 | for k, g in groupby(enumerate(values_in_df), lambda ix: ix[1]): 84 | if not k: 85 | seq = list(map(itemgetter(0), g)) 86 | missing_intervals.append( 87 | ( 88 | pd.Timestamp(tvec[seq[0]]).to_pydatetime(), 89 | pd.Timestamp(tvec[seq[-1]]).to_pydatetime(), 90 | ) 91 | ) 92 | # Should be unnecessary to fetch overlapping points since get_next_timeslice 93 | # ensures start <= t <= end 94 | # missingintervals.append((pd.Timestamp(tvec[seq[0]]), 95 | # pd.Timestamp(tvec[min(seq[-1]+1, len(tvec)-1)]))) 96 | return missing_intervals 97 | 98 | 99 | def get_next_timeslice( 100 | start: datetime, 101 | end: datetime, 102 | ts: Optional[timedelta], 103 | max_steps: Optional[int], 104 | ) -> Tuple[datetime, datetime]: 105 | if max_steps is None: 106 | calc_end = end 107 | else: 108 | calc_end = start + ts * max_steps 109 | calc_end = min(end, calc_end) 110 | # Ensure we include the last data point. 111 | # Discrepancies between Aspen and Pi for +ts 112 | # Discrepancies between IMS and cache for e.g. ts. 
113 | # if calc_end == end: 114 | # calc_end += ts / 2 115 | return start, calc_end 116 | 117 | 118 | def get_handler( 119 | imstype: Optional[IMSType], 120 | datasource: str, 121 | url: Optional[str], 122 | options: Dict[str, Union[int, float, str]], 123 | verifySSL: Optional[bool], 124 | auth: Optional[Any], 125 | cache: Optional[Union[SmartCache, BucketCache]] = None, 126 | ): 127 | if imstype is None: 128 | try: 129 | if datasource in list_aspenone_sources( 130 | url=None, auth=None, verify_ssl=verifySSL 131 | ): 132 | imstype = IMSType.ASPENONE 133 | except HTTPError as e: 134 | logger.debug(f"Could not list Aspenone sources: {e}") 135 | if imstype is None: 136 | try: 137 | if datasource in list_piwebapi_sources( 138 | url=None, auth=None, verify_ssl=verifySSL 139 | ): 140 | imstype = IMSType.PIWEBAPI 141 | except HTTPError as e: 142 | logger.debug(f"Could not list PI sources: {e}") 143 | 144 | if imstype == IMSType.PIWEBAPI: 145 | return PIHandlerWeb( 146 | url=url, 147 | datasource=datasource, 148 | options=options, 149 | verify_ssl=verifySSL, 150 | auth=auth, 151 | cache=cache, 152 | ) 153 | 154 | if imstype == IMSType.ASPENONE: 155 | return AspenHandlerWeb( 156 | datasource=datasource, 157 | url=url, 158 | options=options, 159 | verify_ssl=verifySSL, 160 | auth=auth, 161 | ) 162 | elif imstype in [IMSType.PI, IMSType.ASPEN, IMSType.IP21]: 163 | raise ValueError( 164 | f"ODBC clients are no longer supported. Given ims client type: {imstype}." 165 | " Please use tagreader version <= 4 for deprecated ODBC clients." 166 | ) 167 | raise ValueError( 168 | f"Could not infer IMSType for datasource: {datasource}. " 169 | f"Please specify correct datasource, imstype or host, or refer to the user docs." 170 | ) 171 | 172 | 173 | class IMSClient: 174 | def __init__( 175 | self, 176 | datasource: str, 177 | imstype: Optional[Union[str, IMSType]] = None, 178 | tz: Union[tzinfo, str] = pytz.timezone("Europe/Oslo"), 179 | url: Optional[str] = None, 180 | handler_options: Dict[str, Union[int, float, str]] = {}, # noqa: 181 | verifySSL: bool = True, 182 | auth: Optional[Any] = None, 183 | cache: Optional[Union[SmartCache, BucketCache]] = None, 184 | ): 185 | if isinstance(imstype, str): 186 | try: 187 | imstype = getattr(IMSType, imstype.upper()) 188 | except AttributeError: 189 | raise ValueError( 190 | f"imstype needs to be one of {', '.join([v for v in IMSType.__members__.values()])}." 191 | f" We suggest to use the tagreader.IMSType enumerator when initiating a client." 192 | ) 193 | 194 | if isinstance(tz, str): 195 | if tz in pytz.all_timezones: 196 | self.tz = pytz.timezone(tz) 197 | else: 198 | raise ValueError(f"Invalid timezone string Given type was {type(tz)}") 199 | elif isinstance(tz, tzinfo): 200 | self.tz = tz 201 | else: 202 | raise ValueError( 203 | f"timezone argument 'tz' needs to be either a valid timezone string or a tzinfo-object. Given type was {type(tz)}" 204 | ) 205 | 206 | self.cache = cache 207 | self.handler = get_handler( 208 | imstype=imstype, 209 | datasource=datasource, 210 | url=url, 211 | options=handler_options, 212 | verifySSL=verifySSL, 213 | auth=auth, 214 | cache=self.cache, 215 | ) 216 | 217 | def connect(self) -> None: 218 | self.handler.connect() 219 | 220 | def search_tag( 221 | self, 222 | tag: Optional[str] = None, 223 | desc: Optional[str] = None, 224 | timeout: Optional[int] = None, 225 | ) -> Union[List[Tuple[str, str]], List[str]]: 226 | logger.warning("This function is deprecated. 
Please call 'search()' instead") 227 | return self.search(tag=tag, desc=desc, timeout=timeout) 228 | 229 | def search( 230 | self, 231 | tag: Optional[str] = None, 232 | desc: Optional[str] = None, 233 | timeout: Optional[int] = None, 234 | return_desc: bool = True, 235 | ) -> Union[List[Tuple[str, str]], List[str]]: 236 | return self.handler.search( 237 | tag=tag, desc=desc, timeout=timeout, return_desc=return_desc 238 | ) 239 | 240 | def _get_metadata(self, tag: str): 241 | return self.handler._get_tag_metadata( 242 | tag 243 | ) # noqa: Should probably expose this as a public method if needed. 244 | 245 | def _read_single_tag( 246 | self, 247 | tag: str, 248 | start: Optional[datetime], 249 | end: Optional[datetime], 250 | ts: timedelta, 251 | read_type: ReaderType, 252 | get_status: bool, 253 | cache: Optional[Union[BucketCache, SmartCache]], 254 | ): 255 | if read_type == ReaderType.SNAPSHOT: 256 | metadata = self._get_metadata(tag) 257 | df = self.handler.read_tag( 258 | tag=tag, 259 | start=start, 260 | end=end, 261 | sample_time=ts, 262 | read_type=read_type, 263 | metadata=metadata, 264 | get_status=get_status, 265 | ) 266 | else: 267 | stepped = False 268 | missing_intervals = [(start, end)] 269 | df = pd.DataFrame() 270 | 271 | if isinstance(cache, SmartCache): 272 | time_slice = get_next_timeslice( 273 | start=start, end=end, ts=ts, max_steps=None 274 | ) 275 | df = cache.fetch( 276 | tagname=tag, 277 | read_type=read_type, 278 | ts=ts, 279 | start=time_slice[0], 280 | end=time_slice[1], 281 | get_status=get_status, 282 | ) 283 | missing_intervals = get_missing_intervals( 284 | df=df, 285 | start=start, 286 | end=end, 287 | ts=ts, 288 | read_type=read_type, 289 | ) 290 | if not missing_intervals: 291 | return df.tz_convert(self.tz).sort_index() 292 | elif isinstance(cache, BucketCache): 293 | df = cache.fetch( 294 | tagname=tag, 295 | read_type=read_type, 296 | ts=ts, 297 | stepped=stepped, 298 | get_status=get_status, 299 | start=start, 300 | end=end, 301 | ) 302 | missing_intervals = cache.get_missing_intervals( 303 | tagname=tag, 304 | read_type=read_type, 305 | ts=ts, 306 | stepped=stepped, 307 | get_status=get_status, 308 | start=start, 309 | end=end, 310 | ) 311 | if not missing_intervals: 312 | return df.tz_convert(self.tz).sort_index() 313 | 314 | metadata = self._get_metadata(tag) 315 | frames = [df] 316 | for start, end in missing_intervals: 317 | while True: 318 | df = self.handler.read_tag( 319 | tag=tag, 320 | start=start, 321 | end=end, 322 | sample_time=ts, 323 | read_type=read_type, 324 | metadata=metadata, 325 | get_status=get_status, 326 | ) 327 | if not df.empty and read_type != ReaderType.RAW: 328 | if isinstance(cache, SmartCache): 329 | cache.store( 330 | df=df, 331 | tagname=tag, 332 | read_type=read_type, 333 | ts=ts, 334 | get_status=get_status, 335 | ) 336 | if isinstance(cache, BucketCache): 337 | cache.store( 338 | df=df, 339 | tagname=tag, 340 | read_type=read_type, 341 | ts=ts, 342 | stepped=stepped, 343 | get_status=get_status, 344 | start=start, 345 | end=end, 346 | ) 347 | frames.append(df) 348 | if len(df) < self.handler._max_rows: 349 | break 350 | start = df.index[-1] 351 | 352 | df = pd.concat(frames) 353 | # read_type INT leads to overlapping values after concatenating 354 | # due to both start time and end time included. 355 | # Aggregate read_types (should) align perfectly and don't 356 | # (shouldn't) need deduplication. 
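        # Added illustration: two consecutive INT reads covering 12:00-12:10
        # and 12:10-12:20 each include the 12:10 boundary sample, so the
        # concatenated frame holds that timestamp twice; keep="first" below
        # discards the second occurrence.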
357 | df = df[~df.index.duplicated(keep="first")] # Deduplicate on index 358 | df = df.tz_convert(self.tz).sort_index() 359 | df = df.rename(columns={"value": tag}) 360 | return df 361 | 362 | def get_units(self, tags: Union[str, List[str]]): 363 | if isinstance(tags, str): 364 | tags = [tags] 365 | units = {} 366 | for tag in tags: 367 | try: 368 | if self.cache is not None: 369 | r = self.cache.get_metadata(key=tag, properties="unit") 370 | if r is not None and "unit" in r: 371 | units[tag] = r["unit"] 372 | if tag not in units: 373 | unit = self.handler._get_tag_unit(tag) 374 | if self.cache is not None and unit is not None: 375 | self.cache.put_metadata(key=tag, value={"unit": unit}) 376 | units[tag] = unit 377 | except Exception: 378 | if self.search(tag) == []: # check for nonexisting string 379 | logger.warning(f"Tag not found: {tag}") 380 | continue 381 | return units 382 | 383 | def get_descriptions(self, tags: Union[str, List[str]]) -> Dict[str, str]: 384 | if isinstance(tags, str): 385 | tags = [tags] 386 | descriptions = {} 387 | for tag in tags: 388 | try: 389 | if self.cache is not None: 390 | r = self.cache.get_metadata(key=tag, properties="description") 391 | if r is not None and "description" in r: 392 | descriptions[tag] = r["description"] 393 | if tag not in descriptions: 394 | desc = self.handler._get_tag_description(tag) 395 | if self.cache is not None and desc is not None: 396 | self.cache.put_metadata(key=tag, value={"description": desc}) 397 | descriptions[tag] = desc 398 | except Exception: 399 | if self.search(tag) == []: # check for nonexisting string 400 | logger.warning(f"Tag not found: {tag}") 401 | continue 402 | return descriptions 403 | 404 | def read_tags( 405 | self, 406 | tags: Union[str, List[str]], 407 | start_time: Optional[Union[datetime, pd.Timestamp, str]] = None, 408 | stop_time: Optional[Union[datetime, pd.Timestamp, str]] = None, 409 | ts: Optional[Union[timedelta, pd.Timedelta]] = timedelta(seconds=60), 410 | read_type: ReaderType = ReaderType.INT, 411 | get_status: bool = False, 412 | ): 413 | start = start_time 414 | end = stop_time 415 | logger.warn( 416 | ( 417 | "This function has been renamed to read() and is deprecated. " 418 | "Please call 'read()' instead" 419 | ) 420 | ) 421 | return self.read( 422 | tags=tags, 423 | start_time=start, 424 | end_time=end, 425 | ts=ts, 426 | read_type=read_type, 427 | get_status=get_status, 428 | ) 429 | 430 | def read( 431 | self, 432 | tags: Union[str, List[str]], 433 | start_time: Optional[Union[datetime, pd.Timestamp, str]] = None, 434 | end_time: Optional[Union[datetime, pd.Timestamp, str]] = None, 435 | ts: Optional[Union[timedelta, pd.Timedelta, int]] = timedelta(seconds=60), 436 | read_type: ReaderType = ReaderType.INT, 437 | get_status: bool = False, 438 | ) -> pd.DataFrame: 439 | """Reads values for the specified [tags] from the IMS server for the 440 | time interval from [start_time] to [stop_time] in intervals [ts]. 441 | 442 | The interval [ts] can be specified as pd.Timedelta or as an integer, 443 | in which case it will be interpreted as seconds. 444 | 445 | Default value for [read_type] is ReaderType.INT, which interpolates 446 | the raw data. 
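
        A minimal usage sketch (the tag name and times below are purely
        illustrative):

            df = client.read(
                tags=["TI-101"],
                start_time="2023-05-01 10:00:00",
                end_time="2023-05-01 11:00:00",
                ts=60,
            )
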
447 | All possible values for read_type are defined in the ReaderType class, 448 | which can be imported as follows: 449 | from utils import ReaderType 450 | 451 | Values for ReaderType.* that should work for all handlers are: 452 | INT, RAW, MIN, MAX, RNG, AVG, VAR, STD and SNAPSHOT 453 | """ 454 | start = start_time 455 | end = end_time 456 | if isinstance(tags, str): 457 | tags = [tags] 458 | if isinstance(read_type, str): 459 | try: 460 | read_type = getattr(ReaderType, read_type) 461 | except AttributeError: 462 | ValueError( 463 | "read_type needs to be of type ReaderType.* or a legal value. Please refer to the docstring." 464 | ) 465 | if read_type in [ReaderType.RAW, ReaderType.SNAPSHOT] and len(tags) > 1: 466 | raise RuntimeError( 467 | "Unable to read raw/sampled data for multiple tags since they don't " 468 | "share time vector. Read one at a time." 469 | ) 470 | 471 | if isinstance(tags, str): 472 | tags = [tags] 473 | 474 | if start is None: 475 | start = NONE_START_TIME 476 | elif isinstance(start, (str, pd.Timestamp)): 477 | try: 478 | start = convert_to_pydatetime(start) 479 | except ValueError: 480 | start = convert_to_pydatetime(start) 481 | if end is None: 482 | end = datetime.now(timezone.utc) 483 | elif isinstance(end, (str, pd.Timestamp)): 484 | end = convert_to_pydatetime(end) 485 | 486 | if isinstance(ts, pd.Timedelta): 487 | ts = ts.to_pytimedelta() 488 | elif isinstance( 489 | ts, 490 | ( 491 | int, 492 | float, 493 | np.int32, 494 | np.int64, 495 | np.float32, 496 | np.float64, 497 | np.number, 498 | np.integer, 499 | ), 500 | ): 501 | ts = timedelta(seconds=int(ts)) 502 | elif not ts and read_type not in [ReaderType.SNAPSHOT, ReaderType.RAW]: 503 | raise ValueError( 504 | "ts needs to be a timedelta or an integer (number of seconds)" 505 | " unless you are reading raw or snapshot data." 506 | f" Given type: {type(ts)}" 507 | ) 508 | elif not isinstance(ts, timedelta): 509 | raise ValueError( 510 | "ts needs to be either a None, timedelta or and integer (number of seconds)." 511 | f" Given type: {type(ts)}" 512 | ) 513 | 514 | if read_type != ReaderType.SNAPSHOT: 515 | start = ensure_datetime_with_tz(start, tz=self.tz) 516 | if end: 517 | end = ensure_datetime_with_tz(end, tz=self.tz) 518 | 519 | old_tags = tags 520 | tags = list(dict.fromkeys(tags)) 521 | if len(old_tags) > len(tags): 522 | duplicates = set([x for n, x in enumerate(old_tags) if x in old_tags[:n]]) 523 | logger.warning( 524 | f"Duplicate tags found, removed duplicates: {', '.join(duplicates)}" 525 | ) 526 | 527 | results = [] 528 | for i, tag in enumerate(tags): 529 | results.append( 530 | self._read_single_tag( 531 | tag=tag, 532 | start=start, 533 | end=end, 534 | ts=ts, 535 | read_type=read_type, 536 | get_status=get_status, 537 | cache=self.cache, 538 | ) 539 | ) 540 | 541 | return pd.concat(results, axis=1) 542 | 543 | def query_sql(self, query: str, parse: bool = True): 544 | """[summary] 545 | Args: 546 | query (str): [description] 547 | parse (bool, optional): Whether to attempt to parse query return 548 | value as table. Defaults to True. 
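
        Example (illustrative; the query mirrors one used in the test suite):
            res = client.query_sql(
                "Select name, ip_description from ip_analogdef where name = 'atc'",
                parse=True,
            )
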
549 | Returns: 550 | Union[pd.DataFrame, pyodbc.Cursor, str]: Return value 551 | """ 552 | df_or_cursor = self.handler.query_sql(query=query, parse=parse) 553 | return df_or_cursor 554 | -------------------------------------------------------------------------------- /tagreader/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logging.basicConfig( 4 | format="%(asctime)s %(levelname)-8s %(name)-15s %(message)s", 5 | datefmt="%Y-%m-%d %H:%M:%S", 6 | level=logging.INFO, 7 | ) 8 | 9 | 10 | logger = logging.getLogger("awt") 11 | -------------------------------------------------------------------------------- /tagreader/utils.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import hashlib 3 | import logging 4 | import platform 5 | import ssl 6 | from datetime import datetime, tzinfo 7 | from enum import Enum 8 | from pathlib import Path 9 | from typing import Union 10 | 11 | import certifi 12 | import pandas as pd 13 | import pytz 14 | import requests 15 | from platformdirs import user_data_dir 16 | 17 | from tagreader.logger import logger 18 | 19 | 20 | def is_windows() -> bool: 21 | return platform.system() == "Windows" 22 | 23 | 24 | def is_mac() -> bool: 25 | return platform.system() == "Darwin" 26 | 27 | 28 | def is_linux() -> bool: 29 | return platform.system() == "Linux" 30 | 31 | 32 | if is_windows(): 33 | import winreg 34 | 35 | if is_mac(): 36 | import socket 37 | import subprocess 38 | 39 | 40 | def convert_to_pydatetime(date_stamp: Union[datetime, str, pd.Timestamp]) -> datetime: 41 | if isinstance(date_stamp, datetime): 42 | return date_stamp 43 | elif isinstance(date_stamp, pd.Timestamp): 44 | return date_stamp.to_pydatetime() 45 | else: 46 | try: 47 | return pd.to_datetime(str(date_stamp), format="ISO8601").to_pydatetime() 48 | except ValueError: 49 | return pd.to_datetime(str(date_stamp), dayfirst=True).to_pydatetime() 50 | 51 | 52 | def ensure_datetime_with_tz( 53 | date_stamp: Union[datetime, str, pd.Timestamp], 54 | tz: tzinfo = pytz.timezone("Europe/Oslo"), 55 | ) -> datetime: 56 | date_stamp = convert_to_pydatetime(date_stamp) 57 | 58 | if not date_stamp.tzinfo: 59 | date_stamp = tz.localize(date_stamp) 60 | 61 | return date_stamp 62 | 63 | 64 | def urljoin(*args) -> str: 65 | """ 66 | Joins components of URL. Ensures slashes are inserted or removed where 67 | needed, and does not strip trailing slash of last element. 68 | 69 | Returns: 70 | str -- Generated URL 71 | """ 72 | trailing_slash = "/" if args[-1].endswith("/") else "" 73 | return "/".join(map(lambda x: str(x).strip("/"), args)) + trailing_slash 74 | 75 | 76 | class ReaderType(enum.IntEnum): 77 | """Enumerates available types of data to read. 78 | 79 | For members with more than one name per value, the first member (the 80 | original) needs to be untouched since it may be used as back-reference 81 | (specifically for cache hierarchies). 
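
    For example, INT, INTERPOLATE and INTERPOLATED below all name the same
    member; INT is the original and therefore the name that must stay
    untouched.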
82 | """ 83 | 84 | RAW = SAMPLED = ACTUAL = enum.auto() # Raw sampled data 85 | SHAPEPRESERVING = BESTFIT = enum.auto() # Minimum data points for preserving shape 86 | INT = INTERPOLATE = INTERPOLATED = enum.auto() # Interpolated data 87 | MIN = MINIMUM = enum.auto() # Min value 88 | MAX = MAXIMUM = enum.auto() # Max value 89 | AVG = AVERAGE = AVERAGED = enum.auto() # Averaged data 90 | VAR = VARIANCE = enum.auto() # Variance of data 91 | STD = STDDEV = enum.auto() # Standard deviation of data 92 | RNG = RANGE = enum.auto() # Range of data 93 | COUNT = enum.auto() # Number of data points 94 | GOOD = enum.auto() # Number of good data points 95 | BAD = NOTGOOD = enum.auto() # Number of not good data points 96 | TOTAL = enum.auto() # Number of total data 97 | SUM = enum.auto() # Sum of data 98 | SNAPSHOT = FINAL = LAST = enum.auto() # Last sampled value 99 | 100 | 101 | def add_equinor_root_certificate() -> bool: 102 | """ 103 | This is a utility function for Equinor employees on Equinor managed machines. 104 | 105 | The function searches for the Equinor Root certificate in the 106 | cert store and imports it to the cacert bundle. Does nothing if not 107 | running on Equinor host. 108 | 109 | NB! This needs to be repeated after updating the cacert module. 110 | 111 | Returns: 112 | bool: True if function completes successfully 113 | """ 114 | certificate = find_local_equinor_root_certificate() 115 | 116 | # If certificate is not found locally, we download it from the Equinor server 117 | if certificate == "": 118 | logger.debug( 119 | "Unable to locate Equinor Root CA certificate on this host. Downloading from Equinor server." 120 | ) 121 | response = requests.get("http://pki.equinor.com/aia/ecpr.crt") 122 | 123 | if response.status_code != 200: 124 | logger.error( 125 | "Unable to find Equinor Root CA certificate locally and on Equinor server." 
126 | ) 127 | return False 128 | 129 | certificate = response.text.replace("\r", "") 130 | 131 | # Write result to user data so we can read the cert from there next time 132 | filepath = Path(user_data_dir("tagreader")) / "equinor_root_ca.crt" 133 | try: 134 | filepath.parent.mkdir(parents=True, exist_ok=True) 135 | filepath.write_text(certificate) 136 | logger.debug("Equinor Root CA certificate written to cache") 137 | except Exception as e: 138 | logger.debug(f"Failed to write Equinor Root CA certificate to cache: {e}") 139 | 140 | if certificate in certifi.contents(): 141 | logger.debug("Equinor Root Certificate already exists in certifi store") 142 | return True 143 | 144 | ca_file = certifi.where() 145 | with open(ca_file, "ab") as f: 146 | f.write(bytes(certificate, "ascii")) 147 | logger.debug("Equinor Root Certificate added to certifi store") 148 | 149 | 150 | def find_local_equinor_root_certificate() -> str: 151 | equinor_root_pem_hash = "5A206332CE73CED1D44C8A99C4C43B7CEE03DF5F" 152 | ca_search = "Equinor Root CA" 153 | 154 | if is_windows(): 155 | logger.debug("Checking for Equinor Root CA in Windows certificate store") 156 | for cert in ssl.enum_certificates("CA"): 157 | found_cert = cert[0] 158 | # deepcode ignore InsecureHash: 159 | if hashlib.sha1(found_cert).hexdigest().upper() == equinor_root_pem_hash: 160 | return ssl.DER_cert_to_PEM_cert(found_cert) 161 | 162 | elif is_mac(): 163 | logger.debug("Checking for Equinor Root CA in MacOS certificate store") 164 | macos_ca_certs = subprocess.run( 165 | ["security", "find-certificate", "-a", "-c", ca_search, "-Z"], 166 | stdout=subprocess.PIPE, 167 | ).stdout 168 | 169 | if equinor_root_pem_hash in str(macos_ca_certs).upper(): 170 | c = get_macos_equinor_certificates() 171 | for cert in c: 172 | # deepcode ignore InsecureHash: 173 | if hashlib.sha1(cert).hexdigest().upper() == equinor_root_pem_hash: 174 | return ssl.DER_cert_to_PEM_cert(cert) 175 | 176 | # If the certificate is not found in the local cert store, look in the tagreader cache 177 | filepath = Path(user_data_dir("tagreader")) / "equinor_root_ca.crt" 178 | 179 | try: 180 | if filepath.exists(): 181 | return filepath.read_text() 182 | except Exception as e: 183 | logger.debug(f"Failed to read Equinor Root CA certificate from cache: {e}") 184 | 185 | return "" 186 | 187 | 188 | def get_macos_equinor_certificates(): 189 | import ssl 190 | import tempfile 191 | 192 | ca_search = "Equinor Root CA" 193 | 194 | ctx = ssl.create_default_context() 195 | macos_ca_certs = subprocess.run( 196 | ["security", "find-certificate", "-a", "-c", ca_search, "-p"], 197 | stdout=subprocess.PIPE, 198 | ).stdout 199 | with tempfile.NamedTemporaryFile("w+b", delete=False) as tmp_file: 200 | tmp_file.write(macos_ca_certs) 201 | 202 | ctx.load_verify_locations(tmp_file.name) 203 | 204 | return ctx.get_ca_certs(binary_form=True) 205 | 206 | 207 | def is_equinor() -> bool: 208 | """Determines whether code is running on an Equinor host 209 | 210 | If Windows host: 211 | Finds host's domain in Windows Registry at 212 | HKLM\\SYSTEM\\ControlSet001\\Services\\Tcpip\\Parameters\\Domain 213 | If mac os host: 214 | Finds statoil.net as AD hostname in certificates 215 | If Linux host: 216 | Checks whether statoil.no is search domain 217 | 218 | Returns: 219 | bool: True if Equinor 220 | """ 221 | if is_windows(): 222 | with winreg.OpenKey( 223 | winreg.HKEY_LOCAL_MACHINE, r"SYSTEM\ControlSet001\Services\Tcpip\Parameters" 224 | ) as key: 225 | domain = winreg.QueryValueEx(key, "Domain") 226 | if 
"statoil" in domain[0]: 227 | return True 228 | elif is_mac(): 229 | s = subprocess.run( 230 | ["security", "find-certificate", "-a", "-c" "client.statoil.net"], 231 | stdout=subprocess.PIPE, 232 | ).stdout 233 | 234 | host = socket.gethostname() 235 | 236 | # deepcode ignore IdenticalBranches: Not an error. First test is just more precise. 237 | if host + ".client.statoil.net" in str(s): 238 | return True 239 | elif "client.statoil.net" in host and host in str(s): 240 | return True 241 | elif is_linux(): 242 | with open("/etc/resolv.conf", "r") as f: 243 | if "statoil.no" in f.read(): 244 | return True 245 | else: 246 | raise OSError( 247 | f"Unsupported system: {platform.system()}. Please report this as an issue." 248 | ) 249 | return False 250 | 251 | 252 | class IMSType(str, Enum): 253 | PIWEBAPI = "piwebapi" 254 | ASPENONE = "aspenone" 255 | PI = "pi" 256 | ASPEN = "aspen" 257 | IP21 = "ip21" 258 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Generator 3 | 4 | import pytest 5 | 6 | from tagreader.cache import SmartCache 7 | 8 | 9 | @pytest.fixture # type: ignore[misc] 10 | def cache(tmp_path: Path) -> Generator[SmartCache, None, None]: 11 | cache = SmartCache(directory=tmp_path, size_limit=int(4e9)) 12 | yield cache 13 | -------------------------------------------------------------------------------- /tests/test_AspenHandlerREST.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | import pytest 4 | 5 | from tagreader import utils 6 | from tagreader.utils import ReaderType 7 | from tagreader.web_handlers import AspenHandlerWeb 8 | 9 | SAMPLE_TIME = timedelta(seconds=60) 10 | 11 | 12 | @pytest.fixture # type: ignore[misc] 13 | def aspen_handler() -> AspenHandlerWeb: # type: ignore[misc] 14 | h = AspenHandlerWeb( 15 | datasource="source_name", auth=None, options={}, url=None, verify_ssl=None 16 | ) 17 | yield h 18 | 19 | 20 | def test_generate_search_query() -> None: 21 | with pytest.raises(ValueError): 22 | AspenHandlerWeb.generate_search_query(tag="ATCAI", desc=None, datasource=None) 23 | assert AspenHandlerWeb.generate_search_query( 24 | tag="ATCAI", datasource="source_name", desc=None, max=100 25 | ) == { 26 | "datasource": "source_name", 27 | "tag": "ATCAI", 28 | "max": 100, 29 | "getTrendable": 0, 30 | } 31 | assert AspenHandlerWeb.generate_search_query( 32 | tag="ATC*", datasource="source_name", desc=None, max=100 33 | ) == { 34 | "datasource": "source_name", 35 | "tag": "ATC*", 36 | "max": 100, 37 | "getTrendable": 0, 38 | } 39 | assert AspenHandlerWeb.generate_search_query( 40 | tag="ATCAI", datasource="source_name", desc=None 41 | ) == {"datasource": "source_name", "tag": "ATCAI", "max": 100000, "getTrendable": 0} 42 | 43 | 44 | def test_split_tagmap() -> None: 45 | assert AspenHandlerWeb.split_tagmap("ATCAI") == ("ATCAI", None) 46 | assert AspenHandlerWeb.split_tagmap("ATCAI;IP_ANALOGMAP") == ( 47 | "ATCAI", 48 | "IP_ANALOGMAP", 49 | ) 50 | 51 | 52 | def test_generate_description_query(aspen_handler: AspenHandlerWeb) -> None: 53 | assert aspen_handler.generate_get_description_query("ATCAI") == ( 54 | '0' 55 | "DSCR" 56 | "0" 57 | ) 58 | 59 | 60 | def test_generate_unit_query(aspen_handler: AspenHandlerWeb) -> None: 61 | assert aspen_handler.generate_get_unit_query("ATCAI") == ( 62 | '0' 63 | "Units" 64 | "MAP_Units0" 
65 | ) 66 | 67 | 68 | def test_generate_map_query(aspen_handler: AspenHandlerWeb) -> None: 69 | assert aspen_handler.generate_get_map_query("ATCAI") == ( 70 | '0' 71 | "" 72 | ) 73 | 74 | 75 | @pytest.mark.parametrize( # type: ignore[misc] 76 | "read_type", 77 | [ 78 | "RAW", 79 | "SHAPEPRESERVING", 80 | "INT", 81 | "MIN", 82 | "MAX", 83 | "RNG", 84 | "AVG", 85 | "VAR", 86 | "STD", 87 | # pytest.param("COUNT", 0, marks=pytest.mark.skip), 88 | # pytest.param("GOOD", 0, marks=pytest.mark.skip), 89 | # pytest.param("BAD", 0, marks=pytest.mark.skip), 90 | # pytest.param("TOTAL", 0, marks=pytest.mark.skip), 91 | # pytest.param("SUM", 0, marks=pytest.mark.skip), 92 | "SNAPSHOT", 93 | ], 94 | ) 95 | def test_generate_tag_read_query( 96 | aspen_handler: AspenHandlerWeb, read_type: str 97 | ) -> None: 98 | start = utils.ensure_datetime_with_tz("2020-06-24 17:00:00") 99 | end = utils.ensure_datetime_with_tz("2020-06-24 18:00:00") 100 | ts = SAMPLE_TIME 101 | res = aspen_handler.generate_read_query( 102 | tagname="ATCAI", 103 | mapname=None, 104 | start=start, 105 | end=end, 106 | sample_time=ts, 107 | read_type=getattr(ReaderType, read_type), 108 | metadata={}, 109 | ) 110 | expected = { 111 | "RAW": ( 112 | '' 113 | "" 114 | "015930108000001593014400000" 115 | "01000000" 116 | ), 117 | "SHAPEPRESERVING": ( 118 | '' 119 | "" 120 | "015930108000001593014400000" 121 | "210000000" 122 | ), 123 | "INT": ( 124 | '' 125 | "" 126 | "015930108000001593014400000" 127 | "10

60

3
" 128 | ), 129 | "MIN": ( 130 | '' 131 | "" 132 | "015930108000001593014400000" 133 | "1400

60

30" 134 | "000
" 135 | ), 136 | "MAX": ( 137 | '' 138 | "" 139 | "015930108000001593014400000" 140 | "1300

60

30" 141 | "000
" 142 | ), 143 | "RNG": ( 144 | '' 145 | "" 146 | "015930108000001593014400000" 147 | "1500

60

30" 148 | "000
" 149 | ), 150 | "AVG": ( 151 | '' 152 | "" 153 | "015930108000001593014400000" 154 | "1200

60

30" 155 | "000
" 156 | ), 157 | "VAR": ( 158 | '' 159 | "" 160 | "015930108000001593014400000" 161 | "1800

60

30" 162 | "000
" 163 | ), 164 | "STD": ( 165 | '' 166 | "" 167 | "015930108000001593014400000" 168 | "1700

60

30" 169 | "000
" 170 | ), 171 | "COUNT": "whatever", 172 | "GOOD": "whatever", 173 | "BAD": "whatever", 174 | "TOTAL": "whatever", 175 | "SUM": "whatever", 176 | "SNAPSHOT": ( 177 | '' 178 | "" 179 | "" 180 | "10" 181 | ), 182 | } 183 | assert expected[read_type] == res 184 | 185 | 186 | def test_generate_read_query_long_sample_time(aspen_handler: AspenHandlerWeb) -> None: 187 | start = utils.ensure_datetime_with_tz("2020-06-24 17:00:00") 188 | end = utils.ensure_datetime_with_tz("2020-06-24 18:00:00") 189 | ts = timedelta(seconds=86401) 190 | 191 | res = aspen_handler.generate_read_query( 192 | tagname="ATCAI", 193 | mapname=None, 194 | start=start, 195 | end=end, 196 | sample_time=ts, 197 | read_type=ReaderType.INT, 198 | metadata={}, 199 | ) 200 | expected = ( 201 | '' 202 | "" 203 | "015930108000001593014400000" 204 | "10

86401

3
" 205 | ) 206 | 207 | assert expected == res 208 | 209 | 210 | def test_generate_sql_query(aspen_handler: AspenHandlerWeb) -> None: 211 | res = aspen_handler.generate_sql_query( 212 | datasource=None, 213 | connection_string="my_connection_stringing", 214 | query="myquery", 215 | max_rows=9999, 216 | ) 217 | expected = ( 218 | '' 219 | "" 220 | ) 221 | assert res == expected 222 | res = aspen_handler.generate_sql_query( 223 | datasource="mydatasource", 224 | query="myquery", 225 | max_rows=9999, 226 | connection_string=None, 227 | ) 228 | expected = ( 229 | '' 232 | "" 233 | ) 234 | assert res == expected 235 | -------------------------------------------------------------------------------- /tests/test_AspenHandlerREST_connect.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime, timedelta 3 | from typing import Generator 4 | 5 | import pandas as pd 6 | import pytest 7 | 8 | from tagreader.clients import IMSClient, list_sources 9 | from tagreader.utils import IMSType 10 | from tagreader.web_handlers import ( 11 | AspenHandlerWeb, 12 | get_verify_ssl, 13 | list_aspenone_sources, 14 | ) 15 | 16 | is_GITHUB_ACTIONS = "GITHUB_ACTION" in os.environ 17 | is_AZURE_PIPELINE = "TF_BUILD" in os.environ 18 | 19 | if is_GITHUB_ACTIONS: 20 | pytest.skip( 21 | "All tests in module require connection to Aspen server", 22 | allow_module_level=True, 23 | ) 24 | 25 | VERIFY_SSL = False if is_AZURE_PIPELINE else get_verify_ssl() 26 | 27 | SOURCE = "TRB" 28 | TAG = "xxx" 29 | FAKE_TAG = "so_random_it_cant_exist" 30 | START_TIME = datetime(2023, 5, 1, 10, 0, 0) 31 | STOP_TIME = datetime(2023, 5, 1, 11, 0, 0) 32 | SAMPLE_TIME = timedelta(seconds=60) 33 | 34 | 35 | @pytest.fixture # type: ignore[misc] 36 | def client() -> Generator[IMSClient, None, None]: 37 | c = IMSClient( 38 | datasource=SOURCE, 39 | imstype="aspenone", 40 | verifySSL=bool(VERIFY_SSL), 41 | ) 42 | c.cache = None 43 | c.connect() 44 | yield c 45 | if os.path.exists(SOURCE + ".h5"): 46 | os.remove(SOURCE + ".h5") 47 | 48 | 49 | @pytest.fixture # type: ignore[misc] 50 | def aspen_handler() -> Generator[AspenHandlerWeb, None, None]: 51 | h = AspenHandlerWeb( 52 | datasource=SOURCE, verify_ssl=bool(VERIFY_SSL), auth=None, url=None, options={} 53 | ) 54 | yield h 55 | 56 | 57 | def test_list_all_aspen_one_sources() -> None: 58 | res = list_aspenone_sources(verify_ssl=bool(VERIFY_SSL), auth=None, url=None) 59 | assert isinstance(res, list) 60 | assert len(res) >= 1 61 | assert isinstance(res[0], str) 62 | for r in res: 63 | assert 3 <= len(r) <= 20 64 | 65 | 66 | def test_list_sources_aspen_one() -> None: 67 | res = list_sources(imstype=IMSType.ASPENONE, verifySSL=bool(VERIFY_SSL)) 68 | assert isinstance(res, list) 69 | assert len(res) >= 1 70 | assert isinstance(res[0], str) 71 | for r in res: 72 | assert 3 <= len(r) <= 20 73 | 74 | 75 | def test_verify_connection(aspen_handler: AspenHandlerWeb) -> None: 76 | assert aspen_handler.verify_connection(SOURCE) is True 77 | assert aspen_handler.verify_connection("some_random_stuff_here") is False 78 | 79 | 80 | def test_search_tag(client: IMSClient) -> None: 81 | res = client.search(tag=FAKE_TAG, desc=None) 82 | assert 0 == len(res) 83 | 84 | res = client.search(tag="AverageCPUTimeVals", desc=None) 85 | assert res == [("AverageCPUTimeVals", "Average CPU Time")] 86 | 87 | res = client.search(tag="Aspen*", desc=None, return_desc=False) 88 | assert len(res) < 5 89 | assert isinstance(res, list) 90 | assert isinstance(res[0], 
str) 91 | 92 | res = client.search(tag="Aspen*", desc=None) 93 | assert len(res) < 5 94 | assert isinstance(res, list) 95 | assert isinstance(res[0], tuple) 96 | 97 | res = client.search("AspenCalcTrigger1") 98 | assert res == [("AspenCalcTrigger1", "")] 99 | res = client.search("AspenCalcTrigger1", desc=None) 100 | assert res == [("AspenCalcTrigger1", "")] 101 | 102 | res = client.search("AverageCPUTimeVals", "*CPU*") 103 | assert res == [("AverageCPUTimeVals", "Average CPU Time")] 104 | with pytest.raises(ValueError): 105 | _ = client.search(desc="Sine Input") # noqa 106 | 107 | with pytest.raises(ValueError): 108 | res = client.search("") 109 | 110 | with pytest.raises(ValueError): 111 | _ = client.search( 112 | desc="Sine Input" 113 | ) # noqa res = client.search(tag="ATCM*", return_desc=False)´ 114 | 115 | 116 | def test_read_unknown_tag(client: IMSClient) -> None: 117 | df = client.read(tags=[FAKE_TAG], start_time=START_TIME, end_time=STOP_TIME) 118 | assert len(df.index) == 0 119 | df = client.read(tags=[TAG, FAKE_TAG], start_time=START_TIME, end_time=STOP_TIME) 120 | assert len(df.index) > 0 121 | assert len(df.columns == 1) 122 | 123 | 124 | def test_get_units(client: IMSClient) -> None: 125 | d = client.get_units(FAKE_TAG) 126 | assert isinstance(d, dict) 127 | assert len(d.items()) == 0 128 | 129 | 130 | def test_get_desc(client: IMSClient) -> None: 131 | d = client.get_descriptions(FAKE_TAG) 132 | assert isinstance(d, dict) 133 | assert len(d.items()) == 0 134 | 135 | 136 | def test_query_sql(client: IMSClient) -> None: 137 | # The % causes WC_E_SYNTAX error in result. Tried "everything" but no go. 138 | # Leaving it for now. 139 | # query = "SELECT name, ip_description FROM ip_analogdef WHERE name LIKE 'ATC%'" 140 | query = "Select name, ip_description from ip_analogdef where name = 'atc'" 141 | res = client.query_sql(query=query, parse=False) 142 | # print(res) 143 | assert isinstance(res, str) 144 | 145 | res = client.query_sql(query=query, parse=True) 146 | assert isinstance(res, pd.DataFrame) 147 | assert res.empty 148 | 149 | query = "Select name, ip_description from ip_analogdef where name = 'AverageCPUTimeVals'" 150 | res = client.query_sql(query=query, parse=True) 151 | assert isinstance(res, pd.DataFrame) 152 | assert len(res.index.values) == 1 153 | assert res.index.values[0] == 0 154 | -------------------------------------------------------------------------------- /tests/test_PIHandlerREST.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import Generator, cast 3 | 4 | import pytest 5 | 6 | from tagreader.cache import SmartCache 7 | from tagreader.utils import ReaderType, ensure_datetime_with_tz 8 | from tagreader.web_handlers import PIHandlerWeb 9 | 10 | START_TIME = "2020-04-01 11:05:00" 11 | STOP_TIME = "2020-04-01 12:05:00" 12 | SAMPLE_TIME = 60 13 | 14 | 15 | @pytest.fixture # type: ignore[misc] 16 | def pi_handler(cache: SmartCache) -> Generator[PIHandlerWeb, None, None]: 17 | h = PIHandlerWeb( 18 | datasource="sourcename", 19 | auth=None, 20 | options={}, 21 | url=None, 22 | verify_ssl=True, 23 | cache=cache, 24 | ) 25 | if not isinstance(h.web_id_cache, SmartCache): 26 | raise ValueError("Expected SmartCache in the web client.") 27 | h.web_id_cache.add(key="alreadyknowntag", value="knownwebid") 28 | yield h 29 | 30 | 31 | def test_escape_chars() -> None: 32 | assert ( 33 | PIHandlerWeb.escape('+-&|(){}[]^"~*:\\') == r"\+\-\&\|\(\)\{\}\[\]\^\"\~*\:\\" 34 | ) 35 | 36 | 37 | 
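# Added note (illustrative): escaping matters because characters such as ':'
# are reserved in the PI Web API query syntax; e.g. escape("BA:CONC.1") gives
# r"BA\:CONC.1", consistent with the escaped query strings asserted below.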
def test_generate_search_query() -> None: 38 | assert PIHandlerWeb.generate_search_params( 39 | tag="SINUSOID", desc=None, datasource=None 40 | ) == {"query": "name:SINUSOID"} 41 | assert PIHandlerWeb.generate_search_params( 42 | tag=r"BA:*.1", desc=None, datasource=None 43 | ) == { 44 | "query": r"name:BA\:*.1", 45 | } 46 | assert PIHandlerWeb.generate_search_params( 47 | tag="BA:*.1", datasource=None, desc=None 48 | ) == { 49 | "query": r"name:BA\:*.1", 50 | } 51 | assert PIHandlerWeb.generate_search_params( 52 | desc="Concentration Reactor 1", datasource=None, tag=None 53 | ) == { 54 | "query": r"description:Concentration\ Reactor\ 1", 55 | } 56 | assert PIHandlerWeb.generate_search_params( 57 | tag="BA:*.1", desc="Concentration Reactor 1", datasource=None 58 | ) == {"query": r"name:BA\:*.1 AND description:Concentration\ Reactor\ 1"} 59 | 60 | 61 | def test_is_summary(pi_handler: PIHandlerWeb) -> None: 62 | assert pi_handler._is_summary(ReaderType.AVG) 63 | assert pi_handler._is_summary(ReaderType.MIN) 64 | assert pi_handler._is_summary(ReaderType.MAX) 65 | assert pi_handler._is_summary(ReaderType.RNG) 66 | assert pi_handler._is_summary(ReaderType.STD) 67 | assert pi_handler._is_summary(ReaderType.VAR) 68 | assert not pi_handler._is_summary(ReaderType.RAW) 69 | assert not pi_handler._is_summary(ReaderType.SHAPEPRESERVING) 70 | assert not pi_handler._is_summary(ReaderType.INT) 71 | assert not pi_handler._is_summary(ReaderType.GOOD) 72 | assert not pi_handler._is_summary(ReaderType.BAD) 73 | assert not pi_handler._is_summary(ReaderType.SNAPSHOT) 74 | 75 | 76 | @pytest.mark.parametrize( # type: ignore[misc] 77 | "read_type", 78 | [ 79 | "RAW", 80 | # pytest.param( 81 | # "SHAPEPRESERVING", marks=pytest.mark.skip(reason="Not implemented") 82 | # ), 83 | "INT", 84 | "MIN", 85 | "MAX", 86 | "RNG", 87 | "AVG", 88 | "STD", 89 | "VAR", 90 | # pytest.param("COUNT", marks=pytest.mark.skip(reason="Not implemented")), 91 | # pytest.param("GOOD", marks=pytest.mark.skip(reason="Not implemented")), 92 | # pytest.param("BAD", marks=pytest.mark.skip(reason="Not implemented")), 93 | # pytest.param("TOTAL", marks=pytest.mark.skip(reason="Not implemented")), 94 | # pytest.param("SUM", marks=pytest.mark.skip(reason="Not implemented")), 95 | "SNAPSHOT", 96 | ], 97 | ) 98 | def test_generate_read_query(pi_handler: PIHandlerWeb, read_type: str) -> None: 99 | if not isinstance(pi_handler.web_id_cache, SmartCache): 100 | raise ValueError("Expected SmartCache in the fixture.") 101 | start = ensure_datetime_with_tz(START_TIME) 102 | stop = ensure_datetime_with_tz(STOP_TIME) 103 | ts = timedelta(seconds=SAMPLE_TIME) 104 | 105 | (url, params) = pi_handler.generate_read_query( 106 | tag=pi_handler.tag_to_web_id(tag="alreadyknowntag"), # type: ignore[arg-type] 107 | start=start, 108 | end=stop, 109 | sample_time=ts, 110 | read_type=getattr(ReaderType, read_type), 111 | metadata=None, 112 | ) 113 | if read_type != "SNAPSHOT": 114 | assert params["startTime"] == "01-Apr-20 09:05:00" 115 | assert params["endTime"] == "01-Apr-20 10:05:00" 116 | assert params["timeZone"] == "UTC" 117 | 118 | if read_type == "INT": 119 | assert ( 120 | url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/interpolated" 121 | ) 122 | assert params["selectedFields"] == "Links;Items.Timestamp;Items.Value" 123 | assert params["interval"] == f"{SAMPLE_TIME}s" 124 | elif read_type in ["AVG", "MIN", "MAX", "RNG", "STD", "VAR"]: 125 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/summary" 126 | assert ( 127 | 
params["selectedFields"] == "Links;Items.Value.Timestamp;Items.Value.Value" 128 | ) 129 | assert { 130 | "AVG": "Average", 131 | "MIN": "Minimum", 132 | "MAX": "Maximum", 133 | "RNG": "Range", 134 | "STD": "StdDev", 135 | "VAR": "StdDev", 136 | }.get(read_type) == params["summaryType"] 137 | assert params["summaryDuration"] == f"{SAMPLE_TIME}s" 138 | elif read_type == "SNAPSHOT": 139 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/value" 140 | assert params["selectedFields"] == "Timestamp;Value" 141 | assert len(params) == 3 142 | elif read_type == "RAW": 143 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/recorded" 144 | assert params["selectedFields"] == "Links;Items.Timestamp;Items.Value" 145 | assert params["maxCount"] == 10000 # type: ignore[comparison-overlap] 146 | 147 | 148 | @pytest.mark.parametrize( # type: ignore[misc] 149 | "read_type", 150 | [ 151 | "RAW", 152 | # pytest.param( 153 | # "SHAPEPRESERVING", marks=pytest.mark.skip(reason="Not implemented") 154 | # ), 155 | "INT", 156 | "MIN", 157 | "MAX", 158 | "RNG", 159 | "AVG", 160 | "STD", 161 | "VAR", 162 | # pytest.param("COUNT", marks=pytest.mark.skip(reason="Not implemented")), 163 | # pytest.param("GOOD", marks=pytest.mark.skip(reason="Not implemented")), 164 | # pytest.param("BAD", marks=pytest.mark.skip(reason="Not implemented")), 165 | # pytest.param("TOTAL", marks=pytest.mark.skip(reason="Not implemented")), 166 | # pytest.param("SUM", marks=pytest.mark.skip(reason="Not implemented")), 167 | "SNAPSHOT", 168 | ], 169 | ) 170 | def test_generate_read_query_with_status( 171 | pi_handler: PIHandlerWeb, read_type: str 172 | ) -> None: 173 | if not isinstance(pi_handler.web_id_cache, SmartCache): 174 | raise ValueError("Expected SmartCache in the fixture.") 175 | start = ensure_datetime_with_tz(START_TIME) 176 | stop = ensure_datetime_with_tz(STOP_TIME) 177 | ts = timedelta(seconds=SAMPLE_TIME) 178 | 179 | (url, params) = pi_handler.generate_read_query( 180 | tag=pi_handler.tag_to_web_id("alreadyknowntag"), # type: ignore[arg-type] 181 | start=start, 182 | end=stop, 183 | sample_time=ts, 184 | read_type=getattr(ReaderType, read_type), 185 | get_status=True, 186 | metadata=None, 187 | ) 188 | if read_type != "SNAPSHOT": 189 | assert params["startTime"] == "01-Apr-20 09:05:00" 190 | assert params["endTime"] == "01-Apr-20 10:05:00" 191 | assert params["timeZone"] == "UTC" 192 | 193 | if read_type == "INT": 194 | assert ( 195 | url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/interpolated" 196 | ) 197 | assert params["selectedFields"] == ( 198 | "Links;Items.Timestamp;Items.Value;" 199 | "Items.Good;Items.Questionable;Items.Substituted" 200 | ) 201 | assert params["interval"] == f"{SAMPLE_TIME}s" 202 | elif read_type in ["AVG", "MIN", "MAX", "RNG", "STD", "VAR"]: 203 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/summary" 204 | assert params["selectedFields"] == ( 205 | "Links;Items.Value.Timestamp;Items.Value.Value;" 206 | "Items.Value.Good;Items.Value.Questionable;Items.Value.Substituted" 207 | ) 208 | assert { 209 | "AVG": "Average", 210 | "MIN": "Minimum", 211 | "MAX": "Maximum", 212 | "RNG": "Range", 213 | "STD": "StdDev", 214 | "VAR": "StdDev", 215 | }.get(read_type) == params["summaryType"] 216 | assert params["summaryDuration"] == f"{SAMPLE_TIME}s" 217 | elif read_type == "SNAPSHOT": 218 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/value" 219 | assert ( 220 | params["selectedFields"] == 
"Timestamp;Value;Good;Questionable;Substituted" 221 | ) 222 | assert len(params) == 3 223 | elif read_type == "RAW": 224 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/recorded" 225 | assert params["selectedFields"] == ( 226 | "Links;Items.Timestamp;Items.Value;" 227 | "Items.Good;Items.Questionable;Items.Substituted" 228 | ) 229 | assert params["maxCount"] == 10000 # type: ignore[comparison-overlap] 230 | 231 | 232 | def test_generate_read_query_long_sample_time(pi_handler: PIHandlerWeb) -> None: 233 | start = ensure_datetime_with_tz(START_TIME) 234 | stop = ensure_datetime_with_tz(STOP_TIME) 235 | ts = timedelta(seconds=86410) 236 | 237 | (url, params) = pi_handler.generate_read_query( 238 | tag=pi_handler.tag_to_web_id("alreadyknowntag"), # type: ignore[arg-type] 239 | start=start, 240 | end=stop, 241 | sample_time=ts, 242 | read_type=ReaderType.INT, 243 | metadata=None, 244 | ) 245 | assert params["interval"] == f"{86410}s" 246 | -------------------------------------------------------------------------------- /tests/test_PIHandlerREST_connect.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import timedelta 3 | from typing import Generator 4 | 5 | import pytest 6 | 7 | from tagreader.cache import SmartCache 8 | from tagreader.clients import IMSClient, list_sources 9 | from tagreader.utils import ReaderType, ensure_datetime_with_tz 10 | from tagreader.web_handlers import PIHandlerWeb, get_verify_ssl, list_piwebapi_sources 11 | 12 | is_GITHUBACTION = "GITHUB_ACTION" in os.environ 13 | is_AZUREPIPELINE = "TF_BUILD" in os.environ 14 | 15 | if is_GITHUBACTION: 16 | pytest.skip( 17 | "All tests in module require connection to PI server", allow_module_level=True 18 | ) 19 | 20 | verifySSL = False if is_AZUREPIPELINE else get_verify_ssl() 21 | 22 | SOURCE = "PIMAM" 23 | TAGS = { 24 | "Float32": "CDT158", # BA:CONC.1 25 | "Digital": "CDM158", # BA:ACTIVE.1 26 | "Int32": "CDEP158", 27 | } 28 | 29 | START_TIME = "2020-04-01 11:05:00" 30 | STOP_TIME = "2020-04-01 12:05:00" 31 | SAMPLE_TIME = 60 32 | 33 | 34 | @pytest.fixture # type: ignore[misc] 35 | def client() -> Generator[IMSClient, None, None]: 36 | c = IMSClient( 37 | datasource=SOURCE, 38 | imstype="piwebapi", 39 | verifySSL=bool(verifySSL), 40 | ) 41 | c.cache = None 42 | c.connect() 43 | c.handler._max_rows = 1000 # For the long raw test 44 | yield c 45 | if os.path.exists(SOURCE + ".h5"): 46 | os.remove(SOURCE + ".h5") 47 | 48 | 49 | @pytest.fixture # type: ignore[misc] 50 | def pi_handler(cache: SmartCache) -> Generator[PIHandlerWeb, None, None]: 51 | h = PIHandlerWeb( 52 | datasource=SOURCE, 53 | verify_ssl=bool(verifySSL), 54 | auth=None, 55 | options={}, 56 | url=None, 57 | cache=cache, 58 | ) 59 | if not isinstance(h.web_id_cache, SmartCache): 60 | raise ValueError("Expected SmartCache in the web client.") 61 | h.web_id_cache["alreadyknowntag"] = "knownwebid" 62 | yield h 63 | 64 | 65 | def test_list_all_piwebapi_sources() -> None: 66 | res = list_piwebapi_sources(verify_ssl=bool(verifySSL), auth=None, url=None) 67 | assert isinstance(res, list) 68 | assert len(res) >= 1 69 | for r in res: 70 | assert isinstance(r, str) 71 | assert 3 <= len(r) 72 | 73 | 74 | def test_list_sources_piwebapi() -> None: 75 | res = list_sources(imstype="piwebapi", verifySSL=bool(verifySSL)) 76 | assert isinstance(res, list) 77 | assert len(res) >= 1 78 | for r in res: 79 | assert isinstance(r, str) 80 | assert 3 <= len(r) 81 | 82 | 83 | def 
test_verify_connection(pi_handler: IMSClient) -> None: 84 | assert pi_handler.verify_connection("PIMAM") is True # type: ignore[attr-defined] 85 | assert pi_handler.verify_connection("somerandomstuffhere") is False # type: ignore[attr-defined] 86 | 87 | 88 | def test_search_tag(client: IMSClient) -> None: 89 | res = client.search("SINUSOID") 90 | assert 1 == len(res) 91 | res = client.search("SIN*") 92 | assert isinstance(res, list) 93 | assert 3 <= len(res) 94 | assert isinstance(res[0], tuple) 95 | [taglist, desclist] = zip(*res) 96 | assert "SINUSOIDU" in taglist 97 | assert desclist[taglist.index("SINUSOID")] == "12 Hour Sine Wave" 98 | res = client.search("SIN*", return_desc=False) 99 | assert 3 <= len(res) 100 | assert isinstance(res, list) 101 | assert isinstance(res[0], str) 102 | res = client.search(desc="12 Hour Sine Wave") 103 | assert 1 <= len(res) 104 | res = client.search(tag="SINUSOID", desc="*Sine*") 105 | assert 1 <= len(res) 106 | 107 | 108 | def test_tag_to_web_id(pi_handler: PIHandlerWeb) -> None: 109 | res = pi_handler.tag_to_web_id("SINUSOID") 110 | assert isinstance(res, str) 111 | assert len(res) >= 20 112 | with pytest.raises(AssertionError): 113 | _ = pi_handler.tag_to_web_id("SINUSOID*") 114 | res = pi_handler.tag_to_web_id("somerandomgarbage") 115 | assert not res 116 | 117 | 118 | @pytest.mark.parametrize( # type: ignore[misc] 119 | ("read_type", "size"), 120 | [ 121 | ("RAW", 10), 122 | # pytest.param( 123 | # "SHAPEPRESERVING", 0, marks=pytest.mark.skip(reason="Not implemented") 124 | # ), 125 | ("INT", 61), 126 | ("MIN", 60), 127 | ("MAX", 60), 128 | ("RNG", 60), 129 | ("AVG", 60), 130 | ("VAR", 60), 131 | ("STD", 60), 132 | # pytest.param("COUNT", 0, marks=pytest.mark.skip(reason="Not implemented")), 133 | # pytest.param("GOOD", 0, marks=pytest.mark.skip(reason="Not implemented")), 134 | # pytest.param("BAD", 0, marks=pytest.mark.skip(reason="Not implemented")), 135 | # pytest.param("TOTAL", 0, marks=pytest.mark.skip(reason="Not implemented")), 136 | # pytest.param("SUM", 0, marks=pytest.mark.skip(reason="Not implemented")), 137 | ("SNAPSHOT", 1), 138 | ], 139 | ) 140 | def test_read(client: IMSClient, read_type: str, size: int) -> None: 141 | if read_type == "SNAPSHOT": 142 | df = client.read( 143 | tags=TAGS["Float32"], 144 | read_type=getattr(ReaderType, read_type), 145 | start_time=None, 146 | end_time=None, 147 | ) 148 | else: 149 | df = client.read( 150 | tags=TAGS["Float32"], 151 | start_time=START_TIME, 152 | end_time=STOP_TIME, 153 | ts=SAMPLE_TIME, 154 | read_type=getattr(ReaderType, read_type), 155 | ) 156 | 157 | if read_type not in ["SNAPSHOT", "RAW"]: 158 | assert df.shape == (size, 1) 159 | assert df.index[0] == ensure_datetime_with_tz(START_TIME) 160 | assert df.index[-1] == df.index[0] + (size - 1) * timedelta(seconds=SAMPLE_TIME) 161 | elif read_type in "RAW": 162 | # Weirdness for test-tag which can have two different results, 163 | # apparently depending on the day of the week, mood, lunar cycle... 
164 | assert df.shape == (size, 1) or df.shape == (size - 1, 1) 165 | assert df.index[0] >= ensure_datetime_with_tz(START_TIME) 166 | assert df.index[-1] <= ensure_datetime_with_tz(STOP_TIME) 167 | 168 | 169 | def test_read_with_status(client: IMSClient) -> None: 170 | df = client.read( 171 | tags=TAGS["Float32"], 172 | start_time=START_TIME, 173 | end_time=STOP_TIME, 174 | ts=SAMPLE_TIME, 175 | read_type=ReaderType.RAW, 176 | get_status=True, 177 | ) 178 | assert df.shape == (10, 2) 179 | assert df[TAGS["Float32"] + "::status"].iloc[0] == 0 180 | 181 | 182 | def test_read_raw_long(client: IMSClient) -> None: 183 | df = client.read( 184 | tags=TAGS["Float32"], 185 | start_time=START_TIME, 186 | end_time="2020-04-11 20:00:00", 187 | read_type=ReaderType.RAW, 188 | ) 189 | assert len(df) > 1000 190 | 191 | 192 | def test_read_only_invalid_data_yields_nan_for_invalid(client: IMSClient) -> None: 193 | tag = TAGS["Float32"] 194 | df = client.read( 195 | tags=tag, 196 | start_time="2012-10-09 10:30:00", 197 | end_time="2012-10-09 11:00:00", 198 | ts=600, 199 | ) 200 | assert df.shape == (4, 1) 201 | assert df[tag].isna().all() 202 | 203 | 204 | def test_read_invalid_data_mixed_with_valid_yields_nan_for_invalid( 205 | client: IMSClient, 206 | ) -> None: 207 | # Hint, found first valid datapoint for tag 208 | tag = TAGS["Float32"] 209 | df = client.read( 210 | tags=tag, 211 | start_time="2018-04-23 15:20:00", 212 | end_time="2018-04-23 15:50:00", 213 | ts=600, 214 | ) 215 | assert df.shape == (4, 1) 216 | assert df[tag].iloc[[0, 1]].isna().all() # type: ignore[call-overload] 217 | assert df[tag].iloc[[2, 3]].notnull().all() # type: ignore[call-overload] 218 | 219 | 220 | def test_digitalread_yields_integers(client: IMSClient) -> None: 221 | tag = TAGS["Digital"] 222 | df = client.read( 223 | tags=tag, 224 | start_time=START_TIME, 225 | end_time=STOP_TIME, 226 | ts=600, 227 | read_type=ReaderType.INT, 228 | ) 229 | assert all(x.is_integer() for x in df[tag]) 230 | 231 | 232 | def test_get_unit(client: IMSClient) -> None: 233 | res = client.get_units(list(TAGS.values())) 234 | assert res[TAGS["Float32"]] == "DEG. 
C" 235 | assert res[TAGS["Digital"]] == "STATE" 236 | assert res[TAGS["Int32"]] == "" 237 | 238 | 239 | def test_get_description(client: IMSClient) -> None: 240 | res = client.get_descriptions(list(TAGS.values())) 241 | assert res[TAGS["Float32"]] == "Atmospheric Tower OH Vapor" 242 | assert res[TAGS["Digital"]] == "Light Naphtha End Point Control" 243 | assert res[TAGS["Int32"]] == "Light Naphtha End Point" 244 | 245 | 246 | def test_from_dst_folds_time(client: IMSClient) -> None: 247 | if os.path.exists(SOURCE + ".h5"): 248 | os.remove(SOURCE + ".h5") 249 | tag = TAGS["Float32"] 250 | interval = ["2017-10-29 00:30:00", "2017-10-29 04:30:00"] 251 | df = client.read(tags=[tag], start_time=interval[0], end_time=interval[1], ts=600) 252 | assert len(df) == (4 + 1) * 6 + 1 253 | # Time exists inside fold: 254 | assert ( 255 | df[tag].loc["2017-10-29 01:10:00+02:00":"2017-10-29 01:50:00+02:00"].size == 5 # type: ignore[misc] 256 | ) 257 | # Time inside fold is always included: 258 | assert ( 259 | df.loc["2017-10-29 01:50:00":"2017-10-29 03:10:00"].size == 2 + (1 + 1) * 6 + 1 # type: ignore[misc] 260 | ) 261 | 262 | 263 | def test_to_dst_skips_time(client: IMSClient) -> None: 264 | if os.path.exists(SOURCE + ".h5"): 265 | os.remove(SOURCE + ".h5") 266 | tag = TAGS["Float32"] 267 | interval = ["2018-03-25 00:30:00", "2018-03-25 03:30:00"] 268 | df = client.read(tags=[tag], start_time=interval[0], end_time=interval[1], ts=600) 269 | # Lose one hour: 270 | assert ( 271 | df.loc["2018-03-25 01:50:00":"2018-03-25 03:10:00"].size == (2 + 1 * 6 + 1) - 6 # type: ignore[misc] 272 | ) 273 | 274 | 275 | def test_tags_with_no_data_included_in_results(client: IMSClient) -> None: 276 | df = client.read( 277 | tags=[TAGS["Float32"]], 278 | start_time="2099-01-01 00:00:00", 279 | end_time="2099-01-02 00:00:00", 280 | ts=timedelta(seconds=60), 281 | ) 282 | assert len(df.columns) == 1 283 | 284 | 285 | def test_tags_raw_with_no_data_included_in_results(client: IMSClient) -> None: 286 | df = client.read( 287 | tags=[TAGS["Float32"]], 288 | start_time="2099-01-01 00:00:00", 289 | end_time="2099-01-02 00:00:00", 290 | read_type=ReaderType.RAW, 291 | ts=timedelta(seconds=60), 292 | ) 293 | assert df.empty 294 | -------------------------------------------------------------------------------- /tests/test_bucketcache.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from pathlib import Path 3 | from typing import Generator 4 | 5 | import pandas as pd 6 | import pytest 7 | 8 | from tagreader.cache import BucketCache, safe_tagname, timestamp_to_epoch 9 | from tagreader.utils import ReaderType 10 | 11 | TAGNAME = "tag1" 12 | READE_TYPE = ReaderType.INT 13 | 14 | TZ = "UTC" 15 | TS = timedelta(seconds=300) 16 | MINUTE = timedelta(seconds=60) 17 | FREQ = f"{int(TS.total_seconds())}s" 18 | 19 | START_TIME_1 = pd.to_datetime("2020-01-01 12:00:00", utc=True) 20 | END_TIME_1 = pd.to_datetime("2020-01-01 13:00:00", utc=True) 21 | index = pd.date_range(start=START_TIME_1, end=END_TIME_1, freq=FREQ, name="time") 22 | DF1 = pd.DataFrame({TAGNAME: range(0, len(index))}, index=index) 23 | 24 | START_TIME_1_EPOCH = ( 25 | START_TIME_1 - pd.to_datetime("1970-01-01", utc=True) 26 | ) // pd.Timedelta( 27 | "1s" 28 | ) # 1577880000 29 | END_TIME_1_EPOCH = ( 30 | END_TIME_1 - pd.to_datetime("1970-01-01", utc=True) 31 | ) // pd.Timedelta( 32 | "1s" 33 | ) # 1577883600 34 | 35 | START_TIME_2 = pd.to_datetime("2020-01-01 13:30:00", utc=True) 36 | END_TIME_2 = 
pd.to_datetime("2020-01-01 14:00:00", utc=True) 37 | index = pd.date_range(start=START_TIME_2, end=END_TIME_2, freq=FREQ, name="time") 38 | DF2 = pd.DataFrame({TAGNAME: range(0, len(index))}, index=index) 39 | 40 | END_TIME_2_EPOCH = ( 41 | END_TIME_2 - pd.to_datetime("1970-01-01", utc=True) 42 | ) // pd.Timedelta( 43 | "1s" 44 | ) # 1577887200 45 | 46 | 47 | START_TIME_3 = pd.to_datetime("2020-01-01 12:40:00", utc=True) 48 | END_TIME_3 = pd.to_datetime("2020-01-01 13:40:00", utc=True) 49 | index = pd.date_range(start=START_TIME_3, end=END_TIME_3, freq=FREQ, name="time") 50 | DF3 = pd.DataFrame({TAGNAME: range(0, len(index))}, index=index) 51 | 52 | 53 | @pytest.fixture(autouse=True) # type: ignore[misc] 54 | def cache(tmp_path: Path) -> Generator[BucketCache, None, None]: 55 | cache = BucketCache(directory=tmp_path) 56 | yield cache 57 | 58 | 59 | def test_timestamp_to_epoch() -> None: 60 | # Any timezone or naïve should work 61 | timestamp = pd.to_datetime("1970-01-01 01:00:00", utc=True) 62 | assert timestamp_to_epoch(timestamp) == 3600 63 | timestamp = pd.to_datetime("1970-01-01 01:00:00", utc=False) 64 | assert timestamp_to_epoch(timestamp) == 3600 65 | timestamp = pd.to_datetime("1970-01-01 01:00:00", utc=True) 66 | timestamp = timestamp.tz_convert("Europe/Oslo") 67 | assert timestamp_to_epoch(timestamp) == 3600 68 | 69 | 70 | def test_safe_tagname() -> None: 71 | assert safe_tagname("ASGB.tt-___56_ _%_/_") == "ASGB_tt___56____" 72 | 73 | 74 | def test_get_intervals_from_dataset_name(cache: BucketCache) -> None: 75 | bad_tag = f"/tag1/INT/{START_TIME_1_EPOCH}_{END_TIME_1_EPOCH}" 76 | good_tag = f"/tag1/INT/_{START_TIME_1_EPOCH}_{END_TIME_1_EPOCH}" 77 | start, end = cache._get_intervals_from_dataset_name(bad_tag) 78 | assert start is None 79 | assert end is None # type: ignore[unreachable] 80 | start, end = cache._get_intervals_from_dataset_name(good_tag) 81 | assert start == START_TIME_1 82 | assert end == END_TIME_1 83 | 84 | 85 | def test_key_path_with_time(cache: BucketCache) -> None: 86 | assert ( 87 | cache._key_path( 88 | tagname=TAGNAME, 89 | read_type=READE_TYPE, 90 | ts=MINUTE, 91 | stepped=False, 92 | get_status=False, 93 | start=START_TIME_1, 94 | end=END_TIME_1, 95 | ) 96 | == f"$tag1$INT$s60$_{START_TIME_1_EPOCH}_{END_TIME_1_EPOCH}" 97 | ) 98 | 99 | 100 | def test_key_path_stepped(cache: BucketCache) -> None: 101 | assert ( 102 | cache._key_path( 103 | tagname=TAGNAME, 104 | read_type=READE_TYPE, 105 | ts=MINUTE, 106 | stepped=True, 107 | get_status=False, 108 | start=START_TIME_1, 109 | end=END_TIME_1, 110 | ) 111 | == f"$tag1$INT$s60$stepped$_{START_TIME_1_EPOCH}_{END_TIME_1_EPOCH}" 112 | ) 113 | 114 | 115 | def test_key_path_with_status(cache: BucketCache) -> None: 116 | assert ( 117 | cache._key_path( 118 | tagname=TAGNAME, 119 | read_type=READE_TYPE, 120 | ts=MINUTE, 121 | stepped=False, 122 | get_status=True, 123 | start=None, 124 | end=None, 125 | ) 126 | == "$tag1$INT$s60$status" 127 | ) 128 | 129 | 130 | def test_key_path_raw(cache: BucketCache) -> None: 131 | assert ( 132 | cache._key_path( 133 | tagname=TAGNAME, 134 | read_type=ReaderType.RAW, 135 | ts=MINUTE, 136 | stepped=False, 137 | get_status=False, 138 | start=None, 139 | end=None, 140 | ) 141 | == "$tag1$RAW" 142 | ) 143 | 144 | 145 | def test_get_missing_intervals(cache: BucketCache) -> None: 146 | cache.store( 147 | df=DF1, 148 | tagname=TAGNAME, 149 | read_type=READE_TYPE, 150 | ts=TS, 151 | stepped=False, 152 | get_status=False, 153 | start=START_TIME_1, 154 | end=END_TIME_1, 155 | ) 156 | 157 | 
cache.store( 158 | df=DF2, 159 | tagname=TAGNAME, 160 | read_type=READE_TYPE, 161 | ts=TS, 162 | stepped=False, 163 | get_status=False, 164 | start=START_TIME_2, 165 | end=END_TIME_2, 166 | ) 167 | 168 | # Perfect coverage, no missing intervals 169 | missing_intervals = cache.get_missing_intervals( 170 | tagname=TAGNAME, 171 | read_type=READE_TYPE, 172 | ts=TS, 173 | stepped=False, 174 | get_status=False, 175 | start=START_TIME_1, 176 | end=END_TIME_1, 177 | ) 178 | 179 | assert len(missing_intervals) == 0 180 | 181 | # Request subsection, no missing intervals 182 | missing_intervals = cache.get_missing_intervals( 183 | tagname=TAGNAME, 184 | read_type=READE_TYPE, 185 | ts=TS, 186 | stepped=False, 187 | get_status=False, 188 | start=START_TIME_1 + pd.Timedelta("5m"), 189 | end=END_TIME_1 - pd.Timedelta("5m"), 190 | ) 191 | 192 | assert len(missing_intervals) == 0 193 | 194 | # Request data from before to after, two missing intervals 195 | missing_intervals = cache.get_missing_intervals( 196 | tagname=TAGNAME, 197 | read_type=READE_TYPE, 198 | ts=TS, 199 | stepped=False, 200 | get_status=False, 201 | start=START_TIME_1 - pd.Timedelta("15m"), 202 | end=END_TIME_1 + pd.Timedelta("15m"), 203 | ) 204 | 205 | assert len(missing_intervals) == 2 206 | assert missing_intervals[0] == (START_TIME_1 - pd.Timedelta("15m"), START_TIME_1) 207 | assert missing_intervals[1] == (END_TIME_1, END_TIME_1 + pd.Timedelta("15m")) 208 | 209 | # Request data stretching from before first bucket, including 210 | # space between buckets, to after second bucket. Three missing intervals. 211 | missing_intervals = cache.get_missing_intervals( 212 | tagname=TAGNAME, 213 | read_type=READE_TYPE, 214 | ts=TS, 215 | stepped=False, 216 | get_status=False, 217 | start=START_TIME_1 - pd.Timedelta("15m"), 218 | end=END_TIME_2 + pd.Timedelta("15m"), 219 | ) 220 | 221 | assert len(missing_intervals) == 3 222 | assert missing_intervals[0] == (START_TIME_1 - pd.Timedelta("15m"), START_TIME_1) 223 | assert missing_intervals[1] == (END_TIME_1, START_TIME_2) 224 | assert missing_intervals[2] == (END_TIME_2, END_TIME_2 + pd.Timedelta("15m")) 225 | 226 | 227 | def test_get_intersecting_datasets(cache: BucketCache) -> None: 228 | cache.store( 229 | df=DF1, 230 | tagname=TAGNAME, 231 | read_type=READE_TYPE, 232 | ts=TS, 233 | stepped=False, 234 | get_status=False, 235 | start=START_TIME_1, 236 | end=END_TIME_1, 237 | ) 238 | 239 | cache.store( 240 | df=DF2, 241 | tagname=TAGNAME, 242 | read_type=READE_TYPE, 243 | ts=TS, 244 | stepped=False, 245 | get_status=False, 246 | start=START_TIME_2, 247 | end=END_TIME_2, 248 | ) 249 | 250 | # Perfect coverage 251 | intersecting_datasets = cache.get_intersecting_datasets( 252 | tagname=TAGNAME, 253 | read_type=READE_TYPE, 254 | ts=TS, 255 | stepped=False, 256 | get_status=False, 257 | start=START_TIME_1, 258 | end=END_TIME_1, 259 | ) 260 | 261 | assert len(intersecting_datasets) == 1 262 | 263 | # Request subsection 264 | intersecting_datasets = cache.get_intersecting_datasets( 265 | tagname=TAGNAME, 266 | read_type=READE_TYPE, 267 | ts=TS, 268 | stepped=False, 269 | get_status=False, 270 | start=START_TIME_1 + pd.Timedelta("5m"), 271 | end=END_TIME_1 - pd.Timedelta("5m"), 272 | ) 273 | 274 | assert len(intersecting_datasets) == 1 275 | 276 | # Request data from before to after 277 | intersecting_datasets = cache.get_intersecting_datasets( 278 | tagname=TAGNAME, 279 | read_type=READE_TYPE, 280 | ts=TS, 281 | stepped=False, 282 | get_status=False, 283 | start=START_TIME_1 - pd.Timedelta("15m"), 284 | 
end=END_TIME_1 + pd.Timedelta("15m"), 285 | ) 286 | 287 | assert len(intersecting_datasets) == 1 288 | 289 | # Request data stretching from before first bucket, including 290 | # space between buckets, to after second bucket. 291 | intersecting_datasets = cache.get_intersecting_datasets( 292 | tagname=TAGNAME, 293 | read_type=READE_TYPE, 294 | ts=TS, 295 | stepped=False, 296 | get_status=False, 297 | start=START_TIME_1 - pd.Timedelta("15m"), 298 | end=END_TIME_2 + pd.Timedelta("15m"), 299 | ) 300 | 301 | assert len(intersecting_datasets) == 2 302 | 303 | # Request data stretching from before first bucket, to 304 | # inside second bucket. 305 | intersecting_datasets = cache.get_intersecting_datasets( 306 | tagname=TAGNAME, 307 | read_type=READE_TYPE, 308 | ts=TS, 309 | stepped=False, 310 | get_status=False, 311 | start=START_TIME_1 - pd.Timedelta("15m"), 312 | end=END_TIME_2 - pd.Timedelta("15m"), 313 | ) 314 | 315 | assert len(intersecting_datasets) == 2 316 | 317 | # Request data stretching from inside first bucket, to 318 | # inside second bucket. 319 | intersecting_datasets = cache.get_intersecting_datasets( 320 | tagname=TAGNAME, 321 | read_type=READE_TYPE, 322 | ts=TS, 323 | stepped=False, 324 | get_status=False, 325 | start=START_TIME_1 + pd.Timedelta("15m"), 326 | end=END_TIME_2 - pd.Timedelta("15m"), 327 | ) 328 | 329 | assert len(intersecting_datasets) == 2 330 | 331 | 332 | def test_store_metadata(cache: BucketCache) -> None: 333 | cache.put_metadata(key=TAGNAME, value={"unit": "%", "desc": "Some description"}) 334 | cache.put_metadata(key=TAGNAME, value={"max": 60}) 335 | r = cache.get_metadata(TAGNAME, "unit") 336 | assert isinstance(r, dict) 337 | assert "%" == r["unit"] 338 | r = cache.get_metadata(TAGNAME, ["unit", "max", "noworky"]) 339 | assert isinstance(r, dict) 340 | assert "%" == r["unit"] 341 | assert 60 == r["max"] 342 | assert "noworky" not in r 343 | 344 | 345 | def test_store_empty_df(cache: BucketCache) -> None: 346 | # Empty dataframes should not be stored (note: df full of NaN is not empty!) 
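    # Illustrative sketch of that distinction in plain pandas (the guard inside
    # BucketCache.store is assumed to check something equivalent to `df.empty`):
    #
    #     pd.DataFrame({TAGNAME: []}).empty              # True  -> not stored
    #     pd.DataFrame({TAGNAME: [float("nan")]}).empty  # False -> stored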
347 | df = pd.DataFrame({TAGNAME: []}) 348 | cache.store( 349 | df=df, 350 | tagname=TAGNAME, 351 | read_type=READE_TYPE, 352 | ts=TS, 353 | stepped=False, 354 | get_status=False, 355 | start=START_TIME_1, 356 | end=END_TIME_1, 357 | ) # Specify ts to ensure correct key /if/ stored 358 | df_read = cache.fetch( 359 | tagname=TAGNAME, 360 | read_type=READE_TYPE, 361 | ts=TS, 362 | stepped=False, 363 | get_status=False, 364 | start=START_TIME_1, 365 | end=END_TIME_1, 366 | ) 367 | pd.testing.assert_frame_equal(df_read, pd.DataFrame()) 368 | 369 | cache.store( 370 | df=DF1, 371 | tagname=TAGNAME, 372 | read_type=READE_TYPE, 373 | ts=TS, 374 | stepped=False, 375 | get_status=False, 376 | start=START_TIME_1, 377 | end=END_TIME_1, 378 | ) 379 | df_read = cache.fetch( 380 | tagname=TAGNAME, 381 | read_type=READE_TYPE, 382 | ts=TS, 383 | stepped=False, 384 | get_status=False, 385 | start=START_TIME_1, 386 | end=END_TIME_1, 387 | ) 388 | pd.testing.assert_frame_equal(DF1, df_read, check_freq=False) 389 | 390 | cache.store( 391 | df=df, 392 | tagname=TAGNAME, 393 | read_type=READE_TYPE, 394 | ts=TS, 395 | stepped=False, 396 | get_status=False, 397 | start=START_TIME_1, 398 | end=END_TIME_1, 399 | ) # Specify ts to ensure correct key /if/ stored 400 | df_read = cache.fetch( 401 | tagname=TAGNAME, 402 | read_type=READE_TYPE, 403 | ts=TS, 404 | stepped=False, 405 | get_status=False, 406 | start=START_TIME_1, 407 | end=END_TIME_1, 408 | ) 409 | pd.testing.assert_frame_equal(DF1, df_read, check_freq=False) 410 | 411 | 412 | def test_store_single_df(cache: BucketCache) -> None: 413 | cache.store( 414 | df=DF1, 415 | tagname=TAGNAME, 416 | read_type=READE_TYPE, 417 | ts=TS, 418 | stepped=False, 419 | get_status=False, 420 | start=START_TIME_1, 421 | end=END_TIME_1, 422 | ) 423 | df_read = cache.fetch( 424 | tagname=TAGNAME, 425 | read_type=READE_TYPE, 426 | ts=TS, 427 | stepped=False, 428 | get_status=False, 429 | start=START_TIME_1, 430 | end=END_TIME_1, 431 | ) 432 | pd.testing.assert_frame_equal(DF1, df_read, check_freq=False) 433 | 434 | 435 | def test_fetch(cache: BucketCache) -> None: 436 | cache.store( 437 | df=DF1, 438 | tagname=TAGNAME, 439 | read_type=READE_TYPE, 440 | ts=TS, 441 | stepped=False, 442 | get_status=False, 443 | start=START_TIME_1, 444 | end=END_TIME_1, 445 | ) 446 | cache.store( 447 | df=DF2, 448 | tagname=TAGNAME, 449 | read_type=READE_TYPE, 450 | ts=TS, 451 | stepped=False, 452 | get_status=False, 453 | start=START_TIME_2, 454 | end=END_TIME_2, 455 | ) 456 | 457 | df_read = cache.fetch( 458 | tagname=TAGNAME, 459 | read_type=READE_TYPE, 460 | ts=TS, 461 | stepped=False, 462 | get_status=False, 463 | start=START_TIME_1, 464 | end=END_TIME_1 - pd.Timedelta("15m"), 465 | ) 466 | pd.testing.assert_frame_equal( 467 | DF1.loc[START_TIME_1 : END_TIME_1 - pd.Timedelta("15m")], 468 | df_read, 469 | check_freq=False, 470 | ) 471 | 472 | df_read = cache.fetch( 473 | tagname=TAGNAME, 474 | read_type=READE_TYPE, 475 | ts=TS, 476 | stepped=False, 477 | get_status=False, 478 | start=START_TIME_1 - pd.Timedelta("15m"), 479 | end=END_TIME_1 + pd.Timedelta("15m"), 480 | ) 481 | pd.testing.assert_frame_equal(DF1, df_read, check_freq=False) 482 | 483 | df_read = cache.fetch( 484 | tagname=TAGNAME, 485 | read_type=READE_TYPE, 486 | ts=TS, 487 | stepped=False, 488 | get_status=False, 489 | start=START_TIME_1 - pd.Timedelta("15m"), 490 | end=END_TIME_2 + pd.Timedelta("15m"), 491 | ) 492 | pd.testing.assert_frame_equal(pd.concat([DF1, DF2]), df_read, check_freq=False) 493 | 494 | 495 | def 
test_store_overlapping_df(cache: BucketCache) -> None: 496 | cache.store( 497 | df=DF1, 498 | tagname=TAGNAME, 499 | read_type=READE_TYPE, 500 | ts=TS, 501 | stepped=False, 502 | get_status=False, 503 | start=START_TIME_1, 504 | end=END_TIME_1, 505 | ) 506 | cache.store( 507 | df=DF2, 508 | tagname=TAGNAME, 509 | read_type=READE_TYPE, 510 | ts=TS, 511 | stepped=False, 512 | get_status=False, 513 | start=START_TIME_2, 514 | end=END_TIME_2, 515 | ) 516 | cache.store( 517 | df=DF3, 518 | tagname=TAGNAME, 519 | read_type=READE_TYPE, 520 | ts=TS, 521 | stepped=False, 522 | get_status=False, 523 | start=START_TIME_3, 524 | end=END_TIME_3, 525 | ) 526 | leaves = None 527 | for key in cache.iterkeys(): 528 | if len(key) > 0: 529 | leaves = key 530 | _, start, end = leaves.split("_") # type: ignore[union-attr] 531 | assert int(start) == START_TIME_1_EPOCH 532 | assert int(end) == END_TIME_2_EPOCH 533 | df_read = cache.fetch( 534 | tagname=TAGNAME, 535 | read_type=READE_TYPE, 536 | ts=TS, 537 | stepped=False, 538 | get_status=False, 539 | start=START_TIME_1, 540 | end=END_TIME_2, 541 | ) 542 | df_expected = pd.concat( 543 | [ 544 | DF1[START_TIME_1 : START_TIME_3 - pd.Timedelta(TS, unit="s")], 545 | DF3[START_TIME_3:END_TIME_3], 546 | DF2[END_TIME_3 + pd.Timedelta(TS, unit="s") : END_TIME_2], 547 | ] 548 | ) 549 | 550 | pd.testing.assert_frame_equal( 551 | df_read, 552 | df_expected, 553 | check_freq=False, 554 | ) 555 | -------------------------------------------------------------------------------- /tests/test_cache.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import timedelta 3 | from pathlib import Path 4 | from typing import Generator 5 | 6 | import pandas as pd 7 | import pytest 8 | 9 | from tagreader.cache import BaseCache, SmartCache, safe_tagname 10 | from tagreader.utils import ReaderType 11 | 12 | os.environ["NUMEXPR_MAX_THREADS"] = "8" 13 | MINUTE = timedelta(seconds=60) 14 | 15 | 16 | @pytest.fixture # type: ignore[misc] 17 | def data() -> Generator[pd.DataFrame, None, None]: 18 | length = 10 19 | df_total = pd.DataFrame( 20 | {"tag1": range(0, length)}, 21 | index=pd.date_range( 22 | start="2018-01-18 05:00:00", freq="60s", periods=length, name="time" 23 | ), 24 | ) 25 | yield df_total 26 | 27 | 28 | def test_base_cache(tmp_path: Path) -> None: 29 | webidcache = BaseCache(directory=tmp_path) 30 | 31 | webid = "F1DPwgwnpmLxqECAJV2HpxdobgmQIAAAUElMQUIuRVFVSU5PUi5DT01cMTMyMC9BSU0sMTctVFQtNzE5Ng" 32 | tag = "example_tag_name" 33 | webidcache[tag] = webid 34 | 35 | del webidcache 36 | 37 | webidcache = BaseCache(directory=tmp_path) 38 | 39 | assert "example_tag_name" in webidcache 40 | assert webidcache["example_tag_name"] == webid 41 | 42 | 43 | def test_safe_tagname() -> None: 44 | assert safe_tagname("ASGB.tt-___56_ _%_/_") == "ASGB_tt___56____" 45 | 46 | 47 | def test_key_path(cache: SmartCache) -> None: 48 | assert cache.size_limit == int(4e9) 49 | 50 | 51 | def test_cache_single_store_and_fetch( 52 | cache: SmartCache, data: pd.DataFrame, get_status: bool = False 53 | ) -> None: 54 | cache.store( 55 | df=data, 56 | read_type=ReaderType.INT, 57 | get_status=get_status, 58 | tagname="tag1", 59 | ts=MINUTE, 60 | ) 61 | df_read = cache.fetch( 62 | tagname="tag1", 63 | read_type=ReaderType.INT, 64 | ts=MINUTE, 65 | get_status=get_status, 66 | start=None, 67 | end=None, 68 | ) 69 | pd.testing.assert_frame_equal(data, df_read) 70 | 71 | 72 | def test_cache_multiple_store_single_fetch( 73 | cache: SmartCache, data: 
pd.DataFrame, get_status: bool = False 74 | ) -> None: 75 | df1 = data[0:3] 76 | df2 = data[2:10] 77 | cache.store( 78 | df=df1, read_type=ReaderType.INT, tagname="tag1", ts=MINUTE, get_status=False 79 | ) 80 | cache.store( 81 | df=df2, read_type=ReaderType.INT, tagname="tag1", ts=MINUTE, get_status=False 82 | ) 83 | df_read = cache.fetch( 84 | tagname="tag1", 85 | read_type=ReaderType.INT, 86 | ts=MINUTE, 87 | get_status=False, 88 | start=None, 89 | end=None, 90 | ) 91 | pd.testing.assert_frame_equal(df_read, data) 92 | 93 | 94 | def test_interval_reads( 95 | cache: SmartCache, data: pd.DataFrame, get_status: bool = False 96 | ) -> None: 97 | cache.store( 98 | df=data, 99 | read_type=ReaderType.INT, 100 | get_status=get_status, 101 | tagname="tag1", 102 | ts=MINUTE, 103 | ) 104 | start_oob = pd.to_datetime("2018-01-18 04:55:00") 105 | start = pd.to_datetime("2018-01-18 05:05:00") 106 | end = pd.to_datetime("2018-01-18 05:08:00") 107 | end_oob = pd.to_datetime("2018-01-18 06:00:00") 108 | 109 | df_read = cache.fetch( 110 | tagname="tag1", 111 | read_type=ReaderType.INT, 112 | ts=MINUTE, 113 | start=start, 114 | end=None, 115 | get_status=get_status, 116 | ) 117 | pd.testing.assert_frame_equal(data[start:], df_read) 118 | df_read = cache.fetch( 119 | tagname="tag1", 120 | read_type=ReaderType.INT, 121 | ts=MINUTE, 122 | start=None, 123 | end=end, 124 | get_status=get_status, 125 | ) 126 | pd.testing.assert_frame_equal(data[:end], df_read) 127 | df_read = cache.fetch( 128 | tagname="tag1", 129 | read_type=ReaderType.INT, 130 | ts=MINUTE, 131 | start=start_oob, 132 | end=None, 133 | get_status=get_status, 134 | ) 135 | pd.testing.assert_frame_equal(data, df_read) 136 | df_read = cache.fetch( 137 | tagname="tag1", 138 | read_type=ReaderType.INT, 139 | ts=MINUTE, 140 | start=None, 141 | end=end_oob, 142 | get_status=get_status, 143 | ) 144 | pd.testing.assert_frame_equal(data, df_read) 145 | df_read = cache.fetch( 146 | tagname="tag1", 147 | read_type=ReaderType.INT, 148 | ts=MINUTE, 149 | start=start, 150 | end=end, 151 | get_status=get_status, 152 | ) 153 | pd.testing.assert_frame_equal(data[start:end], df_read) 154 | 155 | 156 | def test_store_empty_df( 157 | cache: SmartCache, data: pd.DataFrame, get_status: bool = False 158 | ) -> None: 159 | # Empty dataframes should not be stored (note: df full of NaN is not empty!) 
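    # Flow below: store real data first, then attempt to store an empty frame
    # under the same key, and verify that the previously cached data survives.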
160 | cache.store( 161 | df=data, 162 | read_type=ReaderType.INT, 163 | get_status=get_status, 164 | tagname="tag1", 165 | ts=MINUTE, 166 | ) 167 | df = pd.DataFrame({"tag1": []}) 168 | cache.store( 169 | df=df, read_type=ReaderType.INT, ts=MINUTE, tagname="tag1", get_status=False 170 | ) # Specify ts to ensure correct key /if/ stored 171 | df_read = cache.fetch( 172 | tagname="tag1", 173 | read_type=ReaderType.INT, 174 | get_status=get_status, 175 | ts=MINUTE, 176 | start=None, 177 | end=None, 178 | ) 179 | pd.testing.assert_frame_equal(data, df_read) 180 | 181 | 182 | def test_store_metadata(cache: SmartCache) -> None: 183 | cache.put_metadata("tag1", {"unit": "%", "desc": "Some description"}) 184 | cache.put_metadata("tag1", {"max": 60}) 185 | r = cache.get_metadata("tag1", "unit") 186 | assert isinstance(r, dict) 187 | assert "%" == r["unit"] 188 | r = cache.get_metadata("tag1", ["unit", "max", "noworky"]) 189 | assert isinstance(r, dict) 190 | assert "%" == r["unit"] 191 | assert 60 == r["max"] 192 | assert "noworky" not in r 193 | 194 | 195 | def test_to_dst_skips_time(cache: SmartCache, get_status: bool = False) -> None: 196 | index = pd.date_range( 197 | start="2018-03-25 01:50:00", 198 | end="2018-03-25 03:30:00", 199 | tz="Europe/Oslo", 200 | freq="600s", 201 | name="time", 202 | ) 203 | index.freq = None # type: ignore[misc] 204 | df = pd.DataFrame({"tag1": range(0, len(index))}, index=index) 205 | assert ( 206 | df.loc["2018-03-25 01:50:00":"2018-03-25 03:10:00"].size == (2 + 1 * 6 + 1) - 6 # type: ignore[misc] 207 | ) 208 | cache.store( 209 | df=df, 210 | read_type=ReaderType.INT, 211 | get_status=get_status, 212 | tagname="tag1", 213 | ts=MINUTE, 214 | ) 215 | df_read = cache.fetch( 216 | tagname="tag1", 217 | read_type=ReaderType.INT, 218 | ts=MINUTE, 219 | get_status=get_status, 220 | start=None, 221 | end=None, 222 | ) 223 | pd.testing.assert_frame_equal(df_read, df) 224 | 225 | 226 | def test_from_dst_folds_time(cache: SmartCache, get_status: bool = False) -> None: 227 | index = pd.date_range( 228 | start="2017-10-29 00:30:00", 229 | end="2017-10-29 04:30:00", 230 | tz="Europe/Oslo", 231 | freq="600s", 232 | name="time", 233 | ) 234 | index.freq = None # type: ignore[misc] 235 | df = pd.DataFrame({"tag1": range(0, len(index))}, index=index) 236 | assert len(df) == (4 + 1) * 6 + 1 237 | # Time exists inside fold: 238 | assert ( 239 | df["tag1"].loc["2017-10-29 01:10:00+02:00":"2017-10-29 01:50:00+02:00"].size # type: ignore[misc] 240 | == 5 241 | ) 242 | # Time inside fold is always included: 243 | assert ( 244 | df.loc["2017-10-29 01:50:00":"2017-10-29 03:10:00"].size == 2 + (1 + 1) * 6 + 1 # type: ignore[misc] 245 | ) 246 | cache.store( 247 | df=df, 248 | read_type=ReaderType.INT, 249 | get_status=get_status, 250 | tagname="tag1", 251 | ts=MINUTE, 252 | ) 253 | df_read = cache.fetch( 254 | tagname="tag1", 255 | read_type=ReaderType.INT, 256 | ts=MINUTE, 257 | get_status=get_status, 258 | start=None, 259 | end=None, 260 | ) 261 | pd.testing.assert_frame_equal(df_read, df) 262 | -------------------------------------------------------------------------------- /tests/test_clients.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | 3 | import pandas as pd 4 | import pytest 5 | import pytz 6 | 7 | from tagreader.clients import IMSClient, get_missing_intervals, get_next_timeslice 8 | from tagreader.utils import IMSType, ReaderType 9 | 10 | 11 | def test_init_client_without_cache() -> None: 12 | 
client = IMSClient(datasource="mock", imstype=IMSType.PIWEBAPI, cache=None) 13 | assert not client.cache 14 | 15 | 16 | def test_init_client_with_tzinfo() -> None: 17 | """ 18 | Currently testing valid timezone 19 | """ 20 | client = IMSClient( 21 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="US/Eastern" 22 | ) 23 | print(client.tz) 24 | assert client.tz == pytz.timezone("US/Eastern") 25 | 26 | client = IMSClient( 27 | datasource="mock", 28 | imstype=IMSType.PIWEBAPI, 29 | cache=None, 30 | tz=pytz.timezone("US/Eastern"), 31 | ) 32 | print(client.tz) 33 | assert client.tz == pytz.timezone("US/Eastern") 34 | 35 | client = IMSClient( 36 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="Europe/Oslo" 37 | ) 38 | print(client.tz) 39 | assert client.tz == pytz.timezone("Europe/Oslo") 40 | 41 | client = IMSClient( 42 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="US/Central" 43 | ) 44 | print(client.tz) 45 | assert client.tz == pytz.timezone("US/Central") 46 | 47 | client = IMSClient(datasource="mock", imstype=IMSType.PIWEBAPI, cache=None) 48 | print(client.tz) 49 | assert client.tz == pytz.timezone("Europe/Oslo") 50 | 51 | with pytest.raises(ValueError): 52 | _ = IMSClient( 53 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="WRONGVALUE" 54 | ) 55 | 56 | 57 | def test_init_client_with_datasource() -> None: 58 | """ 59 | Currently we initialize SmartCache by default, and the user is not able to specify no-cache when creating the 60 | client. This will change to no cache by default in version 5. 61 | """ 62 | client = IMSClient( 63 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="US/Eastern" 64 | ) 65 | print(client.tz) 66 | assert client.tz == pytz.timezone("US/Eastern") 67 | client = IMSClient( 68 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="US/Central" 69 | ) 70 | print(client.tz) 71 | assert client.tz == pytz.timezone("US/Central") 72 | client = IMSClient(datasource="mock", imstype=IMSType.PIWEBAPI, cache=None) 73 | print(client.tz) 74 | assert client.tz == pytz.timezone("Europe/Oslo") 75 | with pytest.raises(ValueError): 76 | _ = IMSClient( 77 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="WRONGVALUE" 78 | ) 79 | 80 | 81 | def test_get_next_timeslice() -> None: 82 | start = pd.to_datetime("2018-01-02 14:00:00") 83 | end = pd.to_datetime("2018-01-02 14:15:00") 84 | # taglist = ['tag1', 'tag2', 'tag3'] 85 | ts = timedelta(seconds=60) 86 | res = get_next_timeslice(start=start, end=end, ts=ts, max_steps=20) 87 | assert start, start + timedelta(seconds=6) == res 88 | res = get_next_timeslice(start=start, end=end, ts=ts, max_steps=100000) 89 | assert start, end == res 90 | 91 | 92 | def test_get_missing_intervals() -> None: 93 | length = 10 94 | ts = 60 95 | data = {"tag1": range(0, length)} 96 | idx = pd.date_range( 97 | start="2018-01-18 05:00:00", freq=f"{ts}s", periods=length, name="time" 98 | ) 99 | df_total = pd.DataFrame(data, index=idx) 100 | df = pd.concat([df_total.iloc[0:2], df_total.iloc[3:4], df_total.iloc[8:]]) 101 | missing = get_missing_intervals( 102 | df=df, 103 | start=datetime(2018, 1, 18, 5, 0, 0), 104 | end=datetime(2018, 1, 18, 6, 0, 0), 105 | ts=timedelta(seconds=ts), 106 | read_type=ReaderType.INT, 107 | ) 108 | assert missing[0] == (idx[2], idx[2]) 109 | assert missing[1] == (idx[4], idx[7]) 110 | assert missing[2] == ( 111 | datetime(2018, 1, 18, 5, 10, 0), 112 | datetime(2018, 1, 18, 6, 0, 0), 113 | ) 114 | 
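# Illustrative timeline for test_get_missing_intervals above, derived from its
# asserts (get_missing_intervals is assumed to return gaps as (start, end) tuples):
#
#   time:     05:00 05:01 05:02 05:03 05:04 .. 05:07 05:08 05:09   ->   06:00
#   present:    x     x     .     x     .   ..   .     x     x
#   gaps:     (idx[2], idx[2]), (idx[4], idx[7]), (05:10:00, 06:00:00)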
-------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import pandas as pd 5 | import pytz 6 | from pytz import timezone 7 | 8 | from tagreader.utils import ensure_datetime_with_tz, is_equinor, urljoin 9 | 10 | is_GITHUBACTION = "GITHUB_ACTION" in os.environ 11 | is_AZUREPIPELINE = "TF_BUILD" in os.environ 12 | 13 | 14 | def test_ensure_is_datetime_string() -> None: 15 | assert ensure_datetime_with_tz("10. jan. 2018 13:45:15") == timezone( 16 | "Europe/Oslo" 17 | ).localize(datetime.datetime(2018, 1, 10, 13, 45, 15)) 18 | assert ensure_datetime_with_tz("01.02.03 00:00:00") == timezone( 19 | "Europe/Oslo" 20 | ).localize(datetime.datetime(2003, 2, 1, 0, 0, 0)) 21 | assert ensure_datetime_with_tz("02.01.03 00:00:00") == ensure_datetime_with_tz( 22 | "2003-02-01 0:00:00am" 23 | ) 24 | assert ensure_datetime_with_tz( 25 | "02.01.03 00:00:00", pytz.timezone("America/Sao_Paulo") 26 | ) == timezone("America/Sao_Paulo").localize(datetime.datetime(2003, 1, 2, 0, 0, 0)) 27 | assert ensure_datetime_with_tz( 28 | "02.01.03 00:00:00", pytz.timezone("Brazil/East") 29 | ) == timezone("Brazil/East").localize(datetime.datetime(2003, 1, 2, 0, 0, 0)) 30 | assert ensure_datetime_with_tz( 31 | timezone("Brazil/East").localize(datetime.datetime(2003, 1, 2, 0, 0, 0)), 32 | pytz.timezone("Brazil/East"), 33 | ) == timezone("Brazil/East").localize(datetime.datetime(2003, 1, 2, 0, 0, 0)) 34 | 35 | 36 | def test_ensure_is_datetime_pd_timestamp() -> None: 37 | ts = datetime.datetime(2018, 1, 10, 13, 45, 15) 38 | ts_with_tz = timezone("Europe/Oslo").localize(ts) 39 | assert ensure_datetime_with_tz(ts_with_tz) == ts_with_tz 40 | assert ensure_datetime_with_tz(ts) == ts_with_tz 41 | 42 | 43 | def test_ensure_is_datetime_datetime() -> None: 44 | dt = datetime.datetime(2018, 1, 10, 13, 45, 15) 45 | dt_with_tz = timezone("Europe/Oslo").localize(dt) 46 | 47 | assert ensure_datetime_with_tz(dt_with_tz) == dt_with_tz 48 | assert ensure_datetime_with_tz(dt) == dt_with_tz 49 | 50 | 51 | def test_urljoin() -> None: 52 | assert urljoin("https://some.where/to", "go") == "https://some.where/to/go" 53 | assert urljoin("https://some.where/to/", "go") == "https://some.where/to/go" 54 | assert urljoin("https://some.where/to", "/go") == "https://some.where/to/go" 55 | assert urljoin("https://some.where/to/", "/go") == "https://some.where/to/go" 56 | assert urljoin("https://some.where/to", "go/") == "https://some.where/to/go/" 57 | 58 | 59 | def test_equinor() -> None: 60 | if is_GITHUBACTION: 61 | assert is_equinor() is False 62 | else: 63 | assert is_equinor() is True 64 | --------------------------------------------------------------------------------